diff --git "a/RoboInterVLM_llava_one_vision_7B/trainer_state.json" "b/RoboInterVLM_llava_one_vision_7B/trainer_state.json" deleted file mode 100644--- "a/RoboInterVLM_llava_one_vision_7B/trainer_state.json" +++ /dev/null @@ -1,137685 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.0, - "eval_steps": 500, - "global_step": 19665, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "grad_norm": 8.557192575617696, - "learning_rate": 3.3898305084745764e-08, - "loss": 0.8805, - "step": 1 - }, - { - "epoch": 0.0, - "grad_norm": 9.06872712778614, - "learning_rate": 6.779661016949153e-08, - "loss": 0.8498, - "step": 2 - }, - { - "epoch": 0.0, - "grad_norm": 8.14569355426566, - "learning_rate": 1.0169491525423729e-07, - "loss": 0.8802, - "step": 3 - }, - { - "epoch": 0.0, - "grad_norm": 9.2812101013113, - "learning_rate": 1.3559322033898305e-07, - "loss": 0.8447, - "step": 4 - }, - { - "epoch": 0.0, - "grad_norm": 8.22501739492148, - "learning_rate": 1.6949152542372883e-07, - "loss": 0.8661, - "step": 5 - }, - { - "epoch": 0.0, - "grad_norm": 9.107731386918358, - "learning_rate": 2.0338983050847458e-07, - "loss": 0.9148, - "step": 6 - }, - { - "epoch": 0.0, - "grad_norm": 8.537106942847725, - "learning_rate": 2.3728813559322036e-07, - "loss": 0.7615, - "step": 7 - }, - { - "epoch": 0.0, - "grad_norm": 9.780092872367305, - "learning_rate": 2.711864406779661e-07, - "loss": 0.8196, - "step": 8 - }, - { - "epoch": 0.0, - "grad_norm": 8.790315313064749, - "learning_rate": 3.050847457627119e-07, - "loss": 0.859, - "step": 9 - }, - { - "epoch": 0.0, - "grad_norm": 8.841389331094158, - "learning_rate": 3.3898305084745766e-07, - "loss": 0.8597, - "step": 10 - }, - { - "epoch": 0.0, - "grad_norm": 9.054485774470677, - "learning_rate": 3.7288135593220347e-07, - "loss": 0.8975, - "step": 11 - }, - { - "epoch": 0.0, - "grad_norm": 9.348866446220944, - "learning_rate": 4.0677966101694916e-07, - "loss": 0.8716, - "step": 12 - }, - { - "epoch": 0.0, - "grad_norm": 8.375820124871165, - "learning_rate": 4.4067796610169497e-07, - "loss": 0.8262, - "step": 13 - }, - { - "epoch": 0.0, - "grad_norm": 8.489161661750288, - "learning_rate": 4.745762711864407e-07, - "loss": 0.8317, - "step": 14 - }, - { - "epoch": 0.0, - "grad_norm": 7.647864966680701, - "learning_rate": 5.084745762711865e-07, - "loss": 0.8068, - "step": 15 - }, - { - "epoch": 0.0, - "grad_norm": 11.25567460340579, - "learning_rate": 5.423728813559322e-07, - "loss": 0.8784, - "step": 16 - }, - { - "epoch": 0.0, - "grad_norm": 6.243574981713614, - "learning_rate": 5.76271186440678e-07, - "loss": 0.7368, - "step": 17 - }, - { - "epoch": 0.0, - "grad_norm": 6.459065444090269, - "learning_rate": 6.101694915254238e-07, - "loss": 0.7662, - "step": 18 - }, - { - "epoch": 0.0, - "grad_norm": 7.014917635106802, - "learning_rate": 6.440677966101695e-07, - "loss": 0.7592, - "step": 19 - }, - { - "epoch": 0.0, - "grad_norm": 6.860940141421972, - "learning_rate": 6.779661016949153e-07, - "loss": 0.731, - "step": 20 - }, - { - "epoch": 0.0, - "grad_norm": 6.185099928998005, - "learning_rate": 7.118644067796611e-07, - "loss": 0.6707, - "step": 21 - }, - { - "epoch": 0.0, - "grad_norm": 5.23904808480235, - "learning_rate": 7.457627118644069e-07, - "loss": 0.7006, - "step": 22 - }, - { - "epoch": 0.0, - "grad_norm": 5.327549403384544, - "learning_rate": 7.796610169491527e-07, - "loss": 0.6307, - "step": 23 - }, - { - "epoch": 0.0, - "grad_norm": 4.097537794801515, - "learning_rate": 8.135593220338983e-07, - "loss": 0.6203, - "step": 24 - }, - { - "epoch": 0.0, - "grad_norm": 4.377204730201115, - "learning_rate": 8.474576271186441e-07, - "loss": 0.5821, - "step": 25 - }, - { - "epoch": 0.0, - "grad_norm": 4.358088694702184, - "learning_rate": 8.813559322033899e-07, - "loss": 0.602, - "step": 26 - }, - { - "epoch": 0.0, - "grad_norm": 4.197286363634574, - "learning_rate": 9.152542372881357e-07, - "loss": 0.6424, - "step": 27 - }, - { - "epoch": 0.0, - "grad_norm": 3.799809603049265, - "learning_rate": 9.491525423728814e-07, - "loss": 0.5751, - "step": 28 - }, - { - "epoch": 0.0, - "grad_norm": 3.383671404077906, - "learning_rate": 9.830508474576272e-07, - "loss": 0.5296, - "step": 29 - }, - { - "epoch": 0.0, - "grad_norm": 4.216187266889182, - "learning_rate": 1.016949152542373e-06, - "loss": 0.536, - "step": 30 - }, - { - "epoch": 0.0, - "grad_norm": 3.3106453196134393, - "learning_rate": 1.0508474576271187e-06, - "loss": 0.5286, - "step": 31 - }, - { - "epoch": 0.0, - "grad_norm": 2.618350813260054, - "learning_rate": 1.0847457627118644e-06, - "loss": 0.5057, - "step": 32 - }, - { - "epoch": 0.0, - "grad_norm": 2.5217817090570738, - "learning_rate": 1.1186440677966102e-06, - "loss": 0.4745, - "step": 33 - }, - { - "epoch": 0.0, - "grad_norm": 2.3533426940286937, - "learning_rate": 1.152542372881356e-06, - "loss": 0.4211, - "step": 34 - }, - { - "epoch": 0.0, - "grad_norm": 2.61696040904103, - "learning_rate": 1.186440677966102e-06, - "loss": 0.4285, - "step": 35 - }, - { - "epoch": 0.0, - "grad_norm": 2.456578510501274, - "learning_rate": 1.2203389830508477e-06, - "loss": 0.4165, - "step": 36 - }, - { - "epoch": 0.0, - "grad_norm": 2.6810354384934776, - "learning_rate": 1.2542372881355932e-06, - "loss": 0.4464, - "step": 37 - }, - { - "epoch": 0.0, - "grad_norm": 2.387681024180993, - "learning_rate": 1.288135593220339e-06, - "loss": 0.4644, - "step": 38 - }, - { - "epoch": 0.0, - "grad_norm": 2.4467785580460735, - "learning_rate": 1.322033898305085e-06, - "loss": 0.4539, - "step": 39 - }, - { - "epoch": 0.0, - "grad_norm": 2.4529315649495667, - "learning_rate": 1.3559322033898307e-06, - "loss": 0.4166, - "step": 40 - }, - { - "epoch": 0.0, - "grad_norm": 3.212819968943549, - "learning_rate": 1.3898305084745764e-06, - "loss": 0.3938, - "step": 41 - }, - { - "epoch": 0.0, - "grad_norm": 1.863910204540751, - "learning_rate": 1.4237288135593222e-06, - "loss": 0.3991, - "step": 42 - }, - { - "epoch": 0.0, - "grad_norm": 1.6764048559205706, - "learning_rate": 1.457627118644068e-06, - "loss": 0.372, - "step": 43 - }, - { - "epoch": 0.0, - "grad_norm": 1.799219722876445, - "learning_rate": 1.4915254237288139e-06, - "loss": 0.3803, - "step": 44 - }, - { - "epoch": 0.0, - "grad_norm": 1.9642819860180136, - "learning_rate": 1.5254237288135596e-06, - "loss": 0.39, - "step": 45 - }, - { - "epoch": 0.0, - "grad_norm": 2.102560995531875, - "learning_rate": 1.5593220338983054e-06, - "loss": 0.399, - "step": 46 - }, - { - "epoch": 0.0, - "grad_norm": 2.2717682014897997, - "learning_rate": 1.593220338983051e-06, - "loss": 0.3334, - "step": 47 - }, - { - "epoch": 0.0, - "grad_norm": 1.8451916643138015, - "learning_rate": 1.6271186440677967e-06, - "loss": 0.3806, - "step": 48 - }, - { - "epoch": 0.0, - "grad_norm": 2.791231995618392, - "learning_rate": 1.6610169491525424e-06, - "loss": 0.3786, - "step": 49 - }, - { - "epoch": 0.0, - "grad_norm": 1.8661867054992236, - "learning_rate": 1.6949152542372882e-06, - "loss": 0.3763, - "step": 50 - }, - { - "epoch": 0.0, - "grad_norm": 1.9794021888687487, - "learning_rate": 1.728813559322034e-06, - "loss": 0.3371, - "step": 51 - }, - { - "epoch": 0.0, - "grad_norm": 2.224468293642754, - "learning_rate": 1.7627118644067799e-06, - "loss": 0.3475, - "step": 52 - }, - { - "epoch": 0.0, - "grad_norm": 2.31298307443876, - "learning_rate": 1.7966101694915256e-06, - "loss": 0.3692, - "step": 53 - }, - { - "epoch": 0.0, - "grad_norm": 1.8429593134990803, - "learning_rate": 1.8305084745762714e-06, - "loss": 0.336, - "step": 54 - }, - { - "epoch": 0.0, - "grad_norm": 1.8510671554362415, - "learning_rate": 1.8644067796610171e-06, - "loss": 0.3369, - "step": 55 - }, - { - "epoch": 0.0, - "grad_norm": 2.0884605066914403, - "learning_rate": 1.8983050847457629e-06, - "loss": 0.3531, - "step": 56 - }, - { - "epoch": 0.0, - "grad_norm": 1.7766166145814422, - "learning_rate": 1.932203389830509e-06, - "loss": 0.3224, - "step": 57 - }, - { - "epoch": 0.0, - "grad_norm": 2.00091805426027, - "learning_rate": 1.9661016949152544e-06, - "loss": 0.3207, - "step": 58 - }, - { - "epoch": 0.0, - "grad_norm": 1.671445434483612, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.3473, - "step": 59 - }, - { - "epoch": 0.0, - "grad_norm": 1.8089600378647877, - "learning_rate": 2.033898305084746e-06, - "loss": 0.3158, - "step": 60 - }, - { - "epoch": 0.0, - "grad_norm": 1.857053304370097, - "learning_rate": 2.0677966101694914e-06, - "loss": 0.3353, - "step": 61 - }, - { - "epoch": 0.0, - "grad_norm": 1.8869217170364412, - "learning_rate": 2.1016949152542374e-06, - "loss": 0.2722, - "step": 62 - }, - { - "epoch": 0.0, - "grad_norm": 1.6325695751917972, - "learning_rate": 2.1355932203389833e-06, - "loss": 0.3661, - "step": 63 - }, - { - "epoch": 0.0, - "grad_norm": 1.6938194400308952, - "learning_rate": 2.169491525423729e-06, - "loss": 0.3112, - "step": 64 - }, - { - "epoch": 0.0, - "grad_norm": 3.0006172337912416, - "learning_rate": 2.203389830508475e-06, - "loss": 0.3639, - "step": 65 - }, - { - "epoch": 0.0, - "grad_norm": 2.0199725555296335, - "learning_rate": 2.2372881355932204e-06, - "loss": 0.3086, - "step": 66 - }, - { - "epoch": 0.0, - "grad_norm": 1.8730176329500001, - "learning_rate": 2.2711864406779663e-06, - "loss": 0.323, - "step": 67 - }, - { - "epoch": 0.0, - "grad_norm": 1.7440535132509414, - "learning_rate": 2.305084745762712e-06, - "loss": 0.343, - "step": 68 - }, - { - "epoch": 0.0, - "grad_norm": 2.112486078314485, - "learning_rate": 2.338983050847458e-06, - "loss": 0.3391, - "step": 69 - }, - { - "epoch": 0.0, - "grad_norm": 1.7751924675393387, - "learning_rate": 2.372881355932204e-06, - "loss": 0.3234, - "step": 70 - }, - { - "epoch": 0.0, - "grad_norm": 1.964742351336944, - "learning_rate": 2.4067796610169493e-06, - "loss": 0.2933, - "step": 71 - }, - { - "epoch": 0.0, - "grad_norm": 1.3092146299206104, - "learning_rate": 2.4406779661016953e-06, - "loss": 0.2857, - "step": 72 - }, - { - "epoch": 0.0, - "grad_norm": 1.7904622412420848, - "learning_rate": 2.474576271186441e-06, - "loss": 0.3443, - "step": 73 - }, - { - "epoch": 0.0, - "grad_norm": 1.7863495982308861, - "learning_rate": 2.5084745762711864e-06, - "loss": 0.3031, - "step": 74 - }, - { - "epoch": 0.0, - "grad_norm": 1.5671252584011932, - "learning_rate": 2.5423728813559323e-06, - "loss": 0.2966, - "step": 75 - }, - { - "epoch": 0.0, - "grad_norm": 1.656662139777409, - "learning_rate": 2.576271186440678e-06, - "loss": 0.3134, - "step": 76 - }, - { - "epoch": 0.0, - "grad_norm": 2.041825731687294, - "learning_rate": 2.610169491525424e-06, - "loss": 0.3457, - "step": 77 - }, - { - "epoch": 0.0, - "grad_norm": 1.516375034415401, - "learning_rate": 2.64406779661017e-06, - "loss": 0.2944, - "step": 78 - }, - { - "epoch": 0.0, - "grad_norm": 1.9869017421478328, - "learning_rate": 2.6779661016949153e-06, - "loss": 0.2837, - "step": 79 - }, - { - "epoch": 0.0, - "grad_norm": 1.6175841066321706, - "learning_rate": 2.7118644067796613e-06, - "loss": 0.2864, - "step": 80 - }, - { - "epoch": 0.0, - "grad_norm": 1.7170519798903372, - "learning_rate": 2.745762711864407e-06, - "loss": 0.3284, - "step": 81 - }, - { - "epoch": 0.0, - "grad_norm": 1.7972083881181034, - "learning_rate": 2.779661016949153e-06, - "loss": 0.299, - "step": 82 - }, - { - "epoch": 0.0, - "grad_norm": 2.8307414003842113, - "learning_rate": 2.8135593220338988e-06, - "loss": 0.3023, - "step": 83 - }, - { - "epoch": 0.0, - "grad_norm": 1.613381220629313, - "learning_rate": 2.8474576271186443e-06, - "loss": 0.3124, - "step": 84 - }, - { - "epoch": 0.0, - "grad_norm": 1.7950742462204192, - "learning_rate": 2.8813559322033903e-06, - "loss": 0.3006, - "step": 85 - }, - { - "epoch": 0.0, - "grad_norm": 2.221539092274775, - "learning_rate": 2.915254237288136e-06, - "loss": 0.3048, - "step": 86 - }, - { - "epoch": 0.0, - "grad_norm": 2.259845817762709, - "learning_rate": 2.9491525423728818e-06, - "loss": 0.3044, - "step": 87 - }, - { - "epoch": 0.0, - "grad_norm": 1.541938397353296, - "learning_rate": 2.9830508474576277e-06, - "loss": 0.3434, - "step": 88 - }, - { - "epoch": 0.0, - "grad_norm": 2.1376130907622923, - "learning_rate": 3.0169491525423733e-06, - "loss": 0.3282, - "step": 89 - }, - { - "epoch": 0.0, - "grad_norm": 2.0565278408576604, - "learning_rate": 3.0508474576271192e-06, - "loss": 0.2927, - "step": 90 - }, - { - "epoch": 0.0, - "grad_norm": 1.926909173349644, - "learning_rate": 3.0847457627118648e-06, - "loss": 0.3109, - "step": 91 - }, - { - "epoch": 0.0, - "grad_norm": 2.6185375769384565, - "learning_rate": 3.1186440677966107e-06, - "loss": 0.3094, - "step": 92 - }, - { - "epoch": 0.0, - "grad_norm": 1.8249433332855955, - "learning_rate": 3.1525423728813563e-06, - "loss": 0.3225, - "step": 93 - }, - { - "epoch": 0.0, - "grad_norm": 1.7226923086848356, - "learning_rate": 3.186440677966102e-06, - "loss": 0.293, - "step": 94 - }, - { - "epoch": 0.0, - "grad_norm": 1.8579049970780483, - "learning_rate": 3.2203389830508473e-06, - "loss": 0.297, - "step": 95 - }, - { - "epoch": 0.0, - "grad_norm": 1.8217865526593613, - "learning_rate": 3.2542372881355933e-06, - "loss": 0.299, - "step": 96 - }, - { - "epoch": 0.0, - "grad_norm": 2.9857064112941214, - "learning_rate": 3.288135593220339e-06, - "loss": 0.2983, - "step": 97 - }, - { - "epoch": 0.0, - "grad_norm": 2.0821655536527746, - "learning_rate": 3.322033898305085e-06, - "loss": 0.3261, - "step": 98 - }, - { - "epoch": 0.01, - "grad_norm": 3.1380580706567773, - "learning_rate": 3.3559322033898308e-06, - "loss": 0.2775, - "step": 99 - }, - { - "epoch": 0.01, - "grad_norm": 2.0160814105257923, - "learning_rate": 3.3898305084745763e-06, - "loss": 0.3317, - "step": 100 - }, - { - "epoch": 0.01, - "grad_norm": 2.5909755454757093, - "learning_rate": 3.4237288135593223e-06, - "loss": 0.303, - "step": 101 - }, - { - "epoch": 0.01, - "grad_norm": 12.191902880650215, - "learning_rate": 3.457627118644068e-06, - "loss": 0.3183, - "step": 102 - }, - { - "epoch": 0.01, - "grad_norm": 1.9238805550148785, - "learning_rate": 3.4915254237288138e-06, - "loss": 0.3072, - "step": 103 - }, - { - "epoch": 0.01, - "grad_norm": 2.2333502172845163, - "learning_rate": 3.5254237288135597e-06, - "loss": 0.3586, - "step": 104 - }, - { - "epoch": 0.01, - "grad_norm": 8.589718562654825, - "learning_rate": 3.5593220338983053e-06, - "loss": 0.3147, - "step": 105 - }, - { - "epoch": 0.01, - "grad_norm": 1.6143252679477402, - "learning_rate": 3.5932203389830512e-06, - "loss": 0.2904, - "step": 106 - }, - { - "epoch": 0.01, - "grad_norm": 2.2836235321900076, - "learning_rate": 3.6271186440677968e-06, - "loss": 0.3066, - "step": 107 - }, - { - "epoch": 0.01, - "grad_norm": 1.6830983780860997, - "learning_rate": 3.6610169491525427e-06, - "loss": 0.3336, - "step": 108 - }, - { - "epoch": 0.01, - "grad_norm": 1.7804936986415487, - "learning_rate": 3.6949152542372883e-06, - "loss": 0.3005, - "step": 109 - }, - { - "epoch": 0.01, - "grad_norm": 3.065589112282441, - "learning_rate": 3.7288135593220342e-06, - "loss": 0.2842, - "step": 110 - }, - { - "epoch": 0.01, - "grad_norm": 2.350341154005084, - "learning_rate": 3.76271186440678e-06, - "loss": 0.3179, - "step": 111 - }, - { - "epoch": 0.01, - "grad_norm": 1.4814230337002219, - "learning_rate": 3.7966101694915257e-06, - "loss": 0.2744, - "step": 112 - }, - { - "epoch": 0.01, - "grad_norm": 2.733113410194878, - "learning_rate": 3.830508474576271e-06, - "loss": 0.3027, - "step": 113 - }, - { - "epoch": 0.01, - "grad_norm": 1.5967154788237727, - "learning_rate": 3.864406779661018e-06, - "loss": 0.3258, - "step": 114 - }, - { - "epoch": 0.01, - "grad_norm": 3.3370723281642563, - "learning_rate": 3.898305084745763e-06, - "loss": 0.3114, - "step": 115 - }, - { - "epoch": 0.01, - "grad_norm": 1.6445205745938416, - "learning_rate": 3.932203389830509e-06, - "loss": 0.3116, - "step": 116 - }, - { - "epoch": 0.01, - "grad_norm": 2.0862236046831435, - "learning_rate": 3.966101694915255e-06, - "loss": 0.2998, - "step": 117 - }, - { - "epoch": 0.01, - "grad_norm": 1.5820347545216342, - "learning_rate": 4.000000000000001e-06, - "loss": 0.3014, - "step": 118 - }, - { - "epoch": 0.01, - "grad_norm": 1.9780553225075865, - "learning_rate": 4.033898305084746e-06, - "loss": 0.321, - "step": 119 - }, - { - "epoch": 0.01, - "grad_norm": 2.0824896309123964, - "learning_rate": 4.067796610169492e-06, - "loss": 0.2436, - "step": 120 - }, - { - "epoch": 0.01, - "grad_norm": 2.235099816002165, - "learning_rate": 4.101694915254237e-06, - "loss": 0.2895, - "step": 121 - }, - { - "epoch": 0.01, - "grad_norm": 1.9274858752777464, - "learning_rate": 4.135593220338983e-06, - "loss": 0.2712, - "step": 122 - }, - { - "epoch": 0.01, - "grad_norm": 2.0017257398500234, - "learning_rate": 4.169491525423729e-06, - "loss": 0.2674, - "step": 123 - }, - { - "epoch": 0.01, - "grad_norm": 2.3276432711309596, - "learning_rate": 4.203389830508475e-06, - "loss": 0.3218, - "step": 124 - }, - { - "epoch": 0.01, - "grad_norm": 2.1175621752154283, - "learning_rate": 4.23728813559322e-06, - "loss": 0.3057, - "step": 125 - }, - { - "epoch": 0.01, - "grad_norm": 1.8693993867856755, - "learning_rate": 4.271186440677967e-06, - "loss": 0.2679, - "step": 126 - }, - { - "epoch": 0.01, - "grad_norm": 1.6694157796882232, - "learning_rate": 4.305084745762712e-06, - "loss": 0.2739, - "step": 127 - }, - { - "epoch": 0.01, - "grad_norm": 2.249653153315445, - "learning_rate": 4.338983050847458e-06, - "loss": 0.2827, - "step": 128 - }, - { - "epoch": 0.01, - "grad_norm": 2.6424596204092072, - "learning_rate": 4.372881355932203e-06, - "loss": 0.2826, - "step": 129 - }, - { - "epoch": 0.01, - "grad_norm": 1.7334690566735274, - "learning_rate": 4.40677966101695e-06, - "loss": 0.2932, - "step": 130 - }, - { - "epoch": 0.01, - "grad_norm": 1.9839422437397973, - "learning_rate": 4.440677966101695e-06, - "loss": 0.3081, - "step": 131 - }, - { - "epoch": 0.01, - "grad_norm": 1.953781353665557, - "learning_rate": 4.474576271186441e-06, - "loss": 0.3027, - "step": 132 - }, - { - "epoch": 0.01, - "grad_norm": 1.8911646090025422, - "learning_rate": 4.508474576271187e-06, - "loss": 0.3037, - "step": 133 - }, - { - "epoch": 0.01, - "grad_norm": 1.6962571768979062, - "learning_rate": 4.542372881355933e-06, - "loss": 0.2561, - "step": 134 - }, - { - "epoch": 0.01, - "grad_norm": 1.634664008844036, - "learning_rate": 4.576271186440678e-06, - "loss": 0.2879, - "step": 135 - }, - { - "epoch": 0.01, - "grad_norm": 2.777790158029909, - "learning_rate": 4.610169491525424e-06, - "loss": 0.2721, - "step": 136 - }, - { - "epoch": 0.01, - "grad_norm": 1.7148761472284506, - "learning_rate": 4.64406779661017e-06, - "loss": 0.2717, - "step": 137 - }, - { - "epoch": 0.01, - "grad_norm": 1.7208081889064668, - "learning_rate": 4.677966101694916e-06, - "loss": 0.2838, - "step": 138 - }, - { - "epoch": 0.01, - "grad_norm": 1.89025248616027, - "learning_rate": 4.711864406779661e-06, - "loss": 0.3075, - "step": 139 - }, - { - "epoch": 0.01, - "grad_norm": 1.7365633256432669, - "learning_rate": 4.745762711864408e-06, - "loss": 0.2909, - "step": 140 - }, - { - "epoch": 0.01, - "grad_norm": 1.6009133709225887, - "learning_rate": 4.779661016949153e-06, - "loss": 0.2538, - "step": 141 - }, - { - "epoch": 0.01, - "grad_norm": 2.0332412939427944, - "learning_rate": 4.813559322033899e-06, - "loss": 0.3089, - "step": 142 - }, - { - "epoch": 0.01, - "grad_norm": 1.775911485217657, - "learning_rate": 4.847457627118645e-06, - "loss": 0.3002, - "step": 143 - }, - { - "epoch": 0.01, - "grad_norm": 1.4574169659822291, - "learning_rate": 4.881355932203391e-06, - "loss": 0.2843, - "step": 144 - }, - { - "epoch": 0.01, - "grad_norm": 1.7122297188914908, - "learning_rate": 4.915254237288136e-06, - "loss": 0.2923, - "step": 145 - }, - { - "epoch": 0.01, - "grad_norm": 1.8661430535697405, - "learning_rate": 4.949152542372882e-06, - "loss": 0.2933, - "step": 146 - }, - { - "epoch": 0.01, - "grad_norm": 1.5280893595273282, - "learning_rate": 4.983050847457628e-06, - "loss": 0.3097, - "step": 147 - }, - { - "epoch": 0.01, - "grad_norm": 1.5414815172107994, - "learning_rate": 5.016949152542373e-06, - "loss": 0.2894, - "step": 148 - }, - { - "epoch": 0.01, - "grad_norm": 1.6979336511591085, - "learning_rate": 5.050847457627119e-06, - "loss": 0.3058, - "step": 149 - }, - { - "epoch": 0.01, - "grad_norm": 1.399367962381608, - "learning_rate": 5.084745762711865e-06, - "loss": 0.2826, - "step": 150 - }, - { - "epoch": 0.01, - "grad_norm": 1.666309227933981, - "learning_rate": 5.118644067796611e-06, - "loss": 0.2729, - "step": 151 - }, - { - "epoch": 0.01, - "grad_norm": 1.479961432900143, - "learning_rate": 5.152542372881356e-06, - "loss": 0.2805, - "step": 152 - }, - { - "epoch": 0.01, - "grad_norm": 1.7384952021588505, - "learning_rate": 5.186440677966102e-06, - "loss": 0.2759, - "step": 153 - }, - { - "epoch": 0.01, - "grad_norm": 1.8596650431273607, - "learning_rate": 5.220338983050848e-06, - "loss": 0.2758, - "step": 154 - }, - { - "epoch": 0.01, - "grad_norm": 1.857375302476961, - "learning_rate": 5.254237288135594e-06, - "loss": 0.3048, - "step": 155 - }, - { - "epoch": 0.01, - "grad_norm": 1.7591182469694164, - "learning_rate": 5.28813559322034e-06, - "loss": 0.2762, - "step": 156 - }, - { - "epoch": 0.01, - "grad_norm": 2.2948961387152282, - "learning_rate": 5.322033898305086e-06, - "loss": 0.2911, - "step": 157 - }, - { - "epoch": 0.01, - "grad_norm": 2.261579664075019, - "learning_rate": 5.355932203389831e-06, - "loss": 0.2845, - "step": 158 - }, - { - "epoch": 0.01, - "grad_norm": 2.0906674064521806, - "learning_rate": 5.389830508474577e-06, - "loss": 0.2798, - "step": 159 - }, - { - "epoch": 0.01, - "grad_norm": 1.689900338950711, - "learning_rate": 5.423728813559323e-06, - "loss": 0.3181, - "step": 160 - }, - { - "epoch": 0.01, - "grad_norm": 1.5558610571402536, - "learning_rate": 5.457627118644067e-06, - "loss": 0.2835, - "step": 161 - }, - { - "epoch": 0.01, - "grad_norm": 3.231530175438594, - "learning_rate": 5.491525423728814e-06, - "loss": 0.3044, - "step": 162 - }, - { - "epoch": 0.01, - "grad_norm": 2.6429093181423005, - "learning_rate": 5.525423728813559e-06, - "loss": 0.3068, - "step": 163 - }, - { - "epoch": 0.01, - "grad_norm": 1.7844549696996412, - "learning_rate": 5.559322033898306e-06, - "loss": 0.2853, - "step": 164 - }, - { - "epoch": 0.01, - "grad_norm": 2.0082259106645943, - "learning_rate": 5.593220338983051e-06, - "loss": 0.3477, - "step": 165 - }, - { - "epoch": 0.01, - "grad_norm": 1.960115573884396, - "learning_rate": 5.6271186440677975e-06, - "loss": 0.2772, - "step": 166 - }, - { - "epoch": 0.01, - "grad_norm": 2.068249027081921, - "learning_rate": 5.661016949152542e-06, - "loss": 0.2967, - "step": 167 - }, - { - "epoch": 0.01, - "grad_norm": 1.7051358404672956, - "learning_rate": 5.694915254237289e-06, - "loss": 0.2871, - "step": 168 - }, - { - "epoch": 0.01, - "grad_norm": 1.709105980487174, - "learning_rate": 5.728813559322034e-06, - "loss": 0.2613, - "step": 169 - }, - { - "epoch": 0.01, - "grad_norm": 1.950311162930796, - "learning_rate": 5.7627118644067805e-06, - "loss": 0.2923, - "step": 170 - }, - { - "epoch": 0.01, - "grad_norm": 1.651513874408586, - "learning_rate": 5.796610169491525e-06, - "loss": 0.3087, - "step": 171 - }, - { - "epoch": 0.01, - "grad_norm": 1.710559983293689, - "learning_rate": 5.830508474576272e-06, - "loss": 0.2794, - "step": 172 - }, - { - "epoch": 0.01, - "grad_norm": 1.7771722763697293, - "learning_rate": 5.864406779661017e-06, - "loss": 0.2783, - "step": 173 - }, - { - "epoch": 0.01, - "grad_norm": 2.160139050072537, - "learning_rate": 5.8983050847457635e-06, - "loss": 0.2966, - "step": 174 - }, - { - "epoch": 0.01, - "grad_norm": 1.5002256496386404, - "learning_rate": 5.932203389830509e-06, - "loss": 0.2879, - "step": 175 - }, - { - "epoch": 0.01, - "grad_norm": 1.8875259490488177, - "learning_rate": 5.9661016949152555e-06, - "loss": 0.2937, - "step": 176 - }, - { - "epoch": 0.01, - "grad_norm": 1.7701374784962995, - "learning_rate": 6e-06, - "loss": 0.2667, - "step": 177 - }, - { - "epoch": 0.01, - "grad_norm": 1.7400487952240289, - "learning_rate": 6.0338983050847465e-06, - "loss": 0.317, - "step": 178 - }, - { - "epoch": 0.01, - "grad_norm": 1.6262926424592759, - "learning_rate": 6.067796610169492e-06, - "loss": 0.2988, - "step": 179 - }, - { - "epoch": 0.01, - "grad_norm": 1.7533113610007194, - "learning_rate": 6.1016949152542385e-06, - "loss": 0.2743, - "step": 180 - }, - { - "epoch": 0.01, - "grad_norm": 2.1487191473952074, - "learning_rate": 6.135593220338983e-06, - "loss": 0.2731, - "step": 181 - }, - { - "epoch": 0.01, - "grad_norm": 1.837373024422903, - "learning_rate": 6.1694915254237295e-06, - "loss": 0.2474, - "step": 182 - }, - { - "epoch": 0.01, - "grad_norm": 3.384431938898038, - "learning_rate": 6.203389830508475e-06, - "loss": 0.2928, - "step": 183 - }, - { - "epoch": 0.01, - "grad_norm": 1.5271044696646137, - "learning_rate": 6.2372881355932215e-06, - "loss": 0.2544, - "step": 184 - }, - { - "epoch": 0.01, - "grad_norm": 1.3710907064781839, - "learning_rate": 6.271186440677966e-06, - "loss": 0.2716, - "step": 185 - }, - { - "epoch": 0.01, - "grad_norm": 1.7978500758050704, - "learning_rate": 6.3050847457627125e-06, - "loss": 0.2629, - "step": 186 - }, - { - "epoch": 0.01, - "grad_norm": 1.266493908137502, - "learning_rate": 6.338983050847458e-06, - "loss": 0.2636, - "step": 187 - }, - { - "epoch": 0.01, - "grad_norm": 1.6865435211762627, - "learning_rate": 6.372881355932204e-06, - "loss": 0.2919, - "step": 188 - }, - { - "epoch": 0.01, - "grad_norm": 1.5177106818253951, - "learning_rate": 6.40677966101695e-06, - "loss": 0.2696, - "step": 189 - }, - { - "epoch": 0.01, - "grad_norm": 1.598312898809967, - "learning_rate": 6.440677966101695e-06, - "loss": 0.2941, - "step": 190 - }, - { - "epoch": 0.01, - "grad_norm": 1.7020330476074053, - "learning_rate": 6.474576271186441e-06, - "loss": 0.2566, - "step": 191 - }, - { - "epoch": 0.01, - "grad_norm": 1.6452188115531354, - "learning_rate": 6.508474576271187e-06, - "loss": 0.2627, - "step": 192 - }, - { - "epoch": 0.01, - "grad_norm": 1.5337855227059187, - "learning_rate": 6.542372881355933e-06, - "loss": 0.2749, - "step": 193 - }, - { - "epoch": 0.01, - "grad_norm": 1.5885009119467683, - "learning_rate": 6.576271186440678e-06, - "loss": 0.2871, - "step": 194 - }, - { - "epoch": 0.01, - "grad_norm": 1.9656764093443815, - "learning_rate": 6.610169491525424e-06, - "loss": 0.3027, - "step": 195 - }, - { - "epoch": 0.01, - "grad_norm": 2.0483602782105494, - "learning_rate": 6.64406779661017e-06, - "loss": 0.3092, - "step": 196 - }, - { - "epoch": 0.01, - "grad_norm": 1.8565023223461687, - "learning_rate": 6.677966101694916e-06, - "loss": 0.3084, - "step": 197 - }, - { - "epoch": 0.01, - "grad_norm": 1.7646513939378161, - "learning_rate": 6.7118644067796615e-06, - "loss": 0.2953, - "step": 198 - }, - { - "epoch": 0.01, - "grad_norm": 1.6957118175929442, - "learning_rate": 6.745762711864408e-06, - "loss": 0.2939, - "step": 199 - }, - { - "epoch": 0.01, - "grad_norm": 1.6271626182864425, - "learning_rate": 6.779661016949153e-06, - "loss": 0.2732, - "step": 200 - }, - { - "epoch": 0.01, - "grad_norm": 2.665492075397036, - "learning_rate": 6.813559322033899e-06, - "loss": 0.2643, - "step": 201 - }, - { - "epoch": 0.01, - "grad_norm": 1.7849521054857391, - "learning_rate": 6.8474576271186445e-06, - "loss": 0.2967, - "step": 202 - }, - { - "epoch": 0.01, - "grad_norm": 1.6961191232197494, - "learning_rate": 6.881355932203391e-06, - "loss": 0.3109, - "step": 203 - }, - { - "epoch": 0.01, - "grad_norm": 1.725277238030002, - "learning_rate": 6.915254237288136e-06, - "loss": 0.2882, - "step": 204 - }, - { - "epoch": 0.01, - "grad_norm": 1.7040601722570679, - "learning_rate": 6.949152542372882e-06, - "loss": 0.261, - "step": 205 - }, - { - "epoch": 0.01, - "grad_norm": 1.7790750902245551, - "learning_rate": 6.9830508474576275e-06, - "loss": 0.2928, - "step": 206 - }, - { - "epoch": 0.01, - "grad_norm": 1.8954542367183922, - "learning_rate": 7.016949152542374e-06, - "loss": 0.2961, - "step": 207 - }, - { - "epoch": 0.01, - "grad_norm": 2.0100427678525317, - "learning_rate": 7.0508474576271195e-06, - "loss": 0.271, - "step": 208 - }, - { - "epoch": 0.01, - "grad_norm": 1.635043641028692, - "learning_rate": 7.084745762711865e-06, - "loss": 0.3017, - "step": 209 - }, - { - "epoch": 0.01, - "grad_norm": 1.5721374879331982, - "learning_rate": 7.1186440677966106e-06, - "loss": 0.2586, - "step": 210 - }, - { - "epoch": 0.01, - "grad_norm": 1.7623509032189595, - "learning_rate": 7.152542372881357e-06, - "loss": 0.2852, - "step": 211 - }, - { - "epoch": 0.01, - "grad_norm": 1.8084559334984929, - "learning_rate": 7.1864406779661025e-06, - "loss": 0.2502, - "step": 212 - }, - { - "epoch": 0.01, - "grad_norm": 1.5700471733368953, - "learning_rate": 7.220338983050849e-06, - "loss": 0.2658, - "step": 213 - }, - { - "epoch": 0.01, - "grad_norm": 1.7884143221055777, - "learning_rate": 7.2542372881355936e-06, - "loss": 0.2813, - "step": 214 - }, - { - "epoch": 0.01, - "grad_norm": 1.8403028285709235, - "learning_rate": 7.288135593220339e-06, - "loss": 0.2509, - "step": 215 - }, - { - "epoch": 0.01, - "grad_norm": 1.6358262298861554, - "learning_rate": 7.3220338983050855e-06, - "loss": 0.2608, - "step": 216 - }, - { - "epoch": 0.01, - "grad_norm": 8.065627300198091, - "learning_rate": 7.355932203389831e-06, - "loss": 0.2775, - "step": 217 - }, - { - "epoch": 0.01, - "grad_norm": 1.5288052157129923, - "learning_rate": 7.3898305084745766e-06, - "loss": 0.2694, - "step": 218 - }, - { - "epoch": 0.01, - "grad_norm": 1.6187358313701743, - "learning_rate": 7.423728813559322e-06, - "loss": 0.2611, - "step": 219 - }, - { - "epoch": 0.01, - "grad_norm": 1.500589738935865, - "learning_rate": 7.4576271186440685e-06, - "loss": 0.2611, - "step": 220 - }, - { - "epoch": 0.01, - "grad_norm": 1.6846488257425314, - "learning_rate": 7.491525423728814e-06, - "loss": 0.2625, - "step": 221 - }, - { - "epoch": 0.01, - "grad_norm": 2.5149283412876415, - "learning_rate": 7.52542372881356e-06, - "loss": 0.2698, - "step": 222 - }, - { - "epoch": 0.01, - "grad_norm": 1.520702365266604, - "learning_rate": 7.559322033898305e-06, - "loss": 0.2756, - "step": 223 - }, - { - "epoch": 0.01, - "grad_norm": 1.6031162343212535, - "learning_rate": 7.5932203389830515e-06, - "loss": 0.2617, - "step": 224 - }, - { - "epoch": 0.01, - "grad_norm": 1.6952860116491963, - "learning_rate": 7.627118644067797e-06, - "loss": 0.2818, - "step": 225 - }, - { - "epoch": 0.01, - "grad_norm": 1.477255037322905, - "learning_rate": 7.661016949152543e-06, - "loss": 0.2496, - "step": 226 - }, - { - "epoch": 0.01, - "grad_norm": 1.6660271734854857, - "learning_rate": 7.694915254237289e-06, - "loss": 0.2768, - "step": 227 - }, - { - "epoch": 0.01, - "grad_norm": 1.9775731672940686, - "learning_rate": 7.728813559322035e-06, - "loss": 0.2535, - "step": 228 - }, - { - "epoch": 0.01, - "grad_norm": 1.7011300558433908, - "learning_rate": 7.76271186440678e-06, - "loss": 0.2851, - "step": 229 - }, - { - "epoch": 0.01, - "grad_norm": 1.646079153476468, - "learning_rate": 7.796610169491526e-06, - "loss": 0.3166, - "step": 230 - }, - { - "epoch": 0.01, - "grad_norm": 1.4393049865391425, - "learning_rate": 7.830508474576271e-06, - "loss": 0.3049, - "step": 231 - }, - { - "epoch": 0.01, - "grad_norm": 1.6556807137319012, - "learning_rate": 7.864406779661017e-06, - "loss": 0.2875, - "step": 232 - }, - { - "epoch": 0.01, - "grad_norm": 1.8731866145038525, - "learning_rate": 7.898305084745764e-06, - "loss": 0.2701, - "step": 233 - }, - { - "epoch": 0.01, - "grad_norm": 1.62571796111418, - "learning_rate": 7.93220338983051e-06, - "loss": 0.2728, - "step": 234 - }, - { - "epoch": 0.01, - "grad_norm": 1.487202975941064, - "learning_rate": 7.966101694915255e-06, - "loss": 0.2719, - "step": 235 - }, - { - "epoch": 0.01, - "grad_norm": 1.535136361003896, - "learning_rate": 8.000000000000001e-06, - "loss": 0.2615, - "step": 236 - }, - { - "epoch": 0.01, - "grad_norm": 1.6670845697053458, - "learning_rate": 8.033898305084746e-06, - "loss": 0.2603, - "step": 237 - }, - { - "epoch": 0.01, - "grad_norm": 1.2513572304096718, - "learning_rate": 8.067796610169492e-06, - "loss": 0.2842, - "step": 238 - }, - { - "epoch": 0.01, - "grad_norm": 1.6010215262108967, - "learning_rate": 8.101694915254237e-06, - "loss": 0.2575, - "step": 239 - }, - { - "epoch": 0.01, - "grad_norm": 2.264897174430261, - "learning_rate": 8.135593220338983e-06, - "loss": 0.28, - "step": 240 - }, - { - "epoch": 0.01, - "grad_norm": 1.5210659548113379, - "learning_rate": 8.16949152542373e-06, - "loss": 0.2957, - "step": 241 - }, - { - "epoch": 0.01, - "grad_norm": 1.5993802470049059, - "learning_rate": 8.203389830508475e-06, - "loss": 0.284, - "step": 242 - }, - { - "epoch": 0.01, - "grad_norm": 1.4987229304363077, - "learning_rate": 8.237288135593221e-06, - "loss": 0.2617, - "step": 243 - }, - { - "epoch": 0.01, - "grad_norm": 1.45585250607327, - "learning_rate": 8.271186440677966e-06, - "loss": 0.2637, - "step": 244 - }, - { - "epoch": 0.01, - "grad_norm": 1.538155435073543, - "learning_rate": 8.305084745762712e-06, - "loss": 0.2741, - "step": 245 - }, - { - "epoch": 0.01, - "grad_norm": 2.1391423571076142, - "learning_rate": 8.338983050847458e-06, - "loss": 0.2631, - "step": 246 - }, - { - "epoch": 0.01, - "grad_norm": 1.7253417394805781, - "learning_rate": 8.372881355932205e-06, - "loss": 0.2565, - "step": 247 - }, - { - "epoch": 0.01, - "grad_norm": 1.6275070709595192, - "learning_rate": 8.40677966101695e-06, - "loss": 0.2644, - "step": 248 - }, - { - "epoch": 0.01, - "grad_norm": 1.6116776111875923, - "learning_rate": 8.440677966101696e-06, - "loss": 0.2802, - "step": 249 - }, - { - "epoch": 0.01, - "grad_norm": 1.570689718867318, - "learning_rate": 8.47457627118644e-06, - "loss": 0.2606, - "step": 250 - }, - { - "epoch": 0.01, - "grad_norm": 1.5257535084444283, - "learning_rate": 8.508474576271187e-06, - "loss": 0.2729, - "step": 251 - }, - { - "epoch": 0.01, - "grad_norm": 1.4065841492399755, - "learning_rate": 8.542372881355933e-06, - "loss": 0.2552, - "step": 252 - }, - { - "epoch": 0.01, - "grad_norm": 1.580942735174244, - "learning_rate": 8.57627118644068e-06, - "loss": 0.2869, - "step": 253 - }, - { - "epoch": 0.01, - "grad_norm": 1.7823741192008924, - "learning_rate": 8.610169491525424e-06, - "loss": 0.2458, - "step": 254 - }, - { - "epoch": 0.01, - "grad_norm": 1.6882930628464303, - "learning_rate": 8.64406779661017e-06, - "loss": 0.2797, - "step": 255 - }, - { - "epoch": 0.01, - "grad_norm": 1.5710000159998525, - "learning_rate": 8.677966101694915e-06, - "loss": 0.2596, - "step": 256 - }, - { - "epoch": 0.01, - "grad_norm": 1.5117969977132588, - "learning_rate": 8.711864406779662e-06, - "loss": 0.2915, - "step": 257 - }, - { - "epoch": 0.01, - "grad_norm": 2.0879634715923228, - "learning_rate": 8.745762711864407e-06, - "loss": 0.2928, - "step": 258 - }, - { - "epoch": 0.01, - "grad_norm": 1.4763672894599624, - "learning_rate": 8.779661016949153e-06, - "loss": 0.2661, - "step": 259 - }, - { - "epoch": 0.01, - "grad_norm": 1.5887813347318072, - "learning_rate": 8.8135593220339e-06, - "loss": 0.2644, - "step": 260 - }, - { - "epoch": 0.01, - "grad_norm": 2.1284805411680634, - "learning_rate": 8.847457627118646e-06, - "loss": 0.2896, - "step": 261 - }, - { - "epoch": 0.01, - "grad_norm": 1.6764740798105202, - "learning_rate": 8.88135593220339e-06, - "loss": 0.2679, - "step": 262 - }, - { - "epoch": 0.01, - "grad_norm": 1.5530926451627123, - "learning_rate": 8.915254237288137e-06, - "loss": 0.2618, - "step": 263 - }, - { - "epoch": 0.01, - "grad_norm": 1.5762836341877522, - "learning_rate": 8.949152542372881e-06, - "loss": 0.278, - "step": 264 - }, - { - "epoch": 0.01, - "grad_norm": 2.229849002174887, - "learning_rate": 8.983050847457628e-06, - "loss": 0.2623, - "step": 265 - }, - { - "epoch": 0.01, - "grad_norm": 1.360225880617487, - "learning_rate": 9.016949152542374e-06, - "loss": 0.2769, - "step": 266 - }, - { - "epoch": 0.01, - "grad_norm": 1.578613608691057, - "learning_rate": 9.05084745762712e-06, - "loss": 0.2935, - "step": 267 - }, - { - "epoch": 0.01, - "grad_norm": 1.5343613642802507, - "learning_rate": 9.084745762711865e-06, - "loss": 0.2827, - "step": 268 - }, - { - "epoch": 0.01, - "grad_norm": 1.9651340500582921, - "learning_rate": 9.11864406779661e-06, - "loss": 0.3065, - "step": 269 - }, - { - "epoch": 0.01, - "grad_norm": 1.189584326247112, - "learning_rate": 9.152542372881356e-06, - "loss": 0.2452, - "step": 270 - }, - { - "epoch": 0.01, - "grad_norm": 1.568110390610819, - "learning_rate": 9.186440677966101e-06, - "loss": 0.2642, - "step": 271 - }, - { - "epoch": 0.01, - "grad_norm": 1.490918665724689, - "learning_rate": 9.220338983050847e-06, - "loss": 0.2484, - "step": 272 - }, - { - "epoch": 0.01, - "grad_norm": 1.5178780940378127, - "learning_rate": 9.254237288135594e-06, - "loss": 0.2615, - "step": 273 - }, - { - "epoch": 0.01, - "grad_norm": 1.4243185626593395, - "learning_rate": 9.28813559322034e-06, - "loss": 0.2706, - "step": 274 - }, - { - "epoch": 0.01, - "grad_norm": 1.3943699378544865, - "learning_rate": 9.322033898305085e-06, - "loss": 0.2846, - "step": 275 - }, - { - "epoch": 0.01, - "grad_norm": 1.39958325547155, - "learning_rate": 9.355932203389831e-06, - "loss": 0.2644, - "step": 276 - }, - { - "epoch": 0.01, - "grad_norm": 1.5566516750657318, - "learning_rate": 9.389830508474576e-06, - "loss": 0.2709, - "step": 277 - }, - { - "epoch": 0.01, - "grad_norm": 1.5807818263236182, - "learning_rate": 9.423728813559322e-06, - "loss": 0.2904, - "step": 278 - }, - { - "epoch": 0.01, - "grad_norm": 1.6657073622633627, - "learning_rate": 9.457627118644069e-06, - "loss": 0.2831, - "step": 279 - }, - { - "epoch": 0.01, - "grad_norm": 1.5508860676326361, - "learning_rate": 9.491525423728815e-06, - "loss": 0.3093, - "step": 280 - }, - { - "epoch": 0.01, - "grad_norm": 1.4241612321960757, - "learning_rate": 9.52542372881356e-06, - "loss": 0.2769, - "step": 281 - }, - { - "epoch": 0.01, - "grad_norm": 1.5006924170004527, - "learning_rate": 9.559322033898306e-06, - "loss": 0.2415, - "step": 282 - }, - { - "epoch": 0.01, - "grad_norm": 1.5294543822090856, - "learning_rate": 9.593220338983051e-06, - "loss": 0.2515, - "step": 283 - }, - { - "epoch": 0.01, - "grad_norm": 1.533886353920441, - "learning_rate": 9.627118644067797e-06, - "loss": 0.2829, - "step": 284 - }, - { - "epoch": 0.01, - "grad_norm": 2.069418788425172, - "learning_rate": 9.661016949152544e-06, - "loss": 0.2887, - "step": 285 - }, - { - "epoch": 0.01, - "grad_norm": 1.5889620658480574, - "learning_rate": 9.69491525423729e-06, - "loss": 0.2991, - "step": 286 - }, - { - "epoch": 0.01, - "grad_norm": 1.4597563573302703, - "learning_rate": 9.728813559322035e-06, - "loss": 0.2624, - "step": 287 - }, - { - "epoch": 0.01, - "grad_norm": 1.740665680466997, - "learning_rate": 9.762711864406781e-06, - "loss": 0.2881, - "step": 288 - }, - { - "epoch": 0.01, - "grad_norm": 1.3885281952656445, - "learning_rate": 9.796610169491526e-06, - "loss": 0.2958, - "step": 289 - }, - { - "epoch": 0.01, - "grad_norm": 1.4980986665212326, - "learning_rate": 9.830508474576272e-06, - "loss": 0.2894, - "step": 290 - }, - { - "epoch": 0.01, - "grad_norm": 1.6970976017778991, - "learning_rate": 9.864406779661017e-06, - "loss": 0.277, - "step": 291 - }, - { - "epoch": 0.01, - "grad_norm": 1.5465766596610715, - "learning_rate": 9.898305084745763e-06, - "loss": 0.2723, - "step": 292 - }, - { - "epoch": 0.01, - "grad_norm": 1.7023966032743651, - "learning_rate": 9.93220338983051e-06, - "loss": 0.2977, - "step": 293 - }, - { - "epoch": 0.01, - "grad_norm": 1.3390528257847034, - "learning_rate": 9.966101694915256e-06, - "loss": 0.2537, - "step": 294 - }, - { - "epoch": 0.02, - "grad_norm": 1.1821890114626603, - "learning_rate": 1e-05, - "loss": 0.2668, - "step": 295 - }, - { - "epoch": 0.02, - "grad_norm": 1.7286845032215452, - "learning_rate": 1.0033898305084746e-05, - "loss": 0.2744, - "step": 296 - }, - { - "epoch": 0.02, - "grad_norm": 1.4950792801793105, - "learning_rate": 1.0067796610169492e-05, - "loss": 0.272, - "step": 297 - }, - { - "epoch": 0.02, - "grad_norm": 1.4700432391190763, - "learning_rate": 1.0101694915254238e-05, - "loss": 0.2662, - "step": 298 - }, - { - "epoch": 0.02, - "grad_norm": 1.3967794895014582, - "learning_rate": 1.0135593220338985e-05, - "loss": 0.2802, - "step": 299 - }, - { - "epoch": 0.02, - "grad_norm": 2.391964862237865, - "learning_rate": 1.016949152542373e-05, - "loss": 0.273, - "step": 300 - }, - { - "epoch": 0.02, - "grad_norm": 1.4563931631110092, - "learning_rate": 1.0203389830508474e-05, - "loss": 0.2905, - "step": 301 - }, - { - "epoch": 0.02, - "grad_norm": 1.284413744656201, - "learning_rate": 1.0237288135593222e-05, - "loss": 0.2749, - "step": 302 - }, - { - "epoch": 0.02, - "grad_norm": 1.7292121695347487, - "learning_rate": 1.0271186440677967e-05, - "loss": 0.243, - "step": 303 - }, - { - "epoch": 0.02, - "grad_norm": 1.8232969594205204, - "learning_rate": 1.0305084745762712e-05, - "loss": 0.2722, - "step": 304 - }, - { - "epoch": 0.02, - "grad_norm": 1.7932246344435712, - "learning_rate": 1.0338983050847458e-05, - "loss": 0.2684, - "step": 305 - }, - { - "epoch": 0.02, - "grad_norm": 1.533073576390442, - "learning_rate": 1.0372881355932204e-05, - "loss": 0.2745, - "step": 306 - }, - { - "epoch": 0.02, - "grad_norm": 1.8333910248684946, - "learning_rate": 1.040677966101695e-05, - "loss": 0.293, - "step": 307 - }, - { - "epoch": 0.02, - "grad_norm": 3.1995055633474583, - "learning_rate": 1.0440677966101695e-05, - "loss": 0.2629, - "step": 308 - }, - { - "epoch": 0.02, - "grad_norm": 1.4645465210433868, - "learning_rate": 1.047457627118644e-05, - "loss": 0.2698, - "step": 309 - }, - { - "epoch": 0.02, - "grad_norm": 1.346835487540612, - "learning_rate": 1.0508474576271188e-05, - "loss": 0.2704, - "step": 310 - }, - { - "epoch": 0.02, - "grad_norm": 1.4321145393072345, - "learning_rate": 1.0542372881355933e-05, - "loss": 0.2677, - "step": 311 - }, - { - "epoch": 0.02, - "grad_norm": 1.5265056300536548, - "learning_rate": 1.057627118644068e-05, - "loss": 0.2803, - "step": 312 - }, - { - "epoch": 0.02, - "grad_norm": 1.3311183401050715, - "learning_rate": 1.0610169491525424e-05, - "loss": 0.2662, - "step": 313 - }, - { - "epoch": 0.02, - "grad_norm": 1.4977642596654024, - "learning_rate": 1.0644067796610172e-05, - "loss": 0.2729, - "step": 314 - }, - { - "epoch": 0.02, - "grad_norm": 1.5866201311518897, - "learning_rate": 1.0677966101694917e-05, - "loss": 0.255, - "step": 315 - }, - { - "epoch": 0.02, - "grad_norm": 1.5568209460381872, - "learning_rate": 1.0711864406779661e-05, - "loss": 0.2771, - "step": 316 - }, - { - "epoch": 0.02, - "grad_norm": 1.3484579322691495, - "learning_rate": 1.0745762711864408e-05, - "loss": 0.254, - "step": 317 - }, - { - "epoch": 0.02, - "grad_norm": 1.4060019872392868, - "learning_rate": 1.0779661016949154e-05, - "loss": 0.2847, - "step": 318 - }, - { - "epoch": 0.02, - "grad_norm": 1.6215123876307729, - "learning_rate": 1.08135593220339e-05, - "loss": 0.2582, - "step": 319 - }, - { - "epoch": 0.02, - "grad_norm": 1.1960240824180124, - "learning_rate": 1.0847457627118645e-05, - "loss": 0.2757, - "step": 320 - }, - { - "epoch": 0.02, - "grad_norm": 2.6496098507964483, - "learning_rate": 1.088135593220339e-05, - "loss": 0.2946, - "step": 321 - }, - { - "epoch": 0.02, - "grad_norm": 1.3862470565731742, - "learning_rate": 1.0915254237288135e-05, - "loss": 0.2768, - "step": 322 - }, - { - "epoch": 0.02, - "grad_norm": 1.4544121582161837, - "learning_rate": 1.0949152542372883e-05, - "loss": 0.3032, - "step": 323 - }, - { - "epoch": 0.02, - "grad_norm": 1.4175535736163114, - "learning_rate": 1.0983050847457627e-05, - "loss": 0.2686, - "step": 324 - }, - { - "epoch": 0.02, - "grad_norm": 1.3191834379151826, - "learning_rate": 1.1016949152542374e-05, - "loss": 0.252, - "step": 325 - }, - { - "epoch": 0.02, - "grad_norm": 1.33272323985383, - "learning_rate": 1.1050847457627118e-05, - "loss": 0.2779, - "step": 326 - }, - { - "epoch": 0.02, - "grad_norm": 1.660681621213467, - "learning_rate": 1.1084745762711867e-05, - "loss": 0.2794, - "step": 327 - }, - { - "epoch": 0.02, - "grad_norm": 1.431182692042468, - "learning_rate": 1.1118644067796611e-05, - "loss": 0.2699, - "step": 328 - }, - { - "epoch": 0.02, - "grad_norm": 1.4489972397114494, - "learning_rate": 1.1152542372881356e-05, - "loss": 0.2682, - "step": 329 - }, - { - "epoch": 0.02, - "grad_norm": 1.6541146528927058, - "learning_rate": 1.1186440677966102e-05, - "loss": 0.3242, - "step": 330 - }, - { - "epoch": 0.02, - "grad_norm": 1.331208350127372, - "learning_rate": 1.1220338983050849e-05, - "loss": 0.266, - "step": 331 - }, - { - "epoch": 0.02, - "grad_norm": 1.4591943463955406, - "learning_rate": 1.1254237288135595e-05, - "loss": 0.2462, - "step": 332 - }, - { - "epoch": 0.02, - "grad_norm": 1.4940367024477517, - "learning_rate": 1.128813559322034e-05, - "loss": 0.2634, - "step": 333 - }, - { - "epoch": 0.02, - "grad_norm": 1.6020801701154066, - "learning_rate": 1.1322033898305084e-05, - "loss": 0.258, - "step": 334 - }, - { - "epoch": 0.02, - "grad_norm": 1.567603002600629, - "learning_rate": 1.1355932203389833e-05, - "loss": 0.2893, - "step": 335 - }, - { - "epoch": 0.02, - "grad_norm": 1.4512891656223712, - "learning_rate": 1.1389830508474577e-05, - "loss": 0.2592, - "step": 336 - }, - { - "epoch": 0.02, - "grad_norm": 1.9013614792745004, - "learning_rate": 1.1423728813559322e-05, - "loss": 0.3183, - "step": 337 - }, - { - "epoch": 0.02, - "grad_norm": 1.4816696067269588, - "learning_rate": 1.1457627118644068e-05, - "loss": 0.2521, - "step": 338 - }, - { - "epoch": 0.02, - "grad_norm": 1.4699744125682022, - "learning_rate": 1.1491525423728815e-05, - "loss": 0.2435, - "step": 339 - }, - { - "epoch": 0.02, - "grad_norm": 1.5292539331602504, - "learning_rate": 1.1525423728813561e-05, - "loss": 0.2773, - "step": 340 - }, - { - "epoch": 0.02, - "grad_norm": 1.493805068160698, - "learning_rate": 1.1559322033898306e-05, - "loss": 0.2776, - "step": 341 - }, - { - "epoch": 0.02, - "grad_norm": 2.3320441267535226, - "learning_rate": 1.159322033898305e-05, - "loss": 0.3148, - "step": 342 - }, - { - "epoch": 0.02, - "grad_norm": 1.5768942640637522, - "learning_rate": 1.1627118644067799e-05, - "loss": 0.2882, - "step": 343 - }, - { - "epoch": 0.02, - "grad_norm": 2.6327174925876067, - "learning_rate": 1.1661016949152543e-05, - "loss": 0.2643, - "step": 344 - }, - { - "epoch": 0.02, - "grad_norm": 1.5979901584661973, - "learning_rate": 1.169491525423729e-05, - "loss": 0.2793, - "step": 345 - }, - { - "epoch": 0.02, - "grad_norm": 1.490949422523808, - "learning_rate": 1.1728813559322034e-05, - "loss": 0.2797, - "step": 346 - }, - { - "epoch": 0.02, - "grad_norm": 1.6373314185665537, - "learning_rate": 1.1762711864406782e-05, - "loss": 0.2702, - "step": 347 - }, - { - "epoch": 0.02, - "grad_norm": 1.494597348638564, - "learning_rate": 1.1796610169491527e-05, - "loss": 0.256, - "step": 348 - }, - { - "epoch": 0.02, - "grad_norm": 1.268989549770058, - "learning_rate": 1.1830508474576272e-05, - "loss": 0.275, - "step": 349 - }, - { - "epoch": 0.02, - "grad_norm": 1.5728102611683135, - "learning_rate": 1.1864406779661018e-05, - "loss": 0.2747, - "step": 350 - }, - { - "epoch": 0.02, - "grad_norm": 1.4554919008312315, - "learning_rate": 1.1898305084745763e-05, - "loss": 0.2656, - "step": 351 - }, - { - "epoch": 0.02, - "grad_norm": 1.6882217452125865, - "learning_rate": 1.1932203389830511e-05, - "loss": 0.2724, - "step": 352 - }, - { - "epoch": 0.02, - "grad_norm": 1.5458553639090058, - "learning_rate": 1.1966101694915256e-05, - "loss": 0.2574, - "step": 353 - }, - { - "epoch": 0.02, - "grad_norm": 2.9734651946977806, - "learning_rate": 1.2e-05, - "loss": 0.2682, - "step": 354 - }, - { - "epoch": 0.02, - "grad_norm": 1.4674881583289108, - "learning_rate": 1.2033898305084745e-05, - "loss": 0.2624, - "step": 355 - }, - { - "epoch": 0.02, - "grad_norm": 1.6725844123567122, - "learning_rate": 1.2067796610169493e-05, - "loss": 0.2693, - "step": 356 - }, - { - "epoch": 0.02, - "grad_norm": 1.3657325069194421, - "learning_rate": 1.2101694915254238e-05, - "loss": 0.2422, - "step": 357 - }, - { - "epoch": 0.02, - "grad_norm": 1.4560212112782298, - "learning_rate": 1.2135593220338984e-05, - "loss": 0.2519, - "step": 358 - }, - { - "epoch": 0.02, - "grad_norm": 1.3112148572383935, - "learning_rate": 1.2169491525423729e-05, - "loss": 0.2769, - "step": 359 - }, - { - "epoch": 0.02, - "grad_norm": 1.491608195320627, - "learning_rate": 1.2203389830508477e-05, - "loss": 0.2526, - "step": 360 - }, - { - "epoch": 0.02, - "grad_norm": 2.3012264572970316, - "learning_rate": 1.2237288135593222e-05, - "loss": 0.2841, - "step": 361 - }, - { - "epoch": 0.02, - "grad_norm": 1.42262697227907, - "learning_rate": 1.2271186440677966e-05, - "loss": 0.264, - "step": 362 - }, - { - "epoch": 0.02, - "grad_norm": 1.376381527131282, - "learning_rate": 1.2305084745762713e-05, - "loss": 0.2895, - "step": 363 - }, - { - "epoch": 0.02, - "grad_norm": 1.3621092285439624, - "learning_rate": 1.2338983050847459e-05, - "loss": 0.2818, - "step": 364 - }, - { - "epoch": 0.02, - "grad_norm": 1.6416902061672696, - "learning_rate": 1.2372881355932205e-05, - "loss": 0.242, - "step": 365 - }, - { - "epoch": 0.02, - "grad_norm": 1.8070792328678054, - "learning_rate": 1.240677966101695e-05, - "loss": 0.2513, - "step": 366 - }, - { - "epoch": 0.02, - "grad_norm": 1.4230650127004605, - "learning_rate": 1.2440677966101695e-05, - "loss": 0.2663, - "step": 367 - }, - { - "epoch": 0.02, - "grad_norm": 1.5876663584225577, - "learning_rate": 1.2474576271186443e-05, - "loss": 0.2619, - "step": 368 - }, - { - "epoch": 0.02, - "grad_norm": 2.4967545863007254, - "learning_rate": 1.2508474576271188e-05, - "loss": 0.2822, - "step": 369 - }, - { - "epoch": 0.02, - "grad_norm": 1.9039535303784283, - "learning_rate": 1.2542372881355932e-05, - "loss": 0.284, - "step": 370 - }, - { - "epoch": 0.02, - "grad_norm": 1.428940565663837, - "learning_rate": 1.2576271186440679e-05, - "loss": 0.2601, - "step": 371 - }, - { - "epoch": 0.02, - "grad_norm": 1.5662155563814324, - "learning_rate": 1.2610169491525425e-05, - "loss": 0.2873, - "step": 372 - }, - { - "epoch": 0.02, - "grad_norm": 1.2971094059613077, - "learning_rate": 1.2644067796610171e-05, - "loss": 0.2626, - "step": 373 - }, - { - "epoch": 0.02, - "grad_norm": 1.0351725300274326, - "learning_rate": 1.2677966101694916e-05, - "loss": 0.2743, - "step": 374 - }, - { - "epoch": 0.02, - "grad_norm": 1.3553284978125044, - "learning_rate": 1.2711864406779661e-05, - "loss": 0.2709, - "step": 375 - }, - { - "epoch": 0.02, - "grad_norm": 1.5882208330471586, - "learning_rate": 1.2745762711864407e-05, - "loss": 0.2708, - "step": 376 - }, - { - "epoch": 0.02, - "grad_norm": 1.8462534147511642, - "learning_rate": 1.2779661016949154e-05, - "loss": 0.2699, - "step": 377 - }, - { - "epoch": 0.02, - "grad_norm": 1.426504473136271, - "learning_rate": 1.28135593220339e-05, - "loss": 0.281, - "step": 378 - }, - { - "epoch": 0.02, - "grad_norm": 1.2340057175968697, - "learning_rate": 1.2847457627118645e-05, - "loss": 0.2483, - "step": 379 - }, - { - "epoch": 0.02, - "grad_norm": 1.835330825736432, - "learning_rate": 1.288135593220339e-05, - "loss": 0.308, - "step": 380 - }, - { - "epoch": 0.02, - "grad_norm": 1.3981944760568665, - "learning_rate": 1.2915254237288137e-05, - "loss": 0.2507, - "step": 381 - }, - { - "epoch": 0.02, - "grad_norm": 1.3749165361391003, - "learning_rate": 1.2949152542372882e-05, - "loss": 0.2647, - "step": 382 - }, - { - "epoch": 0.02, - "grad_norm": 2.0292717133598073, - "learning_rate": 1.2983050847457629e-05, - "loss": 0.2718, - "step": 383 - }, - { - "epoch": 0.02, - "grad_norm": 1.4857167490857963, - "learning_rate": 1.3016949152542373e-05, - "loss": 0.2635, - "step": 384 - }, - { - "epoch": 0.02, - "grad_norm": 1.747307923320907, - "learning_rate": 1.305084745762712e-05, - "loss": 0.2706, - "step": 385 - }, - { - "epoch": 0.02, - "grad_norm": 1.3277925058690871, - "learning_rate": 1.3084745762711866e-05, - "loss": 0.2432, - "step": 386 - }, - { - "epoch": 0.02, - "grad_norm": 1.5142700170780168, - "learning_rate": 1.311864406779661e-05, - "loss": 0.2811, - "step": 387 - }, - { - "epoch": 0.02, - "grad_norm": 1.3779037473413898, - "learning_rate": 1.3152542372881355e-05, - "loss": 0.2839, - "step": 388 - }, - { - "epoch": 0.02, - "grad_norm": 2.5766348414501605, - "learning_rate": 1.3186440677966103e-05, - "loss": 0.2622, - "step": 389 - }, - { - "epoch": 0.02, - "grad_norm": 1.2036696780959797, - "learning_rate": 1.3220338983050848e-05, - "loss": 0.2799, - "step": 390 - }, - { - "epoch": 0.02, - "grad_norm": 1.3338172319722734, - "learning_rate": 1.3254237288135595e-05, - "loss": 0.2516, - "step": 391 - }, - { - "epoch": 0.02, - "grad_norm": 1.5466043637609173, - "learning_rate": 1.328813559322034e-05, - "loss": 0.2892, - "step": 392 - }, - { - "epoch": 0.02, - "grad_norm": 1.375902595553232, - "learning_rate": 1.3322033898305087e-05, - "loss": 0.3, - "step": 393 - }, - { - "epoch": 0.02, - "grad_norm": 1.223183507661743, - "learning_rate": 1.3355932203389832e-05, - "loss": 0.2864, - "step": 394 - }, - { - "epoch": 0.02, - "grad_norm": 1.4529473301965807, - "learning_rate": 1.3389830508474577e-05, - "loss": 0.2643, - "step": 395 - }, - { - "epoch": 0.02, - "grad_norm": 1.3608254167875427, - "learning_rate": 1.3423728813559323e-05, - "loss": 0.2635, - "step": 396 - }, - { - "epoch": 0.02, - "grad_norm": 1.2513147886137037, - "learning_rate": 1.345762711864407e-05, - "loss": 0.2513, - "step": 397 - }, - { - "epoch": 0.02, - "grad_norm": 1.5112671514145013, - "learning_rate": 1.3491525423728816e-05, - "loss": 0.2861, - "step": 398 - }, - { - "epoch": 0.02, - "grad_norm": 1.6150957465634022, - "learning_rate": 1.352542372881356e-05, - "loss": 0.2701, - "step": 399 - }, - { - "epoch": 0.02, - "grad_norm": 1.7792619826658456, - "learning_rate": 1.3559322033898305e-05, - "loss": 0.2726, - "step": 400 - }, - { - "epoch": 0.02, - "grad_norm": 1.3186392092611758, - "learning_rate": 1.3593220338983053e-05, - "loss": 0.2749, - "step": 401 - }, - { - "epoch": 0.02, - "grad_norm": 1.255587345530338, - "learning_rate": 1.3627118644067798e-05, - "loss": 0.2524, - "step": 402 - }, - { - "epoch": 0.02, - "grad_norm": 1.2400872274349326, - "learning_rate": 1.3661016949152543e-05, - "loss": 0.2778, - "step": 403 - }, - { - "epoch": 0.02, - "grad_norm": 1.482742495732322, - "learning_rate": 1.3694915254237289e-05, - "loss": 0.2698, - "step": 404 - }, - { - "epoch": 0.02, - "grad_norm": 1.9013554292200026, - "learning_rate": 1.3728813559322034e-05, - "loss": 0.2835, - "step": 405 - }, - { - "epoch": 0.02, - "grad_norm": 1.401766043993974, - "learning_rate": 1.3762711864406782e-05, - "loss": 0.2991, - "step": 406 - }, - { - "epoch": 0.02, - "grad_norm": 1.881122545071624, - "learning_rate": 1.3796610169491527e-05, - "loss": 0.3155, - "step": 407 - }, - { - "epoch": 0.02, - "grad_norm": 1.644242035858314, - "learning_rate": 1.3830508474576271e-05, - "loss": 0.2766, - "step": 408 - }, - { - "epoch": 0.02, - "grad_norm": 1.4160651170704013, - "learning_rate": 1.3864406779661018e-05, - "loss": 0.2864, - "step": 409 - }, - { - "epoch": 0.02, - "grad_norm": 1.1874236164670124, - "learning_rate": 1.3898305084745764e-05, - "loss": 0.275, - "step": 410 - }, - { - "epoch": 0.02, - "grad_norm": 1.2883313224686657, - "learning_rate": 1.393220338983051e-05, - "loss": 0.2647, - "step": 411 - }, - { - "epoch": 0.02, - "grad_norm": 1.2956543891841616, - "learning_rate": 1.3966101694915255e-05, - "loss": 0.2598, - "step": 412 - }, - { - "epoch": 0.02, - "grad_norm": 1.4091672402968367, - "learning_rate": 1.4e-05, - "loss": 0.2744, - "step": 413 - }, - { - "epoch": 0.02, - "grad_norm": 1.3024750495831694, - "learning_rate": 1.4033898305084748e-05, - "loss": 0.2525, - "step": 414 - }, - { - "epoch": 0.02, - "grad_norm": 1.7724406208457262, - "learning_rate": 1.4067796610169493e-05, - "loss": 0.2916, - "step": 415 - }, - { - "epoch": 0.02, - "grad_norm": 1.306539549408704, - "learning_rate": 1.4101694915254239e-05, - "loss": 0.2697, - "step": 416 - }, - { - "epoch": 0.02, - "grad_norm": 1.4086019817090085, - "learning_rate": 1.4135593220338984e-05, - "loss": 0.2597, - "step": 417 - }, - { - "epoch": 0.02, - "grad_norm": 1.5154347103816932, - "learning_rate": 1.416949152542373e-05, - "loss": 0.2622, - "step": 418 - }, - { - "epoch": 0.02, - "grad_norm": 1.2824331064750205, - "learning_rate": 1.4203389830508476e-05, - "loss": 0.295, - "step": 419 - }, - { - "epoch": 0.02, - "grad_norm": 1.1915074345746457, - "learning_rate": 1.4237288135593221e-05, - "loss": 0.2642, - "step": 420 - }, - { - "epoch": 0.02, - "grad_norm": 1.1801999544960764, - "learning_rate": 1.4271186440677966e-05, - "loss": 0.2695, - "step": 421 - }, - { - "epoch": 0.02, - "grad_norm": 1.2427987470169837, - "learning_rate": 1.4305084745762714e-05, - "loss": 0.2693, - "step": 422 - }, - { - "epoch": 0.02, - "grad_norm": 1.416370988934776, - "learning_rate": 1.4338983050847459e-05, - "loss": 0.2566, - "step": 423 - }, - { - "epoch": 0.02, - "grad_norm": 1.4784492398159457, - "learning_rate": 1.4372881355932205e-05, - "loss": 0.3128, - "step": 424 - }, - { - "epoch": 0.02, - "grad_norm": 1.7103590658489138, - "learning_rate": 1.440677966101695e-05, - "loss": 0.2916, - "step": 425 - }, - { - "epoch": 0.02, - "grad_norm": 1.177582810375724, - "learning_rate": 1.4440677966101698e-05, - "loss": 0.2787, - "step": 426 - }, - { - "epoch": 0.02, - "grad_norm": 1.2187682328759377, - "learning_rate": 1.4474576271186442e-05, - "loss": 0.2766, - "step": 427 - }, - { - "epoch": 0.02, - "grad_norm": 1.1786396709929452, - "learning_rate": 1.4508474576271187e-05, - "loss": 0.2958, - "step": 428 - }, - { - "epoch": 0.02, - "grad_norm": 1.1586043727922222, - "learning_rate": 1.4542372881355933e-05, - "loss": 0.2629, - "step": 429 - }, - { - "epoch": 0.02, - "grad_norm": 1.374274393866389, - "learning_rate": 1.4576271186440678e-05, - "loss": 0.2725, - "step": 430 - }, - { - "epoch": 0.02, - "grad_norm": 1.1422580383010243, - "learning_rate": 1.4610169491525426e-05, - "loss": 0.2608, - "step": 431 - }, - { - "epoch": 0.02, - "grad_norm": 1.362018091483847, - "learning_rate": 1.4644067796610171e-05, - "loss": 0.2686, - "step": 432 - }, - { - "epoch": 0.02, - "grad_norm": 1.3772004572389647, - "learning_rate": 1.4677966101694916e-05, - "loss": 0.2848, - "step": 433 - }, - { - "epoch": 0.02, - "grad_norm": 1.3697625449391713, - "learning_rate": 1.4711864406779662e-05, - "loss": 0.2618, - "step": 434 - }, - { - "epoch": 0.02, - "grad_norm": 1.2817523722761615, - "learning_rate": 1.4745762711864408e-05, - "loss": 0.281, - "step": 435 - }, - { - "epoch": 0.02, - "grad_norm": 1.5667550162037722, - "learning_rate": 1.4779661016949153e-05, - "loss": 0.3107, - "step": 436 - }, - { - "epoch": 0.02, - "grad_norm": 1.2154704540265984, - "learning_rate": 1.48135593220339e-05, - "loss": 0.2641, - "step": 437 - }, - { - "epoch": 0.02, - "grad_norm": 1.601548252501335, - "learning_rate": 1.4847457627118644e-05, - "loss": 0.2749, - "step": 438 - }, - { - "epoch": 0.02, - "grad_norm": 1.3412392808454243, - "learning_rate": 1.4881355932203392e-05, - "loss": 0.2591, - "step": 439 - }, - { - "epoch": 0.02, - "grad_norm": 1.8745181492381693, - "learning_rate": 1.4915254237288137e-05, - "loss": 0.293, - "step": 440 - }, - { - "epoch": 0.02, - "grad_norm": 1.2032286439232478, - "learning_rate": 1.4949152542372882e-05, - "loss": 0.2488, - "step": 441 - }, - { - "epoch": 0.02, - "grad_norm": 1.8027795794557806, - "learning_rate": 1.4983050847457628e-05, - "loss": 0.2828, - "step": 442 - }, - { - "epoch": 0.02, - "grad_norm": 1.1544444706346058, - "learning_rate": 1.5016949152542374e-05, - "loss": 0.2894, - "step": 443 - }, - { - "epoch": 0.02, - "grad_norm": 1.28630079163346, - "learning_rate": 1.505084745762712e-05, - "loss": 0.2826, - "step": 444 - }, - { - "epoch": 0.02, - "grad_norm": 1.3571363572134902, - "learning_rate": 1.5084745762711865e-05, - "loss": 0.2838, - "step": 445 - }, - { - "epoch": 0.02, - "grad_norm": 1.4725896993155925, - "learning_rate": 1.511864406779661e-05, - "loss": 0.2623, - "step": 446 - }, - { - "epoch": 0.02, - "grad_norm": 1.1861507973687793, - "learning_rate": 1.5152542372881358e-05, - "loss": 0.2835, - "step": 447 - }, - { - "epoch": 0.02, - "grad_norm": 1.8819167655356706, - "learning_rate": 1.5186440677966103e-05, - "loss": 0.2677, - "step": 448 - }, - { - "epoch": 0.02, - "grad_norm": 1.1347425370954585, - "learning_rate": 1.522033898305085e-05, - "loss": 0.2794, - "step": 449 - }, - { - "epoch": 0.02, - "grad_norm": 1.1153160184489386, - "learning_rate": 1.5254237288135594e-05, - "loss": 0.2658, - "step": 450 - }, - { - "epoch": 0.02, - "grad_norm": 1.1449747260362184, - "learning_rate": 1.528813559322034e-05, - "loss": 0.2524, - "step": 451 - }, - { - "epoch": 0.02, - "grad_norm": 1.441613690575214, - "learning_rate": 1.5322033898305085e-05, - "loss": 0.2901, - "step": 452 - }, - { - "epoch": 0.02, - "grad_norm": 1.1305687396821262, - "learning_rate": 1.5355932203389833e-05, - "loss": 0.2702, - "step": 453 - }, - { - "epoch": 0.02, - "grad_norm": 1.627737800353934, - "learning_rate": 1.5389830508474578e-05, - "loss": 0.2708, - "step": 454 - }, - { - "epoch": 0.02, - "grad_norm": 1.1744672296713026, - "learning_rate": 1.5423728813559326e-05, - "loss": 0.2516, - "step": 455 - }, - { - "epoch": 0.02, - "grad_norm": 1.2999579170148163, - "learning_rate": 1.545762711864407e-05, - "loss": 0.2595, - "step": 456 - }, - { - "epoch": 0.02, - "grad_norm": 1.407224076389098, - "learning_rate": 1.5491525423728815e-05, - "loss": 0.2781, - "step": 457 - }, - { - "epoch": 0.02, - "grad_norm": 1.7356557602560885, - "learning_rate": 1.552542372881356e-05, - "loss": 0.274, - "step": 458 - }, - { - "epoch": 0.02, - "grad_norm": 1.7821775541345888, - "learning_rate": 1.5559322033898305e-05, - "loss": 0.2563, - "step": 459 - }, - { - "epoch": 0.02, - "grad_norm": 1.3388894848421589, - "learning_rate": 1.5593220338983053e-05, - "loss": 0.2848, - "step": 460 - }, - { - "epoch": 0.02, - "grad_norm": 1.42437970488922, - "learning_rate": 1.5627118644067798e-05, - "loss": 0.261, - "step": 461 - }, - { - "epoch": 0.02, - "grad_norm": 1.2321061249146887, - "learning_rate": 1.5661016949152542e-05, - "loss": 0.2685, - "step": 462 - }, - { - "epoch": 0.02, - "grad_norm": 1.6009091932685735, - "learning_rate": 1.5694915254237287e-05, - "loss": 0.2772, - "step": 463 - }, - { - "epoch": 0.02, - "grad_norm": 1.2297059855573542, - "learning_rate": 1.5728813559322035e-05, - "loss": 0.2537, - "step": 464 - }, - { - "epoch": 0.02, - "grad_norm": 1.3453190050509531, - "learning_rate": 1.576271186440678e-05, - "loss": 0.2881, - "step": 465 - }, - { - "epoch": 0.02, - "grad_norm": 1.9238560454482667, - "learning_rate": 1.5796610169491528e-05, - "loss": 0.2704, - "step": 466 - }, - { - "epoch": 0.02, - "grad_norm": 1.5999053962529295, - "learning_rate": 1.5830508474576272e-05, - "loss": 0.2626, - "step": 467 - }, - { - "epoch": 0.02, - "grad_norm": 1.3191503928216304, - "learning_rate": 1.586440677966102e-05, - "loss": 0.277, - "step": 468 - }, - { - "epoch": 0.02, - "grad_norm": 1.5589440578069296, - "learning_rate": 1.5898305084745765e-05, - "loss": 0.2668, - "step": 469 - }, - { - "epoch": 0.02, - "grad_norm": 1.0608630567816568, - "learning_rate": 1.593220338983051e-05, - "loss": 0.2587, - "step": 470 - }, - { - "epoch": 0.02, - "grad_norm": 1.278828909895488, - "learning_rate": 1.5966101694915255e-05, - "loss": 0.2757, - "step": 471 - }, - { - "epoch": 0.02, - "grad_norm": 1.2073777948507576, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.3151, - "step": 472 - }, - { - "epoch": 0.02, - "grad_norm": 1.3296212003545202, - "learning_rate": 1.6033898305084747e-05, - "loss": 0.289, - "step": 473 - }, - { - "epoch": 0.02, - "grad_norm": 1.1975562289235413, - "learning_rate": 1.6067796610169492e-05, - "loss": 0.269, - "step": 474 - }, - { - "epoch": 0.02, - "grad_norm": 1.890410123709992, - "learning_rate": 1.6101694915254237e-05, - "loss": 0.248, - "step": 475 - }, - { - "epoch": 0.02, - "grad_norm": 1.2316296356173326, - "learning_rate": 1.6135593220338985e-05, - "loss": 0.2764, - "step": 476 - }, - { - "epoch": 0.02, - "grad_norm": 1.158451224340742, - "learning_rate": 1.616949152542373e-05, - "loss": 0.2515, - "step": 477 - }, - { - "epoch": 0.02, - "grad_norm": 1.386768235161273, - "learning_rate": 1.6203389830508474e-05, - "loss": 0.2766, - "step": 478 - }, - { - "epoch": 0.02, - "grad_norm": 0.9916586427606384, - "learning_rate": 1.6237288135593222e-05, - "loss": 0.2425, - "step": 479 - }, - { - "epoch": 0.02, - "grad_norm": 1.0663820855084396, - "learning_rate": 1.6271186440677967e-05, - "loss": 0.2849, - "step": 480 - }, - { - "epoch": 0.02, - "grad_norm": 1.2016145341855438, - "learning_rate": 1.6305084745762715e-05, - "loss": 0.2686, - "step": 481 - }, - { - "epoch": 0.02, - "grad_norm": 1.1723921344826131, - "learning_rate": 1.633898305084746e-05, - "loss": 0.2573, - "step": 482 - }, - { - "epoch": 0.02, - "grad_norm": 1.2111557499904375, - "learning_rate": 1.6372881355932204e-05, - "loss": 0.2532, - "step": 483 - }, - { - "epoch": 0.02, - "grad_norm": 1.699199017647184, - "learning_rate": 1.640677966101695e-05, - "loss": 0.2902, - "step": 484 - }, - { - "epoch": 0.02, - "grad_norm": 1.2909577938024068, - "learning_rate": 1.6440677966101697e-05, - "loss": 0.2663, - "step": 485 - }, - { - "epoch": 0.02, - "grad_norm": 1.5571826032846792, - "learning_rate": 1.6474576271186442e-05, - "loss": 0.2645, - "step": 486 - }, - { - "epoch": 0.02, - "grad_norm": 1.1279016844343919, - "learning_rate": 1.6508474576271187e-05, - "loss": 0.2576, - "step": 487 - }, - { - "epoch": 0.02, - "grad_norm": 1.5591821887853188, - "learning_rate": 1.654237288135593e-05, - "loss": 0.2851, - "step": 488 - }, - { - "epoch": 0.02, - "grad_norm": 1.2092586739280375, - "learning_rate": 1.657627118644068e-05, - "loss": 0.2801, - "step": 489 - }, - { - "epoch": 0.02, - "grad_norm": 1.1463301267011572, - "learning_rate": 1.6610169491525424e-05, - "loss": 0.2563, - "step": 490 - }, - { - "epoch": 0.02, - "grad_norm": 1.2444534543779873, - "learning_rate": 1.6644067796610172e-05, - "loss": 0.2642, - "step": 491 - }, - { - "epoch": 0.03, - "grad_norm": 1.3314286719081205, - "learning_rate": 1.6677966101694917e-05, - "loss": 0.2806, - "step": 492 - }, - { - "epoch": 0.03, - "grad_norm": 1.4953543966574252, - "learning_rate": 1.671186440677966e-05, - "loss": 0.2812, - "step": 493 - }, - { - "epoch": 0.03, - "grad_norm": 1.229405126380989, - "learning_rate": 1.674576271186441e-05, - "loss": 0.2702, - "step": 494 - }, - { - "epoch": 0.03, - "grad_norm": 1.226387305787883, - "learning_rate": 1.6779661016949154e-05, - "loss": 0.2672, - "step": 495 - }, - { - "epoch": 0.03, - "grad_norm": 1.2279350727222604, - "learning_rate": 1.68135593220339e-05, - "loss": 0.2746, - "step": 496 - }, - { - "epoch": 0.03, - "grad_norm": 1.4752201164031455, - "learning_rate": 1.6847457627118647e-05, - "loss": 0.2615, - "step": 497 - }, - { - "epoch": 0.03, - "grad_norm": 1.5171752111549475, - "learning_rate": 1.6881355932203392e-05, - "loss": 0.2511, - "step": 498 - }, - { - "epoch": 0.03, - "grad_norm": 1.4085635220195363, - "learning_rate": 1.6915254237288136e-05, - "loss": 0.2503, - "step": 499 - }, - { - "epoch": 0.03, - "grad_norm": 1.5619747875254615, - "learning_rate": 1.694915254237288e-05, - "loss": 0.279, - "step": 500 - }, - { - "epoch": 0.03, - "grad_norm": 1.6055369708361142, - "learning_rate": 1.698305084745763e-05, - "loss": 0.2639, - "step": 501 - }, - { - "epoch": 0.03, - "grad_norm": 1.6065530819560707, - "learning_rate": 1.7016949152542374e-05, - "loss": 0.2877, - "step": 502 - }, - { - "epoch": 0.03, - "grad_norm": 1.3686957461363003, - "learning_rate": 1.705084745762712e-05, - "loss": 0.2679, - "step": 503 - }, - { - "epoch": 0.03, - "grad_norm": 1.3246185897098746, - "learning_rate": 1.7084745762711867e-05, - "loss": 0.2803, - "step": 504 - }, - { - "epoch": 0.03, - "grad_norm": 1.601972430115074, - "learning_rate": 1.711864406779661e-05, - "loss": 0.2557, - "step": 505 - }, - { - "epoch": 0.03, - "grad_norm": 1.4960766298227883, - "learning_rate": 1.715254237288136e-05, - "loss": 0.2722, - "step": 506 - }, - { - "epoch": 0.03, - "grad_norm": 1.7165264426107494, - "learning_rate": 1.7186440677966104e-05, - "loss": 0.2747, - "step": 507 - }, - { - "epoch": 0.03, - "grad_norm": 1.300412910903492, - "learning_rate": 1.722033898305085e-05, - "loss": 0.2704, - "step": 508 - }, - { - "epoch": 0.03, - "grad_norm": 1.4490955968513795, - "learning_rate": 1.7254237288135597e-05, - "loss": 0.2591, - "step": 509 - }, - { - "epoch": 0.03, - "grad_norm": 1.3759244398932398, - "learning_rate": 1.728813559322034e-05, - "loss": 0.2969, - "step": 510 - }, - { - "epoch": 0.03, - "grad_norm": 1.6898284753441082, - "learning_rate": 1.7322033898305086e-05, - "loss": 0.2443, - "step": 511 - }, - { - "epoch": 0.03, - "grad_norm": 1.4318962229777292, - "learning_rate": 1.735593220338983e-05, - "loss": 0.2513, - "step": 512 - }, - { - "epoch": 0.03, - "grad_norm": 1.743666701146938, - "learning_rate": 1.7389830508474576e-05, - "loss": 0.2814, - "step": 513 - }, - { - "epoch": 0.03, - "grad_norm": 1.7193073886473862, - "learning_rate": 1.7423728813559324e-05, - "loss": 0.2888, - "step": 514 - }, - { - "epoch": 0.03, - "grad_norm": 1.557692647375557, - "learning_rate": 1.745762711864407e-05, - "loss": 0.2702, - "step": 515 - }, - { - "epoch": 0.03, - "grad_norm": 1.3850957268156736, - "learning_rate": 1.7491525423728813e-05, - "loss": 0.2806, - "step": 516 - }, - { - "epoch": 0.03, - "grad_norm": 1.687921295616476, - "learning_rate": 1.752542372881356e-05, - "loss": 0.246, - "step": 517 - }, - { - "epoch": 0.03, - "grad_norm": 1.1706695625942483, - "learning_rate": 1.7559322033898306e-05, - "loss": 0.251, - "step": 518 - }, - { - "epoch": 0.03, - "grad_norm": 4.037585934141602, - "learning_rate": 1.7593220338983054e-05, - "loss": 0.27, - "step": 519 - }, - { - "epoch": 0.03, - "grad_norm": 1.3341641611216237, - "learning_rate": 1.76271186440678e-05, - "loss": 0.2909, - "step": 520 - }, - { - "epoch": 0.03, - "grad_norm": 1.1083063405567553, - "learning_rate": 1.7661016949152543e-05, - "loss": 0.273, - "step": 521 - }, - { - "epoch": 0.03, - "grad_norm": 1.1166108132403247, - "learning_rate": 1.769491525423729e-05, - "loss": 0.2647, - "step": 522 - }, - { - "epoch": 0.03, - "grad_norm": 1.4730148417394753, - "learning_rate": 1.7728813559322036e-05, - "loss": 0.2574, - "step": 523 - }, - { - "epoch": 0.03, - "grad_norm": 1.3796044596292965, - "learning_rate": 1.776271186440678e-05, - "loss": 0.2809, - "step": 524 - }, - { - "epoch": 0.03, - "grad_norm": 6.668912240239683, - "learning_rate": 1.7796610169491526e-05, - "loss": 0.2896, - "step": 525 - }, - { - "epoch": 0.03, - "grad_norm": 1.6221075546474033, - "learning_rate": 1.7830508474576274e-05, - "loss": 0.2429, - "step": 526 - }, - { - "epoch": 0.03, - "grad_norm": 1.314326489119452, - "learning_rate": 1.7864406779661018e-05, - "loss": 0.2974, - "step": 527 - }, - { - "epoch": 0.03, - "grad_norm": 1.4011654180586948, - "learning_rate": 1.7898305084745763e-05, - "loss": 0.2805, - "step": 528 - }, - { - "epoch": 0.03, - "grad_norm": 1.456593465147238, - "learning_rate": 1.7932203389830508e-05, - "loss": 0.271, - "step": 529 - }, - { - "epoch": 0.03, - "grad_norm": 1.4153772547557295, - "learning_rate": 1.7966101694915256e-05, - "loss": 0.2993, - "step": 530 - }, - { - "epoch": 0.03, - "grad_norm": 1.460962611843914, - "learning_rate": 1.8e-05, - "loss": 0.288, - "step": 531 - }, - { - "epoch": 0.03, - "grad_norm": 1.3671451429644492, - "learning_rate": 1.803389830508475e-05, - "loss": 0.2591, - "step": 532 - }, - { - "epoch": 0.03, - "grad_norm": 1.558548773828584, - "learning_rate": 1.8067796610169493e-05, - "loss": 0.2689, - "step": 533 - }, - { - "epoch": 0.03, - "grad_norm": 1.5127491344620718, - "learning_rate": 1.810169491525424e-05, - "loss": 0.3259, - "step": 534 - }, - { - "epoch": 0.03, - "grad_norm": 1.5486731950661254, - "learning_rate": 1.8135593220338986e-05, - "loss": 0.2712, - "step": 535 - }, - { - "epoch": 0.03, - "grad_norm": 1.3193510633104664, - "learning_rate": 1.816949152542373e-05, - "loss": 0.2838, - "step": 536 - }, - { - "epoch": 0.03, - "grad_norm": 1.0997689513888096, - "learning_rate": 1.8203389830508475e-05, - "loss": 0.2525, - "step": 537 - }, - { - "epoch": 0.03, - "grad_norm": 1.2756030235865132, - "learning_rate": 1.823728813559322e-05, - "loss": 0.307, - "step": 538 - }, - { - "epoch": 0.03, - "grad_norm": 1.4687241798166086, - "learning_rate": 1.8271186440677968e-05, - "loss": 0.2428, - "step": 539 - }, - { - "epoch": 0.03, - "grad_norm": 1.3399067424295357, - "learning_rate": 1.8305084745762713e-05, - "loss": 0.2732, - "step": 540 - }, - { - "epoch": 0.03, - "grad_norm": 1.6084487910737866, - "learning_rate": 1.8338983050847458e-05, - "loss": 0.2552, - "step": 541 - }, - { - "epoch": 0.03, - "grad_norm": 1.2944041185222261, - "learning_rate": 1.8372881355932202e-05, - "loss": 0.2804, - "step": 542 - }, - { - "epoch": 0.03, - "grad_norm": 1.2566959055725666, - "learning_rate": 1.840677966101695e-05, - "loss": 0.259, - "step": 543 - }, - { - "epoch": 0.03, - "grad_norm": 1.2933124442569186, - "learning_rate": 1.8440677966101695e-05, - "loss": 0.2676, - "step": 544 - }, - { - "epoch": 0.03, - "grad_norm": 1.3273078884878304, - "learning_rate": 1.8474576271186443e-05, - "loss": 0.2719, - "step": 545 - }, - { - "epoch": 0.03, - "grad_norm": 1.263916893620895, - "learning_rate": 1.8508474576271188e-05, - "loss": 0.286, - "step": 546 - }, - { - "epoch": 0.03, - "grad_norm": 1.1352925308401638, - "learning_rate": 1.8542372881355936e-05, - "loss": 0.2449, - "step": 547 - }, - { - "epoch": 0.03, - "grad_norm": 1.4287019035022674, - "learning_rate": 1.857627118644068e-05, - "loss": 0.2854, - "step": 548 - }, - { - "epoch": 0.03, - "grad_norm": 1.536114169133046, - "learning_rate": 1.8610169491525425e-05, - "loss": 0.2749, - "step": 549 - }, - { - "epoch": 0.03, - "grad_norm": 1.3636384713719114, - "learning_rate": 1.864406779661017e-05, - "loss": 0.2798, - "step": 550 - }, - { - "epoch": 0.03, - "grad_norm": 1.1282443226800742, - "learning_rate": 1.8677966101694918e-05, - "loss": 0.2545, - "step": 551 - }, - { - "epoch": 0.03, - "grad_norm": 1.4372325985492267, - "learning_rate": 1.8711864406779663e-05, - "loss": 0.2764, - "step": 552 - }, - { - "epoch": 0.03, - "grad_norm": 1.431962567950389, - "learning_rate": 1.8745762711864407e-05, - "loss": 0.2535, - "step": 553 - }, - { - "epoch": 0.03, - "grad_norm": 1.2145997199986456, - "learning_rate": 1.8779661016949152e-05, - "loss": 0.2566, - "step": 554 - }, - { - "epoch": 0.03, - "grad_norm": 1.1352608835127773, - "learning_rate": 1.88135593220339e-05, - "loss": 0.2535, - "step": 555 - }, - { - "epoch": 0.03, - "grad_norm": 1.2210496354415843, - "learning_rate": 1.8847457627118645e-05, - "loss": 0.2643, - "step": 556 - }, - { - "epoch": 0.03, - "grad_norm": 1.1232924683009815, - "learning_rate": 1.8881355932203393e-05, - "loss": 0.2627, - "step": 557 - }, - { - "epoch": 0.03, - "grad_norm": 3.4024455416720127, - "learning_rate": 1.8915254237288138e-05, - "loss": 0.2525, - "step": 558 - }, - { - "epoch": 0.03, - "grad_norm": 1.3887487502150935, - "learning_rate": 1.8949152542372882e-05, - "loss": 0.2758, - "step": 559 - }, - { - "epoch": 0.03, - "grad_norm": 1.232504365515857, - "learning_rate": 1.898305084745763e-05, - "loss": 0.2406, - "step": 560 - }, - { - "epoch": 0.03, - "grad_norm": 1.3940857376689135, - "learning_rate": 1.9016949152542375e-05, - "loss": 0.2558, - "step": 561 - }, - { - "epoch": 0.03, - "grad_norm": 1.345932352677997, - "learning_rate": 1.905084745762712e-05, - "loss": 0.2626, - "step": 562 - }, - { - "epoch": 0.03, - "grad_norm": 1.1157309455332585, - "learning_rate": 1.9084745762711868e-05, - "loss": 0.2632, - "step": 563 - }, - { - "epoch": 0.03, - "grad_norm": 1.156636964733872, - "learning_rate": 1.9118644067796613e-05, - "loss": 0.2781, - "step": 564 - }, - { - "epoch": 0.03, - "grad_norm": 1.0729683625503257, - "learning_rate": 1.9152542372881357e-05, - "loss": 0.2457, - "step": 565 - }, - { - "epoch": 0.03, - "grad_norm": 1.1647734275933048, - "learning_rate": 1.9186440677966102e-05, - "loss": 0.2393, - "step": 566 - }, - { - "epoch": 0.03, - "grad_norm": 1.5138294469581877, - "learning_rate": 1.9220338983050847e-05, - "loss": 0.2876, - "step": 567 - }, - { - "epoch": 0.03, - "grad_norm": 1.323351272486395, - "learning_rate": 1.9254237288135595e-05, - "loss": 0.2396, - "step": 568 - }, - { - "epoch": 0.03, - "grad_norm": 1.9451745243471656, - "learning_rate": 1.928813559322034e-05, - "loss": 0.2514, - "step": 569 - }, - { - "epoch": 0.03, - "grad_norm": 1.2208860898726068, - "learning_rate": 1.9322033898305087e-05, - "loss": 0.2516, - "step": 570 - }, - { - "epoch": 0.03, - "grad_norm": 1.3855491705683636, - "learning_rate": 1.9355932203389832e-05, - "loss": 0.2758, - "step": 571 - }, - { - "epoch": 0.03, - "grad_norm": 1.429772998796651, - "learning_rate": 1.938983050847458e-05, - "loss": 0.2356, - "step": 572 - }, - { - "epoch": 0.03, - "grad_norm": 1.7057138660762692, - "learning_rate": 1.9423728813559325e-05, - "loss": 0.2283, - "step": 573 - }, - { - "epoch": 0.03, - "grad_norm": 1.51201577923191, - "learning_rate": 1.945762711864407e-05, - "loss": 0.2671, - "step": 574 - }, - { - "epoch": 0.03, - "grad_norm": 1.2651595136689437, - "learning_rate": 1.9491525423728814e-05, - "loss": 0.2801, - "step": 575 - }, - { - "epoch": 0.03, - "grad_norm": 1.3249553121989968, - "learning_rate": 1.9525423728813562e-05, - "loss": 0.2787, - "step": 576 - }, - { - "epoch": 0.03, - "grad_norm": 2.74385495359413, - "learning_rate": 1.9559322033898307e-05, - "loss": 0.2608, - "step": 577 - }, - { - "epoch": 0.03, - "grad_norm": 1.6741267385582472, - "learning_rate": 1.9593220338983052e-05, - "loss": 0.258, - "step": 578 - }, - { - "epoch": 0.03, - "grad_norm": 1.4888420560673257, - "learning_rate": 1.9627118644067796e-05, - "loss": 0.281, - "step": 579 - }, - { - "epoch": 0.03, - "grad_norm": 1.4190781041419613, - "learning_rate": 1.9661016949152545e-05, - "loss": 0.2438, - "step": 580 - }, - { - "epoch": 0.03, - "grad_norm": 2.12210293134904, - "learning_rate": 1.969491525423729e-05, - "loss": 0.2877, - "step": 581 - }, - { - "epoch": 0.03, - "grad_norm": 2.441475622095831, - "learning_rate": 1.9728813559322034e-05, - "loss": 0.253, - "step": 582 - }, - { - "epoch": 0.03, - "grad_norm": 3.745675132840984, - "learning_rate": 1.9762711864406782e-05, - "loss": 0.2806, - "step": 583 - }, - { - "epoch": 0.03, - "grad_norm": 2.3836639201458314, - "learning_rate": 1.9796610169491527e-05, - "loss": 0.3177, - "step": 584 - }, - { - "epoch": 0.03, - "grad_norm": 3.30532147466716, - "learning_rate": 1.9830508474576275e-05, - "loss": 0.2846, - "step": 585 - }, - { - "epoch": 0.03, - "grad_norm": 1.5409660828860403, - "learning_rate": 1.986440677966102e-05, - "loss": 0.2681, - "step": 586 - }, - { - "epoch": 0.03, - "grad_norm": 4.731352615733341, - "learning_rate": 1.9898305084745764e-05, - "loss": 0.2567, - "step": 587 - }, - { - "epoch": 0.03, - "grad_norm": 1.475042781560892, - "learning_rate": 1.9932203389830512e-05, - "loss": 0.2628, - "step": 588 - }, - { - "epoch": 0.03, - "grad_norm": 6.0834959806057185, - "learning_rate": 1.9966101694915257e-05, - "loss": 0.24, - "step": 589 - }, - { - "epoch": 0.03, - "grad_norm": 1.3873201495539649, - "learning_rate": 2e-05, - "loss": 0.26, - "step": 590 - }, - { - "epoch": 0.03, - "grad_norm": 1.3568449606917727, - "learning_rate": 1.999999986437472e-05, - "loss": 0.279, - "step": 591 - }, - { - "epoch": 0.03, - "grad_norm": 1.4736919604577718, - "learning_rate": 1.9999999457498875e-05, - "loss": 0.2684, - "step": 592 - }, - { - "epoch": 0.03, - "grad_norm": 1.2388398729176897, - "learning_rate": 1.9999998779372483e-05, - "loss": 0.296, - "step": 593 - }, - { - "epoch": 0.03, - "grad_norm": 1.266582495935962, - "learning_rate": 1.9999997829995557e-05, - "loss": 0.2539, - "step": 594 - }, - { - "epoch": 0.03, - "grad_norm": 1.563096295904235, - "learning_rate": 1.9999996609368124e-05, - "loss": 0.2957, - "step": 595 - }, - { - "epoch": 0.03, - "grad_norm": 1.5591054305818515, - "learning_rate": 1.9999995117490224e-05, - "loss": 0.2662, - "step": 596 - }, - { - "epoch": 0.03, - "grad_norm": 1.65621574378536, - "learning_rate": 1.9999993354361887e-05, - "loss": 0.2403, - "step": 597 - }, - { - "epoch": 0.03, - "grad_norm": 1.2548699939118981, - "learning_rate": 1.9999991319983162e-05, - "loss": 0.2813, - "step": 598 - }, - { - "epoch": 0.03, - "grad_norm": 1.8410217890854863, - "learning_rate": 1.9999989014354117e-05, - "loss": 0.2371, - "step": 599 - }, - { - "epoch": 0.03, - "grad_norm": 1.4153342484952254, - "learning_rate": 1.9999986437474797e-05, - "loss": 0.2565, - "step": 600 - }, - { - "epoch": 0.03, - "grad_norm": 1.10289019805283, - "learning_rate": 1.9999983589345282e-05, - "loss": 0.2698, - "step": 601 - }, - { - "epoch": 0.03, - "grad_norm": 4.453681957989118, - "learning_rate": 1.9999980469965646e-05, - "loss": 0.2506, - "step": 602 - }, - { - "epoch": 0.03, - "grad_norm": 1.948473664806546, - "learning_rate": 1.999997707933598e-05, - "loss": 0.2494, - "step": 603 - }, - { - "epoch": 0.03, - "grad_norm": 1.3618436212411482, - "learning_rate": 1.9999973417456367e-05, - "loss": 0.2644, - "step": 604 - }, - { - "epoch": 0.03, - "grad_norm": 1.3724384206589046, - "learning_rate": 1.999996948432691e-05, - "loss": 0.295, - "step": 605 - }, - { - "epoch": 0.03, - "grad_norm": 1.4885167672678044, - "learning_rate": 1.999996527994772e-05, - "loss": 0.2799, - "step": 606 - }, - { - "epoch": 0.03, - "grad_norm": 1.177622041217393, - "learning_rate": 1.9999960804318904e-05, - "loss": 0.2878, - "step": 607 - }, - { - "epoch": 0.03, - "grad_norm": 2.0149772969197515, - "learning_rate": 1.999995605744059e-05, - "loss": 0.257, - "step": 608 - }, - { - "epoch": 0.03, - "grad_norm": 1.691709339113414, - "learning_rate": 1.9999951039312897e-05, - "loss": 0.2524, - "step": 609 - }, - { - "epoch": 0.03, - "grad_norm": 1.0999445528030725, - "learning_rate": 1.999994574993597e-05, - "loss": 0.2612, - "step": 610 - }, - { - "epoch": 0.03, - "grad_norm": 1.5337654360480533, - "learning_rate": 1.999994018930995e-05, - "loss": 0.2948, - "step": 611 - }, - { - "epoch": 0.03, - "grad_norm": 1.1245129713817184, - "learning_rate": 1.9999934357434986e-05, - "loss": 0.2783, - "step": 612 - }, - { - "epoch": 0.03, - "grad_norm": 1.051000583585932, - "learning_rate": 1.9999928254311242e-05, - "loss": 0.2663, - "step": 613 - }, - { - "epoch": 0.03, - "grad_norm": 1.6065272631126895, - "learning_rate": 1.9999921879938875e-05, - "loss": 0.2648, - "step": 614 - }, - { - "epoch": 0.03, - "grad_norm": 1.040871656632654, - "learning_rate": 1.9999915234318064e-05, - "loss": 0.258, - "step": 615 - }, - { - "epoch": 0.03, - "grad_norm": 1.0489036653596298, - "learning_rate": 1.9999908317448985e-05, - "loss": 0.2619, - "step": 616 - }, - { - "epoch": 0.03, - "grad_norm": 1.2406900738256312, - "learning_rate": 1.9999901129331832e-05, - "loss": 0.2688, - "step": 617 - }, - { - "epoch": 0.03, - "grad_norm": 1.3093334914999668, - "learning_rate": 1.9999893669966794e-05, - "loss": 0.2856, - "step": 618 - }, - { - "epoch": 0.03, - "grad_norm": 1.0150981610834626, - "learning_rate": 1.9999885939354077e-05, - "loss": 0.267, - "step": 619 - }, - { - "epoch": 0.03, - "grad_norm": 1.1050649685427374, - "learning_rate": 1.9999877937493886e-05, - "loss": 0.2558, - "step": 620 - }, - { - "epoch": 0.03, - "grad_norm": 1.0775846893782826, - "learning_rate": 1.9999869664386443e-05, - "loss": 0.2675, - "step": 621 - }, - { - "epoch": 0.03, - "grad_norm": 1.0419452826452065, - "learning_rate": 1.999986112003197e-05, - "loss": 0.2645, - "step": 622 - }, - { - "epoch": 0.03, - "grad_norm": 1.3183338349176241, - "learning_rate": 1.99998523044307e-05, - "loss": 0.3004, - "step": 623 - }, - { - "epoch": 0.03, - "grad_norm": 0.9943967830586373, - "learning_rate": 1.999984321758287e-05, - "loss": 0.3, - "step": 624 - }, - { - "epoch": 0.03, - "grad_norm": 1.498860798184339, - "learning_rate": 1.999983385948873e-05, - "loss": 0.2547, - "step": 625 - }, - { - "epoch": 0.03, - "grad_norm": 4.537978887587886, - "learning_rate": 1.9999824230148532e-05, - "loss": 0.2938, - "step": 626 - }, - { - "epoch": 0.03, - "grad_norm": 0.9265795045788969, - "learning_rate": 1.999981432956254e-05, - "loss": 0.2698, - "step": 627 - }, - { - "epoch": 0.03, - "grad_norm": 1.92643007386415, - "learning_rate": 1.999980415773101e-05, - "loss": 0.3034, - "step": 628 - }, - { - "epoch": 0.03, - "grad_norm": 1.0860148365443336, - "learning_rate": 1.9999793714654236e-05, - "loss": 0.2617, - "step": 629 - }, - { - "epoch": 0.03, - "grad_norm": 3.232951628312408, - "learning_rate": 1.9999783000332486e-05, - "loss": 0.2436, - "step": 630 - }, - { - "epoch": 0.03, - "grad_norm": 1.6804316633229621, - "learning_rate": 1.9999772014766062e-05, - "loss": 0.305, - "step": 631 - }, - { - "epoch": 0.03, - "grad_norm": 2.0772468232195704, - "learning_rate": 1.9999760757955258e-05, - "loss": 0.284, - "step": 632 - }, - { - "epoch": 0.03, - "grad_norm": 1.4342896278895008, - "learning_rate": 1.9999749229900376e-05, - "loss": 0.2677, - "step": 633 - }, - { - "epoch": 0.03, - "grad_norm": 1.8471599254915816, - "learning_rate": 1.9999737430601734e-05, - "loss": 0.2557, - "step": 634 - }, - { - "epoch": 0.03, - "grad_norm": 1.281316376095995, - "learning_rate": 1.9999725360059648e-05, - "loss": 0.2817, - "step": 635 - }, - { - "epoch": 0.03, - "grad_norm": 1.3402740898159577, - "learning_rate": 1.9999713018274444e-05, - "loss": 0.265, - "step": 636 - }, - { - "epoch": 0.03, - "grad_norm": 1.3958741995969437, - "learning_rate": 1.9999700405246462e-05, - "loss": 0.3047, - "step": 637 - }, - { - "epoch": 0.03, - "grad_norm": 1.4108555514675845, - "learning_rate": 1.9999687520976043e-05, - "loss": 0.2721, - "step": 638 - }, - { - "epoch": 0.03, - "grad_norm": 1.2241788885846367, - "learning_rate": 1.9999674365463532e-05, - "loss": 0.2645, - "step": 639 - }, - { - "epoch": 0.03, - "grad_norm": 1.211714854940445, - "learning_rate": 1.999966093870929e-05, - "loss": 0.2947, - "step": 640 - }, - { - "epoch": 0.03, - "grad_norm": 1.2759128182319426, - "learning_rate": 1.999964724071368e-05, - "loss": 0.2615, - "step": 641 - }, - { - "epoch": 0.03, - "grad_norm": 1.437640733114652, - "learning_rate": 1.999963327147708e-05, - "loss": 0.2614, - "step": 642 - }, - { - "epoch": 0.03, - "grad_norm": 1.4274344790032485, - "learning_rate": 1.9999619030999853e-05, - "loss": 0.2999, - "step": 643 - }, - { - "epoch": 0.03, - "grad_norm": 2.1171499634893127, - "learning_rate": 1.9999604519282403e-05, - "loss": 0.2506, - "step": 644 - }, - { - "epoch": 0.03, - "grad_norm": 1.3503945628596226, - "learning_rate": 1.999958973632511e-05, - "loss": 0.2618, - "step": 645 - }, - { - "epoch": 0.03, - "grad_norm": 1.259518347435971, - "learning_rate": 1.9999574682128385e-05, - "loss": 0.2696, - "step": 646 - }, - { - "epoch": 0.03, - "grad_norm": 1.2025649744782203, - "learning_rate": 1.999955935669263e-05, - "loss": 0.2773, - "step": 647 - }, - { - "epoch": 0.03, - "grad_norm": 1.3748457996542727, - "learning_rate": 1.9999543760018264e-05, - "loss": 0.2731, - "step": 648 - }, - { - "epoch": 0.03, - "grad_norm": 1.286447108152475, - "learning_rate": 1.999952789210571e-05, - "loss": 0.2513, - "step": 649 - }, - { - "epoch": 0.03, - "grad_norm": 1.1625739396436945, - "learning_rate": 1.999951175295539e-05, - "loss": 0.2805, - "step": 650 - }, - { - "epoch": 0.03, - "grad_norm": 0.8934326598683626, - "learning_rate": 1.9999495342567754e-05, - "loss": 0.2703, - "step": 651 - }, - { - "epoch": 0.03, - "grad_norm": 1.2766979998578325, - "learning_rate": 1.999947866094324e-05, - "loss": 0.2709, - "step": 652 - }, - { - "epoch": 0.03, - "grad_norm": 1.1893920363930803, - "learning_rate": 1.999946170808231e-05, - "loss": 0.27, - "step": 653 - }, - { - "epoch": 0.03, - "grad_norm": 1.4471066279469218, - "learning_rate": 1.9999444483985408e-05, - "loss": 0.2706, - "step": 654 - }, - { - "epoch": 0.03, - "grad_norm": 1.0597340037872298, - "learning_rate": 1.9999426988653012e-05, - "loss": 0.2862, - "step": 655 - }, - { - "epoch": 0.03, - "grad_norm": 1.1600664080975713, - "learning_rate": 1.9999409222085596e-05, - "loss": 0.2725, - "step": 656 - }, - { - "epoch": 0.03, - "grad_norm": 1.2152493757257978, - "learning_rate": 1.9999391184283638e-05, - "loss": 0.2869, - "step": 657 - }, - { - "epoch": 0.03, - "grad_norm": 1.2957871045516876, - "learning_rate": 1.999937287524763e-05, - "loss": 0.2851, - "step": 658 - }, - { - "epoch": 0.03, - "grad_norm": 1.1059643575300484, - "learning_rate": 1.9999354294978066e-05, - "loss": 0.2534, - "step": 659 - }, - { - "epoch": 0.03, - "grad_norm": 1.0210468864347002, - "learning_rate": 1.9999335443475452e-05, - "loss": 0.2608, - "step": 660 - }, - { - "epoch": 0.03, - "grad_norm": 1.2949778276813804, - "learning_rate": 1.9999316320740302e-05, - "loss": 0.2689, - "step": 661 - }, - { - "epoch": 0.03, - "grad_norm": 1.1647843952730665, - "learning_rate": 1.9999296926773133e-05, - "loss": 0.2436, - "step": 662 - }, - { - "epoch": 0.03, - "grad_norm": 1.3682879718976364, - "learning_rate": 1.9999277261574468e-05, - "loss": 0.2564, - "step": 663 - }, - { - "epoch": 0.03, - "grad_norm": 1.2468871282706768, - "learning_rate": 1.999925732514484e-05, - "loss": 0.2714, - "step": 664 - }, - { - "epoch": 0.03, - "grad_norm": 1.7519741264204172, - "learning_rate": 1.99992371174848e-05, - "loss": 0.2612, - "step": 665 - }, - { - "epoch": 0.03, - "grad_norm": 1.161839164666825, - "learning_rate": 1.999921663859488e-05, - "loss": 0.2479, - "step": 666 - }, - { - "epoch": 0.03, - "grad_norm": 1.2751282318849002, - "learning_rate": 1.999919588847565e-05, - "loss": 0.2568, - "step": 667 - }, - { - "epoch": 0.03, - "grad_norm": 1.0965692887004719, - "learning_rate": 1.999917486712766e-05, - "loss": 0.269, - "step": 668 - }, - { - "epoch": 0.03, - "grad_norm": 1.0701031723437957, - "learning_rate": 1.9999153574551492e-05, - "loss": 0.2679, - "step": 669 - }, - { - "epoch": 0.03, - "grad_norm": 1.2142348695150131, - "learning_rate": 1.999913201074772e-05, - "loss": 0.265, - "step": 670 - }, - { - "epoch": 0.03, - "grad_norm": 1.06668955891612, - "learning_rate": 1.9999110175716924e-05, - "loss": 0.2521, - "step": 671 - }, - { - "epoch": 0.03, - "grad_norm": 1.0533089187255256, - "learning_rate": 1.99990880694597e-05, - "loss": 0.256, - "step": 672 - }, - { - "epoch": 0.03, - "grad_norm": 1.0623846929343919, - "learning_rate": 1.9999065691976648e-05, - "loss": 0.2956, - "step": 673 - }, - { - "epoch": 0.03, - "grad_norm": 0.9917831191421542, - "learning_rate": 1.9999043043268375e-05, - "loss": 0.2521, - "step": 674 - }, - { - "epoch": 0.03, - "grad_norm": 1.0909337781250295, - "learning_rate": 1.9999020123335496e-05, - "loss": 0.2541, - "step": 675 - }, - { - "epoch": 0.03, - "grad_norm": 1.2088864473371073, - "learning_rate": 1.9998996932178625e-05, - "loss": 0.2353, - "step": 676 - }, - { - "epoch": 0.03, - "grad_norm": 1.267002024540383, - "learning_rate": 1.9998973469798404e-05, - "loss": 0.2871, - "step": 677 - }, - { - "epoch": 0.03, - "grad_norm": 1.26664688395502, - "learning_rate": 1.9998949736195464e-05, - "loss": 0.2697, - "step": 678 - }, - { - "epoch": 0.03, - "grad_norm": 1.1258218909222184, - "learning_rate": 1.9998925731370443e-05, - "loss": 0.2767, - "step": 679 - }, - { - "epoch": 0.03, - "grad_norm": 1.4333128018170362, - "learning_rate": 1.9998901455324e-05, - "loss": 0.272, - "step": 680 - }, - { - "epoch": 0.03, - "grad_norm": 1.1539615363566418, - "learning_rate": 1.999887690805679e-05, - "loss": 0.2703, - "step": 681 - }, - { - "epoch": 0.03, - "grad_norm": 1.3055896441511934, - "learning_rate": 1.9998852089569473e-05, - "loss": 0.2805, - "step": 682 - }, - { - "epoch": 0.03, - "grad_norm": 0.9049653729869355, - "learning_rate": 1.9998826999862736e-05, - "loss": 0.2588, - "step": 683 - }, - { - "epoch": 0.03, - "grad_norm": 1.638495294480143, - "learning_rate": 1.9998801638937245e-05, - "loss": 0.2485, - "step": 684 - }, - { - "epoch": 0.03, - "grad_norm": 1.2575064722605738, - "learning_rate": 1.9998776006793702e-05, - "loss": 0.2661, - "step": 685 - }, - { - "epoch": 0.03, - "grad_norm": 1.1924520993769014, - "learning_rate": 1.999875010343279e-05, - "loss": 0.2907, - "step": 686 - }, - { - "epoch": 0.03, - "grad_norm": 1.0528956283908704, - "learning_rate": 1.999872392885522e-05, - "loss": 0.2597, - "step": 687 - }, - { - "epoch": 0.03, - "grad_norm": 1.0761853664702166, - "learning_rate": 1.9998697483061698e-05, - "loss": 0.2267, - "step": 688 - }, - { - "epoch": 0.04, - "grad_norm": 1.7045786645036725, - "learning_rate": 1.9998670766052942e-05, - "loss": 0.2515, - "step": 689 - }, - { - "epoch": 0.04, - "grad_norm": 1.0632182594277078, - "learning_rate": 1.9998643777829674e-05, - "loss": 0.2673, - "step": 690 - }, - { - "epoch": 0.04, - "grad_norm": 1.3976256225702135, - "learning_rate": 1.9998616518392633e-05, - "loss": 0.2639, - "step": 691 - }, - { - "epoch": 0.04, - "grad_norm": 1.0113588487491112, - "learning_rate": 1.999858898774255e-05, - "loss": 0.2794, - "step": 692 - }, - { - "epoch": 0.04, - "grad_norm": 1.0553084109359292, - "learning_rate": 1.999856118588018e-05, - "loss": 0.2411, - "step": 693 - }, - { - "epoch": 0.04, - "grad_norm": 1.0623825667352056, - "learning_rate": 1.999853311280627e-05, - "loss": 0.2567, - "step": 694 - }, - { - "epoch": 0.04, - "grad_norm": 1.6341578895660698, - "learning_rate": 1.9998504768521588e-05, - "loss": 0.2903, - "step": 695 - }, - { - "epoch": 0.04, - "grad_norm": 1.5184615805888484, - "learning_rate": 1.99984761530269e-05, - "loss": 0.2781, - "step": 696 - }, - { - "epoch": 0.04, - "grad_norm": 1.3640734092403373, - "learning_rate": 1.9998447266322974e-05, - "loss": 0.2571, - "step": 697 - }, - { - "epoch": 0.04, - "grad_norm": 1.0545124494588674, - "learning_rate": 1.9998418108410606e-05, - "loss": 0.2442, - "step": 698 - }, - { - "epoch": 0.04, - "grad_norm": 1.2493399526273568, - "learning_rate": 1.9998388679290583e-05, - "loss": 0.2984, - "step": 699 - }, - { - "epoch": 0.04, - "grad_norm": 1.1295762074044364, - "learning_rate": 1.9998358978963702e-05, - "loss": 0.2512, - "step": 700 - }, - { - "epoch": 0.04, - "grad_norm": 1.1603953126986037, - "learning_rate": 1.9998329007430767e-05, - "loss": 0.2696, - "step": 701 - }, - { - "epoch": 0.04, - "grad_norm": 1.2509835069608402, - "learning_rate": 1.9998298764692596e-05, - "loss": 0.2651, - "step": 702 - }, - { - "epoch": 0.04, - "grad_norm": 1.3348261112019737, - "learning_rate": 1.9998268250750006e-05, - "loss": 0.2583, - "step": 703 - }, - { - "epoch": 0.04, - "grad_norm": 1.3375545121799972, - "learning_rate": 1.9998237465603822e-05, - "loss": 0.28, - "step": 704 - }, - { - "epoch": 0.04, - "grad_norm": 1.363579686038191, - "learning_rate": 1.9998206409254886e-05, - "loss": 0.2937, - "step": 705 - }, - { - "epoch": 0.04, - "grad_norm": 1.6025082506089567, - "learning_rate": 1.9998175081704035e-05, - "loss": 0.2634, - "step": 706 - }, - { - "epoch": 0.04, - "grad_norm": 1.2722761240387512, - "learning_rate": 1.9998143482952117e-05, - "loss": 0.2599, - "step": 707 - }, - { - "epoch": 0.04, - "grad_norm": 5.599998265070209, - "learning_rate": 1.9998111612999995e-05, - "loss": 0.2505, - "step": 708 - }, - { - "epoch": 0.04, - "grad_norm": 1.317103804306265, - "learning_rate": 1.999807947184853e-05, - "loss": 0.2807, - "step": 709 - }, - { - "epoch": 0.04, - "grad_norm": 1.3710100936702176, - "learning_rate": 1.99980470594986e-05, - "loss": 0.2641, - "step": 710 - }, - { - "epoch": 0.04, - "grad_norm": 1.108376334270745, - "learning_rate": 1.9998014375951073e-05, - "loss": 0.2403, - "step": 711 - }, - { - "epoch": 0.04, - "grad_norm": 1.252535265060163, - "learning_rate": 1.999798142120684e-05, - "loss": 0.2769, - "step": 712 - }, - { - "epoch": 0.04, - "grad_norm": 1.3742660328462404, - "learning_rate": 1.99979481952668e-05, - "loss": 0.2906, - "step": 713 - }, - { - "epoch": 0.04, - "grad_norm": 3.644541022537266, - "learning_rate": 1.999791469813185e-05, - "loss": 0.257, - "step": 714 - }, - { - "epoch": 0.04, - "grad_norm": 1.0749151196722126, - "learning_rate": 1.9997880929802895e-05, - "loss": 0.262, - "step": 715 - }, - { - "epoch": 0.04, - "grad_norm": 1.1730231560972142, - "learning_rate": 1.999784689028086e-05, - "loss": 0.2644, - "step": 716 - }, - { - "epoch": 0.04, - "grad_norm": 1.6294158398432395, - "learning_rate": 1.999781257956666e-05, - "loss": 0.2706, - "step": 717 - }, - { - "epoch": 0.04, - "grad_norm": 1.0705353040911285, - "learning_rate": 1.999777799766123e-05, - "loss": 0.265, - "step": 718 - }, - { - "epoch": 0.04, - "grad_norm": 1.1518326861280468, - "learning_rate": 1.9997743144565513e-05, - "loss": 0.2823, - "step": 719 - }, - { - "epoch": 0.04, - "grad_norm": 1.17251836953955, - "learning_rate": 1.999770802028044e-05, - "loss": 0.2623, - "step": 720 - }, - { - "epoch": 0.04, - "grad_norm": 1.1856563147370942, - "learning_rate": 1.9997672624806976e-05, - "loss": 0.2777, - "step": 721 - }, - { - "epoch": 0.04, - "grad_norm": 1.4003250521694823, - "learning_rate": 1.999763695814608e-05, - "loss": 0.2464, - "step": 722 - }, - { - "epoch": 0.04, - "grad_norm": 2.5113897733883497, - "learning_rate": 1.9997601020298713e-05, - "loss": 0.2613, - "step": 723 - }, - { - "epoch": 0.04, - "grad_norm": 1.2219147153367094, - "learning_rate": 1.9997564811265854e-05, - "loss": 0.2616, - "step": 724 - }, - { - "epoch": 0.04, - "grad_norm": 1.4067252099576408, - "learning_rate": 1.999752833104849e-05, - "loss": 0.2842, - "step": 725 - }, - { - "epoch": 0.04, - "grad_norm": 1.0208594719205228, - "learning_rate": 1.9997491579647595e-05, - "loss": 0.2505, - "step": 726 - }, - { - "epoch": 0.04, - "grad_norm": 1.1036370771367006, - "learning_rate": 1.9997454557064185e-05, - "loss": 0.2498, - "step": 727 - }, - { - "epoch": 0.04, - "grad_norm": 1.7207498654087374, - "learning_rate": 1.9997417263299256e-05, - "loss": 0.2713, - "step": 728 - }, - { - "epoch": 0.04, - "grad_norm": 1.3992568424381269, - "learning_rate": 1.999737969835381e-05, - "loss": 0.2955, - "step": 729 - }, - { - "epoch": 0.04, - "grad_norm": 1.9982495451376947, - "learning_rate": 1.9997341862228886e-05, - "loss": 0.2583, - "step": 730 - }, - { - "epoch": 0.04, - "grad_norm": 1.3960847560767105, - "learning_rate": 1.999730375492549e-05, - "loss": 0.2939, - "step": 731 - }, - { - "epoch": 0.04, - "grad_norm": 1.214687864500847, - "learning_rate": 1.999726537644467e-05, - "loss": 0.254, - "step": 732 - }, - { - "epoch": 0.04, - "grad_norm": 1.4135176505419116, - "learning_rate": 1.9997226726787462e-05, - "loss": 0.3036, - "step": 733 - }, - { - "epoch": 0.04, - "grad_norm": 1.3404019047194708, - "learning_rate": 1.9997187805954916e-05, - "loss": 0.2803, - "step": 734 - }, - { - "epoch": 0.04, - "grad_norm": 1.0201205942546145, - "learning_rate": 1.999714861394808e-05, - "loss": 0.273, - "step": 735 - }, - { - "epoch": 0.04, - "grad_norm": 1.0396421943283112, - "learning_rate": 1.999710915076803e-05, - "loss": 0.2595, - "step": 736 - }, - { - "epoch": 0.04, - "grad_norm": 1.0398657233769981, - "learning_rate": 1.9997069416415824e-05, - "loss": 0.2853, - "step": 737 - }, - { - "epoch": 0.04, - "grad_norm": 1.0920534147148957, - "learning_rate": 1.9997029410892546e-05, - "loss": 0.241, - "step": 738 - }, - { - "epoch": 0.04, - "grad_norm": 1.0928042063839294, - "learning_rate": 1.9996989134199287e-05, - "loss": 0.2144, - "step": 739 - }, - { - "epoch": 0.04, - "grad_norm": 1.0198803572927333, - "learning_rate": 1.9996948586337127e-05, - "loss": 0.2572, - "step": 740 - }, - { - "epoch": 0.04, - "grad_norm": 1.4319185641684506, - "learning_rate": 1.9996907767307175e-05, - "loss": 0.2694, - "step": 741 - }, - { - "epoch": 0.04, - "grad_norm": 1.0858520979144273, - "learning_rate": 1.9996866677110534e-05, - "loss": 0.2594, - "step": 742 - }, - { - "epoch": 0.04, - "grad_norm": 1.3255623969997914, - "learning_rate": 1.999682531574832e-05, - "loss": 0.2538, - "step": 743 - }, - { - "epoch": 0.04, - "grad_norm": 1.0108943423155112, - "learning_rate": 1.9996783683221652e-05, - "loss": 0.2516, - "step": 744 - }, - { - "epoch": 0.04, - "grad_norm": 1.4094537154588183, - "learning_rate": 1.9996741779531665e-05, - "loss": 0.2707, - "step": 745 - }, - { - "epoch": 0.04, - "grad_norm": 1.1793015053224885, - "learning_rate": 1.9996699604679493e-05, - "loss": 0.2646, - "step": 746 - }, - { - "epoch": 0.04, - "grad_norm": 1.0792637757557646, - "learning_rate": 1.9996657158666276e-05, - "loss": 0.2478, - "step": 747 - }, - { - "epoch": 0.04, - "grad_norm": 1.039527097370698, - "learning_rate": 1.999661444149317e-05, - "loss": 0.2534, - "step": 748 - }, - { - "epoch": 0.04, - "grad_norm": 1.2559400175483508, - "learning_rate": 1.9996571453161338e-05, - "loss": 0.2882, - "step": 749 - }, - { - "epoch": 0.04, - "grad_norm": 1.1366971909735748, - "learning_rate": 1.9996528193671936e-05, - "loss": 0.2505, - "step": 750 - }, - { - "epoch": 0.04, - "grad_norm": 1.3078857802825534, - "learning_rate": 1.9996484663026143e-05, - "loss": 0.2835, - "step": 751 - }, - { - "epoch": 0.04, - "grad_norm": 1.0829375306862181, - "learning_rate": 1.999644086122514e-05, - "loss": 0.2479, - "step": 752 - }, - { - "epoch": 0.04, - "grad_norm": 1.4273810049424516, - "learning_rate": 1.999639678827011e-05, - "loss": 0.2669, - "step": 753 - }, - { - "epoch": 0.04, - "grad_norm": 1.3117200257674768, - "learning_rate": 1.9996352444162257e-05, - "loss": 0.2834, - "step": 754 - }, - { - "epoch": 0.04, - "grad_norm": 1.3033840703007538, - "learning_rate": 1.999630782890278e-05, - "loss": 0.24, - "step": 755 - }, - { - "epoch": 0.04, - "grad_norm": 1.4419353437645621, - "learning_rate": 1.9996262942492882e-05, - "loss": 0.2608, - "step": 756 - }, - { - "epoch": 0.04, - "grad_norm": 2.0632655417967007, - "learning_rate": 1.9996217784933794e-05, - "loss": 0.293, - "step": 757 - }, - { - "epoch": 0.04, - "grad_norm": 1.138628289340763, - "learning_rate": 1.999617235622673e-05, - "loss": 0.2835, - "step": 758 - }, - { - "epoch": 0.04, - "grad_norm": 1.8702915385259948, - "learning_rate": 1.999612665637293e-05, - "loss": 0.2692, - "step": 759 - }, - { - "epoch": 0.04, - "grad_norm": 1.130927538844785, - "learning_rate": 1.9996080685373628e-05, - "loss": 0.2563, - "step": 760 - }, - { - "epoch": 0.04, - "grad_norm": 1.1131329974735835, - "learning_rate": 1.999603444323007e-05, - "loss": 0.2744, - "step": 761 - }, - { - "epoch": 0.04, - "grad_norm": 1.5508343576824948, - "learning_rate": 1.9995987929943518e-05, - "loss": 0.2657, - "step": 762 - }, - { - "epoch": 0.04, - "grad_norm": 1.3218831953876735, - "learning_rate": 1.9995941145515224e-05, - "loss": 0.2584, - "step": 763 - }, - { - "epoch": 0.04, - "grad_norm": 1.1460227432051737, - "learning_rate": 1.9995894089946466e-05, - "loss": 0.2883, - "step": 764 - }, - { - "epoch": 0.04, - "grad_norm": 1.0196084487669637, - "learning_rate": 1.9995846763238514e-05, - "loss": 0.2501, - "step": 765 - }, - { - "epoch": 0.04, - "grad_norm": 1.0954833712086312, - "learning_rate": 1.9995799165392653e-05, - "loss": 0.2882, - "step": 766 - }, - { - "epoch": 0.04, - "grad_norm": 1.2774372509323548, - "learning_rate": 1.9995751296410176e-05, - "loss": 0.2535, - "step": 767 - }, - { - "epoch": 0.04, - "grad_norm": 1.103024655690764, - "learning_rate": 1.9995703156292382e-05, - "loss": 0.2699, - "step": 768 - }, - { - "epoch": 0.04, - "grad_norm": 1.126010661023456, - "learning_rate": 1.9995654745040575e-05, - "loss": 0.2746, - "step": 769 - }, - { - "epoch": 0.04, - "grad_norm": 1.2036940841199122, - "learning_rate": 1.999560606265607e-05, - "loss": 0.2846, - "step": 770 - }, - { - "epoch": 0.04, - "grad_norm": 1.053928772845716, - "learning_rate": 1.999555710914018e-05, - "loss": 0.2536, - "step": 771 - }, - { - "epoch": 0.04, - "grad_norm": 1.1459541009748426, - "learning_rate": 1.9995507884494244e-05, - "loss": 0.2503, - "step": 772 - }, - { - "epoch": 0.04, - "grad_norm": 1.2796107407868114, - "learning_rate": 1.999545838871959e-05, - "loss": 0.2575, - "step": 773 - }, - { - "epoch": 0.04, - "grad_norm": 1.313315822502975, - "learning_rate": 1.9995408621817566e-05, - "loss": 0.2616, - "step": 774 - }, - { - "epoch": 0.04, - "grad_norm": 1.6089597975605014, - "learning_rate": 1.9995358583789514e-05, - "loss": 0.245, - "step": 775 - }, - { - "epoch": 0.04, - "grad_norm": 1.1189952245805992, - "learning_rate": 1.99953082746368e-05, - "loss": 0.2719, - "step": 776 - }, - { - "epoch": 0.04, - "grad_norm": 1.1676561428296084, - "learning_rate": 1.9995257694360778e-05, - "loss": 0.2515, - "step": 777 - }, - { - "epoch": 0.04, - "grad_norm": 1.2390945203221548, - "learning_rate": 1.9995206842962833e-05, - "loss": 0.2815, - "step": 778 - }, - { - "epoch": 0.04, - "grad_norm": 1.284354717320916, - "learning_rate": 1.9995155720444336e-05, - "loss": 0.2474, - "step": 779 - }, - { - "epoch": 0.04, - "grad_norm": 1.2223633934856006, - "learning_rate": 1.9995104326806675e-05, - "loss": 0.2695, - "step": 780 - }, - { - "epoch": 0.04, - "grad_norm": 1.1957041076695898, - "learning_rate": 1.9995052662051244e-05, - "loss": 0.2401, - "step": 781 - }, - { - "epoch": 0.04, - "grad_norm": 1.6931627034823373, - "learning_rate": 1.999500072617945e-05, - "loss": 0.2469, - "step": 782 - }, - { - "epoch": 0.04, - "grad_norm": 1.1597265079302603, - "learning_rate": 1.999494851919269e-05, - "loss": 0.2868, - "step": 783 - }, - { - "epoch": 0.04, - "grad_norm": 1.8762144776870082, - "learning_rate": 1.999489604109239e-05, - "loss": 0.2919, - "step": 784 - }, - { - "epoch": 0.04, - "grad_norm": 1.1508642549126946, - "learning_rate": 1.999484329187997e-05, - "loss": 0.2577, - "step": 785 - }, - { - "epoch": 0.04, - "grad_norm": 1.1939915989038365, - "learning_rate": 1.9994790271556862e-05, - "loss": 0.2597, - "step": 786 - }, - { - "epoch": 0.04, - "grad_norm": 1.1376437711036782, - "learning_rate": 1.9994736980124502e-05, - "loss": 0.2614, - "step": 787 - }, - { - "epoch": 0.04, - "grad_norm": 1.1683273366283589, - "learning_rate": 1.9994683417584336e-05, - "loss": 0.2736, - "step": 788 - }, - { - "epoch": 0.04, - "grad_norm": 1.6975838851394127, - "learning_rate": 1.999462958393782e-05, - "loss": 0.2642, - "step": 789 - }, - { - "epoch": 0.04, - "grad_norm": 1.3563670815198583, - "learning_rate": 1.999457547918641e-05, - "loss": 0.2493, - "step": 790 - }, - { - "epoch": 0.04, - "grad_norm": 1.1854453853936364, - "learning_rate": 1.999452110333158e-05, - "loss": 0.2397, - "step": 791 - }, - { - "epoch": 0.04, - "grad_norm": 1.5034728936630222, - "learning_rate": 1.9994466456374796e-05, - "loss": 0.2834, - "step": 792 - }, - { - "epoch": 0.04, - "grad_norm": 0.9981359156760405, - "learning_rate": 1.9994411538317546e-05, - "loss": 0.2322, - "step": 793 - }, - { - "epoch": 0.04, - "grad_norm": 1.2749037646530144, - "learning_rate": 1.999435634916132e-05, - "loss": 0.2485, - "step": 794 - }, - { - "epoch": 0.04, - "grad_norm": 1.1573639241050016, - "learning_rate": 1.9994300888907613e-05, - "loss": 0.2847, - "step": 795 - }, - { - "epoch": 0.04, - "grad_norm": 0.9231922613268816, - "learning_rate": 1.999424515755793e-05, - "loss": 0.2389, - "step": 796 - }, - { - "epoch": 0.04, - "grad_norm": 1.092813811557751, - "learning_rate": 1.9994189155113778e-05, - "loss": 0.2567, - "step": 797 - }, - { - "epoch": 0.04, - "grad_norm": 1.097683225891256, - "learning_rate": 1.9994132881576685e-05, - "loss": 0.2702, - "step": 798 - }, - { - "epoch": 0.04, - "grad_norm": 1.1363590369891523, - "learning_rate": 1.9994076336948175e-05, - "loss": 0.2784, - "step": 799 - }, - { - "epoch": 0.04, - "grad_norm": 0.8879204954807518, - "learning_rate": 1.999401952122978e-05, - "loss": 0.2435, - "step": 800 - }, - { - "epoch": 0.04, - "grad_norm": 1.033548685282584, - "learning_rate": 1.9993962434423037e-05, - "loss": 0.2656, - "step": 801 - }, - { - "epoch": 0.04, - "grad_norm": 1.1875030010672916, - "learning_rate": 1.99939050765295e-05, - "loss": 0.2726, - "step": 802 - }, - { - "epoch": 0.04, - "grad_norm": 1.079155108315839, - "learning_rate": 1.9993847447550722e-05, - "loss": 0.2373, - "step": 803 - }, - { - "epoch": 0.04, - "grad_norm": 1.1246899751217057, - "learning_rate": 1.9993789547488268e-05, - "loss": 0.2584, - "step": 804 - }, - { - "epoch": 0.04, - "grad_norm": 1.0258829824673186, - "learning_rate": 1.999373137634371e-05, - "loss": 0.2619, - "step": 805 - }, - { - "epoch": 0.04, - "grad_norm": 1.105454260644435, - "learning_rate": 1.9993672934118625e-05, - "loss": 0.3153, - "step": 806 - }, - { - "epoch": 0.04, - "grad_norm": 0.971938599700853, - "learning_rate": 1.9993614220814594e-05, - "loss": 0.2531, - "step": 807 - }, - { - "epoch": 0.04, - "grad_norm": 1.1308239034584113, - "learning_rate": 1.9993555236433216e-05, - "loss": 0.2378, - "step": 808 - }, - { - "epoch": 0.04, - "grad_norm": 1.1821286909927475, - "learning_rate": 1.9993495980976084e-05, - "loss": 0.2461, - "step": 809 - }, - { - "epoch": 0.04, - "grad_norm": 1.0491796907842892, - "learning_rate": 1.9993436454444814e-05, - "loss": 0.2863, - "step": 810 - }, - { - "epoch": 0.04, - "grad_norm": 1.1794335903141098, - "learning_rate": 1.999337665684101e-05, - "loss": 0.2662, - "step": 811 - }, - { - "epoch": 0.04, - "grad_norm": 1.1534916181430674, - "learning_rate": 1.9993316588166307e-05, - "loss": 0.2768, - "step": 812 - }, - { - "epoch": 0.04, - "grad_norm": 1.127511520512681, - "learning_rate": 1.999325624842232e-05, - "loss": 0.2873, - "step": 813 - }, - { - "epoch": 0.04, - "grad_norm": 1.0337648173317513, - "learning_rate": 1.9993195637610695e-05, - "loss": 0.2319, - "step": 814 - }, - { - "epoch": 0.04, - "grad_norm": 1.1132394984821739, - "learning_rate": 1.9993134755733075e-05, - "loss": 0.2471, - "step": 815 - }, - { - "epoch": 0.04, - "grad_norm": 1.1475184237917153, - "learning_rate": 1.9993073602791108e-05, - "loss": 0.2765, - "step": 816 - }, - { - "epoch": 0.04, - "grad_norm": 1.333803344892296, - "learning_rate": 1.999301217878646e-05, - "loss": 0.2513, - "step": 817 - }, - { - "epoch": 0.04, - "grad_norm": 1.0499954721840008, - "learning_rate": 1.9992950483720787e-05, - "loss": 0.2605, - "step": 818 - }, - { - "epoch": 0.04, - "grad_norm": 1.3012802506530357, - "learning_rate": 1.999288851759577e-05, - "loss": 0.2676, - "step": 819 - }, - { - "epoch": 0.04, - "grad_norm": 1.4136693582266662, - "learning_rate": 1.9992826280413087e-05, - "loss": 0.2397, - "step": 820 - }, - { - "epoch": 0.04, - "grad_norm": 1.3517183530845074, - "learning_rate": 1.9992763772174427e-05, - "loss": 0.2772, - "step": 821 - }, - { - "epoch": 0.04, - "grad_norm": 0.9780065045060403, - "learning_rate": 1.9992700992881486e-05, - "loss": 0.2579, - "step": 822 - }, - { - "epoch": 0.04, - "grad_norm": 1.107034582008244, - "learning_rate": 1.9992637942535963e-05, - "loss": 0.3015, - "step": 823 - }, - { - "epoch": 0.04, - "grad_norm": 1.208106797361871, - "learning_rate": 1.9992574621139575e-05, - "loss": 0.2684, - "step": 824 - }, - { - "epoch": 0.04, - "grad_norm": 1.0879601895663045, - "learning_rate": 1.9992511028694036e-05, - "loss": 0.288, - "step": 825 - }, - { - "epoch": 0.04, - "grad_norm": 2.116435309323238, - "learning_rate": 1.999244716520107e-05, - "loss": 0.2718, - "step": 826 - }, - { - "epoch": 0.04, - "grad_norm": 1.3059275763713378, - "learning_rate": 1.9992383030662412e-05, - "loss": 0.307, - "step": 827 - }, - { - "epoch": 0.04, - "grad_norm": 1.6808967976744835, - "learning_rate": 1.9992318625079796e-05, - "loss": 0.2571, - "step": 828 - }, - { - "epoch": 0.04, - "grad_norm": 1.2453853874792142, - "learning_rate": 1.9992253948454975e-05, - "loss": 0.265, - "step": 829 - }, - { - "epoch": 0.04, - "grad_norm": 1.2037147547935059, - "learning_rate": 1.99921890007897e-05, - "loss": 0.2994, - "step": 830 - }, - { - "epoch": 0.04, - "grad_norm": 1.420063191800624, - "learning_rate": 1.9992123782085738e-05, - "loss": 0.2597, - "step": 831 - }, - { - "epoch": 0.04, - "grad_norm": 1.2273296556423356, - "learning_rate": 1.9992058292344853e-05, - "loss": 0.2513, - "step": 832 - }, - { - "epoch": 0.04, - "grad_norm": 1.1062136182815394, - "learning_rate": 1.9991992531568817e-05, - "loss": 0.2663, - "step": 833 - }, - { - "epoch": 0.04, - "grad_norm": 1.08435612624478, - "learning_rate": 1.9991926499759426e-05, - "loss": 0.2818, - "step": 834 - }, - { - "epoch": 0.04, - "grad_norm": 1.159958172965142, - "learning_rate": 1.999186019691846e-05, - "loss": 0.2732, - "step": 835 - }, - { - "epoch": 0.04, - "grad_norm": 1.0939438567851123, - "learning_rate": 1.9991793623047724e-05, - "loss": 0.2321, - "step": 836 - }, - { - "epoch": 0.04, - "grad_norm": 1.8205766434289725, - "learning_rate": 1.999172677814902e-05, - "loss": 0.2246, - "step": 837 - }, - { - "epoch": 0.04, - "grad_norm": 1.1672477240865653, - "learning_rate": 1.9991659662224166e-05, - "loss": 0.2701, - "step": 838 - }, - { - "epoch": 0.04, - "grad_norm": 1.129230834652707, - "learning_rate": 1.9991592275274976e-05, - "loss": 0.2614, - "step": 839 - }, - { - "epoch": 0.04, - "grad_norm": 1.7214179515732553, - "learning_rate": 1.9991524617303282e-05, - "loss": 0.2762, - "step": 840 - }, - { - "epoch": 0.04, - "grad_norm": 1.2950757995564135, - "learning_rate": 1.999145668831092e-05, - "loss": 0.2626, - "step": 841 - }, - { - "epoch": 0.04, - "grad_norm": 1.5470842349719043, - "learning_rate": 1.999138848829973e-05, - "loss": 0.2667, - "step": 842 - }, - { - "epoch": 0.04, - "grad_norm": 1.5064490380389803, - "learning_rate": 1.9991320017271562e-05, - "loss": 0.2616, - "step": 843 - }, - { - "epoch": 0.04, - "grad_norm": 1.1662089984074246, - "learning_rate": 1.9991251275228274e-05, - "loss": 0.2637, - "step": 844 - }, - { - "epoch": 0.04, - "grad_norm": 1.2511580698565579, - "learning_rate": 1.9991182262171734e-05, - "loss": 0.2811, - "step": 845 - }, - { - "epoch": 0.04, - "grad_norm": 1.1962141316679449, - "learning_rate": 1.9991112978103807e-05, - "loss": 0.2535, - "step": 846 - }, - { - "epoch": 0.04, - "grad_norm": 1.2435377890556183, - "learning_rate": 1.9991043423026377e-05, - "loss": 0.2707, - "step": 847 - }, - { - "epoch": 0.04, - "grad_norm": 1.125868593032254, - "learning_rate": 1.999097359694133e-05, - "loss": 0.2737, - "step": 848 - }, - { - "epoch": 0.04, - "grad_norm": 1.171550604371576, - "learning_rate": 1.999090349985056e-05, - "loss": 0.2519, - "step": 849 - }, - { - "epoch": 0.04, - "grad_norm": 1.3676752292310235, - "learning_rate": 1.999083313175597e-05, - "loss": 0.2485, - "step": 850 - }, - { - "epoch": 0.04, - "grad_norm": 1.2978859424907145, - "learning_rate": 1.9990762492659466e-05, - "loss": 0.2642, - "step": 851 - }, - { - "epoch": 0.04, - "grad_norm": 1.1216709783383152, - "learning_rate": 1.9990691582562963e-05, - "loss": 0.2571, - "step": 852 - }, - { - "epoch": 0.04, - "grad_norm": 1.4066273485739214, - "learning_rate": 1.9990620401468392e-05, - "loss": 0.2656, - "step": 853 - }, - { - "epoch": 0.04, - "grad_norm": 1.0970791764561405, - "learning_rate": 1.9990548949377674e-05, - "loss": 0.2225, - "step": 854 - }, - { - "epoch": 0.04, - "grad_norm": 1.1687899119933483, - "learning_rate": 1.999047722629275e-05, - "loss": 0.2735, - "step": 855 - }, - { - "epoch": 0.04, - "grad_norm": 1.5732209820343088, - "learning_rate": 1.999040523221557e-05, - "loss": 0.2543, - "step": 856 - }, - { - "epoch": 0.04, - "grad_norm": 1.725733160854452, - "learning_rate": 1.9990332967148082e-05, - "loss": 0.2354, - "step": 857 - }, - { - "epoch": 0.04, - "grad_norm": 2.28006853250872, - "learning_rate": 1.999026043109225e-05, - "loss": 0.2678, - "step": 858 - }, - { - "epoch": 0.04, - "grad_norm": 1.172545431709117, - "learning_rate": 1.9990187624050038e-05, - "loss": 0.2394, - "step": 859 - }, - { - "epoch": 0.04, - "grad_norm": 1.2752673779385229, - "learning_rate": 1.9990114546023423e-05, - "loss": 0.2663, - "step": 860 - }, - { - "epoch": 0.04, - "grad_norm": 1.0882818240971561, - "learning_rate": 1.999004119701439e-05, - "loss": 0.2499, - "step": 861 - }, - { - "epoch": 0.04, - "grad_norm": 0.9920608283590259, - "learning_rate": 1.9989967577024922e-05, - "loss": 0.2615, - "step": 862 - }, - { - "epoch": 0.04, - "grad_norm": 1.0741793303028535, - "learning_rate": 1.9989893686057016e-05, - "loss": 0.2781, - "step": 863 - }, - { - "epoch": 0.04, - "grad_norm": 1.1532640931043636, - "learning_rate": 1.9989819524112683e-05, - "loss": 0.2701, - "step": 864 - }, - { - "epoch": 0.04, - "grad_norm": 1.0592006955548727, - "learning_rate": 1.998974509119393e-05, - "loss": 0.2384, - "step": 865 - }, - { - "epoch": 0.04, - "grad_norm": 1.0794821739805955, - "learning_rate": 1.9989670387302783e-05, - "loss": 0.2575, - "step": 866 - }, - { - "epoch": 0.04, - "grad_norm": 0.9108946007980193, - "learning_rate": 1.9989595412441252e-05, - "loss": 0.2738, - "step": 867 - }, - { - "epoch": 0.04, - "grad_norm": 1.2301240916180438, - "learning_rate": 1.9989520166611388e-05, - "loss": 0.285, - "step": 868 - }, - { - "epoch": 0.04, - "grad_norm": 1.1232165575207511, - "learning_rate": 1.9989444649815226e-05, - "loss": 0.2408, - "step": 869 - }, - { - "epoch": 0.04, - "grad_norm": 1.3153763910607554, - "learning_rate": 1.9989368862054814e-05, - "loss": 0.2648, - "step": 870 - }, - { - "epoch": 0.04, - "grad_norm": 1.067960049988388, - "learning_rate": 1.9989292803332203e-05, - "loss": 0.2372, - "step": 871 - }, - { - "epoch": 0.04, - "grad_norm": 1.1159094593203644, - "learning_rate": 1.9989216473649466e-05, - "loss": 0.2354, - "step": 872 - }, - { - "epoch": 0.04, - "grad_norm": 1.0141369128982671, - "learning_rate": 1.998913987300866e-05, - "loss": 0.2533, - "step": 873 - }, - { - "epoch": 0.04, - "grad_norm": 1.0006771174963607, - "learning_rate": 1.998906300141188e-05, - "loss": 0.2808, - "step": 874 - }, - { - "epoch": 0.04, - "grad_norm": 1.116211648499011, - "learning_rate": 1.9988985858861193e-05, - "loss": 0.2637, - "step": 875 - }, - { - "epoch": 0.04, - "grad_norm": 1.2578617963336698, - "learning_rate": 1.9988908445358705e-05, - "loss": 0.2743, - "step": 876 - }, - { - "epoch": 0.04, - "grad_norm": 1.5131417020303695, - "learning_rate": 1.998883076090651e-05, - "loss": 0.2753, - "step": 877 - }, - { - "epoch": 0.04, - "grad_norm": 1.2599095826842825, - "learning_rate": 1.9988752805506723e-05, - "loss": 0.2678, - "step": 878 - }, - { - "epoch": 0.04, - "grad_norm": 0.9360444508399847, - "learning_rate": 1.9988674579161444e-05, - "loss": 0.2284, - "step": 879 - }, - { - "epoch": 0.04, - "grad_norm": 1.4269838584838916, - "learning_rate": 1.9988596081872805e-05, - "loss": 0.2552, - "step": 880 - }, - { - "epoch": 0.04, - "grad_norm": 1.2384542201113082, - "learning_rate": 1.9988517313642934e-05, - "loss": 0.2617, - "step": 881 - }, - { - "epoch": 0.04, - "grad_norm": 1.4157479977396126, - "learning_rate": 1.9988438274473966e-05, - "loss": 0.2937, - "step": 882 - }, - { - "epoch": 0.04, - "grad_norm": 1.481445241615998, - "learning_rate": 1.9988358964368046e-05, - "loss": 0.2472, - "step": 883 - }, - { - "epoch": 0.04, - "grad_norm": 1.4095283492098256, - "learning_rate": 1.9988279383327324e-05, - "loss": 0.279, - "step": 884 - }, - { - "epoch": 0.05, - "grad_norm": 1.1291656446775273, - "learning_rate": 1.9988199531353963e-05, - "loss": 0.2332, - "step": 885 - }, - { - "epoch": 0.05, - "grad_norm": 1.556219150900177, - "learning_rate": 1.998811940845012e-05, - "loss": 0.2166, - "step": 886 - }, - { - "epoch": 0.05, - "grad_norm": 1.1722666149602972, - "learning_rate": 1.998803901461798e-05, - "loss": 0.2168, - "step": 887 - }, - { - "epoch": 0.05, - "grad_norm": 1.362960636009129, - "learning_rate": 1.998795834985971e-05, - "loss": 0.2438, - "step": 888 - }, - { - "epoch": 0.05, - "grad_norm": 1.2382505267852677, - "learning_rate": 1.998787741417751e-05, - "loss": 0.2446, - "step": 889 - }, - { - "epoch": 0.05, - "grad_norm": 1.2519924974220127, - "learning_rate": 1.9987796207573573e-05, - "loss": 0.2247, - "step": 890 - }, - { - "epoch": 0.05, - "grad_norm": 1.138520059051484, - "learning_rate": 1.9987714730050098e-05, - "loss": 0.2394, - "step": 891 - }, - { - "epoch": 0.05, - "grad_norm": 1.184081525001222, - "learning_rate": 1.9987632981609297e-05, - "loss": 0.2342, - "step": 892 - }, - { - "epoch": 0.05, - "grad_norm": 1.4228525464925417, - "learning_rate": 1.9987550962253387e-05, - "loss": 0.2505, - "step": 893 - }, - { - "epoch": 0.05, - "grad_norm": 1.1732583727474242, - "learning_rate": 1.998746867198459e-05, - "loss": 0.2408, - "step": 894 - }, - { - "epoch": 0.05, - "grad_norm": 1.2929452831438806, - "learning_rate": 1.9987386110805146e-05, - "loss": 0.2636, - "step": 895 - }, - { - "epoch": 0.05, - "grad_norm": 1.1957505225992544, - "learning_rate": 1.9987303278717288e-05, - "loss": 0.2597, - "step": 896 - }, - { - "epoch": 0.05, - "grad_norm": 1.3406390837672997, - "learning_rate": 1.9987220175723265e-05, - "loss": 0.2643, - "step": 897 - }, - { - "epoch": 0.05, - "grad_norm": 1.1369635625540935, - "learning_rate": 1.998713680182533e-05, - "loss": 0.2523, - "step": 898 - }, - { - "epoch": 0.05, - "grad_norm": 1.5937134878395935, - "learning_rate": 1.9987053157025748e-05, - "loss": 0.2677, - "step": 899 - }, - { - "epoch": 0.05, - "grad_norm": 1.305439396439621, - "learning_rate": 1.998696924132678e-05, - "loss": 0.2856, - "step": 900 - }, - { - "epoch": 0.05, - "grad_norm": 1.3427530047357976, - "learning_rate": 1.9986885054730708e-05, - "loss": 0.2232, - "step": 901 - }, - { - "epoch": 0.05, - "grad_norm": 1.3349578827424304, - "learning_rate": 1.9986800597239817e-05, - "loss": 0.2687, - "step": 902 - }, - { - "epoch": 0.05, - "grad_norm": 1.2393032091995557, - "learning_rate": 1.9986715868856396e-05, - "loss": 0.2342, - "step": 903 - }, - { - "epoch": 0.05, - "grad_norm": 1.349656127097357, - "learning_rate": 1.998663086958274e-05, - "loss": 0.2683, - "step": 904 - }, - { - "epoch": 0.05, - "grad_norm": 1.4183409271315817, - "learning_rate": 1.998654559942116e-05, - "loss": 0.2843, - "step": 905 - }, - { - "epoch": 0.05, - "grad_norm": 1.8087602257457172, - "learning_rate": 1.998646005837397e-05, - "loss": 0.2616, - "step": 906 - }, - { - "epoch": 0.05, - "grad_norm": 1.5093979621304114, - "learning_rate": 1.998637424644348e-05, - "loss": 0.2367, - "step": 907 - }, - { - "epoch": 0.05, - "grad_norm": 1.2972565928670108, - "learning_rate": 1.998628816363203e-05, - "loss": 0.2765, - "step": 908 - }, - { - "epoch": 0.05, - "grad_norm": 1.1342135617558307, - "learning_rate": 1.9986201809941945e-05, - "loss": 0.2605, - "step": 909 - }, - { - "epoch": 0.05, - "grad_norm": 1.622288471832347, - "learning_rate": 1.9986115185375576e-05, - "loss": 0.2756, - "step": 910 - }, - { - "epoch": 0.05, - "grad_norm": 8.712971571944573, - "learning_rate": 1.9986028289935263e-05, - "loss": 0.3367, - "step": 911 - }, - { - "epoch": 0.05, - "grad_norm": 1.831004903324416, - "learning_rate": 1.9985941123623374e-05, - "loss": 0.2635, - "step": 912 - }, - { - "epoch": 0.05, - "grad_norm": 37.253091376437865, - "learning_rate": 1.9985853686442266e-05, - "loss": 0.425, - "step": 913 - }, - { - "epoch": 0.05, - "grad_norm": 1.6139419406539022, - "learning_rate": 1.9985765978394315e-05, - "loss": 0.2935, - "step": 914 - }, - { - "epoch": 0.05, - "grad_norm": 1.6099763905589324, - "learning_rate": 1.9985677999481898e-05, - "loss": 0.2702, - "step": 915 - }, - { - "epoch": 0.05, - "grad_norm": 1.6413119822026072, - "learning_rate": 1.9985589749707395e-05, - "loss": 0.2629, - "step": 916 - }, - { - "epoch": 0.05, - "grad_norm": 1.3524571716304779, - "learning_rate": 1.9985501229073213e-05, - "loss": 0.2703, - "step": 917 - }, - { - "epoch": 0.05, - "grad_norm": 1.0938158950381753, - "learning_rate": 1.998541243758174e-05, - "loss": 0.2704, - "step": 918 - }, - { - "epoch": 0.05, - "grad_norm": 1.1489715020729094, - "learning_rate": 1.9985323375235395e-05, - "loss": 0.2543, - "step": 919 - }, - { - "epoch": 0.05, - "grad_norm": 1.1731682133705816, - "learning_rate": 1.9985234042036588e-05, - "loss": 0.2608, - "step": 920 - }, - { - "epoch": 0.05, - "grad_norm": 0.9738308855528613, - "learning_rate": 1.9985144437987743e-05, - "loss": 0.2424, - "step": 921 - }, - { - "epoch": 0.05, - "grad_norm": 1.0626982152800977, - "learning_rate": 1.9985054563091295e-05, - "loss": 0.2758, - "step": 922 - }, - { - "epoch": 0.05, - "grad_norm": 1.426479209067473, - "learning_rate": 1.9984964417349675e-05, - "loss": 0.2733, - "step": 923 - }, - { - "epoch": 0.05, - "grad_norm": 1.1284519447109926, - "learning_rate": 1.998487400076533e-05, - "loss": 0.2457, - "step": 924 - }, - { - "epoch": 0.05, - "grad_norm": 1.0912469894763157, - "learning_rate": 1.9984783313340715e-05, - "loss": 0.2408, - "step": 925 - }, - { - "epoch": 0.05, - "grad_norm": 1.1812135886717172, - "learning_rate": 1.998469235507829e-05, - "loss": 0.2503, - "step": 926 - }, - { - "epoch": 0.05, - "grad_norm": 1.0073430784559474, - "learning_rate": 1.998460112598052e-05, - "loss": 0.244, - "step": 927 - }, - { - "epoch": 0.05, - "grad_norm": 1.3404204748861077, - "learning_rate": 1.998450962604988e-05, - "loss": 0.2885, - "step": 928 - }, - { - "epoch": 0.05, - "grad_norm": 1.7811568491496874, - "learning_rate": 1.9984417855288853e-05, - "loss": 0.2516, - "step": 929 - }, - { - "epoch": 0.05, - "grad_norm": 1.5443231025515465, - "learning_rate": 1.998432581369993e-05, - "loss": 0.2572, - "step": 930 - }, - { - "epoch": 0.05, - "grad_norm": 1.4293337544325062, - "learning_rate": 1.9984233501285602e-05, - "loss": 0.2992, - "step": 931 - }, - { - "epoch": 0.05, - "grad_norm": 1.284982874168832, - "learning_rate": 1.9984140918048376e-05, - "loss": 0.2399, - "step": 932 - }, - { - "epoch": 0.05, - "grad_norm": 1.3961467293148901, - "learning_rate": 1.9984048063990766e-05, - "loss": 0.2564, - "step": 933 - }, - { - "epoch": 0.05, - "grad_norm": 1.1794505830153215, - "learning_rate": 1.9983954939115286e-05, - "loss": 0.2417, - "step": 934 - }, - { - "epoch": 0.05, - "grad_norm": 1.2760274907553164, - "learning_rate": 1.9983861543424467e-05, - "loss": 0.2662, - "step": 935 - }, - { - "epoch": 0.05, - "grad_norm": 1.1984319455046042, - "learning_rate": 1.998376787692084e-05, - "loss": 0.2402, - "step": 936 - }, - { - "epoch": 0.05, - "grad_norm": 1.5134177656969858, - "learning_rate": 1.9983673939606946e-05, - "loss": 0.2641, - "step": 937 - }, - { - "epoch": 0.05, - "grad_norm": 1.08231174372219, - "learning_rate": 1.9983579731485326e-05, - "loss": 0.2398, - "step": 938 - }, - { - "epoch": 0.05, - "grad_norm": 1.3307559472577821, - "learning_rate": 1.998348525255855e-05, - "loss": 0.2773, - "step": 939 - }, - { - "epoch": 0.05, - "grad_norm": 1.256364695732902, - "learning_rate": 1.9983390502829168e-05, - "loss": 0.2815, - "step": 940 - }, - { - "epoch": 0.05, - "grad_norm": 1.2766424443497033, - "learning_rate": 1.9983295482299752e-05, - "loss": 0.2555, - "step": 941 - }, - { - "epoch": 0.05, - "grad_norm": 1.3076871853427046, - "learning_rate": 1.998320019097289e-05, - "loss": 0.234, - "step": 942 - }, - { - "epoch": 0.05, - "grad_norm": 1.7653987555897812, - "learning_rate": 1.9983104628851154e-05, - "loss": 0.2476, - "step": 943 - }, - { - "epoch": 0.05, - "grad_norm": 4.147047712588286, - "learning_rate": 1.9983008795937142e-05, - "loss": 0.291, - "step": 944 - }, - { - "epoch": 0.05, - "grad_norm": 1.9005551250112118, - "learning_rate": 1.9982912692233455e-05, - "loss": 0.2655, - "step": 945 - }, - { - "epoch": 0.05, - "grad_norm": 1.2830320142702902, - "learning_rate": 1.9982816317742694e-05, - "loss": 0.2607, - "step": 946 - }, - { - "epoch": 0.05, - "grad_norm": 1.6188483702984462, - "learning_rate": 1.9982719672467476e-05, - "loss": 0.2535, - "step": 947 - }, - { - "epoch": 0.05, - "grad_norm": 2.494189922904832, - "learning_rate": 1.9982622756410425e-05, - "loss": 0.263, - "step": 948 - }, - { - "epoch": 0.05, - "grad_norm": 1.6891101259528638, - "learning_rate": 1.998252556957417e-05, - "loss": 0.2674, - "step": 949 - }, - { - "epoch": 0.05, - "grad_norm": 1.4203377893373739, - "learning_rate": 1.998242811196134e-05, - "loss": 0.2332, - "step": 950 - }, - { - "epoch": 0.05, - "grad_norm": 1.5932351561865459, - "learning_rate": 1.9982330383574586e-05, - "loss": 0.2421, - "step": 951 - }, - { - "epoch": 0.05, - "grad_norm": 2.03124317920117, - "learning_rate": 1.998223238441656e-05, - "loss": 0.2498, - "step": 952 - }, - { - "epoch": 0.05, - "grad_norm": 1.8931422127100161, - "learning_rate": 1.9982134114489912e-05, - "loss": 0.2673, - "step": 953 - }, - { - "epoch": 0.05, - "grad_norm": 1.5731660080894065, - "learning_rate": 1.9982035573797315e-05, - "loss": 0.2766, - "step": 954 - }, - { - "epoch": 0.05, - "grad_norm": 1.2214546421076942, - "learning_rate": 1.9981936762341438e-05, - "loss": 0.2317, - "step": 955 - }, - { - "epoch": 0.05, - "grad_norm": 1.3338466940653078, - "learning_rate": 1.9981837680124963e-05, - "loss": 0.2238, - "step": 956 - }, - { - "epoch": 0.05, - "grad_norm": 1.4219834080515363, - "learning_rate": 1.9981738327150575e-05, - "loss": 0.2452, - "step": 957 - }, - { - "epoch": 0.05, - "grad_norm": 1.281365091349734, - "learning_rate": 1.9981638703420977e-05, - "loss": 0.2621, - "step": 958 - }, - { - "epoch": 0.05, - "grad_norm": 1.2303691416352147, - "learning_rate": 1.998153880893886e-05, - "loss": 0.2442, - "step": 959 - }, - { - "epoch": 0.05, - "grad_norm": 2.7164286811059375, - "learning_rate": 1.998143864370694e-05, - "loss": 0.25, - "step": 960 - }, - { - "epoch": 0.05, - "grad_norm": 1.1619798707542857, - "learning_rate": 1.998133820772793e-05, - "loss": 0.2448, - "step": 961 - }, - { - "epoch": 0.05, - "grad_norm": 1.4654543872298886, - "learning_rate": 1.998123750100456e-05, - "loss": 0.2592, - "step": 962 - }, - { - "epoch": 0.05, - "grad_norm": 1.0862977637195583, - "learning_rate": 1.9981136523539565e-05, - "loss": 0.2426, - "step": 963 - }, - { - "epoch": 0.05, - "grad_norm": 1.2390086218034442, - "learning_rate": 1.9981035275335672e-05, - "loss": 0.2557, - "step": 964 - }, - { - "epoch": 0.05, - "grad_norm": 1.504056753106432, - "learning_rate": 1.9980933756395635e-05, - "loss": 0.2697, - "step": 965 - }, - { - "epoch": 0.05, - "grad_norm": 1.398764234923714, - "learning_rate": 1.9980831966722204e-05, - "loss": 0.2398, - "step": 966 - }, - { - "epoch": 0.05, - "grad_norm": 1.2339532264497461, - "learning_rate": 1.9980729906318145e-05, - "loss": 0.264, - "step": 967 - }, - { - "epoch": 0.05, - "grad_norm": 1.4824979512173875, - "learning_rate": 1.998062757518622e-05, - "loss": 0.2453, - "step": 968 - }, - { - "epoch": 0.05, - "grad_norm": 1.7206314812947987, - "learning_rate": 1.998052497332921e-05, - "loss": 0.2569, - "step": 969 - }, - { - "epoch": 0.05, - "grad_norm": 1.6681319774857464, - "learning_rate": 1.99804221007499e-05, - "loss": 0.2418, - "step": 970 - }, - { - "epoch": 0.05, - "grad_norm": 1.2922792928709421, - "learning_rate": 1.9980318957451073e-05, - "loss": 0.233, - "step": 971 - }, - { - "epoch": 0.05, - "grad_norm": 1.3283707199306902, - "learning_rate": 1.9980215543435532e-05, - "loss": 0.2715, - "step": 972 - }, - { - "epoch": 0.05, - "grad_norm": 1.7199472670960634, - "learning_rate": 1.998011185870608e-05, - "loss": 0.2525, - "step": 973 - }, - { - "epoch": 0.05, - "grad_norm": 1.2668493722493168, - "learning_rate": 1.998000790326553e-05, - "loss": 0.25, - "step": 974 - }, - { - "epoch": 0.05, - "grad_norm": 3.4536780933019773, - "learning_rate": 1.9979903677116705e-05, - "loss": 0.2754, - "step": 975 - }, - { - "epoch": 0.05, - "grad_norm": 1.395271992192874, - "learning_rate": 1.9979799180262423e-05, - "loss": 0.2559, - "step": 976 - }, - { - "epoch": 0.05, - "grad_norm": 2.691859147708012, - "learning_rate": 1.997969441270553e-05, - "loss": 0.2739, - "step": 977 - }, - { - "epoch": 0.05, - "grad_norm": 1.3249583444644388, - "learning_rate": 1.997958937444886e-05, - "loss": 0.2406, - "step": 978 - }, - { - "epoch": 0.05, - "grad_norm": 1.1293700203760684, - "learning_rate": 1.9979484065495264e-05, - "loss": 0.2522, - "step": 979 - }, - { - "epoch": 0.05, - "grad_norm": 1.6352020696036085, - "learning_rate": 1.99793784858476e-05, - "loss": 0.2365, - "step": 980 - }, - { - "epoch": 0.05, - "grad_norm": 1.4766571166239415, - "learning_rate": 1.997927263550873e-05, - "loss": 0.2377, - "step": 981 - }, - { - "epoch": 0.05, - "grad_norm": 1.5825322786834481, - "learning_rate": 1.997916651448153e-05, - "loss": 0.283, - "step": 982 - }, - { - "epoch": 0.05, - "grad_norm": 1.5960016847073644, - "learning_rate": 1.997906012276887e-05, - "loss": 0.2336, - "step": 983 - }, - { - "epoch": 0.05, - "grad_norm": 1.7464179676633318, - "learning_rate": 1.9978953460373643e-05, - "loss": 0.2491, - "step": 984 - }, - { - "epoch": 0.05, - "grad_norm": 1.332022414231344, - "learning_rate": 1.997884652729874e-05, - "loss": 0.2708, - "step": 985 - }, - { - "epoch": 0.05, - "grad_norm": 1.4284193259813256, - "learning_rate": 1.997873932354706e-05, - "loss": 0.2558, - "step": 986 - }, - { - "epoch": 0.05, - "grad_norm": 1.2548510619479858, - "learning_rate": 1.9978631849121514e-05, - "loss": 0.23, - "step": 987 - }, - { - "epoch": 0.05, - "grad_norm": 2.5253198374407417, - "learning_rate": 1.997852410402501e-05, - "loss": 0.2598, - "step": 988 - }, - { - "epoch": 0.05, - "grad_norm": 1.6856588529543604, - "learning_rate": 1.9978416088260483e-05, - "loss": 0.2466, - "step": 989 - }, - { - "epoch": 0.05, - "grad_norm": 1.204933102528033, - "learning_rate": 1.9978307801830855e-05, - "loss": 0.2608, - "step": 990 - }, - { - "epoch": 0.05, - "grad_norm": 1.403160012398721, - "learning_rate": 1.997819924473906e-05, - "loss": 0.2457, - "step": 991 - }, - { - "epoch": 0.05, - "grad_norm": 1.3417670872825422, - "learning_rate": 1.997809041698805e-05, - "loss": 0.2481, - "step": 992 - }, - { - "epoch": 0.05, - "grad_norm": 1.9679789941414283, - "learning_rate": 1.9977981318580773e-05, - "loss": 0.2481, - "step": 993 - }, - { - "epoch": 0.05, - "grad_norm": 1.802566030293649, - "learning_rate": 1.9977871949520188e-05, - "loss": 0.2454, - "step": 994 - }, - { - "epoch": 0.05, - "grad_norm": 1.1290121014796652, - "learning_rate": 1.9977762309809266e-05, - "loss": 0.2532, - "step": 995 - }, - { - "epoch": 0.05, - "grad_norm": 1.4914383201086971, - "learning_rate": 1.9977652399450976e-05, - "loss": 0.273, - "step": 996 - }, - { - "epoch": 0.05, - "grad_norm": 1.4738780508956397, - "learning_rate": 1.99775422184483e-05, - "loss": 0.2562, - "step": 997 - }, - { - "epoch": 0.05, - "grad_norm": 1.431594373320625, - "learning_rate": 1.997743176680423e-05, - "loss": 0.2681, - "step": 998 - }, - { - "epoch": 0.05, - "grad_norm": 1.2916825659721045, - "learning_rate": 1.997732104452176e-05, - "loss": 0.2434, - "step": 999 - }, - { - "epoch": 0.05, - "grad_norm": 1.887959469677641, - "learning_rate": 1.997721005160389e-05, - "loss": 0.2451, - "step": 1000 - }, - { - "epoch": 0.05, - "grad_norm": 1.4206349978995914, - "learning_rate": 1.9977098788053637e-05, - "loss": 0.262, - "step": 1001 - }, - { - "epoch": 0.05, - "grad_norm": 1.146152194745477, - "learning_rate": 1.9976987253874016e-05, - "loss": 0.2638, - "step": 1002 - }, - { - "epoch": 0.05, - "grad_norm": 1.318586277766884, - "learning_rate": 1.997687544906805e-05, - "loss": 0.2635, - "step": 1003 - }, - { - "epoch": 0.05, - "grad_norm": 1.3449964495777744, - "learning_rate": 1.9976763373638773e-05, - "loss": 0.2805, - "step": 1004 - }, - { - "epoch": 0.05, - "grad_norm": 1.3295176549041057, - "learning_rate": 1.997665102758923e-05, - "loss": 0.2925, - "step": 1005 - }, - { - "epoch": 0.05, - "grad_norm": 1.356327669023187, - "learning_rate": 1.997653841092246e-05, - "loss": 0.2487, - "step": 1006 - }, - { - "epoch": 0.05, - "grad_norm": 1.4765934626224548, - "learning_rate": 1.9976425523641527e-05, - "loss": 0.2929, - "step": 1007 - }, - { - "epoch": 0.05, - "grad_norm": 1.497001144648849, - "learning_rate": 1.9976312365749484e-05, - "loss": 0.2477, - "step": 1008 - }, - { - "epoch": 0.05, - "grad_norm": 1.5166775118877334, - "learning_rate": 1.9976198937249408e-05, - "loss": 0.2361, - "step": 1009 - }, - { - "epoch": 0.05, - "grad_norm": 1.4083767570633934, - "learning_rate": 1.997608523814437e-05, - "loss": 0.254, - "step": 1010 - }, - { - "epoch": 0.05, - "grad_norm": 1.540794321329871, - "learning_rate": 1.9975971268437457e-05, - "loss": 0.2738, - "step": 1011 - }, - { - "epoch": 0.05, - "grad_norm": 1.668700196997005, - "learning_rate": 1.997585702813176e-05, - "loss": 0.2793, - "step": 1012 - }, - { - "epoch": 0.05, - "grad_norm": 1.5112483099308371, - "learning_rate": 1.9975742517230377e-05, - "loss": 0.2443, - "step": 1013 - }, - { - "epoch": 0.05, - "grad_norm": 1.5205434194057652, - "learning_rate": 1.9975627735736416e-05, - "loss": 0.2532, - "step": 1014 - }, - { - "epoch": 0.05, - "grad_norm": 1.40062000800819, - "learning_rate": 1.9975512683652985e-05, - "loss": 0.2509, - "step": 1015 - }, - { - "epoch": 0.05, - "grad_norm": 1.4963993851155675, - "learning_rate": 1.9975397360983216e-05, - "loss": 0.24, - "step": 1016 - }, - { - "epoch": 0.05, - "grad_norm": 2.1069124215856547, - "learning_rate": 1.9975281767730226e-05, - "loss": 0.2256, - "step": 1017 - }, - { - "epoch": 0.05, - "grad_norm": 1.4490239252320818, - "learning_rate": 1.9975165903897155e-05, - "loss": 0.2318, - "step": 1018 - }, - { - "epoch": 0.05, - "grad_norm": 1.594997764001681, - "learning_rate": 1.9975049769487147e-05, - "loss": 0.2416, - "step": 1019 - }, - { - "epoch": 0.05, - "grad_norm": 1.7645257393696694, - "learning_rate": 1.9974933364503347e-05, - "loss": 0.2589, - "step": 1020 - }, - { - "epoch": 0.05, - "grad_norm": 1.288053085705425, - "learning_rate": 1.9974816688948923e-05, - "loss": 0.2536, - "step": 1021 - }, - { - "epoch": 0.05, - "grad_norm": 1.2860181373293385, - "learning_rate": 1.9974699742827028e-05, - "loss": 0.2414, - "step": 1022 - }, - { - "epoch": 0.05, - "grad_norm": 1.4077289076246466, - "learning_rate": 1.997458252614084e-05, - "loss": 0.2416, - "step": 1023 - }, - { - "epoch": 0.05, - "grad_norm": 2.378778097881167, - "learning_rate": 1.9974465038893535e-05, - "loss": 0.2535, - "step": 1024 - }, - { - "epoch": 0.05, - "grad_norm": 1.3503965136445129, - "learning_rate": 1.9974347281088305e-05, - "loss": 0.2676, - "step": 1025 - }, - { - "epoch": 0.05, - "grad_norm": 1.236924180487533, - "learning_rate": 1.9974229252728345e-05, - "loss": 0.2545, - "step": 1026 - }, - { - "epoch": 0.05, - "grad_norm": 1.0346377489547494, - "learning_rate": 1.9974110953816846e-05, - "loss": 0.239, - "step": 1027 - }, - { - "epoch": 0.05, - "grad_norm": 1.3754934009638555, - "learning_rate": 1.997399238435703e-05, - "loss": 0.288, - "step": 1028 - }, - { - "epoch": 0.05, - "grad_norm": 1.2183415405104483, - "learning_rate": 1.99738735443521e-05, - "loss": 0.2624, - "step": 1029 - }, - { - "epoch": 0.05, - "grad_norm": 0.9613397656490309, - "learning_rate": 1.9973754433805294e-05, - "loss": 0.2424, - "step": 1030 - }, - { - "epoch": 0.05, - "grad_norm": 1.6289514132511271, - "learning_rate": 1.9973635052719836e-05, - "loss": 0.2523, - "step": 1031 - }, - { - "epoch": 0.05, - "grad_norm": 1.0487668403001444, - "learning_rate": 1.997351540109896e-05, - "loss": 0.2854, - "step": 1032 - }, - { - "epoch": 0.05, - "grad_norm": 1.3730747343031353, - "learning_rate": 1.9973395478945917e-05, - "loss": 0.251, - "step": 1033 - }, - { - "epoch": 0.05, - "grad_norm": 1.247569885594585, - "learning_rate": 1.9973275286263955e-05, - "loss": 0.2465, - "step": 1034 - }, - { - "epoch": 0.05, - "grad_norm": 1.200220973084389, - "learning_rate": 1.9973154823056343e-05, - "loss": 0.2733, - "step": 1035 - }, - { - "epoch": 0.05, - "grad_norm": 1.1170295827926, - "learning_rate": 1.997303408932634e-05, - "loss": 0.2553, - "step": 1036 - }, - { - "epoch": 0.05, - "grad_norm": 1.234375683527027, - "learning_rate": 1.9972913085077225e-05, - "loss": 0.2682, - "step": 1037 - }, - { - "epoch": 0.05, - "grad_norm": 1.2679487419761017, - "learning_rate": 1.997279181031228e-05, - "loss": 0.272, - "step": 1038 - }, - { - "epoch": 0.05, - "grad_norm": 1.6447639613922878, - "learning_rate": 1.997267026503479e-05, - "loss": 0.3011, - "step": 1039 - }, - { - "epoch": 0.05, - "grad_norm": 1.3634475769344572, - "learning_rate": 1.997254844924806e-05, - "loss": 0.2631, - "step": 1040 - }, - { - "epoch": 0.05, - "grad_norm": 1.2718450470082385, - "learning_rate": 1.997242636295539e-05, - "loss": 0.2412, - "step": 1041 - }, - { - "epoch": 0.05, - "grad_norm": 1.0108447929482707, - "learning_rate": 1.997230400616009e-05, - "loss": 0.237, - "step": 1042 - }, - { - "epoch": 0.05, - "grad_norm": 1.1645213079118888, - "learning_rate": 1.997218137886548e-05, - "loss": 0.2645, - "step": 1043 - }, - { - "epoch": 0.05, - "grad_norm": 1.0614791909352574, - "learning_rate": 1.997205848107489e-05, - "loss": 0.2693, - "step": 1044 - }, - { - "epoch": 0.05, - "grad_norm": 1.4634827744750394, - "learning_rate": 1.9971935312791646e-05, - "loss": 0.2357, - "step": 1045 - }, - { - "epoch": 0.05, - "grad_norm": 0.9872275898199967, - "learning_rate": 1.9971811874019096e-05, - "loss": 0.2554, - "step": 1046 - }, - { - "epoch": 0.05, - "grad_norm": 1.2016246852199257, - "learning_rate": 1.9971688164760588e-05, - "loss": 0.2769, - "step": 1047 - }, - { - "epoch": 0.05, - "grad_norm": 3.9982730088115166, - "learning_rate": 1.997156418501947e-05, - "loss": 0.2465, - "step": 1048 - }, - { - "epoch": 0.05, - "grad_norm": 1.6992277962147386, - "learning_rate": 1.9971439934799113e-05, - "loss": 0.2723, - "step": 1049 - }, - { - "epoch": 0.05, - "grad_norm": 1.0004795906439163, - "learning_rate": 1.9971315414102886e-05, - "loss": 0.2699, - "step": 1050 - }, - { - "epoch": 0.05, - "grad_norm": 1.2129500121770678, - "learning_rate": 1.9971190622934164e-05, - "loss": 0.2474, - "step": 1051 - }, - { - "epoch": 0.05, - "grad_norm": 2.131678439889529, - "learning_rate": 1.9971065561296334e-05, - "loss": 0.2638, - "step": 1052 - }, - { - "epoch": 0.05, - "grad_norm": 1.1363756749163911, - "learning_rate": 1.9970940229192785e-05, - "loss": 0.2406, - "step": 1053 - }, - { - "epoch": 0.05, - "grad_norm": 1.2498200840780742, - "learning_rate": 1.997081462662692e-05, - "loss": 0.2639, - "step": 1054 - }, - { - "epoch": 0.05, - "grad_norm": 1.1762334890050876, - "learning_rate": 1.997068875360215e-05, - "loss": 0.2543, - "step": 1055 - }, - { - "epoch": 0.05, - "grad_norm": 1.0024363148955997, - "learning_rate": 1.9970562610121878e-05, - "loss": 0.2289, - "step": 1056 - }, - { - "epoch": 0.05, - "grad_norm": 1.1811960528971635, - "learning_rate": 1.9970436196189534e-05, - "loss": 0.2478, - "step": 1057 - }, - { - "epoch": 0.05, - "grad_norm": 1.1637800873859996, - "learning_rate": 1.9970309511808544e-05, - "loss": 0.2465, - "step": 1058 - }, - { - "epoch": 0.05, - "grad_norm": 0.979550169262176, - "learning_rate": 1.997018255698235e-05, - "loss": 0.2214, - "step": 1059 - }, - { - "epoch": 0.05, - "grad_norm": 1.16589763971035, - "learning_rate": 1.9970055331714383e-05, - "loss": 0.2711, - "step": 1060 - }, - { - "epoch": 0.05, - "grad_norm": 1.1246384724575211, - "learning_rate": 1.9969927836008106e-05, - "loss": 0.2775, - "step": 1061 - }, - { - "epoch": 0.05, - "grad_norm": 1.7036162064431464, - "learning_rate": 1.9969800069866977e-05, - "loss": 0.2495, - "step": 1062 - }, - { - "epoch": 0.05, - "grad_norm": 1.1887124846196262, - "learning_rate": 1.9969672033294456e-05, - "loss": 0.2403, - "step": 1063 - }, - { - "epoch": 0.05, - "grad_norm": 1.1111538706770012, - "learning_rate": 1.9969543726294015e-05, - "loss": 0.2787, - "step": 1064 - }, - { - "epoch": 0.05, - "grad_norm": 1.344197692512503, - "learning_rate": 1.996941514886914e-05, - "loss": 0.2406, - "step": 1065 - }, - { - "epoch": 0.05, - "grad_norm": 0.9370858572857556, - "learning_rate": 1.9969286301023313e-05, - "loss": 0.2453, - "step": 1066 - }, - { - "epoch": 0.05, - "grad_norm": 1.317987916437125, - "learning_rate": 1.9969157182760038e-05, - "loss": 0.2469, - "step": 1067 - }, - { - "epoch": 0.05, - "grad_norm": 1.4308374640331052, - "learning_rate": 1.9969027794082805e-05, - "loss": 0.2618, - "step": 1068 - }, - { - "epoch": 0.05, - "grad_norm": 1.0128360475469447, - "learning_rate": 1.9968898134995133e-05, - "loss": 0.2589, - "step": 1069 - }, - { - "epoch": 0.05, - "grad_norm": 1.2206987095575548, - "learning_rate": 1.9968768205500537e-05, - "loss": 0.2539, - "step": 1070 - }, - { - "epoch": 0.05, - "grad_norm": 1.2616967144566469, - "learning_rate": 1.996863800560254e-05, - "loss": 0.2561, - "step": 1071 - }, - { - "epoch": 0.05, - "grad_norm": 1.2420509612739192, - "learning_rate": 1.9968507535304673e-05, - "loss": 0.2488, - "step": 1072 - }, - { - "epoch": 0.05, - "grad_norm": 1.6531805252472223, - "learning_rate": 1.9968376794610476e-05, - "loss": 0.2473, - "step": 1073 - }, - { - "epoch": 0.05, - "grad_norm": 1.512339883342327, - "learning_rate": 1.9968245783523494e-05, - "loss": 0.2761, - "step": 1074 - }, - { - "epoch": 0.05, - "grad_norm": 1.1497848143449239, - "learning_rate": 1.9968114502047285e-05, - "loss": 0.2543, - "step": 1075 - }, - { - "epoch": 0.05, - "grad_norm": 1.3298769541275925, - "learning_rate": 1.9967982950185406e-05, - "loss": 0.2466, - "step": 1076 - }, - { - "epoch": 0.05, - "grad_norm": 1.1633256939356942, - "learning_rate": 1.9967851127941428e-05, - "loss": 0.2452, - "step": 1077 - }, - { - "epoch": 0.05, - "grad_norm": 1.4220440751364178, - "learning_rate": 1.9967719035318923e-05, - "loss": 0.2764, - "step": 1078 - }, - { - "epoch": 0.05, - "grad_norm": 1.1132936198571883, - "learning_rate": 1.996758667232148e-05, - "loss": 0.26, - "step": 1079 - }, - { - "epoch": 0.05, - "grad_norm": 1.6312747105898855, - "learning_rate": 1.996745403895268e-05, - "loss": 0.2669, - "step": 1080 - }, - { - "epoch": 0.05, - "grad_norm": 1.6402142909338209, - "learning_rate": 1.996732113521613e-05, - "loss": 0.2457, - "step": 1081 - }, - { - "epoch": 0.06, - "grad_norm": 2.0667893373200155, - "learning_rate": 1.996718796111543e-05, - "loss": 0.2608, - "step": 1082 - }, - { - "epoch": 0.06, - "grad_norm": 1.852776127345349, - "learning_rate": 1.9967054516654192e-05, - "loss": 0.2296, - "step": 1083 - }, - { - "epoch": 0.06, - "grad_norm": 1.409690469886677, - "learning_rate": 1.996692080183604e-05, - "loss": 0.2808, - "step": 1084 - }, - { - "epoch": 0.06, - "grad_norm": 1.3043579795586937, - "learning_rate": 1.9966786816664595e-05, - "loss": 0.2389, - "step": 1085 - }, - { - "epoch": 0.06, - "grad_norm": 1.5235254463247614, - "learning_rate": 1.9966652561143497e-05, - "loss": 0.2704, - "step": 1086 - }, - { - "epoch": 0.06, - "grad_norm": 1.1451019290362825, - "learning_rate": 1.9966518035276386e-05, - "loss": 0.2436, - "step": 1087 - }, - { - "epoch": 0.06, - "grad_norm": 1.1315370371336715, - "learning_rate": 1.996638323906691e-05, - "loss": 0.2551, - "step": 1088 - }, - { - "epoch": 0.06, - "grad_norm": 1.2459189550689043, - "learning_rate": 1.9966248172518724e-05, - "loss": 0.2388, - "step": 1089 - }, - { - "epoch": 0.06, - "grad_norm": 1.0925475069915203, - "learning_rate": 1.9966112835635493e-05, - "loss": 0.2755, - "step": 1090 - }, - { - "epoch": 0.06, - "grad_norm": 1.5310245971490808, - "learning_rate": 1.996597722842089e-05, - "loss": 0.2449, - "step": 1091 - }, - { - "epoch": 0.06, - "grad_norm": 1.1293064532018613, - "learning_rate": 1.9965841350878594e-05, - "loss": 0.2457, - "step": 1092 - }, - { - "epoch": 0.06, - "grad_norm": 1.270511290540653, - "learning_rate": 1.9965705203012288e-05, - "loss": 0.2584, - "step": 1093 - }, - { - "epoch": 0.06, - "grad_norm": 1.3994165582848725, - "learning_rate": 1.9965568784825665e-05, - "loss": 0.2679, - "step": 1094 - }, - { - "epoch": 0.06, - "grad_norm": 1.2426772281808531, - "learning_rate": 1.9965432096322423e-05, - "loss": 0.2421, - "step": 1095 - }, - { - "epoch": 0.06, - "grad_norm": 1.3840821487760482, - "learning_rate": 1.9965295137506275e-05, - "loss": 0.2817, - "step": 1096 - }, - { - "epoch": 0.06, - "grad_norm": 1.5904477416527278, - "learning_rate": 1.9965157908380934e-05, - "loss": 0.2625, - "step": 1097 - }, - { - "epoch": 0.06, - "grad_norm": 1.0619560169492481, - "learning_rate": 1.996502040895012e-05, - "loss": 0.2489, - "step": 1098 - }, - { - "epoch": 0.06, - "grad_norm": 1.1477414335171585, - "learning_rate": 1.9964882639217564e-05, - "loss": 0.2561, - "step": 1099 - }, - { - "epoch": 0.06, - "grad_norm": 1.2290065958182672, - "learning_rate": 1.9964744599187006e-05, - "loss": 0.2423, - "step": 1100 - }, - { - "epoch": 0.06, - "grad_norm": 1.0360763227358927, - "learning_rate": 1.9964606288862187e-05, - "loss": 0.2513, - "step": 1101 - }, - { - "epoch": 0.06, - "grad_norm": 1.0421375048836479, - "learning_rate": 1.9964467708246858e-05, - "loss": 0.2321, - "step": 1102 - }, - { - "epoch": 0.06, - "grad_norm": 1.1176614561117166, - "learning_rate": 1.9964328857344782e-05, - "loss": 0.2346, - "step": 1103 - }, - { - "epoch": 0.06, - "grad_norm": 1.1864842864031386, - "learning_rate": 1.9964189736159724e-05, - "loss": 0.2379, - "step": 1104 - }, - { - "epoch": 0.06, - "grad_norm": 0.9928267143128622, - "learning_rate": 1.9964050344695454e-05, - "loss": 0.2471, - "step": 1105 - }, - { - "epoch": 0.06, - "grad_norm": 1.2547197785017616, - "learning_rate": 1.9963910682955755e-05, - "loss": 0.2796, - "step": 1106 - }, - { - "epoch": 0.06, - "grad_norm": 1.108318882422375, - "learning_rate": 1.9963770750944416e-05, - "loss": 0.2731, - "step": 1107 - }, - { - "epoch": 0.06, - "grad_norm": 1.3881418626610218, - "learning_rate": 1.9963630548665234e-05, - "loss": 0.2565, - "step": 1108 - }, - { - "epoch": 0.06, - "grad_norm": 1.0514992110398136, - "learning_rate": 1.9963490076122013e-05, - "loss": 0.2517, - "step": 1109 - }, - { - "epoch": 0.06, - "grad_norm": 1.0593218515735172, - "learning_rate": 1.9963349333318557e-05, - "loss": 0.2534, - "step": 1110 - }, - { - "epoch": 0.06, - "grad_norm": 1.118821356451065, - "learning_rate": 1.996320832025869e-05, - "loss": 0.2423, - "step": 1111 - }, - { - "epoch": 0.06, - "grad_norm": 1.303669999694777, - "learning_rate": 1.9963067036946234e-05, - "loss": 0.2487, - "step": 1112 - }, - { - "epoch": 0.06, - "grad_norm": 0.9298685030587246, - "learning_rate": 1.996292548338502e-05, - "loss": 0.255, - "step": 1113 - }, - { - "epoch": 0.06, - "grad_norm": 1.1029318356890085, - "learning_rate": 1.9962783659578893e-05, - "loss": 0.2728, - "step": 1114 - }, - { - "epoch": 0.06, - "grad_norm": 1.0197604969095162, - "learning_rate": 1.9962641565531694e-05, - "loss": 0.2539, - "step": 1115 - }, - { - "epoch": 0.06, - "grad_norm": 1.4415415643758744, - "learning_rate": 1.9962499201247278e-05, - "loss": 0.2628, - "step": 1116 - }, - { - "epoch": 0.06, - "grad_norm": 1.083338942951384, - "learning_rate": 1.996235656672951e-05, - "loss": 0.2233, - "step": 1117 - }, - { - "epoch": 0.06, - "grad_norm": 2.1160686080415427, - "learning_rate": 1.9962213661982258e-05, - "loss": 0.2383, - "step": 1118 - }, - { - "epoch": 0.06, - "grad_norm": 1.3011374875117159, - "learning_rate": 1.99620704870094e-05, - "loss": 0.2546, - "step": 1119 - }, - { - "epoch": 0.06, - "grad_norm": 1.2663679967062615, - "learning_rate": 1.9961927041814818e-05, - "loss": 0.2764, - "step": 1120 - }, - { - "epoch": 0.06, - "grad_norm": 4.08364336655249, - "learning_rate": 1.99617833264024e-05, - "loss": 0.2728, - "step": 1121 - }, - { - "epoch": 0.06, - "grad_norm": 1.1913191519966877, - "learning_rate": 1.9961639340776044e-05, - "loss": 0.2662, - "step": 1122 - }, - { - "epoch": 0.06, - "grad_norm": 1.337006850164463, - "learning_rate": 1.9961495084939663e-05, - "loss": 0.2617, - "step": 1123 - }, - { - "epoch": 0.06, - "grad_norm": 0.9984066446142431, - "learning_rate": 1.9961350558897165e-05, - "loss": 0.2304, - "step": 1124 - }, - { - "epoch": 0.06, - "grad_norm": 1.1472444721091424, - "learning_rate": 1.996120576265247e-05, - "loss": 0.2582, - "step": 1125 - }, - { - "epoch": 0.06, - "grad_norm": 1.0510611344781502, - "learning_rate": 1.99610606962095e-05, - "loss": 0.2411, - "step": 1126 - }, - { - "epoch": 0.06, - "grad_norm": 1.1413980782504782, - "learning_rate": 1.99609153595722e-05, - "loss": 0.2501, - "step": 1127 - }, - { - "epoch": 0.06, - "grad_norm": 0.9585331468733518, - "learning_rate": 1.9960769752744508e-05, - "loss": 0.2266, - "step": 1128 - }, - { - "epoch": 0.06, - "grad_norm": 1.0384063965422534, - "learning_rate": 1.9960623875730376e-05, - "loss": 0.2433, - "step": 1129 - }, - { - "epoch": 0.06, - "grad_norm": 0.9887923427738782, - "learning_rate": 1.9960477728533756e-05, - "loss": 0.242, - "step": 1130 - }, - { - "epoch": 0.06, - "grad_norm": 1.2281455484801602, - "learning_rate": 1.9960331311158618e-05, - "loss": 0.2478, - "step": 1131 - }, - { - "epoch": 0.06, - "grad_norm": 1.1420820808451997, - "learning_rate": 1.9960184623608927e-05, - "loss": 0.2313, - "step": 1132 - }, - { - "epoch": 0.06, - "grad_norm": 1.1291272243771713, - "learning_rate": 1.996003766588867e-05, - "loss": 0.2435, - "step": 1133 - }, - { - "epoch": 0.06, - "grad_norm": 1.291842843117987, - "learning_rate": 1.9959890438001826e-05, - "loss": 0.2505, - "step": 1134 - }, - { - "epoch": 0.06, - "grad_norm": 1.0968369271525031, - "learning_rate": 1.9959742939952393e-05, - "loss": 0.2585, - "step": 1135 - }, - { - "epoch": 0.06, - "grad_norm": 1.1421824643955416, - "learning_rate": 1.9959595171744367e-05, - "loss": 0.2725, - "step": 1136 - }, - { - "epoch": 0.06, - "grad_norm": 0.9766039383545265, - "learning_rate": 1.9959447133381762e-05, - "loss": 0.2533, - "step": 1137 - }, - { - "epoch": 0.06, - "grad_norm": 0.9345271213518466, - "learning_rate": 1.995929882486859e-05, - "loss": 0.241, - "step": 1138 - }, - { - "epoch": 0.06, - "grad_norm": 1.187322374200805, - "learning_rate": 1.9959150246208876e-05, - "loss": 0.2347, - "step": 1139 - }, - { - "epoch": 0.06, - "grad_norm": 1.0532689372785176, - "learning_rate": 1.995900139740665e-05, - "loss": 0.2603, - "step": 1140 - }, - { - "epoch": 0.06, - "grad_norm": 1.5080362513166135, - "learning_rate": 1.9958852278465946e-05, - "loss": 0.2028, - "step": 1141 - }, - { - "epoch": 0.06, - "grad_norm": 1.1438427953969899, - "learning_rate": 1.9958702889390813e-05, - "loss": 0.2538, - "step": 1142 - }, - { - "epoch": 0.06, - "grad_norm": 1.3178650171897648, - "learning_rate": 1.99585532301853e-05, - "loss": 0.2687, - "step": 1143 - }, - { - "epoch": 0.06, - "grad_norm": 1.0837016465004479, - "learning_rate": 1.9958403300853472e-05, - "loss": 0.2732, - "step": 1144 - }, - { - "epoch": 0.06, - "grad_norm": 1.151385755054102, - "learning_rate": 1.9958253101399388e-05, - "loss": 0.2318, - "step": 1145 - }, - { - "epoch": 0.06, - "grad_norm": 1.0980774803415834, - "learning_rate": 1.9958102631827127e-05, - "loss": 0.2398, - "step": 1146 - }, - { - "epoch": 0.06, - "grad_norm": 1.0518834716128322, - "learning_rate": 1.995795189214077e-05, - "loss": 0.2567, - "step": 1147 - }, - { - "epoch": 0.06, - "grad_norm": 0.9774327625199531, - "learning_rate": 1.9957800882344406e-05, - "loss": 0.2912, - "step": 1148 - }, - { - "epoch": 0.06, - "grad_norm": 0.9752705364199556, - "learning_rate": 1.9957649602442132e-05, - "loss": 0.2637, - "step": 1149 - }, - { - "epoch": 0.06, - "grad_norm": 1.22461622064381, - "learning_rate": 1.9957498052438046e-05, - "loss": 0.2682, - "step": 1150 - }, - { - "epoch": 0.06, - "grad_norm": 0.9363047948353536, - "learning_rate": 1.9957346232336264e-05, - "loss": 0.2608, - "step": 1151 - }, - { - "epoch": 0.06, - "grad_norm": 1.0908039664636027, - "learning_rate": 1.9957194142140907e-05, - "loss": 0.2642, - "step": 1152 - }, - { - "epoch": 0.06, - "grad_norm": 1.298250529492443, - "learning_rate": 1.9957041781856094e-05, - "loss": 0.2428, - "step": 1153 - }, - { - "epoch": 0.06, - "grad_norm": 0.8691045152210589, - "learning_rate": 1.995688915148596e-05, - "loss": 0.252, - "step": 1154 - }, - { - "epoch": 0.06, - "grad_norm": 1.5017543963557427, - "learning_rate": 1.9956736251034643e-05, - "loss": 0.2807, - "step": 1155 - }, - { - "epoch": 0.06, - "grad_norm": 1.425121321973548, - "learning_rate": 1.99565830805063e-05, - "loss": 0.2524, - "step": 1156 - }, - { - "epoch": 0.06, - "grad_norm": 1.3588406022460622, - "learning_rate": 1.995642963990507e-05, - "loss": 0.2631, - "step": 1157 - }, - { - "epoch": 0.06, - "grad_norm": 2.012820438420422, - "learning_rate": 1.995627592923513e-05, - "loss": 0.2583, - "step": 1158 - }, - { - "epoch": 0.06, - "grad_norm": 1.0423757816916552, - "learning_rate": 1.995612194850064e-05, - "loss": 0.231, - "step": 1159 - }, - { - "epoch": 0.06, - "grad_norm": 1.1506603528204253, - "learning_rate": 1.9955967697705782e-05, - "loss": 0.2758, - "step": 1160 - }, - { - "epoch": 0.06, - "grad_norm": 1.120430771480744, - "learning_rate": 1.9955813176854735e-05, - "loss": 0.2574, - "step": 1161 - }, - { - "epoch": 0.06, - "grad_norm": 1.0850296741610588, - "learning_rate": 1.9955658385951695e-05, - "loss": 0.2612, - "step": 1162 - }, - { - "epoch": 0.06, - "grad_norm": 1.1501268535631535, - "learning_rate": 1.9955503325000857e-05, - "loss": 0.26, - "step": 1163 - }, - { - "epoch": 0.06, - "grad_norm": 1.2212111778915191, - "learning_rate": 1.9955347994006432e-05, - "loss": 0.2737, - "step": 1164 - }, - { - "epoch": 0.06, - "grad_norm": 1.4028969597594856, - "learning_rate": 1.9955192392972628e-05, - "loss": 0.2324, - "step": 1165 - }, - { - "epoch": 0.06, - "grad_norm": 1.3102571784603596, - "learning_rate": 1.995503652190367e-05, - "loss": 0.2674, - "step": 1166 - }, - { - "epoch": 0.06, - "grad_norm": 0.9897987458802096, - "learning_rate": 1.9954880380803787e-05, - "loss": 0.2423, - "step": 1167 - }, - { - "epoch": 0.06, - "grad_norm": 0.9207734644130189, - "learning_rate": 1.9954723969677206e-05, - "loss": 0.2306, - "step": 1168 - }, - { - "epoch": 0.06, - "grad_norm": 1.1295488992326463, - "learning_rate": 1.9954567288528174e-05, - "loss": 0.2418, - "step": 1169 - }, - { - "epoch": 0.06, - "grad_norm": 1.2685761887350202, - "learning_rate": 1.9954410337360945e-05, - "loss": 0.2357, - "step": 1170 - }, - { - "epoch": 0.06, - "grad_norm": 0.9861608199592001, - "learning_rate": 1.9954253116179772e-05, - "loss": 0.2426, - "step": 1171 - }, - { - "epoch": 0.06, - "grad_norm": 1.0280451362848153, - "learning_rate": 1.9954095624988924e-05, - "loss": 0.2339, - "step": 1172 - }, - { - "epoch": 0.06, - "grad_norm": 1.0937539428083092, - "learning_rate": 1.9953937863792666e-05, - "loss": 0.2407, - "step": 1173 - }, - { - "epoch": 0.06, - "grad_norm": 1.5164023930909465, - "learning_rate": 1.9953779832595285e-05, - "loss": 0.271, - "step": 1174 - }, - { - "epoch": 0.06, - "grad_norm": 1.0085274941712419, - "learning_rate": 1.995362153140106e-05, - "loss": 0.2501, - "step": 1175 - }, - { - "epoch": 0.06, - "grad_norm": 1.0757387120719115, - "learning_rate": 1.9953462960214293e-05, - "loss": 0.2299, - "step": 1176 - }, - { - "epoch": 0.06, - "grad_norm": 1.562846048479879, - "learning_rate": 1.995330411903928e-05, - "loss": 0.2913, - "step": 1177 - }, - { - "epoch": 0.06, - "grad_norm": 1.670780598619956, - "learning_rate": 1.995314500788033e-05, - "loss": 0.2559, - "step": 1178 - }, - { - "epoch": 0.06, - "grad_norm": 1.0999296700893124, - "learning_rate": 1.9952985626741757e-05, - "loss": 0.2413, - "step": 1179 - }, - { - "epoch": 0.06, - "grad_norm": 1.2266840303048263, - "learning_rate": 1.995282597562789e-05, - "loss": 0.2299, - "step": 1180 - }, - { - "epoch": 0.06, - "grad_norm": 1.12382611130755, - "learning_rate": 1.9952666054543053e-05, - "loss": 0.2557, - "step": 1181 - }, - { - "epoch": 0.06, - "grad_norm": 0.8852569009332808, - "learning_rate": 1.995250586349159e-05, - "loss": 0.2543, - "step": 1182 - }, - { - "epoch": 0.06, - "grad_norm": 0.9698648745098383, - "learning_rate": 1.9952345402477844e-05, - "loss": 0.2397, - "step": 1183 - }, - { - "epoch": 0.06, - "grad_norm": 1.0757537964497887, - "learning_rate": 1.9952184671506167e-05, - "loss": 0.2867, - "step": 1184 - }, - { - "epoch": 0.06, - "grad_norm": 2.047570354258211, - "learning_rate": 1.9952023670580915e-05, - "loss": 0.2373, - "step": 1185 - }, - { - "epoch": 0.06, - "grad_norm": 1.3164713777096313, - "learning_rate": 1.9951862399706463e-05, - "loss": 0.2745, - "step": 1186 - }, - { - "epoch": 0.06, - "grad_norm": 1.254144229233726, - "learning_rate": 1.995170085888718e-05, - "loss": 0.2355, - "step": 1187 - }, - { - "epoch": 0.06, - "grad_norm": 1.0187453637220607, - "learning_rate": 1.9951539048127447e-05, - "loss": 0.2368, - "step": 1188 - }, - { - "epoch": 0.06, - "grad_norm": 0.985069297112168, - "learning_rate": 1.9951376967431658e-05, - "loss": 0.2376, - "step": 1189 - }, - { - "epoch": 0.06, - "grad_norm": 1.1871515603562464, - "learning_rate": 1.9951214616804203e-05, - "loss": 0.2481, - "step": 1190 - }, - { - "epoch": 0.06, - "grad_norm": 1.1725021607449277, - "learning_rate": 1.9951051996249492e-05, - "loss": 0.2618, - "step": 1191 - }, - { - "epoch": 0.06, - "grad_norm": 1.0558607145471346, - "learning_rate": 1.9950889105771937e-05, - "loss": 0.2689, - "step": 1192 - }, - { - "epoch": 0.06, - "grad_norm": 0.8759610787740514, - "learning_rate": 1.995072594537595e-05, - "loss": 0.258, - "step": 1193 - }, - { - "epoch": 0.06, - "grad_norm": 1.0177805167346698, - "learning_rate": 1.9950562515065957e-05, - "loss": 0.2662, - "step": 1194 - }, - { - "epoch": 0.06, - "grad_norm": 3.21473439251119, - "learning_rate": 1.9950398814846396e-05, - "loss": 0.2414, - "step": 1195 - }, - { - "epoch": 0.06, - "grad_norm": 1.0701077252548996, - "learning_rate": 1.9950234844721707e-05, - "loss": 0.2294, - "step": 1196 - }, - { - "epoch": 0.06, - "grad_norm": 1.304305485860522, - "learning_rate": 1.9950070604696332e-05, - "loss": 0.2575, - "step": 1197 - }, - { - "epoch": 0.06, - "grad_norm": 1.4979470689692984, - "learning_rate": 1.994990609477473e-05, - "loss": 0.2103, - "step": 1198 - }, - { - "epoch": 0.06, - "grad_norm": 1.881741820714266, - "learning_rate": 1.994974131496137e-05, - "loss": 0.2499, - "step": 1199 - }, - { - "epoch": 0.06, - "grad_norm": 1.4797426522385466, - "learning_rate": 1.9949576265260708e-05, - "loss": 0.2396, - "step": 1200 - }, - { - "epoch": 0.06, - "grad_norm": 1.3031654108455368, - "learning_rate": 1.994941094567723e-05, - "loss": 0.2342, - "step": 1201 - }, - { - "epoch": 0.06, - "grad_norm": 1.5757734679789772, - "learning_rate": 1.9949245356215415e-05, - "loss": 0.2518, - "step": 1202 - }, - { - "epoch": 0.06, - "grad_norm": 1.2205859526102094, - "learning_rate": 1.9949079496879763e-05, - "loss": 0.2384, - "step": 1203 - }, - { - "epoch": 0.06, - "grad_norm": 1.3153888292338682, - "learning_rate": 1.9948913367674766e-05, - "loss": 0.2674, - "step": 1204 - }, - { - "epoch": 0.06, - "grad_norm": 1.118783886908271, - "learning_rate": 1.994874696860493e-05, - "loss": 0.2526, - "step": 1205 - }, - { - "epoch": 0.06, - "grad_norm": 1.51742792693216, - "learning_rate": 1.9948580299674774e-05, - "loss": 0.2485, - "step": 1206 - }, - { - "epoch": 0.06, - "grad_norm": 1.0380364345723683, - "learning_rate": 1.994841336088881e-05, - "loss": 0.2567, - "step": 1207 - }, - { - "epoch": 0.06, - "grad_norm": 1.1678784841168433, - "learning_rate": 1.9948246152251576e-05, - "loss": 0.2431, - "step": 1208 - }, - { - "epoch": 0.06, - "grad_norm": 1.3366080159215235, - "learning_rate": 1.9948078673767604e-05, - "loss": 0.2875, - "step": 1209 - }, - { - "epoch": 0.06, - "grad_norm": 1.63814443418371, - "learning_rate": 1.9947910925441435e-05, - "loss": 0.2752, - "step": 1210 - }, - { - "epoch": 0.06, - "grad_norm": 1.2934593570974755, - "learning_rate": 1.9947742907277617e-05, - "loss": 0.2762, - "step": 1211 - }, - { - "epoch": 0.06, - "grad_norm": 1.348419593558008, - "learning_rate": 1.9947574619280713e-05, - "loss": 0.2296, - "step": 1212 - }, - { - "epoch": 0.06, - "grad_norm": 0.9139456450488086, - "learning_rate": 1.9947406061455287e-05, - "loss": 0.2519, - "step": 1213 - }, - { - "epoch": 0.06, - "grad_norm": 0.9703076073163888, - "learning_rate": 1.994723723380591e-05, - "loss": 0.2621, - "step": 1214 - }, - { - "epoch": 0.06, - "grad_norm": 1.3465532732447363, - "learning_rate": 1.994706813633716e-05, - "loss": 0.2558, - "step": 1215 - }, - { - "epoch": 0.06, - "grad_norm": 1.284682342016494, - "learning_rate": 1.9946898769053625e-05, - "loss": 0.2363, - "step": 1216 - }, - { - "epoch": 0.06, - "grad_norm": 1.1236941900852206, - "learning_rate": 1.9946729131959902e-05, - "loss": 0.2692, - "step": 1217 - }, - { - "epoch": 0.06, - "grad_norm": 1.3346742349950482, - "learning_rate": 1.9946559225060585e-05, - "loss": 0.2413, - "step": 1218 - }, - { - "epoch": 0.06, - "grad_norm": 1.2810878909267052, - "learning_rate": 1.9946389048360288e-05, - "loss": 0.2243, - "step": 1219 - }, - { - "epoch": 0.06, - "grad_norm": 1.2926565224995998, - "learning_rate": 1.9946218601863626e-05, - "loss": 0.2635, - "step": 1220 - }, - { - "epoch": 0.06, - "grad_norm": 1.3114698308075319, - "learning_rate": 1.9946047885575224e-05, - "loss": 0.2714, - "step": 1221 - }, - { - "epoch": 0.06, - "grad_norm": 3.1178514386861154, - "learning_rate": 1.9945876899499712e-05, - "loss": 0.2486, - "step": 1222 - }, - { - "epoch": 0.06, - "grad_norm": 1.114371429943034, - "learning_rate": 1.9945705643641727e-05, - "loss": 0.2325, - "step": 1223 - }, - { - "epoch": 0.06, - "grad_norm": 1.4891443815170118, - "learning_rate": 1.9945534118005913e-05, - "loss": 0.2446, - "step": 1224 - }, - { - "epoch": 0.06, - "grad_norm": 1.2381114684264194, - "learning_rate": 1.9945362322596926e-05, - "loss": 0.2606, - "step": 1225 - }, - { - "epoch": 0.06, - "grad_norm": 1.1075696539896358, - "learning_rate": 1.9945190257419424e-05, - "loss": 0.2496, - "step": 1226 - }, - { - "epoch": 0.06, - "grad_norm": 1.2353450742088958, - "learning_rate": 1.9945017922478076e-05, - "loss": 0.2721, - "step": 1227 - }, - { - "epoch": 0.06, - "grad_norm": 1.225499079059848, - "learning_rate": 1.994484531777755e-05, - "loss": 0.2375, - "step": 1228 - }, - { - "epoch": 0.06, - "grad_norm": 1.3675113150352771, - "learning_rate": 1.994467244332254e-05, - "loss": 0.2543, - "step": 1229 - }, - { - "epoch": 0.06, - "grad_norm": 1.483126597664391, - "learning_rate": 1.9944499299117724e-05, - "loss": 0.2659, - "step": 1230 - }, - { - "epoch": 0.06, - "grad_norm": 1.4329247239553178, - "learning_rate": 1.9944325885167807e-05, - "loss": 0.2541, - "step": 1231 - }, - { - "epoch": 0.06, - "grad_norm": 1.1956395859727627, - "learning_rate": 1.9944152201477483e-05, - "loss": 0.2331, - "step": 1232 - }, - { - "epoch": 0.06, - "grad_norm": 1.0789307326922348, - "learning_rate": 1.994397824805147e-05, - "loss": 0.2247, - "step": 1233 - }, - { - "epoch": 0.06, - "grad_norm": 1.2295779281449082, - "learning_rate": 1.9943804024894486e-05, - "loss": 0.2609, - "step": 1234 - }, - { - "epoch": 0.06, - "grad_norm": 1.1590034791235422, - "learning_rate": 1.994362953201126e-05, - "loss": 0.2666, - "step": 1235 - }, - { - "epoch": 0.06, - "grad_norm": 1.0564212568535971, - "learning_rate": 1.9943454769406515e-05, - "loss": 0.2412, - "step": 1236 - }, - { - "epoch": 0.06, - "grad_norm": 1.5078051441569378, - "learning_rate": 1.9943279737085003e-05, - "loss": 0.2494, - "step": 1237 - }, - { - "epoch": 0.06, - "grad_norm": 1.216524313899798, - "learning_rate": 1.9943104435051466e-05, - "loss": 0.2622, - "step": 1238 - }, - { - "epoch": 0.06, - "grad_norm": 1.5140052363285328, - "learning_rate": 1.994292886331066e-05, - "loss": 0.2611, - "step": 1239 - }, - { - "epoch": 0.06, - "grad_norm": 1.2042848747214754, - "learning_rate": 1.994275302186734e-05, - "loss": 0.2446, - "step": 1240 - }, - { - "epoch": 0.06, - "grad_norm": 1.2721633003954307, - "learning_rate": 1.994257691072629e-05, - "loss": 0.252, - "step": 1241 - }, - { - "epoch": 0.06, - "grad_norm": 1.1749578657632214, - "learning_rate": 1.994240052989228e-05, - "loss": 0.2317, - "step": 1242 - }, - { - "epoch": 0.06, - "grad_norm": 1.2140607390698037, - "learning_rate": 1.994222387937009e-05, - "loss": 0.2465, - "step": 1243 - }, - { - "epoch": 0.06, - "grad_norm": 1.2318297370878173, - "learning_rate": 1.9942046959164516e-05, - "loss": 0.2477, - "step": 1244 - }, - { - "epoch": 0.06, - "grad_norm": 1.0822095702256438, - "learning_rate": 1.994186976928036e-05, - "loss": 0.2563, - "step": 1245 - }, - { - "epoch": 0.06, - "grad_norm": 1.2863539137070017, - "learning_rate": 1.9941692309722422e-05, - "loss": 0.2482, - "step": 1246 - }, - { - "epoch": 0.06, - "grad_norm": 0.9198636083932812, - "learning_rate": 1.994151458049552e-05, - "loss": 0.2487, - "step": 1247 - }, - { - "epoch": 0.06, - "grad_norm": 1.0390473686422546, - "learning_rate": 1.9941336581604474e-05, - "loss": 0.2725, - "step": 1248 - }, - { - "epoch": 0.06, - "grad_norm": 0.9622996627966769, - "learning_rate": 1.994115831305411e-05, - "loss": 0.2508, - "step": 1249 - }, - { - "epoch": 0.06, - "grad_norm": 1.1572359650318538, - "learning_rate": 1.9940979774849264e-05, - "loss": 0.2259, - "step": 1250 - }, - { - "epoch": 0.06, - "grad_norm": 1.0814302644289238, - "learning_rate": 1.9940800966994785e-05, - "loss": 0.2188, - "step": 1251 - }, - { - "epoch": 0.06, - "grad_norm": 1.1509504124606063, - "learning_rate": 1.9940621889495516e-05, - "loss": 0.2391, - "step": 1252 - }, - { - "epoch": 0.06, - "grad_norm": 1.1232777139387475, - "learning_rate": 1.9940442542356315e-05, - "loss": 0.2296, - "step": 1253 - }, - { - "epoch": 0.06, - "grad_norm": 1.1470671685669127, - "learning_rate": 1.9940262925582052e-05, - "loss": 0.2782, - "step": 1254 - }, - { - "epoch": 0.06, - "grad_norm": 1.4878598163784487, - "learning_rate": 1.9940083039177594e-05, - "loss": 0.2556, - "step": 1255 - }, - { - "epoch": 0.06, - "grad_norm": 2.419684202169054, - "learning_rate": 1.993990288314782e-05, - "loss": 0.2509, - "step": 1256 - }, - { - "epoch": 0.06, - "grad_norm": 1.0335157524606504, - "learning_rate": 1.9939722457497625e-05, - "loss": 0.2177, - "step": 1257 - }, - { - "epoch": 0.06, - "grad_norm": 1.1965718148733537, - "learning_rate": 1.993954176223189e-05, - "loss": 0.268, - "step": 1258 - }, - { - "epoch": 0.06, - "grad_norm": 1.0653255609824037, - "learning_rate": 1.9939360797355527e-05, - "loss": 0.2258, - "step": 1259 - }, - { - "epoch": 0.06, - "grad_norm": 1.0147997894620475, - "learning_rate": 1.9939179562873437e-05, - "loss": 0.2402, - "step": 1260 - }, - { - "epoch": 0.06, - "grad_norm": 1.6590099622321888, - "learning_rate": 1.9938998058790546e-05, - "loss": 0.241, - "step": 1261 - }, - { - "epoch": 0.06, - "grad_norm": 1.9272334105915023, - "learning_rate": 1.9938816285111768e-05, - "loss": 0.2633, - "step": 1262 - }, - { - "epoch": 0.06, - "grad_norm": 0.9696781916820303, - "learning_rate": 1.9938634241842037e-05, - "loss": 0.2746, - "step": 1263 - }, - { - "epoch": 0.06, - "grad_norm": 1.075879149837418, - "learning_rate": 1.993845192898629e-05, - "loss": 0.2481, - "step": 1264 - }, - { - "epoch": 0.06, - "grad_norm": 1.050216008594986, - "learning_rate": 1.9938269346549473e-05, - "loss": 0.2562, - "step": 1265 - }, - { - "epoch": 0.06, - "grad_norm": 1.0603398741984067, - "learning_rate": 1.993808649453654e-05, - "loss": 0.2201, - "step": 1266 - }, - { - "epoch": 0.06, - "grad_norm": 1.1759766212394545, - "learning_rate": 1.993790337295245e-05, - "loss": 0.2338, - "step": 1267 - }, - { - "epoch": 0.06, - "grad_norm": 1.3895706761656446, - "learning_rate": 1.993771998180217e-05, - "loss": 0.2334, - "step": 1268 - }, - { - "epoch": 0.06, - "grad_norm": 1.007624866161374, - "learning_rate": 1.9937536321090673e-05, - "loss": 0.2513, - "step": 1269 - }, - { - "epoch": 0.06, - "grad_norm": 2.468659081059867, - "learning_rate": 1.9937352390822945e-05, - "loss": 0.2439, - "step": 1270 - }, - { - "epoch": 0.06, - "grad_norm": 1.126146886783081, - "learning_rate": 1.993716819100397e-05, - "loss": 0.2822, - "step": 1271 - }, - { - "epoch": 0.06, - "grad_norm": 0.9947140946371513, - "learning_rate": 1.9936983721638745e-05, - "loss": 0.2452, - "step": 1272 - }, - { - "epoch": 0.06, - "grad_norm": 1.022459747505446, - "learning_rate": 1.9936798982732274e-05, - "loss": 0.2747, - "step": 1273 - }, - { - "epoch": 0.06, - "grad_norm": 1.0124242493363043, - "learning_rate": 1.9936613974289575e-05, - "loss": 0.2578, - "step": 1274 - }, - { - "epoch": 0.06, - "grad_norm": 1.7048341660404465, - "learning_rate": 1.9936428696315656e-05, - "loss": 0.2352, - "step": 1275 - }, - { - "epoch": 0.06, - "grad_norm": 1.0907511105393048, - "learning_rate": 1.993624314881555e-05, - "loss": 0.265, - "step": 1276 - }, - { - "epoch": 0.06, - "grad_norm": 1.08612995999494, - "learning_rate": 1.9936057331794284e-05, - "loss": 0.2474, - "step": 1277 - }, - { - "epoch": 0.06, - "grad_norm": 1.0279733992904583, - "learning_rate": 1.9935871245256907e-05, - "loss": 0.2478, - "step": 1278 - }, - { - "epoch": 0.07, - "grad_norm": 1.3072664522397148, - "learning_rate": 1.9935684889208455e-05, - "loss": 0.244, - "step": 1279 - }, - { - "epoch": 0.07, - "grad_norm": 0.9953628542258297, - "learning_rate": 1.9935498263653994e-05, - "loss": 0.2457, - "step": 1280 - }, - { - "epoch": 0.07, - "grad_norm": 0.8782714215203581, - "learning_rate": 1.993531136859858e-05, - "loss": 0.2319, - "step": 1281 - }, - { - "epoch": 0.07, - "grad_norm": 1.0964019610858486, - "learning_rate": 1.9935124204047283e-05, - "loss": 0.2362, - "step": 1282 - }, - { - "epoch": 0.07, - "grad_norm": 1.1545713070308408, - "learning_rate": 1.9934936770005184e-05, - "loss": 0.2196, - "step": 1283 - }, - { - "epoch": 0.07, - "grad_norm": 2.7314670261178784, - "learning_rate": 1.993474906647736e-05, - "loss": 0.2384, - "step": 1284 - }, - { - "epoch": 0.07, - "grad_norm": 1.0590502228754426, - "learning_rate": 1.993456109346891e-05, - "loss": 0.2324, - "step": 1285 - }, - { - "epoch": 0.07, - "grad_norm": 1.0498547537114502, - "learning_rate": 1.9934372850984925e-05, - "loss": 0.2585, - "step": 1286 - }, - { - "epoch": 0.07, - "grad_norm": 1.1848581217189293, - "learning_rate": 1.9934184339030517e-05, - "loss": 0.2664, - "step": 1287 - }, - { - "epoch": 0.07, - "grad_norm": 2.072168224985916, - "learning_rate": 1.99339955576108e-05, - "loss": 0.2768, - "step": 1288 - }, - { - "epoch": 0.07, - "grad_norm": 1.3175290108699542, - "learning_rate": 1.993380650673089e-05, - "loss": 0.2889, - "step": 1289 - }, - { - "epoch": 0.07, - "grad_norm": 0.9720547354553507, - "learning_rate": 1.9933617186395917e-05, - "loss": 0.2231, - "step": 1290 - }, - { - "epoch": 0.07, - "grad_norm": 1.373607960688602, - "learning_rate": 1.993342759661102e-05, - "loss": 0.2704, - "step": 1291 - }, - { - "epoch": 0.07, - "grad_norm": 1.3201599152256596, - "learning_rate": 1.9933237737381336e-05, - "loss": 0.2411, - "step": 1292 - }, - { - "epoch": 0.07, - "grad_norm": 1.090537077664811, - "learning_rate": 1.993304760871202e-05, - "loss": 0.2439, - "step": 1293 - }, - { - "epoch": 0.07, - "grad_norm": 1.2988085961178586, - "learning_rate": 1.993285721060822e-05, - "loss": 0.2552, - "step": 1294 - }, - { - "epoch": 0.07, - "grad_norm": 1.12247096396022, - "learning_rate": 1.9932666543075113e-05, - "loss": 0.2555, - "step": 1295 - }, - { - "epoch": 0.07, - "grad_norm": 1.474840116009373, - "learning_rate": 1.9932475606117865e-05, - "loss": 0.2577, - "step": 1296 - }, - { - "epoch": 0.07, - "grad_norm": 1.5661222125417564, - "learning_rate": 1.9932284399741653e-05, - "loss": 0.2386, - "step": 1297 - }, - { - "epoch": 0.07, - "grad_norm": 1.3560953139033376, - "learning_rate": 1.9932092923951667e-05, - "loss": 0.2418, - "step": 1298 - }, - { - "epoch": 0.07, - "grad_norm": 1.1428106762278663, - "learning_rate": 1.99319011787531e-05, - "loss": 0.2286, - "step": 1299 - }, - { - "epoch": 0.07, - "grad_norm": 1.6241541244210773, - "learning_rate": 1.993170916415115e-05, - "loss": 0.2679, - "step": 1300 - }, - { - "epoch": 0.07, - "grad_norm": 1.1611966906057736, - "learning_rate": 1.993151688015103e-05, - "loss": 0.2799, - "step": 1301 - }, - { - "epoch": 0.07, - "grad_norm": 1.06454836864526, - "learning_rate": 1.993132432675795e-05, - "loss": 0.224, - "step": 1302 - }, - { - "epoch": 0.07, - "grad_norm": 1.4358338142793268, - "learning_rate": 1.993113150397714e-05, - "loss": 0.2535, - "step": 1303 - }, - { - "epoch": 0.07, - "grad_norm": 1.1185775618889204, - "learning_rate": 1.993093841181383e-05, - "loss": 0.2704, - "step": 1304 - }, - { - "epoch": 0.07, - "grad_norm": 0.8895921291367405, - "learning_rate": 1.993074505027325e-05, - "loss": 0.2508, - "step": 1305 - }, - { - "epoch": 0.07, - "grad_norm": 0.9817015060406692, - "learning_rate": 1.9930551419360653e-05, - "loss": 0.2346, - "step": 1306 - }, - { - "epoch": 0.07, - "grad_norm": 2.9722902810607787, - "learning_rate": 1.9930357519081286e-05, - "loss": 0.2384, - "step": 1307 - }, - { - "epoch": 0.07, - "grad_norm": 1.250573129564375, - "learning_rate": 1.993016334944041e-05, - "loss": 0.2486, - "step": 1308 - }, - { - "epoch": 0.07, - "grad_norm": 1.1498427550944075, - "learning_rate": 1.9929968910443294e-05, - "loss": 0.2396, - "step": 1309 - }, - { - "epoch": 0.07, - "grad_norm": 1.0067228344309367, - "learning_rate": 1.992977420209521e-05, - "loss": 0.2576, - "step": 1310 - }, - { - "epoch": 0.07, - "grad_norm": 1.2293727519204154, - "learning_rate": 1.9929579224401436e-05, - "loss": 0.2297, - "step": 1311 - }, - { - "epoch": 0.07, - "grad_norm": 1.5442224539939846, - "learning_rate": 1.992938397736727e-05, - "loss": 0.2366, - "step": 1312 - }, - { - "epoch": 0.07, - "grad_norm": 1.3856656195152042, - "learning_rate": 1.9929188460998e-05, - "loss": 0.2534, - "step": 1313 - }, - { - "epoch": 0.07, - "grad_norm": 1.0568400725170732, - "learning_rate": 1.992899267529893e-05, - "loss": 0.2555, - "step": 1314 - }, - { - "epoch": 0.07, - "grad_norm": 1.3167276569636372, - "learning_rate": 1.9928796620275377e-05, - "loss": 0.2651, - "step": 1315 - }, - { - "epoch": 0.07, - "grad_norm": 1.5992657211036883, - "learning_rate": 1.9928600295932655e-05, - "loss": 0.2644, - "step": 1316 - }, - { - "epoch": 0.07, - "grad_norm": 1.2137062647139218, - "learning_rate": 1.992840370227609e-05, - "loss": 0.2527, - "step": 1317 - }, - { - "epoch": 0.07, - "grad_norm": 1.1268154425617247, - "learning_rate": 1.992820683931101e-05, - "loss": 0.2532, - "step": 1318 - }, - { - "epoch": 0.07, - "grad_norm": 4.83134570568081, - "learning_rate": 1.992800970704276e-05, - "loss": 0.2679, - "step": 1319 - }, - { - "epoch": 0.07, - "grad_norm": 1.2780231232806325, - "learning_rate": 1.9927812305476685e-05, - "loss": 0.2515, - "step": 1320 - }, - { - "epoch": 0.07, - "grad_norm": 0.9825579835071978, - "learning_rate": 1.9927614634618142e-05, - "loss": 0.2491, - "step": 1321 - }, - { - "epoch": 0.07, - "grad_norm": 1.0270718091675821, - "learning_rate": 1.9927416694472493e-05, - "loss": 0.2598, - "step": 1322 - }, - { - "epoch": 0.07, - "grad_norm": 0.8900160649600508, - "learning_rate": 1.9927218485045103e-05, - "loss": 0.2355, - "step": 1323 - }, - { - "epoch": 0.07, - "grad_norm": 1.4166876201090004, - "learning_rate": 1.992702000634135e-05, - "loss": 0.2445, - "step": 1324 - }, - { - "epoch": 0.07, - "grad_norm": 0.9133842813076238, - "learning_rate": 1.9926821258366622e-05, - "loss": 0.2533, - "step": 1325 - }, - { - "epoch": 0.07, - "grad_norm": 1.1537899021882343, - "learning_rate": 1.9926622241126306e-05, - "loss": 0.2521, - "step": 1326 - }, - { - "epoch": 0.07, - "grad_norm": 1.237239241656995, - "learning_rate": 1.99264229546258e-05, - "loss": 0.2769, - "step": 1327 - }, - { - "epoch": 0.07, - "grad_norm": 0.9828066104197971, - "learning_rate": 1.992622339887051e-05, - "loss": 0.2688, - "step": 1328 - }, - { - "epoch": 0.07, - "grad_norm": 0.9653344834028247, - "learning_rate": 1.992602357386585e-05, - "loss": 0.2613, - "step": 1329 - }, - { - "epoch": 0.07, - "grad_norm": 1.1792847900807026, - "learning_rate": 1.9925823479617242e-05, - "loss": 0.2516, - "step": 1330 - }, - { - "epoch": 0.07, - "grad_norm": 0.9584914944637997, - "learning_rate": 1.9925623116130105e-05, - "loss": 0.2474, - "step": 1331 - }, - { - "epoch": 0.07, - "grad_norm": 1.1377392233359997, - "learning_rate": 1.9925422483409886e-05, - "loss": 0.2352, - "step": 1332 - }, - { - "epoch": 0.07, - "grad_norm": 1.2468284883092173, - "learning_rate": 1.992522158146202e-05, - "loss": 0.241, - "step": 1333 - }, - { - "epoch": 0.07, - "grad_norm": 1.3281788584533107, - "learning_rate": 1.9925020410291963e-05, - "loss": 0.2457, - "step": 1334 - }, - { - "epoch": 0.07, - "grad_norm": 1.3139206487167114, - "learning_rate": 1.992481896990516e-05, - "loss": 0.2725, - "step": 1335 - }, - { - "epoch": 0.07, - "grad_norm": 1.1181599807995584, - "learning_rate": 1.9924617260307088e-05, - "loss": 0.2496, - "step": 1336 - }, - { - "epoch": 0.07, - "grad_norm": 1.095318282833729, - "learning_rate": 1.9924415281503204e-05, - "loss": 0.2635, - "step": 1337 - }, - { - "epoch": 0.07, - "grad_norm": 1.3388267621206682, - "learning_rate": 1.9924213033499e-05, - "loss": 0.254, - "step": 1338 - }, - { - "epoch": 0.07, - "grad_norm": 1.2570545418236239, - "learning_rate": 1.9924010516299956e-05, - "loss": 0.2343, - "step": 1339 - }, - { - "epoch": 0.07, - "grad_norm": 1.8375633870522983, - "learning_rate": 1.9923807729911567e-05, - "loss": 0.2485, - "step": 1340 - }, - { - "epoch": 0.07, - "grad_norm": 1.2080640469217854, - "learning_rate": 1.9923604674339336e-05, - "loss": 0.2477, - "step": 1341 - }, - { - "epoch": 0.07, - "grad_norm": 1.3859634411593529, - "learning_rate": 1.9923401349588762e-05, - "loss": 0.2701, - "step": 1342 - }, - { - "epoch": 0.07, - "grad_norm": 1.2651163070331262, - "learning_rate": 1.9923197755665368e-05, - "loss": 0.237, - "step": 1343 - }, - { - "epoch": 0.07, - "grad_norm": 1.212850146390197, - "learning_rate": 1.9922993892574676e-05, - "loss": 0.2636, - "step": 1344 - }, - { - "epoch": 0.07, - "grad_norm": 1.5856155338509108, - "learning_rate": 1.9922789760322213e-05, - "loss": 0.2572, - "step": 1345 - }, - { - "epoch": 0.07, - "grad_norm": 1.3322458710609553, - "learning_rate": 1.9922585358913515e-05, - "loss": 0.2538, - "step": 1346 - }, - { - "epoch": 0.07, - "grad_norm": 1.1453763616701749, - "learning_rate": 1.992238068835413e-05, - "loss": 0.2127, - "step": 1347 - }, - { - "epoch": 0.07, - "grad_norm": 1.4671152149814002, - "learning_rate": 1.9922175748649612e-05, - "loss": 0.2467, - "step": 1348 - }, - { - "epoch": 0.07, - "grad_norm": 0.96344423338482, - "learning_rate": 1.9921970539805513e-05, - "loss": 0.2449, - "step": 1349 - }, - { - "epoch": 0.07, - "grad_norm": 1.3164717303997437, - "learning_rate": 1.9921765061827405e-05, - "loss": 0.2527, - "step": 1350 - }, - { - "epoch": 0.07, - "grad_norm": 1.1273036629419297, - "learning_rate": 1.992155931472086e-05, - "loss": 0.2335, - "step": 1351 - }, - { - "epoch": 0.07, - "grad_norm": 1.3271998637630977, - "learning_rate": 1.9921353298491453e-05, - "loss": 0.233, - "step": 1352 - }, - { - "epoch": 0.07, - "grad_norm": 1.083621975560061, - "learning_rate": 1.9921147013144782e-05, - "loss": 0.26, - "step": 1353 - }, - { - "epoch": 0.07, - "grad_norm": 1.1429372463643215, - "learning_rate": 1.9920940458686434e-05, - "loss": 0.2267, - "step": 1354 - }, - { - "epoch": 0.07, - "grad_norm": 1.3916262889864177, - "learning_rate": 1.992073363512202e-05, - "loss": 0.2518, - "step": 1355 - }, - { - "epoch": 0.07, - "grad_norm": 1.5964270594798335, - "learning_rate": 1.9920526542457143e-05, - "loss": 0.2459, - "step": 1356 - }, - { - "epoch": 0.07, - "grad_norm": 1.1391799157648104, - "learning_rate": 1.9920319180697422e-05, - "loss": 0.2825, - "step": 1357 - }, - { - "epoch": 0.07, - "grad_norm": 1.1659412582972448, - "learning_rate": 1.9920111549848486e-05, - "loss": 0.2372, - "step": 1358 - }, - { - "epoch": 0.07, - "grad_norm": 1.4113772014832484, - "learning_rate": 1.991990364991596e-05, - "loss": 0.2844, - "step": 1359 - }, - { - "epoch": 0.07, - "grad_norm": 1.2647682628121506, - "learning_rate": 1.991969548090549e-05, - "loss": 0.2307, - "step": 1360 - }, - { - "epoch": 0.07, - "grad_norm": 1.2706451265074896, - "learning_rate": 1.9919487042822722e-05, - "loss": 0.2504, - "step": 1361 - }, - { - "epoch": 0.07, - "grad_norm": 1.3043546137160325, - "learning_rate": 1.9919278335673306e-05, - "loss": 0.2653, - "step": 1362 - }, - { - "epoch": 0.07, - "grad_norm": 1.526648857603099, - "learning_rate": 1.9919069359462906e-05, - "loss": 0.2352, - "step": 1363 - }, - { - "epoch": 0.07, - "grad_norm": 0.9163989199581128, - "learning_rate": 1.9918860114197186e-05, - "loss": 0.2623, - "step": 1364 - }, - { - "epoch": 0.07, - "grad_norm": 1.2210780161581907, - "learning_rate": 1.9918650599881828e-05, - "loss": 0.2463, - "step": 1365 - }, - { - "epoch": 0.07, - "grad_norm": 1.2037239438393557, - "learning_rate": 1.9918440816522514e-05, - "loss": 0.2681, - "step": 1366 - }, - { - "epoch": 0.07, - "grad_norm": 1.2841089377890307, - "learning_rate": 1.991823076412493e-05, - "loss": 0.25, - "step": 1367 - }, - { - "epoch": 0.07, - "grad_norm": 1.2099082139270674, - "learning_rate": 1.9918020442694773e-05, - "loss": 0.2374, - "step": 1368 - }, - { - "epoch": 0.07, - "grad_norm": 1.5448627661147787, - "learning_rate": 1.9917809852237754e-05, - "loss": 0.2389, - "step": 1369 - }, - { - "epoch": 0.07, - "grad_norm": 1.3484794901039256, - "learning_rate": 1.9917598992759587e-05, - "loss": 0.2431, - "step": 1370 - }, - { - "epoch": 0.07, - "grad_norm": 1.3696444473216656, - "learning_rate": 1.9917387864265983e-05, - "loss": 0.2469, - "step": 1371 - }, - { - "epoch": 0.07, - "grad_norm": 1.1573883129027844, - "learning_rate": 1.9917176466762673e-05, - "loss": 0.2794, - "step": 1372 - }, - { - "epoch": 0.07, - "grad_norm": 0.9521797753529773, - "learning_rate": 1.991696480025539e-05, - "loss": 0.2609, - "step": 1373 - }, - { - "epoch": 0.07, - "grad_norm": 0.9701217511012938, - "learning_rate": 1.991675286474988e-05, - "loss": 0.2452, - "step": 1374 - }, - { - "epoch": 0.07, - "grad_norm": 1.2255199211334986, - "learning_rate": 1.9916540660251887e-05, - "loss": 0.2657, - "step": 1375 - }, - { - "epoch": 0.07, - "grad_norm": 1.2802572417504723, - "learning_rate": 1.9916328186767168e-05, - "loss": 0.2507, - "step": 1376 - }, - { - "epoch": 0.07, - "grad_norm": 1.2780752266677757, - "learning_rate": 1.9916115444301488e-05, - "loss": 0.2475, - "step": 1377 - }, - { - "epoch": 0.07, - "grad_norm": 1.1917746479811409, - "learning_rate": 1.9915902432860615e-05, - "loss": 0.2778, - "step": 1378 - }, - { - "epoch": 0.07, - "grad_norm": 1.1122713843909182, - "learning_rate": 1.9915689152450328e-05, - "loss": 0.257, - "step": 1379 - }, - { - "epoch": 0.07, - "grad_norm": 1.372238893365632, - "learning_rate": 1.9915475603076414e-05, - "loss": 0.2623, - "step": 1380 - }, - { - "epoch": 0.07, - "grad_norm": 1.3604773532215377, - "learning_rate": 1.9915261784744664e-05, - "loss": 0.2317, - "step": 1381 - }, - { - "epoch": 0.07, - "grad_norm": 1.200145793587868, - "learning_rate": 1.9915047697460878e-05, - "loss": 0.2662, - "step": 1382 - }, - { - "epoch": 0.07, - "grad_norm": 1.0781961411781062, - "learning_rate": 1.9914833341230863e-05, - "loss": 0.2551, - "step": 1383 - }, - { - "epoch": 0.07, - "grad_norm": 0.9837813270598623, - "learning_rate": 1.9914618716060437e-05, - "loss": 0.259, - "step": 1384 - }, - { - "epoch": 0.07, - "grad_norm": 1.1325359219128366, - "learning_rate": 1.9914403821955414e-05, - "loss": 0.2386, - "step": 1385 - }, - { - "epoch": 0.07, - "grad_norm": 1.0389864770057453, - "learning_rate": 1.9914188658921628e-05, - "loss": 0.236, - "step": 1386 - }, - { - "epoch": 0.07, - "grad_norm": 1.3836282909574422, - "learning_rate": 1.9913973226964917e-05, - "loss": 0.2576, - "step": 1387 - }, - { - "epoch": 0.07, - "grad_norm": 1.0113155814154398, - "learning_rate": 1.991375752609112e-05, - "loss": 0.2626, - "step": 1388 - }, - { - "epoch": 0.07, - "grad_norm": 1.0919641731577683, - "learning_rate": 1.991354155630609e-05, - "loss": 0.2562, - "step": 1389 - }, - { - "epoch": 0.07, - "grad_norm": 1.4916606982981637, - "learning_rate": 1.9913325317615684e-05, - "loss": 0.2225, - "step": 1390 - }, - { - "epoch": 0.07, - "grad_norm": 1.269965773751471, - "learning_rate": 1.9913108810025776e-05, - "loss": 0.2719, - "step": 1391 - }, - { - "epoch": 0.07, - "grad_norm": 2.1927198996440365, - "learning_rate": 1.9912892033542225e-05, - "loss": 0.2448, - "step": 1392 - }, - { - "epoch": 0.07, - "grad_norm": 1.0247277433743283, - "learning_rate": 1.991267498817092e-05, - "loss": 0.2456, - "step": 1393 - }, - { - "epoch": 0.07, - "grad_norm": 0.999133874275022, - "learning_rate": 1.9912457673917745e-05, - "loss": 0.2409, - "step": 1394 - }, - { - "epoch": 0.07, - "grad_norm": 1.160387438965137, - "learning_rate": 1.9912240090788595e-05, - "loss": 0.2619, - "step": 1395 - }, - { - "epoch": 0.07, - "grad_norm": 1.208793083153623, - "learning_rate": 1.9912022238789374e-05, - "loss": 0.242, - "step": 1396 - }, - { - "epoch": 0.07, - "grad_norm": 1.8843530643442636, - "learning_rate": 1.991180411792599e-05, - "loss": 0.2707, - "step": 1397 - }, - { - "epoch": 0.07, - "grad_norm": 1.1037363784292973, - "learning_rate": 1.9911585728204362e-05, - "loss": 0.2335, - "step": 1398 - }, - { - "epoch": 0.07, - "grad_norm": 0.954476589084465, - "learning_rate": 1.9911367069630408e-05, - "loss": 0.2311, - "step": 1399 - }, - { - "epoch": 0.07, - "grad_norm": 1.0015353333121517, - "learning_rate": 1.991114814221006e-05, - "loss": 0.2325, - "step": 1400 - }, - { - "epoch": 0.07, - "grad_norm": 0.9980972684885128, - "learning_rate": 1.9910928945949264e-05, - "loss": 0.2394, - "step": 1401 - }, - { - "epoch": 0.07, - "grad_norm": 1.7186452306443267, - "learning_rate": 1.9910709480853957e-05, - "loss": 0.2424, - "step": 1402 - }, - { - "epoch": 0.07, - "grad_norm": 0.9696052813958719, - "learning_rate": 1.9910489746930097e-05, - "loss": 0.26, - "step": 1403 - }, - { - "epoch": 0.07, - "grad_norm": 0.7690335657622046, - "learning_rate": 1.9910269744183645e-05, - "loss": 0.2129, - "step": 1404 - }, - { - "epoch": 0.07, - "grad_norm": 1.1975760811845586, - "learning_rate": 1.9910049472620564e-05, - "loss": 0.2611, - "step": 1405 - }, - { - "epoch": 0.07, - "grad_norm": 0.8503953892746059, - "learning_rate": 1.990982893224683e-05, - "loss": 0.2246, - "step": 1406 - }, - { - "epoch": 0.07, - "grad_norm": 0.847893913853286, - "learning_rate": 1.990960812306843e-05, - "loss": 0.2194, - "step": 1407 - }, - { - "epoch": 0.07, - "grad_norm": 0.9348019252601966, - "learning_rate": 1.990938704509135e-05, - "loss": 0.221, - "step": 1408 - }, - { - "epoch": 0.07, - "grad_norm": 0.9761686610221613, - "learning_rate": 1.9909165698321585e-05, - "loss": 0.243, - "step": 1409 - }, - { - "epoch": 0.07, - "grad_norm": 1.1722248378675564, - "learning_rate": 1.990894408276514e-05, - "loss": 0.2547, - "step": 1410 - }, - { - "epoch": 0.07, - "grad_norm": 0.8053092328818088, - "learning_rate": 1.9908722198428027e-05, - "loss": 0.227, - "step": 1411 - }, - { - "epoch": 0.07, - "grad_norm": 0.9665903219728691, - "learning_rate": 1.9908500045316264e-05, - "loss": 0.2451, - "step": 1412 - }, - { - "epoch": 0.07, - "grad_norm": 0.953084935420593, - "learning_rate": 1.9908277623435878e-05, - "loss": 0.2318, - "step": 1413 - }, - { - "epoch": 0.07, - "grad_norm": 1.5767742424384938, - "learning_rate": 1.9908054932792903e-05, - "loss": 0.2865, - "step": 1414 - }, - { - "epoch": 0.07, - "grad_norm": 1.0425077282991027, - "learning_rate": 1.9907831973393377e-05, - "loss": 0.2654, - "step": 1415 - }, - { - "epoch": 0.07, - "grad_norm": 0.9923093106371809, - "learning_rate": 1.9907608745243356e-05, - "loss": 0.2429, - "step": 1416 - }, - { - "epoch": 0.07, - "grad_norm": 1.104082202213175, - "learning_rate": 1.9907385248348882e-05, - "loss": 0.2542, - "step": 1417 - }, - { - "epoch": 0.07, - "grad_norm": 1.1450188139943347, - "learning_rate": 1.990716148271602e-05, - "loss": 0.2285, - "step": 1418 - }, - { - "epoch": 0.07, - "grad_norm": 1.0882818233930411, - "learning_rate": 1.990693744835085e-05, - "loss": 0.2331, - "step": 1419 - }, - { - "epoch": 0.07, - "grad_norm": 1.0449521038261802, - "learning_rate": 1.990671314525944e-05, - "loss": 0.2442, - "step": 1420 - }, - { - "epoch": 0.07, - "grad_norm": 1.1106530955831952, - "learning_rate": 1.9906488573447875e-05, - "loss": 0.2584, - "step": 1421 - }, - { - "epoch": 0.07, - "grad_norm": 1.0370821761153255, - "learning_rate": 1.990626373292225e-05, - "loss": 0.2335, - "step": 1422 - }, - { - "epoch": 0.07, - "grad_norm": 1.13841965133708, - "learning_rate": 1.9906038623688658e-05, - "loss": 0.2388, - "step": 1423 - }, - { - "epoch": 0.07, - "grad_norm": 0.9337248189403204, - "learning_rate": 1.9905813245753214e-05, - "loss": 0.253, - "step": 1424 - }, - { - "epoch": 0.07, - "grad_norm": 0.8239707311358239, - "learning_rate": 1.9905587599122022e-05, - "loss": 0.2417, - "step": 1425 - }, - { - "epoch": 0.07, - "grad_norm": 0.8948997842459009, - "learning_rate": 1.990536168380121e-05, - "loss": 0.2574, - "step": 1426 - }, - { - "epoch": 0.07, - "grad_norm": 2.344858167440341, - "learning_rate": 1.9905135499796903e-05, - "loss": 0.2557, - "step": 1427 - }, - { - "epoch": 0.07, - "grad_norm": 0.8315357711462754, - "learning_rate": 1.9904909047115233e-05, - "loss": 0.267, - "step": 1428 - }, - { - "epoch": 0.07, - "grad_norm": 0.7994238898963966, - "learning_rate": 1.990468232576235e-05, - "loss": 0.2377, - "step": 1429 - }, - { - "epoch": 0.07, - "grad_norm": 0.8653614776519574, - "learning_rate": 1.9904455335744395e-05, - "loss": 0.2432, - "step": 1430 - }, - { - "epoch": 0.07, - "grad_norm": 0.8806812047415478, - "learning_rate": 1.990422807706753e-05, - "loss": 0.2571, - "step": 1431 - }, - { - "epoch": 0.07, - "grad_norm": 0.9214838257317811, - "learning_rate": 1.990400054973792e-05, - "loss": 0.2043, - "step": 1432 - }, - { - "epoch": 0.07, - "grad_norm": 1.8867702484440099, - "learning_rate": 1.9903772753761736e-05, - "loss": 0.2562, - "step": 1433 - }, - { - "epoch": 0.07, - "grad_norm": 0.8546974694213872, - "learning_rate": 1.990354468914516e-05, - "loss": 0.2408, - "step": 1434 - }, - { - "epoch": 0.07, - "grad_norm": 1.1024048985123924, - "learning_rate": 1.990331635589437e-05, - "loss": 0.2714, - "step": 1435 - }, - { - "epoch": 0.07, - "grad_norm": 1.1446317404952169, - "learning_rate": 1.9903087754015567e-05, - "loss": 0.2347, - "step": 1436 - }, - { - "epoch": 0.07, - "grad_norm": 0.919739139874813, - "learning_rate": 1.9902858883514948e-05, - "loss": 0.2527, - "step": 1437 - }, - { - "epoch": 0.07, - "grad_norm": 1.435959400311835, - "learning_rate": 1.990262974439872e-05, - "loss": 0.2464, - "step": 1438 - }, - { - "epoch": 0.07, - "grad_norm": 3.8152490838875965, - "learning_rate": 1.9902400336673107e-05, - "loss": 0.2828, - "step": 1439 - }, - { - "epoch": 0.07, - "grad_norm": 0.9967524541458044, - "learning_rate": 1.9902170660344323e-05, - "loss": 0.2598, - "step": 1440 - }, - { - "epoch": 0.07, - "grad_norm": 1.066795566885315, - "learning_rate": 1.99019407154186e-05, - "loss": 0.2262, - "step": 1441 - }, - { - "epoch": 0.07, - "grad_norm": 1.0092050775090406, - "learning_rate": 1.9901710501902177e-05, - "loss": 0.2339, - "step": 1442 - }, - { - "epoch": 0.07, - "grad_norm": 1.1680892173535489, - "learning_rate": 1.9901480019801297e-05, - "loss": 0.217, - "step": 1443 - }, - { - "epoch": 0.07, - "grad_norm": 0.9901721765525188, - "learning_rate": 1.990124926912221e-05, - "loss": 0.2208, - "step": 1444 - }, - { - "epoch": 0.07, - "grad_norm": 1.3413707759726372, - "learning_rate": 1.990101824987118e-05, - "loss": 0.2542, - "step": 1445 - }, - { - "epoch": 0.07, - "grad_norm": 1.0970427933144882, - "learning_rate": 1.9900786962054468e-05, - "loss": 0.2431, - "step": 1446 - }, - { - "epoch": 0.07, - "grad_norm": 1.1698370073558522, - "learning_rate": 1.9900555405678354e-05, - "loss": 0.264, - "step": 1447 - }, - { - "epoch": 0.07, - "grad_norm": 5.996959606051725, - "learning_rate": 1.990032358074911e-05, - "loss": 0.2658, - "step": 1448 - }, - { - "epoch": 0.07, - "grad_norm": 1.3875556188415172, - "learning_rate": 1.9900091487273035e-05, - "loss": 0.2672, - "step": 1449 - }, - { - "epoch": 0.07, - "grad_norm": 1.1253872775336855, - "learning_rate": 1.9899859125256417e-05, - "loss": 0.2545, - "step": 1450 - }, - { - "epoch": 0.07, - "grad_norm": 1.065186848280434, - "learning_rate": 1.989962649470556e-05, - "loss": 0.2541, - "step": 1451 - }, - { - "epoch": 0.07, - "grad_norm": 1.1268591743661116, - "learning_rate": 1.989939359562678e-05, - "loss": 0.2451, - "step": 1452 - }, - { - "epoch": 0.07, - "grad_norm": 1.2351628782733877, - "learning_rate": 1.9899160428026383e-05, - "loss": 0.2669, - "step": 1453 - }, - { - "epoch": 0.07, - "grad_norm": 1.2936651171469973, - "learning_rate": 1.9898926991910704e-05, - "loss": 0.2772, - "step": 1454 - }, - { - "epoch": 0.07, - "grad_norm": 1.2085038183262553, - "learning_rate": 1.989869328728607e-05, - "loss": 0.2427, - "step": 1455 - }, - { - "epoch": 0.07, - "grad_norm": 1.4147175272036487, - "learning_rate": 1.9898459314158825e-05, - "loss": 0.2632, - "step": 1456 - }, - { - "epoch": 0.07, - "grad_norm": 0.9223842251997182, - "learning_rate": 1.989822507253531e-05, - "loss": 0.221, - "step": 1457 - }, - { - "epoch": 0.07, - "grad_norm": 1.2893374006720397, - "learning_rate": 1.9897990562421882e-05, - "loss": 0.2401, - "step": 1458 - }, - { - "epoch": 0.07, - "grad_norm": 0.9799151191327158, - "learning_rate": 1.9897755783824897e-05, - "loss": 0.2362, - "step": 1459 - }, - { - "epoch": 0.07, - "grad_norm": 1.1257687249716646, - "learning_rate": 1.989752073675073e-05, - "loss": 0.2519, - "step": 1460 - }, - { - "epoch": 0.07, - "grad_norm": 1.0071942580571653, - "learning_rate": 1.9897285421205753e-05, - "loss": 0.2347, - "step": 1461 - }, - { - "epoch": 0.07, - "grad_norm": 1.127580590734185, - "learning_rate": 1.989704983719635e-05, - "loss": 0.2469, - "step": 1462 - }, - { - "epoch": 0.07, - "grad_norm": 7.490512732609794, - "learning_rate": 1.9896813984728915e-05, - "loss": 0.2626, - "step": 1463 - }, - { - "epoch": 0.07, - "grad_norm": 1.5208393177639197, - "learning_rate": 1.9896577863809836e-05, - "loss": 0.2562, - "step": 1464 - }, - { - "epoch": 0.07, - "grad_norm": 1.3154094170613637, - "learning_rate": 1.9896341474445526e-05, - "loss": 0.2369, - "step": 1465 - }, - { - "epoch": 0.07, - "grad_norm": 1.3400389440702367, - "learning_rate": 1.9896104816642393e-05, - "loss": 0.2435, - "step": 1466 - }, - { - "epoch": 0.07, - "grad_norm": 1.245380768826678, - "learning_rate": 1.989586789040686e-05, - "loss": 0.2342, - "step": 1467 - }, - { - "epoch": 0.07, - "grad_norm": 1.0622069292754814, - "learning_rate": 1.9895630695745353e-05, - "loss": 0.2514, - "step": 1468 - }, - { - "epoch": 0.07, - "grad_norm": 1.3739800782133331, - "learning_rate": 1.98953932326643e-05, - "loss": 0.2709, - "step": 1469 - }, - { - "epoch": 0.07, - "grad_norm": 1.2438019019651285, - "learning_rate": 1.9895155501170153e-05, - "loss": 0.224, - "step": 1470 - }, - { - "epoch": 0.07, - "grad_norm": 1.1099411604601386, - "learning_rate": 1.9894917501269346e-05, - "loss": 0.2386, - "step": 1471 - }, - { - "epoch": 0.07, - "grad_norm": 1.0999919296078493, - "learning_rate": 1.989467923296835e-05, - "loss": 0.2301, - "step": 1472 - }, - { - "epoch": 0.07, - "grad_norm": 0.8744198630330524, - "learning_rate": 1.9894440696273615e-05, - "loss": 0.2443, - "step": 1473 - }, - { - "epoch": 0.07, - "grad_norm": 1.462670102238129, - "learning_rate": 1.9894201891191624e-05, - "loss": 0.2574, - "step": 1474 - }, - { - "epoch": 0.08, - "grad_norm": 3.522143438867789, - "learning_rate": 1.9893962817728842e-05, - "loss": 0.2598, - "step": 1475 - }, - { - "epoch": 0.08, - "grad_norm": 2.66521405994223, - "learning_rate": 1.9893723475891762e-05, - "loss": 0.227, - "step": 1476 - }, - { - "epoch": 0.08, - "grad_norm": 1.030802909005997, - "learning_rate": 1.9893483865686875e-05, - "loss": 0.2425, - "step": 1477 - }, - { - "epoch": 0.08, - "grad_norm": 1.619965380448205, - "learning_rate": 1.989324398712068e-05, - "loss": 0.2383, - "step": 1478 - }, - { - "epoch": 0.08, - "grad_norm": 1.5818095937642507, - "learning_rate": 1.9893003840199677e-05, - "loss": 0.2516, - "step": 1479 - }, - { - "epoch": 0.08, - "grad_norm": 1.4259409101129912, - "learning_rate": 1.989276342493039e-05, - "loss": 0.2405, - "step": 1480 - }, - { - "epoch": 0.08, - "grad_norm": 0.9980110737147162, - "learning_rate": 1.989252274131934e-05, - "loss": 0.2323, - "step": 1481 - }, - { - "epoch": 0.08, - "grad_norm": 1.0821845943196076, - "learning_rate": 1.9892281789373047e-05, - "loss": 0.2628, - "step": 1482 - }, - { - "epoch": 0.08, - "grad_norm": 1.0011387530258766, - "learning_rate": 1.9892040569098054e-05, - "loss": 0.2427, - "step": 1483 - }, - { - "epoch": 0.08, - "grad_norm": 1.296208187020809, - "learning_rate": 1.98917990805009e-05, - "loss": 0.2274, - "step": 1484 - }, - { - "epoch": 0.08, - "grad_norm": 1.17330571312088, - "learning_rate": 1.989155732358814e-05, - "loss": 0.2519, - "step": 1485 - }, - { - "epoch": 0.08, - "grad_norm": 1.0333952943167788, - "learning_rate": 1.9891315298366327e-05, - "loss": 0.2308, - "step": 1486 - }, - { - "epoch": 0.08, - "grad_norm": 1.1104592349324152, - "learning_rate": 1.9891073004842026e-05, - "loss": 0.2546, - "step": 1487 - }, - { - "epoch": 0.08, - "grad_norm": 1.2835504367778445, - "learning_rate": 1.9890830443021814e-05, - "loss": 0.2398, - "step": 1488 - }, - { - "epoch": 0.08, - "grad_norm": 1.1519730418068233, - "learning_rate": 1.9890587612912268e-05, - "loss": 0.2599, - "step": 1489 - }, - { - "epoch": 0.08, - "grad_norm": 1.1704596933418903, - "learning_rate": 1.9890344514519974e-05, - "loss": 0.2392, - "step": 1490 - }, - { - "epoch": 0.08, - "grad_norm": 1.349986371882224, - "learning_rate": 1.9890101147851526e-05, - "loss": 0.2604, - "step": 1491 - }, - { - "epoch": 0.08, - "grad_norm": 1.097941104349231, - "learning_rate": 1.9889857512913523e-05, - "loss": 0.2556, - "step": 1492 - }, - { - "epoch": 0.08, - "grad_norm": 1.7466568104138178, - "learning_rate": 1.988961360971258e-05, - "loss": 0.2476, - "step": 1493 - }, - { - "epoch": 0.08, - "grad_norm": 1.1822046880072066, - "learning_rate": 1.988936943825531e-05, - "loss": 0.2595, - "step": 1494 - }, - { - "epoch": 0.08, - "grad_norm": 0.9970149311719365, - "learning_rate": 1.9889124998548332e-05, - "loss": 0.2517, - "step": 1495 - }, - { - "epoch": 0.08, - "grad_norm": 1.0518883109686619, - "learning_rate": 1.9888880290598282e-05, - "loss": 0.2647, - "step": 1496 - }, - { - "epoch": 0.08, - "grad_norm": 1.2107601881178482, - "learning_rate": 1.9888635314411797e-05, - "loss": 0.2647, - "step": 1497 - }, - { - "epoch": 0.08, - "grad_norm": 1.0645260886991201, - "learning_rate": 1.9888390069995516e-05, - "loss": 0.2764, - "step": 1498 - }, - { - "epoch": 0.08, - "grad_norm": 1.1982898262283437, - "learning_rate": 1.98881445573561e-05, - "loss": 0.243, - "step": 1499 - }, - { - "epoch": 0.08, - "grad_norm": 1.2710638079525047, - "learning_rate": 1.9887898776500203e-05, - "loss": 0.2552, - "step": 1500 - }, - { - "epoch": 0.08, - "grad_norm": 1.2871447436383632, - "learning_rate": 1.9887652727434492e-05, - "loss": 0.2446, - "step": 1501 - }, - { - "epoch": 0.08, - "grad_norm": 1.1434019027245008, - "learning_rate": 1.9887406410165644e-05, - "loss": 0.2352, - "step": 1502 - }, - { - "epoch": 0.08, - "grad_norm": 1.405194585504932, - "learning_rate": 1.988715982470034e-05, - "loss": 0.2275, - "step": 1503 - }, - { - "epoch": 0.08, - "grad_norm": 1.4582424939786653, - "learning_rate": 1.9886912971045263e-05, - "loss": 0.216, - "step": 1504 - }, - { - "epoch": 0.08, - "grad_norm": 1.7604856830727356, - "learning_rate": 1.9886665849207116e-05, - "loss": 0.268, - "step": 1505 - }, - { - "epoch": 0.08, - "grad_norm": 1.0954405919257795, - "learning_rate": 1.98864184591926e-05, - "loss": 0.2502, - "step": 1506 - }, - { - "epoch": 0.08, - "grad_norm": 1.226051267596389, - "learning_rate": 1.9886170801008423e-05, - "loss": 0.2505, - "step": 1507 - }, - { - "epoch": 0.08, - "grad_norm": 1.2075409268454134, - "learning_rate": 1.9885922874661308e-05, - "loss": 0.2461, - "step": 1508 - }, - { - "epoch": 0.08, - "grad_norm": 1.4044291962244422, - "learning_rate": 1.9885674680157974e-05, - "loss": 0.2331, - "step": 1509 - }, - { - "epoch": 0.08, - "grad_norm": 1.892375728058062, - "learning_rate": 1.9885426217505154e-05, - "loss": 0.2579, - "step": 1510 - }, - { - "epoch": 0.08, - "grad_norm": 1.0109769323742304, - "learning_rate": 1.9885177486709595e-05, - "loss": 0.2407, - "step": 1511 - }, - { - "epoch": 0.08, - "grad_norm": 1.2523333881157754, - "learning_rate": 1.988492848777803e-05, - "loss": 0.2368, - "step": 1512 - }, - { - "epoch": 0.08, - "grad_norm": 0.9684049169883828, - "learning_rate": 1.9884679220717232e-05, - "loss": 0.238, - "step": 1513 - }, - { - "epoch": 0.08, - "grad_norm": 0.985704737514277, - "learning_rate": 1.9884429685533947e-05, - "loss": 0.2292, - "step": 1514 - }, - { - "epoch": 0.08, - "grad_norm": 1.5076127760722757, - "learning_rate": 1.9884179882234946e-05, - "loss": 0.2582, - "step": 1515 - }, - { - "epoch": 0.08, - "grad_norm": 1.1491759101287482, - "learning_rate": 1.988392981082701e-05, - "loss": 0.2619, - "step": 1516 - }, - { - "epoch": 0.08, - "grad_norm": 1.016203850638015, - "learning_rate": 1.9883679471316918e-05, - "loss": 0.2411, - "step": 1517 - }, - { - "epoch": 0.08, - "grad_norm": 1.237079163922459, - "learning_rate": 1.9883428863711463e-05, - "loss": 0.2125, - "step": 1518 - }, - { - "epoch": 0.08, - "grad_norm": 1.1480551559538474, - "learning_rate": 1.9883177988017444e-05, - "loss": 0.2565, - "step": 1519 - }, - { - "epoch": 0.08, - "grad_norm": 1.1236030361228728, - "learning_rate": 1.9882926844241662e-05, - "loss": 0.2568, - "step": 1520 - }, - { - "epoch": 0.08, - "grad_norm": 1.0466955366612953, - "learning_rate": 1.988267543239093e-05, - "loss": 0.2243, - "step": 1521 - }, - { - "epoch": 0.08, - "grad_norm": 1.1433635714952062, - "learning_rate": 1.988242375247207e-05, - "loss": 0.2695, - "step": 1522 - }, - { - "epoch": 0.08, - "grad_norm": 0.9521221469115966, - "learning_rate": 1.9882171804491905e-05, - "loss": 0.2548, - "step": 1523 - }, - { - "epoch": 0.08, - "grad_norm": 1.032790584538378, - "learning_rate": 1.9881919588457274e-05, - "loss": 0.219, - "step": 1524 - }, - { - "epoch": 0.08, - "grad_norm": 1.1202926836453848, - "learning_rate": 1.9881667104375018e-05, - "loss": 0.2256, - "step": 1525 - }, - { - "epoch": 0.08, - "grad_norm": 1.0362677561309366, - "learning_rate": 1.988141435225198e-05, - "loss": 0.2424, - "step": 1526 - }, - { - "epoch": 0.08, - "grad_norm": 1.108121046698207, - "learning_rate": 1.9881161332095024e-05, - "loss": 0.259, - "step": 1527 - }, - { - "epoch": 0.08, - "grad_norm": 1.106705413680249, - "learning_rate": 1.9880908043911004e-05, - "loss": 0.2494, - "step": 1528 - }, - { - "epoch": 0.08, - "grad_norm": 0.8919959929553708, - "learning_rate": 1.9880654487706797e-05, - "loss": 0.2384, - "step": 1529 - }, - { - "epoch": 0.08, - "grad_norm": 1.1403536176285372, - "learning_rate": 1.988040066348928e-05, - "loss": 0.2442, - "step": 1530 - }, - { - "epoch": 0.08, - "grad_norm": 1.0397301835981851, - "learning_rate": 1.9880146571265337e-05, - "loss": 0.2215, - "step": 1531 - }, - { - "epoch": 0.08, - "grad_norm": 1.0570264901588597, - "learning_rate": 1.987989221104186e-05, - "loss": 0.2186, - "step": 1532 - }, - { - "epoch": 0.08, - "grad_norm": 1.5454973258691305, - "learning_rate": 1.987963758282575e-05, - "loss": 0.2435, - "step": 1533 - }, - { - "epoch": 0.08, - "grad_norm": 1.00095769815178, - "learning_rate": 1.987938268662391e-05, - "loss": 0.2398, - "step": 1534 - }, - { - "epoch": 0.08, - "grad_norm": 1.0314401299992415, - "learning_rate": 1.9879127522443255e-05, - "loss": 0.2516, - "step": 1535 - }, - { - "epoch": 0.08, - "grad_norm": 1.0239176880004133, - "learning_rate": 1.9878872090290714e-05, - "loss": 0.2209, - "step": 1536 - }, - { - "epoch": 0.08, - "grad_norm": 2.1516319522863183, - "learning_rate": 1.98786163901732e-05, - "loss": 0.2179, - "step": 1537 - }, - { - "epoch": 0.08, - "grad_norm": 1.0899641412385346, - "learning_rate": 1.9878360422097666e-05, - "loss": 0.2629, - "step": 1538 - }, - { - "epoch": 0.08, - "grad_norm": 1.1161182738196964, - "learning_rate": 1.9878104186071047e-05, - "loss": 0.2669, - "step": 1539 - }, - { - "epoch": 0.08, - "grad_norm": 0.9331629099333119, - "learning_rate": 1.9877847682100294e-05, - "loss": 0.2226, - "step": 1540 - }, - { - "epoch": 0.08, - "grad_norm": 0.8936707439041887, - "learning_rate": 1.9877590910192362e-05, - "loss": 0.2725, - "step": 1541 - }, - { - "epoch": 0.08, - "grad_norm": 1.127775496155315, - "learning_rate": 1.987733387035422e-05, - "loss": 0.2563, - "step": 1542 - }, - { - "epoch": 0.08, - "grad_norm": 1.1260913141490958, - "learning_rate": 1.9877076562592844e-05, - "loss": 0.2322, - "step": 1543 - }, - { - "epoch": 0.08, - "grad_norm": 0.8980360219871005, - "learning_rate": 1.9876818986915202e-05, - "loss": 0.2206, - "step": 1544 - }, - { - "epoch": 0.08, - "grad_norm": 0.9796120458036948, - "learning_rate": 1.9876561143328287e-05, - "loss": 0.2516, - "step": 1545 - }, - { - "epoch": 0.08, - "grad_norm": 1.2629491204181673, - "learning_rate": 1.9876303031839094e-05, - "loss": 0.2083, - "step": 1546 - }, - { - "epoch": 0.08, - "grad_norm": 1.0874082802575349, - "learning_rate": 1.9876044652454627e-05, - "loss": 0.2649, - "step": 1547 - }, - { - "epoch": 0.08, - "grad_norm": 1.1914776664656248, - "learning_rate": 1.987578600518189e-05, - "loss": 0.2606, - "step": 1548 - }, - { - "epoch": 0.08, - "grad_norm": 0.9057171643487041, - "learning_rate": 1.9875527090027897e-05, - "loss": 0.2399, - "step": 1549 - }, - { - "epoch": 0.08, - "grad_norm": 1.0640987292525308, - "learning_rate": 1.9875267906999674e-05, - "loss": 0.2369, - "step": 1550 - }, - { - "epoch": 0.08, - "grad_norm": 1.1909353635895097, - "learning_rate": 1.987500845610425e-05, - "loss": 0.2415, - "step": 1551 - }, - { - "epoch": 0.08, - "grad_norm": 1.1777676002953559, - "learning_rate": 1.987474873734867e-05, - "loss": 0.2381, - "step": 1552 - }, - { - "epoch": 0.08, - "grad_norm": 1.0400464992950833, - "learning_rate": 1.987448875073997e-05, - "loss": 0.2272, - "step": 1553 - }, - { - "epoch": 0.08, - "grad_norm": 0.9159186299023466, - "learning_rate": 1.9874228496285203e-05, - "loss": 0.2326, - "step": 1554 - }, - { - "epoch": 0.08, - "grad_norm": 1.0776077764374352, - "learning_rate": 1.9873967973991432e-05, - "loss": 0.251, - "step": 1555 - }, - { - "epoch": 0.08, - "grad_norm": 1.4205363639530653, - "learning_rate": 1.987370718386572e-05, - "loss": 0.2427, - "step": 1556 - }, - { - "epoch": 0.08, - "grad_norm": 1.1243700994075487, - "learning_rate": 1.9873446125915145e-05, - "loss": 0.2768, - "step": 1557 - }, - { - "epoch": 0.08, - "grad_norm": 1.1028876890940438, - "learning_rate": 1.9873184800146785e-05, - "loss": 0.2489, - "step": 1558 - }, - { - "epoch": 0.08, - "grad_norm": 1.0126681453424553, - "learning_rate": 1.987292320656773e-05, - "loss": 0.2471, - "step": 1559 - }, - { - "epoch": 0.08, - "grad_norm": 0.9566294464744491, - "learning_rate": 1.9872661345185076e-05, - "loss": 0.2393, - "step": 1560 - }, - { - "epoch": 0.08, - "grad_norm": 1.3368115973005088, - "learning_rate": 1.9872399216005928e-05, - "loss": 0.2478, - "step": 1561 - }, - { - "epoch": 0.08, - "grad_norm": 1.592650857695837, - "learning_rate": 1.9872136819037388e-05, - "loss": 0.2437, - "step": 1562 - }, - { - "epoch": 0.08, - "grad_norm": 1.1068509765308028, - "learning_rate": 1.9871874154286585e-05, - "loss": 0.2383, - "step": 1563 - }, - { - "epoch": 0.08, - "grad_norm": 1.0576437487873738, - "learning_rate": 1.987161122176063e-05, - "loss": 0.2348, - "step": 1564 - }, - { - "epoch": 0.08, - "grad_norm": 1.0079938384705214, - "learning_rate": 1.9871348021466673e-05, - "loss": 0.2355, - "step": 1565 - }, - { - "epoch": 0.08, - "grad_norm": 1.4585402407253452, - "learning_rate": 1.9871084553411835e-05, - "loss": 0.2369, - "step": 1566 - }, - { - "epoch": 0.08, - "grad_norm": 1.0320403920202317, - "learning_rate": 1.9870820817603276e-05, - "loss": 0.2191, - "step": 1567 - }, - { - "epoch": 0.08, - "grad_norm": 1.0270957769308535, - "learning_rate": 1.9870556814048145e-05, - "loss": 0.2427, - "step": 1568 - }, - { - "epoch": 0.08, - "grad_norm": 1.2074542042203558, - "learning_rate": 1.9870292542753596e-05, - "loss": 0.2504, - "step": 1569 - }, - { - "epoch": 0.08, - "grad_norm": 0.942970746262455, - "learning_rate": 1.987002800372681e-05, - "loss": 0.2212, - "step": 1570 - }, - { - "epoch": 0.08, - "grad_norm": 0.9104940249554769, - "learning_rate": 1.9869763196974957e-05, - "loss": 0.2596, - "step": 1571 - }, - { - "epoch": 0.08, - "grad_norm": 0.8644721366170993, - "learning_rate": 1.986949812250522e-05, - "loss": 0.2258, - "step": 1572 - }, - { - "epoch": 0.08, - "grad_norm": 1.1098327646726607, - "learning_rate": 1.986923278032479e-05, - "loss": 0.2533, - "step": 1573 - }, - { - "epoch": 0.08, - "grad_norm": 1.4782949178290852, - "learning_rate": 1.986896717044086e-05, - "loss": 0.3009, - "step": 1574 - }, - { - "epoch": 0.08, - "grad_norm": 1.0298685807340697, - "learning_rate": 1.986870129286064e-05, - "loss": 0.2592, - "step": 1575 - }, - { - "epoch": 0.08, - "grad_norm": 1.2470618274705578, - "learning_rate": 1.986843514759134e-05, - "loss": 0.2426, - "step": 1576 - }, - { - "epoch": 0.08, - "grad_norm": 1.266134386190633, - "learning_rate": 1.986816873464018e-05, - "loss": 0.2532, - "step": 1577 - }, - { - "epoch": 0.08, - "grad_norm": 1.4067436040344086, - "learning_rate": 1.9867902054014382e-05, - "loss": 0.279, - "step": 1578 - }, - { - "epoch": 0.08, - "grad_norm": 0.8698043011661225, - "learning_rate": 1.986763510572119e-05, - "loss": 0.2399, - "step": 1579 - }, - { - "epoch": 0.08, - "grad_norm": 1.0193089125859338, - "learning_rate": 1.986736788976783e-05, - "loss": 0.2277, - "step": 1580 - }, - { - "epoch": 0.08, - "grad_norm": 1.17900717751552, - "learning_rate": 1.9867100406161563e-05, - "loss": 0.2609, - "step": 1581 - }, - { - "epoch": 0.08, - "grad_norm": 1.062165241963998, - "learning_rate": 1.986683265490964e-05, - "loss": 0.2747, - "step": 1582 - }, - { - "epoch": 0.08, - "grad_norm": 1.137051073249643, - "learning_rate": 1.9866564636019326e-05, - "loss": 0.2288, - "step": 1583 - }, - { - "epoch": 0.08, - "grad_norm": 1.1032651435013483, - "learning_rate": 1.9866296349497885e-05, - "loss": 0.2267, - "step": 1584 - }, - { - "epoch": 0.08, - "grad_norm": 0.9910774254352182, - "learning_rate": 1.9866027795352604e-05, - "loss": 0.2767, - "step": 1585 - }, - { - "epoch": 0.08, - "grad_norm": 1.0635540904541145, - "learning_rate": 1.986575897359076e-05, - "loss": 0.2722, - "step": 1586 - }, - { - "epoch": 0.08, - "grad_norm": 0.9511029033089041, - "learning_rate": 1.9865489884219643e-05, - "loss": 0.2284, - "step": 1587 - }, - { - "epoch": 0.08, - "grad_norm": 1.139944510002614, - "learning_rate": 1.9865220527246556e-05, - "loss": 0.2355, - "step": 1588 - }, - { - "epoch": 0.08, - "grad_norm": 1.3021053893170482, - "learning_rate": 1.986495090267881e-05, - "loss": 0.2657, - "step": 1589 - }, - { - "epoch": 0.08, - "grad_norm": 1.0402349598351108, - "learning_rate": 1.986468101052371e-05, - "loss": 0.2491, - "step": 1590 - }, - { - "epoch": 0.08, - "grad_norm": 0.9833347233482201, - "learning_rate": 1.986441085078858e-05, - "loss": 0.2408, - "step": 1591 - }, - { - "epoch": 0.08, - "grad_norm": 0.9702896415133163, - "learning_rate": 1.986414042348075e-05, - "loss": 0.2296, - "step": 1592 - }, - { - "epoch": 0.08, - "grad_norm": 1.8403703196326306, - "learning_rate": 1.9863869728607553e-05, - "loss": 0.2327, - "step": 1593 - }, - { - "epoch": 0.08, - "grad_norm": 0.9887059891575193, - "learning_rate": 1.986359876617633e-05, - "loss": 0.2416, - "step": 1594 - }, - { - "epoch": 0.08, - "grad_norm": 1.15694928653404, - "learning_rate": 1.9863327536194438e-05, - "loss": 0.2196, - "step": 1595 - }, - { - "epoch": 0.08, - "grad_norm": 1.0068578045610728, - "learning_rate": 1.9863056038669225e-05, - "loss": 0.2533, - "step": 1596 - }, - { - "epoch": 0.08, - "grad_norm": 0.9916793991803713, - "learning_rate": 1.9862784273608066e-05, - "loss": 0.2549, - "step": 1597 - }, - { - "epoch": 0.08, - "grad_norm": 1.0031710272512935, - "learning_rate": 1.986251224101832e-05, - "loss": 0.231, - "step": 1598 - }, - { - "epoch": 0.08, - "grad_norm": 1.021474671000375, - "learning_rate": 1.9862239940907377e-05, - "loss": 0.2491, - "step": 1599 - }, - { - "epoch": 0.08, - "grad_norm": 0.8310914145791256, - "learning_rate": 1.986196737328262e-05, - "loss": 0.2451, - "step": 1600 - }, - { - "epoch": 0.08, - "grad_norm": 0.8690234167987716, - "learning_rate": 1.9861694538151436e-05, - "loss": 0.2687, - "step": 1601 - }, - { - "epoch": 0.08, - "grad_norm": 1.1227562514141765, - "learning_rate": 1.9861421435521234e-05, - "loss": 0.2742, - "step": 1602 - }, - { - "epoch": 0.08, - "grad_norm": 0.9654518363415745, - "learning_rate": 1.9861148065399414e-05, - "loss": 0.2318, - "step": 1603 - }, - { - "epoch": 0.08, - "grad_norm": 1.1076305396603072, - "learning_rate": 1.98608744277934e-05, - "loss": 0.2421, - "step": 1604 - }, - { - "epoch": 0.08, - "grad_norm": 1.4547345595987675, - "learning_rate": 1.986060052271061e-05, - "loss": 0.2305, - "step": 1605 - }, - { - "epoch": 0.08, - "grad_norm": 0.8491699775291529, - "learning_rate": 1.9860326350158472e-05, - "loss": 0.225, - "step": 1606 - }, - { - "epoch": 0.08, - "grad_norm": 0.8319543451433087, - "learning_rate": 1.9860051910144426e-05, - "loss": 0.2435, - "step": 1607 - }, - { - "epoch": 0.08, - "grad_norm": 0.8100604809756528, - "learning_rate": 1.9859777202675915e-05, - "loss": 0.2111, - "step": 1608 - }, - { - "epoch": 0.08, - "grad_norm": 0.9527615886593352, - "learning_rate": 1.985950222776039e-05, - "loss": 0.2644, - "step": 1609 - }, - { - "epoch": 0.08, - "grad_norm": 1.0894915270026155, - "learning_rate": 1.9859226985405312e-05, - "loss": 0.2577, - "step": 1610 - }, - { - "epoch": 0.08, - "grad_norm": 1.215983338220503, - "learning_rate": 1.985895147561814e-05, - "loss": 0.2239, - "step": 1611 - }, - { - "epoch": 0.08, - "grad_norm": 0.9302355264268377, - "learning_rate": 1.985867569840636e-05, - "loss": 0.2541, - "step": 1612 - }, - { - "epoch": 0.08, - "grad_norm": 4.600810373609161, - "learning_rate": 1.985839965377744e-05, - "loss": 0.24, - "step": 1613 - }, - { - "epoch": 0.08, - "grad_norm": 1.2071590122653046, - "learning_rate": 1.9858123341738877e-05, - "loss": 0.255, - "step": 1614 - }, - { - "epoch": 0.08, - "grad_norm": 1.078164560236254, - "learning_rate": 1.9857846762298157e-05, - "loss": 0.2397, - "step": 1615 - }, - { - "epoch": 0.08, - "grad_norm": 1.2358085343430798, - "learning_rate": 1.9857569915462787e-05, - "loss": 0.2642, - "step": 1616 - }, - { - "epoch": 0.08, - "grad_norm": 1.136443380213396, - "learning_rate": 1.9857292801240278e-05, - "loss": 0.2348, - "step": 1617 - }, - { - "epoch": 0.08, - "grad_norm": 1.256005542958859, - "learning_rate": 1.985701541963814e-05, - "loss": 0.2263, - "step": 1618 - }, - { - "epoch": 0.08, - "grad_norm": 0.9861752701380436, - "learning_rate": 1.9856737770663908e-05, - "loss": 0.2171, - "step": 1619 - }, - { - "epoch": 0.08, - "grad_norm": 1.0699500859875848, - "learning_rate": 1.9856459854325108e-05, - "loss": 0.2312, - "step": 1620 - }, - { - "epoch": 0.08, - "grad_norm": 1.0277239815636015, - "learning_rate": 1.9856181670629272e-05, - "loss": 0.2221, - "step": 1621 - }, - { - "epoch": 0.08, - "grad_norm": 1.1622788998264484, - "learning_rate": 1.985590321958396e-05, - "loss": 0.2658, - "step": 1622 - }, - { - "epoch": 0.08, - "grad_norm": 0.9654394119932761, - "learning_rate": 1.985562450119671e-05, - "loss": 0.2127, - "step": 1623 - }, - { - "epoch": 0.08, - "grad_norm": 0.8818972933724379, - "learning_rate": 1.985534551547509e-05, - "loss": 0.2402, - "step": 1624 - }, - { - "epoch": 0.08, - "grad_norm": 1.0822541435500859, - "learning_rate": 1.9855066262426663e-05, - "loss": 0.245, - "step": 1625 - }, - { - "epoch": 0.08, - "grad_norm": 1.0792182766553684, - "learning_rate": 1.9854786742059012e-05, - "loss": 0.2229, - "step": 1626 - }, - { - "epoch": 0.08, - "grad_norm": 0.9164683009523013, - "learning_rate": 1.9854506954379714e-05, - "loss": 0.2442, - "step": 1627 - }, - { - "epoch": 0.08, - "grad_norm": 1.1155262220942679, - "learning_rate": 1.9854226899396356e-05, - "loss": 0.2657, - "step": 1628 - }, - { - "epoch": 0.08, - "grad_norm": 1.3021145519257342, - "learning_rate": 1.9853946577116536e-05, - "loss": 0.23, - "step": 1629 - }, - { - "epoch": 0.08, - "grad_norm": 1.1384442147020182, - "learning_rate": 1.985366598754786e-05, - "loss": 0.2619, - "step": 1630 - }, - { - "epoch": 0.08, - "grad_norm": 1.5198425729758447, - "learning_rate": 1.985338513069794e-05, - "loss": 0.2424, - "step": 1631 - }, - { - "epoch": 0.08, - "grad_norm": 1.2746960811741264, - "learning_rate": 1.9853104006574387e-05, - "loss": 0.2398, - "step": 1632 - }, - { - "epoch": 0.08, - "grad_norm": 0.9081894042267109, - "learning_rate": 1.9852822615184835e-05, - "loss": 0.2712, - "step": 1633 - }, - { - "epoch": 0.08, - "grad_norm": 0.9654287887774892, - "learning_rate": 1.9852540956536912e-05, - "loss": 0.2482, - "step": 1634 - }, - { - "epoch": 0.08, - "grad_norm": 0.9874173268909807, - "learning_rate": 1.985225903063826e-05, - "loss": 0.2628, - "step": 1635 - }, - { - "epoch": 0.08, - "grad_norm": 0.8587542362193802, - "learning_rate": 1.9851976837496522e-05, - "loss": 0.2567, - "step": 1636 - }, - { - "epoch": 0.08, - "grad_norm": 0.8944630553655413, - "learning_rate": 1.9851694377119358e-05, - "loss": 0.2313, - "step": 1637 - }, - { - "epoch": 0.08, - "grad_norm": 0.9580136632504814, - "learning_rate": 1.985141164951443e-05, - "loss": 0.2637, - "step": 1638 - }, - { - "epoch": 0.08, - "grad_norm": 1.0240173091734972, - "learning_rate": 1.98511286546894e-05, - "loss": 0.2588, - "step": 1639 - }, - { - "epoch": 0.08, - "grad_norm": 1.1632183346348985, - "learning_rate": 1.985084539265195e-05, - "loss": 0.2492, - "step": 1640 - }, - { - "epoch": 0.08, - "grad_norm": 1.2418168208945377, - "learning_rate": 1.9850561863409763e-05, - "loss": 0.245, - "step": 1641 - }, - { - "epoch": 0.08, - "grad_norm": 1.2675161459462643, - "learning_rate": 1.985027806697053e-05, - "loss": 0.2235, - "step": 1642 - }, - { - "epoch": 0.08, - "grad_norm": 1.4699273219751203, - "learning_rate": 1.984999400334195e-05, - "loss": 0.2787, - "step": 1643 - }, - { - "epoch": 0.08, - "grad_norm": 1.3416851354072679, - "learning_rate": 1.9849709672531724e-05, - "loss": 0.2144, - "step": 1644 - }, - { - "epoch": 0.08, - "grad_norm": 1.588177614535934, - "learning_rate": 1.9849425074547565e-05, - "loss": 0.2643, - "step": 1645 - }, - { - "epoch": 0.08, - "grad_norm": 1.4621532798948187, - "learning_rate": 1.98491402093972e-05, - "loss": 0.2434, - "step": 1646 - }, - { - "epoch": 0.08, - "grad_norm": 1.016718930677667, - "learning_rate": 1.9848855077088346e-05, - "loss": 0.245, - "step": 1647 - }, - { - "epoch": 0.08, - "grad_norm": 0.960429641737833, - "learning_rate": 1.984856967762874e-05, - "loss": 0.2353, - "step": 1648 - }, - { - "epoch": 0.08, - "grad_norm": 1.0038547952300165, - "learning_rate": 1.984828401102613e-05, - "loss": 0.2471, - "step": 1649 - }, - { - "epoch": 0.08, - "grad_norm": 1.5716038810095376, - "learning_rate": 1.9847998077288255e-05, - "loss": 0.2624, - "step": 1650 - }, - { - "epoch": 0.08, - "grad_norm": 1.0463153168335042, - "learning_rate": 1.984771187642288e-05, - "loss": 0.2196, - "step": 1651 - }, - { - "epoch": 0.08, - "grad_norm": 1.1341781324491753, - "learning_rate": 1.9847425408437763e-05, - "loss": 0.2548, - "step": 1652 - }, - { - "epoch": 0.08, - "grad_norm": 1.3841618420365462, - "learning_rate": 1.9847138673340675e-05, - "loss": 0.2438, - "step": 1653 - }, - { - "epoch": 0.08, - "grad_norm": 1.0169550684413193, - "learning_rate": 1.9846851671139394e-05, - "loss": 0.2607, - "step": 1654 - }, - { - "epoch": 0.08, - "grad_norm": 1.3519043264751935, - "learning_rate": 1.984656440184171e-05, - "loss": 0.2607, - "step": 1655 - }, - { - "epoch": 0.08, - "grad_norm": 1.2608149031054832, - "learning_rate": 1.9846276865455403e-05, - "loss": 0.2437, - "step": 1656 - }, - { - "epoch": 0.08, - "grad_norm": 1.0108753451264925, - "learning_rate": 1.9845989061988283e-05, - "loss": 0.2487, - "step": 1657 - }, - { - "epoch": 0.08, - "grad_norm": 0.9027978818842535, - "learning_rate": 1.9845700991448154e-05, - "loss": 0.2359, - "step": 1658 - }, - { - "epoch": 0.08, - "grad_norm": 1.250101052431056, - "learning_rate": 1.984541265384283e-05, - "loss": 0.2632, - "step": 1659 - }, - { - "epoch": 0.08, - "grad_norm": 1.3325062156063474, - "learning_rate": 1.9845124049180132e-05, - "loss": 0.2225, - "step": 1660 - }, - { - "epoch": 0.08, - "grad_norm": 0.9988706987608464, - "learning_rate": 1.9844835177467886e-05, - "loss": 0.2321, - "step": 1661 - }, - { - "epoch": 0.08, - "grad_norm": 1.120658383739874, - "learning_rate": 1.984454603871393e-05, - "loss": 0.2576, - "step": 1662 - }, - { - "epoch": 0.08, - "grad_norm": 0.9601587400055499, - "learning_rate": 1.984425663292611e-05, - "loss": 0.2143, - "step": 1663 - }, - { - "epoch": 0.08, - "grad_norm": 0.9261788091835301, - "learning_rate": 1.984396696011227e-05, - "loss": 0.2469, - "step": 1664 - }, - { - "epoch": 0.08, - "grad_norm": 1.5508358929347974, - "learning_rate": 1.984367702028027e-05, - "loss": 0.2642, - "step": 1665 - }, - { - "epoch": 0.08, - "grad_norm": 0.9767778816683551, - "learning_rate": 1.9843386813437976e-05, - "loss": 0.2367, - "step": 1666 - }, - { - "epoch": 0.08, - "grad_norm": 1.0196991564528861, - "learning_rate": 1.984309633959326e-05, - "loss": 0.2413, - "step": 1667 - }, - { - "epoch": 0.08, - "grad_norm": 1.316436967597623, - "learning_rate": 1.9842805598753997e-05, - "loss": 0.2642, - "step": 1668 - }, - { - "epoch": 0.08, - "grad_norm": 1.2167182228798439, - "learning_rate": 1.984251459092808e-05, - "loss": 0.2275, - "step": 1669 - }, - { - "epoch": 0.08, - "grad_norm": 1.1146683861566014, - "learning_rate": 1.9842223316123393e-05, - "loss": 0.2346, - "step": 1670 - }, - { - "epoch": 0.08, - "grad_norm": 1.3334225033325622, - "learning_rate": 1.9841931774347846e-05, - "loss": 0.2545, - "step": 1671 - }, - { - "epoch": 0.09, - "grad_norm": 1.3786897506461797, - "learning_rate": 1.9841639965609344e-05, - "loss": 0.2298, - "step": 1672 - }, - { - "epoch": 0.09, - "grad_norm": 1.612244430590351, - "learning_rate": 1.9841347889915804e-05, - "loss": 0.2449, - "step": 1673 - }, - { - "epoch": 0.09, - "grad_norm": 1.2949843857838457, - "learning_rate": 1.9841055547275142e-05, - "loss": 0.2405, - "step": 1674 - }, - { - "epoch": 0.09, - "grad_norm": 1.229655683707706, - "learning_rate": 1.9840762937695296e-05, - "loss": 0.2706, - "step": 1675 - }, - { - "epoch": 0.09, - "grad_norm": 1.5452909783071291, - "learning_rate": 1.98404700611842e-05, - "loss": 0.2369, - "step": 1676 - }, - { - "epoch": 0.09, - "grad_norm": 0.9981909241193303, - "learning_rate": 1.9840176917749795e-05, - "loss": 0.2494, - "step": 1677 - }, - { - "epoch": 0.09, - "grad_norm": 1.0870733215780806, - "learning_rate": 1.9839883507400037e-05, - "loss": 0.2528, - "step": 1678 - }, - { - "epoch": 0.09, - "grad_norm": 1.0528706352105188, - "learning_rate": 1.9839589830142882e-05, - "loss": 0.257, - "step": 1679 - }, - { - "epoch": 0.09, - "grad_norm": 1.1201032075648276, - "learning_rate": 1.98392958859863e-05, - "loss": 0.2492, - "step": 1680 - }, - { - "epoch": 0.09, - "grad_norm": 1.0474478400549914, - "learning_rate": 1.983900167493826e-05, - "loss": 0.2926, - "step": 1681 - }, - { - "epoch": 0.09, - "grad_norm": 1.0154346670627037, - "learning_rate": 1.983870719700674e-05, - "loss": 0.2386, - "step": 1682 - }, - { - "epoch": 0.09, - "grad_norm": 0.8657370203828151, - "learning_rate": 1.9838412452199732e-05, - "loss": 0.242, - "step": 1683 - }, - { - "epoch": 0.09, - "grad_norm": 0.918886234549711, - "learning_rate": 1.9838117440525236e-05, - "loss": 0.2439, - "step": 1684 - }, - { - "epoch": 0.09, - "grad_norm": 0.9403024089951526, - "learning_rate": 1.9837822161991248e-05, - "loss": 0.2429, - "step": 1685 - }, - { - "epoch": 0.09, - "grad_norm": 1.084076095905887, - "learning_rate": 1.9837526616605774e-05, - "loss": 0.2335, - "step": 1686 - }, - { - "epoch": 0.09, - "grad_norm": 1.7367729442066835, - "learning_rate": 1.9837230804376838e-05, - "loss": 0.2293, - "step": 1687 - }, - { - "epoch": 0.09, - "grad_norm": 0.9329776639522541, - "learning_rate": 1.983693472531246e-05, - "loss": 0.2169, - "step": 1688 - }, - { - "epoch": 0.09, - "grad_norm": 1.2153526164638409, - "learning_rate": 1.983663837942067e-05, - "loss": 0.2542, - "step": 1689 - }, - { - "epoch": 0.09, - "grad_norm": 0.956868994422886, - "learning_rate": 1.983634176670951e-05, - "loss": 0.2591, - "step": 1690 - }, - { - "epoch": 0.09, - "grad_norm": 1.029881962928075, - "learning_rate": 1.9836044887187023e-05, - "loss": 0.2213, - "step": 1691 - }, - { - "epoch": 0.09, - "grad_norm": 1.1669934190529483, - "learning_rate": 1.9835747740861266e-05, - "loss": 0.2626, - "step": 1692 - }, - { - "epoch": 0.09, - "grad_norm": 0.9980708214268654, - "learning_rate": 1.9835450327740293e-05, - "loss": 0.2788, - "step": 1693 - }, - { - "epoch": 0.09, - "grad_norm": 1.158179447044645, - "learning_rate": 1.9835152647832175e-05, - "loss": 0.2486, - "step": 1694 - }, - { - "epoch": 0.09, - "grad_norm": 0.9135665839164294, - "learning_rate": 1.9834854701144986e-05, - "loss": 0.2286, - "step": 1695 - }, - { - "epoch": 0.09, - "grad_norm": 1.0453395818232172, - "learning_rate": 1.983455648768681e-05, - "loss": 0.2274, - "step": 1696 - }, - { - "epoch": 0.09, - "grad_norm": 1.0728487666452273, - "learning_rate": 1.983425800746573e-05, - "loss": 0.2582, - "step": 1697 - }, - { - "epoch": 0.09, - "grad_norm": 0.9397892154114075, - "learning_rate": 1.983395926048985e-05, - "loss": 0.2259, - "step": 1698 - }, - { - "epoch": 0.09, - "grad_norm": 0.9379066761125769, - "learning_rate": 1.9833660246767267e-05, - "loss": 0.2386, - "step": 1699 - }, - { - "epoch": 0.09, - "grad_norm": 1.2392563662188694, - "learning_rate": 1.9833360966306095e-05, - "loss": 0.237, - "step": 1700 - }, - { - "epoch": 0.09, - "grad_norm": 1.2609688920322724, - "learning_rate": 1.9833061419114452e-05, - "loss": 0.2885, - "step": 1701 - }, - { - "epoch": 0.09, - "grad_norm": 1.1060909223384836, - "learning_rate": 1.9832761605200464e-05, - "loss": 0.2539, - "step": 1702 - }, - { - "epoch": 0.09, - "grad_norm": 1.0220662279210742, - "learning_rate": 1.9832461524572258e-05, - "loss": 0.2272, - "step": 1703 - }, - { - "epoch": 0.09, - "grad_norm": 1.071997778470365, - "learning_rate": 1.983216117723798e-05, - "loss": 0.2574, - "step": 1704 - }, - { - "epoch": 0.09, - "grad_norm": 1.0151680433606107, - "learning_rate": 1.9831860563205776e-05, - "loss": 0.2401, - "step": 1705 - }, - { - "epoch": 0.09, - "grad_norm": 0.987979664831528, - "learning_rate": 1.98315596824838e-05, - "loss": 0.2201, - "step": 1706 - }, - { - "epoch": 0.09, - "grad_norm": 0.8212769811622768, - "learning_rate": 1.9831258535080206e-05, - "loss": 0.2338, - "step": 1707 - }, - { - "epoch": 0.09, - "grad_norm": 1.1635767738327358, - "learning_rate": 1.9830957121003176e-05, - "loss": 0.2394, - "step": 1708 - }, - { - "epoch": 0.09, - "grad_norm": 1.0173061683887226, - "learning_rate": 1.983065544026087e-05, - "loss": 0.264, - "step": 1709 - }, - { - "epoch": 0.09, - "grad_norm": 1.833164217466478, - "learning_rate": 1.9830353492861493e-05, - "loss": 0.2578, - "step": 1710 - }, - { - "epoch": 0.09, - "grad_norm": 1.022547453107026, - "learning_rate": 1.983005127881321e-05, - "loss": 0.2651, - "step": 1711 - }, - { - "epoch": 0.09, - "grad_norm": 1.0363103004554934, - "learning_rate": 1.9829748798124237e-05, - "loss": 0.2612, - "step": 1712 - }, - { - "epoch": 0.09, - "grad_norm": 0.9714966214210872, - "learning_rate": 1.9829446050802776e-05, - "loss": 0.2519, - "step": 1713 - }, - { - "epoch": 0.09, - "grad_norm": 0.9434534750808347, - "learning_rate": 1.9829143036857027e-05, - "loss": 0.2293, - "step": 1714 - }, - { - "epoch": 0.09, - "grad_norm": 1.195774260854126, - "learning_rate": 1.9828839756295223e-05, - "loss": 0.2517, - "step": 1715 - }, - { - "epoch": 0.09, - "grad_norm": 1.0262577604343457, - "learning_rate": 1.9828536209125584e-05, - "loss": 0.2463, - "step": 1716 - }, - { - "epoch": 0.09, - "grad_norm": 1.842309395951756, - "learning_rate": 1.9828232395356347e-05, - "loss": 0.2409, - "step": 1717 - }, - { - "epoch": 0.09, - "grad_norm": 1.2461010415205944, - "learning_rate": 1.982792831499575e-05, - "loss": 0.2425, - "step": 1718 - }, - { - "epoch": 0.09, - "grad_norm": 0.9829452005423613, - "learning_rate": 1.982762396805204e-05, - "loss": 0.2484, - "step": 1719 - }, - { - "epoch": 0.09, - "grad_norm": 0.984884649513512, - "learning_rate": 1.982731935453348e-05, - "loss": 0.2389, - "step": 1720 - }, - { - "epoch": 0.09, - "grad_norm": 1.1119583351575009, - "learning_rate": 1.9827014474448324e-05, - "loss": 0.2518, - "step": 1721 - }, - { - "epoch": 0.09, - "grad_norm": 1.002796824835171, - "learning_rate": 1.9826709327804846e-05, - "loss": 0.2284, - "step": 1722 - }, - { - "epoch": 0.09, - "grad_norm": 1.0157183698966548, - "learning_rate": 1.982640391461132e-05, - "loss": 0.2415, - "step": 1723 - }, - { - "epoch": 0.09, - "grad_norm": 1.260041652519267, - "learning_rate": 1.982609823487604e-05, - "loss": 0.2351, - "step": 1724 - }, - { - "epoch": 0.09, - "grad_norm": 1.3519455184908302, - "learning_rate": 1.9825792288607284e-05, - "loss": 0.2451, - "step": 1725 - }, - { - "epoch": 0.09, - "grad_norm": 1.0062626123603624, - "learning_rate": 1.982548607581336e-05, - "loss": 0.2467, - "step": 1726 - }, - { - "epoch": 0.09, - "grad_norm": 1.2439664837325628, - "learning_rate": 1.9825179596502567e-05, - "loss": 0.2509, - "step": 1727 - }, - { - "epoch": 0.09, - "grad_norm": 0.8243535374289274, - "learning_rate": 1.9824872850683226e-05, - "loss": 0.2075, - "step": 1728 - }, - { - "epoch": 0.09, - "grad_norm": 1.1505514359912987, - "learning_rate": 1.9824565838363657e-05, - "loss": 0.2419, - "step": 1729 - }, - { - "epoch": 0.09, - "grad_norm": 1.269435382278997, - "learning_rate": 1.9824258559552182e-05, - "loss": 0.239, - "step": 1730 - }, - { - "epoch": 0.09, - "grad_norm": 2.31208623122475, - "learning_rate": 1.9823951014257138e-05, - "loss": 0.2527, - "step": 1731 - }, - { - "epoch": 0.09, - "grad_norm": 1.206820363011641, - "learning_rate": 1.9823643202486867e-05, - "loss": 0.2221, - "step": 1732 - }, - { - "epoch": 0.09, - "grad_norm": 1.2041683786551478, - "learning_rate": 1.982333512424972e-05, - "loss": 0.2213, - "step": 1733 - }, - { - "epoch": 0.09, - "grad_norm": 1.0255291639405049, - "learning_rate": 1.9823026779554055e-05, - "loss": 0.2405, - "step": 1734 - }, - { - "epoch": 0.09, - "grad_norm": 1.1134042500477659, - "learning_rate": 1.982271816840823e-05, - "loss": 0.2355, - "step": 1735 - }, - { - "epoch": 0.09, - "grad_norm": 1.122621923354994, - "learning_rate": 1.982240929082062e-05, - "loss": 0.2591, - "step": 1736 - }, - { - "epoch": 0.09, - "grad_norm": 1.0109011146022282, - "learning_rate": 1.9822100146799607e-05, - "loss": 0.2268, - "step": 1737 - }, - { - "epoch": 0.09, - "grad_norm": 1.1939519658509656, - "learning_rate": 1.982179073635357e-05, - "loss": 0.2648, - "step": 1738 - }, - { - "epoch": 0.09, - "grad_norm": 1.5178683098536014, - "learning_rate": 1.9821481059490906e-05, - "loss": 0.2232, - "step": 1739 - }, - { - "epoch": 0.09, - "grad_norm": 1.9516149012483963, - "learning_rate": 1.982117111622001e-05, - "loss": 0.2476, - "step": 1740 - }, - { - "epoch": 0.09, - "grad_norm": 1.2321003493682299, - "learning_rate": 1.98208609065493e-05, - "loss": 0.2064, - "step": 1741 - }, - { - "epoch": 0.09, - "grad_norm": 1.1263545813097904, - "learning_rate": 1.982055043048718e-05, - "loss": 0.219, - "step": 1742 - }, - { - "epoch": 0.09, - "grad_norm": 0.9164576815422517, - "learning_rate": 1.982023968804207e-05, - "loss": 0.2325, - "step": 1743 - }, - { - "epoch": 0.09, - "grad_norm": 1.481776837320407, - "learning_rate": 1.981992867922241e-05, - "loss": 0.2732, - "step": 1744 - }, - { - "epoch": 0.09, - "grad_norm": 1.0480431065887095, - "learning_rate": 1.981961740403663e-05, - "loss": 0.225, - "step": 1745 - }, - { - "epoch": 0.09, - "grad_norm": 1.1453713530810714, - "learning_rate": 1.981930586249317e-05, - "loss": 0.2555, - "step": 1746 - }, - { - "epoch": 0.09, - "grad_norm": 1.0440512071308312, - "learning_rate": 1.9818994054600484e-05, - "loss": 0.2443, - "step": 1747 - }, - { - "epoch": 0.09, - "grad_norm": 1.3140536888347236, - "learning_rate": 1.981868198036703e-05, - "loss": 0.2296, - "step": 1748 - }, - { - "epoch": 0.09, - "grad_norm": 0.9589913304538162, - "learning_rate": 1.9818369639801273e-05, - "loss": 0.2269, - "step": 1749 - }, - { - "epoch": 0.09, - "grad_norm": 0.9421444481840565, - "learning_rate": 1.9818057032911687e-05, - "loss": 0.236, - "step": 1750 - }, - { - "epoch": 0.09, - "grad_norm": 1.011810083369825, - "learning_rate": 1.9817744159706746e-05, - "loss": 0.2542, - "step": 1751 - }, - { - "epoch": 0.09, - "grad_norm": 0.9867132876670602, - "learning_rate": 1.9817431020194942e-05, - "loss": 0.2248, - "step": 1752 - }, - { - "epoch": 0.09, - "grad_norm": 1.2165872518452028, - "learning_rate": 1.9817117614384767e-05, - "loss": 0.2315, - "step": 1753 - }, - { - "epoch": 0.09, - "grad_norm": 1.6573462337949036, - "learning_rate": 1.9816803942284724e-05, - "loss": 0.2482, - "step": 1754 - }, - { - "epoch": 0.09, - "grad_norm": 1.0905321049950607, - "learning_rate": 1.9816490003903316e-05, - "loss": 0.2451, - "step": 1755 - }, - { - "epoch": 0.09, - "grad_norm": 1.2974795740116438, - "learning_rate": 1.9816175799249064e-05, - "loss": 0.262, - "step": 1756 - }, - { - "epoch": 0.09, - "grad_norm": 0.9930878435396299, - "learning_rate": 1.981586132833049e-05, - "loss": 0.2834, - "step": 1757 - }, - { - "epoch": 0.09, - "grad_norm": 0.9804621023472282, - "learning_rate": 1.981554659115612e-05, - "loss": 0.249, - "step": 1758 - }, - { - "epoch": 0.09, - "grad_norm": 0.8228160230043197, - "learning_rate": 1.98152315877345e-05, - "loss": 0.2267, - "step": 1759 - }, - { - "epoch": 0.09, - "grad_norm": 0.8087224046227851, - "learning_rate": 1.9814916318074163e-05, - "loss": 0.2372, - "step": 1760 - }, - { - "epoch": 0.09, - "grad_norm": 0.9374525942013675, - "learning_rate": 1.981460078218367e-05, - "loss": 0.2595, - "step": 1761 - }, - { - "epoch": 0.09, - "grad_norm": 0.9126980800579981, - "learning_rate": 1.9814284980071578e-05, - "loss": 0.2429, - "step": 1762 - }, - { - "epoch": 0.09, - "grad_norm": 0.7815278328067415, - "learning_rate": 1.9813968911746447e-05, - "loss": 0.2128, - "step": 1763 - }, - { - "epoch": 0.09, - "grad_norm": 1.1630201006053036, - "learning_rate": 1.9813652577216858e-05, - "loss": 0.2348, - "step": 1764 - }, - { - "epoch": 0.09, - "grad_norm": 1.0067825312667709, - "learning_rate": 1.9813335976491387e-05, - "loss": 0.2426, - "step": 1765 - }, - { - "epoch": 0.09, - "grad_norm": 2.7904482493750873, - "learning_rate": 1.9813019109578623e-05, - "loss": 0.2409, - "step": 1766 - }, - { - "epoch": 0.09, - "grad_norm": 1.0847359740312001, - "learning_rate": 1.981270197648716e-05, - "loss": 0.2616, - "step": 1767 - }, - { - "epoch": 0.09, - "grad_norm": 0.935404199004101, - "learning_rate": 1.9812384577225604e-05, - "loss": 0.2277, - "step": 1768 - }, - { - "epoch": 0.09, - "grad_norm": 1.289585307813352, - "learning_rate": 1.9812066911802565e-05, - "loss": 0.2553, - "step": 1769 - }, - { - "epoch": 0.09, - "grad_norm": 1.4646201642946093, - "learning_rate": 1.981174898022665e-05, - "loss": 0.2478, - "step": 1770 - }, - { - "epoch": 0.09, - "grad_norm": 1.3498844676297264, - "learning_rate": 1.9811430782506496e-05, - "loss": 0.2371, - "step": 1771 - }, - { - "epoch": 0.09, - "grad_norm": 2.195156883689694, - "learning_rate": 1.9811112318650725e-05, - "loss": 0.2526, - "step": 1772 - }, - { - "epoch": 0.09, - "grad_norm": 1.7032110116525865, - "learning_rate": 1.9810793588667977e-05, - "loss": 0.258, - "step": 1773 - }, - { - "epoch": 0.09, - "grad_norm": 0.886767523375247, - "learning_rate": 1.98104745925669e-05, - "loss": 0.2432, - "step": 1774 - }, - { - "epoch": 0.09, - "grad_norm": 1.0485652010268987, - "learning_rate": 1.9810155330356147e-05, - "loss": 0.2278, - "step": 1775 - }, - { - "epoch": 0.09, - "grad_norm": 1.0753335063192004, - "learning_rate": 1.9809835802044378e-05, - "loss": 0.2317, - "step": 1776 - }, - { - "epoch": 0.09, - "grad_norm": 1.2770843961058662, - "learning_rate": 1.9809516007640255e-05, - "loss": 0.229, - "step": 1777 - }, - { - "epoch": 0.09, - "grad_norm": 0.8403924810021453, - "learning_rate": 1.9809195947152458e-05, - "loss": 0.2282, - "step": 1778 - }, - { - "epoch": 0.09, - "grad_norm": 1.0793162103481402, - "learning_rate": 1.9808875620589667e-05, - "loss": 0.2234, - "step": 1779 - }, - { - "epoch": 0.09, - "grad_norm": 1.1238385198046674, - "learning_rate": 1.980855502796057e-05, - "loss": 0.2174, - "step": 1780 - }, - { - "epoch": 0.09, - "grad_norm": 1.2699355390187514, - "learning_rate": 1.9808234169273864e-05, - "loss": 0.2288, - "step": 1781 - }, - { - "epoch": 0.09, - "grad_norm": 1.011158926633347, - "learning_rate": 1.9807913044538252e-05, - "loss": 0.2485, - "step": 1782 - }, - { - "epoch": 0.09, - "grad_norm": 1.4074623352058435, - "learning_rate": 1.9807591653762447e-05, - "loss": 0.2307, - "step": 1783 - }, - { - "epoch": 0.09, - "grad_norm": 0.9928912356239473, - "learning_rate": 1.980726999695516e-05, - "loss": 0.2675, - "step": 1784 - }, - { - "epoch": 0.09, - "grad_norm": 1.0184293111623297, - "learning_rate": 1.9806948074125123e-05, - "loss": 0.2908, - "step": 1785 - }, - { - "epoch": 0.09, - "grad_norm": 1.1122490801754585, - "learning_rate": 1.9806625885281065e-05, - "loss": 0.2621, - "step": 1786 - }, - { - "epoch": 0.09, - "grad_norm": 1.1598479098892676, - "learning_rate": 1.9806303430431727e-05, - "loss": 0.2441, - "step": 1787 - }, - { - "epoch": 0.09, - "grad_norm": 0.979354192254303, - "learning_rate": 1.9805980709585855e-05, - "loss": 0.2375, - "step": 1788 - }, - { - "epoch": 0.09, - "grad_norm": 1.267730797707709, - "learning_rate": 1.9805657722752202e-05, - "loss": 0.2284, - "step": 1789 - }, - { - "epoch": 0.09, - "grad_norm": 0.9952643727050997, - "learning_rate": 1.980533446993953e-05, - "loss": 0.2453, - "step": 1790 - }, - { - "epoch": 0.09, - "grad_norm": 1.0233808075949027, - "learning_rate": 1.9805010951156605e-05, - "loss": 0.2396, - "step": 1791 - }, - { - "epoch": 0.09, - "grad_norm": 1.024484392071274, - "learning_rate": 1.9804687166412204e-05, - "loss": 0.2423, - "step": 1792 - }, - { - "epoch": 0.09, - "grad_norm": 1.1779536450407442, - "learning_rate": 1.980436311571511e-05, - "loss": 0.2506, - "step": 1793 - }, - { - "epoch": 0.09, - "grad_norm": 1.110809751968605, - "learning_rate": 1.9804038799074114e-05, - "loss": 0.2379, - "step": 1794 - }, - { - "epoch": 0.09, - "grad_norm": 0.911356847281592, - "learning_rate": 1.9803714216498013e-05, - "loss": 0.2459, - "step": 1795 - }, - { - "epoch": 0.09, - "grad_norm": 1.2508497514282289, - "learning_rate": 1.9803389367995606e-05, - "loss": 0.2559, - "step": 1796 - }, - { - "epoch": 0.09, - "grad_norm": 1.2547143824197837, - "learning_rate": 1.9803064253575713e-05, - "loss": 0.2678, - "step": 1797 - }, - { - "epoch": 0.09, - "grad_norm": 1.1171374299241046, - "learning_rate": 1.9802738873247146e-05, - "loss": 0.2373, - "step": 1798 - }, - { - "epoch": 0.09, - "grad_norm": 1.3682294514056483, - "learning_rate": 1.9802413227018732e-05, - "loss": 0.2634, - "step": 1799 - }, - { - "epoch": 0.09, - "grad_norm": 1.6593452822995967, - "learning_rate": 1.980208731489931e-05, - "loss": 0.2565, - "step": 1800 - }, - { - "epoch": 0.09, - "grad_norm": 1.33397124612498, - "learning_rate": 1.9801761136897713e-05, - "loss": 0.2465, - "step": 1801 - }, - { - "epoch": 0.09, - "grad_norm": 1.2179198485821112, - "learning_rate": 1.980143469302279e-05, - "loss": 0.251, - "step": 1802 - }, - { - "epoch": 0.09, - "grad_norm": 1.248443571251299, - "learning_rate": 1.9801107983283403e-05, - "loss": 0.2264, - "step": 1803 - }, - { - "epoch": 0.09, - "grad_norm": 1.2624127344026586, - "learning_rate": 1.9800781007688403e-05, - "loss": 0.2419, - "step": 1804 - }, - { - "epoch": 0.09, - "grad_norm": 1.0773293999784925, - "learning_rate": 1.9800453766246668e-05, - "loss": 0.2478, - "step": 1805 - }, - { - "epoch": 0.09, - "grad_norm": 1.2539597583340516, - "learning_rate": 1.980012625896707e-05, - "loss": 0.242, - "step": 1806 - }, - { - "epoch": 0.09, - "grad_norm": 1.0523566435986238, - "learning_rate": 1.979979848585849e-05, - "loss": 0.2438, - "step": 1807 - }, - { - "epoch": 0.09, - "grad_norm": 1.2783462412732363, - "learning_rate": 1.9799470446929827e-05, - "loss": 0.2322, - "step": 1808 - }, - { - "epoch": 0.09, - "grad_norm": 2.501195818234163, - "learning_rate": 1.9799142142189974e-05, - "loss": 0.2366, - "step": 1809 - }, - { - "epoch": 0.09, - "grad_norm": 1.0351472253286227, - "learning_rate": 1.9798813571647835e-05, - "loss": 0.2475, - "step": 1810 - }, - { - "epoch": 0.09, - "grad_norm": 1.141466973878634, - "learning_rate": 1.9798484735312327e-05, - "loss": 0.224, - "step": 1811 - }, - { - "epoch": 0.09, - "grad_norm": 1.3377399022823369, - "learning_rate": 1.9798155633192368e-05, - "loss": 0.2341, - "step": 1812 - }, - { - "epoch": 0.09, - "grad_norm": 1.4789245607733694, - "learning_rate": 1.979782626529688e-05, - "loss": 0.2666, - "step": 1813 - }, - { - "epoch": 0.09, - "grad_norm": 1.7178934408624136, - "learning_rate": 1.9797496631634804e-05, - "loss": 0.2506, - "step": 1814 - }, - { - "epoch": 0.09, - "grad_norm": 0.9579183307072763, - "learning_rate": 1.9797166732215078e-05, - "loss": 0.2334, - "step": 1815 - }, - { - "epoch": 0.09, - "grad_norm": 1.1702378825834747, - "learning_rate": 1.979683656704665e-05, - "loss": 0.2367, - "step": 1816 - }, - { - "epoch": 0.09, - "grad_norm": 1.2623264964220406, - "learning_rate": 1.979650613613848e-05, - "loss": 0.2264, - "step": 1817 - }, - { - "epoch": 0.09, - "grad_norm": 1.1794467571917981, - "learning_rate": 1.979617543949952e-05, - "loss": 0.2339, - "step": 1818 - }, - { - "epoch": 0.09, - "grad_norm": 0.91157451744252, - "learning_rate": 1.9795844477138756e-05, - "loss": 0.225, - "step": 1819 - }, - { - "epoch": 0.09, - "grad_norm": 1.2309863507665357, - "learning_rate": 1.9795513249065155e-05, - "loss": 0.253, - "step": 1820 - }, - { - "epoch": 0.09, - "grad_norm": 1.1783442633169707, - "learning_rate": 1.97951817552877e-05, - "loss": 0.2379, - "step": 1821 - }, - { - "epoch": 0.09, - "grad_norm": 1.094279223019567, - "learning_rate": 1.9794849995815392e-05, - "loss": 0.2457, - "step": 1822 - }, - { - "epoch": 0.09, - "grad_norm": 1.3138667208821027, - "learning_rate": 1.979451797065722e-05, - "loss": 0.259, - "step": 1823 - }, - { - "epoch": 0.09, - "grad_norm": 1.2491422875129075, - "learning_rate": 1.97941856798222e-05, - "loss": 0.2364, - "step": 1824 - }, - { - "epoch": 0.09, - "grad_norm": 1.186450281638106, - "learning_rate": 1.979385312331934e-05, - "loss": 0.2312, - "step": 1825 - }, - { - "epoch": 0.09, - "grad_norm": 1.1194218673747123, - "learning_rate": 1.9793520301157656e-05, - "loss": 0.2473, - "step": 1826 - }, - { - "epoch": 0.09, - "grad_norm": 1.5625455110596913, - "learning_rate": 1.9793187213346183e-05, - "loss": 0.2544, - "step": 1827 - }, - { - "epoch": 0.09, - "grad_norm": 1.1394717462108623, - "learning_rate": 1.9792853859893953e-05, - "loss": 0.2318, - "step": 1828 - }, - { - "epoch": 0.09, - "grad_norm": 1.979714128320049, - "learning_rate": 1.9792520240810012e-05, - "loss": 0.2555, - "step": 1829 - }, - { - "epoch": 0.09, - "grad_norm": 1.0604405691060503, - "learning_rate": 1.9792186356103403e-05, - "loss": 0.2421, - "step": 1830 - }, - { - "epoch": 0.09, - "grad_norm": 0.9962422834651873, - "learning_rate": 1.9791852205783186e-05, - "loss": 0.2274, - "step": 1831 - }, - { - "epoch": 0.09, - "grad_norm": 1.0115858456203548, - "learning_rate": 1.9791517789858428e-05, - "loss": 0.218, - "step": 1832 - }, - { - "epoch": 0.09, - "grad_norm": 1.1584982837673712, - "learning_rate": 1.9791183108338195e-05, - "loss": 0.2291, - "step": 1833 - }, - { - "epoch": 0.09, - "grad_norm": 1.4205913603650355, - "learning_rate": 1.9790848161231568e-05, - "loss": 0.2258, - "step": 1834 - }, - { - "epoch": 0.09, - "grad_norm": 0.9593511399242401, - "learning_rate": 1.9790512948547633e-05, - "loss": 0.2421, - "step": 1835 - }, - { - "epoch": 0.09, - "grad_norm": 1.205982035714139, - "learning_rate": 1.9790177470295474e-05, - "loss": 0.2452, - "step": 1836 - }, - { - "epoch": 0.09, - "grad_norm": 1.0021327684946961, - "learning_rate": 1.9789841726484208e-05, - "loss": 0.2571, - "step": 1837 - }, - { - "epoch": 0.09, - "grad_norm": 1.1537386848613858, - "learning_rate": 1.9789505717122926e-05, - "loss": 0.2639, - "step": 1838 - }, - { - "epoch": 0.09, - "grad_norm": 1.251241669990851, - "learning_rate": 1.978916944222075e-05, - "loss": 0.2597, - "step": 1839 - }, - { - "epoch": 0.09, - "grad_norm": 1.3627173950389766, - "learning_rate": 1.97888329017868e-05, - "loss": 0.2549, - "step": 1840 - }, - { - "epoch": 0.09, - "grad_norm": 1.1280632425903117, - "learning_rate": 1.9788496095830205e-05, - "loss": 0.2427, - "step": 1841 - }, - { - "epoch": 0.09, - "grad_norm": 1.2424096456708325, - "learning_rate": 1.97881590243601e-05, - "loss": 0.2507, - "step": 1842 - }, - { - "epoch": 0.09, - "grad_norm": 1.1268147690233024, - "learning_rate": 1.978782168738563e-05, - "loss": 0.2313, - "step": 1843 - }, - { - "epoch": 0.09, - "grad_norm": 0.9731217683506375, - "learning_rate": 1.9787484084915943e-05, - "loss": 0.1961, - "step": 1844 - }, - { - "epoch": 0.09, - "grad_norm": 0.9932726226384986, - "learning_rate": 1.9787146216960196e-05, - "loss": 0.2704, - "step": 1845 - }, - { - "epoch": 0.09, - "grad_norm": 1.3258885680331116, - "learning_rate": 1.978680808352756e-05, - "loss": 0.2223, - "step": 1846 - }, - { - "epoch": 0.09, - "grad_norm": 1.179125947707762, - "learning_rate": 1.9786469684627193e-05, - "loss": 0.2233, - "step": 1847 - }, - { - "epoch": 0.09, - "grad_norm": 1.181157569743411, - "learning_rate": 1.978613102026829e-05, - "loss": 0.2614, - "step": 1848 - }, - { - "epoch": 0.09, - "grad_norm": 1.2465357791351852, - "learning_rate": 1.9785792090460026e-05, - "loss": 0.1947, - "step": 1849 - }, - { - "epoch": 0.09, - "grad_norm": 1.1762006333045996, - "learning_rate": 1.9785452895211606e-05, - "loss": 0.2488, - "step": 1850 - }, - { - "epoch": 0.09, - "grad_norm": 0.9732313500927374, - "learning_rate": 1.978511343453222e-05, - "loss": 0.2208, - "step": 1851 - }, - { - "epoch": 0.09, - "grad_norm": 2.930795569689893, - "learning_rate": 1.9784773708431076e-05, - "loss": 0.2319, - "step": 1852 - }, - { - "epoch": 0.09, - "grad_norm": 1.1402431579192085, - "learning_rate": 1.9784433716917397e-05, - "loss": 0.2394, - "step": 1853 - }, - { - "epoch": 0.09, - "grad_norm": 1.0338244690058258, - "learning_rate": 1.97840934600004e-05, - "loss": 0.2477, - "step": 1854 - }, - { - "epoch": 0.09, - "grad_norm": 1.6535929415927046, - "learning_rate": 1.9783752937689312e-05, - "loss": 0.2408, - "step": 1855 - }, - { - "epoch": 0.09, - "grad_norm": 1.370824985835338, - "learning_rate": 1.9783412149993374e-05, - "loss": 0.2349, - "step": 1856 - }, - { - "epoch": 0.09, - "grad_norm": 1.3956801985306262, - "learning_rate": 1.978307109692183e-05, - "loss": 0.2534, - "step": 1857 - }, - { - "epoch": 0.09, - "grad_norm": 1.3940946373038978, - "learning_rate": 1.978272977848393e-05, - "loss": 0.286, - "step": 1858 - }, - { - "epoch": 0.09, - "grad_norm": 1.0987035112902765, - "learning_rate": 1.9782388194688933e-05, - "loss": 0.2779, - "step": 1859 - }, - { - "epoch": 0.09, - "grad_norm": 1.1882153135309836, - "learning_rate": 1.9782046345546102e-05, - "loss": 0.2235, - "step": 1860 - }, - { - "epoch": 0.09, - "grad_norm": 1.2008243551349194, - "learning_rate": 1.9781704231064715e-05, - "loss": 0.2693, - "step": 1861 - }, - { - "epoch": 0.09, - "grad_norm": 1.1751556702307224, - "learning_rate": 1.9781361851254044e-05, - "loss": 0.2543, - "step": 1862 - }, - { - "epoch": 0.09, - "grad_norm": 1.1081469900399938, - "learning_rate": 1.9781019206123382e-05, - "loss": 0.2512, - "step": 1863 - }, - { - "epoch": 0.09, - "grad_norm": 0.9908211347171663, - "learning_rate": 1.978067629568202e-05, - "loss": 0.2035, - "step": 1864 - }, - { - "epoch": 0.09, - "grad_norm": 1.2227189853228269, - "learning_rate": 1.9780333119939264e-05, - "loss": 0.235, - "step": 1865 - }, - { - "epoch": 0.09, - "grad_norm": 1.2011150507746902, - "learning_rate": 1.9779989678904416e-05, - "loss": 0.2399, - "step": 1866 - }, - { - "epoch": 0.09, - "grad_norm": 1.3756169840284638, - "learning_rate": 1.97796459725868e-05, - "loss": 0.2533, - "step": 1867 - }, - { - "epoch": 0.09, - "grad_norm": 2.7413831052381767, - "learning_rate": 1.9779302000995732e-05, - "loss": 0.2174, - "step": 1868 - }, - { - "epoch": 0.1, - "grad_norm": 1.2214321925490699, - "learning_rate": 1.9778957764140545e-05, - "loss": 0.2509, - "step": 1869 - }, - { - "epoch": 0.1, - "grad_norm": 2.048946823233343, - "learning_rate": 1.9778613262030577e-05, - "loss": 0.2396, - "step": 1870 - }, - { - "epoch": 0.1, - "grad_norm": 1.0703035941982966, - "learning_rate": 1.9778268494675172e-05, - "loss": 0.2352, - "step": 1871 - }, - { - "epoch": 0.1, - "grad_norm": 2.010644102322746, - "learning_rate": 1.977792346208368e-05, - "loss": 0.2269, - "step": 1872 - }, - { - "epoch": 0.1, - "grad_norm": 2.2422707614304045, - "learning_rate": 1.9777578164265464e-05, - "loss": 0.2476, - "step": 1873 - }, - { - "epoch": 0.1, - "grad_norm": 1.2907222314859268, - "learning_rate": 1.9777232601229887e-05, - "loss": 0.2204, - "step": 1874 - }, - { - "epoch": 0.1, - "grad_norm": 1.3521410331749686, - "learning_rate": 1.9776886772986325e-05, - "loss": 0.2366, - "step": 1875 - }, - { - "epoch": 0.1, - "grad_norm": 1.0863288578749053, - "learning_rate": 1.9776540679544154e-05, - "loss": 0.2297, - "step": 1876 - }, - { - "epoch": 0.1, - "grad_norm": 1.0241804063042204, - "learning_rate": 1.977619432091277e-05, - "loss": 0.2277, - "step": 1877 - }, - { - "epoch": 0.1, - "grad_norm": 1.359207853748398, - "learning_rate": 1.977584769710156e-05, - "loss": 0.2602, - "step": 1878 - }, - { - "epoch": 0.1, - "grad_norm": 1.488727423100929, - "learning_rate": 1.9775500808119926e-05, - "loss": 0.2123, - "step": 1879 - }, - { - "epoch": 0.1, - "grad_norm": 0.9310924451888012, - "learning_rate": 1.9775153653977284e-05, - "loss": 0.2347, - "step": 1880 - }, - { - "epoch": 0.1, - "grad_norm": 1.0882134751609247, - "learning_rate": 1.9774806234683047e-05, - "loss": 0.2168, - "step": 1881 - }, - { - "epoch": 0.1, - "grad_norm": 1.1032916586088626, - "learning_rate": 1.9774458550246636e-05, - "loss": 0.2346, - "step": 1882 - }, - { - "epoch": 0.1, - "grad_norm": 0.8964220837477248, - "learning_rate": 1.977411060067749e-05, - "loss": 0.2192, - "step": 1883 - }, - { - "epoch": 0.1, - "grad_norm": 1.2271518408134103, - "learning_rate": 1.977376238598504e-05, - "loss": 0.2207, - "step": 1884 - }, - { - "epoch": 0.1, - "grad_norm": 1.2657120013958592, - "learning_rate": 1.977341390617873e-05, - "loss": 0.2216, - "step": 1885 - }, - { - "epoch": 0.1, - "grad_norm": 1.1240450447057986, - "learning_rate": 1.9773065161268015e-05, - "loss": 0.2152, - "step": 1886 - }, - { - "epoch": 0.1, - "grad_norm": 1.140021817870832, - "learning_rate": 1.977271615126236e-05, - "loss": 0.2405, - "step": 1887 - }, - { - "epoch": 0.1, - "grad_norm": 0.9646328044306108, - "learning_rate": 1.9772366876171224e-05, - "loss": 0.2214, - "step": 1888 - }, - { - "epoch": 0.1, - "grad_norm": 1.0229657136088213, - "learning_rate": 1.9772017336004085e-05, - "loss": 0.2486, - "step": 1889 - }, - { - "epoch": 0.1, - "grad_norm": 1.0801181204271537, - "learning_rate": 1.9771667530770427e-05, - "loss": 0.2413, - "step": 1890 - }, - { - "epoch": 0.1, - "grad_norm": 1.0024092708899657, - "learning_rate": 1.9771317460479733e-05, - "loss": 0.2554, - "step": 1891 - }, - { - "epoch": 0.1, - "grad_norm": 0.9461916604616549, - "learning_rate": 1.9770967125141502e-05, - "loss": 0.2465, - "step": 1892 - }, - { - "epoch": 0.1, - "grad_norm": 1.4241403509851787, - "learning_rate": 1.9770616524765236e-05, - "loss": 0.2618, - "step": 1893 - }, - { - "epoch": 0.1, - "grad_norm": 1.2192273387499462, - "learning_rate": 1.9770265659360445e-05, - "loss": 0.2477, - "step": 1894 - }, - { - "epoch": 0.1, - "grad_norm": 0.9149517263311271, - "learning_rate": 1.9769914528936646e-05, - "loss": 0.2203, - "step": 1895 - }, - { - "epoch": 0.1, - "grad_norm": 0.9640595665851582, - "learning_rate": 1.976956313350336e-05, - "loss": 0.255, - "step": 1896 - }, - { - "epoch": 0.1, - "grad_norm": 1.2296809453079092, - "learning_rate": 1.9769211473070124e-05, - "loss": 0.2257, - "step": 1897 - }, - { - "epoch": 0.1, - "grad_norm": 1.0503214717220533, - "learning_rate": 1.9768859547646476e-05, - "loss": 0.245, - "step": 1898 - }, - { - "epoch": 0.1, - "grad_norm": 1.0261216658899286, - "learning_rate": 1.976850735724196e-05, - "loss": 0.2228, - "step": 1899 - }, - { - "epoch": 0.1, - "grad_norm": 0.8879896992019953, - "learning_rate": 1.9768154901866136e-05, - "loss": 0.2362, - "step": 1900 - }, - { - "epoch": 0.1, - "grad_norm": 0.944882194134527, - "learning_rate": 1.9767802181528552e-05, - "loss": 0.2428, - "step": 1901 - }, - { - "epoch": 0.1, - "grad_norm": 1.4309989587560117, - "learning_rate": 1.9767449196238785e-05, - "loss": 0.2513, - "step": 1902 - }, - { - "epoch": 0.1, - "grad_norm": 1.1054306441049362, - "learning_rate": 1.9767095946006405e-05, - "loss": 0.2416, - "step": 1903 - }, - { - "epoch": 0.1, - "grad_norm": 0.9790156761267768, - "learning_rate": 1.9766742430840998e-05, - "loss": 0.2196, - "step": 1904 - }, - { - "epoch": 0.1, - "grad_norm": 1.225542879755533, - "learning_rate": 1.9766388650752152e-05, - "loss": 0.2531, - "step": 1905 - }, - { - "epoch": 0.1, - "grad_norm": 1.1451682777421768, - "learning_rate": 1.976603460574946e-05, - "loss": 0.2252, - "step": 1906 - }, - { - "epoch": 0.1, - "grad_norm": 1.1697641021216751, - "learning_rate": 1.9765680295842525e-05, - "loss": 0.2374, - "step": 1907 - }, - { - "epoch": 0.1, - "grad_norm": 1.3119766391360652, - "learning_rate": 1.9765325721040964e-05, - "loss": 0.2328, - "step": 1908 - }, - { - "epoch": 0.1, - "grad_norm": 0.8776972094573054, - "learning_rate": 1.976497088135439e-05, - "loss": 0.2255, - "step": 1909 - }, - { - "epoch": 0.1, - "grad_norm": 2.901221087412436, - "learning_rate": 1.976461577679243e-05, - "loss": 0.2538, - "step": 1910 - }, - { - "epoch": 0.1, - "grad_norm": 0.9364330186554849, - "learning_rate": 1.9764260407364714e-05, - "loss": 0.2266, - "step": 1911 - }, - { - "epoch": 0.1, - "grad_norm": 0.8343122185679239, - "learning_rate": 1.9763904773080886e-05, - "loss": 0.241, - "step": 1912 - }, - { - "epoch": 0.1, - "grad_norm": 1.5044879595592187, - "learning_rate": 1.9763548873950586e-05, - "loss": 0.2721, - "step": 1913 - }, - { - "epoch": 0.1, - "grad_norm": 1.0495765131484909, - "learning_rate": 1.9763192709983473e-05, - "loss": 0.2417, - "step": 1914 - }, - { - "epoch": 0.1, - "grad_norm": 1.05508985163837, - "learning_rate": 1.9762836281189207e-05, - "loss": 0.2227, - "step": 1915 - }, - { - "epoch": 0.1, - "grad_norm": 1.0155022843798927, - "learning_rate": 1.9762479587577457e-05, - "loss": 0.2425, - "step": 1916 - }, - { - "epoch": 0.1, - "grad_norm": 1.0967121155196264, - "learning_rate": 1.976212262915789e-05, - "loss": 0.2335, - "step": 1917 - }, - { - "epoch": 0.1, - "grad_norm": 1.37545605462818, - "learning_rate": 1.9761765405940203e-05, - "loss": 0.2407, - "step": 1918 - }, - { - "epoch": 0.1, - "grad_norm": 0.9818909497523616, - "learning_rate": 1.9761407917934073e-05, - "loss": 0.2493, - "step": 1919 - }, - { - "epoch": 0.1, - "grad_norm": 1.137871337305637, - "learning_rate": 1.9761050165149208e-05, - "loss": 0.246, - "step": 1920 - }, - { - "epoch": 0.1, - "grad_norm": 0.9860186561324186, - "learning_rate": 1.9760692147595298e-05, - "loss": 0.243, - "step": 1921 - }, - { - "epoch": 0.1, - "grad_norm": 1.0181895969287278, - "learning_rate": 1.9760333865282067e-05, - "loss": 0.2473, - "step": 1922 - }, - { - "epoch": 0.1, - "grad_norm": 0.8406082864526969, - "learning_rate": 1.975997531821923e-05, - "loss": 0.2075, - "step": 1923 - }, - { - "epoch": 0.1, - "grad_norm": 1.9227285386867792, - "learning_rate": 1.9759616506416506e-05, - "loss": 0.2367, - "step": 1924 - }, - { - "epoch": 0.1, - "grad_norm": 1.5616621406325664, - "learning_rate": 1.975925742988364e-05, - "loss": 0.2383, - "step": 1925 - }, - { - "epoch": 0.1, - "grad_norm": 1.3935247996994191, - "learning_rate": 1.975889808863036e-05, - "loss": 0.2452, - "step": 1926 - }, - { - "epoch": 0.1, - "grad_norm": 1.2943208678791482, - "learning_rate": 1.975853848266642e-05, - "loss": 0.2407, - "step": 1927 - }, - { - "epoch": 0.1, - "grad_norm": 1.450294411426727, - "learning_rate": 1.975817861200157e-05, - "loss": 0.2277, - "step": 1928 - }, - { - "epoch": 0.1, - "grad_norm": 0.9018073347670972, - "learning_rate": 1.9757818476645573e-05, - "loss": 0.2498, - "step": 1929 - }, - { - "epoch": 0.1, - "grad_norm": 1.0314892631011348, - "learning_rate": 1.9757458076608204e-05, - "loss": 0.2412, - "step": 1930 - }, - { - "epoch": 0.1, - "grad_norm": 1.1697931974945825, - "learning_rate": 1.975709741189923e-05, - "loss": 0.2312, - "step": 1931 - }, - { - "epoch": 0.1, - "grad_norm": 0.9365982325740557, - "learning_rate": 1.975673648252844e-05, - "loss": 0.2147, - "step": 1932 - }, - { - "epoch": 0.1, - "grad_norm": 0.8698438621254598, - "learning_rate": 1.975637528850562e-05, - "loss": 0.2396, - "step": 1933 - }, - { - "epoch": 0.1, - "grad_norm": 1.1560916597632185, - "learning_rate": 1.9756013829840568e-05, - "loss": 0.2024, - "step": 1934 - }, - { - "epoch": 0.1, - "grad_norm": 0.8347486142298968, - "learning_rate": 1.9755652106543094e-05, - "loss": 0.2421, - "step": 1935 - }, - { - "epoch": 0.1, - "grad_norm": 0.920153099028764, - "learning_rate": 1.9755290118623e-05, - "loss": 0.2228, - "step": 1936 - }, - { - "epoch": 0.1, - "grad_norm": 0.9913299797122175, - "learning_rate": 1.9754927866090115e-05, - "loss": 0.251, - "step": 1937 - }, - { - "epoch": 0.1, - "grad_norm": 1.0404221817815027, - "learning_rate": 1.975456534895426e-05, - "loss": 0.2099, - "step": 1938 - }, - { - "epoch": 0.1, - "grad_norm": 1.115116727740608, - "learning_rate": 1.975420256722527e-05, - "loss": 0.248, - "step": 1939 - }, - { - "epoch": 0.1, - "grad_norm": 1.1369950824968853, - "learning_rate": 1.9753839520912984e-05, - "loss": 0.2212, - "step": 1940 - }, - { - "epoch": 0.1, - "grad_norm": 1.4411550000757647, - "learning_rate": 1.9753476210027248e-05, - "loss": 0.204, - "step": 1941 - }, - { - "epoch": 0.1, - "grad_norm": 0.877079500076538, - "learning_rate": 1.975311263457792e-05, - "loss": 0.236, - "step": 1942 - }, - { - "epoch": 0.1, - "grad_norm": 1.2958517471483217, - "learning_rate": 1.9752748794574858e-05, - "loss": 0.2547, - "step": 1943 - }, - { - "epoch": 0.1, - "grad_norm": 1.102209497560389, - "learning_rate": 1.9752384690027937e-05, - "loss": 0.2082, - "step": 1944 - }, - { - "epoch": 0.1, - "grad_norm": 1.0121374445198599, - "learning_rate": 1.975202032094703e-05, - "loss": 0.2849, - "step": 1945 - }, - { - "epoch": 0.1, - "grad_norm": 0.8864239305753996, - "learning_rate": 1.9751655687342022e-05, - "loss": 0.2282, - "step": 1946 - }, - { - "epoch": 0.1, - "grad_norm": 0.9967365811704525, - "learning_rate": 1.9751290789222804e-05, - "loss": 0.2322, - "step": 1947 - }, - { - "epoch": 0.1, - "grad_norm": 1.1234867484412785, - "learning_rate": 1.975092562659927e-05, - "loss": 0.2789, - "step": 1948 - }, - { - "epoch": 0.1, - "grad_norm": 0.8313426797342862, - "learning_rate": 1.9750560199481325e-05, - "loss": 0.218, - "step": 1949 - }, - { - "epoch": 0.1, - "grad_norm": 1.0417507918529505, - "learning_rate": 1.975019450787889e-05, - "loss": 0.2595, - "step": 1950 - }, - { - "epoch": 0.1, - "grad_norm": 1.1414670268025355, - "learning_rate": 1.9749828551801875e-05, - "loss": 0.2483, - "step": 1951 - }, - { - "epoch": 0.1, - "grad_norm": 1.0376239315288212, - "learning_rate": 1.974946233126021e-05, - "loss": 0.231, - "step": 1952 - }, - { - "epoch": 0.1, - "grad_norm": 1.1091441540992661, - "learning_rate": 1.9749095846263828e-05, - "loss": 0.2234, - "step": 1953 - }, - { - "epoch": 0.1, - "grad_norm": 1.087702774894137, - "learning_rate": 1.974872909682267e-05, - "loss": 0.2496, - "step": 1954 - }, - { - "epoch": 0.1, - "grad_norm": 1.0600141655170918, - "learning_rate": 1.974836208294669e-05, - "loss": 0.2432, - "step": 1955 - }, - { - "epoch": 0.1, - "grad_norm": 1.3603611052790359, - "learning_rate": 1.9747994804645835e-05, - "loss": 0.2283, - "step": 1956 - }, - { - "epoch": 0.1, - "grad_norm": 1.0742920942448784, - "learning_rate": 1.9747627261930066e-05, - "loss": 0.2387, - "step": 1957 - }, - { - "epoch": 0.1, - "grad_norm": 1.1454768361187495, - "learning_rate": 1.974725945480936e-05, - "loss": 0.2743, - "step": 1958 - }, - { - "epoch": 0.1, - "grad_norm": 1.033670082913112, - "learning_rate": 1.9746891383293692e-05, - "loss": 0.2661, - "step": 1959 - }, - { - "epoch": 0.1, - "grad_norm": 2.2940527105563957, - "learning_rate": 1.9746523047393046e-05, - "loss": 0.2791, - "step": 1960 - }, - { - "epoch": 0.1, - "grad_norm": 0.8717314285910045, - "learning_rate": 1.974615444711741e-05, - "loss": 0.2261, - "step": 1961 - }, - { - "epoch": 0.1, - "grad_norm": 1.1675295248221975, - "learning_rate": 1.974578558247678e-05, - "loss": 0.2474, - "step": 1962 - }, - { - "epoch": 0.1, - "grad_norm": 1.1768908708954267, - "learning_rate": 1.9745416453481168e-05, - "loss": 0.2453, - "step": 1963 - }, - { - "epoch": 0.1, - "grad_norm": 1.0965930944631577, - "learning_rate": 1.974504706014059e-05, - "loss": 0.2422, - "step": 1964 - }, - { - "epoch": 0.1, - "grad_norm": 1.3209827499231572, - "learning_rate": 1.9744677402465053e-05, - "loss": 0.2617, - "step": 1965 - }, - { - "epoch": 0.1, - "grad_norm": 1.0704828420643713, - "learning_rate": 1.9744307480464595e-05, - "loss": 0.24, - "step": 1966 - }, - { - "epoch": 0.1, - "grad_norm": 1.000209014735969, - "learning_rate": 1.9743937294149244e-05, - "loss": 0.2334, - "step": 1967 - }, - { - "epoch": 0.1, - "grad_norm": 1.2831002301749044, - "learning_rate": 1.9743566843529045e-05, - "loss": 0.2512, - "step": 1968 - }, - { - "epoch": 0.1, - "grad_norm": 1.1344015282589346, - "learning_rate": 1.9743196128614045e-05, - "loss": 0.2488, - "step": 1969 - }, - { - "epoch": 0.1, - "grad_norm": 0.9536869918552521, - "learning_rate": 1.97428251494143e-05, - "loss": 0.2471, - "step": 1970 - }, - { - "epoch": 0.1, - "grad_norm": 1.2110892765554457, - "learning_rate": 1.974245390593987e-05, - "loss": 0.2298, - "step": 1971 - }, - { - "epoch": 0.1, - "grad_norm": 1.1996062868964599, - "learning_rate": 1.974208239820083e-05, - "loss": 0.2441, - "step": 1972 - }, - { - "epoch": 0.1, - "grad_norm": 0.8974209218869812, - "learning_rate": 1.9741710626207255e-05, - "loss": 0.2358, - "step": 1973 - }, - { - "epoch": 0.1, - "grad_norm": 1.4554750921379593, - "learning_rate": 1.9741338589969226e-05, - "loss": 0.263, - "step": 1974 - }, - { - "epoch": 0.1, - "grad_norm": 1.0392813150984546, - "learning_rate": 1.9740966289496844e-05, - "loss": 0.2458, - "step": 1975 - }, - { - "epoch": 0.1, - "grad_norm": 0.9963122217447735, - "learning_rate": 1.9740593724800194e-05, - "loss": 0.26, - "step": 1976 - }, - { - "epoch": 0.1, - "grad_norm": 0.9737554516896297, - "learning_rate": 1.9740220895889393e-05, - "loss": 0.2259, - "step": 1977 - }, - { - "epoch": 0.1, - "grad_norm": 0.8923416935134135, - "learning_rate": 1.973984780277455e-05, - "loss": 0.2476, - "step": 1978 - }, - { - "epoch": 0.1, - "grad_norm": 1.172847400701586, - "learning_rate": 1.9739474445465783e-05, - "loss": 0.2455, - "step": 1979 - }, - { - "epoch": 0.1, - "grad_norm": 0.9256808107004492, - "learning_rate": 1.9739100823973226e-05, - "loss": 0.2285, - "step": 1980 - }, - { - "epoch": 0.1, - "grad_norm": 1.5604533353401182, - "learning_rate": 1.9738726938307e-05, - "loss": 0.2567, - "step": 1981 - }, - { - "epoch": 0.1, - "grad_norm": 0.8144426438895492, - "learning_rate": 1.9738352788477268e-05, - "loss": 0.2263, - "step": 1982 - }, - { - "epoch": 0.1, - "grad_norm": 1.0438242662359962, - "learning_rate": 1.9737978374494157e-05, - "loss": 0.2192, - "step": 1983 - }, - { - "epoch": 0.1, - "grad_norm": 1.3678826201890977, - "learning_rate": 1.9737603696367836e-05, - "loss": 0.2379, - "step": 1984 - }, - { - "epoch": 0.1, - "grad_norm": 0.909867140325504, - "learning_rate": 1.9737228754108467e-05, - "loss": 0.2626, - "step": 1985 - }, - { - "epoch": 0.1, - "grad_norm": 1.0062470083627526, - "learning_rate": 1.9736853547726214e-05, - "loss": 0.2115, - "step": 1986 - }, - { - "epoch": 0.1, - "grad_norm": 0.9377825018993832, - "learning_rate": 1.973647807723126e-05, - "loss": 0.255, - "step": 1987 - }, - { - "epoch": 0.1, - "grad_norm": 1.0494091703851158, - "learning_rate": 1.973610234263379e-05, - "loss": 0.2518, - "step": 1988 - }, - { - "epoch": 0.1, - "grad_norm": 1.1987444586881062, - "learning_rate": 1.9735726343943992e-05, - "loss": 0.2384, - "step": 1989 - }, - { - "epoch": 0.1, - "grad_norm": 1.02642903480859, - "learning_rate": 1.973535008117207e-05, - "loss": 0.267, - "step": 1990 - }, - { - "epoch": 0.1, - "grad_norm": 0.9161340787331664, - "learning_rate": 1.9734973554328223e-05, - "loss": 0.2229, - "step": 1991 - }, - { - "epoch": 0.1, - "grad_norm": 1.8948289353381529, - "learning_rate": 1.9734596763422672e-05, - "loss": 0.2656, - "step": 1992 - }, - { - "epoch": 0.1, - "grad_norm": 0.918415437776545, - "learning_rate": 1.973421970846563e-05, - "loss": 0.2379, - "step": 1993 - }, - { - "epoch": 0.1, - "grad_norm": 1.9143249901978743, - "learning_rate": 1.9733842389467334e-05, - "loss": 0.254, - "step": 1994 - }, - { - "epoch": 0.1, - "grad_norm": 0.8228313878161293, - "learning_rate": 1.9733464806438007e-05, - "loss": 0.2356, - "step": 1995 - }, - { - "epoch": 0.1, - "grad_norm": 0.9175875302693642, - "learning_rate": 1.97330869593879e-05, - "loss": 0.2135, - "step": 1996 - }, - { - "epoch": 0.1, - "grad_norm": 1.1814645469727503, - "learning_rate": 1.973270884832726e-05, - "loss": 0.2161, - "step": 1997 - }, - { - "epoch": 0.1, - "grad_norm": 0.9231461656642026, - "learning_rate": 1.9732330473266347e-05, - "loss": 0.2537, - "step": 1998 - }, - { - "epoch": 0.1, - "grad_norm": 0.9796736831409919, - "learning_rate": 1.9731951834215414e-05, - "loss": 0.2183, - "step": 1999 - }, - { - "epoch": 0.1, - "grad_norm": 0.9496265829684779, - "learning_rate": 1.973157293118474e-05, - "loss": 0.2479, - "step": 2000 - }, - { - "epoch": 0.1, - "grad_norm": 0.9231151077653849, - "learning_rate": 1.9731193764184603e-05, - "loss": 0.222, - "step": 2001 - }, - { - "epoch": 0.1, - "grad_norm": 1.0860127244202018, - "learning_rate": 1.9730814333225285e-05, - "loss": 0.2175, - "step": 2002 - }, - { - "epoch": 0.1, - "grad_norm": 1.179552345473745, - "learning_rate": 1.9730434638317076e-05, - "loss": 0.2203, - "step": 2003 - }, - { - "epoch": 0.1, - "grad_norm": 1.1933311940151612, - "learning_rate": 1.9730054679470278e-05, - "loss": 0.2185, - "step": 2004 - }, - { - "epoch": 0.1, - "grad_norm": 0.8347201140855571, - "learning_rate": 1.97296744566952e-05, - "loss": 0.2103, - "step": 2005 - }, - { - "epoch": 0.1, - "grad_norm": 1.4454228072145152, - "learning_rate": 1.9729293970002146e-05, - "loss": 0.2386, - "step": 2006 - }, - { - "epoch": 0.1, - "grad_norm": 1.095418832067566, - "learning_rate": 1.972891321940145e-05, - "loss": 0.236, - "step": 2007 - }, - { - "epoch": 0.1, - "grad_norm": 0.9516989176459635, - "learning_rate": 1.9728532204903433e-05, - "loss": 0.2302, - "step": 2008 - }, - { - "epoch": 0.1, - "grad_norm": 1.0057330914000895, - "learning_rate": 1.972815092651843e-05, - "loss": 0.2372, - "step": 2009 - }, - { - "epoch": 0.1, - "grad_norm": 0.895760637885882, - "learning_rate": 1.9727769384256784e-05, - "loss": 0.2467, - "step": 2010 - }, - { - "epoch": 0.1, - "grad_norm": 0.9447309607983849, - "learning_rate": 1.972738757812884e-05, - "loss": 0.2336, - "step": 2011 - }, - { - "epoch": 0.1, - "grad_norm": 1.8281939870261643, - "learning_rate": 1.972700550814496e-05, - "loss": 0.2278, - "step": 2012 - }, - { - "epoch": 0.1, - "grad_norm": 0.9350882822101689, - "learning_rate": 1.9726623174315513e-05, - "loss": 0.2353, - "step": 2013 - }, - { - "epoch": 0.1, - "grad_norm": 0.9463149137617953, - "learning_rate": 1.9726240576650856e-05, - "loss": 0.2321, - "step": 2014 - }, - { - "epoch": 0.1, - "grad_norm": 0.9948397774746942, - "learning_rate": 1.9725857715161375e-05, - "loss": 0.2411, - "step": 2015 - }, - { - "epoch": 0.1, - "grad_norm": 1.2647259419814574, - "learning_rate": 1.9725474589857456e-05, - "loss": 0.2351, - "step": 2016 - }, - { - "epoch": 0.1, - "grad_norm": 0.9016649755178238, - "learning_rate": 1.972509120074949e-05, - "loss": 0.2386, - "step": 2017 - }, - { - "epoch": 0.1, - "grad_norm": 1.1877608427174775, - "learning_rate": 1.9724707547847873e-05, - "loss": 0.2469, - "step": 2018 - }, - { - "epoch": 0.1, - "grad_norm": 1.7100428965467536, - "learning_rate": 1.9724323631163016e-05, - "loss": 0.2349, - "step": 2019 - }, - { - "epoch": 0.1, - "grad_norm": 1.345463433391392, - "learning_rate": 1.972393945070533e-05, - "loss": 0.2513, - "step": 2020 - }, - { - "epoch": 0.1, - "grad_norm": 0.9091717965875171, - "learning_rate": 1.972355500648524e-05, - "loss": 0.233, - "step": 2021 - }, - { - "epoch": 0.1, - "grad_norm": 1.223819582299652, - "learning_rate": 1.9723170298513166e-05, - "loss": 0.2505, - "step": 2022 - }, - { - "epoch": 0.1, - "grad_norm": 1.2243075830635146, - "learning_rate": 1.9722785326799554e-05, - "loss": 0.2262, - "step": 2023 - }, - { - "epoch": 0.1, - "grad_norm": 1.0350825147530838, - "learning_rate": 1.9722400091354837e-05, - "loss": 0.2277, - "step": 2024 - }, - { - "epoch": 0.1, - "grad_norm": 1.935339865872896, - "learning_rate": 1.9722014592189472e-05, - "loss": 0.22, - "step": 2025 - }, - { - "epoch": 0.1, - "grad_norm": 1.166453007673294, - "learning_rate": 1.972162882931391e-05, - "loss": 0.2548, - "step": 2026 - }, - { - "epoch": 0.1, - "grad_norm": 1.0351107766845962, - "learning_rate": 1.9721242802738615e-05, - "loss": 0.2426, - "step": 2027 - }, - { - "epoch": 0.1, - "grad_norm": 1.084600790162855, - "learning_rate": 1.9720856512474065e-05, - "loss": 0.2137, - "step": 2028 - }, - { - "epoch": 0.1, - "grad_norm": 0.9560170541274214, - "learning_rate": 1.972046995853073e-05, - "loss": 0.2629, - "step": 2029 - }, - { - "epoch": 0.1, - "grad_norm": 1.4103631271603914, - "learning_rate": 1.9720083140919097e-05, - "loss": 0.2184, - "step": 2030 - }, - { - "epoch": 0.1, - "grad_norm": 1.4870866452091123, - "learning_rate": 1.9719696059649665e-05, - "loss": 0.2314, - "step": 2031 - }, - { - "epoch": 0.1, - "grad_norm": 1.0031746692829822, - "learning_rate": 1.9719308714732924e-05, - "loss": 0.2556, - "step": 2032 - }, - { - "epoch": 0.1, - "grad_norm": 1.0804349871082373, - "learning_rate": 1.9718921106179384e-05, - "loss": 0.2393, - "step": 2033 - }, - { - "epoch": 0.1, - "grad_norm": 2.029145379261695, - "learning_rate": 1.9718533233999565e-05, - "loss": 0.2449, - "step": 2034 - }, - { - "epoch": 0.1, - "grad_norm": 1.075889733451575, - "learning_rate": 1.9718145098203977e-05, - "loss": 0.2061, - "step": 2035 - }, - { - "epoch": 0.1, - "grad_norm": 1.5370899898173875, - "learning_rate": 1.971775669880316e-05, - "loss": 0.2207, - "step": 2036 - }, - { - "epoch": 0.1, - "grad_norm": 0.9389617118447757, - "learning_rate": 1.971736803580764e-05, - "loss": 0.2453, - "step": 2037 - }, - { - "epoch": 0.1, - "grad_norm": 0.9431192525993586, - "learning_rate": 1.9716979109227965e-05, - "loss": 0.2128, - "step": 2038 - }, - { - "epoch": 0.1, - "grad_norm": 0.8151755033716819, - "learning_rate": 1.9716589919074682e-05, - "loss": 0.2378, - "step": 2039 - }, - { - "epoch": 0.1, - "grad_norm": 0.9130866798995204, - "learning_rate": 1.9716200465358352e-05, - "loss": 0.2421, - "step": 2040 - }, - { - "epoch": 0.1, - "grad_norm": 1.0000040861444954, - "learning_rate": 1.971581074808953e-05, - "loss": 0.2559, - "step": 2041 - }, - { - "epoch": 0.1, - "grad_norm": 1.0485492882545504, - "learning_rate": 1.9715420767278794e-05, - "loss": 0.2318, - "step": 2042 - }, - { - "epoch": 0.1, - "grad_norm": 1.137323578167587, - "learning_rate": 1.9715030522936724e-05, - "loss": 0.2508, - "step": 2043 - }, - { - "epoch": 0.1, - "grad_norm": 1.2110860738142555, - "learning_rate": 1.9714640015073902e-05, - "loss": 0.234, - "step": 2044 - }, - { - "epoch": 0.1, - "grad_norm": 0.9126273522332989, - "learning_rate": 1.9714249243700916e-05, - "loss": 0.2512, - "step": 2045 - }, - { - "epoch": 0.1, - "grad_norm": 1.105935543821545, - "learning_rate": 1.9713858208828376e-05, - "loss": 0.2222, - "step": 2046 - }, - { - "epoch": 0.1, - "grad_norm": 1.176499937899214, - "learning_rate": 1.971346691046688e-05, - "loss": 0.2465, - "step": 2047 - }, - { - "epoch": 0.1, - "grad_norm": 1.044320635285446, - "learning_rate": 1.971307534862705e-05, - "loss": 0.2773, - "step": 2048 - }, - { - "epoch": 0.1, - "grad_norm": 0.8622995262128048, - "learning_rate": 1.9712683523319498e-05, - "loss": 0.2372, - "step": 2049 - }, - { - "epoch": 0.1, - "grad_norm": 0.9419349078412376, - "learning_rate": 1.9712291434554858e-05, - "loss": 0.2445, - "step": 2050 - }, - { - "epoch": 0.1, - "grad_norm": 1.051102010160456, - "learning_rate": 1.9711899082343763e-05, - "loss": 0.2449, - "step": 2051 - }, - { - "epoch": 0.1, - "grad_norm": 0.9448524300694392, - "learning_rate": 1.971150646669686e-05, - "loss": 0.2452, - "step": 2052 - }, - { - "epoch": 0.1, - "grad_norm": 1.216094189344481, - "learning_rate": 1.9711113587624795e-05, - "loss": 0.2247, - "step": 2053 - }, - { - "epoch": 0.1, - "grad_norm": 1.063800136560177, - "learning_rate": 1.9710720445138225e-05, - "loss": 0.2196, - "step": 2054 - }, - { - "epoch": 0.1, - "grad_norm": 1.6769989193035522, - "learning_rate": 1.9710327039247814e-05, - "loss": 0.2664, - "step": 2055 - }, - { - "epoch": 0.1, - "grad_norm": 1.2891307743333376, - "learning_rate": 1.9709933369964235e-05, - "loss": 0.2428, - "step": 2056 - }, - { - "epoch": 0.1, - "grad_norm": 0.8979671748669272, - "learning_rate": 1.970953943729816e-05, - "loss": 0.2286, - "step": 2057 - }, - { - "epoch": 0.1, - "grad_norm": 1.3718627427597734, - "learning_rate": 1.9709145241260283e-05, - "loss": 0.2354, - "step": 2058 - }, - { - "epoch": 0.1, - "grad_norm": 1.4770658344351117, - "learning_rate": 1.9708750781861294e-05, - "loss": 0.2556, - "step": 2059 - }, - { - "epoch": 0.1, - "grad_norm": 1.5425129152295496, - "learning_rate": 1.970835605911189e-05, - "loss": 0.2405, - "step": 2060 - }, - { - "epoch": 0.1, - "grad_norm": 1.0673330540386734, - "learning_rate": 1.970796107302278e-05, - "loss": 0.2262, - "step": 2061 - }, - { - "epoch": 0.1, - "grad_norm": 1.2216615188163307, - "learning_rate": 1.970756582360468e-05, - "loss": 0.2431, - "step": 2062 - }, - { - "epoch": 0.1, - "grad_norm": 1.370798987347387, - "learning_rate": 1.9707170310868303e-05, - "loss": 0.2266, - "step": 2063 - }, - { - "epoch": 0.1, - "grad_norm": 1.357861573424895, - "learning_rate": 1.9706774534824387e-05, - "loss": 0.2137, - "step": 2064 - }, - { - "epoch": 0.11, - "grad_norm": 0.991296929358314, - "learning_rate": 1.9706378495483664e-05, - "loss": 0.2243, - "step": 2065 - }, - { - "epoch": 0.11, - "grad_norm": 1.1184402264270916, - "learning_rate": 1.9705982192856874e-05, - "loss": 0.2171, - "step": 2066 - }, - { - "epoch": 0.11, - "grad_norm": 1.0273223493964336, - "learning_rate": 1.9705585626954772e-05, - "loss": 0.23, - "step": 2067 - }, - { - "epoch": 0.11, - "grad_norm": 1.0079792072394853, - "learning_rate": 1.9705188797788108e-05, - "loss": 0.2138, - "step": 2068 - }, - { - "epoch": 0.11, - "grad_norm": 0.7765221583486901, - "learning_rate": 1.9704791705367653e-05, - "loss": 0.2036, - "step": 2069 - }, - { - "epoch": 0.11, - "grad_norm": 2.4568975454390984, - "learning_rate": 1.9704394349704174e-05, - "loss": 0.2434, - "step": 2070 - }, - { - "epoch": 0.11, - "grad_norm": 1.221785001763761, - "learning_rate": 1.970399673080845e-05, - "loss": 0.2401, - "step": 2071 - }, - { - "epoch": 0.11, - "grad_norm": 1.0248697227267982, - "learning_rate": 1.970359884869126e-05, - "loss": 0.2156, - "step": 2072 - }, - { - "epoch": 0.11, - "grad_norm": 1.10514229232445, - "learning_rate": 1.9703200703363415e-05, - "loss": 0.2445, - "step": 2073 - }, - { - "epoch": 0.11, - "grad_norm": 1.2233075570824092, - "learning_rate": 1.9702802294835695e-05, - "loss": 0.2647, - "step": 2074 - }, - { - "epoch": 0.11, - "grad_norm": 1.411425527125798, - "learning_rate": 1.9702403623118918e-05, - "loss": 0.2264, - "step": 2075 - }, - { - "epoch": 0.11, - "grad_norm": 1.255491000596999, - "learning_rate": 1.970200468822389e-05, - "loss": 0.2211, - "step": 2076 - }, - { - "epoch": 0.11, - "grad_norm": 1.1531607324724602, - "learning_rate": 1.970160549016144e-05, - "loss": 0.2471, - "step": 2077 - }, - { - "epoch": 0.11, - "grad_norm": 1.1173493303154376, - "learning_rate": 1.9701206028942398e-05, - "loss": 0.2495, - "step": 2078 - }, - { - "epoch": 0.11, - "grad_norm": 1.1526086421139528, - "learning_rate": 1.970080630457759e-05, - "loss": 0.2303, - "step": 2079 - }, - { - "epoch": 0.11, - "grad_norm": 0.9464962515330188, - "learning_rate": 1.970040631707786e-05, - "loss": 0.2603, - "step": 2080 - }, - { - "epoch": 0.11, - "grad_norm": 0.8871094596594084, - "learning_rate": 1.9700006066454066e-05, - "loss": 0.2512, - "step": 2081 - }, - { - "epoch": 0.11, - "grad_norm": 1.0292996944262494, - "learning_rate": 1.9699605552717056e-05, - "loss": 0.2385, - "step": 2082 - }, - { - "epoch": 0.11, - "grad_norm": 1.1987567719919687, - "learning_rate": 1.96992047758777e-05, - "loss": 0.2412, - "step": 2083 - }, - { - "epoch": 0.11, - "grad_norm": 0.8475327484829858, - "learning_rate": 1.9698803735946867e-05, - "loss": 0.2352, - "step": 2084 - }, - { - "epoch": 0.11, - "grad_norm": 0.9053438157379965, - "learning_rate": 1.9698402432935432e-05, - "loss": 0.2431, - "step": 2085 - }, - { - "epoch": 0.11, - "grad_norm": 1.336674943053752, - "learning_rate": 1.9698000866854284e-05, - "loss": 0.2356, - "step": 2086 - }, - { - "epoch": 0.11, - "grad_norm": 1.2977002169496847, - "learning_rate": 1.9697599037714315e-05, - "loss": 0.2374, - "step": 2087 - }, - { - "epoch": 0.11, - "grad_norm": 1.2799090702528029, - "learning_rate": 1.9697196945526427e-05, - "loss": 0.2252, - "step": 2088 - }, - { - "epoch": 0.11, - "grad_norm": 0.9752193191002771, - "learning_rate": 1.969679459030152e-05, - "loss": 0.2678, - "step": 2089 - }, - { - "epoch": 0.11, - "grad_norm": 0.8762395610925323, - "learning_rate": 1.9696391972050516e-05, - "loss": 0.2144, - "step": 2090 - }, - { - "epoch": 0.11, - "grad_norm": 1.1460100146302559, - "learning_rate": 1.969598909078433e-05, - "loss": 0.2508, - "step": 2091 - }, - { - "epoch": 0.11, - "grad_norm": 0.9559822932848991, - "learning_rate": 1.969558594651389e-05, - "loss": 0.2487, - "step": 2092 - }, - { - "epoch": 0.11, - "grad_norm": 0.9839613710855772, - "learning_rate": 1.9695182539250138e-05, - "loss": 0.2392, - "step": 2093 - }, - { - "epoch": 0.11, - "grad_norm": 0.8264913114980849, - "learning_rate": 1.969477886900401e-05, - "loss": 0.2213, - "step": 2094 - }, - { - "epoch": 0.11, - "grad_norm": 1.0003343905023958, - "learning_rate": 1.9694374935786457e-05, - "loss": 0.2235, - "step": 2095 - }, - { - "epoch": 0.11, - "grad_norm": 1.5516384795930434, - "learning_rate": 1.9693970739608437e-05, - "loss": 0.2463, - "step": 2096 - }, - { - "epoch": 0.11, - "grad_norm": 2.0876525839249633, - "learning_rate": 1.9693566280480914e-05, - "loss": 0.226, - "step": 2097 - }, - { - "epoch": 0.11, - "grad_norm": 1.346822529145253, - "learning_rate": 1.9693161558414856e-05, - "loss": 0.2393, - "step": 2098 - }, - { - "epoch": 0.11, - "grad_norm": 1.5812948122941315, - "learning_rate": 1.9692756573421246e-05, - "loss": 0.2685, - "step": 2099 - }, - { - "epoch": 0.11, - "grad_norm": 0.9825131769766344, - "learning_rate": 1.9692351325511066e-05, - "loss": 0.2434, - "step": 2100 - }, - { - "epoch": 0.11, - "grad_norm": 2.517270664330921, - "learning_rate": 1.9691945814695306e-05, - "loss": 0.2442, - "step": 2101 - }, - { - "epoch": 0.11, - "grad_norm": 1.5284733760701465, - "learning_rate": 1.9691540040984972e-05, - "loss": 0.2366, - "step": 2102 - }, - { - "epoch": 0.11, - "grad_norm": 1.7611185683006056, - "learning_rate": 1.9691134004391064e-05, - "loss": 0.2571, - "step": 2103 - }, - { - "epoch": 0.11, - "grad_norm": 1.2502143870501974, - "learning_rate": 1.9690727704924598e-05, - "loss": 0.2439, - "step": 2104 - }, - { - "epoch": 0.11, - "grad_norm": 1.155788006118132, - "learning_rate": 1.9690321142596602e-05, - "loss": 0.2368, - "step": 2105 - }, - { - "epoch": 0.11, - "grad_norm": 1.0104694476849565, - "learning_rate": 1.968991431741809e-05, - "loss": 0.2437, - "step": 2106 - }, - { - "epoch": 0.11, - "grad_norm": 1.3811992101020232, - "learning_rate": 1.968950722940011e-05, - "loss": 0.2269, - "step": 2107 - }, - { - "epoch": 0.11, - "grad_norm": 1.0196354555505611, - "learning_rate": 1.9689099878553698e-05, - "loss": 0.2316, - "step": 2108 - }, - { - "epoch": 0.11, - "grad_norm": 1.701387510634705, - "learning_rate": 1.9688692264889905e-05, - "loss": 0.2166, - "step": 2109 - }, - { - "epoch": 0.11, - "grad_norm": 0.8829947026926955, - "learning_rate": 1.9688284388419784e-05, - "loss": 0.2411, - "step": 2110 - }, - { - "epoch": 0.11, - "grad_norm": 1.1844084066745377, - "learning_rate": 1.9687876249154402e-05, - "loss": 0.2682, - "step": 2111 - }, - { - "epoch": 0.11, - "grad_norm": 1.0692862669633376, - "learning_rate": 1.9687467847104834e-05, - "loss": 0.2247, - "step": 2112 - }, - { - "epoch": 0.11, - "grad_norm": 1.931930597121846, - "learning_rate": 1.9687059182282152e-05, - "loss": 0.2397, - "step": 2113 - }, - { - "epoch": 0.11, - "grad_norm": 0.9933543404165622, - "learning_rate": 1.968665025469744e-05, - "loss": 0.2026, - "step": 2114 - }, - { - "epoch": 0.11, - "grad_norm": 1.2298239063976584, - "learning_rate": 1.9686241064361792e-05, - "loss": 0.2491, - "step": 2115 - }, - { - "epoch": 0.11, - "grad_norm": 1.0308486801387686, - "learning_rate": 1.9685831611286312e-05, - "loss": 0.2167, - "step": 2116 - }, - { - "epoch": 0.11, - "grad_norm": 1.232081656908198, - "learning_rate": 1.96854218954821e-05, - "loss": 0.2442, - "step": 2117 - }, - { - "epoch": 0.11, - "grad_norm": 1.1575287552055014, - "learning_rate": 1.9685011916960276e-05, - "loss": 0.2477, - "step": 2118 - }, - { - "epoch": 0.11, - "grad_norm": 1.1704875982681098, - "learning_rate": 1.9684601675731952e-05, - "loss": 0.2328, - "step": 2119 - }, - { - "epoch": 0.11, - "grad_norm": 1.0464516245169295, - "learning_rate": 1.9684191171808262e-05, - "loss": 0.2445, - "step": 2120 - }, - { - "epoch": 0.11, - "grad_norm": 1.238441161592338, - "learning_rate": 1.968378040520034e-05, - "loss": 0.2454, - "step": 2121 - }, - { - "epoch": 0.11, - "grad_norm": 1.1120056639856326, - "learning_rate": 1.9683369375919325e-05, - "loss": 0.2353, - "step": 2122 - }, - { - "epoch": 0.11, - "grad_norm": 1.4210318912864548, - "learning_rate": 1.9682958083976374e-05, - "loss": 0.2377, - "step": 2123 - }, - { - "epoch": 0.11, - "grad_norm": 0.9675502607652713, - "learning_rate": 1.9682546529382635e-05, - "loss": 0.2382, - "step": 2124 - }, - { - "epoch": 0.11, - "grad_norm": 0.8799019495856499, - "learning_rate": 1.968213471214927e-05, - "loss": 0.2327, - "step": 2125 - }, - { - "epoch": 0.11, - "grad_norm": 1.1715411569228944, - "learning_rate": 1.968172263228746e-05, - "loss": 0.237, - "step": 2126 - }, - { - "epoch": 0.11, - "grad_norm": 0.9671697534047214, - "learning_rate": 1.9681310289808377e-05, - "loss": 0.2416, - "step": 2127 - }, - { - "epoch": 0.11, - "grad_norm": 1.0001367934628327, - "learning_rate": 1.9680897684723205e-05, - "loss": 0.2347, - "step": 2128 - }, - { - "epoch": 0.11, - "grad_norm": 0.8494973958552655, - "learning_rate": 1.9680484817043134e-05, - "loss": 0.2301, - "step": 2129 - }, - { - "epoch": 0.11, - "grad_norm": 0.9240069516003688, - "learning_rate": 1.9680071686779368e-05, - "loss": 0.2379, - "step": 2130 - }, - { - "epoch": 0.11, - "grad_norm": 1.3136456921955122, - "learning_rate": 1.9679658293943112e-05, - "loss": 0.2417, - "step": 2131 - }, - { - "epoch": 0.11, - "grad_norm": 1.0592213424376784, - "learning_rate": 1.9679244638545572e-05, - "loss": 0.2174, - "step": 2132 - }, - { - "epoch": 0.11, - "grad_norm": 0.910805874421009, - "learning_rate": 1.967883072059798e-05, - "loss": 0.237, - "step": 2133 - }, - { - "epoch": 0.11, - "grad_norm": 0.8839617866905263, - "learning_rate": 1.9678416540111557e-05, - "loss": 0.2356, - "step": 2134 - }, - { - "epoch": 0.11, - "grad_norm": 1.2152257087176324, - "learning_rate": 1.9678002097097537e-05, - "loss": 0.2349, - "step": 2135 - }, - { - "epoch": 0.11, - "grad_norm": 1.1044155337772443, - "learning_rate": 1.9677587391567164e-05, - "loss": 0.2365, - "step": 2136 - }, - { - "epoch": 0.11, - "grad_norm": 2.7787980072399576, - "learning_rate": 1.967717242353169e-05, - "loss": 0.2265, - "step": 2137 - }, - { - "epoch": 0.11, - "grad_norm": 1.3616430182996238, - "learning_rate": 1.9676757193002363e-05, - "loss": 0.2746, - "step": 2138 - }, - { - "epoch": 0.11, - "grad_norm": 1.1228586924105723, - "learning_rate": 1.9676341699990452e-05, - "loss": 0.2186, - "step": 2139 - }, - { - "epoch": 0.11, - "grad_norm": 1.2125802259045932, - "learning_rate": 1.9675925944507226e-05, - "loss": 0.2519, - "step": 2140 - }, - { - "epoch": 0.11, - "grad_norm": 0.9444977302475596, - "learning_rate": 1.9675509926563964e-05, - "loss": 0.2566, - "step": 2141 - }, - { - "epoch": 0.11, - "grad_norm": 1.5317024122818927, - "learning_rate": 1.9675093646171947e-05, - "loss": 0.2401, - "step": 2142 - }, - { - "epoch": 0.11, - "grad_norm": 1.7667854821812148, - "learning_rate": 1.967467710334247e-05, - "loss": 0.2389, - "step": 2143 - }, - { - "epoch": 0.11, - "grad_norm": 1.189612959109022, - "learning_rate": 1.9674260298086825e-05, - "loss": 0.2352, - "step": 2144 - }, - { - "epoch": 0.11, - "grad_norm": 0.992641156412239, - "learning_rate": 1.967384323041633e-05, - "loss": 0.225, - "step": 2145 - }, - { - "epoch": 0.11, - "grad_norm": 0.9343197806222634, - "learning_rate": 1.9673425900342286e-05, - "loss": 0.2178, - "step": 2146 - }, - { - "epoch": 0.11, - "grad_norm": 1.1016381303751634, - "learning_rate": 1.9673008307876017e-05, - "loss": 0.2271, - "step": 2147 - }, - { - "epoch": 0.11, - "grad_norm": 0.9690005406542, - "learning_rate": 1.9672590453028855e-05, - "loss": 0.228, - "step": 2148 - }, - { - "epoch": 0.11, - "grad_norm": 1.235254056286498, - "learning_rate": 1.967217233581213e-05, - "loss": 0.2428, - "step": 2149 - }, - { - "epoch": 0.11, - "grad_norm": 1.109454195598941, - "learning_rate": 1.9671753956237187e-05, - "loss": 0.2327, - "step": 2150 - }, - { - "epoch": 0.11, - "grad_norm": 3.5720857648078095, - "learning_rate": 1.9671335314315365e-05, - "loss": 0.2501, - "step": 2151 - }, - { - "epoch": 0.11, - "grad_norm": 1.0473294245674567, - "learning_rate": 1.967091641005803e-05, - "loss": 0.222, - "step": 2152 - }, - { - "epoch": 0.11, - "grad_norm": 1.2244939085245228, - "learning_rate": 1.967049724347654e-05, - "loss": 0.2284, - "step": 2153 - }, - { - "epoch": 0.11, - "grad_norm": 1.3558829537666295, - "learning_rate": 1.967007781458227e-05, - "loss": 0.2353, - "step": 2154 - }, - { - "epoch": 0.11, - "grad_norm": 0.9497548159666054, - "learning_rate": 1.966965812338659e-05, - "loss": 0.2122, - "step": 2155 - }, - { - "epoch": 0.11, - "grad_norm": 0.9943932518361649, - "learning_rate": 1.9669238169900886e-05, - "loss": 0.2103, - "step": 2156 - }, - { - "epoch": 0.11, - "grad_norm": 4.593225948018636, - "learning_rate": 1.966881795413655e-05, - "loss": 0.2542, - "step": 2157 - }, - { - "epoch": 0.11, - "grad_norm": 1.282690775096383, - "learning_rate": 1.9668397476104983e-05, - "loss": 0.2286, - "step": 2158 - }, - { - "epoch": 0.11, - "grad_norm": 1.101149696753262, - "learning_rate": 1.966797673581759e-05, - "loss": 0.2089, - "step": 2159 - }, - { - "epoch": 0.11, - "grad_norm": 2.905210614202573, - "learning_rate": 1.966755573328578e-05, - "loss": 0.23, - "step": 2160 - }, - { - "epoch": 0.11, - "grad_norm": 1.0378262137010232, - "learning_rate": 1.9667134468520974e-05, - "loss": 0.2432, - "step": 2161 - }, - { - "epoch": 0.11, - "grad_norm": 1.0797024746109964, - "learning_rate": 1.96667129415346e-05, - "loss": 0.2282, - "step": 2162 - }, - { - "epoch": 0.11, - "grad_norm": 1.2276995992881126, - "learning_rate": 1.966629115233809e-05, - "loss": 0.211, - "step": 2163 - }, - { - "epoch": 0.11, - "grad_norm": 1.0352266908341288, - "learning_rate": 1.9665869100942888e-05, - "loss": 0.2472, - "step": 2164 - }, - { - "epoch": 0.11, - "grad_norm": 1.417599642847786, - "learning_rate": 1.9665446787360444e-05, - "loss": 0.2273, - "step": 2165 - }, - { - "epoch": 0.11, - "grad_norm": 0.9920449774927103, - "learning_rate": 1.9665024211602208e-05, - "loss": 0.242, - "step": 2166 - }, - { - "epoch": 0.11, - "grad_norm": 1.2296252775505276, - "learning_rate": 1.9664601373679644e-05, - "loss": 0.2491, - "step": 2167 - }, - { - "epoch": 0.11, - "grad_norm": 1.0402566138277567, - "learning_rate": 1.966417827360422e-05, - "loss": 0.2284, - "step": 2168 - }, - { - "epoch": 0.11, - "grad_norm": 1.1053617884173759, - "learning_rate": 1.9663754911387414e-05, - "loss": 0.2385, - "step": 2169 - }, - { - "epoch": 0.11, - "grad_norm": 1.1119095497921, - "learning_rate": 1.9663331287040713e-05, - "loss": 0.2539, - "step": 2170 - }, - { - "epoch": 0.11, - "grad_norm": 1.0983948450880103, - "learning_rate": 1.9662907400575606e-05, - "loss": 0.2137, - "step": 2171 - }, - { - "epoch": 0.11, - "grad_norm": 1.269949230715848, - "learning_rate": 1.9662483252003585e-05, - "loss": 0.2293, - "step": 2172 - }, - { - "epoch": 0.11, - "grad_norm": 1.0564708933309246, - "learning_rate": 1.9662058841336164e-05, - "loss": 0.2217, - "step": 2173 - }, - { - "epoch": 0.11, - "grad_norm": 1.1216048180385836, - "learning_rate": 1.966163416858485e-05, - "loss": 0.2748, - "step": 2174 - }, - { - "epoch": 0.11, - "grad_norm": 6.508310528343341, - "learning_rate": 1.9661209233761167e-05, - "loss": 0.227, - "step": 2175 - }, - { - "epoch": 0.11, - "grad_norm": 1.094002964335264, - "learning_rate": 1.9660784036876636e-05, - "loss": 0.2422, - "step": 2176 - }, - { - "epoch": 0.11, - "grad_norm": 1.2065998099809236, - "learning_rate": 1.9660358577942788e-05, - "loss": 0.2282, - "step": 2177 - }, - { - "epoch": 0.11, - "grad_norm": 1.5059749594559086, - "learning_rate": 1.965993285697117e-05, - "loss": 0.2618, - "step": 2178 - }, - { - "epoch": 0.11, - "grad_norm": 1.0542248630235662, - "learning_rate": 1.965950687397333e-05, - "loss": 0.227, - "step": 2179 - }, - { - "epoch": 0.11, - "grad_norm": 1.1505226952579715, - "learning_rate": 1.965908062896082e-05, - "loss": 0.2321, - "step": 2180 - }, - { - "epoch": 0.11, - "grad_norm": 0.9613747888101222, - "learning_rate": 1.96586541219452e-05, - "loss": 0.2532, - "step": 2181 - }, - { - "epoch": 0.11, - "grad_norm": 1.1209519540120025, - "learning_rate": 1.9658227352938044e-05, - "loss": 0.215, - "step": 2182 - }, - { - "epoch": 0.11, - "grad_norm": 1.1901700934240698, - "learning_rate": 1.9657800321950925e-05, - "loss": 0.233, - "step": 2183 - }, - { - "epoch": 0.11, - "grad_norm": 1.0703733820154298, - "learning_rate": 1.9657373028995427e-05, - "loss": 0.2294, - "step": 2184 - }, - { - "epoch": 0.11, - "grad_norm": 1.0694757472302956, - "learning_rate": 1.965694547408314e-05, - "loss": 0.2351, - "step": 2185 - }, - { - "epoch": 0.11, - "grad_norm": 1.06619999016574, - "learning_rate": 1.9656517657225658e-05, - "loss": 0.2091, - "step": 2186 - }, - { - "epoch": 0.11, - "grad_norm": 1.2948971683345574, - "learning_rate": 1.9656089578434595e-05, - "loss": 0.2643, - "step": 2187 - }, - { - "epoch": 0.11, - "grad_norm": 1.16157269655789, - "learning_rate": 1.9655661237721554e-05, - "loss": 0.2298, - "step": 2188 - }, - { - "epoch": 0.11, - "grad_norm": 1.4539573594884465, - "learning_rate": 1.9655232635098157e-05, - "loss": 0.2289, - "step": 2189 - }, - { - "epoch": 0.11, - "grad_norm": 1.7694359425810158, - "learning_rate": 1.965480377057603e-05, - "loss": 0.1937, - "step": 2190 - }, - { - "epoch": 0.11, - "grad_norm": 1.3928943982351816, - "learning_rate": 1.96543746441668e-05, - "loss": 0.2257, - "step": 2191 - }, - { - "epoch": 0.11, - "grad_norm": 1.2322222758893566, - "learning_rate": 1.965394525588212e-05, - "loss": 0.2518, - "step": 2192 - }, - { - "epoch": 0.11, - "grad_norm": 1.2909786558614489, - "learning_rate": 1.9653515605733625e-05, - "loss": 0.2674, - "step": 2193 - }, - { - "epoch": 0.11, - "grad_norm": 1.610007776399509, - "learning_rate": 1.9653085693732976e-05, - "loss": 0.24, - "step": 2194 - }, - { - "epoch": 0.11, - "grad_norm": 1.224363132147249, - "learning_rate": 1.965265551989183e-05, - "loss": 0.2476, - "step": 2195 - }, - { - "epoch": 0.11, - "grad_norm": 1.105382530444532, - "learning_rate": 1.965222508422186e-05, - "loss": 0.2573, - "step": 2196 - }, - { - "epoch": 0.11, - "grad_norm": 1.146983000583085, - "learning_rate": 1.9651794386734743e-05, - "loss": 0.2428, - "step": 2197 - }, - { - "epoch": 0.11, - "grad_norm": 1.1987218803478044, - "learning_rate": 1.965136342744215e-05, - "loss": 0.2421, - "step": 2198 - }, - { - "epoch": 0.11, - "grad_norm": 1.3590799693199993, - "learning_rate": 1.9650932206355786e-05, - "loss": 0.2656, - "step": 2199 - }, - { - "epoch": 0.11, - "grad_norm": 0.9900346782252395, - "learning_rate": 1.9650500723487335e-05, - "loss": 0.2067, - "step": 2200 - }, - { - "epoch": 0.11, - "grad_norm": 1.077951461086788, - "learning_rate": 1.9650068978848512e-05, - "loss": 0.2341, - "step": 2201 - }, - { - "epoch": 0.11, - "grad_norm": 1.907756752533414, - "learning_rate": 1.964963697245102e-05, - "loss": 0.2265, - "step": 2202 - }, - { - "epoch": 0.11, - "grad_norm": 1.2688186973976037, - "learning_rate": 1.964920470430658e-05, - "loss": 0.2449, - "step": 2203 - }, - { - "epoch": 0.11, - "grad_norm": 1.1829663405600277, - "learning_rate": 1.964877217442692e-05, - "loss": 0.2643, - "step": 2204 - }, - { - "epoch": 0.11, - "grad_norm": 1.8793853900121442, - "learning_rate": 1.964833938282377e-05, - "loss": 0.2251, - "step": 2205 - }, - { - "epoch": 0.11, - "grad_norm": 1.2663396192955865, - "learning_rate": 1.9647906329508866e-05, - "loss": 0.2563, - "step": 2206 - }, - { - "epoch": 0.11, - "grad_norm": 1.086276962256967, - "learning_rate": 1.9647473014493958e-05, - "loss": 0.2063, - "step": 2207 - }, - { - "epoch": 0.11, - "grad_norm": 1.6805919171172707, - "learning_rate": 1.9647039437790802e-05, - "loss": 0.2422, - "step": 2208 - }, - { - "epoch": 0.11, - "grad_norm": 1.3180450409740612, - "learning_rate": 1.9646605599411155e-05, - "loss": 0.2218, - "step": 2209 - }, - { - "epoch": 0.11, - "grad_norm": 1.0908244445799435, - "learning_rate": 1.964617149936679e-05, - "loss": 0.2358, - "step": 2210 - }, - { - "epoch": 0.11, - "grad_norm": 1.3992407176335435, - "learning_rate": 1.9645737137669473e-05, - "loss": 0.2655, - "step": 2211 - }, - { - "epoch": 0.11, - "grad_norm": 1.2630430210609305, - "learning_rate": 1.9645302514330994e-05, - "loss": 0.2403, - "step": 2212 - }, - { - "epoch": 0.11, - "grad_norm": 1.1339372584391034, - "learning_rate": 1.9644867629363137e-05, - "loss": 0.2247, - "step": 2213 - }, - { - "epoch": 0.11, - "grad_norm": 1.1341369372627903, - "learning_rate": 1.9644432482777703e-05, - "loss": 0.2674, - "step": 2214 - }, - { - "epoch": 0.11, - "grad_norm": 1.3731513614446267, - "learning_rate": 1.964399707458649e-05, - "loss": 0.2195, - "step": 2215 - }, - { - "epoch": 0.11, - "grad_norm": 1.3995092356589611, - "learning_rate": 1.9643561404801317e-05, - "loss": 0.2371, - "step": 2216 - }, - { - "epoch": 0.11, - "grad_norm": 1.0985880612220564, - "learning_rate": 1.9643125473433992e-05, - "loss": 0.2136, - "step": 2217 - }, - { - "epoch": 0.11, - "grad_norm": 0.8997296488322348, - "learning_rate": 1.9642689280496347e-05, - "loss": 0.1975, - "step": 2218 - }, - { - "epoch": 0.11, - "grad_norm": 1.7606210335459154, - "learning_rate": 1.9642252826000206e-05, - "loss": 0.2158, - "step": 2219 - }, - { - "epoch": 0.11, - "grad_norm": 0.9434073289543887, - "learning_rate": 1.9641816109957415e-05, - "loss": 0.2276, - "step": 2220 - }, - { - "epoch": 0.11, - "grad_norm": 3.9260696285730123, - "learning_rate": 1.9641379132379822e-05, - "loss": 0.2486, - "step": 2221 - }, - { - "epoch": 0.11, - "grad_norm": 1.058546288904667, - "learning_rate": 1.964094189327927e-05, - "loss": 0.2338, - "step": 2222 - }, - { - "epoch": 0.11, - "grad_norm": 1.394988426693415, - "learning_rate": 1.9640504392667626e-05, - "loss": 0.2476, - "step": 2223 - }, - { - "epoch": 0.11, - "grad_norm": 1.0985601759324406, - "learning_rate": 1.9640066630556756e-05, - "loss": 0.2159, - "step": 2224 - }, - { - "epoch": 0.11, - "grad_norm": 1.2145342696137336, - "learning_rate": 1.9639628606958535e-05, - "loss": 0.2311, - "step": 2225 - }, - { - "epoch": 0.11, - "grad_norm": 1.4288512822935417, - "learning_rate": 1.9639190321884842e-05, - "loss": 0.2285, - "step": 2226 - }, - { - "epoch": 0.11, - "grad_norm": 1.1520142737740142, - "learning_rate": 1.9638751775347568e-05, - "loss": 0.2225, - "step": 2227 - }, - { - "epoch": 0.11, - "grad_norm": 1.0531822751852433, - "learning_rate": 1.963831296735861e-05, - "loss": 0.2425, - "step": 2228 - }, - { - "epoch": 0.11, - "grad_norm": 2.359747733426235, - "learning_rate": 1.9637873897929866e-05, - "loss": 0.2537, - "step": 2229 - }, - { - "epoch": 0.11, - "grad_norm": 1.126644132650638, - "learning_rate": 1.9637434567073246e-05, - "loss": 0.2411, - "step": 2230 - }, - { - "epoch": 0.11, - "grad_norm": 1.5801327871398765, - "learning_rate": 1.9636994974800673e-05, - "loss": 0.2442, - "step": 2231 - }, - { - "epoch": 0.11, - "grad_norm": 1.4688906285868015, - "learning_rate": 1.9636555121124063e-05, - "loss": 0.2321, - "step": 2232 - }, - { - "epoch": 0.11, - "grad_norm": 1.4452794250821794, - "learning_rate": 1.963611500605535e-05, - "loss": 0.2282, - "step": 2233 - }, - { - "epoch": 0.11, - "grad_norm": 1.2114241613636223, - "learning_rate": 1.963567462960648e-05, - "loss": 0.2344, - "step": 2234 - }, - { - "epoch": 0.11, - "grad_norm": 1.213601910804405, - "learning_rate": 1.963523399178939e-05, - "loss": 0.2561, - "step": 2235 - }, - { - "epoch": 0.11, - "grad_norm": 1.1851182812942889, - "learning_rate": 1.963479309261603e-05, - "loss": 0.2364, - "step": 2236 - }, - { - "epoch": 0.11, - "grad_norm": 1.8871984432827353, - "learning_rate": 1.9634351932098364e-05, - "loss": 0.2364, - "step": 2237 - }, - { - "epoch": 0.11, - "grad_norm": 1.2435776107199095, - "learning_rate": 1.9633910510248357e-05, - "loss": 0.2381, - "step": 2238 - }, - { - "epoch": 0.11, - "grad_norm": 1.3475281308085763, - "learning_rate": 1.9633468827077986e-05, - "loss": 0.2418, - "step": 2239 - }, - { - "epoch": 0.11, - "grad_norm": 1.239713466612954, - "learning_rate": 1.9633026882599228e-05, - "loss": 0.2504, - "step": 2240 - }, - { - "epoch": 0.11, - "grad_norm": 1.2328229260264705, - "learning_rate": 1.963258467682407e-05, - "loss": 0.241, - "step": 2241 - }, - { - "epoch": 0.11, - "grad_norm": 1.0341545828230583, - "learning_rate": 1.9632142209764514e-05, - "loss": 0.2174, - "step": 2242 - }, - { - "epoch": 0.11, - "grad_norm": 1.2902668946694982, - "learning_rate": 1.963169948143255e-05, - "loss": 0.2275, - "step": 2243 - }, - { - "epoch": 0.11, - "grad_norm": 1.0121788933691918, - "learning_rate": 1.9631256491840197e-05, - "loss": 0.2229, - "step": 2244 - }, - { - "epoch": 0.11, - "grad_norm": 0.9654980378203284, - "learning_rate": 1.9630813240999468e-05, - "loss": 0.2423, - "step": 2245 - }, - { - "epoch": 0.11, - "grad_norm": 1.3478278298336825, - "learning_rate": 1.963036972892238e-05, - "loss": 0.2736, - "step": 2246 - }, - { - "epoch": 0.11, - "grad_norm": 1.2691420401208928, - "learning_rate": 1.962992595562098e-05, - "loss": 0.2059, - "step": 2247 - }, - { - "epoch": 0.11, - "grad_norm": 1.374703279576238, - "learning_rate": 1.9629481921107287e-05, - "loss": 0.2613, - "step": 2248 - }, - { - "epoch": 0.11, - "grad_norm": 1.0652932569776843, - "learning_rate": 1.9629037625393352e-05, - "loss": 0.2316, - "step": 2249 - }, - { - "epoch": 0.11, - "grad_norm": 1.5137256334450104, - "learning_rate": 1.962859306849123e-05, - "loss": 0.2392, - "step": 2250 - }, - { - "epoch": 0.11, - "grad_norm": 1.107016666508369, - "learning_rate": 1.962814825041298e-05, - "loss": 0.2253, - "step": 2251 - }, - { - "epoch": 0.11, - "grad_norm": 0.9550128240410778, - "learning_rate": 1.962770317117066e-05, - "loss": 0.1973, - "step": 2252 - }, - { - "epoch": 0.11, - "grad_norm": 1.297841502121561, - "learning_rate": 1.9627257830776352e-05, - "loss": 0.2174, - "step": 2253 - }, - { - "epoch": 0.11, - "grad_norm": 1.1093582584200514, - "learning_rate": 1.9626812229242128e-05, - "loss": 0.2306, - "step": 2254 - }, - { - "epoch": 0.11, - "grad_norm": 1.2245798746882979, - "learning_rate": 1.962636636658008e-05, - "loss": 0.2535, - "step": 2255 - }, - { - "epoch": 0.11, - "grad_norm": 1.644044637919812, - "learning_rate": 1.9625920242802302e-05, - "loss": 0.2554, - "step": 2256 - }, - { - "epoch": 0.11, - "grad_norm": 1.1361609839406392, - "learning_rate": 1.962547385792089e-05, - "loss": 0.2492, - "step": 2257 - }, - { - "epoch": 0.11, - "grad_norm": 1.6662903840032885, - "learning_rate": 1.962502721194796e-05, - "loss": 0.224, - "step": 2258 - }, - { - "epoch": 0.11, - "grad_norm": 0.9270518048299475, - "learning_rate": 1.962458030489562e-05, - "loss": 0.2125, - "step": 2259 - }, - { - "epoch": 0.11, - "grad_norm": 0.9755561228878188, - "learning_rate": 1.9624133136775998e-05, - "loss": 0.2293, - "step": 2260 - }, - { - "epoch": 0.11, - "grad_norm": 1.8168211138034966, - "learning_rate": 1.962368570760122e-05, - "loss": 0.2527, - "step": 2261 - }, - { - "epoch": 0.12, - "grad_norm": 6.481777245879265, - "learning_rate": 1.9623238017383426e-05, - "loss": 0.2372, - "step": 2262 - }, - { - "epoch": 0.12, - "grad_norm": 0.9735772662420452, - "learning_rate": 1.9622790066134754e-05, - "loss": 0.2282, - "step": 2263 - }, - { - "epoch": 0.12, - "grad_norm": 1.1837794954838359, - "learning_rate": 1.962234185386736e-05, - "loss": 0.2151, - "step": 2264 - }, - { - "epoch": 0.12, - "grad_norm": 1.0788263704931076, - "learning_rate": 1.9621893380593398e-05, - "loss": 0.2346, - "step": 2265 - }, - { - "epoch": 0.12, - "grad_norm": 1.3895384690518788, - "learning_rate": 1.9621444646325036e-05, - "loss": 0.2514, - "step": 2266 - }, - { - "epoch": 0.12, - "grad_norm": 1.1628169725919841, - "learning_rate": 1.9620995651074443e-05, - "loss": 0.2242, - "step": 2267 - }, - { - "epoch": 0.12, - "grad_norm": 1.168404660899102, - "learning_rate": 1.9620546394853802e-05, - "loss": 0.2227, - "step": 2268 - }, - { - "epoch": 0.12, - "grad_norm": 1.1351391787739238, - "learning_rate": 1.9620096877675294e-05, - "loss": 0.2563, - "step": 2269 - }, - { - "epoch": 0.12, - "grad_norm": 1.0035468703101598, - "learning_rate": 1.9619647099551118e-05, - "loss": 0.2242, - "step": 2270 - }, - { - "epoch": 0.12, - "grad_norm": 1.0863642017242183, - "learning_rate": 1.9619197060493465e-05, - "loss": 0.2245, - "step": 2271 - }, - { - "epoch": 0.12, - "grad_norm": 1.3521259808783213, - "learning_rate": 1.9618746760514554e-05, - "loss": 0.2247, - "step": 2272 - }, - { - "epoch": 0.12, - "grad_norm": 1.1789057952904218, - "learning_rate": 1.9618296199626594e-05, - "loss": 0.2412, - "step": 2273 - }, - { - "epoch": 0.12, - "grad_norm": 1.1695044838914121, - "learning_rate": 1.9617845377841804e-05, - "loss": 0.2295, - "step": 2274 - }, - { - "epoch": 0.12, - "grad_norm": 0.911670320372682, - "learning_rate": 1.9617394295172415e-05, - "loss": 0.2248, - "step": 2275 - }, - { - "epoch": 0.12, - "grad_norm": 1.4974238095285362, - "learning_rate": 1.9616942951630668e-05, - "loss": 0.2576, - "step": 2276 - }, - { - "epoch": 0.12, - "grad_norm": 1.065884469776496, - "learning_rate": 1.9616491347228793e-05, - "loss": 0.2129, - "step": 2277 - }, - { - "epoch": 0.12, - "grad_norm": 1.0689950684727134, - "learning_rate": 1.961603948197905e-05, - "loss": 0.2123, - "step": 2278 - }, - { - "epoch": 0.12, - "grad_norm": 1.122557763242407, - "learning_rate": 1.9615587355893693e-05, - "loss": 0.2124, - "step": 2279 - }, - { - "epoch": 0.12, - "grad_norm": 1.6997662112278131, - "learning_rate": 1.9615134968984984e-05, - "loss": 0.2229, - "step": 2280 - }, - { - "epoch": 0.12, - "grad_norm": 1.427459018774372, - "learning_rate": 1.96146823212652e-05, - "loss": 0.2395, - "step": 2281 - }, - { - "epoch": 0.12, - "grad_norm": 1.174838610488931, - "learning_rate": 1.961422941274661e-05, - "loss": 0.2395, - "step": 2282 - }, - { - "epoch": 0.12, - "grad_norm": 1.0444712650544752, - "learning_rate": 1.9613776243441507e-05, - "loss": 0.2293, - "step": 2283 - }, - { - "epoch": 0.12, - "grad_norm": 0.9928779696890382, - "learning_rate": 1.9613322813362182e-05, - "loss": 0.2452, - "step": 2284 - }, - { - "epoch": 0.12, - "grad_norm": 0.9898668843506018, - "learning_rate": 1.961286912252093e-05, - "loss": 0.236, - "step": 2285 - }, - { - "epoch": 0.12, - "grad_norm": 1.0320698498409984, - "learning_rate": 1.961241517093006e-05, - "loss": 0.2426, - "step": 2286 - }, - { - "epoch": 0.12, - "grad_norm": 0.9456851193220972, - "learning_rate": 1.9611960958601886e-05, - "loss": 0.2317, - "step": 2287 - }, - { - "epoch": 0.12, - "grad_norm": 1.0427893908436772, - "learning_rate": 1.9611506485548728e-05, - "loss": 0.2243, - "step": 2288 - }, - { - "epoch": 0.12, - "grad_norm": 0.990790934611057, - "learning_rate": 1.9611051751782915e-05, - "loss": 0.2276, - "step": 2289 - }, - { - "epoch": 0.12, - "grad_norm": 1.4299898481089397, - "learning_rate": 1.961059675731678e-05, - "loss": 0.2421, - "step": 2290 - }, - { - "epoch": 0.12, - "grad_norm": 1.0567116203220435, - "learning_rate": 1.9610141502162662e-05, - "loss": 0.2549, - "step": 2291 - }, - { - "epoch": 0.12, - "grad_norm": 0.9813611728893015, - "learning_rate": 1.9609685986332918e-05, - "loss": 0.2272, - "step": 2292 - }, - { - "epoch": 0.12, - "grad_norm": 1.2665166548155724, - "learning_rate": 1.9609230209839894e-05, - "loss": 0.2406, - "step": 2293 - }, - { - "epoch": 0.12, - "grad_norm": 1.3882608563003989, - "learning_rate": 1.9608774172695964e-05, - "loss": 0.2168, - "step": 2294 - }, - { - "epoch": 0.12, - "grad_norm": 1.2389235543966157, - "learning_rate": 1.9608317874913484e-05, - "loss": 0.2293, - "step": 2295 - }, - { - "epoch": 0.12, - "grad_norm": 0.867329992139809, - "learning_rate": 1.9607861316504848e-05, - "loss": 0.2149, - "step": 2296 - }, - { - "epoch": 0.12, - "grad_norm": 0.9020334001335076, - "learning_rate": 1.9607404497482422e-05, - "loss": 0.2277, - "step": 2297 - }, - { - "epoch": 0.12, - "grad_norm": 0.845846595246084, - "learning_rate": 1.9606947417858614e-05, - "loss": 0.251, - "step": 2298 - }, - { - "epoch": 0.12, - "grad_norm": 1.1287125708824222, - "learning_rate": 1.960649007764581e-05, - "loss": 0.2455, - "step": 2299 - }, - { - "epoch": 0.12, - "grad_norm": 0.9295412239995064, - "learning_rate": 1.960603247685642e-05, - "loss": 0.2402, - "step": 2300 - }, - { - "epoch": 0.12, - "grad_norm": 0.9073561280730207, - "learning_rate": 1.9605574615502857e-05, - "loss": 0.2625, - "step": 2301 - }, - { - "epoch": 0.12, - "grad_norm": 1.561672595584736, - "learning_rate": 1.9605116493597544e-05, - "loss": 0.2539, - "step": 2302 - }, - { - "epoch": 0.12, - "grad_norm": 0.9185494380688095, - "learning_rate": 1.96046581111529e-05, - "loss": 0.2491, - "step": 2303 - }, - { - "epoch": 0.12, - "grad_norm": 0.9986931205874869, - "learning_rate": 1.9604199468181363e-05, - "loss": 0.2366, - "step": 2304 - }, - { - "epoch": 0.12, - "grad_norm": 0.9453153360183175, - "learning_rate": 1.960374056469537e-05, - "loss": 0.2174, - "step": 2305 - }, - { - "epoch": 0.12, - "grad_norm": 2.427772384150711, - "learning_rate": 1.9603281400707378e-05, - "loss": 0.2388, - "step": 2306 - }, - { - "epoch": 0.12, - "grad_norm": 1.0900731948657933, - "learning_rate": 1.9602821976229835e-05, - "loss": 0.23, - "step": 2307 - }, - { - "epoch": 0.12, - "grad_norm": 0.9054697290691388, - "learning_rate": 1.96023622912752e-05, - "loss": 0.2297, - "step": 2308 - }, - { - "epoch": 0.12, - "grad_norm": 0.9245203886194943, - "learning_rate": 1.9601902345855944e-05, - "loss": 0.2433, - "step": 2309 - }, - { - "epoch": 0.12, - "grad_norm": 1.0519878918731516, - "learning_rate": 1.9601442139984548e-05, - "loss": 0.2548, - "step": 2310 - }, - { - "epoch": 0.12, - "grad_norm": 0.8339937473761487, - "learning_rate": 1.9600981673673488e-05, - "loss": 0.2238, - "step": 2311 - }, - { - "epoch": 0.12, - "grad_norm": 1.178375488144858, - "learning_rate": 1.9600520946935263e-05, - "loss": 0.221, - "step": 2312 - }, - { - "epoch": 0.12, - "grad_norm": 0.8533874274139382, - "learning_rate": 1.9600059959782364e-05, - "loss": 0.2291, - "step": 2313 - }, - { - "epoch": 0.12, - "grad_norm": 0.9109953738205394, - "learning_rate": 1.9599598712227294e-05, - "loss": 0.2276, - "step": 2314 - }, - { - "epoch": 0.12, - "grad_norm": 0.8551206349574578, - "learning_rate": 1.9599137204282566e-05, - "loss": 0.2404, - "step": 2315 - }, - { - "epoch": 0.12, - "grad_norm": 0.9342971710035325, - "learning_rate": 1.95986754359607e-05, - "loss": 0.2176, - "step": 2316 - }, - { - "epoch": 0.12, - "grad_norm": 1.0563711413685022, - "learning_rate": 1.959821340727422e-05, - "loss": 0.2443, - "step": 2317 - }, - { - "epoch": 0.12, - "grad_norm": 0.9026438168465336, - "learning_rate": 1.9597751118235662e-05, - "loss": 0.2257, - "step": 2318 - }, - { - "epoch": 0.12, - "grad_norm": 1.0318289234723361, - "learning_rate": 1.9597288568857563e-05, - "loss": 0.219, - "step": 2319 - }, - { - "epoch": 0.12, - "grad_norm": 0.8967950538630484, - "learning_rate": 1.9596825759152466e-05, - "loss": 0.2617, - "step": 2320 - }, - { - "epoch": 0.12, - "grad_norm": 0.9702418393564891, - "learning_rate": 1.959636268913293e-05, - "loss": 0.2492, - "step": 2321 - }, - { - "epoch": 0.12, - "grad_norm": 1.306990740051603, - "learning_rate": 1.9595899358811515e-05, - "loss": 0.2268, - "step": 2322 - }, - { - "epoch": 0.12, - "grad_norm": 0.8623225432994753, - "learning_rate": 1.9595435768200785e-05, - "loss": 0.2351, - "step": 2323 - }, - { - "epoch": 0.12, - "grad_norm": 0.8927352070901556, - "learning_rate": 1.9594971917313323e-05, - "loss": 0.2814, - "step": 2324 - }, - { - "epoch": 0.12, - "grad_norm": 1.1475126857496665, - "learning_rate": 1.9594507806161703e-05, - "loss": 0.2089, - "step": 2325 - }, - { - "epoch": 0.12, - "grad_norm": 0.9149296536113013, - "learning_rate": 1.9594043434758515e-05, - "loss": 0.2321, - "step": 2326 - }, - { - "epoch": 0.12, - "grad_norm": 1.198714270590255, - "learning_rate": 1.959357880311636e-05, - "loss": 0.2626, - "step": 2327 - }, - { - "epoch": 0.12, - "grad_norm": 1.0938019109764625, - "learning_rate": 1.9593113911247836e-05, - "loss": 0.2646, - "step": 2328 - }, - { - "epoch": 0.12, - "grad_norm": 1.4410644951371105, - "learning_rate": 1.9592648759165555e-05, - "loss": 0.2555, - "step": 2329 - }, - { - "epoch": 0.12, - "grad_norm": 1.1039407282923293, - "learning_rate": 1.9592183346882135e-05, - "loss": 0.2521, - "step": 2330 - }, - { - "epoch": 0.12, - "grad_norm": 1.6247005333228324, - "learning_rate": 1.95917176744102e-05, - "loss": 0.2366, - "step": 2331 - }, - { - "epoch": 0.12, - "grad_norm": 1.4229636128215575, - "learning_rate": 1.9591251741762384e-05, - "loss": 0.2175, - "step": 2332 - }, - { - "epoch": 0.12, - "grad_norm": 0.9608883015159911, - "learning_rate": 1.959078554895132e-05, - "loss": 0.2267, - "step": 2333 - }, - { - "epoch": 0.12, - "grad_norm": 0.8501715708850301, - "learning_rate": 1.959031909598966e-05, - "loss": 0.2308, - "step": 2334 - }, - { - "epoch": 0.12, - "grad_norm": 0.8588300419178304, - "learning_rate": 1.958985238289005e-05, - "loss": 0.218, - "step": 2335 - }, - { - "epoch": 0.12, - "grad_norm": 1.1299837382980906, - "learning_rate": 1.9589385409665152e-05, - "loss": 0.2247, - "step": 2336 - }, - { - "epoch": 0.12, - "grad_norm": 0.8643912599959122, - "learning_rate": 1.9588918176327632e-05, - "loss": 0.2627, - "step": 2337 - }, - { - "epoch": 0.12, - "grad_norm": 1.0214914522161034, - "learning_rate": 1.9588450682890167e-05, - "loss": 0.2192, - "step": 2338 - }, - { - "epoch": 0.12, - "grad_norm": 0.8877732443442983, - "learning_rate": 1.9587982929365434e-05, - "loss": 0.2435, - "step": 2339 - }, - { - "epoch": 0.12, - "grad_norm": 1.2859468661713847, - "learning_rate": 1.9587514915766124e-05, - "loss": 0.2073, - "step": 2340 - }, - { - "epoch": 0.12, - "grad_norm": 1.0567286604363095, - "learning_rate": 1.958704664210493e-05, - "loss": 0.2664, - "step": 2341 - }, - { - "epoch": 0.12, - "grad_norm": 0.9992382427909487, - "learning_rate": 1.9586578108394555e-05, - "loss": 0.2252, - "step": 2342 - }, - { - "epoch": 0.12, - "grad_norm": 0.9196579061142152, - "learning_rate": 1.9586109314647705e-05, - "loss": 0.228, - "step": 2343 - }, - { - "epoch": 0.12, - "grad_norm": 1.166095192504243, - "learning_rate": 1.9585640260877102e-05, - "loss": 0.2525, - "step": 2344 - }, - { - "epoch": 0.12, - "grad_norm": 0.8412404902739583, - "learning_rate": 1.958517094709546e-05, - "loss": 0.2287, - "step": 2345 - }, - { - "epoch": 0.12, - "grad_norm": 0.919152691293656, - "learning_rate": 1.9584701373315523e-05, - "loss": 0.2087, - "step": 2346 - }, - { - "epoch": 0.12, - "grad_norm": 1.3970527229544, - "learning_rate": 1.9584231539550012e-05, - "loss": 0.2386, - "step": 2347 - }, - { - "epoch": 0.12, - "grad_norm": 0.8814552434609396, - "learning_rate": 1.9583761445811686e-05, - "loss": 0.204, - "step": 2348 - }, - { - "epoch": 0.12, - "grad_norm": 1.1366633521260399, - "learning_rate": 1.9583291092113283e-05, - "loss": 0.2125, - "step": 2349 - }, - { - "epoch": 0.12, - "grad_norm": 1.5839696990479637, - "learning_rate": 1.958282047846757e-05, - "loss": 0.2572, - "step": 2350 - }, - { - "epoch": 0.12, - "grad_norm": 0.989785096957875, - "learning_rate": 1.9582349604887313e-05, - "loss": 0.2419, - "step": 2351 - }, - { - "epoch": 0.12, - "grad_norm": 1.551925396859981, - "learning_rate": 1.958187847138528e-05, - "loss": 0.2099, - "step": 2352 - }, - { - "epoch": 0.12, - "grad_norm": 1.0599349357850392, - "learning_rate": 1.958140707797425e-05, - "loss": 0.2289, - "step": 2353 - }, - { - "epoch": 0.12, - "grad_norm": 0.9251893046227032, - "learning_rate": 1.9580935424667015e-05, - "loss": 0.2462, - "step": 2354 - }, - { - "epoch": 0.12, - "grad_norm": 1.0352904888157413, - "learning_rate": 1.9580463511476365e-05, - "loss": 0.2442, - "step": 2355 - }, - { - "epoch": 0.12, - "grad_norm": 0.8960283950787143, - "learning_rate": 1.95799913384151e-05, - "loss": 0.2167, - "step": 2356 - }, - { - "epoch": 0.12, - "grad_norm": 1.0395096607347472, - "learning_rate": 1.9579518905496032e-05, - "loss": 0.2407, - "step": 2357 - }, - { - "epoch": 0.12, - "grad_norm": 0.9843402187850503, - "learning_rate": 1.9579046212731968e-05, - "loss": 0.2396, - "step": 2358 - }, - { - "epoch": 0.12, - "grad_norm": 2.112271692780199, - "learning_rate": 1.957857326013574e-05, - "loss": 0.2292, - "step": 2359 - }, - { - "epoch": 0.12, - "grad_norm": 1.221454329725577, - "learning_rate": 1.9578100047720164e-05, - "loss": 0.2175, - "step": 2360 - }, - { - "epoch": 0.12, - "grad_norm": 0.9484425312273643, - "learning_rate": 1.957762657549809e-05, - "loss": 0.2285, - "step": 2361 - }, - { - "epoch": 0.12, - "grad_norm": 0.9496247993199408, - "learning_rate": 1.957715284348235e-05, - "loss": 0.2407, - "step": 2362 - }, - { - "epoch": 0.12, - "grad_norm": 1.0254007460980028, - "learning_rate": 1.95766788516858e-05, - "loss": 0.2231, - "step": 2363 - }, - { - "epoch": 0.12, - "grad_norm": 0.896010356490778, - "learning_rate": 1.9576204600121293e-05, - "loss": 0.2314, - "step": 2364 - }, - { - "epoch": 0.12, - "grad_norm": 0.9778267316564238, - "learning_rate": 1.9575730088801696e-05, - "loss": 0.2544, - "step": 2365 - }, - { - "epoch": 0.12, - "grad_norm": 1.0418675619291378, - "learning_rate": 1.957525531773988e-05, - "loss": 0.2347, - "step": 2366 - }, - { - "epoch": 0.12, - "grad_norm": 1.34539811818473, - "learning_rate": 1.9574780286948724e-05, - "loss": 0.2461, - "step": 2367 - }, - { - "epoch": 0.12, - "grad_norm": 1.2117159081752875, - "learning_rate": 1.957430499644111e-05, - "loss": 0.2365, - "step": 2368 - }, - { - "epoch": 0.12, - "grad_norm": 0.9916311588570211, - "learning_rate": 1.9573829446229935e-05, - "loss": 0.2423, - "step": 2369 - }, - { - "epoch": 0.12, - "grad_norm": 0.8642744609344306, - "learning_rate": 1.9573353636328094e-05, - "loss": 0.1956, - "step": 2370 - }, - { - "epoch": 0.12, - "grad_norm": 1.2779774017391479, - "learning_rate": 1.9572877566748495e-05, - "loss": 0.2349, - "step": 2371 - }, - { - "epoch": 0.12, - "grad_norm": 0.9530889379498755, - "learning_rate": 1.957240123750405e-05, - "loss": 0.2795, - "step": 2372 - }, - { - "epoch": 0.12, - "grad_norm": 2.63061203732391, - "learning_rate": 1.9571924648607684e-05, - "loss": 0.2606, - "step": 2373 - }, - { - "epoch": 0.12, - "grad_norm": 1.206120338499393, - "learning_rate": 1.9571447800072318e-05, - "loss": 0.2514, - "step": 2374 - }, - { - "epoch": 0.12, - "grad_norm": 0.8251362601351317, - "learning_rate": 1.957097069191089e-05, - "loss": 0.2104, - "step": 2375 - }, - { - "epoch": 0.12, - "grad_norm": 1.0358611536294893, - "learning_rate": 1.9570493324136344e-05, - "loss": 0.2304, - "step": 2376 - }, - { - "epoch": 0.12, - "grad_norm": 0.9166959393585152, - "learning_rate": 1.9570015696761623e-05, - "loss": 0.2572, - "step": 2377 - }, - { - "epoch": 0.12, - "grad_norm": 1.0968226275193573, - "learning_rate": 1.9569537809799687e-05, - "loss": 0.2106, - "step": 2378 - }, - { - "epoch": 0.12, - "grad_norm": 1.284618905986818, - "learning_rate": 1.9569059663263498e-05, - "loss": 0.218, - "step": 2379 - }, - { - "epoch": 0.12, - "grad_norm": 1.07367940810242, - "learning_rate": 1.9568581257166025e-05, - "loss": 0.2238, - "step": 2380 - }, - { - "epoch": 0.12, - "grad_norm": 0.9241633450880841, - "learning_rate": 1.9568102591520246e-05, - "loss": 0.2443, - "step": 2381 - }, - { - "epoch": 0.12, - "grad_norm": 1.8693666074144997, - "learning_rate": 1.956762366633914e-05, - "loss": 0.2408, - "step": 2382 - }, - { - "epoch": 0.12, - "grad_norm": 1.152812884267505, - "learning_rate": 1.956714448163571e-05, - "loss": 0.2606, - "step": 2383 - }, - { - "epoch": 0.12, - "grad_norm": 1.0120887204947118, - "learning_rate": 1.9566665037422937e-05, - "loss": 0.2273, - "step": 2384 - }, - { - "epoch": 0.12, - "grad_norm": 0.8850621950766456, - "learning_rate": 1.9566185333713835e-05, - "loss": 0.2189, - "step": 2385 - }, - { - "epoch": 0.12, - "grad_norm": 1.0453797190650638, - "learning_rate": 1.956570537052142e-05, - "loss": 0.2307, - "step": 2386 - }, - { - "epoch": 0.12, - "grad_norm": 0.966999828439038, - "learning_rate": 1.9565225147858704e-05, - "loss": 0.2481, - "step": 2387 - }, - { - "epoch": 0.12, - "grad_norm": 1.795290828022784, - "learning_rate": 1.9564744665738714e-05, - "loss": 0.2282, - "step": 2388 - }, - { - "epoch": 0.12, - "grad_norm": 0.9236235449239979, - "learning_rate": 1.9564263924174488e-05, - "loss": 0.2359, - "step": 2389 - }, - { - "epoch": 0.12, - "grad_norm": 0.987110655783361, - "learning_rate": 1.9563782923179063e-05, - "loss": 0.2457, - "step": 2390 - }, - { - "epoch": 0.12, - "grad_norm": 0.9355041913405805, - "learning_rate": 1.9563301662765482e-05, - "loss": 0.225, - "step": 2391 - }, - { - "epoch": 0.12, - "grad_norm": 0.8777824597160528, - "learning_rate": 1.9562820142946808e-05, - "loss": 0.2172, - "step": 2392 - }, - { - "epoch": 0.12, - "grad_norm": 0.9367037353854338, - "learning_rate": 1.9562338363736095e-05, - "loss": 0.2321, - "step": 2393 - }, - { - "epoch": 0.12, - "grad_norm": 1.6404557075320965, - "learning_rate": 1.9561856325146414e-05, - "loss": 0.2112, - "step": 2394 - }, - { - "epoch": 0.12, - "grad_norm": 1.0918037055500003, - "learning_rate": 1.956137402719084e-05, - "loss": 0.226, - "step": 2395 - }, - { - "epoch": 0.12, - "grad_norm": 1.0405002814531292, - "learning_rate": 1.9560891469882457e-05, - "loss": 0.2411, - "step": 2396 - }, - { - "epoch": 0.12, - "grad_norm": 1.0119293437402799, - "learning_rate": 1.9560408653234352e-05, - "loss": 0.2338, - "step": 2397 - }, - { - "epoch": 0.12, - "grad_norm": 1.0379855418069062, - "learning_rate": 1.9559925577259622e-05, - "loss": 0.2481, - "step": 2398 - }, - { - "epoch": 0.12, - "grad_norm": 1.1508134685563278, - "learning_rate": 1.9559442241971373e-05, - "loss": 0.2326, - "step": 2399 - }, - { - "epoch": 0.12, - "grad_norm": 0.8364922526982728, - "learning_rate": 1.955895864738271e-05, - "loss": 0.2106, - "step": 2400 - }, - { - "epoch": 0.12, - "grad_norm": 1.0375308864937787, - "learning_rate": 1.955847479350675e-05, - "loss": 0.2316, - "step": 2401 - }, - { - "epoch": 0.12, - "grad_norm": 1.0535751080622453, - "learning_rate": 1.955799068035663e-05, - "loss": 0.2389, - "step": 2402 - }, - { - "epoch": 0.12, - "grad_norm": 1.928196115611736, - "learning_rate": 1.955750630794547e-05, - "loss": 0.231, - "step": 2403 - }, - { - "epoch": 0.12, - "grad_norm": 0.9919358376481566, - "learning_rate": 1.955702167628641e-05, - "loss": 0.2104, - "step": 2404 - }, - { - "epoch": 0.12, - "grad_norm": 1.0893302608613895, - "learning_rate": 1.9556536785392598e-05, - "loss": 0.2239, - "step": 2405 - }, - { - "epoch": 0.12, - "grad_norm": 1.5843172922704782, - "learning_rate": 1.9556051635277184e-05, - "loss": 0.2484, - "step": 2406 - }, - { - "epoch": 0.12, - "grad_norm": 1.188746173343023, - "learning_rate": 1.9555566225953333e-05, - "loss": 0.2239, - "step": 2407 - }, - { - "epoch": 0.12, - "grad_norm": 0.8501232211498138, - "learning_rate": 1.9555080557434206e-05, - "loss": 0.2239, - "step": 2408 - }, - { - "epoch": 0.12, - "grad_norm": 1.6514133365172499, - "learning_rate": 1.955459462973298e-05, - "loss": 0.2575, - "step": 2409 - }, - { - "epoch": 0.12, - "grad_norm": 0.8867170258575549, - "learning_rate": 1.9554108442862836e-05, - "loss": 0.2368, - "step": 2410 - }, - { - "epoch": 0.12, - "grad_norm": 1.1563166041868196, - "learning_rate": 1.955362199683696e-05, - "loss": 0.2319, - "step": 2411 - }, - { - "epoch": 0.12, - "grad_norm": 1.0302301616491012, - "learning_rate": 1.9553135291668548e-05, - "loss": 0.2467, - "step": 2412 - }, - { - "epoch": 0.12, - "grad_norm": 1.1775203268994718, - "learning_rate": 1.95526483273708e-05, - "loss": 0.248, - "step": 2413 - }, - { - "epoch": 0.12, - "grad_norm": 1.2472244643540031, - "learning_rate": 1.9552161103956932e-05, - "loss": 0.2336, - "step": 2414 - }, - { - "epoch": 0.12, - "grad_norm": 0.9942684568484321, - "learning_rate": 1.955167362144015e-05, - "loss": 0.2312, - "step": 2415 - }, - { - "epoch": 0.12, - "grad_norm": 1.2827573628017537, - "learning_rate": 1.955118587983368e-05, - "loss": 0.2337, - "step": 2416 - }, - { - "epoch": 0.12, - "grad_norm": 1.0779477429174507, - "learning_rate": 1.9550697879150757e-05, - "loss": 0.2528, - "step": 2417 - }, - { - "epoch": 0.12, - "grad_norm": 0.9287186494476223, - "learning_rate": 1.9550209619404616e-05, - "loss": 0.2146, - "step": 2418 - }, - { - "epoch": 0.12, - "grad_norm": 0.9780419354412737, - "learning_rate": 1.9549721100608494e-05, - "loss": 0.2442, - "step": 2419 - }, - { - "epoch": 0.12, - "grad_norm": 1.0299340666660326, - "learning_rate": 1.954923232277565e-05, - "loss": 0.2229, - "step": 2420 - }, - { - "epoch": 0.12, - "grad_norm": 0.9339979389685626, - "learning_rate": 1.954874328591934e-05, - "loss": 0.2441, - "step": 2421 - }, - { - "epoch": 0.12, - "grad_norm": 0.9663170606947568, - "learning_rate": 1.9548253990052833e-05, - "loss": 0.2303, - "step": 2422 - }, - { - "epoch": 0.12, - "grad_norm": 1.0915466600904569, - "learning_rate": 1.9547764435189395e-05, - "loss": 0.2477, - "step": 2423 - }, - { - "epoch": 0.12, - "grad_norm": 2.575099114186478, - "learning_rate": 1.9547274621342303e-05, - "loss": 0.2472, - "step": 2424 - }, - { - "epoch": 0.12, - "grad_norm": 0.9211825722405153, - "learning_rate": 1.9546784548524852e-05, - "loss": 0.2182, - "step": 2425 - }, - { - "epoch": 0.12, - "grad_norm": 1.6693874263644388, - "learning_rate": 1.954629421675033e-05, - "loss": 0.2339, - "step": 2426 - }, - { - "epoch": 0.12, - "grad_norm": 1.560856670186122, - "learning_rate": 1.954580362603204e-05, - "loss": 0.244, - "step": 2427 - }, - { - "epoch": 0.12, - "grad_norm": 1.3346896255835428, - "learning_rate": 1.954531277638328e-05, - "loss": 0.2254, - "step": 2428 - }, - { - "epoch": 0.12, - "grad_norm": 0.9549915898367675, - "learning_rate": 1.954482166781738e-05, - "loss": 0.2545, - "step": 2429 - }, - { - "epoch": 0.12, - "grad_norm": 1.1193417924601143, - "learning_rate": 1.9544330300347655e-05, - "loss": 0.2506, - "step": 2430 - }, - { - "epoch": 0.12, - "grad_norm": 1.1071622856945116, - "learning_rate": 1.9543838673987424e-05, - "loss": 0.2501, - "step": 2431 - }, - { - "epoch": 0.12, - "grad_norm": 1.0377912267602951, - "learning_rate": 1.9543346788750032e-05, - "loss": 0.2332, - "step": 2432 - }, - { - "epoch": 0.12, - "grad_norm": 0.880835359230946, - "learning_rate": 1.9542854644648824e-05, - "loss": 0.2402, - "step": 2433 - }, - { - "epoch": 0.12, - "grad_norm": 0.934264549762433, - "learning_rate": 1.954236224169714e-05, - "loss": 0.2234, - "step": 2434 - }, - { - "epoch": 0.12, - "grad_norm": 0.946717586900411, - "learning_rate": 1.9541869579908343e-05, - "loss": 0.2546, - "step": 2435 - }, - { - "epoch": 0.12, - "grad_norm": 2.0749355807516685, - "learning_rate": 1.9541376659295796e-05, - "loss": 0.2423, - "step": 2436 - }, - { - "epoch": 0.12, - "grad_norm": 1.0938551679100832, - "learning_rate": 1.9540883479872863e-05, - "loss": 0.2355, - "step": 2437 - }, - { - "epoch": 0.12, - "grad_norm": 0.9996299987531758, - "learning_rate": 1.954039004165293e-05, - "loss": 0.2184, - "step": 2438 - }, - { - "epoch": 0.12, - "grad_norm": 0.9625907051201483, - "learning_rate": 1.953989634464938e-05, - "loss": 0.2111, - "step": 2439 - }, - { - "epoch": 0.12, - "grad_norm": 0.9399174909270136, - "learning_rate": 1.9539402388875598e-05, - "loss": 0.2269, - "step": 2440 - }, - { - "epoch": 0.12, - "grad_norm": 1.119626479040853, - "learning_rate": 1.9538908174344994e-05, - "loss": 0.2429, - "step": 2441 - }, - { - "epoch": 0.12, - "grad_norm": 1.0974321999890257, - "learning_rate": 1.9538413701070964e-05, - "loss": 0.2383, - "step": 2442 - }, - { - "epoch": 0.12, - "grad_norm": 1.2114085536835701, - "learning_rate": 1.9537918969066923e-05, - "loss": 0.2101, - "step": 2443 - }, - { - "epoch": 0.12, - "grad_norm": 1.6206815067868514, - "learning_rate": 1.953742397834629e-05, - "loss": 0.2238, - "step": 2444 - }, - { - "epoch": 0.12, - "grad_norm": 1.1385008062544557, - "learning_rate": 1.9536928728922496e-05, - "loss": 0.1996, - "step": 2445 - }, - { - "epoch": 0.12, - "grad_norm": 0.9357832841400486, - "learning_rate": 1.953643322080897e-05, - "loss": 0.2372, - "step": 2446 - }, - { - "epoch": 0.12, - "grad_norm": 0.9088175364992762, - "learning_rate": 1.9535937454019155e-05, - "loss": 0.2301, - "step": 2447 - }, - { - "epoch": 0.12, - "grad_norm": 1.0977441998849506, - "learning_rate": 1.9535441428566496e-05, - "loss": 0.2296, - "step": 2448 - }, - { - "epoch": 0.12, - "grad_norm": 1.0568730184364237, - "learning_rate": 1.9534945144464452e-05, - "loss": 0.2048, - "step": 2449 - }, - { - "epoch": 0.12, - "grad_norm": 1.7694547827815719, - "learning_rate": 1.953444860172648e-05, - "loss": 0.2341, - "step": 2450 - }, - { - "epoch": 0.12, - "grad_norm": 0.9535818827060624, - "learning_rate": 1.9533951800366052e-05, - "loss": 0.2231, - "step": 2451 - }, - { - "epoch": 0.12, - "grad_norm": 2.0852235238905874, - "learning_rate": 1.9533454740396645e-05, - "loss": 0.2207, - "step": 2452 - }, - { - "epoch": 0.12, - "grad_norm": 1.2273715146657684, - "learning_rate": 1.953295742183174e-05, - "loss": 0.2329, - "step": 2453 - }, - { - "epoch": 0.12, - "grad_norm": 1.0468236296362416, - "learning_rate": 1.9532459844684824e-05, - "loss": 0.2469, - "step": 2454 - }, - { - "epoch": 0.12, - "grad_norm": 0.888539113561735, - "learning_rate": 1.9531962008969396e-05, - "loss": 0.213, - "step": 2455 - }, - { - "epoch": 0.12, - "grad_norm": 0.901360077521206, - "learning_rate": 1.953146391469896e-05, - "loss": 0.2335, - "step": 2456 - }, - { - "epoch": 0.12, - "grad_norm": 1.0327000960606898, - "learning_rate": 1.953096556188703e-05, - "loss": 0.2289, - "step": 2457 - }, - { - "epoch": 0.12, - "grad_norm": 0.9485176494343361, - "learning_rate": 1.9530466950547118e-05, - "loss": 0.2263, - "step": 2458 - }, - { - "epoch": 0.13, - "grad_norm": 1.1105931610889015, - "learning_rate": 1.9529968080692753e-05, - "loss": 0.2316, - "step": 2459 - }, - { - "epoch": 0.13, - "grad_norm": 0.9852744012552714, - "learning_rate": 1.9529468952337468e-05, - "loss": 0.2566, - "step": 2460 - }, - { - "epoch": 0.13, - "grad_norm": 1.037940345459345, - "learning_rate": 1.9528969565494792e-05, - "loss": 0.2414, - "step": 2461 - }, - { - "epoch": 0.13, - "grad_norm": 1.376993818426987, - "learning_rate": 1.9528469920178287e-05, - "loss": 0.2499, - "step": 2462 - }, - { - "epoch": 0.13, - "grad_norm": 0.8866653946404849, - "learning_rate": 1.9527970016401493e-05, - "loss": 0.2225, - "step": 2463 - }, - { - "epoch": 0.13, - "grad_norm": 1.0459832052160578, - "learning_rate": 1.9527469854177973e-05, - "loss": 0.2262, - "step": 2464 - }, - { - "epoch": 0.13, - "grad_norm": 0.9970960002026228, - "learning_rate": 1.9526969433521298e-05, - "loss": 0.2127, - "step": 2465 - }, - { - "epoch": 0.13, - "grad_norm": 1.031139726816588, - "learning_rate": 1.9526468754445035e-05, - "loss": 0.239, - "step": 2466 - }, - { - "epoch": 0.13, - "grad_norm": 0.9082242265666298, - "learning_rate": 1.9525967816962775e-05, - "loss": 0.2135, - "step": 2467 - }, - { - "epoch": 0.13, - "grad_norm": 1.0929122867480856, - "learning_rate": 1.9525466621088093e-05, - "loss": 0.2361, - "step": 2468 - }, - { - "epoch": 0.13, - "grad_norm": 0.843789342979408, - "learning_rate": 1.95249651668346e-05, - "loss": 0.2289, - "step": 2469 - }, - { - "epoch": 0.13, - "grad_norm": 1.3704990673651014, - "learning_rate": 1.952446345421588e-05, - "loss": 0.254, - "step": 2470 - }, - { - "epoch": 0.13, - "grad_norm": 0.861102949292528, - "learning_rate": 1.9523961483245552e-05, - "loss": 0.2082, - "step": 2471 - }, - { - "epoch": 0.13, - "grad_norm": 0.9862142239764253, - "learning_rate": 1.9523459253937233e-05, - "loss": 0.2239, - "step": 2472 - }, - { - "epoch": 0.13, - "grad_norm": 0.9025994750433144, - "learning_rate": 1.9522956766304543e-05, - "loss": 0.2438, - "step": 2473 - }, - { - "epoch": 0.13, - "grad_norm": 1.0347478039910887, - "learning_rate": 1.9522454020361116e-05, - "loss": 0.2403, - "step": 2474 - }, - { - "epoch": 0.13, - "grad_norm": 1.268028229249795, - "learning_rate": 1.9521951016120582e-05, - "loss": 0.2541, - "step": 2475 - }, - { - "epoch": 0.13, - "grad_norm": 0.9898697278724068, - "learning_rate": 1.952144775359659e-05, - "loss": 0.2636, - "step": 2476 - }, - { - "epoch": 0.13, - "grad_norm": 0.9029126837260496, - "learning_rate": 1.9520944232802793e-05, - "loss": 0.2285, - "step": 2477 - }, - { - "epoch": 0.13, - "grad_norm": 3.4451648613384407, - "learning_rate": 1.9520440453752842e-05, - "loss": 0.2512, - "step": 2478 - }, - { - "epoch": 0.13, - "grad_norm": 1.3173582760945324, - "learning_rate": 1.951993641646041e-05, - "loss": 0.2275, - "step": 2479 - }, - { - "epoch": 0.13, - "grad_norm": 1.1085719825276406, - "learning_rate": 1.951943212093916e-05, - "loss": 0.2278, - "step": 2480 - }, - { - "epoch": 0.13, - "grad_norm": 1.0384753106293034, - "learning_rate": 1.951892756720278e-05, - "loss": 0.2648, - "step": 2481 - }, - { - "epoch": 0.13, - "grad_norm": 1.100632928939612, - "learning_rate": 1.9518422755264947e-05, - "loss": 0.2049, - "step": 2482 - }, - { - "epoch": 0.13, - "grad_norm": 1.1089547410194291, - "learning_rate": 1.9517917685139365e-05, - "loss": 0.2381, - "step": 2483 - }, - { - "epoch": 0.13, - "grad_norm": 2.1286418385854793, - "learning_rate": 1.9517412356839727e-05, - "loss": 0.2673, - "step": 2484 - }, - { - "epoch": 0.13, - "grad_norm": 0.7957571895575681, - "learning_rate": 1.951690677037974e-05, - "loss": 0.2002, - "step": 2485 - }, - { - "epoch": 0.13, - "grad_norm": 1.2857267543737927, - "learning_rate": 1.9516400925773118e-05, - "loss": 0.2252, - "step": 2486 - }, - { - "epoch": 0.13, - "grad_norm": 0.9023184796103526, - "learning_rate": 1.9515894823033584e-05, - "loss": 0.2185, - "step": 2487 - }, - { - "epoch": 0.13, - "grad_norm": 1.1830975982529575, - "learning_rate": 1.9515388462174868e-05, - "loss": 0.261, - "step": 2488 - }, - { - "epoch": 0.13, - "grad_norm": 0.9656013811551463, - "learning_rate": 1.95148818432107e-05, - "loss": 0.2556, - "step": 2489 - }, - { - "epoch": 0.13, - "grad_norm": 0.9506680880228655, - "learning_rate": 1.9514374966154826e-05, - "loss": 0.2196, - "step": 2490 - }, - { - "epoch": 0.13, - "grad_norm": 1.0597012016818077, - "learning_rate": 1.951386783102099e-05, - "loss": 0.2247, - "step": 2491 - }, - { - "epoch": 0.13, - "grad_norm": 0.9004544786269002, - "learning_rate": 1.9513360437822957e-05, - "loss": 0.2284, - "step": 2492 - }, - { - "epoch": 0.13, - "grad_norm": 1.0277177890445819, - "learning_rate": 1.9512852786574483e-05, - "loss": 0.2368, - "step": 2493 - }, - { - "epoch": 0.13, - "grad_norm": 0.8764913099954449, - "learning_rate": 1.951234487728934e-05, - "loss": 0.2349, - "step": 2494 - }, - { - "epoch": 0.13, - "grad_norm": 1.0231867249114779, - "learning_rate": 1.9511836709981306e-05, - "loss": 0.2336, - "step": 2495 - }, - { - "epoch": 0.13, - "grad_norm": 1.1317906311257189, - "learning_rate": 1.951132828466416e-05, - "loss": 0.2067, - "step": 2496 - }, - { - "epoch": 0.13, - "grad_norm": 1.0525609819493371, - "learning_rate": 1.95108196013517e-05, - "loss": 0.2466, - "step": 2497 - }, - { - "epoch": 0.13, - "grad_norm": 1.3414958850247156, - "learning_rate": 1.951031066005772e-05, - "loss": 0.2371, - "step": 2498 - }, - { - "epoch": 0.13, - "grad_norm": 0.9807391428552752, - "learning_rate": 1.9509801460796027e-05, - "loss": 0.2522, - "step": 2499 - }, - { - "epoch": 0.13, - "grad_norm": 1.1634163941213589, - "learning_rate": 1.950929200358043e-05, - "loss": 0.2359, - "step": 2500 - }, - { - "epoch": 0.13, - "grad_norm": 0.858070387539989, - "learning_rate": 1.9508782288424754e-05, - "loss": 0.2278, - "step": 2501 - }, - { - "epoch": 0.13, - "grad_norm": 0.968181824391059, - "learning_rate": 1.950827231534282e-05, - "loss": 0.2317, - "step": 2502 - }, - { - "epoch": 0.13, - "grad_norm": 1.1859112241043284, - "learning_rate": 1.950776208434846e-05, - "loss": 0.2141, - "step": 2503 - }, - { - "epoch": 0.13, - "grad_norm": 1.1055241908293456, - "learning_rate": 1.9507251595455524e-05, - "loss": 0.233, - "step": 2504 - }, - { - "epoch": 0.13, - "grad_norm": 1.1720307433593404, - "learning_rate": 1.9506740848677845e-05, - "loss": 0.2295, - "step": 2505 - }, - { - "epoch": 0.13, - "grad_norm": 0.9640362423866915, - "learning_rate": 1.9506229844029283e-05, - "loss": 0.2212, - "step": 2506 - }, - { - "epoch": 0.13, - "grad_norm": 1.1471940629954807, - "learning_rate": 1.95057185815237e-05, - "loss": 0.2176, - "step": 2507 - }, - { - "epoch": 0.13, - "grad_norm": 1.2772531433878958, - "learning_rate": 1.9505207061174966e-05, - "loss": 0.245, - "step": 2508 - }, - { - "epoch": 0.13, - "grad_norm": 1.024101923763926, - "learning_rate": 1.9504695282996953e-05, - "loss": 0.2446, - "step": 2509 - }, - { - "epoch": 0.13, - "grad_norm": 1.1602529558305126, - "learning_rate": 1.9504183247003544e-05, - "loss": 0.2296, - "step": 2510 - }, - { - "epoch": 0.13, - "grad_norm": 1.1835773861670562, - "learning_rate": 1.9503670953208628e-05, - "loss": 0.2329, - "step": 2511 - }, - { - "epoch": 0.13, - "grad_norm": 0.9684880160608303, - "learning_rate": 1.9503158401626098e-05, - "loss": 0.2482, - "step": 2512 - }, - { - "epoch": 0.13, - "grad_norm": 1.3001737299396119, - "learning_rate": 1.950264559226986e-05, - "loss": 0.2391, - "step": 2513 - }, - { - "epoch": 0.13, - "grad_norm": 1.056275760297049, - "learning_rate": 1.9502132525153826e-05, - "loss": 0.2292, - "step": 2514 - }, - { - "epoch": 0.13, - "grad_norm": 0.8285779120395363, - "learning_rate": 1.950161920029191e-05, - "loss": 0.2096, - "step": 2515 - }, - { - "epoch": 0.13, - "grad_norm": 1.2265740847100444, - "learning_rate": 1.9501105617698034e-05, - "loss": 0.2525, - "step": 2516 - }, - { - "epoch": 0.13, - "grad_norm": 1.168129362633706, - "learning_rate": 1.9500591777386134e-05, - "loss": 0.214, - "step": 2517 - }, - { - "epoch": 0.13, - "grad_norm": 1.3091587397759643, - "learning_rate": 1.9500077679370145e-05, - "loss": 0.265, - "step": 2518 - }, - { - "epoch": 0.13, - "grad_norm": 1.1204506664591425, - "learning_rate": 1.949956332366401e-05, - "loss": 0.2612, - "step": 2519 - }, - { - "epoch": 0.13, - "grad_norm": 1.064079571330691, - "learning_rate": 1.9499048710281686e-05, - "loss": 0.2295, - "step": 2520 - }, - { - "epoch": 0.13, - "grad_norm": 1.1114983928013271, - "learning_rate": 1.949853383923713e-05, - "loss": 0.2359, - "step": 2521 - }, - { - "epoch": 0.13, - "grad_norm": 1.1622974627712408, - "learning_rate": 1.9498018710544306e-05, - "loss": 0.2127, - "step": 2522 - }, - { - "epoch": 0.13, - "grad_norm": 1.2721738857153555, - "learning_rate": 1.9497503324217188e-05, - "loss": 0.2444, - "step": 2523 - }, - { - "epoch": 0.13, - "grad_norm": 1.2230204085056002, - "learning_rate": 1.9496987680269755e-05, - "loss": 0.2441, - "step": 2524 - }, - { - "epoch": 0.13, - "grad_norm": 1.1287902286094293, - "learning_rate": 1.9496471778715996e-05, - "loss": 0.2071, - "step": 2525 - }, - { - "epoch": 0.13, - "grad_norm": 1.0371896187150622, - "learning_rate": 1.94959556195699e-05, - "loss": 0.2307, - "step": 2526 - }, - { - "epoch": 0.13, - "grad_norm": 1.019653166099954, - "learning_rate": 1.9495439202845478e-05, - "loss": 0.2605, - "step": 2527 - }, - { - "epoch": 0.13, - "grad_norm": 1.2402110275780256, - "learning_rate": 1.9494922528556727e-05, - "loss": 0.2256, - "step": 2528 - }, - { - "epoch": 0.13, - "grad_norm": 0.9884338951179454, - "learning_rate": 1.9494405596717664e-05, - "loss": 0.2233, - "step": 2529 - }, - { - "epoch": 0.13, - "grad_norm": 1.0533813644665326, - "learning_rate": 1.949388840734232e-05, - "loss": 0.2037, - "step": 2530 - }, - { - "epoch": 0.13, - "grad_norm": 0.967803989425464, - "learning_rate": 1.949337096044471e-05, - "loss": 0.2355, - "step": 2531 - }, - { - "epoch": 0.13, - "grad_norm": 1.1704508502228437, - "learning_rate": 1.949285325603888e-05, - "loss": 0.2442, - "step": 2532 - }, - { - "epoch": 0.13, - "grad_norm": 0.8687769631305765, - "learning_rate": 1.9492335294138868e-05, - "loss": 0.2589, - "step": 2533 - }, - { - "epoch": 0.13, - "grad_norm": 1.0410902336408165, - "learning_rate": 1.9491817074758727e-05, - "loss": 0.2424, - "step": 2534 - }, - { - "epoch": 0.13, - "grad_norm": 1.0019116803639123, - "learning_rate": 1.949129859791251e-05, - "loss": 0.2116, - "step": 2535 - }, - { - "epoch": 0.13, - "grad_norm": 1.0089129827853796, - "learning_rate": 1.9490779863614284e-05, - "loss": 0.2288, - "step": 2536 - }, - { - "epoch": 0.13, - "grad_norm": 0.9476886305903411, - "learning_rate": 1.9490260871878114e-05, - "loss": 0.2283, - "step": 2537 - }, - { - "epoch": 0.13, - "grad_norm": 2.7249136953545823, - "learning_rate": 1.9489741622718087e-05, - "loss": 0.2329, - "step": 2538 - }, - { - "epoch": 0.13, - "grad_norm": 1.6606994488067524, - "learning_rate": 1.9489222116148278e-05, - "loss": 0.245, - "step": 2539 - }, - { - "epoch": 0.13, - "grad_norm": 1.1349112515004496, - "learning_rate": 1.948870235218279e-05, - "loss": 0.217, - "step": 2540 - }, - { - "epoch": 0.13, - "grad_norm": 1.0864289977825945, - "learning_rate": 1.9488182330835706e-05, - "loss": 0.249, - "step": 2541 - }, - { - "epoch": 0.13, - "grad_norm": 1.0162834015559579, - "learning_rate": 1.9487662052121145e-05, - "loss": 0.2613, - "step": 2542 - }, - { - "epoch": 0.13, - "grad_norm": 1.3802411208694967, - "learning_rate": 1.9487141516053214e-05, - "loss": 0.2406, - "step": 2543 - }, - { - "epoch": 0.13, - "grad_norm": 1.3959880188571363, - "learning_rate": 1.9486620722646036e-05, - "loss": 0.2466, - "step": 2544 - }, - { - "epoch": 0.13, - "grad_norm": 0.9696196695677537, - "learning_rate": 1.948609967191373e-05, - "loss": 0.2519, - "step": 2545 - }, - { - "epoch": 0.13, - "grad_norm": 1.1169183926233577, - "learning_rate": 1.9485578363870438e-05, - "loss": 0.2221, - "step": 2546 - }, - { - "epoch": 0.13, - "grad_norm": 0.9685353962725984, - "learning_rate": 1.9485056798530296e-05, - "loss": 0.2147, - "step": 2547 - }, - { - "epoch": 0.13, - "grad_norm": 1.3676253959318463, - "learning_rate": 1.9484534975907454e-05, - "loss": 0.2331, - "step": 2548 - }, - { - "epoch": 0.13, - "grad_norm": 1.072106777885036, - "learning_rate": 1.9484012896016064e-05, - "loss": 0.2405, - "step": 2549 - }, - { - "epoch": 0.13, - "grad_norm": 1.9564555833352242, - "learning_rate": 1.948349055887029e-05, - "loss": 0.2367, - "step": 2550 - }, - { - "epoch": 0.13, - "grad_norm": 1.1139384170444573, - "learning_rate": 1.9482967964484297e-05, - "loss": 0.2386, - "step": 2551 - }, - { - "epoch": 0.13, - "grad_norm": 0.8949825457977273, - "learning_rate": 1.9482445112872265e-05, - "loss": 0.2142, - "step": 2552 - }, - { - "epoch": 0.13, - "grad_norm": 0.9924360109156986, - "learning_rate": 1.948192200404837e-05, - "loss": 0.2308, - "step": 2553 - }, - { - "epoch": 0.13, - "grad_norm": 1.213906686434998, - "learning_rate": 1.948139863802681e-05, - "loss": 0.2162, - "step": 2554 - }, - { - "epoch": 0.13, - "grad_norm": 1.245261071851514, - "learning_rate": 1.9480875014821776e-05, - "loss": 0.2611, - "step": 2555 - }, - { - "epoch": 0.13, - "grad_norm": 0.9617311900940123, - "learning_rate": 1.9480351134447466e-05, - "loss": 0.2092, - "step": 2556 - }, - { - "epoch": 0.13, - "grad_norm": 0.8336642002205943, - "learning_rate": 1.94798269969181e-05, - "loss": 0.2081, - "step": 2557 - }, - { - "epoch": 0.13, - "grad_norm": 1.2352999776362503, - "learning_rate": 1.947930260224789e-05, - "loss": 0.2334, - "step": 2558 - }, - { - "epoch": 0.13, - "grad_norm": 1.2133376413858712, - "learning_rate": 1.9478777950451063e-05, - "loss": 0.2486, - "step": 2559 - }, - { - "epoch": 0.13, - "grad_norm": 0.8660840115714117, - "learning_rate": 1.9478253041541848e-05, - "loss": 0.2115, - "step": 2560 - }, - { - "epoch": 0.13, - "grad_norm": 1.0302407585810627, - "learning_rate": 1.9477727875534483e-05, - "loss": 0.238, - "step": 2561 - }, - { - "epoch": 0.13, - "grad_norm": 0.998569291011515, - "learning_rate": 1.9477202452443217e-05, - "loss": 0.226, - "step": 2562 - }, - { - "epoch": 0.13, - "grad_norm": 1.2341549907489868, - "learning_rate": 1.9476676772282297e-05, - "loss": 0.2366, - "step": 2563 - }, - { - "epoch": 0.13, - "grad_norm": 0.9984474900319265, - "learning_rate": 1.9476150835065983e-05, - "loss": 0.2373, - "step": 2564 - }, - { - "epoch": 0.13, - "grad_norm": 0.8639329130273585, - "learning_rate": 1.9475624640808542e-05, - "loss": 0.2377, - "step": 2565 - }, - { - "epoch": 0.13, - "grad_norm": 0.8980352825774708, - "learning_rate": 1.9475098189524253e-05, - "loss": 0.2344, - "step": 2566 - }, - { - "epoch": 0.13, - "grad_norm": 1.0722782379588431, - "learning_rate": 1.9474571481227385e-05, - "loss": 0.2404, - "step": 2567 - }, - { - "epoch": 0.13, - "grad_norm": 1.0137257435535263, - "learning_rate": 1.947404451593223e-05, - "loss": 0.2399, - "step": 2568 - }, - { - "epoch": 0.13, - "grad_norm": 3.063657778639185, - "learning_rate": 1.9473517293653084e-05, - "loss": 0.2402, - "step": 2569 - }, - { - "epoch": 0.13, - "grad_norm": 0.9111470386340367, - "learning_rate": 1.947298981440425e-05, - "loss": 0.2421, - "step": 2570 - }, - { - "epoch": 0.13, - "grad_norm": 1.0021174540309534, - "learning_rate": 1.947246207820003e-05, - "loss": 0.2228, - "step": 2571 - }, - { - "epoch": 0.13, - "grad_norm": 1.1306991183837498, - "learning_rate": 1.947193408505474e-05, - "loss": 0.2297, - "step": 2572 - }, - { - "epoch": 0.13, - "grad_norm": 0.9252103940645041, - "learning_rate": 1.9471405834982702e-05, - "loss": 0.2478, - "step": 2573 - }, - { - "epoch": 0.13, - "grad_norm": 1.3628088679806831, - "learning_rate": 1.947087732799825e-05, - "loss": 0.243, - "step": 2574 - }, - { - "epoch": 0.13, - "grad_norm": 0.9693549537628366, - "learning_rate": 1.947034856411571e-05, - "loss": 0.2315, - "step": 2575 - }, - { - "epoch": 0.13, - "grad_norm": 0.9421192104463941, - "learning_rate": 1.9469819543349433e-05, - "loss": 0.2135, - "step": 2576 - }, - { - "epoch": 0.13, - "grad_norm": 0.8127772980411968, - "learning_rate": 1.9469290265713767e-05, - "loss": 0.2272, - "step": 2577 - }, - { - "epoch": 0.13, - "grad_norm": 1.8785917986766272, - "learning_rate": 1.9468760731223065e-05, - "loss": 0.2169, - "step": 2578 - }, - { - "epoch": 0.13, - "grad_norm": 0.8235476076278131, - "learning_rate": 1.9468230939891695e-05, - "loss": 0.2154, - "step": 2579 - }, - { - "epoch": 0.13, - "grad_norm": 1.1338475304907736, - "learning_rate": 1.9467700891734027e-05, - "loss": 0.2246, - "step": 2580 - }, - { - "epoch": 0.13, - "grad_norm": 1.088495536191225, - "learning_rate": 1.9467170586764436e-05, - "loss": 0.238, - "step": 2581 - }, - { - "epoch": 0.13, - "grad_norm": 2.508331904771354, - "learning_rate": 1.946664002499731e-05, - "loss": 0.2506, - "step": 2582 - }, - { - "epoch": 0.13, - "grad_norm": 1.3740198237235974, - "learning_rate": 1.9466109206447036e-05, - "loss": 0.2363, - "step": 2583 - }, - { - "epoch": 0.13, - "grad_norm": 2.0443872256988116, - "learning_rate": 1.9465578131128017e-05, - "loss": 0.2265, - "step": 2584 - }, - { - "epoch": 0.13, - "grad_norm": 1.9073842275417658, - "learning_rate": 1.9465046799054657e-05, - "loss": 0.2197, - "step": 2585 - }, - { - "epoch": 0.13, - "grad_norm": 1.6196237127154511, - "learning_rate": 1.9464515210241368e-05, - "loss": 0.2237, - "step": 2586 - }, - { - "epoch": 0.13, - "grad_norm": 1.1295037581852974, - "learning_rate": 1.9463983364702567e-05, - "loss": 0.2357, - "step": 2587 - }, - { - "epoch": 0.13, - "grad_norm": 0.8859703033393711, - "learning_rate": 1.9463451262452685e-05, - "loss": 0.2127, - "step": 2588 - }, - { - "epoch": 0.13, - "grad_norm": 1.0225681338883512, - "learning_rate": 1.946291890350615e-05, - "loss": 0.2223, - "step": 2589 - }, - { - "epoch": 0.13, - "grad_norm": 0.9560884298829112, - "learning_rate": 1.946238628787741e-05, - "loss": 0.1999, - "step": 2590 - }, - { - "epoch": 0.13, - "grad_norm": 1.3680393661657546, - "learning_rate": 1.9461853415580902e-05, - "loss": 0.2471, - "step": 2591 - }, - { - "epoch": 0.13, - "grad_norm": 0.9219507253318284, - "learning_rate": 1.9461320286631088e-05, - "loss": 0.2394, - "step": 2592 - }, - { - "epoch": 0.13, - "grad_norm": 0.9532444945302694, - "learning_rate": 1.946078690104243e-05, - "loss": 0.231, - "step": 2593 - }, - { - "epoch": 0.13, - "grad_norm": 0.8964117763557078, - "learning_rate": 1.946025325882939e-05, - "loss": 0.2198, - "step": 2594 - }, - { - "epoch": 0.13, - "grad_norm": 0.9022765936891728, - "learning_rate": 1.945971936000645e-05, - "loss": 0.2021, - "step": 2595 - }, - { - "epoch": 0.13, - "grad_norm": 1.0377148811164694, - "learning_rate": 1.945918520458808e-05, - "loss": 0.2258, - "step": 2596 - }, - { - "epoch": 0.13, - "grad_norm": 0.8926644487193754, - "learning_rate": 1.9458650792588784e-05, - "loss": 0.2272, - "step": 2597 - }, - { - "epoch": 0.13, - "grad_norm": 1.2339397203295959, - "learning_rate": 1.945811612402305e-05, - "loss": 0.2071, - "step": 2598 - }, - { - "epoch": 0.13, - "grad_norm": 1.1447126510888062, - "learning_rate": 1.945758119890538e-05, - "loss": 0.2384, - "step": 2599 - }, - { - "epoch": 0.13, - "grad_norm": 0.94796043499078, - "learning_rate": 1.9457046017250283e-05, - "loss": 0.2329, - "step": 2600 - }, - { - "epoch": 0.13, - "grad_norm": 1.7726117349573447, - "learning_rate": 1.9456510579072282e-05, - "loss": 0.2293, - "step": 2601 - }, - { - "epoch": 0.13, - "grad_norm": 1.1989169212832826, - "learning_rate": 1.94559748843859e-05, - "loss": 0.2463, - "step": 2602 - }, - { - "epoch": 0.13, - "grad_norm": 1.074481968333032, - "learning_rate": 1.9455438933205662e-05, - "loss": 0.2264, - "step": 2603 - }, - { - "epoch": 0.13, - "grad_norm": 0.9435469677018979, - "learning_rate": 1.945490272554611e-05, - "loss": 0.2463, - "step": 2604 - }, - { - "epoch": 0.13, - "grad_norm": 2.1295581894773257, - "learning_rate": 1.9454366261421786e-05, - "loss": 0.2323, - "step": 2605 - }, - { - "epoch": 0.13, - "grad_norm": 0.8943028767051312, - "learning_rate": 1.9453829540847243e-05, - "loss": 0.2174, - "step": 2606 - }, - { - "epoch": 0.13, - "grad_norm": 1.2457900588078497, - "learning_rate": 1.9453292563837043e-05, - "loss": 0.2574, - "step": 2607 - }, - { - "epoch": 0.13, - "grad_norm": 0.8224583557257457, - "learning_rate": 1.9452755330405745e-05, - "loss": 0.2154, - "step": 2608 - }, - { - "epoch": 0.13, - "grad_norm": 1.0649357865229008, - "learning_rate": 1.9452217840567927e-05, - "loss": 0.2397, - "step": 2609 - }, - { - "epoch": 0.13, - "grad_norm": 0.925172871095016, - "learning_rate": 1.9451680094338163e-05, - "loss": 0.2485, - "step": 2610 - }, - { - "epoch": 0.13, - "grad_norm": 1.3335832249417243, - "learning_rate": 1.9451142091731045e-05, - "loss": 0.2324, - "step": 2611 - }, - { - "epoch": 0.13, - "grad_norm": 0.9363316898690303, - "learning_rate": 1.9450603832761165e-05, - "loss": 0.2601, - "step": 2612 - }, - { - "epoch": 0.13, - "grad_norm": 1.0677907449571924, - "learning_rate": 1.945006531744312e-05, - "loss": 0.2292, - "step": 2613 - }, - { - "epoch": 0.13, - "grad_norm": 1.3032820403922316, - "learning_rate": 1.9449526545791523e-05, - "loss": 0.2523, - "step": 2614 - }, - { - "epoch": 0.13, - "grad_norm": 0.9162782130809316, - "learning_rate": 1.9448987517820982e-05, - "loss": 0.2226, - "step": 2615 - }, - { - "epoch": 0.13, - "grad_norm": 1.025744133538097, - "learning_rate": 1.944844823354612e-05, - "loss": 0.2215, - "step": 2616 - }, - { - "epoch": 0.13, - "grad_norm": 0.9187446041207256, - "learning_rate": 1.944790869298157e-05, - "loss": 0.2469, - "step": 2617 - }, - { - "epoch": 0.13, - "grad_norm": 0.869231477206988, - "learning_rate": 1.9447368896141958e-05, - "loss": 0.2336, - "step": 2618 - }, - { - "epoch": 0.13, - "grad_norm": 1.1103999027549547, - "learning_rate": 1.9446828843041933e-05, - "loss": 0.2437, - "step": 2619 - }, - { - "epoch": 0.13, - "grad_norm": 1.0309658164029158, - "learning_rate": 1.9446288533696145e-05, - "loss": 0.2207, - "step": 2620 - }, - { - "epoch": 0.13, - "grad_norm": 1.0021576454715904, - "learning_rate": 1.9445747968119246e-05, - "loss": 0.2232, - "step": 2621 - }, - { - "epoch": 0.13, - "grad_norm": 1.0926465258868414, - "learning_rate": 1.9445207146325894e-05, - "loss": 0.2556, - "step": 2622 - }, - { - "epoch": 0.13, - "grad_norm": 1.67810028770524, - "learning_rate": 1.9444666068330772e-05, - "loss": 0.208, - "step": 2623 - }, - { - "epoch": 0.13, - "grad_norm": 0.8767727670984775, - "learning_rate": 1.9444124734148543e-05, - "loss": 0.226, - "step": 2624 - }, - { - "epoch": 0.13, - "grad_norm": 1.2002140113970021, - "learning_rate": 1.9443583143793904e-05, - "loss": 0.2337, - "step": 2625 - }, - { - "epoch": 0.13, - "grad_norm": 1.0739431790098095, - "learning_rate": 1.9443041297281536e-05, - "loss": 0.2426, - "step": 2626 - }, - { - "epoch": 0.13, - "grad_norm": 1.2736818974157589, - "learning_rate": 1.9442499194626138e-05, - "loss": 0.2387, - "step": 2627 - }, - { - "epoch": 0.13, - "grad_norm": 1.1784427170299878, - "learning_rate": 1.9441956835842416e-05, - "loss": 0.2219, - "step": 2628 - }, - { - "epoch": 0.13, - "grad_norm": 1.1776402200746279, - "learning_rate": 1.9441414220945083e-05, - "loss": 0.2268, - "step": 2629 - }, - { - "epoch": 0.13, - "grad_norm": 1.0166012698350608, - "learning_rate": 1.9440871349948856e-05, - "loss": 0.2448, - "step": 2630 - }, - { - "epoch": 0.13, - "grad_norm": 1.1629080894535453, - "learning_rate": 1.9440328222868457e-05, - "loss": 0.2375, - "step": 2631 - }, - { - "epoch": 0.13, - "grad_norm": 2.5773238347578813, - "learning_rate": 1.9439784839718627e-05, - "loss": 0.2214, - "step": 2632 - }, - { - "epoch": 0.13, - "grad_norm": 0.8701043666547783, - "learning_rate": 1.94392412005141e-05, - "loss": 0.2403, - "step": 2633 - }, - { - "epoch": 0.13, - "grad_norm": 0.9530883571583211, - "learning_rate": 1.943869730526962e-05, - "loss": 0.2313, - "step": 2634 - }, - { - "epoch": 0.13, - "grad_norm": 1.2905919957477667, - "learning_rate": 1.9438153153999942e-05, - "loss": 0.2338, - "step": 2635 - }, - { - "epoch": 0.13, - "grad_norm": 1.3297130904337844, - "learning_rate": 1.9437608746719828e-05, - "loss": 0.2568, - "step": 2636 - }, - { - "epoch": 0.13, - "grad_norm": 1.1409616790457267, - "learning_rate": 1.943706408344404e-05, - "loss": 0.2696, - "step": 2637 - }, - { - "epoch": 0.13, - "grad_norm": 1.165365917351629, - "learning_rate": 1.9436519164187363e-05, - "loss": 0.2398, - "step": 2638 - }, - { - "epoch": 0.13, - "grad_norm": 1.6898833986979496, - "learning_rate": 1.9435973988964564e-05, - "loss": 0.2401, - "step": 2639 - }, - { - "epoch": 0.13, - "grad_norm": 1.6757623447359549, - "learning_rate": 1.943542855779044e-05, - "loss": 0.2353, - "step": 2640 - }, - { - "epoch": 0.13, - "grad_norm": 1.0909672887562558, - "learning_rate": 1.9434882870679783e-05, - "loss": 0.2225, - "step": 2641 - }, - { - "epoch": 0.13, - "grad_norm": 1.286445863100236, - "learning_rate": 1.9434336927647397e-05, - "loss": 0.216, - "step": 2642 - }, - { - "epoch": 0.13, - "grad_norm": 1.1677363817710942, - "learning_rate": 1.9433790728708085e-05, - "loss": 0.2267, - "step": 2643 - }, - { - "epoch": 0.13, - "grad_norm": 1.0503125887814437, - "learning_rate": 1.943324427387667e-05, - "loss": 0.242, - "step": 2644 - }, - { - "epoch": 0.13, - "grad_norm": 2.007415443963795, - "learning_rate": 1.9432697563167974e-05, - "loss": 0.2323, - "step": 2645 - }, - { - "epoch": 0.13, - "grad_norm": 1.3820066306983543, - "learning_rate": 1.9432150596596818e-05, - "loss": 0.2487, - "step": 2646 - }, - { - "epoch": 0.13, - "grad_norm": 1.0342232451505902, - "learning_rate": 1.9431603374178048e-05, - "loss": 0.2394, - "step": 2647 - }, - { - "epoch": 0.13, - "grad_norm": 1.5428237143556407, - "learning_rate": 1.94310558959265e-05, - "loss": 0.2356, - "step": 2648 - }, - { - "epoch": 0.13, - "grad_norm": 1.67906044044975, - "learning_rate": 1.943050816185703e-05, - "loss": 0.2331, - "step": 2649 - }, - { - "epoch": 0.13, - "grad_norm": 1.0458855765510422, - "learning_rate": 1.9429960171984496e-05, - "loss": 0.2103, - "step": 2650 - }, - { - "epoch": 0.13, - "grad_norm": 0.9721893985186519, - "learning_rate": 1.9429411926323756e-05, - "loss": 0.2038, - "step": 2651 - }, - { - "epoch": 0.13, - "grad_norm": 1.0903666819111881, - "learning_rate": 1.942886342488969e-05, - "loss": 0.2179, - "step": 2652 - }, - { - "epoch": 0.13, - "grad_norm": 1.5080139281275566, - "learning_rate": 1.9428314667697166e-05, - "loss": 0.2317, - "step": 2653 - }, - { - "epoch": 0.13, - "grad_norm": 1.6690664079020918, - "learning_rate": 1.9427765654761078e-05, - "loss": 0.2292, - "step": 2654 - }, - { - "epoch": 0.14, - "grad_norm": 1.3396452269621446, - "learning_rate": 1.9427216386096313e-05, - "loss": 0.2412, - "step": 2655 - }, - { - "epoch": 0.14, - "grad_norm": 1.5807328497654407, - "learning_rate": 1.942666686171777e-05, - "loss": 0.2357, - "step": 2656 - }, - { - "epoch": 0.14, - "grad_norm": 1.190131146360561, - "learning_rate": 1.9426117081640356e-05, - "loss": 0.2241, - "step": 2657 - }, - { - "epoch": 0.14, - "grad_norm": 0.9877783524716975, - "learning_rate": 1.9425567045878983e-05, - "loss": 0.2049, - "step": 2658 - }, - { - "epoch": 0.14, - "grad_norm": 1.6005142246281456, - "learning_rate": 1.942501675444857e-05, - "loss": 0.203, - "step": 2659 - }, - { - "epoch": 0.14, - "grad_norm": 1.7675592499022506, - "learning_rate": 1.942446620736405e-05, - "loss": 0.2501, - "step": 2660 - }, - { - "epoch": 0.14, - "grad_norm": 1.3275111289487056, - "learning_rate": 1.942391540464035e-05, - "loss": 0.2689, - "step": 2661 - }, - { - "epoch": 0.14, - "grad_norm": 1.0635503262207242, - "learning_rate": 1.942336434629241e-05, - "loss": 0.2241, - "step": 2662 - }, - { - "epoch": 0.14, - "grad_norm": 1.5034106101267861, - "learning_rate": 1.9422813032335183e-05, - "loss": 0.2364, - "step": 2663 - }, - { - "epoch": 0.14, - "grad_norm": 1.1863865222873722, - "learning_rate": 1.942226146278362e-05, - "loss": 0.2401, - "step": 2664 - }, - { - "epoch": 0.14, - "grad_norm": 2.2804977196215885, - "learning_rate": 1.9421709637652683e-05, - "loss": 0.2521, - "step": 2665 - }, - { - "epoch": 0.14, - "grad_norm": 1.3243453835764538, - "learning_rate": 1.9421157556957335e-05, - "loss": 0.2466, - "step": 2666 - }, - { - "epoch": 0.14, - "grad_norm": 1.114670908193584, - "learning_rate": 1.9420605220712563e-05, - "loss": 0.2216, - "step": 2667 - }, - { - "epoch": 0.14, - "grad_norm": 0.9544414132223076, - "learning_rate": 1.942005262893334e-05, - "loss": 0.2341, - "step": 2668 - }, - { - "epoch": 0.14, - "grad_norm": 1.0708571897346173, - "learning_rate": 1.9419499781634655e-05, - "loss": 0.2332, - "step": 2669 - }, - { - "epoch": 0.14, - "grad_norm": 0.8900595168867483, - "learning_rate": 1.9418946678831507e-05, - "loss": 0.2251, - "step": 2670 - }, - { - "epoch": 0.14, - "grad_norm": 1.129854489633797, - "learning_rate": 1.9418393320538898e-05, - "loss": 0.2257, - "step": 2671 - }, - { - "epoch": 0.14, - "grad_norm": 0.7338804200785907, - "learning_rate": 1.9417839706771842e-05, - "loss": 0.2239, - "step": 2672 - }, - { - "epoch": 0.14, - "grad_norm": 0.9958041310863106, - "learning_rate": 1.941728583754535e-05, - "loss": 0.2195, - "step": 2673 - }, - { - "epoch": 0.14, - "grad_norm": 1.3733645464339348, - "learning_rate": 1.9416731712874446e-05, - "loss": 0.2589, - "step": 2674 - }, - { - "epoch": 0.14, - "grad_norm": 0.9909583269793304, - "learning_rate": 1.9416177332774162e-05, - "loss": 0.2301, - "step": 2675 - }, - { - "epoch": 0.14, - "grad_norm": 1.0248646828945818, - "learning_rate": 1.941562269725954e-05, - "loss": 0.2144, - "step": 2676 - }, - { - "epoch": 0.14, - "grad_norm": 1.008259417023293, - "learning_rate": 1.9415067806345618e-05, - "loss": 0.2314, - "step": 2677 - }, - { - "epoch": 0.14, - "grad_norm": 1.0543469075480103, - "learning_rate": 1.9414512660047447e-05, - "loss": 0.2312, - "step": 2678 - }, - { - "epoch": 0.14, - "grad_norm": 1.1781005557537256, - "learning_rate": 1.9413957258380096e-05, - "loss": 0.2802, - "step": 2679 - }, - { - "epoch": 0.14, - "grad_norm": 1.4001122745244066, - "learning_rate": 1.9413401601358616e-05, - "loss": 0.2215, - "step": 2680 - }, - { - "epoch": 0.14, - "grad_norm": 1.3713329500791462, - "learning_rate": 1.9412845688998088e-05, - "loss": 0.2685, - "step": 2681 - }, - { - "epoch": 0.14, - "grad_norm": 0.886757575995524, - "learning_rate": 1.941228952131359e-05, - "loss": 0.223, - "step": 2682 - }, - { - "epoch": 0.14, - "grad_norm": 1.22683645359689, - "learning_rate": 1.9411733098320206e-05, - "loss": 0.2672, - "step": 2683 - }, - { - "epoch": 0.14, - "grad_norm": 1.5315058602311509, - "learning_rate": 1.941117642003303e-05, - "loss": 0.2429, - "step": 2684 - }, - { - "epoch": 0.14, - "grad_norm": 1.1110857691169762, - "learning_rate": 1.9410619486467165e-05, - "loss": 0.2305, - "step": 2685 - }, - { - "epoch": 0.14, - "grad_norm": 1.258428468415178, - "learning_rate": 1.941006229763771e-05, - "loss": 0.2534, - "step": 2686 - }, - { - "epoch": 0.14, - "grad_norm": 1.0637285182214002, - "learning_rate": 1.9409504853559785e-05, - "loss": 0.2362, - "step": 2687 - }, - { - "epoch": 0.14, - "grad_norm": 0.9076212147606959, - "learning_rate": 1.9408947154248513e-05, - "loss": 0.2272, - "step": 2688 - }, - { - "epoch": 0.14, - "grad_norm": 1.3770753242493052, - "learning_rate": 1.9408389199719014e-05, - "loss": 0.2113, - "step": 2689 - }, - { - "epoch": 0.14, - "grad_norm": 0.9711446101427936, - "learning_rate": 1.940783098998643e-05, - "loss": 0.1977, - "step": 2690 - }, - { - "epoch": 0.14, - "grad_norm": 1.140062040259186, - "learning_rate": 1.9407272525065898e-05, - "loss": 0.2167, - "step": 2691 - }, - { - "epoch": 0.14, - "grad_norm": 1.1803908714893359, - "learning_rate": 1.9406713804972565e-05, - "loss": 0.2327, - "step": 2692 - }, - { - "epoch": 0.14, - "grad_norm": 0.8373370160514211, - "learning_rate": 1.940615482972159e-05, - "loss": 0.2331, - "step": 2693 - }, - { - "epoch": 0.14, - "grad_norm": 1.7754881930206292, - "learning_rate": 1.9405595599328135e-05, - "loss": 0.2258, - "step": 2694 - }, - { - "epoch": 0.14, - "grad_norm": 1.0043781465210622, - "learning_rate": 1.940503611380737e-05, - "loss": 0.2151, - "step": 2695 - }, - { - "epoch": 0.14, - "grad_norm": 1.8156833311388194, - "learning_rate": 1.9404476373174464e-05, - "loss": 0.2327, - "step": 2696 - }, - { - "epoch": 0.14, - "grad_norm": 1.0110757172867997, - "learning_rate": 1.940391637744461e-05, - "loss": 0.208, - "step": 2697 - }, - { - "epoch": 0.14, - "grad_norm": 1.267672729778902, - "learning_rate": 1.9403356126632992e-05, - "loss": 0.2283, - "step": 2698 - }, - { - "epoch": 0.14, - "grad_norm": 1.0261197599948402, - "learning_rate": 1.9402795620754804e-05, - "loss": 0.2338, - "step": 2699 - }, - { - "epoch": 0.14, - "grad_norm": 1.329068764323991, - "learning_rate": 1.9402234859825257e-05, - "loss": 0.2397, - "step": 2700 - }, - { - "epoch": 0.14, - "grad_norm": 1.1760870358990558, - "learning_rate": 1.940167384385956e-05, - "loss": 0.2614, - "step": 2701 - }, - { - "epoch": 0.14, - "grad_norm": 1.3730668241267234, - "learning_rate": 1.9401112572872925e-05, - "loss": 0.2195, - "step": 2702 - }, - { - "epoch": 0.14, - "grad_norm": 1.2577938917534763, - "learning_rate": 1.9400551046880585e-05, - "loss": 0.2342, - "step": 2703 - }, - { - "epoch": 0.14, - "grad_norm": 0.9975635494089065, - "learning_rate": 1.9399989265897764e-05, - "loss": 0.2163, - "step": 2704 - }, - { - "epoch": 0.14, - "grad_norm": 0.9901243552258541, - "learning_rate": 1.9399427229939704e-05, - "loss": 0.2293, - "step": 2705 - }, - { - "epoch": 0.14, - "grad_norm": 1.1380070535962723, - "learning_rate": 1.939886493902165e-05, - "loss": 0.2385, - "step": 2706 - }, - { - "epoch": 0.14, - "grad_norm": 0.8633683152623378, - "learning_rate": 1.9398302393158853e-05, - "loss": 0.214, - "step": 2707 - }, - { - "epoch": 0.14, - "grad_norm": 1.3079801081816878, - "learning_rate": 1.939773959236657e-05, - "loss": 0.246, - "step": 2708 - }, - { - "epoch": 0.14, - "grad_norm": 1.4099778762372743, - "learning_rate": 1.9397176536660074e-05, - "loss": 0.2494, - "step": 2709 - }, - { - "epoch": 0.14, - "grad_norm": 1.3922786096799684, - "learning_rate": 1.939661322605463e-05, - "loss": 0.2666, - "step": 2710 - }, - { - "epoch": 0.14, - "grad_norm": 1.2057784347406664, - "learning_rate": 1.9396049660565525e-05, - "loss": 0.2277, - "step": 2711 - }, - { - "epoch": 0.14, - "grad_norm": 1.6404060910870868, - "learning_rate": 1.939548584020804e-05, - "loss": 0.2288, - "step": 2712 - }, - { - "epoch": 0.14, - "grad_norm": 0.9016999206905085, - "learning_rate": 1.9394921764997475e-05, - "loss": 0.1847, - "step": 2713 - }, - { - "epoch": 0.14, - "grad_norm": 1.162953170396771, - "learning_rate": 1.939435743494912e-05, - "loss": 0.2327, - "step": 2714 - }, - { - "epoch": 0.14, - "grad_norm": 1.1998203345890663, - "learning_rate": 1.9393792850078294e-05, - "loss": 0.2468, - "step": 2715 - }, - { - "epoch": 0.14, - "grad_norm": 1.3223511057603083, - "learning_rate": 1.9393228010400303e-05, - "loss": 0.2008, - "step": 2716 - }, - { - "epoch": 0.14, - "grad_norm": 1.2970323911231219, - "learning_rate": 1.9392662915930476e-05, - "loss": 0.252, - "step": 2717 - }, - { - "epoch": 0.14, - "grad_norm": 0.9859759934321387, - "learning_rate": 1.9392097566684132e-05, - "loss": 0.2326, - "step": 2718 - }, - { - "epoch": 0.14, - "grad_norm": 1.5788317329342427, - "learning_rate": 1.9391531962676614e-05, - "loss": 0.2219, - "step": 2719 - }, - { - "epoch": 0.14, - "grad_norm": 1.4030978767199909, - "learning_rate": 1.939096610392326e-05, - "loss": 0.2487, - "step": 2720 - }, - { - "epoch": 0.14, - "grad_norm": 1.0004712075775568, - "learning_rate": 1.939039999043942e-05, - "loss": 0.2151, - "step": 2721 - }, - { - "epoch": 0.14, - "grad_norm": 0.9521128417870931, - "learning_rate": 1.938983362224045e-05, - "loss": 0.2365, - "step": 2722 - }, - { - "epoch": 0.14, - "grad_norm": 0.8453050326579191, - "learning_rate": 1.9389266999341717e-05, - "loss": 0.228, - "step": 2723 - }, - { - "epoch": 0.14, - "grad_norm": 0.9700950773605047, - "learning_rate": 1.938870012175858e-05, - "loss": 0.2038, - "step": 2724 - }, - { - "epoch": 0.14, - "grad_norm": 1.3553509430700317, - "learning_rate": 1.9388132989506422e-05, - "loss": 0.2297, - "step": 2725 - }, - { - "epoch": 0.14, - "grad_norm": 0.9893932786413834, - "learning_rate": 1.938756560260063e-05, - "loss": 0.2173, - "step": 2726 - }, - { - "epoch": 0.14, - "grad_norm": 1.219016122502431, - "learning_rate": 1.938699796105659e-05, - "loss": 0.2452, - "step": 2727 - }, - { - "epoch": 0.14, - "grad_norm": 1.1359466605415534, - "learning_rate": 1.93864300648897e-05, - "loss": 0.2274, - "step": 2728 - }, - { - "epoch": 0.14, - "grad_norm": 1.9213735652946478, - "learning_rate": 1.9385861914115365e-05, - "loss": 0.217, - "step": 2729 - }, - { - "epoch": 0.14, - "grad_norm": 1.0375854513096163, - "learning_rate": 1.9385293508748994e-05, - "loss": 0.2354, - "step": 2730 - }, - { - "epoch": 0.14, - "grad_norm": 1.1904506309150737, - "learning_rate": 1.9384724848806007e-05, - "loss": 0.2345, - "step": 2731 - }, - { - "epoch": 0.14, - "grad_norm": 1.0897334514227217, - "learning_rate": 1.938415593430183e-05, - "loss": 0.2282, - "step": 2732 - }, - { - "epoch": 0.14, - "grad_norm": 1.3949774336848564, - "learning_rate": 1.938358676525189e-05, - "loss": 0.2158, - "step": 2733 - }, - { - "epoch": 0.14, - "grad_norm": 1.2083550955177704, - "learning_rate": 1.938301734167163e-05, - "loss": 0.2311, - "step": 2734 - }, - { - "epoch": 0.14, - "grad_norm": 1.1414932076518072, - "learning_rate": 1.9382447663576495e-05, - "loss": 0.2166, - "step": 2735 - }, - { - "epoch": 0.14, - "grad_norm": 1.2718276826173287, - "learning_rate": 1.9381877730981938e-05, - "loss": 0.2304, - "step": 2736 - }, - { - "epoch": 0.14, - "grad_norm": 1.8383903990044212, - "learning_rate": 1.9381307543903416e-05, - "loss": 0.2392, - "step": 2737 - }, - { - "epoch": 0.14, - "grad_norm": 1.1569915189018203, - "learning_rate": 1.93807371023564e-05, - "loss": 0.2274, - "step": 2738 - }, - { - "epoch": 0.14, - "grad_norm": 1.4849041251151516, - "learning_rate": 1.9380166406356357e-05, - "loss": 0.2647, - "step": 2739 - }, - { - "epoch": 0.14, - "grad_norm": 0.9769699732654332, - "learning_rate": 1.9379595455918773e-05, - "loss": 0.2233, - "step": 2740 - }, - { - "epoch": 0.14, - "grad_norm": 1.1559082444358126, - "learning_rate": 1.937902425105913e-05, - "loss": 0.225, - "step": 2741 - }, - { - "epoch": 0.14, - "grad_norm": 0.9116840164315465, - "learning_rate": 1.9378452791792924e-05, - "loss": 0.2057, - "step": 2742 - }, - { - "epoch": 0.14, - "grad_norm": 1.0057407085044325, - "learning_rate": 1.937788107813566e-05, - "loss": 0.2108, - "step": 2743 - }, - { - "epoch": 0.14, - "grad_norm": 1.0099264312055576, - "learning_rate": 1.937730911010284e-05, - "loss": 0.2116, - "step": 2744 - }, - { - "epoch": 0.14, - "grad_norm": 1.4555501133886994, - "learning_rate": 1.9376736887709982e-05, - "loss": 0.216, - "step": 2745 - }, - { - "epoch": 0.14, - "grad_norm": 1.0396223279709482, - "learning_rate": 1.9376164410972604e-05, - "loss": 0.229, - "step": 2746 - }, - { - "epoch": 0.14, - "grad_norm": 1.1136116076973643, - "learning_rate": 1.9375591679906242e-05, - "loss": 0.2507, - "step": 2747 - }, - { - "epoch": 0.14, - "grad_norm": 0.8975991818619756, - "learning_rate": 1.937501869452642e-05, - "loss": 0.2231, - "step": 2748 - }, - { - "epoch": 0.14, - "grad_norm": 1.5743246579158048, - "learning_rate": 1.937444545484869e-05, - "loss": 0.2339, - "step": 2749 - }, - { - "epoch": 0.14, - "grad_norm": 2.469957685828871, - "learning_rate": 1.9373871960888594e-05, - "loss": 0.2389, - "step": 2750 - }, - { - "epoch": 0.14, - "grad_norm": 1.0671056971589354, - "learning_rate": 1.9373298212661697e-05, - "loss": 0.2364, - "step": 2751 - }, - { - "epoch": 0.14, - "grad_norm": 0.88066037520169, - "learning_rate": 1.9372724210183552e-05, - "loss": 0.2524, - "step": 2752 - }, - { - "epoch": 0.14, - "grad_norm": 1.109280660148114, - "learning_rate": 1.9372149953469733e-05, - "loss": 0.2065, - "step": 2753 - }, - { - "epoch": 0.14, - "grad_norm": 1.4632192072371528, - "learning_rate": 1.937157544253582e-05, - "loss": 0.2408, - "step": 2754 - }, - { - "epoch": 0.14, - "grad_norm": 0.9689206571329406, - "learning_rate": 1.9371000677397393e-05, - "loss": 0.2171, - "step": 2755 - }, - { - "epoch": 0.14, - "grad_norm": 1.250810549389868, - "learning_rate": 1.9370425658070043e-05, - "loss": 0.25, - "step": 2756 - }, - { - "epoch": 0.14, - "grad_norm": 0.9785658371917585, - "learning_rate": 1.936985038456937e-05, - "loss": 0.2174, - "step": 2757 - }, - { - "epoch": 0.14, - "grad_norm": 0.9513477898582572, - "learning_rate": 1.936927485691097e-05, - "loss": 0.2498, - "step": 2758 - }, - { - "epoch": 0.14, - "grad_norm": 0.9823602970822934, - "learning_rate": 1.9368699075110467e-05, - "loss": 0.2077, - "step": 2759 - }, - { - "epoch": 0.14, - "grad_norm": 1.01832212753293, - "learning_rate": 1.9368123039183468e-05, - "loss": 0.2258, - "step": 2760 - }, - { - "epoch": 0.14, - "grad_norm": 1.0482144964607565, - "learning_rate": 1.9367546749145605e-05, - "loss": 0.2374, - "step": 2761 - }, - { - "epoch": 0.14, - "grad_norm": 1.1170462467104787, - "learning_rate": 1.9366970205012508e-05, - "loss": 0.218, - "step": 2762 - }, - { - "epoch": 0.14, - "grad_norm": 1.2372709983607768, - "learning_rate": 1.9366393406799813e-05, - "loss": 0.2195, - "step": 2763 - }, - { - "epoch": 0.14, - "grad_norm": 4.606539485103394, - "learning_rate": 1.9365816354523167e-05, - "loss": 0.2416, - "step": 2764 - }, - { - "epoch": 0.14, - "grad_norm": 2.08146758226909, - "learning_rate": 1.9365239048198227e-05, - "loss": 0.2265, - "step": 2765 - }, - { - "epoch": 0.14, - "grad_norm": 0.9354277640682458, - "learning_rate": 1.9364661487840645e-05, - "loss": 0.2565, - "step": 2766 - }, - { - "epoch": 0.14, - "grad_norm": 1.194024150331995, - "learning_rate": 1.9364083673466094e-05, - "loss": 0.2313, - "step": 2767 - }, - { - "epoch": 0.14, - "grad_norm": 1.208037331414121, - "learning_rate": 1.9363505605090243e-05, - "loss": 0.2331, - "step": 2768 - }, - { - "epoch": 0.14, - "grad_norm": 2.654984133731084, - "learning_rate": 1.9362927282728774e-05, - "loss": 0.244, - "step": 2769 - }, - { - "epoch": 0.14, - "grad_norm": 1.1251978450679647, - "learning_rate": 1.9362348706397374e-05, - "loss": 0.2296, - "step": 2770 - }, - { - "epoch": 0.14, - "grad_norm": 1.089003515036792, - "learning_rate": 1.9361769876111734e-05, - "loss": 0.2047, - "step": 2771 - }, - { - "epoch": 0.14, - "grad_norm": 1.0686045742352648, - "learning_rate": 1.936119079188756e-05, - "loss": 0.2195, - "step": 2772 - }, - { - "epoch": 0.14, - "grad_norm": 0.912428797587506, - "learning_rate": 1.936061145374056e-05, - "loss": 0.2152, - "step": 2773 - }, - { - "epoch": 0.14, - "grad_norm": 1.2551911812214693, - "learning_rate": 1.936003186168644e-05, - "loss": 0.2467, - "step": 2774 - }, - { - "epoch": 0.14, - "grad_norm": 0.9804946097373898, - "learning_rate": 1.935945201574093e-05, - "loss": 0.2328, - "step": 2775 - }, - { - "epoch": 0.14, - "grad_norm": 1.2927497100162066, - "learning_rate": 1.9358871915919754e-05, - "loss": 0.2347, - "step": 2776 - }, - { - "epoch": 0.14, - "grad_norm": 1.2909319229029048, - "learning_rate": 1.935829156223865e-05, - "loss": 0.2266, - "step": 2777 - }, - { - "epoch": 0.14, - "grad_norm": 1.1159629090785874, - "learning_rate": 1.935771095471336e-05, - "loss": 0.2208, - "step": 2778 - }, - { - "epoch": 0.14, - "grad_norm": 1.3930226613031322, - "learning_rate": 1.935713009335963e-05, - "loss": 0.2331, - "step": 2779 - }, - { - "epoch": 0.14, - "grad_norm": 1.0452858269400926, - "learning_rate": 1.9356548978193216e-05, - "loss": 0.2501, - "step": 2780 - }, - { - "epoch": 0.14, - "grad_norm": 0.9458649340006015, - "learning_rate": 1.9355967609229886e-05, - "loss": 0.2338, - "step": 2781 - }, - { - "epoch": 0.14, - "grad_norm": 1.2348517530609575, - "learning_rate": 1.9355385986485406e-05, - "loss": 0.2253, - "step": 2782 - }, - { - "epoch": 0.14, - "grad_norm": 1.5834193611796006, - "learning_rate": 1.935480410997555e-05, - "loss": 0.2621, - "step": 2783 - }, - { - "epoch": 0.14, - "grad_norm": 0.8904407124477265, - "learning_rate": 1.9354221979716107e-05, - "loss": 0.2173, - "step": 2784 - }, - { - "epoch": 0.14, - "grad_norm": 1.9469089622240592, - "learning_rate": 1.9353639595722863e-05, - "loss": 0.2164, - "step": 2785 - }, - { - "epoch": 0.14, - "grad_norm": 1.7623120361802205, - "learning_rate": 1.9353056958011613e-05, - "loss": 0.2241, - "step": 2786 - }, - { - "epoch": 0.14, - "grad_norm": 1.343534262397146, - "learning_rate": 1.935247406659817e-05, - "loss": 0.2292, - "step": 2787 - }, - { - "epoch": 0.14, - "grad_norm": 3.9677492958050955, - "learning_rate": 1.935189092149834e-05, - "loss": 0.237, - "step": 2788 - }, - { - "epoch": 0.14, - "grad_norm": 1.0513218979485803, - "learning_rate": 1.9351307522727936e-05, - "loss": 0.2477, - "step": 2789 - }, - { - "epoch": 0.14, - "grad_norm": 1.0806439607517602, - "learning_rate": 1.935072387030279e-05, - "loss": 0.2478, - "step": 2790 - }, - { - "epoch": 0.14, - "grad_norm": 1.0885677043023188, - "learning_rate": 1.9350139964238732e-05, - "loss": 0.2577, - "step": 2791 - }, - { - "epoch": 0.14, - "grad_norm": 0.9436811681196573, - "learning_rate": 1.9349555804551598e-05, - "loss": 0.2148, - "step": 2792 - }, - { - "epoch": 0.14, - "grad_norm": 1.4197577391194087, - "learning_rate": 1.9348971391257235e-05, - "loss": 0.2472, - "step": 2793 - }, - { - "epoch": 0.14, - "grad_norm": 2.0348878799009253, - "learning_rate": 1.9348386724371495e-05, - "loss": 0.2477, - "step": 2794 - }, - { - "epoch": 0.14, - "grad_norm": 1.1077434643852102, - "learning_rate": 1.9347801803910236e-05, - "loss": 0.2239, - "step": 2795 - }, - { - "epoch": 0.14, - "grad_norm": 1.3548339541610674, - "learning_rate": 1.9347216629889326e-05, - "loss": 0.224, - "step": 2796 - }, - { - "epoch": 0.14, - "grad_norm": 1.7495935883278109, - "learning_rate": 1.9346631202324638e-05, - "loss": 0.2304, - "step": 2797 - }, - { - "epoch": 0.14, - "grad_norm": 1.101603564197832, - "learning_rate": 1.9346045521232048e-05, - "loss": 0.2365, - "step": 2798 - }, - { - "epoch": 0.14, - "grad_norm": 1.1903832241877375, - "learning_rate": 1.9345459586627448e-05, - "loss": 0.2371, - "step": 2799 - }, - { - "epoch": 0.14, - "grad_norm": 1.1802158056744014, - "learning_rate": 1.9344873398526733e-05, - "loss": 0.2418, - "step": 2800 - }, - { - "epoch": 0.14, - "grad_norm": 1.503865653521154, - "learning_rate": 1.934428695694579e-05, - "loss": 0.2316, - "step": 2801 - }, - { - "epoch": 0.14, - "grad_norm": 12.801483119089678, - "learning_rate": 1.9343700261900543e-05, - "loss": 0.2447, - "step": 2802 - }, - { - "epoch": 0.14, - "grad_norm": 2.688265735472681, - "learning_rate": 1.9343113313406893e-05, - "loss": 0.2316, - "step": 2803 - }, - { - "epoch": 0.14, - "grad_norm": 1.522621184047923, - "learning_rate": 1.9342526111480772e-05, - "loss": 0.2275, - "step": 2804 - }, - { - "epoch": 0.14, - "grad_norm": 2.6548636105769403, - "learning_rate": 1.9341938656138097e-05, - "loss": 0.2378, - "step": 2805 - }, - { - "epoch": 0.14, - "grad_norm": 1.149292127360877, - "learning_rate": 1.9341350947394812e-05, - "loss": 0.2148, - "step": 2806 - }, - { - "epoch": 0.14, - "grad_norm": 1.0514337747612676, - "learning_rate": 1.9340762985266853e-05, - "loss": 0.2271, - "step": 2807 - }, - { - "epoch": 0.14, - "grad_norm": 1.0551929436991787, - "learning_rate": 1.9340174769770173e-05, - "loss": 0.2237, - "step": 2808 - }, - { - "epoch": 0.14, - "grad_norm": 1.3379299345155393, - "learning_rate": 1.933958630092072e-05, - "loss": 0.2207, - "step": 2809 - }, - { - "epoch": 0.14, - "grad_norm": 1.5583520618210185, - "learning_rate": 1.9338997578734466e-05, - "loss": 0.2106, - "step": 2810 - }, - { - "epoch": 0.14, - "grad_norm": 1.4166387778203358, - "learning_rate": 1.9338408603227374e-05, - "loss": 0.2488, - "step": 2811 - }, - { - "epoch": 0.14, - "grad_norm": 1.2264068153220962, - "learning_rate": 1.9337819374415422e-05, - "loss": 0.2566, - "step": 2812 - }, - { - "epoch": 0.14, - "grad_norm": 1.1433870801860593, - "learning_rate": 1.933722989231459e-05, - "loss": 0.2185, - "step": 2813 - }, - { - "epoch": 0.14, - "grad_norm": 0.8592350172417055, - "learning_rate": 1.933664015694087e-05, - "loss": 0.2098, - "step": 2814 - }, - { - "epoch": 0.14, - "grad_norm": 0.8753480279923114, - "learning_rate": 1.933605016831026e-05, - "loss": 0.2096, - "step": 2815 - }, - { - "epoch": 0.14, - "grad_norm": 1.0621829098299027, - "learning_rate": 1.933545992643876e-05, - "loss": 0.2262, - "step": 2816 - }, - { - "epoch": 0.14, - "grad_norm": 1.294220489663147, - "learning_rate": 1.9334869431342386e-05, - "loss": 0.236, - "step": 2817 - }, - { - "epoch": 0.14, - "grad_norm": 1.387910567461303, - "learning_rate": 1.9334278683037145e-05, - "loss": 0.227, - "step": 2818 - }, - { - "epoch": 0.14, - "grad_norm": 1.102746476766718, - "learning_rate": 1.9333687681539073e-05, - "loss": 0.2447, - "step": 2819 - }, - { - "epoch": 0.14, - "grad_norm": 0.8837216385130431, - "learning_rate": 1.9333096426864194e-05, - "loss": 0.2374, - "step": 2820 - }, - { - "epoch": 0.14, - "grad_norm": 1.0590666816622536, - "learning_rate": 1.9332504919028548e-05, - "loss": 0.2266, - "step": 2821 - }, - { - "epoch": 0.14, - "grad_norm": 1.2261448440402158, - "learning_rate": 1.9331913158048175e-05, - "loss": 0.2126, - "step": 2822 - }, - { - "epoch": 0.14, - "grad_norm": 2.3830268899911844, - "learning_rate": 1.9331321143939134e-05, - "loss": 0.1973, - "step": 2823 - }, - { - "epoch": 0.14, - "grad_norm": 1.2466959722982942, - "learning_rate": 1.933072887671748e-05, - "loss": 0.2186, - "step": 2824 - }, - { - "epoch": 0.14, - "grad_norm": 1.1776701511053869, - "learning_rate": 1.933013635639928e-05, - "loss": 0.2299, - "step": 2825 - }, - { - "epoch": 0.14, - "grad_norm": 1.079444331175208, - "learning_rate": 1.93295435830006e-05, - "loss": 0.243, - "step": 2826 - }, - { - "epoch": 0.14, - "grad_norm": 0.9769968665319854, - "learning_rate": 1.9328950556537523e-05, - "loss": 0.2288, - "step": 2827 - }, - { - "epoch": 0.14, - "grad_norm": 2.233898058764008, - "learning_rate": 1.932835727702614e-05, - "loss": 0.2327, - "step": 2828 - }, - { - "epoch": 0.14, - "grad_norm": 0.9367566360481667, - "learning_rate": 1.9327763744482536e-05, - "loss": 0.2117, - "step": 2829 - }, - { - "epoch": 0.14, - "grad_norm": 1.3818642138448198, - "learning_rate": 1.9327169958922813e-05, - "loss": 0.2181, - "step": 2830 - }, - { - "epoch": 0.14, - "grad_norm": 1.2256836948803158, - "learning_rate": 1.932657592036308e-05, - "loss": 0.2203, - "step": 2831 - }, - { - "epoch": 0.14, - "grad_norm": 1.104918269812186, - "learning_rate": 1.9325981628819448e-05, - "loss": 0.2184, - "step": 2832 - }, - { - "epoch": 0.14, - "grad_norm": 1.4675592439172866, - "learning_rate": 1.9325387084308036e-05, - "loss": 0.2331, - "step": 2833 - }, - { - "epoch": 0.14, - "grad_norm": 3.220652819519909, - "learning_rate": 1.9324792286844977e-05, - "loss": 0.2333, - "step": 2834 - }, - { - "epoch": 0.14, - "grad_norm": 1.2065214985533652, - "learning_rate": 1.9324197236446397e-05, - "loss": 0.2497, - "step": 2835 - }, - { - "epoch": 0.14, - "grad_norm": 1.0117364195372431, - "learning_rate": 1.932360193312844e-05, - "loss": 0.2159, - "step": 2836 - }, - { - "epoch": 0.14, - "grad_norm": 1.1569651541855233, - "learning_rate": 1.9323006376907253e-05, - "loss": 0.2282, - "step": 2837 - }, - { - "epoch": 0.14, - "grad_norm": 1.1876368415555707, - "learning_rate": 1.9322410567798996e-05, - "loss": 0.2212, - "step": 2838 - }, - { - "epoch": 0.14, - "grad_norm": 1.3740372088877062, - "learning_rate": 1.932181450581982e-05, - "loss": 0.2384, - "step": 2839 - }, - { - "epoch": 0.14, - "grad_norm": 1.1016021890316428, - "learning_rate": 1.9321218190985906e-05, - "loss": 0.222, - "step": 2840 - }, - { - "epoch": 0.14, - "grad_norm": 0.9683116783883369, - "learning_rate": 1.9320621623313416e-05, - "loss": 0.2128, - "step": 2841 - }, - { - "epoch": 0.14, - "grad_norm": 1.1316210342046384, - "learning_rate": 1.932002480281854e-05, - "loss": 0.2403, - "step": 2842 - }, - { - "epoch": 0.14, - "grad_norm": 0.9264831837106831, - "learning_rate": 1.9319427729517467e-05, - "loss": 0.2175, - "step": 2843 - }, - { - "epoch": 0.14, - "grad_norm": 1.03768617926261, - "learning_rate": 1.9318830403426388e-05, - "loss": 0.2477, - "step": 2844 - }, - { - "epoch": 0.14, - "grad_norm": 1.2202169153802127, - "learning_rate": 1.9318232824561507e-05, - "loss": 0.2141, - "step": 2845 - }, - { - "epoch": 0.14, - "grad_norm": 0.8568755411257019, - "learning_rate": 1.9317634992939034e-05, - "loss": 0.2263, - "step": 2846 - }, - { - "epoch": 0.14, - "grad_norm": 1.0483047904842462, - "learning_rate": 1.931703690857519e-05, - "loss": 0.2382, - "step": 2847 - }, - { - "epoch": 0.14, - "grad_norm": 2.329723964272318, - "learning_rate": 1.9316438571486188e-05, - "loss": 0.1943, - "step": 2848 - }, - { - "epoch": 0.14, - "grad_norm": 2.3214312045060064, - "learning_rate": 1.9315839981688267e-05, - "loss": 0.2255, - "step": 2849 - }, - { - "epoch": 0.14, - "grad_norm": 1.6453793440023385, - "learning_rate": 1.931524113919766e-05, - "loss": 0.2587, - "step": 2850 - }, - { - "epoch": 0.14, - "grad_norm": 1.2808218991790954, - "learning_rate": 1.931464204403061e-05, - "loss": 0.2426, - "step": 2851 - }, - { - "epoch": 0.15, - "grad_norm": 1.1321376470227988, - "learning_rate": 1.931404269620337e-05, - "loss": 0.2192, - "step": 2852 - }, - { - "epoch": 0.15, - "grad_norm": 0.8540458805915087, - "learning_rate": 1.9313443095732197e-05, - "loss": 0.2372, - "step": 2853 - }, - { - "epoch": 0.15, - "grad_norm": 1.629983136852726, - "learning_rate": 1.9312843242633354e-05, - "loss": 0.2371, - "step": 2854 - }, - { - "epoch": 0.15, - "grad_norm": 0.9867139867736547, - "learning_rate": 1.931224313692311e-05, - "loss": 0.2338, - "step": 2855 - }, - { - "epoch": 0.15, - "grad_norm": 1.0737634184425395, - "learning_rate": 1.9311642778617742e-05, - "loss": 0.231, - "step": 2856 - }, - { - "epoch": 0.15, - "grad_norm": 1.03320041635422, - "learning_rate": 1.931104216773354e-05, - "loss": 0.2224, - "step": 2857 - }, - { - "epoch": 0.15, - "grad_norm": 1.1224550744055295, - "learning_rate": 1.9310441304286794e-05, - "loss": 0.2247, - "step": 2858 - }, - { - "epoch": 0.15, - "grad_norm": 0.9518778508395871, - "learning_rate": 1.9309840188293803e-05, - "loss": 0.2337, - "step": 2859 - }, - { - "epoch": 0.15, - "grad_norm": 0.9511248974571589, - "learning_rate": 1.930923881977087e-05, - "loss": 0.2395, - "step": 2860 - }, - { - "epoch": 0.15, - "grad_norm": 1.1447901689296558, - "learning_rate": 1.9308637198734307e-05, - "loss": 0.22, - "step": 2861 - }, - { - "epoch": 0.15, - "grad_norm": 1.1499640522355206, - "learning_rate": 1.9308035325200436e-05, - "loss": 0.242, - "step": 2862 - }, - { - "epoch": 0.15, - "grad_norm": 1.3407711277898409, - "learning_rate": 1.9307433199185582e-05, - "loss": 0.2421, - "step": 2863 - }, - { - "epoch": 0.15, - "grad_norm": 1.0156304338906854, - "learning_rate": 1.9306830820706074e-05, - "loss": 0.2197, - "step": 2864 - }, - { - "epoch": 0.15, - "grad_norm": 1.974378473587215, - "learning_rate": 1.9306228189778255e-05, - "loss": 0.2275, - "step": 2865 - }, - { - "epoch": 0.15, - "grad_norm": 1.0145603045608926, - "learning_rate": 1.930562530641847e-05, - "loss": 0.1958, - "step": 2866 - }, - { - "epoch": 0.15, - "grad_norm": 1.0589342389369067, - "learning_rate": 1.9305022170643077e-05, - "loss": 0.2191, - "step": 2867 - }, - { - "epoch": 0.15, - "grad_norm": 1.0494184592272218, - "learning_rate": 1.9304418782468427e-05, - "loss": 0.2211, - "step": 2868 - }, - { - "epoch": 0.15, - "grad_norm": 0.826301537596647, - "learning_rate": 1.9303815141910894e-05, - "loss": 0.2075, - "step": 2869 - }, - { - "epoch": 0.15, - "grad_norm": 0.8674197245882275, - "learning_rate": 1.930321124898685e-05, - "loss": 0.2213, - "step": 2870 - }, - { - "epoch": 0.15, - "grad_norm": 1.28917194083532, - "learning_rate": 1.930260710371268e-05, - "loss": 0.2432, - "step": 2871 - }, - { - "epoch": 0.15, - "grad_norm": 1.9229131007885853, - "learning_rate": 1.9302002706104762e-05, - "loss": 0.2072, - "step": 2872 - }, - { - "epoch": 0.15, - "grad_norm": 2.9223149186191164, - "learning_rate": 1.9301398056179493e-05, - "loss": 0.2302, - "step": 2873 - }, - { - "epoch": 0.15, - "grad_norm": 1.6633490926322958, - "learning_rate": 1.930079315395328e-05, - "loss": 0.2199, - "step": 2874 - }, - { - "epoch": 0.15, - "grad_norm": 0.9742814069706348, - "learning_rate": 1.930018799944253e-05, - "loss": 0.2316, - "step": 2875 - }, - { - "epoch": 0.15, - "grad_norm": 1.1254691217888606, - "learning_rate": 1.929958259266365e-05, - "loss": 0.2259, - "step": 2876 - }, - { - "epoch": 0.15, - "grad_norm": 0.8419765118945145, - "learning_rate": 1.9298976933633068e-05, - "loss": 0.2367, - "step": 2877 - }, - { - "epoch": 0.15, - "grad_norm": 1.0637810663668488, - "learning_rate": 1.929837102236721e-05, - "loss": 0.2029, - "step": 2878 - }, - { - "epoch": 0.15, - "grad_norm": 1.1969913921398376, - "learning_rate": 1.9297764858882516e-05, - "loss": 0.219, - "step": 2879 - }, - { - "epoch": 0.15, - "grad_norm": 0.9500843843503507, - "learning_rate": 1.929715844319542e-05, - "loss": 0.204, - "step": 2880 - }, - { - "epoch": 0.15, - "grad_norm": 0.9159934453202914, - "learning_rate": 1.9296551775322383e-05, - "loss": 0.2314, - "step": 2881 - }, - { - "epoch": 0.15, - "grad_norm": 0.9819107607663788, - "learning_rate": 1.9295944855279853e-05, - "loss": 0.2259, - "step": 2882 - }, - { - "epoch": 0.15, - "grad_norm": 1.0649590229898775, - "learning_rate": 1.9295337683084292e-05, - "loss": 0.1982, - "step": 2883 - }, - { - "epoch": 0.15, - "grad_norm": 1.2130419725924906, - "learning_rate": 1.929473025875217e-05, - "loss": 0.23, - "step": 2884 - }, - { - "epoch": 0.15, - "grad_norm": 0.9468141626402687, - "learning_rate": 1.9294122582299964e-05, - "loss": 0.2211, - "step": 2885 - }, - { - "epoch": 0.15, - "grad_norm": 1.0795576693617677, - "learning_rate": 1.929351465374416e-05, - "loss": 0.2469, - "step": 2886 - }, - { - "epoch": 0.15, - "grad_norm": 0.7962182832585174, - "learning_rate": 1.9292906473101246e-05, - "loss": 0.1996, - "step": 2887 - }, - { - "epoch": 0.15, - "grad_norm": 0.9714002340563536, - "learning_rate": 1.929229804038772e-05, - "loss": 0.2417, - "step": 2888 - }, - { - "epoch": 0.15, - "grad_norm": 1.09410087899835, - "learning_rate": 1.9291689355620088e-05, - "loss": 0.2343, - "step": 2889 - }, - { - "epoch": 0.15, - "grad_norm": 0.7838595149854972, - "learning_rate": 1.9291080418814852e-05, - "loss": 0.1895, - "step": 2890 - }, - { - "epoch": 0.15, - "grad_norm": 1.274523284116608, - "learning_rate": 1.9290471229988536e-05, - "loss": 0.2156, - "step": 2891 - }, - { - "epoch": 0.15, - "grad_norm": 0.9507973660783633, - "learning_rate": 1.9289861789157666e-05, - "loss": 0.237, - "step": 2892 - }, - { - "epoch": 0.15, - "grad_norm": 1.1444280596229304, - "learning_rate": 1.9289252096338767e-05, - "loss": 0.2345, - "step": 2893 - }, - { - "epoch": 0.15, - "grad_norm": 0.8911921538107122, - "learning_rate": 1.928864215154838e-05, - "loss": 0.2183, - "step": 2894 - }, - { - "epoch": 0.15, - "grad_norm": 0.9689915619745783, - "learning_rate": 1.928803195480305e-05, - "loss": 0.2347, - "step": 2895 - }, - { - "epoch": 0.15, - "grad_norm": 1.1021079330412795, - "learning_rate": 1.9287421506119332e-05, - "loss": 0.2305, - "step": 2896 - }, - { - "epoch": 0.15, - "grad_norm": 1.0599028799692833, - "learning_rate": 1.9286810805513774e-05, - "loss": 0.2447, - "step": 2897 - }, - { - "epoch": 0.15, - "grad_norm": 1.0162405743349308, - "learning_rate": 1.9286199853002956e-05, - "loss": 0.2046, - "step": 2898 - }, - { - "epoch": 0.15, - "grad_norm": 1.1035504592713803, - "learning_rate": 1.928558864860344e-05, - "loss": 0.2172, - "step": 2899 - }, - { - "epoch": 0.15, - "grad_norm": 1.303054013613804, - "learning_rate": 1.9284977192331807e-05, - "loss": 0.2312, - "step": 2900 - }, - { - "epoch": 0.15, - "grad_norm": 1.2167740655890367, - "learning_rate": 1.9284365484204645e-05, - "loss": 0.2389, - "step": 2901 - }, - { - "epoch": 0.15, - "grad_norm": 1.3669197391194035, - "learning_rate": 1.9283753524238542e-05, - "loss": 0.2293, - "step": 2902 - }, - { - "epoch": 0.15, - "grad_norm": 1.1895888740056975, - "learning_rate": 1.92831413124501e-05, - "loss": 0.2241, - "step": 2903 - }, - { - "epoch": 0.15, - "grad_norm": 1.2522969542985063, - "learning_rate": 1.9282528848855925e-05, - "loss": 0.2601, - "step": 2904 - }, - { - "epoch": 0.15, - "grad_norm": 1.302544121030843, - "learning_rate": 1.9281916133472636e-05, - "loss": 0.1939, - "step": 2905 - }, - { - "epoch": 0.15, - "grad_norm": 1.0861778850757935, - "learning_rate": 1.9281303166316846e-05, - "loss": 0.2684, - "step": 2906 - }, - { - "epoch": 0.15, - "grad_norm": 1.2882850775483696, - "learning_rate": 1.928068994740518e-05, - "loss": 0.2334, - "step": 2907 - }, - { - "epoch": 0.15, - "grad_norm": 1.2321169609007878, - "learning_rate": 1.9280076476754276e-05, - "loss": 0.2164, - "step": 2908 - }, - { - "epoch": 0.15, - "grad_norm": 1.6514277681396714, - "learning_rate": 1.9279462754380774e-05, - "loss": 0.2389, - "step": 2909 - }, - { - "epoch": 0.15, - "grad_norm": 1.1270106887925726, - "learning_rate": 1.927884878030132e-05, - "loss": 0.2225, - "step": 2910 - }, - { - "epoch": 0.15, - "grad_norm": 1.8445104404776635, - "learning_rate": 1.927823455453257e-05, - "loss": 0.2282, - "step": 2911 - }, - { - "epoch": 0.15, - "grad_norm": 1.1310180099501284, - "learning_rate": 1.9277620077091184e-05, - "loss": 0.2357, - "step": 2912 - }, - { - "epoch": 0.15, - "grad_norm": 1.1734832772791386, - "learning_rate": 1.927700534799383e-05, - "loss": 0.2436, - "step": 2913 - }, - { - "epoch": 0.15, - "grad_norm": 1.055450159546421, - "learning_rate": 1.927639036725718e-05, - "loss": 0.2293, - "step": 2914 - }, - { - "epoch": 0.15, - "grad_norm": 1.235235898756223, - "learning_rate": 1.927577513489792e-05, - "loss": 0.2186, - "step": 2915 - }, - { - "epoch": 0.15, - "grad_norm": 1.0584009839980613, - "learning_rate": 1.927515965093273e-05, - "loss": 0.2259, - "step": 2916 - }, - { - "epoch": 0.15, - "grad_norm": 1.0460374119343292, - "learning_rate": 1.9274543915378315e-05, - "loss": 0.2276, - "step": 2917 - }, - { - "epoch": 0.15, - "grad_norm": 1.670885330919633, - "learning_rate": 1.927392792825137e-05, - "loss": 0.2071, - "step": 2918 - }, - { - "epoch": 0.15, - "grad_norm": 1.0249029467969002, - "learning_rate": 1.927331168956861e-05, - "loss": 0.2114, - "step": 2919 - }, - { - "epoch": 0.15, - "grad_norm": 0.9630012806325837, - "learning_rate": 1.9272695199346743e-05, - "loss": 0.2633, - "step": 2920 - }, - { - "epoch": 0.15, - "grad_norm": 1.0913292371471266, - "learning_rate": 1.92720784576025e-05, - "loss": 0.2235, - "step": 2921 - }, - { - "epoch": 0.15, - "grad_norm": 0.8716918819429021, - "learning_rate": 1.92714614643526e-05, - "loss": 0.2295, - "step": 2922 - }, - { - "epoch": 0.15, - "grad_norm": 1.1952869315038404, - "learning_rate": 1.9270844219613785e-05, - "loss": 0.2343, - "step": 2923 - }, - { - "epoch": 0.15, - "grad_norm": 0.9901067479541756, - "learning_rate": 1.9270226723402798e-05, - "loss": 0.2148, - "step": 2924 - }, - { - "epoch": 0.15, - "grad_norm": 0.9465534365045836, - "learning_rate": 1.926960897573639e-05, - "loss": 0.2189, - "step": 2925 - }, - { - "epoch": 0.15, - "grad_norm": 0.9050720063547557, - "learning_rate": 1.926899097663131e-05, - "loss": 0.205, - "step": 2926 - }, - { - "epoch": 0.15, - "grad_norm": 1.3984753431326877, - "learning_rate": 1.926837272610433e-05, - "loss": 0.2278, - "step": 2927 - }, - { - "epoch": 0.15, - "grad_norm": 1.331648313954839, - "learning_rate": 1.9267754224172216e-05, - "loss": 0.2252, - "step": 2928 - }, - { - "epoch": 0.15, - "grad_norm": 1.3474581608803735, - "learning_rate": 1.926713547085174e-05, - "loss": 0.2242, - "step": 2929 - }, - { - "epoch": 0.15, - "grad_norm": 1.1764702056906664, - "learning_rate": 1.9266516466159697e-05, - "loss": 0.2275, - "step": 2930 - }, - { - "epoch": 0.15, - "grad_norm": 0.9102442675036209, - "learning_rate": 1.9265897210112868e-05, - "loss": 0.209, - "step": 2931 - }, - { - "epoch": 0.15, - "grad_norm": 0.9579038746913898, - "learning_rate": 1.9265277702728058e-05, - "loss": 0.2238, - "step": 2932 - }, - { - "epoch": 0.15, - "grad_norm": 1.087611433992817, - "learning_rate": 1.9264657944022063e-05, - "loss": 0.2219, - "step": 2933 - }, - { - "epoch": 0.15, - "grad_norm": 0.9513234172795997, - "learning_rate": 1.92640379340117e-05, - "loss": 0.2293, - "step": 2934 - }, - { - "epoch": 0.15, - "grad_norm": 1.035375087455176, - "learning_rate": 1.9263417672713786e-05, - "loss": 0.2013, - "step": 2935 - }, - { - "epoch": 0.15, - "grad_norm": 0.9867731544458642, - "learning_rate": 1.926279716014514e-05, - "loss": 0.2239, - "step": 2936 - }, - { - "epoch": 0.15, - "grad_norm": 1.004772357407247, - "learning_rate": 1.92621763963226e-05, - "loss": 0.2428, - "step": 2937 - }, - { - "epoch": 0.15, - "grad_norm": 1.2468846047363114, - "learning_rate": 1.9261555381263003e-05, - "loss": 0.264, - "step": 2938 - }, - { - "epoch": 0.15, - "grad_norm": 1.3487744229263021, - "learning_rate": 1.926093411498319e-05, - "loss": 0.2162, - "step": 2939 - }, - { - "epoch": 0.15, - "grad_norm": 1.2469162410816559, - "learning_rate": 1.926031259750002e-05, - "loss": 0.2244, - "step": 2940 - }, - { - "epoch": 0.15, - "grad_norm": 1.1165749330292827, - "learning_rate": 1.9259690828830345e-05, - "loss": 0.2609, - "step": 2941 - }, - { - "epoch": 0.15, - "grad_norm": 0.879048795841088, - "learning_rate": 1.925906880899104e-05, - "loss": 0.2225, - "step": 2942 - }, - { - "epoch": 0.15, - "grad_norm": 1.1138683990180442, - "learning_rate": 1.9258446537998964e-05, - "loss": 0.209, - "step": 2943 - }, - { - "epoch": 0.15, - "grad_norm": 1.563574900308686, - "learning_rate": 1.9257824015871005e-05, - "loss": 0.2385, - "step": 2944 - }, - { - "epoch": 0.15, - "grad_norm": 1.8906959033595399, - "learning_rate": 1.9257201242624045e-05, - "loss": 0.2098, - "step": 2945 - }, - { - "epoch": 0.15, - "grad_norm": 1.030668698944916, - "learning_rate": 1.925657821827498e-05, - "loss": 0.2218, - "step": 2946 - }, - { - "epoch": 0.15, - "grad_norm": 1.096503438378198, - "learning_rate": 1.9255954942840706e-05, - "loss": 0.2421, - "step": 2947 - }, - { - "epoch": 0.15, - "grad_norm": 1.2449681975950357, - "learning_rate": 1.9255331416338134e-05, - "loss": 0.2259, - "step": 2948 - }, - { - "epoch": 0.15, - "grad_norm": 1.067845526085949, - "learning_rate": 1.9254707638784174e-05, - "loss": 0.2051, - "step": 2949 - }, - { - "epoch": 0.15, - "grad_norm": 1.645151745527325, - "learning_rate": 1.9254083610195745e-05, - "loss": 0.2271, - "step": 2950 - }, - { - "epoch": 0.15, - "grad_norm": 1.440117632626847, - "learning_rate": 1.9253459330589776e-05, - "loss": 0.2204, - "step": 2951 - }, - { - "epoch": 0.15, - "grad_norm": 1.294112671200197, - "learning_rate": 1.9252834799983197e-05, - "loss": 0.2179, - "step": 2952 - }, - { - "epoch": 0.15, - "grad_norm": 1.0002684462624296, - "learning_rate": 1.9252210018392957e-05, - "loss": 0.2398, - "step": 2953 - }, - { - "epoch": 0.15, - "grad_norm": 0.9685935114284663, - "learning_rate": 1.9251584985835996e-05, - "loss": 0.2193, - "step": 2954 - }, - { - "epoch": 0.15, - "grad_norm": 1.0508821576329554, - "learning_rate": 1.9250959702329268e-05, - "loss": 0.2246, - "step": 2955 - }, - { - "epoch": 0.15, - "grad_norm": 0.8610381375921086, - "learning_rate": 1.9250334167889737e-05, - "loss": 0.2236, - "step": 2956 - }, - { - "epoch": 0.15, - "grad_norm": 1.260320103814766, - "learning_rate": 1.9249708382534372e-05, - "loss": 0.2235, - "step": 2957 - }, - { - "epoch": 0.15, - "grad_norm": 1.070880397566758, - "learning_rate": 1.924908234628014e-05, - "loss": 0.2206, - "step": 2958 - }, - { - "epoch": 0.15, - "grad_norm": 0.8849634135287198, - "learning_rate": 1.9248456059144028e-05, - "loss": 0.2156, - "step": 2959 - }, - { - "epoch": 0.15, - "grad_norm": 0.8334624823803712, - "learning_rate": 1.9247829521143023e-05, - "loss": 0.2195, - "step": 2960 - }, - { - "epoch": 0.15, - "grad_norm": 0.9487501038077363, - "learning_rate": 1.924720273229412e-05, - "loss": 0.2473, - "step": 2961 - }, - { - "epoch": 0.15, - "grad_norm": 3.294794933510296, - "learning_rate": 1.9246575692614323e-05, - "loss": 0.2442, - "step": 2962 - }, - { - "epoch": 0.15, - "grad_norm": 1.2417909542707162, - "learning_rate": 1.9245948402120634e-05, - "loss": 0.2306, - "step": 2963 - }, - { - "epoch": 0.15, - "grad_norm": 1.0910392732978642, - "learning_rate": 1.9245320860830075e-05, - "loss": 0.2041, - "step": 2964 - }, - { - "epoch": 0.15, - "grad_norm": 0.9316213997274377, - "learning_rate": 1.9244693068759668e-05, - "loss": 0.2398, - "step": 2965 - }, - { - "epoch": 0.15, - "grad_norm": 1.3031353937967904, - "learning_rate": 1.9244065025926434e-05, - "loss": 0.2096, - "step": 2966 - }, - { - "epoch": 0.15, - "grad_norm": 0.7483928647007118, - "learning_rate": 1.9243436732347418e-05, - "loss": 0.2105, - "step": 2967 - }, - { - "epoch": 0.15, - "grad_norm": 1.2147163505835121, - "learning_rate": 1.9242808188039658e-05, - "loss": 0.242, - "step": 2968 - }, - { - "epoch": 0.15, - "grad_norm": 0.9165976744588308, - "learning_rate": 1.92421793930202e-05, - "loss": 0.2287, - "step": 2969 - }, - { - "epoch": 0.15, - "grad_norm": 1.129164002317354, - "learning_rate": 1.924155034730611e-05, - "loss": 0.2114, - "step": 2970 - }, - { - "epoch": 0.15, - "grad_norm": 0.8668190231085972, - "learning_rate": 1.924092105091444e-05, - "loss": 0.219, - "step": 2971 - }, - { - "epoch": 0.15, - "grad_norm": 0.8988161489801656, - "learning_rate": 1.9240291503862266e-05, - "loss": 0.2329, - "step": 2972 - }, - { - "epoch": 0.15, - "grad_norm": 1.5519469682120905, - "learning_rate": 1.9239661706166663e-05, - "loss": 0.2372, - "step": 2973 - }, - { - "epoch": 0.15, - "grad_norm": 0.9317913944468382, - "learning_rate": 1.9239031657844718e-05, - "loss": 0.2376, - "step": 2974 - }, - { - "epoch": 0.15, - "grad_norm": 1.0039810802977571, - "learning_rate": 1.9238401358913513e-05, - "loss": 0.2426, - "step": 2975 - }, - { - "epoch": 0.15, - "grad_norm": 0.8406615489980125, - "learning_rate": 1.923777080939015e-05, - "loss": 0.2081, - "step": 2976 - }, - { - "epoch": 0.15, - "grad_norm": 0.9565145582443978, - "learning_rate": 1.9237140009291733e-05, - "loss": 0.2146, - "step": 2977 - }, - { - "epoch": 0.15, - "grad_norm": 0.9123321433259106, - "learning_rate": 1.9236508958635372e-05, - "loss": 0.2132, - "step": 2978 - }, - { - "epoch": 0.15, - "grad_norm": 1.0533861024137494, - "learning_rate": 1.923587765743818e-05, - "loss": 0.1994, - "step": 2979 - }, - { - "epoch": 0.15, - "grad_norm": 1.2296651007941344, - "learning_rate": 1.923524610571729e-05, - "loss": 0.2353, - "step": 2980 - }, - { - "epoch": 0.15, - "grad_norm": 1.0783244560851948, - "learning_rate": 1.9234614303489823e-05, - "loss": 0.2487, - "step": 2981 - }, - { - "epoch": 0.15, - "grad_norm": 1.1787683750063183, - "learning_rate": 1.9233982250772927e-05, - "loss": 0.247, - "step": 2982 - }, - { - "epoch": 0.15, - "grad_norm": 1.0130160636525272, - "learning_rate": 1.9233349947583735e-05, - "loss": 0.2336, - "step": 2983 - }, - { - "epoch": 0.15, - "grad_norm": 1.4574101701417077, - "learning_rate": 1.923271739393941e-05, - "loss": 0.2048, - "step": 2984 - }, - { - "epoch": 0.15, - "grad_norm": 1.3197654603803377, - "learning_rate": 1.9232084589857103e-05, - "loss": 0.2364, - "step": 2985 - }, - { - "epoch": 0.15, - "grad_norm": 1.207157479828292, - "learning_rate": 1.9231451535353977e-05, - "loss": 0.2134, - "step": 2986 - }, - { - "epoch": 0.15, - "grad_norm": 0.9671518118956477, - "learning_rate": 1.9230818230447207e-05, - "loss": 0.209, - "step": 2987 - }, - { - "epoch": 0.15, - "grad_norm": 0.954083161294547, - "learning_rate": 1.9230184675153974e-05, - "loss": 0.2262, - "step": 2988 - }, - { - "epoch": 0.15, - "grad_norm": 1.0068100034817131, - "learning_rate": 1.9229550869491456e-05, - "loss": 0.2074, - "step": 2989 - }, - { - "epoch": 0.15, - "grad_norm": 1.2304033656255684, - "learning_rate": 1.9228916813476855e-05, - "loss": 0.2365, - "step": 2990 - }, - { - "epoch": 0.15, - "grad_norm": 1.1480477035501895, - "learning_rate": 1.922828250712736e-05, - "loss": 0.2176, - "step": 2991 - }, - { - "epoch": 0.15, - "grad_norm": 1.6072046871345662, - "learning_rate": 1.9227647950460184e-05, - "loss": 0.2173, - "step": 2992 - }, - { - "epoch": 0.15, - "grad_norm": 1.510713170555769, - "learning_rate": 1.9227013143492534e-05, - "loss": 0.2311, - "step": 2993 - }, - { - "epoch": 0.15, - "grad_norm": 1.607032952016177, - "learning_rate": 1.922637808624163e-05, - "loss": 0.2301, - "step": 2994 - }, - { - "epoch": 0.15, - "grad_norm": 1.2996906089038012, - "learning_rate": 1.92257427787247e-05, - "loss": 0.2596, - "step": 2995 - }, - { - "epoch": 0.15, - "grad_norm": 1.0050341574660457, - "learning_rate": 1.922510722095898e-05, - "loss": 0.2361, - "step": 2996 - }, - { - "epoch": 0.15, - "grad_norm": 0.9414520504409785, - "learning_rate": 1.92244714129617e-05, - "loss": 0.2298, - "step": 2997 - }, - { - "epoch": 0.15, - "grad_norm": 0.9724450849054851, - "learning_rate": 1.9223835354750117e-05, - "loss": 0.2228, - "step": 2998 - }, - { - "epoch": 0.15, - "grad_norm": 0.9975235707500363, - "learning_rate": 1.9223199046341477e-05, - "loss": 0.2471, - "step": 2999 - }, - { - "epoch": 0.15, - "grad_norm": 0.9784518821562274, - "learning_rate": 1.922256248775304e-05, - "loss": 0.222, - "step": 3000 - }, - { - "epoch": 0.15, - "grad_norm": 0.8459640070005651, - "learning_rate": 1.9221925679002076e-05, - "loss": 0.2217, - "step": 3001 - }, - { - "epoch": 0.15, - "grad_norm": 1.255243443188498, - "learning_rate": 1.9221288620105857e-05, - "loss": 0.2356, - "step": 3002 - }, - { - "epoch": 0.15, - "grad_norm": 0.8893090922997737, - "learning_rate": 1.9220651311081666e-05, - "loss": 0.2279, - "step": 3003 - }, - { - "epoch": 0.15, - "grad_norm": 0.9402338401958679, - "learning_rate": 1.922001375194678e-05, - "loss": 0.2093, - "step": 3004 - }, - { - "epoch": 0.15, - "grad_norm": 0.8455121896699624, - "learning_rate": 1.9219375942718508e-05, - "loss": 0.2226, - "step": 3005 - }, - { - "epoch": 0.15, - "grad_norm": 1.2765300051535464, - "learning_rate": 1.921873788341414e-05, - "loss": 0.2542, - "step": 3006 - }, - { - "epoch": 0.15, - "grad_norm": 1.2140638684126275, - "learning_rate": 1.9218099574050985e-05, - "loss": 0.2534, - "step": 3007 - }, - { - "epoch": 0.15, - "grad_norm": 1.0750642375996193, - "learning_rate": 1.9217461014646362e-05, - "loss": 0.2138, - "step": 3008 - }, - { - "epoch": 0.15, - "grad_norm": 1.246919525779567, - "learning_rate": 1.9216822205217586e-05, - "loss": 0.2293, - "step": 3009 - }, - { - "epoch": 0.15, - "grad_norm": 1.0217795188677212, - "learning_rate": 1.9216183145781984e-05, - "loss": 0.2211, - "step": 3010 - }, - { - "epoch": 0.15, - "grad_norm": 1.587256853484376, - "learning_rate": 1.92155438363569e-05, - "loss": 0.2676, - "step": 3011 - }, - { - "epoch": 0.15, - "grad_norm": 1.1552003644377378, - "learning_rate": 1.9214904276959664e-05, - "loss": 0.2202, - "step": 3012 - }, - { - "epoch": 0.15, - "grad_norm": 1.110685839383147, - "learning_rate": 1.921426446760763e-05, - "loss": 0.2331, - "step": 3013 - }, - { - "epoch": 0.15, - "grad_norm": 1.7931555285092697, - "learning_rate": 1.9213624408318155e-05, - "loss": 0.2201, - "step": 3014 - }, - { - "epoch": 0.15, - "grad_norm": 1.2477592588381938, - "learning_rate": 1.9212984099108594e-05, - "loss": 0.2276, - "step": 3015 - }, - { - "epoch": 0.15, - "grad_norm": 1.4367511021452408, - "learning_rate": 1.921234353999632e-05, - "loss": 0.2284, - "step": 3016 - }, - { - "epoch": 0.15, - "grad_norm": 1.306302205604074, - "learning_rate": 1.921170273099871e-05, - "loss": 0.2194, - "step": 3017 - }, - { - "epoch": 0.15, - "grad_norm": 1.0782994679740574, - "learning_rate": 1.921106167213314e-05, - "loss": 0.2372, - "step": 3018 - }, - { - "epoch": 0.15, - "grad_norm": 1.0505196944402984, - "learning_rate": 1.9210420363417e-05, - "loss": 0.2144, - "step": 3019 - }, - { - "epoch": 0.15, - "grad_norm": 1.9353288519979084, - "learning_rate": 1.920977880486769e-05, - "loss": 0.2283, - "step": 3020 - }, - { - "epoch": 0.15, - "grad_norm": 0.974558823252189, - "learning_rate": 1.920913699650261e-05, - "loss": 0.217, - "step": 3021 - }, - { - "epoch": 0.15, - "grad_norm": 0.9563677813824926, - "learning_rate": 1.920849493833917e-05, - "loss": 0.2466, - "step": 3022 - }, - { - "epoch": 0.15, - "grad_norm": 0.9999446931006459, - "learning_rate": 1.9207852630394782e-05, - "loss": 0.1926, - "step": 3023 - }, - { - "epoch": 0.15, - "grad_norm": 1.1905647435368694, - "learning_rate": 1.920721007268687e-05, - "loss": 0.211, - "step": 3024 - }, - { - "epoch": 0.15, - "grad_norm": 0.9547156412821325, - "learning_rate": 1.9206567265232867e-05, - "loss": 0.2114, - "step": 3025 - }, - { - "epoch": 0.15, - "grad_norm": 1.7980185646523026, - "learning_rate": 1.920592420805021e-05, - "loss": 0.2466, - "step": 3026 - }, - { - "epoch": 0.15, - "grad_norm": 1.160882813189863, - "learning_rate": 1.9205280901156332e-05, - "loss": 0.203, - "step": 3027 - }, - { - "epoch": 0.15, - "grad_norm": 1.482553328977277, - "learning_rate": 1.9204637344568694e-05, - "loss": 0.2092, - "step": 3028 - }, - { - "epoch": 0.15, - "grad_norm": 1.8412106230735823, - "learning_rate": 1.920399353830475e-05, - "loss": 0.2172, - "step": 3029 - }, - { - "epoch": 0.15, - "grad_norm": 1.029983017735956, - "learning_rate": 1.920334948238196e-05, - "loss": 0.2332, - "step": 3030 - }, - { - "epoch": 0.15, - "grad_norm": 0.9229551534640521, - "learning_rate": 1.9202705176817794e-05, - "loss": 0.2158, - "step": 3031 - }, - { - "epoch": 0.15, - "grad_norm": 1.1423176753684943, - "learning_rate": 1.920206062162973e-05, - "loss": 0.2294, - "step": 3032 - }, - { - "epoch": 0.15, - "grad_norm": 0.9970226971429255, - "learning_rate": 1.9201415816835254e-05, - "loss": 0.2271, - "step": 3033 - }, - { - "epoch": 0.15, - "grad_norm": 1.1127263130100777, - "learning_rate": 1.9200770762451854e-05, - "loss": 0.2265, - "step": 3034 - }, - { - "epoch": 0.15, - "grad_norm": 0.9070022790029587, - "learning_rate": 1.9200125458497025e-05, - "loss": 0.226, - "step": 3035 - }, - { - "epoch": 0.15, - "grad_norm": 1.4140524654210664, - "learning_rate": 1.9199479904988277e-05, - "loss": 0.2193, - "step": 3036 - }, - { - "epoch": 0.15, - "grad_norm": 1.2310860599371727, - "learning_rate": 1.9198834101943115e-05, - "loss": 0.2257, - "step": 3037 - }, - { - "epoch": 0.15, - "grad_norm": 1.2545161274463466, - "learning_rate": 1.9198188049379055e-05, - "loss": 0.2317, - "step": 3038 - }, - { - "epoch": 0.15, - "grad_norm": 1.0034022129693965, - "learning_rate": 1.919754174731363e-05, - "loss": 0.235, - "step": 3039 - }, - { - "epoch": 0.15, - "grad_norm": 1.114243885076107, - "learning_rate": 1.9196895195764363e-05, - "loss": 0.243, - "step": 3040 - }, - { - "epoch": 0.15, - "grad_norm": 1.0156746704279098, - "learning_rate": 1.9196248394748794e-05, - "loss": 0.2241, - "step": 3041 - }, - { - "epoch": 0.15, - "grad_norm": 1.0509811949541532, - "learning_rate": 1.919560134428447e-05, - "loss": 0.2409, - "step": 3042 - }, - { - "epoch": 0.15, - "grad_norm": 0.9396601478975011, - "learning_rate": 1.919495404438894e-05, - "loss": 0.2226, - "step": 3043 - }, - { - "epoch": 0.15, - "grad_norm": 4.312189831656218, - "learning_rate": 1.919430649507976e-05, - "loss": 0.2228, - "step": 3044 - }, - { - "epoch": 0.15, - "grad_norm": 1.1669179726287418, - "learning_rate": 1.9193658696374498e-05, - "loss": 0.2098, - "step": 3045 - }, - { - "epoch": 0.15, - "grad_norm": 1.053822503469039, - "learning_rate": 1.9193010648290725e-05, - "loss": 0.219, - "step": 3046 - }, - { - "epoch": 0.15, - "grad_norm": 2.897292649110749, - "learning_rate": 1.919236235084602e-05, - "loss": 0.2268, - "step": 3047 - }, - { - "epoch": 0.15, - "grad_norm": 1.1825640096018137, - "learning_rate": 1.9191713804057965e-05, - "loss": 0.2277, - "step": 3048 - }, - { - "epoch": 0.16, - "grad_norm": 1.0258482915835343, - "learning_rate": 1.9191065007944153e-05, - "loss": 0.229, - "step": 3049 - }, - { - "epoch": 0.16, - "grad_norm": 1.0150255181566332, - "learning_rate": 1.9190415962522186e-05, - "loss": 0.2122, - "step": 3050 - }, - { - "epoch": 0.16, - "grad_norm": 1.221461841386964, - "learning_rate": 1.9189766667809667e-05, - "loss": 0.2288, - "step": 3051 - }, - { - "epoch": 0.16, - "grad_norm": 1.0912579290474678, - "learning_rate": 1.9189117123824208e-05, - "loss": 0.2157, - "step": 3052 - }, - { - "epoch": 0.16, - "grad_norm": 1.092173960673062, - "learning_rate": 1.9188467330583428e-05, - "loss": 0.2197, - "step": 3053 - }, - { - "epoch": 0.16, - "grad_norm": 1.2105441945424267, - "learning_rate": 1.918781728810495e-05, - "loss": 0.2308, - "step": 3054 - }, - { - "epoch": 0.16, - "grad_norm": 1.0786894300997212, - "learning_rate": 1.9187166996406413e-05, - "loss": 0.2213, - "step": 3055 - }, - { - "epoch": 0.16, - "grad_norm": 1.0532621519315912, - "learning_rate": 1.918651645550545e-05, - "loss": 0.2421, - "step": 3056 - }, - { - "epoch": 0.16, - "grad_norm": 0.9811524305129725, - "learning_rate": 1.9185865665419708e-05, - "loss": 0.2341, - "step": 3057 - }, - { - "epoch": 0.16, - "grad_norm": 1.0652674203246557, - "learning_rate": 1.9185214626166845e-05, - "loss": 0.2249, - "step": 3058 - }, - { - "epoch": 0.16, - "grad_norm": 0.9858359011987853, - "learning_rate": 1.9184563337764516e-05, - "loss": 0.22, - "step": 3059 - }, - { - "epoch": 0.16, - "grad_norm": 0.9134652124007379, - "learning_rate": 1.9183911800230384e-05, - "loss": 0.2318, - "step": 3060 - }, - { - "epoch": 0.16, - "grad_norm": 1.2143347015760846, - "learning_rate": 1.9183260013582126e-05, - "loss": 0.2309, - "step": 3061 - }, - { - "epoch": 0.16, - "grad_norm": 0.9738448412568219, - "learning_rate": 1.9182607977837424e-05, - "loss": 0.2371, - "step": 3062 - }, - { - "epoch": 0.16, - "grad_norm": 0.7966606408722735, - "learning_rate": 1.9181955693013962e-05, - "loss": 0.2107, - "step": 3063 - }, - { - "epoch": 0.16, - "grad_norm": 1.4371746934422849, - "learning_rate": 1.918130315912943e-05, - "loss": 0.2405, - "step": 3064 - }, - { - "epoch": 0.16, - "grad_norm": 0.9880342880142227, - "learning_rate": 1.9180650376201536e-05, - "loss": 0.2291, - "step": 3065 - }, - { - "epoch": 0.16, - "grad_norm": 1.0844528067687451, - "learning_rate": 1.917999734424798e-05, - "loss": 0.2339, - "step": 3066 - }, - { - "epoch": 0.16, - "grad_norm": 1.273986948076706, - "learning_rate": 1.9179344063286475e-05, - "loss": 0.2195, - "step": 3067 - }, - { - "epoch": 0.16, - "grad_norm": 0.8814553758365897, - "learning_rate": 1.917869053333475e-05, - "loss": 0.2086, - "step": 3068 - }, - { - "epoch": 0.16, - "grad_norm": 1.077182563935586, - "learning_rate": 1.9178036754410518e-05, - "loss": 0.2181, - "step": 3069 - }, - { - "epoch": 0.16, - "grad_norm": 0.9425617308554278, - "learning_rate": 1.9177382726531527e-05, - "loss": 0.2067, - "step": 3070 - }, - { - "epoch": 0.16, - "grad_norm": 1.013551998686744, - "learning_rate": 1.9176728449715506e-05, - "loss": 0.2191, - "step": 3071 - }, - { - "epoch": 0.16, - "grad_norm": 1.0285491895566228, - "learning_rate": 1.9176073923980212e-05, - "loss": 0.2442, - "step": 3072 - }, - { - "epoch": 0.16, - "grad_norm": 1.0493606012843992, - "learning_rate": 1.917541914934339e-05, - "loss": 0.206, - "step": 3073 - }, - { - "epoch": 0.16, - "grad_norm": 1.2117368635392565, - "learning_rate": 1.917476412582281e-05, - "loss": 0.2272, - "step": 3074 - }, - { - "epoch": 0.16, - "grad_norm": 1.5386868025963205, - "learning_rate": 1.9174108853436234e-05, - "loss": 0.2331, - "step": 3075 - }, - { - "epoch": 0.16, - "grad_norm": 1.311440053988242, - "learning_rate": 1.9173453332201436e-05, - "loss": 0.2102, - "step": 3076 - }, - { - "epoch": 0.16, - "grad_norm": 1.4581511839772705, - "learning_rate": 1.91727975621362e-05, - "loss": 0.2173, - "step": 3077 - }, - { - "epoch": 0.16, - "grad_norm": 0.9644674644204856, - "learning_rate": 1.917214154325831e-05, - "loss": 0.2111, - "step": 3078 - }, - { - "epoch": 0.16, - "grad_norm": 1.1068412843688058, - "learning_rate": 1.917148527558556e-05, - "loss": 0.2383, - "step": 3079 - }, - { - "epoch": 0.16, - "grad_norm": 1.1228544945994425, - "learning_rate": 1.917082875913576e-05, - "loss": 0.2257, - "step": 3080 - }, - { - "epoch": 0.16, - "grad_norm": 1.1290992454356428, - "learning_rate": 1.9170171993926708e-05, - "loss": 0.2302, - "step": 3081 - }, - { - "epoch": 0.16, - "grad_norm": 1.5171743857569169, - "learning_rate": 1.9169514979976224e-05, - "loss": 0.2399, - "step": 3082 - }, - { - "epoch": 0.16, - "grad_norm": 1.179035837795795, - "learning_rate": 1.9168857717302128e-05, - "loss": 0.2181, - "step": 3083 - }, - { - "epoch": 0.16, - "grad_norm": 0.9525048557256401, - "learning_rate": 1.9168200205922248e-05, - "loss": 0.2206, - "step": 3084 - }, - { - "epoch": 0.16, - "grad_norm": 0.9192591243102435, - "learning_rate": 1.916754244585442e-05, - "loss": 0.2114, - "step": 3085 - }, - { - "epoch": 0.16, - "grad_norm": 1.2411058008519829, - "learning_rate": 1.9166884437116486e-05, - "loss": 0.2307, - "step": 3086 - }, - { - "epoch": 0.16, - "grad_norm": 1.4298363741722075, - "learning_rate": 1.9166226179726294e-05, - "loss": 0.2412, - "step": 3087 - }, - { - "epoch": 0.16, - "grad_norm": 1.051321479238389, - "learning_rate": 1.9165567673701696e-05, - "loss": 0.2105, - "step": 3088 - }, - { - "epoch": 0.16, - "grad_norm": 1.1983398040289723, - "learning_rate": 1.9164908919060562e-05, - "loss": 0.2134, - "step": 3089 - }, - { - "epoch": 0.16, - "grad_norm": 0.9650459782933238, - "learning_rate": 1.9164249915820753e-05, - "loss": 0.2188, - "step": 3090 - }, - { - "epoch": 0.16, - "grad_norm": 1.1929879231929374, - "learning_rate": 1.9163590664000145e-05, - "loss": 0.2292, - "step": 3091 - }, - { - "epoch": 0.16, - "grad_norm": 0.9577148201922246, - "learning_rate": 1.916293116361663e-05, - "loss": 0.2178, - "step": 3092 - }, - { - "epoch": 0.16, - "grad_norm": 1.0158885393736095, - "learning_rate": 1.916227141468808e-05, - "loss": 0.2073, - "step": 3093 - }, - { - "epoch": 0.16, - "grad_norm": 1.2815901739893645, - "learning_rate": 1.9161611417232407e-05, - "loss": 0.1999, - "step": 3094 - }, - { - "epoch": 0.16, - "grad_norm": 1.197536993244301, - "learning_rate": 1.9160951171267508e-05, - "loss": 0.2276, - "step": 3095 - }, - { - "epoch": 0.16, - "grad_norm": 1.2323433276514397, - "learning_rate": 1.9160290676811288e-05, - "loss": 0.2112, - "step": 3096 - }, - { - "epoch": 0.16, - "grad_norm": 1.119380261757278, - "learning_rate": 1.9159629933881666e-05, - "loss": 0.2015, - "step": 3097 - }, - { - "epoch": 0.16, - "grad_norm": 1.045749768250887, - "learning_rate": 1.915896894249657e-05, - "loss": 0.2191, - "step": 3098 - }, - { - "epoch": 0.16, - "grad_norm": 0.9658151735731435, - "learning_rate": 1.9158307702673917e-05, - "loss": 0.2234, - "step": 3099 - }, - { - "epoch": 0.16, - "grad_norm": 1.9059897342936998, - "learning_rate": 1.9157646214431653e-05, - "loss": 0.2434, - "step": 3100 - }, - { - "epoch": 0.16, - "grad_norm": 1.1570549601192839, - "learning_rate": 1.9156984477787717e-05, - "loss": 0.2198, - "step": 3101 - }, - { - "epoch": 0.16, - "grad_norm": 1.142206977481465, - "learning_rate": 1.9156322492760064e-05, - "loss": 0.2396, - "step": 3102 - }, - { - "epoch": 0.16, - "grad_norm": 1.4612871008091388, - "learning_rate": 1.915566025936664e-05, - "loss": 0.2434, - "step": 3103 - }, - { - "epoch": 0.16, - "grad_norm": 1.161012691350098, - "learning_rate": 1.9154997777625418e-05, - "loss": 0.2316, - "step": 3104 - }, - { - "epoch": 0.16, - "grad_norm": 1.0674154619263267, - "learning_rate": 1.9154335047554364e-05, - "loss": 0.2143, - "step": 3105 - }, - { - "epoch": 0.16, - "grad_norm": 1.420357609574483, - "learning_rate": 1.9153672069171454e-05, - "loss": 0.222, - "step": 3106 - }, - { - "epoch": 0.16, - "grad_norm": 1.1620397000841887, - "learning_rate": 1.9153008842494673e-05, - "loss": 0.235, - "step": 3107 - }, - { - "epoch": 0.16, - "grad_norm": 1.1933801258599444, - "learning_rate": 1.9152345367542008e-05, - "loss": 0.221, - "step": 3108 - }, - { - "epoch": 0.16, - "grad_norm": 0.92162970400094, - "learning_rate": 1.915168164433146e-05, - "loss": 0.2049, - "step": 3109 - }, - { - "epoch": 0.16, - "grad_norm": 1.0999983754740923, - "learning_rate": 1.9151017672881032e-05, - "loss": 0.2288, - "step": 3110 - }, - { - "epoch": 0.16, - "grad_norm": 1.7372094733382606, - "learning_rate": 1.915035345320873e-05, - "loss": 0.262, - "step": 3111 - }, - { - "epoch": 0.16, - "grad_norm": 1.158268282961799, - "learning_rate": 1.9149688985332575e-05, - "loss": 0.2077, - "step": 3112 - }, - { - "epoch": 0.16, - "grad_norm": 1.0296742546521, - "learning_rate": 1.914902426927059e-05, - "loss": 0.2285, - "step": 3113 - }, - { - "epoch": 0.16, - "grad_norm": 0.8850667462372587, - "learning_rate": 1.9148359305040802e-05, - "loss": 0.223, - "step": 3114 - }, - { - "epoch": 0.16, - "grad_norm": 1.1648725757139722, - "learning_rate": 1.9147694092661254e-05, - "loss": 0.222, - "step": 3115 - }, - { - "epoch": 0.16, - "grad_norm": 1.1864890985053547, - "learning_rate": 1.914702863214999e-05, - "loss": 0.2313, - "step": 3116 - }, - { - "epoch": 0.16, - "grad_norm": 1.9597519082119035, - "learning_rate": 1.9146362923525053e-05, - "loss": 0.2367, - "step": 3117 - }, - { - "epoch": 0.16, - "grad_norm": 1.0768884920382897, - "learning_rate": 1.9145696966804505e-05, - "loss": 0.2137, - "step": 3118 - }, - { - "epoch": 0.16, - "grad_norm": 1.2765280153968082, - "learning_rate": 1.914503076200641e-05, - "loss": 0.2335, - "step": 3119 - }, - { - "epoch": 0.16, - "grad_norm": 0.891702028568063, - "learning_rate": 1.9144364309148842e-05, - "loss": 0.2163, - "step": 3120 - }, - { - "epoch": 0.16, - "grad_norm": 1.0401173324989785, - "learning_rate": 1.9143697608249873e-05, - "loss": 0.2443, - "step": 3121 - }, - { - "epoch": 0.16, - "grad_norm": 1.150292160815212, - "learning_rate": 1.914303065932759e-05, - "loss": 0.2442, - "step": 3122 - }, - { - "epoch": 0.16, - "grad_norm": 0.9544448291925663, - "learning_rate": 1.9142363462400087e-05, - "loss": 0.2108, - "step": 3123 - }, - { - "epoch": 0.16, - "grad_norm": 0.9610639977235427, - "learning_rate": 1.914169601748546e-05, - "loss": 0.2076, - "step": 3124 - }, - { - "epoch": 0.16, - "grad_norm": 1.0201336974255446, - "learning_rate": 1.9141028324601808e-05, - "loss": 0.2314, - "step": 3125 - }, - { - "epoch": 0.16, - "grad_norm": 1.189912911424515, - "learning_rate": 1.9140360383767248e-05, - "loss": 0.1984, - "step": 3126 - }, - { - "epoch": 0.16, - "grad_norm": 1.2961454181303826, - "learning_rate": 1.9139692194999894e-05, - "loss": 0.2383, - "step": 3127 - }, - { - "epoch": 0.16, - "grad_norm": 1.4274584800275039, - "learning_rate": 1.9139023758317875e-05, - "loss": 0.235, - "step": 3128 - }, - { - "epoch": 0.16, - "grad_norm": 0.8855836301343938, - "learning_rate": 1.913835507373932e-05, - "loss": 0.2156, - "step": 3129 - }, - { - "epoch": 0.16, - "grad_norm": 1.0934329027469938, - "learning_rate": 1.9137686141282368e-05, - "loss": 0.2233, - "step": 3130 - }, - { - "epoch": 0.16, - "grad_norm": 0.8189168623294839, - "learning_rate": 1.9137016960965164e-05, - "loss": 0.1943, - "step": 3131 - }, - { - "epoch": 0.16, - "grad_norm": 1.2129777212990402, - "learning_rate": 1.9136347532805855e-05, - "loss": 0.2486, - "step": 3132 - }, - { - "epoch": 0.16, - "grad_norm": 0.8058648848724157, - "learning_rate": 1.9135677856822606e-05, - "loss": 0.2119, - "step": 3133 - }, - { - "epoch": 0.16, - "grad_norm": 1.01784769091552, - "learning_rate": 1.9135007933033583e-05, - "loss": 0.2201, - "step": 3134 - }, - { - "epoch": 0.16, - "grad_norm": 0.9792483281736258, - "learning_rate": 1.913433776145695e-05, - "loss": 0.238, - "step": 3135 - }, - { - "epoch": 0.16, - "grad_norm": 1.0007834040170969, - "learning_rate": 1.9133667342110887e-05, - "loss": 0.1993, - "step": 3136 - }, - { - "epoch": 0.16, - "grad_norm": 1.1539922532606464, - "learning_rate": 1.9132996675013583e-05, - "loss": 0.2184, - "step": 3137 - }, - { - "epoch": 0.16, - "grad_norm": 1.1508044653257241, - "learning_rate": 1.913232576018323e-05, - "loss": 0.2313, - "step": 3138 - }, - { - "epoch": 0.16, - "grad_norm": 1.069508988072164, - "learning_rate": 1.9131654597638024e-05, - "loss": 0.2287, - "step": 3139 - }, - { - "epoch": 0.16, - "grad_norm": 1.374642745001945, - "learning_rate": 1.9130983187396174e-05, - "loss": 0.2417, - "step": 3140 - }, - { - "epoch": 0.16, - "grad_norm": 1.131383765935505, - "learning_rate": 1.9130311529475886e-05, - "loss": 0.2367, - "step": 3141 - }, - { - "epoch": 0.16, - "grad_norm": 0.9307771276706998, - "learning_rate": 1.9129639623895382e-05, - "loss": 0.2307, - "step": 3142 - }, - { - "epoch": 0.16, - "grad_norm": 1.3643681521091495, - "learning_rate": 1.9128967470672887e-05, - "loss": 0.2675, - "step": 3143 - }, - { - "epoch": 0.16, - "grad_norm": 1.0992636255726658, - "learning_rate": 1.9128295069826636e-05, - "loss": 0.238, - "step": 3144 - }, - { - "epoch": 0.16, - "grad_norm": 0.8847005171553731, - "learning_rate": 1.9127622421374866e-05, - "loss": 0.2194, - "step": 3145 - }, - { - "epoch": 0.16, - "grad_norm": 1.2081783271673872, - "learning_rate": 1.912694952533582e-05, - "loss": 0.2383, - "step": 3146 - }, - { - "epoch": 0.16, - "grad_norm": 0.9796553656595477, - "learning_rate": 1.9126276381727752e-05, - "loss": 0.2247, - "step": 3147 - }, - { - "epoch": 0.16, - "grad_norm": 0.8756885079427202, - "learning_rate": 1.9125602990568925e-05, - "loss": 0.1946, - "step": 3148 - }, - { - "epoch": 0.16, - "grad_norm": 1.2317980453228476, - "learning_rate": 1.91249293518776e-05, - "loss": 0.2202, - "step": 3149 - }, - { - "epoch": 0.16, - "grad_norm": 1.0438633428794166, - "learning_rate": 1.9124255465672053e-05, - "loss": 0.2118, - "step": 3150 - }, - { - "epoch": 0.16, - "grad_norm": 0.9645029574938986, - "learning_rate": 1.9123581331970558e-05, - "loss": 0.2419, - "step": 3151 - }, - { - "epoch": 0.16, - "grad_norm": 1.0684720842692546, - "learning_rate": 1.9122906950791406e-05, - "loss": 0.2385, - "step": 3152 - }, - { - "epoch": 0.16, - "grad_norm": 1.3288111295644136, - "learning_rate": 1.9122232322152883e-05, - "loss": 0.1971, - "step": 3153 - }, - { - "epoch": 0.16, - "grad_norm": 1.033806895095567, - "learning_rate": 1.91215574460733e-05, - "loss": 0.2276, - "step": 3154 - }, - { - "epoch": 0.16, - "grad_norm": 3.0238508770319936, - "learning_rate": 1.9120882322570952e-05, - "loss": 0.2428, - "step": 3155 - }, - { - "epoch": 0.16, - "grad_norm": 1.03641331436747, - "learning_rate": 1.912020695166416e-05, - "loss": 0.2319, - "step": 3156 - }, - { - "epoch": 0.16, - "grad_norm": 1.5371657052324845, - "learning_rate": 1.9119531333371233e-05, - "loss": 0.2649, - "step": 3157 - }, - { - "epoch": 0.16, - "grad_norm": 0.9765508272616171, - "learning_rate": 1.9118855467710507e-05, - "loss": 0.1991, - "step": 3158 - }, - { - "epoch": 0.16, - "grad_norm": 1.1342684309424567, - "learning_rate": 1.911817935470031e-05, - "loss": 0.2457, - "step": 3159 - }, - { - "epoch": 0.16, - "grad_norm": 1.332260724706468, - "learning_rate": 1.9117502994358984e-05, - "loss": 0.2313, - "step": 3160 - }, - { - "epoch": 0.16, - "grad_norm": 1.2189330783389225, - "learning_rate": 1.9116826386704873e-05, - "loss": 0.2363, - "step": 3161 - }, - { - "epoch": 0.16, - "grad_norm": 2.037243374045091, - "learning_rate": 1.9116149531756333e-05, - "loss": 0.2506, - "step": 3162 - }, - { - "epoch": 0.16, - "grad_norm": 1.4394206019917581, - "learning_rate": 1.9115472429531722e-05, - "loss": 0.2078, - "step": 3163 - }, - { - "epoch": 0.16, - "grad_norm": 1.2227594911150255, - "learning_rate": 1.911479508004941e-05, - "loss": 0.2286, - "step": 3164 - }, - { - "epoch": 0.16, - "grad_norm": 0.8577485490636886, - "learning_rate": 1.911411748332776e-05, - "loss": 0.2209, - "step": 3165 - }, - { - "epoch": 0.16, - "grad_norm": 1.2941653244360138, - "learning_rate": 1.9113439639385164e-05, - "loss": 0.2426, - "step": 3166 - }, - { - "epoch": 0.16, - "grad_norm": 1.0202153420555122, - "learning_rate": 1.9112761548239996e-05, - "loss": 0.2191, - "step": 3167 - }, - { - "epoch": 0.16, - "grad_norm": 1.6811743560837742, - "learning_rate": 1.911208320991066e-05, - "loss": 0.2217, - "step": 3168 - }, - { - "epoch": 0.16, - "grad_norm": 1.4294763694872188, - "learning_rate": 1.9111404624415554e-05, - "loss": 0.2437, - "step": 3169 - }, - { - "epoch": 0.16, - "grad_norm": 2.3818248282341945, - "learning_rate": 1.9110725791773085e-05, - "loss": 0.2393, - "step": 3170 - }, - { - "epoch": 0.16, - "grad_norm": 0.9783655888403653, - "learning_rate": 1.911004671200166e-05, - "loss": 0.2074, - "step": 3171 - }, - { - "epoch": 0.16, - "grad_norm": 1.3146336646097434, - "learning_rate": 1.9109367385119705e-05, - "loss": 0.2205, - "step": 3172 - }, - { - "epoch": 0.16, - "grad_norm": 0.8869805809256699, - "learning_rate": 1.9108687811145645e-05, - "loss": 0.2042, - "step": 3173 - }, - { - "epoch": 0.16, - "grad_norm": 1.0622836441472836, - "learning_rate": 1.9108007990097913e-05, - "loss": 0.2256, - "step": 3174 - }, - { - "epoch": 0.16, - "grad_norm": 0.9475793238096982, - "learning_rate": 1.910732792199495e-05, - "loss": 0.2319, - "step": 3175 - }, - { - "epoch": 0.16, - "grad_norm": 1.0748427136473677, - "learning_rate": 1.9106647606855203e-05, - "loss": 0.2387, - "step": 3176 - }, - { - "epoch": 0.16, - "grad_norm": 1.0525166947996967, - "learning_rate": 1.9105967044697125e-05, - "loss": 0.2305, - "step": 3177 - }, - { - "epoch": 0.16, - "grad_norm": 1.0904798663674182, - "learning_rate": 1.9105286235539178e-05, - "loss": 0.2356, - "step": 3178 - }, - { - "epoch": 0.16, - "grad_norm": 1.1674600619281295, - "learning_rate": 1.9104605179399827e-05, - "loss": 0.2236, - "step": 3179 - }, - { - "epoch": 0.16, - "grad_norm": 1.090127583465506, - "learning_rate": 1.9103923876297544e-05, - "loss": 0.2298, - "step": 3180 - }, - { - "epoch": 0.16, - "grad_norm": 0.8327437385704604, - "learning_rate": 1.9103242326250815e-05, - "loss": 0.2066, - "step": 3181 - }, - { - "epoch": 0.16, - "grad_norm": 1.1699184536903102, - "learning_rate": 1.9102560529278122e-05, - "loss": 0.234, - "step": 3182 - }, - { - "epoch": 0.16, - "grad_norm": 1.1169478692759198, - "learning_rate": 1.910187848539796e-05, - "loss": 0.2445, - "step": 3183 - }, - { - "epoch": 0.16, - "grad_norm": 0.8226074400974731, - "learning_rate": 1.9101196194628834e-05, - "loss": 0.1799, - "step": 3184 - }, - { - "epoch": 0.16, - "grad_norm": 0.9114950516377383, - "learning_rate": 1.9100513656989244e-05, - "loss": 0.2268, - "step": 3185 - }, - { - "epoch": 0.16, - "grad_norm": 0.9785174645275201, - "learning_rate": 1.9099830872497707e-05, - "loss": 0.2279, - "step": 3186 - }, - { - "epoch": 0.16, - "grad_norm": 0.8498906357021067, - "learning_rate": 1.909914784117274e-05, - "loss": 0.1969, - "step": 3187 - }, - { - "epoch": 0.16, - "grad_norm": 1.16707129988209, - "learning_rate": 1.9098464563032878e-05, - "loss": 0.243, - "step": 3188 - }, - { - "epoch": 0.16, - "grad_norm": 1.1269165149984086, - "learning_rate": 1.9097781038096652e-05, - "loss": 0.2092, - "step": 3189 - }, - { - "epoch": 0.16, - "grad_norm": 1.2750144293869767, - "learning_rate": 1.9097097266382598e-05, - "loss": 0.2094, - "step": 3190 - }, - { - "epoch": 0.16, - "grad_norm": 1.0660217584543055, - "learning_rate": 1.909641324790927e-05, - "loss": 0.2109, - "step": 3191 - }, - { - "epoch": 0.16, - "grad_norm": 1.122843501272006, - "learning_rate": 1.909572898269522e-05, - "loss": 0.2205, - "step": 3192 - }, - { - "epoch": 0.16, - "grad_norm": 1.1216680184980137, - "learning_rate": 1.9095044470759004e-05, - "loss": 0.2398, - "step": 3193 - }, - { - "epoch": 0.16, - "grad_norm": 2.32198193611617, - "learning_rate": 1.9094359712119192e-05, - "loss": 0.2416, - "step": 3194 - }, - { - "epoch": 0.16, - "grad_norm": 1.2047762964034372, - "learning_rate": 1.9093674706794363e-05, - "loss": 0.2091, - "step": 3195 - }, - { - "epoch": 0.16, - "grad_norm": 1.0298224869937613, - "learning_rate": 1.9092989454803094e-05, - "loss": 0.2606, - "step": 3196 - }, - { - "epoch": 0.16, - "grad_norm": 1.5457240549213624, - "learning_rate": 1.909230395616397e-05, - "loss": 0.2052, - "step": 3197 - }, - { - "epoch": 0.16, - "grad_norm": 0.9502655176707185, - "learning_rate": 1.909161821089559e-05, - "loss": 0.2274, - "step": 3198 - }, - { - "epoch": 0.16, - "grad_norm": 1.01536728621932, - "learning_rate": 1.9090932219016548e-05, - "loss": 0.2049, - "step": 3199 - }, - { - "epoch": 0.16, - "grad_norm": 2.2487157303403302, - "learning_rate": 1.909024598054546e-05, - "loss": 0.2306, - "step": 3200 - }, - { - "epoch": 0.16, - "grad_norm": 1.2078412801534302, - "learning_rate": 1.9089559495500934e-05, - "loss": 0.2371, - "step": 3201 - }, - { - "epoch": 0.16, - "grad_norm": 1.0483351379753305, - "learning_rate": 1.90888727639016e-05, - "loss": 0.2389, - "step": 3202 - }, - { - "epoch": 0.16, - "grad_norm": 1.128869956503603, - "learning_rate": 1.908818578576607e-05, - "loss": 0.235, - "step": 3203 - }, - { - "epoch": 0.16, - "grad_norm": 1.0102531146450082, - "learning_rate": 1.9087498561112992e-05, - "loss": 0.211, - "step": 3204 - }, - { - "epoch": 0.16, - "grad_norm": 1.0299810783657681, - "learning_rate": 1.9086811089961e-05, - "loss": 0.2387, - "step": 3205 - }, - { - "epoch": 0.16, - "grad_norm": 0.9802814387541344, - "learning_rate": 1.9086123372328748e-05, - "loss": 0.2237, - "step": 3206 - }, - { - "epoch": 0.16, - "grad_norm": 0.9266592538656312, - "learning_rate": 1.9085435408234882e-05, - "loss": 0.2249, - "step": 3207 - }, - { - "epoch": 0.16, - "grad_norm": 0.9220119641068419, - "learning_rate": 1.9084747197698068e-05, - "loss": 0.2266, - "step": 3208 - }, - { - "epoch": 0.16, - "grad_norm": 1.1514474631890381, - "learning_rate": 1.9084058740736974e-05, - "loss": 0.2119, - "step": 3209 - }, - { - "epoch": 0.16, - "grad_norm": 1.1158632704978646, - "learning_rate": 1.9083370037370276e-05, - "loss": 0.219, - "step": 3210 - }, - { - "epoch": 0.16, - "grad_norm": 1.0141902460312189, - "learning_rate": 1.908268108761665e-05, - "loss": 0.2253, - "step": 3211 - }, - { - "epoch": 0.16, - "grad_norm": 1.6050271110549044, - "learning_rate": 1.9081991891494787e-05, - "loss": 0.2286, - "step": 3212 - }, - { - "epoch": 0.16, - "grad_norm": 1.2109911266896056, - "learning_rate": 1.908130244902338e-05, - "loss": 0.2159, - "step": 3213 - }, - { - "epoch": 0.16, - "grad_norm": 1.3546715824770614, - "learning_rate": 1.9080612760221134e-05, - "loss": 0.2547, - "step": 3214 - }, - { - "epoch": 0.16, - "grad_norm": 1.7959685045832672, - "learning_rate": 1.907992282510675e-05, - "loss": 0.2115, - "step": 3215 - }, - { - "epoch": 0.16, - "grad_norm": 1.5087742014255319, - "learning_rate": 1.9079232643698947e-05, - "loss": 0.2251, - "step": 3216 - }, - { - "epoch": 0.16, - "grad_norm": 0.924666360606091, - "learning_rate": 1.907854221601645e-05, - "loss": 0.2053, - "step": 3217 - }, - { - "epoch": 0.16, - "grad_norm": 0.9530973107451134, - "learning_rate": 1.9077851542077978e-05, - "loss": 0.2252, - "step": 3218 - }, - { - "epoch": 0.16, - "grad_norm": 0.950666139394692, - "learning_rate": 1.9077160621902274e-05, - "loss": 0.2505, - "step": 3219 - }, - { - "epoch": 0.16, - "grad_norm": 1.5103529027736473, - "learning_rate": 1.9076469455508072e-05, - "loss": 0.2258, - "step": 3220 - }, - { - "epoch": 0.16, - "grad_norm": 1.0096711939190677, - "learning_rate": 1.9075778042914126e-05, - "loss": 0.2177, - "step": 3221 - }, - { - "epoch": 0.16, - "grad_norm": 1.0032885320723282, - "learning_rate": 1.9075086384139187e-05, - "loss": 0.2226, - "step": 3222 - }, - { - "epoch": 0.16, - "grad_norm": 1.206005456150721, - "learning_rate": 1.9074394479202017e-05, - "loss": 0.2112, - "step": 3223 - }, - { - "epoch": 0.16, - "grad_norm": 1.150609400759136, - "learning_rate": 1.9073702328121382e-05, - "loss": 0.2051, - "step": 3224 - }, - { - "epoch": 0.16, - "grad_norm": 1.1756724863924575, - "learning_rate": 1.9073009930916063e-05, - "loss": 0.2261, - "step": 3225 - }, - { - "epoch": 0.16, - "grad_norm": 1.5673489315487212, - "learning_rate": 1.9072317287604837e-05, - "loss": 0.1954, - "step": 3226 - }, - { - "epoch": 0.16, - "grad_norm": 1.568033424099215, - "learning_rate": 1.907162439820649e-05, - "loss": 0.2572, - "step": 3227 - }, - { - "epoch": 0.16, - "grad_norm": 0.9642478242450317, - "learning_rate": 1.907093126273982e-05, - "loss": 0.2057, - "step": 3228 - }, - { - "epoch": 0.16, - "grad_norm": 6.618424478919842, - "learning_rate": 1.907023788122363e-05, - "loss": 0.2433, - "step": 3229 - }, - { - "epoch": 0.16, - "grad_norm": 0.9986359526257098, - "learning_rate": 1.906954425367672e-05, - "loss": 0.2171, - "step": 3230 - }, - { - "epoch": 0.16, - "grad_norm": 1.1235099739225607, - "learning_rate": 1.906885038011791e-05, - "loss": 0.2228, - "step": 3231 - }, - { - "epoch": 0.16, - "grad_norm": 1.154482508979427, - "learning_rate": 1.906815626056602e-05, - "loss": 0.2396, - "step": 3232 - }, - { - "epoch": 0.16, - "grad_norm": 1.3890043410430053, - "learning_rate": 1.9067461895039888e-05, - "loss": 0.2621, - "step": 3233 - }, - { - "epoch": 0.16, - "grad_norm": 1.358399667722323, - "learning_rate": 1.906676728355833e-05, - "loss": 0.2182, - "step": 3234 - }, - { - "epoch": 0.16, - "grad_norm": 1.0794146703989658, - "learning_rate": 1.9066072426140203e-05, - "loss": 0.2255, - "step": 3235 - }, - { - "epoch": 0.16, - "grad_norm": 0.9904688704649632, - "learning_rate": 1.9065377322804347e-05, - "loss": 0.227, - "step": 3236 - }, - { - "epoch": 0.16, - "grad_norm": 1.558693059373171, - "learning_rate": 1.9064681973569622e-05, - "loss": 0.2473, - "step": 3237 - }, - { - "epoch": 0.16, - "grad_norm": 0.9947430603467428, - "learning_rate": 1.9063986378454884e-05, - "loss": 0.2325, - "step": 3238 - }, - { - "epoch": 0.16, - "grad_norm": 1.2265249569014518, - "learning_rate": 1.9063290537479004e-05, - "loss": 0.2128, - "step": 3239 - }, - { - "epoch": 0.16, - "grad_norm": 1.0611571246887315, - "learning_rate": 1.9062594450660857e-05, - "loss": 0.2107, - "step": 3240 - }, - { - "epoch": 0.16, - "grad_norm": 1.1814888608068674, - "learning_rate": 1.9061898118019326e-05, - "loss": 0.2456, - "step": 3241 - }, - { - "epoch": 0.16, - "grad_norm": 2.7326384660130874, - "learning_rate": 1.9061201539573292e-05, - "loss": 0.2249, - "step": 3242 - }, - { - "epoch": 0.16, - "grad_norm": 1.0426774919663655, - "learning_rate": 1.9060504715341654e-05, - "loss": 0.2328, - "step": 3243 - }, - { - "epoch": 0.16, - "grad_norm": 1.318233062490341, - "learning_rate": 1.905980764534332e-05, - "loss": 0.2281, - "step": 3244 - }, - { - "epoch": 0.17, - "grad_norm": 1.2924652569164976, - "learning_rate": 1.9059110329597185e-05, - "loss": 0.2243, - "step": 3245 - }, - { - "epoch": 0.17, - "grad_norm": 1.2512314642191262, - "learning_rate": 1.9058412768122175e-05, - "loss": 0.2308, - "step": 3246 - }, - { - "epoch": 0.17, - "grad_norm": 1.083747561104402, - "learning_rate": 1.9057714960937205e-05, - "loss": 0.2267, - "step": 3247 - }, - { - "epoch": 0.17, - "grad_norm": 1.0294046112909658, - "learning_rate": 1.9057016908061205e-05, - "loss": 0.2287, - "step": 3248 - }, - { - "epoch": 0.17, - "grad_norm": 1.3535822475892574, - "learning_rate": 1.905631860951311e-05, - "loss": 0.2515, - "step": 3249 - }, - { - "epoch": 0.17, - "grad_norm": 2.7848478161203745, - "learning_rate": 1.905562006531186e-05, - "loss": 0.2369, - "step": 3250 - }, - { - "epoch": 0.17, - "grad_norm": 1.3252968240520733, - "learning_rate": 1.905492127547641e-05, - "loss": 0.2383, - "step": 3251 - }, - { - "epoch": 0.17, - "grad_norm": 1.093520045965069, - "learning_rate": 1.9054222240025706e-05, - "loss": 0.2449, - "step": 3252 - }, - { - "epoch": 0.17, - "grad_norm": 1.0924413513876863, - "learning_rate": 1.905352295897871e-05, - "loss": 0.2235, - "step": 3253 - }, - { - "epoch": 0.17, - "grad_norm": 1.1210881431514452, - "learning_rate": 1.9052823432354396e-05, - "loss": 0.2335, - "step": 3254 - }, - { - "epoch": 0.17, - "grad_norm": 1.2761817272497882, - "learning_rate": 1.905212366017173e-05, - "loss": 0.2387, - "step": 3255 - }, - { - "epoch": 0.17, - "grad_norm": 1.6487483979669733, - "learning_rate": 1.90514236424497e-05, - "loss": 0.213, - "step": 3256 - }, - { - "epoch": 0.17, - "grad_norm": 1.0463799652349561, - "learning_rate": 1.9050723379207296e-05, - "loss": 0.2427, - "step": 3257 - }, - { - "epoch": 0.17, - "grad_norm": 0.9613364989367936, - "learning_rate": 1.9050022870463507e-05, - "loss": 0.2282, - "step": 3258 - }, - { - "epoch": 0.17, - "grad_norm": 1.0254909889735437, - "learning_rate": 1.9049322116237336e-05, - "loss": 0.2341, - "step": 3259 - }, - { - "epoch": 0.17, - "grad_norm": 1.4414193791955492, - "learning_rate": 1.9048621116547793e-05, - "loss": 0.2174, - "step": 3260 - }, - { - "epoch": 0.17, - "grad_norm": 1.5035822960911616, - "learning_rate": 1.904791987141389e-05, - "loss": 0.2139, - "step": 3261 - }, - { - "epoch": 0.17, - "grad_norm": 0.9153873984869315, - "learning_rate": 1.9047218380854652e-05, - "loss": 0.2077, - "step": 3262 - }, - { - "epoch": 0.17, - "grad_norm": 1.204895202902941, - "learning_rate": 1.90465166448891e-05, - "loss": 0.2203, - "step": 3263 - }, - { - "epoch": 0.17, - "grad_norm": 1.235198349138494, - "learning_rate": 1.9045814663536275e-05, - "loss": 0.2553, - "step": 3264 - }, - { - "epoch": 0.17, - "grad_norm": 1.6304314205413217, - "learning_rate": 1.9045112436815217e-05, - "loss": 0.2391, - "step": 3265 - }, - { - "epoch": 0.17, - "grad_norm": 0.9506320321314072, - "learning_rate": 1.904440996474497e-05, - "loss": 0.2381, - "step": 3266 - }, - { - "epoch": 0.17, - "grad_norm": 1.1184506110462116, - "learning_rate": 1.9043707247344596e-05, - "loss": 0.212, - "step": 3267 - }, - { - "epoch": 0.17, - "grad_norm": 1.058991829044029, - "learning_rate": 1.904300428463315e-05, - "loss": 0.2115, - "step": 3268 - }, - { - "epoch": 0.17, - "grad_norm": 1.5336373043447653, - "learning_rate": 1.9042301076629704e-05, - "loss": 0.2197, - "step": 3269 - }, - { - "epoch": 0.17, - "grad_norm": 1.1747866988506088, - "learning_rate": 1.9041597623353327e-05, - "loss": 0.2236, - "step": 3270 - }, - { - "epoch": 0.17, - "grad_norm": 1.3912454174431848, - "learning_rate": 1.9040893924823108e-05, - "loss": 0.2098, - "step": 3271 - }, - { - "epoch": 0.17, - "grad_norm": 1.2756488798101397, - "learning_rate": 1.9040189981058128e-05, - "loss": 0.2524, - "step": 3272 - }, - { - "epoch": 0.17, - "grad_norm": 1.8518406486526697, - "learning_rate": 1.9039485792077485e-05, - "loss": 0.2237, - "step": 3273 - }, - { - "epoch": 0.17, - "grad_norm": 1.2949126687880619, - "learning_rate": 1.903878135790028e-05, - "loss": 0.2297, - "step": 3274 - }, - { - "epoch": 0.17, - "grad_norm": 1.193263514417983, - "learning_rate": 1.903807667854562e-05, - "loss": 0.2105, - "step": 3275 - }, - { - "epoch": 0.17, - "grad_norm": 1.3225038029048628, - "learning_rate": 1.9037371754032618e-05, - "loss": 0.2345, - "step": 3276 - }, - { - "epoch": 0.17, - "grad_norm": 1.1300634224130286, - "learning_rate": 1.9036666584380398e-05, - "loss": 0.1938, - "step": 3277 - }, - { - "epoch": 0.17, - "grad_norm": 0.9816663216784832, - "learning_rate": 1.9035961169608087e-05, - "loss": 0.247, - "step": 3278 - }, - { - "epoch": 0.17, - "grad_norm": 1.5861016137562751, - "learning_rate": 1.9035255509734816e-05, - "loss": 0.2081, - "step": 3279 - }, - { - "epoch": 0.17, - "grad_norm": 1.082690305307336, - "learning_rate": 1.903454960477973e-05, - "loss": 0.219, - "step": 3280 - }, - { - "epoch": 0.17, - "grad_norm": 1.3516616270012032, - "learning_rate": 1.903384345476198e-05, - "loss": 0.1982, - "step": 3281 - }, - { - "epoch": 0.17, - "grad_norm": 1.1284573755467393, - "learning_rate": 1.9033137059700712e-05, - "loss": 0.2256, - "step": 3282 - }, - { - "epoch": 0.17, - "grad_norm": 1.0071641984300603, - "learning_rate": 1.9032430419615094e-05, - "loss": 0.2387, - "step": 3283 - }, - { - "epoch": 0.17, - "grad_norm": 0.9908369990968336, - "learning_rate": 1.9031723534524287e-05, - "loss": 0.2043, - "step": 3284 - }, - { - "epoch": 0.17, - "grad_norm": 0.9434326851921663, - "learning_rate": 1.903101640444747e-05, - "loss": 0.1993, - "step": 3285 - }, - { - "epoch": 0.17, - "grad_norm": 0.9552024564147497, - "learning_rate": 1.9030309029403825e-05, - "loss": 0.2083, - "step": 3286 - }, - { - "epoch": 0.17, - "grad_norm": 1.7097234946706654, - "learning_rate": 1.9029601409412536e-05, - "loss": 0.2347, - "step": 3287 - }, - { - "epoch": 0.17, - "grad_norm": 1.2451337932069761, - "learning_rate": 1.90288935444928e-05, - "loss": 0.2595, - "step": 3288 - }, - { - "epoch": 0.17, - "grad_norm": 1.1152757703933747, - "learning_rate": 1.9028185434663814e-05, - "loss": 0.221, - "step": 3289 - }, - { - "epoch": 0.17, - "grad_norm": 0.8721790062954682, - "learning_rate": 1.902747707994479e-05, - "loss": 0.2246, - "step": 3290 - }, - { - "epoch": 0.17, - "grad_norm": 0.9772443493974295, - "learning_rate": 1.902676848035494e-05, - "loss": 0.2276, - "step": 3291 - }, - { - "epoch": 0.17, - "grad_norm": 1.1658165944676975, - "learning_rate": 1.9026059635913484e-05, - "loss": 0.2275, - "step": 3292 - }, - { - "epoch": 0.17, - "grad_norm": 1.4208291073865067, - "learning_rate": 1.9025350546639654e-05, - "loss": 0.2043, - "step": 3293 - }, - { - "epoch": 0.17, - "grad_norm": 1.1546386268942097, - "learning_rate": 1.902464121255268e-05, - "loss": 0.2295, - "step": 3294 - }, - { - "epoch": 0.17, - "grad_norm": 0.9581823924399043, - "learning_rate": 1.90239316336718e-05, - "loss": 0.2291, - "step": 3295 - }, - { - "epoch": 0.17, - "grad_norm": 0.9678091509808123, - "learning_rate": 1.9023221810016268e-05, - "loss": 0.2213, - "step": 3296 - }, - { - "epoch": 0.17, - "grad_norm": 0.9801111140281928, - "learning_rate": 1.9022511741605334e-05, - "loss": 0.2178, - "step": 3297 - }, - { - "epoch": 0.17, - "grad_norm": 1.2702708802151763, - "learning_rate": 1.9021801428458258e-05, - "loss": 0.2208, - "step": 3298 - }, - { - "epoch": 0.17, - "grad_norm": 0.9622585836212532, - "learning_rate": 1.902109087059431e-05, - "loss": 0.2191, - "step": 3299 - }, - { - "epoch": 0.17, - "grad_norm": 1.0074093143700964, - "learning_rate": 1.902038006803276e-05, - "loss": 0.2198, - "step": 3300 - }, - { - "epoch": 0.17, - "grad_norm": 1.0348778645975119, - "learning_rate": 1.9019669020792896e-05, - "loss": 0.2068, - "step": 3301 - }, - { - "epoch": 0.17, - "grad_norm": 0.878794499122924, - "learning_rate": 1.9018957728893997e-05, - "loss": 0.1931, - "step": 3302 - }, - { - "epoch": 0.17, - "grad_norm": 0.9688383797935949, - "learning_rate": 1.901824619235536e-05, - "loss": 0.2115, - "step": 3303 - }, - { - "epoch": 0.17, - "grad_norm": 1.1918226103761322, - "learning_rate": 1.9017534411196286e-05, - "loss": 0.2262, - "step": 3304 - }, - { - "epoch": 0.17, - "grad_norm": 0.909382788046008, - "learning_rate": 1.9016822385436086e-05, - "loss": 0.2242, - "step": 3305 - }, - { - "epoch": 0.17, - "grad_norm": 0.8945541647722558, - "learning_rate": 1.9016110115094064e-05, - "loss": 0.2313, - "step": 3306 - }, - { - "epoch": 0.17, - "grad_norm": 0.9286600500193802, - "learning_rate": 1.9015397600189548e-05, - "loss": 0.2113, - "step": 3307 - }, - { - "epoch": 0.17, - "grad_norm": 1.0063058750169587, - "learning_rate": 1.9014684840741863e-05, - "loss": 0.2239, - "step": 3308 - }, - { - "epoch": 0.17, - "grad_norm": 1.1075815585913915, - "learning_rate": 1.9013971836770342e-05, - "loss": 0.2347, - "step": 3309 - }, - { - "epoch": 0.17, - "grad_norm": 0.9537824608305381, - "learning_rate": 1.9013258588294324e-05, - "loss": 0.2021, - "step": 3310 - }, - { - "epoch": 0.17, - "grad_norm": 1.3363127593510533, - "learning_rate": 1.9012545095333163e-05, - "loss": 0.2246, - "step": 3311 - }, - { - "epoch": 0.17, - "grad_norm": 1.0966856534204155, - "learning_rate": 1.9011831357906204e-05, - "loss": 0.2427, - "step": 3312 - }, - { - "epoch": 0.17, - "grad_norm": 1.0293981595871236, - "learning_rate": 1.901111737603281e-05, - "loss": 0.2134, - "step": 3313 - }, - { - "epoch": 0.17, - "grad_norm": 1.2481574644414424, - "learning_rate": 1.9010403149732347e-05, - "loss": 0.2696, - "step": 3314 - }, - { - "epoch": 0.17, - "grad_norm": 0.9090088403265032, - "learning_rate": 1.900968867902419e-05, - "loss": 0.214, - "step": 3315 - }, - { - "epoch": 0.17, - "grad_norm": 1.228778878223172, - "learning_rate": 1.9008973963927722e-05, - "loss": 0.2327, - "step": 3316 - }, - { - "epoch": 0.17, - "grad_norm": 1.6815691699994177, - "learning_rate": 1.9008259004462326e-05, - "loss": 0.2281, - "step": 3317 - }, - { - "epoch": 0.17, - "grad_norm": 1.0204039327333942, - "learning_rate": 1.9007543800647395e-05, - "loss": 0.2123, - "step": 3318 - }, - { - "epoch": 0.17, - "grad_norm": 0.9460619920119041, - "learning_rate": 1.9006828352502328e-05, - "loss": 0.257, - "step": 3319 - }, - { - "epoch": 0.17, - "grad_norm": 1.270528757519636, - "learning_rate": 1.9006112660046535e-05, - "loss": 0.2336, - "step": 3320 - }, - { - "epoch": 0.17, - "grad_norm": 3.407866180437645, - "learning_rate": 1.9005396723299426e-05, - "loss": 0.2366, - "step": 3321 - }, - { - "epoch": 0.17, - "grad_norm": 1.1886401077079094, - "learning_rate": 1.9004680542280423e-05, - "loss": 0.2178, - "step": 3322 - }, - { - "epoch": 0.17, - "grad_norm": 1.3281765035685824, - "learning_rate": 1.900396411700895e-05, - "loss": 0.2115, - "step": 3323 - }, - { - "epoch": 0.17, - "grad_norm": 1.3100309021873207, - "learning_rate": 1.9003247447504447e-05, - "loss": 0.2379, - "step": 3324 - }, - { - "epoch": 0.17, - "grad_norm": 2.9845075767531712, - "learning_rate": 1.900253053378634e-05, - "loss": 0.233, - "step": 3325 - }, - { - "epoch": 0.17, - "grad_norm": 2.6700218837971317, - "learning_rate": 1.9001813375874093e-05, - "loss": 0.2322, - "step": 3326 - }, - { - "epoch": 0.17, - "grad_norm": 1.0300567032531562, - "learning_rate": 1.900109597378714e-05, - "loss": 0.2444, - "step": 3327 - }, - { - "epoch": 0.17, - "grad_norm": 1.10901757705295, - "learning_rate": 1.900037832754496e-05, - "loss": 0.2259, - "step": 3328 - }, - { - "epoch": 0.17, - "grad_norm": 0.9447163714069051, - "learning_rate": 1.8999660437167003e-05, - "loss": 0.2366, - "step": 3329 - }, - { - "epoch": 0.17, - "grad_norm": 1.307139230075517, - "learning_rate": 1.8998942302672753e-05, - "loss": 0.256, - "step": 3330 - }, - { - "epoch": 0.17, - "grad_norm": 1.371731200627975, - "learning_rate": 1.8998223924081683e-05, - "loss": 0.2393, - "step": 3331 - }, - { - "epoch": 0.17, - "grad_norm": 1.1905128490939072, - "learning_rate": 1.899750530141328e-05, - "loss": 0.2536, - "step": 3332 - }, - { - "epoch": 0.17, - "grad_norm": 0.9503692709073092, - "learning_rate": 1.8996786434687035e-05, - "loss": 0.2219, - "step": 3333 - }, - { - "epoch": 0.17, - "grad_norm": 0.9439331749855291, - "learning_rate": 1.8996067323922454e-05, - "loss": 0.2323, - "step": 3334 - }, - { - "epoch": 0.17, - "grad_norm": 0.9532307715531738, - "learning_rate": 1.8995347969139034e-05, - "loss": 0.2252, - "step": 3335 - }, - { - "epoch": 0.17, - "grad_norm": 1.552957222597281, - "learning_rate": 1.8994628370356296e-05, - "loss": 0.2267, - "step": 3336 - }, - { - "epoch": 0.17, - "grad_norm": 1.2716668963571127, - "learning_rate": 1.8993908527593756e-05, - "loss": 0.2372, - "step": 3337 - }, - { - "epoch": 0.17, - "grad_norm": 0.7846699126598632, - "learning_rate": 1.899318844087094e-05, - "loss": 0.196, - "step": 3338 - }, - { - "epoch": 0.17, - "grad_norm": 1.0129496638676174, - "learning_rate": 1.8992468110207376e-05, - "loss": 0.2155, - "step": 3339 - }, - { - "epoch": 0.17, - "grad_norm": 0.8783271979901689, - "learning_rate": 1.8991747535622607e-05, - "loss": 0.2038, - "step": 3340 - }, - { - "epoch": 0.17, - "grad_norm": 0.9169434293805285, - "learning_rate": 1.8991026717136182e-05, - "loss": 0.2245, - "step": 3341 - }, - { - "epoch": 0.17, - "grad_norm": 0.9595660483435009, - "learning_rate": 1.8990305654767646e-05, - "loss": 0.2497, - "step": 3342 - }, - { - "epoch": 0.17, - "grad_norm": 1.5644155971430844, - "learning_rate": 1.8989584348536563e-05, - "loss": 0.201, - "step": 3343 - }, - { - "epoch": 0.17, - "grad_norm": 1.3322471354041998, - "learning_rate": 1.8988862798462496e-05, - "loss": 0.2064, - "step": 3344 - }, - { - "epoch": 0.17, - "grad_norm": 0.9960819465156675, - "learning_rate": 1.8988141004565017e-05, - "loss": 0.2317, - "step": 3345 - }, - { - "epoch": 0.17, - "grad_norm": 1.025625915335841, - "learning_rate": 1.8987418966863708e-05, - "loss": 0.223, - "step": 3346 - }, - { - "epoch": 0.17, - "grad_norm": 0.9463078148518489, - "learning_rate": 1.898669668537815e-05, - "loss": 0.2218, - "step": 3347 - }, - { - "epoch": 0.17, - "grad_norm": 1.0451425798298368, - "learning_rate": 1.898597416012794e-05, - "loss": 0.2416, - "step": 3348 - }, - { - "epoch": 0.17, - "grad_norm": 1.220357318287464, - "learning_rate": 1.898525139113267e-05, - "loss": 0.2352, - "step": 3349 - }, - { - "epoch": 0.17, - "grad_norm": 0.9201741657190172, - "learning_rate": 1.898452837841195e-05, - "loss": 0.1981, - "step": 3350 - }, - { - "epoch": 0.17, - "grad_norm": 1.1030367729470085, - "learning_rate": 1.898380512198539e-05, - "loss": 0.206, - "step": 3351 - }, - { - "epoch": 0.17, - "grad_norm": 1.018621044627046, - "learning_rate": 1.898308162187261e-05, - "loss": 0.208, - "step": 3352 - }, - { - "epoch": 0.17, - "grad_norm": 1.0250380740128227, - "learning_rate": 1.898235787809323e-05, - "loss": 0.2217, - "step": 3353 - }, - { - "epoch": 0.17, - "grad_norm": 1.0443360909922148, - "learning_rate": 1.8981633890666886e-05, - "loss": 0.2061, - "step": 3354 - }, - { - "epoch": 0.17, - "grad_norm": 1.118094985175832, - "learning_rate": 1.8980909659613217e-05, - "loss": 0.242, - "step": 3355 - }, - { - "epoch": 0.17, - "grad_norm": 1.1719394317035994, - "learning_rate": 1.8980185184951864e-05, - "loss": 0.2262, - "step": 3356 - }, - { - "epoch": 0.17, - "grad_norm": 1.1119499336976413, - "learning_rate": 1.8979460466702483e-05, - "loss": 0.2172, - "step": 3357 - }, - { - "epoch": 0.17, - "grad_norm": 0.9199146540350096, - "learning_rate": 1.897873550488473e-05, - "loss": 0.2207, - "step": 3358 - }, - { - "epoch": 0.17, - "grad_norm": 1.597157202627985, - "learning_rate": 1.8978010299518268e-05, - "loss": 0.2236, - "step": 3359 - }, - { - "epoch": 0.17, - "grad_norm": 1.145656969444609, - "learning_rate": 1.897728485062277e-05, - "loss": 0.2448, - "step": 3360 - }, - { - "epoch": 0.17, - "grad_norm": 1.196306172255373, - "learning_rate": 1.8976559158217913e-05, - "loss": 0.2421, - "step": 3361 - }, - { - "epoch": 0.17, - "grad_norm": 1.0543796183789402, - "learning_rate": 1.8975833222323383e-05, - "loss": 0.2004, - "step": 3362 - }, - { - "epoch": 0.17, - "grad_norm": 1.1382328752417115, - "learning_rate": 1.897510704295887e-05, - "loss": 0.2205, - "step": 3363 - }, - { - "epoch": 0.17, - "grad_norm": 1.2594780069794822, - "learning_rate": 1.897438062014407e-05, - "loss": 0.2492, - "step": 3364 - }, - { - "epoch": 0.17, - "grad_norm": 1.1099254758261405, - "learning_rate": 1.897365395389869e-05, - "loss": 0.2082, - "step": 3365 - }, - { - "epoch": 0.17, - "grad_norm": 1.105618387840811, - "learning_rate": 1.8972927044242438e-05, - "loss": 0.2275, - "step": 3366 - }, - { - "epoch": 0.17, - "grad_norm": 1.2596995074456943, - "learning_rate": 1.8972199891195034e-05, - "loss": 0.2188, - "step": 3367 - }, - { - "epoch": 0.17, - "grad_norm": 1.1453752628087497, - "learning_rate": 1.8971472494776203e-05, - "loss": 0.2037, - "step": 3368 - }, - { - "epoch": 0.17, - "grad_norm": 1.850769972503201, - "learning_rate": 1.8970744855005674e-05, - "loss": 0.2566, - "step": 3369 - }, - { - "epoch": 0.17, - "grad_norm": 1.539628585952559, - "learning_rate": 1.897001697190318e-05, - "loss": 0.2761, - "step": 3370 - }, - { - "epoch": 0.17, - "grad_norm": 1.1468763974627258, - "learning_rate": 1.8969288845488473e-05, - "loss": 0.2271, - "step": 3371 - }, - { - "epoch": 0.17, - "grad_norm": 1.2157208655996792, - "learning_rate": 1.8968560475781297e-05, - "loss": 0.2655, - "step": 3372 - }, - { - "epoch": 0.17, - "grad_norm": 1.1547576211357717, - "learning_rate": 1.8967831862801414e-05, - "loss": 0.213, - "step": 3373 - }, - { - "epoch": 0.17, - "grad_norm": 1.2452954267397751, - "learning_rate": 1.8967103006568583e-05, - "loss": 0.2269, - "step": 3374 - }, - { - "epoch": 0.17, - "grad_norm": 1.1873167847152575, - "learning_rate": 1.8966373907102577e-05, - "loss": 0.2443, - "step": 3375 - }, - { - "epoch": 0.17, - "grad_norm": 1.0945146479285706, - "learning_rate": 1.8965644564423173e-05, - "loss": 0.2098, - "step": 3376 - }, - { - "epoch": 0.17, - "grad_norm": 1.59138745969418, - "learning_rate": 1.8964914978550154e-05, - "loss": 0.2093, - "step": 3377 - }, - { - "epoch": 0.17, - "grad_norm": 1.0840798791455932, - "learning_rate": 1.896418514950331e-05, - "loss": 0.2356, - "step": 3378 - }, - { - "epoch": 0.17, - "grad_norm": 1.207943842119613, - "learning_rate": 1.8963455077302435e-05, - "loss": 0.2311, - "step": 3379 - }, - { - "epoch": 0.17, - "grad_norm": 1.1114184292207892, - "learning_rate": 1.896272476196734e-05, - "loss": 0.2061, - "step": 3380 - }, - { - "epoch": 0.17, - "grad_norm": 1.6099084050038648, - "learning_rate": 1.8961994203517822e-05, - "loss": 0.2319, - "step": 3381 - }, - { - "epoch": 0.17, - "grad_norm": 0.9832513731356815, - "learning_rate": 1.896126340197371e-05, - "loss": 0.2509, - "step": 3382 - }, - { - "epoch": 0.17, - "grad_norm": 1.2736609473875997, - "learning_rate": 1.896053235735482e-05, - "loss": 0.2466, - "step": 3383 - }, - { - "epoch": 0.17, - "grad_norm": 1.028911036798015, - "learning_rate": 1.8959801069680986e-05, - "loss": 0.2225, - "step": 3384 - }, - { - "epoch": 0.17, - "grad_norm": 1.378791568014497, - "learning_rate": 1.8959069538972043e-05, - "loss": 0.203, - "step": 3385 - }, - { - "epoch": 0.17, - "grad_norm": 1.2369113654728618, - "learning_rate": 1.895833776524783e-05, - "loss": 0.2125, - "step": 3386 - }, - { - "epoch": 0.17, - "grad_norm": 1.192780451638109, - "learning_rate": 1.89576057485282e-05, - "loss": 0.2195, - "step": 3387 - }, - { - "epoch": 0.17, - "grad_norm": 1.3540915753515144, - "learning_rate": 1.8956873488833008e-05, - "loss": 0.2378, - "step": 3388 - }, - { - "epoch": 0.17, - "grad_norm": 1.194988330128871, - "learning_rate": 1.8956140986182116e-05, - "loss": 0.2271, - "step": 3389 - }, - { - "epoch": 0.17, - "grad_norm": 1.473213780154375, - "learning_rate": 1.8955408240595396e-05, - "loss": 0.2136, - "step": 3390 - }, - { - "epoch": 0.17, - "grad_norm": 1.8201258083185985, - "learning_rate": 1.8954675252092717e-05, - "loss": 0.2231, - "step": 3391 - }, - { - "epoch": 0.17, - "grad_norm": 1.8603983223800908, - "learning_rate": 1.895394202069397e-05, - "loss": 0.2228, - "step": 3392 - }, - { - "epoch": 0.17, - "grad_norm": 2.894489027911129, - "learning_rate": 1.895320854641904e-05, - "loss": 0.2111, - "step": 3393 - }, - { - "epoch": 0.17, - "grad_norm": 1.1816876791342712, - "learning_rate": 1.8952474829287825e-05, - "loss": 0.2426, - "step": 3394 - }, - { - "epoch": 0.17, - "grad_norm": 1.155318420616888, - "learning_rate": 1.895174086932022e-05, - "loss": 0.2128, - "step": 3395 - }, - { - "epoch": 0.17, - "grad_norm": 0.9597525424228827, - "learning_rate": 1.895100666653614e-05, - "loss": 0.2171, - "step": 3396 - }, - { - "epoch": 0.17, - "grad_norm": 0.9415740622946664, - "learning_rate": 1.8950272220955497e-05, - "loss": 0.2316, - "step": 3397 - }, - { - "epoch": 0.17, - "grad_norm": 0.9863523292989811, - "learning_rate": 1.8949537532598213e-05, - "loss": 0.2078, - "step": 3398 - }, - { - "epoch": 0.17, - "grad_norm": 1.6101817215152627, - "learning_rate": 1.8948802601484224e-05, - "loss": 0.2297, - "step": 3399 - }, - { - "epoch": 0.17, - "grad_norm": 1.2594625137553501, - "learning_rate": 1.8948067427633456e-05, - "loss": 0.2318, - "step": 3400 - }, - { - "epoch": 0.17, - "grad_norm": 1.7816854513161775, - "learning_rate": 1.8947332011065853e-05, - "loss": 0.1975, - "step": 3401 - }, - { - "epoch": 0.17, - "grad_norm": 1.1061855335081938, - "learning_rate": 1.8946596351801363e-05, - "loss": 0.2419, - "step": 3402 - }, - { - "epoch": 0.17, - "grad_norm": 1.5434594029241404, - "learning_rate": 1.8945860449859945e-05, - "loss": 0.2057, - "step": 3403 - }, - { - "epoch": 0.17, - "grad_norm": 1.1371130973679873, - "learning_rate": 1.8945124305261555e-05, - "loss": 0.2249, - "step": 3404 - }, - { - "epoch": 0.17, - "grad_norm": 0.9609027280934379, - "learning_rate": 1.8944387918026162e-05, - "loss": 0.2081, - "step": 3405 - }, - { - "epoch": 0.17, - "grad_norm": 0.9246659917659515, - "learning_rate": 1.8943651288173743e-05, - "loss": 0.2175, - "step": 3406 - }, - { - "epoch": 0.17, - "grad_norm": 0.9675906075566838, - "learning_rate": 1.8942914415724275e-05, - "loss": 0.2108, - "step": 3407 - }, - { - "epoch": 0.17, - "grad_norm": 1.167654837285515, - "learning_rate": 1.8942177300697753e-05, - "loss": 0.2064, - "step": 3408 - }, - { - "epoch": 0.17, - "grad_norm": 2.30419671209794, - "learning_rate": 1.8941439943114162e-05, - "loss": 0.1932, - "step": 3409 - }, - { - "epoch": 0.17, - "grad_norm": 1.6847133694504224, - "learning_rate": 1.8940702342993512e-05, - "loss": 0.2183, - "step": 3410 - }, - { - "epoch": 0.17, - "grad_norm": 1.2953727771799972, - "learning_rate": 1.8939964500355806e-05, - "loss": 0.2059, - "step": 3411 - }, - { - "epoch": 0.17, - "grad_norm": 1.3106937767502531, - "learning_rate": 1.8939226415221054e-05, - "loss": 0.205, - "step": 3412 - }, - { - "epoch": 0.17, - "grad_norm": 1.592550504669995, - "learning_rate": 1.893848808760928e-05, - "loss": 0.1927, - "step": 3413 - }, - { - "epoch": 0.17, - "grad_norm": 1.4358442707709609, - "learning_rate": 1.8937749517540516e-05, - "loss": 0.2047, - "step": 3414 - }, - { - "epoch": 0.17, - "grad_norm": 1.2130697178931753, - "learning_rate": 1.8937010705034788e-05, - "loss": 0.2185, - "step": 3415 - }, - { - "epoch": 0.17, - "grad_norm": 1.204625251601906, - "learning_rate": 1.8936271650112143e-05, - "loss": 0.2199, - "step": 3416 - }, - { - "epoch": 0.17, - "grad_norm": 1.648059762048116, - "learning_rate": 1.8935532352792624e-05, - "loss": 0.2435, - "step": 3417 - }, - { - "epoch": 0.17, - "grad_norm": 1.5113204599402539, - "learning_rate": 1.8934792813096283e-05, - "loss": 0.2171, - "step": 3418 - }, - { - "epoch": 0.17, - "grad_norm": 1.08312601172551, - "learning_rate": 1.8934053031043185e-05, - "loss": 0.2164, - "step": 3419 - }, - { - "epoch": 0.17, - "grad_norm": 2.8703153818528033, - "learning_rate": 1.8933313006653392e-05, - "loss": 0.2151, - "step": 3420 - }, - { - "epoch": 0.17, - "grad_norm": 0.9451239356452712, - "learning_rate": 1.893257273994698e-05, - "loss": 0.2394, - "step": 3421 - }, - { - "epoch": 0.17, - "grad_norm": 1.2083503347871671, - "learning_rate": 1.8931832230944026e-05, - "loss": 0.2349, - "step": 3422 - }, - { - "epoch": 0.17, - "grad_norm": 1.2617570854350872, - "learning_rate": 1.8931091479664622e-05, - "loss": 0.2264, - "step": 3423 - }, - { - "epoch": 0.17, - "grad_norm": 1.1220719798890133, - "learning_rate": 1.8930350486128855e-05, - "loss": 0.2257, - "step": 3424 - }, - { - "epoch": 0.17, - "grad_norm": 2.210152001777275, - "learning_rate": 1.8929609250356827e-05, - "loss": 0.2366, - "step": 3425 - }, - { - "epoch": 0.17, - "grad_norm": 1.5213675641639302, - "learning_rate": 1.8928867772368644e-05, - "loss": 0.2119, - "step": 3426 - }, - { - "epoch": 0.17, - "grad_norm": 1.161765898603026, - "learning_rate": 1.892812605218442e-05, - "loss": 0.2024, - "step": 3427 - }, - { - "epoch": 0.17, - "grad_norm": 1.038853006912805, - "learning_rate": 1.8927384089824267e-05, - "loss": 0.1975, - "step": 3428 - }, - { - "epoch": 0.17, - "grad_norm": 1.2892374237322095, - "learning_rate": 1.8926641885308325e-05, - "loss": 0.2194, - "step": 3429 - }, - { - "epoch": 0.17, - "grad_norm": 1.5090525000593058, - "learning_rate": 1.8925899438656708e-05, - "loss": 0.2007, - "step": 3430 - }, - { - "epoch": 0.17, - "grad_norm": 1.3066340896057012, - "learning_rate": 1.892515674988957e-05, - "loss": 0.2439, - "step": 3431 - }, - { - "epoch": 0.17, - "grad_norm": 2.6703291899021413, - "learning_rate": 1.892441381902705e-05, - "loss": 0.2068, - "step": 3432 - }, - { - "epoch": 0.17, - "grad_norm": 1.470448296567249, - "learning_rate": 1.8923670646089303e-05, - "loss": 0.218, - "step": 3433 - }, - { - "epoch": 0.17, - "grad_norm": 1.3832623440700667, - "learning_rate": 1.8922927231096482e-05, - "loss": 0.2101, - "step": 3434 - }, - { - "epoch": 0.17, - "grad_norm": 1.2149799232129805, - "learning_rate": 1.892218357406876e-05, - "loss": 0.2379, - "step": 3435 - }, - { - "epoch": 0.17, - "grad_norm": 1.2009435208243155, - "learning_rate": 1.89214396750263e-05, - "loss": 0.2305, - "step": 3436 - }, - { - "epoch": 0.17, - "grad_norm": 1.9612695423719364, - "learning_rate": 1.892069553398929e-05, - "loss": 0.2457, - "step": 3437 - }, - { - "epoch": 0.17, - "grad_norm": 1.1203701248483504, - "learning_rate": 1.8919951150977908e-05, - "loss": 0.2138, - "step": 3438 - }, - { - "epoch": 0.17, - "grad_norm": 1.1046369072938969, - "learning_rate": 1.8919206526012346e-05, - "loss": 0.222, - "step": 3439 - }, - { - "epoch": 0.17, - "grad_norm": 1.0531311533915446, - "learning_rate": 1.8918461659112805e-05, - "loss": 0.2276, - "step": 3440 - }, - { - "epoch": 0.17, - "grad_norm": 1.291918364659948, - "learning_rate": 1.8917716550299485e-05, - "loss": 0.2109, - "step": 3441 - }, - { - "epoch": 0.18, - "grad_norm": 1.5619486000971663, - "learning_rate": 1.8916971199592603e-05, - "loss": 0.222, - "step": 3442 - }, - { - "epoch": 0.18, - "grad_norm": 2.11363710814902, - "learning_rate": 1.891622560701237e-05, - "loss": 0.2221, - "step": 3443 - }, - { - "epoch": 0.18, - "grad_norm": 1.8271041690104084, - "learning_rate": 1.8915479772579017e-05, - "loss": 0.223, - "step": 3444 - }, - { - "epoch": 0.18, - "grad_norm": 1.1204943235932574, - "learning_rate": 1.891473369631277e-05, - "loss": 0.2328, - "step": 3445 - }, - { - "epoch": 0.18, - "grad_norm": 1.0413365146492488, - "learning_rate": 1.891398737823387e-05, - "loss": 0.2104, - "step": 3446 - }, - { - "epoch": 0.18, - "grad_norm": 1.2649950767159048, - "learning_rate": 1.8913240818362556e-05, - "loss": 0.2119, - "step": 3447 - }, - { - "epoch": 0.18, - "grad_norm": 1.4830297934477925, - "learning_rate": 1.8912494016719084e-05, - "loss": 0.2071, - "step": 3448 - }, - { - "epoch": 0.18, - "grad_norm": 1.1955636236263514, - "learning_rate": 1.8911746973323706e-05, - "loss": 0.2253, - "step": 3449 - }, - { - "epoch": 0.18, - "grad_norm": 1.268887849540954, - "learning_rate": 1.8910999688196688e-05, - "loss": 0.2314, - "step": 3450 - }, - { - "epoch": 0.18, - "grad_norm": 1.8540019787261037, - "learning_rate": 1.8910252161358302e-05, - "loss": 0.2186, - "step": 3451 - }, - { - "epoch": 0.18, - "grad_norm": 1.3500922534295132, - "learning_rate": 1.8909504392828822e-05, - "loss": 0.193, - "step": 3452 - }, - { - "epoch": 0.18, - "grad_norm": 1.1737633956260982, - "learning_rate": 1.8908756382628534e-05, - "loss": 0.2178, - "step": 3453 - }, - { - "epoch": 0.18, - "grad_norm": 1.5649160231502703, - "learning_rate": 1.8908008130777724e-05, - "loss": 0.2198, - "step": 3454 - }, - { - "epoch": 0.18, - "grad_norm": 10.954757815776317, - "learning_rate": 1.890725963729669e-05, - "loss": 0.2328, - "step": 3455 - }, - { - "epoch": 0.18, - "grad_norm": 1.5228254564372452, - "learning_rate": 1.8906510902205736e-05, - "loss": 0.2107, - "step": 3456 - }, - { - "epoch": 0.18, - "grad_norm": 1.0442097517235953, - "learning_rate": 1.890576192552517e-05, - "loss": 0.1938, - "step": 3457 - }, - { - "epoch": 0.18, - "grad_norm": 1.3410754929651154, - "learning_rate": 1.890501270727531e-05, - "loss": 0.205, - "step": 3458 - }, - { - "epoch": 0.18, - "grad_norm": 1.1872730608250062, - "learning_rate": 1.8904263247476478e-05, - "loss": 0.2256, - "step": 3459 - }, - { - "epoch": 0.18, - "grad_norm": 5.085472544504074, - "learning_rate": 1.8903513546149e-05, - "loss": 0.1979, - "step": 3460 - }, - { - "epoch": 0.18, - "grad_norm": 3.133249139850197, - "learning_rate": 1.8902763603313213e-05, - "loss": 0.2256, - "step": 3461 - }, - { - "epoch": 0.18, - "grad_norm": 3.24180581737015, - "learning_rate": 1.8902013418989464e-05, - "loss": 0.2318, - "step": 3462 - }, - { - "epoch": 0.18, - "grad_norm": 1.8748478086219742, - "learning_rate": 1.89012629931981e-05, - "loss": 0.2209, - "step": 3463 - }, - { - "epoch": 0.18, - "grad_norm": 1.6952368589548503, - "learning_rate": 1.890051232595947e-05, - "loss": 0.2124, - "step": 3464 - }, - { - "epoch": 0.18, - "grad_norm": 1.1286193647776563, - "learning_rate": 1.8899761417293944e-05, - "loss": 0.214, - "step": 3465 - }, - { - "epoch": 0.18, - "grad_norm": 2.3691731443740385, - "learning_rate": 1.8899010267221884e-05, - "loss": 0.236, - "step": 3466 - }, - { - "epoch": 0.18, - "grad_norm": 5.106432902418719, - "learning_rate": 1.8898258875763668e-05, - "loss": 0.1958, - "step": 3467 - }, - { - "epoch": 0.18, - "grad_norm": 2.42216513069837, - "learning_rate": 1.889750724293968e-05, - "loss": 0.2178, - "step": 3468 - }, - { - "epoch": 0.18, - "grad_norm": 1.039236057239844, - "learning_rate": 1.88967553687703e-05, - "loss": 0.211, - "step": 3469 - }, - { - "epoch": 0.18, - "grad_norm": 1.1630241938144983, - "learning_rate": 1.8896003253275934e-05, - "loss": 0.2031, - "step": 3470 - }, - { - "epoch": 0.18, - "grad_norm": 1.133200488856877, - "learning_rate": 1.8895250896476976e-05, - "loss": 0.2157, - "step": 3471 - }, - { - "epoch": 0.18, - "grad_norm": 1.4087899695633774, - "learning_rate": 1.8894498298393835e-05, - "loss": 0.2696, - "step": 3472 - }, - { - "epoch": 0.18, - "grad_norm": 1.7305735648730336, - "learning_rate": 1.8893745459046922e-05, - "loss": 0.2323, - "step": 3473 - }, - { - "epoch": 0.18, - "grad_norm": 3.142204157272831, - "learning_rate": 1.8892992378456664e-05, - "loss": 0.2282, - "step": 3474 - }, - { - "epoch": 0.18, - "grad_norm": 1.1118952952593542, - "learning_rate": 1.8892239056643487e-05, - "loss": 0.2036, - "step": 3475 - }, - { - "epoch": 0.18, - "grad_norm": 1.0687130740086, - "learning_rate": 1.8891485493627823e-05, - "loss": 0.2208, - "step": 3476 - }, - { - "epoch": 0.18, - "grad_norm": 1.6734111624578498, - "learning_rate": 1.8890731689430108e-05, - "loss": 0.2395, - "step": 3477 - }, - { - "epoch": 0.18, - "grad_norm": 1.3438494750974113, - "learning_rate": 1.88899776440708e-05, - "loss": 0.2236, - "step": 3478 - }, - { - "epoch": 0.18, - "grad_norm": 1.2048291784648975, - "learning_rate": 1.8889223357570342e-05, - "loss": 0.2327, - "step": 3479 - }, - { - "epoch": 0.18, - "grad_norm": 2.0046820562623493, - "learning_rate": 1.88884688299492e-05, - "loss": 0.2235, - "step": 3480 - }, - { - "epoch": 0.18, - "grad_norm": 1.5238429554318127, - "learning_rate": 1.8887714061227838e-05, - "loss": 0.2112, - "step": 3481 - }, - { - "epoch": 0.18, - "grad_norm": 1.213570258302131, - "learning_rate": 1.8886959051426733e-05, - "loss": 0.1763, - "step": 3482 - }, - { - "epoch": 0.18, - "grad_norm": 1.0069459742245659, - "learning_rate": 1.8886203800566357e-05, - "loss": 0.2067, - "step": 3483 - }, - { - "epoch": 0.18, - "grad_norm": 1.198997190207289, - "learning_rate": 1.8885448308667204e-05, - "loss": 0.2202, - "step": 3484 - }, - { - "epoch": 0.18, - "grad_norm": 1.1565023181231882, - "learning_rate": 1.8884692575749762e-05, - "loss": 0.2215, - "step": 3485 - }, - { - "epoch": 0.18, - "grad_norm": 1.2061530708342951, - "learning_rate": 1.8883936601834533e-05, - "loss": 0.1979, - "step": 3486 - }, - { - "epoch": 0.18, - "grad_norm": 1.0102406613321953, - "learning_rate": 1.8883180386942022e-05, - "loss": 0.2072, - "step": 3487 - }, - { - "epoch": 0.18, - "grad_norm": 2.6719172697318467, - "learning_rate": 1.8882423931092745e-05, - "loss": 0.2198, - "step": 3488 - }, - { - "epoch": 0.18, - "grad_norm": 1.55648843029096, - "learning_rate": 1.888166723430721e-05, - "loss": 0.2062, - "step": 3489 - }, - { - "epoch": 0.18, - "grad_norm": 1.2375859182455549, - "learning_rate": 1.8880910296605956e-05, - "loss": 0.1965, - "step": 3490 - }, - { - "epoch": 0.18, - "grad_norm": 1.2731145678375624, - "learning_rate": 1.8880153118009505e-05, - "loss": 0.2256, - "step": 3491 - }, - { - "epoch": 0.18, - "grad_norm": 1.164193051691617, - "learning_rate": 1.8879395698538402e-05, - "loss": 0.2163, - "step": 3492 - }, - { - "epoch": 0.18, - "grad_norm": 1.3166783793707386, - "learning_rate": 1.8878638038213186e-05, - "loss": 0.2129, - "step": 3493 - }, - { - "epoch": 0.18, - "grad_norm": 1.1521288362770579, - "learning_rate": 1.8877880137054413e-05, - "loss": 0.2269, - "step": 3494 - }, - { - "epoch": 0.18, - "grad_norm": 1.1986007406716817, - "learning_rate": 1.8877121995082638e-05, - "loss": 0.223, - "step": 3495 - }, - { - "epoch": 0.18, - "grad_norm": 1.0455207674415665, - "learning_rate": 1.887636361231843e-05, - "loss": 0.2218, - "step": 3496 - }, - { - "epoch": 0.18, - "grad_norm": 0.975037927396976, - "learning_rate": 1.887560498878236e-05, - "loss": 0.205, - "step": 3497 - }, - { - "epoch": 0.18, - "grad_norm": 0.99183527876214, - "learning_rate": 1.8874846124495e-05, - "loss": 0.2056, - "step": 3498 - }, - { - "epoch": 0.18, - "grad_norm": 1.3402372514211767, - "learning_rate": 1.8874087019476937e-05, - "loss": 0.2011, - "step": 3499 - }, - { - "epoch": 0.18, - "grad_norm": 1.3365592978089498, - "learning_rate": 1.8873327673748765e-05, - "loss": 0.2356, - "step": 3500 - }, - { - "epoch": 0.18, - "grad_norm": 1.4976108619117818, - "learning_rate": 1.8872568087331074e-05, - "loss": 0.2298, - "step": 3501 - }, - { - "epoch": 0.18, - "grad_norm": 1.0639877022857973, - "learning_rate": 1.8871808260244476e-05, - "loss": 0.2193, - "step": 3502 - }, - { - "epoch": 0.18, - "grad_norm": 1.029750538822256, - "learning_rate": 1.8871048192509576e-05, - "loss": 0.2353, - "step": 3503 - }, - { - "epoch": 0.18, - "grad_norm": 1.5591330622484874, - "learning_rate": 1.8870287884147e-05, - "loss": 0.2064, - "step": 3504 - }, - { - "epoch": 0.18, - "grad_norm": 1.2796091932736209, - "learning_rate": 1.8869527335177354e-05, - "loss": 0.2481, - "step": 3505 - }, - { - "epoch": 0.18, - "grad_norm": 1.1622857079794304, - "learning_rate": 1.8868766545621286e-05, - "loss": 0.2504, - "step": 3506 - }, - { - "epoch": 0.18, - "grad_norm": 1.1477897384360691, - "learning_rate": 1.886800551549942e-05, - "loss": 0.2014, - "step": 3507 - }, - { - "epoch": 0.18, - "grad_norm": 1.1667763938280227, - "learning_rate": 1.88672442448324e-05, - "loss": 0.2078, - "step": 3508 - }, - { - "epoch": 0.18, - "grad_norm": 1.137503241315197, - "learning_rate": 1.886648273364089e-05, - "loss": 0.2268, - "step": 3509 - }, - { - "epoch": 0.18, - "grad_norm": 1.5931312444608623, - "learning_rate": 1.886572098194553e-05, - "loss": 0.2399, - "step": 3510 - }, - { - "epoch": 0.18, - "grad_norm": 0.998589900804111, - "learning_rate": 1.8864958989766982e-05, - "loss": 0.2177, - "step": 3511 - }, - { - "epoch": 0.18, - "grad_norm": 1.0766991349805863, - "learning_rate": 1.8864196757125926e-05, - "loss": 0.219, - "step": 3512 - }, - { - "epoch": 0.18, - "grad_norm": 1.2669927981226383, - "learning_rate": 1.886343428404303e-05, - "loss": 0.2455, - "step": 3513 - }, - { - "epoch": 0.18, - "grad_norm": 1.3821721941809648, - "learning_rate": 1.8862671570538983e-05, - "loss": 0.2291, - "step": 3514 - }, - { - "epoch": 0.18, - "grad_norm": 1.9922061065377965, - "learning_rate": 1.8861908616634465e-05, - "loss": 0.2355, - "step": 3515 - }, - { - "epoch": 0.18, - "grad_norm": 0.9647774432744618, - "learning_rate": 1.8861145422350175e-05, - "loss": 0.2225, - "step": 3516 - }, - { - "epoch": 0.18, - "grad_norm": 1.2450905932652228, - "learning_rate": 1.8860381987706815e-05, - "loss": 0.1923, - "step": 3517 - }, - { - "epoch": 0.18, - "grad_norm": 1.174949296255601, - "learning_rate": 1.8859618312725097e-05, - "loss": 0.2273, - "step": 3518 - }, - { - "epoch": 0.18, - "grad_norm": 0.9185629784663724, - "learning_rate": 1.885885439742573e-05, - "loss": 0.2421, - "step": 3519 - }, - { - "epoch": 0.18, - "grad_norm": 1.0252428894923662, - "learning_rate": 1.8858090241829435e-05, - "loss": 0.2276, - "step": 3520 - }, - { - "epoch": 0.18, - "grad_norm": 1.3017122731058193, - "learning_rate": 1.8857325845956943e-05, - "loss": 0.2324, - "step": 3521 - }, - { - "epoch": 0.18, - "grad_norm": 1.3801729559240783, - "learning_rate": 1.8856561209828985e-05, - "loss": 0.2102, - "step": 3522 - }, - { - "epoch": 0.18, - "grad_norm": 1.518892942443358, - "learning_rate": 1.8855796333466306e-05, - "loss": 0.2125, - "step": 3523 - }, - { - "epoch": 0.18, - "grad_norm": 2.623769747136324, - "learning_rate": 1.8855031216889654e-05, - "loss": 0.2158, - "step": 3524 - }, - { - "epoch": 0.18, - "grad_norm": 1.0182322618961068, - "learning_rate": 1.8854265860119777e-05, - "loss": 0.2311, - "step": 3525 - }, - { - "epoch": 0.18, - "grad_norm": 1.6124376494565882, - "learning_rate": 1.8853500263177438e-05, - "loss": 0.243, - "step": 3526 - }, - { - "epoch": 0.18, - "grad_norm": 1.3650878655188263, - "learning_rate": 1.8852734426083407e-05, - "loss": 0.2351, - "step": 3527 - }, - { - "epoch": 0.18, - "grad_norm": 1.3001949420466634, - "learning_rate": 1.8851968348858452e-05, - "loss": 0.2381, - "step": 3528 - }, - { - "epoch": 0.18, - "grad_norm": 1.0896299953265736, - "learning_rate": 1.8851202031523357e-05, - "loss": 0.1966, - "step": 3529 - }, - { - "epoch": 0.18, - "grad_norm": 1.149107350644368, - "learning_rate": 1.8850435474098903e-05, - "loss": 0.2114, - "step": 3530 - }, - { - "epoch": 0.18, - "grad_norm": 1.1056201887813963, - "learning_rate": 1.8849668676605892e-05, - "loss": 0.2094, - "step": 3531 - }, - { - "epoch": 0.18, - "grad_norm": 1.0996511570022964, - "learning_rate": 1.884890163906512e-05, - "loss": 0.2075, - "step": 3532 - }, - { - "epoch": 0.18, - "grad_norm": 1.2245004022629145, - "learning_rate": 1.8848134361497385e-05, - "loss": 0.2215, - "step": 3533 - }, - { - "epoch": 0.18, - "grad_norm": 0.9834659334969303, - "learning_rate": 1.8847366843923512e-05, - "loss": 0.2498, - "step": 3534 - }, - { - "epoch": 0.18, - "grad_norm": 1.034031100069306, - "learning_rate": 1.8846599086364307e-05, - "loss": 0.2281, - "step": 3535 - }, - { - "epoch": 0.18, - "grad_norm": 1.1124308025623266, - "learning_rate": 1.8845831088840607e-05, - "loss": 0.2255, - "step": 3536 - }, - { - "epoch": 0.18, - "grad_norm": 0.9862681621323288, - "learning_rate": 1.884506285137324e-05, - "loss": 0.205, - "step": 3537 - }, - { - "epoch": 0.18, - "grad_norm": 0.9765289155940603, - "learning_rate": 1.884429437398304e-05, - "loss": 0.2293, - "step": 3538 - }, - { - "epoch": 0.18, - "grad_norm": 1.1166529919622326, - "learning_rate": 1.8843525656690856e-05, - "loss": 0.2419, - "step": 3539 - }, - { - "epoch": 0.18, - "grad_norm": 1.310916406449081, - "learning_rate": 1.884275669951754e-05, - "loss": 0.2174, - "step": 3540 - }, - { - "epoch": 0.18, - "grad_norm": 1.1772746264552305, - "learning_rate": 1.8841987502483947e-05, - "loss": 0.2124, - "step": 3541 - }, - { - "epoch": 0.18, - "grad_norm": 2.001411267184766, - "learning_rate": 1.8841218065610946e-05, - "loss": 0.2046, - "step": 3542 - }, - { - "epoch": 0.18, - "grad_norm": 1.7958941047302297, - "learning_rate": 1.8840448388919404e-05, - "loss": 0.2354, - "step": 3543 - }, - { - "epoch": 0.18, - "grad_norm": 1.3784337374121243, - "learning_rate": 1.8839678472430202e-05, - "loss": 0.2234, - "step": 3544 - }, - { - "epoch": 0.18, - "grad_norm": 1.386781588632047, - "learning_rate": 1.883890831616422e-05, - "loss": 0.2139, - "step": 3545 - }, - { - "epoch": 0.18, - "grad_norm": 1.0081326543848246, - "learning_rate": 1.8838137920142353e-05, - "loss": 0.2172, - "step": 3546 - }, - { - "epoch": 0.18, - "grad_norm": 1.925414447020237, - "learning_rate": 1.8837367284385495e-05, - "loss": 0.2212, - "step": 3547 - }, - { - "epoch": 0.18, - "grad_norm": 1.3165789878342848, - "learning_rate": 1.883659640891455e-05, - "loss": 0.218, - "step": 3548 - }, - { - "epoch": 0.18, - "grad_norm": 1.4404730700778872, - "learning_rate": 1.883582529375043e-05, - "loss": 0.2222, - "step": 3549 - }, - { - "epoch": 0.18, - "grad_norm": 1.5777733140194132, - "learning_rate": 1.883505393891405e-05, - "loss": 0.247, - "step": 3550 - }, - { - "epoch": 0.18, - "grad_norm": 1.363219632836876, - "learning_rate": 1.883428234442633e-05, - "loss": 0.2386, - "step": 3551 - }, - { - "epoch": 0.18, - "grad_norm": 2.3569503082449117, - "learning_rate": 1.8833510510308205e-05, - "loss": 0.2317, - "step": 3552 - }, - { - "epoch": 0.18, - "grad_norm": 1.2587246937536893, - "learning_rate": 1.8832738436580606e-05, - "loss": 0.2188, - "step": 3553 - }, - { - "epoch": 0.18, - "grad_norm": 1.12233768922461, - "learning_rate": 1.883196612326448e-05, - "loss": 0.2236, - "step": 3554 - }, - { - "epoch": 0.18, - "grad_norm": 1.2126151051536773, - "learning_rate": 1.8831193570380773e-05, - "loss": 0.2086, - "step": 3555 - }, - { - "epoch": 0.18, - "grad_norm": 1.2393899252829297, - "learning_rate": 1.883042077795044e-05, - "loss": 0.2391, - "step": 3556 - }, - { - "epoch": 0.18, - "grad_norm": 1.4836398610286818, - "learning_rate": 1.8829647745994445e-05, - "loss": 0.2554, - "step": 3557 - }, - { - "epoch": 0.18, - "grad_norm": 0.9733825299769355, - "learning_rate": 1.882887447453376e-05, - "loss": 0.2076, - "step": 3558 - }, - { - "epoch": 0.18, - "grad_norm": 1.169740840648058, - "learning_rate": 1.8828100963589357e-05, - "loss": 0.1992, - "step": 3559 - }, - { - "epoch": 0.18, - "grad_norm": 1.0783743006030566, - "learning_rate": 1.882732721318221e-05, - "loss": 0.2232, - "step": 3560 - }, - { - "epoch": 0.18, - "grad_norm": 1.2929527357581567, - "learning_rate": 1.882655322333332e-05, - "loss": 0.2312, - "step": 3561 - }, - { - "epoch": 0.18, - "grad_norm": 0.8276497602849175, - "learning_rate": 1.8825778994063672e-05, - "loss": 0.1998, - "step": 3562 - }, - { - "epoch": 0.18, - "grad_norm": 1.822612547922766, - "learning_rate": 1.882500452539427e-05, - "loss": 0.2186, - "step": 3563 - }, - { - "epoch": 0.18, - "grad_norm": 1.185368918244924, - "learning_rate": 1.8824229817346124e-05, - "loss": 0.2269, - "step": 3564 - }, - { - "epoch": 0.18, - "grad_norm": 1.044763669380613, - "learning_rate": 1.8823454869940243e-05, - "loss": 0.2022, - "step": 3565 - }, - { - "epoch": 0.18, - "grad_norm": 1.6220896032433259, - "learning_rate": 1.8822679683197654e-05, - "loss": 0.2309, - "step": 3566 - }, - { - "epoch": 0.18, - "grad_norm": 0.9932681947347778, - "learning_rate": 1.882190425713938e-05, - "loss": 0.2171, - "step": 3567 - }, - { - "epoch": 0.18, - "grad_norm": 1.3632488336960016, - "learning_rate": 1.882112859178645e-05, - "loss": 0.2188, - "step": 3568 - }, - { - "epoch": 0.18, - "grad_norm": 1.6684759325774463, - "learning_rate": 1.8820352687159912e-05, - "loss": 0.2151, - "step": 3569 - }, - { - "epoch": 0.18, - "grad_norm": 1.1266573469024315, - "learning_rate": 1.881957654328081e-05, - "loss": 0.2164, - "step": 3570 - }, - { - "epoch": 0.18, - "grad_norm": 1.2447540550144214, - "learning_rate": 1.8818800160170193e-05, - "loss": 0.2361, - "step": 3571 - }, - { - "epoch": 0.18, - "grad_norm": 1.3002904781282625, - "learning_rate": 1.8818023537849124e-05, - "loss": 0.2265, - "step": 3572 - }, - { - "epoch": 0.18, - "grad_norm": 1.0874912434200428, - "learning_rate": 1.8817246676338674e-05, - "loss": 0.2042, - "step": 3573 - }, - { - "epoch": 0.18, - "grad_norm": 1.2065451355021113, - "learning_rate": 1.8816469575659905e-05, - "loss": 0.1963, - "step": 3574 - }, - { - "epoch": 0.18, - "grad_norm": 1.6011427686798987, - "learning_rate": 1.8815692235833903e-05, - "loss": 0.2542, - "step": 3575 - }, - { - "epoch": 0.18, - "grad_norm": 1.0630131633964672, - "learning_rate": 1.881491465688175e-05, - "loss": 0.1862, - "step": 3576 - }, - { - "epoch": 0.18, - "grad_norm": 1.192757320656651, - "learning_rate": 1.881413683882454e-05, - "loss": 0.2016, - "step": 3577 - }, - { - "epoch": 0.18, - "grad_norm": 1.0317469350067587, - "learning_rate": 1.881335878168337e-05, - "loss": 0.2074, - "step": 3578 - }, - { - "epoch": 0.18, - "grad_norm": 1.239604331744985, - "learning_rate": 1.881258048547934e-05, - "loss": 0.2103, - "step": 3579 - }, - { - "epoch": 0.18, - "grad_norm": 1.3107029936706969, - "learning_rate": 1.8811801950233576e-05, - "loss": 0.2162, - "step": 3580 - }, - { - "epoch": 0.18, - "grad_norm": 1.0157843501686425, - "learning_rate": 1.881102317596718e-05, - "loss": 0.2354, - "step": 3581 - }, - { - "epoch": 0.18, - "grad_norm": 2.9623200968488543, - "learning_rate": 1.8810244162701282e-05, - "loss": 0.229, - "step": 3582 - }, - { - "epoch": 0.18, - "grad_norm": 1.1685273424380547, - "learning_rate": 1.8809464910457018e-05, - "loss": 0.244, - "step": 3583 - }, - { - "epoch": 0.18, - "grad_norm": 1.1103339740841685, - "learning_rate": 1.880868541925552e-05, - "loss": 0.2169, - "step": 3584 - }, - { - "epoch": 0.18, - "grad_norm": 1.311960033700126, - "learning_rate": 1.8807905689117932e-05, - "loss": 0.2723, - "step": 3585 - }, - { - "epoch": 0.18, - "grad_norm": 1.1836207348653984, - "learning_rate": 1.8807125720065402e-05, - "loss": 0.2174, - "step": 3586 - }, - { - "epoch": 0.18, - "grad_norm": 1.006572134286583, - "learning_rate": 1.8806345512119094e-05, - "loss": 0.2188, - "step": 3587 - }, - { - "epoch": 0.18, - "grad_norm": 0.9780003684667589, - "learning_rate": 1.880556506530016e-05, - "loss": 0.2171, - "step": 3588 - }, - { - "epoch": 0.18, - "grad_norm": 1.1732560568868864, - "learning_rate": 1.8804784379629782e-05, - "loss": 0.1826, - "step": 3589 - }, - { - "epoch": 0.18, - "grad_norm": 0.9094677603104601, - "learning_rate": 1.880400345512913e-05, - "loss": 0.2436, - "step": 3590 - }, - { - "epoch": 0.18, - "grad_norm": 1.5987479896408254, - "learning_rate": 1.8803222291819384e-05, - "loss": 0.2424, - "step": 3591 - }, - { - "epoch": 0.18, - "grad_norm": 1.9268098947402248, - "learning_rate": 1.8802440889721738e-05, - "loss": 0.2332, - "step": 3592 - }, - { - "epoch": 0.18, - "grad_norm": 1.1590799031555827, - "learning_rate": 1.8801659248857387e-05, - "loss": 0.2174, - "step": 3593 - }, - { - "epoch": 0.18, - "grad_norm": 0.8734401012949662, - "learning_rate": 1.880087736924753e-05, - "loss": 0.1889, - "step": 3594 - }, - { - "epoch": 0.18, - "grad_norm": 0.9763292582262146, - "learning_rate": 1.8800095250913378e-05, - "loss": 0.2301, - "step": 3595 - }, - { - "epoch": 0.18, - "grad_norm": 1.1036759320154053, - "learning_rate": 1.8799312893876144e-05, - "loss": 0.2354, - "step": 3596 - }, - { - "epoch": 0.18, - "grad_norm": 1.2068573877020075, - "learning_rate": 1.8798530298157053e-05, - "loss": 0.2257, - "step": 3597 - }, - { - "epoch": 0.18, - "grad_norm": 1.3724862448248425, - "learning_rate": 1.879774746377733e-05, - "loss": 0.2239, - "step": 3598 - }, - { - "epoch": 0.18, - "grad_norm": 1.4278671837999257, - "learning_rate": 1.8796964390758208e-05, - "loss": 0.2295, - "step": 3599 - }, - { - "epoch": 0.18, - "grad_norm": 1.0753048332681465, - "learning_rate": 1.879618107912093e-05, - "loss": 0.2333, - "step": 3600 - }, - { - "epoch": 0.18, - "grad_norm": 1.435980345300096, - "learning_rate": 1.8795397528886744e-05, - "loss": 0.195, - "step": 3601 - }, - { - "epoch": 0.18, - "grad_norm": 1.4710044713102228, - "learning_rate": 1.8794613740076905e-05, - "loss": 0.2436, - "step": 3602 - }, - { - "epoch": 0.18, - "grad_norm": 1.1614473226565285, - "learning_rate": 1.8793829712712674e-05, - "loss": 0.2332, - "step": 3603 - }, - { - "epoch": 0.18, - "grad_norm": 1.1045520516660092, - "learning_rate": 1.879304544681531e-05, - "loss": 0.224, - "step": 3604 - }, - { - "epoch": 0.18, - "grad_norm": 1.319759137203225, - "learning_rate": 1.8792260942406093e-05, - "loss": 0.2268, - "step": 3605 - }, - { - "epoch": 0.18, - "grad_norm": 1.1487889779994411, - "learning_rate": 1.87914761995063e-05, - "loss": 0.2067, - "step": 3606 - }, - { - "epoch": 0.18, - "grad_norm": 1.3210062460129575, - "learning_rate": 1.8790691218137223e-05, - "loss": 0.1936, - "step": 3607 - }, - { - "epoch": 0.18, - "grad_norm": 1.0186501739958207, - "learning_rate": 1.8789905998320148e-05, - "loss": 0.2092, - "step": 3608 - }, - { - "epoch": 0.18, - "grad_norm": 1.1235348787736643, - "learning_rate": 1.8789120540076377e-05, - "loss": 0.2477, - "step": 3609 - }, - { - "epoch": 0.18, - "grad_norm": 0.8505143793933634, - "learning_rate": 1.8788334843427213e-05, - "loss": 0.2175, - "step": 3610 - }, - { - "epoch": 0.18, - "grad_norm": 1.0042245499894789, - "learning_rate": 1.878754890839397e-05, - "loss": 0.2083, - "step": 3611 - }, - { - "epoch": 0.18, - "grad_norm": 1.088541889713761, - "learning_rate": 1.8786762734997967e-05, - "loss": 0.2161, - "step": 3612 - }, - { - "epoch": 0.18, - "grad_norm": 1.7685156296862572, - "learning_rate": 1.878597632326053e-05, - "loss": 0.2166, - "step": 3613 - }, - { - "epoch": 0.18, - "grad_norm": 1.12394810734318, - "learning_rate": 1.8785189673202987e-05, - "loss": 0.2276, - "step": 3614 - }, - { - "epoch": 0.18, - "grad_norm": 2.3965025337178076, - "learning_rate": 1.8784402784846683e-05, - "loss": 0.2035, - "step": 3615 - }, - { - "epoch": 0.18, - "grad_norm": 1.0159742971193932, - "learning_rate": 1.8783615658212954e-05, - "loss": 0.2333, - "step": 3616 - }, - { - "epoch": 0.18, - "grad_norm": 1.3262982321738739, - "learning_rate": 1.8782828293323148e-05, - "loss": 0.2175, - "step": 3617 - }, - { - "epoch": 0.18, - "grad_norm": 0.9727403288284343, - "learning_rate": 1.8782040690198638e-05, - "loss": 0.2162, - "step": 3618 - }, - { - "epoch": 0.18, - "grad_norm": 1.147172083948449, - "learning_rate": 1.8781252848860774e-05, - "loss": 0.1996, - "step": 3619 - }, - { - "epoch": 0.18, - "grad_norm": 1.6406618327820583, - "learning_rate": 1.878046476933093e-05, - "loss": 0.2053, - "step": 3620 - }, - { - "epoch": 0.18, - "grad_norm": 1.1381954215972985, - "learning_rate": 1.8779676451630483e-05, - "loss": 0.2293, - "step": 3621 - }, - { - "epoch": 0.18, - "grad_norm": 1.2442005673537484, - "learning_rate": 1.877888789578082e-05, - "loss": 0.22, - "step": 3622 - }, - { - "epoch": 0.18, - "grad_norm": 1.12587064239475, - "learning_rate": 1.877809910180332e-05, - "loss": 0.2325, - "step": 3623 - }, - { - "epoch": 0.18, - "grad_norm": 0.9723723171760922, - "learning_rate": 1.8777310069719395e-05, - "loss": 0.216, - "step": 3624 - }, - { - "epoch": 0.18, - "grad_norm": 0.8174785819072351, - "learning_rate": 1.8776520799550432e-05, - "loss": 0.2109, - "step": 3625 - }, - { - "epoch": 0.18, - "grad_norm": 0.9122603631781808, - "learning_rate": 1.8775731291317848e-05, - "loss": 0.2309, - "step": 3626 - }, - { - "epoch": 0.18, - "grad_norm": 0.8848068716660894, - "learning_rate": 1.877494154504306e-05, - "loss": 0.2194, - "step": 3627 - }, - { - "epoch": 0.18, - "grad_norm": 0.9278675771741361, - "learning_rate": 1.8774151560747483e-05, - "loss": 0.2183, - "step": 3628 - }, - { - "epoch": 0.18, - "grad_norm": 1.9191651194789452, - "learning_rate": 1.8773361338452552e-05, - "loss": 0.2161, - "step": 3629 - }, - { - "epoch": 0.18, - "grad_norm": 1.0679724192636588, - "learning_rate": 1.87725708781797e-05, - "loss": 0.2244, - "step": 3630 - }, - { - "epoch": 0.18, - "grad_norm": 1.0051450683135599, - "learning_rate": 1.8771780179950365e-05, - "loss": 0.236, - "step": 3631 - }, - { - "epoch": 0.18, - "grad_norm": 1.054895456949411, - "learning_rate": 1.8770989243785996e-05, - "loss": 0.2241, - "step": 3632 - }, - { - "epoch": 0.18, - "grad_norm": 0.9056539769716427, - "learning_rate": 1.8770198069708053e-05, - "loss": 0.2322, - "step": 3633 - }, - { - "epoch": 0.18, - "grad_norm": 1.0234613563851183, - "learning_rate": 1.8769406657737987e-05, - "loss": 0.2316, - "step": 3634 - }, - { - "epoch": 0.18, - "grad_norm": 0.9956590739601004, - "learning_rate": 1.8768615007897274e-05, - "loss": 0.199, - "step": 3635 - }, - { - "epoch": 0.18, - "grad_norm": 0.8869981627351374, - "learning_rate": 1.8767823120207382e-05, - "loss": 0.2109, - "step": 3636 - }, - { - "epoch": 0.18, - "grad_norm": 0.9934235815819739, - "learning_rate": 1.876703099468979e-05, - "loss": 0.2304, - "step": 3637 - }, - { - "epoch": 0.18, - "grad_norm": 1.4633457424825305, - "learning_rate": 1.8766238631365993e-05, - "loss": 0.2083, - "step": 3638 - }, - { - "epoch": 0.19, - "grad_norm": 1.1406976495870418, - "learning_rate": 1.8765446030257475e-05, - "loss": 0.2153, - "step": 3639 - }, - { - "epoch": 0.19, - "grad_norm": 0.9879296696273988, - "learning_rate": 1.8764653191385737e-05, - "loss": 0.2085, - "step": 3640 - }, - { - "epoch": 0.19, - "grad_norm": 0.910282344054396, - "learning_rate": 1.876386011477229e-05, - "loss": 0.2094, - "step": 3641 - }, - { - "epoch": 0.19, - "grad_norm": 1.0136285939917482, - "learning_rate": 1.8763066800438638e-05, - "loss": 0.2312, - "step": 3642 - }, - { - "epoch": 0.19, - "grad_norm": 0.9982969903185347, - "learning_rate": 1.8762273248406308e-05, - "loss": 0.2199, - "step": 3643 - }, - { - "epoch": 0.19, - "grad_norm": 1.0218645304104221, - "learning_rate": 1.8761479458696817e-05, - "loss": 0.2435, - "step": 3644 - }, - { - "epoch": 0.19, - "grad_norm": 0.801950250637673, - "learning_rate": 1.87606854313317e-05, - "loss": 0.2279, - "step": 3645 - }, - { - "epoch": 0.19, - "grad_norm": 0.9830240715600229, - "learning_rate": 1.87598911663325e-05, - "loss": 0.202, - "step": 3646 - }, - { - "epoch": 0.19, - "grad_norm": 1.062684827845211, - "learning_rate": 1.8759096663720757e-05, - "loss": 0.2038, - "step": 3647 - }, - { - "epoch": 0.19, - "grad_norm": 0.8424173998687214, - "learning_rate": 1.8758301923518022e-05, - "loss": 0.2075, - "step": 3648 - }, - { - "epoch": 0.19, - "grad_norm": 1.2659538205110992, - "learning_rate": 1.8757506945745853e-05, - "loss": 0.2135, - "step": 3649 - }, - { - "epoch": 0.19, - "grad_norm": 0.8779279555202277, - "learning_rate": 1.875671173042581e-05, - "loss": 0.2124, - "step": 3650 - }, - { - "epoch": 0.19, - "grad_norm": 1.0724684529868374, - "learning_rate": 1.875591627757947e-05, - "loss": 0.2271, - "step": 3651 - }, - { - "epoch": 0.19, - "grad_norm": 0.997713641099804, - "learning_rate": 1.8755120587228407e-05, - "loss": 0.211, - "step": 3652 - }, - { - "epoch": 0.19, - "grad_norm": 0.9509704006661915, - "learning_rate": 1.87543246593942e-05, - "loss": 0.2153, - "step": 3653 - }, - { - "epoch": 0.19, - "grad_norm": 0.9876044502985013, - "learning_rate": 1.8753528494098448e-05, - "loss": 0.2446, - "step": 3654 - }, - { - "epoch": 0.19, - "grad_norm": 0.9074009768086965, - "learning_rate": 1.8752732091362737e-05, - "loss": 0.2376, - "step": 3655 - }, - { - "epoch": 0.19, - "grad_norm": 0.8466323825950363, - "learning_rate": 1.8751935451208672e-05, - "loss": 0.2198, - "step": 3656 - }, - { - "epoch": 0.19, - "grad_norm": 0.9408926489341524, - "learning_rate": 1.875113857365787e-05, - "loss": 0.2191, - "step": 3657 - }, - { - "epoch": 0.19, - "grad_norm": 0.9072872274928622, - "learning_rate": 1.8750341458731934e-05, - "loss": 0.2319, - "step": 3658 - }, - { - "epoch": 0.19, - "grad_norm": 0.9542440451622091, - "learning_rate": 1.8749544106452493e-05, - "loss": 0.1976, - "step": 3659 - }, - { - "epoch": 0.19, - "grad_norm": 0.7645091657265274, - "learning_rate": 1.8748746516841173e-05, - "loss": 0.2083, - "step": 3660 - }, - { - "epoch": 0.19, - "grad_norm": 1.156226521004843, - "learning_rate": 1.8747948689919613e-05, - "loss": 0.2487, - "step": 3661 - }, - { - "epoch": 0.19, - "grad_norm": 0.955620834216678, - "learning_rate": 1.8747150625709447e-05, - "loss": 0.2347, - "step": 3662 - }, - { - "epoch": 0.19, - "grad_norm": 0.9942108737792252, - "learning_rate": 1.874635232423233e-05, - "loss": 0.2062, - "step": 3663 - }, - { - "epoch": 0.19, - "grad_norm": 1.007020797262451, - "learning_rate": 1.874555378550991e-05, - "loss": 0.2038, - "step": 3664 - }, - { - "epoch": 0.19, - "grad_norm": 0.9402919883523149, - "learning_rate": 1.874475500956385e-05, - "loss": 0.2045, - "step": 3665 - }, - { - "epoch": 0.19, - "grad_norm": 1.3820371958137763, - "learning_rate": 1.8743955996415816e-05, - "loss": 0.2158, - "step": 3666 - }, - { - "epoch": 0.19, - "grad_norm": 1.3681512159679738, - "learning_rate": 1.8743156746087484e-05, - "loss": 0.2455, - "step": 3667 - }, - { - "epoch": 0.19, - "grad_norm": 0.8308239350203164, - "learning_rate": 1.874235725860053e-05, - "loss": 0.1835, - "step": 3668 - }, - { - "epoch": 0.19, - "grad_norm": 0.9181442338099888, - "learning_rate": 1.874155753397664e-05, - "loss": 0.2669, - "step": 3669 - }, - { - "epoch": 0.19, - "grad_norm": 1.5206039594663598, - "learning_rate": 1.874075757223751e-05, - "loss": 0.2338, - "step": 3670 - }, - { - "epoch": 0.19, - "grad_norm": 1.1480071255535726, - "learning_rate": 1.8739957373404835e-05, - "loss": 0.2121, - "step": 3671 - }, - { - "epoch": 0.19, - "grad_norm": 1.389739264223165, - "learning_rate": 1.8739156937500323e-05, - "loss": 0.2205, - "step": 3672 - }, - { - "epoch": 0.19, - "grad_norm": 1.238895768277098, - "learning_rate": 1.8738356264545685e-05, - "loss": 0.218, - "step": 3673 - }, - { - "epoch": 0.19, - "grad_norm": 0.9526703867083856, - "learning_rate": 1.8737555354562644e-05, - "loss": 0.2024, - "step": 3674 - }, - { - "epoch": 0.19, - "grad_norm": 0.8969813658287388, - "learning_rate": 1.873675420757292e-05, - "loss": 0.2295, - "step": 3675 - }, - { - "epoch": 0.19, - "grad_norm": 1.2168913052504897, - "learning_rate": 1.873595282359824e-05, - "loss": 0.2147, - "step": 3676 - }, - { - "epoch": 0.19, - "grad_norm": 1.021148886487509, - "learning_rate": 1.873515120266035e-05, - "loss": 0.2271, - "step": 3677 - }, - { - "epoch": 0.19, - "grad_norm": 1.089162382736061, - "learning_rate": 1.8734349344780985e-05, - "loss": 0.2538, - "step": 3678 - }, - { - "epoch": 0.19, - "grad_norm": 1.087846460938777, - "learning_rate": 1.8733547249981904e-05, - "loss": 0.2473, - "step": 3679 - }, - { - "epoch": 0.19, - "grad_norm": 1.6091696891202787, - "learning_rate": 1.8732744918284866e-05, - "loss": 0.2228, - "step": 3680 - }, - { - "epoch": 0.19, - "grad_norm": 0.9178567817841584, - "learning_rate": 1.873194234971162e-05, - "loss": 0.1982, - "step": 3681 - }, - { - "epoch": 0.19, - "grad_norm": 0.9954008637114219, - "learning_rate": 1.8731139544283952e-05, - "loss": 0.1894, - "step": 3682 - }, - { - "epoch": 0.19, - "grad_norm": 0.8191302667217233, - "learning_rate": 1.873033650202363e-05, - "loss": 0.2321, - "step": 3683 - }, - { - "epoch": 0.19, - "grad_norm": 1.7547033099045546, - "learning_rate": 1.872953322295243e-05, - "loss": 0.2222, - "step": 3684 - }, - { - "epoch": 0.19, - "grad_norm": 1.0682176638270313, - "learning_rate": 1.8728729707092156e-05, - "loss": 0.2266, - "step": 3685 - }, - { - "epoch": 0.19, - "grad_norm": 1.0770283192110661, - "learning_rate": 1.8727925954464588e-05, - "loss": 0.209, - "step": 3686 - }, - { - "epoch": 0.19, - "grad_norm": 1.0130660718947984, - "learning_rate": 1.8727121965091542e-05, - "loss": 0.2055, - "step": 3687 - }, - { - "epoch": 0.19, - "grad_norm": 1.1315284288169722, - "learning_rate": 1.8726317738994817e-05, - "loss": 0.2378, - "step": 3688 - }, - { - "epoch": 0.19, - "grad_norm": 1.0757318688843531, - "learning_rate": 1.8725513276196232e-05, - "loss": 0.214, - "step": 3689 - }, - { - "epoch": 0.19, - "grad_norm": 0.8001330749908178, - "learning_rate": 1.8724708576717607e-05, - "loss": 0.2101, - "step": 3690 - }, - { - "epoch": 0.19, - "grad_norm": 1.2760143890057158, - "learning_rate": 1.872390364058077e-05, - "loss": 0.2274, - "step": 3691 - }, - { - "epoch": 0.19, - "grad_norm": 1.198424221315609, - "learning_rate": 1.872309846780755e-05, - "loss": 0.2008, - "step": 3692 - }, - { - "epoch": 0.19, - "grad_norm": 1.085392171878298, - "learning_rate": 1.8722293058419794e-05, - "loss": 0.2215, - "step": 3693 - }, - { - "epoch": 0.19, - "grad_norm": 1.0167219653500086, - "learning_rate": 1.8721487412439344e-05, - "loss": 0.2413, - "step": 3694 - }, - { - "epoch": 0.19, - "grad_norm": 1.6452475011732255, - "learning_rate": 1.8720681529888057e-05, - "loss": 0.2311, - "step": 3695 - }, - { - "epoch": 0.19, - "grad_norm": 1.148723352911266, - "learning_rate": 1.8719875410787793e-05, - "loss": 0.2149, - "step": 3696 - }, - { - "epoch": 0.19, - "grad_norm": 0.9979650627009616, - "learning_rate": 1.8719069055160415e-05, - "loss": 0.2114, - "step": 3697 - }, - { - "epoch": 0.19, - "grad_norm": 0.878102123527887, - "learning_rate": 1.8718262463027795e-05, - "loss": 0.2167, - "step": 3698 - }, - { - "epoch": 0.19, - "grad_norm": 0.8587460522038438, - "learning_rate": 1.8717455634411813e-05, - "loss": 0.2157, - "step": 3699 - }, - { - "epoch": 0.19, - "grad_norm": 0.9939679426048222, - "learning_rate": 1.8716648569334355e-05, - "loss": 0.2142, - "step": 3700 - }, - { - "epoch": 0.19, - "grad_norm": 1.0933099196445442, - "learning_rate": 1.8715841267817313e-05, - "loss": 0.24, - "step": 3701 - }, - { - "epoch": 0.19, - "grad_norm": 0.9072361235671731, - "learning_rate": 1.8715033729882585e-05, - "loss": 0.2267, - "step": 3702 - }, - { - "epoch": 0.19, - "grad_norm": 1.5972984097976861, - "learning_rate": 1.8714225955552074e-05, - "loss": 0.2271, - "step": 3703 - }, - { - "epoch": 0.19, - "grad_norm": 1.195176661277982, - "learning_rate": 1.8713417944847688e-05, - "loss": 0.2464, - "step": 3704 - }, - { - "epoch": 0.19, - "grad_norm": 0.964239160509171, - "learning_rate": 1.871260969779135e-05, - "loss": 0.224, - "step": 3705 - }, - { - "epoch": 0.19, - "grad_norm": 1.1009244798701132, - "learning_rate": 1.8711801214404986e-05, - "loss": 0.2277, - "step": 3706 - }, - { - "epoch": 0.19, - "grad_norm": 1.4679413308921536, - "learning_rate": 1.871099249471052e-05, - "loss": 0.2207, - "step": 3707 - }, - { - "epoch": 0.19, - "grad_norm": 1.4215098692749524, - "learning_rate": 1.871018353872989e-05, - "loss": 0.2172, - "step": 3708 - }, - { - "epoch": 0.19, - "grad_norm": 1.0315434809422932, - "learning_rate": 1.870937434648504e-05, - "loss": 0.221, - "step": 3709 - }, - { - "epoch": 0.19, - "grad_norm": 0.8370794377256259, - "learning_rate": 1.8708564917997917e-05, - "loss": 0.2127, - "step": 3710 - }, - { - "epoch": 0.19, - "grad_norm": 1.3562200726557778, - "learning_rate": 1.870775525329048e-05, - "loss": 0.2168, - "step": 3711 - }, - { - "epoch": 0.19, - "grad_norm": 1.0137792718910874, - "learning_rate": 1.870694535238469e-05, - "loss": 0.2214, - "step": 3712 - }, - { - "epoch": 0.19, - "grad_norm": 0.9076632568937337, - "learning_rate": 1.8706135215302517e-05, - "loss": 0.2201, - "step": 3713 - }, - { - "epoch": 0.19, - "grad_norm": 0.878009141999452, - "learning_rate": 1.8705324842065933e-05, - "loss": 0.2232, - "step": 3714 - }, - { - "epoch": 0.19, - "grad_norm": 1.0509396224366285, - "learning_rate": 1.870451423269692e-05, - "loss": 0.212, - "step": 3715 - }, - { - "epoch": 0.19, - "grad_norm": 1.043258822287421, - "learning_rate": 1.870370338721747e-05, - "loss": 0.212, - "step": 3716 - }, - { - "epoch": 0.19, - "grad_norm": 1.0121568442288489, - "learning_rate": 1.870289230564957e-05, - "loss": 0.2585, - "step": 3717 - }, - { - "epoch": 0.19, - "grad_norm": 1.0953299936197005, - "learning_rate": 1.870208098801523e-05, - "loss": 0.2228, - "step": 3718 - }, - { - "epoch": 0.19, - "grad_norm": 1.0607744241849235, - "learning_rate": 1.8701269434336447e-05, - "loss": 0.2489, - "step": 3719 - }, - { - "epoch": 0.19, - "grad_norm": 0.8472590526760381, - "learning_rate": 1.870045764463524e-05, - "loss": 0.2173, - "step": 3720 - }, - { - "epoch": 0.19, - "grad_norm": 1.0576878592684522, - "learning_rate": 1.8699645618933628e-05, - "loss": 0.2177, - "step": 3721 - }, - { - "epoch": 0.19, - "grad_norm": 1.0360486988527098, - "learning_rate": 1.869883335725364e-05, - "loss": 0.2238, - "step": 3722 - }, - { - "epoch": 0.19, - "grad_norm": 1.1969945924023768, - "learning_rate": 1.8698020859617303e-05, - "loss": 0.229, - "step": 3723 - }, - { - "epoch": 0.19, - "grad_norm": 1.1733345648184206, - "learning_rate": 1.869720812604666e-05, - "loss": 0.2289, - "step": 3724 - }, - { - "epoch": 0.19, - "grad_norm": 1.1734416691118115, - "learning_rate": 1.8696395156563753e-05, - "loss": 0.2121, - "step": 3725 - }, - { - "epoch": 0.19, - "grad_norm": 1.7741114933790696, - "learning_rate": 1.8695581951190636e-05, - "loss": 0.2237, - "step": 3726 - }, - { - "epoch": 0.19, - "grad_norm": 0.8425428534413996, - "learning_rate": 1.8694768509949374e-05, - "loss": 0.2082, - "step": 3727 - }, - { - "epoch": 0.19, - "grad_norm": 0.908533200343883, - "learning_rate": 1.8693954832862017e-05, - "loss": 0.2088, - "step": 3728 - }, - { - "epoch": 0.19, - "grad_norm": 1.084859595161081, - "learning_rate": 1.869314091995065e-05, - "loss": 0.191, - "step": 3729 - }, - { - "epoch": 0.19, - "grad_norm": 1.840271807447316, - "learning_rate": 1.8692326771237344e-05, - "loss": 0.258, - "step": 3730 - }, - { - "epoch": 0.19, - "grad_norm": 0.9170431065446714, - "learning_rate": 1.8691512386744183e-05, - "loss": 0.225, - "step": 3731 - }, - { - "epoch": 0.19, - "grad_norm": 1.0010752864096515, - "learning_rate": 1.8690697766493252e-05, - "loss": 0.2519, - "step": 3732 - }, - { - "epoch": 0.19, - "grad_norm": 0.8686061614515938, - "learning_rate": 1.8689882910506658e-05, - "loss": 0.2076, - "step": 3733 - }, - { - "epoch": 0.19, - "grad_norm": 1.0749405681204596, - "learning_rate": 1.8689067818806503e-05, - "loss": 0.2127, - "step": 3734 - }, - { - "epoch": 0.19, - "grad_norm": 1.0258137289693825, - "learning_rate": 1.8688252491414886e-05, - "loss": 0.2108, - "step": 3735 - }, - { - "epoch": 0.19, - "grad_norm": 1.0969570804965412, - "learning_rate": 1.868743692835393e-05, - "loss": 0.1829, - "step": 3736 - }, - { - "epoch": 0.19, - "grad_norm": 1.259486363094936, - "learning_rate": 1.868662112964576e-05, - "loss": 0.2184, - "step": 3737 - }, - { - "epoch": 0.19, - "grad_norm": 1.0370119946877436, - "learning_rate": 1.8685805095312498e-05, - "loss": 0.2101, - "step": 3738 - }, - { - "epoch": 0.19, - "grad_norm": 1.0349854099522016, - "learning_rate": 1.8684988825376285e-05, - "loss": 0.21, - "step": 3739 - }, - { - "epoch": 0.19, - "grad_norm": 1.2262944485867913, - "learning_rate": 1.8684172319859258e-05, - "loss": 0.2563, - "step": 3740 - }, - { - "epoch": 0.19, - "grad_norm": 1.1655799061302887, - "learning_rate": 1.8683355578783567e-05, - "loss": 0.2179, - "step": 3741 - }, - { - "epoch": 0.19, - "grad_norm": 2.4736756001428297, - "learning_rate": 1.868253860217137e-05, - "loss": 0.2407, - "step": 3742 - }, - { - "epoch": 0.19, - "grad_norm": 0.9410716912521949, - "learning_rate": 1.8681721390044817e-05, - "loss": 0.2069, - "step": 3743 - }, - { - "epoch": 0.19, - "grad_norm": 1.4025378391460672, - "learning_rate": 1.868090394242608e-05, - "loss": 0.2414, - "step": 3744 - }, - { - "epoch": 0.19, - "grad_norm": 1.313299060245258, - "learning_rate": 1.8680086259337337e-05, - "loss": 0.2157, - "step": 3745 - }, - { - "epoch": 0.19, - "grad_norm": 0.920574490398171, - "learning_rate": 1.8679268340800764e-05, - "loss": 0.2423, - "step": 3746 - }, - { - "epoch": 0.19, - "grad_norm": 1.0078079186913158, - "learning_rate": 1.8678450186838545e-05, - "loss": 0.2178, - "step": 3747 - }, - { - "epoch": 0.19, - "grad_norm": 0.8418987504966208, - "learning_rate": 1.8677631797472874e-05, - "loss": 0.2322, - "step": 3748 - }, - { - "epoch": 0.19, - "grad_norm": 1.2665665598701452, - "learning_rate": 1.867681317272595e-05, - "loss": 0.2214, - "step": 3749 - }, - { - "epoch": 0.19, - "grad_norm": 0.9829511796364977, - "learning_rate": 1.867599431261998e-05, - "loss": 0.2184, - "step": 3750 - }, - { - "epoch": 0.19, - "grad_norm": 1.3460645041914767, - "learning_rate": 1.8675175217177176e-05, - "loss": 0.2331, - "step": 3751 - }, - { - "epoch": 0.19, - "grad_norm": 0.9805157466720837, - "learning_rate": 1.867435588641975e-05, - "loss": 0.2293, - "step": 3752 - }, - { - "epoch": 0.19, - "grad_norm": 1.234993024768991, - "learning_rate": 1.8673536320369936e-05, - "loss": 0.2311, - "step": 3753 - }, - { - "epoch": 0.19, - "grad_norm": 1.373040649077192, - "learning_rate": 1.8672716519049957e-05, - "loss": 0.2258, - "step": 3754 - }, - { - "epoch": 0.19, - "grad_norm": 1.0665080193056045, - "learning_rate": 1.867189648248205e-05, - "loss": 0.2163, - "step": 3755 - }, - { - "epoch": 0.19, - "grad_norm": 1.42465957114889, - "learning_rate": 1.8671076210688467e-05, - "loss": 0.2035, - "step": 3756 - }, - { - "epoch": 0.19, - "grad_norm": 1.2238898901999282, - "learning_rate": 1.867025570369145e-05, - "loss": 0.2401, - "step": 3757 - }, - { - "epoch": 0.19, - "grad_norm": 1.176117512236095, - "learning_rate": 1.8669434961513256e-05, - "loss": 0.2117, - "step": 3758 - }, - { - "epoch": 0.19, - "grad_norm": 1.1875416816543796, - "learning_rate": 1.866861398417615e-05, - "loss": 0.2207, - "step": 3759 - }, - { - "epoch": 0.19, - "grad_norm": 1.2449151104729497, - "learning_rate": 1.8667792771702397e-05, - "loss": 0.2156, - "step": 3760 - }, - { - "epoch": 0.19, - "grad_norm": 0.8601865869175008, - "learning_rate": 1.8666971324114277e-05, - "loss": 0.1938, - "step": 3761 - }, - { - "epoch": 0.19, - "grad_norm": 1.0584334643359323, - "learning_rate": 1.866614964143407e-05, - "loss": 0.2206, - "step": 3762 - }, - { - "epoch": 0.19, - "grad_norm": 0.9599952315995963, - "learning_rate": 1.8665327723684065e-05, - "loss": 0.2334, - "step": 3763 - }, - { - "epoch": 0.19, - "grad_norm": 3.068548285779253, - "learning_rate": 1.8664505570886557e-05, - "loss": 0.2138, - "step": 3764 - }, - { - "epoch": 0.19, - "grad_norm": 0.9137698886234791, - "learning_rate": 1.8663683183063846e-05, - "loss": 0.2418, - "step": 3765 - }, - { - "epoch": 0.19, - "grad_norm": 0.8822421507354169, - "learning_rate": 1.8662860560238238e-05, - "loss": 0.2357, - "step": 3766 - }, - { - "epoch": 0.19, - "grad_norm": 0.9839602464096109, - "learning_rate": 1.866203770243205e-05, - "loss": 0.219, - "step": 3767 - }, - { - "epoch": 0.19, - "grad_norm": 1.017921041143888, - "learning_rate": 1.86612146096676e-05, - "loss": 0.1929, - "step": 3768 - }, - { - "epoch": 0.19, - "grad_norm": 0.9666760157968136, - "learning_rate": 1.8660391281967213e-05, - "loss": 0.2327, - "step": 3769 - }, - { - "epoch": 0.19, - "grad_norm": 1.2427749818036062, - "learning_rate": 1.8659567719353223e-05, - "loss": 0.2206, - "step": 3770 - }, - { - "epoch": 0.19, - "grad_norm": 1.0818309717884633, - "learning_rate": 1.865874392184797e-05, - "loss": 0.2248, - "step": 3771 - }, - { - "epoch": 0.19, - "grad_norm": 1.5483282083497667, - "learning_rate": 1.86579198894738e-05, - "loss": 0.2569, - "step": 3772 - }, - { - "epoch": 0.19, - "grad_norm": 1.0491292439109805, - "learning_rate": 1.8657095622253064e-05, - "loss": 0.2199, - "step": 3773 - }, - { - "epoch": 0.19, - "grad_norm": 0.98191139928569, - "learning_rate": 1.8656271120208118e-05, - "loss": 0.2415, - "step": 3774 - }, - { - "epoch": 0.19, - "grad_norm": 0.8249343376451618, - "learning_rate": 1.8655446383361332e-05, - "loss": 0.2419, - "step": 3775 - }, - { - "epoch": 0.19, - "grad_norm": 1.0439989508003285, - "learning_rate": 1.8654621411735072e-05, - "loss": 0.2125, - "step": 3776 - }, - { - "epoch": 0.19, - "grad_norm": 0.9956541278544652, - "learning_rate": 1.8653796205351717e-05, - "loss": 0.2423, - "step": 3777 - }, - { - "epoch": 0.19, - "grad_norm": 0.7951566731952017, - "learning_rate": 1.865297076423365e-05, - "loss": 0.2042, - "step": 3778 - }, - { - "epoch": 0.19, - "grad_norm": 1.0345974375671212, - "learning_rate": 1.8652145088403267e-05, - "loss": 0.2209, - "step": 3779 - }, - { - "epoch": 0.19, - "grad_norm": 0.9969738180774795, - "learning_rate": 1.8651319177882957e-05, - "loss": 0.2287, - "step": 3780 - }, - { - "epoch": 0.19, - "grad_norm": 1.4389658031539585, - "learning_rate": 1.8650493032695124e-05, - "loss": 0.2236, - "step": 3781 - }, - { - "epoch": 0.19, - "grad_norm": 1.134999809634415, - "learning_rate": 1.8649666652862183e-05, - "loss": 0.2247, - "step": 3782 - }, - { - "epoch": 0.19, - "grad_norm": 0.9820850253804223, - "learning_rate": 1.864884003840654e-05, - "loss": 0.2137, - "step": 3783 - }, - { - "epoch": 0.19, - "grad_norm": 0.8792425222701588, - "learning_rate": 1.8648013189350628e-05, - "loss": 0.2228, - "step": 3784 - }, - { - "epoch": 0.19, - "grad_norm": 1.2159676018930894, - "learning_rate": 1.864718610571687e-05, - "loss": 0.2443, - "step": 3785 - }, - { - "epoch": 0.19, - "grad_norm": 0.9169076017661532, - "learning_rate": 1.8646358787527697e-05, - "loss": 0.197, - "step": 3786 - }, - { - "epoch": 0.19, - "grad_norm": 1.1325155925959844, - "learning_rate": 1.8645531234805554e-05, - "loss": 0.2191, - "step": 3787 - }, - { - "epoch": 0.19, - "grad_norm": 1.2026712867680391, - "learning_rate": 1.864470344757289e-05, - "loss": 0.2246, - "step": 3788 - }, - { - "epoch": 0.19, - "grad_norm": 1.1107125406261882, - "learning_rate": 1.8643875425852155e-05, - "loss": 0.2394, - "step": 3789 - }, - { - "epoch": 0.19, - "grad_norm": 1.4123550933251017, - "learning_rate": 1.864304716966581e-05, - "loss": 0.2606, - "step": 3790 - }, - { - "epoch": 0.19, - "grad_norm": 1.325865887213283, - "learning_rate": 1.8642218679036324e-05, - "loss": 0.2172, - "step": 3791 - }, - { - "epoch": 0.19, - "grad_norm": 1.0620807363636982, - "learning_rate": 1.8641389953986165e-05, - "loss": 0.2409, - "step": 3792 - }, - { - "epoch": 0.19, - "grad_norm": 1.071205185857897, - "learning_rate": 1.864056099453782e-05, - "loss": 0.2386, - "step": 3793 - }, - { - "epoch": 0.19, - "grad_norm": 0.8456964313282443, - "learning_rate": 1.8639731800713766e-05, - "loss": 0.1992, - "step": 3794 - }, - { - "epoch": 0.19, - "grad_norm": 1.1931661849786743, - "learning_rate": 1.8638902372536502e-05, - "loss": 0.2136, - "step": 3795 - }, - { - "epoch": 0.19, - "grad_norm": 1.120405301713219, - "learning_rate": 1.8638072710028523e-05, - "loss": 0.2087, - "step": 3796 - }, - { - "epoch": 0.19, - "grad_norm": 0.9879170452045153, - "learning_rate": 1.8637242813212334e-05, - "loss": 0.2056, - "step": 3797 - }, - { - "epoch": 0.19, - "grad_norm": 3.2824930991489314, - "learning_rate": 1.8636412682110445e-05, - "loss": 0.2151, - "step": 3798 - }, - { - "epoch": 0.19, - "grad_norm": 1.293469054776063, - "learning_rate": 1.8635582316745374e-05, - "loss": 0.2052, - "step": 3799 - }, - { - "epoch": 0.19, - "grad_norm": 1.025378532115027, - "learning_rate": 1.8634751717139644e-05, - "loss": 0.2118, - "step": 3800 - }, - { - "epoch": 0.19, - "grad_norm": 0.7830661665580738, - "learning_rate": 1.863392088331579e-05, - "loss": 0.2128, - "step": 3801 - }, - { - "epoch": 0.19, - "grad_norm": 1.3021193311965715, - "learning_rate": 1.863308981529634e-05, - "loss": 0.2381, - "step": 3802 - }, - { - "epoch": 0.19, - "grad_norm": 0.762346379086396, - "learning_rate": 1.863225851310384e-05, - "loss": 0.2063, - "step": 3803 - }, - { - "epoch": 0.19, - "grad_norm": 1.4515520631191818, - "learning_rate": 1.8631426976760844e-05, - "loss": 0.2143, - "step": 3804 - }, - { - "epoch": 0.19, - "grad_norm": 1.171511742865271, - "learning_rate": 1.8630595206289904e-05, - "loss": 0.227, - "step": 3805 - }, - { - "epoch": 0.19, - "grad_norm": 0.8935558090507881, - "learning_rate": 1.862976320171358e-05, - "loss": 0.2127, - "step": 3806 - }, - { - "epoch": 0.19, - "grad_norm": 0.9868740593439831, - "learning_rate": 1.8628930963054444e-05, - "loss": 0.2191, - "step": 3807 - }, - { - "epoch": 0.19, - "grad_norm": 0.9324619016671554, - "learning_rate": 1.8628098490335064e-05, - "loss": 0.2184, - "step": 3808 - }, - { - "epoch": 0.19, - "grad_norm": 1.002216078090249, - "learning_rate": 1.8627265783578028e-05, - "loss": 0.2105, - "step": 3809 - }, - { - "epoch": 0.19, - "grad_norm": 1.1699929258731216, - "learning_rate": 1.862643284280592e-05, - "loss": 0.2267, - "step": 3810 - }, - { - "epoch": 0.19, - "grad_norm": 0.837422894813569, - "learning_rate": 1.8625599668041334e-05, - "loss": 0.1931, - "step": 3811 - }, - { - "epoch": 0.19, - "grad_norm": 0.9597112271853646, - "learning_rate": 1.862476625930687e-05, - "loss": 0.222, - "step": 3812 - }, - { - "epoch": 0.19, - "grad_norm": 0.9219677680827443, - "learning_rate": 1.8623932616625133e-05, - "loss": 0.2037, - "step": 3813 - }, - { - "epoch": 0.19, - "grad_norm": 0.9555404866784017, - "learning_rate": 1.8623098740018736e-05, - "loss": 0.244, - "step": 3814 - }, - { - "epoch": 0.19, - "grad_norm": 0.8565637533356807, - "learning_rate": 1.86222646295103e-05, - "loss": 0.2166, - "step": 3815 - }, - { - "epoch": 0.19, - "grad_norm": 1.0481091626759864, - "learning_rate": 1.8621430285122447e-05, - "loss": 0.231, - "step": 3816 - }, - { - "epoch": 0.19, - "grad_norm": 0.8774766428514378, - "learning_rate": 1.8620595706877816e-05, - "loss": 0.214, - "step": 3817 - }, - { - "epoch": 0.19, - "grad_norm": 0.9865676986752081, - "learning_rate": 1.8619760894799034e-05, - "loss": 0.2236, - "step": 3818 - }, - { - "epoch": 0.19, - "grad_norm": 1.2433378952932157, - "learning_rate": 1.8618925848908757e-05, - "loss": 0.2178, - "step": 3819 - }, - { - "epoch": 0.19, - "grad_norm": 0.8438590361016094, - "learning_rate": 1.861809056922962e-05, - "loss": 0.2188, - "step": 3820 - }, - { - "epoch": 0.19, - "grad_norm": 0.9083392931246398, - "learning_rate": 1.8617255055784298e-05, - "loss": 0.2152, - "step": 3821 - }, - { - "epoch": 0.19, - "grad_norm": 0.9041176626279603, - "learning_rate": 1.8616419308595443e-05, - "loss": 0.2471, - "step": 3822 - }, - { - "epoch": 0.19, - "grad_norm": 0.8617167966436966, - "learning_rate": 1.8615583327685726e-05, - "loss": 0.2185, - "step": 3823 - }, - { - "epoch": 0.19, - "grad_norm": 0.831294907600138, - "learning_rate": 1.8614747113077826e-05, - "loss": 0.238, - "step": 3824 - }, - { - "epoch": 0.19, - "grad_norm": 1.1312425674849975, - "learning_rate": 1.8613910664794424e-05, - "loss": 0.2646, - "step": 3825 - }, - { - "epoch": 0.19, - "grad_norm": 0.9087120849416299, - "learning_rate": 1.861307398285821e-05, - "loss": 0.2043, - "step": 3826 - }, - { - "epoch": 0.19, - "grad_norm": 1.0711854562301435, - "learning_rate": 1.8612237067291878e-05, - "loss": 0.2137, - "step": 3827 - }, - { - "epoch": 0.19, - "grad_norm": 1.1537888299444026, - "learning_rate": 1.8611399918118124e-05, - "loss": 0.2275, - "step": 3828 - }, - { - "epoch": 0.19, - "grad_norm": 1.053077637022216, - "learning_rate": 1.8610562535359667e-05, - "loss": 0.2573, - "step": 3829 - }, - { - "epoch": 0.19, - "grad_norm": 0.8794836263083736, - "learning_rate": 1.8609724919039213e-05, - "loss": 0.2217, - "step": 3830 - }, - { - "epoch": 0.19, - "grad_norm": 1.1927003302097705, - "learning_rate": 1.8608887069179483e-05, - "loss": 0.2239, - "step": 3831 - }, - { - "epoch": 0.19, - "grad_norm": 0.9961929880291288, - "learning_rate": 1.8608048985803205e-05, - "loss": 0.2101, - "step": 3832 - }, - { - "epoch": 0.19, - "grad_norm": 0.7476771727973698, - "learning_rate": 1.8607210668933114e-05, - "loss": 0.2028, - "step": 3833 - }, - { - "epoch": 0.19, - "grad_norm": 1.1708729140370346, - "learning_rate": 1.8606372118591943e-05, - "loss": 0.2418, - "step": 3834 - }, - { - "epoch": 0.2, - "grad_norm": 1.95154019222484, - "learning_rate": 1.8605533334802448e-05, - "loss": 0.22, - "step": 3835 - }, - { - "epoch": 0.2, - "grad_norm": 0.9420981207262242, - "learning_rate": 1.8604694317587372e-05, - "loss": 0.1962, - "step": 3836 - }, - { - "epoch": 0.2, - "grad_norm": 0.7574527137216649, - "learning_rate": 1.8603855066969478e-05, - "loss": 0.2019, - "step": 3837 - }, - { - "epoch": 0.2, - "grad_norm": 1.1338212264538146, - "learning_rate": 1.860301558297153e-05, - "loss": 0.234, - "step": 3838 - }, - { - "epoch": 0.2, - "grad_norm": 0.9869099042527179, - "learning_rate": 1.8602175865616296e-05, - "loss": 0.233, - "step": 3839 - }, - { - "epoch": 0.2, - "grad_norm": 0.9117557983918922, - "learning_rate": 1.8601335914926558e-05, - "loss": 0.2072, - "step": 3840 - }, - { - "epoch": 0.2, - "grad_norm": 1.0213142783338018, - "learning_rate": 1.8600495730925095e-05, - "loss": 0.2166, - "step": 3841 - }, - { - "epoch": 0.2, - "grad_norm": 1.0960729307387658, - "learning_rate": 1.8599655313634702e-05, - "loss": 0.1988, - "step": 3842 - }, - { - "epoch": 0.2, - "grad_norm": 1.2800084319729785, - "learning_rate": 1.859881466307817e-05, - "loss": 0.2403, - "step": 3843 - }, - { - "epoch": 0.2, - "grad_norm": 1.643017787270073, - "learning_rate": 1.8597973779278307e-05, - "loss": 0.2231, - "step": 3844 - }, - { - "epoch": 0.2, - "grad_norm": 1.0880616948226376, - "learning_rate": 1.859713266225792e-05, - "loss": 0.2414, - "step": 3845 - }, - { - "epoch": 0.2, - "grad_norm": 0.9851831423429016, - "learning_rate": 1.859629131203982e-05, - "loss": 0.2098, - "step": 3846 - }, - { - "epoch": 0.2, - "grad_norm": 0.9979121868241856, - "learning_rate": 1.859544972864684e-05, - "loss": 0.223, - "step": 3847 - }, - { - "epoch": 0.2, - "grad_norm": 0.9845690582968679, - "learning_rate": 1.8594607912101797e-05, - "loss": 0.2172, - "step": 3848 - }, - { - "epoch": 0.2, - "grad_norm": 1.2156036882618944, - "learning_rate": 1.8593765862427526e-05, - "loss": 0.2268, - "step": 3849 - }, - { - "epoch": 0.2, - "grad_norm": 2.037311775310428, - "learning_rate": 1.8592923579646874e-05, - "loss": 0.2036, - "step": 3850 - }, - { - "epoch": 0.2, - "grad_norm": 2.254690899134614, - "learning_rate": 1.8592081063782685e-05, - "loss": 0.2185, - "step": 3851 - }, - { - "epoch": 0.2, - "grad_norm": 1.3888197734442886, - "learning_rate": 1.8591238314857806e-05, - "loss": 0.2096, - "step": 3852 - }, - { - "epoch": 0.2, - "grad_norm": 1.7310357928446478, - "learning_rate": 1.859039533289511e-05, - "loss": 0.2426, - "step": 3853 - }, - { - "epoch": 0.2, - "grad_norm": 0.8752949059665149, - "learning_rate": 1.858955211791745e-05, - "loss": 0.2248, - "step": 3854 - }, - { - "epoch": 0.2, - "grad_norm": 0.9799619031728044, - "learning_rate": 1.858870866994771e-05, - "loss": 0.2234, - "step": 3855 - }, - { - "epoch": 0.2, - "grad_norm": 0.8881141779724255, - "learning_rate": 1.8587864989008758e-05, - "loss": 0.2308, - "step": 3856 - }, - { - "epoch": 0.2, - "grad_norm": 0.929858899344705, - "learning_rate": 1.8587021075123482e-05, - "loss": 0.1902, - "step": 3857 - }, - { - "epoch": 0.2, - "grad_norm": 0.8424469468404533, - "learning_rate": 1.8586176928314774e-05, - "loss": 0.2072, - "step": 3858 - }, - { - "epoch": 0.2, - "grad_norm": 1.7706289278768073, - "learning_rate": 1.858533254860553e-05, - "loss": 0.223, - "step": 3859 - }, - { - "epoch": 0.2, - "grad_norm": 1.2901367072560834, - "learning_rate": 1.8584487936018663e-05, - "loss": 0.2194, - "step": 3860 - }, - { - "epoch": 0.2, - "grad_norm": 0.9724146249544461, - "learning_rate": 1.8583643090577072e-05, - "loss": 0.2297, - "step": 3861 - }, - { - "epoch": 0.2, - "grad_norm": 0.9272187008477141, - "learning_rate": 1.8582798012303674e-05, - "loss": 0.2279, - "step": 3862 - }, - { - "epoch": 0.2, - "grad_norm": 0.8680660636380514, - "learning_rate": 1.85819527012214e-05, - "loss": 0.2178, - "step": 3863 - }, - { - "epoch": 0.2, - "grad_norm": 1.0452921030928661, - "learning_rate": 1.8581107157353175e-05, - "loss": 0.2579, - "step": 3864 - }, - { - "epoch": 0.2, - "grad_norm": 0.8615649767726768, - "learning_rate": 1.8580261380721932e-05, - "loss": 0.2024, - "step": 3865 - }, - { - "epoch": 0.2, - "grad_norm": 1.2325980969486496, - "learning_rate": 1.8579415371350613e-05, - "loss": 0.2322, - "step": 3866 - }, - { - "epoch": 0.2, - "grad_norm": 0.8933378457355367, - "learning_rate": 1.8578569129262168e-05, - "loss": 0.2031, - "step": 3867 - }, - { - "epoch": 0.2, - "grad_norm": 0.9453357111940053, - "learning_rate": 1.857772265447955e-05, - "loss": 0.2287, - "step": 3868 - }, - { - "epoch": 0.2, - "grad_norm": 0.9394950082674527, - "learning_rate": 1.8576875947025725e-05, - "loss": 0.2162, - "step": 3869 - }, - { - "epoch": 0.2, - "grad_norm": 1.3055556466692884, - "learning_rate": 1.8576029006923653e-05, - "loss": 0.2314, - "step": 3870 - }, - { - "epoch": 0.2, - "grad_norm": 0.9320739710953067, - "learning_rate": 1.8575181834196308e-05, - "loss": 0.2099, - "step": 3871 - }, - { - "epoch": 0.2, - "grad_norm": 1.2266250587407823, - "learning_rate": 1.857433442886667e-05, - "loss": 0.2115, - "step": 3872 - }, - { - "epoch": 0.2, - "grad_norm": 1.034361450688478, - "learning_rate": 1.8573486790957732e-05, - "loss": 0.2377, - "step": 3873 - }, - { - "epoch": 0.2, - "grad_norm": 1.061886386036358, - "learning_rate": 1.8572638920492476e-05, - "loss": 0.2303, - "step": 3874 - }, - { - "epoch": 0.2, - "grad_norm": 0.9135745761327706, - "learning_rate": 1.857179081749391e-05, - "loss": 0.227, - "step": 3875 - }, - { - "epoch": 0.2, - "grad_norm": 2.692975173577015, - "learning_rate": 1.8570942481985027e-05, - "loss": 0.2338, - "step": 3876 - }, - { - "epoch": 0.2, - "grad_norm": 1.114073471726701, - "learning_rate": 1.857009391398885e-05, - "loss": 0.2414, - "step": 3877 - }, - { - "epoch": 0.2, - "grad_norm": 0.8604903674257696, - "learning_rate": 1.856924511352839e-05, - "loss": 0.2341, - "step": 3878 - }, - { - "epoch": 0.2, - "grad_norm": 1.0061238541965627, - "learning_rate": 1.8568396080626673e-05, - "loss": 0.2605, - "step": 3879 - }, - { - "epoch": 0.2, - "grad_norm": 0.8186410015687037, - "learning_rate": 1.8567546815306726e-05, - "loss": 0.2205, - "step": 3880 - }, - { - "epoch": 0.2, - "grad_norm": 0.9094401156407541, - "learning_rate": 1.856669731759159e-05, - "loss": 0.2217, - "step": 3881 - }, - { - "epoch": 0.2, - "grad_norm": 0.8157865383260736, - "learning_rate": 1.8565847587504305e-05, - "loss": 0.2128, - "step": 3882 - }, - { - "epoch": 0.2, - "grad_norm": 1.0473730645092414, - "learning_rate": 1.856499762506792e-05, - "loss": 0.213, - "step": 3883 - }, - { - "epoch": 0.2, - "grad_norm": 0.9802341644805417, - "learning_rate": 1.8564147430305493e-05, - "loss": 0.272, - "step": 3884 - }, - { - "epoch": 0.2, - "grad_norm": 0.9530522304023589, - "learning_rate": 1.8563297003240078e-05, - "loss": 0.225, - "step": 3885 - }, - { - "epoch": 0.2, - "grad_norm": 0.9902349476942686, - "learning_rate": 1.8562446343894753e-05, - "loss": 0.214, - "step": 3886 - }, - { - "epoch": 0.2, - "grad_norm": 0.9234130370394749, - "learning_rate": 1.8561595452292587e-05, - "loss": 0.2148, - "step": 3887 - }, - { - "epoch": 0.2, - "grad_norm": 0.948780172751885, - "learning_rate": 1.8560744328456657e-05, - "loss": 0.2093, - "step": 3888 - }, - { - "epoch": 0.2, - "grad_norm": 1.1596350769709731, - "learning_rate": 1.855989297241006e-05, - "loss": 0.2314, - "step": 3889 - }, - { - "epoch": 0.2, - "grad_norm": 0.8493065367210507, - "learning_rate": 1.8559041384175874e-05, - "loss": 0.2287, - "step": 3890 - }, - { - "epoch": 0.2, - "grad_norm": 1.0826434951585162, - "learning_rate": 1.8558189563777213e-05, - "loss": 0.2296, - "step": 3891 - }, - { - "epoch": 0.2, - "grad_norm": 1.233129074251226, - "learning_rate": 1.8557337511237178e-05, - "loss": 0.2124, - "step": 3892 - }, - { - "epoch": 0.2, - "grad_norm": 2.275827478065935, - "learning_rate": 1.8556485226578877e-05, - "loss": 0.2059, - "step": 3893 - }, - { - "epoch": 0.2, - "grad_norm": 1.0039858133301296, - "learning_rate": 1.8555632709825427e-05, - "loss": 0.2189, - "step": 3894 - }, - { - "epoch": 0.2, - "grad_norm": 1.298069959049279, - "learning_rate": 1.8554779960999963e-05, - "loss": 0.2339, - "step": 3895 - }, - { - "epoch": 0.2, - "grad_norm": 0.9895375285393244, - "learning_rate": 1.8553926980125608e-05, - "loss": 0.2092, - "step": 3896 - }, - { - "epoch": 0.2, - "grad_norm": 1.2551382881470876, - "learning_rate": 1.85530737672255e-05, - "loss": 0.2152, - "step": 3897 - }, - { - "epoch": 0.2, - "grad_norm": 1.0977374431389633, - "learning_rate": 1.8552220322322782e-05, - "loss": 0.235, - "step": 3898 - }, - { - "epoch": 0.2, - "grad_norm": 1.1044374060859814, - "learning_rate": 1.8551366645440604e-05, - "loss": 0.2008, - "step": 3899 - }, - { - "epoch": 0.2, - "grad_norm": 1.2014769038354687, - "learning_rate": 1.8550512736602125e-05, - "loss": 0.2217, - "step": 3900 - }, - { - "epoch": 0.2, - "grad_norm": 0.9532092971485372, - "learning_rate": 1.8549658595830505e-05, - "loss": 0.2228, - "step": 3901 - }, - { - "epoch": 0.2, - "grad_norm": 0.841470746178915, - "learning_rate": 1.8548804223148914e-05, - "loss": 0.2394, - "step": 3902 - }, - { - "epoch": 0.2, - "grad_norm": 1.1083325341107813, - "learning_rate": 1.854794961858052e-05, - "loss": 0.2163, - "step": 3903 - }, - { - "epoch": 0.2, - "grad_norm": 0.9741102853470924, - "learning_rate": 1.8547094782148513e-05, - "loss": 0.2372, - "step": 3904 - }, - { - "epoch": 0.2, - "grad_norm": 0.9752661376202427, - "learning_rate": 1.8546239713876077e-05, - "loss": 0.2322, - "step": 3905 - }, - { - "epoch": 0.2, - "grad_norm": 1.077642457000483, - "learning_rate": 1.8545384413786406e-05, - "loss": 0.2166, - "step": 3906 - }, - { - "epoch": 0.2, - "grad_norm": 1.112417457963183, - "learning_rate": 1.85445288819027e-05, - "loss": 0.2521, - "step": 3907 - }, - { - "epoch": 0.2, - "grad_norm": 1.133826703746313, - "learning_rate": 1.8543673118248167e-05, - "loss": 0.207, - "step": 3908 - }, - { - "epoch": 0.2, - "grad_norm": 0.9714603769415149, - "learning_rate": 1.854281712284602e-05, - "loss": 0.2076, - "step": 3909 - }, - { - "epoch": 0.2, - "grad_norm": 1.0225184907059015, - "learning_rate": 1.8541960895719473e-05, - "loss": 0.228, - "step": 3910 - }, - { - "epoch": 0.2, - "grad_norm": 1.420720984524292, - "learning_rate": 1.8541104436891756e-05, - "loss": 0.2016, - "step": 3911 - }, - { - "epoch": 0.2, - "grad_norm": 0.9235109665556697, - "learning_rate": 1.8540247746386095e-05, - "loss": 0.2355, - "step": 3912 - }, - { - "epoch": 0.2, - "grad_norm": 1.0854904386542399, - "learning_rate": 1.8539390824225735e-05, - "loss": 0.2196, - "step": 3913 - }, - { - "epoch": 0.2, - "grad_norm": 0.9777382832585857, - "learning_rate": 1.8538533670433912e-05, - "loss": 0.2034, - "step": 3914 - }, - { - "epoch": 0.2, - "grad_norm": 1.0064862201866491, - "learning_rate": 1.8537676285033886e-05, - "loss": 0.1908, - "step": 3915 - }, - { - "epoch": 0.2, - "grad_norm": 0.9964750671495186, - "learning_rate": 1.8536818668048906e-05, - "loss": 0.2261, - "step": 3916 - }, - { - "epoch": 0.2, - "grad_norm": 1.0298903324507094, - "learning_rate": 1.8535960819502243e-05, - "loss": 0.2136, - "step": 3917 - }, - { - "epoch": 0.2, - "grad_norm": 1.023825717413679, - "learning_rate": 1.853510273941715e-05, - "loss": 0.2548, - "step": 3918 - }, - { - "epoch": 0.2, - "grad_norm": 1.0799038437526578, - "learning_rate": 1.8534244427816924e-05, - "loss": 0.2313, - "step": 3919 - }, - { - "epoch": 0.2, - "grad_norm": 1.683700267828652, - "learning_rate": 1.853338588472483e-05, - "loss": 0.2098, - "step": 3920 - }, - { - "epoch": 0.2, - "grad_norm": 1.002263888788098, - "learning_rate": 1.8532527110164163e-05, - "loss": 0.2272, - "step": 3921 - }, - { - "epoch": 0.2, - "grad_norm": 0.8523969928905435, - "learning_rate": 1.8531668104158215e-05, - "loss": 0.1953, - "step": 3922 - }, - { - "epoch": 0.2, - "grad_norm": 0.9766389662463112, - "learning_rate": 1.853080886673029e-05, - "loss": 0.2032, - "step": 3923 - }, - { - "epoch": 0.2, - "grad_norm": 0.9793413109004094, - "learning_rate": 1.8529949397903692e-05, - "loss": 0.2229, - "step": 3924 - }, - { - "epoch": 0.2, - "grad_norm": 1.0853519331853911, - "learning_rate": 1.8529089697701735e-05, - "loss": 0.2243, - "step": 3925 - }, - { - "epoch": 0.2, - "grad_norm": 1.0091941317388682, - "learning_rate": 1.8528229766147737e-05, - "loss": 0.2156, - "step": 3926 - }, - { - "epoch": 0.2, - "grad_norm": 1.428407186069211, - "learning_rate": 1.8527369603265027e-05, - "loss": 0.2173, - "step": 3927 - }, - { - "epoch": 0.2, - "grad_norm": 0.9884423582417243, - "learning_rate": 1.852650920907693e-05, - "loss": 0.2074, - "step": 3928 - }, - { - "epoch": 0.2, - "grad_norm": 1.0081667501207707, - "learning_rate": 1.8525648583606797e-05, - "loss": 0.2362, - "step": 3929 - }, - { - "epoch": 0.2, - "grad_norm": 0.9400868797070588, - "learning_rate": 1.8524787726877956e-05, - "loss": 0.2298, - "step": 3930 - }, - { - "epoch": 0.2, - "grad_norm": 1.0042055292298442, - "learning_rate": 1.8523926638913772e-05, - "loss": 0.207, - "step": 3931 - }, - { - "epoch": 0.2, - "grad_norm": 1.0274160199055828, - "learning_rate": 1.8523065319737597e-05, - "loss": 0.2311, - "step": 3932 - }, - { - "epoch": 0.2, - "grad_norm": 0.8839768613869868, - "learning_rate": 1.8522203769372794e-05, - "loss": 0.2022, - "step": 3933 - }, - { - "epoch": 0.2, - "grad_norm": 0.9604317794149866, - "learning_rate": 1.8521341987842726e-05, - "loss": 0.2192, - "step": 3934 - }, - { - "epoch": 0.2, - "grad_norm": 1.1657719535264452, - "learning_rate": 1.8520479975170783e-05, - "loss": 0.228, - "step": 3935 - }, - { - "epoch": 0.2, - "grad_norm": 1.0916660032667729, - "learning_rate": 1.8519617731380334e-05, - "loss": 0.2314, - "step": 3936 - }, - { - "epoch": 0.2, - "grad_norm": 0.9529536523373129, - "learning_rate": 1.8518755256494776e-05, - "loss": 0.2706, - "step": 3937 - }, - { - "epoch": 0.2, - "grad_norm": 0.9799853626728516, - "learning_rate": 1.85178925505375e-05, - "loss": 0.2381, - "step": 3938 - }, - { - "epoch": 0.2, - "grad_norm": 1.987670708955151, - "learning_rate": 1.851702961353191e-05, - "loss": 0.222, - "step": 3939 - }, - { - "epoch": 0.2, - "grad_norm": 1.0361197104887185, - "learning_rate": 1.8516166445501405e-05, - "loss": 0.2459, - "step": 3940 - }, - { - "epoch": 0.2, - "grad_norm": 1.1496121131841177, - "learning_rate": 1.8515303046469407e-05, - "loss": 0.2305, - "step": 3941 - }, - { - "epoch": 0.2, - "grad_norm": 1.175067670408769, - "learning_rate": 1.851443941645933e-05, - "loss": 0.2046, - "step": 3942 - }, - { - "epoch": 0.2, - "grad_norm": 1.0394576988286925, - "learning_rate": 1.851357555549461e-05, - "loss": 0.2249, - "step": 3943 - }, - { - "epoch": 0.2, - "grad_norm": 1.1378105288243487, - "learning_rate": 1.8512711463598666e-05, - "loss": 0.2161, - "step": 3944 - }, - { - "epoch": 0.2, - "grad_norm": 0.9691228729639899, - "learning_rate": 1.8511847140794944e-05, - "loss": 0.2451, - "step": 3945 - }, - { - "epoch": 0.2, - "grad_norm": 0.9107941557336705, - "learning_rate": 1.8510982587106888e-05, - "loss": 0.2163, - "step": 3946 - }, - { - "epoch": 0.2, - "grad_norm": 0.8438094197302133, - "learning_rate": 1.8510117802557948e-05, - "loss": 0.2172, - "step": 3947 - }, - { - "epoch": 0.2, - "grad_norm": 1.122561032622386, - "learning_rate": 1.8509252787171585e-05, - "loss": 0.2091, - "step": 3948 - }, - { - "epoch": 0.2, - "grad_norm": 1.1670322241173445, - "learning_rate": 1.8508387540971258e-05, - "loss": 0.2265, - "step": 3949 - }, - { - "epoch": 0.2, - "grad_norm": 0.8737935359329443, - "learning_rate": 1.8507522063980436e-05, - "loss": 0.2014, - "step": 3950 - }, - { - "epoch": 0.2, - "grad_norm": 0.957459298710243, - "learning_rate": 1.85066563562226e-05, - "loss": 0.2232, - "step": 3951 - }, - { - "epoch": 0.2, - "grad_norm": 0.9140556809479499, - "learning_rate": 1.850579041772123e-05, - "loss": 0.2213, - "step": 3952 - }, - { - "epoch": 0.2, - "grad_norm": 0.9341055775625612, - "learning_rate": 1.850492424849981e-05, - "loss": 0.2475, - "step": 3953 - }, - { - "epoch": 0.2, - "grad_norm": 1.8285233367924434, - "learning_rate": 1.8504057848581846e-05, - "loss": 0.2267, - "step": 3954 - }, - { - "epoch": 0.2, - "grad_norm": 1.2274016809994188, - "learning_rate": 1.850319121799083e-05, - "loss": 0.2167, - "step": 3955 - }, - { - "epoch": 0.2, - "grad_norm": 0.9569895683683748, - "learning_rate": 1.850232435675027e-05, - "loss": 0.2139, - "step": 3956 - }, - { - "epoch": 0.2, - "grad_norm": 0.9707813355784036, - "learning_rate": 1.8501457264883684e-05, - "loss": 0.2247, - "step": 3957 - }, - { - "epoch": 0.2, - "grad_norm": 1.6606571832523476, - "learning_rate": 1.850058994241459e-05, - "loss": 0.207, - "step": 3958 - }, - { - "epoch": 0.2, - "grad_norm": 0.9890119440101421, - "learning_rate": 1.8499722389366513e-05, - "loss": 0.2126, - "step": 3959 - }, - { - "epoch": 0.2, - "grad_norm": 0.8781210948864995, - "learning_rate": 1.8498854605762982e-05, - "loss": 0.2236, - "step": 3960 - }, - { - "epoch": 0.2, - "grad_norm": 1.2677885365794521, - "learning_rate": 1.8497986591627546e-05, - "loss": 0.2384, - "step": 3961 - }, - { - "epoch": 0.2, - "grad_norm": 0.8925478669144093, - "learning_rate": 1.849711834698374e-05, - "loss": 0.192, - "step": 3962 - }, - { - "epoch": 0.2, - "grad_norm": 1.096343147470085, - "learning_rate": 1.849624987185512e-05, - "loss": 0.1939, - "step": 3963 - }, - { - "epoch": 0.2, - "grad_norm": 0.9426657053588514, - "learning_rate": 1.8495381166265243e-05, - "loss": 0.2222, - "step": 3964 - }, - { - "epoch": 0.2, - "grad_norm": 1.0983582096020919, - "learning_rate": 1.849451223023767e-05, - "loss": 0.2271, - "step": 3965 - }, - { - "epoch": 0.2, - "grad_norm": 1.1097098319224445, - "learning_rate": 1.8493643063795973e-05, - "loss": 0.21, - "step": 3966 - }, - { - "epoch": 0.2, - "grad_norm": 0.9815198326231998, - "learning_rate": 1.8492773666963734e-05, - "loss": 0.197, - "step": 3967 - }, - { - "epoch": 0.2, - "grad_norm": 1.1798340581543854, - "learning_rate": 1.8491904039764523e-05, - "loss": 0.2297, - "step": 3968 - }, - { - "epoch": 0.2, - "grad_norm": 1.0813074125313995, - "learning_rate": 1.849103418222194e-05, - "loss": 0.24, - "step": 3969 - }, - { - "epoch": 0.2, - "grad_norm": 1.0502962640861704, - "learning_rate": 1.849016409435957e-05, - "loss": 0.2127, - "step": 3970 - }, - { - "epoch": 0.2, - "grad_norm": 1.005448488445372, - "learning_rate": 1.848929377620102e-05, - "loss": 0.2212, - "step": 3971 - }, - { - "epoch": 0.2, - "grad_norm": 1.3691830425652256, - "learning_rate": 1.84884232277699e-05, - "loss": 0.2317, - "step": 3972 - }, - { - "epoch": 0.2, - "grad_norm": 1.4197298065121104, - "learning_rate": 1.848755244908982e-05, - "loss": 0.2427, - "step": 3973 - }, - { - "epoch": 0.2, - "grad_norm": 1.2433237283809655, - "learning_rate": 1.84866814401844e-05, - "loss": 0.2357, - "step": 3974 - }, - { - "epoch": 0.2, - "grad_norm": 1.5182774149915292, - "learning_rate": 1.8485810201077266e-05, - "loss": 0.2133, - "step": 3975 - }, - { - "epoch": 0.2, - "grad_norm": 1.0417444382667709, - "learning_rate": 1.8484938731792052e-05, - "loss": 0.247, - "step": 3976 - }, - { - "epoch": 0.2, - "grad_norm": 1.1087194985346591, - "learning_rate": 1.8484067032352394e-05, - "loss": 0.2289, - "step": 3977 - }, - { - "epoch": 0.2, - "grad_norm": 3.227029058062171, - "learning_rate": 1.848319510278194e-05, - "loss": 0.2142, - "step": 3978 - }, - { - "epoch": 0.2, - "grad_norm": 1.0410937941188434, - "learning_rate": 1.848232294310434e-05, - "loss": 0.207, - "step": 3979 - }, - { - "epoch": 0.2, - "grad_norm": 1.013795250561338, - "learning_rate": 1.848145055334325e-05, - "loss": 0.2171, - "step": 3980 - }, - { - "epoch": 0.2, - "grad_norm": 0.9561721192043167, - "learning_rate": 1.8480577933522337e-05, - "loss": 0.2293, - "step": 3981 - }, - { - "epoch": 0.2, - "grad_norm": 0.9544153277598909, - "learning_rate": 1.8479705083665265e-05, - "loss": 0.1992, - "step": 3982 - }, - { - "epoch": 0.2, - "grad_norm": 2.4377696993777693, - "learning_rate": 1.8478832003795718e-05, - "loss": 0.2259, - "step": 3983 - }, - { - "epoch": 0.2, - "grad_norm": 1.0252596856842724, - "learning_rate": 1.847795869393737e-05, - "loss": 0.2228, - "step": 3984 - }, - { - "epoch": 0.2, - "grad_norm": 1.1646243348780456, - "learning_rate": 1.8477085154113913e-05, - "loss": 0.2211, - "step": 3985 - }, - { - "epoch": 0.2, - "grad_norm": 1.435138324835618, - "learning_rate": 1.8476211384349048e-05, - "loss": 0.2141, - "step": 3986 - }, - { - "epoch": 0.2, - "grad_norm": 1.1034704971175715, - "learning_rate": 1.8475337384666464e-05, - "loss": 0.2461, - "step": 3987 - }, - { - "epoch": 0.2, - "grad_norm": 1.040306523846338, - "learning_rate": 1.8474463155089882e-05, - "loss": 0.2177, - "step": 3988 - }, - { - "epoch": 0.2, - "grad_norm": 0.9668078218640307, - "learning_rate": 1.8473588695643002e-05, - "loss": 0.2249, - "step": 3989 - }, - { - "epoch": 0.2, - "grad_norm": 0.8420323004225635, - "learning_rate": 1.8472714006349554e-05, - "loss": 0.2322, - "step": 3990 - }, - { - "epoch": 0.2, - "grad_norm": 1.0936358241608921, - "learning_rate": 1.8471839087233255e-05, - "loss": 0.241, - "step": 3991 - }, - { - "epoch": 0.2, - "grad_norm": 1.139357797659001, - "learning_rate": 1.8470963938317846e-05, - "loss": 0.2545, - "step": 3992 - }, - { - "epoch": 0.2, - "grad_norm": 0.9151370967374843, - "learning_rate": 1.847008855962706e-05, - "loss": 0.2387, - "step": 3993 - }, - { - "epoch": 0.2, - "grad_norm": 0.9739624100709218, - "learning_rate": 1.8469212951184646e-05, - "loss": 0.2377, - "step": 3994 - }, - { - "epoch": 0.2, - "grad_norm": 0.9524532724244088, - "learning_rate": 1.846833711301435e-05, - "loss": 0.2261, - "step": 3995 - }, - { - "epoch": 0.2, - "grad_norm": 0.8491076395346986, - "learning_rate": 1.8467461045139933e-05, - "loss": 0.2342, - "step": 3996 - }, - { - "epoch": 0.2, - "grad_norm": 1.129923799376718, - "learning_rate": 1.8466584747585153e-05, - "loss": 0.2223, - "step": 3997 - }, - { - "epoch": 0.2, - "grad_norm": 1.7812650811136905, - "learning_rate": 1.846570822037379e-05, - "loss": 0.2453, - "step": 3998 - }, - { - "epoch": 0.2, - "grad_norm": 0.9166033090870708, - "learning_rate": 1.846483146352961e-05, - "loss": 0.2068, - "step": 3999 - }, - { - "epoch": 0.2, - "grad_norm": 1.3533118562066813, - "learning_rate": 1.84639544770764e-05, - "loss": 0.2234, - "step": 4000 - }, - { - "epoch": 0.2, - "grad_norm": 1.0150576918701262, - "learning_rate": 1.8463077261037946e-05, - "loss": 0.2315, - "step": 4001 - }, - { - "epoch": 0.2, - "grad_norm": 0.7586040200761496, - "learning_rate": 1.846219981543804e-05, - "loss": 0.1942, - "step": 4002 - }, - { - "epoch": 0.2, - "grad_norm": 1.086690733510189, - "learning_rate": 1.8461322140300487e-05, - "loss": 0.2162, - "step": 4003 - }, - { - "epoch": 0.2, - "grad_norm": 1.0870007848264334, - "learning_rate": 1.8460444235649097e-05, - "loss": 0.2274, - "step": 4004 - }, - { - "epoch": 0.2, - "grad_norm": 1.0814676375496997, - "learning_rate": 1.8459566101507675e-05, - "loss": 0.2228, - "step": 4005 - }, - { - "epoch": 0.2, - "grad_norm": 1.2847826029249587, - "learning_rate": 1.845868773790005e-05, - "loss": 0.2466, - "step": 4006 - }, - { - "epoch": 0.2, - "grad_norm": 1.0485152699205598, - "learning_rate": 1.845780914485004e-05, - "loss": 0.2193, - "step": 4007 - }, - { - "epoch": 0.2, - "grad_norm": 1.3231739436000518, - "learning_rate": 1.8456930322381476e-05, - "loss": 0.1984, - "step": 4008 - }, - { - "epoch": 0.2, - "grad_norm": 1.1393536976170808, - "learning_rate": 1.8456051270518204e-05, - "loss": 0.1964, - "step": 4009 - }, - { - "epoch": 0.2, - "grad_norm": 0.9858193805660505, - "learning_rate": 1.845517198928406e-05, - "loss": 0.2093, - "step": 4010 - }, - { - "epoch": 0.2, - "grad_norm": 2.102746359893761, - "learning_rate": 1.8454292478702898e-05, - "loss": 0.2292, - "step": 4011 - }, - { - "epoch": 0.2, - "grad_norm": 1.2060706071043537, - "learning_rate": 1.8453412738798577e-05, - "loss": 0.2282, - "step": 4012 - }, - { - "epoch": 0.2, - "grad_norm": 0.9578800418566528, - "learning_rate": 1.8452532769594956e-05, - "loss": 0.1901, - "step": 4013 - }, - { - "epoch": 0.2, - "grad_norm": 1.0015965893238714, - "learning_rate": 1.845165257111591e-05, - "loss": 0.2223, - "step": 4014 - }, - { - "epoch": 0.2, - "grad_norm": 2.23810803358979, - "learning_rate": 1.845077214338531e-05, - "loss": 0.1973, - "step": 4015 - }, - { - "epoch": 0.2, - "grad_norm": 0.8611856599209402, - "learning_rate": 1.8449891486427037e-05, - "loss": 0.2108, - "step": 4016 - }, - { - "epoch": 0.2, - "grad_norm": 1.0767077470178619, - "learning_rate": 1.844901060026498e-05, - "loss": 0.2134, - "step": 4017 - }, - { - "epoch": 0.2, - "grad_norm": 0.9266494162460877, - "learning_rate": 1.844812948492303e-05, - "loss": 0.1851, - "step": 4018 - }, - { - "epoch": 0.2, - "grad_norm": 1.0603717922287728, - "learning_rate": 1.8447248140425093e-05, - "loss": 0.2049, - "step": 4019 - }, - { - "epoch": 0.2, - "grad_norm": 1.3324952507743006, - "learning_rate": 1.8446366566795075e-05, - "loss": 0.2196, - "step": 4020 - }, - { - "epoch": 0.2, - "grad_norm": 0.9255293165654925, - "learning_rate": 1.8445484764056886e-05, - "loss": 0.2316, - "step": 4021 - }, - { - "epoch": 0.2, - "grad_norm": 1.216017989027153, - "learning_rate": 1.8444602732234448e-05, - "loss": 0.2204, - "step": 4022 - }, - { - "epoch": 0.2, - "grad_norm": 1.389842141852318, - "learning_rate": 1.844372047135168e-05, - "loss": 0.2344, - "step": 4023 - }, - { - "epoch": 0.2, - "grad_norm": 1.4110691252305398, - "learning_rate": 1.844283798143252e-05, - "loss": 0.1954, - "step": 4024 - }, - { - "epoch": 0.2, - "grad_norm": 1.005032913346493, - "learning_rate": 1.84419552625009e-05, - "loss": 0.2107, - "step": 4025 - }, - { - "epoch": 0.2, - "grad_norm": 0.9342081803585582, - "learning_rate": 1.8441072314580768e-05, - "loss": 0.2152, - "step": 4026 - }, - { - "epoch": 0.2, - "grad_norm": 1.53410098202529, - "learning_rate": 1.8440189137696075e-05, - "loss": 0.2326, - "step": 4027 - }, - { - "epoch": 0.2, - "grad_norm": 1.281389367713628, - "learning_rate": 1.843930573187077e-05, - "loss": 0.2088, - "step": 4028 - }, - { - "epoch": 0.2, - "grad_norm": 0.9824915018152038, - "learning_rate": 1.8438422097128824e-05, - "loss": 0.221, - "step": 4029 - }, - { - "epoch": 0.2, - "grad_norm": 0.9399054436239845, - "learning_rate": 1.8437538233494205e-05, - "loss": 0.2168, - "step": 4030 - }, - { - "epoch": 0.2, - "grad_norm": 1.0611693014693109, - "learning_rate": 1.8436654140990877e-05, - "loss": 0.2201, - "step": 4031 - }, - { - "epoch": 0.21, - "grad_norm": 2.6926024066852987, - "learning_rate": 1.8435769819642835e-05, - "loss": 0.2168, - "step": 4032 - }, - { - "epoch": 0.21, - "grad_norm": 1.425124544776791, - "learning_rate": 1.843488526947406e-05, - "loss": 0.2332, - "step": 4033 - }, - { - "epoch": 0.21, - "grad_norm": 1.2986176817240906, - "learning_rate": 1.843400049050854e-05, - "loss": 0.2054, - "step": 4034 - }, - { - "epoch": 0.21, - "grad_norm": 1.4592279022760886, - "learning_rate": 1.8433115482770286e-05, - "loss": 0.1938, - "step": 4035 - }, - { - "epoch": 0.21, - "grad_norm": 1.1344940384527544, - "learning_rate": 1.8432230246283298e-05, - "loss": 0.2134, - "step": 4036 - }, - { - "epoch": 0.21, - "grad_norm": 1.4920957263868553, - "learning_rate": 1.8431344781071586e-05, - "loss": 0.2299, - "step": 4037 - }, - { - "epoch": 0.21, - "grad_norm": 1.6332568235688114, - "learning_rate": 1.8430459087159172e-05, - "loss": 0.2037, - "step": 4038 - }, - { - "epoch": 0.21, - "grad_norm": 1.0305365277043756, - "learning_rate": 1.842957316457008e-05, - "loss": 0.2396, - "step": 4039 - }, - { - "epoch": 0.21, - "grad_norm": 1.2429204314009332, - "learning_rate": 1.8428687013328338e-05, - "loss": 0.2146, - "step": 4040 - }, - { - "epoch": 0.21, - "grad_norm": 1.46064835422682, - "learning_rate": 1.8427800633457984e-05, - "loss": 0.213, - "step": 4041 - }, - { - "epoch": 0.21, - "grad_norm": 1.315399648856405, - "learning_rate": 1.8426914024983064e-05, - "loss": 0.2228, - "step": 4042 - }, - { - "epoch": 0.21, - "grad_norm": 1.148265946387433, - "learning_rate": 1.8426027187927622e-05, - "loss": 0.2231, - "step": 4043 - }, - { - "epoch": 0.21, - "grad_norm": 1.2784427788505968, - "learning_rate": 1.842514012231572e-05, - "loss": 0.214, - "step": 4044 - }, - { - "epoch": 0.21, - "grad_norm": 1.3232033473579259, - "learning_rate": 1.8424252828171415e-05, - "loss": 0.2424, - "step": 4045 - }, - { - "epoch": 0.21, - "grad_norm": 0.8993518609330069, - "learning_rate": 1.842336530551878e-05, - "loss": 0.2106, - "step": 4046 - }, - { - "epoch": 0.21, - "grad_norm": 0.9717114990888179, - "learning_rate": 1.8422477554381877e-05, - "loss": 0.2137, - "step": 4047 - }, - { - "epoch": 0.21, - "grad_norm": 1.1005113781951843, - "learning_rate": 1.84215895747848e-05, - "loss": 0.2485, - "step": 4048 - }, - { - "epoch": 0.21, - "grad_norm": 1.78659538266863, - "learning_rate": 1.842070136675163e-05, - "loss": 0.2314, - "step": 4049 - }, - { - "epoch": 0.21, - "grad_norm": 1.0992894999940652, - "learning_rate": 1.8419812930306456e-05, - "loss": 0.2355, - "step": 4050 - }, - { - "epoch": 0.21, - "grad_norm": 0.9472424028153374, - "learning_rate": 1.8418924265473387e-05, - "loss": 0.2097, - "step": 4051 - }, - { - "epoch": 0.21, - "grad_norm": 1.2428118737564922, - "learning_rate": 1.8418035372276518e-05, - "loss": 0.2048, - "step": 4052 - }, - { - "epoch": 0.21, - "grad_norm": 1.115620427326506, - "learning_rate": 1.8417146250739963e-05, - "loss": 0.245, - "step": 4053 - }, - { - "epoch": 0.21, - "grad_norm": 1.1725585511883763, - "learning_rate": 1.8416256900887846e-05, - "loss": 0.2305, - "step": 4054 - }, - { - "epoch": 0.21, - "grad_norm": 1.164010195877797, - "learning_rate": 1.841536732274428e-05, - "loss": 0.2317, - "step": 4055 - }, - { - "epoch": 0.21, - "grad_norm": 0.9901967708490926, - "learning_rate": 1.8414477516333404e-05, - "loss": 0.2073, - "step": 4056 - }, - { - "epoch": 0.21, - "grad_norm": 1.203022719344018, - "learning_rate": 1.8413587481679343e-05, - "loss": 0.221, - "step": 4057 - }, - { - "epoch": 0.21, - "grad_norm": 1.1685669183493879, - "learning_rate": 1.841269721880625e-05, - "loss": 0.2065, - "step": 4058 - }, - { - "epoch": 0.21, - "grad_norm": 1.1292187794920885, - "learning_rate": 1.8411806727738276e-05, - "loss": 0.2128, - "step": 4059 - }, - { - "epoch": 0.21, - "grad_norm": 1.0460899988252699, - "learning_rate": 1.8410916008499563e-05, - "loss": 0.2065, - "step": 4060 - }, - { - "epoch": 0.21, - "grad_norm": 0.9625875477372454, - "learning_rate": 1.8410025061114278e-05, - "loss": 0.1914, - "step": 4061 - }, - { - "epoch": 0.21, - "grad_norm": 1.013377246352948, - "learning_rate": 1.840913388560659e-05, - "loss": 0.2486, - "step": 4062 - }, - { - "epoch": 0.21, - "grad_norm": 1.054808112700336, - "learning_rate": 1.840824248200067e-05, - "loss": 0.2036, - "step": 4063 - }, - { - "epoch": 0.21, - "grad_norm": 0.915720731760347, - "learning_rate": 1.8407350850320698e-05, - "loss": 0.215, - "step": 4064 - }, - { - "epoch": 0.21, - "grad_norm": 0.9131373890768805, - "learning_rate": 1.840645899059086e-05, - "loss": 0.2176, - "step": 4065 - }, - { - "epoch": 0.21, - "grad_norm": 1.4499765627745933, - "learning_rate": 1.8405566902835345e-05, - "loss": 0.2072, - "step": 4066 - }, - { - "epoch": 0.21, - "grad_norm": 1.0072741469965514, - "learning_rate": 1.8404674587078355e-05, - "loss": 0.216, - "step": 4067 - }, - { - "epoch": 0.21, - "grad_norm": 1.116054906081107, - "learning_rate": 1.840378204334409e-05, - "loss": 0.2289, - "step": 4068 - }, - { - "epoch": 0.21, - "grad_norm": 6.393753900145944, - "learning_rate": 1.8402889271656764e-05, - "loss": 0.2156, - "step": 4069 - }, - { - "epoch": 0.21, - "grad_norm": 1.7990680573099183, - "learning_rate": 1.8401996272040593e-05, - "loss": 0.2357, - "step": 4070 - }, - { - "epoch": 0.21, - "grad_norm": 0.9247437119879226, - "learning_rate": 1.8401103044519798e-05, - "loss": 0.2104, - "step": 4071 - }, - { - "epoch": 0.21, - "grad_norm": 1.0478663800704129, - "learning_rate": 1.8400209589118606e-05, - "loss": 0.2605, - "step": 4072 - }, - { - "epoch": 0.21, - "grad_norm": 1.606507652653388, - "learning_rate": 1.839931590586126e-05, - "loss": 0.2146, - "step": 4073 - }, - { - "epoch": 0.21, - "grad_norm": 0.8734486664573401, - "learning_rate": 1.8398421994771987e-05, - "loss": 0.2386, - "step": 4074 - }, - { - "epoch": 0.21, - "grad_norm": 0.8981227604859134, - "learning_rate": 1.839752785587505e-05, - "loss": 0.248, - "step": 4075 - }, - { - "epoch": 0.21, - "grad_norm": 0.8195126193179083, - "learning_rate": 1.8396633489194693e-05, - "loss": 0.2247, - "step": 4076 - }, - { - "epoch": 0.21, - "grad_norm": 0.9562878691843798, - "learning_rate": 1.839573889475518e-05, - "loss": 0.2231, - "step": 4077 - }, - { - "epoch": 0.21, - "grad_norm": 1.0949687933363768, - "learning_rate": 1.8394844072580772e-05, - "loss": 0.2342, - "step": 4078 - }, - { - "epoch": 0.21, - "grad_norm": 0.8892593014012868, - "learning_rate": 1.8393949022695747e-05, - "loss": 0.1962, - "step": 4079 - }, - { - "epoch": 0.21, - "grad_norm": 1.1401884052275038, - "learning_rate": 1.839305374512438e-05, - "loss": 0.2407, - "step": 4080 - }, - { - "epoch": 0.21, - "grad_norm": 0.9840977904794602, - "learning_rate": 1.8392158239890957e-05, - "loss": 0.2285, - "step": 4081 - }, - { - "epoch": 0.21, - "grad_norm": 1.0557285677899844, - "learning_rate": 1.839126250701977e-05, - "loss": 0.1971, - "step": 4082 - }, - { - "epoch": 0.21, - "grad_norm": 1.0082003899112544, - "learning_rate": 1.8390366546535107e-05, - "loss": 0.2373, - "step": 4083 - }, - { - "epoch": 0.21, - "grad_norm": 1.482950351936631, - "learning_rate": 1.8389470358461282e-05, - "loss": 0.217, - "step": 4084 - }, - { - "epoch": 0.21, - "grad_norm": 1.6604273182636915, - "learning_rate": 1.8388573942822597e-05, - "loss": 0.1997, - "step": 4085 - }, - { - "epoch": 0.21, - "grad_norm": 1.061341683760589, - "learning_rate": 1.8387677299643374e-05, - "loss": 0.2058, - "step": 4086 - }, - { - "epoch": 0.21, - "grad_norm": 0.8804008446399976, - "learning_rate": 1.838678042894793e-05, - "loss": 0.2171, - "step": 4087 - }, - { - "epoch": 0.21, - "grad_norm": 1.159037826236392, - "learning_rate": 1.838588333076059e-05, - "loss": 0.2258, - "step": 4088 - }, - { - "epoch": 0.21, - "grad_norm": 1.3007209396237813, - "learning_rate": 1.8384986005105694e-05, - "loss": 0.197, - "step": 4089 - }, - { - "epoch": 0.21, - "grad_norm": 1.1739064368395806, - "learning_rate": 1.838408845200758e-05, - "loss": 0.2115, - "step": 4090 - }, - { - "epoch": 0.21, - "grad_norm": 1.3309068976708802, - "learning_rate": 1.8383190671490586e-05, - "loss": 0.2085, - "step": 4091 - }, - { - "epoch": 0.21, - "grad_norm": 1.0072035766635603, - "learning_rate": 1.838229266357908e-05, - "loss": 0.2207, - "step": 4092 - }, - { - "epoch": 0.21, - "grad_norm": 1.0280329947360802, - "learning_rate": 1.8381394428297406e-05, - "loss": 0.2161, - "step": 4093 - }, - { - "epoch": 0.21, - "grad_norm": 0.9584474084596254, - "learning_rate": 1.8380495965669938e-05, - "loss": 0.2323, - "step": 4094 - }, - { - "epoch": 0.21, - "grad_norm": 1.192651766726789, - "learning_rate": 1.8379597275721043e-05, - "loss": 0.2065, - "step": 4095 - }, - { - "epoch": 0.21, - "grad_norm": 1.279939931690827, - "learning_rate": 1.83786983584751e-05, - "loss": 0.2361, - "step": 4096 - }, - { - "epoch": 0.21, - "grad_norm": 1.6723687874802045, - "learning_rate": 1.8377799213956487e-05, - "loss": 0.2095, - "step": 4097 - }, - { - "epoch": 0.21, - "grad_norm": 1.0774746273066567, - "learning_rate": 1.83768998421896e-05, - "loss": 0.2343, - "step": 4098 - }, - { - "epoch": 0.21, - "grad_norm": 2.8065607882690657, - "learning_rate": 1.837600024319883e-05, - "loss": 0.2407, - "step": 4099 - }, - { - "epoch": 0.21, - "grad_norm": 0.9560382672546652, - "learning_rate": 1.8375100417008584e-05, - "loss": 0.2235, - "step": 4100 - }, - { - "epoch": 0.21, - "grad_norm": 0.9020948472107335, - "learning_rate": 1.8374200363643263e-05, - "loss": 0.2353, - "step": 4101 - }, - { - "epoch": 0.21, - "grad_norm": 0.9690265583262132, - "learning_rate": 1.8373300083127284e-05, - "loss": 0.2275, - "step": 4102 - }, - { - "epoch": 0.21, - "grad_norm": 2.3507623603560543, - "learning_rate": 1.8372399575485068e-05, - "loss": 0.2636, - "step": 4103 - }, - { - "epoch": 0.21, - "grad_norm": 1.4806389422531303, - "learning_rate": 1.837149884074104e-05, - "loss": 0.1946, - "step": 4104 - }, - { - "epoch": 0.21, - "grad_norm": 1.002612736176136, - "learning_rate": 1.8370597878919633e-05, - "loss": 0.2273, - "step": 4105 - }, - { - "epoch": 0.21, - "grad_norm": 1.0672473118919987, - "learning_rate": 1.8369696690045288e-05, - "loss": 0.2122, - "step": 4106 - }, - { - "epoch": 0.21, - "grad_norm": 1.029358419541676, - "learning_rate": 1.8368795274142446e-05, - "loss": 0.2098, - "step": 4107 - }, - { - "epoch": 0.21, - "grad_norm": 0.8667773516314685, - "learning_rate": 1.8367893631235558e-05, - "loss": 0.2162, - "step": 4108 - }, - { - "epoch": 0.21, - "grad_norm": 0.8290441652256468, - "learning_rate": 1.8366991761349084e-05, - "loss": 0.2286, - "step": 4109 - }, - { - "epoch": 0.21, - "grad_norm": 1.6958139792602507, - "learning_rate": 1.8366089664507488e-05, - "loss": 0.219, - "step": 4110 - }, - { - "epoch": 0.21, - "grad_norm": 1.04419481834506, - "learning_rate": 1.836518734073523e-05, - "loss": 0.2209, - "step": 4111 - }, - { - "epoch": 0.21, - "grad_norm": 0.9153247072007938, - "learning_rate": 1.8364284790056804e-05, - "loss": 0.246, - "step": 4112 - }, - { - "epoch": 0.21, - "grad_norm": 1.0906657376798792, - "learning_rate": 1.8363382012496672e-05, - "loss": 0.1906, - "step": 4113 - }, - { - "epoch": 0.21, - "grad_norm": 1.0728346171084582, - "learning_rate": 1.8362479008079334e-05, - "loss": 0.2121, - "step": 4114 - }, - { - "epoch": 0.21, - "grad_norm": 1.0315598757042255, - "learning_rate": 1.836157577682928e-05, - "loss": 0.2437, - "step": 4115 - }, - { - "epoch": 0.21, - "grad_norm": 0.9828449756288894, - "learning_rate": 1.8360672318771013e-05, - "loss": 0.2268, - "step": 4116 - }, - { - "epoch": 0.21, - "grad_norm": 1.1873815853782363, - "learning_rate": 1.8359768633929035e-05, - "loss": 0.217, - "step": 4117 - }, - { - "epoch": 0.21, - "grad_norm": 0.8210484261375784, - "learning_rate": 1.835886472232786e-05, - "loss": 0.1881, - "step": 4118 - }, - { - "epoch": 0.21, - "grad_norm": 1.2900341454161641, - "learning_rate": 1.835796058399201e-05, - "loss": 0.2366, - "step": 4119 - }, - { - "epoch": 0.21, - "grad_norm": 1.0392124418066642, - "learning_rate": 1.8357056218946003e-05, - "loss": 0.2439, - "step": 4120 - }, - { - "epoch": 0.21, - "grad_norm": 0.8318283207469718, - "learning_rate": 1.8356151627214377e-05, - "loss": 0.206, - "step": 4121 - }, - { - "epoch": 0.21, - "grad_norm": 0.827521381720995, - "learning_rate": 1.8355246808821664e-05, - "loss": 0.2123, - "step": 4122 - }, - { - "epoch": 0.21, - "grad_norm": 1.0971340042134967, - "learning_rate": 1.835434176379241e-05, - "loss": 0.208, - "step": 4123 - }, - { - "epoch": 0.21, - "grad_norm": 1.0739765981940956, - "learning_rate": 1.8353436492151165e-05, - "loss": 0.2095, - "step": 4124 - }, - { - "epoch": 0.21, - "grad_norm": 0.6935205221026808, - "learning_rate": 1.8352530993922483e-05, - "loss": 0.1864, - "step": 4125 - }, - { - "epoch": 0.21, - "grad_norm": 1.247650082249108, - "learning_rate": 1.8351625269130927e-05, - "loss": 0.2093, - "step": 4126 - }, - { - "epoch": 0.21, - "grad_norm": 0.9056832888194429, - "learning_rate": 1.8350719317801062e-05, - "loss": 0.214, - "step": 4127 - }, - { - "epoch": 0.21, - "grad_norm": 1.3095563898978886, - "learning_rate": 1.8349813139957464e-05, - "loss": 0.2077, - "step": 4128 - }, - { - "epoch": 0.21, - "grad_norm": 0.9821047572278827, - "learning_rate": 1.8348906735624716e-05, - "loss": 0.2112, - "step": 4129 - }, - { - "epoch": 0.21, - "grad_norm": 1.0638206523680762, - "learning_rate": 1.8348000104827396e-05, - "loss": 0.2321, - "step": 4130 - }, - { - "epoch": 0.21, - "grad_norm": 0.7993847668276511, - "learning_rate": 1.8347093247590106e-05, - "loss": 0.199, - "step": 4131 - }, - { - "epoch": 0.21, - "grad_norm": 0.9723068373547615, - "learning_rate": 1.834618616393744e-05, - "loss": 0.1977, - "step": 4132 - }, - { - "epoch": 0.21, - "grad_norm": 1.2098802854352386, - "learning_rate": 1.8345278853894e-05, - "loss": 0.2031, - "step": 4133 - }, - { - "epoch": 0.21, - "grad_norm": 0.9292414406170786, - "learning_rate": 1.8344371317484402e-05, - "loss": 0.2152, - "step": 4134 - }, - { - "epoch": 0.21, - "grad_norm": 1.0821646783609271, - "learning_rate": 1.834346355473326e-05, - "loss": 0.2294, - "step": 4135 - }, - { - "epoch": 0.21, - "grad_norm": 0.9828080305508093, - "learning_rate": 1.83425555656652e-05, - "loss": 0.2166, - "step": 4136 - }, - { - "epoch": 0.21, - "grad_norm": 1.0148458077937668, - "learning_rate": 1.8341647350304844e-05, - "loss": 0.2078, - "step": 4137 - }, - { - "epoch": 0.21, - "grad_norm": 0.8506156892427307, - "learning_rate": 1.8340738908676837e-05, - "loss": 0.2113, - "step": 4138 - }, - { - "epoch": 0.21, - "grad_norm": 0.9936066991084263, - "learning_rate": 1.8339830240805814e-05, - "loss": 0.2344, - "step": 4139 - }, - { - "epoch": 0.21, - "grad_norm": 1.1197923087940107, - "learning_rate": 1.8338921346716426e-05, - "loss": 0.2049, - "step": 4140 - }, - { - "epoch": 0.21, - "grad_norm": 0.8916498553865597, - "learning_rate": 1.8338012226433322e-05, - "loss": 0.2569, - "step": 4141 - }, - { - "epoch": 0.21, - "grad_norm": 0.9060713658218383, - "learning_rate": 1.833710287998117e-05, - "loss": 0.2112, - "step": 4142 - }, - { - "epoch": 0.21, - "grad_norm": 1.4070229822572158, - "learning_rate": 1.833619330738463e-05, - "loss": 0.2375, - "step": 4143 - }, - { - "epoch": 0.21, - "grad_norm": 0.9140861151714124, - "learning_rate": 1.8335283508668375e-05, - "loss": 0.2301, - "step": 4144 - }, - { - "epoch": 0.21, - "grad_norm": 1.1048618491381426, - "learning_rate": 1.8334373483857083e-05, - "loss": 0.2266, - "step": 4145 - }, - { - "epoch": 0.21, - "grad_norm": 0.9368278246902417, - "learning_rate": 1.8333463232975442e-05, - "loss": 0.2336, - "step": 4146 - }, - { - "epoch": 0.21, - "grad_norm": 0.9103085291862169, - "learning_rate": 1.833255275604814e-05, - "loss": 0.2229, - "step": 4147 - }, - { - "epoch": 0.21, - "grad_norm": 0.8832449772404098, - "learning_rate": 1.8331642053099873e-05, - "loss": 0.2189, - "step": 4148 - }, - { - "epoch": 0.21, - "grad_norm": 0.8529873970875689, - "learning_rate": 1.8330731124155347e-05, - "loss": 0.231, - "step": 4149 - }, - { - "epoch": 0.21, - "grad_norm": 0.9615479076879223, - "learning_rate": 1.8329819969239263e-05, - "loss": 0.2316, - "step": 4150 - }, - { - "epoch": 0.21, - "grad_norm": 0.8526259123087873, - "learning_rate": 1.8328908588376345e-05, - "loss": 0.2168, - "step": 4151 - }, - { - "epoch": 0.21, - "grad_norm": 1.0171620974352265, - "learning_rate": 1.832799698159131e-05, - "loss": 0.2518, - "step": 4152 - }, - { - "epoch": 0.21, - "grad_norm": 1.078204216193984, - "learning_rate": 1.832708514890889e-05, - "loss": 0.2242, - "step": 4153 - }, - { - "epoch": 0.21, - "grad_norm": 0.9815351765693545, - "learning_rate": 1.8326173090353815e-05, - "loss": 0.2537, - "step": 4154 - }, - { - "epoch": 0.21, - "grad_norm": 1.7385114890983147, - "learning_rate": 1.832526080595082e-05, - "loss": 0.2249, - "step": 4155 - }, - { - "epoch": 0.21, - "grad_norm": 1.0942431223529165, - "learning_rate": 1.8324348295724658e-05, - "loss": 0.2312, - "step": 4156 - }, - { - "epoch": 0.21, - "grad_norm": 0.905791708407855, - "learning_rate": 1.8323435559700077e-05, - "loss": 0.2104, - "step": 4157 - }, - { - "epoch": 0.21, - "grad_norm": 1.0635355993872433, - "learning_rate": 1.832252259790184e-05, - "loss": 0.2407, - "step": 4158 - }, - { - "epoch": 0.21, - "grad_norm": 0.9400388503599094, - "learning_rate": 1.8321609410354705e-05, - "loss": 0.2014, - "step": 4159 - }, - { - "epoch": 0.21, - "grad_norm": 1.1323684487566767, - "learning_rate": 1.8320695997083443e-05, - "loss": 0.2531, - "step": 4160 - }, - { - "epoch": 0.21, - "grad_norm": 0.9332268296880125, - "learning_rate": 1.8319782358112836e-05, - "loss": 0.206, - "step": 4161 - }, - { - "epoch": 0.21, - "grad_norm": 0.9945749482242312, - "learning_rate": 1.8318868493467657e-05, - "loss": 0.2189, - "step": 4162 - }, - { - "epoch": 0.21, - "grad_norm": 1.1305659185344676, - "learning_rate": 1.8317954403172708e-05, - "loss": 0.2187, - "step": 4163 - }, - { - "epoch": 0.21, - "grad_norm": 1.1061371469329477, - "learning_rate": 1.831704008725277e-05, - "loss": 0.2221, - "step": 4164 - }, - { - "epoch": 0.21, - "grad_norm": 0.8551129861299791, - "learning_rate": 1.8316125545732653e-05, - "loss": 0.1808, - "step": 4165 - }, - { - "epoch": 0.21, - "grad_norm": 0.9663446712855458, - "learning_rate": 1.831521077863716e-05, - "loss": 0.2057, - "step": 4166 - }, - { - "epoch": 0.21, - "grad_norm": 0.9287018223950793, - "learning_rate": 1.8314295785991103e-05, - "loss": 0.2417, - "step": 4167 - }, - { - "epoch": 0.21, - "grad_norm": 1.200372329075485, - "learning_rate": 1.8313380567819306e-05, - "loss": 0.2132, - "step": 4168 - }, - { - "epoch": 0.21, - "grad_norm": 1.015871584887287, - "learning_rate": 1.831246512414659e-05, - "loss": 0.2127, - "step": 4169 - }, - { - "epoch": 0.21, - "grad_norm": 1.0873608708788338, - "learning_rate": 1.8311549454997788e-05, - "loss": 0.2173, - "step": 4170 - }, - { - "epoch": 0.21, - "grad_norm": 0.8060995006211655, - "learning_rate": 1.8310633560397737e-05, - "loss": 0.2203, - "step": 4171 - }, - { - "epoch": 0.21, - "grad_norm": 0.9484747795626762, - "learning_rate": 1.8309717440371282e-05, - "loss": 0.2061, - "step": 4172 - }, - { - "epoch": 0.21, - "grad_norm": 1.011706199342892, - "learning_rate": 1.8308801094943275e-05, - "loss": 0.2099, - "step": 4173 - }, - { - "epoch": 0.21, - "grad_norm": 1.0158471698512874, - "learning_rate": 1.8307884524138564e-05, - "loss": 0.2141, - "step": 4174 - }, - { - "epoch": 0.21, - "grad_norm": 1.0120488489692896, - "learning_rate": 1.830696772798202e-05, - "loss": 0.2109, - "step": 4175 - }, - { - "epoch": 0.21, - "grad_norm": 1.1944801073571805, - "learning_rate": 1.8306050706498503e-05, - "loss": 0.2105, - "step": 4176 - }, - { - "epoch": 0.21, - "grad_norm": 1.1529388807643852, - "learning_rate": 1.8305133459712897e-05, - "loss": 0.2308, - "step": 4177 - }, - { - "epoch": 0.21, - "grad_norm": 0.95321287948936, - "learning_rate": 1.8304215987650074e-05, - "loss": 0.2175, - "step": 4178 - }, - { - "epoch": 0.21, - "grad_norm": 1.0593397775678555, - "learning_rate": 1.830329829033492e-05, - "loss": 0.2177, - "step": 4179 - }, - { - "epoch": 0.21, - "grad_norm": 2.0005744492424244, - "learning_rate": 1.8302380367792336e-05, - "loss": 0.2282, - "step": 4180 - }, - { - "epoch": 0.21, - "grad_norm": 0.9528885425476983, - "learning_rate": 1.8301462220047213e-05, - "loss": 0.2404, - "step": 4181 - }, - { - "epoch": 0.21, - "grad_norm": 1.4416274627260237, - "learning_rate": 1.830054384712446e-05, - "loss": 0.2366, - "step": 4182 - }, - { - "epoch": 0.21, - "grad_norm": 0.8464081363401605, - "learning_rate": 1.8299625249048986e-05, - "loss": 0.2069, - "step": 4183 - }, - { - "epoch": 0.21, - "grad_norm": 0.9436797273759833, - "learning_rate": 1.8298706425845707e-05, - "loss": 0.2426, - "step": 4184 - }, - { - "epoch": 0.21, - "grad_norm": 0.7711041358692556, - "learning_rate": 1.829778737753955e-05, - "loss": 0.2209, - "step": 4185 - }, - { - "epoch": 0.21, - "grad_norm": 0.9606908868034619, - "learning_rate": 1.8296868104155437e-05, - "loss": 0.1989, - "step": 4186 - }, - { - "epoch": 0.21, - "grad_norm": 0.8278759406694916, - "learning_rate": 1.8295948605718316e-05, - "loss": 0.2192, - "step": 4187 - }, - { - "epoch": 0.21, - "grad_norm": 0.921098878397764, - "learning_rate": 1.8295028882253113e-05, - "loss": 0.2132, - "step": 4188 - }, - { - "epoch": 0.21, - "grad_norm": 0.8795989807205208, - "learning_rate": 1.8294108933784788e-05, - "loss": 0.2211, - "step": 4189 - }, - { - "epoch": 0.21, - "grad_norm": 1.2470788534129764, - "learning_rate": 1.8293188760338285e-05, - "loss": 0.188, - "step": 4190 - }, - { - "epoch": 0.21, - "grad_norm": 0.9763265103504567, - "learning_rate": 1.829226836193857e-05, - "loss": 0.2235, - "step": 4191 - }, - { - "epoch": 0.21, - "grad_norm": 1.1680172459987113, - "learning_rate": 1.829134773861061e-05, - "loss": 0.2086, - "step": 4192 - }, - { - "epoch": 0.21, - "grad_norm": 0.9284572674758612, - "learning_rate": 1.8290426890379372e-05, - "loss": 0.2337, - "step": 4193 - }, - { - "epoch": 0.21, - "grad_norm": 0.8362341065194638, - "learning_rate": 1.8289505817269838e-05, - "loss": 0.199, - "step": 4194 - }, - { - "epoch": 0.21, - "grad_norm": 0.884264449424886, - "learning_rate": 1.8288584519306985e-05, - "loss": 0.2095, - "step": 4195 - }, - { - "epoch": 0.21, - "grad_norm": 1.1174268482330303, - "learning_rate": 1.8287662996515815e-05, - "loss": 0.239, - "step": 4196 - }, - { - "epoch": 0.21, - "grad_norm": 0.9739645939599759, - "learning_rate": 1.8286741248921317e-05, - "loss": 0.2161, - "step": 4197 - }, - { - "epoch": 0.21, - "grad_norm": 0.96075065186622, - "learning_rate": 1.8285819276548494e-05, - "loss": 0.2154, - "step": 4198 - }, - { - "epoch": 0.21, - "grad_norm": 0.7959212458472489, - "learning_rate": 1.8284897079422356e-05, - "loss": 0.2084, - "step": 4199 - }, - { - "epoch": 0.21, - "grad_norm": 1.171455556259447, - "learning_rate": 1.8283974657567915e-05, - "loss": 0.2254, - "step": 4200 - }, - { - "epoch": 0.21, - "grad_norm": 0.943038970137586, - "learning_rate": 1.8283052011010195e-05, - "loss": 0.1983, - "step": 4201 - }, - { - "epoch": 0.21, - "grad_norm": 0.8424994263691186, - "learning_rate": 1.828212913977422e-05, - "loss": 0.2031, - "step": 4202 - }, - { - "epoch": 0.21, - "grad_norm": 0.9446157746444535, - "learning_rate": 1.8281206043885024e-05, - "loss": 0.2401, - "step": 4203 - }, - { - "epoch": 0.21, - "grad_norm": 1.1912114330547572, - "learning_rate": 1.8280282723367647e-05, - "loss": 0.2461, - "step": 4204 - }, - { - "epoch": 0.21, - "grad_norm": 0.9212399950951802, - "learning_rate": 1.8279359178247134e-05, - "loss": 0.2282, - "step": 4205 - }, - { - "epoch": 0.21, - "grad_norm": 0.9175859980076985, - "learning_rate": 1.8278435408548538e-05, - "loss": 0.2271, - "step": 4206 - }, - { - "epoch": 0.21, - "grad_norm": 0.8472401194222476, - "learning_rate": 1.827751141429691e-05, - "loss": 0.2393, - "step": 4207 - }, - { - "epoch": 0.21, - "grad_norm": 0.866124853602909, - "learning_rate": 1.827658719551732e-05, - "loss": 0.2121, - "step": 4208 - }, - { - "epoch": 0.21, - "grad_norm": 0.8963703373904084, - "learning_rate": 1.8275662752234834e-05, - "loss": 0.2013, - "step": 4209 - }, - { - "epoch": 0.21, - "grad_norm": 1.0791283238506562, - "learning_rate": 1.8274738084474525e-05, - "loss": 0.2207, - "step": 4210 - }, - { - "epoch": 0.21, - "grad_norm": 0.8172063107163877, - "learning_rate": 1.8273813192261483e-05, - "loss": 0.2197, - "step": 4211 - }, - { - "epoch": 0.21, - "grad_norm": 1.1167127205533103, - "learning_rate": 1.827288807562079e-05, - "loss": 0.2292, - "step": 4212 - }, - { - "epoch": 0.21, - "grad_norm": 0.765006058341567, - "learning_rate": 1.8271962734577536e-05, - "loss": 0.2197, - "step": 4213 - }, - { - "epoch": 0.21, - "grad_norm": 0.7670932099972653, - "learning_rate": 1.827103716915683e-05, - "loss": 0.2094, - "step": 4214 - }, - { - "epoch": 0.21, - "grad_norm": 0.9312624220759745, - "learning_rate": 1.8270111379383773e-05, - "loss": 0.2108, - "step": 4215 - }, - { - "epoch": 0.21, - "grad_norm": 0.9058587404838406, - "learning_rate": 1.826918536528348e-05, - "loss": 0.2213, - "step": 4216 - }, - { - "epoch": 0.21, - "grad_norm": 0.8238733963219388, - "learning_rate": 1.8268259126881064e-05, - "loss": 0.2249, - "step": 4217 - }, - { - "epoch": 0.21, - "grad_norm": 0.960822749130999, - "learning_rate": 1.8267332664201653e-05, - "loss": 0.2311, - "step": 4218 - }, - { - "epoch": 0.21, - "grad_norm": 1.242662687452094, - "learning_rate": 1.8266405977270377e-05, - "loss": 0.2289, - "step": 4219 - }, - { - "epoch": 0.21, - "grad_norm": 0.9535625295217197, - "learning_rate": 1.826547906611237e-05, - "loss": 0.2116, - "step": 4220 - }, - { - "epoch": 0.21, - "grad_norm": 0.8201907851659748, - "learning_rate": 1.826455193075278e-05, - "loss": 0.1827, - "step": 4221 - }, - { - "epoch": 0.21, - "grad_norm": 0.9915348756782004, - "learning_rate": 1.826362457121675e-05, - "loss": 0.2064, - "step": 4222 - }, - { - "epoch": 0.21, - "grad_norm": 0.783989783933111, - "learning_rate": 1.8262696987529434e-05, - "loss": 0.2036, - "step": 4223 - }, - { - "epoch": 0.21, - "grad_norm": 0.8067961548800195, - "learning_rate": 1.8261769179716e-05, - "loss": 0.2137, - "step": 4224 - }, - { - "epoch": 0.21, - "grad_norm": 0.7977405884808337, - "learning_rate": 1.826084114780161e-05, - "loss": 0.2121, - "step": 4225 - }, - { - "epoch": 0.21, - "grad_norm": 0.8556151782037259, - "learning_rate": 1.8259912891811433e-05, - "loss": 0.2025, - "step": 4226 - }, - { - "epoch": 0.21, - "grad_norm": 0.9531497815790603, - "learning_rate": 1.8258984411770656e-05, - "loss": 0.2398, - "step": 4227 - }, - { - "epoch": 0.22, - "grad_norm": 1.0105056727131227, - "learning_rate": 1.825805570770446e-05, - "loss": 0.2234, - "step": 4228 - }, - { - "epoch": 0.22, - "grad_norm": 0.7200577937137307, - "learning_rate": 1.825712677963804e-05, - "loss": 0.2025, - "step": 4229 - }, - { - "epoch": 0.22, - "grad_norm": 0.8241487265119518, - "learning_rate": 1.8256197627596582e-05, - "loss": 0.2261, - "step": 4230 - }, - { - "epoch": 0.22, - "grad_norm": 0.9253391184043739, - "learning_rate": 1.8255268251605303e-05, - "loss": 0.2122, - "step": 4231 - }, - { - "epoch": 0.22, - "grad_norm": 1.201686032556116, - "learning_rate": 1.8254338651689402e-05, - "loss": 0.2139, - "step": 4232 - }, - { - "epoch": 0.22, - "grad_norm": 0.8768725928195028, - "learning_rate": 1.82534088278741e-05, - "loss": 0.205, - "step": 4233 - }, - { - "epoch": 0.22, - "grad_norm": 1.059742002088647, - "learning_rate": 1.825247878018462e-05, - "loss": 0.1921, - "step": 4234 - }, - { - "epoch": 0.22, - "grad_norm": 0.916012985496521, - "learning_rate": 1.8251548508646186e-05, - "loss": 0.2451, - "step": 4235 - }, - { - "epoch": 0.22, - "grad_norm": 0.9015541425224542, - "learning_rate": 1.825061801328403e-05, - "loss": 0.2123, - "step": 4236 - }, - { - "epoch": 0.22, - "grad_norm": 1.0308071881686403, - "learning_rate": 1.8249687294123396e-05, - "loss": 0.2194, - "step": 4237 - }, - { - "epoch": 0.22, - "grad_norm": 0.9070747617664877, - "learning_rate": 1.8248756351189533e-05, - "loss": 0.1742, - "step": 4238 - }, - { - "epoch": 0.22, - "grad_norm": 0.8358651708734051, - "learning_rate": 1.8247825184507683e-05, - "loss": 0.2048, - "step": 4239 - }, - { - "epoch": 0.22, - "grad_norm": 0.8122009336805338, - "learning_rate": 1.8246893794103113e-05, - "loss": 0.2149, - "step": 4240 - }, - { - "epoch": 0.22, - "grad_norm": 1.5371587945589475, - "learning_rate": 1.8245962180001075e-05, - "loss": 0.2427, - "step": 4241 - }, - { - "epoch": 0.22, - "grad_norm": 3.0558139292489592, - "learning_rate": 1.824503034222685e-05, - "loss": 0.219, - "step": 4242 - }, - { - "epoch": 0.22, - "grad_norm": 0.8009101214707464, - "learning_rate": 1.8244098280805715e-05, - "loss": 0.2196, - "step": 4243 - }, - { - "epoch": 0.22, - "grad_norm": 0.7883653453666257, - "learning_rate": 1.8243165995762947e-05, - "loss": 0.2169, - "step": 4244 - }, - { - "epoch": 0.22, - "grad_norm": 1.4303484593440836, - "learning_rate": 1.8242233487123837e-05, - "loss": 0.2248, - "step": 4245 - }, - { - "epoch": 0.22, - "grad_norm": 0.9900955967331986, - "learning_rate": 1.8241300754913677e-05, - "loss": 0.2525, - "step": 4246 - }, - { - "epoch": 0.22, - "grad_norm": 1.0160311186185382, - "learning_rate": 1.824036779915777e-05, - "loss": 0.1988, - "step": 4247 - }, - { - "epoch": 0.22, - "grad_norm": 0.7568400522467681, - "learning_rate": 1.8239434619881418e-05, - "loss": 0.201, - "step": 4248 - }, - { - "epoch": 0.22, - "grad_norm": 1.0200762232544731, - "learning_rate": 1.8238501217109938e-05, - "loss": 0.2425, - "step": 4249 - }, - { - "epoch": 0.22, - "grad_norm": 0.7002983112880843, - "learning_rate": 1.8237567590868645e-05, - "loss": 0.196, - "step": 4250 - }, - { - "epoch": 0.22, - "grad_norm": 0.9417901704321855, - "learning_rate": 1.823663374118287e-05, - "loss": 0.1868, - "step": 4251 - }, - { - "epoch": 0.22, - "grad_norm": 1.0660457871013291, - "learning_rate": 1.8235699668077938e-05, - "loss": 0.2183, - "step": 4252 - }, - { - "epoch": 0.22, - "grad_norm": 1.3133422232494105, - "learning_rate": 1.8234765371579186e-05, - "loss": 0.2164, - "step": 4253 - }, - { - "epoch": 0.22, - "grad_norm": 1.766017062175045, - "learning_rate": 1.8233830851711957e-05, - "loss": 0.2402, - "step": 4254 - }, - { - "epoch": 0.22, - "grad_norm": 1.474811583060738, - "learning_rate": 1.8232896108501606e-05, - "loss": 0.2328, - "step": 4255 - }, - { - "epoch": 0.22, - "grad_norm": 0.7342884869301584, - "learning_rate": 1.823196114197348e-05, - "loss": 0.2072, - "step": 4256 - }, - { - "epoch": 0.22, - "grad_norm": 1.1920093532059117, - "learning_rate": 1.823102595215294e-05, - "loss": 0.238, - "step": 4257 - }, - { - "epoch": 0.22, - "grad_norm": 0.9002978568597515, - "learning_rate": 1.823009053906536e-05, - "loss": 0.2045, - "step": 4258 - }, - { - "epoch": 0.22, - "grad_norm": 1.0125908254260827, - "learning_rate": 1.8229154902736113e-05, - "loss": 0.2097, - "step": 4259 - }, - { - "epoch": 0.22, - "grad_norm": 0.9305152927324657, - "learning_rate": 1.822821904319057e-05, - "loss": 0.237, - "step": 4260 - }, - { - "epoch": 0.22, - "grad_norm": 0.9620551761878576, - "learning_rate": 1.822728296045412e-05, - "loss": 0.2567, - "step": 4261 - }, - { - "epoch": 0.22, - "grad_norm": 0.9060764864986361, - "learning_rate": 1.8226346654552155e-05, - "loss": 0.2139, - "step": 4262 - }, - { - "epoch": 0.22, - "grad_norm": 0.8190218386720736, - "learning_rate": 1.8225410125510073e-05, - "loss": 0.2387, - "step": 4263 - }, - { - "epoch": 0.22, - "grad_norm": 0.7968878989634448, - "learning_rate": 1.8224473373353276e-05, - "loss": 0.189, - "step": 4264 - }, - { - "epoch": 0.22, - "grad_norm": 0.9643006351217062, - "learning_rate": 1.8223536398107177e-05, - "loss": 0.2083, - "step": 4265 - }, - { - "epoch": 0.22, - "grad_norm": 1.0532074306008665, - "learning_rate": 1.8222599199797188e-05, - "loss": 0.2094, - "step": 4266 - }, - { - "epoch": 0.22, - "grad_norm": 0.9184139480026474, - "learning_rate": 1.822166177844873e-05, - "loss": 0.1897, - "step": 4267 - }, - { - "epoch": 0.22, - "grad_norm": 1.0147986932673607, - "learning_rate": 1.8220724134087232e-05, - "loss": 0.2154, - "step": 4268 - }, - { - "epoch": 0.22, - "grad_norm": 0.9907113242466752, - "learning_rate": 1.8219786266738125e-05, - "loss": 0.1967, - "step": 4269 - }, - { - "epoch": 0.22, - "grad_norm": 0.8044965495570748, - "learning_rate": 1.8218848176426857e-05, - "loss": 0.2316, - "step": 4270 - }, - { - "epoch": 0.22, - "grad_norm": 0.9013298789933274, - "learning_rate": 1.8217909863178868e-05, - "loss": 0.2534, - "step": 4271 - }, - { - "epoch": 0.22, - "grad_norm": 0.8488274163325105, - "learning_rate": 1.8216971327019603e-05, - "loss": 0.2254, - "step": 4272 - }, - { - "epoch": 0.22, - "grad_norm": 0.8406913678522667, - "learning_rate": 1.821603256797453e-05, - "loss": 0.1963, - "step": 4273 - }, - { - "epoch": 0.22, - "grad_norm": 1.6575549638758935, - "learning_rate": 1.821509358606911e-05, - "loss": 0.2213, - "step": 4274 - }, - { - "epoch": 0.22, - "grad_norm": 0.8083485169095919, - "learning_rate": 1.8214154381328815e-05, - "loss": 0.2073, - "step": 4275 - }, - { - "epoch": 0.22, - "grad_norm": 0.9830926770574077, - "learning_rate": 1.8213214953779114e-05, - "loss": 0.2458, - "step": 4276 - }, - { - "epoch": 0.22, - "grad_norm": 0.7787523267533243, - "learning_rate": 1.8212275303445498e-05, - "loss": 0.2295, - "step": 4277 - }, - { - "epoch": 0.22, - "grad_norm": 0.8838887511848333, - "learning_rate": 1.8211335430353444e-05, - "loss": 0.2107, - "step": 4278 - }, - { - "epoch": 0.22, - "grad_norm": 1.0056438528043299, - "learning_rate": 1.821039533452846e-05, - "loss": 0.2183, - "step": 4279 - }, - { - "epoch": 0.22, - "grad_norm": 0.6772895668147175, - "learning_rate": 1.8209455015996034e-05, - "loss": 0.1904, - "step": 4280 - }, - { - "epoch": 0.22, - "grad_norm": 0.8927283512952466, - "learning_rate": 1.820851447478168e-05, - "loss": 0.2229, - "step": 4281 - }, - { - "epoch": 0.22, - "grad_norm": 0.9770840012710763, - "learning_rate": 1.8207573710910905e-05, - "loss": 0.2418, - "step": 4282 - }, - { - "epoch": 0.22, - "grad_norm": 0.8916316262230806, - "learning_rate": 1.820663272440923e-05, - "loss": 0.2156, - "step": 4283 - }, - { - "epoch": 0.22, - "grad_norm": 1.2917448104325193, - "learning_rate": 1.8205691515302183e-05, - "loss": 0.2447, - "step": 4284 - }, - { - "epoch": 0.22, - "grad_norm": 0.9632645005230199, - "learning_rate": 1.8204750083615283e-05, - "loss": 0.2363, - "step": 4285 - }, - { - "epoch": 0.22, - "grad_norm": 0.8966795248674965, - "learning_rate": 1.8203808429374078e-05, - "loss": 0.2229, - "step": 4286 - }, - { - "epoch": 0.22, - "grad_norm": 1.1035150706057364, - "learning_rate": 1.8202866552604104e-05, - "loss": 0.2508, - "step": 4287 - }, - { - "epoch": 0.22, - "grad_norm": 1.4050937171557807, - "learning_rate": 1.8201924453330914e-05, - "loss": 0.186, - "step": 4288 - }, - { - "epoch": 0.22, - "grad_norm": 1.0667736728743291, - "learning_rate": 1.8200982131580058e-05, - "loss": 0.2044, - "step": 4289 - }, - { - "epoch": 0.22, - "grad_norm": 0.8705957052526101, - "learning_rate": 1.82000395873771e-05, - "loss": 0.2234, - "step": 4290 - }, - { - "epoch": 0.22, - "grad_norm": 0.7867090078526178, - "learning_rate": 1.8199096820747603e-05, - "loss": 0.2106, - "step": 4291 - }, - { - "epoch": 0.22, - "grad_norm": 0.904053305616991, - "learning_rate": 1.819815383171714e-05, - "loss": 0.2097, - "step": 4292 - }, - { - "epoch": 0.22, - "grad_norm": 0.9641010472452598, - "learning_rate": 1.8197210620311297e-05, - "loss": 0.2055, - "step": 4293 - }, - { - "epoch": 0.22, - "grad_norm": 2.3678290393221864, - "learning_rate": 1.819626718655565e-05, - "loss": 0.2159, - "step": 4294 - }, - { - "epoch": 0.22, - "grad_norm": 1.1708498945803798, - "learning_rate": 1.819532353047579e-05, - "loss": 0.2329, - "step": 4295 - }, - { - "epoch": 0.22, - "grad_norm": 1.1560113389228808, - "learning_rate": 1.819437965209732e-05, - "loss": 0.2324, - "step": 4296 - }, - { - "epoch": 0.22, - "grad_norm": 0.9184708309489177, - "learning_rate": 1.819343555144584e-05, - "loss": 0.2311, - "step": 4297 - }, - { - "epoch": 0.22, - "grad_norm": 0.9343546421034021, - "learning_rate": 1.8192491228546953e-05, - "loss": 0.199, - "step": 4298 - }, - { - "epoch": 0.22, - "grad_norm": 0.8418429083232742, - "learning_rate": 1.8191546683426283e-05, - "loss": 0.2182, - "step": 4299 - }, - { - "epoch": 0.22, - "grad_norm": 1.1363584516751166, - "learning_rate": 1.8190601916109444e-05, - "loss": 0.2071, - "step": 4300 - }, - { - "epoch": 0.22, - "grad_norm": 0.9556669160393405, - "learning_rate": 1.8189656926622066e-05, - "loss": 0.227, - "step": 4301 - }, - { - "epoch": 0.22, - "grad_norm": 1.0732355260283306, - "learning_rate": 1.8188711714989785e-05, - "loss": 0.2038, - "step": 4302 - }, - { - "epoch": 0.22, - "grad_norm": 0.7696215465058844, - "learning_rate": 1.818776628123823e-05, - "loss": 0.1983, - "step": 4303 - }, - { - "epoch": 0.22, - "grad_norm": 1.2331616863270052, - "learning_rate": 1.818682062539306e-05, - "loss": 0.2271, - "step": 4304 - }, - { - "epoch": 0.22, - "grad_norm": 1.0942386697318467, - "learning_rate": 1.8185874747479912e-05, - "loss": 0.2177, - "step": 4305 - }, - { - "epoch": 0.22, - "grad_norm": 1.4450879115009336, - "learning_rate": 1.818492864752445e-05, - "loss": 0.2128, - "step": 4306 - }, - { - "epoch": 0.22, - "grad_norm": 0.8904155962150285, - "learning_rate": 1.8183982325552338e-05, - "loss": 0.2184, - "step": 4307 - }, - { - "epoch": 0.22, - "grad_norm": 0.9143306634271516, - "learning_rate": 1.8183035781589238e-05, - "loss": 0.2208, - "step": 4308 - }, - { - "epoch": 0.22, - "grad_norm": 0.9853438320265109, - "learning_rate": 1.8182089015660836e-05, - "loss": 0.2013, - "step": 4309 - }, - { - "epoch": 0.22, - "grad_norm": 0.9777431632030403, - "learning_rate": 1.8181142027792807e-05, - "loss": 0.229, - "step": 4310 - }, - { - "epoch": 0.22, - "grad_norm": 0.8076202137496955, - "learning_rate": 1.8180194818010833e-05, - "loss": 0.193, - "step": 4311 - }, - { - "epoch": 0.22, - "grad_norm": 0.8673930948175105, - "learning_rate": 1.8179247386340617e-05, - "loss": 0.2152, - "step": 4312 - }, - { - "epoch": 0.22, - "grad_norm": 1.3583227306954313, - "learning_rate": 1.8178299732807853e-05, - "loss": 0.2024, - "step": 4313 - }, - { - "epoch": 0.22, - "grad_norm": 0.9205591976032004, - "learning_rate": 1.8177351857438248e-05, - "loss": 0.2488, - "step": 4314 - }, - { - "epoch": 0.22, - "grad_norm": 1.082096516321356, - "learning_rate": 1.817640376025751e-05, - "loss": 0.2381, - "step": 4315 - }, - { - "epoch": 0.22, - "grad_norm": 1.4396388035174683, - "learning_rate": 1.817545544129136e-05, - "loss": 0.247, - "step": 4316 - }, - { - "epoch": 0.22, - "grad_norm": 0.8504382943282013, - "learning_rate": 1.8174506900565518e-05, - "loss": 0.1785, - "step": 4317 - }, - { - "epoch": 0.22, - "grad_norm": 0.9081660689283023, - "learning_rate": 1.8173558138105717e-05, - "loss": 0.2023, - "step": 4318 - }, - { - "epoch": 0.22, - "grad_norm": 1.446106615498979, - "learning_rate": 1.8172609153937685e-05, - "loss": 0.2047, - "step": 4319 - }, - { - "epoch": 0.22, - "grad_norm": 1.005259860103065, - "learning_rate": 1.8171659948087175e-05, - "loss": 0.2266, - "step": 4320 - }, - { - "epoch": 0.22, - "grad_norm": 1.111929062773582, - "learning_rate": 1.8170710520579923e-05, - "loss": 0.2047, - "step": 4321 - }, - { - "epoch": 0.22, - "grad_norm": 0.9763553401215549, - "learning_rate": 1.8169760871441683e-05, - "loss": 0.199, - "step": 4322 - }, - { - "epoch": 0.22, - "grad_norm": 1.2920743833932382, - "learning_rate": 1.8168811000698224e-05, - "loss": 0.2212, - "step": 4323 - }, - { - "epoch": 0.22, - "grad_norm": 1.0361696005188632, - "learning_rate": 1.8167860908375307e-05, - "loss": 0.1859, - "step": 4324 - }, - { - "epoch": 0.22, - "grad_norm": 0.9516237535805719, - "learning_rate": 1.8166910594498695e-05, - "loss": 0.2199, - "step": 4325 - }, - { - "epoch": 0.22, - "grad_norm": 1.7244801875399887, - "learning_rate": 1.8165960059094174e-05, - "loss": 0.2319, - "step": 4326 - }, - { - "epoch": 0.22, - "grad_norm": 1.051239459001659, - "learning_rate": 1.8165009302187526e-05, - "loss": 0.2182, - "step": 4327 - }, - { - "epoch": 0.22, - "grad_norm": 0.8694760347180661, - "learning_rate": 1.816405832380454e-05, - "loss": 0.2181, - "step": 4328 - }, - { - "epoch": 0.22, - "grad_norm": 0.9093718969648045, - "learning_rate": 1.8163107123971012e-05, - "loss": 0.199, - "step": 4329 - }, - { - "epoch": 0.22, - "grad_norm": 0.7777573023668687, - "learning_rate": 1.816215570271274e-05, - "loss": 0.1974, - "step": 4330 - }, - { - "epoch": 0.22, - "grad_norm": 4.887007737951543, - "learning_rate": 1.8161204060055533e-05, - "loss": 0.2242, - "step": 4331 - }, - { - "epoch": 0.22, - "grad_norm": 1.0061088241270033, - "learning_rate": 1.8160252196025205e-05, - "loss": 0.2336, - "step": 4332 - }, - { - "epoch": 0.22, - "grad_norm": 0.9102452692339625, - "learning_rate": 1.8159300110647576e-05, - "loss": 0.1983, - "step": 4333 - }, - { - "epoch": 0.22, - "grad_norm": 1.9184937177752597, - "learning_rate": 1.815834780394847e-05, - "loss": 0.1995, - "step": 4334 - }, - { - "epoch": 0.22, - "grad_norm": 0.8389755034590195, - "learning_rate": 1.8157395275953722e-05, - "loss": 0.22, - "step": 4335 - }, - { - "epoch": 0.22, - "grad_norm": 0.9664717962957975, - "learning_rate": 1.8156442526689165e-05, - "loss": 0.2253, - "step": 4336 - }, - { - "epoch": 0.22, - "grad_norm": 1.206033123512191, - "learning_rate": 1.8155489556180643e-05, - "loss": 0.2136, - "step": 4337 - }, - { - "epoch": 0.22, - "grad_norm": 0.9918052284301853, - "learning_rate": 1.8154536364454003e-05, - "loss": 0.2266, - "step": 4338 - }, - { - "epoch": 0.22, - "grad_norm": 0.8834020921272379, - "learning_rate": 1.8153582951535108e-05, - "loss": 0.2002, - "step": 4339 - }, - { - "epoch": 0.22, - "grad_norm": 1.474921529252916, - "learning_rate": 1.8152629317449814e-05, - "loss": 0.2103, - "step": 4340 - }, - { - "epoch": 0.22, - "grad_norm": 0.7762391625825757, - "learning_rate": 1.815167546222399e-05, - "loss": 0.1926, - "step": 4341 - }, - { - "epoch": 0.22, - "grad_norm": 1.0448223957540366, - "learning_rate": 1.8150721385883508e-05, - "loss": 0.2103, - "step": 4342 - }, - { - "epoch": 0.22, - "grad_norm": 0.8368969397679634, - "learning_rate": 1.8149767088454248e-05, - "loss": 0.205, - "step": 4343 - }, - { - "epoch": 0.22, - "grad_norm": 1.2714716714784662, - "learning_rate": 1.8148812569962095e-05, - "loss": 0.1951, - "step": 4344 - }, - { - "epoch": 0.22, - "grad_norm": 1.3140737363744213, - "learning_rate": 1.814785783043294e-05, - "loss": 0.211, - "step": 4345 - }, - { - "epoch": 0.22, - "grad_norm": 0.909422860508571, - "learning_rate": 1.8146902869892682e-05, - "loss": 0.2139, - "step": 4346 - }, - { - "epoch": 0.22, - "grad_norm": 0.9128532924458792, - "learning_rate": 1.8145947688367224e-05, - "loss": 0.2059, - "step": 4347 - }, - { - "epoch": 0.22, - "grad_norm": 1.4055777535594336, - "learning_rate": 1.8144992285882478e-05, - "loss": 0.208, - "step": 4348 - }, - { - "epoch": 0.22, - "grad_norm": 1.6393726545336686, - "learning_rate": 1.814403666246435e-05, - "loss": 0.2216, - "step": 4349 - }, - { - "epoch": 0.22, - "grad_norm": 1.012495449839806, - "learning_rate": 1.814308081813877e-05, - "loss": 0.227, - "step": 4350 - }, - { - "epoch": 0.22, - "grad_norm": 0.8809379247618583, - "learning_rate": 1.8142124752931662e-05, - "loss": 0.2018, - "step": 4351 - }, - { - "epoch": 0.22, - "grad_norm": 1.0518298070909586, - "learning_rate": 1.8141168466868962e-05, - "loss": 0.2372, - "step": 4352 - }, - { - "epoch": 0.22, - "grad_norm": 1.185197093186988, - "learning_rate": 1.8140211959976608e-05, - "loss": 0.221, - "step": 4353 - }, - { - "epoch": 0.22, - "grad_norm": 0.8956554633281654, - "learning_rate": 1.8139255232280545e-05, - "loss": 0.2144, - "step": 4354 - }, - { - "epoch": 0.22, - "grad_norm": 0.8595694807094763, - "learning_rate": 1.813829828380672e-05, - "loss": 0.2269, - "step": 4355 - }, - { - "epoch": 0.22, - "grad_norm": 0.8742860722630785, - "learning_rate": 1.81373411145811e-05, - "loss": 0.207, - "step": 4356 - }, - { - "epoch": 0.22, - "grad_norm": 0.8565448494079153, - "learning_rate": 1.8136383724629637e-05, - "loss": 0.203, - "step": 4357 - }, - { - "epoch": 0.22, - "grad_norm": 0.7449865604280017, - "learning_rate": 1.8135426113978312e-05, - "loss": 0.1995, - "step": 4358 - }, - { - "epoch": 0.22, - "grad_norm": 0.9507927834239618, - "learning_rate": 1.8134468282653092e-05, - "loss": 0.2466, - "step": 4359 - }, - { - "epoch": 0.22, - "grad_norm": 0.8884456657081311, - "learning_rate": 1.8133510230679956e-05, - "loss": 0.2351, - "step": 4360 - }, - { - "epoch": 0.22, - "grad_norm": 0.9767879397277819, - "learning_rate": 1.8132551958084902e-05, - "loss": 0.2417, - "step": 4361 - }, - { - "epoch": 0.22, - "grad_norm": 1.028331879327762, - "learning_rate": 1.8131593464893913e-05, - "loss": 0.194, - "step": 4362 - }, - { - "epoch": 0.22, - "grad_norm": 0.9500463986143157, - "learning_rate": 1.813063475113299e-05, - "loss": 0.2152, - "step": 4363 - }, - { - "epoch": 0.22, - "grad_norm": 1.1014894639629662, - "learning_rate": 1.8129675816828144e-05, - "loss": 0.2095, - "step": 4364 - }, - { - "epoch": 0.22, - "grad_norm": 0.8469646740472858, - "learning_rate": 1.8128716662005385e-05, - "loss": 0.2113, - "step": 4365 - }, - { - "epoch": 0.22, - "grad_norm": 0.8171205513709684, - "learning_rate": 1.812775728669072e-05, - "loss": 0.217, - "step": 4366 - }, - { - "epoch": 0.22, - "grad_norm": 1.052823131487029, - "learning_rate": 1.812679769091018e-05, - "loss": 0.2206, - "step": 4367 - }, - { - "epoch": 0.22, - "grad_norm": 1.0175056353860659, - "learning_rate": 1.81258378746898e-05, - "loss": 0.2229, - "step": 4368 - }, - { - "epoch": 0.22, - "grad_norm": 0.8699858373898745, - "learning_rate": 1.8124877838055604e-05, - "loss": 0.2248, - "step": 4369 - }, - { - "epoch": 0.22, - "grad_norm": 0.9018639095903852, - "learning_rate": 1.8123917581033642e-05, - "loss": 0.2302, - "step": 4370 - }, - { - "epoch": 0.22, - "grad_norm": 0.8339848039989683, - "learning_rate": 1.8122957103649953e-05, - "loss": 0.2337, - "step": 4371 - }, - { - "epoch": 0.22, - "grad_norm": 0.8561161698245993, - "learning_rate": 1.81219964059306e-05, - "loss": 0.1902, - "step": 4372 - }, - { - "epoch": 0.22, - "grad_norm": 0.9040620220822301, - "learning_rate": 1.8121035487901627e-05, - "loss": 0.2363, - "step": 4373 - }, - { - "epoch": 0.22, - "grad_norm": 0.8117035779864158, - "learning_rate": 1.8120074349589114e-05, - "loss": 0.2052, - "step": 4374 - }, - { - "epoch": 0.22, - "grad_norm": 0.7757265435166741, - "learning_rate": 1.8119112991019123e-05, - "loss": 0.1952, - "step": 4375 - }, - { - "epoch": 0.22, - "grad_norm": 1.3018928098117228, - "learning_rate": 1.811815141221773e-05, - "loss": 0.2489, - "step": 4376 - }, - { - "epoch": 0.22, - "grad_norm": 0.8514347367780373, - "learning_rate": 1.8117189613211023e-05, - "loss": 0.2428, - "step": 4377 - }, - { - "epoch": 0.22, - "grad_norm": 0.9570291743456296, - "learning_rate": 1.8116227594025092e-05, - "loss": 0.198, - "step": 4378 - }, - { - "epoch": 0.22, - "grad_norm": 0.6959724960679516, - "learning_rate": 1.811526535468603e-05, - "loss": 0.2027, - "step": 4379 - }, - { - "epoch": 0.22, - "grad_norm": 1.0305907382430886, - "learning_rate": 1.8114302895219936e-05, - "loss": 0.2047, - "step": 4380 - }, - { - "epoch": 0.22, - "grad_norm": 1.171345519575611, - "learning_rate": 1.8113340215652916e-05, - "loss": 0.2283, - "step": 4381 - }, - { - "epoch": 0.22, - "grad_norm": 0.8155710544954411, - "learning_rate": 1.8112377316011086e-05, - "loss": 0.2179, - "step": 4382 - }, - { - "epoch": 0.22, - "grad_norm": 0.7938806830241489, - "learning_rate": 1.8111414196320562e-05, - "loss": 0.1821, - "step": 4383 - }, - { - "epoch": 0.22, - "grad_norm": 0.8821549536458148, - "learning_rate": 1.811045085660747e-05, - "loss": 0.2104, - "step": 4384 - }, - { - "epoch": 0.22, - "grad_norm": 0.9977566410273979, - "learning_rate": 1.8109487296897944e-05, - "loss": 0.2039, - "step": 4385 - }, - { - "epoch": 0.22, - "grad_norm": 0.8429772323990313, - "learning_rate": 1.8108523517218112e-05, - "loss": 0.2079, - "step": 4386 - }, - { - "epoch": 0.22, - "grad_norm": 0.932220713953027, - "learning_rate": 1.8107559517594123e-05, - "loss": 0.2242, - "step": 4387 - }, - { - "epoch": 0.22, - "grad_norm": 0.9463348327375277, - "learning_rate": 1.8106595298052124e-05, - "loss": 0.2109, - "step": 4388 - }, - { - "epoch": 0.22, - "grad_norm": 1.058926891012995, - "learning_rate": 1.810563085861827e-05, - "loss": 0.221, - "step": 4389 - }, - { - "epoch": 0.22, - "grad_norm": 0.7967025288772267, - "learning_rate": 1.810466619931872e-05, - "loss": 0.2069, - "step": 4390 - }, - { - "epoch": 0.22, - "grad_norm": 1.3536066297444167, - "learning_rate": 1.8103701320179644e-05, - "loss": 0.2321, - "step": 4391 - }, - { - "epoch": 0.22, - "grad_norm": 0.943141752961332, - "learning_rate": 1.8102736221227212e-05, - "loss": 0.2142, - "step": 4392 - }, - { - "epoch": 0.22, - "grad_norm": 0.8920841817138594, - "learning_rate": 1.81017709024876e-05, - "loss": 0.2064, - "step": 4393 - }, - { - "epoch": 0.22, - "grad_norm": 0.7738233796062852, - "learning_rate": 1.8100805363986996e-05, - "loss": 0.2137, - "step": 4394 - }, - { - "epoch": 0.22, - "grad_norm": 1.1456208241941903, - "learning_rate": 1.809983960575159e-05, - "loss": 0.2198, - "step": 4395 - }, - { - "epoch": 0.22, - "grad_norm": 0.7602743082590431, - "learning_rate": 1.8098873627807576e-05, - "loss": 0.2049, - "step": 4396 - }, - { - "epoch": 0.22, - "grad_norm": 0.8663595195507451, - "learning_rate": 1.8097907430181157e-05, - "loss": 0.2049, - "step": 4397 - }, - { - "epoch": 0.22, - "grad_norm": 0.9999632680595777, - "learning_rate": 1.8096941012898545e-05, - "loss": 0.2197, - "step": 4398 - }, - { - "epoch": 0.22, - "grad_norm": 0.899024685249853, - "learning_rate": 1.8095974375985945e-05, - "loss": 0.2233, - "step": 4399 - }, - { - "epoch": 0.22, - "grad_norm": 0.9689895282474176, - "learning_rate": 1.8095007519469583e-05, - "loss": 0.2149, - "step": 4400 - }, - { - "epoch": 0.22, - "grad_norm": 0.8861221836852705, - "learning_rate": 1.8094040443375692e-05, - "loss": 0.201, - "step": 4401 - }, - { - "epoch": 0.22, - "grad_norm": 1.147376630225734, - "learning_rate": 1.8093073147730492e-05, - "loss": 0.2381, - "step": 4402 - }, - { - "epoch": 0.22, - "grad_norm": 0.9875533678572314, - "learning_rate": 1.8092105632560227e-05, - "loss": 0.2176, - "step": 4403 - }, - { - "epoch": 0.22, - "grad_norm": 1.8599201761775375, - "learning_rate": 1.8091137897891138e-05, - "loss": 0.2246, - "step": 4404 - }, - { - "epoch": 0.22, - "grad_norm": 2.548059073446093, - "learning_rate": 1.8090169943749477e-05, - "loss": 0.2429, - "step": 4405 - }, - { - "epoch": 0.22, - "grad_norm": 1.4045767740844242, - "learning_rate": 1.80892017701615e-05, - "loss": 0.2402, - "step": 4406 - }, - { - "epoch": 0.22, - "grad_norm": 1.0615450153346424, - "learning_rate": 1.808823337715347e-05, - "loss": 0.2311, - "step": 4407 - }, - { - "epoch": 0.22, - "grad_norm": 1.4170629407778401, - "learning_rate": 1.8087264764751645e-05, - "loss": 0.1938, - "step": 4408 - }, - { - "epoch": 0.22, - "grad_norm": 0.9763221746153268, - "learning_rate": 1.8086295932982315e-05, - "loss": 0.2535, - "step": 4409 - }, - { - "epoch": 0.22, - "grad_norm": 0.7656690291051992, - "learning_rate": 1.8085326881871747e-05, - "loss": 0.1918, - "step": 4410 - }, - { - "epoch": 0.22, - "grad_norm": 1.136065388243066, - "learning_rate": 1.8084357611446232e-05, - "loss": 0.2491, - "step": 4411 - }, - { - "epoch": 0.22, - "grad_norm": 1.082286482252834, - "learning_rate": 1.808338812173206e-05, - "loss": 0.1978, - "step": 4412 - }, - { - "epoch": 0.22, - "grad_norm": 1.6479564360605128, - "learning_rate": 1.8082418412755527e-05, - "loss": 0.2176, - "step": 4413 - }, - { - "epoch": 0.22, - "grad_norm": 0.8694686452576879, - "learning_rate": 1.808144848454294e-05, - "loss": 0.2314, - "step": 4414 - }, - { - "epoch": 0.22, - "grad_norm": 0.7858990839349932, - "learning_rate": 1.8080478337120604e-05, - "loss": 0.2163, - "step": 4415 - }, - { - "epoch": 0.22, - "grad_norm": 0.839738455990263, - "learning_rate": 1.807950797051484e-05, - "loss": 0.1958, - "step": 4416 - }, - { - "epoch": 0.22, - "grad_norm": 1.2351394659797974, - "learning_rate": 1.8078537384751968e-05, - "loss": 0.2261, - "step": 4417 - }, - { - "epoch": 0.22, - "grad_norm": 0.9296011925545019, - "learning_rate": 1.8077566579858306e-05, - "loss": 0.2095, - "step": 4418 - }, - { - "epoch": 0.22, - "grad_norm": 0.9096268661088964, - "learning_rate": 1.80765955558602e-05, - "loss": 0.2349, - "step": 4419 - }, - { - "epoch": 0.22, - "grad_norm": 0.9189740778262212, - "learning_rate": 1.8075624312783984e-05, - "loss": 0.2157, - "step": 4420 - }, - { - "epoch": 0.22, - "grad_norm": 0.8732626554297037, - "learning_rate": 1.8074652850656e-05, - "loss": 0.2223, - "step": 4421 - }, - { - "epoch": 0.22, - "grad_norm": 2.5194779373667795, - "learning_rate": 1.80736811695026e-05, - "loss": 0.237, - "step": 4422 - }, - { - "epoch": 0.22, - "grad_norm": 2.011478056498499, - "learning_rate": 1.807270926935014e-05, - "loss": 0.2526, - "step": 4423 - }, - { - "epoch": 0.22, - "grad_norm": 1.072836618698817, - "learning_rate": 1.8071737150224993e-05, - "loss": 0.2207, - "step": 4424 - }, - { - "epoch": 0.23, - "grad_norm": 1.003240262280801, - "learning_rate": 1.8070764812153518e-05, - "loss": 0.2195, - "step": 4425 - }, - { - "epoch": 0.23, - "grad_norm": 0.8553296303325555, - "learning_rate": 1.8069792255162088e-05, - "loss": 0.2143, - "step": 4426 - }, - { - "epoch": 0.23, - "grad_norm": 0.9706401479122935, - "learning_rate": 1.8068819479277087e-05, - "loss": 0.2185, - "step": 4427 - }, - { - "epoch": 0.23, - "grad_norm": 0.8534705006698775, - "learning_rate": 1.8067846484524905e-05, - "loss": 0.1879, - "step": 4428 - }, - { - "epoch": 0.23, - "grad_norm": 0.8578306627076671, - "learning_rate": 1.806687327093193e-05, - "loss": 0.2213, - "step": 4429 - }, - { - "epoch": 0.23, - "grad_norm": 0.9165415623957645, - "learning_rate": 1.806589983852456e-05, - "loss": 0.2171, - "step": 4430 - }, - { - "epoch": 0.23, - "grad_norm": 1.0313737327911443, - "learning_rate": 1.8064926187329205e-05, - "loss": 0.2255, - "step": 4431 - }, - { - "epoch": 0.23, - "grad_norm": 0.9197916913770108, - "learning_rate": 1.8063952317372265e-05, - "loss": 0.1972, - "step": 4432 - }, - { - "epoch": 0.23, - "grad_norm": 1.3479992755805241, - "learning_rate": 1.8062978228680172e-05, - "loss": 0.2628, - "step": 4433 - }, - { - "epoch": 0.23, - "grad_norm": 0.8254972992938581, - "learning_rate": 1.8062003921279335e-05, - "loss": 0.2115, - "step": 4434 - }, - { - "epoch": 0.23, - "grad_norm": 0.8963891068322509, - "learning_rate": 1.8061029395196186e-05, - "loss": 0.2027, - "step": 4435 - }, - { - "epoch": 0.23, - "grad_norm": 0.9369312287584682, - "learning_rate": 1.8060054650457157e-05, - "loss": 0.2128, - "step": 4436 - }, - { - "epoch": 0.23, - "grad_norm": 1.4957509743791233, - "learning_rate": 1.8059079687088695e-05, - "loss": 0.1977, - "step": 4437 - }, - { - "epoch": 0.23, - "grad_norm": 0.8374581910764776, - "learning_rate": 1.805810450511724e-05, - "loss": 0.2171, - "step": 4438 - }, - { - "epoch": 0.23, - "grad_norm": 0.9093962161701271, - "learning_rate": 1.8057129104569246e-05, - "loss": 0.2155, - "step": 4439 - }, - { - "epoch": 0.23, - "grad_norm": 1.0819334654052752, - "learning_rate": 1.8056153485471167e-05, - "loss": 0.1863, - "step": 4440 - }, - { - "epoch": 0.23, - "grad_norm": 0.8524882699421736, - "learning_rate": 1.805517764784947e-05, - "loss": 0.2006, - "step": 4441 - }, - { - "epoch": 0.23, - "grad_norm": 1.1360487230080902, - "learning_rate": 1.8054201591730627e-05, - "loss": 0.2413, - "step": 4442 - }, - { - "epoch": 0.23, - "grad_norm": 0.91748364528214, - "learning_rate": 1.8053225317141106e-05, - "loss": 0.1981, - "step": 4443 - }, - { - "epoch": 0.23, - "grad_norm": 0.9272715829052295, - "learning_rate": 1.80522488241074e-05, - "loss": 0.2271, - "step": 4444 - }, - { - "epoch": 0.23, - "grad_norm": 0.8682592457592926, - "learning_rate": 1.8051272112655983e-05, - "loss": 0.2152, - "step": 4445 - }, - { - "epoch": 0.23, - "grad_norm": 0.9262847244346153, - "learning_rate": 1.805029518281336e-05, - "loss": 0.2093, - "step": 4446 - }, - { - "epoch": 0.23, - "grad_norm": 0.9188337809176588, - "learning_rate": 1.8049318034606025e-05, - "loss": 0.2187, - "step": 4447 - }, - { - "epoch": 0.23, - "grad_norm": 1.2028573072889497, - "learning_rate": 1.8048340668060483e-05, - "loss": 0.2444, - "step": 4448 - }, - { - "epoch": 0.23, - "grad_norm": 0.8717286742127507, - "learning_rate": 1.8047363083203245e-05, - "loss": 0.2009, - "step": 4449 - }, - { - "epoch": 0.23, - "grad_norm": 0.8561068852096136, - "learning_rate": 1.8046385280060832e-05, - "loss": 0.2097, - "step": 4450 - }, - { - "epoch": 0.23, - "grad_norm": 1.0878027107170953, - "learning_rate": 1.8045407258659763e-05, - "loss": 0.2059, - "step": 4451 - }, - { - "epoch": 0.23, - "grad_norm": 1.0482682779855905, - "learning_rate": 1.8044429019026565e-05, - "loss": 0.2468, - "step": 4452 - }, - { - "epoch": 0.23, - "grad_norm": 1.1067602177028681, - "learning_rate": 1.8043450561187775e-05, - "loss": 0.2366, - "step": 4453 - }, - { - "epoch": 0.23, - "grad_norm": 0.9203132473066146, - "learning_rate": 1.8042471885169934e-05, - "loss": 0.2343, - "step": 4454 - }, - { - "epoch": 0.23, - "grad_norm": 0.9013155181915874, - "learning_rate": 1.8041492990999595e-05, - "loss": 0.2291, - "step": 4455 - }, - { - "epoch": 0.23, - "grad_norm": 1.002047390111081, - "learning_rate": 1.8040513878703296e-05, - "loss": 0.2341, - "step": 4456 - }, - { - "epoch": 0.23, - "grad_norm": 0.7729887234708337, - "learning_rate": 1.803953454830761e-05, - "loss": 0.2187, - "step": 4457 - }, - { - "epoch": 0.23, - "grad_norm": 0.8295487988881244, - "learning_rate": 1.803855499983909e-05, - "loss": 0.2292, - "step": 4458 - }, - { - "epoch": 0.23, - "grad_norm": 0.9514396998810606, - "learning_rate": 1.803757523332432e-05, - "loss": 0.2199, - "step": 4459 - }, - { - "epoch": 0.23, - "grad_norm": 0.8581480115536537, - "learning_rate": 1.8036595248789862e-05, - "loss": 0.2185, - "step": 4460 - }, - { - "epoch": 0.23, - "grad_norm": 0.9062317373994906, - "learning_rate": 1.8035615046262305e-05, - "loss": 0.2406, - "step": 4461 - }, - { - "epoch": 0.23, - "grad_norm": 0.7518853352440985, - "learning_rate": 1.8034634625768234e-05, - "loss": 0.2094, - "step": 4462 - }, - { - "epoch": 0.23, - "grad_norm": 0.7640860466551985, - "learning_rate": 1.803365398733425e-05, - "loss": 0.2243, - "step": 4463 - }, - { - "epoch": 0.23, - "grad_norm": 1.0101277920294256, - "learning_rate": 1.803267313098694e-05, - "loss": 0.237, - "step": 4464 - }, - { - "epoch": 0.23, - "grad_norm": 1.1549794199372658, - "learning_rate": 1.8031692056752926e-05, - "loss": 0.207, - "step": 4465 - }, - { - "epoch": 0.23, - "grad_norm": 1.0995658668374693, - "learning_rate": 1.8030710764658804e-05, - "loss": 0.2352, - "step": 4466 - }, - { - "epoch": 0.23, - "grad_norm": 0.9735906734273259, - "learning_rate": 1.8029729254731204e-05, - "loss": 0.1905, - "step": 4467 - }, - { - "epoch": 0.23, - "grad_norm": 0.8437175055191298, - "learning_rate": 1.8028747526996745e-05, - "loss": 0.2186, - "step": 4468 - }, - { - "epoch": 0.23, - "grad_norm": 0.7296734364054673, - "learning_rate": 1.802776558148205e-05, - "loss": 0.2166, - "step": 4469 - }, - { - "epoch": 0.23, - "grad_norm": 1.047129361115201, - "learning_rate": 1.8026783418213768e-05, - "loss": 0.2186, - "step": 4470 - }, - { - "epoch": 0.23, - "grad_norm": 0.8087153659486408, - "learning_rate": 1.8025801037218528e-05, - "loss": 0.22, - "step": 4471 - }, - { - "epoch": 0.23, - "grad_norm": 0.8948010111876558, - "learning_rate": 1.8024818438522984e-05, - "loss": 0.2275, - "step": 4472 - }, - { - "epoch": 0.23, - "grad_norm": 2.4481573277116695, - "learning_rate": 1.802383562215378e-05, - "loss": 0.2121, - "step": 4473 - }, - { - "epoch": 0.23, - "grad_norm": 0.8894301960584611, - "learning_rate": 1.802285258813759e-05, - "loss": 0.2199, - "step": 4474 - }, - { - "epoch": 0.23, - "grad_norm": 0.8852898290343183, - "learning_rate": 1.8021869336501066e-05, - "loss": 0.2371, - "step": 4475 - }, - { - "epoch": 0.23, - "grad_norm": 0.861758973541769, - "learning_rate": 1.8020885867270883e-05, - "loss": 0.2244, - "step": 4476 - }, - { - "epoch": 0.23, - "grad_norm": 0.8802216161853176, - "learning_rate": 1.8019902180473717e-05, - "loss": 0.2146, - "step": 4477 - }, - { - "epoch": 0.23, - "grad_norm": 0.7756772290225344, - "learning_rate": 1.8018918276136255e-05, - "loss": 0.1897, - "step": 4478 - }, - { - "epoch": 0.23, - "grad_norm": 0.7696394101610075, - "learning_rate": 1.801793415428518e-05, - "loss": 0.2345, - "step": 4479 - }, - { - "epoch": 0.23, - "grad_norm": 0.9654896233324601, - "learning_rate": 1.8016949814947188e-05, - "loss": 0.2156, - "step": 4480 - }, - { - "epoch": 0.23, - "grad_norm": 0.9399179508195146, - "learning_rate": 1.801596525814898e-05, - "loss": 0.2044, - "step": 4481 - }, - { - "epoch": 0.23, - "grad_norm": 0.8600710403521159, - "learning_rate": 1.8014980483917263e-05, - "loss": 0.2086, - "step": 4482 - }, - { - "epoch": 0.23, - "grad_norm": 1.4090094998911113, - "learning_rate": 1.8013995492278744e-05, - "loss": 0.1984, - "step": 4483 - }, - { - "epoch": 0.23, - "grad_norm": 0.7783023424568493, - "learning_rate": 1.8013010283260146e-05, - "loss": 0.2126, - "step": 4484 - }, - { - "epoch": 0.23, - "grad_norm": 0.9956937185992838, - "learning_rate": 1.801202485688819e-05, - "loss": 0.1867, - "step": 4485 - }, - { - "epoch": 0.23, - "grad_norm": 0.9183789056572046, - "learning_rate": 1.801103921318961e-05, - "loss": 0.205, - "step": 4486 - }, - { - "epoch": 0.23, - "grad_norm": 0.9357084137452508, - "learning_rate": 1.801005335219114e-05, - "loss": 0.2161, - "step": 4487 - }, - { - "epoch": 0.23, - "grad_norm": 0.8767707480218008, - "learning_rate": 1.8009067273919518e-05, - "loss": 0.2093, - "step": 4488 - }, - { - "epoch": 0.23, - "grad_norm": 0.8962239859524315, - "learning_rate": 1.8008080978401493e-05, - "loss": 0.2267, - "step": 4489 - }, - { - "epoch": 0.23, - "grad_norm": 0.719171880216484, - "learning_rate": 1.8007094465663823e-05, - "loss": 0.2018, - "step": 4490 - }, - { - "epoch": 0.23, - "grad_norm": 1.2807368660385212, - "learning_rate": 1.8006107735733258e-05, - "loss": 0.2312, - "step": 4491 - }, - { - "epoch": 0.23, - "grad_norm": 1.0189680130523846, - "learning_rate": 1.8005120788636574e-05, - "loss": 0.2144, - "step": 4492 - }, - { - "epoch": 0.23, - "grad_norm": 0.8109524372881051, - "learning_rate": 1.8004133624400535e-05, - "loss": 0.1963, - "step": 4493 - }, - { - "epoch": 0.23, - "grad_norm": 0.7260968412969371, - "learning_rate": 1.8003146243051916e-05, - "loss": 0.2186, - "step": 4494 - }, - { - "epoch": 0.23, - "grad_norm": 0.8667090314175855, - "learning_rate": 1.8002158644617508e-05, - "loss": 0.1815, - "step": 4495 - }, - { - "epoch": 0.23, - "grad_norm": 0.8716288269195112, - "learning_rate": 1.800117082912409e-05, - "loss": 0.2088, - "step": 4496 - }, - { - "epoch": 0.23, - "grad_norm": 0.9035607795569606, - "learning_rate": 1.8000182796598466e-05, - "loss": 0.2078, - "step": 4497 - }, - { - "epoch": 0.23, - "grad_norm": 1.0125622963855248, - "learning_rate": 1.7999194547067426e-05, - "loss": 0.2469, - "step": 4498 - }, - { - "epoch": 0.23, - "grad_norm": 1.291516448269081, - "learning_rate": 1.7998206080557786e-05, - "loss": 0.2066, - "step": 4499 - }, - { - "epoch": 0.23, - "grad_norm": 1.3954274607458725, - "learning_rate": 1.7997217397096355e-05, - "loss": 0.182, - "step": 4500 - }, - { - "epoch": 0.23, - "grad_norm": 1.0939380315378482, - "learning_rate": 1.7996228496709948e-05, - "loss": 0.2223, - "step": 4501 - }, - { - "epoch": 0.23, - "grad_norm": 1.1328049643732674, - "learning_rate": 1.7995239379425393e-05, - "loss": 0.2029, - "step": 4502 - }, - { - "epoch": 0.23, - "grad_norm": 0.8570053201008193, - "learning_rate": 1.7994250045269518e-05, - "loss": 0.2048, - "step": 4503 - }, - { - "epoch": 0.23, - "grad_norm": 1.4699377915752037, - "learning_rate": 1.799326049426916e-05, - "loss": 0.2187, - "step": 4504 - }, - { - "epoch": 0.23, - "grad_norm": 0.8427132865080169, - "learning_rate": 1.7992270726451158e-05, - "loss": 0.2206, - "step": 4505 - }, - { - "epoch": 0.23, - "grad_norm": 1.0109750267372164, - "learning_rate": 1.7991280741842362e-05, - "loss": 0.2087, - "step": 4506 - }, - { - "epoch": 0.23, - "grad_norm": 0.8486429289414997, - "learning_rate": 1.799029054046962e-05, - "loss": 0.2015, - "step": 4507 - }, - { - "epoch": 0.23, - "grad_norm": 0.8440700530295214, - "learning_rate": 1.7989300122359802e-05, - "loss": 0.1891, - "step": 4508 - }, - { - "epoch": 0.23, - "grad_norm": 1.3246815330558233, - "learning_rate": 1.7988309487539767e-05, - "loss": 0.2286, - "step": 4509 - }, - { - "epoch": 0.23, - "grad_norm": 1.0044511865123302, - "learning_rate": 1.7987318636036382e-05, - "loss": 0.2147, - "step": 4510 - }, - { - "epoch": 0.23, - "grad_norm": 0.9616052245640965, - "learning_rate": 1.798632756787653e-05, - "loss": 0.2006, - "step": 4511 - }, - { - "epoch": 0.23, - "grad_norm": 1.007859581807647, - "learning_rate": 1.7985336283087092e-05, - "loss": 0.2149, - "step": 4512 - }, - { - "epoch": 0.23, - "grad_norm": 0.9269182161077623, - "learning_rate": 1.7984344781694957e-05, - "loss": 0.2327, - "step": 4513 - }, - { - "epoch": 0.23, - "grad_norm": 1.1873968613561616, - "learning_rate": 1.7983353063727014e-05, - "loss": 0.1958, - "step": 4514 - }, - { - "epoch": 0.23, - "grad_norm": 0.7675397374303244, - "learning_rate": 1.7982361129210172e-05, - "loss": 0.2071, - "step": 4515 - }, - { - "epoch": 0.23, - "grad_norm": 0.8842111254876002, - "learning_rate": 1.7981368978171336e-05, - "loss": 0.2177, - "step": 4516 - }, - { - "epoch": 0.23, - "grad_norm": 0.9063416103174554, - "learning_rate": 1.798037661063741e-05, - "loss": 0.2146, - "step": 4517 - }, - { - "epoch": 0.23, - "grad_norm": 0.8730461639506438, - "learning_rate": 1.7979384026635323e-05, - "loss": 0.2323, - "step": 4518 - }, - { - "epoch": 0.23, - "grad_norm": 0.9719249466422303, - "learning_rate": 1.7978391226191993e-05, - "loss": 0.2191, - "step": 4519 - }, - { - "epoch": 0.23, - "grad_norm": 1.098487152706154, - "learning_rate": 1.7977398209334347e-05, - "loss": 0.2052, - "step": 4520 - }, - { - "epoch": 0.23, - "grad_norm": 0.7818334103477671, - "learning_rate": 1.7976404976089327e-05, - "loss": 0.2216, - "step": 4521 - }, - { - "epoch": 0.23, - "grad_norm": 1.3452405241980792, - "learning_rate": 1.797541152648387e-05, - "loss": 0.2454, - "step": 4522 - }, - { - "epoch": 0.23, - "grad_norm": 0.9202422091727288, - "learning_rate": 1.7974417860544924e-05, - "loss": 0.2089, - "step": 4523 - }, - { - "epoch": 0.23, - "grad_norm": 1.0015645377017552, - "learning_rate": 1.797342397829944e-05, - "loss": 0.222, - "step": 4524 - }, - { - "epoch": 0.23, - "grad_norm": 0.944338652787783, - "learning_rate": 1.797242987977439e-05, - "loss": 0.2476, - "step": 4525 - }, - { - "epoch": 0.23, - "grad_norm": 0.887805161891117, - "learning_rate": 1.7971435564996723e-05, - "loss": 0.2149, - "step": 4526 - }, - { - "epoch": 0.23, - "grad_norm": 1.463548393298575, - "learning_rate": 1.7970441033993416e-05, - "loss": 0.2617, - "step": 4527 - }, - { - "epoch": 0.23, - "grad_norm": 1.064025199962864, - "learning_rate": 1.7969446286791448e-05, - "loss": 0.2037, - "step": 4528 - }, - { - "epoch": 0.23, - "grad_norm": 0.7289751041322153, - "learning_rate": 1.7968451323417798e-05, - "loss": 0.1879, - "step": 4529 - }, - { - "epoch": 0.23, - "grad_norm": 1.1059318897182686, - "learning_rate": 1.7967456143899453e-05, - "loss": 0.2267, - "step": 4530 - }, - { - "epoch": 0.23, - "grad_norm": 0.8711198948794134, - "learning_rate": 1.7966460748263412e-05, - "loss": 0.2093, - "step": 4531 - }, - { - "epoch": 0.23, - "grad_norm": 0.8817964338927127, - "learning_rate": 1.7965465136536677e-05, - "loss": 0.2207, - "step": 4532 - }, - { - "epoch": 0.23, - "grad_norm": 1.087682593067581, - "learning_rate": 1.7964469308746246e-05, - "loss": 0.2616, - "step": 4533 - }, - { - "epoch": 0.23, - "grad_norm": 0.9860911239425852, - "learning_rate": 1.7963473264919137e-05, - "loss": 0.2439, - "step": 4534 - }, - { - "epoch": 0.23, - "grad_norm": 0.8686097317747554, - "learning_rate": 1.7962477005082367e-05, - "loss": 0.2096, - "step": 4535 - }, - { - "epoch": 0.23, - "grad_norm": 0.851414230084223, - "learning_rate": 1.7961480529262956e-05, - "loss": 0.232, - "step": 4536 - }, - { - "epoch": 0.23, - "grad_norm": 0.9712965457220927, - "learning_rate": 1.7960483837487935e-05, - "loss": 0.2178, - "step": 4537 - }, - { - "epoch": 0.23, - "grad_norm": 0.7603316153751536, - "learning_rate": 1.7959486929784344e-05, - "loss": 0.2008, - "step": 4538 - }, - { - "epoch": 0.23, - "grad_norm": 1.305590804409271, - "learning_rate": 1.7958489806179214e-05, - "loss": 0.2322, - "step": 4539 - }, - { - "epoch": 0.23, - "grad_norm": 1.5399175125976559, - "learning_rate": 1.7957492466699606e-05, - "loss": 0.2103, - "step": 4540 - }, - { - "epoch": 0.23, - "grad_norm": 2.4627194244171826, - "learning_rate": 1.7956494911372564e-05, - "loss": 0.2199, - "step": 4541 - }, - { - "epoch": 0.23, - "grad_norm": 0.8683388348736799, - "learning_rate": 1.7955497140225145e-05, - "loss": 0.204, - "step": 4542 - }, - { - "epoch": 0.23, - "grad_norm": 0.9920027394506458, - "learning_rate": 1.7954499153284418e-05, - "loss": 0.214, - "step": 4543 - }, - { - "epoch": 0.23, - "grad_norm": 0.859917912663988, - "learning_rate": 1.7953500950577453e-05, - "loss": 0.1982, - "step": 4544 - }, - { - "epoch": 0.23, - "grad_norm": 1.0837324969574404, - "learning_rate": 1.7952502532131326e-05, - "loss": 0.2313, - "step": 4545 - }, - { - "epoch": 0.23, - "grad_norm": 0.8575927627504152, - "learning_rate": 1.795150389797312e-05, - "loss": 0.1942, - "step": 4546 - }, - { - "epoch": 0.23, - "grad_norm": 1.0625777986683949, - "learning_rate": 1.7950505048129917e-05, - "loss": 0.2335, - "step": 4547 - }, - { - "epoch": 0.23, - "grad_norm": 1.0713708397729715, - "learning_rate": 1.794950598262882e-05, - "loss": 0.2093, - "step": 4548 - }, - { - "epoch": 0.23, - "grad_norm": 1.229848965244706, - "learning_rate": 1.794850670149692e-05, - "loss": 0.2176, - "step": 4549 - }, - { - "epoch": 0.23, - "grad_norm": 1.1091741040774479, - "learning_rate": 1.794750720476133e-05, - "loss": 0.231, - "step": 4550 - }, - { - "epoch": 0.23, - "grad_norm": 1.2156486310229675, - "learning_rate": 1.7946507492449158e-05, - "loss": 0.2472, - "step": 4551 - }, - { - "epoch": 0.23, - "grad_norm": 1.1075155329929134, - "learning_rate": 1.794550756458752e-05, - "loss": 0.2283, - "step": 4552 - }, - { - "epoch": 0.23, - "grad_norm": 1.3998141024285211, - "learning_rate": 1.794450742120354e-05, - "loss": 0.2225, - "step": 4553 - }, - { - "epoch": 0.23, - "grad_norm": 1.011647270613291, - "learning_rate": 1.794350706232435e-05, - "loss": 0.2363, - "step": 4554 - }, - { - "epoch": 0.23, - "grad_norm": 0.9205082012421598, - "learning_rate": 1.794250648797708e-05, - "loss": 0.1958, - "step": 4555 - }, - { - "epoch": 0.23, - "grad_norm": 0.9069710312413244, - "learning_rate": 1.7941505698188873e-05, - "loss": 0.2098, - "step": 4556 - }, - { - "epoch": 0.23, - "grad_norm": 1.4467541990079829, - "learning_rate": 1.7940504692986876e-05, - "loss": 0.2196, - "step": 4557 - }, - { - "epoch": 0.23, - "grad_norm": 0.993067278432018, - "learning_rate": 1.793950347239824e-05, - "loss": 0.2426, - "step": 4558 - }, - { - "epoch": 0.23, - "grad_norm": 0.8076871561454932, - "learning_rate": 1.7938502036450128e-05, - "loss": 0.2241, - "step": 4559 - }, - { - "epoch": 0.23, - "grad_norm": 1.2539845527913558, - "learning_rate": 1.7937500385169695e-05, - "loss": 0.2151, - "step": 4560 - }, - { - "epoch": 0.23, - "grad_norm": 0.8088486414006637, - "learning_rate": 1.793649851858412e-05, - "loss": 0.2041, - "step": 4561 - }, - { - "epoch": 0.23, - "grad_norm": 1.186444873543506, - "learning_rate": 1.793549643672057e-05, - "loss": 0.2007, - "step": 4562 - }, - { - "epoch": 0.23, - "grad_norm": 1.041431065850028, - "learning_rate": 1.793449413960623e-05, - "loss": 0.2139, - "step": 4563 - }, - { - "epoch": 0.23, - "grad_norm": 1.5800775392251105, - "learning_rate": 1.7933491627268297e-05, - "loss": 0.2198, - "step": 4564 - }, - { - "epoch": 0.23, - "grad_norm": 0.9856604566007479, - "learning_rate": 1.793248889973395e-05, - "loss": 0.2261, - "step": 4565 - }, - { - "epoch": 0.23, - "grad_norm": 1.2768031361915468, - "learning_rate": 1.793148595703039e-05, - "loss": 0.211, - "step": 4566 - }, - { - "epoch": 0.23, - "grad_norm": 1.2841177512879824, - "learning_rate": 1.793048279918483e-05, - "loss": 0.1976, - "step": 4567 - }, - { - "epoch": 0.23, - "grad_norm": 0.9419078723736817, - "learning_rate": 1.7929479426224473e-05, - "loss": 0.2049, - "step": 4568 - }, - { - "epoch": 0.23, - "grad_norm": 1.17988154802931, - "learning_rate": 1.792847583817654e-05, - "loss": 0.204, - "step": 4569 - }, - { - "epoch": 0.23, - "grad_norm": 1.230600233347415, - "learning_rate": 1.7927472035068252e-05, - "loss": 0.2062, - "step": 4570 - }, - { - "epoch": 0.23, - "grad_norm": 0.9443095266324063, - "learning_rate": 1.792646801692684e-05, - "loss": 0.2091, - "step": 4571 - }, - { - "epoch": 0.23, - "grad_norm": 0.8913426583688561, - "learning_rate": 1.7925463783779534e-05, - "loss": 0.2403, - "step": 4572 - }, - { - "epoch": 0.23, - "grad_norm": 1.1466558580416424, - "learning_rate": 1.7924459335653575e-05, - "loss": 0.1967, - "step": 4573 - }, - { - "epoch": 0.23, - "grad_norm": 0.8535354994960708, - "learning_rate": 1.7923454672576208e-05, - "loss": 0.2254, - "step": 4574 - }, - { - "epoch": 0.23, - "grad_norm": 0.9666574182720151, - "learning_rate": 1.7922449794574686e-05, - "loss": 0.2471, - "step": 4575 - }, - { - "epoch": 0.23, - "grad_norm": 1.0837227290752445, - "learning_rate": 1.7921444701676267e-05, - "loss": 0.2148, - "step": 4576 - }, - { - "epoch": 0.23, - "grad_norm": 2.0686405368447454, - "learning_rate": 1.7920439393908212e-05, - "loss": 0.2152, - "step": 4577 - }, - { - "epoch": 0.23, - "grad_norm": 0.9850592832279286, - "learning_rate": 1.791943387129779e-05, - "loss": 0.2447, - "step": 4578 - }, - { - "epoch": 0.23, - "grad_norm": 1.155939191794011, - "learning_rate": 1.7918428133872278e-05, - "loss": 0.2238, - "step": 4579 - }, - { - "epoch": 0.23, - "grad_norm": 1.1574756641823711, - "learning_rate": 1.7917422181658954e-05, - "loss": 0.2134, - "step": 4580 - }, - { - "epoch": 0.23, - "grad_norm": 1.2070656126027186, - "learning_rate": 1.791641601468511e-05, - "loss": 0.1976, - "step": 4581 - }, - { - "epoch": 0.23, - "grad_norm": 2.436803255730702, - "learning_rate": 1.791540963297803e-05, - "loss": 0.2095, - "step": 4582 - }, - { - "epoch": 0.23, - "grad_norm": 0.9244624436617368, - "learning_rate": 1.791440303656502e-05, - "loss": 0.226, - "step": 4583 - }, - { - "epoch": 0.23, - "grad_norm": 1.9845022785819262, - "learning_rate": 1.791339622547338e-05, - "loss": 0.2091, - "step": 4584 - }, - { - "epoch": 0.23, - "grad_norm": 2.5238633874008007, - "learning_rate": 1.791238919973042e-05, - "loss": 0.2155, - "step": 4585 - }, - { - "epoch": 0.23, - "grad_norm": 1.213475206616372, - "learning_rate": 1.7911381959363456e-05, - "loss": 0.2218, - "step": 4586 - }, - { - "epoch": 0.23, - "grad_norm": 0.8457478367488127, - "learning_rate": 1.7910374504399812e-05, - "loss": 0.2093, - "step": 4587 - }, - { - "epoch": 0.23, - "grad_norm": 0.9261967112688656, - "learning_rate": 1.7909366834866805e-05, - "loss": 0.2165, - "step": 4588 - }, - { - "epoch": 0.23, - "grad_norm": 1.1208944218946013, - "learning_rate": 1.7908358950791784e-05, - "loss": 0.2366, - "step": 4589 - }, - { - "epoch": 0.23, - "grad_norm": 1.0587552863356118, - "learning_rate": 1.7907350852202078e-05, - "loss": 0.1997, - "step": 4590 - }, - { - "epoch": 0.23, - "grad_norm": 1.7929672703579542, - "learning_rate": 1.7906342539125032e-05, - "loss": 0.2123, - "step": 4591 - }, - { - "epoch": 0.23, - "grad_norm": 1.0807088767456519, - "learning_rate": 1.7905334011588e-05, - "loss": 0.214, - "step": 4592 - }, - { - "epoch": 0.23, - "grad_norm": 1.1805389793523693, - "learning_rate": 1.7904325269618335e-05, - "loss": 0.251, - "step": 4593 - }, - { - "epoch": 0.23, - "grad_norm": 1.530345554012429, - "learning_rate": 1.7903316313243398e-05, - "loss": 0.2266, - "step": 4594 - }, - { - "epoch": 0.23, - "grad_norm": 1.2375683235074402, - "learning_rate": 1.790230714249056e-05, - "loss": 0.2357, - "step": 4595 - }, - { - "epoch": 0.23, - "grad_norm": 1.2876949405743958, - "learning_rate": 1.7901297757387198e-05, - "loss": 0.2271, - "step": 4596 - }, - { - "epoch": 0.23, - "grad_norm": 1.0113586380944988, - "learning_rate": 1.7900288157960687e-05, - "loss": 0.2156, - "step": 4597 - }, - { - "epoch": 0.23, - "grad_norm": 0.9330662813880115, - "learning_rate": 1.7899278344238414e-05, - "loss": 0.2374, - "step": 4598 - }, - { - "epoch": 0.23, - "grad_norm": 1.393123277375178, - "learning_rate": 1.7898268316247767e-05, - "loss": 0.2137, - "step": 4599 - }, - { - "epoch": 0.23, - "grad_norm": 0.9077590609258267, - "learning_rate": 1.7897258074016152e-05, - "loss": 0.2313, - "step": 4600 - }, - { - "epoch": 0.23, - "grad_norm": 1.2412729292962352, - "learning_rate": 1.789624761757096e-05, - "loss": 0.2221, - "step": 4601 - }, - { - "epoch": 0.23, - "grad_norm": 1.0000254167505898, - "learning_rate": 1.7895236946939605e-05, - "loss": 0.2125, - "step": 4602 - }, - { - "epoch": 0.23, - "grad_norm": 1.1119944212749695, - "learning_rate": 1.7894226062149504e-05, - "loss": 0.1924, - "step": 4603 - }, - { - "epoch": 0.23, - "grad_norm": 0.9252582068282856, - "learning_rate": 1.7893214963228075e-05, - "loss": 0.2079, - "step": 4604 - }, - { - "epoch": 0.23, - "grad_norm": 1.6891110956771893, - "learning_rate": 1.7892203650202747e-05, - "loss": 0.2346, - "step": 4605 - }, - { - "epoch": 0.23, - "grad_norm": 1.1950376349533391, - "learning_rate": 1.7891192123100945e-05, - "loss": 0.2271, - "step": 4606 - }, - { - "epoch": 0.23, - "grad_norm": 1.437835315691099, - "learning_rate": 1.7890180381950113e-05, - "loss": 0.2208, - "step": 4607 - }, - { - "epoch": 0.23, - "grad_norm": 1.3801696979354348, - "learning_rate": 1.7889168426777693e-05, - "loss": 0.2373, - "step": 4608 - }, - { - "epoch": 0.23, - "grad_norm": 1.109176689844192, - "learning_rate": 1.7888156257611134e-05, - "loss": 0.1968, - "step": 4609 - }, - { - "epoch": 0.23, - "grad_norm": 1.687963492958192, - "learning_rate": 1.7887143874477887e-05, - "loss": 0.2459, - "step": 4610 - }, - { - "epoch": 0.23, - "grad_norm": 1.7422820151768772, - "learning_rate": 1.788613127740542e-05, - "loss": 0.2015, - "step": 4611 - }, - { - "epoch": 0.23, - "grad_norm": 1.7380741907169548, - "learning_rate": 1.7885118466421198e-05, - "loss": 0.215, - "step": 4612 - }, - { - "epoch": 0.23, - "grad_norm": 1.904065245765066, - "learning_rate": 1.788410544155269e-05, - "loss": 0.2017, - "step": 4613 - }, - { - "epoch": 0.23, - "grad_norm": 1.3133917809519462, - "learning_rate": 1.788309220282738e-05, - "loss": 0.1889, - "step": 4614 - }, - { - "epoch": 0.23, - "grad_norm": 1.5825665488429745, - "learning_rate": 1.788207875027274e-05, - "loss": 0.2168, - "step": 4615 - }, - { - "epoch": 0.23, - "grad_norm": 1.4843139736052093, - "learning_rate": 1.788106508391628e-05, - "loss": 0.2074, - "step": 4616 - }, - { - "epoch": 0.23, - "grad_norm": 1.7859163752239253, - "learning_rate": 1.788005120378548e-05, - "loss": 0.2277, - "step": 4617 - }, - { - "epoch": 0.23, - "grad_norm": 1.1062328359599314, - "learning_rate": 1.787903710990784e-05, - "loss": 0.2155, - "step": 4618 - }, - { - "epoch": 0.23, - "grad_norm": 1.509252746197654, - "learning_rate": 1.7878022802310882e-05, - "loss": 0.1734, - "step": 4619 - }, - { - "epoch": 0.23, - "grad_norm": 1.3313165535638438, - "learning_rate": 1.7877008281022107e-05, - "loss": 0.2105, - "step": 4620 - }, - { - "epoch": 0.23, - "grad_norm": 1.8325763249283111, - "learning_rate": 1.7875993546069036e-05, - "loss": 0.1965, - "step": 4621 - }, - { - "epoch": 0.24, - "grad_norm": 1.154417787978655, - "learning_rate": 1.7874978597479196e-05, - "loss": 0.2303, - "step": 4622 - }, - { - "epoch": 0.24, - "grad_norm": 1.319512505731216, - "learning_rate": 1.7873963435280122e-05, - "loss": 0.2245, - "step": 4623 - }, - { - "epoch": 0.24, - "grad_norm": 1.9450188451477706, - "learning_rate": 1.787294805949934e-05, - "loss": 0.2398, - "step": 4624 - }, - { - "epoch": 0.24, - "grad_norm": 1.4580963839574002, - "learning_rate": 1.7871932470164396e-05, - "loss": 0.2106, - "step": 4625 - }, - { - "epoch": 0.24, - "grad_norm": 1.0446207440063051, - "learning_rate": 1.787091666730284e-05, - "loss": 0.2261, - "step": 4626 - }, - { - "epoch": 0.24, - "grad_norm": 1.675123317131691, - "learning_rate": 1.7869900650942228e-05, - "loss": 0.1998, - "step": 4627 - }, - { - "epoch": 0.24, - "grad_norm": 1.4430054449111767, - "learning_rate": 1.7868884421110115e-05, - "loss": 0.2103, - "step": 4628 - }, - { - "epoch": 0.24, - "grad_norm": 0.9541529011227019, - "learning_rate": 1.7867867977834067e-05, - "loss": 0.2249, - "step": 4629 - }, - { - "epoch": 0.24, - "grad_norm": 1.3281638875589192, - "learning_rate": 1.7866851321141655e-05, - "loss": 0.2276, - "step": 4630 - }, - { - "epoch": 0.24, - "grad_norm": 1.151641575951757, - "learning_rate": 1.7865834451060458e-05, - "loss": 0.2107, - "step": 4631 - }, - { - "epoch": 0.24, - "grad_norm": 1.2337527355105584, - "learning_rate": 1.7864817367618058e-05, - "loss": 0.2081, - "step": 4632 - }, - { - "epoch": 0.24, - "grad_norm": 1.5264640015634976, - "learning_rate": 1.7863800070842038e-05, - "loss": 0.1938, - "step": 4633 - }, - { - "epoch": 0.24, - "grad_norm": 1.1466772922197281, - "learning_rate": 1.7862782560760004e-05, - "loss": 0.2127, - "step": 4634 - }, - { - "epoch": 0.24, - "grad_norm": 11.273989830855673, - "learning_rate": 1.7861764837399544e-05, - "loss": 0.1881, - "step": 4635 - }, - { - "epoch": 0.24, - "grad_norm": 1.3201828598781473, - "learning_rate": 1.786074690078827e-05, - "loss": 0.2431, - "step": 4636 - }, - { - "epoch": 0.24, - "grad_norm": 1.5748727581569109, - "learning_rate": 1.785972875095379e-05, - "loss": 0.1946, - "step": 4637 - }, - { - "epoch": 0.24, - "grad_norm": 1.450493532647831, - "learning_rate": 1.785871038792373e-05, - "loss": 0.1972, - "step": 4638 - }, - { - "epoch": 0.24, - "grad_norm": 1.2015481182386216, - "learning_rate": 1.7857691811725702e-05, - "loss": 0.2096, - "step": 4639 - }, - { - "epoch": 0.24, - "grad_norm": 2.25848049603337, - "learning_rate": 1.785667302238734e-05, - "loss": 0.2265, - "step": 4640 - }, - { - "epoch": 0.24, - "grad_norm": 4.349525557841808, - "learning_rate": 1.785565401993628e-05, - "loss": 0.2234, - "step": 4641 - }, - { - "epoch": 0.24, - "grad_norm": 1.8130057352094755, - "learning_rate": 1.785463480440016e-05, - "loss": 0.2088, - "step": 4642 - }, - { - "epoch": 0.24, - "grad_norm": 1.8040832321063347, - "learning_rate": 1.7853615375806627e-05, - "loss": 0.2371, - "step": 4643 - }, - { - "epoch": 0.24, - "grad_norm": 1.4228604220338537, - "learning_rate": 1.7852595734183333e-05, - "loss": 0.216, - "step": 4644 - }, - { - "epoch": 0.24, - "grad_norm": 2.310556142843705, - "learning_rate": 1.7851575879557937e-05, - "loss": 0.2051, - "step": 4645 - }, - { - "epoch": 0.24, - "grad_norm": 1.7051940112440398, - "learning_rate": 1.78505558119581e-05, - "loss": 0.1949, - "step": 4646 - }, - { - "epoch": 0.24, - "grad_norm": 1.366779457579173, - "learning_rate": 1.7849535531411498e-05, - "loss": 0.1994, - "step": 4647 - }, - { - "epoch": 0.24, - "grad_norm": 10.223823201496756, - "learning_rate": 1.7848515037945797e-05, - "loss": 0.2092, - "step": 4648 - }, - { - "epoch": 0.24, - "grad_norm": 1.9146218576980645, - "learning_rate": 1.784749433158868e-05, - "loss": 0.2279, - "step": 4649 - }, - { - "epoch": 0.24, - "grad_norm": 1.8574756201466331, - "learning_rate": 1.7846473412367845e-05, - "loss": 0.2066, - "step": 4650 - }, - { - "epoch": 0.24, - "grad_norm": 5.236915808421173, - "learning_rate": 1.7845452280310967e-05, - "loss": 0.217, - "step": 4651 - }, - { - "epoch": 0.24, - "grad_norm": 1.721888692196491, - "learning_rate": 1.784443093544576e-05, - "loss": 0.2415, - "step": 4652 - }, - { - "epoch": 0.24, - "grad_norm": 1.487684047623923, - "learning_rate": 1.7843409377799914e-05, - "loss": 0.2167, - "step": 4653 - }, - { - "epoch": 0.24, - "grad_norm": 1.7412624169564745, - "learning_rate": 1.7842387607401148e-05, - "loss": 0.2083, - "step": 4654 - }, - { - "epoch": 0.24, - "grad_norm": 1.40719210246589, - "learning_rate": 1.7841365624277176e-05, - "loss": 0.2482, - "step": 4655 - }, - { - "epoch": 0.24, - "grad_norm": 1.529733771727649, - "learning_rate": 1.7840343428455716e-05, - "loss": 0.2119, - "step": 4656 - }, - { - "epoch": 0.24, - "grad_norm": 1.7667059024593066, - "learning_rate": 1.78393210199645e-05, - "loss": 0.2074, - "step": 4657 - }, - { - "epoch": 0.24, - "grad_norm": 1.1288614053658006, - "learning_rate": 1.7838298398831263e-05, - "loss": 0.2383, - "step": 4658 - }, - { - "epoch": 0.24, - "grad_norm": 1.026408149145776, - "learning_rate": 1.783727556508373e-05, - "loss": 0.2058, - "step": 4659 - }, - { - "epoch": 0.24, - "grad_norm": 1.4706443339849393, - "learning_rate": 1.783625251874966e-05, - "loss": 0.2228, - "step": 4660 - }, - { - "epoch": 0.24, - "grad_norm": 1.0917198179045484, - "learning_rate": 1.78352292598568e-05, - "loss": 0.2419, - "step": 4661 - }, - { - "epoch": 0.24, - "grad_norm": 0.9710987685726439, - "learning_rate": 1.78342057884329e-05, - "loss": 0.2331, - "step": 4662 - }, - { - "epoch": 0.24, - "grad_norm": 1.2486942535582013, - "learning_rate": 1.7833182104505727e-05, - "loss": 0.2098, - "step": 4663 - }, - { - "epoch": 0.24, - "grad_norm": 1.4638630936861796, - "learning_rate": 1.7832158208103046e-05, - "loss": 0.2116, - "step": 4664 - }, - { - "epoch": 0.24, - "grad_norm": 1.2763911256155431, - "learning_rate": 1.7831134099252633e-05, - "loss": 0.228, - "step": 4665 - }, - { - "epoch": 0.24, - "grad_norm": 1.2850151021671732, - "learning_rate": 1.7830109777982264e-05, - "loss": 0.2158, - "step": 4666 - }, - { - "epoch": 0.24, - "grad_norm": 1.3120744860308333, - "learning_rate": 1.7829085244319722e-05, - "loss": 0.2157, - "step": 4667 - }, - { - "epoch": 0.24, - "grad_norm": 1.495724487270258, - "learning_rate": 1.7828060498292807e-05, - "loss": 0.2049, - "step": 4668 - }, - { - "epoch": 0.24, - "grad_norm": 1.1252993166897158, - "learning_rate": 1.7827035539929304e-05, - "loss": 0.221, - "step": 4669 - }, - { - "epoch": 0.24, - "grad_norm": 1.0361332579638716, - "learning_rate": 1.7826010369257023e-05, - "loss": 0.2397, - "step": 4670 - }, - { - "epoch": 0.24, - "grad_norm": 0.9062157745950375, - "learning_rate": 1.7824984986303767e-05, - "loss": 0.2221, - "step": 4671 - }, - { - "epoch": 0.24, - "grad_norm": 1.0599935506985416, - "learning_rate": 1.782395939109735e-05, - "loss": 0.1988, - "step": 4672 - }, - { - "epoch": 0.24, - "grad_norm": 1.0761451179044146, - "learning_rate": 1.7822933583665595e-05, - "loss": 0.1832, - "step": 4673 - }, - { - "epoch": 0.24, - "grad_norm": 1.2453986166897961, - "learning_rate": 1.782190756403632e-05, - "loss": 0.2137, - "step": 4674 - }, - { - "epoch": 0.24, - "grad_norm": 0.9699109057442683, - "learning_rate": 1.7820881332237366e-05, - "loss": 0.2233, - "step": 4675 - }, - { - "epoch": 0.24, - "grad_norm": 1.0480747153682368, - "learning_rate": 1.7819854888296563e-05, - "loss": 0.2394, - "step": 4676 - }, - { - "epoch": 0.24, - "grad_norm": 1.139973735215863, - "learning_rate": 1.7818828232241756e-05, - "loss": 0.2286, - "step": 4677 - }, - { - "epoch": 0.24, - "grad_norm": 1.6594254452523631, - "learning_rate": 1.781780136410079e-05, - "loss": 0.2264, - "step": 4678 - }, - { - "epoch": 0.24, - "grad_norm": 0.892424483882286, - "learning_rate": 1.7816774283901518e-05, - "loss": 0.2145, - "step": 4679 - }, - { - "epoch": 0.24, - "grad_norm": 0.9634119808029931, - "learning_rate": 1.7815746991671804e-05, - "loss": 0.2322, - "step": 4680 - }, - { - "epoch": 0.24, - "grad_norm": 1.0708682845486202, - "learning_rate": 1.781471948743951e-05, - "loss": 0.2455, - "step": 4681 - }, - { - "epoch": 0.24, - "grad_norm": 1.5709346655885017, - "learning_rate": 1.781369177123251e-05, - "loss": 0.2179, - "step": 4682 - }, - { - "epoch": 0.24, - "grad_norm": 1.0173880811846758, - "learning_rate": 1.7812663843078677e-05, - "loss": 0.1937, - "step": 4683 - }, - { - "epoch": 0.24, - "grad_norm": 0.8846051241989114, - "learning_rate": 1.78116357030059e-05, - "loss": 0.1912, - "step": 4684 - }, - { - "epoch": 0.24, - "grad_norm": 1.251193835858283, - "learning_rate": 1.7810607351042062e-05, - "loss": 0.2091, - "step": 4685 - }, - { - "epoch": 0.24, - "grad_norm": 1.1693427107436283, - "learning_rate": 1.780957878721506e-05, - "loss": 0.2114, - "step": 4686 - }, - { - "epoch": 0.24, - "grad_norm": 1.1338587726128928, - "learning_rate": 1.7808550011552788e-05, - "loss": 0.2243, - "step": 4687 - }, - { - "epoch": 0.24, - "grad_norm": 0.8121839280092648, - "learning_rate": 1.780752102408316e-05, - "loss": 0.2332, - "step": 4688 - }, - { - "epoch": 0.24, - "grad_norm": 0.9126963444453843, - "learning_rate": 1.780649182483408e-05, - "loss": 0.2059, - "step": 4689 - }, - { - "epoch": 0.24, - "grad_norm": 0.9287395868561739, - "learning_rate": 1.780546241383347e-05, - "loss": 0.2336, - "step": 4690 - }, - { - "epoch": 0.24, - "grad_norm": 0.861224039628638, - "learning_rate": 1.7804432791109253e-05, - "loss": 0.193, - "step": 4691 - }, - { - "epoch": 0.24, - "grad_norm": 1.2011478859769236, - "learning_rate": 1.7803402956689353e-05, - "loss": 0.2309, - "step": 4692 - }, - { - "epoch": 0.24, - "grad_norm": 0.8349673231029032, - "learning_rate": 1.7802372910601707e-05, - "loss": 0.22, - "step": 4693 - }, - { - "epoch": 0.24, - "grad_norm": 1.1764232090330085, - "learning_rate": 1.7801342652874256e-05, - "loss": 0.2082, - "step": 4694 - }, - { - "epoch": 0.24, - "grad_norm": 1.31740133504077, - "learning_rate": 1.7800312183534946e-05, - "loss": 0.1989, - "step": 4695 - }, - { - "epoch": 0.24, - "grad_norm": 0.8859932028097436, - "learning_rate": 1.7799281502611725e-05, - "loss": 0.2259, - "step": 4696 - }, - { - "epoch": 0.24, - "grad_norm": 1.068731884988904, - "learning_rate": 1.7798250610132555e-05, - "loss": 0.2286, - "step": 4697 - }, - { - "epoch": 0.24, - "grad_norm": 0.9480913590614505, - "learning_rate": 1.7797219506125393e-05, - "loss": 0.2121, - "step": 4698 - }, - { - "epoch": 0.24, - "grad_norm": 1.1259551001945691, - "learning_rate": 1.7796188190618217e-05, - "loss": 0.2447, - "step": 4699 - }, - { - "epoch": 0.24, - "grad_norm": 1.0424075409961195, - "learning_rate": 1.7795156663638993e-05, - "loss": 0.2487, - "step": 4700 - }, - { - "epoch": 0.24, - "grad_norm": 0.822807998801817, - "learning_rate": 1.7794124925215706e-05, - "loss": 0.2308, - "step": 4701 - }, - { - "epoch": 0.24, - "grad_norm": 0.9446368597507817, - "learning_rate": 1.7793092975376337e-05, - "loss": 0.2051, - "step": 4702 - }, - { - "epoch": 0.24, - "grad_norm": 0.8001210824626251, - "learning_rate": 1.779206081414888e-05, - "loss": 0.2054, - "step": 4703 - }, - { - "epoch": 0.24, - "grad_norm": 0.7007932947970836, - "learning_rate": 1.779102844156134e-05, - "loss": 0.2126, - "step": 4704 - }, - { - "epoch": 0.24, - "grad_norm": 1.018612629375446, - "learning_rate": 1.778999585764171e-05, - "loss": 0.1843, - "step": 4705 - }, - { - "epoch": 0.24, - "grad_norm": 2.571513674538329, - "learning_rate": 1.7788963062418e-05, - "loss": 0.2114, - "step": 4706 - }, - { - "epoch": 0.24, - "grad_norm": 0.7412928909282924, - "learning_rate": 1.778793005591823e-05, - "loss": 0.1917, - "step": 4707 - }, - { - "epoch": 0.24, - "grad_norm": 0.8497538991645085, - "learning_rate": 1.7786896838170414e-05, - "loss": 0.1995, - "step": 4708 - }, - { - "epoch": 0.24, - "grad_norm": 0.8581836762327298, - "learning_rate": 1.7785863409202587e-05, - "loss": 0.2296, - "step": 4709 - }, - { - "epoch": 0.24, - "grad_norm": 1.0205692084242963, - "learning_rate": 1.778482976904277e-05, - "loss": 0.2222, - "step": 4710 - }, - { - "epoch": 0.24, - "grad_norm": 0.7773003288749312, - "learning_rate": 1.7783795917719006e-05, - "loss": 0.2123, - "step": 4711 - }, - { - "epoch": 0.24, - "grad_norm": 0.8669931706685519, - "learning_rate": 1.7782761855259343e-05, - "loss": 0.196, - "step": 4712 - }, - { - "epoch": 0.24, - "grad_norm": 1.261557946455817, - "learning_rate": 1.778172758169182e-05, - "loss": 0.2428, - "step": 4713 - }, - { - "epoch": 0.24, - "grad_norm": 0.8542981027231593, - "learning_rate": 1.77806930970445e-05, - "loss": 0.1949, - "step": 4714 - }, - { - "epoch": 0.24, - "grad_norm": 1.1725172257027152, - "learning_rate": 1.7779658401345437e-05, - "loss": 0.2447, - "step": 4715 - }, - { - "epoch": 0.24, - "grad_norm": 0.9171805823393477, - "learning_rate": 1.7778623494622703e-05, - "loss": 0.2291, - "step": 4716 - }, - { - "epoch": 0.24, - "grad_norm": 0.8329476077948494, - "learning_rate": 1.7777588376904367e-05, - "loss": 0.1999, - "step": 4717 - }, - { - "epoch": 0.24, - "grad_norm": 0.8706417523196315, - "learning_rate": 1.777655304821851e-05, - "loss": 0.1986, - "step": 4718 - }, - { - "epoch": 0.24, - "grad_norm": 0.9122189195503517, - "learning_rate": 1.7775517508593208e-05, - "loss": 0.2286, - "step": 4719 - }, - { - "epoch": 0.24, - "grad_norm": 0.9221898541481, - "learning_rate": 1.7774481758056553e-05, - "loss": 0.2033, - "step": 4720 - }, - { - "epoch": 0.24, - "grad_norm": 0.913523118983051, - "learning_rate": 1.7773445796636647e-05, - "loss": 0.2321, - "step": 4721 - }, - { - "epoch": 0.24, - "grad_norm": 1.1853772461053191, - "learning_rate": 1.777240962436158e-05, - "loss": 0.2175, - "step": 4722 - }, - { - "epoch": 0.24, - "grad_norm": 0.9836678716893172, - "learning_rate": 1.7771373241259463e-05, - "loss": 0.2354, - "step": 4723 - }, - { - "epoch": 0.24, - "grad_norm": 4.933538952221682, - "learning_rate": 1.777033664735841e-05, - "loss": 0.2198, - "step": 4724 - }, - { - "epoch": 0.24, - "grad_norm": 0.9053145119436731, - "learning_rate": 1.7769299842686537e-05, - "loss": 0.1993, - "step": 4725 - }, - { - "epoch": 0.24, - "grad_norm": 1.4561874749144696, - "learning_rate": 1.7768262827271967e-05, - "loss": 0.21, - "step": 4726 - }, - { - "epoch": 0.24, - "grad_norm": 0.8923224164256436, - "learning_rate": 1.7767225601142827e-05, - "loss": 0.2417, - "step": 4727 - }, - { - "epoch": 0.24, - "grad_norm": 2.1688299476678536, - "learning_rate": 1.7766188164327255e-05, - "loss": 0.2227, - "step": 4728 - }, - { - "epoch": 0.24, - "grad_norm": 1.1615482229124374, - "learning_rate": 1.7765150516853393e-05, - "loss": 0.2098, - "step": 4729 - }, - { - "epoch": 0.24, - "grad_norm": 0.9914331330545041, - "learning_rate": 1.776411265874938e-05, - "loss": 0.207, - "step": 4730 - }, - { - "epoch": 0.24, - "grad_norm": 1.216108796145146, - "learning_rate": 1.7763074590043373e-05, - "loss": 0.2333, - "step": 4731 - }, - { - "epoch": 0.24, - "grad_norm": 1.0612278807406503, - "learning_rate": 1.7762036310763533e-05, - "loss": 0.2331, - "step": 4732 - }, - { - "epoch": 0.24, - "grad_norm": 0.9308158763736551, - "learning_rate": 1.7760997820938017e-05, - "loss": 0.2139, - "step": 4733 - }, - { - "epoch": 0.24, - "grad_norm": 1.02496181162953, - "learning_rate": 1.7759959120594995e-05, - "loss": 0.2208, - "step": 4734 - }, - { - "epoch": 0.24, - "grad_norm": 0.9621707948335559, - "learning_rate": 1.7758920209762646e-05, - "loss": 0.2095, - "step": 4735 - }, - { - "epoch": 0.24, - "grad_norm": 0.8073536972901817, - "learning_rate": 1.7757881088469152e-05, - "loss": 0.213, - "step": 4736 - }, - { - "epoch": 0.24, - "grad_norm": 3.602279863576911, - "learning_rate": 1.775684175674269e-05, - "loss": 0.1964, - "step": 4737 - }, - { - "epoch": 0.24, - "grad_norm": 0.7919440808875956, - "learning_rate": 1.7755802214611456e-05, - "loss": 0.2221, - "step": 4738 - }, - { - "epoch": 0.24, - "grad_norm": 1.2264881157763754, - "learning_rate": 1.7754762462103653e-05, - "loss": 0.2061, - "step": 4739 - }, - { - "epoch": 0.24, - "grad_norm": 1.0081452576317012, - "learning_rate": 1.775372249924748e-05, - "loss": 0.2341, - "step": 4740 - }, - { - "epoch": 0.24, - "grad_norm": 1.4966706046285734, - "learning_rate": 1.775268232607114e-05, - "loss": 0.1968, - "step": 4741 - }, - { - "epoch": 0.24, - "grad_norm": 0.9282699330047943, - "learning_rate": 1.775164194260286e-05, - "loss": 0.1917, - "step": 4742 - }, - { - "epoch": 0.24, - "grad_norm": 1.0831685554881305, - "learning_rate": 1.7750601348870857e-05, - "loss": 0.2245, - "step": 4743 - }, - { - "epoch": 0.24, - "grad_norm": 1.0721042421741023, - "learning_rate": 1.774956054490335e-05, - "loss": 0.226, - "step": 4744 - }, - { - "epoch": 0.24, - "grad_norm": 1.1269726140008207, - "learning_rate": 1.7748519530728578e-05, - "loss": 0.2066, - "step": 4745 - }, - { - "epoch": 0.24, - "grad_norm": 1.3184525037031192, - "learning_rate": 1.7747478306374774e-05, - "loss": 0.1888, - "step": 4746 - }, - { - "epoch": 0.24, - "grad_norm": 1.447342958685647, - "learning_rate": 1.7746436871870185e-05, - "loss": 0.2092, - "step": 4747 - }, - { - "epoch": 0.24, - "grad_norm": 1.0632538284053596, - "learning_rate": 1.7745395227243057e-05, - "loss": 0.2367, - "step": 4748 - }, - { - "epoch": 0.24, - "grad_norm": 0.8571598963074442, - "learning_rate": 1.7744353372521645e-05, - "loss": 0.1998, - "step": 4749 - }, - { - "epoch": 0.24, - "grad_norm": 1.4045763430768659, - "learning_rate": 1.7743311307734212e-05, - "loss": 0.2075, - "step": 4750 - }, - { - "epoch": 0.24, - "grad_norm": 1.140852461397593, - "learning_rate": 1.7742269032909022e-05, - "loss": 0.2327, - "step": 4751 - }, - { - "epoch": 0.24, - "grad_norm": 1.0450382616949419, - "learning_rate": 1.774122654807435e-05, - "loss": 0.2048, - "step": 4752 - }, - { - "epoch": 0.24, - "grad_norm": 0.9913868061762731, - "learning_rate": 1.7740183853258463e-05, - "loss": 0.2349, - "step": 4753 - }, - { - "epoch": 0.24, - "grad_norm": 0.9902220057154458, - "learning_rate": 1.773914094848966e-05, - "loss": 0.1981, - "step": 4754 - }, - { - "epoch": 0.24, - "grad_norm": 0.9141793025191702, - "learning_rate": 1.7738097833796218e-05, - "loss": 0.225, - "step": 4755 - }, - { - "epoch": 0.24, - "grad_norm": 1.122258777024154, - "learning_rate": 1.7737054509206437e-05, - "loss": 0.2029, - "step": 4756 - }, - { - "epoch": 0.24, - "grad_norm": 0.9388722008012684, - "learning_rate": 1.773601097474861e-05, - "loss": 0.1799, - "step": 4757 - }, - { - "epoch": 0.24, - "grad_norm": 1.1338319108176849, - "learning_rate": 1.7734967230451053e-05, - "loss": 0.198, - "step": 4758 - }, - { - "epoch": 0.24, - "grad_norm": 1.1696782476614866, - "learning_rate": 1.7733923276342072e-05, - "loss": 0.2082, - "step": 4759 - }, - { - "epoch": 0.24, - "grad_norm": 0.9570850227198349, - "learning_rate": 1.7732879112449987e-05, - "loss": 0.1937, - "step": 4760 - }, - { - "epoch": 0.24, - "grad_norm": 0.9987829452675776, - "learning_rate": 1.773183473880312e-05, - "loss": 0.2065, - "step": 4761 - }, - { - "epoch": 0.24, - "grad_norm": 0.8658344198195547, - "learning_rate": 1.7730790155429796e-05, - "loss": 0.2535, - "step": 4762 - }, - { - "epoch": 0.24, - "grad_norm": 1.5105970576810357, - "learning_rate": 1.7729745362358354e-05, - "loss": 0.2292, - "step": 4763 - }, - { - "epoch": 0.24, - "grad_norm": 1.030621919076012, - "learning_rate": 1.772870035961713e-05, - "loss": 0.2424, - "step": 4764 - }, - { - "epoch": 0.24, - "grad_norm": 0.7911133713120762, - "learning_rate": 1.772765514723448e-05, - "loss": 0.2031, - "step": 4765 - }, - { - "epoch": 0.24, - "grad_norm": 0.7733501456620687, - "learning_rate": 1.7726609725238736e-05, - "loss": 0.2015, - "step": 4766 - }, - { - "epoch": 0.24, - "grad_norm": 0.7483844203985907, - "learning_rate": 1.7725564093658273e-05, - "loss": 0.2144, - "step": 4767 - }, - { - "epoch": 0.24, - "grad_norm": 0.832420970371779, - "learning_rate": 1.772451825252145e-05, - "loss": 0.1933, - "step": 4768 - }, - { - "epoch": 0.24, - "grad_norm": 0.9269564472066407, - "learning_rate": 1.7723472201856632e-05, - "loss": 0.2117, - "step": 4769 - }, - { - "epoch": 0.24, - "grad_norm": 0.7062623972950685, - "learning_rate": 1.7722425941692193e-05, - "loss": 0.2011, - "step": 4770 - }, - { - "epoch": 0.24, - "grad_norm": 1.300632009889857, - "learning_rate": 1.7721379472056512e-05, - "loss": 0.235, - "step": 4771 - }, - { - "epoch": 0.24, - "grad_norm": 0.9395077257423332, - "learning_rate": 1.772033279297798e-05, - "loss": 0.214, - "step": 4772 - }, - { - "epoch": 0.24, - "grad_norm": 0.8855510946435825, - "learning_rate": 1.7719285904484984e-05, - "loss": 0.2153, - "step": 4773 - }, - { - "epoch": 0.24, - "grad_norm": 0.9062652247205932, - "learning_rate": 1.771823880660592e-05, - "loss": 0.2036, - "step": 4774 - }, - { - "epoch": 0.24, - "grad_norm": 1.2632706385556125, - "learning_rate": 1.7717191499369195e-05, - "loss": 0.2172, - "step": 4775 - }, - { - "epoch": 0.24, - "grad_norm": 1.0616280497428603, - "learning_rate": 1.7716143982803214e-05, - "loss": 0.214, - "step": 4776 - }, - { - "epoch": 0.24, - "grad_norm": 1.0610019253510121, - "learning_rate": 1.7715096256936387e-05, - "loss": 0.2148, - "step": 4777 - }, - { - "epoch": 0.24, - "grad_norm": 0.9265199693220165, - "learning_rate": 1.7714048321797146e-05, - "loss": 0.194, - "step": 4778 - }, - { - "epoch": 0.24, - "grad_norm": 1.4833546752386788, - "learning_rate": 1.7713000177413905e-05, - "loss": 0.2144, - "step": 4779 - }, - { - "epoch": 0.24, - "grad_norm": 2.119156400854007, - "learning_rate": 1.77119518238151e-05, - "loss": 0.2137, - "step": 4780 - }, - { - "epoch": 0.24, - "grad_norm": 0.8713331110192771, - "learning_rate": 1.7710903261029162e-05, - "loss": 0.1895, - "step": 4781 - }, - { - "epoch": 0.24, - "grad_norm": 1.4736802633037827, - "learning_rate": 1.770985448908454e-05, - "loss": 0.2002, - "step": 4782 - }, - { - "epoch": 0.24, - "grad_norm": 0.9670421103121505, - "learning_rate": 1.770880550800968e-05, - "loss": 0.2246, - "step": 4783 - }, - { - "epoch": 0.24, - "grad_norm": 0.9997782459952628, - "learning_rate": 1.7707756317833037e-05, - "loss": 0.2155, - "step": 4784 - }, - { - "epoch": 0.24, - "grad_norm": 1.0048297808434645, - "learning_rate": 1.7706706918583065e-05, - "loss": 0.22, - "step": 4785 - }, - { - "epoch": 0.24, - "grad_norm": 0.8625274276807509, - "learning_rate": 1.7705657310288234e-05, - "loss": 0.1931, - "step": 4786 - }, - { - "epoch": 0.24, - "grad_norm": 1.0333429133868393, - "learning_rate": 1.7704607492977016e-05, - "loss": 0.2056, - "step": 4787 - }, - { - "epoch": 0.24, - "grad_norm": 0.8165586544427031, - "learning_rate": 1.770355746667788e-05, - "loss": 0.2255, - "step": 4788 - }, - { - "epoch": 0.24, - "grad_norm": 0.9446874307612251, - "learning_rate": 1.7702507231419316e-05, - "loss": 0.2284, - "step": 4789 - }, - { - "epoch": 0.24, - "grad_norm": 1.157327733356626, - "learning_rate": 1.7701456787229805e-05, - "loss": 0.2121, - "step": 4790 - }, - { - "epoch": 0.24, - "grad_norm": 0.9067004989487868, - "learning_rate": 1.7700406134137846e-05, - "loss": 0.2084, - "step": 4791 - }, - { - "epoch": 0.24, - "grad_norm": 1.5375842708722038, - "learning_rate": 1.7699355272171936e-05, - "loss": 0.2111, - "step": 4792 - }, - { - "epoch": 0.24, - "grad_norm": 1.579611023345228, - "learning_rate": 1.769830420136058e-05, - "loss": 0.2197, - "step": 4793 - }, - { - "epoch": 0.24, - "grad_norm": 0.7815056142120229, - "learning_rate": 1.7697252921732288e-05, - "loss": 0.2399, - "step": 4794 - }, - { - "epoch": 0.24, - "grad_norm": 1.0464247436420646, - "learning_rate": 1.7696201433315572e-05, - "loss": 0.2047, - "step": 4795 - }, - { - "epoch": 0.24, - "grad_norm": 0.7803618338517158, - "learning_rate": 1.769514973613896e-05, - "loss": 0.2282, - "step": 4796 - }, - { - "epoch": 0.24, - "grad_norm": 3.0589890669182576, - "learning_rate": 1.7694097830230977e-05, - "loss": 0.217, - "step": 4797 - }, - { - "epoch": 0.24, - "grad_norm": 0.8290031373697013, - "learning_rate": 1.7693045715620154e-05, - "loss": 0.2312, - "step": 4798 - }, - { - "epoch": 0.24, - "grad_norm": 0.9998618652420123, - "learning_rate": 1.7691993392335033e-05, - "loss": 0.1801, - "step": 4799 - }, - { - "epoch": 0.24, - "grad_norm": 2.939470955111584, - "learning_rate": 1.7690940860404158e-05, - "loss": 0.2376, - "step": 4800 - }, - { - "epoch": 0.24, - "grad_norm": 0.7765224504735883, - "learning_rate": 1.7689888119856075e-05, - "loss": 0.1927, - "step": 4801 - }, - { - "epoch": 0.24, - "grad_norm": 0.803742550457734, - "learning_rate": 1.7688835170719346e-05, - "loss": 0.2185, - "step": 4802 - }, - { - "epoch": 0.24, - "grad_norm": 1.0415101084628973, - "learning_rate": 1.7687782013022526e-05, - "loss": 0.1941, - "step": 4803 - }, - { - "epoch": 0.24, - "grad_norm": 0.7424308880860995, - "learning_rate": 1.7686728646794184e-05, - "loss": 0.2005, - "step": 4804 - }, - { - "epoch": 0.24, - "grad_norm": 0.8623911571639182, - "learning_rate": 1.7685675072062894e-05, - "loss": 0.2083, - "step": 4805 - }, - { - "epoch": 0.24, - "grad_norm": 0.8419284534298985, - "learning_rate": 1.7684621288857233e-05, - "loss": 0.2208, - "step": 4806 - }, - { - "epoch": 0.24, - "grad_norm": 1.7300335906306568, - "learning_rate": 1.7683567297205786e-05, - "loss": 0.2105, - "step": 4807 - }, - { - "epoch": 0.24, - "grad_norm": 1.05541219527479, - "learning_rate": 1.7682513097137143e-05, - "loss": 0.2279, - "step": 4808 - }, - { - "epoch": 0.24, - "grad_norm": 1.0619360379113207, - "learning_rate": 1.76814586886799e-05, - "loss": 0.2034, - "step": 4809 - }, - { - "epoch": 0.24, - "grad_norm": 1.0246215374768057, - "learning_rate": 1.7680404071862653e-05, - "loss": 0.2148, - "step": 4810 - }, - { - "epoch": 0.24, - "grad_norm": 0.8031646650335315, - "learning_rate": 1.7679349246714012e-05, - "loss": 0.2003, - "step": 4811 - }, - { - "epoch": 0.24, - "grad_norm": 0.8335112525439298, - "learning_rate": 1.767829421326259e-05, - "loss": 0.2031, - "step": 4812 - }, - { - "epoch": 0.24, - "grad_norm": 1.5996760040622504, - "learning_rate": 1.7677238971537004e-05, - "loss": 0.224, - "step": 4813 - }, - { - "epoch": 0.24, - "grad_norm": 1.0144403532971613, - "learning_rate": 1.7676183521565876e-05, - "loss": 0.2219, - "step": 4814 - }, - { - "epoch": 0.24, - "grad_norm": 1.0481470431681141, - "learning_rate": 1.767512786337784e-05, - "loss": 0.2139, - "step": 4815 - }, - { - "epoch": 0.24, - "grad_norm": 1.3381315237653075, - "learning_rate": 1.7674071997001525e-05, - "loss": 0.2254, - "step": 4816 - }, - { - "epoch": 0.24, - "grad_norm": 0.7657901269899813, - "learning_rate": 1.767301592246557e-05, - "loss": 0.1899, - "step": 4817 - }, - { - "epoch": 0.25, - "grad_norm": 1.2415438690318472, - "learning_rate": 1.767195963979863e-05, - "loss": 0.2357, - "step": 4818 - }, - { - "epoch": 0.25, - "grad_norm": 6.371155851734972, - "learning_rate": 1.767090314902935e-05, - "loss": 0.2256, - "step": 4819 - }, - { - "epoch": 0.25, - "grad_norm": 1.027278506127918, - "learning_rate": 1.7669846450186384e-05, - "loss": 0.2021, - "step": 4820 - }, - { - "epoch": 0.25, - "grad_norm": 1.0454534167228677, - "learning_rate": 1.7668789543298407e-05, - "loss": 0.2124, - "step": 4821 - }, - { - "epoch": 0.25, - "grad_norm": 0.8698202989216001, - "learning_rate": 1.7667732428394077e-05, - "loss": 0.2048, - "step": 4822 - }, - { - "epoch": 0.25, - "grad_norm": 0.8752795249443815, - "learning_rate": 1.7666675105502073e-05, - "loss": 0.2053, - "step": 4823 - }, - { - "epoch": 0.25, - "grad_norm": 1.2287231186117786, - "learning_rate": 1.7665617574651074e-05, - "loss": 0.2013, - "step": 4824 - }, - { - "epoch": 0.25, - "grad_norm": 0.7811101027066473, - "learning_rate": 1.7664559835869763e-05, - "loss": 0.1969, - "step": 4825 - }, - { - "epoch": 0.25, - "grad_norm": 0.9831821506037443, - "learning_rate": 1.7663501889186837e-05, - "loss": 0.2071, - "step": 4826 - }, - { - "epoch": 0.25, - "grad_norm": 1.3321080093873667, - "learning_rate": 1.7662443734630987e-05, - "loss": 0.2057, - "step": 4827 - }, - { - "epoch": 0.25, - "grad_norm": 0.9328654862358803, - "learning_rate": 1.7661385372230918e-05, - "loss": 0.198, - "step": 4828 - }, - { - "epoch": 0.25, - "grad_norm": 0.9866929554002803, - "learning_rate": 1.766032680201534e-05, - "loss": 0.2252, - "step": 4829 - }, - { - "epoch": 0.25, - "grad_norm": 0.8040091299802148, - "learning_rate": 1.7659268024012962e-05, - "loss": 0.2133, - "step": 4830 - }, - { - "epoch": 0.25, - "grad_norm": 0.929652719573642, - "learning_rate": 1.7658209038252507e-05, - "loss": 0.1771, - "step": 4831 - }, - { - "epoch": 0.25, - "grad_norm": 0.9325894915758147, - "learning_rate": 1.76571498447627e-05, - "loss": 0.228, - "step": 4832 - }, - { - "epoch": 0.25, - "grad_norm": 1.0497336023331287, - "learning_rate": 1.765609044357227e-05, - "loss": 0.2139, - "step": 4833 - }, - { - "epoch": 0.25, - "grad_norm": 1.0788425842404294, - "learning_rate": 1.7655030834709954e-05, - "loss": 0.2446, - "step": 4834 - }, - { - "epoch": 0.25, - "grad_norm": 0.8148673513502684, - "learning_rate": 1.7653971018204498e-05, - "loss": 0.2052, - "step": 4835 - }, - { - "epoch": 0.25, - "grad_norm": 1.035118517547822, - "learning_rate": 1.7652910994084642e-05, - "loss": 0.1889, - "step": 4836 - }, - { - "epoch": 0.25, - "grad_norm": 1.8874921404778846, - "learning_rate": 1.7651850762379146e-05, - "loss": 0.2144, - "step": 4837 - }, - { - "epoch": 0.25, - "grad_norm": 0.971609414329634, - "learning_rate": 1.7650790323116764e-05, - "loss": 0.2261, - "step": 4838 - }, - { - "epoch": 0.25, - "grad_norm": 0.9666178939534561, - "learning_rate": 1.764972967632626e-05, - "loss": 0.2039, - "step": 4839 - }, - { - "epoch": 0.25, - "grad_norm": 0.8557814059940211, - "learning_rate": 1.764866882203641e-05, - "loss": 0.2122, - "step": 4840 - }, - { - "epoch": 0.25, - "grad_norm": 0.879809517867278, - "learning_rate": 1.7647607760275987e-05, - "loss": 0.2397, - "step": 4841 - }, - { - "epoch": 0.25, - "grad_norm": 1.1701570979540563, - "learning_rate": 1.764654649107377e-05, - "loss": 0.2348, - "step": 4842 - }, - { - "epoch": 0.25, - "grad_norm": 0.9819385381229103, - "learning_rate": 1.7645485014458545e-05, - "loss": 0.2138, - "step": 4843 - }, - { - "epoch": 0.25, - "grad_norm": 1.0372094524749425, - "learning_rate": 1.764442333045911e-05, - "loss": 0.2172, - "step": 4844 - }, - { - "epoch": 0.25, - "grad_norm": 1.1991618227826033, - "learning_rate": 1.764336143910426e-05, - "loss": 0.245, - "step": 4845 - }, - { - "epoch": 0.25, - "grad_norm": 1.2515952300942674, - "learning_rate": 1.76422993404228e-05, - "loss": 0.2346, - "step": 4846 - }, - { - "epoch": 0.25, - "grad_norm": 1.162773600935501, - "learning_rate": 1.7641237034443535e-05, - "loss": 0.2349, - "step": 4847 - }, - { - "epoch": 0.25, - "grad_norm": 1.198901515461344, - "learning_rate": 1.764017452119529e-05, - "loss": 0.2354, - "step": 4848 - }, - { - "epoch": 0.25, - "grad_norm": 0.9150902917184182, - "learning_rate": 1.7639111800706874e-05, - "loss": 0.2176, - "step": 4849 - }, - { - "epoch": 0.25, - "grad_norm": 0.9261690607074005, - "learning_rate": 1.7638048873007122e-05, - "loss": 0.2096, - "step": 4850 - }, - { - "epoch": 0.25, - "grad_norm": 0.9936123554925016, - "learning_rate": 1.7636985738124862e-05, - "loss": 0.2199, - "step": 4851 - }, - { - "epoch": 0.25, - "grad_norm": 0.8808245515832294, - "learning_rate": 1.7635922396088932e-05, - "loss": 0.1955, - "step": 4852 - }, - { - "epoch": 0.25, - "grad_norm": 1.109091782939819, - "learning_rate": 1.7634858846928174e-05, - "loss": 0.2231, - "step": 4853 - }, - { - "epoch": 0.25, - "grad_norm": 0.8979865795412102, - "learning_rate": 1.7633795090671445e-05, - "loss": 0.2311, - "step": 4854 - }, - { - "epoch": 0.25, - "grad_norm": 2.1646120782227802, - "learning_rate": 1.7632731127347588e-05, - "loss": 0.2348, - "step": 4855 - }, - { - "epoch": 0.25, - "grad_norm": 0.8728535210494023, - "learning_rate": 1.763166695698547e-05, - "loss": 0.2663, - "step": 4856 - }, - { - "epoch": 0.25, - "grad_norm": 0.9821654410268483, - "learning_rate": 1.7630602579613952e-05, - "loss": 0.2136, - "step": 4857 - }, - { - "epoch": 0.25, - "grad_norm": 2.0888253463187754, - "learning_rate": 1.7629537995261913e-05, - "loss": 0.233, - "step": 4858 - }, - { - "epoch": 0.25, - "grad_norm": 1.010011282244793, - "learning_rate": 1.7628473203958217e-05, - "loss": 0.1997, - "step": 4859 - }, - { - "epoch": 0.25, - "grad_norm": 1.0148151126522487, - "learning_rate": 1.7627408205731762e-05, - "loss": 0.1972, - "step": 4860 - }, - { - "epoch": 0.25, - "grad_norm": 1.0225367877080298, - "learning_rate": 1.7626343000611424e-05, - "loss": 0.2238, - "step": 4861 - }, - { - "epoch": 0.25, - "grad_norm": 1.0413589410630688, - "learning_rate": 1.7625277588626105e-05, - "loss": 0.1706, - "step": 4862 - }, - { - "epoch": 0.25, - "grad_norm": 2.9075675650831743, - "learning_rate": 1.76242119698047e-05, - "loss": 0.2213, - "step": 4863 - }, - { - "epoch": 0.25, - "grad_norm": 0.9576335241403465, - "learning_rate": 1.7623146144176114e-05, - "loss": 0.1945, - "step": 4864 - }, - { - "epoch": 0.25, - "grad_norm": 0.9841155313451155, - "learning_rate": 1.7622080111769257e-05, - "loss": 0.2088, - "step": 4865 - }, - { - "epoch": 0.25, - "grad_norm": 0.8088195105556512, - "learning_rate": 1.762101387261305e-05, - "loss": 0.2216, - "step": 4866 - }, - { - "epoch": 0.25, - "grad_norm": 0.7040924319850286, - "learning_rate": 1.7619947426736404e-05, - "loss": 0.2108, - "step": 4867 - }, - { - "epoch": 0.25, - "grad_norm": 0.8647540788705015, - "learning_rate": 1.761888077416826e-05, - "loss": 0.2055, - "step": 4868 - }, - { - "epoch": 0.25, - "grad_norm": 1.4917793213736366, - "learning_rate": 1.7617813914937544e-05, - "loss": 0.1867, - "step": 4869 - }, - { - "epoch": 0.25, - "grad_norm": 0.7589814055129415, - "learning_rate": 1.7616746849073195e-05, - "loss": 0.2267, - "step": 4870 - }, - { - "epoch": 0.25, - "grad_norm": 1.064675978697834, - "learning_rate": 1.7615679576604157e-05, - "loss": 0.2145, - "step": 4871 - }, - { - "epoch": 0.25, - "grad_norm": 1.8149268942899142, - "learning_rate": 1.761461209755938e-05, - "loss": 0.2243, - "step": 4872 - }, - { - "epoch": 0.25, - "grad_norm": 0.9404075709892658, - "learning_rate": 1.761354441196782e-05, - "loss": 0.2033, - "step": 4873 - }, - { - "epoch": 0.25, - "grad_norm": 2.066560634970023, - "learning_rate": 1.7612476519858437e-05, - "loss": 0.1929, - "step": 4874 - }, - { - "epoch": 0.25, - "grad_norm": 0.8873007469232738, - "learning_rate": 1.76114084212602e-05, - "loss": 0.2135, - "step": 4875 - }, - { - "epoch": 0.25, - "grad_norm": 1.0803485202137888, - "learning_rate": 1.761034011620208e-05, - "loss": 0.2024, - "step": 4876 - }, - { - "epoch": 0.25, - "grad_norm": 0.7213845346451273, - "learning_rate": 1.7609271604713055e-05, - "loss": 0.1998, - "step": 4877 - }, - { - "epoch": 0.25, - "grad_norm": 1.121037230608576, - "learning_rate": 1.7608202886822107e-05, - "loss": 0.2315, - "step": 4878 - }, - { - "epoch": 0.25, - "grad_norm": 0.9896700881943471, - "learning_rate": 1.7607133962558226e-05, - "loss": 0.2267, - "step": 4879 - }, - { - "epoch": 0.25, - "grad_norm": 1.266664377396315, - "learning_rate": 1.7606064831950403e-05, - "loss": 0.2078, - "step": 4880 - }, - { - "epoch": 0.25, - "grad_norm": 0.8553061465111234, - "learning_rate": 1.7604995495027645e-05, - "loss": 0.2116, - "step": 4881 - }, - { - "epoch": 0.25, - "grad_norm": 1.042777046982744, - "learning_rate": 1.7603925951818954e-05, - "loss": 0.2112, - "step": 4882 - }, - { - "epoch": 0.25, - "grad_norm": 0.8740583129550602, - "learning_rate": 1.7602856202353346e-05, - "loss": 0.2023, - "step": 4883 - }, - { - "epoch": 0.25, - "grad_norm": 0.9759766435581416, - "learning_rate": 1.760178624665983e-05, - "loss": 0.2227, - "step": 4884 - }, - { - "epoch": 0.25, - "grad_norm": 0.8085400387638324, - "learning_rate": 1.760071608476743e-05, - "loss": 0.2034, - "step": 4885 - }, - { - "epoch": 0.25, - "grad_norm": 0.833384795728059, - "learning_rate": 1.759964571670518e-05, - "loss": 0.2134, - "step": 4886 - }, - { - "epoch": 0.25, - "grad_norm": 0.9648949804215201, - "learning_rate": 1.7598575142502112e-05, - "loss": 0.2157, - "step": 4887 - }, - { - "epoch": 0.25, - "grad_norm": 4.111719389453267, - "learning_rate": 1.7597504362187263e-05, - "loss": 0.1994, - "step": 4888 - }, - { - "epoch": 0.25, - "grad_norm": 0.8866865010030395, - "learning_rate": 1.759643337578968e-05, - "loss": 0.2051, - "step": 4889 - }, - { - "epoch": 0.25, - "grad_norm": 0.8504575300897302, - "learning_rate": 1.759536218333841e-05, - "loss": 0.232, - "step": 4890 - }, - { - "epoch": 0.25, - "grad_norm": 1.0365543055672188, - "learning_rate": 1.7594290784862516e-05, - "loss": 0.2141, - "step": 4891 - }, - { - "epoch": 0.25, - "grad_norm": 1.0281837501246553, - "learning_rate": 1.7593219180391053e-05, - "loss": 0.2039, - "step": 4892 - }, - { - "epoch": 0.25, - "grad_norm": 0.9744470951075427, - "learning_rate": 1.759214736995309e-05, - "loss": 0.2091, - "step": 4893 - }, - { - "epoch": 0.25, - "grad_norm": 0.9084367183414519, - "learning_rate": 1.7591075353577702e-05, - "loss": 0.226, - "step": 4894 - }, - { - "epoch": 0.25, - "grad_norm": 0.8317928554365319, - "learning_rate": 1.7590003131293967e-05, - "loss": 0.2417, - "step": 4895 - }, - { - "epoch": 0.25, - "grad_norm": 0.7541092473554227, - "learning_rate": 1.758893070313097e-05, - "loss": 0.1997, - "step": 4896 - }, - { - "epoch": 0.25, - "grad_norm": 0.7883788654157217, - "learning_rate": 1.7587858069117794e-05, - "loss": 0.17, - "step": 4897 - }, - { - "epoch": 0.25, - "grad_norm": 1.2109666508636387, - "learning_rate": 1.7586785229283543e-05, - "loss": 0.2183, - "step": 4898 - }, - { - "epoch": 0.25, - "grad_norm": 0.721893397519787, - "learning_rate": 1.7585712183657312e-05, - "loss": 0.2016, - "step": 4899 - }, - { - "epoch": 0.25, - "grad_norm": 1.7380981007588767, - "learning_rate": 1.758463893226821e-05, - "loss": 0.2279, - "step": 4900 - }, - { - "epoch": 0.25, - "grad_norm": 1.1129843085791946, - "learning_rate": 1.758356547514535e-05, - "loss": 0.2218, - "step": 4901 - }, - { - "epoch": 0.25, - "grad_norm": 0.9678562315284823, - "learning_rate": 1.7582491812317846e-05, - "loss": 0.1974, - "step": 4902 - }, - { - "epoch": 0.25, - "grad_norm": 1.131902572948813, - "learning_rate": 1.7581417943814827e-05, - "loss": 0.2106, - "step": 4903 - }, - { - "epoch": 0.25, - "grad_norm": 0.9348090511857947, - "learning_rate": 1.7580343869665416e-05, - "loss": 0.2027, - "step": 4904 - }, - { - "epoch": 0.25, - "grad_norm": 1.457110700204861, - "learning_rate": 1.757926958989875e-05, - "loss": 0.211, - "step": 4905 - }, - { - "epoch": 0.25, - "grad_norm": 1.2125401140409062, - "learning_rate": 1.7578195104543964e-05, - "loss": 0.186, - "step": 4906 - }, - { - "epoch": 0.25, - "grad_norm": 1.1174812078893102, - "learning_rate": 1.7577120413630213e-05, - "loss": 0.2141, - "step": 4907 - }, - { - "epoch": 0.25, - "grad_norm": 1.125396472928048, - "learning_rate": 1.757604551718664e-05, - "loss": 0.2079, - "step": 4908 - }, - { - "epoch": 0.25, - "grad_norm": 0.887645588146478, - "learning_rate": 1.7574970415242407e-05, - "loss": 0.2031, - "step": 4909 - }, - { - "epoch": 0.25, - "grad_norm": 1.2491922728175417, - "learning_rate": 1.757389510782667e-05, - "loss": 0.2104, - "step": 4910 - }, - { - "epoch": 0.25, - "grad_norm": 0.9282309508640144, - "learning_rate": 1.75728195949686e-05, - "loss": 0.1882, - "step": 4911 - }, - { - "epoch": 0.25, - "grad_norm": 1.3550172160894849, - "learning_rate": 1.7571743876697377e-05, - "loss": 0.2003, - "step": 4912 - }, - { - "epoch": 0.25, - "grad_norm": 1.0095560289307193, - "learning_rate": 1.7570667953042167e-05, - "loss": 0.1877, - "step": 4913 - }, - { - "epoch": 0.25, - "grad_norm": 1.1898247035889686, - "learning_rate": 1.7569591824032168e-05, - "loss": 0.2217, - "step": 4914 - }, - { - "epoch": 0.25, - "grad_norm": 1.5094226280385097, - "learning_rate": 1.7568515489696558e-05, - "loss": 0.2195, - "step": 4915 - }, - { - "epoch": 0.25, - "grad_norm": 2.0270852292948054, - "learning_rate": 1.7567438950064542e-05, - "loss": 0.1776, - "step": 4916 - }, - { - "epoch": 0.25, - "grad_norm": 0.8945831319463787, - "learning_rate": 1.7566362205165313e-05, - "loss": 0.2195, - "step": 4917 - }, - { - "epoch": 0.25, - "grad_norm": 1.0540510808229446, - "learning_rate": 1.7565285255028083e-05, - "loss": 0.2182, - "step": 4918 - }, - { - "epoch": 0.25, - "grad_norm": 0.9530058829379024, - "learning_rate": 1.756420809968206e-05, - "loss": 0.2165, - "step": 4919 - }, - { - "epoch": 0.25, - "grad_norm": 0.9591411145842712, - "learning_rate": 1.756313073915647e-05, - "loss": 0.2227, - "step": 4920 - }, - { - "epoch": 0.25, - "grad_norm": 0.8538213378369253, - "learning_rate": 1.756205317348053e-05, - "loss": 0.1935, - "step": 4921 - }, - { - "epoch": 0.25, - "grad_norm": 1.000411790089436, - "learning_rate": 1.756097540268347e-05, - "loss": 0.2327, - "step": 4922 - }, - { - "epoch": 0.25, - "grad_norm": 1.0420033351873208, - "learning_rate": 1.7559897426794528e-05, - "loss": 0.2128, - "step": 4923 - }, - { - "epoch": 0.25, - "grad_norm": 0.9623079713245224, - "learning_rate": 1.7558819245842938e-05, - "loss": 0.1922, - "step": 4924 - }, - { - "epoch": 0.25, - "grad_norm": 1.1098096767015824, - "learning_rate": 1.7557740859857953e-05, - "loss": 0.2266, - "step": 4925 - }, - { - "epoch": 0.25, - "grad_norm": 1.3491601834510454, - "learning_rate": 1.7556662268868817e-05, - "loss": 0.2058, - "step": 4926 - }, - { - "epoch": 0.25, - "grad_norm": 0.9653203134414965, - "learning_rate": 1.7555583472904788e-05, - "loss": 0.2003, - "step": 4927 - }, - { - "epoch": 0.25, - "grad_norm": 0.864274668092387, - "learning_rate": 1.7554504471995134e-05, - "loss": 0.2023, - "step": 4928 - }, - { - "epoch": 0.25, - "grad_norm": 1.0880717825194903, - "learning_rate": 1.7553425266169118e-05, - "loss": 0.2149, - "step": 4929 - }, - { - "epoch": 0.25, - "grad_norm": 1.0489082281570887, - "learning_rate": 1.7552345855456017e-05, - "loss": 0.2129, - "step": 4930 - }, - { - "epoch": 0.25, - "grad_norm": 1.052512902454979, - "learning_rate": 1.7551266239885104e-05, - "loss": 0.2266, - "step": 4931 - }, - { - "epoch": 0.25, - "grad_norm": 1.6579917251985647, - "learning_rate": 1.755018641948567e-05, - "loss": 0.1981, - "step": 4932 - }, - { - "epoch": 0.25, - "grad_norm": 1.7821592754300142, - "learning_rate": 1.7549106394287004e-05, - "loss": 0.2269, - "step": 4933 - }, - { - "epoch": 0.25, - "grad_norm": 0.8870274340321501, - "learning_rate": 1.75480261643184e-05, - "loss": 0.2051, - "step": 4934 - }, - { - "epoch": 0.25, - "grad_norm": 0.8643188466997564, - "learning_rate": 1.7546945729609162e-05, - "loss": 0.222, - "step": 4935 - }, - { - "epoch": 0.25, - "grad_norm": 0.8508180991596589, - "learning_rate": 1.7545865090188594e-05, - "loss": 0.2269, - "step": 4936 - }, - { - "epoch": 0.25, - "grad_norm": 1.0361811340997602, - "learning_rate": 1.7544784246086007e-05, - "loss": 0.1965, - "step": 4937 - }, - { - "epoch": 0.25, - "grad_norm": 0.7944009776152027, - "learning_rate": 1.7543703197330722e-05, - "loss": 0.2294, - "step": 4938 - }, - { - "epoch": 0.25, - "grad_norm": 1.2904691970827866, - "learning_rate": 1.754262194395206e-05, - "loss": 0.1905, - "step": 4939 - }, - { - "epoch": 0.25, - "grad_norm": 1.2425741683662073, - "learning_rate": 1.7541540485979357e-05, - "loss": 0.2298, - "step": 4940 - }, - { - "epoch": 0.25, - "grad_norm": 1.0170380196630249, - "learning_rate": 1.754045882344194e-05, - "loss": 0.2172, - "step": 4941 - }, - { - "epoch": 0.25, - "grad_norm": 0.8377036824445884, - "learning_rate": 1.753937695636915e-05, - "loss": 0.2257, - "step": 4942 - }, - { - "epoch": 0.25, - "grad_norm": 1.4429064752446406, - "learning_rate": 1.7538294884790333e-05, - "loss": 0.2265, - "step": 4943 - }, - { - "epoch": 0.25, - "grad_norm": 1.002250270190205, - "learning_rate": 1.7537212608734842e-05, - "loss": 0.2341, - "step": 4944 - }, - { - "epoch": 0.25, - "grad_norm": 1.0738514608014265, - "learning_rate": 1.7536130128232035e-05, - "loss": 0.2029, - "step": 4945 - }, - { - "epoch": 0.25, - "grad_norm": 0.8699095271611501, - "learning_rate": 1.7535047443311274e-05, - "loss": 0.1972, - "step": 4946 - }, - { - "epoch": 0.25, - "grad_norm": 1.115828821103785, - "learning_rate": 1.7533964554001923e-05, - "loss": 0.2171, - "step": 4947 - }, - { - "epoch": 0.25, - "grad_norm": 0.8999017703628961, - "learning_rate": 1.753288146033336e-05, - "loss": 0.2086, - "step": 4948 - }, - { - "epoch": 0.25, - "grad_norm": 0.9623098645662769, - "learning_rate": 1.753179816233496e-05, - "loss": 0.1941, - "step": 4949 - }, - { - "epoch": 0.25, - "grad_norm": 1.148386681921377, - "learning_rate": 1.7530714660036112e-05, - "loss": 0.2169, - "step": 4950 - }, - { - "epoch": 0.25, - "grad_norm": 0.9251444910619677, - "learning_rate": 1.7529630953466202e-05, - "loss": 0.1907, - "step": 4951 - }, - { - "epoch": 0.25, - "grad_norm": 0.7047593827282227, - "learning_rate": 1.7528547042654626e-05, - "loss": 0.1885, - "step": 4952 - }, - { - "epoch": 0.25, - "grad_norm": 0.8934978764290634, - "learning_rate": 1.7527462927630786e-05, - "loss": 0.211, - "step": 4953 - }, - { - "epoch": 0.25, - "grad_norm": 1.0067227280512174, - "learning_rate": 1.752637860842409e-05, - "loss": 0.1983, - "step": 4954 - }, - { - "epoch": 0.25, - "grad_norm": 1.594869450914085, - "learning_rate": 1.752529408506395e-05, - "loss": 0.2195, - "step": 4955 - }, - { - "epoch": 0.25, - "grad_norm": 0.9634578527813551, - "learning_rate": 1.7524209357579782e-05, - "loss": 0.2178, - "step": 4956 - }, - { - "epoch": 0.25, - "grad_norm": 1.2163076294046304, - "learning_rate": 1.752312442600101e-05, - "loss": 0.2299, - "step": 4957 - }, - { - "epoch": 0.25, - "grad_norm": 0.827651648508648, - "learning_rate": 1.7522039290357066e-05, - "loss": 0.2053, - "step": 4958 - }, - { - "epoch": 0.25, - "grad_norm": 0.8964013881252514, - "learning_rate": 1.7520953950677374e-05, - "loss": 0.2302, - "step": 4959 - }, - { - "epoch": 0.25, - "grad_norm": 0.9926659228293512, - "learning_rate": 1.751986840699139e-05, - "loss": 0.1978, - "step": 4960 - }, - { - "epoch": 0.25, - "grad_norm": 1.4285563481945713, - "learning_rate": 1.7518782659328545e-05, - "loss": 0.2139, - "step": 4961 - }, - { - "epoch": 0.25, - "grad_norm": 0.8479583255530937, - "learning_rate": 1.7517696707718297e-05, - "loss": 0.204, - "step": 4962 - }, - { - "epoch": 0.25, - "grad_norm": 0.7944980238304211, - "learning_rate": 1.7516610552190104e-05, - "loss": 0.2052, - "step": 4963 - }, - { - "epoch": 0.25, - "grad_norm": 1.1398659985067, - "learning_rate": 1.751552419277342e-05, - "loss": 0.2133, - "step": 4964 - }, - { - "epoch": 0.25, - "grad_norm": 0.9269866998395468, - "learning_rate": 1.751443762949772e-05, - "loss": 0.2054, - "step": 4965 - }, - { - "epoch": 0.25, - "grad_norm": 1.006432875016119, - "learning_rate": 1.7513350862392478e-05, - "loss": 0.2559, - "step": 4966 - }, - { - "epoch": 0.25, - "grad_norm": 0.8283899901874466, - "learning_rate": 1.7512263891487165e-05, - "loss": 0.2087, - "step": 4967 - }, - { - "epoch": 0.25, - "grad_norm": 0.9388658415746001, - "learning_rate": 1.7511176716811275e-05, - "loss": 0.2167, - "step": 4968 - }, - { - "epoch": 0.25, - "grad_norm": 1.138438684960691, - "learning_rate": 1.7510089338394287e-05, - "loss": 0.1984, - "step": 4969 - }, - { - "epoch": 0.25, - "grad_norm": 1.1752742054523158, - "learning_rate": 1.7509001756265704e-05, - "loss": 0.2174, - "step": 4970 - }, - { - "epoch": 0.25, - "grad_norm": 0.9476066784239713, - "learning_rate": 1.7507913970455024e-05, - "loss": 0.2176, - "step": 4971 - }, - { - "epoch": 0.25, - "grad_norm": 1.472689980809985, - "learning_rate": 1.750682598099175e-05, - "loss": 0.2131, - "step": 4972 - }, - { - "epoch": 0.25, - "grad_norm": 1.0265826255610757, - "learning_rate": 1.7505737787905404e-05, - "loss": 0.2237, - "step": 4973 - }, - { - "epoch": 0.25, - "grad_norm": 1.5419271677885265, - "learning_rate": 1.7504649391225493e-05, - "loss": 0.2308, - "step": 4974 - }, - { - "epoch": 0.25, - "grad_norm": 1.8149367596422321, - "learning_rate": 1.7503560790981545e-05, - "loss": 0.1969, - "step": 4975 - }, - { - "epoch": 0.25, - "grad_norm": 1.104977220763604, - "learning_rate": 1.750247198720308e-05, - "loss": 0.2062, - "step": 4976 - }, - { - "epoch": 0.25, - "grad_norm": 0.9566919598399071, - "learning_rate": 1.750138297991965e-05, - "loss": 0.1797, - "step": 4977 - }, - { - "epoch": 0.25, - "grad_norm": 1.1870863493849544, - "learning_rate": 1.7500293769160773e-05, - "loss": 0.2253, - "step": 4978 - }, - { - "epoch": 0.25, - "grad_norm": 0.976047243062067, - "learning_rate": 1.749920435495601e-05, - "loss": 0.2292, - "step": 4979 - }, - { - "epoch": 0.25, - "grad_norm": 0.8263157669810269, - "learning_rate": 1.7498114737334902e-05, - "loss": 0.1972, - "step": 4980 - }, - { - "epoch": 0.25, - "grad_norm": 0.9403072898460152, - "learning_rate": 1.749702491632701e-05, - "loss": 0.22, - "step": 4981 - }, - { - "epoch": 0.25, - "grad_norm": 1.2141379406623052, - "learning_rate": 1.749593489196189e-05, - "loss": 0.1972, - "step": 4982 - }, - { - "epoch": 0.25, - "grad_norm": 1.7395791963743559, - "learning_rate": 1.7494844664269117e-05, - "loss": 0.2383, - "step": 4983 - }, - { - "epoch": 0.25, - "grad_norm": 0.8786426046599091, - "learning_rate": 1.749375423327826e-05, - "loss": 0.2054, - "step": 4984 - }, - { - "epoch": 0.25, - "grad_norm": 0.8585034424431195, - "learning_rate": 1.7492663599018893e-05, - "loss": 0.2157, - "step": 4985 - }, - { - "epoch": 0.25, - "grad_norm": 0.9268853902764058, - "learning_rate": 1.7491572761520604e-05, - "loss": 0.2231, - "step": 4986 - }, - { - "epoch": 0.25, - "grad_norm": 1.0354260982717227, - "learning_rate": 1.749048172081298e-05, - "loss": 0.2163, - "step": 4987 - }, - { - "epoch": 0.25, - "grad_norm": 0.8511031855858545, - "learning_rate": 1.7489390476925616e-05, - "loss": 0.2072, - "step": 4988 - }, - { - "epoch": 0.25, - "grad_norm": 0.8786659576418835, - "learning_rate": 1.7488299029888117e-05, - "loss": 0.2133, - "step": 4989 - }, - { - "epoch": 0.25, - "grad_norm": 0.812193258729004, - "learning_rate": 1.7487207379730078e-05, - "loss": 0.2303, - "step": 4990 - }, - { - "epoch": 0.25, - "grad_norm": 2.377958020015114, - "learning_rate": 1.7486115526481117e-05, - "loss": 0.2166, - "step": 4991 - }, - { - "epoch": 0.25, - "grad_norm": 0.8504237116518911, - "learning_rate": 1.748502347017085e-05, - "loss": 0.2049, - "step": 4992 - }, - { - "epoch": 0.25, - "grad_norm": 0.9399276635507209, - "learning_rate": 1.74839312108289e-05, - "loss": 0.1958, - "step": 4993 - }, - { - "epoch": 0.25, - "grad_norm": 0.834340680227393, - "learning_rate": 1.748283874848489e-05, - "loss": 0.2146, - "step": 4994 - }, - { - "epoch": 0.25, - "grad_norm": 1.1110556630181991, - "learning_rate": 1.748174608316846e-05, - "loss": 0.2286, - "step": 4995 - }, - { - "epoch": 0.25, - "grad_norm": 1.208705629511072, - "learning_rate": 1.748065321490924e-05, - "loss": 0.1975, - "step": 4996 - }, - { - "epoch": 0.25, - "grad_norm": 1.2830729263319827, - "learning_rate": 1.7479560143736885e-05, - "loss": 0.1994, - "step": 4997 - }, - { - "epoch": 0.25, - "grad_norm": 0.9408077019695862, - "learning_rate": 1.7478466869681035e-05, - "loss": 0.198, - "step": 4998 - }, - { - "epoch": 0.25, - "grad_norm": 1.5477988939109983, - "learning_rate": 1.7477373392771352e-05, - "loss": 0.2293, - "step": 4999 - }, - { - "epoch": 0.25, - "grad_norm": 1.1463803370648873, - "learning_rate": 1.747627971303749e-05, - "loss": 0.2188, - "step": 5000 - }, - { - "epoch": 0.25, - "grad_norm": 0.8189903246416517, - "learning_rate": 1.7475185830509124e-05, - "loss": 0.2073, - "step": 5001 - }, - { - "epoch": 0.25, - "grad_norm": 1.2662754100903848, - "learning_rate": 1.7474091745215912e-05, - "loss": 0.2507, - "step": 5002 - }, - { - "epoch": 0.25, - "grad_norm": 1.1058798998476678, - "learning_rate": 1.7472997457187543e-05, - "loss": 0.2354, - "step": 5003 - }, - { - "epoch": 0.25, - "grad_norm": 0.9845891354226091, - "learning_rate": 1.74719029664537e-05, - "loss": 0.2061, - "step": 5004 - }, - { - "epoch": 0.25, - "grad_norm": 0.9242568026610173, - "learning_rate": 1.747080827304406e-05, - "loss": 0.2387, - "step": 5005 - }, - { - "epoch": 0.25, - "grad_norm": 0.9957505132812781, - "learning_rate": 1.746971337698833e-05, - "loss": 0.2267, - "step": 5006 - }, - { - "epoch": 0.25, - "grad_norm": 1.130839216768605, - "learning_rate": 1.74686182783162e-05, - "loss": 0.2294, - "step": 5007 - }, - { - "epoch": 0.25, - "grad_norm": 1.0601687365795447, - "learning_rate": 1.7467522977057375e-05, - "loss": 0.2127, - "step": 5008 - }, - { - "epoch": 0.25, - "grad_norm": 1.4708274030519979, - "learning_rate": 1.746642747324157e-05, - "loss": 0.1969, - "step": 5009 - }, - { - "epoch": 0.25, - "grad_norm": 0.9259996950306866, - "learning_rate": 1.74653317668985e-05, - "loss": 0.2041, - "step": 5010 - }, - { - "epoch": 0.25, - "grad_norm": 0.98602895533177, - "learning_rate": 1.7464235858057878e-05, - "loss": 0.204, - "step": 5011 - }, - { - "epoch": 0.25, - "grad_norm": 1.1042160602475162, - "learning_rate": 1.7463139746749443e-05, - "loss": 0.2173, - "step": 5012 - }, - { - "epoch": 0.25, - "grad_norm": 1.328890259856893, - "learning_rate": 1.7462043433002915e-05, - "loss": 0.2021, - "step": 5013 - }, - { - "epoch": 0.25, - "grad_norm": 1.0417663851102328, - "learning_rate": 1.7460946916848042e-05, - "loss": 0.2108, - "step": 5014 - }, - { - "epoch": 0.26, - "grad_norm": 0.9022596817041325, - "learning_rate": 1.7459850198314562e-05, - "loss": 0.1964, - "step": 5015 - }, - { - "epoch": 0.26, - "grad_norm": 1.3093436909694542, - "learning_rate": 1.7458753277432223e-05, - "loss": 0.2168, - "step": 5016 - }, - { - "epoch": 0.26, - "grad_norm": 4.579107620746813, - "learning_rate": 1.745765615423078e-05, - "loss": 0.195, - "step": 5017 - }, - { - "epoch": 0.26, - "grad_norm": 0.9487815613788572, - "learning_rate": 1.7456558828739993e-05, - "loss": 0.2148, - "step": 5018 - }, - { - "epoch": 0.26, - "grad_norm": 1.2607977547964462, - "learning_rate": 1.7455461300989627e-05, - "loss": 0.2157, - "step": 5019 - }, - { - "epoch": 0.26, - "grad_norm": 0.9884044516273045, - "learning_rate": 1.7454363571009452e-05, - "loss": 0.2017, - "step": 5020 - }, - { - "epoch": 0.26, - "grad_norm": 0.9234874314136033, - "learning_rate": 1.7453265638829246e-05, - "loss": 0.2083, - "step": 5021 - }, - { - "epoch": 0.26, - "grad_norm": 0.9101887407052885, - "learning_rate": 1.745216750447878e-05, - "loss": 0.2083, - "step": 5022 - }, - { - "epoch": 0.26, - "grad_norm": 0.9324602885160264, - "learning_rate": 1.7451069167987858e-05, - "loss": 0.2164, - "step": 5023 - }, - { - "epoch": 0.26, - "grad_norm": 1.327783331629871, - "learning_rate": 1.7449970629386265e-05, - "loss": 0.1904, - "step": 5024 - }, - { - "epoch": 0.26, - "grad_norm": 0.9224667945636565, - "learning_rate": 1.7448871888703792e-05, - "loss": 0.1895, - "step": 5025 - }, - { - "epoch": 0.26, - "grad_norm": 0.8781524280150395, - "learning_rate": 1.744777294597025e-05, - "loss": 0.2284, - "step": 5026 - }, - { - "epoch": 0.26, - "grad_norm": 1.106374290900814, - "learning_rate": 1.744667380121545e-05, - "loss": 0.2332, - "step": 5027 - }, - { - "epoch": 0.26, - "grad_norm": 1.2153769878955571, - "learning_rate": 1.7445574454469202e-05, - "loss": 0.2294, - "step": 5028 - }, - { - "epoch": 0.26, - "grad_norm": 1.241271721583726, - "learning_rate": 1.744447490576132e-05, - "loss": 0.2285, - "step": 5029 - }, - { - "epoch": 0.26, - "grad_norm": 1.0396512825817412, - "learning_rate": 1.744337515512164e-05, - "loss": 0.2046, - "step": 5030 - }, - { - "epoch": 0.26, - "grad_norm": 0.990253885865504, - "learning_rate": 1.744227520257999e-05, - "loss": 0.1984, - "step": 5031 - }, - { - "epoch": 0.26, - "grad_norm": 0.9219416918548322, - "learning_rate": 1.7441175048166203e-05, - "loss": 0.2099, - "step": 5032 - }, - { - "epoch": 0.26, - "grad_norm": 0.9096825504551227, - "learning_rate": 1.7440074691910123e-05, - "loss": 0.2179, - "step": 5033 - }, - { - "epoch": 0.26, - "grad_norm": 0.9631200723736666, - "learning_rate": 1.7438974133841596e-05, - "loss": 0.2316, - "step": 5034 - }, - { - "epoch": 0.26, - "grad_norm": 0.951258868171326, - "learning_rate": 1.7437873373990478e-05, - "loss": 0.2176, - "step": 5035 - }, - { - "epoch": 0.26, - "grad_norm": 0.9007417553081281, - "learning_rate": 1.7436772412386622e-05, - "loss": 0.2053, - "step": 5036 - }, - { - "epoch": 0.26, - "grad_norm": 1.0672720680830408, - "learning_rate": 1.7435671249059895e-05, - "loss": 0.2219, - "step": 5037 - }, - { - "epoch": 0.26, - "grad_norm": 0.913837085638295, - "learning_rate": 1.743456988404017e-05, - "loss": 0.2218, - "step": 5038 - }, - { - "epoch": 0.26, - "grad_norm": 0.8086054513570482, - "learning_rate": 1.743346831735731e-05, - "loss": 0.1971, - "step": 5039 - }, - { - "epoch": 0.26, - "grad_norm": 0.9734111808136677, - "learning_rate": 1.7432366549041203e-05, - "loss": 0.2252, - "step": 5040 - }, - { - "epoch": 0.26, - "grad_norm": 0.9860366282496003, - "learning_rate": 1.7431264579121734e-05, - "loss": 0.1978, - "step": 5041 - }, - { - "epoch": 0.26, - "grad_norm": 0.9523717046703011, - "learning_rate": 1.7430162407628796e-05, - "loss": 0.1968, - "step": 5042 - }, - { - "epoch": 0.26, - "grad_norm": 0.8510264207270812, - "learning_rate": 1.742906003459228e-05, - "loss": 0.228, - "step": 5043 - }, - { - "epoch": 0.26, - "grad_norm": 1.0463789344345444, - "learning_rate": 1.7427957460042092e-05, - "loss": 0.1962, - "step": 5044 - }, - { - "epoch": 0.26, - "grad_norm": 1.0830758748704274, - "learning_rate": 1.742685468400814e-05, - "loss": 0.2348, - "step": 5045 - }, - { - "epoch": 0.26, - "grad_norm": 1.1041907730218539, - "learning_rate": 1.7425751706520337e-05, - "loss": 0.2205, - "step": 5046 - }, - { - "epoch": 0.26, - "grad_norm": 1.1479770893064867, - "learning_rate": 1.7424648527608594e-05, - "loss": 0.2145, - "step": 5047 - }, - { - "epoch": 0.26, - "grad_norm": 0.9543567978429918, - "learning_rate": 1.742354514730284e-05, - "loss": 0.2198, - "step": 5048 - }, - { - "epoch": 0.26, - "grad_norm": 1.2461996880851625, - "learning_rate": 1.742244156563301e-05, - "loss": 0.2192, - "step": 5049 - }, - { - "epoch": 0.26, - "grad_norm": 1.0231145847760585, - "learning_rate": 1.742133778262903e-05, - "loss": 0.2013, - "step": 5050 - }, - { - "epoch": 0.26, - "grad_norm": 0.8834962163343502, - "learning_rate": 1.7420233798320848e-05, - "loss": 0.1999, - "step": 5051 - }, - { - "epoch": 0.26, - "grad_norm": 1.2485676337162055, - "learning_rate": 1.74191296127384e-05, - "loss": 0.2233, - "step": 5052 - }, - { - "epoch": 0.26, - "grad_norm": 1.239142632740132, - "learning_rate": 1.7418025225911642e-05, - "loss": 0.2113, - "step": 5053 - }, - { - "epoch": 0.26, - "grad_norm": 0.9004485805006647, - "learning_rate": 1.7416920637870535e-05, - "loss": 0.192, - "step": 5054 - }, - { - "epoch": 0.26, - "grad_norm": 0.938876208690321, - "learning_rate": 1.7415815848645032e-05, - "loss": 0.2166, - "step": 5055 - }, - { - "epoch": 0.26, - "grad_norm": 1.0012870727151062, - "learning_rate": 1.741471085826511e-05, - "loss": 0.2053, - "step": 5056 - }, - { - "epoch": 0.26, - "grad_norm": 1.9550687135535754, - "learning_rate": 1.7413605666760733e-05, - "loss": 0.2296, - "step": 5057 - }, - { - "epoch": 0.26, - "grad_norm": 1.0829094222653786, - "learning_rate": 1.7412500274161885e-05, - "loss": 0.2173, - "step": 5058 - }, - { - "epoch": 0.26, - "grad_norm": 1.1394625432475365, - "learning_rate": 1.741139468049855e-05, - "loss": 0.1889, - "step": 5059 - }, - { - "epoch": 0.26, - "grad_norm": 0.934634491288444, - "learning_rate": 1.7410288885800716e-05, - "loss": 0.2201, - "step": 5060 - }, - { - "epoch": 0.26, - "grad_norm": 1.0153039446794108, - "learning_rate": 1.7409182890098372e-05, - "loss": 0.195, - "step": 5061 - }, - { - "epoch": 0.26, - "grad_norm": 1.6814140355594438, - "learning_rate": 1.7408076693421528e-05, - "loss": 0.2589, - "step": 5062 - }, - { - "epoch": 0.26, - "grad_norm": 1.06895135418714, - "learning_rate": 1.7406970295800188e-05, - "loss": 0.2224, - "step": 5063 - }, - { - "epoch": 0.26, - "grad_norm": 0.964438213306339, - "learning_rate": 1.7405863697264357e-05, - "loss": 0.2029, - "step": 5064 - }, - { - "epoch": 0.26, - "grad_norm": 1.388743643425397, - "learning_rate": 1.7404756897844054e-05, - "loss": 0.2133, - "step": 5065 - }, - { - "epoch": 0.26, - "grad_norm": 1.285559298704333, - "learning_rate": 1.7403649897569302e-05, - "loss": 0.2066, - "step": 5066 - }, - { - "epoch": 0.26, - "grad_norm": 0.8019958506858845, - "learning_rate": 1.740254269647013e-05, - "loss": 0.1981, - "step": 5067 - }, - { - "epoch": 0.26, - "grad_norm": 1.224315249511999, - "learning_rate": 1.7401435294576566e-05, - "loss": 0.2235, - "step": 5068 - }, - { - "epoch": 0.26, - "grad_norm": 1.2892782160471126, - "learning_rate": 1.7400327691918657e-05, - "loss": 0.2211, - "step": 5069 - }, - { - "epoch": 0.26, - "grad_norm": 1.1369492211634333, - "learning_rate": 1.7399219888526438e-05, - "loss": 0.207, - "step": 5070 - }, - { - "epoch": 0.26, - "grad_norm": 0.8704337063333867, - "learning_rate": 1.7398111884429966e-05, - "loss": 0.1868, - "step": 5071 - }, - { - "epoch": 0.26, - "grad_norm": 0.9232480538530867, - "learning_rate": 1.7397003679659285e-05, - "loss": 0.1932, - "step": 5072 - }, - { - "epoch": 0.26, - "grad_norm": 0.90226151158112, - "learning_rate": 1.7395895274244464e-05, - "loss": 0.2037, - "step": 5073 - }, - { - "epoch": 0.26, - "grad_norm": 1.0136440681877585, - "learning_rate": 1.7394786668215564e-05, - "loss": 0.1827, - "step": 5074 - }, - { - "epoch": 0.26, - "grad_norm": 1.1129209039716363, - "learning_rate": 1.739367786160266e-05, - "loss": 0.207, - "step": 5075 - }, - { - "epoch": 0.26, - "grad_norm": 1.204496096714119, - "learning_rate": 1.7392568854435828e-05, - "loss": 0.2223, - "step": 5076 - }, - { - "epoch": 0.26, - "grad_norm": 0.9851629720093069, - "learning_rate": 1.7391459646745145e-05, - "loss": 0.2275, - "step": 5077 - }, - { - "epoch": 0.26, - "grad_norm": 0.8517560781459264, - "learning_rate": 1.7390350238560706e-05, - "loss": 0.1947, - "step": 5078 - }, - { - "epoch": 0.26, - "grad_norm": 0.9295862908238195, - "learning_rate": 1.7389240629912594e-05, - "loss": 0.2254, - "step": 5079 - }, - { - "epoch": 0.26, - "grad_norm": 0.9968261336024243, - "learning_rate": 1.7388130820830914e-05, - "loss": 0.2028, - "step": 5080 - }, - { - "epoch": 0.26, - "grad_norm": 1.0777716957127221, - "learning_rate": 1.738702081134577e-05, - "loss": 0.2021, - "step": 5081 - }, - { - "epoch": 0.26, - "grad_norm": 0.8685885199262032, - "learning_rate": 1.738591060148727e-05, - "loss": 0.2068, - "step": 5082 - }, - { - "epoch": 0.26, - "grad_norm": 1.37429320393285, - "learning_rate": 1.738480019128553e-05, - "loss": 0.225, - "step": 5083 - }, - { - "epoch": 0.26, - "grad_norm": 1.068890387233058, - "learning_rate": 1.7383689580770662e-05, - "loss": 0.2211, - "step": 5084 - }, - { - "epoch": 0.26, - "grad_norm": 1.0787377651617818, - "learning_rate": 1.73825787699728e-05, - "loss": 0.212, - "step": 5085 - }, - { - "epoch": 0.26, - "grad_norm": 0.9387024177777231, - "learning_rate": 1.738146775892207e-05, - "loss": 0.2207, - "step": 5086 - }, - { - "epoch": 0.26, - "grad_norm": 0.8724959929584131, - "learning_rate": 1.738035654764861e-05, - "loss": 0.2258, - "step": 5087 - }, - { - "epoch": 0.26, - "grad_norm": 1.0341884160681272, - "learning_rate": 1.7379245136182563e-05, - "loss": 0.1936, - "step": 5088 - }, - { - "epoch": 0.26, - "grad_norm": 0.9305601735593984, - "learning_rate": 1.7378133524554076e-05, - "loss": 0.2028, - "step": 5089 - }, - { - "epoch": 0.26, - "grad_norm": 0.9216192771869709, - "learning_rate": 1.73770217127933e-05, - "loss": 0.2057, - "step": 5090 - }, - { - "epoch": 0.26, - "grad_norm": 0.8275065447729806, - "learning_rate": 1.737590970093039e-05, - "loss": 0.2191, - "step": 5091 - }, - { - "epoch": 0.26, - "grad_norm": 1.8141071722166489, - "learning_rate": 1.737479748899552e-05, - "loss": 0.2119, - "step": 5092 - }, - { - "epoch": 0.26, - "grad_norm": 0.9472226924117711, - "learning_rate": 1.7373685077018844e-05, - "loss": 0.2188, - "step": 5093 - }, - { - "epoch": 0.26, - "grad_norm": 1.047574692637895, - "learning_rate": 1.7372572465030545e-05, - "loss": 0.2104, - "step": 5094 - }, - { - "epoch": 0.26, - "grad_norm": 0.9203578125292801, - "learning_rate": 1.7371459653060806e-05, - "loss": 0.212, - "step": 5095 - }, - { - "epoch": 0.26, - "grad_norm": 0.8055320516963432, - "learning_rate": 1.7370346641139805e-05, - "loss": 0.2044, - "step": 5096 - }, - { - "epoch": 0.26, - "grad_norm": 0.842063707877496, - "learning_rate": 1.7369233429297734e-05, - "loss": 0.2212, - "step": 5097 - }, - { - "epoch": 0.26, - "grad_norm": 1.021204666571421, - "learning_rate": 1.7368120017564792e-05, - "loss": 0.2016, - "step": 5098 - }, - { - "epoch": 0.26, - "grad_norm": 1.310478716770612, - "learning_rate": 1.7367006405971177e-05, - "loss": 0.2464, - "step": 5099 - }, - { - "epoch": 0.26, - "grad_norm": 1.4498907688492093, - "learning_rate": 1.7365892594547097e-05, - "loss": 0.202, - "step": 5100 - }, - { - "epoch": 0.26, - "grad_norm": 1.196400429856508, - "learning_rate": 1.7364778583322765e-05, - "loss": 0.2247, - "step": 5101 - }, - { - "epoch": 0.26, - "grad_norm": 0.923844439015725, - "learning_rate": 1.7363664372328398e-05, - "loss": 0.2332, - "step": 5102 - }, - { - "epoch": 0.26, - "grad_norm": 0.9109385255003308, - "learning_rate": 1.736254996159422e-05, - "loss": 0.207, - "step": 5103 - }, - { - "epoch": 0.26, - "grad_norm": 1.0074463288140154, - "learning_rate": 1.7361435351150456e-05, - "loss": 0.2194, - "step": 5104 - }, - { - "epoch": 0.26, - "grad_norm": 0.9833506313364749, - "learning_rate": 1.7360320541027342e-05, - "loss": 0.2277, - "step": 5105 - }, - { - "epoch": 0.26, - "grad_norm": 0.7589870505508941, - "learning_rate": 1.7359205531255123e-05, - "loss": 0.2, - "step": 5106 - }, - { - "epoch": 0.26, - "grad_norm": 0.9118865622399176, - "learning_rate": 1.735809032186403e-05, - "loss": 0.2307, - "step": 5107 - }, - { - "epoch": 0.26, - "grad_norm": 0.9536475657355715, - "learning_rate": 1.7356974912884327e-05, - "loss": 0.2089, - "step": 5108 - }, - { - "epoch": 0.26, - "grad_norm": 0.8118782600313672, - "learning_rate": 1.7355859304346262e-05, - "loss": 0.2264, - "step": 5109 - }, - { - "epoch": 0.26, - "grad_norm": 0.9823121461102297, - "learning_rate": 1.7354743496280103e-05, - "loss": 0.2034, - "step": 5110 - }, - { - "epoch": 0.26, - "grad_norm": 0.903102188316509, - "learning_rate": 1.7353627488716106e-05, - "loss": 0.2166, - "step": 5111 - }, - { - "epoch": 0.26, - "grad_norm": 1.0695530528902415, - "learning_rate": 1.7352511281684548e-05, - "loss": 0.2328, - "step": 5112 - }, - { - "epoch": 0.26, - "grad_norm": 1.0200358766120496, - "learning_rate": 1.7351394875215707e-05, - "loss": 0.1933, - "step": 5113 - }, - { - "epoch": 0.26, - "grad_norm": 1.1098375204015731, - "learning_rate": 1.7350278269339867e-05, - "loss": 0.2238, - "step": 5114 - }, - { - "epoch": 0.26, - "grad_norm": 0.9829062711668459, - "learning_rate": 1.7349161464087312e-05, - "loss": 0.2239, - "step": 5115 - }, - { - "epoch": 0.26, - "grad_norm": 0.9539037518616844, - "learning_rate": 1.7348044459488334e-05, - "loss": 0.1879, - "step": 5116 - }, - { - "epoch": 0.26, - "grad_norm": 0.9057009263172614, - "learning_rate": 1.734692725557324e-05, - "loss": 0.2127, - "step": 5117 - }, - { - "epoch": 0.26, - "grad_norm": 0.9057268038085381, - "learning_rate": 1.734580985237233e-05, - "loss": 0.2221, - "step": 5118 - }, - { - "epoch": 0.26, - "grad_norm": 0.8635195105524243, - "learning_rate": 1.7344692249915907e-05, - "loss": 0.2007, - "step": 5119 - }, - { - "epoch": 0.26, - "grad_norm": 1.5332147512028922, - "learning_rate": 1.7343574448234294e-05, - "loss": 0.1949, - "step": 5120 - }, - { - "epoch": 0.26, - "grad_norm": 0.92037678511703, - "learning_rate": 1.7342456447357813e-05, - "loss": 0.2245, - "step": 5121 - }, - { - "epoch": 0.26, - "grad_norm": 1.3309735135939107, - "learning_rate": 1.7341338247316785e-05, - "loss": 0.2805, - "step": 5122 - }, - { - "epoch": 0.26, - "grad_norm": 1.0574651253476115, - "learning_rate": 1.734021984814154e-05, - "loss": 0.2256, - "step": 5123 - }, - { - "epoch": 0.26, - "grad_norm": 1.0460865418770287, - "learning_rate": 1.7339101249862418e-05, - "loss": 0.1988, - "step": 5124 - }, - { - "epoch": 0.26, - "grad_norm": 1.1198594990219086, - "learning_rate": 1.7337982452509757e-05, - "loss": 0.2306, - "step": 5125 - }, - { - "epoch": 0.26, - "grad_norm": 0.8641486909806005, - "learning_rate": 1.7336863456113912e-05, - "loss": 0.1677, - "step": 5126 - }, - { - "epoch": 0.26, - "grad_norm": 0.9883756135069262, - "learning_rate": 1.7335744260705233e-05, - "loss": 0.1941, - "step": 5127 - }, - { - "epoch": 0.26, - "grad_norm": 1.038637478516568, - "learning_rate": 1.733462486631407e-05, - "loss": 0.2101, - "step": 5128 - }, - { - "epoch": 0.26, - "grad_norm": 0.7833925304111683, - "learning_rate": 1.73335052729708e-05, - "loss": 0.1937, - "step": 5129 - }, - { - "epoch": 0.26, - "grad_norm": 1.1097990773913913, - "learning_rate": 1.733238548070578e-05, - "loss": 0.2329, - "step": 5130 - }, - { - "epoch": 0.26, - "grad_norm": 0.9441953595842283, - "learning_rate": 1.7331265489549392e-05, - "loss": 0.2259, - "step": 5131 - }, - { - "epoch": 0.26, - "grad_norm": 1.0517679100815562, - "learning_rate": 1.7330145299532014e-05, - "loss": 0.1908, - "step": 5132 - }, - { - "epoch": 0.26, - "grad_norm": 0.865174450000809, - "learning_rate": 1.7329024910684033e-05, - "loss": 0.2338, - "step": 5133 - }, - { - "epoch": 0.26, - "grad_norm": 1.4011916507400322, - "learning_rate": 1.7327904323035833e-05, - "loss": 0.1923, - "step": 5134 - }, - { - "epoch": 0.26, - "grad_norm": 1.733663183504703, - "learning_rate": 1.7326783536617817e-05, - "loss": 0.2025, - "step": 5135 - }, - { - "epoch": 0.26, - "grad_norm": 1.2850760579337643, - "learning_rate": 1.7325662551460382e-05, - "loss": 0.2243, - "step": 5136 - }, - { - "epoch": 0.26, - "grad_norm": 0.8465786613373185, - "learning_rate": 1.7324541367593938e-05, - "loss": 0.2175, - "step": 5137 - }, - { - "epoch": 0.26, - "grad_norm": 0.8575404475842421, - "learning_rate": 1.7323419985048895e-05, - "loss": 0.2039, - "step": 5138 - }, - { - "epoch": 0.26, - "grad_norm": 1.410440921873463, - "learning_rate": 1.732229840385567e-05, - "loss": 0.1937, - "step": 5139 - }, - { - "epoch": 0.26, - "grad_norm": 1.7958882699551637, - "learning_rate": 1.732117662404469e-05, - "loss": 0.2388, - "step": 5140 - }, - { - "epoch": 0.26, - "grad_norm": 1.3727292538167888, - "learning_rate": 1.7320054645646376e-05, - "loss": 0.2214, - "step": 5141 - }, - { - "epoch": 0.26, - "grad_norm": 1.3762093404062987, - "learning_rate": 1.7318932468691172e-05, - "loss": 0.2023, - "step": 5142 - }, - { - "epoch": 0.26, - "grad_norm": 0.9535075619962287, - "learning_rate": 1.7317810093209507e-05, - "loss": 0.1909, - "step": 5143 - }, - { - "epoch": 0.26, - "grad_norm": 1.0330727306732685, - "learning_rate": 1.731668751923183e-05, - "loss": 0.2154, - "step": 5144 - }, - { - "epoch": 0.26, - "grad_norm": 0.8244231717387553, - "learning_rate": 1.7315564746788592e-05, - "loss": 0.2179, - "step": 5145 - }, - { - "epoch": 0.26, - "grad_norm": 0.9174505172121481, - "learning_rate": 1.731444177591025e-05, - "loss": 0.2175, - "step": 5146 - }, - { - "epoch": 0.26, - "grad_norm": 0.9969529480674448, - "learning_rate": 1.7313318606627258e-05, - "loss": 0.2279, - "step": 5147 - }, - { - "epoch": 0.26, - "grad_norm": 1.2100766546934472, - "learning_rate": 1.7312195238970088e-05, - "loss": 0.1959, - "step": 5148 - }, - { - "epoch": 0.26, - "grad_norm": 1.412580848356862, - "learning_rate": 1.7311071672969206e-05, - "loss": 0.2057, - "step": 5149 - }, - { - "epoch": 0.26, - "grad_norm": 0.9606173224523024, - "learning_rate": 1.7309947908655096e-05, - "loss": 0.2338, - "step": 5150 - }, - { - "epoch": 0.26, - "grad_norm": 1.0224410815863882, - "learning_rate": 1.7308823946058237e-05, - "loss": 0.2161, - "step": 5151 - }, - { - "epoch": 0.26, - "grad_norm": 1.16150172069741, - "learning_rate": 1.7307699785209108e-05, - "loss": 0.1782, - "step": 5152 - }, - { - "epoch": 0.26, - "grad_norm": 0.9738594797923933, - "learning_rate": 1.7306575426138213e-05, - "loss": 0.229, - "step": 5153 - }, - { - "epoch": 0.26, - "grad_norm": 0.9858951663213912, - "learning_rate": 1.730545086887605e-05, - "loss": 0.2131, - "step": 5154 - }, - { - "epoch": 0.26, - "grad_norm": 0.8323012000824099, - "learning_rate": 1.730432611345312e-05, - "loss": 0.2032, - "step": 5155 - }, - { - "epoch": 0.26, - "grad_norm": 0.8765788794254222, - "learning_rate": 1.730320115989993e-05, - "loss": 0.2255, - "step": 5156 - }, - { - "epoch": 0.26, - "grad_norm": 0.8654955411348836, - "learning_rate": 1.7302076008246993e-05, - "loss": 0.1953, - "step": 5157 - }, - { - "epoch": 0.26, - "grad_norm": 1.1368959010757456, - "learning_rate": 1.7300950658524836e-05, - "loss": 0.2067, - "step": 5158 - }, - { - "epoch": 0.26, - "grad_norm": 1.0193148376193706, - "learning_rate": 1.729982511076398e-05, - "loss": 0.2171, - "step": 5159 - }, - { - "epoch": 0.26, - "grad_norm": 3.084242175875511, - "learning_rate": 1.7298699364994952e-05, - "loss": 0.2046, - "step": 5160 - }, - { - "epoch": 0.26, - "grad_norm": 0.9197173344240579, - "learning_rate": 1.7297573421248294e-05, - "loss": 0.2357, - "step": 5161 - }, - { - "epoch": 0.26, - "grad_norm": 1.1499815341386719, - "learning_rate": 1.729644727955454e-05, - "loss": 0.192, - "step": 5162 - }, - { - "epoch": 0.26, - "grad_norm": 1.2309580105851474, - "learning_rate": 1.7295320939944247e-05, - "loss": 0.2381, - "step": 5163 - }, - { - "epoch": 0.26, - "grad_norm": 1.1276580222219936, - "learning_rate": 1.729419440244796e-05, - "loss": 0.2211, - "step": 5164 - }, - { - "epoch": 0.26, - "grad_norm": 2.034431671433726, - "learning_rate": 1.729306766709624e-05, - "loss": 0.202, - "step": 5165 - }, - { - "epoch": 0.26, - "grad_norm": 0.8705311980286389, - "learning_rate": 1.7291940733919645e-05, - "loss": 0.2149, - "step": 5166 - }, - { - "epoch": 0.26, - "grad_norm": 0.8392682674202115, - "learning_rate": 1.7290813602948748e-05, - "loss": 0.2088, - "step": 5167 - }, - { - "epoch": 0.26, - "grad_norm": 0.8684905385029499, - "learning_rate": 1.7289686274214116e-05, - "loss": 0.2112, - "step": 5168 - }, - { - "epoch": 0.26, - "grad_norm": 1.1264314185559987, - "learning_rate": 1.7288558747746335e-05, - "loss": 0.2012, - "step": 5169 - }, - { - "epoch": 0.26, - "grad_norm": 0.978250214493829, - "learning_rate": 1.7287431023575988e-05, - "loss": 0.2155, - "step": 5170 - }, - { - "epoch": 0.26, - "grad_norm": 0.9360368977001948, - "learning_rate": 1.728630310173366e-05, - "loss": 0.2361, - "step": 5171 - }, - { - "epoch": 0.26, - "grad_norm": 0.8958072123216545, - "learning_rate": 1.7285174982249947e-05, - "loss": 0.2179, - "step": 5172 - }, - { - "epoch": 0.26, - "grad_norm": 0.941632424104645, - "learning_rate": 1.7284046665155456e-05, - "loss": 0.2368, - "step": 5173 - }, - { - "epoch": 0.26, - "grad_norm": 0.8263123681636818, - "learning_rate": 1.7282918150480786e-05, - "loss": 0.204, - "step": 5174 - }, - { - "epoch": 0.26, - "grad_norm": 0.914557956965905, - "learning_rate": 1.728178943825655e-05, - "loss": 0.1889, - "step": 5175 - }, - { - "epoch": 0.26, - "grad_norm": 0.9954073324336042, - "learning_rate": 1.7280660528513362e-05, - "loss": 0.2202, - "step": 5176 - }, - { - "epoch": 0.26, - "grad_norm": 0.7576450292859586, - "learning_rate": 1.727953142128185e-05, - "loss": 0.176, - "step": 5177 - }, - { - "epoch": 0.26, - "grad_norm": 0.8326638995643959, - "learning_rate": 1.727840211659263e-05, - "loss": 0.2173, - "step": 5178 - }, - { - "epoch": 0.26, - "grad_norm": 0.9662341961029348, - "learning_rate": 1.727727261447635e-05, - "loss": 0.208, - "step": 5179 - }, - { - "epoch": 0.26, - "grad_norm": 1.1586971973050766, - "learning_rate": 1.7276142914963635e-05, - "loss": 0.2183, - "step": 5180 - }, - { - "epoch": 0.26, - "grad_norm": 1.2003375851518439, - "learning_rate": 1.727501301808513e-05, - "loss": 0.2245, - "step": 5181 - }, - { - "epoch": 0.26, - "grad_norm": 0.9655590656767455, - "learning_rate": 1.7273882923871492e-05, - "loss": 0.1938, - "step": 5182 - }, - { - "epoch": 0.26, - "grad_norm": 1.3799769940471969, - "learning_rate": 1.7272752632353365e-05, - "loss": 0.223, - "step": 5183 - }, - { - "epoch": 0.26, - "grad_norm": 0.8849271828718694, - "learning_rate": 1.727162214356141e-05, - "loss": 0.2208, - "step": 5184 - }, - { - "epoch": 0.26, - "grad_norm": 0.8298189179543192, - "learning_rate": 1.72704914575263e-05, - "loss": 0.1903, - "step": 5185 - }, - { - "epoch": 0.26, - "grad_norm": 0.9025161258137357, - "learning_rate": 1.7269360574278694e-05, - "loss": 0.2043, - "step": 5186 - }, - { - "epoch": 0.26, - "grad_norm": 0.9525245007094877, - "learning_rate": 1.7268229493849273e-05, - "loss": 0.1838, - "step": 5187 - }, - { - "epoch": 0.26, - "grad_norm": 0.7302099244104475, - "learning_rate": 1.7267098216268715e-05, - "loss": 0.2172, - "step": 5188 - }, - { - "epoch": 0.26, - "grad_norm": 1.539391710700766, - "learning_rate": 1.726596674156771e-05, - "loss": 0.249, - "step": 5189 - }, - { - "epoch": 0.26, - "grad_norm": 1.207634354231271, - "learning_rate": 1.7264835069776945e-05, - "loss": 0.2087, - "step": 5190 - }, - { - "epoch": 0.26, - "grad_norm": 1.7730743543155159, - "learning_rate": 1.726370320092712e-05, - "loss": 0.2335, - "step": 5191 - }, - { - "epoch": 0.26, - "grad_norm": 1.1122396560369499, - "learning_rate": 1.7262571135048934e-05, - "loss": 0.2164, - "step": 5192 - }, - { - "epoch": 0.26, - "grad_norm": 1.3805955610238752, - "learning_rate": 1.7261438872173096e-05, - "loss": 0.2053, - "step": 5193 - }, - { - "epoch": 0.26, - "grad_norm": 1.0468968152705946, - "learning_rate": 1.7260306412330317e-05, - "loss": 0.207, - "step": 5194 - }, - { - "epoch": 0.26, - "grad_norm": 0.8783581393917873, - "learning_rate": 1.725917375555132e-05, - "loss": 0.2215, - "step": 5195 - }, - { - "epoch": 0.26, - "grad_norm": 1.3218838017025807, - "learning_rate": 1.7258040901866824e-05, - "loss": 0.209, - "step": 5196 - }, - { - "epoch": 0.26, - "grad_norm": 1.3075623769540963, - "learning_rate": 1.725690785130756e-05, - "loss": 0.2252, - "step": 5197 - }, - { - "epoch": 0.26, - "grad_norm": 1.2522311782873228, - "learning_rate": 1.7255774603904253e-05, - "loss": 0.1953, - "step": 5198 - }, - { - "epoch": 0.26, - "grad_norm": 0.9995002246188047, - "learning_rate": 1.7254641159687657e-05, - "loss": 0.2169, - "step": 5199 - }, - { - "epoch": 0.26, - "grad_norm": 0.8811425044279627, - "learning_rate": 1.725350751868851e-05, - "loss": 0.1869, - "step": 5200 - }, - { - "epoch": 0.26, - "grad_norm": 1.0446346367689991, - "learning_rate": 1.725237368093756e-05, - "loss": 0.1975, - "step": 5201 - }, - { - "epoch": 0.26, - "grad_norm": 0.8818232613937096, - "learning_rate": 1.7251239646465562e-05, - "loss": 0.2087, - "step": 5202 - }, - { - "epoch": 0.26, - "grad_norm": 1.070969961207307, - "learning_rate": 1.7250105415303283e-05, - "loss": 0.2288, - "step": 5203 - }, - { - "epoch": 0.26, - "grad_norm": 0.8885154254095409, - "learning_rate": 1.7248970987481484e-05, - "loss": 0.2268, - "step": 5204 - }, - { - "epoch": 0.26, - "grad_norm": 1.0101696219659841, - "learning_rate": 1.7247836363030935e-05, - "loss": 0.1933, - "step": 5205 - }, - { - "epoch": 0.26, - "grad_norm": 1.0928642196121847, - "learning_rate": 1.724670154198242e-05, - "loss": 0.2081, - "step": 5206 - }, - { - "epoch": 0.26, - "grad_norm": 0.8613315400435708, - "learning_rate": 1.7245566524366713e-05, - "loss": 0.2135, - "step": 5207 - }, - { - "epoch": 0.26, - "grad_norm": 0.8440440917985735, - "learning_rate": 1.7244431310214604e-05, - "loss": 0.234, - "step": 5208 - }, - { - "epoch": 0.26, - "grad_norm": 1.2084843767372881, - "learning_rate": 1.724329589955689e-05, - "loss": 0.1991, - "step": 5209 - }, - { - "epoch": 0.26, - "grad_norm": 1.2607093167904153, - "learning_rate": 1.7242160292424362e-05, - "loss": 0.2204, - "step": 5210 - }, - { - "epoch": 0.26, - "grad_norm": 0.9823639926148594, - "learning_rate": 1.724102448884783e-05, - "loss": 0.2363, - "step": 5211 - }, - { - "epoch": 0.27, - "grad_norm": 0.9200310958626141, - "learning_rate": 1.7239888488858097e-05, - "loss": 0.2204, - "step": 5212 - }, - { - "epoch": 0.27, - "grad_norm": 0.950300322829358, - "learning_rate": 1.723875229248598e-05, - "loss": 0.2235, - "step": 5213 - }, - { - "epoch": 0.27, - "grad_norm": 1.0896260359504228, - "learning_rate": 1.72376158997623e-05, - "loss": 0.2371, - "step": 5214 - }, - { - "epoch": 0.27, - "grad_norm": 0.9389995805140761, - "learning_rate": 1.7236479310717878e-05, - "loss": 0.2168, - "step": 5215 - }, - { - "epoch": 0.27, - "grad_norm": 0.970088000932802, - "learning_rate": 1.723534252538355e-05, - "loss": 0.2205, - "step": 5216 - }, - { - "epoch": 0.27, - "grad_norm": 1.1210916950770902, - "learning_rate": 1.7234205543790143e-05, - "loss": 0.2066, - "step": 5217 - }, - { - "epoch": 0.27, - "grad_norm": 0.8368801598300282, - "learning_rate": 1.7233068365968505e-05, - "loss": 0.2272, - "step": 5218 - }, - { - "epoch": 0.27, - "grad_norm": 1.007554450271422, - "learning_rate": 1.723193099194948e-05, - "loss": 0.2177, - "step": 5219 - }, - { - "epoch": 0.27, - "grad_norm": 1.127711066292918, - "learning_rate": 1.7230793421763914e-05, - "loss": 0.1948, - "step": 5220 - }, - { - "epoch": 0.27, - "grad_norm": 1.4235497287302739, - "learning_rate": 1.722965565544267e-05, - "loss": 0.1932, - "step": 5221 - }, - { - "epoch": 0.27, - "grad_norm": 1.263286240902716, - "learning_rate": 1.722851769301661e-05, - "loss": 0.2192, - "step": 5222 - }, - { - "epoch": 0.27, - "grad_norm": 0.8378216432441894, - "learning_rate": 1.7227379534516594e-05, - "loss": 0.1854, - "step": 5223 - }, - { - "epoch": 0.27, - "grad_norm": 0.87419967006041, - "learning_rate": 1.7226241179973505e-05, - "loss": 0.2109, - "step": 5224 - }, - { - "epoch": 0.27, - "grad_norm": 0.7866001766190502, - "learning_rate": 1.7225102629418217e-05, - "loss": 0.2234, - "step": 5225 - }, - { - "epoch": 0.27, - "grad_norm": 1.030357376854233, - "learning_rate": 1.7223963882881606e-05, - "loss": 0.1997, - "step": 5226 - }, - { - "epoch": 0.27, - "grad_norm": 0.9039436771158188, - "learning_rate": 1.722282494039457e-05, - "loss": 0.2306, - "step": 5227 - }, - { - "epoch": 0.27, - "grad_norm": 0.8311084196052098, - "learning_rate": 1.7221685801988003e-05, - "loss": 0.1968, - "step": 5228 - }, - { - "epoch": 0.27, - "grad_norm": 1.0155689933704966, - "learning_rate": 1.7220546467692797e-05, - "loss": 0.1919, - "step": 5229 - }, - { - "epoch": 0.27, - "grad_norm": 0.7621332743514103, - "learning_rate": 1.721940693753986e-05, - "loss": 0.1836, - "step": 5230 - }, - { - "epoch": 0.27, - "grad_norm": 0.9408075793675481, - "learning_rate": 1.7218267211560103e-05, - "loss": 0.2248, - "step": 5231 - }, - { - "epoch": 0.27, - "grad_norm": 0.978169976254573, - "learning_rate": 1.7217127289784437e-05, - "loss": 0.2243, - "step": 5232 - }, - { - "epoch": 0.27, - "grad_norm": 0.9131227974824523, - "learning_rate": 1.7215987172243788e-05, - "loss": 0.2016, - "step": 5233 - }, - { - "epoch": 0.27, - "grad_norm": 1.0586303575143619, - "learning_rate": 1.721484685896908e-05, - "loss": 0.2054, - "step": 5234 - }, - { - "epoch": 0.27, - "grad_norm": 0.7416757017851802, - "learning_rate": 1.7213706349991243e-05, - "loss": 0.2054, - "step": 5235 - }, - { - "epoch": 0.27, - "grad_norm": 1.2013216628849288, - "learning_rate": 1.721256564534122e-05, - "loss": 0.2013, - "step": 5236 - }, - { - "epoch": 0.27, - "grad_norm": 1.4472292421550423, - "learning_rate": 1.7211424745049935e-05, - "loss": 0.2044, - "step": 5237 - }, - { - "epoch": 0.27, - "grad_norm": 1.1853024744443268, - "learning_rate": 1.7210283649148355e-05, - "loss": 0.2098, - "step": 5238 - }, - { - "epoch": 0.27, - "grad_norm": 0.941182147562369, - "learning_rate": 1.720914235766742e-05, - "loss": 0.2263, - "step": 5239 - }, - { - "epoch": 0.27, - "grad_norm": 1.3061098361084038, - "learning_rate": 1.7208000870638094e-05, - "loss": 0.2085, - "step": 5240 - }, - { - "epoch": 0.27, - "grad_norm": 0.8182517884818539, - "learning_rate": 1.7206859188091334e-05, - "loss": 0.2026, - "step": 5241 - }, - { - "epoch": 0.27, - "grad_norm": 0.8225484731142085, - "learning_rate": 1.7205717310058115e-05, - "loss": 0.2006, - "step": 5242 - }, - { - "epoch": 0.27, - "grad_norm": 0.9473367325916228, - "learning_rate": 1.7204575236569403e-05, - "loss": 0.2237, - "step": 5243 - }, - { - "epoch": 0.27, - "grad_norm": 1.4132935672607803, - "learning_rate": 1.7203432967656185e-05, - "loss": 0.165, - "step": 5244 - }, - { - "epoch": 0.27, - "grad_norm": 0.9167655284706728, - "learning_rate": 1.7202290503349436e-05, - "loss": 0.1945, - "step": 5245 - }, - { - "epoch": 0.27, - "grad_norm": 0.9433089720769943, - "learning_rate": 1.7201147843680156e-05, - "loss": 0.2056, - "step": 5246 - }, - { - "epoch": 0.27, - "grad_norm": 1.5687627971666505, - "learning_rate": 1.7200004988679332e-05, - "loss": 0.2077, - "step": 5247 - }, - { - "epoch": 0.27, - "grad_norm": 1.0573084994768132, - "learning_rate": 1.7198861938377965e-05, - "loss": 0.2293, - "step": 5248 - }, - { - "epoch": 0.27, - "grad_norm": 0.7877683252059432, - "learning_rate": 1.719771869280706e-05, - "loss": 0.1943, - "step": 5249 - }, - { - "epoch": 0.27, - "grad_norm": 1.0970146564562546, - "learning_rate": 1.719657525199763e-05, - "loss": 0.2058, - "step": 5250 - }, - { - "epoch": 0.27, - "grad_norm": 0.9594541757438699, - "learning_rate": 1.7195431615980692e-05, - "loss": 0.2311, - "step": 5251 - }, - { - "epoch": 0.27, - "grad_norm": 0.9958431420570283, - "learning_rate": 1.719428778478726e-05, - "loss": 0.221, - "step": 5252 - }, - { - "epoch": 0.27, - "grad_norm": 1.1668962676306558, - "learning_rate": 1.719314375844837e-05, - "loss": 0.2285, - "step": 5253 - }, - { - "epoch": 0.27, - "grad_norm": 1.0370614060296535, - "learning_rate": 1.719199953699505e-05, - "loss": 0.2159, - "step": 5254 - }, - { - "epoch": 0.27, - "grad_norm": 0.8308609951209482, - "learning_rate": 1.7190855120458333e-05, - "loss": 0.2, - "step": 5255 - }, - { - "epoch": 0.27, - "grad_norm": 1.0656123715589458, - "learning_rate": 1.7189710508869266e-05, - "loss": 0.2023, - "step": 5256 - }, - { - "epoch": 0.27, - "grad_norm": 0.883717672221715, - "learning_rate": 1.7188565702258893e-05, - "loss": 0.235, - "step": 5257 - }, - { - "epoch": 0.27, - "grad_norm": 0.8786535948719226, - "learning_rate": 1.7187420700658273e-05, - "loss": 0.1991, - "step": 5258 - }, - { - "epoch": 0.27, - "grad_norm": 1.0064167074518728, - "learning_rate": 1.718627550409846e-05, - "loss": 0.2273, - "step": 5259 - }, - { - "epoch": 0.27, - "grad_norm": 1.2174073349081103, - "learning_rate": 1.7185130112610518e-05, - "loss": 0.2033, - "step": 5260 - }, - { - "epoch": 0.27, - "grad_norm": 1.0550775089638038, - "learning_rate": 1.7183984526225517e-05, - "loss": 0.2257, - "step": 5261 - }, - { - "epoch": 0.27, - "grad_norm": 0.843216437626411, - "learning_rate": 1.7182838744974525e-05, - "loss": 0.2314, - "step": 5262 - }, - { - "epoch": 0.27, - "grad_norm": 0.9792723396409431, - "learning_rate": 1.7181692768888632e-05, - "loss": 0.1869, - "step": 5263 - }, - { - "epoch": 0.27, - "grad_norm": 0.8365306985513885, - "learning_rate": 1.7180546597998913e-05, - "loss": 0.2035, - "step": 5264 - }, - { - "epoch": 0.27, - "grad_norm": 1.1671518444866484, - "learning_rate": 1.7179400232336462e-05, - "loss": 0.2152, - "step": 5265 - }, - { - "epoch": 0.27, - "grad_norm": 1.0216996902622235, - "learning_rate": 1.7178253671932378e-05, - "loss": 0.1933, - "step": 5266 - }, - { - "epoch": 0.27, - "grad_norm": 0.9408021541356948, - "learning_rate": 1.7177106916817754e-05, - "loss": 0.202, - "step": 5267 - }, - { - "epoch": 0.27, - "grad_norm": 1.076468275502716, - "learning_rate": 1.7175959967023703e-05, - "loss": 0.2212, - "step": 5268 - }, - { - "epoch": 0.27, - "grad_norm": 0.8578660342363781, - "learning_rate": 1.717481282258133e-05, - "loss": 0.1909, - "step": 5269 - }, - { - "epoch": 0.27, - "grad_norm": 1.398141662645059, - "learning_rate": 1.7173665483521757e-05, - "loss": 0.2328, - "step": 5270 - }, - { - "epoch": 0.27, - "grad_norm": 1.1515910090053505, - "learning_rate": 1.7172517949876098e-05, - "loss": 0.2135, - "step": 5271 - }, - { - "epoch": 0.27, - "grad_norm": 0.8627620038988313, - "learning_rate": 1.7171370221675486e-05, - "loss": 0.1891, - "step": 5272 - }, - { - "epoch": 0.27, - "grad_norm": 1.196025450833706, - "learning_rate": 1.7170222298951053e-05, - "loss": 0.2216, - "step": 5273 - }, - { - "epoch": 0.27, - "grad_norm": 0.8852782147065706, - "learning_rate": 1.7169074181733934e-05, - "loss": 0.1954, - "step": 5274 - }, - { - "epoch": 0.27, - "grad_norm": 1.1495828840508941, - "learning_rate": 1.7167925870055273e-05, - "loss": 0.1853, - "step": 5275 - }, - { - "epoch": 0.27, - "grad_norm": 1.0454671880927513, - "learning_rate": 1.716677736394622e-05, - "loss": 0.206, - "step": 5276 - }, - { - "epoch": 0.27, - "grad_norm": 1.1007704616492229, - "learning_rate": 1.7165628663437923e-05, - "loss": 0.1847, - "step": 5277 - }, - { - "epoch": 0.27, - "grad_norm": 0.9550909862275008, - "learning_rate": 1.7164479768561546e-05, - "loss": 0.1888, - "step": 5278 - }, - { - "epoch": 0.27, - "grad_norm": 1.2416770226952552, - "learning_rate": 1.7163330679348248e-05, - "loss": 0.1962, - "step": 5279 - }, - { - "epoch": 0.27, - "grad_norm": 0.9840509198815576, - "learning_rate": 1.7162181395829204e-05, - "loss": 0.2141, - "step": 5280 - }, - { - "epoch": 0.27, - "grad_norm": 1.1730068768818405, - "learning_rate": 1.7161031918035584e-05, - "loss": 0.2345, - "step": 5281 - }, - { - "epoch": 0.27, - "grad_norm": 1.0294990181578525, - "learning_rate": 1.715988224599857e-05, - "loss": 0.2259, - "step": 5282 - }, - { - "epoch": 0.27, - "grad_norm": 0.8718280425482781, - "learning_rate": 1.7158732379749342e-05, - "loss": 0.2206, - "step": 5283 - }, - { - "epoch": 0.27, - "grad_norm": 0.7973950150881813, - "learning_rate": 1.71575823193191e-05, - "loss": 0.2011, - "step": 5284 - }, - { - "epoch": 0.27, - "grad_norm": 0.8958789502079094, - "learning_rate": 1.7156432064739024e-05, - "loss": 0.235, - "step": 5285 - }, - { - "epoch": 0.27, - "grad_norm": 0.9867998258952296, - "learning_rate": 1.7155281616040333e-05, - "loss": 0.2324, - "step": 5286 - }, - { - "epoch": 0.27, - "grad_norm": 0.9559868928584956, - "learning_rate": 1.715413097325422e-05, - "loss": 0.2009, - "step": 5287 - }, - { - "epoch": 0.27, - "grad_norm": 0.9716198690195488, - "learning_rate": 1.71529801364119e-05, - "loss": 0.1964, - "step": 5288 - }, - { - "epoch": 0.27, - "grad_norm": 2.1129669967352243, - "learning_rate": 1.715182910554459e-05, - "loss": 0.2745, - "step": 5289 - }, - { - "epoch": 0.27, - "grad_norm": 0.9123072409362243, - "learning_rate": 1.7150677880683515e-05, - "loss": 0.2095, - "step": 5290 - }, - { - "epoch": 0.27, - "grad_norm": 1.0062131124197695, - "learning_rate": 1.7149526461859897e-05, - "loss": 0.2296, - "step": 5291 - }, - { - "epoch": 0.27, - "grad_norm": 1.0380383036645535, - "learning_rate": 1.7148374849104965e-05, - "loss": 0.2074, - "step": 5292 - }, - { - "epoch": 0.27, - "grad_norm": 1.5639900438854735, - "learning_rate": 1.7147223042449968e-05, - "loss": 0.1836, - "step": 5293 - }, - { - "epoch": 0.27, - "grad_norm": 0.7888321865748255, - "learning_rate": 1.7146071041926138e-05, - "loss": 0.1985, - "step": 5294 - }, - { - "epoch": 0.27, - "grad_norm": 1.433020224907867, - "learning_rate": 1.714491884756473e-05, - "loss": 0.252, - "step": 5295 - }, - { - "epoch": 0.27, - "grad_norm": 0.9519082354925571, - "learning_rate": 1.7143766459396993e-05, - "loss": 0.2113, - "step": 5296 - }, - { - "epoch": 0.27, - "grad_norm": 1.0719575839448903, - "learning_rate": 1.7142613877454186e-05, - "loss": 0.2011, - "step": 5297 - }, - { - "epoch": 0.27, - "grad_norm": 1.0089385606249057, - "learning_rate": 1.714146110176758e-05, - "loss": 0.2314, - "step": 5298 - }, - { - "epoch": 0.27, - "grad_norm": 1.2971638756917503, - "learning_rate": 1.714030813236843e-05, - "loss": 0.2287, - "step": 5299 - }, - { - "epoch": 0.27, - "grad_norm": 1.3336062167999805, - "learning_rate": 1.7139154969288026e-05, - "loss": 0.2174, - "step": 5300 - }, - { - "epoch": 0.27, - "grad_norm": 1.0890198977625634, - "learning_rate": 1.7138001612557636e-05, - "loss": 0.2095, - "step": 5301 - }, - { - "epoch": 0.27, - "grad_norm": 1.2127147798939326, - "learning_rate": 1.7136848062208552e-05, - "loss": 0.2234, - "step": 5302 - }, - { - "epoch": 0.27, - "grad_norm": 3.4830948947069063, - "learning_rate": 1.7135694318272057e-05, - "loss": 0.2158, - "step": 5303 - }, - { - "epoch": 0.27, - "grad_norm": 1.1718966869611827, - "learning_rate": 1.7134540380779453e-05, - "loss": 0.2205, - "step": 5304 - }, - { - "epoch": 0.27, - "grad_norm": 1.302483243747173, - "learning_rate": 1.713338624976204e-05, - "loss": 0.209, - "step": 5305 - }, - { - "epoch": 0.27, - "grad_norm": 0.9086727219113531, - "learning_rate": 1.713223192525112e-05, - "loss": 0.191, - "step": 5306 - }, - { - "epoch": 0.27, - "grad_norm": 1.11751268899965, - "learning_rate": 1.7131077407278008e-05, - "loss": 0.2278, - "step": 5307 - }, - { - "epoch": 0.27, - "grad_norm": 1.0514724800840336, - "learning_rate": 1.7129922695874016e-05, - "loss": 0.2207, - "step": 5308 - }, - { - "epoch": 0.27, - "grad_norm": 0.9040266788733141, - "learning_rate": 1.712876779107047e-05, - "loss": 0.2056, - "step": 5309 - }, - { - "epoch": 0.27, - "grad_norm": 0.9642309087364644, - "learning_rate": 1.7127612692898695e-05, - "loss": 0.1947, - "step": 5310 - }, - { - "epoch": 0.27, - "grad_norm": 0.9401415784326851, - "learning_rate": 1.7126457401390023e-05, - "loss": 0.2014, - "step": 5311 - }, - { - "epoch": 0.27, - "grad_norm": 1.027856827944775, - "learning_rate": 1.712530191657579e-05, - "loss": 0.2217, - "step": 5312 - }, - { - "epoch": 0.27, - "grad_norm": 0.926767366522984, - "learning_rate": 1.712414623848734e-05, - "loss": 0.1958, - "step": 5313 - }, - { - "epoch": 0.27, - "grad_norm": 1.0775741073707663, - "learning_rate": 1.712299036715602e-05, - "loss": 0.234, - "step": 5314 - }, - { - "epoch": 0.27, - "grad_norm": 1.0525832503522692, - "learning_rate": 1.712183430261319e-05, - "loss": 0.1927, - "step": 5315 - }, - { - "epoch": 0.27, - "grad_norm": 0.8435504630743897, - "learning_rate": 1.71206780448902e-05, - "loss": 0.1961, - "step": 5316 - }, - { - "epoch": 0.27, - "grad_norm": 0.8021204498469657, - "learning_rate": 1.711952159401841e-05, - "loss": 0.1926, - "step": 5317 - }, - { - "epoch": 0.27, - "grad_norm": 1.0147291827406735, - "learning_rate": 1.71183649500292e-05, - "loss": 0.1947, - "step": 5318 - }, - { - "epoch": 0.27, - "grad_norm": 1.0304586584190933, - "learning_rate": 1.711720811295394e-05, - "loss": 0.2174, - "step": 5319 - }, - { - "epoch": 0.27, - "grad_norm": 0.9570126657180933, - "learning_rate": 1.7116051082824003e-05, - "loss": 0.2029, - "step": 5320 - }, - { - "epoch": 0.27, - "grad_norm": 0.9625640745359515, - "learning_rate": 1.711489385967078e-05, - "loss": 0.1857, - "step": 5321 - }, - { - "epoch": 0.27, - "grad_norm": 0.9279472961440479, - "learning_rate": 1.7113736443525662e-05, - "loss": 0.2104, - "step": 5322 - }, - { - "epoch": 0.27, - "grad_norm": 0.9756088976113342, - "learning_rate": 1.7112578834420036e-05, - "loss": 0.2136, - "step": 5323 - }, - { - "epoch": 0.27, - "grad_norm": 1.0709776914176408, - "learning_rate": 1.7111421032385313e-05, - "loss": 0.2043, - "step": 5324 - }, - { - "epoch": 0.27, - "grad_norm": 1.225056753679554, - "learning_rate": 1.711026303745289e-05, - "loss": 0.222, - "step": 5325 - }, - { - "epoch": 0.27, - "grad_norm": 0.9022739276514431, - "learning_rate": 1.710910484965418e-05, - "loss": 0.2045, - "step": 5326 - }, - { - "epoch": 0.27, - "grad_norm": 0.7728402557707814, - "learning_rate": 1.71079464690206e-05, - "loss": 0.2314, - "step": 5327 - }, - { - "epoch": 0.27, - "grad_norm": 0.8080405879850907, - "learning_rate": 1.7106787895583573e-05, - "loss": 0.232, - "step": 5328 - }, - { - "epoch": 0.27, - "grad_norm": 0.7797907753458592, - "learning_rate": 1.710562912937452e-05, - "loss": 0.2095, - "step": 5329 - }, - { - "epoch": 0.27, - "grad_norm": 1.5049046662803585, - "learning_rate": 1.710447017042488e-05, - "loss": 0.2189, - "step": 5330 - }, - { - "epoch": 0.27, - "grad_norm": 0.8657038274319842, - "learning_rate": 1.710331101876608e-05, - "loss": 0.2169, - "step": 5331 - }, - { - "epoch": 0.27, - "grad_norm": 0.9621461414357568, - "learning_rate": 1.7102151674429567e-05, - "loss": 0.211, - "step": 5332 - }, - { - "epoch": 0.27, - "grad_norm": 1.3494794196811704, - "learning_rate": 1.7100992137446792e-05, - "loss": 0.1997, - "step": 5333 - }, - { - "epoch": 0.27, - "grad_norm": 1.0959570082489183, - "learning_rate": 1.7099832407849203e-05, - "loss": 0.2073, - "step": 5334 - }, - { - "epoch": 0.27, - "grad_norm": 1.0047881510760754, - "learning_rate": 1.709867248566826e-05, - "loss": 0.2302, - "step": 5335 - }, - { - "epoch": 0.27, - "grad_norm": 1.292243114304556, - "learning_rate": 1.7097512370935422e-05, - "loss": 0.256, - "step": 5336 - }, - { - "epoch": 0.27, - "grad_norm": 0.8680614701378598, - "learning_rate": 1.7096352063682163e-05, - "loss": 0.2063, - "step": 5337 - }, - { - "epoch": 0.27, - "grad_norm": 1.4413192657041078, - "learning_rate": 1.709519156393995e-05, - "loss": 0.2173, - "step": 5338 - }, - { - "epoch": 0.27, - "grad_norm": 0.9953817989447833, - "learning_rate": 1.709403087174027e-05, - "loss": 0.2115, - "step": 5339 - }, - { - "epoch": 0.27, - "grad_norm": 1.0349009846998212, - "learning_rate": 1.70928699871146e-05, - "loss": 0.2315, - "step": 5340 - }, - { - "epoch": 0.27, - "grad_norm": 0.977550830932778, - "learning_rate": 1.709170891009443e-05, - "loss": 0.1951, - "step": 5341 - }, - { - "epoch": 0.27, - "grad_norm": 0.9936609207937576, - "learning_rate": 1.7090547640711256e-05, - "loss": 0.2179, - "step": 5342 - }, - { - "epoch": 0.27, - "grad_norm": 1.0861275402229364, - "learning_rate": 1.7089386178996576e-05, - "loss": 0.2168, - "step": 5343 - }, - { - "epoch": 0.27, - "grad_norm": 0.7740154237094764, - "learning_rate": 1.70882245249819e-05, - "loss": 0.2033, - "step": 5344 - }, - { - "epoch": 0.27, - "grad_norm": 1.137396135861429, - "learning_rate": 1.7087062678698726e-05, - "loss": 0.2073, - "step": 5345 - }, - { - "epoch": 0.27, - "grad_norm": 0.9734578826182253, - "learning_rate": 1.7085900640178582e-05, - "loss": 0.2231, - "step": 5346 - }, - { - "epoch": 0.27, - "grad_norm": 2.023946483249804, - "learning_rate": 1.7084738409452982e-05, - "loss": 0.2204, - "step": 5347 - }, - { - "epoch": 0.27, - "grad_norm": 0.9838807350978188, - "learning_rate": 1.7083575986553448e-05, - "loss": 0.2084, - "step": 5348 - }, - { - "epoch": 0.27, - "grad_norm": 1.038334702873617, - "learning_rate": 1.708241337151152e-05, - "loss": 0.2181, - "step": 5349 - }, - { - "epoch": 0.27, - "grad_norm": 0.9705124443322362, - "learning_rate": 1.708125056435873e-05, - "loss": 0.2053, - "step": 5350 - }, - { - "epoch": 0.27, - "grad_norm": 1.0641275795654905, - "learning_rate": 1.7080087565126613e-05, - "loss": 0.2133, - "step": 5351 - }, - { - "epoch": 0.27, - "grad_norm": 0.8789516998052838, - "learning_rate": 1.707892437384673e-05, - "loss": 0.2174, - "step": 5352 - }, - { - "epoch": 0.27, - "grad_norm": 0.9768442549284431, - "learning_rate": 1.7077760990550617e-05, - "loss": 0.2115, - "step": 5353 - }, - { - "epoch": 0.27, - "grad_norm": 1.0576896748411604, - "learning_rate": 1.7076597415269836e-05, - "loss": 0.2261, - "step": 5354 - }, - { - "epoch": 0.27, - "grad_norm": 2.04442745089693, - "learning_rate": 1.7075433648035952e-05, - "loss": 0.2209, - "step": 5355 - }, - { - "epoch": 0.27, - "grad_norm": 1.9242450796659158, - "learning_rate": 1.707426968888053e-05, - "loss": 0.2035, - "step": 5356 - }, - { - "epoch": 0.27, - "grad_norm": 0.8041864219716768, - "learning_rate": 1.7073105537835145e-05, - "loss": 0.2223, - "step": 5357 - }, - { - "epoch": 0.27, - "grad_norm": 1.0618635761951816, - "learning_rate": 1.7071941194931372e-05, - "loss": 0.1791, - "step": 5358 - }, - { - "epoch": 0.27, - "grad_norm": 0.8489978326658181, - "learning_rate": 1.7070776660200797e-05, - "loss": 0.2106, - "step": 5359 - }, - { - "epoch": 0.27, - "grad_norm": 1.0697877918430914, - "learning_rate": 1.7069611933675006e-05, - "loss": 0.2105, - "step": 5360 - }, - { - "epoch": 0.27, - "grad_norm": 1.06294677154257, - "learning_rate": 1.7068447015385587e-05, - "loss": 0.236, - "step": 5361 - }, - { - "epoch": 0.27, - "grad_norm": 1.2730267833270381, - "learning_rate": 1.706728190536415e-05, - "loss": 0.2269, - "step": 5362 - }, - { - "epoch": 0.27, - "grad_norm": 1.1527764705868002, - "learning_rate": 1.7066116603642285e-05, - "loss": 0.2155, - "step": 5363 - }, - { - "epoch": 0.27, - "grad_norm": 0.9380764122116448, - "learning_rate": 1.706495111025161e-05, - "loss": 0.2209, - "step": 5364 - }, - { - "epoch": 0.27, - "grad_norm": 2.816402075469522, - "learning_rate": 1.706378542522374e-05, - "loss": 0.2285, - "step": 5365 - }, - { - "epoch": 0.27, - "grad_norm": 1.097092753302864, - "learning_rate": 1.706261954859029e-05, - "loss": 0.223, - "step": 5366 - }, - { - "epoch": 0.27, - "grad_norm": 1.2072634018517636, - "learning_rate": 1.7061453480382885e-05, - "loss": 0.2088, - "step": 5367 - }, - { - "epoch": 0.27, - "grad_norm": 1.1616017699973211, - "learning_rate": 1.7060287220633158e-05, - "loss": 0.1903, - "step": 5368 - }, - { - "epoch": 0.27, - "grad_norm": 1.0131673326230828, - "learning_rate": 1.7059120769372737e-05, - "loss": 0.204, - "step": 5369 - }, - { - "epoch": 0.27, - "grad_norm": 1.0226302459685408, - "learning_rate": 1.7057954126633268e-05, - "loss": 0.1971, - "step": 5370 - }, - { - "epoch": 0.27, - "grad_norm": 1.1991289041476931, - "learning_rate": 1.7056787292446396e-05, - "loss": 0.2156, - "step": 5371 - }, - { - "epoch": 0.27, - "grad_norm": 0.9740162925818302, - "learning_rate": 1.7055620266843776e-05, - "loss": 0.2157, - "step": 5372 - }, - { - "epoch": 0.27, - "grad_norm": 1.3082793580892642, - "learning_rate": 1.705445304985705e-05, - "loss": 0.1786, - "step": 5373 - }, - { - "epoch": 0.27, - "grad_norm": 0.9009398830787593, - "learning_rate": 1.7053285641517886e-05, - "loss": 0.1898, - "step": 5374 - }, - { - "epoch": 0.27, - "grad_norm": 1.333310758399028, - "learning_rate": 1.7052118041857954e-05, - "loss": 0.2291, - "step": 5375 - }, - { - "epoch": 0.27, - "grad_norm": 0.8219067225406175, - "learning_rate": 1.7050950250908923e-05, - "loss": 0.2086, - "step": 5376 - }, - { - "epoch": 0.27, - "grad_norm": 1.0504821356893528, - "learning_rate": 1.7049782268702464e-05, - "loss": 0.2185, - "step": 5377 - }, - { - "epoch": 0.27, - "grad_norm": 1.5062762094220585, - "learning_rate": 1.7048614095270264e-05, - "loss": 0.2142, - "step": 5378 - }, - { - "epoch": 0.27, - "grad_norm": 1.1517461171713312, - "learning_rate": 1.704744573064401e-05, - "loss": 0.2252, - "step": 5379 - }, - { - "epoch": 0.27, - "grad_norm": 1.0666768851989106, - "learning_rate": 1.704627717485539e-05, - "loss": 0.2154, - "step": 5380 - }, - { - "epoch": 0.27, - "grad_norm": 0.9800546805869343, - "learning_rate": 1.7045108427936104e-05, - "loss": 0.2039, - "step": 5381 - }, - { - "epoch": 0.27, - "grad_norm": 1.285558315197764, - "learning_rate": 1.7043939489917858e-05, - "loss": 0.217, - "step": 5382 - }, - { - "epoch": 0.27, - "grad_norm": 0.9672045321469797, - "learning_rate": 1.7042770360832353e-05, - "loss": 0.2049, - "step": 5383 - }, - { - "epoch": 0.27, - "grad_norm": 1.3148918349554373, - "learning_rate": 1.7041601040711303e-05, - "loss": 0.2531, - "step": 5384 - }, - { - "epoch": 0.27, - "grad_norm": 0.9287482626051508, - "learning_rate": 1.7040431529586427e-05, - "loss": 0.1955, - "step": 5385 - }, - { - "epoch": 0.27, - "grad_norm": 1.0439263751970198, - "learning_rate": 1.7039261827489452e-05, - "loss": 0.2057, - "step": 5386 - }, - { - "epoch": 0.27, - "grad_norm": 1.0966870805891058, - "learning_rate": 1.7038091934452098e-05, - "loss": 0.2182, - "step": 5387 - }, - { - "epoch": 0.27, - "grad_norm": 1.0429284976399116, - "learning_rate": 1.7036921850506104e-05, - "loss": 0.2444, - "step": 5388 - }, - { - "epoch": 0.27, - "grad_norm": 1.9220008646181936, - "learning_rate": 1.7035751575683208e-05, - "loss": 0.2106, - "step": 5389 - }, - { - "epoch": 0.27, - "grad_norm": 1.2418202057946766, - "learning_rate": 1.7034581110015156e-05, - "loss": 0.2111, - "step": 5390 - }, - { - "epoch": 0.27, - "grad_norm": 0.9337888990910338, - "learning_rate": 1.7033410453533687e-05, - "loss": 0.2124, - "step": 5391 - }, - { - "epoch": 0.27, - "grad_norm": 1.2735895782979454, - "learning_rate": 1.7032239606270567e-05, - "loss": 0.1977, - "step": 5392 - }, - { - "epoch": 0.27, - "grad_norm": 0.8751710465726789, - "learning_rate": 1.7031068568257548e-05, - "loss": 0.1978, - "step": 5393 - }, - { - "epoch": 0.27, - "grad_norm": 0.8455923178009026, - "learning_rate": 1.7029897339526404e-05, - "loss": 0.2147, - "step": 5394 - }, - { - "epoch": 0.27, - "grad_norm": 0.826334770638359, - "learning_rate": 1.702872592010889e-05, - "loss": 0.2223, - "step": 5395 - }, - { - "epoch": 0.27, - "grad_norm": 1.0356782106426614, - "learning_rate": 1.702755431003679e-05, - "loss": 0.216, - "step": 5396 - }, - { - "epoch": 0.27, - "grad_norm": 1.0489926641495397, - "learning_rate": 1.7026382509341885e-05, - "loss": 0.2214, - "step": 5397 - }, - { - "epoch": 0.27, - "grad_norm": 0.9830309908645947, - "learning_rate": 1.7025210518055954e-05, - "loss": 0.2126, - "step": 5398 - }, - { - "epoch": 0.27, - "grad_norm": 0.8860550610226454, - "learning_rate": 1.7024038336210794e-05, - "loss": 0.1982, - "step": 5399 - }, - { - "epoch": 0.27, - "grad_norm": 1.0477678845976919, - "learning_rate": 1.7022865963838195e-05, - "loss": 0.1764, - "step": 5400 - }, - { - "epoch": 0.27, - "grad_norm": 0.9756090995833591, - "learning_rate": 1.7021693400969962e-05, - "loss": 0.2074, - "step": 5401 - }, - { - "epoch": 0.27, - "grad_norm": 1.0863202596740222, - "learning_rate": 1.7020520647637894e-05, - "loss": 0.2004, - "step": 5402 - }, - { - "epoch": 0.27, - "grad_norm": 2.6093505669717603, - "learning_rate": 1.701934770387381e-05, - "loss": 0.2044, - "step": 5403 - }, - { - "epoch": 0.27, - "grad_norm": 1.1912297537469665, - "learning_rate": 1.7018174569709523e-05, - "loss": 0.214, - "step": 5404 - }, - { - "epoch": 0.27, - "grad_norm": 1.0552055408422767, - "learning_rate": 1.7017001245176857e-05, - "loss": 0.2056, - "step": 5405 - }, - { - "epoch": 0.27, - "grad_norm": 0.9568556007681108, - "learning_rate": 1.7015827730307637e-05, - "loss": 0.2009, - "step": 5406 - }, - { - "epoch": 0.27, - "grad_norm": 1.0428933384888612, - "learning_rate": 1.701465402513369e-05, - "loss": 0.1951, - "step": 5407 - }, - { - "epoch": 0.28, - "grad_norm": 1.0154243359082187, - "learning_rate": 1.7013480129686857e-05, - "loss": 0.2184, - "step": 5408 - }, - { - "epoch": 0.28, - "grad_norm": 1.1022197933165605, - "learning_rate": 1.701230604399898e-05, - "loss": 0.197, - "step": 5409 - }, - { - "epoch": 0.28, - "grad_norm": 1.0560707879020101, - "learning_rate": 1.7011131768101906e-05, - "loss": 0.2162, - "step": 5410 - }, - { - "epoch": 0.28, - "grad_norm": 1.2588170514748378, - "learning_rate": 1.7009957302027484e-05, - "loss": 0.2096, - "step": 5411 - }, - { - "epoch": 0.28, - "grad_norm": 1.0979029434665841, - "learning_rate": 1.7008782645807578e-05, - "loss": 0.2112, - "step": 5412 - }, - { - "epoch": 0.28, - "grad_norm": 0.9524557231896512, - "learning_rate": 1.7007607799474045e-05, - "loss": 0.2133, - "step": 5413 - }, - { - "epoch": 0.28, - "grad_norm": 1.1895621186357845, - "learning_rate": 1.7006432763058753e-05, - "loss": 0.2, - "step": 5414 - }, - { - "epoch": 0.28, - "grad_norm": 1.190375294075288, - "learning_rate": 1.7005257536593577e-05, - "loss": 0.2149, - "step": 5415 - }, - { - "epoch": 0.28, - "grad_norm": 0.9149426868161031, - "learning_rate": 1.7004082120110396e-05, - "loss": 0.2138, - "step": 5416 - }, - { - "epoch": 0.28, - "grad_norm": 0.885553028586094, - "learning_rate": 1.7002906513641094e-05, - "loss": 0.2094, - "step": 5417 - }, - { - "epoch": 0.28, - "grad_norm": 1.1377344849438038, - "learning_rate": 1.7001730717217554e-05, - "loss": 0.2188, - "step": 5418 - }, - { - "epoch": 0.28, - "grad_norm": 1.0142042105802078, - "learning_rate": 1.700055473087167e-05, - "loss": 0.2463, - "step": 5419 - }, - { - "epoch": 0.28, - "grad_norm": 0.9577845607969336, - "learning_rate": 1.699937855463535e-05, - "loss": 0.2003, - "step": 5420 - }, - { - "epoch": 0.28, - "grad_norm": 0.7987401617507317, - "learning_rate": 1.699820218854049e-05, - "loss": 0.196, - "step": 5421 - }, - { - "epoch": 0.28, - "grad_norm": 0.9466080898054804, - "learning_rate": 1.6997025632618996e-05, - "loss": 0.2141, - "step": 5422 - }, - { - "epoch": 0.28, - "grad_norm": 0.9591314964338855, - "learning_rate": 1.6995848886902794e-05, - "loss": 0.2099, - "step": 5423 - }, - { - "epoch": 0.28, - "grad_norm": 1.0143917225888177, - "learning_rate": 1.699467195142379e-05, - "loss": 0.2189, - "step": 5424 - }, - { - "epoch": 0.28, - "grad_norm": 1.1287154569913822, - "learning_rate": 1.6993494826213917e-05, - "loss": 0.2119, - "step": 5425 - }, - { - "epoch": 0.28, - "grad_norm": 0.9108997932383712, - "learning_rate": 1.6992317511305103e-05, - "loss": 0.2298, - "step": 5426 - }, - { - "epoch": 0.28, - "grad_norm": 1.0312293374855317, - "learning_rate": 1.6991140006729277e-05, - "loss": 0.2044, - "step": 5427 - }, - { - "epoch": 0.28, - "grad_norm": 1.2243294060862493, - "learning_rate": 1.6989962312518384e-05, - "loss": 0.2113, - "step": 5428 - }, - { - "epoch": 0.28, - "grad_norm": 1.3494977850309595, - "learning_rate": 1.698878442870437e-05, - "loss": 0.2011, - "step": 5429 - }, - { - "epoch": 0.28, - "grad_norm": 1.6045698540700097, - "learning_rate": 1.6987606355319184e-05, - "loss": 0.2081, - "step": 5430 - }, - { - "epoch": 0.28, - "grad_norm": 0.9385287988505853, - "learning_rate": 1.698642809239478e-05, - "loss": 0.189, - "step": 5431 - }, - { - "epoch": 0.28, - "grad_norm": 1.064849914586746, - "learning_rate": 1.698524963996312e-05, - "loss": 0.238, - "step": 5432 - }, - { - "epoch": 0.28, - "grad_norm": 0.9401539962006684, - "learning_rate": 1.698407099805617e-05, - "loss": 0.1883, - "step": 5433 - }, - { - "epoch": 0.28, - "grad_norm": 1.2014313002921773, - "learning_rate": 1.69828921667059e-05, - "loss": 0.1861, - "step": 5434 - }, - { - "epoch": 0.28, - "grad_norm": 1.0598940147011735, - "learning_rate": 1.6981713145944284e-05, - "loss": 0.2166, - "step": 5435 - }, - { - "epoch": 0.28, - "grad_norm": 1.1006504747535542, - "learning_rate": 1.6980533935803306e-05, - "loss": 0.2054, - "step": 5436 - }, - { - "epoch": 0.28, - "grad_norm": 0.8626033245045626, - "learning_rate": 1.6979354536314946e-05, - "loss": 0.2281, - "step": 5437 - }, - { - "epoch": 0.28, - "grad_norm": 0.8596006588035439, - "learning_rate": 1.6978174947511206e-05, - "loss": 0.2064, - "step": 5438 - }, - { - "epoch": 0.28, - "grad_norm": 1.4517008204949062, - "learning_rate": 1.6976995169424072e-05, - "loss": 0.2104, - "step": 5439 - }, - { - "epoch": 0.28, - "grad_norm": 0.8330081037185146, - "learning_rate": 1.6975815202085556e-05, - "loss": 0.191, - "step": 5440 - }, - { - "epoch": 0.28, - "grad_norm": 1.2590492794567847, - "learning_rate": 1.6974635045527652e-05, - "loss": 0.2134, - "step": 5441 - }, - { - "epoch": 0.28, - "grad_norm": 1.002732843409242, - "learning_rate": 1.6973454699782382e-05, - "loss": 0.2081, - "step": 5442 - }, - { - "epoch": 0.28, - "grad_norm": 0.7695496820937182, - "learning_rate": 1.697227416488176e-05, - "loss": 0.224, - "step": 5443 - }, - { - "epoch": 0.28, - "grad_norm": 0.7855777505010354, - "learning_rate": 1.6971093440857808e-05, - "loss": 0.1951, - "step": 5444 - }, - { - "epoch": 0.28, - "grad_norm": 0.9924584904825682, - "learning_rate": 1.6969912527742547e-05, - "loss": 0.2015, - "step": 5445 - }, - { - "epoch": 0.28, - "grad_norm": 1.1286658064526591, - "learning_rate": 1.696873142556802e-05, - "loss": 0.2269, - "step": 5446 - }, - { - "epoch": 0.28, - "grad_norm": 1.1905921151254606, - "learning_rate": 1.6967550134366256e-05, - "loss": 0.2266, - "step": 5447 - }, - { - "epoch": 0.28, - "grad_norm": 0.9938986283838219, - "learning_rate": 1.6966368654169305e-05, - "loss": 0.2068, - "step": 5448 - }, - { - "epoch": 0.28, - "grad_norm": 0.9412002207458522, - "learning_rate": 1.696518698500921e-05, - "loss": 0.1996, - "step": 5449 - }, - { - "epoch": 0.28, - "grad_norm": 0.8128848611817997, - "learning_rate": 1.696400512691802e-05, - "loss": 0.2133, - "step": 5450 - }, - { - "epoch": 0.28, - "grad_norm": 0.7118661473447155, - "learning_rate": 1.6962823079927803e-05, - "loss": 0.2049, - "step": 5451 - }, - { - "epoch": 0.28, - "grad_norm": 0.9659168027327373, - "learning_rate": 1.696164084407062e-05, - "loss": 0.1916, - "step": 5452 - }, - { - "epoch": 0.28, - "grad_norm": 1.07456804180084, - "learning_rate": 1.6960458419378528e-05, - "loss": 0.2201, - "step": 5453 - }, - { - "epoch": 0.28, - "grad_norm": 1.1286968393726051, - "learning_rate": 1.695927580588361e-05, - "loss": 0.2281, - "step": 5454 - }, - { - "epoch": 0.28, - "grad_norm": 0.9546368608833864, - "learning_rate": 1.6958093003617942e-05, - "loss": 0.2243, - "step": 5455 - }, - { - "epoch": 0.28, - "grad_norm": 0.8286795365597308, - "learning_rate": 1.6956910012613612e-05, - "loss": 0.211, - "step": 5456 - }, - { - "epoch": 0.28, - "grad_norm": 3.1624253033639174, - "learning_rate": 1.6955726832902705e-05, - "loss": 0.1959, - "step": 5457 - }, - { - "epoch": 0.28, - "grad_norm": 0.8258952586684022, - "learning_rate": 1.6954543464517313e-05, - "loss": 0.2002, - "step": 5458 - }, - { - "epoch": 0.28, - "grad_norm": 0.9861277252982845, - "learning_rate": 1.6953359907489538e-05, - "loss": 0.1835, - "step": 5459 - }, - { - "epoch": 0.28, - "grad_norm": 0.869053520172057, - "learning_rate": 1.695217616185148e-05, - "loss": 0.2033, - "step": 5460 - }, - { - "epoch": 0.28, - "grad_norm": 1.466873269888362, - "learning_rate": 1.6950992227635252e-05, - "loss": 0.214, - "step": 5461 - }, - { - "epoch": 0.28, - "grad_norm": 0.9216917177291614, - "learning_rate": 1.6949808104872965e-05, - "loss": 0.208, - "step": 5462 - }, - { - "epoch": 0.28, - "grad_norm": 1.1146734573826524, - "learning_rate": 1.6948623793596744e-05, - "loss": 0.2057, - "step": 5463 - }, - { - "epoch": 0.28, - "grad_norm": 0.9795441070290263, - "learning_rate": 1.694743929383871e-05, - "loss": 0.2031, - "step": 5464 - }, - { - "epoch": 0.28, - "grad_norm": 0.9643666484708967, - "learning_rate": 1.6946254605630995e-05, - "loss": 0.2045, - "step": 5465 - }, - { - "epoch": 0.28, - "grad_norm": 0.9659569712091012, - "learning_rate": 1.6945069729005726e-05, - "loss": 0.1829, - "step": 5466 - }, - { - "epoch": 0.28, - "grad_norm": 0.8573246043762399, - "learning_rate": 1.6943884663995055e-05, - "loss": 0.2012, - "step": 5467 - }, - { - "epoch": 0.28, - "grad_norm": 1.1065850167937314, - "learning_rate": 1.6942699410631114e-05, - "loss": 0.1991, - "step": 5468 - }, - { - "epoch": 0.28, - "grad_norm": 2.7600803308859483, - "learning_rate": 1.6941513968946063e-05, - "loss": 0.2072, - "step": 5469 - }, - { - "epoch": 0.28, - "grad_norm": 0.9756031387165406, - "learning_rate": 1.6940328338972053e-05, - "loss": 0.2149, - "step": 5470 - }, - { - "epoch": 0.28, - "grad_norm": 0.7830774903264235, - "learning_rate": 1.6939142520741243e-05, - "loss": 0.1987, - "step": 5471 - }, - { - "epoch": 0.28, - "grad_norm": 1.047546367636695, - "learning_rate": 1.6937956514285797e-05, - "loss": 0.2123, - "step": 5472 - }, - { - "epoch": 0.28, - "grad_norm": 1.2211101494147778, - "learning_rate": 1.6936770319637896e-05, - "loss": 0.1906, - "step": 5473 - }, - { - "epoch": 0.28, - "grad_norm": 0.9542329622300272, - "learning_rate": 1.6935583936829706e-05, - "loss": 0.2119, - "step": 5474 - }, - { - "epoch": 0.28, - "grad_norm": 0.99799970671572, - "learning_rate": 1.693439736589341e-05, - "loss": 0.2135, - "step": 5475 - }, - { - "epoch": 0.28, - "grad_norm": 1.2219047082819476, - "learning_rate": 1.693321060686119e-05, - "loss": 0.2051, - "step": 5476 - }, - { - "epoch": 0.28, - "grad_norm": 1.0655680430773613, - "learning_rate": 1.6932023659765248e-05, - "loss": 0.2079, - "step": 5477 - }, - { - "epoch": 0.28, - "grad_norm": 0.8407756911500547, - "learning_rate": 1.6930836524637766e-05, - "loss": 0.2046, - "step": 5478 - }, - { - "epoch": 0.28, - "grad_norm": 1.0019352759531008, - "learning_rate": 1.6929649201510953e-05, - "loss": 0.2061, - "step": 5479 - }, - { - "epoch": 0.28, - "grad_norm": 1.0336266052542888, - "learning_rate": 1.692846169041702e-05, - "loss": 0.2073, - "step": 5480 - }, - { - "epoch": 0.28, - "grad_norm": 1.1592968084273145, - "learning_rate": 1.6927273991388164e-05, - "loss": 0.2137, - "step": 5481 - }, - { - "epoch": 0.28, - "grad_norm": 1.0472714014176197, - "learning_rate": 1.6926086104456613e-05, - "loss": 0.1932, - "step": 5482 - }, - { - "epoch": 0.28, - "grad_norm": 0.8884190104412825, - "learning_rate": 1.6924898029654585e-05, - "loss": 0.1962, - "step": 5483 - }, - { - "epoch": 0.28, - "grad_norm": 1.440890522067889, - "learning_rate": 1.692370976701431e-05, - "loss": 0.2311, - "step": 5484 - }, - { - "epoch": 0.28, - "grad_norm": 0.9975412180086698, - "learning_rate": 1.6922521316568014e-05, - "loss": 0.1937, - "step": 5485 - }, - { - "epoch": 0.28, - "grad_norm": 0.9162614283067528, - "learning_rate": 1.6921332678347936e-05, - "loss": 0.2477, - "step": 5486 - }, - { - "epoch": 0.28, - "grad_norm": 0.885433998276788, - "learning_rate": 1.6920143852386316e-05, - "loss": 0.1839, - "step": 5487 - }, - { - "epoch": 0.28, - "grad_norm": 0.9263433712536561, - "learning_rate": 1.6918954838715408e-05, - "loss": 0.2161, - "step": 5488 - }, - { - "epoch": 0.28, - "grad_norm": 1.053505012946194, - "learning_rate": 1.6917765637367455e-05, - "loss": 0.2018, - "step": 5489 - }, - { - "epoch": 0.28, - "grad_norm": 1.3169890952877474, - "learning_rate": 1.691657624837472e-05, - "loss": 0.2152, - "step": 5490 - }, - { - "epoch": 0.28, - "grad_norm": 0.7815313992450333, - "learning_rate": 1.6915386671769463e-05, - "loss": 0.1914, - "step": 5491 - }, - { - "epoch": 0.28, - "grad_norm": 1.070490158653429, - "learning_rate": 1.6914196907583952e-05, - "loss": 0.231, - "step": 5492 - }, - { - "epoch": 0.28, - "grad_norm": 0.8897247721716537, - "learning_rate": 1.6913006955850462e-05, - "loss": 0.2147, - "step": 5493 - }, - { - "epoch": 0.28, - "grad_norm": 1.2170042600343527, - "learning_rate": 1.6911816816601266e-05, - "loss": 0.2403, - "step": 5494 - }, - { - "epoch": 0.28, - "grad_norm": 1.202057324771667, - "learning_rate": 1.691062648986865e-05, - "loss": 0.21, - "step": 5495 - }, - { - "epoch": 0.28, - "grad_norm": 1.0040241770319058, - "learning_rate": 1.69094359756849e-05, - "loss": 0.2084, - "step": 5496 - }, - { - "epoch": 0.28, - "grad_norm": 0.8620668771810918, - "learning_rate": 1.6908245274082306e-05, - "loss": 0.2105, - "step": 5497 - }, - { - "epoch": 0.28, - "grad_norm": 1.2031020687777687, - "learning_rate": 1.690705438509317e-05, - "loss": 0.2091, - "step": 5498 - }, - { - "epoch": 0.28, - "grad_norm": 2.2348820456964273, - "learning_rate": 1.6905863308749793e-05, - "loss": 0.1794, - "step": 5499 - }, - { - "epoch": 0.28, - "grad_norm": 1.0425667322586452, - "learning_rate": 1.6904672045084485e-05, - "loss": 0.2207, - "step": 5500 - }, - { - "epoch": 0.28, - "grad_norm": 1.0625047037067803, - "learning_rate": 1.6903480594129557e-05, - "loss": 0.1962, - "step": 5501 - }, - { - "epoch": 0.28, - "grad_norm": 1.1736637841443045, - "learning_rate": 1.6902288955917328e-05, - "loss": 0.2155, - "step": 5502 - }, - { - "epoch": 0.28, - "grad_norm": 4.101980767830775, - "learning_rate": 1.690109713048012e-05, - "loss": 0.2163, - "step": 5503 - }, - { - "epoch": 0.28, - "grad_norm": 5.937151736434946, - "learning_rate": 1.6899905117850266e-05, - "loss": 0.1959, - "step": 5504 - }, - { - "epoch": 0.28, - "grad_norm": 0.8079687624922969, - "learning_rate": 1.6898712918060093e-05, - "loss": 0.1853, - "step": 5505 - }, - { - "epoch": 0.28, - "grad_norm": 0.8451325110889567, - "learning_rate": 1.6897520531141944e-05, - "loss": 0.1939, - "step": 5506 - }, - { - "epoch": 0.28, - "grad_norm": 0.9312220398678531, - "learning_rate": 1.6896327957128162e-05, - "loss": 0.2163, - "step": 5507 - }, - { - "epoch": 0.28, - "grad_norm": 0.9727108910026903, - "learning_rate": 1.689513519605109e-05, - "loss": 0.1939, - "step": 5508 - }, - { - "epoch": 0.28, - "grad_norm": 0.7698826201117085, - "learning_rate": 1.689394224794309e-05, - "loss": 0.2068, - "step": 5509 - }, - { - "epoch": 0.28, - "grad_norm": 0.7688375248720871, - "learning_rate": 1.689274911283652e-05, - "loss": 0.196, - "step": 5510 - }, - { - "epoch": 0.28, - "grad_norm": 1.0006142145056547, - "learning_rate": 1.6891555790763735e-05, - "loss": 0.2128, - "step": 5511 - }, - { - "epoch": 0.28, - "grad_norm": 0.9641337809883561, - "learning_rate": 1.6890362281757117e-05, - "loss": 0.2134, - "step": 5512 - }, - { - "epoch": 0.28, - "grad_norm": 0.8013917063334224, - "learning_rate": 1.6889168585849027e-05, - "loss": 0.2173, - "step": 5513 - }, - { - "epoch": 0.28, - "grad_norm": 0.8104111308085543, - "learning_rate": 1.688797470307185e-05, - "loss": 0.1928, - "step": 5514 - }, - { - "epoch": 0.28, - "grad_norm": 0.8837868269865045, - "learning_rate": 1.6886780633457975e-05, - "loss": 0.2148, - "step": 5515 - }, - { - "epoch": 0.28, - "grad_norm": 0.9583015710960666, - "learning_rate": 1.688558637703978e-05, - "loss": 0.1941, - "step": 5516 - }, - { - "epoch": 0.28, - "grad_norm": 0.8237325286578135, - "learning_rate": 1.688439193384967e-05, - "loss": 0.2069, - "step": 5517 - }, - { - "epoch": 0.28, - "grad_norm": 1.285974324365605, - "learning_rate": 1.688319730392004e-05, - "loss": 0.2282, - "step": 5518 - }, - { - "epoch": 0.28, - "grad_norm": 1.1747843825560265, - "learning_rate": 1.6882002487283293e-05, - "loss": 0.2305, - "step": 5519 - }, - { - "epoch": 0.28, - "grad_norm": 2.165339327087881, - "learning_rate": 1.688080748397184e-05, - "loss": 0.2201, - "step": 5520 - }, - { - "epoch": 0.28, - "grad_norm": 1.0824986772041116, - "learning_rate": 1.6879612294018092e-05, - "loss": 0.2254, - "step": 5521 - }, - { - "epoch": 0.28, - "grad_norm": 1.5266727806258882, - "learning_rate": 1.687841691745448e-05, - "loss": 0.2098, - "step": 5522 - }, - { - "epoch": 0.28, - "grad_norm": 1.0719337185200175, - "learning_rate": 1.6877221354313413e-05, - "loss": 0.1957, - "step": 5523 - }, - { - "epoch": 0.28, - "grad_norm": 0.9076648477056829, - "learning_rate": 1.6876025604627335e-05, - "loss": 0.1939, - "step": 5524 - }, - { - "epoch": 0.28, - "grad_norm": 1.7657803013254962, - "learning_rate": 1.6874829668428667e-05, - "loss": 0.2222, - "step": 5525 - }, - { - "epoch": 0.28, - "grad_norm": 1.3297842157111655, - "learning_rate": 1.6873633545749858e-05, - "loss": 0.2118, - "step": 5526 - }, - { - "epoch": 0.28, - "grad_norm": 0.8951088432162874, - "learning_rate": 1.6872437236623352e-05, - "loss": 0.2007, - "step": 5527 - }, - { - "epoch": 0.28, - "grad_norm": 1.2104995388774757, - "learning_rate": 1.68712407410816e-05, - "loss": 0.1915, - "step": 5528 - }, - { - "epoch": 0.28, - "grad_norm": 1.1709013879049428, - "learning_rate": 1.6870044059157052e-05, - "loss": 0.2232, - "step": 5529 - }, - { - "epoch": 0.28, - "grad_norm": 1.236545303278789, - "learning_rate": 1.686884719088217e-05, - "loss": 0.2066, - "step": 5530 - }, - { - "epoch": 0.28, - "grad_norm": 1.0617137566736374, - "learning_rate": 1.6867650136289425e-05, - "loss": 0.2365, - "step": 5531 - }, - { - "epoch": 0.28, - "grad_norm": 1.3100906770551048, - "learning_rate": 1.686645289541128e-05, - "loss": 0.2079, - "step": 5532 - }, - { - "epoch": 0.28, - "grad_norm": 0.9947827430940916, - "learning_rate": 1.686525546828021e-05, - "loss": 0.1977, - "step": 5533 - }, - { - "epoch": 0.28, - "grad_norm": 2.6173318374449783, - "learning_rate": 1.6864057854928696e-05, - "loss": 0.216, - "step": 5534 - }, - { - "epoch": 0.28, - "grad_norm": 0.8658319621042003, - "learning_rate": 1.686286005538923e-05, - "loss": 0.1902, - "step": 5535 - }, - { - "epoch": 0.28, - "grad_norm": 0.9118008242410937, - "learning_rate": 1.6861662069694292e-05, - "loss": 0.2222, - "step": 5536 - }, - { - "epoch": 0.28, - "grad_norm": 4.287722536221362, - "learning_rate": 1.686046389787639e-05, - "loss": 0.209, - "step": 5537 - }, - { - "epoch": 0.28, - "grad_norm": 2.6445443639482815, - "learning_rate": 1.6859265539968014e-05, - "loss": 0.2136, - "step": 5538 - }, - { - "epoch": 0.28, - "grad_norm": 1.2204081239684939, - "learning_rate": 1.6858066996001673e-05, - "loss": 0.2172, - "step": 5539 - }, - { - "epoch": 0.28, - "grad_norm": 1.0577270671622425, - "learning_rate": 1.6856868266009874e-05, - "loss": 0.1842, - "step": 5540 - }, - { - "epoch": 0.28, - "grad_norm": 1.24709712484204, - "learning_rate": 1.6855669350025138e-05, - "loss": 0.2265, - "step": 5541 - }, - { - "epoch": 0.28, - "grad_norm": 1.0953891982266504, - "learning_rate": 1.6854470248079983e-05, - "loss": 0.2333, - "step": 5542 - }, - { - "epoch": 0.28, - "grad_norm": 1.9000461617024391, - "learning_rate": 1.685327096020694e-05, - "loss": 0.221, - "step": 5543 - }, - { - "epoch": 0.28, - "grad_norm": 0.9692217209212479, - "learning_rate": 1.685207148643853e-05, - "loss": 0.1821, - "step": 5544 - }, - { - "epoch": 0.28, - "grad_norm": 0.9836872882390156, - "learning_rate": 1.6850871826807297e-05, - "loss": 0.2122, - "step": 5545 - }, - { - "epoch": 0.28, - "grad_norm": 1.1278818571254685, - "learning_rate": 1.6849671981345775e-05, - "loss": 0.1943, - "step": 5546 - }, - { - "epoch": 0.28, - "grad_norm": 1.1074602416317132, - "learning_rate": 1.6848471950086517e-05, - "loss": 0.224, - "step": 5547 - }, - { - "epoch": 0.28, - "grad_norm": 1.005589817604628, - "learning_rate": 1.684727173306207e-05, - "loss": 0.2247, - "step": 5548 - }, - { - "epoch": 0.28, - "grad_norm": 0.9020066072063216, - "learning_rate": 1.684607133030499e-05, - "loss": 0.2225, - "step": 5549 - }, - { - "epoch": 0.28, - "grad_norm": 0.7223123330717156, - "learning_rate": 1.684487074184784e-05, - "loss": 0.1739, - "step": 5550 - }, - { - "epoch": 0.28, - "grad_norm": 0.7908254662329712, - "learning_rate": 1.6843669967723183e-05, - "loss": 0.2086, - "step": 5551 - }, - { - "epoch": 0.28, - "grad_norm": 0.9050190208821911, - "learning_rate": 1.6842469007963592e-05, - "loss": 0.1978, - "step": 5552 - }, - { - "epoch": 0.28, - "grad_norm": 1.1064952641542272, - "learning_rate": 1.6841267862601644e-05, - "loss": 0.199, - "step": 5553 - }, - { - "epoch": 0.28, - "grad_norm": 1.3459381134914081, - "learning_rate": 1.6840066531669915e-05, - "loss": 0.2242, - "step": 5554 - }, - { - "epoch": 0.28, - "grad_norm": 0.8388784954524903, - "learning_rate": 1.6838865015200995e-05, - "loss": 0.21, - "step": 5555 - }, - { - "epoch": 0.28, - "grad_norm": 0.9451542750441775, - "learning_rate": 1.683766331322748e-05, - "loss": 0.2214, - "step": 5556 - }, - { - "epoch": 0.28, - "grad_norm": 0.8709892311875053, - "learning_rate": 1.683646142578196e-05, - "loss": 0.2011, - "step": 5557 - }, - { - "epoch": 0.28, - "grad_norm": 0.9928480478934592, - "learning_rate": 1.6835259352897035e-05, - "loss": 0.2044, - "step": 5558 - }, - { - "epoch": 0.28, - "grad_norm": 0.9502941509055366, - "learning_rate": 1.6834057094605314e-05, - "loss": 0.2281, - "step": 5559 - }, - { - "epoch": 0.28, - "grad_norm": 1.0910863752995716, - "learning_rate": 1.683285465093941e-05, - "loss": 0.1846, - "step": 5560 - }, - { - "epoch": 0.28, - "grad_norm": 2.5288457696683095, - "learning_rate": 1.683165202193194e-05, - "loss": 0.1984, - "step": 5561 - }, - { - "epoch": 0.28, - "grad_norm": 1.8747400245648658, - "learning_rate": 1.683044920761552e-05, - "loss": 0.2311, - "step": 5562 - }, - { - "epoch": 0.28, - "grad_norm": 1.0847339869897128, - "learning_rate": 1.682924620802278e-05, - "loss": 0.1875, - "step": 5563 - }, - { - "epoch": 0.28, - "grad_norm": 1.024929122243032, - "learning_rate": 1.682804302318635e-05, - "loss": 0.1864, - "step": 5564 - }, - { - "epoch": 0.28, - "grad_norm": 1.6855645827959116, - "learning_rate": 1.6826839653138872e-05, - "loss": 0.1951, - "step": 5565 - }, - { - "epoch": 0.28, - "grad_norm": 0.9463126278497938, - "learning_rate": 1.6825636097912976e-05, - "loss": 0.2164, - "step": 5566 - }, - { - "epoch": 0.28, - "grad_norm": 0.7997011520507737, - "learning_rate": 1.682443235754132e-05, - "loss": 0.1905, - "step": 5567 - }, - { - "epoch": 0.28, - "grad_norm": 1.3388113262501908, - "learning_rate": 1.682322843205655e-05, - "loss": 0.2205, - "step": 5568 - }, - { - "epoch": 0.28, - "grad_norm": 0.9707915728471659, - "learning_rate": 1.6822024321491323e-05, - "loss": 0.2247, - "step": 5569 - }, - { - "epoch": 0.28, - "grad_norm": 0.8366771355218784, - "learning_rate": 1.6820820025878298e-05, - "loss": 0.196, - "step": 5570 - }, - { - "epoch": 0.28, - "grad_norm": 1.1658522644977178, - "learning_rate": 1.6819615545250146e-05, - "loss": 0.2072, - "step": 5571 - }, - { - "epoch": 0.28, - "grad_norm": 1.0906354617526341, - "learning_rate": 1.681841087963954e-05, - "loss": 0.2709, - "step": 5572 - }, - { - "epoch": 0.28, - "grad_norm": 0.7832304082448883, - "learning_rate": 1.681720602907915e-05, - "loss": 0.1903, - "step": 5573 - }, - { - "epoch": 0.28, - "grad_norm": 0.9031727797125559, - "learning_rate": 1.6816000993601668e-05, - "loss": 0.1975, - "step": 5574 - }, - { - "epoch": 0.28, - "grad_norm": 0.8269271957462649, - "learning_rate": 1.6814795773239766e-05, - "loss": 0.1893, - "step": 5575 - }, - { - "epoch": 0.28, - "grad_norm": 2.409115031127353, - "learning_rate": 1.681359036802615e-05, - "loss": 0.2361, - "step": 5576 - }, - { - "epoch": 0.28, - "grad_norm": 1.0714923444580238, - "learning_rate": 1.681238477799351e-05, - "loss": 0.2124, - "step": 5577 - }, - { - "epoch": 0.28, - "grad_norm": 1.4448729012174302, - "learning_rate": 1.6811179003174546e-05, - "loss": 0.2037, - "step": 5578 - }, - { - "epoch": 0.28, - "grad_norm": 0.9301528109817786, - "learning_rate": 1.6809973043601962e-05, - "loss": 0.1971, - "step": 5579 - }, - { - "epoch": 0.28, - "grad_norm": 1.1091802621706934, - "learning_rate": 1.680876689930848e-05, - "loss": 0.2285, - "step": 5580 - }, - { - "epoch": 0.28, - "grad_norm": 0.7086801941850318, - "learning_rate": 1.680756057032681e-05, - "loss": 0.1964, - "step": 5581 - }, - { - "epoch": 0.28, - "grad_norm": 1.0176127550275247, - "learning_rate": 1.680635405668968e-05, - "loss": 0.2213, - "step": 5582 - }, - { - "epoch": 0.28, - "grad_norm": 1.4627825962559355, - "learning_rate": 1.6805147358429806e-05, - "loss": 0.1863, - "step": 5583 - }, - { - "epoch": 0.28, - "grad_norm": 0.7935095943088147, - "learning_rate": 1.6803940475579926e-05, - "loss": 0.1861, - "step": 5584 - }, - { - "epoch": 0.28, - "grad_norm": 1.2494953657874224, - "learning_rate": 1.680273340817278e-05, - "loss": 0.2112, - "step": 5585 - }, - { - "epoch": 0.28, - "grad_norm": 1.154399835221216, - "learning_rate": 1.68015261562411e-05, - "loss": 0.2179, - "step": 5586 - }, - { - "epoch": 0.28, - "grad_norm": 0.9786789665166755, - "learning_rate": 1.6800318719817647e-05, - "loss": 0.2403, - "step": 5587 - }, - { - "epoch": 0.28, - "grad_norm": 0.7028235365975033, - "learning_rate": 1.679911109893516e-05, - "loss": 0.1846, - "step": 5588 - }, - { - "epoch": 0.28, - "grad_norm": 3.1778933740553295, - "learning_rate": 1.67979032936264e-05, - "loss": 0.2242, - "step": 5589 - }, - { - "epoch": 0.28, - "grad_norm": 1.398224553047776, - "learning_rate": 1.679669530392413e-05, - "loss": 0.2028, - "step": 5590 - }, - { - "epoch": 0.28, - "grad_norm": 1.3966578774852827, - "learning_rate": 1.679548712986111e-05, - "loss": 0.2242, - "step": 5591 - }, - { - "epoch": 0.28, - "grad_norm": 0.9520399610008522, - "learning_rate": 1.6794278771470127e-05, - "loss": 0.1944, - "step": 5592 - }, - { - "epoch": 0.28, - "grad_norm": 0.8628424629593858, - "learning_rate": 1.6793070228783946e-05, - "loss": 0.1972, - "step": 5593 - }, - { - "epoch": 0.28, - "grad_norm": 0.8946564228844422, - "learning_rate": 1.679186150183535e-05, - "loss": 0.2161, - "step": 5594 - }, - { - "epoch": 0.28, - "grad_norm": 1.660295366849027, - "learning_rate": 1.6790652590657125e-05, - "loss": 0.2274, - "step": 5595 - }, - { - "epoch": 0.28, - "grad_norm": 1.1028149430210776, - "learning_rate": 1.678944349528207e-05, - "loss": 0.2041, - "step": 5596 - }, - { - "epoch": 0.28, - "grad_norm": 0.9057071652801462, - "learning_rate": 1.6788234215742974e-05, - "loss": 0.224, - "step": 5597 - }, - { - "epoch": 0.28, - "grad_norm": 0.8486970283199143, - "learning_rate": 1.6787024752072642e-05, - "loss": 0.2148, - "step": 5598 - }, - { - "epoch": 0.28, - "grad_norm": 1.1979543365735743, - "learning_rate": 1.678581510430388e-05, - "loss": 0.2268, - "step": 5599 - }, - { - "epoch": 0.28, - "grad_norm": 1.0996428666993319, - "learning_rate": 1.6784605272469502e-05, - "loss": 0.2101, - "step": 5600 - }, - { - "epoch": 0.28, - "grad_norm": 1.1156512400914844, - "learning_rate": 1.6783395256602318e-05, - "loss": 0.2249, - "step": 5601 - }, - { - "epoch": 0.28, - "grad_norm": 1.0102994579614044, - "learning_rate": 1.6782185056735157e-05, - "loss": 0.2209, - "step": 5602 - }, - { - "epoch": 0.28, - "grad_norm": 3.1022746944347066, - "learning_rate": 1.6780974672900845e-05, - "loss": 0.1736, - "step": 5603 - }, - { - "epoch": 0.28, - "grad_norm": 0.8559003285821831, - "learning_rate": 1.677976410513221e-05, - "loss": 0.2073, - "step": 5604 - }, - { - "epoch": 0.29, - "grad_norm": 1.316426081816249, - "learning_rate": 1.6778553353462092e-05, - "loss": 0.2115, - "step": 5605 - }, - { - "epoch": 0.29, - "grad_norm": 1.3715287544859591, - "learning_rate": 1.677734241792333e-05, - "loss": 0.2162, - "step": 5606 - }, - { - "epoch": 0.29, - "grad_norm": 0.8883864765063251, - "learning_rate": 1.677613129854877e-05, - "loss": 0.203, - "step": 5607 - }, - { - "epoch": 0.29, - "grad_norm": 0.8644309330656957, - "learning_rate": 1.6774919995371272e-05, - "loss": 0.1833, - "step": 5608 - }, - { - "epoch": 0.29, - "grad_norm": 0.8490153399647028, - "learning_rate": 1.6773708508423683e-05, - "loss": 0.2016, - "step": 5609 - }, - { - "epoch": 0.29, - "grad_norm": 0.9282538393726152, - "learning_rate": 1.6772496837738866e-05, - "loss": 0.2365, - "step": 5610 - }, - { - "epoch": 0.29, - "grad_norm": 1.0473637461227412, - "learning_rate": 1.6771284983349693e-05, - "loss": 0.2206, - "step": 5611 - }, - { - "epoch": 0.29, - "grad_norm": 1.0559113462080079, - "learning_rate": 1.6770072945289034e-05, - "loss": 0.2596, - "step": 5612 - }, - { - "epoch": 0.29, - "grad_norm": 0.7196028723600773, - "learning_rate": 1.676886072358976e-05, - "loss": 0.2199, - "step": 5613 - }, - { - "epoch": 0.29, - "grad_norm": 1.091513271080775, - "learning_rate": 1.676764831828476e-05, - "loss": 0.2019, - "step": 5614 - }, - { - "epoch": 0.29, - "grad_norm": 0.9225670399473616, - "learning_rate": 1.6766435729406913e-05, - "loss": 0.2347, - "step": 5615 - }, - { - "epoch": 0.29, - "grad_norm": 0.99503266690628, - "learning_rate": 1.6765222956989117e-05, - "loss": 0.2061, - "step": 5616 - }, - { - "epoch": 0.29, - "grad_norm": 0.878108183400604, - "learning_rate": 1.6764010001064268e-05, - "loss": 0.1947, - "step": 5617 - }, - { - "epoch": 0.29, - "grad_norm": 0.881611187195488, - "learning_rate": 1.6762796861665262e-05, - "loss": 0.2059, - "step": 5618 - }, - { - "epoch": 0.29, - "grad_norm": 0.8924377250902428, - "learning_rate": 1.6761583538825013e-05, - "loss": 0.2228, - "step": 5619 - }, - { - "epoch": 0.29, - "grad_norm": 0.8815121230345735, - "learning_rate": 1.6760370032576424e-05, - "loss": 0.2137, - "step": 5620 - }, - { - "epoch": 0.29, - "grad_norm": 0.8511565240034018, - "learning_rate": 1.6759156342952422e-05, - "loss": 0.2009, - "step": 5621 - }, - { - "epoch": 0.29, - "grad_norm": 1.1423106856019447, - "learning_rate": 1.6757942469985917e-05, - "loss": 0.2177, - "step": 5622 - }, - { - "epoch": 0.29, - "grad_norm": 4.370018827440741, - "learning_rate": 1.6756728413709843e-05, - "loss": 0.2134, - "step": 5623 - }, - { - "epoch": 0.29, - "grad_norm": 1.3487268044836134, - "learning_rate": 1.6755514174157127e-05, - "loss": 0.2188, - "step": 5624 - }, - { - "epoch": 0.29, - "grad_norm": 1.090300940977795, - "learning_rate": 1.675429975136071e-05, - "loss": 0.2023, - "step": 5625 - }, - { - "epoch": 0.29, - "grad_norm": 0.9414580517820581, - "learning_rate": 1.675308514535353e-05, - "loss": 0.2219, - "step": 5626 - }, - { - "epoch": 0.29, - "grad_norm": 0.8682789778569923, - "learning_rate": 1.6751870356168534e-05, - "loss": 0.1966, - "step": 5627 - }, - { - "epoch": 0.29, - "grad_norm": 0.8623789355882996, - "learning_rate": 1.6750655383838674e-05, - "loss": 0.2179, - "step": 5628 - }, - { - "epoch": 0.29, - "grad_norm": 0.9338422907144169, - "learning_rate": 1.6749440228396903e-05, - "loss": 0.2091, - "step": 5629 - }, - { - "epoch": 0.29, - "grad_norm": 1.1181363031884917, - "learning_rate": 1.6748224889876188e-05, - "loss": 0.2387, - "step": 5630 - }, - { - "epoch": 0.29, - "grad_norm": 1.623438042143102, - "learning_rate": 1.674700936830949e-05, - "loss": 0.2126, - "step": 5631 - }, - { - "epoch": 0.29, - "grad_norm": 0.910801099431657, - "learning_rate": 1.6745793663729785e-05, - "loss": 0.238, - "step": 5632 - }, - { - "epoch": 0.29, - "grad_norm": 0.9199691919612899, - "learning_rate": 1.674457777617004e-05, - "loss": 0.1937, - "step": 5633 - }, - { - "epoch": 0.29, - "grad_norm": 0.9524353356354959, - "learning_rate": 1.6743361705663246e-05, - "loss": 0.2159, - "step": 5634 - }, - { - "epoch": 0.29, - "grad_norm": 0.8468532219803875, - "learning_rate": 1.6742145452242383e-05, - "loss": 0.2097, - "step": 5635 - }, - { - "epoch": 0.29, - "grad_norm": 0.8347163633932896, - "learning_rate": 1.6740929015940442e-05, - "loss": 0.209, - "step": 5636 - }, - { - "epoch": 0.29, - "grad_norm": 1.0916796899646724, - "learning_rate": 1.6739712396790424e-05, - "loss": 0.2056, - "step": 5637 - }, - { - "epoch": 0.29, - "grad_norm": 0.9631455506402163, - "learning_rate": 1.673849559482533e-05, - "loss": 0.1959, - "step": 5638 - }, - { - "epoch": 0.29, - "grad_norm": 0.9242865697867108, - "learning_rate": 1.6737278610078153e-05, - "loss": 0.1952, - "step": 5639 - }, - { - "epoch": 0.29, - "grad_norm": 1.074821995609312, - "learning_rate": 1.6736061442581922e-05, - "loss": 0.2034, - "step": 5640 - }, - { - "epoch": 0.29, - "grad_norm": 0.9970315145305882, - "learning_rate": 1.673484409236964e-05, - "loss": 0.1959, - "step": 5641 - }, - { - "epoch": 0.29, - "grad_norm": 0.8036034716113344, - "learning_rate": 1.673362655947433e-05, - "loss": 0.2198, - "step": 5642 - }, - { - "epoch": 0.29, - "grad_norm": 0.9439196709366106, - "learning_rate": 1.673240884392902e-05, - "loss": 0.1962, - "step": 5643 - }, - { - "epoch": 0.29, - "grad_norm": 0.881953076253939, - "learning_rate": 1.6731190945766742e-05, - "loss": 0.2086, - "step": 5644 - }, - { - "epoch": 0.29, - "grad_norm": 1.0536395032248527, - "learning_rate": 1.672997286502053e-05, - "loss": 0.2236, - "step": 5645 - }, - { - "epoch": 0.29, - "grad_norm": 1.3389673197902134, - "learning_rate": 1.672875460172342e-05, - "loss": 0.2045, - "step": 5646 - }, - { - "epoch": 0.29, - "grad_norm": 0.9475155150541045, - "learning_rate": 1.6727536155908466e-05, - "loss": 0.2071, - "step": 5647 - }, - { - "epoch": 0.29, - "grad_norm": 0.9946118684590498, - "learning_rate": 1.672631752760871e-05, - "loss": 0.2139, - "step": 5648 - }, - { - "epoch": 0.29, - "grad_norm": 1.1607991460269982, - "learning_rate": 1.6725098716857212e-05, - "loss": 0.2155, - "step": 5649 - }, - { - "epoch": 0.29, - "grad_norm": 0.8941861618108317, - "learning_rate": 1.672387972368703e-05, - "loss": 0.2113, - "step": 5650 - }, - { - "epoch": 0.29, - "grad_norm": 1.086806213120535, - "learning_rate": 1.6722660548131235e-05, - "loss": 0.2116, - "step": 5651 - }, - { - "epoch": 0.29, - "grad_norm": 0.7956815228190831, - "learning_rate": 1.6721441190222893e-05, - "loss": 0.1886, - "step": 5652 - }, - { - "epoch": 0.29, - "grad_norm": 1.0590701485177547, - "learning_rate": 1.6720221649995076e-05, - "loss": 0.2069, - "step": 5653 - }, - { - "epoch": 0.29, - "grad_norm": 0.7979807415368692, - "learning_rate": 1.6719001927480867e-05, - "loss": 0.2057, - "step": 5654 - }, - { - "epoch": 0.29, - "grad_norm": 1.0005418175143537, - "learning_rate": 1.6717782022713353e-05, - "loss": 0.2211, - "step": 5655 - }, - { - "epoch": 0.29, - "grad_norm": 0.9628323917247162, - "learning_rate": 1.671656193572562e-05, - "loss": 0.2178, - "step": 5656 - }, - { - "epoch": 0.29, - "grad_norm": 1.5734424180049371, - "learning_rate": 1.671534166655077e-05, - "loss": 0.1901, - "step": 5657 - }, - { - "epoch": 0.29, - "grad_norm": 1.55494116484686, - "learning_rate": 1.6714121215221894e-05, - "loss": 0.229, - "step": 5658 - }, - { - "epoch": 0.29, - "grad_norm": 0.9325780349684512, - "learning_rate": 1.67129005817721e-05, - "loss": 0.2171, - "step": 5659 - }, - { - "epoch": 0.29, - "grad_norm": 2.2796034374258602, - "learning_rate": 1.67116797662345e-05, - "loss": 0.2123, - "step": 5660 - }, - { - "epoch": 0.29, - "grad_norm": 1.0749231903118135, - "learning_rate": 1.6710458768642207e-05, - "loss": 0.2215, - "step": 5661 - }, - { - "epoch": 0.29, - "grad_norm": 1.1093769231076407, - "learning_rate": 1.670923758902834e-05, - "loss": 0.2224, - "step": 5662 - }, - { - "epoch": 0.29, - "grad_norm": 1.3186374701895731, - "learning_rate": 1.6708016227426026e-05, - "loss": 0.1889, - "step": 5663 - }, - { - "epoch": 0.29, - "grad_norm": 1.067109564591355, - "learning_rate": 1.6706794683868392e-05, - "loss": 0.2334, - "step": 5664 - }, - { - "epoch": 0.29, - "grad_norm": 1.2771290744730772, - "learning_rate": 1.6705572958388576e-05, - "loss": 0.2373, - "step": 5665 - }, - { - "epoch": 0.29, - "grad_norm": 0.994192818216985, - "learning_rate": 1.6704351051019713e-05, - "loss": 0.2172, - "step": 5666 - }, - { - "epoch": 0.29, - "grad_norm": 0.8792652678481304, - "learning_rate": 1.6703128961794947e-05, - "loss": 0.1987, - "step": 5667 - }, - { - "epoch": 0.29, - "grad_norm": 1.3800397889807778, - "learning_rate": 1.670190669074743e-05, - "loss": 0.2263, - "step": 5668 - }, - { - "epoch": 0.29, - "grad_norm": 0.840686344309017, - "learning_rate": 1.670068423791032e-05, - "loss": 0.1914, - "step": 5669 - }, - { - "epoch": 0.29, - "grad_norm": 0.8809991020409779, - "learning_rate": 1.6699461603316765e-05, - "loss": 0.1922, - "step": 5670 - }, - { - "epoch": 0.29, - "grad_norm": 1.1474302610352636, - "learning_rate": 1.669823878699994e-05, - "loss": 0.2132, - "step": 5671 - }, - { - "epoch": 0.29, - "grad_norm": 0.9949574631829177, - "learning_rate": 1.669701578899301e-05, - "loss": 0.2054, - "step": 5672 - }, - { - "epoch": 0.29, - "grad_norm": 2.378673043957621, - "learning_rate": 1.6695792609329148e-05, - "loss": 0.1907, - "step": 5673 - }, - { - "epoch": 0.29, - "grad_norm": 0.9551872363706768, - "learning_rate": 1.669456924804153e-05, - "loss": 0.1831, - "step": 5674 - }, - { - "epoch": 0.29, - "grad_norm": 0.921282116685391, - "learning_rate": 1.6693345705163343e-05, - "loss": 0.1964, - "step": 5675 - }, - { - "epoch": 0.29, - "grad_norm": 1.415101477622893, - "learning_rate": 1.669212198072778e-05, - "loss": 0.2267, - "step": 5676 - }, - { - "epoch": 0.29, - "grad_norm": 1.2433915586599227, - "learning_rate": 1.669089807476803e-05, - "loss": 0.2407, - "step": 5677 - }, - { - "epoch": 0.29, - "grad_norm": 1.1426110298544896, - "learning_rate": 1.668967398731729e-05, - "loss": 0.2146, - "step": 5678 - }, - { - "epoch": 0.29, - "grad_norm": 0.8677298754657188, - "learning_rate": 1.6688449718408763e-05, - "loss": 0.2143, - "step": 5679 - }, - { - "epoch": 0.29, - "grad_norm": 0.845779757673886, - "learning_rate": 1.6687225268075665e-05, - "loss": 0.2377, - "step": 5680 - }, - { - "epoch": 0.29, - "grad_norm": 0.7281009544445249, - "learning_rate": 1.6686000636351197e-05, - "loss": 0.1995, - "step": 5681 - }, - { - "epoch": 0.29, - "grad_norm": 0.8926328124730796, - "learning_rate": 1.6684775823268592e-05, - "loss": 0.1922, - "step": 5682 - }, - { - "epoch": 0.29, - "grad_norm": 0.923995341210268, - "learning_rate": 1.668355082886106e-05, - "loss": 0.1937, - "step": 5683 - }, - { - "epoch": 0.29, - "grad_norm": 1.391058018212231, - "learning_rate": 1.6682325653161833e-05, - "loss": 0.2005, - "step": 5684 - }, - { - "epoch": 0.29, - "grad_norm": 0.9626507421652647, - "learning_rate": 1.668110029620415e-05, - "loss": 0.2245, - "step": 5685 - }, - { - "epoch": 0.29, - "grad_norm": 1.0274059668926714, - "learning_rate": 1.6679874758021238e-05, - "loss": 0.184, - "step": 5686 - }, - { - "epoch": 0.29, - "grad_norm": 1.0835223734839512, - "learning_rate": 1.6678649038646353e-05, - "loss": 0.2178, - "step": 5687 - }, - { - "epoch": 0.29, - "grad_norm": 1.0583879888167973, - "learning_rate": 1.667742313811273e-05, - "loss": 0.2051, - "step": 5688 - }, - { - "epoch": 0.29, - "grad_norm": 0.826642326401598, - "learning_rate": 1.667619705645363e-05, - "loss": 0.2006, - "step": 5689 - }, - { - "epoch": 0.29, - "grad_norm": 0.8127180877352143, - "learning_rate": 1.667497079370231e-05, - "loss": 0.2119, - "step": 5690 - }, - { - "epoch": 0.29, - "grad_norm": 1.4911742939755106, - "learning_rate": 1.6673744349892027e-05, - "loss": 0.2051, - "step": 5691 - }, - { - "epoch": 0.29, - "grad_norm": 0.8507874783495339, - "learning_rate": 1.6672517725056052e-05, - "loss": 0.1919, - "step": 5692 - }, - { - "epoch": 0.29, - "grad_norm": 1.2710015881210284, - "learning_rate": 1.6671290919227656e-05, - "loss": 0.2389, - "step": 5693 - }, - { - "epoch": 0.29, - "grad_norm": 0.9906515157794544, - "learning_rate": 1.667006393244012e-05, - "loss": 0.2384, - "step": 5694 - }, - { - "epoch": 0.29, - "grad_norm": 0.9793754164793437, - "learning_rate": 1.666883676472672e-05, - "loss": 0.2152, - "step": 5695 - }, - { - "epoch": 0.29, - "grad_norm": 0.8065468973363352, - "learning_rate": 1.666760941612075e-05, - "loss": 0.2056, - "step": 5696 - }, - { - "epoch": 0.29, - "grad_norm": 0.9731836300048301, - "learning_rate": 1.666638188665549e-05, - "loss": 0.2213, - "step": 5697 - }, - { - "epoch": 0.29, - "grad_norm": 2.226280971426743, - "learning_rate": 1.6665154176364252e-05, - "loss": 0.2196, - "step": 5698 - }, - { - "epoch": 0.29, - "grad_norm": 0.9443958794853763, - "learning_rate": 1.666392628528033e-05, - "loss": 0.1855, - "step": 5699 - }, - { - "epoch": 0.29, - "grad_norm": 1.1573762089894577, - "learning_rate": 1.666269821343703e-05, - "loss": 0.201, - "step": 5700 - }, - { - "epoch": 0.29, - "grad_norm": 0.9768856011391813, - "learning_rate": 1.666146996086766e-05, - "loss": 0.2271, - "step": 5701 - }, - { - "epoch": 0.29, - "grad_norm": 0.8118714858889647, - "learning_rate": 1.6660241527605546e-05, - "loss": 0.1818, - "step": 5702 - }, - { - "epoch": 0.29, - "grad_norm": 0.8388414321070643, - "learning_rate": 1.6659012913684005e-05, - "loss": 0.2118, - "step": 5703 - }, - { - "epoch": 0.29, - "grad_norm": 0.8937549781822656, - "learning_rate": 1.665778411913636e-05, - "loss": 0.2302, - "step": 5704 - }, - { - "epoch": 0.29, - "grad_norm": 1.0990565637143503, - "learning_rate": 1.6656555143995946e-05, - "loss": 0.211, - "step": 5705 - }, - { - "epoch": 0.29, - "grad_norm": 1.0010147981590252, - "learning_rate": 1.66553259882961e-05, - "loss": 0.2307, - "step": 5706 - }, - { - "epoch": 0.29, - "grad_norm": 0.8837306750998283, - "learning_rate": 1.6654096652070157e-05, - "loss": 0.1806, - "step": 5707 - }, - { - "epoch": 0.29, - "grad_norm": 1.8684906796406004, - "learning_rate": 1.665286713535147e-05, - "loss": 0.2114, - "step": 5708 - }, - { - "epoch": 0.29, - "grad_norm": 0.9092706245114834, - "learning_rate": 1.6651637438173382e-05, - "loss": 0.1945, - "step": 5709 - }, - { - "epoch": 0.29, - "grad_norm": 1.0686983902228964, - "learning_rate": 1.665040756056926e-05, - "loss": 0.23, - "step": 5710 - }, - { - "epoch": 0.29, - "grad_norm": 1.1221867067651907, - "learning_rate": 1.6649177502572447e-05, - "loss": 0.2209, - "step": 5711 - }, - { - "epoch": 0.29, - "grad_norm": 1.0704699851234865, - "learning_rate": 1.6647947264216328e-05, - "loss": 0.1899, - "step": 5712 - }, - { - "epoch": 0.29, - "grad_norm": 0.991710562833805, - "learning_rate": 1.664671684553426e-05, - "loss": 0.1844, - "step": 5713 - }, - { - "epoch": 0.29, - "grad_norm": 0.8965807307602568, - "learning_rate": 1.6645486246559622e-05, - "loss": 0.1953, - "step": 5714 - }, - { - "epoch": 0.29, - "grad_norm": 1.0461114963512261, - "learning_rate": 1.6644255467325793e-05, - "loss": 0.2339, - "step": 5715 - }, - { - "epoch": 0.29, - "grad_norm": 0.8634193165469928, - "learning_rate": 1.6643024507866158e-05, - "loss": 0.1947, - "step": 5716 - }, - { - "epoch": 0.29, - "grad_norm": 0.8478908905893784, - "learning_rate": 1.664179336821411e-05, - "loss": 0.2118, - "step": 5717 - }, - { - "epoch": 0.29, - "grad_norm": 0.9551220717526766, - "learning_rate": 1.6640562048403044e-05, - "loss": 0.198, - "step": 5718 - }, - { - "epoch": 0.29, - "grad_norm": 1.1053131099679867, - "learning_rate": 1.6639330548466356e-05, - "loss": 0.2285, - "step": 5719 - }, - { - "epoch": 0.29, - "grad_norm": 0.8436838705449303, - "learning_rate": 1.6638098868437453e-05, - "loss": 0.1869, - "step": 5720 - }, - { - "epoch": 0.29, - "grad_norm": 2.41614560693824, - "learning_rate": 1.663686700834974e-05, - "loss": 0.2166, - "step": 5721 - }, - { - "epoch": 0.29, - "grad_norm": 1.451927439591581, - "learning_rate": 1.6635634968236637e-05, - "loss": 0.2365, - "step": 5722 - }, - { - "epoch": 0.29, - "grad_norm": 0.993119411046374, - "learning_rate": 1.663440274813156e-05, - "loss": 0.1974, - "step": 5723 - }, - { - "epoch": 0.29, - "grad_norm": 3.097312693258662, - "learning_rate": 1.6633170348067935e-05, - "loss": 0.1944, - "step": 5724 - }, - { - "epoch": 0.29, - "grad_norm": 1.1593390713884788, - "learning_rate": 1.663193776807919e-05, - "loss": 0.2147, - "step": 5725 - }, - { - "epoch": 0.29, - "grad_norm": 1.159035057231313, - "learning_rate": 1.6630705008198757e-05, - "loss": 0.2045, - "step": 5726 - }, - { - "epoch": 0.29, - "grad_norm": 0.9332523049846795, - "learning_rate": 1.6629472068460077e-05, - "loss": 0.1989, - "step": 5727 - }, - { - "epoch": 0.29, - "grad_norm": 1.2142552198529883, - "learning_rate": 1.662823894889659e-05, - "loss": 0.2172, - "step": 5728 - }, - { - "epoch": 0.29, - "grad_norm": 1.49516565416373, - "learning_rate": 1.6627005649541746e-05, - "loss": 0.2127, - "step": 5729 - }, - { - "epoch": 0.29, - "grad_norm": 1.4027560988903203, - "learning_rate": 1.6625772170429005e-05, - "loss": 0.1918, - "step": 5730 - }, - { - "epoch": 0.29, - "grad_norm": 3.202650465639793, - "learning_rate": 1.6624538511591817e-05, - "loss": 0.2219, - "step": 5731 - }, - { - "epoch": 0.29, - "grad_norm": 1.2748809823636216, - "learning_rate": 1.6623304673063647e-05, - "loss": 0.2256, - "step": 5732 - }, - { - "epoch": 0.29, - "grad_norm": 1.0268563268112219, - "learning_rate": 1.6622070654877966e-05, - "loss": 0.2014, - "step": 5733 - }, - { - "epoch": 0.29, - "grad_norm": 1.1245407843284232, - "learning_rate": 1.6620836457068242e-05, - "loss": 0.2189, - "step": 5734 - }, - { - "epoch": 0.29, - "grad_norm": 0.9168931745402805, - "learning_rate": 1.6619602079667956e-05, - "loss": 0.2112, - "step": 5735 - }, - { - "epoch": 0.29, - "grad_norm": 0.8845526859215986, - "learning_rate": 1.661836752271059e-05, - "loss": 0.2323, - "step": 5736 - }, - { - "epoch": 0.29, - "grad_norm": 1.0410697350692784, - "learning_rate": 1.6617132786229634e-05, - "loss": 0.1992, - "step": 5737 - }, - { - "epoch": 0.29, - "grad_norm": 2.0815951639272545, - "learning_rate": 1.661589787025857e-05, - "loss": 0.2061, - "step": 5738 - }, - { - "epoch": 0.29, - "grad_norm": 1.133386163528876, - "learning_rate": 1.6614662774830908e-05, - "loss": 0.2232, - "step": 5739 - }, - { - "epoch": 0.29, - "grad_norm": 1.3203570744289945, - "learning_rate": 1.6613427499980143e-05, - "loss": 0.1992, - "step": 5740 - }, - { - "epoch": 0.29, - "grad_norm": 0.8843085131207743, - "learning_rate": 1.6612192045739787e-05, - "loss": 0.2039, - "step": 5741 - }, - { - "epoch": 0.29, - "grad_norm": 0.8025452416194425, - "learning_rate": 1.6610956412143346e-05, - "loss": 0.1985, - "step": 5742 - }, - { - "epoch": 0.29, - "grad_norm": 1.489947977642656, - "learning_rate": 1.6609720599224337e-05, - "loss": 0.2007, - "step": 5743 - }, - { - "epoch": 0.29, - "grad_norm": 0.9234053910717891, - "learning_rate": 1.6608484607016283e-05, - "loss": 0.2143, - "step": 5744 - }, - { - "epoch": 0.29, - "grad_norm": 0.9372514810803722, - "learning_rate": 1.6607248435552714e-05, - "loss": 0.2024, - "step": 5745 - }, - { - "epoch": 0.29, - "grad_norm": 1.107377154971928, - "learning_rate": 1.6606012084867158e-05, - "loss": 0.2152, - "step": 5746 - }, - { - "epoch": 0.29, - "grad_norm": 0.922792485971133, - "learning_rate": 1.660477555499315e-05, - "loss": 0.2083, - "step": 5747 - }, - { - "epoch": 0.29, - "grad_norm": 0.9716374962394986, - "learning_rate": 1.660353884596423e-05, - "loss": 0.1889, - "step": 5748 - }, - { - "epoch": 0.29, - "grad_norm": 1.03480208202746, - "learning_rate": 1.6602301957813945e-05, - "loss": 0.23, - "step": 5749 - }, - { - "epoch": 0.29, - "grad_norm": 1.2601246611472028, - "learning_rate": 1.6601064890575852e-05, - "loss": 0.2107, - "step": 5750 - }, - { - "epoch": 0.29, - "grad_norm": 0.9725700733302011, - "learning_rate": 1.6599827644283496e-05, - "loss": 0.1852, - "step": 5751 - }, - { - "epoch": 0.29, - "grad_norm": 0.8676443914009231, - "learning_rate": 1.6598590218970448e-05, - "loss": 0.2128, - "step": 5752 - }, - { - "epoch": 0.29, - "grad_norm": 0.9559585801515682, - "learning_rate": 1.6597352614670265e-05, - "loss": 0.1967, - "step": 5753 - }, - { - "epoch": 0.29, - "grad_norm": 0.9523708018419746, - "learning_rate": 1.6596114831416516e-05, - "loss": 0.1927, - "step": 5754 - }, - { - "epoch": 0.29, - "grad_norm": 1.5704200689560264, - "learning_rate": 1.6594876869242785e-05, - "loss": 0.2105, - "step": 5755 - }, - { - "epoch": 0.29, - "grad_norm": 1.2809466782681533, - "learning_rate": 1.659363872818264e-05, - "loss": 0.1994, - "step": 5756 - }, - { - "epoch": 0.29, - "grad_norm": 0.7852549758135308, - "learning_rate": 1.6592400408269678e-05, - "loss": 0.1898, - "step": 5757 - }, - { - "epoch": 0.29, - "grad_norm": 1.468007426452819, - "learning_rate": 1.659116190953748e-05, - "loss": 0.1984, - "step": 5758 - }, - { - "epoch": 0.29, - "grad_norm": 1.0512057713606473, - "learning_rate": 1.6589923232019646e-05, - "loss": 0.236, - "step": 5759 - }, - { - "epoch": 0.29, - "grad_norm": 0.8335140202496528, - "learning_rate": 1.6588684375749767e-05, - "loss": 0.2051, - "step": 5760 - }, - { - "epoch": 0.29, - "grad_norm": 0.8435524615441553, - "learning_rate": 1.6587445340761456e-05, - "loss": 0.1985, - "step": 5761 - }, - { - "epoch": 0.29, - "grad_norm": 1.136818437779603, - "learning_rate": 1.658620612708832e-05, - "loss": 0.203, - "step": 5762 - }, - { - "epoch": 0.29, - "grad_norm": 1.5529887857017122, - "learning_rate": 1.6584966734763966e-05, - "loss": 0.1925, - "step": 5763 - }, - { - "epoch": 0.29, - "grad_norm": 1.0151767977894686, - "learning_rate": 1.6583727163822016e-05, - "loss": 0.2128, - "step": 5764 - }, - { - "epoch": 0.29, - "grad_norm": 0.8870410825868733, - "learning_rate": 1.6582487414296097e-05, - "loss": 0.2046, - "step": 5765 - }, - { - "epoch": 0.29, - "grad_norm": 1.3136083487300818, - "learning_rate": 1.6581247486219837e-05, - "loss": 0.2081, - "step": 5766 - }, - { - "epoch": 0.29, - "grad_norm": 1.075196880227764, - "learning_rate": 1.6580007379626868e-05, - "loss": 0.2084, - "step": 5767 - }, - { - "epoch": 0.29, - "grad_norm": 0.928292447081866, - "learning_rate": 1.6578767094550826e-05, - "loss": 0.2121, - "step": 5768 - }, - { - "epoch": 0.29, - "grad_norm": 1.1057184105594184, - "learning_rate": 1.6577526631025352e-05, - "loss": 0.2214, - "step": 5769 - }, - { - "epoch": 0.29, - "grad_norm": 1.084890084007609, - "learning_rate": 1.65762859890841e-05, - "loss": 0.2286, - "step": 5770 - }, - { - "epoch": 0.29, - "grad_norm": 1.0065522632622652, - "learning_rate": 1.6575045168760716e-05, - "loss": 0.1941, - "step": 5771 - }, - { - "epoch": 0.29, - "grad_norm": 1.0942944625729116, - "learning_rate": 1.6573804170088866e-05, - "loss": 0.2055, - "step": 5772 - }, - { - "epoch": 0.29, - "grad_norm": 1.2163497184581087, - "learning_rate": 1.65725629931022e-05, - "loss": 0.2141, - "step": 5773 - }, - { - "epoch": 0.29, - "grad_norm": 1.2837384138894168, - "learning_rate": 1.65713216378344e-05, - "loss": 0.2065, - "step": 5774 - }, - { - "epoch": 0.29, - "grad_norm": 1.1628408812538042, - "learning_rate": 1.6570080104319122e-05, - "loss": 0.2181, - "step": 5775 - }, - { - "epoch": 0.29, - "grad_norm": 1.0705948505531206, - "learning_rate": 1.656883839259005e-05, - "loss": 0.2109, - "step": 5776 - }, - { - "epoch": 0.29, - "grad_norm": 1.6840407787429668, - "learning_rate": 1.656759650268087e-05, - "loss": 0.2008, - "step": 5777 - }, - { - "epoch": 0.29, - "grad_norm": 1.2944722190769227, - "learning_rate": 1.6566354434625262e-05, - "loss": 0.2134, - "step": 5778 - }, - { - "epoch": 0.29, - "grad_norm": 1.7357663578551865, - "learning_rate": 1.656511218845692e-05, - "loss": 0.1994, - "step": 5779 - }, - { - "epoch": 0.29, - "grad_norm": 1.3509628119207966, - "learning_rate": 1.6563869764209538e-05, - "loss": 0.1859, - "step": 5780 - }, - { - "epoch": 0.29, - "grad_norm": 1.133614725165837, - "learning_rate": 1.656262716191682e-05, - "loss": 0.2061, - "step": 5781 - }, - { - "epoch": 0.29, - "grad_norm": 1.0005438686506853, - "learning_rate": 1.6561384381612463e-05, - "loss": 0.1961, - "step": 5782 - }, - { - "epoch": 0.29, - "grad_norm": 1.2009133423409168, - "learning_rate": 1.656014142333019e-05, - "loss": 0.2023, - "step": 5783 - }, - { - "epoch": 0.29, - "grad_norm": 1.1461548411705642, - "learning_rate": 1.6558898287103708e-05, - "loss": 0.2175, - "step": 5784 - }, - { - "epoch": 0.29, - "grad_norm": 1.102960060107423, - "learning_rate": 1.6557654972966743e-05, - "loss": 0.2301, - "step": 5785 - }, - { - "epoch": 0.29, - "grad_norm": 0.8208184710818014, - "learning_rate": 1.6556411480953012e-05, - "loss": 0.2057, - "step": 5786 - }, - { - "epoch": 0.29, - "grad_norm": 0.9678370278571667, - "learning_rate": 1.655516781109625e-05, - "loss": 0.2112, - "step": 5787 - }, - { - "epoch": 0.29, - "grad_norm": 0.7878003614739206, - "learning_rate": 1.6553923963430193e-05, - "loss": 0.1782, - "step": 5788 - }, - { - "epoch": 0.29, - "grad_norm": 1.0807032380326957, - "learning_rate": 1.655267993798858e-05, - "loss": 0.1907, - "step": 5789 - }, - { - "epoch": 0.29, - "grad_norm": 1.0574042335885612, - "learning_rate": 1.655143573480515e-05, - "loss": 0.2407, - "step": 5790 - }, - { - "epoch": 0.29, - "grad_norm": 0.9989306136577616, - "learning_rate": 1.6550191353913657e-05, - "loss": 0.212, - "step": 5791 - }, - { - "epoch": 0.29, - "grad_norm": 0.9239187621127245, - "learning_rate": 1.654894679534785e-05, - "loss": 0.1757, - "step": 5792 - }, - { - "epoch": 0.29, - "grad_norm": 0.9822578991598856, - "learning_rate": 1.6547702059141497e-05, - "loss": 0.1973, - "step": 5793 - }, - { - "epoch": 0.29, - "grad_norm": 1.0885857577961602, - "learning_rate": 1.6546457145328354e-05, - "loss": 0.2046, - "step": 5794 - }, - { - "epoch": 0.29, - "grad_norm": 1.1196107951161263, - "learning_rate": 1.654521205394219e-05, - "loss": 0.1986, - "step": 5795 - }, - { - "epoch": 0.29, - "grad_norm": 1.0144618703541814, - "learning_rate": 1.654396678501678e-05, - "loss": 0.1774, - "step": 5796 - }, - { - "epoch": 0.29, - "grad_norm": 0.9975357074513678, - "learning_rate": 1.65427213385859e-05, - "loss": 0.1916, - "step": 5797 - }, - { - "epoch": 0.29, - "grad_norm": 1.027741848788133, - "learning_rate": 1.6541475714683337e-05, - "loss": 0.197, - "step": 5798 - }, - { - "epoch": 0.29, - "grad_norm": 0.7796913200996538, - "learning_rate": 1.6540229913342875e-05, - "loss": 0.1971, - "step": 5799 - }, - { - "epoch": 0.29, - "grad_norm": 0.7289629178138828, - "learning_rate": 1.6538983934598304e-05, - "loss": 0.201, - "step": 5800 - }, - { - "epoch": 0.29, - "grad_norm": 1.2443187942554474, - "learning_rate": 1.653773777848343e-05, - "loss": 0.2208, - "step": 5801 - }, - { - "epoch": 0.3, - "grad_norm": 1.013422787334009, - "learning_rate": 1.6536491445032044e-05, - "loss": 0.1913, - "step": 5802 - }, - { - "epoch": 0.3, - "grad_norm": 5.358326711355817, - "learning_rate": 1.6535244934277962e-05, - "loss": 0.2023, - "step": 5803 - }, - { - "epoch": 0.3, - "grad_norm": 1.5021160626864412, - "learning_rate": 1.653399824625499e-05, - "loss": 0.2047, - "step": 5804 - }, - { - "epoch": 0.3, - "grad_norm": 0.8694947879982603, - "learning_rate": 1.653275138099695e-05, - "loss": 0.24, - "step": 5805 - }, - { - "epoch": 0.3, - "grad_norm": 1.0982253887611164, - "learning_rate": 1.6531504338537653e-05, - "loss": 0.2039, - "step": 5806 - }, - { - "epoch": 0.3, - "grad_norm": 0.9651108431730049, - "learning_rate": 1.6530257118910936e-05, - "loss": 0.1938, - "step": 5807 - }, - { - "epoch": 0.3, - "grad_norm": 1.2459276680152085, - "learning_rate": 1.6529009722150626e-05, - "loss": 0.2199, - "step": 5808 - }, - { - "epoch": 0.3, - "grad_norm": 1.0178850451064618, - "learning_rate": 1.652776214829056e-05, - "loss": 0.2025, - "step": 5809 - }, - { - "epoch": 0.3, - "grad_norm": 1.1226569227170007, - "learning_rate": 1.6526514397364575e-05, - "loss": 0.2068, - "step": 5810 - }, - { - "epoch": 0.3, - "grad_norm": 1.5468864133936497, - "learning_rate": 1.652526646940652e-05, - "loss": 0.203, - "step": 5811 - }, - { - "epoch": 0.3, - "grad_norm": 1.046251258894582, - "learning_rate": 1.6524018364450243e-05, - "loss": 0.1911, - "step": 5812 - }, - { - "epoch": 0.3, - "grad_norm": 0.9299981519638938, - "learning_rate": 1.6522770082529596e-05, - "loss": 0.2273, - "step": 5813 - }, - { - "epoch": 0.3, - "grad_norm": 0.867663303689732, - "learning_rate": 1.6521521623678445e-05, - "loss": 0.2123, - "step": 5814 - }, - { - "epoch": 0.3, - "grad_norm": 0.8664470400619952, - "learning_rate": 1.6520272987930652e-05, - "loss": 0.215, - "step": 5815 - }, - { - "epoch": 0.3, - "grad_norm": 0.9126660340051115, - "learning_rate": 1.6519024175320083e-05, - "loss": 0.2117, - "step": 5816 - }, - { - "epoch": 0.3, - "grad_norm": 1.0413653605044608, - "learning_rate": 1.651777518588062e-05, - "loss": 0.2017, - "step": 5817 - }, - { - "epoch": 0.3, - "grad_norm": 1.3946594861416581, - "learning_rate": 1.6516526019646134e-05, - "loss": 0.1966, - "step": 5818 - }, - { - "epoch": 0.3, - "grad_norm": 1.3855714810992754, - "learning_rate": 1.651527667665051e-05, - "loss": 0.2036, - "step": 5819 - }, - { - "epoch": 0.3, - "grad_norm": 0.8732515849837948, - "learning_rate": 1.6514027156927645e-05, - "loss": 0.2048, - "step": 5820 - }, - { - "epoch": 0.3, - "grad_norm": 2.534939302966109, - "learning_rate": 1.6512777460511416e-05, - "loss": 0.2151, - "step": 5821 - }, - { - "epoch": 0.3, - "grad_norm": 0.8623181831359924, - "learning_rate": 1.6511527587435736e-05, - "loss": 0.1974, - "step": 5822 - }, - { - "epoch": 0.3, - "grad_norm": 1.0794220973605955, - "learning_rate": 1.6510277537734503e-05, - "loss": 0.2174, - "step": 5823 - }, - { - "epoch": 0.3, - "grad_norm": 1.5431023442504375, - "learning_rate": 1.6509027311441622e-05, - "loss": 0.2106, - "step": 5824 - }, - { - "epoch": 0.3, - "grad_norm": 1.0567470028378723, - "learning_rate": 1.6507776908591008e-05, - "loss": 0.2049, - "step": 5825 - }, - { - "epoch": 0.3, - "grad_norm": 0.9985409094547987, - "learning_rate": 1.6506526329216577e-05, - "loss": 0.226, - "step": 5826 - }, - { - "epoch": 0.3, - "grad_norm": 0.9380862280445232, - "learning_rate": 1.6505275573352256e-05, - "loss": 0.2239, - "step": 5827 - }, - { - "epoch": 0.3, - "grad_norm": 1.02642850762763, - "learning_rate": 1.6504024641031962e-05, - "loss": 0.22, - "step": 5828 - }, - { - "epoch": 0.3, - "grad_norm": 1.5253979388506016, - "learning_rate": 1.6502773532289636e-05, - "loss": 0.2042, - "step": 5829 - }, - { - "epoch": 0.3, - "grad_norm": 1.3658204535216976, - "learning_rate": 1.650152224715921e-05, - "loss": 0.1972, - "step": 5830 - }, - { - "epoch": 0.3, - "grad_norm": 1.7792321176824304, - "learning_rate": 1.6500270785674622e-05, - "loss": 0.1887, - "step": 5831 - }, - { - "epoch": 0.3, - "grad_norm": 1.0631302674580914, - "learning_rate": 1.6499019147869826e-05, - "loss": 0.1867, - "step": 5832 - }, - { - "epoch": 0.3, - "grad_norm": 0.9993820993819318, - "learning_rate": 1.649776733377877e-05, - "loss": 0.2004, - "step": 5833 - }, - { - "epoch": 0.3, - "grad_norm": 1.1175435765314348, - "learning_rate": 1.6496515343435402e-05, - "loss": 0.215, - "step": 5834 - }, - { - "epoch": 0.3, - "grad_norm": 0.7984194198142733, - "learning_rate": 1.6495263176873693e-05, - "loss": 0.1992, - "step": 5835 - }, - { - "epoch": 0.3, - "grad_norm": 0.8747750342396716, - "learning_rate": 1.6494010834127606e-05, - "loss": 0.21, - "step": 5836 - }, - { - "epoch": 0.3, - "grad_norm": 1.0164685258600892, - "learning_rate": 1.6492758315231105e-05, - "loss": 0.2239, - "step": 5837 - }, - { - "epoch": 0.3, - "grad_norm": 3.2855641459344684, - "learning_rate": 1.6491505620218164e-05, - "loss": 0.2095, - "step": 5838 - }, - { - "epoch": 0.3, - "grad_norm": 1.0643574580669881, - "learning_rate": 1.649025274912277e-05, - "loss": 0.2103, - "step": 5839 - }, - { - "epoch": 0.3, - "grad_norm": 0.9935202396359508, - "learning_rate": 1.6488999701978905e-05, - "loss": 0.2034, - "step": 5840 - }, - { - "epoch": 0.3, - "grad_norm": 0.8713947124605383, - "learning_rate": 1.6487746478820553e-05, - "loss": 0.2141, - "step": 5841 - }, - { - "epoch": 0.3, - "grad_norm": 0.8350889530294335, - "learning_rate": 1.6486493079681717e-05, - "loss": 0.1887, - "step": 5842 - }, - { - "epoch": 0.3, - "grad_norm": 0.9041295959036052, - "learning_rate": 1.6485239504596388e-05, - "loss": 0.1954, - "step": 5843 - }, - { - "epoch": 0.3, - "grad_norm": 0.9224791675877131, - "learning_rate": 1.6483985753598568e-05, - "loss": 0.1839, - "step": 5844 - }, - { - "epoch": 0.3, - "grad_norm": 1.2302875736385745, - "learning_rate": 1.6482731826722268e-05, - "loss": 0.2014, - "step": 5845 - }, - { - "epoch": 0.3, - "grad_norm": 0.9097035251363624, - "learning_rate": 1.6481477724001505e-05, - "loss": 0.2199, - "step": 5846 - }, - { - "epoch": 0.3, - "grad_norm": 0.9888881253592569, - "learning_rate": 1.648022344547029e-05, - "loss": 0.2227, - "step": 5847 - }, - { - "epoch": 0.3, - "grad_norm": 0.9414438514171718, - "learning_rate": 1.647896899116265e-05, - "loss": 0.2229, - "step": 5848 - }, - { - "epoch": 0.3, - "grad_norm": 1.0139577064085958, - "learning_rate": 1.647771436111261e-05, - "loss": 0.2129, - "step": 5849 - }, - { - "epoch": 0.3, - "grad_norm": 0.8495467191827688, - "learning_rate": 1.64764595553542e-05, - "loss": 0.1753, - "step": 5850 - }, - { - "epoch": 0.3, - "grad_norm": 1.1991959064883873, - "learning_rate": 1.647520457392146e-05, - "loss": 0.2039, - "step": 5851 - }, - { - "epoch": 0.3, - "grad_norm": 0.8550431663988788, - "learning_rate": 1.647394941684843e-05, - "loss": 0.221, - "step": 5852 - }, - { - "epoch": 0.3, - "grad_norm": 0.8606637731862601, - "learning_rate": 1.6472694084169155e-05, - "loss": 0.1815, - "step": 5853 - }, - { - "epoch": 0.3, - "grad_norm": 1.0369770786766133, - "learning_rate": 1.6471438575917688e-05, - "loss": 0.2189, - "step": 5854 - }, - { - "epoch": 0.3, - "grad_norm": 1.0865897305584387, - "learning_rate": 1.6470182892128085e-05, - "loss": 0.2183, - "step": 5855 - }, - { - "epoch": 0.3, - "grad_norm": 0.9732329093878587, - "learning_rate": 1.6468927032834407e-05, - "loss": 0.2007, - "step": 5856 - }, - { - "epoch": 0.3, - "grad_norm": 0.9412361804662748, - "learning_rate": 1.6467670998070715e-05, - "loss": 0.2058, - "step": 5857 - }, - { - "epoch": 0.3, - "grad_norm": 1.2031680501967463, - "learning_rate": 1.6466414787871084e-05, - "loss": 0.1962, - "step": 5858 - }, - { - "epoch": 0.3, - "grad_norm": 0.9115571475165246, - "learning_rate": 1.6465158402269585e-05, - "loss": 0.2157, - "step": 5859 - }, - { - "epoch": 0.3, - "grad_norm": 0.7158282006426111, - "learning_rate": 1.64639018413003e-05, - "loss": 0.1991, - "step": 5860 - }, - { - "epoch": 0.3, - "grad_norm": 0.8445846725413154, - "learning_rate": 1.6462645104997313e-05, - "loss": 0.1883, - "step": 5861 - }, - { - "epoch": 0.3, - "grad_norm": 0.8719816706104393, - "learning_rate": 1.646138819339471e-05, - "loss": 0.2028, - "step": 5862 - }, - { - "epoch": 0.3, - "grad_norm": 0.9688970258918401, - "learning_rate": 1.646013110652659e-05, - "loss": 0.2212, - "step": 5863 - }, - { - "epoch": 0.3, - "grad_norm": 0.888364909503381, - "learning_rate": 1.645887384442705e-05, - "loss": 0.2056, - "step": 5864 - }, - { - "epoch": 0.3, - "grad_norm": 0.8982603781397506, - "learning_rate": 1.645761640713019e-05, - "loss": 0.2103, - "step": 5865 - }, - { - "epoch": 0.3, - "grad_norm": 2.3715133981948604, - "learning_rate": 1.645635879467012e-05, - "loss": 0.2104, - "step": 5866 - }, - { - "epoch": 0.3, - "grad_norm": 1.002168238431376, - "learning_rate": 1.6455101007080955e-05, - "loss": 0.2034, - "step": 5867 - }, - { - "epoch": 0.3, - "grad_norm": 0.9555106086590374, - "learning_rate": 1.645384304439681e-05, - "loss": 0.2587, - "step": 5868 - }, - { - "epoch": 0.3, - "grad_norm": 1.1883950255284266, - "learning_rate": 1.6452584906651807e-05, - "loss": 0.2111, - "step": 5869 - }, - { - "epoch": 0.3, - "grad_norm": 1.0928930058177924, - "learning_rate": 1.6451326593880072e-05, - "loss": 0.2152, - "step": 5870 - }, - { - "epoch": 0.3, - "grad_norm": 1.022071758104498, - "learning_rate": 1.6450068106115745e-05, - "loss": 0.2027, - "step": 5871 - }, - { - "epoch": 0.3, - "grad_norm": 0.9165368983625263, - "learning_rate": 1.644880944339295e-05, - "loss": 0.1926, - "step": 5872 - }, - { - "epoch": 0.3, - "grad_norm": 0.9855256169629955, - "learning_rate": 1.6447550605745836e-05, - "loss": 0.2126, - "step": 5873 - }, - { - "epoch": 0.3, - "grad_norm": 0.9811264576955663, - "learning_rate": 1.644629159320855e-05, - "loss": 0.2095, - "step": 5874 - }, - { - "epoch": 0.3, - "grad_norm": 0.8399754500955879, - "learning_rate": 1.644503240581524e-05, - "loss": 0.1823, - "step": 5875 - }, - { - "epoch": 0.3, - "grad_norm": 1.0565131500331548, - "learning_rate": 1.6443773043600058e-05, - "loss": 0.2225, - "step": 5876 - }, - { - "epoch": 0.3, - "grad_norm": 1.2131651665261163, - "learning_rate": 1.6442513506597175e-05, - "loss": 0.2317, - "step": 5877 - }, - { - "epoch": 0.3, - "grad_norm": 0.9404071637103668, - "learning_rate": 1.6441253794840745e-05, - "loss": 0.1882, - "step": 5878 - }, - { - "epoch": 0.3, - "grad_norm": 0.9782996232409031, - "learning_rate": 1.6439993908364942e-05, - "loss": 0.1891, - "step": 5879 - }, - { - "epoch": 0.3, - "grad_norm": 0.9048113744479341, - "learning_rate": 1.643873384720394e-05, - "loss": 0.2286, - "step": 5880 - }, - { - "epoch": 0.3, - "grad_norm": 1.0659766855293744, - "learning_rate": 1.643747361139192e-05, - "loss": 0.2317, - "step": 5881 - }, - { - "epoch": 0.3, - "grad_norm": 0.8068037627212562, - "learning_rate": 1.6436213200963065e-05, - "loss": 0.198, - "step": 5882 - }, - { - "epoch": 0.3, - "grad_norm": 0.877210694130185, - "learning_rate": 1.643495261595156e-05, - "loss": 0.211, - "step": 5883 - }, - { - "epoch": 0.3, - "grad_norm": 0.8623766726369708, - "learning_rate": 1.6433691856391608e-05, - "loss": 0.2107, - "step": 5884 - }, - { - "epoch": 0.3, - "grad_norm": 1.0274789198072334, - "learning_rate": 1.6432430922317396e-05, - "loss": 0.2024, - "step": 5885 - }, - { - "epoch": 0.3, - "grad_norm": 1.3082314211100265, - "learning_rate": 1.6431169813763134e-05, - "loss": 0.2281, - "step": 5886 - }, - { - "epoch": 0.3, - "grad_norm": 0.9357881374870074, - "learning_rate": 1.6429908530763027e-05, - "loss": 0.2018, - "step": 5887 - }, - { - "epoch": 0.3, - "grad_norm": 0.8417090098117563, - "learning_rate": 1.6428647073351287e-05, - "loss": 0.2225, - "step": 5888 - }, - { - "epoch": 0.3, - "grad_norm": 0.8117492998568088, - "learning_rate": 1.6427385441562135e-05, - "loss": 0.1911, - "step": 5889 - }, - { - "epoch": 0.3, - "grad_norm": 0.9145794378876565, - "learning_rate": 1.6426123635429787e-05, - "loss": 0.2137, - "step": 5890 - }, - { - "epoch": 0.3, - "grad_norm": 1.2038725188113313, - "learning_rate": 1.6424861654988477e-05, - "loss": 0.2157, - "step": 5891 - }, - { - "epoch": 0.3, - "grad_norm": 0.9292827245859995, - "learning_rate": 1.6423599500272424e-05, - "loss": 0.1931, - "step": 5892 - }, - { - "epoch": 0.3, - "grad_norm": 0.9946593686418843, - "learning_rate": 1.6422337171315878e-05, - "loss": 0.1904, - "step": 5893 - }, - { - "epoch": 0.3, - "grad_norm": 1.3859397288247326, - "learning_rate": 1.642107466815307e-05, - "loss": 0.2022, - "step": 5894 - }, - { - "epoch": 0.3, - "grad_norm": 0.8991319027869209, - "learning_rate": 1.6419811990818252e-05, - "loss": 0.2132, - "step": 5895 - }, - { - "epoch": 0.3, - "grad_norm": 1.0301125345203759, - "learning_rate": 1.6418549139345667e-05, - "loss": 0.2348, - "step": 5896 - }, - { - "epoch": 0.3, - "grad_norm": 1.0126074489129862, - "learning_rate": 1.641728611376958e-05, - "loss": 0.215, - "step": 5897 - }, - { - "epoch": 0.3, - "grad_norm": 0.8804663931110788, - "learning_rate": 1.641602291412424e-05, - "loss": 0.1978, - "step": 5898 - }, - { - "epoch": 0.3, - "grad_norm": 0.9066843573868038, - "learning_rate": 1.641475954044392e-05, - "loss": 0.214, - "step": 5899 - }, - { - "epoch": 0.3, - "grad_norm": 0.8398951594937323, - "learning_rate": 1.641349599276288e-05, - "loss": 0.1948, - "step": 5900 - }, - { - "epoch": 0.3, - "grad_norm": 0.88342216108323, - "learning_rate": 1.64122322711154e-05, - "loss": 0.2018, - "step": 5901 - }, - { - "epoch": 0.3, - "grad_norm": 0.8609414098283307, - "learning_rate": 1.6410968375535762e-05, - "loss": 0.2192, - "step": 5902 - }, - { - "epoch": 0.3, - "grad_norm": 0.7894700819728515, - "learning_rate": 1.640970430605824e-05, - "loss": 0.2002, - "step": 5903 - }, - { - "epoch": 0.3, - "grad_norm": 0.948260892075998, - "learning_rate": 1.640844006271713e-05, - "loss": 0.2033, - "step": 5904 - }, - { - "epoch": 0.3, - "grad_norm": 0.9018493655463135, - "learning_rate": 1.640717564554672e-05, - "loss": 0.2039, - "step": 5905 - }, - { - "epoch": 0.3, - "grad_norm": 0.8166650211310612, - "learning_rate": 1.6405911054581307e-05, - "loss": 0.1891, - "step": 5906 - }, - { - "epoch": 0.3, - "grad_norm": 0.9446127345677011, - "learning_rate": 1.6404646289855194e-05, - "loss": 0.2209, - "step": 5907 - }, - { - "epoch": 0.3, - "grad_norm": 0.8743844759657704, - "learning_rate": 1.640338135140269e-05, - "loss": 0.1924, - "step": 5908 - }, - { - "epoch": 0.3, - "grad_norm": 1.4321428005760437, - "learning_rate": 1.640211623925811e-05, - "loss": 0.2296, - "step": 5909 - }, - { - "epoch": 0.3, - "grad_norm": 0.9025069904442802, - "learning_rate": 1.640085095345576e-05, - "loss": 0.2115, - "step": 5910 - }, - { - "epoch": 0.3, - "grad_norm": 0.8714708162046169, - "learning_rate": 1.6399585494029968e-05, - "loss": 0.1909, - "step": 5911 - }, - { - "epoch": 0.3, - "grad_norm": 1.1490013763920408, - "learning_rate": 1.639831986101506e-05, - "loss": 0.2036, - "step": 5912 - }, - { - "epoch": 0.3, - "grad_norm": 0.9508681180299058, - "learning_rate": 1.639705405444536e-05, - "loss": 0.1905, - "step": 5913 - }, - { - "epoch": 0.3, - "grad_norm": 1.235841414052421, - "learning_rate": 1.6395788074355212e-05, - "loss": 0.1929, - "step": 5914 - }, - { - "epoch": 0.3, - "grad_norm": 1.1104003104638973, - "learning_rate": 1.639452192077895e-05, - "loss": 0.1946, - "step": 5915 - }, - { - "epoch": 0.3, - "grad_norm": 0.9560855958680096, - "learning_rate": 1.6393255593750917e-05, - "loss": 0.2202, - "step": 5916 - }, - { - "epoch": 0.3, - "grad_norm": 0.8003227650607686, - "learning_rate": 1.6391989093305468e-05, - "loss": 0.1872, - "step": 5917 - }, - { - "epoch": 0.3, - "grad_norm": 1.093372947587186, - "learning_rate": 1.6390722419476952e-05, - "loss": 0.215, - "step": 5918 - }, - { - "epoch": 0.3, - "grad_norm": 0.8234618930446632, - "learning_rate": 1.638945557229973e-05, - "loss": 0.2064, - "step": 5919 - }, - { - "epoch": 0.3, - "grad_norm": 1.0258032410734896, - "learning_rate": 1.6388188551808166e-05, - "loss": 0.1988, - "step": 5920 - }, - { - "epoch": 0.3, - "grad_norm": 1.1774833686716462, - "learning_rate": 1.6386921358036624e-05, - "loss": 0.1894, - "step": 5921 - }, - { - "epoch": 0.3, - "grad_norm": 0.8458940053240332, - "learning_rate": 1.638565399101948e-05, - "loss": 0.211, - "step": 5922 - }, - { - "epoch": 0.3, - "grad_norm": 1.0143614561029222, - "learning_rate": 1.6384386450791114e-05, - "loss": 0.2019, - "step": 5923 - }, - { - "epoch": 0.3, - "grad_norm": 0.9030318476331984, - "learning_rate": 1.6383118737385903e-05, - "loss": 0.1939, - "step": 5924 - }, - { - "epoch": 0.3, - "grad_norm": 0.9946142914549149, - "learning_rate": 1.6381850850838232e-05, - "loss": 0.1889, - "step": 5925 - }, - { - "epoch": 0.3, - "grad_norm": 0.9871772763534351, - "learning_rate": 1.63805827911825e-05, - "loss": 0.1854, - "step": 5926 - }, - { - "epoch": 0.3, - "grad_norm": 0.9040610810639874, - "learning_rate": 1.63793145584531e-05, - "loss": 0.1925, - "step": 5927 - }, - { - "epoch": 0.3, - "grad_norm": 0.9290544683412078, - "learning_rate": 1.637804615268443e-05, - "loss": 0.2287, - "step": 5928 - }, - { - "epoch": 0.3, - "grad_norm": 0.9562484980139982, - "learning_rate": 1.63767775739109e-05, - "loss": 0.2036, - "step": 5929 - }, - { - "epoch": 0.3, - "grad_norm": 0.8777655403386205, - "learning_rate": 1.6375508822166917e-05, - "loss": 0.2296, - "step": 5930 - }, - { - "epoch": 0.3, - "grad_norm": 0.8735047939100099, - "learning_rate": 1.63742398974869e-05, - "loss": 0.1851, - "step": 5931 - }, - { - "epoch": 0.3, - "grad_norm": 0.935282896007759, - "learning_rate": 1.6372970799905262e-05, - "loss": 0.2209, - "step": 5932 - }, - { - "epoch": 0.3, - "grad_norm": 1.0901327351521781, - "learning_rate": 1.6371701529456433e-05, - "loss": 0.2119, - "step": 5933 - }, - { - "epoch": 0.3, - "grad_norm": 1.1528605527009306, - "learning_rate": 1.6370432086174837e-05, - "loss": 0.1906, - "step": 5934 - }, - { - "epoch": 0.3, - "grad_norm": 0.8206100964092897, - "learning_rate": 1.6369162470094915e-05, - "loss": 0.2065, - "step": 5935 - }, - { - "epoch": 0.3, - "grad_norm": 0.8480007652060755, - "learning_rate": 1.63678926812511e-05, - "loss": 0.1984, - "step": 5936 - }, - { - "epoch": 0.3, - "grad_norm": 1.2839298260001515, - "learning_rate": 1.6366622719677834e-05, - "loss": 0.2135, - "step": 5937 - }, - { - "epoch": 0.3, - "grad_norm": 0.9534265566922148, - "learning_rate": 1.6365352585409572e-05, - "loss": 0.2263, - "step": 5938 - }, - { - "epoch": 0.3, - "grad_norm": 0.962950305698948, - "learning_rate": 1.636408227848076e-05, - "loss": 0.2058, - "step": 5939 - }, - { - "epoch": 0.3, - "grad_norm": 0.9968989531366338, - "learning_rate": 1.6362811798925852e-05, - "loss": 0.2056, - "step": 5940 - }, - { - "epoch": 0.3, - "grad_norm": 0.9642269498234763, - "learning_rate": 1.636154114677932e-05, - "loss": 0.2174, - "step": 5941 - }, - { - "epoch": 0.3, - "grad_norm": 0.8536369510964444, - "learning_rate": 1.636027032207562e-05, - "loss": 0.1977, - "step": 5942 - }, - { - "epoch": 0.3, - "grad_norm": 2.701056712938503, - "learning_rate": 1.6358999324849235e-05, - "loss": 0.2213, - "step": 5943 - }, - { - "epoch": 0.3, - "grad_norm": 0.7754497041155105, - "learning_rate": 1.635772815513463e-05, - "loss": 0.1935, - "step": 5944 - }, - { - "epoch": 0.3, - "grad_norm": 0.9182076271424672, - "learning_rate": 1.635645681296629e-05, - "loss": 0.1837, - "step": 5945 - }, - { - "epoch": 0.3, - "grad_norm": 0.9513656758902006, - "learning_rate": 1.63551852983787e-05, - "loss": 0.2034, - "step": 5946 - }, - { - "epoch": 0.3, - "grad_norm": 0.8099373859425659, - "learning_rate": 1.635391361140635e-05, - "loss": 0.2035, - "step": 5947 - }, - { - "epoch": 0.3, - "grad_norm": 1.2689296573815851, - "learning_rate": 1.6352641752083734e-05, - "loss": 0.2376, - "step": 5948 - }, - { - "epoch": 0.3, - "grad_norm": 1.3730141177994373, - "learning_rate": 1.6351369720445353e-05, - "loss": 0.2268, - "step": 5949 - }, - { - "epoch": 0.3, - "grad_norm": 0.8013748927283166, - "learning_rate": 1.6350097516525705e-05, - "loss": 0.2, - "step": 5950 - }, - { - "epoch": 0.3, - "grad_norm": 1.116575591861395, - "learning_rate": 1.634882514035931e-05, - "loss": 0.1904, - "step": 5951 - }, - { - "epoch": 0.3, - "grad_norm": 1.0059605669840697, - "learning_rate": 1.6347552591980672e-05, - "loss": 0.2203, - "step": 5952 - }, - { - "epoch": 0.3, - "grad_norm": 0.9044501685247852, - "learning_rate": 1.634627987142431e-05, - "loss": 0.2197, - "step": 5953 - }, - { - "epoch": 0.3, - "grad_norm": 1.1633662000849947, - "learning_rate": 1.6345006978724748e-05, - "loss": 0.2302, - "step": 5954 - }, - { - "epoch": 0.3, - "grad_norm": 1.0048216985971365, - "learning_rate": 1.6343733913916516e-05, - "loss": 0.199, - "step": 5955 - }, - { - "epoch": 0.3, - "grad_norm": 0.7956734026992229, - "learning_rate": 1.634246067703414e-05, - "loss": 0.2101, - "step": 5956 - }, - { - "epoch": 0.3, - "grad_norm": 1.0101755028948003, - "learning_rate": 1.6341187268112162e-05, - "loss": 0.2084, - "step": 5957 - }, - { - "epoch": 0.3, - "grad_norm": 0.9222892642766556, - "learning_rate": 1.633991368718512e-05, - "loss": 0.2193, - "step": 5958 - }, - { - "epoch": 0.3, - "grad_norm": 0.9677295069555725, - "learning_rate": 1.6338639934287563e-05, - "loss": 0.2085, - "step": 5959 - }, - { - "epoch": 0.3, - "grad_norm": 0.9158364680199044, - "learning_rate": 1.6337366009454037e-05, - "loss": 0.1924, - "step": 5960 - }, - { - "epoch": 0.3, - "grad_norm": 0.944337670541037, - "learning_rate": 1.6336091912719102e-05, - "loss": 0.2005, - "step": 5961 - }, - { - "epoch": 0.3, - "grad_norm": 0.8085176627955762, - "learning_rate": 1.6334817644117316e-05, - "loss": 0.2191, - "step": 5962 - }, - { - "epoch": 0.3, - "grad_norm": 1.4056936577366979, - "learning_rate": 1.633354320368324e-05, - "loss": 0.1941, - "step": 5963 - }, - { - "epoch": 0.3, - "grad_norm": 0.8614378964196189, - "learning_rate": 1.6332268591451454e-05, - "loss": 0.2031, - "step": 5964 - }, - { - "epoch": 0.3, - "grad_norm": 0.934286179464845, - "learning_rate": 1.633099380745652e-05, - "loss": 0.1998, - "step": 5965 - }, - { - "epoch": 0.3, - "grad_norm": 0.9754800276459915, - "learning_rate": 1.6329718851733024e-05, - "loss": 0.2147, - "step": 5966 - }, - { - "epoch": 0.3, - "grad_norm": 1.1766704671644548, - "learning_rate": 1.6328443724315544e-05, - "loss": 0.2143, - "step": 5967 - }, - { - "epoch": 0.3, - "grad_norm": 1.0562421455698277, - "learning_rate": 1.6327168425238672e-05, - "loss": 0.213, - "step": 5968 - }, - { - "epoch": 0.3, - "grad_norm": 0.9481384551774131, - "learning_rate": 1.6325892954536997e-05, - "loss": 0.1783, - "step": 5969 - }, - { - "epoch": 0.3, - "grad_norm": 1.1198994360550176, - "learning_rate": 1.6324617312245123e-05, - "loss": 0.1809, - "step": 5970 - }, - { - "epoch": 0.3, - "grad_norm": 0.865446262601342, - "learning_rate": 1.632334149839764e-05, - "loss": 0.167, - "step": 5971 - }, - { - "epoch": 0.3, - "grad_norm": 0.8359926893079918, - "learning_rate": 1.632206551302917e-05, - "loss": 0.2142, - "step": 5972 - }, - { - "epoch": 0.3, - "grad_norm": 1.1003436066740737, - "learning_rate": 1.632078935617431e-05, - "loss": 0.1987, - "step": 5973 - }, - { - "epoch": 0.3, - "grad_norm": 1.1175808214952012, - "learning_rate": 1.6319513027867683e-05, - "loss": 0.2127, - "step": 5974 - }, - { - "epoch": 0.3, - "grad_norm": 1.09109343841727, - "learning_rate": 1.631823652814391e-05, - "loss": 0.2195, - "step": 5975 - }, - { - "epoch": 0.3, - "grad_norm": 1.5938819437245713, - "learning_rate": 1.631695985703761e-05, - "loss": 0.1932, - "step": 5976 - }, - { - "epoch": 0.3, - "grad_norm": 1.1572532065360304, - "learning_rate": 1.631568301458342e-05, - "loss": 0.2291, - "step": 5977 - }, - { - "epoch": 0.3, - "grad_norm": 0.7935200691091274, - "learning_rate": 1.6314406000815975e-05, - "loss": 0.1829, - "step": 5978 - }, - { - "epoch": 0.3, - "grad_norm": 1.1058627197706676, - "learning_rate": 1.6313128815769904e-05, - "loss": 0.2027, - "step": 5979 - }, - { - "epoch": 0.3, - "grad_norm": 0.8970087572102301, - "learning_rate": 1.631185145947986e-05, - "loss": 0.2225, - "step": 5980 - }, - { - "epoch": 0.3, - "grad_norm": 0.8393393193043988, - "learning_rate": 1.631057393198049e-05, - "loss": 0.2061, - "step": 5981 - }, - { - "epoch": 0.3, - "grad_norm": 0.9851589297321419, - "learning_rate": 1.6309296233306446e-05, - "loss": 0.2127, - "step": 5982 - }, - { - "epoch": 0.3, - "grad_norm": 1.3751830177191615, - "learning_rate": 1.6308018363492385e-05, - "loss": 0.2221, - "step": 5983 - }, - { - "epoch": 0.3, - "grad_norm": 0.8238273156343242, - "learning_rate": 1.630674032257297e-05, - "loss": 0.1916, - "step": 5984 - }, - { - "epoch": 0.3, - "grad_norm": 1.0898527791933963, - "learning_rate": 1.6305462110582863e-05, - "loss": 0.2429, - "step": 5985 - }, - { - "epoch": 0.3, - "grad_norm": 1.302930626821737, - "learning_rate": 1.6304183727556747e-05, - "loss": 0.2166, - "step": 5986 - }, - { - "epoch": 0.3, - "grad_norm": 3.39479291669915, - "learning_rate": 1.6302905173529285e-05, - "loss": 0.2118, - "step": 5987 - }, - { - "epoch": 0.3, - "grad_norm": 1.4736219557226864, - "learning_rate": 1.6301626448535168e-05, - "loss": 0.2022, - "step": 5988 - }, - { - "epoch": 0.3, - "grad_norm": 0.9127817268931459, - "learning_rate": 1.6300347552609074e-05, - "loss": 0.1959, - "step": 5989 - }, - { - "epoch": 0.3, - "grad_norm": 1.008387597169717, - "learning_rate": 1.62990684857857e-05, - "loss": 0.227, - "step": 5990 - }, - { - "epoch": 0.3, - "grad_norm": 0.8037508248908429, - "learning_rate": 1.6297789248099736e-05, - "loss": 0.21, - "step": 5991 - }, - { - "epoch": 0.3, - "grad_norm": 1.087320987869786, - "learning_rate": 1.6296509839585885e-05, - "loss": 0.2035, - "step": 5992 - }, - { - "epoch": 0.3, - "grad_norm": 0.8294639592735905, - "learning_rate": 1.6295230260278847e-05, - "loss": 0.2123, - "step": 5993 - }, - { - "epoch": 0.3, - "grad_norm": 0.7337293869270486, - "learning_rate": 1.6293950510213335e-05, - "loss": 0.2082, - "step": 5994 - }, - { - "epoch": 0.3, - "grad_norm": 0.8497313029047259, - "learning_rate": 1.6292670589424057e-05, - "loss": 0.2148, - "step": 5995 - }, - { - "epoch": 0.3, - "grad_norm": 1.6162721450291733, - "learning_rate": 1.6291390497945737e-05, - "loss": 0.2115, - "step": 5996 - }, - { - "epoch": 0.3, - "grad_norm": 0.8330036247733196, - "learning_rate": 1.6290110235813094e-05, - "loss": 0.1815, - "step": 5997 - }, - { - "epoch": 0.31, - "grad_norm": 1.1318812757855268, - "learning_rate": 1.6288829803060853e-05, - "loss": 0.2048, - "step": 5998 - }, - { - "epoch": 0.31, - "grad_norm": 1.82097750858233, - "learning_rate": 1.6287549199723745e-05, - "loss": 0.2189, - "step": 5999 - }, - { - "epoch": 0.31, - "grad_norm": 0.8236429171653487, - "learning_rate": 1.6286268425836517e-05, - "loss": 0.2, - "step": 6000 - }, - { - "epoch": 0.31, - "grad_norm": 1.5845029614639, - "learning_rate": 1.62849874814339e-05, - "loss": 0.1795, - "step": 6001 - }, - { - "epoch": 0.31, - "grad_norm": 1.4522997519291503, - "learning_rate": 1.6283706366550646e-05, - "loss": 0.2155, - "step": 6002 - }, - { - "epoch": 0.31, - "grad_norm": 0.9965723060845276, - "learning_rate": 1.6282425081221498e-05, - "loss": 0.1913, - "step": 6003 - }, - { - "epoch": 0.31, - "grad_norm": 1.0240651787267885, - "learning_rate": 1.6281143625481214e-05, - "loss": 0.1995, - "step": 6004 - }, - { - "epoch": 0.31, - "grad_norm": 0.7303063496135936, - "learning_rate": 1.627986199936456e-05, - "loss": 0.2153, - "step": 6005 - }, - { - "epoch": 0.31, - "grad_norm": 0.9488526266709439, - "learning_rate": 1.6278580202906287e-05, - "loss": 0.2074, - "step": 6006 - }, - { - "epoch": 0.31, - "grad_norm": 0.8874494026802576, - "learning_rate": 1.6277298236141177e-05, - "loss": 0.234, - "step": 6007 - }, - { - "epoch": 0.31, - "grad_norm": 0.9477817405838913, - "learning_rate": 1.6276016099103995e-05, - "loss": 0.2199, - "step": 6008 - }, - { - "epoch": 0.31, - "grad_norm": 1.1014385615064697, - "learning_rate": 1.6274733791829522e-05, - "loss": 0.2199, - "step": 6009 - }, - { - "epoch": 0.31, - "grad_norm": 1.0774681703386346, - "learning_rate": 1.627345131435254e-05, - "loss": 0.2538, - "step": 6010 - }, - { - "epoch": 0.31, - "grad_norm": 1.2211872301986042, - "learning_rate": 1.6272168666707838e-05, - "loss": 0.2199, - "step": 6011 - }, - { - "epoch": 0.31, - "grad_norm": 0.7944085611231179, - "learning_rate": 1.627088584893021e-05, - "loss": 0.1968, - "step": 6012 - }, - { - "epoch": 0.31, - "grad_norm": 1.1626975384193183, - "learning_rate": 1.6269602861054442e-05, - "loss": 0.2084, - "step": 6013 - }, - { - "epoch": 0.31, - "grad_norm": 0.9266890007568821, - "learning_rate": 1.6268319703115348e-05, - "loss": 0.1952, - "step": 6014 - }, - { - "epoch": 0.31, - "grad_norm": 1.0407450828399152, - "learning_rate": 1.6267036375147728e-05, - "loss": 0.2085, - "step": 6015 - }, - { - "epoch": 0.31, - "grad_norm": 0.9540943475088866, - "learning_rate": 1.6265752877186386e-05, - "loss": 0.1963, - "step": 6016 - }, - { - "epoch": 0.31, - "grad_norm": 0.9119042826028682, - "learning_rate": 1.626446920926615e-05, - "loss": 0.2015, - "step": 6017 - }, - { - "epoch": 0.31, - "grad_norm": 1.3939619942230101, - "learning_rate": 1.626318537142183e-05, - "loss": 0.1915, - "step": 6018 - }, - { - "epoch": 0.31, - "grad_norm": 1.4344270785747297, - "learning_rate": 1.6261901363688257e-05, - "loss": 0.2255, - "step": 6019 - }, - { - "epoch": 0.31, - "grad_norm": 1.079480238045351, - "learning_rate": 1.626061718610025e-05, - "loss": 0.2304, - "step": 6020 - }, - { - "epoch": 0.31, - "grad_norm": 1.4324024219544331, - "learning_rate": 1.625933283869265e-05, - "loss": 0.1926, - "step": 6021 - }, - { - "epoch": 0.31, - "grad_norm": 1.7344142079737424, - "learning_rate": 1.6258048321500294e-05, - "loss": 0.1956, - "step": 6022 - }, - { - "epoch": 0.31, - "grad_norm": 1.1070084317838647, - "learning_rate": 1.6256763634558024e-05, - "loss": 0.2114, - "step": 6023 - }, - { - "epoch": 0.31, - "grad_norm": 0.9764029510305736, - "learning_rate": 1.625547877790069e-05, - "loss": 0.1983, - "step": 6024 - }, - { - "epoch": 0.31, - "grad_norm": 0.9096497335946024, - "learning_rate": 1.6254193751563137e-05, - "loss": 0.2244, - "step": 6025 - }, - { - "epoch": 0.31, - "grad_norm": 0.9647081068711773, - "learning_rate": 1.6252908555580223e-05, - "loss": 0.2021, - "step": 6026 - }, - { - "epoch": 0.31, - "grad_norm": 1.003658864196987, - "learning_rate": 1.6251623189986815e-05, - "loss": 0.2282, - "step": 6027 - }, - { - "epoch": 0.31, - "grad_norm": 1.0027742910910584, - "learning_rate": 1.6250337654817774e-05, - "loss": 0.2084, - "step": 6028 - }, - { - "epoch": 0.31, - "grad_norm": 1.0873519198431463, - "learning_rate": 1.624905195010797e-05, - "loss": 0.2172, - "step": 6029 - }, - { - "epoch": 0.31, - "grad_norm": 1.0149186797500493, - "learning_rate": 1.6247766075892283e-05, - "loss": 0.2034, - "step": 6030 - }, - { - "epoch": 0.31, - "grad_norm": 0.9063023009686626, - "learning_rate": 1.624648003220558e-05, - "loss": 0.1857, - "step": 6031 - }, - { - "epoch": 0.31, - "grad_norm": 0.7650021001433587, - "learning_rate": 1.624519381908276e-05, - "loss": 0.2074, - "step": 6032 - }, - { - "epoch": 0.31, - "grad_norm": 1.3530888182017393, - "learning_rate": 1.6243907436558705e-05, - "loss": 0.2268, - "step": 6033 - }, - { - "epoch": 0.31, - "grad_norm": 0.911985352429917, - "learning_rate": 1.624262088466831e-05, - "loss": 0.2375, - "step": 6034 - }, - { - "epoch": 0.31, - "grad_norm": 1.6303058569455007, - "learning_rate": 1.6241334163446465e-05, - "loss": 0.2074, - "step": 6035 - }, - { - "epoch": 0.31, - "grad_norm": 1.459715495070587, - "learning_rate": 1.6240047272928082e-05, - "loss": 0.2149, - "step": 6036 - }, - { - "epoch": 0.31, - "grad_norm": 0.8229690605766699, - "learning_rate": 1.6238760213148064e-05, - "loss": 0.1893, - "step": 6037 - }, - { - "epoch": 0.31, - "grad_norm": 1.6615636863545942, - "learning_rate": 1.6237472984141322e-05, - "loss": 0.2066, - "step": 6038 - }, - { - "epoch": 0.31, - "grad_norm": 0.8825843487781814, - "learning_rate": 1.623618558594277e-05, - "loss": 0.1996, - "step": 6039 - }, - { - "epoch": 0.31, - "grad_norm": 0.8986305451461882, - "learning_rate": 1.6234898018587336e-05, - "loss": 0.193, - "step": 6040 - }, - { - "epoch": 0.31, - "grad_norm": 0.8603017400911558, - "learning_rate": 1.623361028210994e-05, - "loss": 0.2074, - "step": 6041 - }, - { - "epoch": 0.31, - "grad_norm": 1.0757625435404399, - "learning_rate": 1.6232322376545516e-05, - "loss": 0.1834, - "step": 6042 - }, - { - "epoch": 0.31, - "grad_norm": 0.9255175015178886, - "learning_rate": 1.623103430192899e-05, - "loss": 0.1739, - "step": 6043 - }, - { - "epoch": 0.31, - "grad_norm": 1.0095454531080827, - "learning_rate": 1.6229746058295312e-05, - "loss": 0.2135, - "step": 6044 - }, - { - "epoch": 0.31, - "grad_norm": 1.7730502584757073, - "learning_rate": 1.6228457645679414e-05, - "loss": 0.2339, - "step": 6045 - }, - { - "epoch": 0.31, - "grad_norm": 0.9101283209796647, - "learning_rate": 1.6227169064116255e-05, - "loss": 0.2184, - "step": 6046 - }, - { - "epoch": 0.31, - "grad_norm": 1.4470438726261499, - "learning_rate": 1.622588031364078e-05, - "loss": 0.2147, - "step": 6047 - }, - { - "epoch": 0.31, - "grad_norm": 1.6151800077895164, - "learning_rate": 1.6224591394287954e-05, - "loss": 0.2114, - "step": 6048 - }, - { - "epoch": 0.31, - "grad_norm": 1.141700862481167, - "learning_rate": 1.6223302306092733e-05, - "loss": 0.2097, - "step": 6049 - }, - { - "epoch": 0.31, - "grad_norm": 3.6258373665253605, - "learning_rate": 1.6222013049090086e-05, - "loss": 0.2116, - "step": 6050 - }, - { - "epoch": 0.31, - "grad_norm": 1.0270473789404706, - "learning_rate": 1.6220723623314983e-05, - "loss": 0.2229, - "step": 6051 - }, - { - "epoch": 0.31, - "grad_norm": 0.9431739891813424, - "learning_rate": 1.6219434028802402e-05, - "loss": 0.2079, - "step": 6052 - }, - { - "epoch": 0.31, - "grad_norm": 1.0931954007419473, - "learning_rate": 1.621814426558732e-05, - "loss": 0.225, - "step": 6053 - }, - { - "epoch": 0.31, - "grad_norm": 1.5694243876987546, - "learning_rate": 1.6216854333704725e-05, - "loss": 0.2058, - "step": 6054 - }, - { - "epoch": 0.31, - "grad_norm": 1.858906512456257, - "learning_rate": 1.6215564233189606e-05, - "loss": 0.2106, - "step": 6055 - }, - { - "epoch": 0.31, - "grad_norm": 0.8471245246956891, - "learning_rate": 1.621427396407695e-05, - "loss": 0.2, - "step": 6056 - }, - { - "epoch": 0.31, - "grad_norm": 1.0547280866759583, - "learning_rate": 1.6212983526401767e-05, - "loss": 0.2011, - "step": 6057 - }, - { - "epoch": 0.31, - "grad_norm": 1.119564287962234, - "learning_rate": 1.6211692920199054e-05, - "loss": 0.1756, - "step": 6058 - }, - { - "epoch": 0.31, - "grad_norm": 1.1099677356408273, - "learning_rate": 1.621040214550382e-05, - "loss": 0.177, - "step": 6059 - }, - { - "epoch": 0.31, - "grad_norm": 1.0913921470754946, - "learning_rate": 1.6209111202351076e-05, - "loss": 0.22, - "step": 6060 - }, - { - "epoch": 0.31, - "grad_norm": 1.0658146194201514, - "learning_rate": 1.620782009077584e-05, - "loss": 0.2421, - "step": 6061 - }, - { - "epoch": 0.31, - "grad_norm": 2.5260663580908203, - "learning_rate": 1.6206528810813135e-05, - "loss": 0.1937, - "step": 6062 - }, - { - "epoch": 0.31, - "grad_norm": 0.9794394801934251, - "learning_rate": 1.6205237362497982e-05, - "loss": 0.2261, - "step": 6063 - }, - { - "epoch": 0.31, - "grad_norm": 1.5247938545986877, - "learning_rate": 1.6203945745865418e-05, - "loss": 0.2084, - "step": 6064 - }, - { - "epoch": 0.31, - "grad_norm": 1.242093586082393, - "learning_rate": 1.6202653960950474e-05, - "loss": 0.2221, - "step": 6065 - }, - { - "epoch": 0.31, - "grad_norm": 1.6609521159811682, - "learning_rate": 1.6201362007788193e-05, - "loss": 0.1981, - "step": 6066 - }, - { - "epoch": 0.31, - "grad_norm": 0.9717974454332525, - "learning_rate": 1.6200069886413612e-05, - "loss": 0.2035, - "step": 6067 - }, - { - "epoch": 0.31, - "grad_norm": 1.040278624958183, - "learning_rate": 1.6198777596861792e-05, - "loss": 0.2517, - "step": 6068 - }, - { - "epoch": 0.31, - "grad_norm": 1.060313397045809, - "learning_rate": 1.6197485139167775e-05, - "loss": 0.2244, - "step": 6069 - }, - { - "epoch": 0.31, - "grad_norm": 1.4185748386978991, - "learning_rate": 1.619619251336663e-05, - "loss": 0.1927, - "step": 6070 - }, - { - "epoch": 0.31, - "grad_norm": 1.1902479620651243, - "learning_rate": 1.6194899719493404e-05, - "loss": 0.2443, - "step": 6071 - }, - { - "epoch": 0.31, - "grad_norm": 1.0869353007806424, - "learning_rate": 1.619360675758318e-05, - "loss": 0.1993, - "step": 6072 - }, - { - "epoch": 0.31, - "grad_norm": 1.4174221590710505, - "learning_rate": 1.619231362767102e-05, - "loss": 0.1729, - "step": 6073 - }, - { - "epoch": 0.31, - "grad_norm": 1.0794084394279622, - "learning_rate": 1.6191020329792003e-05, - "loss": 0.1664, - "step": 6074 - }, - { - "epoch": 0.31, - "grad_norm": 0.7769533876934619, - "learning_rate": 1.6189726863981212e-05, - "loss": 0.1774, - "step": 6075 - }, - { - "epoch": 0.31, - "grad_norm": 1.0305866506804409, - "learning_rate": 1.618843323027373e-05, - "loss": 0.2087, - "step": 6076 - }, - { - "epoch": 0.31, - "grad_norm": 0.8876921764605629, - "learning_rate": 1.6187139428704645e-05, - "loss": 0.2175, - "step": 6077 - }, - { - "epoch": 0.31, - "grad_norm": 1.0822377892878023, - "learning_rate": 1.6185845459309053e-05, - "loss": 0.1939, - "step": 6078 - }, - { - "epoch": 0.31, - "grad_norm": 0.9176729470876167, - "learning_rate": 1.6184551322122056e-05, - "loss": 0.1904, - "step": 6079 - }, - { - "epoch": 0.31, - "grad_norm": 1.1716604783757618, - "learning_rate": 1.6183257017178754e-05, - "loss": 0.1996, - "step": 6080 - }, - { - "epoch": 0.31, - "grad_norm": 1.2684319856473547, - "learning_rate": 1.6181962544514257e-05, - "loss": 0.1879, - "step": 6081 - }, - { - "epoch": 0.31, - "grad_norm": 0.8193278183743143, - "learning_rate": 1.6180667904163675e-05, - "loss": 0.1868, - "step": 6082 - }, - { - "epoch": 0.31, - "grad_norm": 1.5171040957386068, - "learning_rate": 1.617937309616213e-05, - "loss": 0.219, - "step": 6083 - }, - { - "epoch": 0.31, - "grad_norm": 1.33064610781682, - "learning_rate": 1.6178078120544735e-05, - "loss": 0.205, - "step": 6084 - }, - { - "epoch": 0.31, - "grad_norm": 1.6137351930166788, - "learning_rate": 1.6176782977346626e-05, - "loss": 0.1999, - "step": 6085 - }, - { - "epoch": 0.31, - "grad_norm": 1.5597204402629181, - "learning_rate": 1.6175487666602928e-05, - "loss": 0.215, - "step": 6086 - }, - { - "epoch": 0.31, - "grad_norm": 1.1470285061130765, - "learning_rate": 1.617419218834878e-05, - "loss": 0.2121, - "step": 6087 - }, - { - "epoch": 0.31, - "grad_norm": 1.2800202663819764, - "learning_rate": 1.617289654261932e-05, - "loss": 0.2111, - "step": 6088 - }, - { - "epoch": 0.31, - "grad_norm": 1.545652925213808, - "learning_rate": 1.6171600729449693e-05, - "loss": 0.2338, - "step": 6089 - }, - { - "epoch": 0.31, - "grad_norm": 1.1290088033332193, - "learning_rate": 1.617030474887505e-05, - "loss": 0.1965, - "step": 6090 - }, - { - "epoch": 0.31, - "grad_norm": 0.969297537706917, - "learning_rate": 1.616900860093054e-05, - "loss": 0.2065, - "step": 6091 - }, - { - "epoch": 0.31, - "grad_norm": 1.190717952530125, - "learning_rate": 1.616771228565132e-05, - "loss": 0.195, - "step": 6092 - }, - { - "epoch": 0.31, - "grad_norm": 1.1595524076758887, - "learning_rate": 1.616641580307256e-05, - "loss": 0.2136, - "step": 6093 - }, - { - "epoch": 0.31, - "grad_norm": 1.6403050277216027, - "learning_rate": 1.616511915322942e-05, - "loss": 0.1861, - "step": 6094 - }, - { - "epoch": 0.31, - "grad_norm": 1.6437870280410332, - "learning_rate": 1.6163822336157076e-05, - "loss": 0.21, - "step": 6095 - }, - { - "epoch": 0.31, - "grad_norm": 1.8419726499255795, - "learning_rate": 1.6162525351890702e-05, - "loss": 0.207, - "step": 6096 - }, - { - "epoch": 0.31, - "grad_norm": 1.1212531960451904, - "learning_rate": 1.6161228200465485e-05, - "loss": 0.1988, - "step": 6097 - }, - { - "epoch": 0.31, - "grad_norm": 1.051091374373422, - "learning_rate": 1.61599308819166e-05, - "loss": 0.2186, - "step": 6098 - }, - { - "epoch": 0.31, - "grad_norm": 0.9688417139595524, - "learning_rate": 1.6158633396279243e-05, - "loss": 0.243, - "step": 6099 - }, - { - "epoch": 0.31, - "grad_norm": 1.2062547757471067, - "learning_rate": 1.6157335743588606e-05, - "loss": 0.2107, - "step": 6100 - }, - { - "epoch": 0.31, - "grad_norm": 1.0249173195322316, - "learning_rate": 1.6156037923879893e-05, - "loss": 0.2127, - "step": 6101 - }, - { - "epoch": 0.31, - "grad_norm": 1.3317006096755801, - "learning_rate": 1.61547399371883e-05, - "loss": 0.2066, - "step": 6102 - }, - { - "epoch": 0.31, - "grad_norm": 1.0908772213653302, - "learning_rate": 1.6153441783549043e-05, - "loss": 0.2107, - "step": 6103 - }, - { - "epoch": 0.31, - "grad_norm": 2.061902894350475, - "learning_rate": 1.6152143462997325e-05, - "loss": 0.2165, - "step": 6104 - }, - { - "epoch": 0.31, - "grad_norm": 1.4954267996419317, - "learning_rate": 1.615084497556837e-05, - "loss": 0.2195, - "step": 6105 - }, - { - "epoch": 0.31, - "grad_norm": 1.5649819771869884, - "learning_rate": 1.61495463212974e-05, - "loss": 0.2019, - "step": 6106 - }, - { - "epoch": 0.31, - "grad_norm": 1.2063094496662974, - "learning_rate": 1.6148247500219635e-05, - "loss": 0.2021, - "step": 6107 - }, - { - "epoch": 0.31, - "grad_norm": 0.995772199176662, - "learning_rate": 1.614694851237031e-05, - "loss": 0.1937, - "step": 6108 - }, - { - "epoch": 0.31, - "grad_norm": 2.9327743983007237, - "learning_rate": 1.614564935778466e-05, - "loss": 0.2104, - "step": 6109 - }, - { - "epoch": 0.31, - "grad_norm": 1.4621831264462208, - "learning_rate": 1.6144350036497925e-05, - "loss": 0.1952, - "step": 6110 - }, - { - "epoch": 0.31, - "grad_norm": 1.882534748644571, - "learning_rate": 1.6143050548545348e-05, - "loss": 0.1974, - "step": 6111 - }, - { - "epoch": 0.31, - "grad_norm": 2.1978580185486694, - "learning_rate": 1.6141750893962176e-05, - "loss": 0.1929, - "step": 6112 - }, - { - "epoch": 0.31, - "grad_norm": 2.1465567091981104, - "learning_rate": 1.6140451072783666e-05, - "loss": 0.2049, - "step": 6113 - }, - { - "epoch": 0.31, - "grad_norm": 1.2664074830992735, - "learning_rate": 1.613915108504507e-05, - "loss": 0.1923, - "step": 6114 - }, - { - "epoch": 0.31, - "grad_norm": 1.0980669945835926, - "learning_rate": 1.613785093078166e-05, - "loss": 0.1896, - "step": 6115 - }, - { - "epoch": 0.31, - "grad_norm": 1.631937385976018, - "learning_rate": 1.6136550610028694e-05, - "loss": 0.1885, - "step": 6116 - }, - { - "epoch": 0.31, - "grad_norm": 1.5889117710056704, - "learning_rate": 1.6135250122821445e-05, - "loss": 0.1946, - "step": 6117 - }, - { - "epoch": 0.31, - "grad_norm": 1.5134914654859153, - "learning_rate": 1.6133949469195194e-05, - "loss": 0.2162, - "step": 6118 - }, - { - "epoch": 0.31, - "grad_norm": 1.2476327546290005, - "learning_rate": 1.6132648649185214e-05, - "loss": 0.2031, - "step": 6119 - }, - { - "epoch": 0.31, - "grad_norm": 1.8453200099035407, - "learning_rate": 1.6131347662826793e-05, - "loss": 0.1868, - "step": 6120 - }, - { - "epoch": 0.31, - "grad_norm": 1.319105677189608, - "learning_rate": 1.613004651015522e-05, - "loss": 0.2438, - "step": 6121 - }, - { - "epoch": 0.31, - "grad_norm": 2.0640604006619943, - "learning_rate": 1.612874519120579e-05, - "loss": 0.2214, - "step": 6122 - }, - { - "epoch": 0.31, - "grad_norm": 1.4071140029611668, - "learning_rate": 1.61274437060138e-05, - "loss": 0.2094, - "step": 6123 - }, - { - "epoch": 0.31, - "grad_norm": 1.4588025197496317, - "learning_rate": 1.6126142054614556e-05, - "loss": 0.2015, - "step": 6124 - }, - { - "epoch": 0.31, - "grad_norm": 2.1702119223872676, - "learning_rate": 1.612484023704336e-05, - "loss": 0.1994, - "step": 6125 - }, - { - "epoch": 0.31, - "grad_norm": 1.3620761809710489, - "learning_rate": 1.6123538253335526e-05, - "loss": 0.1852, - "step": 6126 - }, - { - "epoch": 0.31, - "grad_norm": 2.4131413570897076, - "learning_rate": 1.612223610352637e-05, - "loss": 0.2007, - "step": 6127 - }, - { - "epoch": 0.31, - "grad_norm": 1.835054207929354, - "learning_rate": 1.612093378765122e-05, - "loss": 0.1974, - "step": 6128 - }, - { - "epoch": 0.31, - "grad_norm": 1.6071319397296138, - "learning_rate": 1.611963130574539e-05, - "loss": 0.2054, - "step": 6129 - }, - { - "epoch": 0.31, - "grad_norm": 1.7980661677245373, - "learning_rate": 1.6118328657844216e-05, - "loss": 0.2101, - "step": 6130 - }, - { - "epoch": 0.31, - "grad_norm": 1.5776819747938176, - "learning_rate": 1.6117025843983032e-05, - "loss": 0.2358, - "step": 6131 - }, - { - "epoch": 0.31, - "grad_norm": 2.877337042985936, - "learning_rate": 1.6115722864197178e-05, - "loss": 0.2195, - "step": 6132 - }, - { - "epoch": 0.31, - "grad_norm": 2.5470479776051027, - "learning_rate": 1.6114419718521994e-05, - "loss": 0.2126, - "step": 6133 - }, - { - "epoch": 0.31, - "grad_norm": 3.749177645710888, - "learning_rate": 1.6113116406992833e-05, - "loss": 0.1907, - "step": 6134 - }, - { - "epoch": 0.31, - "grad_norm": 1.4421813486978317, - "learning_rate": 1.611181292964504e-05, - "loss": 0.207, - "step": 6135 - }, - { - "epoch": 0.31, - "grad_norm": 1.520289919949835, - "learning_rate": 1.611050928651398e-05, - "loss": 0.2052, - "step": 6136 - }, - { - "epoch": 0.31, - "grad_norm": 1.293791051110462, - "learning_rate": 1.6109205477635006e-05, - "loss": 0.2182, - "step": 6137 - }, - { - "epoch": 0.31, - "grad_norm": 5.513386645098428, - "learning_rate": 1.610790150304349e-05, - "loss": 0.2319, - "step": 6138 - }, - { - "epoch": 0.31, - "grad_norm": 3.0876460560077863, - "learning_rate": 1.61065973627748e-05, - "loss": 0.2333, - "step": 6139 - }, - { - "epoch": 0.31, - "grad_norm": 2.1635359350738135, - "learning_rate": 1.6105293056864314e-05, - "loss": 0.1949, - "step": 6140 - }, - { - "epoch": 0.31, - "grad_norm": 1.9290528759839747, - "learning_rate": 1.6103988585347408e-05, - "loss": 0.2172, - "step": 6141 - }, - { - "epoch": 0.31, - "grad_norm": 1.5256634967346925, - "learning_rate": 1.6102683948259467e-05, - "loss": 0.1897, - "step": 6142 - }, - { - "epoch": 0.31, - "grad_norm": 1.5949036318919134, - "learning_rate": 1.610137914563588e-05, - "loss": 0.2081, - "step": 6143 - }, - { - "epoch": 0.31, - "grad_norm": 1.454851335074028, - "learning_rate": 1.6100074177512038e-05, - "loss": 0.199, - "step": 6144 - }, - { - "epoch": 0.31, - "grad_norm": 1.7598947260802447, - "learning_rate": 1.6098769043923338e-05, - "loss": 0.2103, - "step": 6145 - }, - { - "epoch": 0.31, - "grad_norm": 1.4634098370805022, - "learning_rate": 1.6097463744905187e-05, - "loss": 0.1987, - "step": 6146 - }, - { - "epoch": 0.31, - "grad_norm": 1.2310446246519797, - "learning_rate": 1.6096158280492984e-05, - "loss": 0.1591, - "step": 6147 - }, - { - "epoch": 0.31, - "grad_norm": 1.3524333215013153, - "learning_rate": 1.6094852650722145e-05, - "loss": 0.2018, - "step": 6148 - }, - { - "epoch": 0.31, - "grad_norm": 1.6759301777793807, - "learning_rate": 1.6093546855628085e-05, - "loss": 0.1788, - "step": 6149 - }, - { - "epoch": 0.31, - "grad_norm": 1.2458571119083635, - "learning_rate": 1.609224089524622e-05, - "loss": 0.2098, - "step": 6150 - }, - { - "epoch": 0.31, - "grad_norm": 1.5002867548310084, - "learning_rate": 1.6090934769611976e-05, - "loss": 0.2265, - "step": 6151 - }, - { - "epoch": 0.31, - "grad_norm": 1.1122774148111543, - "learning_rate": 1.6089628478760785e-05, - "loss": 0.2059, - "step": 6152 - }, - { - "epoch": 0.31, - "grad_norm": 1.1362389543734666, - "learning_rate": 1.6088322022728076e-05, - "loss": 0.2541, - "step": 6153 - }, - { - "epoch": 0.31, - "grad_norm": 1.268390507330005, - "learning_rate": 1.608701540154929e-05, - "loss": 0.1765, - "step": 6154 - }, - { - "epoch": 0.31, - "grad_norm": 1.6803598721739206, - "learning_rate": 1.6085708615259863e-05, - "loss": 0.1931, - "step": 6155 - }, - { - "epoch": 0.31, - "grad_norm": 3.066069526341508, - "learning_rate": 1.608440166389525e-05, - "loss": 0.1848, - "step": 6156 - }, - { - "epoch": 0.31, - "grad_norm": 1.1775493264874153, - "learning_rate": 1.6083094547490895e-05, - "loss": 0.1911, - "step": 6157 - }, - { - "epoch": 0.31, - "grad_norm": 1.316939659878754, - "learning_rate": 1.6081787266082258e-05, - "loss": 0.204, - "step": 6158 - }, - { - "epoch": 0.31, - "grad_norm": 1.1886676203084303, - "learning_rate": 1.60804798197048e-05, - "loss": 0.193, - "step": 6159 - }, - { - "epoch": 0.31, - "grad_norm": 1.0711579452613742, - "learning_rate": 1.6079172208393985e-05, - "loss": 0.1887, - "step": 6160 - }, - { - "epoch": 0.31, - "grad_norm": 1.3376326440975657, - "learning_rate": 1.607786443218528e-05, - "loss": 0.1986, - "step": 6161 - }, - { - "epoch": 0.31, - "grad_norm": 1.1083942748161777, - "learning_rate": 1.6076556491114152e-05, - "loss": 0.2099, - "step": 6162 - }, - { - "epoch": 0.31, - "grad_norm": 1.614406990008451, - "learning_rate": 1.6075248385216093e-05, - "loss": 0.2101, - "step": 6163 - }, - { - "epoch": 0.31, - "grad_norm": 1.0930356453004402, - "learning_rate": 1.6073940114526574e-05, - "loss": 0.2174, - "step": 6164 - }, - { - "epoch": 0.31, - "grad_norm": 1.0492328588433775, - "learning_rate": 1.607263167908109e-05, - "loss": 0.2359, - "step": 6165 - }, - { - "epoch": 0.31, - "grad_norm": 1.2424165398353795, - "learning_rate": 1.6071323078915128e-05, - "loss": 0.2419, - "step": 6166 - }, - { - "epoch": 0.31, - "grad_norm": 0.8259912298969864, - "learning_rate": 1.6070014314064183e-05, - "loss": 0.2406, - "step": 6167 - }, - { - "epoch": 0.31, - "grad_norm": 0.8474954794357589, - "learning_rate": 1.6068705384563757e-05, - "loss": 0.2092, - "step": 6168 - }, - { - "epoch": 0.31, - "grad_norm": 1.0906208809203402, - "learning_rate": 1.6067396290449356e-05, - "loss": 0.2131, - "step": 6169 - }, - { - "epoch": 0.31, - "grad_norm": 1.6127793598314828, - "learning_rate": 1.6066087031756485e-05, - "loss": 0.2096, - "step": 6170 - }, - { - "epoch": 0.31, - "grad_norm": 0.9349034239176398, - "learning_rate": 1.6064777608520666e-05, - "loss": 0.2152, - "step": 6171 - }, - { - "epoch": 0.31, - "grad_norm": 1.393936008162501, - "learning_rate": 1.606346802077741e-05, - "loss": 0.2057, - "step": 6172 - }, - { - "epoch": 0.31, - "grad_norm": 1.204797087355292, - "learning_rate": 1.6062158268562237e-05, - "loss": 0.207, - "step": 6173 - }, - { - "epoch": 0.31, - "grad_norm": 1.1014356914347432, - "learning_rate": 1.6060848351910685e-05, - "loss": 0.2191, - "step": 6174 - }, - { - "epoch": 0.31, - "grad_norm": 0.9548581883876565, - "learning_rate": 1.6059538270858275e-05, - "loss": 0.1874, - "step": 6175 - }, - { - "epoch": 0.31, - "grad_norm": 1.1023120629875385, - "learning_rate": 1.6058228025440548e-05, - "loss": 0.2297, - "step": 6176 - }, - { - "epoch": 0.31, - "grad_norm": 0.9500603896039294, - "learning_rate": 1.6056917615693045e-05, - "loss": 0.2156, - "step": 6177 - }, - { - "epoch": 0.31, - "grad_norm": 1.0595746338846443, - "learning_rate": 1.605560704165131e-05, - "loss": 0.1836, - "step": 6178 - }, - { - "epoch": 0.31, - "grad_norm": 1.601444715454401, - "learning_rate": 1.6054296303350886e-05, - "loss": 0.2015, - "step": 6179 - }, - { - "epoch": 0.31, - "grad_norm": 1.2146261540538896, - "learning_rate": 1.605298540082734e-05, - "loss": 0.2129, - "step": 6180 - }, - { - "epoch": 0.31, - "grad_norm": 0.873388952037026, - "learning_rate": 1.605167433411622e-05, - "loss": 0.2025, - "step": 6181 - }, - { - "epoch": 0.31, - "grad_norm": 1.726024719124285, - "learning_rate": 1.6050363103253093e-05, - "loss": 0.1764, - "step": 6182 - }, - { - "epoch": 0.31, - "grad_norm": 1.0938330909494385, - "learning_rate": 1.6049051708273526e-05, - "loss": 0.2067, - "step": 6183 - }, - { - "epoch": 0.31, - "grad_norm": 1.079586704593921, - "learning_rate": 1.6047740149213085e-05, - "loss": 0.1896, - "step": 6184 - }, - { - "epoch": 0.31, - "grad_norm": 1.2714408408105322, - "learning_rate": 1.6046428426107354e-05, - "loss": 0.2357, - "step": 6185 - }, - { - "epoch": 0.31, - "grad_norm": 1.2726345863274997, - "learning_rate": 1.604511653899191e-05, - "loss": 0.2032, - "step": 6186 - }, - { - "epoch": 0.31, - "grad_norm": 1.1207757688502018, - "learning_rate": 1.604380448790234e-05, - "loss": 0.2256, - "step": 6187 - }, - { - "epoch": 0.31, - "grad_norm": 1.093894661508314, - "learning_rate": 1.604249227287423e-05, - "loss": 0.2159, - "step": 6188 - }, - { - "epoch": 0.31, - "grad_norm": 1.197879422109431, - "learning_rate": 1.6041179893943178e-05, - "loss": 0.2238, - "step": 6189 - }, - { - "epoch": 0.31, - "grad_norm": 1.019478303665953, - "learning_rate": 1.6039867351144778e-05, - "loss": 0.19, - "step": 6190 - }, - { - "epoch": 0.31, - "grad_norm": 0.892825698089129, - "learning_rate": 1.6038554644514634e-05, - "loss": 0.2223, - "step": 6191 - }, - { - "epoch": 0.31, - "grad_norm": 1.018159470859247, - "learning_rate": 1.6037241774088355e-05, - "loss": 0.1961, - "step": 6192 - }, - { - "epoch": 0.31, - "grad_norm": 1.5166257860517596, - "learning_rate": 1.6035928739901555e-05, - "loss": 0.2292, - "step": 6193 - }, - { - "epoch": 0.31, - "grad_norm": 0.8754496142517274, - "learning_rate": 1.6034615541989845e-05, - "loss": 0.2046, - "step": 6194 - }, - { - "epoch": 0.32, - "grad_norm": 1.2530659916913465, - "learning_rate": 1.603330218038885e-05, - "loss": 0.2013, - "step": 6195 - }, - { - "epoch": 0.32, - "grad_norm": 1.2702145828391984, - "learning_rate": 1.6031988655134186e-05, - "loss": 0.1842, - "step": 6196 - }, - { - "epoch": 0.32, - "grad_norm": 1.0243935600013727, - "learning_rate": 1.6030674966261496e-05, - "loss": 0.2063, - "step": 6197 - }, - { - "epoch": 0.32, - "grad_norm": 1.0114038595335235, - "learning_rate": 1.60293611138064e-05, - "loss": 0.2125, - "step": 6198 - }, - { - "epoch": 0.32, - "grad_norm": 0.9342042489144418, - "learning_rate": 1.6028047097804548e-05, - "loss": 0.1981, - "step": 6199 - }, - { - "epoch": 0.32, - "grad_norm": 1.0782026950345764, - "learning_rate": 1.6026732918291577e-05, - "loss": 0.171, - "step": 6200 - }, - { - "epoch": 0.32, - "grad_norm": 0.8232405471508174, - "learning_rate": 1.6025418575303135e-05, - "loss": 0.2016, - "step": 6201 - }, - { - "epoch": 0.32, - "grad_norm": 0.8404034639112508, - "learning_rate": 1.6024104068874877e-05, - "loss": 0.1885, - "step": 6202 - }, - { - "epoch": 0.32, - "grad_norm": 0.9614631039331818, - "learning_rate": 1.6022789399042454e-05, - "loss": 0.2043, - "step": 6203 - }, - { - "epoch": 0.32, - "grad_norm": 1.6367109191337368, - "learning_rate": 1.6021474565841527e-05, - "loss": 0.2171, - "step": 6204 - }, - { - "epoch": 0.32, - "grad_norm": 1.109163775941658, - "learning_rate": 1.602015956930776e-05, - "loss": 0.1892, - "step": 6205 - }, - { - "epoch": 0.32, - "grad_norm": 0.9729785517016886, - "learning_rate": 1.601884440947683e-05, - "loss": 0.201, - "step": 6206 - }, - { - "epoch": 0.32, - "grad_norm": 0.9578508782413219, - "learning_rate": 1.60175290863844e-05, - "loss": 0.1853, - "step": 6207 - }, - { - "epoch": 0.32, - "grad_norm": 1.2746326590873274, - "learning_rate": 1.601621360006616e-05, - "loss": 0.2266, - "step": 6208 - }, - { - "epoch": 0.32, - "grad_norm": 0.8786848186524946, - "learning_rate": 1.601489795055778e-05, - "loss": 0.213, - "step": 6209 - }, - { - "epoch": 0.32, - "grad_norm": 0.9956355351416327, - "learning_rate": 1.6013582137894957e-05, - "loss": 0.1952, - "step": 6210 - }, - { - "epoch": 0.32, - "grad_norm": 0.8905803437082221, - "learning_rate": 1.6012266162113378e-05, - "loss": 0.1862, - "step": 6211 - }, - { - "epoch": 0.32, - "grad_norm": 1.276417244394672, - "learning_rate": 1.601095002324874e-05, - "loss": 0.2076, - "step": 6212 - }, - { - "epoch": 0.32, - "grad_norm": 1.0567990624078263, - "learning_rate": 1.6009633721336745e-05, - "loss": 0.2151, - "step": 6213 - }, - { - "epoch": 0.32, - "grad_norm": 1.103289688097003, - "learning_rate": 1.6008317256413092e-05, - "loss": 0.2407, - "step": 6214 - }, - { - "epoch": 0.32, - "grad_norm": 1.0858152536697987, - "learning_rate": 1.6007000628513498e-05, - "loss": 0.176, - "step": 6215 - }, - { - "epoch": 0.32, - "grad_norm": 1.024115694386055, - "learning_rate": 1.600568383767367e-05, - "loss": 0.1804, - "step": 6216 - }, - { - "epoch": 0.32, - "grad_norm": 1.7915259534803418, - "learning_rate": 1.600436688392933e-05, - "loss": 0.2151, - "step": 6217 - }, - { - "epoch": 0.32, - "grad_norm": 0.9311081543873564, - "learning_rate": 1.6003049767316196e-05, - "loss": 0.2032, - "step": 6218 - }, - { - "epoch": 0.32, - "grad_norm": 3.0398814378944885, - "learning_rate": 1.6001732487870002e-05, - "loss": 0.2193, - "step": 6219 - }, - { - "epoch": 0.32, - "grad_norm": 0.7722538840376368, - "learning_rate": 1.6000415045626474e-05, - "loss": 0.1869, - "step": 6220 - }, - { - "epoch": 0.32, - "grad_norm": 0.7243802802542608, - "learning_rate": 1.599909744062135e-05, - "loss": 0.1802, - "step": 6221 - }, - { - "epoch": 0.32, - "grad_norm": 0.9532863679444862, - "learning_rate": 1.5997779672890367e-05, - "loss": 0.1937, - "step": 6222 - }, - { - "epoch": 0.32, - "grad_norm": 0.7103301266208392, - "learning_rate": 1.5996461742469273e-05, - "loss": 0.1711, - "step": 6223 - }, - { - "epoch": 0.32, - "grad_norm": 0.9093473889476353, - "learning_rate": 1.5995143649393814e-05, - "loss": 0.191, - "step": 6224 - }, - { - "epoch": 0.32, - "grad_norm": 0.9615416909069292, - "learning_rate": 1.5993825393699746e-05, - "loss": 0.2005, - "step": 6225 - }, - { - "epoch": 0.32, - "grad_norm": 0.9817798027230897, - "learning_rate": 1.5992506975422826e-05, - "loss": 0.2155, - "step": 6226 - }, - { - "epoch": 0.32, - "grad_norm": 1.0178129605623831, - "learning_rate": 1.5991188394598817e-05, - "loss": 0.2074, - "step": 6227 - }, - { - "epoch": 0.32, - "grad_norm": 0.9295853846888892, - "learning_rate": 1.598986965126348e-05, - "loss": 0.1846, - "step": 6228 - }, - { - "epoch": 0.32, - "grad_norm": 2.4018557858414185, - "learning_rate": 1.5988550745452593e-05, - "loss": 0.1982, - "step": 6229 - }, - { - "epoch": 0.32, - "grad_norm": 0.9804221302826391, - "learning_rate": 1.598723167720193e-05, - "loss": 0.2109, - "step": 6230 - }, - { - "epoch": 0.32, - "grad_norm": 1.7358355239165495, - "learning_rate": 1.598591244654727e-05, - "loss": 0.1874, - "step": 6231 - }, - { - "epoch": 0.32, - "grad_norm": 0.9962790646475322, - "learning_rate": 1.5984593053524395e-05, - "loss": 0.1918, - "step": 6232 - }, - { - "epoch": 0.32, - "grad_norm": 1.035123937100931, - "learning_rate": 1.5983273498169095e-05, - "loss": 0.1729, - "step": 6233 - }, - { - "epoch": 0.32, - "grad_norm": 1.7762721872372287, - "learning_rate": 1.5981953780517166e-05, - "loss": 0.2003, - "step": 6234 - }, - { - "epoch": 0.32, - "grad_norm": 0.922755801851708, - "learning_rate": 1.59806339006044e-05, - "loss": 0.1896, - "step": 6235 - }, - { - "epoch": 0.32, - "grad_norm": 1.3481005397591015, - "learning_rate": 1.59793138584666e-05, - "loss": 0.1951, - "step": 6236 - }, - { - "epoch": 0.32, - "grad_norm": 0.7834068245640107, - "learning_rate": 1.597799365413958e-05, - "loss": 0.1814, - "step": 6237 - }, - { - "epoch": 0.32, - "grad_norm": 3.1028864518367505, - "learning_rate": 1.597667328765914e-05, - "loss": 0.2181, - "step": 6238 - }, - { - "epoch": 0.32, - "grad_norm": 0.9488861711209522, - "learning_rate": 1.5975352759061103e-05, - "loss": 0.2215, - "step": 6239 - }, - { - "epoch": 0.32, - "grad_norm": 1.3137009303969078, - "learning_rate": 1.597403206838128e-05, - "loss": 0.1913, - "step": 6240 - }, - { - "epoch": 0.32, - "grad_norm": 1.0603030370685838, - "learning_rate": 1.5972711215655504e-05, - "loss": 0.2342, - "step": 6241 - }, - { - "epoch": 0.32, - "grad_norm": 1.005721988712769, - "learning_rate": 1.5971390200919597e-05, - "loss": 0.1838, - "step": 6242 - }, - { - "epoch": 0.32, - "grad_norm": 1.9540413565559143, - "learning_rate": 1.5970069024209394e-05, - "loss": 0.2288, - "step": 6243 - }, - { - "epoch": 0.32, - "grad_norm": 1.056861645845795, - "learning_rate": 1.596874768556073e-05, - "loss": 0.2376, - "step": 6244 - }, - { - "epoch": 0.32, - "grad_norm": 1.8608096734735928, - "learning_rate": 1.5967426185009448e-05, - "loss": 0.1994, - "step": 6245 - }, - { - "epoch": 0.32, - "grad_norm": 0.9993820475578914, - "learning_rate": 1.5966104522591397e-05, - "loss": 0.2048, - "step": 6246 - }, - { - "epoch": 0.32, - "grad_norm": 1.052166738593178, - "learning_rate": 1.5964782698342423e-05, - "loss": 0.2037, - "step": 6247 - }, - { - "epoch": 0.32, - "grad_norm": 2.4623813432862196, - "learning_rate": 1.596346071229838e-05, - "loss": 0.2014, - "step": 6248 - }, - { - "epoch": 0.32, - "grad_norm": 1.4257348201093032, - "learning_rate": 1.5962138564495128e-05, - "loss": 0.2111, - "step": 6249 - }, - { - "epoch": 0.32, - "grad_norm": 0.8422300324358466, - "learning_rate": 1.596081625496853e-05, - "loss": 0.2061, - "step": 6250 - }, - { - "epoch": 0.32, - "grad_norm": 0.8232571074717809, - "learning_rate": 1.5959493783754456e-05, - "loss": 0.2089, - "step": 6251 - }, - { - "epoch": 0.32, - "grad_norm": 1.7018511761221633, - "learning_rate": 1.595817115088878e-05, - "loss": 0.1917, - "step": 6252 - }, - { - "epoch": 0.32, - "grad_norm": 0.9579800641887645, - "learning_rate": 1.5956848356407365e-05, - "loss": 0.2103, - "step": 6253 - }, - { - "epoch": 0.32, - "grad_norm": 1.010412464498688, - "learning_rate": 1.595552540034611e-05, - "loss": 0.2362, - "step": 6254 - }, - { - "epoch": 0.32, - "grad_norm": 0.8006817815454528, - "learning_rate": 1.595420228274089e-05, - "loss": 0.1742, - "step": 6255 - }, - { - "epoch": 0.32, - "grad_norm": 0.9325631107213238, - "learning_rate": 1.59528790036276e-05, - "loss": 0.1993, - "step": 6256 - }, - { - "epoch": 0.32, - "grad_norm": 1.926514554437551, - "learning_rate": 1.5951555563042128e-05, - "loss": 0.2306, - "step": 6257 - }, - { - "epoch": 0.32, - "grad_norm": 1.7324924450988743, - "learning_rate": 1.5950231961020373e-05, - "loss": 0.214, - "step": 6258 - }, - { - "epoch": 0.32, - "grad_norm": 1.0247474294843106, - "learning_rate": 1.5948908197598242e-05, - "loss": 0.1911, - "step": 6259 - }, - { - "epoch": 0.32, - "grad_norm": 1.1524455317274755, - "learning_rate": 1.594758427281164e-05, - "loss": 0.2054, - "step": 6260 - }, - { - "epoch": 0.32, - "grad_norm": 1.3710386575529927, - "learning_rate": 1.5946260186696477e-05, - "loss": 0.236, - "step": 6261 - }, - { - "epoch": 0.32, - "grad_norm": 1.177146195681321, - "learning_rate": 1.5944935939288675e-05, - "loss": 0.1931, - "step": 6262 - }, - { - "epoch": 0.32, - "grad_norm": 1.0671464671711801, - "learning_rate": 1.5943611530624147e-05, - "loss": 0.2121, - "step": 6263 - }, - { - "epoch": 0.32, - "grad_norm": 0.9365291413240289, - "learning_rate": 1.594228696073882e-05, - "loss": 0.201, - "step": 6264 - }, - { - "epoch": 0.32, - "grad_norm": 0.9882062230623252, - "learning_rate": 1.5940962229668625e-05, - "loss": 0.2131, - "step": 6265 - }, - { - "epoch": 0.32, - "grad_norm": 1.0502799091230823, - "learning_rate": 1.5939637337449493e-05, - "loss": 0.2051, - "step": 6266 - }, - { - "epoch": 0.32, - "grad_norm": 0.86354473763294, - "learning_rate": 1.5938312284117367e-05, - "loss": 0.1884, - "step": 6267 - }, - { - "epoch": 0.32, - "grad_norm": 1.3278074739442671, - "learning_rate": 1.593698706970818e-05, - "loss": 0.2101, - "step": 6268 - }, - { - "epoch": 0.32, - "grad_norm": 1.4480295763019384, - "learning_rate": 1.593566169425788e-05, - "loss": 0.2023, - "step": 6269 - }, - { - "epoch": 0.32, - "grad_norm": 1.2014894821170918, - "learning_rate": 1.5934336157802427e-05, - "loss": 0.2066, - "step": 6270 - }, - { - "epoch": 0.32, - "grad_norm": 1.47506376769695, - "learning_rate": 1.593301046037777e-05, - "loss": 0.2199, - "step": 6271 - }, - { - "epoch": 0.32, - "grad_norm": 1.1430677599552572, - "learning_rate": 1.5931684602019866e-05, - "loss": 0.2121, - "step": 6272 - }, - { - "epoch": 0.32, - "grad_norm": 0.856863370411823, - "learning_rate": 1.5930358582764686e-05, - "loss": 0.2003, - "step": 6273 - }, - { - "epoch": 0.32, - "grad_norm": 0.8649431457227784, - "learning_rate": 1.5929032402648194e-05, - "loss": 0.1998, - "step": 6274 - }, - { - "epoch": 0.32, - "grad_norm": 0.8653885888780428, - "learning_rate": 1.5927706061706363e-05, - "loss": 0.2049, - "step": 6275 - }, - { - "epoch": 0.32, - "grad_norm": 0.8381520198334308, - "learning_rate": 1.592637955997517e-05, - "loss": 0.1964, - "step": 6276 - }, - { - "epoch": 0.32, - "grad_norm": 1.1500336757509857, - "learning_rate": 1.5925052897490595e-05, - "loss": 0.1976, - "step": 6277 - }, - { - "epoch": 0.32, - "grad_norm": 1.0400983335658338, - "learning_rate": 1.5923726074288627e-05, - "loss": 0.2028, - "step": 6278 - }, - { - "epoch": 0.32, - "grad_norm": 0.7803330730926793, - "learning_rate": 1.5922399090405255e-05, - "loss": 0.1951, - "step": 6279 - }, - { - "epoch": 0.32, - "grad_norm": 1.2882797730496849, - "learning_rate": 1.592107194587647e-05, - "loss": 0.2098, - "step": 6280 - }, - { - "epoch": 0.32, - "grad_norm": 0.949332819796093, - "learning_rate": 1.591974464073828e-05, - "loss": 0.1939, - "step": 6281 - }, - { - "epoch": 0.32, - "grad_norm": 0.8669114548502495, - "learning_rate": 1.591841717502668e-05, - "loss": 0.2039, - "step": 6282 - }, - { - "epoch": 0.32, - "grad_norm": 0.8760982488429814, - "learning_rate": 1.5917089548777678e-05, - "loss": 0.1997, - "step": 6283 - }, - { - "epoch": 0.32, - "grad_norm": 3.425809164271548, - "learning_rate": 1.591576176202729e-05, - "loss": 0.2227, - "step": 6284 - }, - { - "epoch": 0.32, - "grad_norm": 1.0285643242186238, - "learning_rate": 1.5914433814811524e-05, - "loss": 0.2049, - "step": 6285 - }, - { - "epoch": 0.32, - "grad_norm": 0.9789355523220069, - "learning_rate": 1.591310570716641e-05, - "loss": 0.2057, - "step": 6286 - }, - { - "epoch": 0.32, - "grad_norm": 0.9412502930882807, - "learning_rate": 1.5911777439127975e-05, - "loss": 0.1977, - "step": 6287 - }, - { - "epoch": 0.32, - "grad_norm": 1.021924695120764, - "learning_rate": 1.591044901073224e-05, - "loss": 0.2136, - "step": 6288 - }, - { - "epoch": 0.32, - "grad_norm": 1.0255290361753848, - "learning_rate": 1.590912042201524e-05, - "loss": 0.196, - "step": 6289 - }, - { - "epoch": 0.32, - "grad_norm": 0.9600707351927388, - "learning_rate": 1.5907791673013016e-05, - "loss": 0.1881, - "step": 6290 - }, - { - "epoch": 0.32, - "grad_norm": 1.2638585822881137, - "learning_rate": 1.5906462763761606e-05, - "loss": 0.1805, - "step": 6291 - }, - { - "epoch": 0.32, - "grad_norm": 1.0002196629413556, - "learning_rate": 1.5905133694297065e-05, - "loss": 0.2008, - "step": 6292 - }, - { - "epoch": 0.32, - "grad_norm": 1.0106544911549504, - "learning_rate": 1.5903804464655437e-05, - "loss": 0.2092, - "step": 6293 - }, - { - "epoch": 0.32, - "grad_norm": 1.314786481203474, - "learning_rate": 1.590247507487278e-05, - "loss": 0.2297, - "step": 6294 - }, - { - "epoch": 0.32, - "grad_norm": 2.112341901630231, - "learning_rate": 1.5901145524985155e-05, - "loss": 0.1985, - "step": 6295 - }, - { - "epoch": 0.32, - "grad_norm": 1.1707892567465332, - "learning_rate": 1.5899815815028618e-05, - "loss": 0.2032, - "step": 6296 - }, - { - "epoch": 0.32, - "grad_norm": 1.0298589833768144, - "learning_rate": 1.589848594503925e-05, - "loss": 0.2026, - "step": 6297 - }, - { - "epoch": 0.32, - "grad_norm": 1.1082543126402602, - "learning_rate": 1.5897155915053113e-05, - "loss": 0.2433, - "step": 6298 - }, - { - "epoch": 0.32, - "grad_norm": 1.6329212771296873, - "learning_rate": 1.5895825725106295e-05, - "loss": 0.1874, - "step": 6299 - }, - { - "epoch": 0.32, - "grad_norm": 1.1679904879141962, - "learning_rate": 1.5894495375234865e-05, - "loss": 0.2348, - "step": 6300 - }, - { - "epoch": 0.32, - "grad_norm": 0.7321960858669198, - "learning_rate": 1.589316486547492e-05, - "loss": 0.1893, - "step": 6301 - }, - { - "epoch": 0.32, - "grad_norm": 0.8250329944711491, - "learning_rate": 1.589183419586254e-05, - "loss": 0.2105, - "step": 6302 - }, - { - "epoch": 0.32, - "grad_norm": 0.8952598550183528, - "learning_rate": 1.5890503366433827e-05, - "loss": 0.217, - "step": 6303 - }, - { - "epoch": 0.32, - "grad_norm": 1.930619450747476, - "learning_rate": 1.5889172377224877e-05, - "loss": 0.208, - "step": 6304 - }, - { - "epoch": 0.32, - "grad_norm": 0.805703441367176, - "learning_rate": 1.5887841228271794e-05, - "loss": 0.1803, - "step": 6305 - }, - { - "epoch": 0.32, - "grad_norm": 1.1449471204861255, - "learning_rate": 1.5886509919610683e-05, - "loss": 0.2274, - "step": 6306 - }, - { - "epoch": 0.32, - "grad_norm": 1.0149187593723974, - "learning_rate": 1.588517845127766e-05, - "loss": 0.2062, - "step": 6307 - }, - { - "epoch": 0.32, - "grad_norm": 1.517302821901913, - "learning_rate": 1.5883846823308843e-05, - "loss": 0.1797, - "step": 6308 - }, - { - "epoch": 0.32, - "grad_norm": 0.81017351323962, - "learning_rate": 1.5882515035740345e-05, - "loss": 0.2159, - "step": 6309 - }, - { - "epoch": 0.32, - "grad_norm": 0.8318977084874352, - "learning_rate": 1.5881183088608293e-05, - "loss": 0.2093, - "step": 6310 - }, - { - "epoch": 0.32, - "grad_norm": 1.681851519727326, - "learning_rate": 1.587985098194882e-05, - "loss": 0.1841, - "step": 6311 - }, - { - "epoch": 0.32, - "grad_norm": 0.8934676155716955, - "learning_rate": 1.5878518715798053e-05, - "loss": 0.1861, - "step": 6312 - }, - { - "epoch": 0.32, - "grad_norm": 0.9779274252743558, - "learning_rate": 1.587718629019214e-05, - "loss": 0.213, - "step": 6313 - }, - { - "epoch": 0.32, - "grad_norm": 0.9371299745261433, - "learning_rate": 1.5875853705167214e-05, - "loss": 0.193, - "step": 6314 - }, - { - "epoch": 0.32, - "grad_norm": 0.8759217719403662, - "learning_rate": 1.5874520960759423e-05, - "loss": 0.1968, - "step": 6315 - }, - { - "epoch": 0.32, - "grad_norm": 1.1231242328686373, - "learning_rate": 1.5873188057004924e-05, - "loss": 0.1896, - "step": 6316 - }, - { - "epoch": 0.32, - "grad_norm": 1.0473174398740164, - "learning_rate": 1.5871854993939862e-05, - "loss": 0.2339, - "step": 6317 - }, - { - "epoch": 0.32, - "grad_norm": 1.1763710546308124, - "learning_rate": 1.5870521771600402e-05, - "loss": 0.2014, - "step": 6318 - }, - { - "epoch": 0.32, - "grad_norm": 1.0078407380626875, - "learning_rate": 1.5869188390022708e-05, - "loss": 0.2139, - "step": 6319 - }, - { - "epoch": 0.32, - "grad_norm": 1.1132752101430765, - "learning_rate": 1.586785484924295e-05, - "loss": 0.213, - "step": 6320 - }, - { - "epoch": 0.32, - "grad_norm": 1.1250644268142815, - "learning_rate": 1.5866521149297294e-05, - "loss": 0.2078, - "step": 6321 - }, - { - "epoch": 0.32, - "grad_norm": 1.0358756499365598, - "learning_rate": 1.586518729022192e-05, - "loss": 0.2045, - "step": 6322 - }, - { - "epoch": 0.32, - "grad_norm": 1.4276322383882027, - "learning_rate": 1.5863853272053017e-05, - "loss": 0.2078, - "step": 6323 - }, - { - "epoch": 0.32, - "grad_norm": 1.1332020736682589, - "learning_rate": 1.5862519094826753e-05, - "loss": 0.1952, - "step": 6324 - }, - { - "epoch": 0.32, - "grad_norm": 1.3461541846759482, - "learning_rate": 1.5861184758579332e-05, - "loss": 0.1962, - "step": 6325 - }, - { - "epoch": 0.32, - "grad_norm": 0.9436324996098256, - "learning_rate": 1.5859850263346945e-05, - "loss": 0.1897, - "step": 6326 - }, - { - "epoch": 0.32, - "grad_norm": 1.08030280388521, - "learning_rate": 1.5858515609165786e-05, - "loss": 0.2137, - "step": 6327 - }, - { - "epoch": 0.32, - "grad_norm": 1.2932692522911984, - "learning_rate": 1.5857180796072064e-05, - "loss": 0.1772, - "step": 6328 - }, - { - "epoch": 0.32, - "grad_norm": 1.0491984582849074, - "learning_rate": 1.5855845824101978e-05, - "loss": 0.1888, - "step": 6329 - }, - { - "epoch": 0.32, - "grad_norm": 1.023037106386847, - "learning_rate": 1.5854510693291744e-05, - "loss": 0.1993, - "step": 6330 - }, - { - "epoch": 0.32, - "grad_norm": 0.8958489251993921, - "learning_rate": 1.5853175403677577e-05, - "loss": 0.216, - "step": 6331 - }, - { - "epoch": 0.32, - "grad_norm": 0.9890468500643292, - "learning_rate": 1.5851839955295697e-05, - "loss": 0.2089, - "step": 6332 - }, - { - "epoch": 0.32, - "grad_norm": 1.1823366094267416, - "learning_rate": 1.5850504348182327e-05, - "loss": 0.2098, - "step": 6333 - }, - { - "epoch": 0.32, - "grad_norm": 1.427086580965463, - "learning_rate": 1.5849168582373698e-05, - "loss": 0.2417, - "step": 6334 - }, - { - "epoch": 0.32, - "grad_norm": 0.9748347836779382, - "learning_rate": 1.584783265790604e-05, - "loss": 0.2211, - "step": 6335 - }, - { - "epoch": 0.32, - "grad_norm": 0.8419382988560267, - "learning_rate": 1.584649657481559e-05, - "loss": 0.2043, - "step": 6336 - }, - { - "epoch": 0.32, - "grad_norm": 1.0180216149981338, - "learning_rate": 1.5845160333138593e-05, - "loss": 0.2057, - "step": 6337 - }, - { - "epoch": 0.32, - "grad_norm": 2.438738728038019, - "learning_rate": 1.584382393291129e-05, - "loss": 0.2175, - "step": 6338 - }, - { - "epoch": 0.32, - "grad_norm": 0.8929441096966545, - "learning_rate": 1.584248737416993e-05, - "loss": 0.1956, - "step": 6339 - }, - { - "epoch": 0.32, - "grad_norm": 3.4119312182239425, - "learning_rate": 1.584115065695077e-05, - "loss": 0.2038, - "step": 6340 - }, - { - "epoch": 0.32, - "grad_norm": 0.9668934126268923, - "learning_rate": 1.583981378129007e-05, - "loss": 0.2106, - "step": 6341 - }, - { - "epoch": 0.32, - "grad_norm": 1.194773506228996, - "learning_rate": 1.5838476747224096e-05, - "loss": 0.2029, - "step": 6342 - }, - { - "epoch": 0.32, - "grad_norm": 1.4189946860982916, - "learning_rate": 1.5837139554789106e-05, - "loss": 0.1852, - "step": 6343 - }, - { - "epoch": 0.32, - "grad_norm": 1.7200228788960104, - "learning_rate": 1.5835802204021374e-05, - "loss": 0.1945, - "step": 6344 - }, - { - "epoch": 0.32, - "grad_norm": 1.049067356353654, - "learning_rate": 1.583446469495718e-05, - "loss": 0.2132, - "step": 6345 - }, - { - "epoch": 0.32, - "grad_norm": 0.8484716174067365, - "learning_rate": 1.5833127027632803e-05, - "loss": 0.1767, - "step": 6346 - }, - { - "epoch": 0.32, - "grad_norm": 1.2644673501039674, - "learning_rate": 1.5831789202084524e-05, - "loss": 0.196, - "step": 6347 - }, - { - "epoch": 0.32, - "grad_norm": 1.0563048846551186, - "learning_rate": 1.5830451218348634e-05, - "loss": 0.1971, - "step": 6348 - }, - { - "epoch": 0.32, - "grad_norm": 0.8555301237648472, - "learning_rate": 1.5829113076461426e-05, - "loss": 0.1986, - "step": 6349 - }, - { - "epoch": 0.32, - "grad_norm": 1.0299193138241485, - "learning_rate": 1.5827774776459195e-05, - "loss": 0.1942, - "step": 6350 - }, - { - "epoch": 0.32, - "grad_norm": 1.4125485894893952, - "learning_rate": 1.5826436318378248e-05, - "loss": 0.1968, - "step": 6351 - }, - { - "epoch": 0.32, - "grad_norm": 0.9133171869368052, - "learning_rate": 1.5825097702254885e-05, - "loss": 0.2082, - "step": 6352 - }, - { - "epoch": 0.32, - "grad_norm": 1.330384605741934, - "learning_rate": 1.5823758928125416e-05, - "loss": 0.2016, - "step": 6353 - }, - { - "epoch": 0.32, - "grad_norm": 1.4065688789992765, - "learning_rate": 1.5822419996026156e-05, - "loss": 0.1982, - "step": 6354 - }, - { - "epoch": 0.32, - "grad_norm": 1.0543744183417312, - "learning_rate": 1.5821080905993425e-05, - "loss": 0.2171, - "step": 6355 - }, - { - "epoch": 0.32, - "grad_norm": 1.010192278983979, - "learning_rate": 1.5819741658063548e-05, - "loss": 0.209, - "step": 6356 - }, - { - "epoch": 0.32, - "grad_norm": 1.1998783585968111, - "learning_rate": 1.5818402252272852e-05, - "loss": 0.2112, - "step": 6357 - }, - { - "epoch": 0.32, - "grad_norm": 1.3962202709631413, - "learning_rate": 1.581706268865766e-05, - "loss": 0.1958, - "step": 6358 - }, - { - "epoch": 0.32, - "grad_norm": 1.154206370575021, - "learning_rate": 1.5815722967254317e-05, - "loss": 0.2085, - "step": 6359 - }, - { - "epoch": 0.32, - "grad_norm": 1.9722024390165005, - "learning_rate": 1.5814383088099164e-05, - "loss": 0.2087, - "step": 6360 - }, - { - "epoch": 0.32, - "grad_norm": 0.9929739020758829, - "learning_rate": 1.5813043051228536e-05, - "loss": 0.2216, - "step": 6361 - }, - { - "epoch": 0.32, - "grad_norm": 1.016156213865893, - "learning_rate": 1.581170285667879e-05, - "loss": 0.1903, - "step": 6362 - }, - { - "epoch": 0.32, - "grad_norm": 2.5700458159966972, - "learning_rate": 1.5810362504486274e-05, - "loss": 0.2197, - "step": 6363 - }, - { - "epoch": 0.32, - "grad_norm": 1.1682437605649398, - "learning_rate": 1.5809021994687346e-05, - "loss": 0.1977, - "step": 6364 - }, - { - "epoch": 0.32, - "grad_norm": 1.2753786330700614, - "learning_rate": 1.5807681327318372e-05, - "loss": 0.2343, - "step": 6365 - }, - { - "epoch": 0.32, - "grad_norm": 1.130208997823953, - "learning_rate": 1.580634050241571e-05, - "loss": 0.1902, - "step": 6366 - }, - { - "epoch": 0.32, - "grad_norm": 1.1777548703995049, - "learning_rate": 1.5804999520015735e-05, - "loss": 0.1963, - "step": 6367 - }, - { - "epoch": 0.32, - "grad_norm": 0.9107676559244275, - "learning_rate": 1.5803658380154822e-05, - "loss": 0.1923, - "step": 6368 - }, - { - "epoch": 0.32, - "grad_norm": 1.0898334399860248, - "learning_rate": 1.5802317082869346e-05, - "loss": 0.1831, - "step": 6369 - }, - { - "epoch": 0.32, - "grad_norm": 1.3729466617948922, - "learning_rate": 1.5800975628195692e-05, - "loss": 0.1964, - "step": 6370 - }, - { - "epoch": 0.32, - "grad_norm": 0.9014294103357848, - "learning_rate": 1.5799634016170245e-05, - "loss": 0.1943, - "step": 6371 - }, - { - "epoch": 0.32, - "grad_norm": 1.0305709033064328, - "learning_rate": 1.57982922468294e-05, - "loss": 0.1945, - "step": 6372 - }, - { - "epoch": 0.32, - "grad_norm": 0.955380919591037, - "learning_rate": 1.5796950320209548e-05, - "loss": 0.2081, - "step": 6373 - }, - { - "epoch": 0.32, - "grad_norm": 0.9686561104117304, - "learning_rate": 1.5795608236347092e-05, - "loss": 0.1725, - "step": 6374 - }, - { - "epoch": 0.32, - "grad_norm": 0.9817649288066675, - "learning_rate": 1.5794265995278438e-05, - "loss": 0.2223, - "step": 6375 - }, - { - "epoch": 0.32, - "grad_norm": 0.9605643115340182, - "learning_rate": 1.5792923597039988e-05, - "loss": 0.1965, - "step": 6376 - }, - { - "epoch": 0.32, - "grad_norm": 0.9827142089808804, - "learning_rate": 1.579158104166816e-05, - "loss": 0.2116, - "step": 6377 - }, - { - "epoch": 0.32, - "grad_norm": 1.9111504054520023, - "learning_rate": 1.579023832919937e-05, - "loss": 0.1855, - "step": 6378 - }, - { - "epoch": 0.32, - "grad_norm": 1.2261708996905023, - "learning_rate": 1.5788895459670036e-05, - "loss": 0.2072, - "step": 6379 - }, - { - "epoch": 0.32, - "grad_norm": 1.2240954515580862, - "learning_rate": 1.5787552433116586e-05, - "loss": 0.2059, - "step": 6380 - }, - { - "epoch": 0.32, - "grad_norm": 0.9802222648434512, - "learning_rate": 1.5786209249575452e-05, - "loss": 0.2064, - "step": 6381 - }, - { - "epoch": 0.32, - "grad_norm": 1.4678277710491507, - "learning_rate": 1.5784865909083064e-05, - "loss": 0.2009, - "step": 6382 - }, - { - "epoch": 0.32, - "grad_norm": 2.0693945382145387, - "learning_rate": 1.5783522411675857e-05, - "loss": 0.2025, - "step": 6383 - }, - { - "epoch": 0.32, - "grad_norm": 1.5394552546625184, - "learning_rate": 1.5782178757390284e-05, - "loss": 0.1903, - "step": 6384 - }, - { - "epoch": 0.32, - "grad_norm": 1.0743984781457123, - "learning_rate": 1.5780834946262782e-05, - "loss": 0.2184, - "step": 6385 - }, - { - "epoch": 0.32, - "grad_norm": 1.585305608844427, - "learning_rate": 1.5779490978329806e-05, - "loss": 0.2141, - "step": 6386 - }, - { - "epoch": 0.32, - "grad_norm": 0.9386347211553212, - "learning_rate": 1.5778146853627813e-05, - "loss": 0.1865, - "step": 6387 - }, - { - "epoch": 0.32, - "grad_norm": 1.0530458992126261, - "learning_rate": 1.577680257219326e-05, - "loss": 0.1897, - "step": 6388 - }, - { - "epoch": 0.32, - "grad_norm": 1.2499711672577036, - "learning_rate": 1.577545813406261e-05, - "loss": 0.2078, - "step": 6389 - }, - { - "epoch": 0.32, - "grad_norm": 1.127326725810852, - "learning_rate": 1.5774113539272332e-05, - "loss": 0.2238, - "step": 6390 - }, - { - "epoch": 0.32, - "grad_norm": 1.3929687223303266, - "learning_rate": 1.57727687878589e-05, - "loss": 0.1949, - "step": 6391 - }, - { - "epoch": 0.33, - "grad_norm": 0.9969779103098971, - "learning_rate": 1.5771423879858783e-05, - "loss": 0.1882, - "step": 6392 - }, - { - "epoch": 0.33, - "grad_norm": 1.41306573541083, - "learning_rate": 1.5770078815308473e-05, - "loss": 0.1869, - "step": 6393 - }, - { - "epoch": 0.33, - "grad_norm": 0.838933203170488, - "learning_rate": 1.5768733594244445e-05, - "loss": 0.2058, - "step": 6394 - }, - { - "epoch": 0.33, - "grad_norm": 1.0659843745048814, - "learning_rate": 1.5767388216703196e-05, - "loss": 0.2124, - "step": 6395 - }, - { - "epoch": 0.33, - "grad_norm": 1.152946095323606, - "learning_rate": 1.5766042682721213e-05, - "loss": 0.2095, - "step": 6396 - }, - { - "epoch": 0.33, - "grad_norm": 2.0228491280513095, - "learning_rate": 1.5764696992335e-05, - "loss": 0.2322, - "step": 6397 - }, - { - "epoch": 0.33, - "grad_norm": 1.3934301752230933, - "learning_rate": 1.5763351145581052e-05, - "loss": 0.2031, - "step": 6398 - }, - { - "epoch": 0.33, - "grad_norm": 1.0083370881264966, - "learning_rate": 1.576200514249588e-05, - "loss": 0.217, - "step": 6399 - }, - { - "epoch": 0.33, - "grad_norm": 1.4430508995785853, - "learning_rate": 1.576065898311599e-05, - "loss": 0.1991, - "step": 6400 - }, - { - "epoch": 0.33, - "grad_norm": 1.6340672012097428, - "learning_rate": 1.5759312667477907e-05, - "loss": 0.1983, - "step": 6401 - }, - { - "epoch": 0.33, - "grad_norm": 1.6808778569379283, - "learning_rate": 1.575796619561814e-05, - "loss": 0.1861, - "step": 6402 - }, - { - "epoch": 0.33, - "grad_norm": 1.2748513907509549, - "learning_rate": 1.5756619567573213e-05, - "loss": 0.2164, - "step": 6403 - }, - { - "epoch": 0.33, - "grad_norm": 0.936128174627036, - "learning_rate": 1.5755272783379656e-05, - "loss": 0.1953, - "step": 6404 - }, - { - "epoch": 0.33, - "grad_norm": 1.2888785381934886, - "learning_rate": 1.5753925843073997e-05, - "loss": 0.2246, - "step": 6405 - }, - { - "epoch": 0.33, - "grad_norm": 1.078888056580051, - "learning_rate": 1.5752578746692776e-05, - "loss": 0.1997, - "step": 6406 - }, - { - "epoch": 0.33, - "grad_norm": 0.9884413373743257, - "learning_rate": 1.5751231494272535e-05, - "loss": 0.1972, - "step": 6407 - }, - { - "epoch": 0.33, - "grad_norm": 1.882073537819897, - "learning_rate": 1.574988408584981e-05, - "loss": 0.2124, - "step": 6408 - }, - { - "epoch": 0.33, - "grad_norm": 0.8856297252603791, - "learning_rate": 1.574853652146116e-05, - "loss": 0.2084, - "step": 6409 - }, - { - "epoch": 0.33, - "grad_norm": 0.9096131648444687, - "learning_rate": 1.5747188801143126e-05, - "loss": 0.2104, - "step": 6410 - }, - { - "epoch": 0.33, - "grad_norm": 1.1483305633397578, - "learning_rate": 1.574584092493227e-05, - "loss": 0.2229, - "step": 6411 - }, - { - "epoch": 0.33, - "grad_norm": 1.2905589927471228, - "learning_rate": 1.574449289286516e-05, - "loss": 0.1895, - "step": 6412 - }, - { - "epoch": 0.33, - "grad_norm": 1.055202980082456, - "learning_rate": 1.5743144704978358e-05, - "loss": 0.2162, - "step": 6413 - }, - { - "epoch": 0.33, - "grad_norm": 1.571357528956249, - "learning_rate": 1.5741796361308424e-05, - "loss": 0.2185, - "step": 6414 - }, - { - "epoch": 0.33, - "grad_norm": 1.27518127262942, - "learning_rate": 1.5740447861891946e-05, - "loss": 0.2178, - "step": 6415 - }, - { - "epoch": 0.33, - "grad_norm": 1.1005877561884827, - "learning_rate": 1.573909920676549e-05, - "loss": 0.2353, - "step": 6416 - }, - { - "epoch": 0.33, - "grad_norm": 0.8713251975400061, - "learning_rate": 1.5737750395965646e-05, - "loss": 0.1908, - "step": 6417 - }, - { - "epoch": 0.33, - "grad_norm": 1.2112967064517417, - "learning_rate": 1.5736401429529e-05, - "loss": 0.1913, - "step": 6418 - }, - { - "epoch": 0.33, - "grad_norm": 1.9991676495662414, - "learning_rate": 1.573505230749214e-05, - "loss": 0.1855, - "step": 6419 - }, - { - "epoch": 0.33, - "grad_norm": 0.8813185706705601, - "learning_rate": 1.573370302989166e-05, - "loss": 0.211, - "step": 6420 - }, - { - "epoch": 0.33, - "grad_norm": 1.2830617593144382, - "learning_rate": 1.5732353596764168e-05, - "loss": 0.19, - "step": 6421 - }, - { - "epoch": 0.33, - "grad_norm": 1.3583092644558936, - "learning_rate": 1.5731004008146255e-05, - "loss": 0.1827, - "step": 6422 - }, - { - "epoch": 0.33, - "grad_norm": 1.0762212789850942, - "learning_rate": 1.5729654264074536e-05, - "loss": 0.1934, - "step": 6423 - }, - { - "epoch": 0.33, - "grad_norm": 1.2099248654148724, - "learning_rate": 1.572830436458562e-05, - "loss": 0.1947, - "step": 6424 - }, - { - "epoch": 0.33, - "grad_norm": 1.592646821803759, - "learning_rate": 1.5726954309716128e-05, - "loss": 0.1849, - "step": 6425 - }, - { - "epoch": 0.33, - "grad_norm": 1.0505585309130414, - "learning_rate": 1.5725604099502673e-05, - "loss": 0.2032, - "step": 6426 - }, - { - "epoch": 0.33, - "grad_norm": 1.5026472365739723, - "learning_rate": 1.572425373398189e-05, - "loss": 0.1994, - "step": 6427 - }, - { - "epoch": 0.33, - "grad_norm": 0.8287932239533964, - "learning_rate": 1.5722903213190393e-05, - "loss": 0.1654, - "step": 6428 - }, - { - "epoch": 0.33, - "grad_norm": 1.0151895468230154, - "learning_rate": 1.572155253716483e-05, - "loss": 0.1894, - "step": 6429 - }, - { - "epoch": 0.33, - "grad_norm": 1.813161979459579, - "learning_rate": 1.5720201705941827e-05, - "loss": 0.2153, - "step": 6430 - }, - { - "epoch": 0.33, - "grad_norm": 1.4623926629210309, - "learning_rate": 1.571885071955803e-05, - "loss": 0.1959, - "step": 6431 - }, - { - "epoch": 0.33, - "grad_norm": 2.197393529509465, - "learning_rate": 1.5717499578050083e-05, - "loss": 0.2121, - "step": 6432 - }, - { - "epoch": 0.33, - "grad_norm": 0.8507227862656661, - "learning_rate": 1.571614828145464e-05, - "loss": 0.1791, - "step": 6433 - }, - { - "epoch": 0.33, - "grad_norm": 0.9143042226597085, - "learning_rate": 1.5714796829808352e-05, - "loss": 0.2176, - "step": 6434 - }, - { - "epoch": 0.33, - "grad_norm": 1.0246240592941134, - "learning_rate": 1.5713445223147876e-05, - "loss": 0.2225, - "step": 6435 - }, - { - "epoch": 0.33, - "grad_norm": 1.2215280314440582, - "learning_rate": 1.5712093461509878e-05, - "loss": 0.2066, - "step": 6436 - }, - { - "epoch": 0.33, - "grad_norm": 1.947008583860387, - "learning_rate": 1.5710741544931017e-05, - "loss": 0.2278, - "step": 6437 - }, - { - "epoch": 0.33, - "grad_norm": 1.1323169093199634, - "learning_rate": 1.5709389473447974e-05, - "loss": 0.2134, - "step": 6438 - }, - { - "epoch": 0.33, - "grad_norm": 0.8026527983512476, - "learning_rate": 1.5708037247097418e-05, - "loss": 0.2012, - "step": 6439 - }, - { - "epoch": 0.33, - "grad_norm": 1.1969855479924654, - "learning_rate": 1.5706684865916025e-05, - "loss": 0.2282, - "step": 6440 - }, - { - "epoch": 0.33, - "grad_norm": 0.9403857301981011, - "learning_rate": 1.570533232994049e-05, - "loss": 0.1967, - "step": 6441 - }, - { - "epoch": 0.33, - "grad_norm": 2.305053539772037, - "learning_rate": 1.570397963920749e-05, - "loss": 0.1965, - "step": 6442 - }, - { - "epoch": 0.33, - "grad_norm": 1.1363241248573346, - "learning_rate": 1.5702626793753717e-05, - "loss": 0.2044, - "step": 6443 - }, - { - "epoch": 0.33, - "grad_norm": 0.8737256816832991, - "learning_rate": 1.5701273793615876e-05, - "loss": 0.2048, - "step": 6444 - }, - { - "epoch": 0.33, - "grad_norm": 1.6215538187724758, - "learning_rate": 1.5699920638830656e-05, - "loss": 0.218, - "step": 6445 - }, - { - "epoch": 0.33, - "grad_norm": 1.0878235244057186, - "learning_rate": 1.5698567329434768e-05, - "loss": 0.222, - "step": 6446 - }, - { - "epoch": 0.33, - "grad_norm": 1.0513041461655643, - "learning_rate": 1.569721386546492e-05, - "loss": 0.2166, - "step": 6447 - }, - { - "epoch": 0.33, - "grad_norm": 0.9622985569927587, - "learning_rate": 1.5695860246957826e-05, - "loss": 0.1979, - "step": 6448 - }, - { - "epoch": 0.33, - "grad_norm": 1.106124143025721, - "learning_rate": 1.5694506473950198e-05, - "loss": 0.2146, - "step": 6449 - }, - { - "epoch": 0.33, - "grad_norm": 0.9022371205892392, - "learning_rate": 1.5693152546478762e-05, - "loss": 0.1904, - "step": 6450 - }, - { - "epoch": 0.33, - "grad_norm": 0.8059048062766315, - "learning_rate": 1.569179846458024e-05, - "loss": 0.1933, - "step": 6451 - }, - { - "epoch": 0.33, - "grad_norm": 1.8776693830331572, - "learning_rate": 1.5690444228291366e-05, - "loss": 0.2228, - "step": 6452 - }, - { - "epoch": 0.33, - "grad_norm": 1.0309478566891377, - "learning_rate": 1.568908983764887e-05, - "loss": 0.1922, - "step": 6453 - }, - { - "epoch": 0.33, - "grad_norm": 1.050309678890914, - "learning_rate": 1.568773529268949e-05, - "loss": 0.2045, - "step": 6454 - }, - { - "epoch": 0.33, - "grad_norm": 1.0229060671888315, - "learning_rate": 1.5686380593449966e-05, - "loss": 0.2047, - "step": 6455 - }, - { - "epoch": 0.33, - "grad_norm": 1.001874802951346, - "learning_rate": 1.5685025739967056e-05, - "loss": 0.229, - "step": 6456 - }, - { - "epoch": 0.33, - "grad_norm": 0.9513827785300544, - "learning_rate": 1.568367073227749e-05, - "loss": 0.1857, - "step": 6457 - }, - { - "epoch": 0.33, - "grad_norm": 0.8963176883806334, - "learning_rate": 1.5682315570418043e-05, - "loss": 0.1848, - "step": 6458 - }, - { - "epoch": 0.33, - "grad_norm": 0.9336871285878903, - "learning_rate": 1.5680960254425467e-05, - "loss": 0.2199, - "step": 6459 - }, - { - "epoch": 0.33, - "grad_norm": 0.9301503405854965, - "learning_rate": 1.5679604784336516e-05, - "loss": 0.1947, - "step": 6460 - }, - { - "epoch": 0.33, - "grad_norm": 0.8430696671678639, - "learning_rate": 1.567824916018797e-05, - "loss": 0.2281, - "step": 6461 - }, - { - "epoch": 0.33, - "grad_norm": 1.0779499877725116, - "learning_rate": 1.567689338201659e-05, - "loss": 0.2232, - "step": 6462 - }, - { - "epoch": 0.33, - "grad_norm": 0.9323416339337731, - "learning_rate": 1.5675537449859158e-05, - "loss": 0.2095, - "step": 6463 - }, - { - "epoch": 0.33, - "grad_norm": 1.1019950763615698, - "learning_rate": 1.5674181363752452e-05, - "loss": 0.1894, - "step": 6464 - }, - { - "epoch": 0.33, - "grad_norm": 1.2348306315579887, - "learning_rate": 1.5672825123733257e-05, - "loss": 0.1921, - "step": 6465 - }, - { - "epoch": 0.33, - "grad_norm": 1.0231332496881906, - "learning_rate": 1.567146872983836e-05, - "loss": 0.1972, - "step": 6466 - }, - { - "epoch": 0.33, - "grad_norm": 2.220781301600539, - "learning_rate": 1.5670112182104552e-05, - "loss": 0.1978, - "step": 6467 - }, - { - "epoch": 0.33, - "grad_norm": 1.1006520137959355, - "learning_rate": 1.566875548056863e-05, - "loss": 0.2557, - "step": 6468 - }, - { - "epoch": 0.33, - "grad_norm": 1.0133967400662975, - "learning_rate": 1.5667398625267402e-05, - "loss": 0.1999, - "step": 6469 - }, - { - "epoch": 0.33, - "grad_norm": 0.9847310910093158, - "learning_rate": 1.566604161623766e-05, - "loss": 0.1787, - "step": 6470 - }, - { - "epoch": 0.33, - "grad_norm": 1.3193309942941929, - "learning_rate": 1.5664684453516218e-05, - "loss": 0.2028, - "step": 6471 - }, - { - "epoch": 0.33, - "grad_norm": 1.0438169210754187, - "learning_rate": 1.5663327137139893e-05, - "loss": 0.1959, - "step": 6472 - }, - { - "epoch": 0.33, - "grad_norm": 0.9198281649456738, - "learning_rate": 1.56619696671455e-05, - "loss": 0.2137, - "step": 6473 - }, - { - "epoch": 0.33, - "grad_norm": 1.3123289776496023, - "learning_rate": 1.5660612043569864e-05, - "loss": 0.2134, - "step": 6474 - }, - { - "epoch": 0.33, - "grad_norm": 0.9629029433242554, - "learning_rate": 1.56592542664498e-05, - "loss": 0.1986, - "step": 6475 - }, - { - "epoch": 0.33, - "grad_norm": 1.0251036113371328, - "learning_rate": 1.5657896335822147e-05, - "loss": 0.1771, - "step": 6476 - }, - { - "epoch": 0.33, - "grad_norm": 0.8869554671856225, - "learning_rate": 1.5656538251723734e-05, - "loss": 0.1902, - "step": 6477 - }, - { - "epoch": 0.33, - "grad_norm": 1.3756739947044674, - "learning_rate": 1.5655180014191404e-05, - "loss": 0.2119, - "step": 6478 - }, - { - "epoch": 0.33, - "grad_norm": 0.9633315984036738, - "learning_rate": 1.5653821623261998e-05, - "loss": 0.2, - "step": 6479 - }, - { - "epoch": 0.33, - "grad_norm": 9.251828938015827, - "learning_rate": 1.565246307897236e-05, - "loss": 0.203, - "step": 6480 - }, - { - "epoch": 0.33, - "grad_norm": 1.14068294685859, - "learning_rate": 1.565110438135934e-05, - "loss": 0.191, - "step": 6481 - }, - { - "epoch": 0.33, - "grad_norm": 1.5452138562508575, - "learning_rate": 1.5649745530459794e-05, - "loss": 0.1995, - "step": 6482 - }, - { - "epoch": 0.33, - "grad_norm": 1.0536420677133722, - "learning_rate": 1.5648386526310582e-05, - "loss": 0.1993, - "step": 6483 - }, - { - "epoch": 0.33, - "grad_norm": 1.1356216475216754, - "learning_rate": 1.564702736894857e-05, - "loss": 0.2186, - "step": 6484 - }, - { - "epoch": 0.33, - "grad_norm": 0.8532781011175651, - "learning_rate": 1.5645668058410617e-05, - "loss": 0.2082, - "step": 6485 - }, - { - "epoch": 0.33, - "grad_norm": 0.8455231181313126, - "learning_rate": 1.56443085947336e-05, - "loss": 0.1931, - "step": 6486 - }, - { - "epoch": 0.33, - "grad_norm": 1.2629784905997012, - "learning_rate": 1.5642948977954395e-05, - "loss": 0.2528, - "step": 6487 - }, - { - "epoch": 0.33, - "grad_norm": 1.2047458771536588, - "learning_rate": 1.564158920810988e-05, - "loss": 0.2098, - "step": 6488 - }, - { - "epoch": 0.33, - "grad_norm": 1.154183894969296, - "learning_rate": 1.5640229285236938e-05, - "loss": 0.2142, - "step": 6489 - }, - { - "epoch": 0.33, - "grad_norm": 1.4439578060813603, - "learning_rate": 1.563886920937246e-05, - "loss": 0.1939, - "step": 6490 - }, - { - "epoch": 0.33, - "grad_norm": 1.1269334348934161, - "learning_rate": 1.5637508980553335e-05, - "loss": 0.202, - "step": 6491 - }, - { - "epoch": 0.33, - "grad_norm": 0.8474971196663091, - "learning_rate": 1.563614859881646e-05, - "loss": 0.1944, - "step": 6492 - }, - { - "epoch": 0.33, - "grad_norm": 0.775445743159406, - "learning_rate": 1.5634788064198736e-05, - "loss": 0.1838, - "step": 6493 - }, - { - "epoch": 0.33, - "grad_norm": 0.8829878157374699, - "learning_rate": 1.5633427376737072e-05, - "loss": 0.2099, - "step": 6494 - }, - { - "epoch": 0.33, - "grad_norm": 1.388770427746395, - "learning_rate": 1.5632066536468367e-05, - "loss": 0.1908, - "step": 6495 - }, - { - "epoch": 0.33, - "grad_norm": 1.0099467639292705, - "learning_rate": 1.5630705543429542e-05, - "loss": 0.191, - "step": 6496 - }, - { - "epoch": 0.33, - "grad_norm": 0.9404162366537719, - "learning_rate": 1.5629344397657506e-05, - "loss": 0.194, - "step": 6497 - }, - { - "epoch": 0.33, - "grad_norm": 0.9446683305039197, - "learning_rate": 1.562798309918919e-05, - "loss": 0.1796, - "step": 6498 - }, - { - "epoch": 0.33, - "grad_norm": 0.9309802969044433, - "learning_rate": 1.5626621648061514e-05, - "loss": 0.202, - "step": 6499 - }, - { - "epoch": 0.33, - "grad_norm": 1.7207205399944414, - "learning_rate": 1.5625260044311405e-05, - "loss": 0.2094, - "step": 6500 - }, - { - "epoch": 0.33, - "grad_norm": 1.8054028028686318, - "learning_rate": 1.5623898287975806e-05, - "loss": 0.204, - "step": 6501 - }, - { - "epoch": 0.33, - "grad_norm": 1.4060494249650275, - "learning_rate": 1.562253637909164e-05, - "loss": 0.194, - "step": 6502 - }, - { - "epoch": 0.33, - "grad_norm": 0.8105266496711662, - "learning_rate": 1.5621174317695862e-05, - "loss": 0.2025, - "step": 6503 - }, - { - "epoch": 0.33, - "grad_norm": 0.8944536557109091, - "learning_rate": 1.561981210382541e-05, - "loss": 0.2159, - "step": 6504 - }, - { - "epoch": 0.33, - "grad_norm": 0.7844477751552985, - "learning_rate": 1.5618449737517242e-05, - "loss": 0.2015, - "step": 6505 - }, - { - "epoch": 0.33, - "grad_norm": 0.8709928078499433, - "learning_rate": 1.5617087218808307e-05, - "loss": 0.1814, - "step": 6506 - }, - { - "epoch": 0.33, - "grad_norm": 1.3657649463539294, - "learning_rate": 1.5615724547735562e-05, - "loss": 0.2111, - "step": 6507 - }, - { - "epoch": 0.33, - "grad_norm": 1.3173488373145754, - "learning_rate": 1.561436172433597e-05, - "loss": 0.1981, - "step": 6508 - }, - { - "epoch": 0.33, - "grad_norm": 1.078990856080944, - "learning_rate": 1.56129987486465e-05, - "loss": 0.2151, - "step": 6509 - }, - { - "epoch": 0.33, - "grad_norm": 0.8717188418025383, - "learning_rate": 1.5611635620704128e-05, - "loss": 0.1983, - "step": 6510 - }, - { - "epoch": 0.33, - "grad_norm": 0.8053767630345547, - "learning_rate": 1.5610272340545814e-05, - "loss": 0.1947, - "step": 6511 - }, - { - "epoch": 0.33, - "grad_norm": 0.842561014891099, - "learning_rate": 1.560890890820855e-05, - "loss": 0.1883, - "step": 6512 - }, - { - "epoch": 0.33, - "grad_norm": 1.1991401747191601, - "learning_rate": 1.5607545323729313e-05, - "loss": 0.1756, - "step": 6513 - }, - { - "epoch": 0.33, - "grad_norm": 1.059056198298021, - "learning_rate": 1.5606181587145097e-05, - "loss": 0.2366, - "step": 6514 - }, - { - "epoch": 0.33, - "grad_norm": 1.7460856086533205, - "learning_rate": 1.5604817698492886e-05, - "loss": 0.2369, - "step": 6515 - }, - { - "epoch": 0.33, - "grad_norm": 1.0681687279055996, - "learning_rate": 1.560345365780968e-05, - "loss": 0.2114, - "step": 6516 - }, - { - "epoch": 0.33, - "grad_norm": 1.240767782666872, - "learning_rate": 1.5602089465132474e-05, - "loss": 0.197, - "step": 6517 - }, - { - "epoch": 0.33, - "grad_norm": 1.6283348417848393, - "learning_rate": 1.5600725120498273e-05, - "loss": 0.1867, - "step": 6518 - }, - { - "epoch": 0.33, - "grad_norm": 0.8753049390892731, - "learning_rate": 1.5599360623944092e-05, - "loss": 0.198, - "step": 6519 - }, - { - "epoch": 0.33, - "grad_norm": 0.8253790374894722, - "learning_rate": 1.5597995975506936e-05, - "loss": 0.2095, - "step": 6520 - }, - { - "epoch": 0.33, - "grad_norm": 1.679152258436176, - "learning_rate": 1.5596631175223823e-05, - "loss": 0.2221, - "step": 6521 - }, - { - "epoch": 0.33, - "grad_norm": 1.363989379907467, - "learning_rate": 1.559526622313177e-05, - "loss": 0.1862, - "step": 6522 - }, - { - "epoch": 0.33, - "grad_norm": 5.99462758090583, - "learning_rate": 1.559390111926781e-05, - "loss": 0.2021, - "step": 6523 - }, - { - "epoch": 0.33, - "grad_norm": 1.105152738508639, - "learning_rate": 1.559253586366896e-05, - "loss": 0.2166, - "step": 6524 - }, - { - "epoch": 0.33, - "grad_norm": 0.8798114773050834, - "learning_rate": 1.5591170456372264e-05, - "loss": 0.2005, - "step": 6525 - }, - { - "epoch": 0.33, - "grad_norm": 1.2415787362049853, - "learning_rate": 1.5589804897414757e-05, - "loss": 0.2013, - "step": 6526 - }, - { - "epoch": 0.33, - "grad_norm": 1.1061386725225641, - "learning_rate": 1.5588439186833467e-05, - "loss": 0.2422, - "step": 6527 - }, - { - "epoch": 0.33, - "grad_norm": 0.7700214560139843, - "learning_rate": 1.5587073324665457e-05, - "loss": 0.197, - "step": 6528 - }, - { - "epoch": 0.33, - "grad_norm": 1.3002606779597166, - "learning_rate": 1.558570731094776e-05, - "loss": 0.2082, - "step": 6529 - }, - { - "epoch": 0.33, - "grad_norm": 1.120351330109009, - "learning_rate": 1.558434114571744e-05, - "loss": 0.202, - "step": 6530 - }, - { - "epoch": 0.33, - "grad_norm": 1.4396268572009103, - "learning_rate": 1.558297482901155e-05, - "loss": 0.2189, - "step": 6531 - }, - { - "epoch": 0.33, - "grad_norm": 1.0460960319227817, - "learning_rate": 1.5581608360867154e-05, - "loss": 0.217, - "step": 6532 - }, - { - "epoch": 0.33, - "grad_norm": 1.0874166588094762, - "learning_rate": 1.5580241741321317e-05, - "loss": 0.2019, - "step": 6533 - }, - { - "epoch": 0.33, - "grad_norm": 1.1894921553008606, - "learning_rate": 1.5578874970411105e-05, - "loss": 0.1977, - "step": 6534 - }, - { - "epoch": 0.33, - "grad_norm": 0.9151050298336578, - "learning_rate": 1.5577508048173596e-05, - "loss": 0.1988, - "step": 6535 - }, - { - "epoch": 0.33, - "grad_norm": 1.8028004085361633, - "learning_rate": 1.5576140974645868e-05, - "loss": 0.2192, - "step": 6536 - }, - { - "epoch": 0.33, - "grad_norm": 1.0884779279303818, - "learning_rate": 1.5574773749865e-05, - "loss": 0.2029, - "step": 6537 - }, - { - "epoch": 0.33, - "grad_norm": 0.8143331503528025, - "learning_rate": 1.5573406373868077e-05, - "loss": 0.2073, - "step": 6538 - }, - { - "epoch": 0.33, - "grad_norm": 1.0414387222930812, - "learning_rate": 1.5572038846692193e-05, - "loss": 0.1941, - "step": 6539 - }, - { - "epoch": 0.33, - "grad_norm": 1.4755430457912222, - "learning_rate": 1.557067116837444e-05, - "loss": 0.1945, - "step": 6540 - }, - { - "epoch": 0.33, - "grad_norm": 1.063393639956194, - "learning_rate": 1.5569303338951914e-05, - "loss": 0.2216, - "step": 6541 - }, - { - "epoch": 0.33, - "grad_norm": 3.354543793682284, - "learning_rate": 1.5567935358461724e-05, - "loss": 0.22, - "step": 6542 - }, - { - "epoch": 0.33, - "grad_norm": 1.1240307573927577, - "learning_rate": 1.5566567226940974e-05, - "loss": 0.2162, - "step": 6543 - }, - { - "epoch": 0.33, - "grad_norm": 0.8625214215267929, - "learning_rate": 1.556519894442677e-05, - "loss": 0.207, - "step": 6544 - }, - { - "epoch": 0.33, - "grad_norm": 0.9190977071427817, - "learning_rate": 1.5563830510956234e-05, - "loss": 0.1947, - "step": 6545 - }, - { - "epoch": 0.33, - "grad_norm": 1.7671693867274025, - "learning_rate": 1.556246192656648e-05, - "loss": 0.1928, - "step": 6546 - }, - { - "epoch": 0.33, - "grad_norm": 0.9212945113465504, - "learning_rate": 1.556109319129463e-05, - "loss": 0.2034, - "step": 6547 - }, - { - "epoch": 0.33, - "grad_norm": 0.7883832295181457, - "learning_rate": 1.5559724305177814e-05, - "loss": 0.217, - "step": 6548 - }, - { - "epoch": 0.33, - "grad_norm": 1.5995930916405159, - "learning_rate": 1.5558355268253166e-05, - "loss": 0.1975, - "step": 6549 - }, - { - "epoch": 0.33, - "grad_norm": 0.8519660979050935, - "learning_rate": 1.555698608055781e-05, - "loss": 0.195, - "step": 6550 - }, - { - "epoch": 0.33, - "grad_norm": 1.1385807258455753, - "learning_rate": 1.5555616742128897e-05, - "loss": 0.1953, - "step": 6551 - }, - { - "epoch": 0.33, - "grad_norm": 0.8376325527210272, - "learning_rate": 1.5554247253003567e-05, - "loss": 0.1755, - "step": 6552 - }, - { - "epoch": 0.33, - "grad_norm": 0.790951262306196, - "learning_rate": 1.5552877613218964e-05, - "loss": 0.1992, - "step": 6553 - }, - { - "epoch": 0.33, - "grad_norm": 1.0898402865456223, - "learning_rate": 1.555150782281224e-05, - "loss": 0.1971, - "step": 6554 - }, - { - "epoch": 0.33, - "grad_norm": 1.0067994486256289, - "learning_rate": 1.555013788182056e-05, - "loss": 0.2048, - "step": 6555 - }, - { - "epoch": 0.33, - "grad_norm": 1.205884701347545, - "learning_rate": 1.554876779028107e-05, - "loss": 0.2196, - "step": 6556 - }, - { - "epoch": 0.33, - "grad_norm": 1.322602602161279, - "learning_rate": 1.5547397548230943e-05, - "loss": 0.2039, - "step": 6557 - }, - { - "epoch": 0.33, - "grad_norm": 0.7720006927722426, - "learning_rate": 1.554602715570735e-05, - "loss": 0.186, - "step": 6558 - }, - { - "epoch": 0.33, - "grad_norm": 1.0980304504859055, - "learning_rate": 1.554465661274745e-05, - "loss": 0.2072, - "step": 6559 - }, - { - "epoch": 0.33, - "grad_norm": 1.0093278518246684, - "learning_rate": 1.5543285919388426e-05, - "loss": 0.2197, - "step": 6560 - }, - { - "epoch": 0.33, - "grad_norm": 1.2019674945560757, - "learning_rate": 1.554191507566746e-05, - "loss": 0.1965, - "step": 6561 - }, - { - "epoch": 0.33, - "grad_norm": 0.9670683442046764, - "learning_rate": 1.5540544081621736e-05, - "loss": 0.2288, - "step": 6562 - }, - { - "epoch": 0.33, - "grad_norm": 0.949605601109131, - "learning_rate": 1.5539172937288437e-05, - "loss": 0.1993, - "step": 6563 - }, - { - "epoch": 0.33, - "grad_norm": 0.9442795776353967, - "learning_rate": 1.5537801642704763e-05, - "loss": 0.2031, - "step": 6564 - }, - { - "epoch": 0.33, - "grad_norm": 1.0005769850834774, - "learning_rate": 1.5536430197907904e-05, - "loss": 0.2164, - "step": 6565 - }, - { - "epoch": 0.33, - "grad_norm": 0.9143813378018446, - "learning_rate": 1.5535058602935065e-05, - "loss": 0.1804, - "step": 6566 - }, - { - "epoch": 0.33, - "grad_norm": 0.8805621406735882, - "learning_rate": 1.5533686857823447e-05, - "loss": 0.2052, - "step": 6567 - }, - { - "epoch": 0.33, - "grad_norm": 0.9730532725900689, - "learning_rate": 1.5532314962610263e-05, - "loss": 0.2081, - "step": 6568 - }, - { - "epoch": 0.33, - "grad_norm": 1.1364802182104736, - "learning_rate": 1.553094291733272e-05, - "loss": 0.2015, - "step": 6569 - }, - { - "epoch": 0.33, - "grad_norm": 0.95822533473699, - "learning_rate": 1.552957072202804e-05, - "loss": 0.2093, - "step": 6570 - }, - { - "epoch": 0.33, - "grad_norm": 1.3920492379804228, - "learning_rate": 1.5528198376733444e-05, - "loss": 0.1986, - "step": 6571 - }, - { - "epoch": 0.33, - "grad_norm": 0.8671365676659566, - "learning_rate": 1.552682588148615e-05, - "loss": 0.2204, - "step": 6572 - }, - { - "epoch": 0.33, - "grad_norm": 0.8180428446337018, - "learning_rate": 1.5525453236323396e-05, - "loss": 0.1759, - "step": 6573 - }, - { - "epoch": 0.33, - "grad_norm": 1.0814408848789787, - "learning_rate": 1.5524080441282408e-05, - "loss": 0.1984, - "step": 6574 - }, - { - "epoch": 0.33, - "grad_norm": 0.8779769686721743, - "learning_rate": 1.5522707496400425e-05, - "loss": 0.1806, - "step": 6575 - }, - { - "epoch": 0.33, - "grad_norm": 0.9724116612781548, - "learning_rate": 1.5521334401714692e-05, - "loss": 0.1971, - "step": 6576 - }, - { - "epoch": 0.33, - "grad_norm": 0.916346755900272, - "learning_rate": 1.551996115726245e-05, - "loss": 0.1832, - "step": 6577 - }, - { - "epoch": 0.33, - "grad_norm": 1.2003326335004167, - "learning_rate": 1.5518587763080956e-05, - "loss": 0.2176, - "step": 6578 - }, - { - "epoch": 0.33, - "grad_norm": 1.1705118254184124, - "learning_rate": 1.551721421920745e-05, - "loss": 0.194, - "step": 6579 - }, - { - "epoch": 0.33, - "grad_norm": 1.0446029535231542, - "learning_rate": 1.55158405256792e-05, - "loss": 0.2208, - "step": 6580 - }, - { - "epoch": 0.33, - "grad_norm": 1.0562633998936752, - "learning_rate": 1.551446668253346e-05, - "loss": 0.2092, - "step": 6581 - }, - { - "epoch": 0.33, - "grad_norm": 0.8909198049009172, - "learning_rate": 1.5513092689807505e-05, - "loss": 0.2235, - "step": 6582 - }, - { - "epoch": 0.33, - "grad_norm": 1.1344678807289725, - "learning_rate": 1.5511718547538596e-05, - "loss": 0.1994, - "step": 6583 - }, - { - "epoch": 0.33, - "grad_norm": 1.1833341979307461, - "learning_rate": 1.551034425576401e-05, - "loss": 0.1932, - "step": 6584 - }, - { - "epoch": 0.33, - "grad_norm": 0.9294885641880941, - "learning_rate": 1.5508969814521026e-05, - "loss": 0.2624, - "step": 6585 - }, - { - "epoch": 0.33, - "grad_norm": 0.9262316728429739, - "learning_rate": 1.550759522384693e-05, - "loss": 0.2206, - "step": 6586 - }, - { - "epoch": 0.33, - "grad_norm": 0.793394516959776, - "learning_rate": 1.5506220483778994e-05, - "loss": 0.1744, - "step": 6587 - }, - { - "epoch": 0.34, - "grad_norm": 1.0311135703428027, - "learning_rate": 1.550484559435452e-05, - "loss": 0.2464, - "step": 6588 - }, - { - "epoch": 0.34, - "grad_norm": 1.1685635601265862, - "learning_rate": 1.5503470555610797e-05, - "loss": 0.2192, - "step": 6589 - }, - { - "epoch": 0.34, - "grad_norm": 1.2425139947073716, - "learning_rate": 1.5502095367585124e-05, - "loss": 0.2053, - "step": 6590 - }, - { - "epoch": 0.34, - "grad_norm": 1.1825964827556712, - "learning_rate": 1.5500720030314805e-05, - "loss": 0.2195, - "step": 6591 - }, - { - "epoch": 0.34, - "grad_norm": 1.0157324689975242, - "learning_rate": 1.5499344543837144e-05, - "loss": 0.2074, - "step": 6592 - }, - { - "epoch": 0.34, - "grad_norm": 1.1504316423024223, - "learning_rate": 1.549796890818945e-05, - "loss": 0.2221, - "step": 6593 - }, - { - "epoch": 0.34, - "grad_norm": 1.1223861108775626, - "learning_rate": 1.5496593123409042e-05, - "loss": 0.1861, - "step": 6594 - }, - { - "epoch": 0.34, - "grad_norm": 1.5017109310443328, - "learning_rate": 1.549521718953323e-05, - "loss": 0.1979, - "step": 6595 - }, - { - "epoch": 0.34, - "grad_norm": 0.9587162726326114, - "learning_rate": 1.549384110659935e-05, - "loss": 0.2071, - "step": 6596 - }, - { - "epoch": 0.34, - "grad_norm": 1.4159051199900652, - "learning_rate": 1.5492464874644713e-05, - "loss": 0.2177, - "step": 6597 - }, - { - "epoch": 0.34, - "grad_norm": 1.468995929268174, - "learning_rate": 1.5491088493706657e-05, - "loss": 0.2205, - "step": 6598 - }, - { - "epoch": 0.34, - "grad_norm": 1.1113322064440028, - "learning_rate": 1.548971196382252e-05, - "loss": 0.1878, - "step": 6599 - }, - { - "epoch": 0.34, - "grad_norm": 1.1024645553263197, - "learning_rate": 1.548833528502963e-05, - "loss": 0.2177, - "step": 6600 - }, - { - "epoch": 0.34, - "grad_norm": 0.9146618497170281, - "learning_rate": 1.5486958457365338e-05, - "loss": 0.1983, - "step": 6601 - }, - { - "epoch": 0.34, - "grad_norm": 1.004880063563026, - "learning_rate": 1.5485581480866985e-05, - "loss": 0.1895, - "step": 6602 - }, - { - "epoch": 0.34, - "grad_norm": 0.8620626063742314, - "learning_rate": 1.5484204355571927e-05, - "loss": 0.1999, - "step": 6603 - }, - { - "epoch": 0.34, - "grad_norm": 1.6230596374204962, - "learning_rate": 1.5482827081517516e-05, - "loss": 0.2073, - "step": 6604 - }, - { - "epoch": 0.34, - "grad_norm": 0.8772018169236586, - "learning_rate": 1.5481449658741112e-05, - "loss": 0.1892, - "step": 6605 - }, - { - "epoch": 0.34, - "grad_norm": 0.9995458502864405, - "learning_rate": 1.5480072087280075e-05, - "loss": 0.1928, - "step": 6606 - }, - { - "epoch": 0.34, - "grad_norm": 1.1101682764237155, - "learning_rate": 1.5478694367171772e-05, - "loss": 0.1975, - "step": 6607 - }, - { - "epoch": 0.34, - "grad_norm": 1.1474296693817685, - "learning_rate": 1.547731649845358e-05, - "loss": 0.2093, - "step": 6608 - }, - { - "epoch": 0.34, - "grad_norm": 1.0708314496289437, - "learning_rate": 1.5475938481162862e-05, - "loss": 0.215, - "step": 6609 - }, - { - "epoch": 0.34, - "grad_norm": 1.4647452705106907, - "learning_rate": 1.5474560315337007e-05, - "loss": 0.2148, - "step": 6610 - }, - { - "epoch": 0.34, - "grad_norm": 1.0394882698957975, - "learning_rate": 1.5473182001013394e-05, - "loss": 0.18, - "step": 6611 - }, - { - "epoch": 0.34, - "grad_norm": 1.177655440718952, - "learning_rate": 1.547180353822941e-05, - "loss": 0.204, - "step": 6612 - }, - { - "epoch": 0.34, - "grad_norm": 1.6098661456952317, - "learning_rate": 1.5470424927022442e-05, - "loss": 0.2157, - "step": 6613 - }, - { - "epoch": 0.34, - "grad_norm": 1.4013179662489288, - "learning_rate": 1.5469046167429895e-05, - "loss": 0.1768, - "step": 6614 - }, - { - "epoch": 0.34, - "grad_norm": 0.8876557891047067, - "learning_rate": 1.5467667259489157e-05, - "loss": 0.1968, - "step": 6615 - }, - { - "epoch": 0.34, - "grad_norm": 0.7364338213122036, - "learning_rate": 1.546628820323764e-05, - "loss": 0.1951, - "step": 6616 - }, - { - "epoch": 0.34, - "grad_norm": 0.8233505032421118, - "learning_rate": 1.5464908998712743e-05, - "loss": 0.2177, - "step": 6617 - }, - { - "epoch": 0.34, - "grad_norm": 0.7988196118879016, - "learning_rate": 1.5463529645951884e-05, - "loss": 0.1774, - "step": 6618 - }, - { - "epoch": 0.34, - "grad_norm": 1.7863804154493699, - "learning_rate": 1.5462150144992473e-05, - "loss": 0.2375, - "step": 6619 - }, - { - "epoch": 0.34, - "grad_norm": 1.1876939499505215, - "learning_rate": 1.546077049587193e-05, - "loss": 0.1965, - "step": 6620 - }, - { - "epoch": 0.34, - "grad_norm": 0.7738270979735855, - "learning_rate": 1.545939069862768e-05, - "loss": 0.2106, - "step": 6621 - }, - { - "epoch": 0.34, - "grad_norm": 0.8918711817915994, - "learning_rate": 1.545801075329715e-05, - "loss": 0.1929, - "step": 6622 - }, - { - "epoch": 0.34, - "grad_norm": 0.9377869351034032, - "learning_rate": 1.5456630659917768e-05, - "loss": 0.1894, - "step": 6623 - }, - { - "epoch": 0.34, - "grad_norm": 0.8214995234734142, - "learning_rate": 1.5455250418526976e-05, - "loss": 0.2025, - "step": 6624 - }, - { - "epoch": 0.34, - "grad_norm": 1.9899732290557761, - "learning_rate": 1.5453870029162202e-05, - "loss": 0.1985, - "step": 6625 - }, - { - "epoch": 0.34, - "grad_norm": 1.0791010808495334, - "learning_rate": 1.5452489491860897e-05, - "loss": 0.2171, - "step": 6626 - }, - { - "epoch": 0.34, - "grad_norm": 0.8035978835832673, - "learning_rate": 1.5451108806660508e-05, - "loss": 0.1964, - "step": 6627 - }, - { - "epoch": 0.34, - "grad_norm": 0.8693180246456279, - "learning_rate": 1.5449727973598487e-05, - "loss": 0.1976, - "step": 6628 - }, - { - "epoch": 0.34, - "grad_norm": 0.8965073835730478, - "learning_rate": 1.544834699271228e-05, - "loss": 0.2127, - "step": 6629 - }, - { - "epoch": 0.34, - "grad_norm": 1.1466026109131418, - "learning_rate": 1.5446965864039357e-05, - "loss": 0.1774, - "step": 6630 - }, - { - "epoch": 0.34, - "grad_norm": 0.9670915922502352, - "learning_rate": 1.544558458761718e-05, - "loss": 0.2116, - "step": 6631 - }, - { - "epoch": 0.34, - "grad_norm": 1.3603515418339192, - "learning_rate": 1.5444203163483212e-05, - "loss": 0.2025, - "step": 6632 - }, - { - "epoch": 0.34, - "grad_norm": 0.9222393157039742, - "learning_rate": 1.544282159167492e-05, - "loss": 0.2049, - "step": 6633 - }, - { - "epoch": 0.34, - "grad_norm": 0.9098251873140276, - "learning_rate": 1.5441439872229793e-05, - "loss": 0.1982, - "step": 6634 - }, - { - "epoch": 0.34, - "grad_norm": 1.0500483855202372, - "learning_rate": 1.5440058005185295e-05, - "loss": 0.219, - "step": 6635 - }, - { - "epoch": 0.34, - "grad_norm": 0.9415410885951533, - "learning_rate": 1.5438675990578923e-05, - "loss": 0.1917, - "step": 6636 - }, - { - "epoch": 0.34, - "grad_norm": 0.9378779126221987, - "learning_rate": 1.5437293828448153e-05, - "loss": 0.2228, - "step": 6637 - }, - { - "epoch": 0.34, - "grad_norm": 0.9004931303198556, - "learning_rate": 1.5435911518830485e-05, - "loss": 0.2029, - "step": 6638 - }, - { - "epoch": 0.34, - "grad_norm": 2.782645150359459, - "learning_rate": 1.5434529061763405e-05, - "loss": 0.1996, - "step": 6639 - }, - { - "epoch": 0.34, - "grad_norm": 1.004324869305781, - "learning_rate": 1.543314645728442e-05, - "loss": 0.2044, - "step": 6640 - }, - { - "epoch": 0.34, - "grad_norm": 1.6039110715845524, - "learning_rate": 1.543176370543103e-05, - "loss": 0.2098, - "step": 6641 - }, - { - "epoch": 0.34, - "grad_norm": 1.2148324779559043, - "learning_rate": 1.5430380806240744e-05, - "loss": 0.2267, - "step": 6642 - }, - { - "epoch": 0.34, - "grad_norm": 0.9685722004536197, - "learning_rate": 1.5428997759751073e-05, - "loss": 0.1986, - "step": 6643 - }, - { - "epoch": 0.34, - "grad_norm": 1.4287506123498237, - "learning_rate": 1.5427614565999527e-05, - "loss": 0.1698, - "step": 6644 - }, - { - "epoch": 0.34, - "grad_norm": 1.2560403408390468, - "learning_rate": 1.542623122502363e-05, - "loss": 0.2371, - "step": 6645 - }, - { - "epoch": 0.34, - "grad_norm": 1.493029626176336, - "learning_rate": 1.5424847736860907e-05, - "loss": 0.2216, - "step": 6646 - }, - { - "epoch": 0.34, - "grad_norm": 1.502436700329338, - "learning_rate": 1.5423464101548883e-05, - "loss": 0.217, - "step": 6647 - }, - { - "epoch": 0.34, - "grad_norm": 1.7559959460499155, - "learning_rate": 1.5422080319125085e-05, - "loss": 0.2062, - "step": 6648 - }, - { - "epoch": 0.34, - "grad_norm": 1.0631140913339923, - "learning_rate": 1.5420696389627057e-05, - "loss": 0.2386, - "step": 6649 - }, - { - "epoch": 0.34, - "grad_norm": 1.224313506950377, - "learning_rate": 1.5419312313092328e-05, - "loss": 0.1964, - "step": 6650 - }, - { - "epoch": 0.34, - "grad_norm": 1.3134515484754445, - "learning_rate": 1.541792808955845e-05, - "loss": 0.1774, - "step": 6651 - }, - { - "epoch": 0.34, - "grad_norm": 0.9334696811383187, - "learning_rate": 1.5416543719062967e-05, - "loss": 0.1961, - "step": 6652 - }, - { - "epoch": 0.34, - "grad_norm": 5.177362982763627, - "learning_rate": 1.541515920164343e-05, - "loss": 0.2205, - "step": 6653 - }, - { - "epoch": 0.34, - "grad_norm": 1.2813984057128454, - "learning_rate": 1.541377453733739e-05, - "loss": 0.1877, - "step": 6654 - }, - { - "epoch": 0.34, - "grad_norm": 0.8656007817415847, - "learning_rate": 1.541238972618241e-05, - "loss": 0.1998, - "step": 6655 - }, - { - "epoch": 0.34, - "grad_norm": 1.0367298136556913, - "learning_rate": 1.541100476821606e-05, - "loss": 0.2136, - "step": 6656 - }, - { - "epoch": 0.34, - "grad_norm": 0.9975140810289721, - "learning_rate": 1.5409619663475894e-05, - "loss": 0.2154, - "step": 6657 - }, - { - "epoch": 0.34, - "grad_norm": 1.2458834798559473, - "learning_rate": 1.540823441199949e-05, - "loss": 0.193, - "step": 6658 - }, - { - "epoch": 0.34, - "grad_norm": 1.5142136985614558, - "learning_rate": 1.540684901382442e-05, - "loss": 0.2157, - "step": 6659 - }, - { - "epoch": 0.34, - "grad_norm": 0.8924500791959433, - "learning_rate": 1.540546346898827e-05, - "loss": 0.2003, - "step": 6660 - }, - { - "epoch": 0.34, - "grad_norm": 0.9653697735047038, - "learning_rate": 1.5404077777528613e-05, - "loss": 0.2131, - "step": 6661 - }, - { - "epoch": 0.34, - "grad_norm": 0.8638110891762141, - "learning_rate": 1.5402691939483046e-05, - "loss": 0.1747, - "step": 6662 - }, - { - "epoch": 0.34, - "grad_norm": 0.9144960253692133, - "learning_rate": 1.540130595488915e-05, - "loss": 0.2166, - "step": 6663 - }, - { - "epoch": 0.34, - "grad_norm": 0.735671035910116, - "learning_rate": 1.539991982378453e-05, - "loss": 0.1921, - "step": 6664 - }, - { - "epoch": 0.34, - "grad_norm": 1.2657061752905912, - "learning_rate": 1.539853354620678e-05, - "loss": 0.2117, - "step": 6665 - }, - { - "epoch": 0.34, - "grad_norm": 1.283712998444193, - "learning_rate": 1.53971471221935e-05, - "loss": 0.179, - "step": 6666 - }, - { - "epoch": 0.34, - "grad_norm": 0.8989145391276644, - "learning_rate": 1.53957605517823e-05, - "loss": 0.1903, - "step": 6667 - }, - { - "epoch": 0.34, - "grad_norm": 1.0048519214602194, - "learning_rate": 1.539437383501079e-05, - "loss": 0.2057, - "step": 6668 - }, - { - "epoch": 0.34, - "grad_norm": 1.0132983115014504, - "learning_rate": 1.5392986971916583e-05, - "loss": 0.2205, - "step": 6669 - }, - { - "epoch": 0.34, - "grad_norm": 1.4896675971356934, - "learning_rate": 1.53915999625373e-05, - "loss": 0.1955, - "step": 6670 - }, - { - "epoch": 0.34, - "grad_norm": 1.5033619249115908, - "learning_rate": 1.539021280691057e-05, - "loss": 0.2164, - "step": 6671 - }, - { - "epoch": 0.34, - "grad_norm": 1.4593355835166457, - "learning_rate": 1.5388825505074006e-05, - "loss": 0.2004, - "step": 6672 - }, - { - "epoch": 0.34, - "grad_norm": 1.148201487459412, - "learning_rate": 1.538743805706525e-05, - "loss": 0.1927, - "step": 6673 - }, - { - "epoch": 0.34, - "grad_norm": 1.6333105220118933, - "learning_rate": 1.538605046292193e-05, - "loss": 0.2123, - "step": 6674 - }, - { - "epoch": 0.34, - "grad_norm": 0.9890080278145551, - "learning_rate": 1.5384662722681688e-05, - "loss": 0.1891, - "step": 6675 - }, - { - "epoch": 0.34, - "grad_norm": 0.9610666592755823, - "learning_rate": 1.5383274836382163e-05, - "loss": 0.1825, - "step": 6676 - }, - { - "epoch": 0.34, - "grad_norm": 1.0378885292526392, - "learning_rate": 1.5381886804061005e-05, - "loss": 0.2171, - "step": 6677 - }, - { - "epoch": 0.34, - "grad_norm": 1.1277837007123201, - "learning_rate": 1.5380498625755867e-05, - "loss": 0.2176, - "step": 6678 - }, - { - "epoch": 0.34, - "grad_norm": 1.0877352146653516, - "learning_rate": 1.5379110301504397e-05, - "loss": 0.2081, - "step": 6679 - }, - { - "epoch": 0.34, - "grad_norm": 1.2947756337756804, - "learning_rate": 1.5377721831344258e-05, - "loss": 0.2119, - "step": 6680 - }, - { - "epoch": 0.34, - "grad_norm": 1.117584689887842, - "learning_rate": 1.5376333215313106e-05, - "loss": 0.2149, - "step": 6681 - }, - { - "epoch": 0.34, - "grad_norm": 1.2078699493400893, - "learning_rate": 1.5374944453448617e-05, - "loss": 0.2057, - "step": 6682 - }, - { - "epoch": 0.34, - "grad_norm": 0.9548572667866763, - "learning_rate": 1.5373555545788456e-05, - "loss": 0.2049, - "step": 6683 - }, - { - "epoch": 0.34, - "grad_norm": 1.3772461895252333, - "learning_rate": 1.5372166492370297e-05, - "loss": 0.1893, - "step": 6684 - }, - { - "epoch": 0.34, - "grad_norm": 0.9630206874145618, - "learning_rate": 1.5370777293231814e-05, - "loss": 0.1958, - "step": 6685 - }, - { - "epoch": 0.34, - "grad_norm": 1.3183565853223211, - "learning_rate": 1.5369387948410695e-05, - "loss": 0.2091, - "step": 6686 - }, - { - "epoch": 0.34, - "grad_norm": 1.0363802125546604, - "learning_rate": 1.536799845794463e-05, - "loss": 0.241, - "step": 6687 - }, - { - "epoch": 0.34, - "grad_norm": 0.8656078936422779, - "learning_rate": 1.53666088218713e-05, - "loss": 0.1964, - "step": 6688 - }, - { - "epoch": 0.34, - "grad_norm": 0.9737685003439084, - "learning_rate": 1.5365219040228402e-05, - "loss": 0.1881, - "step": 6689 - }, - { - "epoch": 0.34, - "grad_norm": 1.0616642657522841, - "learning_rate": 1.5363829113053633e-05, - "loss": 0.237, - "step": 6690 - }, - { - "epoch": 0.34, - "grad_norm": 0.8708419272926546, - "learning_rate": 1.53624390403847e-05, - "loss": 0.1886, - "step": 6691 - }, - { - "epoch": 0.34, - "grad_norm": 1.1335578938991093, - "learning_rate": 1.5361048822259302e-05, - "loss": 0.2157, - "step": 6692 - }, - { - "epoch": 0.34, - "grad_norm": 1.326790601511181, - "learning_rate": 1.5359658458715158e-05, - "loss": 0.228, - "step": 6693 - }, - { - "epoch": 0.34, - "grad_norm": 0.9248727896429133, - "learning_rate": 1.5358267949789968e-05, - "loss": 0.2035, - "step": 6694 - }, - { - "epoch": 0.34, - "grad_norm": 1.2076276869173062, - "learning_rate": 1.535687729552146e-05, - "loss": 0.2129, - "step": 6695 - }, - { - "epoch": 0.34, - "grad_norm": 1.8942627148982174, - "learning_rate": 1.5355486495947353e-05, - "loss": 0.2178, - "step": 6696 - }, - { - "epoch": 0.34, - "grad_norm": 0.8807289461435734, - "learning_rate": 1.5354095551105374e-05, - "loss": 0.2192, - "step": 6697 - }, - { - "epoch": 0.34, - "grad_norm": 1.842682012974875, - "learning_rate": 1.5352704461033247e-05, - "loss": 0.203, - "step": 6698 - }, - { - "epoch": 0.34, - "grad_norm": 0.9057246496827717, - "learning_rate": 1.5351313225768713e-05, - "loss": 0.1882, - "step": 6699 - }, - { - "epoch": 0.34, - "grad_norm": 2.1617478367242047, - "learning_rate": 1.5349921845349504e-05, - "loss": 0.2036, - "step": 6700 - }, - { - "epoch": 0.34, - "grad_norm": 0.8422370104317792, - "learning_rate": 1.5348530319813365e-05, - "loss": 0.2045, - "step": 6701 - }, - { - "epoch": 0.34, - "grad_norm": 1.172859237684915, - "learning_rate": 1.5347138649198036e-05, - "loss": 0.1777, - "step": 6702 - }, - { - "epoch": 0.34, - "grad_norm": 0.851183073220529, - "learning_rate": 1.5345746833541268e-05, - "loss": 0.1951, - "step": 6703 - }, - { - "epoch": 0.34, - "grad_norm": 1.3893811227479087, - "learning_rate": 1.5344354872880817e-05, - "loss": 0.1876, - "step": 6704 - }, - { - "epoch": 0.34, - "grad_norm": 1.2961449575191275, - "learning_rate": 1.534296276725444e-05, - "loss": 0.2221, - "step": 6705 - }, - { - "epoch": 0.34, - "grad_norm": 1.471827649921739, - "learning_rate": 1.5341570516699893e-05, - "loss": 0.1939, - "step": 6706 - }, - { - "epoch": 0.34, - "grad_norm": 0.8421797836882626, - "learning_rate": 1.5340178121254944e-05, - "loss": 0.2059, - "step": 6707 - }, - { - "epoch": 0.34, - "grad_norm": 0.968441843112463, - "learning_rate": 1.5338785580957366e-05, - "loss": 0.2059, - "step": 6708 - }, - { - "epoch": 0.34, - "grad_norm": 1.1233535791260196, - "learning_rate": 1.5337392895844923e-05, - "loss": 0.2172, - "step": 6709 - }, - { - "epoch": 0.34, - "grad_norm": 0.8078361898244383, - "learning_rate": 1.53360000659554e-05, - "loss": 0.1763, - "step": 6710 - }, - { - "epoch": 0.34, - "grad_norm": 0.7165845447840065, - "learning_rate": 1.533460709132657e-05, - "loss": 0.1835, - "step": 6711 - }, - { - "epoch": 0.34, - "grad_norm": 3.5322930348415293, - "learning_rate": 1.5333213971996223e-05, - "loss": 0.2092, - "step": 6712 - }, - { - "epoch": 0.34, - "grad_norm": 0.9859837992060969, - "learning_rate": 1.5331820708002148e-05, - "loss": 0.2159, - "step": 6713 - }, - { - "epoch": 0.34, - "grad_norm": 0.9879252355628823, - "learning_rate": 1.533042729938213e-05, - "loss": 0.1894, - "step": 6714 - }, - { - "epoch": 0.34, - "grad_norm": 0.8812018427849039, - "learning_rate": 1.5329033746173975e-05, - "loss": 0.1833, - "step": 6715 - }, - { - "epoch": 0.34, - "grad_norm": 1.4601846109173515, - "learning_rate": 1.5327640048415476e-05, - "loss": 0.2237, - "step": 6716 - }, - { - "epoch": 0.34, - "grad_norm": 1.0581061376121739, - "learning_rate": 1.5326246206144443e-05, - "loss": 0.2092, - "step": 6717 - }, - { - "epoch": 0.34, - "grad_norm": 0.7203814696096753, - "learning_rate": 1.532485221939868e-05, - "loss": 0.1884, - "step": 6718 - }, - { - "epoch": 0.34, - "grad_norm": 1.2641468880148858, - "learning_rate": 1.5323458088216e-05, - "loss": 0.2215, - "step": 6719 - }, - { - "epoch": 0.34, - "grad_norm": 0.9041915947266804, - "learning_rate": 1.5322063812634213e-05, - "loss": 0.182, - "step": 6720 - }, - { - "epoch": 0.34, - "grad_norm": 0.9312729062386421, - "learning_rate": 1.532066939269115e-05, - "loss": 0.2172, - "step": 6721 - }, - { - "epoch": 0.34, - "grad_norm": 1.7275672748749746, - "learning_rate": 1.531927482842463e-05, - "loss": 0.1762, - "step": 6722 - }, - { - "epoch": 0.34, - "grad_norm": 0.9269472309163541, - "learning_rate": 1.531788011987248e-05, - "loss": 0.206, - "step": 6723 - }, - { - "epoch": 0.34, - "grad_norm": 1.1568597220771661, - "learning_rate": 1.5316485267072528e-05, - "loss": 0.1887, - "step": 6724 - }, - { - "epoch": 0.34, - "grad_norm": 0.9338998705897923, - "learning_rate": 1.5315090270062612e-05, - "loss": 0.203, - "step": 6725 - }, - { - "epoch": 0.34, - "grad_norm": 1.2715192485484121, - "learning_rate": 1.5313695128880578e-05, - "loss": 0.2145, - "step": 6726 - }, - { - "epoch": 0.34, - "grad_norm": 1.0559807576449627, - "learning_rate": 1.531229984356426e-05, - "loss": 0.1945, - "step": 6727 - }, - { - "epoch": 0.34, - "grad_norm": 1.1443412285252021, - "learning_rate": 1.5310904414151505e-05, - "loss": 0.2212, - "step": 6728 - }, - { - "epoch": 0.34, - "grad_norm": 1.2483375607136242, - "learning_rate": 1.530950884068017e-05, - "loss": 0.2071, - "step": 6729 - }, - { - "epoch": 0.34, - "grad_norm": 1.1469468803995153, - "learning_rate": 1.530811312318811e-05, - "loss": 0.2045, - "step": 6730 - }, - { - "epoch": 0.34, - "grad_norm": 1.2036637391446452, - "learning_rate": 1.530671726171318e-05, - "loss": 0.1866, - "step": 6731 - }, - { - "epoch": 0.34, - "grad_norm": 1.198487986397559, - "learning_rate": 1.530532125629325e-05, - "loss": 0.2133, - "step": 6732 - }, - { - "epoch": 0.34, - "grad_norm": 0.7976346078056735, - "learning_rate": 1.5303925106966176e-05, - "loss": 0.2204, - "step": 6733 - }, - { - "epoch": 0.34, - "grad_norm": 1.205968836433249, - "learning_rate": 1.5302528813769832e-05, - "loss": 0.2076, - "step": 6734 - }, - { - "epoch": 0.34, - "grad_norm": 0.9959892711878979, - "learning_rate": 1.5301132376742097e-05, - "loss": 0.2149, - "step": 6735 - }, - { - "epoch": 0.34, - "grad_norm": 1.0439639919923365, - "learning_rate": 1.5299735795920852e-05, - "loss": 0.2182, - "step": 6736 - }, - { - "epoch": 0.34, - "grad_norm": 1.3157354916778414, - "learning_rate": 1.5298339071343965e-05, - "loss": 0.1834, - "step": 6737 - }, - { - "epoch": 0.34, - "grad_norm": 2.008918421266012, - "learning_rate": 1.5296942203049336e-05, - "loss": 0.2016, - "step": 6738 - }, - { - "epoch": 0.34, - "grad_norm": 1.0213466637424429, - "learning_rate": 1.5295545191074854e-05, - "loss": 0.1827, - "step": 6739 - }, - { - "epoch": 0.34, - "grad_norm": 1.393285649452404, - "learning_rate": 1.5294148035458406e-05, - "loss": 0.1925, - "step": 6740 - }, - { - "epoch": 0.34, - "grad_norm": 1.611635542327183, - "learning_rate": 1.529275073623789e-05, - "loss": 0.2131, - "step": 6741 - }, - { - "epoch": 0.34, - "grad_norm": 1.9380453950042238, - "learning_rate": 1.5291353293451216e-05, - "loss": 0.2109, - "step": 6742 - }, - { - "epoch": 0.34, - "grad_norm": 1.780304518112081, - "learning_rate": 1.5289955707136282e-05, - "loss": 0.21, - "step": 6743 - }, - { - "epoch": 0.34, - "grad_norm": 1.276791397406679, - "learning_rate": 1.5288557977331006e-05, - "loss": 0.1979, - "step": 6744 - }, - { - "epoch": 0.34, - "grad_norm": 1.0230013122394754, - "learning_rate": 1.528716010407329e-05, - "loss": 0.2092, - "step": 6745 - }, - { - "epoch": 0.34, - "grad_norm": 0.911278338876281, - "learning_rate": 1.528576208740106e-05, - "loss": 0.1953, - "step": 6746 - }, - { - "epoch": 0.34, - "grad_norm": 1.8718440559042655, - "learning_rate": 1.5284363927352234e-05, - "loss": 0.2164, - "step": 6747 - }, - { - "epoch": 0.34, - "grad_norm": 0.960141783585586, - "learning_rate": 1.528296562396474e-05, - "loss": 0.2246, - "step": 6748 - }, - { - "epoch": 0.34, - "grad_norm": 1.0057641215159536, - "learning_rate": 1.5281567177276504e-05, - "loss": 0.2114, - "step": 6749 - }, - { - "epoch": 0.34, - "grad_norm": 1.1115177064216062, - "learning_rate": 1.5280168587325462e-05, - "loss": 0.2166, - "step": 6750 - }, - { - "epoch": 0.34, - "grad_norm": 0.9984220338545741, - "learning_rate": 1.5278769854149544e-05, - "loss": 0.2036, - "step": 6751 - }, - { - "epoch": 0.34, - "grad_norm": 1.0728686079952425, - "learning_rate": 1.5277370977786698e-05, - "loss": 0.2143, - "step": 6752 - }, - { - "epoch": 0.34, - "grad_norm": 1.00311623585292, - "learning_rate": 1.527597195827487e-05, - "loss": 0.2022, - "step": 6753 - }, - { - "epoch": 0.34, - "grad_norm": 1.4416304137365434, - "learning_rate": 1.5274572795652e-05, - "loss": 0.1861, - "step": 6754 - }, - { - "epoch": 0.34, - "grad_norm": 1.353646006666074, - "learning_rate": 1.5273173489956045e-05, - "loss": 0.1966, - "step": 6755 - }, - { - "epoch": 0.34, - "grad_norm": 1.6279188709189063, - "learning_rate": 1.5271774041224965e-05, - "loss": 0.2124, - "step": 6756 - }, - { - "epoch": 0.34, - "grad_norm": 1.2112152569319565, - "learning_rate": 1.5270374449496713e-05, - "loss": 0.1893, - "step": 6757 - }, - { - "epoch": 0.34, - "grad_norm": 1.9026300065868202, - "learning_rate": 1.526897471480926e-05, - "loss": 0.2145, - "step": 6758 - }, - { - "epoch": 0.34, - "grad_norm": 1.4535905698686216, - "learning_rate": 1.5267574837200567e-05, - "loss": 0.1969, - "step": 6759 - }, - { - "epoch": 0.34, - "grad_norm": 0.9155814290404279, - "learning_rate": 1.5266174816708607e-05, - "loss": 0.2077, - "step": 6760 - }, - { - "epoch": 0.34, - "grad_norm": 1.4571808090129743, - "learning_rate": 1.526477465337136e-05, - "loss": 0.2047, - "step": 6761 - }, - { - "epoch": 0.34, - "grad_norm": 1.700143986136528, - "learning_rate": 1.5263374347226804e-05, - "loss": 0.207, - "step": 6762 - }, - { - "epoch": 0.34, - "grad_norm": 1.3011385282165446, - "learning_rate": 1.526197389831292e-05, - "loss": 0.1692, - "step": 6763 - }, - { - "epoch": 0.34, - "grad_norm": 1.0324443289219334, - "learning_rate": 1.52605733066677e-05, - "loss": 0.2005, - "step": 6764 - }, - { - "epoch": 0.34, - "grad_norm": 0.9902437055657942, - "learning_rate": 1.5259172572329132e-05, - "loss": 0.181, - "step": 6765 - }, - { - "epoch": 0.34, - "grad_norm": 1.4277802318478219, - "learning_rate": 1.5257771695335207e-05, - "loss": 0.1872, - "step": 6766 - }, - { - "epoch": 0.34, - "grad_norm": 0.9587925594158275, - "learning_rate": 1.5256370675723928e-05, - "loss": 0.1822, - "step": 6767 - }, - { - "epoch": 0.34, - "grad_norm": 1.169017797107394, - "learning_rate": 1.52549695135333e-05, - "loss": 0.2135, - "step": 6768 - }, - { - "epoch": 0.34, - "grad_norm": 1.0043873111957573, - "learning_rate": 1.5253568208801324e-05, - "loss": 0.2032, - "step": 6769 - }, - { - "epoch": 0.34, - "grad_norm": 0.9227197727775629, - "learning_rate": 1.5252166761566018e-05, - "loss": 0.2208, - "step": 6770 - }, - { - "epoch": 0.34, - "grad_norm": 0.9284620558439093, - "learning_rate": 1.5250765171865391e-05, - "loss": 0.2122, - "step": 6771 - }, - { - "epoch": 0.34, - "grad_norm": 1.2444535096292564, - "learning_rate": 1.5249363439737458e-05, - "loss": 0.2359, - "step": 6772 - }, - { - "epoch": 0.34, - "grad_norm": 0.8917021534524678, - "learning_rate": 1.5247961565220251e-05, - "loss": 0.1886, - "step": 6773 - }, - { - "epoch": 0.34, - "grad_norm": 1.0070824235122227, - "learning_rate": 1.5246559548351786e-05, - "loss": 0.1946, - "step": 6774 - }, - { - "epoch": 0.34, - "grad_norm": 1.0143443272664339, - "learning_rate": 1.5245157389170099e-05, - "loss": 0.2004, - "step": 6775 - }, - { - "epoch": 0.34, - "grad_norm": 0.8925425331684878, - "learning_rate": 1.5243755087713221e-05, - "loss": 0.1978, - "step": 6776 - }, - { - "epoch": 0.34, - "grad_norm": 0.9067333501121372, - "learning_rate": 1.5242352644019188e-05, - "loss": 0.1804, - "step": 6777 - }, - { - "epoch": 0.34, - "grad_norm": 0.8315046848626775, - "learning_rate": 1.5240950058126047e-05, - "loss": 0.2057, - "step": 6778 - }, - { - "epoch": 0.34, - "grad_norm": 1.2926472057668967, - "learning_rate": 1.5239547330071838e-05, - "loss": 0.1967, - "step": 6779 - }, - { - "epoch": 0.34, - "grad_norm": 1.4130144050799631, - "learning_rate": 1.5238144459894612e-05, - "loss": 0.2164, - "step": 6780 - }, - { - "epoch": 0.34, - "grad_norm": 1.5404583219652528, - "learning_rate": 1.523674144763242e-05, - "loss": 0.2328, - "step": 6781 - }, - { - "epoch": 0.34, - "grad_norm": 1.4509180543256275, - "learning_rate": 1.5235338293323322e-05, - "loss": 0.1865, - "step": 6782 - }, - { - "epoch": 0.34, - "grad_norm": 0.9155452259245301, - "learning_rate": 1.5233934997005377e-05, - "loss": 0.1868, - "step": 6783 - }, - { - "epoch": 0.34, - "grad_norm": 0.9785165899029814, - "learning_rate": 1.523253155871665e-05, - "loss": 0.2161, - "step": 6784 - }, - { - "epoch": 0.35, - "grad_norm": 0.9681027179097117, - "learning_rate": 1.5231127978495208e-05, - "loss": 0.1961, - "step": 6785 - }, - { - "epoch": 0.35, - "grad_norm": 1.291598955342103, - "learning_rate": 1.5229724256379124e-05, - "loss": 0.1936, - "step": 6786 - }, - { - "epoch": 0.35, - "grad_norm": 1.2179833211574773, - "learning_rate": 1.5228320392406476e-05, - "loss": 0.1859, - "step": 6787 - }, - { - "epoch": 0.35, - "grad_norm": 1.132302330300989, - "learning_rate": 1.522691638661534e-05, - "loss": 0.1998, - "step": 6788 - }, - { - "epoch": 0.35, - "grad_norm": 0.9243465153919224, - "learning_rate": 1.5225512239043805e-05, - "loss": 0.1908, - "step": 6789 - }, - { - "epoch": 0.35, - "grad_norm": 0.794549770398676, - "learning_rate": 1.5224107949729952e-05, - "loss": 0.1878, - "step": 6790 - }, - { - "epoch": 0.35, - "grad_norm": 1.1940722120482383, - "learning_rate": 1.5222703518711876e-05, - "loss": 0.1877, - "step": 6791 - }, - { - "epoch": 0.35, - "grad_norm": 1.006766552572996, - "learning_rate": 1.5221298946027674e-05, - "loss": 0.1982, - "step": 6792 - }, - { - "epoch": 0.35, - "grad_norm": 1.2122944014485775, - "learning_rate": 1.5219894231715443e-05, - "loss": 0.1661, - "step": 6793 - }, - { - "epoch": 0.35, - "grad_norm": 0.7706146021851047, - "learning_rate": 1.521848937581328e-05, - "loss": 0.1752, - "step": 6794 - }, - { - "epoch": 0.35, - "grad_norm": 0.8239227910933536, - "learning_rate": 1.5217084378359306e-05, - "loss": 0.1822, - "step": 6795 - }, - { - "epoch": 0.35, - "grad_norm": 0.8731121198592614, - "learning_rate": 1.5215679239391621e-05, - "loss": 0.1922, - "step": 6796 - }, - { - "epoch": 0.35, - "grad_norm": 1.218467420504002, - "learning_rate": 1.5214273958948343e-05, - "loss": 0.198, - "step": 6797 - }, - { - "epoch": 0.35, - "grad_norm": 1.232086371764998, - "learning_rate": 1.5212868537067587e-05, - "loss": 0.1799, - "step": 6798 - }, - { - "epoch": 0.35, - "grad_norm": 0.8349998328027023, - "learning_rate": 1.5211462973787478e-05, - "loss": 0.1858, - "step": 6799 - }, - { - "epoch": 0.35, - "grad_norm": 0.9479616491015617, - "learning_rate": 1.5210057269146141e-05, - "loss": 0.2169, - "step": 6800 - }, - { - "epoch": 0.35, - "grad_norm": 1.021839551116278, - "learning_rate": 1.5208651423181709e-05, - "loss": 0.2123, - "step": 6801 - }, - { - "epoch": 0.35, - "grad_norm": 0.7427252710048137, - "learning_rate": 1.5207245435932312e-05, - "loss": 0.2014, - "step": 6802 - }, - { - "epoch": 0.35, - "grad_norm": 1.3946540105732994, - "learning_rate": 1.5205839307436088e-05, - "loss": 0.206, - "step": 6803 - }, - { - "epoch": 0.35, - "grad_norm": 0.9192566427112906, - "learning_rate": 1.5204433037731177e-05, - "loss": 0.1898, - "step": 6804 - }, - { - "epoch": 0.35, - "grad_norm": 1.0661683152184878, - "learning_rate": 1.5203026626855728e-05, - "loss": 0.217, - "step": 6805 - }, - { - "epoch": 0.35, - "grad_norm": 0.9281868878900686, - "learning_rate": 1.5201620074847888e-05, - "loss": 0.2235, - "step": 6806 - }, - { - "epoch": 0.35, - "grad_norm": 0.8851710721864421, - "learning_rate": 1.5200213381745807e-05, - "loss": 0.2138, - "step": 6807 - }, - { - "epoch": 0.35, - "grad_norm": 0.8892054097059979, - "learning_rate": 1.5198806547587648e-05, - "loss": 0.2257, - "step": 6808 - }, - { - "epoch": 0.35, - "grad_norm": 1.2053124528725399, - "learning_rate": 1.5197399572411566e-05, - "loss": 0.1975, - "step": 6809 - }, - { - "epoch": 0.35, - "grad_norm": 0.960929308363589, - "learning_rate": 1.5195992456255728e-05, - "loss": 0.1986, - "step": 6810 - }, - { - "epoch": 0.35, - "grad_norm": 1.0636077023307362, - "learning_rate": 1.51945851991583e-05, - "loss": 0.2068, - "step": 6811 - }, - { - "epoch": 0.35, - "grad_norm": 0.867963274482051, - "learning_rate": 1.5193177801157456e-05, - "loss": 0.1769, - "step": 6812 - }, - { - "epoch": 0.35, - "grad_norm": 0.8875259655818101, - "learning_rate": 1.5191770262291367e-05, - "loss": 0.1952, - "step": 6813 - }, - { - "epoch": 0.35, - "grad_norm": 0.9265142993267459, - "learning_rate": 1.5190362582598223e-05, - "loss": 0.1973, - "step": 6814 - }, - { - "epoch": 0.35, - "grad_norm": 1.0713731845568697, - "learning_rate": 1.5188954762116197e-05, - "loss": 0.1891, - "step": 6815 - }, - { - "epoch": 0.35, - "grad_norm": 1.170054639379894, - "learning_rate": 1.518754680088348e-05, - "loss": 0.1806, - "step": 6816 - }, - { - "epoch": 0.35, - "grad_norm": 1.5358593286525253, - "learning_rate": 1.5186138698938262e-05, - "loss": 0.2304, - "step": 6817 - }, - { - "epoch": 0.35, - "grad_norm": 1.2267745756341035, - "learning_rate": 1.5184730456318742e-05, - "loss": 0.21, - "step": 6818 - }, - { - "epoch": 0.35, - "grad_norm": 1.2369692943548545, - "learning_rate": 1.5183322073063113e-05, - "loss": 0.1932, - "step": 6819 - }, - { - "epoch": 0.35, - "grad_norm": 0.9842943782482564, - "learning_rate": 1.5181913549209582e-05, - "loss": 0.199, - "step": 6820 - }, - { - "epoch": 0.35, - "grad_norm": 1.1127812635949084, - "learning_rate": 1.5180504884796352e-05, - "loss": 0.1981, - "step": 6821 - }, - { - "epoch": 0.35, - "grad_norm": 1.15296947156494, - "learning_rate": 1.5179096079861633e-05, - "loss": 0.1961, - "step": 6822 - }, - { - "epoch": 0.35, - "grad_norm": 1.3997778340175386, - "learning_rate": 1.5177687134443644e-05, - "loss": 0.2149, - "step": 6823 - }, - { - "epoch": 0.35, - "grad_norm": 1.1547947538700556, - "learning_rate": 1.51762780485806e-05, - "loss": 0.2027, - "step": 6824 - }, - { - "epoch": 0.35, - "grad_norm": 1.2744762270890364, - "learning_rate": 1.5174868822310715e-05, - "loss": 0.1796, - "step": 6825 - }, - { - "epoch": 0.35, - "grad_norm": 0.8809148208865116, - "learning_rate": 1.5173459455672225e-05, - "loss": 0.2006, - "step": 6826 - }, - { - "epoch": 0.35, - "grad_norm": 0.7797320005770062, - "learning_rate": 1.5172049948703356e-05, - "loss": 0.2017, - "step": 6827 - }, - { - "epoch": 0.35, - "grad_norm": 1.074741508538611, - "learning_rate": 1.5170640301442339e-05, - "loss": 0.2007, - "step": 6828 - }, - { - "epoch": 0.35, - "grad_norm": 1.1055816149226974, - "learning_rate": 1.516923051392741e-05, - "loss": 0.2054, - "step": 6829 - }, - { - "epoch": 0.35, - "grad_norm": 0.8006123473485938, - "learning_rate": 1.516782058619681e-05, - "loss": 0.2051, - "step": 6830 - }, - { - "epoch": 0.35, - "grad_norm": 0.9676537160614846, - "learning_rate": 1.516641051828879e-05, - "loss": 0.177, - "step": 6831 - }, - { - "epoch": 0.35, - "grad_norm": 1.2592218417012093, - "learning_rate": 1.5165000310241592e-05, - "loss": 0.1988, - "step": 6832 - }, - { - "epoch": 0.35, - "grad_norm": 0.7618946631132122, - "learning_rate": 1.5163589962093466e-05, - "loss": 0.1668, - "step": 6833 - }, - { - "epoch": 0.35, - "grad_norm": 0.8695663377266801, - "learning_rate": 1.5162179473882668e-05, - "loss": 0.1817, - "step": 6834 - }, - { - "epoch": 0.35, - "grad_norm": 1.1313227587735268, - "learning_rate": 1.5160768845647464e-05, - "loss": 0.2208, - "step": 6835 - }, - { - "epoch": 0.35, - "grad_norm": 1.7788299794995026, - "learning_rate": 1.5159358077426114e-05, - "loss": 0.2113, - "step": 6836 - }, - { - "epoch": 0.35, - "grad_norm": 1.0772132694371461, - "learning_rate": 1.5157947169256886e-05, - "loss": 0.2016, - "step": 6837 - }, - { - "epoch": 0.35, - "grad_norm": 0.8650455266282737, - "learning_rate": 1.515653612117805e-05, - "loss": 0.1963, - "step": 6838 - }, - { - "epoch": 0.35, - "grad_norm": 1.0680136956495005, - "learning_rate": 1.5155124933227876e-05, - "loss": 0.2127, - "step": 6839 - }, - { - "epoch": 0.35, - "grad_norm": 1.0971031970802028, - "learning_rate": 1.515371360544465e-05, - "loss": 0.1729, - "step": 6840 - }, - { - "epoch": 0.35, - "grad_norm": 0.9096219601191085, - "learning_rate": 1.5152302137866653e-05, - "loss": 0.1937, - "step": 6841 - }, - { - "epoch": 0.35, - "grad_norm": 0.9816498176529843, - "learning_rate": 1.5150890530532165e-05, - "loss": 0.1817, - "step": 6842 - }, - { - "epoch": 0.35, - "grad_norm": 1.3215012680366147, - "learning_rate": 1.5149478783479484e-05, - "loss": 0.2153, - "step": 6843 - }, - { - "epoch": 0.35, - "grad_norm": 1.101868774315477, - "learning_rate": 1.51480668967469e-05, - "loss": 0.2143, - "step": 6844 - }, - { - "epoch": 0.35, - "grad_norm": 0.8953111804081765, - "learning_rate": 1.514665487037271e-05, - "loss": 0.2136, - "step": 6845 - }, - { - "epoch": 0.35, - "grad_norm": 0.9155024355336064, - "learning_rate": 1.5145242704395215e-05, - "loss": 0.1709, - "step": 6846 - }, - { - "epoch": 0.35, - "grad_norm": 1.194699702974594, - "learning_rate": 1.5143830398852722e-05, - "loss": 0.2143, - "step": 6847 - }, - { - "epoch": 0.35, - "grad_norm": 0.815983227890357, - "learning_rate": 1.5142417953783536e-05, - "loss": 0.1805, - "step": 6848 - }, - { - "epoch": 0.35, - "grad_norm": 1.3899500734068588, - "learning_rate": 1.5141005369225976e-05, - "loss": 0.1977, - "step": 6849 - }, - { - "epoch": 0.35, - "grad_norm": 1.8772900169020614, - "learning_rate": 1.5139592645218355e-05, - "loss": 0.2173, - "step": 6850 - }, - { - "epoch": 0.35, - "grad_norm": 4.3715237882830085, - "learning_rate": 1.5138179781798994e-05, - "loss": 0.2088, - "step": 6851 - }, - { - "epoch": 0.35, - "grad_norm": 0.9728600232206785, - "learning_rate": 1.513676677900621e-05, - "loss": 0.2141, - "step": 6852 - }, - { - "epoch": 0.35, - "grad_norm": 1.2467143391094206, - "learning_rate": 1.5135353636878343e-05, - "loss": 0.2127, - "step": 6853 - }, - { - "epoch": 0.35, - "grad_norm": 2.4410997273671753, - "learning_rate": 1.5133940355453717e-05, - "loss": 0.1994, - "step": 6854 - }, - { - "epoch": 0.35, - "grad_norm": 1.0130725754368426, - "learning_rate": 1.513252693477067e-05, - "loss": 0.191, - "step": 6855 - }, - { - "epoch": 0.35, - "grad_norm": 0.9178135147372484, - "learning_rate": 1.5131113374867537e-05, - "loss": 0.157, - "step": 6856 - }, - { - "epoch": 0.35, - "grad_norm": 1.0776349600749444, - "learning_rate": 1.5129699675782666e-05, - "loss": 0.2282, - "step": 6857 - }, - { - "epoch": 0.35, - "grad_norm": 1.2034064612598838, - "learning_rate": 1.5128285837554404e-05, - "loss": 0.2008, - "step": 6858 - }, - { - "epoch": 0.35, - "grad_norm": 1.0843112113793854, - "learning_rate": 1.5126871860221098e-05, - "loss": 0.1881, - "step": 6859 - }, - { - "epoch": 0.35, - "grad_norm": 1.0250256906050836, - "learning_rate": 1.5125457743821098e-05, - "loss": 0.2129, - "step": 6860 - }, - { - "epoch": 0.35, - "grad_norm": 0.9537784444716515, - "learning_rate": 1.5124043488392772e-05, - "loss": 0.2287, - "step": 6861 - }, - { - "epoch": 0.35, - "grad_norm": 1.0963638776333486, - "learning_rate": 1.5122629093974476e-05, - "loss": 0.1926, - "step": 6862 - }, - { - "epoch": 0.35, - "grad_norm": 1.18682479755038, - "learning_rate": 1.5121214560604579e-05, - "loss": 0.2174, - "step": 6863 - }, - { - "epoch": 0.35, - "grad_norm": 0.8760608982889208, - "learning_rate": 1.5119799888321444e-05, - "loss": 0.2168, - "step": 6864 - }, - { - "epoch": 0.35, - "grad_norm": 1.0627961084019029, - "learning_rate": 1.5118385077163446e-05, - "loss": 0.1941, - "step": 6865 - }, - { - "epoch": 0.35, - "grad_norm": 1.6957928296348652, - "learning_rate": 1.5116970127168969e-05, - "loss": 0.2307, - "step": 6866 - }, - { - "epoch": 0.35, - "grad_norm": 1.0456603258940167, - "learning_rate": 1.5115555038376386e-05, - "loss": 0.2038, - "step": 6867 - }, - { - "epoch": 0.35, - "grad_norm": 1.0644078326243325, - "learning_rate": 1.5114139810824084e-05, - "loss": 0.2085, - "step": 6868 - }, - { - "epoch": 0.35, - "grad_norm": 0.9346639998075899, - "learning_rate": 1.5112724444550449e-05, - "loss": 0.1976, - "step": 6869 - }, - { - "epoch": 0.35, - "grad_norm": 0.9871990707491872, - "learning_rate": 1.5111308939593876e-05, - "loss": 0.2325, - "step": 6870 - }, - { - "epoch": 0.35, - "grad_norm": 0.862600105336854, - "learning_rate": 1.510989329599276e-05, - "loss": 0.2257, - "step": 6871 - }, - { - "epoch": 0.35, - "grad_norm": 0.8081876993422035, - "learning_rate": 1.51084775137855e-05, - "loss": 0.1908, - "step": 6872 - }, - { - "epoch": 0.35, - "grad_norm": 1.1136152808647861, - "learning_rate": 1.5107061593010497e-05, - "loss": 0.1845, - "step": 6873 - }, - { - "epoch": 0.35, - "grad_norm": 0.9049398819784213, - "learning_rate": 1.5105645533706161e-05, - "loss": 0.2169, - "step": 6874 - }, - { - "epoch": 0.35, - "grad_norm": 0.8437673319992209, - "learning_rate": 1.5104229335910901e-05, - "loss": 0.1851, - "step": 6875 - }, - { - "epoch": 0.35, - "grad_norm": 0.9911256372429206, - "learning_rate": 1.5102812999663136e-05, - "loss": 0.204, - "step": 6876 - }, - { - "epoch": 0.35, - "grad_norm": 1.059383024074309, - "learning_rate": 1.5101396525001275e-05, - "loss": 0.2161, - "step": 6877 - }, - { - "epoch": 0.35, - "grad_norm": 0.807238408215533, - "learning_rate": 1.5099979911963747e-05, - "loss": 0.1751, - "step": 6878 - }, - { - "epoch": 0.35, - "grad_norm": 1.0651713275527612, - "learning_rate": 1.5098563160588975e-05, - "loss": 0.1957, - "step": 6879 - }, - { - "epoch": 0.35, - "grad_norm": 1.1544476706394258, - "learning_rate": 1.5097146270915391e-05, - "loss": 0.2063, - "step": 6880 - }, - { - "epoch": 0.35, - "grad_norm": 1.0130726774046317, - "learning_rate": 1.5095729242981426e-05, - "loss": 0.2123, - "step": 6881 - }, - { - "epoch": 0.35, - "grad_norm": 0.946893609225108, - "learning_rate": 1.5094312076825514e-05, - "loss": 0.1984, - "step": 6882 - }, - { - "epoch": 0.35, - "grad_norm": 0.9464456103251006, - "learning_rate": 1.5092894772486104e-05, - "loss": 0.2102, - "step": 6883 - }, - { - "epoch": 0.35, - "grad_norm": 1.0216789633156547, - "learning_rate": 1.5091477330001634e-05, - "loss": 0.1997, - "step": 6884 - }, - { - "epoch": 0.35, - "grad_norm": 1.6466262185119827, - "learning_rate": 1.5090059749410553e-05, - "loss": 0.2085, - "step": 6885 - }, - { - "epoch": 0.35, - "grad_norm": 0.9729954755515294, - "learning_rate": 1.5088642030751314e-05, - "loss": 0.171, - "step": 6886 - }, - { - "epoch": 0.35, - "grad_norm": 5.385590133366258, - "learning_rate": 1.5087224174062371e-05, - "loss": 0.1922, - "step": 6887 - }, - { - "epoch": 0.35, - "grad_norm": 0.8916447836837915, - "learning_rate": 1.5085806179382188e-05, - "loss": 0.2069, - "step": 6888 - }, - { - "epoch": 0.35, - "grad_norm": 1.2889193077338137, - "learning_rate": 1.5084388046749224e-05, - "loss": 0.2201, - "step": 6889 - }, - { - "epoch": 0.35, - "grad_norm": 1.921388178955955, - "learning_rate": 1.5082969776201948e-05, - "loss": 0.2002, - "step": 6890 - }, - { - "epoch": 0.35, - "grad_norm": 1.2911495515489098, - "learning_rate": 1.5081551367778828e-05, - "loss": 0.2063, - "step": 6891 - }, - { - "epoch": 0.35, - "grad_norm": 1.54823405008985, - "learning_rate": 1.508013282151834e-05, - "loss": 0.186, - "step": 6892 - }, - { - "epoch": 0.35, - "grad_norm": 0.8821794175637585, - "learning_rate": 1.5078714137458965e-05, - "loss": 0.2036, - "step": 6893 - }, - { - "epoch": 0.35, - "grad_norm": 1.5243194241716584, - "learning_rate": 1.5077295315639183e-05, - "loss": 0.2197, - "step": 6894 - }, - { - "epoch": 0.35, - "grad_norm": 1.7558299954036014, - "learning_rate": 1.5075876356097472e-05, - "loss": 0.1974, - "step": 6895 - }, - { - "epoch": 0.35, - "grad_norm": 1.0411962229686005, - "learning_rate": 1.5074457258872332e-05, - "loss": 0.1895, - "step": 6896 - }, - { - "epoch": 0.35, - "grad_norm": 1.1459677441955394, - "learning_rate": 1.5073038024002254e-05, - "loss": 0.2032, - "step": 6897 - }, - { - "epoch": 0.35, - "grad_norm": 1.3905183101820433, - "learning_rate": 1.5071618651525733e-05, - "loss": 0.2074, - "step": 6898 - }, - { - "epoch": 0.35, - "grad_norm": 0.9654236834489405, - "learning_rate": 1.5070199141481267e-05, - "loss": 0.21, - "step": 6899 - }, - { - "epoch": 0.35, - "grad_norm": 1.0754205007601063, - "learning_rate": 1.5068779493907364e-05, - "loss": 0.2037, - "step": 6900 - }, - { - "epoch": 0.35, - "grad_norm": 1.1811109530453496, - "learning_rate": 1.5067359708842531e-05, - "loss": 0.2191, - "step": 6901 - }, - { - "epoch": 0.35, - "grad_norm": 0.8896704583059244, - "learning_rate": 1.506593978632528e-05, - "loss": 0.1918, - "step": 6902 - }, - { - "epoch": 0.35, - "grad_norm": 0.8190830743448189, - "learning_rate": 1.5064519726394127e-05, - "loss": 0.1848, - "step": 6903 - }, - { - "epoch": 0.35, - "grad_norm": 1.4839731613633682, - "learning_rate": 1.5063099529087588e-05, - "loss": 0.1942, - "step": 6904 - }, - { - "epoch": 0.35, - "grad_norm": 0.8611499038512758, - "learning_rate": 1.506167919444419e-05, - "loss": 0.1833, - "step": 6905 - }, - { - "epoch": 0.35, - "grad_norm": 0.9749771586556178, - "learning_rate": 1.5060258722502457e-05, - "loss": 0.2128, - "step": 6906 - }, - { - "epoch": 0.35, - "grad_norm": 1.1688444786842203, - "learning_rate": 1.5058838113300922e-05, - "loss": 0.1839, - "step": 6907 - }, - { - "epoch": 0.35, - "grad_norm": 0.9682085388119024, - "learning_rate": 1.5057417366878117e-05, - "loss": 0.201, - "step": 6908 - }, - { - "epoch": 0.35, - "grad_norm": 1.0901203502465975, - "learning_rate": 1.505599648327258e-05, - "loss": 0.2072, - "step": 6909 - }, - { - "epoch": 0.35, - "grad_norm": 1.404939365745371, - "learning_rate": 1.505457546252285e-05, - "loss": 0.226, - "step": 6910 - }, - { - "epoch": 0.35, - "grad_norm": 1.0672437834253885, - "learning_rate": 1.5053154304667481e-05, - "loss": 0.2089, - "step": 6911 - }, - { - "epoch": 0.35, - "grad_norm": 1.0637403739652012, - "learning_rate": 1.5051733009745013e-05, - "loss": 0.1869, - "step": 6912 - }, - { - "epoch": 0.35, - "grad_norm": 0.9076819960560645, - "learning_rate": 1.5050311577794002e-05, - "loss": 0.1877, - "step": 6913 - }, - { - "epoch": 0.35, - "grad_norm": 0.9717309390704598, - "learning_rate": 1.5048890008853004e-05, - "loss": 0.1993, - "step": 6914 - }, - { - "epoch": 0.35, - "grad_norm": 0.9407606112256421, - "learning_rate": 1.5047468302960577e-05, - "loss": 0.2085, - "step": 6915 - }, - { - "epoch": 0.35, - "grad_norm": 1.4738042456951326, - "learning_rate": 1.504604646015529e-05, - "loss": 0.184, - "step": 6916 - }, - { - "epoch": 0.35, - "grad_norm": 1.7211102814223973, - "learning_rate": 1.5044624480475704e-05, - "loss": 0.2234, - "step": 6917 - }, - { - "epoch": 0.35, - "grad_norm": 1.4920048618936634, - "learning_rate": 1.50432023639604e-05, - "loss": 0.1944, - "step": 6918 - }, - { - "epoch": 0.35, - "grad_norm": 1.0084709392210685, - "learning_rate": 1.5041780110647945e-05, - "loss": 0.1899, - "step": 6919 - }, - { - "epoch": 0.35, - "grad_norm": 1.1115772913320738, - "learning_rate": 1.5040357720576917e-05, - "loss": 0.2065, - "step": 6920 - }, - { - "epoch": 0.35, - "grad_norm": 1.0794035077770285, - "learning_rate": 1.5038935193785904e-05, - "loss": 0.2026, - "step": 6921 - }, - { - "epoch": 0.35, - "grad_norm": 0.9209720954523564, - "learning_rate": 1.5037512530313487e-05, - "loss": 0.1812, - "step": 6922 - }, - { - "epoch": 0.35, - "grad_norm": 1.3927461318168444, - "learning_rate": 1.503608973019826e-05, - "loss": 0.2163, - "step": 6923 - }, - { - "epoch": 0.35, - "grad_norm": 0.8818625228439795, - "learning_rate": 1.5034666793478814e-05, - "loss": 0.2027, - "step": 6924 - }, - { - "epoch": 0.35, - "grad_norm": 0.9774795678754011, - "learning_rate": 1.5033243720193746e-05, - "loss": 0.1986, - "step": 6925 - }, - { - "epoch": 0.35, - "grad_norm": 1.1421701507240238, - "learning_rate": 1.5031820510381661e-05, - "loss": 0.2108, - "step": 6926 - }, - { - "epoch": 0.35, - "grad_norm": 0.9000965134357147, - "learning_rate": 1.5030397164081157e-05, - "loss": 0.2099, - "step": 6927 - }, - { - "epoch": 0.35, - "grad_norm": 1.095911521854206, - "learning_rate": 1.502897368133085e-05, - "loss": 0.2009, - "step": 6928 - }, - { - "epoch": 0.35, - "grad_norm": 0.8489297047692981, - "learning_rate": 1.5027550062169343e-05, - "loss": 0.1884, - "step": 6929 - }, - { - "epoch": 0.35, - "grad_norm": 0.997414952129682, - "learning_rate": 1.5026126306635256e-05, - "loss": 0.1995, - "step": 6930 - }, - { - "epoch": 0.35, - "grad_norm": 0.9949856638154007, - "learning_rate": 1.5024702414767212e-05, - "loss": 0.2306, - "step": 6931 - }, - { - "epoch": 0.35, - "grad_norm": 0.8183068565989067, - "learning_rate": 1.5023278386603832e-05, - "loss": 0.1763, - "step": 6932 - }, - { - "epoch": 0.35, - "grad_norm": 2.1024967773167957, - "learning_rate": 1.502185422218374e-05, - "loss": 0.2041, - "step": 6933 - }, - { - "epoch": 0.35, - "grad_norm": 1.0647604029272146, - "learning_rate": 1.5020429921545572e-05, - "loss": 0.2104, - "step": 6934 - }, - { - "epoch": 0.35, - "grad_norm": 1.2222934743683858, - "learning_rate": 1.5019005484727953e-05, - "loss": 0.1928, - "step": 6935 - }, - { - "epoch": 0.35, - "grad_norm": 0.949740467637576, - "learning_rate": 1.501758091176953e-05, - "loss": 0.2148, - "step": 6936 - }, - { - "epoch": 0.35, - "grad_norm": 1.057136051282396, - "learning_rate": 1.5016156202708942e-05, - "loss": 0.2267, - "step": 6937 - }, - { - "epoch": 0.35, - "grad_norm": 0.9248210993970946, - "learning_rate": 1.5014731357584835e-05, - "loss": 0.1782, - "step": 6938 - }, - { - "epoch": 0.35, - "grad_norm": 1.0259772541994394, - "learning_rate": 1.5013306376435852e-05, - "loss": 0.2216, - "step": 6939 - }, - { - "epoch": 0.35, - "grad_norm": 1.6482033868110175, - "learning_rate": 1.5011881259300654e-05, - "loss": 0.2146, - "step": 6940 - }, - { - "epoch": 0.35, - "grad_norm": 1.137004966715899, - "learning_rate": 1.5010456006217892e-05, - "loss": 0.2031, - "step": 6941 - }, - { - "epoch": 0.35, - "grad_norm": 0.8897749780607621, - "learning_rate": 1.5009030617226227e-05, - "loss": 0.2103, - "step": 6942 - }, - { - "epoch": 0.35, - "grad_norm": 1.7726206917351606, - "learning_rate": 1.5007605092364329e-05, - "loss": 0.2039, - "step": 6943 - }, - { - "epoch": 0.35, - "grad_norm": 0.9287323221846299, - "learning_rate": 1.5006179431670853e-05, - "loss": 0.2021, - "step": 6944 - }, - { - "epoch": 0.35, - "grad_norm": 0.8194781241438943, - "learning_rate": 1.5004753635184482e-05, - "loss": 0.2027, - "step": 6945 - }, - { - "epoch": 0.35, - "grad_norm": 1.3093313015385672, - "learning_rate": 1.5003327702943886e-05, - "loss": 0.2096, - "step": 6946 - }, - { - "epoch": 0.35, - "grad_norm": 0.751471795194662, - "learning_rate": 1.5001901634987741e-05, - "loss": 0.1795, - "step": 6947 - }, - { - "epoch": 0.35, - "grad_norm": 0.8658145897043582, - "learning_rate": 1.500047543135473e-05, - "loss": 0.198, - "step": 6948 - }, - { - "epoch": 0.35, - "grad_norm": 0.927812722124989, - "learning_rate": 1.4999049092083546e-05, - "loss": 0.204, - "step": 6949 - }, - { - "epoch": 0.35, - "grad_norm": 1.0196694768308388, - "learning_rate": 1.499762261721287e-05, - "loss": 0.2097, - "step": 6950 - }, - { - "epoch": 0.35, - "grad_norm": 1.1506071629214236, - "learning_rate": 1.4996196006781398e-05, - "loss": 0.2234, - "step": 6951 - }, - { - "epoch": 0.35, - "grad_norm": 1.0757424215079343, - "learning_rate": 1.4994769260827825e-05, - "loss": 0.2187, - "step": 6952 - }, - { - "epoch": 0.35, - "grad_norm": 0.8124410268278714, - "learning_rate": 1.4993342379390859e-05, - "loss": 0.2122, - "step": 6953 - }, - { - "epoch": 0.35, - "grad_norm": 4.055419148942473, - "learning_rate": 1.4991915362509196e-05, - "loss": 0.1975, - "step": 6954 - }, - { - "epoch": 0.35, - "grad_norm": 1.074342125307164, - "learning_rate": 1.4990488210221545e-05, - "loss": 0.2257, - "step": 6955 - }, - { - "epoch": 0.35, - "grad_norm": 0.8775555168046059, - "learning_rate": 1.4989060922566623e-05, - "loss": 0.1871, - "step": 6956 - }, - { - "epoch": 0.35, - "grad_norm": 0.9828925549110333, - "learning_rate": 1.4987633499583138e-05, - "loss": 0.2055, - "step": 6957 - }, - { - "epoch": 0.35, - "grad_norm": 1.0764743184030248, - "learning_rate": 1.4986205941309818e-05, - "loss": 0.205, - "step": 6958 - }, - { - "epoch": 0.35, - "grad_norm": 0.9363216899302678, - "learning_rate": 1.4984778247785375e-05, - "loss": 0.1804, - "step": 6959 - }, - { - "epoch": 0.35, - "grad_norm": 0.931636292642853, - "learning_rate": 1.4983350419048544e-05, - "loss": 0.1797, - "step": 6960 - }, - { - "epoch": 0.35, - "grad_norm": 0.9904177526333235, - "learning_rate": 1.498192245513805e-05, - "loss": 0.1902, - "step": 6961 - }, - { - "epoch": 0.35, - "grad_norm": 0.8392932536433085, - "learning_rate": 1.4980494356092626e-05, - "loss": 0.206, - "step": 6962 - }, - { - "epoch": 0.35, - "grad_norm": 1.8715178627695, - "learning_rate": 1.4979066121951014e-05, - "loss": 0.1972, - "step": 6963 - }, - { - "epoch": 0.35, - "grad_norm": 1.1678631470250729, - "learning_rate": 1.4977637752751953e-05, - "loss": 0.1891, - "step": 6964 - }, - { - "epoch": 0.35, - "grad_norm": 0.7647725339845265, - "learning_rate": 1.4976209248534183e-05, - "loss": 0.1959, - "step": 6965 - }, - { - "epoch": 0.35, - "grad_norm": 0.8787375165725418, - "learning_rate": 1.4974780609336459e-05, - "loss": 0.2146, - "step": 6966 - }, - { - "epoch": 0.35, - "grad_norm": 0.8037882146692238, - "learning_rate": 1.497335183519753e-05, - "loss": 0.2131, - "step": 6967 - }, - { - "epoch": 0.35, - "grad_norm": 0.9437660746025482, - "learning_rate": 1.497192292615615e-05, - "loss": 0.1672, - "step": 6968 - }, - { - "epoch": 0.35, - "grad_norm": 0.9548608764647659, - "learning_rate": 1.497049388225108e-05, - "loss": 0.1965, - "step": 6969 - }, - { - "epoch": 0.35, - "grad_norm": 0.8126396317616935, - "learning_rate": 1.4969064703521082e-05, - "loss": 0.206, - "step": 6970 - }, - { - "epoch": 0.35, - "grad_norm": 4.600045784968216, - "learning_rate": 1.4967635390004924e-05, - "loss": 0.181, - "step": 6971 - }, - { - "epoch": 0.35, - "grad_norm": 1.1969410873168724, - "learning_rate": 1.496620594174138e-05, - "loss": 0.2295, - "step": 6972 - }, - { - "epoch": 0.35, - "grad_norm": 0.9415730347137686, - "learning_rate": 1.4964776358769213e-05, - "loss": 0.2113, - "step": 6973 - }, - { - "epoch": 0.35, - "grad_norm": 1.163755494326272, - "learning_rate": 1.496334664112721e-05, - "loss": 0.2001, - "step": 6974 - }, - { - "epoch": 0.35, - "grad_norm": 0.9033674842959668, - "learning_rate": 1.4961916788854147e-05, - "loss": 0.1906, - "step": 6975 - }, - { - "epoch": 0.35, - "grad_norm": 1.1287017676044906, - "learning_rate": 1.4960486801988811e-05, - "loss": 0.2106, - "step": 6976 - }, - { - "epoch": 0.35, - "grad_norm": 1.1671485012836642, - "learning_rate": 1.4959056680569992e-05, - "loss": 0.1838, - "step": 6977 - }, - { - "epoch": 0.35, - "grad_norm": 0.9909927570451686, - "learning_rate": 1.4957626424636482e-05, - "loss": 0.2005, - "step": 6978 - }, - { - "epoch": 0.35, - "grad_norm": 0.7024089312556172, - "learning_rate": 1.495619603422707e-05, - "loss": 0.1863, - "step": 6979 - }, - { - "epoch": 0.35, - "grad_norm": 0.8736425378884803, - "learning_rate": 1.4954765509380565e-05, - "loss": 0.2134, - "step": 6980 - }, - { - "epoch": 0.35, - "grad_norm": 1.0248494978958178, - "learning_rate": 1.4953334850135765e-05, - "loss": 0.2203, - "step": 6981 - }, - { - "epoch": 0.36, - "grad_norm": 1.1433657512824882, - "learning_rate": 1.495190405653148e-05, - "loss": 0.2075, - "step": 6982 - }, - { - "epoch": 0.36, - "grad_norm": 0.797062012180071, - "learning_rate": 1.4950473128606513e-05, - "loss": 0.1951, - "step": 6983 - }, - { - "epoch": 0.36, - "grad_norm": 0.9021502798448437, - "learning_rate": 1.4949042066399684e-05, - "loss": 0.1921, - "step": 6984 - }, - { - "epoch": 0.36, - "grad_norm": 0.9354344111856875, - "learning_rate": 1.4947610869949811e-05, - "loss": 0.2152, - "step": 6985 - }, - { - "epoch": 0.36, - "grad_norm": 1.2873601297598602, - "learning_rate": 1.4946179539295713e-05, - "loss": 0.2066, - "step": 6986 - }, - { - "epoch": 0.36, - "grad_norm": 1.1742473061174559, - "learning_rate": 1.4944748074476211e-05, - "loss": 0.2242, - "step": 6987 - }, - { - "epoch": 0.36, - "grad_norm": 1.027615230081389, - "learning_rate": 1.4943316475530145e-05, - "loss": 0.2241, - "step": 6988 - }, - { - "epoch": 0.36, - "grad_norm": 1.4938750326455092, - "learning_rate": 1.4941884742496338e-05, - "loss": 0.2058, - "step": 6989 - }, - { - "epoch": 0.36, - "grad_norm": 0.8638539100195232, - "learning_rate": 1.4940452875413627e-05, - "loss": 0.2076, - "step": 6990 - }, - { - "epoch": 0.36, - "grad_norm": 1.8909119223100532, - "learning_rate": 1.4939020874320856e-05, - "loss": 0.2382, - "step": 6991 - }, - { - "epoch": 0.36, - "grad_norm": 0.959846464244343, - "learning_rate": 1.4937588739256861e-05, - "loss": 0.1874, - "step": 6992 - }, - { - "epoch": 0.36, - "grad_norm": 0.8957640924216222, - "learning_rate": 1.4936156470260494e-05, - "loss": 0.1897, - "step": 6993 - }, - { - "epoch": 0.36, - "grad_norm": 0.9760286467099434, - "learning_rate": 1.4934724067370604e-05, - "loss": 0.1896, - "step": 6994 - }, - { - "epoch": 0.36, - "grad_norm": 0.7759132698260575, - "learning_rate": 1.4933291530626047e-05, - "loss": 0.1905, - "step": 6995 - }, - { - "epoch": 0.36, - "grad_norm": 1.6179256438101102, - "learning_rate": 1.4931858860065676e-05, - "loss": 0.1889, - "step": 6996 - }, - { - "epoch": 0.36, - "grad_norm": 1.1805021494268257, - "learning_rate": 1.4930426055728354e-05, - "loss": 0.2042, - "step": 6997 - }, - { - "epoch": 0.36, - "grad_norm": 0.9349921230667492, - "learning_rate": 1.492899311765295e-05, - "loss": 0.1933, - "step": 6998 - }, - { - "epoch": 0.36, - "grad_norm": 1.0884404176575875, - "learning_rate": 1.4927560045878328e-05, - "loss": 0.1874, - "step": 6999 - }, - { - "epoch": 0.36, - "grad_norm": 1.371867437927246, - "learning_rate": 1.492612684044336e-05, - "loss": 0.1859, - "step": 7000 - }, - { - "epoch": 0.36, - "grad_norm": 1.5014463510510556, - "learning_rate": 1.4924693501386925e-05, - "loss": 0.1972, - "step": 7001 - }, - { - "epoch": 0.36, - "grad_norm": 1.658847598307763, - "learning_rate": 1.4923260028747899e-05, - "loss": 0.1931, - "step": 7002 - }, - { - "epoch": 0.36, - "grad_norm": 0.9026298128366345, - "learning_rate": 1.4921826422565169e-05, - "loss": 0.2159, - "step": 7003 - }, - { - "epoch": 0.36, - "grad_norm": 1.740979167213659, - "learning_rate": 1.4920392682877618e-05, - "loss": 0.1958, - "step": 7004 - }, - { - "epoch": 0.36, - "grad_norm": 0.8531910200230725, - "learning_rate": 1.4918958809724135e-05, - "loss": 0.1846, - "step": 7005 - }, - { - "epoch": 0.36, - "grad_norm": 0.7906279464839503, - "learning_rate": 1.491752480314362e-05, - "loss": 0.2073, - "step": 7006 - }, - { - "epoch": 0.36, - "grad_norm": 0.9873303650652501, - "learning_rate": 1.4916090663174966e-05, - "loss": 0.1949, - "step": 7007 - }, - { - "epoch": 0.36, - "grad_norm": 1.881165750191825, - "learning_rate": 1.4914656389857076e-05, - "loss": 0.2105, - "step": 7008 - }, - { - "epoch": 0.36, - "grad_norm": 0.9239039519027061, - "learning_rate": 1.4913221983228851e-05, - "loss": 0.2083, - "step": 7009 - }, - { - "epoch": 0.36, - "grad_norm": 1.0745052245332594, - "learning_rate": 1.4911787443329204e-05, - "loss": 0.2027, - "step": 7010 - }, - { - "epoch": 0.36, - "grad_norm": 0.8717707200753981, - "learning_rate": 1.4910352770197044e-05, - "loss": 0.2013, - "step": 7011 - }, - { - "epoch": 0.36, - "grad_norm": 0.8507639295424179, - "learning_rate": 1.4908917963871292e-05, - "loss": 0.2, - "step": 7012 - }, - { - "epoch": 0.36, - "grad_norm": 2.7524155811833033, - "learning_rate": 1.4907483024390859e-05, - "loss": 0.1818, - "step": 7013 - }, - { - "epoch": 0.36, - "grad_norm": 1.8231612141524862, - "learning_rate": 1.4906047951794671e-05, - "loss": 0.1897, - "step": 7014 - }, - { - "epoch": 0.36, - "grad_norm": 1.0364202333602426, - "learning_rate": 1.4904612746121657e-05, - "loss": 0.2018, - "step": 7015 - }, - { - "epoch": 0.36, - "grad_norm": 1.3215366699557016, - "learning_rate": 1.4903177407410749e-05, - "loss": 0.1915, - "step": 7016 - }, - { - "epoch": 0.36, - "grad_norm": 0.8798560451011856, - "learning_rate": 1.4901741935700873e-05, - "loss": 0.2068, - "step": 7017 - }, - { - "epoch": 0.36, - "grad_norm": 0.8262393299901581, - "learning_rate": 1.4900306331030967e-05, - "loss": 0.2209, - "step": 7018 - }, - { - "epoch": 0.36, - "grad_norm": 0.817376552435723, - "learning_rate": 1.489887059343998e-05, - "loss": 0.2057, - "step": 7019 - }, - { - "epoch": 0.36, - "grad_norm": 1.2862260016588265, - "learning_rate": 1.4897434722966851e-05, - "loss": 0.2076, - "step": 7020 - }, - { - "epoch": 0.36, - "grad_norm": 0.8367966460811618, - "learning_rate": 1.4895998719650526e-05, - "loss": 0.2173, - "step": 7021 - }, - { - "epoch": 0.36, - "grad_norm": 0.6914550113491021, - "learning_rate": 1.4894562583529961e-05, - "loss": 0.197, - "step": 7022 - }, - { - "epoch": 0.36, - "grad_norm": 1.0438177686296022, - "learning_rate": 1.4893126314644106e-05, - "loss": 0.1845, - "step": 7023 - }, - { - "epoch": 0.36, - "grad_norm": 0.8025077624120668, - "learning_rate": 1.4891689913031928e-05, - "loss": 0.1915, - "step": 7024 - }, - { - "epoch": 0.36, - "grad_norm": 0.9815403441598226, - "learning_rate": 1.4890253378732385e-05, - "loss": 0.2024, - "step": 7025 - }, - { - "epoch": 0.36, - "grad_norm": 1.4646887196561633, - "learning_rate": 1.488881671178444e-05, - "loss": 0.2244, - "step": 7026 - }, - { - "epoch": 0.36, - "grad_norm": 0.8324670358442616, - "learning_rate": 1.4887379912227064e-05, - "loss": 0.2185, - "step": 7027 - }, - { - "epoch": 0.36, - "grad_norm": 0.870578956635888, - "learning_rate": 1.4885942980099236e-05, - "loss": 0.2036, - "step": 7028 - }, - { - "epoch": 0.36, - "grad_norm": 1.3454427996276077, - "learning_rate": 1.488450591543993e-05, - "loss": 0.2091, - "step": 7029 - }, - { - "epoch": 0.36, - "grad_norm": 1.1991823657329022, - "learning_rate": 1.4883068718288121e-05, - "loss": 0.2294, - "step": 7030 - }, - { - "epoch": 0.36, - "grad_norm": 0.8558186090970756, - "learning_rate": 1.4881631388682797e-05, - "loss": 0.1883, - "step": 7031 - }, - { - "epoch": 0.36, - "grad_norm": 1.0901259681525386, - "learning_rate": 1.4880193926662948e-05, - "loss": 0.1977, - "step": 7032 - }, - { - "epoch": 0.36, - "grad_norm": 1.000147735629142, - "learning_rate": 1.4878756332267563e-05, - "loss": 0.1937, - "step": 7033 - }, - { - "epoch": 0.36, - "grad_norm": 0.9307866687602493, - "learning_rate": 1.4877318605535638e-05, - "loss": 0.2057, - "step": 7034 - }, - { - "epoch": 0.36, - "grad_norm": 0.8674026920795987, - "learning_rate": 1.4875880746506169e-05, - "loss": 0.1858, - "step": 7035 - }, - { - "epoch": 0.36, - "grad_norm": 1.066773643311902, - "learning_rate": 1.4874442755218156e-05, - "loss": 0.1948, - "step": 7036 - }, - { - "epoch": 0.36, - "grad_norm": 0.9281596268519601, - "learning_rate": 1.4873004631710613e-05, - "loss": 0.2231, - "step": 7037 - }, - { - "epoch": 0.36, - "grad_norm": 0.8973859838471487, - "learning_rate": 1.4871566376022542e-05, - "loss": 0.1794, - "step": 7038 - }, - { - "epoch": 0.36, - "grad_norm": 0.9926979536022584, - "learning_rate": 1.4870127988192957e-05, - "loss": 0.1973, - "step": 7039 - }, - { - "epoch": 0.36, - "grad_norm": 1.1506318008619696, - "learning_rate": 1.4868689468260876e-05, - "loss": 0.2115, - "step": 7040 - }, - { - "epoch": 0.36, - "grad_norm": 1.0734519175053021, - "learning_rate": 1.4867250816265318e-05, - "loss": 0.182, - "step": 7041 - }, - { - "epoch": 0.36, - "grad_norm": 1.229578365298459, - "learning_rate": 1.4865812032245308e-05, - "loss": 0.1948, - "step": 7042 - }, - { - "epoch": 0.36, - "grad_norm": 0.9429939490887894, - "learning_rate": 1.4864373116239872e-05, - "loss": 0.2185, - "step": 7043 - }, - { - "epoch": 0.36, - "grad_norm": 1.1696130548769823, - "learning_rate": 1.486293406828804e-05, - "loss": 0.2244, - "step": 7044 - }, - { - "epoch": 0.36, - "grad_norm": 1.382434322965532, - "learning_rate": 1.4861494888428845e-05, - "loss": 0.2003, - "step": 7045 - }, - { - "epoch": 0.36, - "grad_norm": 1.0071367527080957, - "learning_rate": 1.4860055576701327e-05, - "loss": 0.1995, - "step": 7046 - }, - { - "epoch": 0.36, - "grad_norm": 1.1758286360602521, - "learning_rate": 1.4858616133144527e-05, - "loss": 0.1906, - "step": 7047 - }, - { - "epoch": 0.36, - "grad_norm": 1.0913830281505044, - "learning_rate": 1.4857176557797493e-05, - "loss": 0.2076, - "step": 7048 - }, - { - "epoch": 0.36, - "grad_norm": 0.8585908518502955, - "learning_rate": 1.4855736850699268e-05, - "loss": 0.2014, - "step": 7049 - }, - { - "epoch": 0.36, - "grad_norm": 1.6561467985014158, - "learning_rate": 1.4854297011888905e-05, - "loss": 0.2059, - "step": 7050 - }, - { - "epoch": 0.36, - "grad_norm": 1.4893530541324937, - "learning_rate": 1.4852857041405466e-05, - "loss": 0.1821, - "step": 7051 - }, - { - "epoch": 0.36, - "grad_norm": 1.1291541762800112, - "learning_rate": 1.4851416939288003e-05, - "loss": 0.2142, - "step": 7052 - }, - { - "epoch": 0.36, - "grad_norm": 0.889361943725798, - "learning_rate": 1.484997670557558e-05, - "loss": 0.1824, - "step": 7053 - }, - { - "epoch": 0.36, - "grad_norm": 0.8613533659766596, - "learning_rate": 1.4848536340307267e-05, - "loss": 0.2051, - "step": 7054 - }, - { - "epoch": 0.36, - "grad_norm": 1.3496139019332325, - "learning_rate": 1.4847095843522133e-05, - "loss": 0.2061, - "step": 7055 - }, - { - "epoch": 0.36, - "grad_norm": 0.8141814539341413, - "learning_rate": 1.4845655215259249e-05, - "loss": 0.1955, - "step": 7056 - }, - { - "epoch": 0.36, - "grad_norm": 0.7389541225733909, - "learning_rate": 1.4844214455557693e-05, - "loss": 0.1944, - "step": 7057 - }, - { - "epoch": 0.36, - "grad_norm": 1.100671299271619, - "learning_rate": 1.4842773564456545e-05, - "loss": 0.2101, - "step": 7058 - }, - { - "epoch": 0.36, - "grad_norm": 1.1313577250220692, - "learning_rate": 1.4841332541994893e-05, - "loss": 0.2043, - "step": 7059 - }, - { - "epoch": 0.36, - "grad_norm": 0.7963336071239322, - "learning_rate": 1.4839891388211822e-05, - "loss": 0.2073, - "step": 7060 - }, - { - "epoch": 0.36, - "grad_norm": 0.9057394292716163, - "learning_rate": 1.4838450103146424e-05, - "loss": 0.2432, - "step": 7061 - }, - { - "epoch": 0.36, - "grad_norm": 2.270303282906903, - "learning_rate": 1.4837008686837791e-05, - "loss": 0.2007, - "step": 7062 - }, - { - "epoch": 0.36, - "grad_norm": 0.8417121898936355, - "learning_rate": 1.4835567139325026e-05, - "loss": 0.2002, - "step": 7063 - }, - { - "epoch": 0.36, - "grad_norm": 0.8494513029708324, - "learning_rate": 1.4834125460647231e-05, - "loss": 0.1912, - "step": 7064 - }, - { - "epoch": 0.36, - "grad_norm": 1.1110096798803188, - "learning_rate": 1.483268365084351e-05, - "loss": 0.1917, - "step": 7065 - }, - { - "epoch": 0.36, - "grad_norm": 1.2050849762105256, - "learning_rate": 1.4831241709952969e-05, - "loss": 0.2051, - "step": 7066 - }, - { - "epoch": 0.36, - "grad_norm": 1.1217378012786416, - "learning_rate": 1.4829799638014724e-05, - "loss": 0.2045, - "step": 7067 - }, - { - "epoch": 0.36, - "grad_norm": 1.118223071720989, - "learning_rate": 1.4828357435067895e-05, - "loss": 0.1838, - "step": 7068 - }, - { - "epoch": 0.36, - "grad_norm": 2.5884446354343718, - "learning_rate": 1.4826915101151595e-05, - "loss": 0.2154, - "step": 7069 - }, - { - "epoch": 0.36, - "grad_norm": 0.9053567421261107, - "learning_rate": 1.4825472636304952e-05, - "loss": 0.2375, - "step": 7070 - }, - { - "epoch": 0.36, - "grad_norm": 0.8367797883521595, - "learning_rate": 1.4824030040567086e-05, - "loss": 0.1871, - "step": 7071 - }, - { - "epoch": 0.36, - "grad_norm": 1.7442778069105056, - "learning_rate": 1.4822587313977137e-05, - "loss": 0.2197, - "step": 7072 - }, - { - "epoch": 0.36, - "grad_norm": 1.46195344130919, - "learning_rate": 1.4821144456574235e-05, - "loss": 0.1843, - "step": 7073 - }, - { - "epoch": 0.36, - "grad_norm": 0.871892278282872, - "learning_rate": 1.4819701468397516e-05, - "loss": 0.1998, - "step": 7074 - }, - { - "epoch": 0.36, - "grad_norm": 1.0590408819917725, - "learning_rate": 1.4818258349486121e-05, - "loss": 0.2003, - "step": 7075 - }, - { - "epoch": 0.36, - "grad_norm": 0.9773154856611629, - "learning_rate": 1.4816815099879199e-05, - "loss": 0.2079, - "step": 7076 - }, - { - "epoch": 0.36, - "grad_norm": 0.9642359721391925, - "learning_rate": 1.4815371719615895e-05, - "loss": 0.2001, - "step": 7077 - }, - { - "epoch": 0.36, - "grad_norm": 0.8066183899205053, - "learning_rate": 1.481392820873536e-05, - "loss": 0.1761, - "step": 7078 - }, - { - "epoch": 0.36, - "grad_norm": 1.0007716990939366, - "learning_rate": 1.481248456727675e-05, - "loss": 0.209, - "step": 7079 - }, - { - "epoch": 0.36, - "grad_norm": 0.8113832340307315, - "learning_rate": 1.4811040795279223e-05, - "loss": 0.1979, - "step": 7080 - }, - { - "epoch": 0.36, - "grad_norm": 1.0519165147720977, - "learning_rate": 1.4809596892781946e-05, - "loss": 0.2089, - "step": 7081 - }, - { - "epoch": 0.36, - "grad_norm": 0.8878676928357122, - "learning_rate": 1.480815285982408e-05, - "loss": 0.2352, - "step": 7082 - }, - { - "epoch": 0.36, - "grad_norm": 0.8365083305411455, - "learning_rate": 1.4806708696444796e-05, - "loss": 0.2044, - "step": 7083 - }, - { - "epoch": 0.36, - "grad_norm": 1.128972835355977, - "learning_rate": 1.4805264402683268e-05, - "loss": 0.1973, - "step": 7084 - }, - { - "epoch": 0.36, - "grad_norm": 1.2933985370978633, - "learning_rate": 1.480381997857867e-05, - "loss": 0.2061, - "step": 7085 - }, - { - "epoch": 0.36, - "grad_norm": 1.1958606496129567, - "learning_rate": 1.4802375424170187e-05, - "loss": 0.2116, - "step": 7086 - }, - { - "epoch": 0.36, - "grad_norm": 0.9234574265240589, - "learning_rate": 1.4800930739497e-05, - "loss": 0.185, - "step": 7087 - }, - { - "epoch": 0.36, - "grad_norm": 0.9777658505483192, - "learning_rate": 1.4799485924598292e-05, - "loss": 0.2006, - "step": 7088 - }, - { - "epoch": 0.36, - "grad_norm": 0.8632732697046354, - "learning_rate": 1.4798040979513258e-05, - "loss": 0.1881, - "step": 7089 - }, - { - "epoch": 0.36, - "grad_norm": 1.0198595161320083, - "learning_rate": 1.479659590428109e-05, - "loss": 0.2127, - "step": 7090 - }, - { - "epoch": 0.36, - "grad_norm": 0.9215793081250065, - "learning_rate": 1.479515069894099e-05, - "loss": 0.1954, - "step": 7091 - }, - { - "epoch": 0.36, - "grad_norm": 0.9916694114763142, - "learning_rate": 1.4793705363532156e-05, - "loss": 0.1978, - "step": 7092 - }, - { - "epoch": 0.36, - "grad_norm": 1.592385505546323, - "learning_rate": 1.4792259898093791e-05, - "loss": 0.2388, - "step": 7093 - }, - { - "epoch": 0.36, - "grad_norm": 1.012073087478319, - "learning_rate": 1.4790814302665107e-05, - "loss": 0.1814, - "step": 7094 - }, - { - "epoch": 0.36, - "grad_norm": 1.90806762365769, - "learning_rate": 1.4789368577285314e-05, - "loss": 0.2186, - "step": 7095 - }, - { - "epoch": 0.36, - "grad_norm": 0.8809231723802302, - "learning_rate": 1.478792272199363e-05, - "loss": 0.1912, - "step": 7096 - }, - { - "epoch": 0.36, - "grad_norm": 0.8225179914084455, - "learning_rate": 1.4786476736829267e-05, - "loss": 0.1968, - "step": 7097 - }, - { - "epoch": 0.36, - "grad_norm": 0.8506939817328405, - "learning_rate": 1.4785030621831458e-05, - "loss": 0.1949, - "step": 7098 - }, - { - "epoch": 0.36, - "grad_norm": 0.9884889449489223, - "learning_rate": 1.478358437703942e-05, - "loss": 0.2254, - "step": 7099 - }, - { - "epoch": 0.36, - "grad_norm": 7.4120500445281, - "learning_rate": 1.4782138002492385e-05, - "loss": 0.2025, - "step": 7100 - }, - { - "epoch": 0.36, - "grad_norm": 1.6147739500037153, - "learning_rate": 1.4780691498229588e-05, - "loss": 0.1985, - "step": 7101 - }, - { - "epoch": 0.36, - "grad_norm": 1.9097814287498975, - "learning_rate": 1.4779244864290264e-05, - "loss": 0.2045, - "step": 7102 - }, - { - "epoch": 0.36, - "grad_norm": 0.8348803855026005, - "learning_rate": 1.4777798100713655e-05, - "loss": 0.1769, - "step": 7103 - }, - { - "epoch": 0.36, - "grad_norm": 1.1900370878471898, - "learning_rate": 1.4776351207538999e-05, - "loss": 0.1719, - "step": 7104 - }, - { - "epoch": 0.36, - "grad_norm": 1.2091092328565474, - "learning_rate": 1.477490418480555e-05, - "loss": 0.1898, - "step": 7105 - }, - { - "epoch": 0.36, - "grad_norm": 1.5838612601289892, - "learning_rate": 1.4773457032552551e-05, - "loss": 0.224, - "step": 7106 - }, - { - "epoch": 0.36, - "grad_norm": 4.945232541245546, - "learning_rate": 1.4772009750819262e-05, - "loss": 0.2107, - "step": 7107 - }, - { - "epoch": 0.36, - "grad_norm": 0.9322743971599436, - "learning_rate": 1.4770562339644943e-05, - "loss": 0.235, - "step": 7108 - }, - { - "epoch": 0.36, - "grad_norm": 1.1461959180016132, - "learning_rate": 1.4769114799068847e-05, - "loss": 0.2051, - "step": 7109 - }, - { - "epoch": 0.36, - "grad_norm": 1.636216515301017, - "learning_rate": 1.4767667129130243e-05, - "loss": 0.221, - "step": 7110 - }, - { - "epoch": 0.36, - "grad_norm": 1.1442129928861824, - "learning_rate": 1.4766219329868399e-05, - "loss": 0.2033, - "step": 7111 - }, - { - "epoch": 0.36, - "grad_norm": 1.4749380356676163, - "learning_rate": 1.4764771401322588e-05, - "loss": 0.1876, - "step": 7112 - }, - { - "epoch": 0.36, - "grad_norm": 0.982928862953428, - "learning_rate": 1.4763323343532083e-05, - "loss": 0.2089, - "step": 7113 - }, - { - "epoch": 0.36, - "grad_norm": 1.0080157854821021, - "learning_rate": 1.4761875156536163e-05, - "loss": 0.2059, - "step": 7114 - }, - { - "epoch": 0.36, - "grad_norm": 0.9390907781578757, - "learning_rate": 1.476042684037411e-05, - "loss": 0.1792, - "step": 7115 - }, - { - "epoch": 0.36, - "grad_norm": 3.299468734945118, - "learning_rate": 1.4758978395085208e-05, - "loss": 0.2316, - "step": 7116 - }, - { - "epoch": 0.36, - "grad_norm": 0.9578609983641396, - "learning_rate": 1.4757529820708754e-05, - "loss": 0.1964, - "step": 7117 - }, - { - "epoch": 0.36, - "grad_norm": 0.8416583906906582, - "learning_rate": 1.475608111728403e-05, - "loss": 0.1789, - "step": 7118 - }, - { - "epoch": 0.36, - "grad_norm": 0.9457542076436503, - "learning_rate": 1.4754632284850338e-05, - "loss": 0.1833, - "step": 7119 - }, - { - "epoch": 0.36, - "grad_norm": 0.9545702827475283, - "learning_rate": 1.4753183323446978e-05, - "loss": 0.2101, - "step": 7120 - }, - { - "epoch": 0.36, - "grad_norm": 1.088974962874592, - "learning_rate": 1.4751734233113253e-05, - "loss": 0.2124, - "step": 7121 - }, - { - "epoch": 0.36, - "grad_norm": 0.9677299958915025, - "learning_rate": 1.4750285013888466e-05, - "loss": 0.1812, - "step": 7122 - }, - { - "epoch": 0.36, - "grad_norm": 1.9383390825244804, - "learning_rate": 1.474883566581193e-05, - "loss": 0.2187, - "step": 7123 - }, - { - "epoch": 0.36, - "grad_norm": 1.049802052114001, - "learning_rate": 1.474738618892296e-05, - "loss": 0.1903, - "step": 7124 - }, - { - "epoch": 0.36, - "grad_norm": 1.2918584280404934, - "learning_rate": 1.474593658326087e-05, - "loss": 0.2075, - "step": 7125 - }, - { - "epoch": 0.36, - "grad_norm": 1.2190451902433268, - "learning_rate": 1.4744486848864982e-05, - "loss": 0.1998, - "step": 7126 - }, - { - "epoch": 0.36, - "grad_norm": 2.2539579091445217, - "learning_rate": 1.4743036985774621e-05, - "loss": 0.1809, - "step": 7127 - }, - { - "epoch": 0.36, - "grad_norm": 0.8069144816251373, - "learning_rate": 1.4741586994029113e-05, - "loss": 0.1841, - "step": 7128 - }, - { - "epoch": 0.36, - "grad_norm": 0.8739653517671883, - "learning_rate": 1.474013687366779e-05, - "loss": 0.1951, - "step": 7129 - }, - { - "epoch": 0.36, - "grad_norm": 1.1976902309195656, - "learning_rate": 1.4738686624729987e-05, - "loss": 0.2206, - "step": 7130 - }, - { - "epoch": 0.36, - "grad_norm": 1.1363599177482622, - "learning_rate": 1.4737236247255045e-05, - "loss": 0.1928, - "step": 7131 - }, - { - "epoch": 0.36, - "grad_norm": 1.0141472201673492, - "learning_rate": 1.4735785741282298e-05, - "loss": 0.17, - "step": 7132 - }, - { - "epoch": 0.36, - "grad_norm": 0.8962760373086162, - "learning_rate": 1.4734335106851095e-05, - "loss": 0.1788, - "step": 7133 - }, - { - "epoch": 0.36, - "grad_norm": 1.0975108161546672, - "learning_rate": 1.4732884344000787e-05, - "loss": 0.1932, - "step": 7134 - }, - { - "epoch": 0.36, - "grad_norm": 1.1304931506411495, - "learning_rate": 1.4731433452770723e-05, - "loss": 0.2232, - "step": 7135 - }, - { - "epoch": 0.36, - "grad_norm": 0.9684346870572593, - "learning_rate": 1.4729982433200261e-05, - "loss": 0.2124, - "step": 7136 - }, - { - "epoch": 0.36, - "grad_norm": 2.5665638986844876, - "learning_rate": 1.4728531285328753e-05, - "loss": 0.2219, - "step": 7137 - }, - { - "epoch": 0.36, - "grad_norm": 0.9854542831804197, - "learning_rate": 1.4727080009195573e-05, - "loss": 0.217, - "step": 7138 - }, - { - "epoch": 0.36, - "grad_norm": 1.0710552426739544, - "learning_rate": 1.4725628604840078e-05, - "loss": 0.1855, - "step": 7139 - }, - { - "epoch": 0.36, - "grad_norm": 1.2343554376103933, - "learning_rate": 1.4724177072301642e-05, - "loss": 0.2069, - "step": 7140 - }, - { - "epoch": 0.36, - "grad_norm": 0.9514401751415055, - "learning_rate": 1.4722725411619634e-05, - "loss": 0.2106, - "step": 7141 - }, - { - "epoch": 0.36, - "grad_norm": 2.030571956714932, - "learning_rate": 1.4721273622833432e-05, - "loss": 0.2027, - "step": 7142 - }, - { - "epoch": 0.36, - "grad_norm": 1.506294984385844, - "learning_rate": 1.4719821705982417e-05, - "loss": 0.199, - "step": 7143 - }, - { - "epoch": 0.36, - "grad_norm": 1.258818755205757, - "learning_rate": 1.4718369661105973e-05, - "loss": 0.2058, - "step": 7144 - }, - { - "epoch": 0.36, - "grad_norm": 0.7594735060195563, - "learning_rate": 1.4716917488243485e-05, - "loss": 0.1839, - "step": 7145 - }, - { - "epoch": 0.36, - "grad_norm": 1.3661682393111336, - "learning_rate": 1.4715465187434342e-05, - "loss": 0.1978, - "step": 7146 - }, - { - "epoch": 0.36, - "grad_norm": 1.1330714487070268, - "learning_rate": 1.4714012758717941e-05, - "loss": 0.1889, - "step": 7147 - }, - { - "epoch": 0.36, - "grad_norm": 1.1189407523486754, - "learning_rate": 1.4712560202133679e-05, - "loss": 0.2008, - "step": 7148 - }, - { - "epoch": 0.36, - "grad_norm": 1.8707259683031279, - "learning_rate": 1.4711107517720953e-05, - "loss": 0.1833, - "step": 7149 - }, - { - "epoch": 0.36, - "grad_norm": 1.05214004080036, - "learning_rate": 1.4709654705519168e-05, - "loss": 0.1987, - "step": 7150 - }, - { - "epoch": 0.36, - "grad_norm": 1.0682777254050864, - "learning_rate": 1.4708201765567736e-05, - "loss": 0.1798, - "step": 7151 - }, - { - "epoch": 0.36, - "grad_norm": 1.1839164935435411, - "learning_rate": 1.4706748697906065e-05, - "loss": 0.201, - "step": 7152 - }, - { - "epoch": 0.36, - "grad_norm": 1.1184231051281768, - "learning_rate": 1.4705295502573571e-05, - "loss": 0.2206, - "step": 7153 - }, - { - "epoch": 0.36, - "grad_norm": 0.8979461790109566, - "learning_rate": 1.4703842179609668e-05, - "loss": 0.2269, - "step": 7154 - }, - { - "epoch": 0.36, - "grad_norm": 0.9264038467836124, - "learning_rate": 1.470238872905378e-05, - "loss": 0.1965, - "step": 7155 - }, - { - "epoch": 0.36, - "grad_norm": 0.8079661378752513, - "learning_rate": 1.4700935150945334e-05, - "loss": 0.1951, - "step": 7156 - }, - { - "epoch": 0.36, - "grad_norm": 1.1541144569815773, - "learning_rate": 1.4699481445323757e-05, - "loss": 0.2105, - "step": 7157 - }, - { - "epoch": 0.36, - "grad_norm": 1.1023098748718994, - "learning_rate": 1.4698027612228478e-05, - "loss": 0.2134, - "step": 7158 - }, - { - "epoch": 0.36, - "grad_norm": 0.9886838350178068, - "learning_rate": 1.4696573651698937e-05, - "loss": 0.2033, - "step": 7159 - }, - { - "epoch": 0.36, - "grad_norm": 0.6959608514363341, - "learning_rate": 1.4695119563774568e-05, - "loss": 0.1985, - "step": 7160 - }, - { - "epoch": 0.36, - "grad_norm": 43.03118587420249, - "learning_rate": 1.4693665348494819e-05, - "loss": 0.1765, - "step": 7161 - }, - { - "epoch": 0.36, - "grad_norm": 1.0170267614935629, - "learning_rate": 1.469221100589913e-05, - "loss": 0.182, - "step": 7162 - }, - { - "epoch": 0.36, - "grad_norm": 0.7638325075184338, - "learning_rate": 1.4690756536026952e-05, - "loss": 0.2022, - "step": 7163 - }, - { - "epoch": 0.36, - "grad_norm": 0.7929466280136379, - "learning_rate": 1.4689301938917737e-05, - "loss": 0.1787, - "step": 7164 - }, - { - "epoch": 0.36, - "grad_norm": 0.9428273508130095, - "learning_rate": 1.4687847214610944e-05, - "loss": 0.1829, - "step": 7165 - }, - { - "epoch": 0.36, - "grad_norm": 1.7559421200980139, - "learning_rate": 1.4686392363146032e-05, - "loss": 0.2028, - "step": 7166 - }, - { - "epoch": 0.36, - "grad_norm": 0.9443670684991925, - "learning_rate": 1.468493738456246e-05, - "loss": 0.1837, - "step": 7167 - }, - { - "epoch": 0.36, - "grad_norm": 1.187792744550687, - "learning_rate": 1.4683482278899696e-05, - "loss": 0.2238, - "step": 7168 - }, - { - "epoch": 0.36, - "grad_norm": 1.1056454767564257, - "learning_rate": 1.4682027046197214e-05, - "loss": 0.2076, - "step": 7169 - }, - { - "epoch": 0.36, - "grad_norm": 0.8500674701813452, - "learning_rate": 1.4680571686494483e-05, - "loss": 0.2032, - "step": 7170 - }, - { - "epoch": 0.36, - "grad_norm": 1.1124369387671862, - "learning_rate": 1.4679116199830978e-05, - "loss": 0.1817, - "step": 7171 - }, - { - "epoch": 0.36, - "grad_norm": 1.0194354860544301, - "learning_rate": 1.4677660586246183e-05, - "loss": 0.1875, - "step": 7172 - }, - { - "epoch": 0.36, - "grad_norm": 0.8772144588619847, - "learning_rate": 1.4676204845779585e-05, - "loss": 0.1882, - "step": 7173 - }, - { - "epoch": 0.36, - "grad_norm": 1.169638257110541, - "learning_rate": 1.4674748978470663e-05, - "loss": 0.195, - "step": 7174 - }, - { - "epoch": 0.36, - "grad_norm": 1.0328090611192982, - "learning_rate": 1.4673292984358911e-05, - "loss": 0.1922, - "step": 7175 - }, - { - "epoch": 0.36, - "grad_norm": 0.8585369331116066, - "learning_rate": 1.4671836863483819e-05, - "loss": 0.1723, - "step": 7176 - }, - { - "epoch": 0.36, - "grad_norm": 0.9757881472890122, - "learning_rate": 1.4670380615884891e-05, - "loss": 0.2182, - "step": 7177 - }, - { - "epoch": 0.37, - "grad_norm": 1.1618572034347405, - "learning_rate": 1.4668924241601627e-05, - "loss": 0.1893, - "step": 7178 - }, - { - "epoch": 0.37, - "grad_norm": 0.8359477577026576, - "learning_rate": 1.4667467740673528e-05, - "loss": 0.1752, - "step": 7179 - }, - { - "epoch": 0.37, - "grad_norm": 0.9593541875986683, - "learning_rate": 1.4666011113140103e-05, - "loss": 0.2085, - "step": 7180 - }, - { - "epoch": 0.37, - "grad_norm": 0.9373030226818541, - "learning_rate": 1.4664554359040862e-05, - "loss": 0.2049, - "step": 7181 - }, - { - "epoch": 0.37, - "grad_norm": 1.046339481942067, - "learning_rate": 1.4663097478415322e-05, - "loss": 0.2115, - "step": 7182 - }, - { - "epoch": 0.37, - "grad_norm": 1.16317388913559, - "learning_rate": 1.4661640471302998e-05, - "loss": 0.1874, - "step": 7183 - }, - { - "epoch": 0.37, - "grad_norm": 1.0277480391584366, - "learning_rate": 1.4660183337743414e-05, - "loss": 0.2128, - "step": 7184 - }, - { - "epoch": 0.37, - "grad_norm": 1.18453723334063, - "learning_rate": 1.4658726077776093e-05, - "loss": 0.2066, - "step": 7185 - }, - { - "epoch": 0.37, - "grad_norm": 1.0996444774192848, - "learning_rate": 1.4657268691440564e-05, - "loss": 0.2115, - "step": 7186 - }, - { - "epoch": 0.37, - "grad_norm": 0.8886067711511231, - "learning_rate": 1.465581117877636e-05, - "loss": 0.178, - "step": 7187 - }, - { - "epoch": 0.37, - "grad_norm": 1.113528221312272, - "learning_rate": 1.4654353539823014e-05, - "loss": 0.2113, - "step": 7188 - }, - { - "epoch": 0.37, - "grad_norm": 2.355610201304584, - "learning_rate": 1.4652895774620066e-05, - "loss": 0.1913, - "step": 7189 - }, - { - "epoch": 0.37, - "grad_norm": 0.9662037049135782, - "learning_rate": 1.4651437883207056e-05, - "loss": 0.2023, - "step": 7190 - }, - { - "epoch": 0.37, - "grad_norm": 1.791090091633652, - "learning_rate": 1.4649979865623531e-05, - "loss": 0.1935, - "step": 7191 - }, - { - "epoch": 0.37, - "grad_norm": 1.1973119684181808, - "learning_rate": 1.4648521721909042e-05, - "loss": 0.1807, - "step": 7192 - }, - { - "epoch": 0.37, - "grad_norm": 0.7438397230367796, - "learning_rate": 1.4647063452103135e-05, - "loss": 0.1938, - "step": 7193 - }, - { - "epoch": 0.37, - "grad_norm": 1.017059128027557, - "learning_rate": 1.464560505624537e-05, - "loss": 0.1893, - "step": 7194 - }, - { - "epoch": 0.37, - "grad_norm": 1.1394480960376436, - "learning_rate": 1.4644146534375307e-05, - "loss": 0.1966, - "step": 7195 - }, - { - "epoch": 0.37, - "grad_norm": 7.309035780184552, - "learning_rate": 1.4642687886532507e-05, - "loss": 0.2063, - "step": 7196 - }, - { - "epoch": 0.37, - "grad_norm": 0.965556266585576, - "learning_rate": 1.4641229112756537e-05, - "loss": 0.2061, - "step": 7197 - }, - { - "epoch": 0.37, - "grad_norm": 0.9247343728821459, - "learning_rate": 1.4639770213086962e-05, - "loss": 0.2257, - "step": 7198 - }, - { - "epoch": 0.37, - "grad_norm": 1.2873322184303597, - "learning_rate": 1.463831118756336e-05, - "loss": 0.2219, - "step": 7199 - }, - { - "epoch": 0.37, - "grad_norm": 1.0503873999197977, - "learning_rate": 1.4636852036225304e-05, - "loss": 0.1835, - "step": 7200 - }, - { - "epoch": 0.37, - "grad_norm": 1.0441262383963452, - "learning_rate": 1.4635392759112374e-05, - "loss": 0.2196, - "step": 7201 - }, - { - "epoch": 0.37, - "grad_norm": 1.0886323583800523, - "learning_rate": 1.4633933356264156e-05, - "loss": 0.2264, - "step": 7202 - }, - { - "epoch": 0.37, - "grad_norm": 1.0740117152730773, - "learning_rate": 1.463247382772023e-05, - "loss": 0.2016, - "step": 7203 - }, - { - "epoch": 0.37, - "grad_norm": 1.0431229186759934, - "learning_rate": 1.4631014173520192e-05, - "loss": 0.2064, - "step": 7204 - }, - { - "epoch": 0.37, - "grad_norm": 1.2250889849637991, - "learning_rate": 1.4629554393703635e-05, - "loss": 0.1874, - "step": 7205 - }, - { - "epoch": 0.37, - "grad_norm": 0.8985830906857448, - "learning_rate": 1.4628094488310154e-05, - "loss": 0.1906, - "step": 7206 - }, - { - "epoch": 0.37, - "grad_norm": 1.4827841710717786, - "learning_rate": 1.4626634457379343e-05, - "loss": 0.1823, - "step": 7207 - }, - { - "epoch": 0.37, - "grad_norm": 2.3135064026868895, - "learning_rate": 1.4625174300950817e-05, - "loss": 0.2345, - "step": 7208 - }, - { - "epoch": 0.37, - "grad_norm": 1.3767976555094654, - "learning_rate": 1.4623714019064178e-05, - "loss": 0.2239, - "step": 7209 - }, - { - "epoch": 0.37, - "grad_norm": 1.0399025276719043, - "learning_rate": 1.462225361175903e-05, - "loss": 0.1845, - "step": 7210 - }, - { - "epoch": 0.37, - "grad_norm": 1.1365818325865469, - "learning_rate": 1.4620793079074991e-05, - "loss": 0.2074, - "step": 7211 - }, - { - "epoch": 0.37, - "grad_norm": 0.9650778862329886, - "learning_rate": 1.4619332421051682e-05, - "loss": 0.201, - "step": 7212 - }, - { - "epoch": 0.37, - "grad_norm": 1.2317824187188442, - "learning_rate": 1.4617871637728719e-05, - "loss": 0.181, - "step": 7213 - }, - { - "epoch": 0.37, - "grad_norm": 1.2293569861227824, - "learning_rate": 1.461641072914573e-05, - "loss": 0.1788, - "step": 7214 - }, - { - "epoch": 0.37, - "grad_norm": 0.8421056403657364, - "learning_rate": 1.4614949695342335e-05, - "loss": 0.1826, - "step": 7215 - }, - { - "epoch": 0.37, - "grad_norm": 1.7556245059003615, - "learning_rate": 1.461348853635817e-05, - "loss": 0.198, - "step": 7216 - }, - { - "epoch": 0.37, - "grad_norm": 0.8373865110950348, - "learning_rate": 1.4612027252232868e-05, - "loss": 0.1869, - "step": 7217 - }, - { - "epoch": 0.37, - "grad_norm": 1.4859909383682117, - "learning_rate": 1.4610565843006066e-05, - "loss": 0.2279, - "step": 7218 - }, - { - "epoch": 0.37, - "grad_norm": 0.9840411718276176, - "learning_rate": 1.4609104308717405e-05, - "loss": 0.2095, - "step": 7219 - }, - { - "epoch": 0.37, - "grad_norm": 1.0813751383332257, - "learning_rate": 1.4607642649406529e-05, - "loss": 0.1955, - "step": 7220 - }, - { - "epoch": 0.37, - "grad_norm": 1.7086729803488199, - "learning_rate": 1.4606180865113087e-05, - "loss": 0.1809, - "step": 7221 - }, - { - "epoch": 0.37, - "grad_norm": 2.0543148531001396, - "learning_rate": 1.460471895587673e-05, - "loss": 0.2351, - "step": 7222 - }, - { - "epoch": 0.37, - "grad_norm": 1.5590508133007468, - "learning_rate": 1.460325692173711e-05, - "loss": 0.1929, - "step": 7223 - }, - { - "epoch": 0.37, - "grad_norm": 1.8575470786430182, - "learning_rate": 1.4601794762733885e-05, - "loss": 0.2414, - "step": 7224 - }, - { - "epoch": 0.37, - "grad_norm": 1.0713855039589257, - "learning_rate": 1.4600332478906718e-05, - "loss": 0.191, - "step": 7225 - }, - { - "epoch": 0.37, - "grad_norm": 1.050489799066373, - "learning_rate": 1.4598870070295274e-05, - "loss": 0.1957, - "step": 7226 - }, - { - "epoch": 0.37, - "grad_norm": 1.1241824559481621, - "learning_rate": 1.4597407536939221e-05, - "loss": 0.2044, - "step": 7227 - }, - { - "epoch": 0.37, - "grad_norm": 0.9398946171930076, - "learning_rate": 1.4595944878878226e-05, - "loss": 0.1788, - "step": 7228 - }, - { - "epoch": 0.37, - "grad_norm": 2.6923161481460482, - "learning_rate": 1.4594482096151965e-05, - "loss": 0.1869, - "step": 7229 - }, - { - "epoch": 0.37, - "grad_norm": 0.8537422935352744, - "learning_rate": 1.459301918880012e-05, - "loss": 0.1784, - "step": 7230 - }, - { - "epoch": 0.37, - "grad_norm": 1.251240074898674, - "learning_rate": 1.459155615686237e-05, - "loss": 0.2119, - "step": 7231 - }, - { - "epoch": 0.37, - "grad_norm": 1.04468001598618, - "learning_rate": 1.45900930003784e-05, - "loss": 0.2061, - "step": 7232 - }, - { - "epoch": 0.37, - "grad_norm": 0.9481835045186339, - "learning_rate": 1.4588629719387895e-05, - "loss": 0.1806, - "step": 7233 - }, - { - "epoch": 0.37, - "grad_norm": 1.5470941140754693, - "learning_rate": 1.4587166313930551e-05, - "loss": 0.1995, - "step": 7234 - }, - { - "epoch": 0.37, - "grad_norm": 0.8879328653548417, - "learning_rate": 1.4585702784046065e-05, - "loss": 0.2003, - "step": 7235 - }, - { - "epoch": 0.37, - "grad_norm": 0.9206448905493988, - "learning_rate": 1.4584239129774131e-05, - "loss": 0.1888, - "step": 7236 - }, - { - "epoch": 0.37, - "grad_norm": 1.7014245751046675, - "learning_rate": 1.458277535115445e-05, - "loss": 0.1778, - "step": 7237 - }, - { - "epoch": 0.37, - "grad_norm": 1.0059919959825143, - "learning_rate": 1.458131144822673e-05, - "loss": 0.2028, - "step": 7238 - }, - { - "epoch": 0.37, - "grad_norm": 1.0593151433111, - "learning_rate": 1.4579847421030677e-05, - "loss": 0.2188, - "step": 7239 - }, - { - "epoch": 0.37, - "grad_norm": 1.4445554920908814, - "learning_rate": 1.4578383269606004e-05, - "loss": 0.1781, - "step": 7240 - }, - { - "epoch": 0.37, - "grad_norm": 1.774630230057031, - "learning_rate": 1.4576918993992429e-05, - "loss": 0.1994, - "step": 7241 - }, - { - "epoch": 0.37, - "grad_norm": 1.711497024864439, - "learning_rate": 1.4575454594229666e-05, - "loss": 0.2623, - "step": 7242 - }, - { - "epoch": 0.37, - "grad_norm": 0.8612693862757297, - "learning_rate": 1.4573990070357437e-05, - "loss": 0.1703, - "step": 7243 - }, - { - "epoch": 0.37, - "grad_norm": 2.0058030680566827, - "learning_rate": 1.4572525422415471e-05, - "loss": 0.2049, - "step": 7244 - }, - { - "epoch": 0.37, - "grad_norm": 0.913388143288835, - "learning_rate": 1.4571060650443494e-05, - "loss": 0.197, - "step": 7245 - }, - { - "epoch": 0.37, - "grad_norm": 1.4343812327113545, - "learning_rate": 1.4569595754481238e-05, - "loss": 0.1961, - "step": 7246 - }, - { - "epoch": 0.37, - "grad_norm": 1.2043942073468759, - "learning_rate": 1.456813073456844e-05, - "loss": 0.1734, - "step": 7247 - }, - { - "epoch": 0.37, - "grad_norm": 1.0221013480787766, - "learning_rate": 1.4566665590744838e-05, - "loss": 0.1844, - "step": 7248 - }, - { - "epoch": 0.37, - "grad_norm": 2.427492512904666, - "learning_rate": 1.4565200323050173e-05, - "loss": 0.1967, - "step": 7249 - }, - { - "epoch": 0.37, - "grad_norm": 0.8977229493298299, - "learning_rate": 1.4563734931524191e-05, - "loss": 0.197, - "step": 7250 - }, - { - "epoch": 0.37, - "grad_norm": 0.8534301150679654, - "learning_rate": 1.4562269416206642e-05, - "loss": 0.1963, - "step": 7251 - }, - { - "epoch": 0.37, - "grad_norm": 1.7761348402379218, - "learning_rate": 1.4560803777137279e-05, - "loss": 0.195, - "step": 7252 - }, - { - "epoch": 0.37, - "grad_norm": 1.1073974709757195, - "learning_rate": 1.4559338014355853e-05, - "loss": 0.2045, - "step": 7253 - }, - { - "epoch": 0.37, - "grad_norm": 1.488909966122852, - "learning_rate": 1.455787212790213e-05, - "loss": 0.2125, - "step": 7254 - }, - { - "epoch": 0.37, - "grad_norm": 0.8852613049754224, - "learning_rate": 1.4556406117815864e-05, - "loss": 0.1959, - "step": 7255 - }, - { - "epoch": 0.37, - "grad_norm": 0.8339723125115669, - "learning_rate": 1.4554939984136824e-05, - "loss": 0.1803, - "step": 7256 - }, - { - "epoch": 0.37, - "grad_norm": 0.9529550397471396, - "learning_rate": 1.4553473726904783e-05, - "loss": 0.223, - "step": 7257 - }, - { - "epoch": 0.37, - "grad_norm": 0.8851176434942727, - "learning_rate": 1.4552007346159509e-05, - "loss": 0.1759, - "step": 7258 - }, - { - "epoch": 0.37, - "grad_norm": 0.8007076173251486, - "learning_rate": 1.4550540841940778e-05, - "loss": 0.2015, - "step": 7259 - }, - { - "epoch": 0.37, - "grad_norm": 0.9714155323978105, - "learning_rate": 1.4549074214288368e-05, - "loss": 0.1795, - "step": 7260 - }, - { - "epoch": 0.37, - "grad_norm": 1.6302081131518966, - "learning_rate": 1.4547607463242068e-05, - "loss": 0.1857, - "step": 7261 - }, - { - "epoch": 0.37, - "grad_norm": 1.1714822203917645, - "learning_rate": 1.4546140588841656e-05, - "loss": 0.2201, - "step": 7262 - }, - { - "epoch": 0.37, - "grad_norm": 1.3467129135182294, - "learning_rate": 1.4544673591126924e-05, - "loss": 0.1882, - "step": 7263 - }, - { - "epoch": 0.37, - "grad_norm": 1.0569269794229985, - "learning_rate": 1.4543206470137663e-05, - "loss": 0.2448, - "step": 7264 - }, - { - "epoch": 0.37, - "grad_norm": 1.2157216517942167, - "learning_rate": 1.4541739225913669e-05, - "loss": 0.2044, - "step": 7265 - }, - { - "epoch": 0.37, - "grad_norm": 0.9933802128857785, - "learning_rate": 1.4540271858494746e-05, - "loss": 0.2042, - "step": 7266 - }, - { - "epoch": 0.37, - "grad_norm": 0.933445012615465, - "learning_rate": 1.453880436792069e-05, - "loss": 0.2293, - "step": 7267 - }, - { - "epoch": 0.37, - "grad_norm": 2.7724878186469613, - "learning_rate": 1.4537336754231307e-05, - "loss": 0.1888, - "step": 7268 - }, - { - "epoch": 0.37, - "grad_norm": 1.4015165482196508, - "learning_rate": 1.453586901746641e-05, - "loss": 0.1998, - "step": 7269 - }, - { - "epoch": 0.37, - "grad_norm": 0.8703397411856586, - "learning_rate": 1.4534401157665812e-05, - "loss": 0.2205, - "step": 7270 - }, - { - "epoch": 0.37, - "grad_norm": 1.0065068650542348, - "learning_rate": 1.4532933174869323e-05, - "loss": 0.1992, - "step": 7271 - }, - { - "epoch": 0.37, - "grad_norm": 1.3699495130815498, - "learning_rate": 1.4531465069116771e-05, - "loss": 0.2028, - "step": 7272 - }, - { - "epoch": 0.37, - "grad_norm": 1.0793924999933862, - "learning_rate": 1.4529996840447966e-05, - "loss": 0.2349, - "step": 7273 - }, - { - "epoch": 0.37, - "grad_norm": 0.7948066903330879, - "learning_rate": 1.4528528488902745e-05, - "loss": 0.1809, - "step": 7274 - }, - { - "epoch": 0.37, - "grad_norm": 1.1284683175111199, - "learning_rate": 1.4527060014520932e-05, - "loss": 0.217, - "step": 7275 - }, - { - "epoch": 0.37, - "grad_norm": 1.5284300188468116, - "learning_rate": 1.452559141734236e-05, - "loss": 0.2062, - "step": 7276 - }, - { - "epoch": 0.37, - "grad_norm": 0.781343543319652, - "learning_rate": 1.4524122697406866e-05, - "loss": 0.1962, - "step": 7277 - }, - { - "epoch": 0.37, - "grad_norm": 1.295639290670492, - "learning_rate": 1.452265385475429e-05, - "loss": 0.212, - "step": 7278 - }, - { - "epoch": 0.37, - "grad_norm": 1.740549444374038, - "learning_rate": 1.4521184889424472e-05, - "loss": 0.2036, - "step": 7279 - }, - { - "epoch": 0.37, - "grad_norm": 1.0011987948570245, - "learning_rate": 1.4519715801457256e-05, - "loss": 0.1967, - "step": 7280 - }, - { - "epoch": 0.37, - "grad_norm": 2.5341229496916164, - "learning_rate": 1.4518246590892493e-05, - "loss": 0.2037, - "step": 7281 - }, - { - "epoch": 0.37, - "grad_norm": 1.0225133863288018, - "learning_rate": 1.4516777257770039e-05, - "loss": 0.1988, - "step": 7282 - }, - { - "epoch": 0.37, - "grad_norm": 0.8600004079706661, - "learning_rate": 1.4515307802129746e-05, - "loss": 0.1886, - "step": 7283 - }, - { - "epoch": 0.37, - "grad_norm": 0.8580445077526663, - "learning_rate": 1.4513838224011474e-05, - "loss": 0.1931, - "step": 7284 - }, - { - "epoch": 0.37, - "grad_norm": 1.277280462094834, - "learning_rate": 1.4512368523455085e-05, - "loss": 0.2025, - "step": 7285 - }, - { - "epoch": 0.37, - "grad_norm": 1.00902764493686, - "learning_rate": 1.4510898700500442e-05, - "loss": 0.2112, - "step": 7286 - }, - { - "epoch": 0.37, - "grad_norm": 0.9961082386523968, - "learning_rate": 1.450942875518742e-05, - "loss": 0.1924, - "step": 7287 - }, - { - "epoch": 0.37, - "grad_norm": 1.0077175637689286, - "learning_rate": 1.4507958687555887e-05, - "loss": 0.1927, - "step": 7288 - }, - { - "epoch": 0.37, - "grad_norm": 1.064534723234433, - "learning_rate": 1.4506488497645724e-05, - "loss": 0.1952, - "step": 7289 - }, - { - "epoch": 0.37, - "grad_norm": 0.8798686625193131, - "learning_rate": 1.4505018185496802e-05, - "loss": 0.2013, - "step": 7290 - }, - { - "epoch": 0.37, - "grad_norm": 0.9306270609813555, - "learning_rate": 1.4503547751149007e-05, - "loss": 0.2206, - "step": 7291 - }, - { - "epoch": 0.37, - "grad_norm": 1.1938661547775145, - "learning_rate": 1.4502077194642229e-05, - "loss": 0.2108, - "step": 7292 - }, - { - "epoch": 0.37, - "grad_norm": 1.0111927132329457, - "learning_rate": 1.4500606516016347e-05, - "loss": 0.1839, - "step": 7293 - }, - { - "epoch": 0.37, - "grad_norm": 1.2037406245910078, - "learning_rate": 1.4499135715311262e-05, - "loss": 0.212, - "step": 7294 - }, - { - "epoch": 0.37, - "grad_norm": 1.6647718656816444, - "learning_rate": 1.4497664792566865e-05, - "loss": 0.224, - "step": 7295 - }, - { - "epoch": 0.37, - "grad_norm": 0.9636072832329631, - "learning_rate": 1.4496193747823062e-05, - "loss": 0.2038, - "step": 7296 - }, - { - "epoch": 0.37, - "grad_norm": 0.8220322234345318, - "learning_rate": 1.4494722581119748e-05, - "loss": 0.1819, - "step": 7297 - }, - { - "epoch": 0.37, - "grad_norm": 1.1926677975279927, - "learning_rate": 1.4493251292496826e-05, - "loss": 0.1974, - "step": 7298 - }, - { - "epoch": 0.37, - "grad_norm": 1.2505678449152033, - "learning_rate": 1.4491779881994208e-05, - "loss": 0.2185, - "step": 7299 - }, - { - "epoch": 0.37, - "grad_norm": 1.339102115123226, - "learning_rate": 1.4490308349651812e-05, - "loss": 0.1729, - "step": 7300 - }, - { - "epoch": 0.37, - "grad_norm": 1.3449780056843892, - "learning_rate": 1.4488836695509545e-05, - "loss": 0.1973, - "step": 7301 - }, - { - "epoch": 0.37, - "grad_norm": 1.0480233615897767, - "learning_rate": 1.4487364919607331e-05, - "loss": 0.2261, - "step": 7302 - }, - { - "epoch": 0.37, - "grad_norm": 1.1627938006870673, - "learning_rate": 1.4485893021985091e-05, - "loss": 0.1835, - "step": 7303 - }, - { - "epoch": 0.37, - "grad_norm": 0.9359446400184624, - "learning_rate": 1.4484421002682745e-05, - "loss": 0.2001, - "step": 7304 - }, - { - "epoch": 0.37, - "grad_norm": 1.7920756240041322, - "learning_rate": 1.4482948861740229e-05, - "loss": 0.2011, - "step": 7305 - }, - { - "epoch": 0.37, - "grad_norm": 0.998299872861058, - "learning_rate": 1.4481476599197473e-05, - "loss": 0.2082, - "step": 7306 - }, - { - "epoch": 0.37, - "grad_norm": 0.9021048208921083, - "learning_rate": 1.448000421509441e-05, - "loss": 0.2173, - "step": 7307 - }, - { - "epoch": 0.37, - "grad_norm": 0.8386435457194779, - "learning_rate": 1.4478531709470978e-05, - "loss": 0.2075, - "step": 7308 - }, - { - "epoch": 0.37, - "grad_norm": 1.4749227890431893, - "learning_rate": 1.4477059082367122e-05, - "loss": 0.1917, - "step": 7309 - }, - { - "epoch": 0.37, - "grad_norm": 0.9811174959203954, - "learning_rate": 1.4475586333822787e-05, - "loss": 0.1978, - "step": 7310 - }, - { - "epoch": 0.37, - "grad_norm": 1.4198098792649345, - "learning_rate": 1.4474113463877917e-05, - "loss": 0.2122, - "step": 7311 - }, - { - "epoch": 0.37, - "grad_norm": 0.9695964071852331, - "learning_rate": 1.4472640472572468e-05, - "loss": 0.2082, - "step": 7312 - }, - { - "epoch": 0.37, - "grad_norm": 0.9142693265376828, - "learning_rate": 1.4471167359946394e-05, - "loss": 0.186, - "step": 7313 - }, - { - "epoch": 0.37, - "grad_norm": 1.0171477584067625, - "learning_rate": 1.4469694126039653e-05, - "loss": 0.1787, - "step": 7314 - }, - { - "epoch": 0.37, - "grad_norm": 1.016606679527248, - "learning_rate": 1.4468220770892208e-05, - "loss": 0.1863, - "step": 7315 - }, - { - "epoch": 0.37, - "grad_norm": 0.969808532614358, - "learning_rate": 1.4466747294544017e-05, - "loss": 0.2009, - "step": 7316 - }, - { - "epoch": 0.37, - "grad_norm": 0.820909974683755, - "learning_rate": 1.4465273697035055e-05, - "loss": 0.2328, - "step": 7317 - }, - { - "epoch": 0.37, - "grad_norm": 0.894778322942877, - "learning_rate": 1.4463799978405295e-05, - "loss": 0.2129, - "step": 7318 - }, - { - "epoch": 0.37, - "grad_norm": 1.4698423912176912, - "learning_rate": 1.4462326138694706e-05, - "loss": 0.2089, - "step": 7319 - }, - { - "epoch": 0.37, - "grad_norm": 0.8868102604174576, - "learning_rate": 1.446085217794327e-05, - "loss": 0.1753, - "step": 7320 - }, - { - "epoch": 0.37, - "grad_norm": 1.1381290790671303, - "learning_rate": 1.4459378096190966e-05, - "loss": 0.2157, - "step": 7321 - }, - { - "epoch": 0.37, - "grad_norm": 0.9477793579680197, - "learning_rate": 1.4457903893477779e-05, - "loss": 0.2068, - "step": 7322 - }, - { - "epoch": 0.37, - "grad_norm": 1.0369651684066326, - "learning_rate": 1.4456429569843698e-05, - "loss": 0.1826, - "step": 7323 - }, - { - "epoch": 0.37, - "grad_norm": 0.8862359245446289, - "learning_rate": 1.4454955125328711e-05, - "loss": 0.2096, - "step": 7324 - }, - { - "epoch": 0.37, - "grad_norm": 0.971297388987907, - "learning_rate": 1.4453480559972817e-05, - "loss": 0.2092, - "step": 7325 - }, - { - "epoch": 0.37, - "grad_norm": 1.621655362230059, - "learning_rate": 1.4452005873816009e-05, - "loss": 0.1852, - "step": 7326 - }, - { - "epoch": 0.37, - "grad_norm": 0.9555106190526966, - "learning_rate": 1.4450531066898293e-05, - "loss": 0.1971, - "step": 7327 - }, - { - "epoch": 0.37, - "grad_norm": 1.1937414917284117, - "learning_rate": 1.4449056139259667e-05, - "loss": 0.2201, - "step": 7328 - }, - { - "epoch": 0.37, - "grad_norm": 0.8844007950976225, - "learning_rate": 1.4447581090940144e-05, - "loss": 0.2079, - "step": 7329 - }, - { - "epoch": 0.37, - "grad_norm": 1.3966735678188713, - "learning_rate": 1.4446105921979731e-05, - "loss": 0.1648, - "step": 7330 - }, - { - "epoch": 0.37, - "grad_norm": 0.8794422204306628, - "learning_rate": 1.4444630632418449e-05, - "loss": 0.1935, - "step": 7331 - }, - { - "epoch": 0.37, - "grad_norm": 1.1053488098602837, - "learning_rate": 1.4443155222296305e-05, - "loss": 0.2236, - "step": 7332 - }, - { - "epoch": 0.37, - "grad_norm": 1.165254773028472, - "learning_rate": 1.4441679691653327e-05, - "loss": 0.1892, - "step": 7333 - }, - { - "epoch": 0.37, - "grad_norm": 0.9169400364770195, - "learning_rate": 1.4440204040529536e-05, - "loss": 0.1849, - "step": 7334 - }, - { - "epoch": 0.37, - "grad_norm": 0.8791850419992676, - "learning_rate": 1.4438728268964956e-05, - "loss": 0.195, - "step": 7335 - }, - { - "epoch": 0.37, - "grad_norm": 0.8901616149988154, - "learning_rate": 1.4437252376999627e-05, - "loss": 0.2114, - "step": 7336 - }, - { - "epoch": 0.37, - "grad_norm": 1.0122521359050152, - "learning_rate": 1.4435776364673573e-05, - "loss": 0.195, - "step": 7337 - }, - { - "epoch": 0.37, - "grad_norm": 0.9392493572957487, - "learning_rate": 1.4434300232026837e-05, - "loss": 0.1802, - "step": 7338 - }, - { - "epoch": 0.37, - "grad_norm": 1.1768578635257316, - "learning_rate": 1.4432823979099453e-05, - "loss": 0.2006, - "step": 7339 - }, - { - "epoch": 0.37, - "grad_norm": 1.3795244554400317, - "learning_rate": 1.443134760593147e-05, - "loss": 0.1662, - "step": 7340 - }, - { - "epoch": 0.37, - "grad_norm": 1.452143408625368, - "learning_rate": 1.4429871112562935e-05, - "loss": 0.197, - "step": 7341 - }, - { - "epoch": 0.37, - "grad_norm": 1.1602637874632054, - "learning_rate": 1.4428394499033893e-05, - "loss": 0.2098, - "step": 7342 - }, - { - "epoch": 0.37, - "grad_norm": 1.4394723268569924, - "learning_rate": 1.4426917765384398e-05, - "loss": 0.2087, - "step": 7343 - }, - { - "epoch": 0.37, - "grad_norm": 1.0905020016637779, - "learning_rate": 1.4425440911654514e-05, - "loss": 0.207, - "step": 7344 - }, - { - "epoch": 0.37, - "grad_norm": 1.5623935818671604, - "learning_rate": 1.4423963937884293e-05, - "loss": 0.2205, - "step": 7345 - }, - { - "epoch": 0.37, - "grad_norm": 0.9808215660267777, - "learning_rate": 1.44224868441138e-05, - "loss": 0.1919, - "step": 7346 - }, - { - "epoch": 0.37, - "grad_norm": 1.0041036859515802, - "learning_rate": 1.4421009630383103e-05, - "loss": 0.18, - "step": 7347 - }, - { - "epoch": 0.37, - "grad_norm": 1.1655556834955962, - "learning_rate": 1.4419532296732271e-05, - "loss": 0.1819, - "step": 7348 - }, - { - "epoch": 0.37, - "grad_norm": 1.073691610451434, - "learning_rate": 1.4418054843201373e-05, - "loss": 0.1892, - "step": 7349 - }, - { - "epoch": 0.37, - "grad_norm": 0.9877252828329729, - "learning_rate": 1.441657726983049e-05, - "loss": 0.195, - "step": 7350 - }, - { - "epoch": 0.37, - "grad_norm": 1.0427460053130362, - "learning_rate": 1.4415099576659698e-05, - "loss": 0.1944, - "step": 7351 - }, - { - "epoch": 0.37, - "grad_norm": 0.9397173058898916, - "learning_rate": 1.4413621763729077e-05, - "loss": 0.1876, - "step": 7352 - }, - { - "epoch": 0.37, - "grad_norm": 1.1310771497695997, - "learning_rate": 1.4412143831078722e-05, - "loss": 0.1863, - "step": 7353 - }, - { - "epoch": 0.37, - "grad_norm": 1.048926621899362, - "learning_rate": 1.4410665778748714e-05, - "loss": 0.2142, - "step": 7354 - }, - { - "epoch": 0.37, - "grad_norm": 0.9894402322478005, - "learning_rate": 1.4409187606779149e-05, - "loss": 0.1851, - "step": 7355 - }, - { - "epoch": 0.37, - "grad_norm": 0.9000909516825687, - "learning_rate": 1.4407709315210117e-05, - "loss": 0.1865, - "step": 7356 - }, - { - "epoch": 0.37, - "grad_norm": 0.6959787873830762, - "learning_rate": 1.4406230904081724e-05, - "loss": 0.194, - "step": 7357 - }, - { - "epoch": 0.37, - "grad_norm": 1.5086135648888905, - "learning_rate": 1.440475237343407e-05, - "loss": 0.2017, - "step": 7358 - }, - { - "epoch": 0.37, - "grad_norm": 1.5819638309039437, - "learning_rate": 1.4403273723307259e-05, - "loss": 0.1928, - "step": 7359 - }, - { - "epoch": 0.37, - "grad_norm": 1.6196587202106798, - "learning_rate": 1.4401794953741397e-05, - "loss": 0.1949, - "step": 7360 - }, - { - "epoch": 0.37, - "grad_norm": 1.0728889712252914, - "learning_rate": 1.4400316064776598e-05, - "loss": 0.2076, - "step": 7361 - }, - { - "epoch": 0.37, - "grad_norm": 0.9184986881056747, - "learning_rate": 1.4398837056452979e-05, - "loss": 0.1919, - "step": 7362 - }, - { - "epoch": 0.37, - "grad_norm": 0.8316174688989357, - "learning_rate": 1.4397357928810657e-05, - "loss": 0.1754, - "step": 7363 - }, - { - "epoch": 0.37, - "grad_norm": 0.7761411361598949, - "learning_rate": 1.4395878681889753e-05, - "loss": 0.2109, - "step": 7364 - }, - { - "epoch": 0.37, - "grad_norm": 1.2223321694261715, - "learning_rate": 1.4394399315730389e-05, - "loss": 0.2328, - "step": 7365 - }, - { - "epoch": 0.37, - "grad_norm": 0.9737362109650243, - "learning_rate": 1.4392919830372698e-05, - "loss": 0.2028, - "step": 7366 - }, - { - "epoch": 0.37, - "grad_norm": 0.9702903964399952, - "learning_rate": 1.4391440225856807e-05, - "loss": 0.1712, - "step": 7367 - }, - { - "epoch": 0.37, - "grad_norm": 0.7614903580983567, - "learning_rate": 1.4389960502222855e-05, - "loss": 0.2001, - "step": 7368 - }, - { - "epoch": 0.37, - "grad_norm": 0.9259848994936244, - "learning_rate": 1.438848065951097e-05, - "loss": 0.2102, - "step": 7369 - }, - { - "epoch": 0.37, - "grad_norm": 1.48112954255448, - "learning_rate": 1.4387000697761305e-05, - "loss": 0.193, - "step": 7370 - }, - { - "epoch": 0.37, - "grad_norm": 0.8531334505763309, - "learning_rate": 1.4385520617013998e-05, - "loss": 0.1966, - "step": 7371 - }, - { - "epoch": 0.37, - "grad_norm": 1.103475362674807, - "learning_rate": 1.4384040417309194e-05, - "loss": 0.1842, - "step": 7372 - }, - { - "epoch": 0.37, - "grad_norm": 1.266035539089789, - "learning_rate": 1.4382560098687045e-05, - "loss": 0.2053, - "step": 7373 - }, - { - "epoch": 0.37, - "grad_norm": 1.0031143586674998, - "learning_rate": 1.4381079661187708e-05, - "loss": 0.2169, - "step": 7374 - }, - { - "epoch": 0.38, - "grad_norm": 0.9897718835102313, - "learning_rate": 1.4379599104851336e-05, - "loss": 0.2133, - "step": 7375 - }, - { - "epoch": 0.38, - "grad_norm": 4.726912981667751, - "learning_rate": 1.4378118429718093e-05, - "loss": 0.249, - "step": 7376 - }, - { - "epoch": 0.38, - "grad_norm": 0.84419251677018, - "learning_rate": 1.4376637635828142e-05, - "loss": 0.1989, - "step": 7377 - }, - { - "epoch": 0.38, - "grad_norm": 0.9721913264285105, - "learning_rate": 1.4375156723221642e-05, - "loss": 0.2163, - "step": 7378 - }, - { - "epoch": 0.38, - "grad_norm": 1.0242439576936002, - "learning_rate": 1.4373675691938773e-05, - "loss": 0.2078, - "step": 7379 - }, - { - "epoch": 0.38, - "grad_norm": 0.9130983342073931, - "learning_rate": 1.4372194542019705e-05, - "loss": 0.2066, - "step": 7380 - }, - { - "epoch": 0.38, - "grad_norm": 1.3429044399629129, - "learning_rate": 1.4370713273504611e-05, - "loss": 0.1946, - "step": 7381 - }, - { - "epoch": 0.38, - "grad_norm": 1.2895851595974883, - "learning_rate": 1.4369231886433672e-05, - "loss": 0.1807, - "step": 7382 - }, - { - "epoch": 0.38, - "grad_norm": 1.0427998515920929, - "learning_rate": 1.4367750380847073e-05, - "loss": 0.2046, - "step": 7383 - }, - { - "epoch": 0.38, - "grad_norm": 0.7801428574537339, - "learning_rate": 1.4366268756784998e-05, - "loss": 0.1846, - "step": 7384 - }, - { - "epoch": 0.38, - "grad_norm": 0.8523927910703909, - "learning_rate": 1.4364787014287636e-05, - "loss": 0.1846, - "step": 7385 - }, - { - "epoch": 0.38, - "grad_norm": 0.8105319148802839, - "learning_rate": 1.4363305153395179e-05, - "loss": 0.1764, - "step": 7386 - }, - { - "epoch": 0.38, - "grad_norm": 0.856027812134765, - "learning_rate": 1.4361823174147822e-05, - "loss": 0.2052, - "step": 7387 - }, - { - "epoch": 0.38, - "grad_norm": 0.8132340069486842, - "learning_rate": 1.4360341076585769e-05, - "loss": 0.1954, - "step": 7388 - }, - { - "epoch": 0.38, - "grad_norm": 0.7088325614833517, - "learning_rate": 1.4358858860749213e-05, - "loss": 0.1911, - "step": 7389 - }, - { - "epoch": 0.38, - "grad_norm": 0.8891267655937457, - "learning_rate": 1.4357376526678368e-05, - "loss": 0.1839, - "step": 7390 - }, - { - "epoch": 0.38, - "grad_norm": 0.866720734186058, - "learning_rate": 1.4355894074413436e-05, - "loss": 0.2069, - "step": 7391 - }, - { - "epoch": 0.38, - "grad_norm": 1.0826656456304604, - "learning_rate": 1.4354411503994634e-05, - "loss": 0.2116, - "step": 7392 - }, - { - "epoch": 0.38, - "grad_norm": 1.064708131590027, - "learning_rate": 1.4352928815462175e-05, - "loss": 0.1811, - "step": 7393 - }, - { - "epoch": 0.38, - "grad_norm": 0.8589484404006575, - "learning_rate": 1.4351446008856274e-05, - "loss": 0.2032, - "step": 7394 - }, - { - "epoch": 0.38, - "grad_norm": 0.7668618139828199, - "learning_rate": 1.4349963084217154e-05, - "loss": 0.2154, - "step": 7395 - }, - { - "epoch": 0.38, - "grad_norm": 0.7697956911015835, - "learning_rate": 1.4348480041585037e-05, - "loss": 0.2101, - "step": 7396 - }, - { - "epoch": 0.38, - "grad_norm": 0.8783925945149618, - "learning_rate": 1.4346996881000157e-05, - "loss": 0.193, - "step": 7397 - }, - { - "epoch": 0.38, - "grad_norm": 2.150747005100924, - "learning_rate": 1.434551360250274e-05, - "loss": 0.1899, - "step": 7398 - }, - { - "epoch": 0.38, - "grad_norm": 1.1226882074944422, - "learning_rate": 1.4344030206133022e-05, - "loss": 0.214, - "step": 7399 - }, - { - "epoch": 0.38, - "grad_norm": 0.8692486363623312, - "learning_rate": 1.4342546691931238e-05, - "loss": 0.2153, - "step": 7400 - }, - { - "epoch": 0.38, - "grad_norm": 1.4976160493577089, - "learning_rate": 1.4341063059937631e-05, - "loss": 0.2022, - "step": 7401 - }, - { - "epoch": 0.38, - "grad_norm": 1.1188129385862706, - "learning_rate": 1.4339579310192444e-05, - "loss": 0.2345, - "step": 7402 - }, - { - "epoch": 0.38, - "grad_norm": 1.0964171334407278, - "learning_rate": 1.4338095442735923e-05, - "loss": 0.1857, - "step": 7403 - }, - { - "epoch": 0.38, - "grad_norm": 1.8696443304840087, - "learning_rate": 1.4336611457608314e-05, - "loss": 0.2192, - "step": 7404 - }, - { - "epoch": 0.38, - "grad_norm": 1.1151258175613268, - "learning_rate": 1.4335127354849876e-05, - "loss": 0.1896, - "step": 7405 - }, - { - "epoch": 0.38, - "grad_norm": 0.8802160194360952, - "learning_rate": 1.4333643134500865e-05, - "loss": 0.2021, - "step": 7406 - }, - { - "epoch": 0.38, - "grad_norm": 0.8063957735402743, - "learning_rate": 1.433215879660154e-05, - "loss": 0.1948, - "step": 7407 - }, - { - "epoch": 0.38, - "grad_norm": 0.913986916751216, - "learning_rate": 1.4330674341192163e-05, - "loss": 0.1951, - "step": 7408 - }, - { - "epoch": 0.38, - "grad_norm": 1.3042370977547812, - "learning_rate": 1.4329189768312997e-05, - "loss": 0.2044, - "step": 7409 - }, - { - "epoch": 0.38, - "grad_norm": 1.18897831691075, - "learning_rate": 1.4327705078004317e-05, - "loss": 0.1884, - "step": 7410 - }, - { - "epoch": 0.38, - "grad_norm": 1.80509128784862, - "learning_rate": 1.432622027030639e-05, - "loss": 0.1911, - "step": 7411 - }, - { - "epoch": 0.38, - "grad_norm": 1.1516519194253798, - "learning_rate": 1.4324735345259495e-05, - "loss": 0.1702, - "step": 7412 - }, - { - "epoch": 0.38, - "grad_norm": 0.8693490250888909, - "learning_rate": 1.432325030290391e-05, - "loss": 0.1905, - "step": 7413 - }, - { - "epoch": 0.38, - "grad_norm": 0.8508701142949681, - "learning_rate": 1.4321765143279916e-05, - "loss": 0.2042, - "step": 7414 - }, - { - "epoch": 0.38, - "grad_norm": 1.2434167042103295, - "learning_rate": 1.4320279866427798e-05, - "loss": 0.2234, - "step": 7415 - }, - { - "epoch": 0.38, - "grad_norm": 0.8106162299147703, - "learning_rate": 1.4318794472387845e-05, - "loss": 0.1855, - "step": 7416 - }, - { - "epoch": 0.38, - "grad_norm": 0.8147454401274455, - "learning_rate": 1.4317308961200347e-05, - "loss": 0.1982, - "step": 7417 - }, - { - "epoch": 0.38, - "grad_norm": 0.7589531970409737, - "learning_rate": 1.43158233329056e-05, - "loss": 0.1847, - "step": 7418 - }, - { - "epoch": 0.38, - "grad_norm": 0.7345539558339856, - "learning_rate": 1.4314337587543903e-05, - "loss": 0.1827, - "step": 7419 - }, - { - "epoch": 0.38, - "grad_norm": 0.8228984501748001, - "learning_rate": 1.4312851725155554e-05, - "loss": 0.1878, - "step": 7420 - }, - { - "epoch": 0.38, - "grad_norm": 1.7492280721147175, - "learning_rate": 1.431136574578086e-05, - "loss": 0.1696, - "step": 7421 - }, - { - "epoch": 0.38, - "grad_norm": 1.1691536457309282, - "learning_rate": 1.4309879649460123e-05, - "loss": 0.1944, - "step": 7422 - }, - { - "epoch": 0.38, - "grad_norm": 0.9063338491403092, - "learning_rate": 1.4308393436233658e-05, - "loss": 0.1912, - "step": 7423 - }, - { - "epoch": 0.38, - "grad_norm": 1.2322521307302403, - "learning_rate": 1.4306907106141781e-05, - "loss": 0.1909, - "step": 7424 - }, - { - "epoch": 0.38, - "grad_norm": 0.9291985895172377, - "learning_rate": 1.4305420659224802e-05, - "loss": 0.2008, - "step": 7425 - }, - { - "epoch": 0.38, - "grad_norm": 0.8540370909551723, - "learning_rate": 1.4303934095523046e-05, - "loss": 0.1881, - "step": 7426 - }, - { - "epoch": 0.38, - "grad_norm": 0.8181487956916195, - "learning_rate": 1.4302447415076835e-05, - "loss": 0.2017, - "step": 7427 - }, - { - "epoch": 0.38, - "grad_norm": 0.8370007506643188, - "learning_rate": 1.4300960617926495e-05, - "loss": 0.192, - "step": 7428 - }, - { - "epoch": 0.38, - "grad_norm": 0.6955940772303095, - "learning_rate": 1.4299473704112355e-05, - "loss": 0.2156, - "step": 7429 - }, - { - "epoch": 0.38, - "grad_norm": 0.9342504930900565, - "learning_rate": 1.4297986673674747e-05, - "loss": 0.2549, - "step": 7430 - }, - { - "epoch": 0.38, - "grad_norm": 0.9147917475970455, - "learning_rate": 1.4296499526654004e-05, - "loss": 0.1773, - "step": 7431 - }, - { - "epoch": 0.38, - "grad_norm": 0.8086678880363469, - "learning_rate": 1.4295012263090475e-05, - "loss": 0.1908, - "step": 7432 - }, - { - "epoch": 0.38, - "grad_norm": 0.95526409863462, - "learning_rate": 1.4293524883024494e-05, - "loss": 0.1924, - "step": 7433 - }, - { - "epoch": 0.38, - "grad_norm": 0.9898514284623698, - "learning_rate": 1.4292037386496407e-05, - "loss": 0.1975, - "step": 7434 - }, - { - "epoch": 0.38, - "grad_norm": 1.214407578688893, - "learning_rate": 1.4290549773546565e-05, - "loss": 0.2102, - "step": 7435 - }, - { - "epoch": 0.38, - "grad_norm": 0.7862349311310997, - "learning_rate": 1.4289062044215318e-05, - "loss": 0.1952, - "step": 7436 - }, - { - "epoch": 0.38, - "grad_norm": 0.805684277750261, - "learning_rate": 1.428757419854302e-05, - "loss": 0.2077, - "step": 7437 - }, - { - "epoch": 0.38, - "grad_norm": 1.52754589046096, - "learning_rate": 1.428608623657003e-05, - "loss": 0.2141, - "step": 7438 - }, - { - "epoch": 0.38, - "grad_norm": 0.995220102864857, - "learning_rate": 1.4284598158336707e-05, - "loss": 0.2145, - "step": 7439 - }, - { - "epoch": 0.38, - "grad_norm": 0.9295949768900353, - "learning_rate": 1.428310996388342e-05, - "loss": 0.2259, - "step": 7440 - }, - { - "epoch": 0.38, - "grad_norm": 0.9462065479377103, - "learning_rate": 1.428162165325053e-05, - "loss": 0.2235, - "step": 7441 - }, - { - "epoch": 0.38, - "grad_norm": 1.2948090680155522, - "learning_rate": 1.4280133226478413e-05, - "loss": 0.1929, - "step": 7442 - }, - { - "epoch": 0.38, - "grad_norm": 1.4418156783883276, - "learning_rate": 1.4278644683607442e-05, - "loss": 0.1942, - "step": 7443 - }, - { - "epoch": 0.38, - "grad_norm": 0.8066543175705367, - "learning_rate": 1.4277156024677987e-05, - "loss": 0.1654, - "step": 7444 - }, - { - "epoch": 0.38, - "grad_norm": 1.230279856593095, - "learning_rate": 1.4275667249730437e-05, - "loss": 0.2238, - "step": 7445 - }, - { - "epoch": 0.38, - "grad_norm": 1.1038156894093265, - "learning_rate": 1.427417835880517e-05, - "loss": 0.2287, - "step": 7446 - }, - { - "epoch": 0.38, - "grad_norm": 1.218749043552778, - "learning_rate": 1.4272689351942577e-05, - "loss": 0.2005, - "step": 7447 - }, - { - "epoch": 0.38, - "grad_norm": 1.5229360109931684, - "learning_rate": 1.4271200229183043e-05, - "loss": 0.1836, - "step": 7448 - }, - { - "epoch": 0.38, - "grad_norm": 1.0477126003594448, - "learning_rate": 1.4269710990566958e-05, - "loss": 0.2061, - "step": 7449 - }, - { - "epoch": 0.38, - "grad_norm": 1.009328444666866, - "learning_rate": 1.4268221636134722e-05, - "loss": 0.2418, - "step": 7450 - }, - { - "epoch": 0.38, - "grad_norm": 0.8603144680485548, - "learning_rate": 1.4266732165926735e-05, - "loss": 0.2214, - "step": 7451 - }, - { - "epoch": 0.38, - "grad_norm": 1.0262286597197119, - "learning_rate": 1.4265242579983398e-05, - "loss": 0.2136, - "step": 7452 - }, - { - "epoch": 0.38, - "grad_norm": 0.7806878375610516, - "learning_rate": 1.4263752878345112e-05, - "loss": 0.2169, - "step": 7453 - }, - { - "epoch": 0.38, - "grad_norm": 1.1225825958989664, - "learning_rate": 1.4262263061052291e-05, - "loss": 0.2022, - "step": 7454 - }, - { - "epoch": 0.38, - "grad_norm": 1.6108452092479106, - "learning_rate": 1.4260773128145341e-05, - "loss": 0.1909, - "step": 7455 - }, - { - "epoch": 0.38, - "grad_norm": 1.0173183086801756, - "learning_rate": 1.4259283079664683e-05, - "loss": 0.1895, - "step": 7456 - }, - { - "epoch": 0.38, - "grad_norm": 0.8439453311015034, - "learning_rate": 1.4257792915650728e-05, - "loss": 0.1973, - "step": 7457 - }, - { - "epoch": 0.38, - "grad_norm": 0.8723570887290385, - "learning_rate": 1.42563026361439e-05, - "loss": 0.2065, - "step": 7458 - }, - { - "epoch": 0.38, - "grad_norm": 0.8536901531933275, - "learning_rate": 1.4254812241184623e-05, - "loss": 0.2027, - "step": 7459 - }, - { - "epoch": 0.38, - "grad_norm": 1.3662775011425288, - "learning_rate": 1.4253321730813326e-05, - "loss": 0.2275, - "step": 7460 - }, - { - "epoch": 0.38, - "grad_norm": 2.97442449862382, - "learning_rate": 1.4251831105070433e-05, - "loss": 0.2079, - "step": 7461 - }, - { - "epoch": 0.38, - "grad_norm": 0.790465580849618, - "learning_rate": 1.4250340363996382e-05, - "loss": 0.2051, - "step": 7462 - }, - { - "epoch": 0.38, - "grad_norm": 1.093983192420429, - "learning_rate": 1.4248849507631608e-05, - "loss": 0.212, - "step": 7463 - }, - { - "epoch": 0.38, - "grad_norm": 0.9556791044852044, - "learning_rate": 1.4247358536016554e-05, - "loss": 0.2081, - "step": 7464 - }, - { - "epoch": 0.38, - "grad_norm": 1.5223824451295664, - "learning_rate": 1.424586744919166e-05, - "loss": 0.1855, - "step": 7465 - }, - { - "epoch": 0.38, - "grad_norm": 0.8448789582666036, - "learning_rate": 1.4244376247197367e-05, - "loss": 0.2186, - "step": 7466 - }, - { - "epoch": 0.38, - "grad_norm": 1.3207283654711193, - "learning_rate": 1.424288493007413e-05, - "loss": 0.2033, - "step": 7467 - }, - { - "epoch": 0.38, - "grad_norm": 1.0601494536216605, - "learning_rate": 1.4241393497862403e-05, - "loss": 0.2069, - "step": 7468 - }, - { - "epoch": 0.38, - "grad_norm": 1.0956312757128686, - "learning_rate": 1.4239901950602635e-05, - "loss": 0.2081, - "step": 7469 - }, - { - "epoch": 0.38, - "grad_norm": 1.2581936092687398, - "learning_rate": 1.4238410288335289e-05, - "loss": 0.1978, - "step": 7470 - }, - { - "epoch": 0.38, - "grad_norm": 0.9771948394045026, - "learning_rate": 1.4236918511100824e-05, - "loss": 0.2054, - "step": 7471 - }, - { - "epoch": 0.38, - "grad_norm": 1.166621037901537, - "learning_rate": 1.4235426618939704e-05, - "loss": 0.2156, - "step": 7472 - }, - { - "epoch": 0.38, - "grad_norm": 0.9622643795882899, - "learning_rate": 1.4233934611892399e-05, - "loss": 0.2164, - "step": 7473 - }, - { - "epoch": 0.38, - "grad_norm": 1.004021903210207, - "learning_rate": 1.423244248999938e-05, - "loss": 0.1907, - "step": 7474 - }, - { - "epoch": 0.38, - "grad_norm": 1.0349109753246033, - "learning_rate": 1.4230950253301115e-05, - "loss": 0.1946, - "step": 7475 - }, - { - "epoch": 0.38, - "grad_norm": 1.343381097620725, - "learning_rate": 1.4229457901838087e-05, - "loss": 0.2022, - "step": 7476 - }, - { - "epoch": 0.38, - "grad_norm": 1.08645471980482, - "learning_rate": 1.4227965435650774e-05, - "loss": 0.1711, - "step": 7477 - }, - { - "epoch": 0.38, - "grad_norm": 1.376681948697426, - "learning_rate": 1.4226472854779663e-05, - "loss": 0.1741, - "step": 7478 - }, - { - "epoch": 0.38, - "grad_norm": 0.8444694354214273, - "learning_rate": 1.4224980159265234e-05, - "loss": 0.1995, - "step": 7479 - }, - { - "epoch": 0.38, - "grad_norm": 1.358410968788333, - "learning_rate": 1.422348734914798e-05, - "loss": 0.1953, - "step": 7480 - }, - { - "epoch": 0.38, - "grad_norm": 1.0823915381063276, - "learning_rate": 1.4221994424468395e-05, - "loss": 0.1951, - "step": 7481 - }, - { - "epoch": 0.38, - "grad_norm": 1.2856834550347218, - "learning_rate": 1.4220501385266971e-05, - "loss": 0.205, - "step": 7482 - }, - { - "epoch": 0.38, - "grad_norm": 1.185673366244246, - "learning_rate": 1.4219008231584211e-05, - "loss": 0.205, - "step": 7483 - }, - { - "epoch": 0.38, - "grad_norm": 1.0602670135534036, - "learning_rate": 1.421751496346061e-05, - "loss": 0.1888, - "step": 7484 - }, - { - "epoch": 0.38, - "grad_norm": 1.1431011201509098, - "learning_rate": 1.4216021580936681e-05, - "loss": 0.201, - "step": 7485 - }, - { - "epoch": 0.38, - "grad_norm": 1.9083813915428518, - "learning_rate": 1.4214528084052925e-05, - "loss": 0.1979, - "step": 7486 - }, - { - "epoch": 0.38, - "grad_norm": 1.0573463288335716, - "learning_rate": 1.421303447284986e-05, - "loss": 0.1886, - "step": 7487 - }, - { - "epoch": 0.38, - "grad_norm": 0.9609320810574705, - "learning_rate": 1.4211540747367998e-05, - "loss": 0.1885, - "step": 7488 - }, - { - "epoch": 0.38, - "grad_norm": 0.9505170150612685, - "learning_rate": 1.4210046907647852e-05, - "loss": 0.2028, - "step": 7489 - }, - { - "epoch": 0.38, - "grad_norm": 1.1493563606221087, - "learning_rate": 1.4208552953729949e-05, - "loss": 0.2101, - "step": 7490 - }, - { - "epoch": 0.38, - "grad_norm": 0.9009289958930716, - "learning_rate": 1.4207058885654807e-05, - "loss": 0.2003, - "step": 7491 - }, - { - "epoch": 0.38, - "grad_norm": 1.0713540112615993, - "learning_rate": 1.4205564703462955e-05, - "loss": 0.1874, - "step": 7492 - }, - { - "epoch": 0.38, - "grad_norm": 1.0702896376829447, - "learning_rate": 1.4204070407194923e-05, - "loss": 0.1953, - "step": 7493 - }, - { - "epoch": 0.38, - "grad_norm": 1.272473715175916, - "learning_rate": 1.4202575996891246e-05, - "loss": 0.2053, - "step": 7494 - }, - { - "epoch": 0.38, - "grad_norm": 0.908677908811022, - "learning_rate": 1.4201081472592455e-05, - "loss": 0.2103, - "step": 7495 - }, - { - "epoch": 0.38, - "grad_norm": 1.013295489272494, - "learning_rate": 1.4199586834339093e-05, - "loss": 0.2058, - "step": 7496 - }, - { - "epoch": 0.38, - "grad_norm": 1.0451142798524864, - "learning_rate": 1.41980920821717e-05, - "loss": 0.1923, - "step": 7497 - }, - { - "epoch": 0.38, - "grad_norm": 3.086382225473405, - "learning_rate": 1.4196597216130823e-05, - "loss": 0.1879, - "step": 7498 - }, - { - "epoch": 0.38, - "grad_norm": 1.0869633424813556, - "learning_rate": 1.4195102236257011e-05, - "loss": 0.2132, - "step": 7499 - }, - { - "epoch": 0.38, - "grad_norm": 1.0450687978973898, - "learning_rate": 1.4193607142590812e-05, - "loss": 0.2081, - "step": 7500 - }, - { - "epoch": 0.38, - "grad_norm": 1.1569443333472407, - "learning_rate": 1.4192111935172781e-05, - "loss": 0.221, - "step": 7501 - }, - { - "epoch": 0.38, - "grad_norm": 1.0329301579206815, - "learning_rate": 1.419061661404348e-05, - "loss": 0.1791, - "step": 7502 - }, - { - "epoch": 0.38, - "grad_norm": 1.6107073747509522, - "learning_rate": 1.4189121179243466e-05, - "loss": 0.189, - "step": 7503 - }, - { - "epoch": 0.38, - "grad_norm": 0.9123903187240583, - "learning_rate": 1.4187625630813303e-05, - "loss": 0.1986, - "step": 7504 - }, - { - "epoch": 0.38, - "grad_norm": 0.9802098693497492, - "learning_rate": 1.4186129968793558e-05, - "loss": 0.1743, - "step": 7505 - }, - { - "epoch": 0.38, - "grad_norm": 1.0183773359868546, - "learning_rate": 1.41846341932248e-05, - "loss": 0.1797, - "step": 7506 - }, - { - "epoch": 0.38, - "grad_norm": 1.0375210887014656, - "learning_rate": 1.4183138304147605e-05, - "loss": 0.1949, - "step": 7507 - }, - { - "epoch": 0.38, - "grad_norm": 0.9658665195692431, - "learning_rate": 1.4181642301602547e-05, - "loss": 0.195, - "step": 7508 - }, - { - "epoch": 0.38, - "grad_norm": 1.3377407285594807, - "learning_rate": 1.4180146185630205e-05, - "loss": 0.2411, - "step": 7509 - }, - { - "epoch": 0.38, - "grad_norm": 1.2619605983747744, - "learning_rate": 1.4178649956271162e-05, - "loss": 0.1842, - "step": 7510 - }, - { - "epoch": 0.38, - "grad_norm": 0.96968818167038, - "learning_rate": 1.4177153613566002e-05, - "loss": 0.191, - "step": 7511 - }, - { - "epoch": 0.38, - "grad_norm": 0.992558708079268, - "learning_rate": 1.4175657157555316e-05, - "loss": 0.182, - "step": 7512 - }, - { - "epoch": 0.38, - "grad_norm": 1.4435494809485996, - "learning_rate": 1.4174160588279692e-05, - "loss": 0.2136, - "step": 7513 - }, - { - "epoch": 0.38, - "grad_norm": 1.0321323815625674, - "learning_rate": 1.4172663905779725e-05, - "loss": 0.198, - "step": 7514 - }, - { - "epoch": 0.38, - "grad_norm": 0.7564634893716371, - "learning_rate": 1.4171167110096017e-05, - "loss": 0.1783, - "step": 7515 - }, - { - "epoch": 0.38, - "grad_norm": 0.9330135482607076, - "learning_rate": 1.4169670201269164e-05, - "loss": 0.2053, - "step": 7516 - }, - { - "epoch": 0.38, - "grad_norm": 0.8472369283082383, - "learning_rate": 1.4168173179339772e-05, - "loss": 0.2047, - "step": 7517 - }, - { - "epoch": 0.38, - "grad_norm": 0.8801592059554315, - "learning_rate": 1.4166676044348448e-05, - "loss": 0.1955, - "step": 7518 - }, - { - "epoch": 0.38, - "grad_norm": 1.0766442544705805, - "learning_rate": 1.4165178796335797e-05, - "loss": 0.2025, - "step": 7519 - }, - { - "epoch": 0.38, - "grad_norm": 1.0174652520991234, - "learning_rate": 1.416368143534244e-05, - "loss": 0.23, - "step": 7520 - }, - { - "epoch": 0.38, - "grad_norm": 0.9339941915171122, - "learning_rate": 1.4162183961408987e-05, - "loss": 0.1897, - "step": 7521 - }, - { - "epoch": 0.38, - "grad_norm": 1.5105807693516058, - "learning_rate": 1.416068637457606e-05, - "loss": 0.1781, - "step": 7522 - }, - { - "epoch": 0.38, - "grad_norm": 0.935675378996467, - "learning_rate": 1.4159188674884279e-05, - "loss": 0.2052, - "step": 7523 - }, - { - "epoch": 0.38, - "grad_norm": 1.1427958146169708, - "learning_rate": 1.4157690862374272e-05, - "loss": 0.2039, - "step": 7524 - }, - { - "epoch": 0.38, - "grad_norm": 1.658788262602051, - "learning_rate": 1.4156192937086666e-05, - "loss": 0.2107, - "step": 7525 - }, - { - "epoch": 0.38, - "grad_norm": 0.8955541412189186, - "learning_rate": 1.4154694899062089e-05, - "loss": 0.1832, - "step": 7526 - }, - { - "epoch": 0.38, - "grad_norm": 0.8889059275984288, - "learning_rate": 1.4153196748341179e-05, - "loss": 0.2379, - "step": 7527 - }, - { - "epoch": 0.38, - "grad_norm": 1.0487778302161304, - "learning_rate": 1.4151698484964574e-05, - "loss": 0.2044, - "step": 7528 - }, - { - "epoch": 0.38, - "grad_norm": 0.8812194186270116, - "learning_rate": 1.4150200108972912e-05, - "loss": 0.2, - "step": 7529 - }, - { - "epoch": 0.38, - "grad_norm": 0.9193206661664284, - "learning_rate": 1.4148701620406838e-05, - "loss": 0.2098, - "step": 7530 - }, - { - "epoch": 0.38, - "grad_norm": 0.993980966928357, - "learning_rate": 1.4147203019306997e-05, - "loss": 0.2048, - "step": 7531 - }, - { - "epoch": 0.38, - "grad_norm": 1.0750132796267666, - "learning_rate": 1.4145704305714038e-05, - "loss": 0.1998, - "step": 7532 - }, - { - "epoch": 0.38, - "grad_norm": 0.6834923657312354, - "learning_rate": 1.414420547966862e-05, - "loss": 0.1738, - "step": 7533 - }, - { - "epoch": 0.38, - "grad_norm": 0.7890938616586272, - "learning_rate": 1.4142706541211392e-05, - "loss": 0.1867, - "step": 7534 - }, - { - "epoch": 0.38, - "grad_norm": 1.2978997036990871, - "learning_rate": 1.4141207490383018e-05, - "loss": 0.1919, - "step": 7535 - }, - { - "epoch": 0.38, - "grad_norm": 0.8117457987197362, - "learning_rate": 1.4139708327224155e-05, - "loss": 0.1937, - "step": 7536 - }, - { - "epoch": 0.38, - "grad_norm": 0.9688092437532801, - "learning_rate": 1.4138209051775467e-05, - "loss": 0.2102, - "step": 7537 - }, - { - "epoch": 0.38, - "grad_norm": 1.0092214002394286, - "learning_rate": 1.4136709664077628e-05, - "loss": 0.2067, - "step": 7538 - }, - { - "epoch": 0.38, - "grad_norm": 1.2446739747004254, - "learning_rate": 1.4135210164171306e-05, - "loss": 0.2075, - "step": 7539 - }, - { - "epoch": 0.38, - "grad_norm": 1.409950331815906, - "learning_rate": 1.4133710552097175e-05, - "loss": 0.1752, - "step": 7540 - }, - { - "epoch": 0.38, - "grad_norm": 1.0317598910736085, - "learning_rate": 1.4132210827895909e-05, - "loss": 0.2115, - "step": 7541 - }, - { - "epoch": 0.38, - "grad_norm": 1.363377833518173, - "learning_rate": 1.4130710991608194e-05, - "loss": 0.2003, - "step": 7542 - }, - { - "epoch": 0.38, - "grad_norm": 1.0121446264043472, - "learning_rate": 1.4129211043274709e-05, - "loss": 0.2075, - "step": 7543 - }, - { - "epoch": 0.38, - "grad_norm": 0.8417773315163373, - "learning_rate": 1.412771098293614e-05, - "loss": 0.1946, - "step": 7544 - }, - { - "epoch": 0.38, - "grad_norm": 1.1855268084816621, - "learning_rate": 1.4126210810633176e-05, - "loss": 0.2152, - "step": 7545 - }, - { - "epoch": 0.38, - "grad_norm": 1.0237124307200967, - "learning_rate": 1.4124710526406516e-05, - "loss": 0.2443, - "step": 7546 - }, - { - "epoch": 0.38, - "grad_norm": 1.404831522108842, - "learning_rate": 1.4123210130296845e-05, - "loss": 0.198, - "step": 7547 - }, - { - "epoch": 0.38, - "grad_norm": 0.9074529008026088, - "learning_rate": 1.412170962234487e-05, - "loss": 0.1943, - "step": 7548 - }, - { - "epoch": 0.38, - "grad_norm": 1.51360435231819, - "learning_rate": 1.4120209002591286e-05, - "loss": 0.1871, - "step": 7549 - }, - { - "epoch": 0.38, - "grad_norm": 0.9224528569596793, - "learning_rate": 1.41187082710768e-05, - "loss": 0.1955, - "step": 7550 - }, - { - "epoch": 0.38, - "grad_norm": 0.8130885991407517, - "learning_rate": 1.4117207427842122e-05, - "loss": 0.1914, - "step": 7551 - }, - { - "epoch": 0.38, - "grad_norm": 1.0662624440238975, - "learning_rate": 1.4115706472927957e-05, - "loss": 0.1817, - "step": 7552 - }, - { - "epoch": 0.38, - "grad_norm": 0.8750154796571464, - "learning_rate": 1.4114205406375025e-05, - "loss": 0.2128, - "step": 7553 - }, - { - "epoch": 0.38, - "grad_norm": 1.0251582509921566, - "learning_rate": 1.4112704228224034e-05, - "loss": 0.1807, - "step": 7554 - }, - { - "epoch": 0.38, - "grad_norm": 0.8656304735082289, - "learning_rate": 1.4111202938515711e-05, - "loss": 0.1855, - "step": 7555 - }, - { - "epoch": 0.38, - "grad_norm": 0.9354978653471507, - "learning_rate": 1.4109701537290779e-05, - "loss": 0.2093, - "step": 7556 - }, - { - "epoch": 0.38, - "grad_norm": 0.9342037452734095, - "learning_rate": 1.4108200024589958e-05, - "loss": 0.1875, - "step": 7557 - }, - { - "epoch": 0.38, - "grad_norm": 0.9043537891505412, - "learning_rate": 1.410669840045398e-05, - "loss": 0.1928, - "step": 7558 - }, - { - "epoch": 0.38, - "grad_norm": 1.0488505805359303, - "learning_rate": 1.4105196664923577e-05, - "loss": 0.2078, - "step": 7559 - }, - { - "epoch": 0.38, - "grad_norm": 1.0669685487941905, - "learning_rate": 1.4103694818039483e-05, - "loss": 0.1933, - "step": 7560 - }, - { - "epoch": 0.38, - "grad_norm": 1.0754201019931622, - "learning_rate": 1.4102192859842432e-05, - "loss": 0.2111, - "step": 7561 - }, - { - "epoch": 0.38, - "grad_norm": 1.0524796337983948, - "learning_rate": 1.4100690790373168e-05, - "loss": 0.1853, - "step": 7562 - }, - { - "epoch": 0.38, - "grad_norm": 1.023500297719706, - "learning_rate": 1.4099188609672436e-05, - "loss": 0.1698, - "step": 7563 - }, - { - "epoch": 0.38, - "grad_norm": 0.6937197254418842, - "learning_rate": 1.409768631778098e-05, - "loss": 0.1687, - "step": 7564 - }, - { - "epoch": 0.38, - "grad_norm": 0.9742004715333842, - "learning_rate": 1.4096183914739554e-05, - "loss": 0.1947, - "step": 7565 - }, - { - "epoch": 0.38, - "grad_norm": 0.920578932705953, - "learning_rate": 1.4094681400588908e-05, - "loss": 0.1946, - "step": 7566 - }, - { - "epoch": 0.38, - "grad_norm": 1.3660496533422637, - "learning_rate": 1.4093178775369793e-05, - "loss": 0.2007, - "step": 7567 - }, - { - "epoch": 0.38, - "grad_norm": 0.8202394736784563, - "learning_rate": 1.4091676039122977e-05, - "loss": 0.2181, - "step": 7568 - }, - { - "epoch": 0.38, - "grad_norm": 0.8823324782020567, - "learning_rate": 1.4090173191889216e-05, - "loss": 0.2063, - "step": 7569 - }, - { - "epoch": 0.38, - "grad_norm": 0.6719778905366667, - "learning_rate": 1.4088670233709278e-05, - "loss": 0.185, - "step": 7570 - }, - { - "epoch": 0.38, - "grad_norm": 0.9286681884469269, - "learning_rate": 1.4087167164623927e-05, - "loss": 0.1838, - "step": 7571 - }, - { - "epoch": 0.39, - "grad_norm": 0.9178992253331234, - "learning_rate": 1.4085663984673936e-05, - "loss": 0.2011, - "step": 7572 - }, - { - "epoch": 0.39, - "grad_norm": 0.867270369064487, - "learning_rate": 1.408416069390008e-05, - "loss": 0.1782, - "step": 7573 - }, - { - "epoch": 0.39, - "grad_norm": 0.6760773325114068, - "learning_rate": 1.4082657292343131e-05, - "loss": 0.2006, - "step": 7574 - }, - { - "epoch": 0.39, - "grad_norm": 0.9197042191937326, - "learning_rate": 1.4081153780043878e-05, - "loss": 0.1803, - "step": 7575 - }, - { - "epoch": 0.39, - "grad_norm": 0.7566894677669311, - "learning_rate": 1.4079650157043095e-05, - "loss": 0.2011, - "step": 7576 - }, - { - "epoch": 0.39, - "grad_norm": 1.9803458625908985, - "learning_rate": 1.4078146423381571e-05, - "loss": 0.2276, - "step": 7577 - }, - { - "epoch": 0.39, - "grad_norm": 0.8948601920458561, - "learning_rate": 1.4076642579100095e-05, - "loss": 0.1957, - "step": 7578 - }, - { - "epoch": 0.39, - "grad_norm": 0.8251175785770059, - "learning_rate": 1.407513862423946e-05, - "loss": 0.2001, - "step": 7579 - }, - { - "epoch": 0.39, - "grad_norm": 0.8159217586434515, - "learning_rate": 1.4073634558840454e-05, - "loss": 0.1887, - "step": 7580 - }, - { - "epoch": 0.39, - "grad_norm": 1.712292700290352, - "learning_rate": 1.4072130382943888e-05, - "loss": 0.1882, - "step": 7581 - }, - { - "epoch": 0.39, - "grad_norm": 1.0144574994086635, - "learning_rate": 1.4070626096590552e-05, - "loss": 0.2054, - "step": 7582 - }, - { - "epoch": 0.39, - "grad_norm": 0.8214830372217938, - "learning_rate": 1.4069121699821253e-05, - "loss": 0.1965, - "step": 7583 - }, - { - "epoch": 0.39, - "grad_norm": 1.0635809609229483, - "learning_rate": 1.4067617192676799e-05, - "loss": 0.1768, - "step": 7584 - }, - { - "epoch": 0.39, - "grad_norm": 0.9959854507722363, - "learning_rate": 1.4066112575197996e-05, - "loss": 0.1972, - "step": 7585 - }, - { - "epoch": 0.39, - "grad_norm": 1.0418149972153863, - "learning_rate": 1.4064607847425664e-05, - "loss": 0.216, - "step": 7586 - }, - { - "epoch": 0.39, - "grad_norm": 0.9272614314221803, - "learning_rate": 1.4063103009400613e-05, - "loss": 0.2134, - "step": 7587 - }, - { - "epoch": 0.39, - "grad_norm": 0.8282259836531397, - "learning_rate": 1.4061598061163664e-05, - "loss": 0.2053, - "step": 7588 - }, - { - "epoch": 0.39, - "grad_norm": 1.518892077046803, - "learning_rate": 1.4060093002755636e-05, - "loss": 0.21, - "step": 7589 - }, - { - "epoch": 0.39, - "grad_norm": 0.872535797315508, - "learning_rate": 1.4058587834217356e-05, - "loss": 0.1814, - "step": 7590 - }, - { - "epoch": 0.39, - "grad_norm": 1.2812768967861146, - "learning_rate": 1.4057082555589654e-05, - "loss": 0.2118, - "step": 7591 - }, - { - "epoch": 0.39, - "grad_norm": 1.8252964740636697, - "learning_rate": 1.4055577166913358e-05, - "loss": 0.2015, - "step": 7592 - }, - { - "epoch": 0.39, - "grad_norm": 1.0118958760314445, - "learning_rate": 1.4054071668229304e-05, - "loss": 0.207, - "step": 7593 - }, - { - "epoch": 0.39, - "grad_norm": 0.8168769002372456, - "learning_rate": 1.4052566059578326e-05, - "loss": 0.1981, - "step": 7594 - }, - { - "epoch": 0.39, - "grad_norm": 0.9911446249748135, - "learning_rate": 1.4051060341001264e-05, - "loss": 0.2068, - "step": 7595 - }, - { - "epoch": 0.39, - "grad_norm": 0.8867221481455925, - "learning_rate": 1.4049554512538958e-05, - "loss": 0.1939, - "step": 7596 - }, - { - "epoch": 0.39, - "grad_norm": 0.8235464707735634, - "learning_rate": 1.4048048574232261e-05, - "loss": 0.1808, - "step": 7597 - }, - { - "epoch": 0.39, - "grad_norm": 0.8495631625786931, - "learning_rate": 1.4046542526122018e-05, - "loss": 0.2044, - "step": 7598 - }, - { - "epoch": 0.39, - "grad_norm": 1.0312588114857115, - "learning_rate": 1.4045036368249079e-05, - "loss": 0.1923, - "step": 7599 - }, - { - "epoch": 0.39, - "grad_norm": 1.2257480499644202, - "learning_rate": 1.4043530100654301e-05, - "loss": 0.2095, - "step": 7600 - }, - { - "epoch": 0.39, - "grad_norm": 0.9347192728461339, - "learning_rate": 1.4042023723378541e-05, - "loss": 0.1793, - "step": 7601 - }, - { - "epoch": 0.39, - "grad_norm": 0.8903951969647127, - "learning_rate": 1.4040517236462656e-05, - "loss": 0.1929, - "step": 7602 - }, - { - "epoch": 0.39, - "grad_norm": 1.3242813549316574, - "learning_rate": 1.4039010639947516e-05, - "loss": 0.1972, - "step": 7603 - }, - { - "epoch": 0.39, - "grad_norm": 1.201248666535143, - "learning_rate": 1.4037503933873984e-05, - "loss": 0.2101, - "step": 7604 - }, - { - "epoch": 0.39, - "grad_norm": 1.1902203553720785, - "learning_rate": 1.403599711828293e-05, - "loss": 0.2285, - "step": 7605 - }, - { - "epoch": 0.39, - "grad_norm": 0.9544624554727649, - "learning_rate": 1.4034490193215224e-05, - "loss": 0.1957, - "step": 7606 - }, - { - "epoch": 0.39, - "grad_norm": 1.0195966094164068, - "learning_rate": 1.4032983158711744e-05, - "loss": 0.2003, - "step": 7607 - }, - { - "epoch": 0.39, - "grad_norm": 1.3276408942198594, - "learning_rate": 1.403147601481337e-05, - "loss": 0.2001, - "step": 7608 - }, - { - "epoch": 0.39, - "grad_norm": 0.7275677793246561, - "learning_rate": 1.4029968761560979e-05, - "loss": 0.1805, - "step": 7609 - }, - { - "epoch": 0.39, - "grad_norm": 0.8479951597951434, - "learning_rate": 1.402846139899546e-05, - "loss": 0.2048, - "step": 7610 - }, - { - "epoch": 0.39, - "grad_norm": 1.838782389886137, - "learning_rate": 1.4026953927157698e-05, - "loss": 0.2025, - "step": 7611 - }, - { - "epoch": 0.39, - "grad_norm": 0.8453036827999479, - "learning_rate": 1.4025446346088582e-05, - "loss": 0.1987, - "step": 7612 - }, - { - "epoch": 0.39, - "grad_norm": 1.0457773409872628, - "learning_rate": 1.402393865582901e-05, - "loss": 0.1991, - "step": 7613 - }, - { - "epoch": 0.39, - "grad_norm": 0.8654771992999472, - "learning_rate": 1.4022430856419872e-05, - "loss": 0.1999, - "step": 7614 - }, - { - "epoch": 0.39, - "grad_norm": 1.1394541387656685, - "learning_rate": 1.4020922947902067e-05, - "loss": 0.2206, - "step": 7615 - }, - { - "epoch": 0.39, - "grad_norm": 0.8091480970897881, - "learning_rate": 1.4019414930316501e-05, - "loss": 0.1888, - "step": 7616 - }, - { - "epoch": 0.39, - "grad_norm": 1.0685495245375725, - "learning_rate": 1.4017906803704083e-05, - "loss": 0.1874, - "step": 7617 - }, - { - "epoch": 0.39, - "grad_norm": 0.872612439298588, - "learning_rate": 1.401639856810571e-05, - "loss": 0.1935, - "step": 7618 - }, - { - "epoch": 0.39, - "grad_norm": 1.1548406631959833, - "learning_rate": 1.4014890223562303e-05, - "loss": 0.1862, - "step": 7619 - }, - { - "epoch": 0.39, - "grad_norm": 0.9949755985513199, - "learning_rate": 1.401338177011477e-05, - "loss": 0.1795, - "step": 7620 - }, - { - "epoch": 0.39, - "grad_norm": 0.9879919215858408, - "learning_rate": 1.401187320780403e-05, - "loss": 0.1913, - "step": 7621 - }, - { - "epoch": 0.39, - "grad_norm": 0.8082593760487321, - "learning_rate": 1.4010364536671004e-05, - "loss": 0.1858, - "step": 7622 - }, - { - "epoch": 0.39, - "grad_norm": 1.4274036461103985, - "learning_rate": 1.4008855756756612e-05, - "loss": 0.2293, - "step": 7623 - }, - { - "epoch": 0.39, - "grad_norm": 0.9867689521147746, - "learning_rate": 1.4007346868101779e-05, - "loss": 0.1839, - "step": 7624 - }, - { - "epoch": 0.39, - "grad_norm": 0.9102121091949064, - "learning_rate": 1.4005837870747439e-05, - "loss": 0.2006, - "step": 7625 - }, - { - "epoch": 0.39, - "grad_norm": 0.854600912758212, - "learning_rate": 1.400432876473452e-05, - "loss": 0.1996, - "step": 7626 - }, - { - "epoch": 0.39, - "grad_norm": 1.2096446917432433, - "learning_rate": 1.4002819550103958e-05, - "loss": 0.1817, - "step": 7627 - }, - { - "epoch": 0.39, - "grad_norm": 1.0627923072240306, - "learning_rate": 1.4001310226896689e-05, - "loss": 0.2394, - "step": 7628 - }, - { - "epoch": 0.39, - "grad_norm": 1.2724119703874543, - "learning_rate": 1.3999800795153652e-05, - "loss": 0.186, - "step": 7629 - }, - { - "epoch": 0.39, - "grad_norm": 0.9999657164938784, - "learning_rate": 1.3998291254915797e-05, - "loss": 0.197, - "step": 7630 - }, - { - "epoch": 0.39, - "grad_norm": 1.2678985627552222, - "learning_rate": 1.3996781606224063e-05, - "loss": 0.1817, - "step": 7631 - }, - { - "epoch": 0.39, - "grad_norm": 0.9871813158684251, - "learning_rate": 1.3995271849119403e-05, - "loss": 0.2123, - "step": 7632 - }, - { - "epoch": 0.39, - "grad_norm": 0.7917965541568505, - "learning_rate": 1.3993761983642765e-05, - "loss": 0.1854, - "step": 7633 - }, - { - "epoch": 0.39, - "grad_norm": 0.948713022523547, - "learning_rate": 1.3992252009835112e-05, - "loss": 0.1835, - "step": 7634 - }, - { - "epoch": 0.39, - "grad_norm": 1.013801688872813, - "learning_rate": 1.3990741927737395e-05, - "loss": 0.1914, - "step": 7635 - }, - { - "epoch": 0.39, - "grad_norm": 1.0338027271382328, - "learning_rate": 1.3989231737390578e-05, - "loss": 0.2086, - "step": 7636 - }, - { - "epoch": 0.39, - "grad_norm": 1.0038262656980823, - "learning_rate": 1.3987721438835626e-05, - "loss": 0.2164, - "step": 7637 - }, - { - "epoch": 0.39, - "grad_norm": 1.198776705998106, - "learning_rate": 1.3986211032113504e-05, - "loss": 0.2259, - "step": 7638 - }, - { - "epoch": 0.39, - "grad_norm": 1.1559578172326987, - "learning_rate": 1.3984700517265184e-05, - "loss": 0.2053, - "step": 7639 - }, - { - "epoch": 0.39, - "grad_norm": 1.9478082394204406, - "learning_rate": 1.3983189894331636e-05, - "loss": 0.2075, - "step": 7640 - }, - { - "epoch": 0.39, - "grad_norm": 1.0319933891708784, - "learning_rate": 1.3981679163353839e-05, - "loss": 0.1778, - "step": 7641 - }, - { - "epoch": 0.39, - "grad_norm": 1.2467961556378275, - "learning_rate": 1.3980168324372768e-05, - "loss": 0.1881, - "step": 7642 - }, - { - "epoch": 0.39, - "grad_norm": 1.097771450032399, - "learning_rate": 1.3978657377429405e-05, - "loss": 0.2074, - "step": 7643 - }, - { - "epoch": 0.39, - "grad_norm": 0.7948181272258855, - "learning_rate": 1.3977146322564739e-05, - "loss": 0.1797, - "step": 7644 - }, - { - "epoch": 0.39, - "grad_norm": 1.6906764323928702, - "learning_rate": 1.3975635159819757e-05, - "loss": 0.1994, - "step": 7645 - }, - { - "epoch": 0.39, - "grad_norm": 0.8555549281608942, - "learning_rate": 1.3974123889235437e-05, - "loss": 0.1963, - "step": 7646 - }, - { - "epoch": 0.39, - "grad_norm": 0.8152633474642856, - "learning_rate": 1.3972612510852789e-05, - "loss": 0.2028, - "step": 7647 - }, - { - "epoch": 0.39, - "grad_norm": 0.834483546768694, - "learning_rate": 1.3971101024712803e-05, - "loss": 0.2009, - "step": 7648 - }, - { - "epoch": 0.39, - "grad_norm": 0.8059419858590291, - "learning_rate": 1.3969589430856476e-05, - "loss": 0.1705, - "step": 7649 - }, - { - "epoch": 0.39, - "grad_norm": 0.8663444660848391, - "learning_rate": 1.3968077729324808e-05, - "loss": 0.2038, - "step": 7650 - }, - { - "epoch": 0.39, - "grad_norm": 0.9524959304972902, - "learning_rate": 1.3966565920158811e-05, - "loss": 0.1794, - "step": 7651 - }, - { - "epoch": 0.39, - "grad_norm": 0.9974758966169943, - "learning_rate": 1.3965054003399493e-05, - "loss": 0.1747, - "step": 7652 - }, - { - "epoch": 0.39, - "grad_norm": 1.0143608736447438, - "learning_rate": 1.3963541979087855e-05, - "loss": 0.1919, - "step": 7653 - }, - { - "epoch": 0.39, - "grad_norm": 0.8770771941762959, - "learning_rate": 1.396202984726492e-05, - "loss": 0.2121, - "step": 7654 - }, - { - "epoch": 0.39, - "grad_norm": 1.058384880625591, - "learning_rate": 1.3960517607971697e-05, - "loss": 0.2058, - "step": 7655 - }, - { - "epoch": 0.39, - "grad_norm": 1.2266740375032155, - "learning_rate": 1.3959005261249217e-05, - "loss": 0.1884, - "step": 7656 - }, - { - "epoch": 0.39, - "grad_norm": 0.803399173151223, - "learning_rate": 1.3957492807138491e-05, - "loss": 0.1858, - "step": 7657 - }, - { - "epoch": 0.39, - "grad_norm": 2.5027341436903714, - "learning_rate": 1.3955980245680551e-05, - "loss": 0.2012, - "step": 7658 - }, - { - "epoch": 0.39, - "grad_norm": 0.7537433177825293, - "learning_rate": 1.3954467576916422e-05, - "loss": 0.197, - "step": 7659 - }, - { - "epoch": 0.39, - "grad_norm": 1.0281275927478755, - "learning_rate": 1.3952954800887137e-05, - "loss": 0.1985, - "step": 7660 - }, - { - "epoch": 0.39, - "grad_norm": 3.4007636048539807, - "learning_rate": 1.3951441917633732e-05, - "loss": 0.2119, - "step": 7661 - }, - { - "epoch": 0.39, - "grad_norm": 1.5874162488104218, - "learning_rate": 1.394992892719724e-05, - "loss": 0.1999, - "step": 7662 - }, - { - "epoch": 0.39, - "grad_norm": 0.90732612942027, - "learning_rate": 1.3948415829618705e-05, - "loss": 0.1906, - "step": 7663 - }, - { - "epoch": 0.39, - "grad_norm": 0.8673104556121487, - "learning_rate": 1.3946902624939164e-05, - "loss": 0.2082, - "step": 7664 - }, - { - "epoch": 0.39, - "grad_norm": 1.037346088119156, - "learning_rate": 1.3945389313199669e-05, - "loss": 0.2019, - "step": 7665 - }, - { - "epoch": 0.39, - "grad_norm": 1.123917949735145, - "learning_rate": 1.3943875894441265e-05, - "loss": 0.2053, - "step": 7666 - }, - { - "epoch": 0.39, - "grad_norm": 0.9346108283378147, - "learning_rate": 1.3942362368705007e-05, - "loss": 0.1752, - "step": 7667 - }, - { - "epoch": 0.39, - "grad_norm": 0.8821888566223758, - "learning_rate": 1.3940848736031943e-05, - "loss": 0.1883, - "step": 7668 - }, - { - "epoch": 0.39, - "grad_norm": 1.0537674571428322, - "learning_rate": 1.3939334996463136e-05, - "loss": 0.2164, - "step": 7669 - }, - { - "epoch": 0.39, - "grad_norm": 1.0721094239860878, - "learning_rate": 1.3937821150039645e-05, - "loss": 0.2313, - "step": 7670 - }, - { - "epoch": 0.39, - "grad_norm": 0.8856670502546947, - "learning_rate": 1.3936307196802532e-05, - "loss": 0.1929, - "step": 7671 - }, - { - "epoch": 0.39, - "grad_norm": 1.111960198508083, - "learning_rate": 1.3934793136792863e-05, - "loss": 0.2132, - "step": 7672 - }, - { - "epoch": 0.39, - "grad_norm": 1.1384490541523558, - "learning_rate": 1.3933278970051712e-05, - "loss": 0.2056, - "step": 7673 - }, - { - "epoch": 0.39, - "grad_norm": 1.2061262362828225, - "learning_rate": 1.3931764696620144e-05, - "loss": 0.2098, - "step": 7674 - }, - { - "epoch": 0.39, - "grad_norm": 1.6609050792349747, - "learning_rate": 1.3930250316539237e-05, - "loss": 0.1937, - "step": 7675 - }, - { - "epoch": 0.39, - "grad_norm": 1.0601825483540026, - "learning_rate": 1.3928735829850069e-05, - "loss": 0.2108, - "step": 7676 - }, - { - "epoch": 0.39, - "grad_norm": 0.9550895412085639, - "learning_rate": 1.3927221236593717e-05, - "loss": 0.1926, - "step": 7677 - }, - { - "epoch": 0.39, - "grad_norm": 0.8557196616439343, - "learning_rate": 1.392570653681127e-05, - "loss": 0.196, - "step": 7678 - }, - { - "epoch": 0.39, - "grad_norm": 0.9880124750398763, - "learning_rate": 1.392419173054381e-05, - "loss": 0.2059, - "step": 7679 - }, - { - "epoch": 0.39, - "grad_norm": 0.8231742504746878, - "learning_rate": 1.3922676817832432e-05, - "loss": 0.1963, - "step": 7680 - }, - { - "epoch": 0.39, - "grad_norm": 1.1466035230522185, - "learning_rate": 1.3921161798718217e-05, - "loss": 0.1918, - "step": 7681 - }, - { - "epoch": 0.39, - "grad_norm": 0.8208451969313353, - "learning_rate": 1.3919646673242272e-05, - "loss": 0.1905, - "step": 7682 - }, - { - "epoch": 0.39, - "grad_norm": 1.3997750841679093, - "learning_rate": 1.3918131441445687e-05, - "loss": 0.2143, - "step": 7683 - }, - { - "epoch": 0.39, - "grad_norm": 0.9636337485045307, - "learning_rate": 1.3916616103369567e-05, - "loss": 0.1944, - "step": 7684 - }, - { - "epoch": 0.39, - "grad_norm": 1.6144844863183354, - "learning_rate": 1.391510065905501e-05, - "loss": 0.1966, - "step": 7685 - }, - { - "epoch": 0.39, - "grad_norm": 0.9800808641664451, - "learning_rate": 1.3913585108543131e-05, - "loss": 0.1754, - "step": 7686 - }, - { - "epoch": 0.39, - "grad_norm": 0.8861425397209246, - "learning_rate": 1.3912069451875032e-05, - "loss": 0.192, - "step": 7687 - }, - { - "epoch": 0.39, - "grad_norm": 0.9087226051425511, - "learning_rate": 1.391055368909183e-05, - "loss": 0.1769, - "step": 7688 - }, - { - "epoch": 0.39, - "grad_norm": 1.054864288688696, - "learning_rate": 1.3909037820234637e-05, - "loss": 0.1894, - "step": 7689 - }, - { - "epoch": 0.39, - "grad_norm": 1.1708274247339223, - "learning_rate": 1.3907521845344571e-05, - "loss": 0.1985, - "step": 7690 - }, - { - "epoch": 0.39, - "grad_norm": 1.0713949500783901, - "learning_rate": 1.3906005764462758e-05, - "loss": 0.2113, - "step": 7691 - }, - { - "epoch": 0.39, - "grad_norm": 1.5889139746273195, - "learning_rate": 1.3904489577630316e-05, - "loss": 0.1841, - "step": 7692 - }, - { - "epoch": 0.39, - "grad_norm": 1.091357490656932, - "learning_rate": 1.3902973284888375e-05, - "loss": 0.1912, - "step": 7693 - }, - { - "epoch": 0.39, - "grad_norm": 0.8368483420261934, - "learning_rate": 1.3901456886278063e-05, - "loss": 0.204, - "step": 7694 - }, - { - "epoch": 0.39, - "grad_norm": 1.3583693284481406, - "learning_rate": 1.389994038184051e-05, - "loss": 0.203, - "step": 7695 - }, - { - "epoch": 0.39, - "grad_norm": 0.9312531164878359, - "learning_rate": 1.3898423771616854e-05, - "loss": 0.2161, - "step": 7696 - }, - { - "epoch": 0.39, - "grad_norm": 1.0903451646742162, - "learning_rate": 1.3896907055648235e-05, - "loss": 0.1956, - "step": 7697 - }, - { - "epoch": 0.39, - "grad_norm": 1.3439929901930354, - "learning_rate": 1.3895390233975793e-05, - "loss": 0.1906, - "step": 7698 - }, - { - "epoch": 0.39, - "grad_norm": 0.9825101816701041, - "learning_rate": 1.3893873306640669e-05, - "loss": 0.1943, - "step": 7699 - }, - { - "epoch": 0.39, - "grad_norm": 1.7285069594345552, - "learning_rate": 1.3892356273684012e-05, - "loss": 0.1971, - "step": 7700 - }, - { - "epoch": 0.39, - "grad_norm": 0.8837068614510146, - "learning_rate": 1.3890839135146972e-05, - "loss": 0.2041, - "step": 7701 - }, - { - "epoch": 0.39, - "grad_norm": 1.0181464717308626, - "learning_rate": 1.38893218910707e-05, - "loss": 0.1595, - "step": 7702 - }, - { - "epoch": 0.39, - "grad_norm": 1.1712417123030014, - "learning_rate": 1.388780454149635e-05, - "loss": 0.1995, - "step": 7703 - }, - { - "epoch": 0.39, - "grad_norm": 1.4386055659474735, - "learning_rate": 1.3886287086465085e-05, - "loss": 0.199, - "step": 7704 - }, - { - "epoch": 0.39, - "grad_norm": 0.9347759139666304, - "learning_rate": 1.3884769526018063e-05, - "loss": 0.1871, - "step": 7705 - }, - { - "epoch": 0.39, - "grad_norm": 1.035172557245781, - "learning_rate": 1.3883251860196446e-05, - "loss": 0.2382, - "step": 7706 - }, - { - "epoch": 0.39, - "grad_norm": 1.0406406386957872, - "learning_rate": 1.3881734089041405e-05, - "loss": 0.1744, - "step": 7707 - }, - { - "epoch": 0.39, - "grad_norm": 1.0977754191926146, - "learning_rate": 1.388021621259411e-05, - "loss": 0.197, - "step": 7708 - }, - { - "epoch": 0.39, - "grad_norm": 1.0018304593302076, - "learning_rate": 1.3878698230895726e-05, - "loss": 0.1818, - "step": 7709 - }, - { - "epoch": 0.39, - "grad_norm": 0.9539595526956814, - "learning_rate": 1.3877180143987436e-05, - "loss": 0.1904, - "step": 7710 - }, - { - "epoch": 0.39, - "grad_norm": 1.3344670181819707, - "learning_rate": 1.3875661951910417e-05, - "loss": 0.1971, - "step": 7711 - }, - { - "epoch": 0.39, - "grad_norm": 0.960361113128031, - "learning_rate": 1.3874143654705845e-05, - "loss": 0.1771, - "step": 7712 - }, - { - "epoch": 0.39, - "grad_norm": 1.2090564270325268, - "learning_rate": 1.387262525241491e-05, - "loss": 0.2206, - "step": 7713 - }, - { - "epoch": 0.39, - "grad_norm": 0.986088483516514, - "learning_rate": 1.3871106745078798e-05, - "loss": 0.1889, - "step": 7714 - }, - { - "epoch": 0.39, - "grad_norm": 1.434936865372642, - "learning_rate": 1.3869588132738696e-05, - "loss": 0.2354, - "step": 7715 - }, - { - "epoch": 0.39, - "grad_norm": 1.0088629743524504, - "learning_rate": 1.3868069415435795e-05, - "loss": 0.184, - "step": 7716 - }, - { - "epoch": 0.39, - "grad_norm": 1.6641815842457266, - "learning_rate": 1.3866550593211292e-05, - "loss": 0.2017, - "step": 7717 - }, - { - "epoch": 0.39, - "grad_norm": 1.3170396920934737, - "learning_rate": 1.386503166610639e-05, - "loss": 0.1842, - "step": 7718 - }, - { - "epoch": 0.39, - "grad_norm": 0.7855746745131865, - "learning_rate": 1.3863512634162283e-05, - "loss": 0.1922, - "step": 7719 - }, - { - "epoch": 0.39, - "grad_norm": 0.9041492336676009, - "learning_rate": 1.3861993497420176e-05, - "loss": 0.1913, - "step": 7720 - }, - { - "epoch": 0.39, - "grad_norm": 1.1314961895930964, - "learning_rate": 1.3860474255921279e-05, - "loss": 0.2246, - "step": 7721 - }, - { - "epoch": 0.39, - "grad_norm": 1.5367511841726282, - "learning_rate": 1.38589549097068e-05, - "loss": 0.1881, - "step": 7722 - }, - { - "epoch": 0.39, - "grad_norm": 0.9589789968589525, - "learning_rate": 1.3857435458817952e-05, - "loss": 0.2037, - "step": 7723 - }, - { - "epoch": 0.39, - "grad_norm": 1.1288332166730548, - "learning_rate": 1.3855915903295949e-05, - "loss": 0.1902, - "step": 7724 - }, - { - "epoch": 0.39, - "grad_norm": 0.960376489981517, - "learning_rate": 1.3854396243182007e-05, - "loss": 0.2171, - "step": 7725 - }, - { - "epoch": 0.39, - "grad_norm": 0.9273880760394527, - "learning_rate": 1.3852876478517352e-05, - "loss": 0.2086, - "step": 7726 - }, - { - "epoch": 0.39, - "grad_norm": 1.2440017744030931, - "learning_rate": 1.3851356609343202e-05, - "loss": 0.1798, - "step": 7727 - }, - { - "epoch": 0.39, - "grad_norm": 1.8535773225164793, - "learning_rate": 1.3849836635700791e-05, - "loss": 0.1772, - "step": 7728 - }, - { - "epoch": 0.39, - "grad_norm": 0.963785047508875, - "learning_rate": 1.384831655763134e-05, - "loss": 0.2068, - "step": 7729 - }, - { - "epoch": 0.39, - "grad_norm": 1.214837663043247, - "learning_rate": 1.3846796375176083e-05, - "loss": 0.1957, - "step": 7730 - }, - { - "epoch": 0.39, - "grad_norm": 1.1073446266276163, - "learning_rate": 1.3845276088376262e-05, - "loss": 0.2112, - "step": 7731 - }, - { - "epoch": 0.39, - "grad_norm": 1.0268993364703471, - "learning_rate": 1.3843755697273109e-05, - "loss": 0.1911, - "step": 7732 - }, - { - "epoch": 0.39, - "grad_norm": 0.9552792651480384, - "learning_rate": 1.3842235201907865e-05, - "loss": 0.2289, - "step": 7733 - }, - { - "epoch": 0.39, - "grad_norm": 1.0202539534015398, - "learning_rate": 1.3840714602321774e-05, - "loss": 0.1571, - "step": 7734 - }, - { - "epoch": 0.39, - "grad_norm": 0.8635891135148347, - "learning_rate": 1.3839193898556083e-05, - "loss": 0.1876, - "step": 7735 - }, - { - "epoch": 0.39, - "grad_norm": 1.1217723335679453, - "learning_rate": 1.383767309065204e-05, - "loss": 0.1988, - "step": 7736 - }, - { - "epoch": 0.39, - "grad_norm": 1.0698837728586303, - "learning_rate": 1.3836152178650899e-05, - "loss": 0.1921, - "step": 7737 - }, - { - "epoch": 0.39, - "grad_norm": 1.0058637146346, - "learning_rate": 1.383463116259391e-05, - "loss": 0.2095, - "step": 7738 - }, - { - "epoch": 0.39, - "grad_norm": 0.9999314188011952, - "learning_rate": 1.3833110042522337e-05, - "loss": 0.2046, - "step": 7739 - }, - { - "epoch": 0.39, - "grad_norm": 1.4672213955523357, - "learning_rate": 1.3831588818477437e-05, - "loss": 0.2053, - "step": 7740 - }, - { - "epoch": 0.39, - "grad_norm": 1.0337800417999594, - "learning_rate": 1.3830067490500474e-05, - "loss": 0.2175, - "step": 7741 - }, - { - "epoch": 0.39, - "grad_norm": 1.2319011038386658, - "learning_rate": 1.3828546058632714e-05, - "loss": 0.1902, - "step": 7742 - }, - { - "epoch": 0.39, - "grad_norm": 1.3044079880196222, - "learning_rate": 1.3827024522915425e-05, - "loss": 0.2291, - "step": 7743 - }, - { - "epoch": 0.39, - "grad_norm": 0.8538781044258716, - "learning_rate": 1.382550288338988e-05, - "loss": 0.2123, - "step": 7744 - }, - { - "epoch": 0.39, - "grad_norm": 0.879168221739137, - "learning_rate": 1.3823981140097354e-05, - "loss": 0.179, - "step": 7745 - }, - { - "epoch": 0.39, - "grad_norm": 0.9413694338433246, - "learning_rate": 1.3822459293079122e-05, - "loss": 0.1967, - "step": 7746 - }, - { - "epoch": 0.39, - "grad_norm": 1.1056894724859516, - "learning_rate": 1.3820937342376467e-05, - "loss": 0.191, - "step": 7747 - }, - { - "epoch": 0.39, - "grad_norm": 0.9126483606695378, - "learning_rate": 1.3819415288030672e-05, - "loss": 0.1762, - "step": 7748 - }, - { - "epoch": 0.39, - "grad_norm": 1.1355339214771147, - "learning_rate": 1.381789313008302e-05, - "loss": 0.2235, - "step": 7749 - }, - { - "epoch": 0.39, - "grad_norm": 0.9826179082927652, - "learning_rate": 1.3816370868574804e-05, - "loss": 0.2131, - "step": 7750 - }, - { - "epoch": 0.39, - "grad_norm": 2.8860565691463593, - "learning_rate": 1.3814848503547308e-05, - "loss": 0.1782, - "step": 7751 - }, - { - "epoch": 0.39, - "grad_norm": 1.5081861453338383, - "learning_rate": 1.3813326035041832e-05, - "loss": 0.2113, - "step": 7752 - }, - { - "epoch": 0.39, - "grad_norm": 1.3892948185606584, - "learning_rate": 1.3811803463099675e-05, - "loss": 0.2063, - "step": 7753 - }, - { - "epoch": 0.39, - "grad_norm": 0.9911639651533942, - "learning_rate": 1.3810280787762131e-05, - "loss": 0.1817, - "step": 7754 - }, - { - "epoch": 0.39, - "grad_norm": 2.022925229717426, - "learning_rate": 1.3808758009070506e-05, - "loss": 0.1943, - "step": 7755 - }, - { - "epoch": 0.39, - "grad_norm": 1.3548483263654096, - "learning_rate": 1.3807235127066104e-05, - "loss": 0.2179, - "step": 7756 - }, - { - "epoch": 0.39, - "grad_norm": 1.1166557524441694, - "learning_rate": 1.3805712141790237e-05, - "loss": 0.2168, - "step": 7757 - }, - { - "epoch": 0.39, - "grad_norm": 0.9892567026131145, - "learning_rate": 1.380418905328421e-05, - "loss": 0.177, - "step": 7758 - }, - { - "epoch": 0.39, - "grad_norm": 1.0970740628385713, - "learning_rate": 1.3802665861589342e-05, - "loss": 0.229, - "step": 7759 - }, - { - "epoch": 0.39, - "grad_norm": 1.8689804869980224, - "learning_rate": 1.3801142566746945e-05, - "loss": 0.167, - "step": 7760 - }, - { - "epoch": 0.39, - "grad_norm": 1.081339687802085, - "learning_rate": 1.3799619168798346e-05, - "loss": 0.1883, - "step": 7761 - }, - { - "epoch": 0.39, - "grad_norm": 1.7086114323382755, - "learning_rate": 1.3798095667784859e-05, - "loss": 0.2153, - "step": 7762 - }, - { - "epoch": 0.39, - "grad_norm": 1.2132432860413234, - "learning_rate": 1.3796572063747813e-05, - "loss": 0.2145, - "step": 7763 - }, - { - "epoch": 0.39, - "grad_norm": 0.9325342418973277, - "learning_rate": 1.3795048356728538e-05, - "loss": 0.173, - "step": 7764 - }, - { - "epoch": 0.39, - "grad_norm": 1.1566731200513392, - "learning_rate": 1.3793524546768358e-05, - "loss": 0.1983, - "step": 7765 - }, - { - "epoch": 0.39, - "grad_norm": 1.9601961276441815, - "learning_rate": 1.3792000633908612e-05, - "loss": 0.2059, - "step": 7766 - }, - { - "epoch": 0.39, - "grad_norm": 0.8099371655872485, - "learning_rate": 1.3790476618190634e-05, - "loss": 0.2094, - "step": 7767 - }, - { - "epoch": 0.4, - "grad_norm": 1.7473037502181532, - "learning_rate": 1.3788952499655765e-05, - "loss": 0.1907, - "step": 7768 - }, - { - "epoch": 0.4, - "grad_norm": 0.9096819251192658, - "learning_rate": 1.3787428278345344e-05, - "loss": 0.1857, - "step": 7769 - }, - { - "epoch": 0.4, - "grad_norm": 0.9874258926455962, - "learning_rate": 1.378590395430072e-05, - "loss": 0.1808, - "step": 7770 - }, - { - "epoch": 0.4, - "grad_norm": 0.9443362322677041, - "learning_rate": 1.3784379527563233e-05, - "loss": 0.1781, - "step": 7771 - }, - { - "epoch": 0.4, - "grad_norm": 1.1366189588352082, - "learning_rate": 1.3782854998174243e-05, - "loss": 0.2119, - "step": 7772 - }, - { - "epoch": 0.4, - "grad_norm": 0.9441430740776766, - "learning_rate": 1.3781330366175093e-05, - "loss": 0.1915, - "step": 7773 - }, - { - "epoch": 0.4, - "grad_norm": 1.1722347958655013, - "learning_rate": 1.3779805631607144e-05, - "loss": 0.2206, - "step": 7774 - }, - { - "epoch": 0.4, - "grad_norm": 1.011282633832701, - "learning_rate": 1.3778280794511753e-05, - "loss": 0.1823, - "step": 7775 - }, - { - "epoch": 0.4, - "grad_norm": 1.1645863169044224, - "learning_rate": 1.3776755854930285e-05, - "loss": 0.2104, - "step": 7776 - }, - { - "epoch": 0.4, - "grad_norm": 1.1038162763376442, - "learning_rate": 1.3775230812904101e-05, - "loss": 0.2008, - "step": 7777 - }, - { - "epoch": 0.4, - "grad_norm": 1.5586894325978662, - "learning_rate": 1.3773705668474564e-05, - "loss": 0.2054, - "step": 7778 - }, - { - "epoch": 0.4, - "grad_norm": 1.0103587347743808, - "learning_rate": 1.377218042168305e-05, - "loss": 0.2099, - "step": 7779 - }, - { - "epoch": 0.4, - "grad_norm": 1.0698592267223566, - "learning_rate": 1.3770655072570929e-05, - "loss": 0.2056, - "step": 7780 - }, - { - "epoch": 0.4, - "grad_norm": 1.1252856387119448, - "learning_rate": 1.3769129621179578e-05, - "loss": 0.1959, - "step": 7781 - }, - { - "epoch": 0.4, - "grad_norm": 1.3938755135494856, - "learning_rate": 1.3767604067550369e-05, - "loss": 0.2072, - "step": 7782 - }, - { - "epoch": 0.4, - "grad_norm": 1.043094931379382, - "learning_rate": 1.376607841172469e-05, - "loss": 0.1888, - "step": 7783 - }, - { - "epoch": 0.4, - "grad_norm": 0.9900803380343406, - "learning_rate": 1.376455265374392e-05, - "loss": 0.199, - "step": 7784 - }, - { - "epoch": 0.4, - "grad_norm": 1.3610036980785805, - "learning_rate": 1.376302679364945e-05, - "loss": 0.17, - "step": 7785 - }, - { - "epoch": 0.4, - "grad_norm": 0.8993769498920858, - "learning_rate": 1.3761500831482661e-05, - "loss": 0.1989, - "step": 7786 - }, - { - "epoch": 0.4, - "grad_norm": 0.9511935062881577, - "learning_rate": 1.3759974767284954e-05, - "loss": 0.1929, - "step": 7787 - }, - { - "epoch": 0.4, - "grad_norm": 1.04482184811609, - "learning_rate": 1.3758448601097715e-05, - "loss": 0.1905, - "step": 7788 - }, - { - "epoch": 0.4, - "grad_norm": 1.0151552795044474, - "learning_rate": 1.3756922332962349e-05, - "loss": 0.1841, - "step": 7789 - }, - { - "epoch": 0.4, - "grad_norm": 1.1638260461131706, - "learning_rate": 1.375539596292025e-05, - "loss": 0.203, - "step": 7790 - }, - { - "epoch": 0.4, - "grad_norm": 1.1345663298986353, - "learning_rate": 1.3753869491012822e-05, - "loss": 0.1927, - "step": 7791 - }, - { - "epoch": 0.4, - "grad_norm": 1.274166847278216, - "learning_rate": 1.3752342917281474e-05, - "loss": 0.1876, - "step": 7792 - }, - { - "epoch": 0.4, - "grad_norm": 3.2116466657736265, - "learning_rate": 1.3750816241767612e-05, - "loss": 0.2162, - "step": 7793 - }, - { - "epoch": 0.4, - "grad_norm": 3.292280681415955, - "learning_rate": 1.374928946451265e-05, - "loss": 0.2137, - "step": 7794 - }, - { - "epoch": 0.4, - "grad_norm": 0.9538512883912856, - "learning_rate": 1.3747762585557995e-05, - "loss": 0.1932, - "step": 7795 - }, - { - "epoch": 0.4, - "grad_norm": 0.902903017738043, - "learning_rate": 1.374623560494507e-05, - "loss": 0.2053, - "step": 7796 - }, - { - "epoch": 0.4, - "grad_norm": 0.9089488041497878, - "learning_rate": 1.3744708522715295e-05, - "loss": 0.1868, - "step": 7797 - }, - { - "epoch": 0.4, - "grad_norm": 1.036402555926078, - "learning_rate": 1.3743181338910088e-05, - "loss": 0.175, - "step": 7798 - }, - { - "epoch": 0.4, - "grad_norm": 1.1412140081353452, - "learning_rate": 1.3741654053570877e-05, - "loss": 0.2069, - "step": 7799 - }, - { - "epoch": 0.4, - "grad_norm": 0.9812081753904945, - "learning_rate": 1.3740126666739086e-05, - "loss": 0.1921, - "step": 7800 - }, - { - "epoch": 0.4, - "grad_norm": 0.8599736416295825, - "learning_rate": 1.3738599178456149e-05, - "loss": 0.1993, - "step": 7801 - }, - { - "epoch": 0.4, - "grad_norm": 1.3933742441258217, - "learning_rate": 1.37370715887635e-05, - "loss": 0.2095, - "step": 7802 - }, - { - "epoch": 0.4, - "grad_norm": 0.9186112337500361, - "learning_rate": 1.3735543897702572e-05, - "loss": 0.2101, - "step": 7803 - }, - { - "epoch": 0.4, - "grad_norm": 0.7780230586723094, - "learning_rate": 1.3734016105314803e-05, - "loss": 0.1716, - "step": 7804 - }, - { - "epoch": 0.4, - "grad_norm": 0.912468228908501, - "learning_rate": 1.3732488211641638e-05, - "loss": 0.1777, - "step": 7805 - }, - { - "epoch": 0.4, - "grad_norm": 1.0843294009632642, - "learning_rate": 1.3730960216724518e-05, - "loss": 0.1716, - "step": 7806 - }, - { - "epoch": 0.4, - "grad_norm": 1.3501717023837643, - "learning_rate": 1.3729432120604895e-05, - "loss": 0.1975, - "step": 7807 - }, - { - "epoch": 0.4, - "grad_norm": 1.033915342936664, - "learning_rate": 1.3727903923324211e-05, - "loss": 0.1954, - "step": 7808 - }, - { - "epoch": 0.4, - "grad_norm": 0.9776691062281382, - "learning_rate": 1.3726375624923925e-05, - "loss": 0.1895, - "step": 7809 - }, - { - "epoch": 0.4, - "grad_norm": 0.8463396302747215, - "learning_rate": 1.3724847225445488e-05, - "loss": 0.1929, - "step": 7810 - }, - { - "epoch": 0.4, - "grad_norm": 1.1511623956776489, - "learning_rate": 1.3723318724930362e-05, - "loss": 0.168, - "step": 7811 - }, - { - "epoch": 0.4, - "grad_norm": 1.6733247641066649, - "learning_rate": 1.3721790123420002e-05, - "loss": 0.2306, - "step": 7812 - }, - { - "epoch": 0.4, - "grad_norm": 1.0738043696689645, - "learning_rate": 1.3720261420955874e-05, - "loss": 0.1865, - "step": 7813 - }, - { - "epoch": 0.4, - "grad_norm": 0.9350696789972645, - "learning_rate": 1.3718732617579449e-05, - "loss": 0.1919, - "step": 7814 - }, - { - "epoch": 0.4, - "grad_norm": 1.0062826557369187, - "learning_rate": 1.371720371333219e-05, - "loss": 0.178, - "step": 7815 - }, - { - "epoch": 0.4, - "grad_norm": 1.095925858088011, - "learning_rate": 1.3715674708255571e-05, - "loss": 0.1966, - "step": 7816 - }, - { - "epoch": 0.4, - "grad_norm": 1.072683440548299, - "learning_rate": 1.3714145602391063e-05, - "loss": 0.1699, - "step": 7817 - }, - { - "epoch": 0.4, - "grad_norm": 1.0745173488233095, - "learning_rate": 1.3712616395780148e-05, - "loss": 0.1882, - "step": 7818 - }, - { - "epoch": 0.4, - "grad_norm": 0.9455046613837105, - "learning_rate": 1.3711087088464303e-05, - "loss": 0.1703, - "step": 7819 - }, - { - "epoch": 0.4, - "grad_norm": 2.875943906638408, - "learning_rate": 1.3709557680485013e-05, - "loss": 0.21, - "step": 7820 - }, - { - "epoch": 0.4, - "grad_norm": 3.555443947600797, - "learning_rate": 1.3708028171883757e-05, - "loss": 0.2067, - "step": 7821 - }, - { - "epoch": 0.4, - "grad_norm": 1.019118936893599, - "learning_rate": 1.3706498562702032e-05, - "loss": 0.1905, - "step": 7822 - }, - { - "epoch": 0.4, - "grad_norm": 0.9698314019007279, - "learning_rate": 1.3704968852981322e-05, - "loss": 0.2184, - "step": 7823 - }, - { - "epoch": 0.4, - "grad_norm": 0.9142833794886264, - "learning_rate": 1.3703439042763122e-05, - "loss": 0.2014, - "step": 7824 - }, - { - "epoch": 0.4, - "grad_norm": 0.9029009788321303, - "learning_rate": 1.3701909132088931e-05, - "loss": 0.2014, - "step": 7825 - }, - { - "epoch": 0.4, - "grad_norm": 1.252169628887097, - "learning_rate": 1.3700379121000245e-05, - "loss": 0.2111, - "step": 7826 - }, - { - "epoch": 0.4, - "grad_norm": 1.3070075999421975, - "learning_rate": 1.3698849009538564e-05, - "loss": 0.2055, - "step": 7827 - }, - { - "epoch": 0.4, - "grad_norm": 1.3437122932148957, - "learning_rate": 1.3697318797745399e-05, - "loss": 0.1853, - "step": 7828 - }, - { - "epoch": 0.4, - "grad_norm": 1.0379265187071747, - "learning_rate": 1.3695788485662248e-05, - "loss": 0.1936, - "step": 7829 - }, - { - "epoch": 0.4, - "grad_norm": 1.1959800320200566, - "learning_rate": 1.3694258073330626e-05, - "loss": 0.1847, - "step": 7830 - }, - { - "epoch": 0.4, - "grad_norm": 1.0100684236799955, - "learning_rate": 1.3692727560792048e-05, - "loss": 0.2191, - "step": 7831 - }, - { - "epoch": 0.4, - "grad_norm": 1.0045741496998244, - "learning_rate": 1.3691196948088026e-05, - "loss": 0.2086, - "step": 7832 - }, - { - "epoch": 0.4, - "grad_norm": 0.880542853536741, - "learning_rate": 1.3689666235260078e-05, - "loss": 0.2093, - "step": 7833 - }, - { - "epoch": 0.4, - "grad_norm": 0.8663881683353095, - "learning_rate": 1.3688135422349724e-05, - "loss": 0.1842, - "step": 7834 - }, - { - "epoch": 0.4, - "grad_norm": 1.1894788719346479, - "learning_rate": 1.3686604509398489e-05, - "loss": 0.215, - "step": 7835 - }, - { - "epoch": 0.4, - "grad_norm": 1.239200767986605, - "learning_rate": 1.3685073496447898e-05, - "loss": 0.2123, - "step": 7836 - }, - { - "epoch": 0.4, - "grad_norm": 1.0676797817770391, - "learning_rate": 1.3683542383539482e-05, - "loss": 0.1925, - "step": 7837 - }, - { - "epoch": 0.4, - "grad_norm": 1.179582556696482, - "learning_rate": 1.3682011170714771e-05, - "loss": 0.1993, - "step": 7838 - }, - { - "epoch": 0.4, - "grad_norm": 0.9022362728092863, - "learning_rate": 1.3680479858015297e-05, - "loss": 0.184, - "step": 7839 - }, - { - "epoch": 0.4, - "grad_norm": 0.8072628959257323, - "learning_rate": 1.3678948445482598e-05, - "loss": 0.1991, - "step": 7840 - }, - { - "epoch": 0.4, - "grad_norm": 1.1446276432557878, - "learning_rate": 1.3677416933158216e-05, - "loss": 0.1997, - "step": 7841 - }, - { - "epoch": 0.4, - "grad_norm": 1.3285163458160731, - "learning_rate": 1.3675885321083693e-05, - "loss": 0.1934, - "step": 7842 - }, - { - "epoch": 0.4, - "grad_norm": 1.0662290021048162, - "learning_rate": 1.3674353609300571e-05, - "loss": 0.1701, - "step": 7843 - }, - { - "epoch": 0.4, - "grad_norm": 0.8600687517356913, - "learning_rate": 1.3672821797850402e-05, - "loss": 0.1782, - "step": 7844 - }, - { - "epoch": 0.4, - "grad_norm": 0.9653570695824466, - "learning_rate": 1.3671289886774733e-05, - "loss": 0.2028, - "step": 7845 - }, - { - "epoch": 0.4, - "grad_norm": 0.9470181500627005, - "learning_rate": 1.3669757876115117e-05, - "loss": 0.2137, - "step": 7846 - }, - { - "epoch": 0.4, - "grad_norm": 1.3353820715794826, - "learning_rate": 1.3668225765913114e-05, - "loss": 0.1769, - "step": 7847 - }, - { - "epoch": 0.4, - "grad_norm": 1.0425801707777265, - "learning_rate": 1.3666693556210278e-05, - "loss": 0.1965, - "step": 7848 - }, - { - "epoch": 0.4, - "grad_norm": 0.9905064591861154, - "learning_rate": 1.3665161247048173e-05, - "loss": 0.1833, - "step": 7849 - }, - { - "epoch": 0.4, - "grad_norm": 1.2775710642059581, - "learning_rate": 1.3663628838468364e-05, - "loss": 0.1912, - "step": 7850 - }, - { - "epoch": 0.4, - "grad_norm": 1.2917881575654395, - "learning_rate": 1.3662096330512413e-05, - "loss": 0.2011, - "step": 7851 - }, - { - "epoch": 0.4, - "grad_norm": 1.0359447949370741, - "learning_rate": 1.3660563723221894e-05, - "loss": 0.2311, - "step": 7852 - }, - { - "epoch": 0.4, - "grad_norm": 0.8779461362875117, - "learning_rate": 1.3659031016638376e-05, - "loss": 0.2028, - "step": 7853 - }, - { - "epoch": 0.4, - "grad_norm": 1.0540674291123995, - "learning_rate": 1.3657498210803435e-05, - "loss": 0.1967, - "step": 7854 - }, - { - "epoch": 0.4, - "grad_norm": 1.4087280910830706, - "learning_rate": 1.3655965305758652e-05, - "loss": 0.1976, - "step": 7855 - }, - { - "epoch": 0.4, - "grad_norm": 0.8105676637666767, - "learning_rate": 1.36544323015456e-05, - "loss": 0.214, - "step": 7856 - }, - { - "epoch": 0.4, - "grad_norm": 0.9090277786069536, - "learning_rate": 1.3652899198205864e-05, - "loss": 0.1919, - "step": 7857 - }, - { - "epoch": 0.4, - "grad_norm": 1.0320170900219516, - "learning_rate": 1.3651365995781034e-05, - "loss": 0.1886, - "step": 7858 - }, - { - "epoch": 0.4, - "grad_norm": 0.8236703494092736, - "learning_rate": 1.3649832694312695e-05, - "loss": 0.2061, - "step": 7859 - }, - { - "epoch": 0.4, - "grad_norm": 1.380715986676055, - "learning_rate": 1.3648299293842438e-05, - "loss": 0.1852, - "step": 7860 - }, - { - "epoch": 0.4, - "grad_norm": 0.7081578575084013, - "learning_rate": 1.3646765794411854e-05, - "loss": 0.172, - "step": 7861 - }, - { - "epoch": 0.4, - "grad_norm": 0.8488985864335844, - "learning_rate": 1.3645232196062544e-05, - "loss": 0.2182, - "step": 7862 - }, - { - "epoch": 0.4, - "grad_norm": 1.006743983617717, - "learning_rate": 1.3643698498836104e-05, - "loss": 0.1942, - "step": 7863 - }, - { - "epoch": 0.4, - "grad_norm": 1.14256700029364, - "learning_rate": 1.3642164702774137e-05, - "loss": 0.196, - "step": 7864 - }, - { - "epoch": 0.4, - "grad_norm": 1.1811893526465074, - "learning_rate": 1.3640630807918246e-05, - "loss": 0.1893, - "step": 7865 - }, - { - "epoch": 0.4, - "grad_norm": 0.8455848742795187, - "learning_rate": 1.3639096814310037e-05, - "loss": 0.2269, - "step": 7866 - }, - { - "epoch": 0.4, - "grad_norm": 0.7749192958494059, - "learning_rate": 1.3637562721991127e-05, - "loss": 0.1869, - "step": 7867 - }, - { - "epoch": 0.4, - "grad_norm": 0.8349972385642351, - "learning_rate": 1.3636028531003118e-05, - "loss": 0.2016, - "step": 7868 - }, - { - "epoch": 0.4, - "grad_norm": 0.9953791132956865, - "learning_rate": 1.3634494241387632e-05, - "loss": 0.2042, - "step": 7869 - }, - { - "epoch": 0.4, - "grad_norm": 0.9222005288634191, - "learning_rate": 1.363295985318628e-05, - "loss": 0.2072, - "step": 7870 - }, - { - "epoch": 0.4, - "grad_norm": 1.075824838479233, - "learning_rate": 1.3631425366440691e-05, - "loss": 0.1992, - "step": 7871 - }, - { - "epoch": 0.4, - "grad_norm": 1.5194579873674021, - "learning_rate": 1.3629890781192486e-05, - "loss": 0.1939, - "step": 7872 - }, - { - "epoch": 0.4, - "grad_norm": 0.7643820525970437, - "learning_rate": 1.3628356097483288e-05, - "loss": 0.1971, - "step": 7873 - }, - { - "epoch": 0.4, - "grad_norm": 1.197943921189177, - "learning_rate": 1.362682131535472e-05, - "loss": 0.1876, - "step": 7874 - }, - { - "epoch": 0.4, - "grad_norm": 1.2330583994530917, - "learning_rate": 1.3625286434848424e-05, - "loss": 0.1771, - "step": 7875 - }, - { - "epoch": 0.4, - "grad_norm": 1.1913529068812612, - "learning_rate": 1.3623751456006027e-05, - "loss": 0.201, - "step": 7876 - }, - { - "epoch": 0.4, - "grad_norm": 1.1474609620458969, - "learning_rate": 1.3622216378869169e-05, - "loss": 0.1877, - "step": 7877 - }, - { - "epoch": 0.4, - "grad_norm": 1.5636328342053893, - "learning_rate": 1.3620681203479484e-05, - "loss": 0.1911, - "step": 7878 - }, - { - "epoch": 0.4, - "grad_norm": 1.1944564472343593, - "learning_rate": 1.3619145929878617e-05, - "loss": 0.1999, - "step": 7879 - }, - { - "epoch": 0.4, - "grad_norm": 0.9179919973426882, - "learning_rate": 1.3617610558108214e-05, - "loss": 0.2123, - "step": 7880 - }, - { - "epoch": 0.4, - "grad_norm": 1.060644212888549, - "learning_rate": 1.3616075088209921e-05, - "loss": 0.2137, - "step": 7881 - }, - { - "epoch": 0.4, - "grad_norm": 0.857124640067879, - "learning_rate": 1.3614539520225388e-05, - "loss": 0.1664, - "step": 7882 - }, - { - "epoch": 0.4, - "grad_norm": 1.6058142618696125, - "learning_rate": 1.361300385419626e-05, - "loss": 0.1857, - "step": 7883 - }, - { - "epoch": 0.4, - "grad_norm": 1.335899099030283, - "learning_rate": 1.3611468090164203e-05, - "loss": 0.2105, - "step": 7884 - }, - { - "epoch": 0.4, - "grad_norm": 1.0110629983418822, - "learning_rate": 1.3609932228170873e-05, - "loss": 0.189, - "step": 7885 - }, - { - "epoch": 0.4, - "grad_norm": 0.9841281444487436, - "learning_rate": 1.3608396268257922e-05, - "loss": 0.1885, - "step": 7886 - }, - { - "epoch": 0.4, - "grad_norm": 0.9728002662716808, - "learning_rate": 1.360686021046702e-05, - "loss": 0.2076, - "step": 7887 - }, - { - "epoch": 0.4, - "grad_norm": 1.1365569391346437, - "learning_rate": 1.360532405483983e-05, - "loss": 0.2065, - "step": 7888 - }, - { - "epoch": 0.4, - "grad_norm": 1.5428500470719355, - "learning_rate": 1.3603787801418025e-05, - "loss": 0.1843, - "step": 7889 - }, - { - "epoch": 0.4, - "grad_norm": 0.9149812568191804, - "learning_rate": 1.3602251450243273e-05, - "loss": 0.2111, - "step": 7890 - }, - { - "epoch": 0.4, - "grad_norm": 1.4272311709229732, - "learning_rate": 1.3600715001357241e-05, - "loss": 0.2076, - "step": 7891 - }, - { - "epoch": 0.4, - "grad_norm": 1.1625936777606585, - "learning_rate": 1.3599178454801615e-05, - "loss": 0.2171, - "step": 7892 - }, - { - "epoch": 0.4, - "grad_norm": 0.8890855284761452, - "learning_rate": 1.3597641810618071e-05, - "loss": 0.2132, - "step": 7893 - }, - { - "epoch": 0.4, - "grad_norm": 0.9647647431889569, - "learning_rate": 1.3596105068848289e-05, - "loss": 0.2097, - "step": 7894 - }, - { - "epoch": 0.4, - "grad_norm": 0.9228742641602002, - "learning_rate": 1.3594568229533953e-05, - "loss": 0.1896, - "step": 7895 - }, - { - "epoch": 0.4, - "grad_norm": 0.9523702934499297, - "learning_rate": 1.359303129271675e-05, - "loss": 0.2188, - "step": 7896 - }, - { - "epoch": 0.4, - "grad_norm": 1.2907224300111102, - "learning_rate": 1.3591494258438372e-05, - "loss": 0.2196, - "step": 7897 - }, - { - "epoch": 0.4, - "grad_norm": 1.4941226422465974, - "learning_rate": 1.3589957126740508e-05, - "loss": 0.1844, - "step": 7898 - }, - { - "epoch": 0.4, - "grad_norm": 1.39262869659887, - "learning_rate": 1.3588419897664855e-05, - "loss": 0.2123, - "step": 7899 - }, - { - "epoch": 0.4, - "grad_norm": 1.3557954440930702, - "learning_rate": 1.358688257125311e-05, - "loss": 0.2364, - "step": 7900 - }, - { - "epoch": 0.4, - "grad_norm": 2.8274355379770744, - "learning_rate": 1.3585345147546971e-05, - "loss": 0.2012, - "step": 7901 - }, - { - "epoch": 0.4, - "grad_norm": 1.445332020814645, - "learning_rate": 1.3583807626588143e-05, - "loss": 0.2053, - "step": 7902 - }, - { - "epoch": 0.4, - "grad_norm": 1.0809255064379315, - "learning_rate": 1.3582270008418332e-05, - "loss": 0.1686, - "step": 7903 - }, - { - "epoch": 0.4, - "grad_norm": 1.1250535830016777, - "learning_rate": 1.3580732293079244e-05, - "loss": 0.1923, - "step": 7904 - }, - { - "epoch": 0.4, - "grad_norm": 0.9187987560101761, - "learning_rate": 1.357919448061259e-05, - "loss": 0.1737, - "step": 7905 - }, - { - "epoch": 0.4, - "grad_norm": 0.9939882858974505, - "learning_rate": 1.3577656571060082e-05, - "loss": 0.1889, - "step": 7906 - }, - { - "epoch": 0.4, - "grad_norm": 1.1124667983403917, - "learning_rate": 1.357611856446344e-05, - "loss": 0.204, - "step": 7907 - }, - { - "epoch": 0.4, - "grad_norm": 1.2556400683929114, - "learning_rate": 1.3574580460864381e-05, - "loss": 0.1767, - "step": 7908 - }, - { - "epoch": 0.4, - "grad_norm": 1.139433671837242, - "learning_rate": 1.3573042260304623e-05, - "loss": 0.2201, - "step": 7909 - }, - { - "epoch": 0.4, - "grad_norm": 0.7612526668488224, - "learning_rate": 1.3571503962825892e-05, - "loss": 0.1781, - "step": 7910 - }, - { - "epoch": 0.4, - "grad_norm": 0.9627113385215519, - "learning_rate": 1.3569965568469915e-05, - "loss": 0.1901, - "step": 7911 - }, - { - "epoch": 0.4, - "grad_norm": 0.8696829378181286, - "learning_rate": 1.3568427077278422e-05, - "loss": 0.1642, - "step": 7912 - }, - { - "epoch": 0.4, - "grad_norm": 0.8150121776016657, - "learning_rate": 1.356688848929314e-05, - "loss": 0.1953, - "step": 7913 - }, - { - "epoch": 0.4, - "grad_norm": 1.192895214033769, - "learning_rate": 1.3565349804555805e-05, - "loss": 0.1792, - "step": 7914 - }, - { - "epoch": 0.4, - "grad_norm": 0.9312534145645553, - "learning_rate": 1.3563811023108157e-05, - "loss": 0.1973, - "step": 7915 - }, - { - "epoch": 0.4, - "grad_norm": 1.102289423897505, - "learning_rate": 1.3562272144991934e-05, - "loss": 0.187, - "step": 7916 - }, - { - "epoch": 0.4, - "grad_norm": 1.5551690732592423, - "learning_rate": 1.3560733170248878e-05, - "loss": 0.2014, - "step": 7917 - }, - { - "epoch": 0.4, - "grad_norm": 0.9967216384377598, - "learning_rate": 1.3559194098920732e-05, - "loss": 0.1793, - "step": 7918 - }, - { - "epoch": 0.4, - "grad_norm": 0.98329460157344, - "learning_rate": 1.3557654931049247e-05, - "loss": 0.1944, - "step": 7919 - }, - { - "epoch": 0.4, - "grad_norm": 2.558620907919238, - "learning_rate": 1.355611566667617e-05, - "loss": 0.1906, - "step": 7920 - }, - { - "epoch": 0.4, - "grad_norm": 1.1066331357961985, - "learning_rate": 1.3554576305843257e-05, - "loss": 0.2229, - "step": 7921 - }, - { - "epoch": 0.4, - "grad_norm": 0.9693267653975826, - "learning_rate": 1.3553036848592261e-05, - "loss": 0.2108, - "step": 7922 - }, - { - "epoch": 0.4, - "grad_norm": 0.8412025440545284, - "learning_rate": 1.3551497294964935e-05, - "loss": 0.2106, - "step": 7923 - }, - { - "epoch": 0.4, - "grad_norm": 1.1110383384259537, - "learning_rate": 1.3549957645003046e-05, - "loss": 0.1907, - "step": 7924 - }, - { - "epoch": 0.4, - "grad_norm": 0.8034700067923729, - "learning_rate": 1.3548417898748361e-05, - "loss": 0.2206, - "step": 7925 - }, - { - "epoch": 0.4, - "grad_norm": 0.7850263892895291, - "learning_rate": 1.3546878056242632e-05, - "loss": 0.1809, - "step": 7926 - }, - { - "epoch": 0.4, - "grad_norm": 1.1270612963348563, - "learning_rate": 1.354533811752764e-05, - "loss": 0.1852, - "step": 7927 - }, - { - "epoch": 0.4, - "grad_norm": 0.9435404761329577, - "learning_rate": 1.3543798082645152e-05, - "loss": 0.2031, - "step": 7928 - }, - { - "epoch": 0.4, - "grad_norm": 0.8921086902767899, - "learning_rate": 1.3542257951636939e-05, - "loss": 0.2034, - "step": 7929 - }, - { - "epoch": 0.4, - "grad_norm": 1.6722635092982678, - "learning_rate": 1.354071772454478e-05, - "loss": 0.2064, - "step": 7930 - }, - { - "epoch": 0.4, - "grad_norm": 2.201211190418289, - "learning_rate": 1.353917740141045e-05, - "loss": 0.2042, - "step": 7931 - }, - { - "epoch": 0.4, - "grad_norm": 0.9930918100552604, - "learning_rate": 1.3537636982275734e-05, - "loss": 0.1872, - "step": 7932 - }, - { - "epoch": 0.4, - "grad_norm": 0.9903922908206649, - "learning_rate": 1.3536096467182418e-05, - "loss": 0.1829, - "step": 7933 - }, - { - "epoch": 0.4, - "grad_norm": 0.8269314138269611, - "learning_rate": 1.3534555856172285e-05, - "loss": 0.2094, - "step": 7934 - }, - { - "epoch": 0.4, - "grad_norm": 1.193805357692574, - "learning_rate": 1.3533015149287123e-05, - "loss": 0.1892, - "step": 7935 - }, - { - "epoch": 0.4, - "grad_norm": 2.504864313457114, - "learning_rate": 1.3531474346568724e-05, - "loss": 0.1833, - "step": 7936 - }, - { - "epoch": 0.4, - "grad_norm": 0.8930310541385884, - "learning_rate": 1.3529933448058885e-05, - "loss": 0.1991, - "step": 7937 - }, - { - "epoch": 0.4, - "grad_norm": 1.112814505702536, - "learning_rate": 1.3528392453799403e-05, - "loss": 0.2099, - "step": 7938 - }, - { - "epoch": 0.4, - "grad_norm": 1.1446346643591627, - "learning_rate": 1.352685136383208e-05, - "loss": 0.2001, - "step": 7939 - }, - { - "epoch": 0.4, - "grad_norm": 0.8912392415537557, - "learning_rate": 1.3525310178198707e-05, - "loss": 0.1905, - "step": 7940 - }, - { - "epoch": 0.4, - "grad_norm": 1.1065270220767909, - "learning_rate": 1.3523768896941101e-05, - "loss": 0.2287, - "step": 7941 - }, - { - "epoch": 0.4, - "grad_norm": 1.0090741292296308, - "learning_rate": 1.3522227520101064e-05, - "loss": 0.1835, - "step": 7942 - }, - { - "epoch": 0.4, - "grad_norm": 0.9676105550984178, - "learning_rate": 1.3520686047720409e-05, - "loss": 0.1913, - "step": 7943 - }, - { - "epoch": 0.4, - "grad_norm": 1.073028571367729, - "learning_rate": 1.3519144479840942e-05, - "loss": 0.2014, - "step": 7944 - }, - { - "epoch": 0.4, - "grad_norm": 0.796391365846085, - "learning_rate": 1.3517602816504482e-05, - "loss": 0.193, - "step": 7945 - }, - { - "epoch": 0.4, - "grad_norm": 1.0138190132905434, - "learning_rate": 1.351606105775285e-05, - "loss": 0.1995, - "step": 7946 - }, - { - "epoch": 0.4, - "grad_norm": 1.0018287764775855, - "learning_rate": 1.3514519203627863e-05, - "loss": 0.1938, - "step": 7947 - }, - { - "epoch": 0.4, - "grad_norm": 0.8105897346963113, - "learning_rate": 1.3512977254171343e-05, - "loss": 0.1963, - "step": 7948 - }, - { - "epoch": 0.4, - "grad_norm": 0.8502494921740894, - "learning_rate": 1.3511435209425115e-05, - "loss": 0.2106, - "step": 7949 - }, - { - "epoch": 0.4, - "grad_norm": 1.1600577932155047, - "learning_rate": 1.3509893069431011e-05, - "loss": 0.1868, - "step": 7950 - }, - { - "epoch": 0.4, - "grad_norm": 1.1029932924061727, - "learning_rate": 1.3508350834230857e-05, - "loss": 0.2033, - "step": 7951 - }, - { - "epoch": 0.4, - "grad_norm": 0.8956362559777115, - "learning_rate": 1.3506808503866491e-05, - "loss": 0.1889, - "step": 7952 - }, - { - "epoch": 0.4, - "grad_norm": 1.144965652654032, - "learning_rate": 1.3505266078379741e-05, - "loss": 0.1805, - "step": 7953 - }, - { - "epoch": 0.4, - "grad_norm": 0.9835022229395287, - "learning_rate": 1.3503723557812455e-05, - "loss": 0.1973, - "step": 7954 - }, - { - "epoch": 0.4, - "grad_norm": 0.7857287589948215, - "learning_rate": 1.3502180942206472e-05, - "loss": 0.1822, - "step": 7955 - }, - { - "epoch": 0.4, - "grad_norm": 1.045687291906436, - "learning_rate": 1.350063823160363e-05, - "loss": 0.1969, - "step": 7956 - }, - { - "epoch": 0.4, - "grad_norm": 0.8156672686588005, - "learning_rate": 1.3499095426045779e-05, - "loss": 0.1872, - "step": 7957 - }, - { - "epoch": 0.4, - "grad_norm": 1.2209587892466776, - "learning_rate": 1.3497552525574763e-05, - "loss": 0.2176, - "step": 7958 - }, - { - "epoch": 0.4, - "grad_norm": 0.8184576292391839, - "learning_rate": 1.3496009530232444e-05, - "loss": 0.2073, - "step": 7959 - }, - { - "epoch": 0.4, - "grad_norm": 0.7409523366673464, - "learning_rate": 1.3494466440060667e-05, - "loss": 0.1898, - "step": 7960 - }, - { - "epoch": 0.4, - "grad_norm": 0.9175046443611684, - "learning_rate": 1.349292325510129e-05, - "loss": 0.1953, - "step": 7961 - }, - { - "epoch": 0.4, - "grad_norm": 0.991920168357191, - "learning_rate": 1.3491379975396171e-05, - "loss": 0.1846, - "step": 7962 - }, - { - "epoch": 0.4, - "grad_norm": 0.9289700337194272, - "learning_rate": 1.3489836600987173e-05, - "loss": 0.1884, - "step": 7963 - }, - { - "epoch": 0.4, - "grad_norm": 0.9731661635607622, - "learning_rate": 1.3488293131916161e-05, - "loss": 0.1642, - "step": 7964 - }, - { - "epoch": 0.41, - "grad_norm": 0.8785920997118186, - "learning_rate": 1.3486749568225002e-05, - "loss": 0.1948, - "step": 7965 - }, - { - "epoch": 0.41, - "grad_norm": 0.9600923761808118, - "learning_rate": 1.3485205909955562e-05, - "loss": 0.1848, - "step": 7966 - }, - { - "epoch": 0.41, - "grad_norm": 0.9832943610219766, - "learning_rate": 1.3483662157149713e-05, - "loss": 0.181, - "step": 7967 - }, - { - "epoch": 0.41, - "grad_norm": 1.164552711160505, - "learning_rate": 1.3482118309849335e-05, - "loss": 0.2095, - "step": 7968 - }, - { - "epoch": 0.41, - "grad_norm": 0.8209572252079305, - "learning_rate": 1.34805743680963e-05, - "loss": 0.1783, - "step": 7969 - }, - { - "epoch": 0.41, - "grad_norm": 2.279067529820421, - "learning_rate": 1.3479030331932488e-05, - "loss": 0.18, - "step": 7970 - }, - { - "epoch": 0.41, - "grad_norm": 0.70461197632034, - "learning_rate": 1.347748620139978e-05, - "loss": 0.1622, - "step": 7971 - }, - { - "epoch": 0.41, - "grad_norm": 0.9765087054600745, - "learning_rate": 1.3475941976540066e-05, - "loss": 0.2011, - "step": 7972 - }, - { - "epoch": 0.41, - "grad_norm": 1.6868005859222541, - "learning_rate": 1.3474397657395231e-05, - "loss": 0.1957, - "step": 7973 - }, - { - "epoch": 0.41, - "grad_norm": 0.8892658074672084, - "learning_rate": 1.347285324400716e-05, - "loss": 0.2046, - "step": 7974 - }, - { - "epoch": 0.41, - "grad_norm": 1.2671014445922417, - "learning_rate": 1.347130873641775e-05, - "loss": 0.2059, - "step": 7975 - }, - { - "epoch": 0.41, - "grad_norm": 0.8699850071913555, - "learning_rate": 1.346976413466889e-05, - "loss": 0.1762, - "step": 7976 - }, - { - "epoch": 0.41, - "grad_norm": 1.8392292585051886, - "learning_rate": 1.3468219438802487e-05, - "loss": 0.1654, - "step": 7977 - }, - { - "epoch": 0.41, - "grad_norm": 1.5843249219827986, - "learning_rate": 1.3466674648860436e-05, - "loss": 0.2023, - "step": 7978 - }, - { - "epoch": 0.41, - "grad_norm": 0.8306870529471548, - "learning_rate": 1.3465129764884636e-05, - "loss": 0.1781, - "step": 7979 - }, - { - "epoch": 0.41, - "grad_norm": 0.9424208434182422, - "learning_rate": 1.3463584786916997e-05, - "loss": 0.2124, - "step": 7980 - }, - { - "epoch": 0.41, - "grad_norm": 1.0553131794893045, - "learning_rate": 1.3462039714999426e-05, - "loss": 0.2425, - "step": 7981 - }, - { - "epoch": 0.41, - "grad_norm": 0.8565393972030418, - "learning_rate": 1.3460494549173833e-05, - "loss": 0.1893, - "step": 7982 - }, - { - "epoch": 0.41, - "grad_norm": 1.3153057889606654, - "learning_rate": 1.3458949289482126e-05, - "loss": 0.174, - "step": 7983 - }, - { - "epoch": 0.41, - "grad_norm": 0.972633249567405, - "learning_rate": 1.3457403935966227e-05, - "loss": 0.1992, - "step": 7984 - }, - { - "epoch": 0.41, - "grad_norm": 1.1382920500187748, - "learning_rate": 1.345585848866805e-05, - "loss": 0.2359, - "step": 7985 - }, - { - "epoch": 0.41, - "grad_norm": 0.8575969850834319, - "learning_rate": 1.3454312947629515e-05, - "loss": 0.1995, - "step": 7986 - }, - { - "epoch": 0.41, - "grad_norm": 1.0979026964467675, - "learning_rate": 1.345276731289255e-05, - "loss": 0.2002, - "step": 7987 - }, - { - "epoch": 0.41, - "grad_norm": 1.079195049925033, - "learning_rate": 1.3451221584499073e-05, - "loss": 0.2269, - "step": 7988 - }, - { - "epoch": 0.41, - "grad_norm": 0.9035088056320559, - "learning_rate": 1.3449675762491017e-05, - "loss": 0.1953, - "step": 7989 - }, - { - "epoch": 0.41, - "grad_norm": 0.8710857518370761, - "learning_rate": 1.3448129846910312e-05, - "loss": 0.2033, - "step": 7990 - }, - { - "epoch": 0.41, - "grad_norm": 0.7995548269495811, - "learning_rate": 1.344658383779889e-05, - "loss": 0.2021, - "step": 7991 - }, - { - "epoch": 0.41, - "grad_norm": 1.317150873310952, - "learning_rate": 1.3445037735198684e-05, - "loss": 0.2065, - "step": 7992 - }, - { - "epoch": 0.41, - "grad_norm": 0.8921681775584891, - "learning_rate": 1.3443491539151636e-05, - "loss": 0.2054, - "step": 7993 - }, - { - "epoch": 0.41, - "grad_norm": 0.8545579901327621, - "learning_rate": 1.3441945249699687e-05, - "loss": 0.2003, - "step": 7994 - }, - { - "epoch": 0.41, - "grad_norm": 1.0063728134691161, - "learning_rate": 1.3440398866884781e-05, - "loss": 0.2002, - "step": 7995 - }, - { - "epoch": 0.41, - "grad_norm": 0.9023502076409247, - "learning_rate": 1.343885239074886e-05, - "loss": 0.2044, - "step": 7996 - }, - { - "epoch": 0.41, - "grad_norm": 0.7519701313854469, - "learning_rate": 1.343730582133387e-05, - "loss": 0.1963, - "step": 7997 - }, - { - "epoch": 0.41, - "grad_norm": 0.9367432902659597, - "learning_rate": 1.3435759158681767e-05, - "loss": 0.1965, - "step": 7998 - }, - { - "epoch": 0.41, - "grad_norm": 0.9849913851261063, - "learning_rate": 1.3434212402834503e-05, - "loss": 0.1797, - "step": 7999 - }, - { - "epoch": 0.41, - "grad_norm": 0.9043783031126352, - "learning_rate": 1.3432665553834036e-05, - "loss": 0.1825, - "step": 8000 - }, - { - "epoch": 0.41, - "grad_norm": 0.9773402570390824, - "learning_rate": 1.3431118611722317e-05, - "loss": 0.1956, - "step": 8001 - }, - { - "epoch": 0.41, - "grad_norm": 0.9198481783329289, - "learning_rate": 1.3429571576541315e-05, - "loss": 0.1996, - "step": 8002 - }, - { - "epoch": 0.41, - "grad_norm": 0.9380731530654524, - "learning_rate": 1.3428024448332992e-05, - "loss": 0.1938, - "step": 8003 - }, - { - "epoch": 0.41, - "grad_norm": 1.1878699897617486, - "learning_rate": 1.342647722713931e-05, - "loss": 0.1915, - "step": 8004 - }, - { - "epoch": 0.41, - "grad_norm": 0.7892945827995911, - "learning_rate": 1.342492991300224e-05, - "loss": 0.1886, - "step": 8005 - }, - { - "epoch": 0.41, - "grad_norm": 0.8627428951792862, - "learning_rate": 1.3423382505963752e-05, - "loss": 0.1665, - "step": 8006 - }, - { - "epoch": 0.41, - "grad_norm": 0.8358965250113496, - "learning_rate": 1.3421835006065821e-05, - "loss": 0.1845, - "step": 8007 - }, - { - "epoch": 0.41, - "grad_norm": 0.8940788101936157, - "learning_rate": 1.3420287413350424e-05, - "loss": 0.1943, - "step": 8008 - }, - { - "epoch": 0.41, - "grad_norm": 0.8457027511164052, - "learning_rate": 1.3418739727859536e-05, - "loss": 0.1884, - "step": 8009 - }, - { - "epoch": 0.41, - "grad_norm": 0.9616924394345931, - "learning_rate": 1.3417191949635137e-05, - "loss": 0.2005, - "step": 8010 - }, - { - "epoch": 0.41, - "grad_norm": 0.9765691241789635, - "learning_rate": 1.3415644078719216e-05, - "loss": 0.2, - "step": 8011 - }, - { - "epoch": 0.41, - "grad_norm": 1.0198592589757345, - "learning_rate": 1.3414096115153758e-05, - "loss": 0.2008, - "step": 8012 - }, - { - "epoch": 0.41, - "grad_norm": 1.078405481686966, - "learning_rate": 1.341254805898075e-05, - "loss": 0.1754, - "step": 8013 - }, - { - "epoch": 0.41, - "grad_norm": 1.1520658043098602, - "learning_rate": 1.341099991024218e-05, - "loss": 0.195, - "step": 8014 - }, - { - "epoch": 0.41, - "grad_norm": 0.8431762267412917, - "learning_rate": 1.3409451668980047e-05, - "loss": 0.1917, - "step": 8015 - }, - { - "epoch": 0.41, - "grad_norm": 0.9709340793317897, - "learning_rate": 1.3407903335236342e-05, - "loss": 0.1988, - "step": 8016 - }, - { - "epoch": 0.41, - "grad_norm": 0.9562143034143634, - "learning_rate": 1.3406354909053072e-05, - "loss": 0.1865, - "step": 8017 - }, - { - "epoch": 0.41, - "grad_norm": 1.8903264345603317, - "learning_rate": 1.340480639047223e-05, - "loss": 0.1795, - "step": 8018 - }, - { - "epoch": 0.41, - "grad_norm": 0.9493680900295547, - "learning_rate": 1.340325777953582e-05, - "loss": 0.196, - "step": 8019 - }, - { - "epoch": 0.41, - "grad_norm": 0.892180765456923, - "learning_rate": 1.3401709076285854e-05, - "loss": 0.1962, - "step": 8020 - }, - { - "epoch": 0.41, - "grad_norm": 1.0172462805046316, - "learning_rate": 1.3400160280764334e-05, - "loss": 0.1945, - "step": 8021 - }, - { - "epoch": 0.41, - "grad_norm": 0.8633358976212213, - "learning_rate": 1.3398611393013276e-05, - "loss": 0.184, - "step": 8022 - }, - { - "epoch": 0.41, - "grad_norm": 0.8679448290497178, - "learning_rate": 1.3397062413074692e-05, - "loss": 0.2077, - "step": 8023 - }, - { - "epoch": 0.41, - "grad_norm": 0.8340346318477808, - "learning_rate": 1.3395513340990599e-05, - "loss": 0.1961, - "step": 8024 - }, - { - "epoch": 0.41, - "grad_norm": 1.2509060316174685, - "learning_rate": 1.3393964176803014e-05, - "loss": 0.2064, - "step": 8025 - }, - { - "epoch": 0.41, - "grad_norm": 0.7697603617815317, - "learning_rate": 1.3392414920553958e-05, - "loss": 0.1696, - "step": 8026 - }, - { - "epoch": 0.41, - "grad_norm": 0.8871225258106962, - "learning_rate": 1.3390865572285456e-05, - "loss": 0.191, - "step": 8027 - }, - { - "epoch": 0.41, - "grad_norm": 0.9158079068168967, - "learning_rate": 1.3389316132039534e-05, - "loss": 0.1744, - "step": 8028 - }, - { - "epoch": 0.41, - "grad_norm": 0.7186316771574106, - "learning_rate": 1.3387766599858223e-05, - "loss": 0.2006, - "step": 8029 - }, - { - "epoch": 0.41, - "grad_norm": 1.4655735167646904, - "learning_rate": 1.338621697578355e-05, - "loss": 0.1928, - "step": 8030 - }, - { - "epoch": 0.41, - "grad_norm": 3.429526068500552, - "learning_rate": 1.338466725985755e-05, - "loss": 0.1723, - "step": 8031 - }, - { - "epoch": 0.41, - "grad_norm": 1.057773856701616, - "learning_rate": 1.3383117452122259e-05, - "loss": 0.1977, - "step": 8032 - }, - { - "epoch": 0.41, - "grad_norm": 1.035697937100189, - "learning_rate": 1.3381567552619716e-05, - "loss": 0.1796, - "step": 8033 - }, - { - "epoch": 0.41, - "grad_norm": 1.8543769830582006, - "learning_rate": 1.3380017561391964e-05, - "loss": 0.1993, - "step": 8034 - }, - { - "epoch": 0.41, - "grad_norm": 1.1232548014489392, - "learning_rate": 1.3378467478481043e-05, - "loss": 0.2145, - "step": 8035 - }, - { - "epoch": 0.41, - "grad_norm": 0.7160729312689819, - "learning_rate": 1.3376917303929e-05, - "loss": 0.1762, - "step": 8036 - }, - { - "epoch": 0.41, - "grad_norm": 1.0801627888125969, - "learning_rate": 1.3375367037777887e-05, - "loss": 0.223, - "step": 8037 - }, - { - "epoch": 0.41, - "grad_norm": 2.247750747667892, - "learning_rate": 1.3373816680069749e-05, - "loss": 0.1975, - "step": 8038 - }, - { - "epoch": 0.41, - "grad_norm": 0.8611190036702995, - "learning_rate": 1.3372266230846647e-05, - "loss": 0.2073, - "step": 8039 - }, - { - "epoch": 0.41, - "grad_norm": 1.0154499916864541, - "learning_rate": 1.3370715690150631e-05, - "loss": 0.1905, - "step": 8040 - }, - { - "epoch": 0.41, - "grad_norm": 0.9680673347591338, - "learning_rate": 1.336916505802376e-05, - "loss": 0.1972, - "step": 8041 - }, - { - "epoch": 0.41, - "grad_norm": 0.9171675293224171, - "learning_rate": 1.3367614334508097e-05, - "loss": 0.2005, - "step": 8042 - }, - { - "epoch": 0.41, - "grad_norm": 0.7850773437724924, - "learning_rate": 1.3366063519645707e-05, - "loss": 0.2221, - "step": 8043 - }, - { - "epoch": 0.41, - "grad_norm": 0.9294976754762169, - "learning_rate": 1.3364512613478654e-05, - "loss": 0.175, - "step": 8044 - }, - { - "epoch": 0.41, - "grad_norm": 0.8474386472668382, - "learning_rate": 1.3362961616049006e-05, - "loss": 0.1965, - "step": 8045 - }, - { - "epoch": 0.41, - "grad_norm": 0.8462067143572174, - "learning_rate": 1.336141052739883e-05, - "loss": 0.2185, - "step": 8046 - }, - { - "epoch": 0.41, - "grad_norm": 0.895323741516153, - "learning_rate": 1.335985934757021e-05, - "loss": 0.2025, - "step": 8047 - }, - { - "epoch": 0.41, - "grad_norm": 1.1842591109627791, - "learning_rate": 1.3358308076605213e-05, - "loss": 0.1718, - "step": 8048 - }, - { - "epoch": 0.41, - "grad_norm": 1.358975880195966, - "learning_rate": 1.3356756714545917e-05, - "loss": 0.1839, - "step": 8049 - }, - { - "epoch": 0.41, - "grad_norm": 1.2801457031286867, - "learning_rate": 1.3355205261434408e-05, - "loss": 0.1898, - "step": 8050 - }, - { - "epoch": 0.41, - "grad_norm": 0.9469890610484961, - "learning_rate": 1.3353653717312767e-05, - "loss": 0.1808, - "step": 8051 - }, - { - "epoch": 0.41, - "grad_norm": 2.865045301335842, - "learning_rate": 1.335210208222308e-05, - "loss": 0.2102, - "step": 8052 - }, - { - "epoch": 0.41, - "grad_norm": 1.0032497809315304, - "learning_rate": 1.3350550356207435e-05, - "loss": 0.2145, - "step": 8053 - }, - { - "epoch": 0.41, - "grad_norm": 1.1154342561170936, - "learning_rate": 1.3348998539307919e-05, - "loss": 0.1813, - "step": 8054 - }, - { - "epoch": 0.41, - "grad_norm": 1.0187690617804463, - "learning_rate": 1.334744663156663e-05, - "loss": 0.1844, - "step": 8055 - }, - { - "epoch": 0.41, - "grad_norm": 1.0827207692673682, - "learning_rate": 1.3345894633025662e-05, - "loss": 0.2129, - "step": 8056 - }, - { - "epoch": 0.41, - "grad_norm": 1.2555042431760213, - "learning_rate": 1.3344342543727115e-05, - "loss": 0.2022, - "step": 8057 - }, - { - "epoch": 0.41, - "grad_norm": 0.8291008640026881, - "learning_rate": 1.3342790363713088e-05, - "loss": 0.182, - "step": 8058 - }, - { - "epoch": 0.41, - "grad_norm": 1.4148639718049554, - "learning_rate": 1.3341238093025679e-05, - "loss": 0.1941, - "step": 8059 - }, - { - "epoch": 0.41, - "grad_norm": 0.9735277681041818, - "learning_rate": 1.3339685731707002e-05, - "loss": 0.1892, - "step": 8060 - }, - { - "epoch": 0.41, - "grad_norm": 0.923651269778692, - "learning_rate": 1.3338133279799159e-05, - "loss": 0.1852, - "step": 8061 - }, - { - "epoch": 0.41, - "grad_norm": 1.071944681667006, - "learning_rate": 1.3336580737344265e-05, - "loss": 0.2174, - "step": 8062 - }, - { - "epoch": 0.41, - "grad_norm": 1.6682334959961802, - "learning_rate": 1.3335028104384424e-05, - "loss": 0.2173, - "step": 8063 - }, - { - "epoch": 0.41, - "grad_norm": 1.2152012251443567, - "learning_rate": 1.3333475380961762e-05, - "loss": 0.1892, - "step": 8064 - }, - { - "epoch": 0.41, - "grad_norm": 1.0145161664772557, - "learning_rate": 1.3331922567118394e-05, - "loss": 0.2089, - "step": 8065 - }, - { - "epoch": 0.41, - "grad_norm": 1.1507511222245936, - "learning_rate": 1.3330369662896437e-05, - "loss": 0.214, - "step": 8066 - }, - { - "epoch": 0.41, - "grad_norm": 0.9486939578167846, - "learning_rate": 1.3328816668338012e-05, - "loss": 0.2021, - "step": 8067 - }, - { - "epoch": 0.41, - "grad_norm": 1.114137032112211, - "learning_rate": 1.3327263583485248e-05, - "loss": 0.1919, - "step": 8068 - }, - { - "epoch": 0.41, - "grad_norm": 1.1410597004242617, - "learning_rate": 1.3325710408380272e-05, - "loss": 0.2153, - "step": 8069 - }, - { - "epoch": 0.41, - "grad_norm": 0.8798023649166551, - "learning_rate": 1.3324157143065213e-05, - "loss": 0.195, - "step": 8070 - }, - { - "epoch": 0.41, - "grad_norm": 0.9157107395312563, - "learning_rate": 1.3322603787582205e-05, - "loss": 0.1989, - "step": 8071 - }, - { - "epoch": 0.41, - "grad_norm": 0.8099024728710942, - "learning_rate": 1.3321050341973378e-05, - "loss": 0.2094, - "step": 8072 - }, - { - "epoch": 0.41, - "grad_norm": 1.0920822863428858, - "learning_rate": 1.3319496806280877e-05, - "loss": 0.1846, - "step": 8073 - }, - { - "epoch": 0.41, - "grad_norm": 2.260886118708162, - "learning_rate": 1.3317943180546836e-05, - "loss": 0.1959, - "step": 8074 - }, - { - "epoch": 0.41, - "grad_norm": 1.0141664566644863, - "learning_rate": 1.3316389464813397e-05, - "loss": 0.1988, - "step": 8075 - }, - { - "epoch": 0.41, - "grad_norm": 0.8211926777508631, - "learning_rate": 1.3314835659122707e-05, - "loss": 0.1757, - "step": 8076 - }, - { - "epoch": 0.41, - "grad_norm": 0.949669333859531, - "learning_rate": 1.3313281763516915e-05, - "loss": 0.1939, - "step": 8077 - }, - { - "epoch": 0.41, - "grad_norm": 0.9081621642327156, - "learning_rate": 1.3311727778038165e-05, - "loss": 0.1835, - "step": 8078 - }, - { - "epoch": 0.41, - "grad_norm": 0.9381499951139102, - "learning_rate": 1.3310173702728614e-05, - "loss": 0.196, - "step": 8079 - }, - { - "epoch": 0.41, - "grad_norm": 1.4136313347465652, - "learning_rate": 1.3308619537630416e-05, - "loss": 0.2122, - "step": 8080 - }, - { - "epoch": 0.41, - "grad_norm": 0.7596837076372943, - "learning_rate": 1.3307065282785723e-05, - "loss": 0.2217, - "step": 8081 - }, - { - "epoch": 0.41, - "grad_norm": 1.1243254028756764, - "learning_rate": 1.33055109382367e-05, - "loss": 0.1903, - "step": 8082 - }, - { - "epoch": 0.41, - "grad_norm": 0.9040116470936199, - "learning_rate": 1.3303956504025506e-05, - "loss": 0.2046, - "step": 8083 - }, - { - "epoch": 0.41, - "grad_norm": 1.1134412438267782, - "learning_rate": 1.3302401980194303e-05, - "loss": 0.1973, - "step": 8084 - }, - { - "epoch": 0.41, - "grad_norm": 1.311444185148507, - "learning_rate": 1.3300847366785261e-05, - "loss": 0.2263, - "step": 8085 - }, - { - "epoch": 0.41, - "grad_norm": 1.6291026055605924, - "learning_rate": 1.3299292663840546e-05, - "loss": 0.205, - "step": 8086 - }, - { - "epoch": 0.41, - "grad_norm": 1.1327737332574543, - "learning_rate": 1.3297737871402333e-05, - "loss": 0.2018, - "step": 8087 - }, - { - "epoch": 0.41, - "grad_norm": 0.9299835949328888, - "learning_rate": 1.3296182989512794e-05, - "loss": 0.1824, - "step": 8088 - }, - { - "epoch": 0.41, - "grad_norm": 0.8818644971901961, - "learning_rate": 1.3294628018214105e-05, - "loss": 0.2143, - "step": 8089 - }, - { - "epoch": 0.41, - "grad_norm": 0.9567065848980094, - "learning_rate": 1.3293072957548443e-05, - "loss": 0.1917, - "step": 8090 - }, - { - "epoch": 0.41, - "grad_norm": 0.9484045798329989, - "learning_rate": 1.3291517807557994e-05, - "loss": 0.1908, - "step": 8091 - }, - { - "epoch": 0.41, - "grad_norm": 1.0639832334067085, - "learning_rate": 1.3289962568284937e-05, - "loss": 0.1944, - "step": 8092 - }, - { - "epoch": 0.41, - "grad_norm": 1.2129082756641967, - "learning_rate": 1.3288407239771462e-05, - "loss": 0.1931, - "step": 8093 - }, - { - "epoch": 0.41, - "grad_norm": 1.5496534950187957, - "learning_rate": 1.328685182205975e-05, - "loss": 0.1984, - "step": 8094 - }, - { - "epoch": 0.41, - "grad_norm": 0.7701213256886666, - "learning_rate": 1.3285296315192e-05, - "loss": 0.1883, - "step": 8095 - }, - { - "epoch": 0.41, - "grad_norm": 1.2667584968529702, - "learning_rate": 1.32837407192104e-05, - "loss": 0.196, - "step": 8096 - }, - { - "epoch": 0.41, - "grad_norm": 1.4217626914391113, - "learning_rate": 1.3282185034157151e-05, - "loss": 0.2033, - "step": 8097 - }, - { - "epoch": 0.41, - "grad_norm": 0.9766157940575106, - "learning_rate": 1.3280629260074442e-05, - "loss": 0.1865, - "step": 8098 - }, - { - "epoch": 0.41, - "grad_norm": 0.7803982157049401, - "learning_rate": 1.3279073397004485e-05, - "loss": 0.1695, - "step": 8099 - }, - { - "epoch": 0.41, - "grad_norm": 1.0041553192413004, - "learning_rate": 1.3277517444989476e-05, - "loss": 0.1876, - "step": 8100 - }, - { - "epoch": 0.41, - "grad_norm": 0.8422004848718346, - "learning_rate": 1.327596140407162e-05, - "loss": 0.195, - "step": 8101 - }, - { - "epoch": 0.41, - "grad_norm": 1.126805813050145, - "learning_rate": 1.3274405274293122e-05, - "loss": 0.1922, - "step": 8102 - }, - { - "epoch": 0.41, - "grad_norm": 1.3489940250126804, - "learning_rate": 1.3272849055696203e-05, - "loss": 0.1994, - "step": 8103 - }, - { - "epoch": 0.41, - "grad_norm": 1.2051419046998122, - "learning_rate": 1.3271292748323064e-05, - "loss": 0.2189, - "step": 8104 - }, - { - "epoch": 0.41, - "grad_norm": 0.9861422782076765, - "learning_rate": 1.3269736352215925e-05, - "loss": 0.2058, - "step": 8105 - }, - { - "epoch": 0.41, - "grad_norm": 0.7770971461313525, - "learning_rate": 1.3268179867417004e-05, - "loss": 0.1905, - "step": 8106 - }, - { - "epoch": 0.41, - "grad_norm": 0.9277715775572186, - "learning_rate": 1.3266623293968518e-05, - "loss": 0.1919, - "step": 8107 - }, - { - "epoch": 0.41, - "grad_norm": 0.9695669138804043, - "learning_rate": 1.326506663191269e-05, - "loss": 0.1894, - "step": 8108 - }, - { - "epoch": 0.41, - "grad_norm": 0.9098778962285857, - "learning_rate": 1.3263509881291748e-05, - "loss": 0.1768, - "step": 8109 - }, - { - "epoch": 0.41, - "grad_norm": 0.8360349993777317, - "learning_rate": 1.3261953042147915e-05, - "loss": 0.1895, - "step": 8110 - }, - { - "epoch": 0.41, - "grad_norm": 1.147216914825333, - "learning_rate": 1.326039611452342e-05, - "loss": 0.1896, - "step": 8111 - }, - { - "epoch": 0.41, - "grad_norm": 0.8526896466281547, - "learning_rate": 1.3258839098460496e-05, - "loss": 0.1937, - "step": 8112 - }, - { - "epoch": 0.41, - "grad_norm": 1.1435652297643273, - "learning_rate": 1.325728199400138e-05, - "loss": 0.2093, - "step": 8113 - }, - { - "epoch": 0.41, - "grad_norm": 0.8185991738538979, - "learning_rate": 1.3255724801188305e-05, - "loss": 0.1922, - "step": 8114 - }, - { - "epoch": 0.41, - "grad_norm": 1.0561514708661517, - "learning_rate": 1.325416752006351e-05, - "loss": 0.191, - "step": 8115 - }, - { - "epoch": 0.41, - "grad_norm": 0.7351805421960459, - "learning_rate": 1.3252610150669236e-05, - "loss": 0.1879, - "step": 8116 - }, - { - "epoch": 0.41, - "grad_norm": 2.522389693987051, - "learning_rate": 1.3251052693047732e-05, - "loss": 0.1902, - "step": 8117 - }, - { - "epoch": 0.41, - "grad_norm": 0.9716643739300673, - "learning_rate": 1.324949514724124e-05, - "loss": 0.1864, - "step": 8118 - }, - { - "epoch": 0.41, - "grad_norm": 1.372780519038876, - "learning_rate": 1.3247937513292007e-05, - "loss": 0.2246, - "step": 8119 - }, - { - "epoch": 0.41, - "grad_norm": 1.1258389056492442, - "learning_rate": 1.3246379791242284e-05, - "loss": 0.2059, - "step": 8120 - }, - { - "epoch": 0.41, - "grad_norm": 0.6779761807315811, - "learning_rate": 1.3244821981134326e-05, - "loss": 0.1742, - "step": 8121 - }, - { - "epoch": 0.41, - "grad_norm": 0.9024791913390288, - "learning_rate": 1.324326408301039e-05, - "loss": 0.214, - "step": 8122 - }, - { - "epoch": 0.41, - "grad_norm": 0.9387075044127428, - "learning_rate": 1.3241706096912731e-05, - "loss": 0.1813, - "step": 8123 - }, - { - "epoch": 0.41, - "grad_norm": 1.0895957947682025, - "learning_rate": 1.324014802288361e-05, - "loss": 0.2017, - "step": 8124 - }, - { - "epoch": 0.41, - "grad_norm": 0.8091951199061025, - "learning_rate": 1.3238589860965295e-05, - "loss": 0.1847, - "step": 8125 - }, - { - "epoch": 0.41, - "grad_norm": 0.9760857115992949, - "learning_rate": 1.3237031611200044e-05, - "loss": 0.2073, - "step": 8126 - }, - { - "epoch": 0.41, - "grad_norm": 0.8664661099865184, - "learning_rate": 1.3235473273630128e-05, - "loss": 0.1859, - "step": 8127 - }, - { - "epoch": 0.41, - "grad_norm": 1.08176486103619, - "learning_rate": 1.3233914848297817e-05, - "loss": 0.1751, - "step": 8128 - }, - { - "epoch": 0.41, - "grad_norm": 1.0521867252969814, - "learning_rate": 1.3232356335245381e-05, - "loss": 0.1917, - "step": 8129 - }, - { - "epoch": 0.41, - "grad_norm": 1.8501926151086432, - "learning_rate": 1.3230797734515102e-05, - "loss": 0.1832, - "step": 8130 - }, - { - "epoch": 0.41, - "grad_norm": 0.8890514909572997, - "learning_rate": 1.3229239046149249e-05, - "loss": 0.1979, - "step": 8131 - }, - { - "epoch": 0.41, - "grad_norm": 1.0085527273462358, - "learning_rate": 1.3227680270190106e-05, - "loss": 0.1977, - "step": 8132 - }, - { - "epoch": 0.41, - "grad_norm": 1.7789829508973025, - "learning_rate": 1.322612140667995e-05, - "loss": 0.1936, - "step": 8133 - }, - { - "epoch": 0.41, - "grad_norm": 1.0270694342051736, - "learning_rate": 1.3224562455661069e-05, - "loss": 0.2294, - "step": 8134 - }, - { - "epoch": 0.41, - "grad_norm": 0.8857809528499025, - "learning_rate": 1.3223003417175755e-05, - "loss": 0.2033, - "step": 8135 - }, - { - "epoch": 0.41, - "grad_norm": 0.8872779913474352, - "learning_rate": 1.3221444291266288e-05, - "loss": 0.209, - "step": 8136 - }, - { - "epoch": 0.41, - "grad_norm": 1.9116143631173013, - "learning_rate": 1.3219885077974959e-05, - "loss": 0.1896, - "step": 8137 - }, - { - "epoch": 0.41, - "grad_norm": 0.8408863730472417, - "learning_rate": 1.321832577734407e-05, - "loss": 0.2315, - "step": 8138 - }, - { - "epoch": 0.41, - "grad_norm": 0.9435006909990574, - "learning_rate": 1.3216766389415909e-05, - "loss": 0.2047, - "step": 8139 - }, - { - "epoch": 0.41, - "grad_norm": 0.9122559504169311, - "learning_rate": 1.321520691423278e-05, - "loss": 0.1946, - "step": 8140 - }, - { - "epoch": 0.41, - "grad_norm": 0.9570472472937975, - "learning_rate": 1.3213647351836985e-05, - "loss": 0.1801, - "step": 8141 - }, - { - "epoch": 0.41, - "grad_norm": 1.3682519206358967, - "learning_rate": 1.3212087702270817e-05, - "loss": 0.2392, - "step": 8142 - }, - { - "epoch": 0.41, - "grad_norm": 0.7131643812133875, - "learning_rate": 1.3210527965576594e-05, - "loss": 0.165, - "step": 8143 - }, - { - "epoch": 0.41, - "grad_norm": 1.0247955772632986, - "learning_rate": 1.3208968141796616e-05, - "loss": 0.169, - "step": 8144 - }, - { - "epoch": 0.41, - "grad_norm": 0.7389961092282391, - "learning_rate": 1.3207408230973198e-05, - "loss": 0.1991, - "step": 8145 - }, - { - "epoch": 0.41, - "grad_norm": 0.987690758612029, - "learning_rate": 1.3205848233148649e-05, - "loss": 0.2091, - "step": 8146 - }, - { - "epoch": 0.41, - "grad_norm": 1.162320781384892, - "learning_rate": 1.3204288148365285e-05, - "loss": 0.1972, - "step": 8147 - }, - { - "epoch": 0.41, - "grad_norm": 1.259879163113445, - "learning_rate": 1.3202727976665426e-05, - "loss": 0.1776, - "step": 8148 - }, - { - "epoch": 0.41, - "grad_norm": 2.6173310688838565, - "learning_rate": 1.320116771809139e-05, - "loss": 0.2075, - "step": 8149 - }, - { - "epoch": 0.41, - "grad_norm": 1.15319608640954, - "learning_rate": 1.3199607372685497e-05, - "loss": 0.2367, - "step": 8150 - }, - { - "epoch": 0.41, - "grad_norm": 1.1085735482195873, - "learning_rate": 1.3198046940490072e-05, - "loss": 0.1925, - "step": 8151 - }, - { - "epoch": 0.41, - "grad_norm": 0.9526619299240466, - "learning_rate": 1.3196486421547447e-05, - "loss": 0.2255, - "step": 8152 - }, - { - "epoch": 0.41, - "grad_norm": 1.186250935829803, - "learning_rate": 1.3194925815899946e-05, - "loss": 0.1938, - "step": 8153 - }, - { - "epoch": 0.41, - "grad_norm": 0.9462820876927331, - "learning_rate": 1.3193365123589904e-05, - "loss": 0.2193, - "step": 8154 - }, - { - "epoch": 0.41, - "grad_norm": 1.0680716000128938, - "learning_rate": 1.3191804344659647e-05, - "loss": 0.2051, - "step": 8155 - }, - { - "epoch": 0.41, - "grad_norm": 0.890509111019689, - "learning_rate": 1.319024347915152e-05, - "loss": 0.197, - "step": 8156 - }, - { - "epoch": 0.41, - "grad_norm": 0.9695353673509965, - "learning_rate": 1.3188682527107856e-05, - "loss": 0.2027, - "step": 8157 - }, - { - "epoch": 0.41, - "grad_norm": 0.8790321917308074, - "learning_rate": 1.3187121488571001e-05, - "loss": 0.1975, - "step": 8158 - }, - { - "epoch": 0.41, - "grad_norm": 1.0904526851948093, - "learning_rate": 1.3185560363583294e-05, - "loss": 0.2031, - "step": 8159 - }, - { - "epoch": 0.41, - "grad_norm": 1.0304998013343536, - "learning_rate": 1.3183999152187084e-05, - "loss": 0.1907, - "step": 8160 - }, - { - "epoch": 0.42, - "grad_norm": 1.3517359749264424, - "learning_rate": 1.3182437854424716e-05, - "loss": 0.2229, - "step": 8161 - }, - { - "epoch": 0.42, - "grad_norm": 0.9239570982372851, - "learning_rate": 1.3180876470338545e-05, - "loss": 0.2058, - "step": 8162 - }, - { - "epoch": 0.42, - "grad_norm": 0.8717308844191373, - "learning_rate": 1.3179314999970915e-05, - "loss": 0.1981, - "step": 8163 - }, - { - "epoch": 0.42, - "grad_norm": 0.8246230871063063, - "learning_rate": 1.3177753443364188e-05, - "loss": 0.1909, - "step": 8164 - }, - { - "epoch": 0.42, - "grad_norm": 2.4695548290453426, - "learning_rate": 1.317619180056072e-05, - "loss": 0.1985, - "step": 8165 - }, - { - "epoch": 0.42, - "grad_norm": 0.8400845928983217, - "learning_rate": 1.317463007160287e-05, - "loss": 0.1846, - "step": 8166 - }, - { - "epoch": 0.42, - "grad_norm": 0.7093872820281457, - "learning_rate": 1.3173068256533e-05, - "loss": 0.1812, - "step": 8167 - }, - { - "epoch": 0.42, - "grad_norm": 0.9064441522202039, - "learning_rate": 1.3171506355393473e-05, - "loss": 0.2122, - "step": 8168 - }, - { - "epoch": 0.42, - "grad_norm": 1.367856901127145, - "learning_rate": 1.3169944368226655e-05, - "loss": 0.2047, - "step": 8169 - }, - { - "epoch": 0.42, - "grad_norm": 0.8530899850369583, - "learning_rate": 1.3168382295074923e-05, - "loss": 0.1974, - "step": 8170 - }, - { - "epoch": 0.42, - "grad_norm": 0.9398278061149311, - "learning_rate": 1.316682013598064e-05, - "loss": 0.1778, - "step": 8171 - }, - { - "epoch": 0.42, - "grad_norm": 1.2818932363910829, - "learning_rate": 1.3165257890986178e-05, - "loss": 0.1952, - "step": 8172 - }, - { - "epoch": 0.42, - "grad_norm": 0.7649808941738352, - "learning_rate": 1.3163695560133922e-05, - "loss": 0.198, - "step": 8173 - }, - { - "epoch": 0.42, - "grad_norm": 1.2894358803233346, - "learning_rate": 1.3162133143466242e-05, - "loss": 0.1856, - "step": 8174 - }, - { - "epoch": 0.42, - "grad_norm": 1.665193046043179, - "learning_rate": 1.3160570641025526e-05, - "loss": 0.1955, - "step": 8175 - }, - { - "epoch": 0.42, - "grad_norm": 1.157724088983787, - "learning_rate": 1.3159008052854147e-05, - "loss": 0.1865, - "step": 8176 - }, - { - "epoch": 0.42, - "grad_norm": 1.0373369719316299, - "learning_rate": 1.3157445378994498e-05, - "loss": 0.2099, - "step": 8177 - }, - { - "epoch": 0.42, - "grad_norm": 1.8229843102712258, - "learning_rate": 1.3155882619488967e-05, - "loss": 0.1947, - "step": 8178 - }, - { - "epoch": 0.42, - "grad_norm": 1.0086904947534578, - "learning_rate": 1.315431977437994e-05, - "loss": 0.2151, - "step": 8179 - }, - { - "epoch": 0.42, - "grad_norm": 1.1766623038489123, - "learning_rate": 1.3152756843709814e-05, - "loss": 0.1941, - "step": 8180 - }, - { - "epoch": 0.42, - "grad_norm": 4.190846556534522, - "learning_rate": 1.3151193827520975e-05, - "loss": 0.1877, - "step": 8181 - }, - { - "epoch": 0.42, - "grad_norm": 1.2251108654586467, - "learning_rate": 1.3149630725855828e-05, - "loss": 0.2026, - "step": 8182 - }, - { - "epoch": 0.42, - "grad_norm": 1.0803278763437667, - "learning_rate": 1.314806753875677e-05, - "loss": 0.1935, - "step": 8183 - }, - { - "epoch": 0.42, - "grad_norm": 0.9777490138647087, - "learning_rate": 1.3146504266266202e-05, - "loss": 0.2, - "step": 8184 - }, - { - "epoch": 0.42, - "grad_norm": 0.8264047600720581, - "learning_rate": 1.3144940908426532e-05, - "loss": 0.1841, - "step": 8185 - }, - { - "epoch": 0.42, - "grad_norm": 1.5967147063009872, - "learning_rate": 1.3143377465280155e-05, - "loss": 0.1837, - "step": 8186 - }, - { - "epoch": 0.42, - "grad_norm": 0.9558789187028566, - "learning_rate": 1.3141813936869494e-05, - "loss": 0.2139, - "step": 8187 - }, - { - "epoch": 0.42, - "grad_norm": 0.8320678707779134, - "learning_rate": 1.314025032323695e-05, - "loss": 0.201, - "step": 8188 - }, - { - "epoch": 0.42, - "grad_norm": 0.9471377988177363, - "learning_rate": 1.3138686624424937e-05, - "loss": 0.2062, - "step": 8189 - }, - { - "epoch": 0.42, - "grad_norm": 0.9599458958587899, - "learning_rate": 1.313712284047587e-05, - "loss": 0.1865, - "step": 8190 - }, - { - "epoch": 0.42, - "grad_norm": 1.3681755844392067, - "learning_rate": 1.3135558971432172e-05, - "loss": 0.1965, - "step": 8191 - }, - { - "epoch": 0.42, - "grad_norm": 0.8330320087338403, - "learning_rate": 1.3133995017336259e-05, - "loss": 0.2007, - "step": 8192 - }, - { - "epoch": 0.42, - "grad_norm": 0.9023380179478936, - "learning_rate": 1.3132430978230555e-05, - "loss": 0.182, - "step": 8193 - }, - { - "epoch": 0.42, - "grad_norm": 2.4851398984555777, - "learning_rate": 1.3130866854157482e-05, - "loss": 0.1946, - "step": 8194 - }, - { - "epoch": 0.42, - "grad_norm": 0.8570937044939294, - "learning_rate": 1.312930264515947e-05, - "loss": 0.1703, - "step": 8195 - }, - { - "epoch": 0.42, - "grad_norm": 2.3065861691308562, - "learning_rate": 1.3127738351278946e-05, - "loss": 0.1996, - "step": 8196 - }, - { - "epoch": 0.42, - "grad_norm": 1.3622707775354201, - "learning_rate": 1.3126173972558345e-05, - "loss": 0.1964, - "step": 8197 - }, - { - "epoch": 0.42, - "grad_norm": 1.1894897579210884, - "learning_rate": 1.3124609509040095e-05, - "loss": 0.1916, - "step": 8198 - }, - { - "epoch": 0.42, - "grad_norm": 1.0950019770268824, - "learning_rate": 1.3123044960766638e-05, - "loss": 0.1734, - "step": 8199 - }, - { - "epoch": 0.42, - "grad_norm": 1.4604954107534205, - "learning_rate": 1.3121480327780409e-05, - "loss": 0.1936, - "step": 8200 - }, - { - "epoch": 0.42, - "grad_norm": 1.0583170625721745, - "learning_rate": 1.311991561012385e-05, - "loss": 0.1843, - "step": 8201 - }, - { - "epoch": 0.42, - "grad_norm": 2.0871249935829725, - "learning_rate": 1.3118350807839404e-05, - "loss": 0.2015, - "step": 8202 - }, - { - "epoch": 0.42, - "grad_norm": 1.1809231282154058, - "learning_rate": 1.3116785920969517e-05, - "loss": 0.1884, - "step": 8203 - }, - { - "epoch": 0.42, - "grad_norm": 1.1723187003173714, - "learning_rate": 1.3115220949556635e-05, - "loss": 0.1776, - "step": 8204 - }, - { - "epoch": 0.42, - "grad_norm": 1.4721516041880833, - "learning_rate": 1.3113655893643208e-05, - "loss": 0.1993, - "step": 8205 - }, - { - "epoch": 0.42, - "grad_norm": 1.3186840370466455, - "learning_rate": 1.3112090753271693e-05, - "loss": 0.1927, - "step": 8206 - }, - { - "epoch": 0.42, - "grad_norm": 1.0011682661603702, - "learning_rate": 1.3110525528484535e-05, - "loss": 0.1978, - "step": 8207 - }, - { - "epoch": 0.42, - "grad_norm": 1.07523059426114, - "learning_rate": 1.3108960219324201e-05, - "loss": 0.1978, - "step": 8208 - }, - { - "epoch": 0.42, - "grad_norm": 1.2301338016941046, - "learning_rate": 1.3107394825833142e-05, - "loss": 0.192, - "step": 8209 - }, - { - "epoch": 0.42, - "grad_norm": 1.430169746032634, - "learning_rate": 1.3105829348053824e-05, - "loss": 0.2017, - "step": 8210 - }, - { - "epoch": 0.42, - "grad_norm": 0.9970915919775609, - "learning_rate": 1.310426378602871e-05, - "loss": 0.202, - "step": 8211 - }, - { - "epoch": 0.42, - "grad_norm": 1.7391855967398469, - "learning_rate": 1.3102698139800266e-05, - "loss": 0.2163, - "step": 8212 - }, - { - "epoch": 0.42, - "grad_norm": 1.366594790036297, - "learning_rate": 1.3101132409410957e-05, - "loss": 0.2044, - "step": 8213 - }, - { - "epoch": 0.42, - "grad_norm": 1.329529746225666, - "learning_rate": 1.309956659490326e-05, - "loss": 0.2073, - "step": 8214 - }, - { - "epoch": 0.42, - "grad_norm": 0.864839755199251, - "learning_rate": 1.3098000696319642e-05, - "loss": 0.1674, - "step": 8215 - }, - { - "epoch": 0.42, - "grad_norm": 0.9127792797131793, - "learning_rate": 1.3096434713702579e-05, - "loss": 0.1944, - "step": 8216 - }, - { - "epoch": 0.42, - "grad_norm": 0.8344816953431473, - "learning_rate": 1.309486864709455e-05, - "loss": 0.1928, - "step": 8217 - }, - { - "epoch": 0.42, - "grad_norm": 0.7475117512260006, - "learning_rate": 1.3093302496538036e-05, - "loss": 0.1854, - "step": 8218 - }, - { - "epoch": 0.42, - "grad_norm": 0.8922062545828179, - "learning_rate": 1.3091736262075516e-05, - "loss": 0.182, - "step": 8219 - }, - { - "epoch": 0.42, - "grad_norm": 1.0794784191756126, - "learning_rate": 1.3090169943749475e-05, - "loss": 0.2091, - "step": 8220 - }, - { - "epoch": 0.42, - "grad_norm": 1.1386734232776148, - "learning_rate": 1.3088603541602401e-05, - "loss": 0.1989, - "step": 8221 - }, - { - "epoch": 0.42, - "grad_norm": 0.97328561492684, - "learning_rate": 1.3087037055676782e-05, - "loss": 0.2353, - "step": 8222 - }, - { - "epoch": 0.42, - "grad_norm": 0.7729143032057222, - "learning_rate": 1.3085470486015106e-05, - "loss": 0.2031, - "step": 8223 - }, - { - "epoch": 0.42, - "grad_norm": 0.9819343230584475, - "learning_rate": 1.308390383265987e-05, - "loss": 0.2244, - "step": 8224 - }, - { - "epoch": 0.42, - "grad_norm": 0.8593104249308028, - "learning_rate": 1.3082337095653569e-05, - "loss": 0.1637, - "step": 8225 - }, - { - "epoch": 0.42, - "grad_norm": 1.200272710074114, - "learning_rate": 1.30807702750387e-05, - "loss": 0.2117, - "step": 8226 - }, - { - "epoch": 0.42, - "grad_norm": 1.3002822205995266, - "learning_rate": 1.307920337085776e-05, - "loss": 0.2086, - "step": 8227 - }, - { - "epoch": 0.42, - "grad_norm": 0.870542691222419, - "learning_rate": 1.3077636383153258e-05, - "loss": 0.1817, - "step": 8228 - }, - { - "epoch": 0.42, - "grad_norm": 0.9153276421843458, - "learning_rate": 1.3076069311967696e-05, - "loss": 0.2096, - "step": 8229 - }, - { - "epoch": 0.42, - "grad_norm": 1.2293731778608028, - "learning_rate": 1.3074502157343575e-05, - "loss": 0.2198, - "step": 8230 - }, - { - "epoch": 0.42, - "grad_norm": 0.8688580667543977, - "learning_rate": 1.3072934919323414e-05, - "loss": 0.1632, - "step": 8231 - }, - { - "epoch": 0.42, - "grad_norm": 1.409556841159473, - "learning_rate": 1.307136759794972e-05, - "loss": 0.1991, - "step": 8232 - }, - { - "epoch": 0.42, - "grad_norm": 0.8755205631804428, - "learning_rate": 1.3069800193265005e-05, - "loss": 0.1977, - "step": 8233 - }, - { - "epoch": 0.42, - "grad_norm": 0.9340942948944939, - "learning_rate": 1.3068232705311784e-05, - "loss": 0.188, - "step": 8234 - }, - { - "epoch": 0.42, - "grad_norm": 0.9190292523868503, - "learning_rate": 1.3066665134132584e-05, - "loss": 0.2043, - "step": 8235 - }, - { - "epoch": 0.42, - "grad_norm": 1.1836600836195899, - "learning_rate": 1.3065097479769915e-05, - "loss": 0.2094, - "step": 8236 - }, - { - "epoch": 0.42, - "grad_norm": 0.8521202367831131, - "learning_rate": 1.3063529742266304e-05, - "loss": 0.2102, - "step": 8237 - }, - { - "epoch": 0.42, - "grad_norm": 1.0102823962086578, - "learning_rate": 1.3061961921664276e-05, - "loss": 0.1673, - "step": 8238 - }, - { - "epoch": 0.42, - "grad_norm": 0.8387168527811484, - "learning_rate": 1.3060394018006357e-05, - "loss": 0.1924, - "step": 8239 - }, - { - "epoch": 0.42, - "grad_norm": 1.0532826607045784, - "learning_rate": 1.305882603133508e-05, - "loss": 0.1784, - "step": 8240 - }, - { - "epoch": 0.42, - "grad_norm": 0.8118358299802393, - "learning_rate": 1.305725796169297e-05, - "loss": 0.1818, - "step": 8241 - }, - { - "epoch": 0.42, - "grad_norm": 0.9002539298249029, - "learning_rate": 1.3055689809122569e-05, - "loss": 0.2066, - "step": 8242 - }, - { - "epoch": 0.42, - "grad_norm": 0.9585852126504476, - "learning_rate": 1.3054121573666408e-05, - "loss": 0.168, - "step": 8243 - }, - { - "epoch": 0.42, - "grad_norm": 0.8460014460500977, - "learning_rate": 1.3052553255367024e-05, - "loss": 0.1781, - "step": 8244 - }, - { - "epoch": 0.42, - "grad_norm": 1.0891528902857406, - "learning_rate": 1.3050984854266963e-05, - "loss": 0.197, - "step": 8245 - }, - { - "epoch": 0.42, - "grad_norm": 0.7424589594293574, - "learning_rate": 1.3049416370408768e-05, - "loss": 0.1609, - "step": 8246 - }, - { - "epoch": 0.42, - "grad_norm": 1.135546053361693, - "learning_rate": 1.3047847803834976e-05, - "loss": 0.2029, - "step": 8247 - }, - { - "epoch": 0.42, - "grad_norm": 1.0569142092023314, - "learning_rate": 1.3046279154588146e-05, - "loss": 0.2046, - "step": 8248 - }, - { - "epoch": 0.42, - "grad_norm": 1.7796741457759377, - "learning_rate": 1.3044710422710818e-05, - "loss": 0.2229, - "step": 8249 - }, - { - "epoch": 0.42, - "grad_norm": 1.0588020023041, - "learning_rate": 1.3043141608245551e-05, - "loss": 0.2038, - "step": 8250 - }, - { - "epoch": 0.42, - "grad_norm": 1.1450205997702398, - "learning_rate": 1.3041572711234893e-05, - "loss": 0.1869, - "step": 8251 - }, - { - "epoch": 0.42, - "grad_norm": 1.2504897105794466, - "learning_rate": 1.3040003731721402e-05, - "loss": 0.2002, - "step": 8252 - }, - { - "epoch": 0.42, - "grad_norm": 0.7303802878941769, - "learning_rate": 1.3038434669747644e-05, - "loss": 0.1793, - "step": 8253 - }, - { - "epoch": 0.42, - "grad_norm": 1.1163159311554531, - "learning_rate": 1.3036865525356168e-05, - "loss": 0.1896, - "step": 8254 - }, - { - "epoch": 0.42, - "grad_norm": 1.3143525222797499, - "learning_rate": 1.3035296298589549e-05, - "loss": 0.2188, - "step": 8255 - }, - { - "epoch": 0.42, - "grad_norm": 1.1341176124382015, - "learning_rate": 1.3033726989490341e-05, - "loss": 0.2011, - "step": 8256 - }, - { - "epoch": 0.42, - "grad_norm": 1.1201148079058703, - "learning_rate": 1.303215759810112e-05, - "loss": 0.1983, - "step": 8257 - }, - { - "epoch": 0.42, - "grad_norm": 1.014726310011175, - "learning_rate": 1.3030588124464453e-05, - "loss": 0.1923, - "step": 8258 - }, - { - "epoch": 0.42, - "grad_norm": 2.5739238510028213, - "learning_rate": 1.302901856862291e-05, - "loss": 0.1954, - "step": 8259 - }, - { - "epoch": 0.42, - "grad_norm": 1.6029945998288895, - "learning_rate": 1.302744893061907e-05, - "loss": 0.2006, - "step": 8260 - }, - { - "epoch": 0.42, - "grad_norm": 1.152759348936254, - "learning_rate": 1.3025879210495505e-05, - "loss": 0.2025, - "step": 8261 - }, - { - "epoch": 0.42, - "grad_norm": 0.9133265209563681, - "learning_rate": 1.3024309408294795e-05, - "loss": 0.1915, - "step": 8262 - }, - { - "epoch": 0.42, - "grad_norm": 0.8821852254147305, - "learning_rate": 1.3022739524059521e-05, - "loss": 0.1704, - "step": 8263 - }, - { - "epoch": 0.42, - "grad_norm": 0.7904481085547684, - "learning_rate": 1.3021169557832269e-05, - "loss": 0.187, - "step": 8264 - }, - { - "epoch": 0.42, - "grad_norm": 0.9535276347236445, - "learning_rate": 1.301959950965562e-05, - "loss": 0.1792, - "step": 8265 - }, - { - "epoch": 0.42, - "grad_norm": 1.4203925883538724, - "learning_rate": 1.3018029379572163e-05, - "loss": 0.2237, - "step": 8266 - }, - { - "epoch": 0.42, - "grad_norm": 1.147777070235677, - "learning_rate": 1.3016459167624494e-05, - "loss": 0.2015, - "step": 8267 - }, - { - "epoch": 0.42, - "grad_norm": 0.8485290112978522, - "learning_rate": 1.3014888873855194e-05, - "loss": 0.2094, - "step": 8268 - }, - { - "epoch": 0.42, - "grad_norm": 0.8724810785819876, - "learning_rate": 1.3013318498306864e-05, - "loss": 0.1882, - "step": 8269 - }, - { - "epoch": 0.42, - "grad_norm": 1.0869723494866173, - "learning_rate": 1.3011748041022101e-05, - "loss": 0.2007, - "step": 8270 - }, - { - "epoch": 0.42, - "grad_norm": 0.8982814311359465, - "learning_rate": 1.3010177502043502e-05, - "loss": 0.2056, - "step": 8271 - }, - { - "epoch": 0.42, - "grad_norm": 0.9975730113802105, - "learning_rate": 1.3008606881413668e-05, - "loss": 0.2134, - "step": 8272 - }, - { - "epoch": 0.42, - "grad_norm": 1.1142646889120968, - "learning_rate": 1.3007036179175203e-05, - "loss": 0.2105, - "step": 8273 - }, - { - "epoch": 0.42, - "grad_norm": 0.8904410113100958, - "learning_rate": 1.300546539537071e-05, - "loss": 0.2045, - "step": 8274 - }, - { - "epoch": 0.42, - "grad_norm": 0.9203103644433339, - "learning_rate": 1.3003894530042803e-05, - "loss": 0.2076, - "step": 8275 - }, - { - "epoch": 0.42, - "grad_norm": 0.9444630885033088, - "learning_rate": 1.3002323583234082e-05, - "loss": 0.1697, - "step": 8276 - }, - { - "epoch": 0.42, - "grad_norm": 0.8244494027922004, - "learning_rate": 1.3000752554987166e-05, - "loss": 0.1908, - "step": 8277 - }, - { - "epoch": 0.42, - "grad_norm": 0.8985091790541165, - "learning_rate": 1.2999181445344666e-05, - "loss": 0.1874, - "step": 8278 - }, - { - "epoch": 0.42, - "grad_norm": 1.04902462783325, - "learning_rate": 1.2997610254349203e-05, - "loss": 0.1777, - "step": 8279 - }, - { - "epoch": 0.42, - "grad_norm": 0.8914789760341593, - "learning_rate": 1.299603898204339e-05, - "loss": 0.2008, - "step": 8280 - }, - { - "epoch": 0.42, - "grad_norm": 0.8652214926690659, - "learning_rate": 1.2994467628469853e-05, - "loss": 0.1843, - "step": 8281 - }, - { - "epoch": 0.42, - "grad_norm": 0.7753537627462095, - "learning_rate": 1.299289619367121e-05, - "loss": 0.1718, - "step": 8282 - }, - { - "epoch": 0.42, - "grad_norm": 0.9956931480728481, - "learning_rate": 1.299132467769009e-05, - "loss": 0.1941, - "step": 8283 - }, - { - "epoch": 0.42, - "grad_norm": 1.201701752879004, - "learning_rate": 1.2989753080569119e-05, - "loss": 0.2011, - "step": 8284 - }, - { - "epoch": 0.42, - "grad_norm": 0.9191160847591369, - "learning_rate": 1.2988181402350926e-05, - "loss": 0.1923, - "step": 8285 - }, - { - "epoch": 0.42, - "grad_norm": 0.9220969801360414, - "learning_rate": 1.2986609643078145e-05, - "loss": 0.2123, - "step": 8286 - }, - { - "epoch": 0.42, - "grad_norm": 0.9169015302630571, - "learning_rate": 1.2985037802793405e-05, - "loss": 0.2273, - "step": 8287 - }, - { - "epoch": 0.42, - "grad_norm": 0.8521753910449956, - "learning_rate": 1.298346588153935e-05, - "loss": 0.2115, - "step": 8288 - }, - { - "epoch": 0.42, - "grad_norm": 1.2120447235605898, - "learning_rate": 1.2981893879358616e-05, - "loss": 0.2047, - "step": 8289 - }, - { - "epoch": 0.42, - "grad_norm": 3.101266182978855, - "learning_rate": 1.2980321796293838e-05, - "loss": 0.2001, - "step": 8290 - }, - { - "epoch": 0.42, - "grad_norm": 1.174558001806414, - "learning_rate": 1.2978749632387665e-05, - "loss": 0.1848, - "step": 8291 - }, - { - "epoch": 0.42, - "grad_norm": 0.8953418782568198, - "learning_rate": 1.297717738768274e-05, - "loss": 0.2, - "step": 8292 - }, - { - "epoch": 0.42, - "grad_norm": 1.6475492947463053, - "learning_rate": 1.297560506222171e-05, - "loss": 0.1929, - "step": 8293 - }, - { - "epoch": 0.42, - "grad_norm": 1.157541240282237, - "learning_rate": 1.2974032656047223e-05, - "loss": 0.2041, - "step": 8294 - }, - { - "epoch": 0.42, - "grad_norm": 2.4156656473816827, - "learning_rate": 1.2972460169201933e-05, - "loss": 0.1935, - "step": 8295 - }, - { - "epoch": 0.42, - "grad_norm": 0.9467195639374285, - "learning_rate": 1.2970887601728495e-05, - "loss": 0.2219, - "step": 8296 - }, - { - "epoch": 0.42, - "grad_norm": 1.6312858345818504, - "learning_rate": 1.2969314953669563e-05, - "loss": 0.1676, - "step": 8297 - }, - { - "epoch": 0.42, - "grad_norm": 1.4278720908189264, - "learning_rate": 1.2967742225067792e-05, - "loss": 0.1909, - "step": 8298 - }, - { - "epoch": 0.42, - "grad_norm": 0.9080688833167727, - "learning_rate": 1.2966169415965847e-05, - "loss": 0.1995, - "step": 8299 - }, - { - "epoch": 0.42, - "grad_norm": 0.9863305922628434, - "learning_rate": 1.296459652640639e-05, - "loss": 0.1974, - "step": 8300 - }, - { - "epoch": 0.42, - "grad_norm": 0.9664763132904046, - "learning_rate": 1.2963023556432083e-05, - "loss": 0.1962, - "step": 8301 - }, - { - "epoch": 0.42, - "grad_norm": 0.9776353817339578, - "learning_rate": 1.2961450506085597e-05, - "loss": 0.1841, - "step": 8302 - }, - { - "epoch": 0.42, - "grad_norm": 0.8443028312632579, - "learning_rate": 1.2959877375409598e-05, - "loss": 0.1786, - "step": 8303 - }, - { - "epoch": 0.42, - "grad_norm": 1.9857253074754555, - "learning_rate": 1.2958304164446758e-05, - "loss": 0.2038, - "step": 8304 - }, - { - "epoch": 0.42, - "grad_norm": 0.8825347887344253, - "learning_rate": 1.2956730873239746e-05, - "loss": 0.203, - "step": 8305 - }, - { - "epoch": 0.42, - "grad_norm": 0.9936270761193413, - "learning_rate": 1.2955157501831248e-05, - "loss": 0.206, - "step": 8306 - }, - { - "epoch": 0.42, - "grad_norm": 0.9087815807999129, - "learning_rate": 1.2953584050263935e-05, - "loss": 0.2033, - "step": 8307 - }, - { - "epoch": 0.42, - "grad_norm": 0.9204698155344063, - "learning_rate": 1.2952010518580487e-05, - "loss": 0.1886, - "step": 8308 - }, - { - "epoch": 0.42, - "grad_norm": 1.6209462461027426, - "learning_rate": 1.2950436906823584e-05, - "loss": 0.2012, - "step": 8309 - }, - { - "epoch": 0.42, - "grad_norm": 0.9357386669354606, - "learning_rate": 1.2948863215035918e-05, - "loss": 0.195, - "step": 8310 - }, - { - "epoch": 0.42, - "grad_norm": 0.8333558655932062, - "learning_rate": 1.2947289443260172e-05, - "loss": 0.1844, - "step": 8311 - }, - { - "epoch": 0.42, - "grad_norm": 1.3236927019756342, - "learning_rate": 1.2945715591539028e-05, - "loss": 0.208, - "step": 8312 - }, - { - "epoch": 0.42, - "grad_norm": 1.0149795519327343, - "learning_rate": 1.2944141659915184e-05, - "loss": 0.1809, - "step": 8313 - }, - { - "epoch": 0.42, - "grad_norm": 0.7928022468669075, - "learning_rate": 1.2942567648431333e-05, - "loss": 0.1779, - "step": 8314 - }, - { - "epoch": 0.42, - "grad_norm": 1.2245378017402813, - "learning_rate": 1.2940993557130166e-05, - "loss": 0.1938, - "step": 8315 - }, - { - "epoch": 0.42, - "grad_norm": 0.8517131351401706, - "learning_rate": 1.2939419386054384e-05, - "loss": 0.2013, - "step": 8316 - }, - { - "epoch": 0.42, - "grad_norm": 0.9578387527059876, - "learning_rate": 1.2937845135246682e-05, - "loss": 0.1977, - "step": 8317 - }, - { - "epoch": 0.42, - "grad_norm": 1.0551568804695122, - "learning_rate": 1.2936270804749769e-05, - "loss": 0.2133, - "step": 8318 - }, - { - "epoch": 0.42, - "grad_norm": 0.849943397128867, - "learning_rate": 1.2934696394606344e-05, - "loss": 0.1761, - "step": 8319 - }, - { - "epoch": 0.42, - "grad_norm": 0.8992953513527367, - "learning_rate": 1.2933121904859111e-05, - "loss": 0.2131, - "step": 8320 - }, - { - "epoch": 0.42, - "grad_norm": 1.9666343163181608, - "learning_rate": 1.2931547335550782e-05, - "loss": 0.205, - "step": 8321 - }, - { - "epoch": 0.42, - "grad_norm": 0.8788238128220377, - "learning_rate": 1.2929972686724066e-05, - "loss": 0.1943, - "step": 8322 - }, - { - "epoch": 0.42, - "grad_norm": 0.9164644091486011, - "learning_rate": 1.2928397958421674e-05, - "loss": 0.1861, - "step": 8323 - }, - { - "epoch": 0.42, - "grad_norm": 1.0092684889010668, - "learning_rate": 1.2926823150686325e-05, - "loss": 0.1942, - "step": 8324 - }, - { - "epoch": 0.42, - "grad_norm": 1.7327146496169548, - "learning_rate": 1.2925248263560733e-05, - "loss": 0.1946, - "step": 8325 - }, - { - "epoch": 0.42, - "grad_norm": 0.9039676989792145, - "learning_rate": 1.2923673297087613e-05, - "loss": 0.1961, - "step": 8326 - }, - { - "epoch": 0.42, - "grad_norm": 1.06902187233108, - "learning_rate": 1.2922098251309694e-05, - "loss": 0.204, - "step": 8327 - }, - { - "epoch": 0.42, - "grad_norm": 0.9821026300743326, - "learning_rate": 1.2920523126269692e-05, - "loss": 0.1915, - "step": 8328 - }, - { - "epoch": 0.42, - "grad_norm": 0.905264461468117, - "learning_rate": 1.2918947922010336e-05, - "loss": 0.1825, - "step": 8329 - }, - { - "epoch": 0.42, - "grad_norm": 0.8558085429359792, - "learning_rate": 1.291737263857435e-05, - "loss": 0.1857, - "step": 8330 - }, - { - "epoch": 0.42, - "grad_norm": 0.8567074452471907, - "learning_rate": 1.2915797276004469e-05, - "loss": 0.1843, - "step": 8331 - }, - { - "epoch": 0.42, - "grad_norm": 0.8409836164037255, - "learning_rate": 1.2914221834343423e-05, - "loss": 0.2138, - "step": 8332 - }, - { - "epoch": 0.42, - "grad_norm": 1.0864523715789525, - "learning_rate": 1.2912646313633945e-05, - "loss": 0.1695, - "step": 8333 - }, - { - "epoch": 0.42, - "grad_norm": 0.9739854947766973, - "learning_rate": 1.2911070713918772e-05, - "loss": 0.2029, - "step": 8334 - }, - { - "epoch": 0.42, - "grad_norm": 1.0051465038200151, - "learning_rate": 1.2909495035240638e-05, - "loss": 0.2148, - "step": 8335 - }, - { - "epoch": 0.42, - "grad_norm": 1.2388383034363506, - "learning_rate": 1.2907919277642287e-05, - "loss": 0.1904, - "step": 8336 - }, - { - "epoch": 0.42, - "grad_norm": 0.7647215138884825, - "learning_rate": 1.2906343441166465e-05, - "loss": 0.1729, - "step": 8337 - }, - { - "epoch": 0.42, - "grad_norm": 0.7929162694707192, - "learning_rate": 1.290476752585591e-05, - "loss": 0.173, - "step": 8338 - }, - { - "epoch": 0.42, - "grad_norm": 0.8801109553531988, - "learning_rate": 1.2903191531753373e-05, - "loss": 0.1831, - "step": 8339 - }, - { - "epoch": 0.42, - "grad_norm": 0.8117842995770987, - "learning_rate": 1.2901615458901602e-05, - "loss": 0.1862, - "step": 8340 - }, - { - "epoch": 0.42, - "grad_norm": 0.8564129234299726, - "learning_rate": 1.2900039307343345e-05, - "loss": 0.1944, - "step": 8341 - }, - { - "epoch": 0.42, - "grad_norm": 0.8326250384944172, - "learning_rate": 1.2898463077121361e-05, - "loss": 0.1922, - "step": 8342 - }, - { - "epoch": 0.42, - "grad_norm": 0.8476141151237748, - "learning_rate": 1.2896886768278406e-05, - "loss": 0.2183, - "step": 8343 - }, - { - "epoch": 0.42, - "grad_norm": 0.9497331360658231, - "learning_rate": 1.2895310380857224e-05, - "loss": 0.1823, - "step": 8344 - }, - { - "epoch": 0.42, - "grad_norm": 0.896734197061469, - "learning_rate": 1.2893733914900595e-05, - "loss": 0.1855, - "step": 8345 - }, - { - "epoch": 0.42, - "grad_norm": 0.9376838004640948, - "learning_rate": 1.2892157370451263e-05, - "loss": 0.1936, - "step": 8346 - }, - { - "epoch": 0.42, - "grad_norm": 1.2314498483898435, - "learning_rate": 1.2890580747552002e-05, - "loss": 0.2073, - "step": 8347 - }, - { - "epoch": 0.42, - "grad_norm": 0.9690556766269256, - "learning_rate": 1.2889004046245574e-05, - "loss": 0.1872, - "step": 8348 - }, - { - "epoch": 0.42, - "grad_norm": 0.8238006996316732, - "learning_rate": 1.2887427266574748e-05, - "loss": 0.2207, - "step": 8349 - }, - { - "epoch": 0.42, - "grad_norm": 0.8068928897313348, - "learning_rate": 1.2885850408582295e-05, - "loss": 0.1946, - "step": 8350 - }, - { - "epoch": 0.42, - "grad_norm": 0.859272390151615, - "learning_rate": 1.2884273472310986e-05, - "loss": 0.2203, - "step": 8351 - }, - { - "epoch": 0.42, - "grad_norm": 0.946612446604715, - "learning_rate": 1.2882696457803597e-05, - "loss": 0.159, - "step": 8352 - }, - { - "epoch": 0.42, - "grad_norm": 0.9089696914287012, - "learning_rate": 1.2881119365102901e-05, - "loss": 0.2278, - "step": 8353 - }, - { - "epoch": 0.42, - "grad_norm": 1.2959784235147993, - "learning_rate": 1.2879542194251681e-05, - "loss": 0.1795, - "step": 8354 - }, - { - "epoch": 0.42, - "grad_norm": 0.8042643958346923, - "learning_rate": 1.2877964945292717e-05, - "loss": 0.1843, - "step": 8355 - }, - { - "epoch": 0.42, - "grad_norm": 1.3629437841374252, - "learning_rate": 1.2876387618268793e-05, - "loss": 0.2049, - "step": 8356 - }, - { - "epoch": 0.42, - "grad_norm": 2.958231619240513, - "learning_rate": 1.2874810213222689e-05, - "loss": 0.1776, - "step": 8357 - }, - { - "epoch": 0.43, - "grad_norm": 1.5744423044538112, - "learning_rate": 1.2873232730197197e-05, - "loss": 0.2109, - "step": 8358 - }, - { - "epoch": 0.43, - "grad_norm": 0.936861568438983, - "learning_rate": 1.2871655169235104e-05, - "loss": 0.2103, - "step": 8359 - }, - { - "epoch": 0.43, - "grad_norm": 0.8437459959051726, - "learning_rate": 1.2870077530379205e-05, - "loss": 0.1947, - "step": 8360 - }, - { - "epoch": 0.43, - "grad_norm": 1.081735761931492, - "learning_rate": 1.286849981367229e-05, - "loss": 0.1811, - "step": 8361 - }, - { - "epoch": 0.43, - "grad_norm": 1.1563276410499428, - "learning_rate": 1.2866922019157155e-05, - "loss": 0.1966, - "step": 8362 - }, - { - "epoch": 0.43, - "grad_norm": 2.80787778076596, - "learning_rate": 1.28653441468766e-05, - "loss": 0.1989, - "step": 8363 - }, - { - "epoch": 0.43, - "grad_norm": 1.0611653379046442, - "learning_rate": 1.2863766196873419e-05, - "loss": 0.213, - "step": 8364 - }, - { - "epoch": 0.43, - "grad_norm": 0.90607110241615, - "learning_rate": 1.2862188169190419e-05, - "loss": 0.2084, - "step": 8365 - }, - { - "epoch": 0.43, - "grad_norm": 0.8946529178848721, - "learning_rate": 1.2860610063870405e-05, - "loss": 0.1944, - "step": 8366 - }, - { - "epoch": 0.43, - "grad_norm": 0.8532649529386813, - "learning_rate": 1.2859031880956181e-05, - "loss": 0.1794, - "step": 8367 - }, - { - "epoch": 0.43, - "grad_norm": 1.392909454230544, - "learning_rate": 1.2857453620490557e-05, - "loss": 0.1786, - "step": 8368 - }, - { - "epoch": 0.43, - "grad_norm": 0.8397705370290642, - "learning_rate": 1.2855875282516342e-05, - "loss": 0.2058, - "step": 8369 - }, - { - "epoch": 0.43, - "grad_norm": 1.0693770104817486, - "learning_rate": 1.2854296867076346e-05, - "loss": 0.1955, - "step": 8370 - }, - { - "epoch": 0.43, - "grad_norm": 0.7623931235591676, - "learning_rate": 1.2852718374213389e-05, - "loss": 0.17, - "step": 8371 - }, - { - "epoch": 0.43, - "grad_norm": 0.9831192248267664, - "learning_rate": 1.2851139803970285e-05, - "loss": 0.2333, - "step": 8372 - }, - { - "epoch": 0.43, - "grad_norm": 0.8845103966984852, - "learning_rate": 1.2849561156389851e-05, - "loss": 0.1856, - "step": 8373 - }, - { - "epoch": 0.43, - "grad_norm": 1.0431153361684566, - "learning_rate": 1.2847982431514911e-05, - "loss": 0.1819, - "step": 8374 - }, - { - "epoch": 0.43, - "grad_norm": 0.9885410602615909, - "learning_rate": 1.2846403629388285e-05, - "loss": 0.1831, - "step": 8375 - }, - { - "epoch": 0.43, - "grad_norm": 0.8649856620753934, - "learning_rate": 1.28448247500528e-05, - "loss": 0.1959, - "step": 8376 - }, - { - "epoch": 0.43, - "grad_norm": 1.3401866519005952, - "learning_rate": 1.2843245793551284e-05, - "loss": 0.2121, - "step": 8377 - }, - { - "epoch": 0.43, - "grad_norm": 0.9549802734325415, - "learning_rate": 1.2841666759926566e-05, - "loss": 0.1829, - "step": 8378 - }, - { - "epoch": 0.43, - "grad_norm": 0.8970244098474727, - "learning_rate": 1.2840087649221476e-05, - "loss": 0.1897, - "step": 8379 - }, - { - "epoch": 0.43, - "grad_norm": 0.9181250871587772, - "learning_rate": 1.283850846147885e-05, - "loss": 0.1814, - "step": 8380 - }, - { - "epoch": 0.43, - "grad_norm": 0.9677772504052429, - "learning_rate": 1.2836929196741518e-05, - "loss": 0.2052, - "step": 8381 - }, - { - "epoch": 0.43, - "grad_norm": 1.3704789262684547, - "learning_rate": 1.2835349855052324e-05, - "loss": 0.1896, - "step": 8382 - }, - { - "epoch": 0.43, - "grad_norm": 1.0717251508841295, - "learning_rate": 1.2833770436454103e-05, - "loss": 0.2023, - "step": 8383 - }, - { - "epoch": 0.43, - "grad_norm": 0.9885072449972804, - "learning_rate": 1.2832190940989699e-05, - "loss": 0.1995, - "step": 8384 - }, - { - "epoch": 0.43, - "grad_norm": 0.896312751966259, - "learning_rate": 1.2830611368701957e-05, - "loss": 0.2039, - "step": 8385 - }, - { - "epoch": 0.43, - "grad_norm": 1.7110044731285126, - "learning_rate": 1.2829031719633722e-05, - "loss": 0.1865, - "step": 8386 - }, - { - "epoch": 0.43, - "grad_norm": 0.9079049703949122, - "learning_rate": 1.2827451993827841e-05, - "loss": 0.1777, - "step": 8387 - }, - { - "epoch": 0.43, - "grad_norm": 1.2645822150236385, - "learning_rate": 1.2825872191327164e-05, - "loss": 0.1932, - "step": 8388 - }, - { - "epoch": 0.43, - "grad_norm": 1.7242785606944482, - "learning_rate": 1.2824292312174547e-05, - "loss": 0.1961, - "step": 8389 - }, - { - "epoch": 0.43, - "grad_norm": 0.8742480736548195, - "learning_rate": 1.282271235641284e-05, - "loss": 0.1985, - "step": 8390 - }, - { - "epoch": 0.43, - "grad_norm": 1.0083321320072796, - "learning_rate": 1.28211323240849e-05, - "loss": 0.1837, - "step": 8391 - }, - { - "epoch": 0.43, - "grad_norm": 0.7590299682094305, - "learning_rate": 1.2819552215233585e-05, - "loss": 0.1861, - "step": 8392 - }, - { - "epoch": 0.43, - "grad_norm": 0.7632714309789075, - "learning_rate": 1.2817972029901759e-05, - "loss": 0.1687, - "step": 8393 - }, - { - "epoch": 0.43, - "grad_norm": 0.8250873902419275, - "learning_rate": 1.2816391768132284e-05, - "loss": 0.2067, - "step": 8394 - }, - { - "epoch": 0.43, - "grad_norm": 0.9130439612947502, - "learning_rate": 1.2814811429968022e-05, - "loss": 0.1961, - "step": 8395 - }, - { - "epoch": 0.43, - "grad_norm": 1.1247865595713258, - "learning_rate": 1.2813231015451842e-05, - "loss": 0.1934, - "step": 8396 - }, - { - "epoch": 0.43, - "grad_norm": 2.418166635996874, - "learning_rate": 1.2811650524626608e-05, - "loss": 0.2263, - "step": 8397 - }, - { - "epoch": 0.43, - "grad_norm": 1.4470070889450257, - "learning_rate": 1.2810069957535198e-05, - "loss": 0.1728, - "step": 8398 - }, - { - "epoch": 0.43, - "grad_norm": 1.002144849625383, - "learning_rate": 1.2808489314220483e-05, - "loss": 0.1923, - "step": 8399 - }, - { - "epoch": 0.43, - "grad_norm": 1.3487231469057854, - "learning_rate": 1.2806908594725335e-05, - "loss": 0.1714, - "step": 8400 - }, - { - "epoch": 0.43, - "grad_norm": 1.0348615877972056, - "learning_rate": 1.280532779909263e-05, - "loss": 0.19, - "step": 8401 - }, - { - "epoch": 0.43, - "grad_norm": 0.818483891086596, - "learning_rate": 1.2803746927365252e-05, - "loss": 0.1832, - "step": 8402 - }, - { - "epoch": 0.43, - "grad_norm": 0.9141276415044727, - "learning_rate": 1.2802165979586084e-05, - "loss": 0.2016, - "step": 8403 - }, - { - "epoch": 0.43, - "grad_norm": 1.0645918152401999, - "learning_rate": 1.2800584955798e-05, - "loss": 0.1963, - "step": 8404 - }, - { - "epoch": 0.43, - "grad_norm": 0.9973262286041973, - "learning_rate": 1.2799003856043893e-05, - "loss": 0.207, - "step": 8405 - }, - { - "epoch": 0.43, - "grad_norm": 0.9579121869369324, - "learning_rate": 1.2797422680366649e-05, - "loss": 0.1928, - "step": 8406 - }, - { - "epoch": 0.43, - "grad_norm": 1.0122691887181539, - "learning_rate": 1.2795841428809155e-05, - "loss": 0.1898, - "step": 8407 - }, - { - "epoch": 0.43, - "grad_norm": 0.8437227917655225, - "learning_rate": 1.2794260101414307e-05, - "loss": 0.2134, - "step": 8408 - }, - { - "epoch": 0.43, - "grad_norm": 1.298694300803485, - "learning_rate": 1.2792678698224995e-05, - "loss": 0.1703, - "step": 8409 - }, - { - "epoch": 0.43, - "grad_norm": 0.8327681044213492, - "learning_rate": 1.2791097219284115e-05, - "loss": 0.2284, - "step": 8410 - }, - { - "epoch": 0.43, - "grad_norm": 1.6873455193552223, - "learning_rate": 1.2789515664634564e-05, - "loss": 0.2054, - "step": 8411 - }, - { - "epoch": 0.43, - "grad_norm": 1.0429920359977503, - "learning_rate": 1.2787934034319245e-05, - "loss": 0.1796, - "step": 8412 - }, - { - "epoch": 0.43, - "grad_norm": 0.831414425233925, - "learning_rate": 1.2786352328381057e-05, - "loss": 0.1732, - "step": 8413 - }, - { - "epoch": 0.43, - "grad_norm": 1.0479045262632114, - "learning_rate": 1.2784770546862905e-05, - "loss": 0.2097, - "step": 8414 - }, - { - "epoch": 0.43, - "grad_norm": 0.9688634782356663, - "learning_rate": 1.2783188689807697e-05, - "loss": 0.1877, - "step": 8415 - }, - { - "epoch": 0.43, - "grad_norm": 0.9186954634465992, - "learning_rate": 1.2781606757258335e-05, - "loss": 0.2137, - "step": 8416 - }, - { - "epoch": 0.43, - "grad_norm": 0.8804643222421765, - "learning_rate": 1.2780024749257736e-05, - "loss": 0.187, - "step": 8417 - }, - { - "epoch": 0.43, - "grad_norm": 1.0031110219542805, - "learning_rate": 1.2778442665848805e-05, - "loss": 0.2042, - "step": 8418 - }, - { - "epoch": 0.43, - "grad_norm": 0.7537420190969669, - "learning_rate": 1.277686050707446e-05, - "loss": 0.1772, - "step": 8419 - }, - { - "epoch": 0.43, - "grad_norm": 1.0582699108537157, - "learning_rate": 1.277527827297762e-05, - "loss": 0.1974, - "step": 8420 - }, - { - "epoch": 0.43, - "grad_norm": 1.1413213859696738, - "learning_rate": 1.2773695963601199e-05, - "loss": 0.1822, - "step": 8421 - }, - { - "epoch": 0.43, - "grad_norm": 0.8847067204436639, - "learning_rate": 1.2772113578988117e-05, - "loss": 0.1918, - "step": 8422 - }, - { - "epoch": 0.43, - "grad_norm": 1.3751260454860468, - "learning_rate": 1.2770531119181295e-05, - "loss": 0.1992, - "step": 8423 - }, - { - "epoch": 0.43, - "grad_norm": 1.181669385070735, - "learning_rate": 1.2768948584223666e-05, - "loss": 0.1808, - "step": 8424 - }, - { - "epoch": 0.43, - "grad_norm": 0.9003848410651017, - "learning_rate": 1.2767365974158146e-05, - "loss": 0.1724, - "step": 8425 - }, - { - "epoch": 0.43, - "grad_norm": 1.1547567937475915, - "learning_rate": 1.2765783289027671e-05, - "loss": 0.2013, - "step": 8426 - }, - { - "epoch": 0.43, - "grad_norm": 0.9531012582250008, - "learning_rate": 1.2764200528875164e-05, - "loss": 0.2085, - "step": 8427 - }, - { - "epoch": 0.43, - "grad_norm": 1.4375307799373926, - "learning_rate": 1.2762617693743562e-05, - "loss": 0.1851, - "step": 8428 - }, - { - "epoch": 0.43, - "grad_norm": 0.9412421733587898, - "learning_rate": 1.2761034783675803e-05, - "loss": 0.1906, - "step": 8429 - }, - { - "epoch": 0.43, - "grad_norm": 0.9939292690651369, - "learning_rate": 1.2759451798714816e-05, - "loss": 0.1956, - "step": 8430 - }, - { - "epoch": 0.43, - "grad_norm": 0.7829017756475471, - "learning_rate": 1.2757868738903545e-05, - "loss": 0.1985, - "step": 8431 - }, - { - "epoch": 0.43, - "grad_norm": 1.1982992843299392, - "learning_rate": 1.2756285604284928e-05, - "loss": 0.1817, - "step": 8432 - }, - { - "epoch": 0.43, - "grad_norm": 0.9110729910834504, - "learning_rate": 1.275470239490191e-05, - "loss": 0.2057, - "step": 8433 - }, - { - "epoch": 0.43, - "grad_norm": 1.2121332217693976, - "learning_rate": 1.2753119110797432e-05, - "loss": 0.2112, - "step": 8434 - }, - { - "epoch": 0.43, - "grad_norm": 1.2017977376703233, - "learning_rate": 1.2751535752014444e-05, - "loss": 0.1876, - "step": 8435 - }, - { - "epoch": 0.43, - "grad_norm": 1.162603626637288, - "learning_rate": 1.274995231859589e-05, - "loss": 0.161, - "step": 8436 - }, - { - "epoch": 0.43, - "grad_norm": 1.8380059596350338, - "learning_rate": 1.2748368810584725e-05, - "loss": 0.1758, - "step": 8437 - }, - { - "epoch": 0.43, - "grad_norm": 1.424109898645111, - "learning_rate": 1.2746785228023904e-05, - "loss": 0.1961, - "step": 8438 - }, - { - "epoch": 0.43, - "grad_norm": 1.3728472348261007, - "learning_rate": 1.2745201570956379e-05, - "loss": 0.1872, - "step": 8439 - }, - { - "epoch": 0.43, - "grad_norm": 2.2101805295026007, - "learning_rate": 1.27436178394251e-05, - "loss": 0.1694, - "step": 8440 - }, - { - "epoch": 0.43, - "grad_norm": 0.9012241586461739, - "learning_rate": 1.2742034033473037e-05, - "loss": 0.1645, - "step": 8441 - }, - { - "epoch": 0.43, - "grad_norm": 1.4716426150058821, - "learning_rate": 1.2740450153143144e-05, - "loss": 0.1965, - "step": 8442 - }, - { - "epoch": 0.43, - "grad_norm": 0.8971699492952784, - "learning_rate": 1.2738866198478388e-05, - "loss": 0.2057, - "step": 8443 - }, - { - "epoch": 0.43, - "grad_norm": 0.8749718882080885, - "learning_rate": 1.2737282169521732e-05, - "loss": 0.1899, - "step": 8444 - }, - { - "epoch": 0.43, - "grad_norm": 0.9340663748135312, - "learning_rate": 1.2735698066316138e-05, - "loss": 0.2032, - "step": 8445 - }, - { - "epoch": 0.43, - "grad_norm": 0.9371509604709034, - "learning_rate": 1.2734113888904584e-05, - "loss": 0.1909, - "step": 8446 - }, - { - "epoch": 0.43, - "grad_norm": 1.2213004935273502, - "learning_rate": 1.2732529637330036e-05, - "loss": 0.1849, - "step": 8447 - }, - { - "epoch": 0.43, - "grad_norm": 0.9992218155973259, - "learning_rate": 1.2730945311635465e-05, - "loss": 0.198, - "step": 8448 - }, - { - "epoch": 0.43, - "grad_norm": 0.8369839626639749, - "learning_rate": 1.272936091186385e-05, - "loss": 0.1964, - "step": 8449 - }, - { - "epoch": 0.43, - "grad_norm": 0.9779530137060174, - "learning_rate": 1.2727776438058166e-05, - "loss": 0.2013, - "step": 8450 - }, - { - "epoch": 0.43, - "grad_norm": 1.746936294451183, - "learning_rate": 1.2726191890261393e-05, - "loss": 0.19, - "step": 8451 - }, - { - "epoch": 0.43, - "grad_norm": 1.2654470125593666, - "learning_rate": 1.272460726851651e-05, - "loss": 0.1716, - "step": 8452 - }, - { - "epoch": 0.43, - "grad_norm": 1.6691442469544424, - "learning_rate": 1.2723022572866497e-05, - "loss": 0.1844, - "step": 8453 - }, - { - "epoch": 0.43, - "grad_norm": 1.4040660915184713, - "learning_rate": 1.2721437803354348e-05, - "loss": 0.2178, - "step": 8454 - }, - { - "epoch": 0.43, - "grad_norm": 3.168892417400603, - "learning_rate": 1.2719852960023043e-05, - "loss": 0.206, - "step": 8455 - }, - { - "epoch": 0.43, - "grad_norm": 1.2688061946410185, - "learning_rate": 1.2718268042915574e-05, - "loss": 0.1774, - "step": 8456 - }, - { - "epoch": 0.43, - "grad_norm": 1.2904127430747385, - "learning_rate": 1.271668305207493e-05, - "loss": 0.206, - "step": 8457 - }, - { - "epoch": 0.43, - "grad_norm": 1.1834309738463864, - "learning_rate": 1.2715097987544104e-05, - "loss": 0.2018, - "step": 8458 - }, - { - "epoch": 0.43, - "grad_norm": 1.3807424297364006, - "learning_rate": 1.2713512849366092e-05, - "loss": 0.1826, - "step": 8459 - }, - { - "epoch": 0.43, - "grad_norm": 1.1125232250625734, - "learning_rate": 1.2711927637583892e-05, - "loss": 0.1961, - "step": 8460 - }, - { - "epoch": 0.43, - "grad_norm": 0.8912732082138243, - "learning_rate": 1.2710342352240498e-05, - "loss": 0.2065, - "step": 8461 - }, - { - "epoch": 0.43, - "grad_norm": 1.2823631484358686, - "learning_rate": 1.270875699337892e-05, - "loss": 0.2041, - "step": 8462 - }, - { - "epoch": 0.43, - "grad_norm": 0.9186526027198525, - "learning_rate": 1.270717156104215e-05, - "loss": 0.2414, - "step": 8463 - }, - { - "epoch": 0.43, - "grad_norm": 0.9547704881649449, - "learning_rate": 1.2705586055273202e-05, - "loss": 0.2094, - "step": 8464 - }, - { - "epoch": 0.43, - "grad_norm": 0.9810576545703948, - "learning_rate": 1.2704000476115079e-05, - "loss": 0.218, - "step": 8465 - }, - { - "epoch": 0.43, - "grad_norm": 0.8829659202427127, - "learning_rate": 1.2702414823610791e-05, - "loss": 0.1928, - "step": 8466 - }, - { - "epoch": 0.43, - "grad_norm": 1.0581841757246446, - "learning_rate": 1.2700829097803347e-05, - "loss": 0.1974, - "step": 8467 - }, - { - "epoch": 0.43, - "grad_norm": 0.9613440736632844, - "learning_rate": 1.2699243298735762e-05, - "loss": 0.189, - "step": 8468 - }, - { - "epoch": 0.43, - "grad_norm": 1.1715634892409108, - "learning_rate": 1.2697657426451051e-05, - "loss": 0.1986, - "step": 8469 - }, - { - "epoch": 0.43, - "grad_norm": 0.9137834082634491, - "learning_rate": 1.2696071480992229e-05, - "loss": 0.2278, - "step": 8470 - }, - { - "epoch": 0.43, - "grad_norm": 0.8783778890963508, - "learning_rate": 1.2694485462402315e-05, - "loss": 0.1965, - "step": 8471 - }, - { - "epoch": 0.43, - "grad_norm": 1.1505391364195132, - "learning_rate": 1.269289937072433e-05, - "loss": 0.2037, - "step": 8472 - }, - { - "epoch": 0.43, - "grad_norm": 1.4161874301172996, - "learning_rate": 1.2691313206001298e-05, - "loss": 0.2112, - "step": 8473 - }, - { - "epoch": 0.43, - "grad_norm": 1.647897930955205, - "learning_rate": 1.2689726968276246e-05, - "loss": 0.2061, - "step": 8474 - }, - { - "epoch": 0.43, - "grad_norm": 0.9562300082535361, - "learning_rate": 1.2688140657592195e-05, - "loss": 0.2063, - "step": 8475 - }, - { - "epoch": 0.43, - "grad_norm": 2.6988876400429254, - "learning_rate": 1.2686554273992177e-05, - "loss": 0.2024, - "step": 8476 - }, - { - "epoch": 0.43, - "grad_norm": 1.0352568876456878, - "learning_rate": 1.2684967817519222e-05, - "loss": 0.2233, - "step": 8477 - }, - { - "epoch": 0.43, - "grad_norm": 1.6628966240070668, - "learning_rate": 1.2683381288216368e-05, - "loss": 0.2163, - "step": 8478 - }, - { - "epoch": 0.43, - "grad_norm": 1.0610061654242153, - "learning_rate": 1.268179468612664e-05, - "loss": 0.2103, - "step": 8479 - }, - { - "epoch": 0.43, - "grad_norm": 1.0703551730464311, - "learning_rate": 1.268020801129308e-05, - "loss": 0.1937, - "step": 8480 - }, - { - "epoch": 0.43, - "grad_norm": 0.9026742355707935, - "learning_rate": 1.2678621263758728e-05, - "loss": 0.2067, - "step": 8481 - }, - { - "epoch": 0.43, - "grad_norm": 1.1872590164339674, - "learning_rate": 1.2677034443566623e-05, - "loss": 0.1944, - "step": 8482 - }, - { - "epoch": 0.43, - "grad_norm": 1.3809906224953725, - "learning_rate": 1.2675447550759807e-05, - "loss": 0.1892, - "step": 8483 - }, - { - "epoch": 0.43, - "grad_norm": 1.1300445482997878, - "learning_rate": 1.2673860585381329e-05, - "loss": 0.1978, - "step": 8484 - }, - { - "epoch": 0.43, - "grad_norm": 1.0810033265516914, - "learning_rate": 1.2672273547474225e-05, - "loss": 0.17, - "step": 8485 - }, - { - "epoch": 0.43, - "grad_norm": 1.190920506332146, - "learning_rate": 1.2670686437081554e-05, - "loss": 0.1965, - "step": 8486 - }, - { - "epoch": 0.43, - "grad_norm": 1.093795744759877, - "learning_rate": 1.2669099254246363e-05, - "loss": 0.2049, - "step": 8487 - }, - { - "epoch": 0.43, - "grad_norm": 1.4062279100264012, - "learning_rate": 1.2667511999011699e-05, - "loss": 0.1928, - "step": 8488 - }, - { - "epoch": 0.43, - "grad_norm": 1.3611497401758716, - "learning_rate": 1.2665924671420626e-05, - "loss": 0.1897, - "step": 8489 - }, - { - "epoch": 0.43, - "grad_norm": 0.8179653065978323, - "learning_rate": 1.2664337271516194e-05, - "loss": 0.1996, - "step": 8490 - }, - { - "epoch": 0.43, - "grad_norm": 2.221152195388021, - "learning_rate": 1.2662749799341464e-05, - "loss": 0.1651, - "step": 8491 - }, - { - "epoch": 0.43, - "grad_norm": 1.174205904888376, - "learning_rate": 1.2661162254939496e-05, - "loss": 0.2103, - "step": 8492 - }, - { - "epoch": 0.43, - "grad_norm": 1.3784424859747435, - "learning_rate": 1.2659574638353349e-05, - "loss": 0.1805, - "step": 8493 - }, - { - "epoch": 0.43, - "grad_norm": 1.1577475217917517, - "learning_rate": 1.2657986949626091e-05, - "loss": 0.1849, - "step": 8494 - }, - { - "epoch": 0.43, - "grad_norm": 0.8962746567132944, - "learning_rate": 1.2656399188800788e-05, - "loss": 0.1944, - "step": 8495 - }, - { - "epoch": 0.43, - "grad_norm": 0.9910380022034406, - "learning_rate": 1.2654811355920505e-05, - "loss": 0.1889, - "step": 8496 - }, - { - "epoch": 0.43, - "grad_norm": 1.0426069901807775, - "learning_rate": 1.2653223451028316e-05, - "loss": 0.1993, - "step": 8497 - }, - { - "epoch": 0.43, - "grad_norm": 2.1887980320486227, - "learning_rate": 1.2651635474167287e-05, - "loss": 0.177, - "step": 8498 - }, - { - "epoch": 0.43, - "grad_norm": 3.0126773212647495, - "learning_rate": 1.2650047425380501e-05, - "loss": 0.2027, - "step": 8499 - }, - { - "epoch": 0.43, - "grad_norm": 5.165218024798249, - "learning_rate": 1.2648459304711026e-05, - "loss": 0.1823, - "step": 8500 - }, - { - "epoch": 0.43, - "grad_norm": 0.9797609981321793, - "learning_rate": 1.2646871112201943e-05, - "loss": 0.1933, - "step": 8501 - }, - { - "epoch": 0.43, - "grad_norm": 1.344103428058884, - "learning_rate": 1.2645282847896335e-05, - "loss": 0.1963, - "step": 8502 - }, - { - "epoch": 0.43, - "grad_norm": 1.2433867429641254, - "learning_rate": 1.2643694511837278e-05, - "loss": 0.1712, - "step": 8503 - }, - { - "epoch": 0.43, - "grad_norm": 1.3329144189269893, - "learning_rate": 1.2642106104067857e-05, - "loss": 0.1836, - "step": 8504 - }, - { - "epoch": 0.43, - "grad_norm": 1.7401092387768005, - "learning_rate": 1.264051762463116e-05, - "loss": 0.2108, - "step": 8505 - }, - { - "epoch": 0.43, - "grad_norm": 0.7026921755147951, - "learning_rate": 1.2638929073570273e-05, - "loss": 0.1618, - "step": 8506 - }, - { - "epoch": 0.43, - "grad_norm": 1.188762430576397, - "learning_rate": 1.2637340450928284e-05, - "loss": 0.2004, - "step": 8507 - }, - { - "epoch": 0.43, - "grad_norm": 1.169245700509231, - "learning_rate": 1.263575175674829e-05, - "loss": 0.1656, - "step": 8508 - }, - { - "epoch": 0.43, - "grad_norm": 0.9169441692984407, - "learning_rate": 1.2634162991073376e-05, - "loss": 0.1608, - "step": 8509 - }, - { - "epoch": 0.43, - "grad_norm": 4.239622696551675, - "learning_rate": 1.2632574153946646e-05, - "loss": 0.1836, - "step": 8510 - }, - { - "epoch": 0.43, - "grad_norm": 1.0826608464510832, - "learning_rate": 1.263098524541119e-05, - "loss": 0.1777, - "step": 8511 - }, - { - "epoch": 0.43, - "grad_norm": 1.5205226845830233, - "learning_rate": 1.2629396265510113e-05, - "loss": 0.1969, - "step": 8512 - }, - { - "epoch": 0.43, - "grad_norm": 0.9138966349728019, - "learning_rate": 1.2627807214286514e-05, - "loss": 0.1927, - "step": 8513 - }, - { - "epoch": 0.43, - "grad_norm": 1.9706735417872003, - "learning_rate": 1.2626218091783496e-05, - "loss": 0.1932, - "step": 8514 - }, - { - "epoch": 0.43, - "grad_norm": 1.0667045902349253, - "learning_rate": 1.262462889804416e-05, - "loss": 0.1853, - "step": 8515 - }, - { - "epoch": 0.43, - "grad_norm": 0.9375839118068797, - "learning_rate": 1.2623039633111623e-05, - "loss": 0.1862, - "step": 8516 - }, - { - "epoch": 0.43, - "grad_norm": 0.9210059162958971, - "learning_rate": 1.2621450297028984e-05, - "loss": 0.1892, - "step": 8517 - }, - { - "epoch": 0.43, - "grad_norm": 1.321499182230024, - "learning_rate": 1.261986088983936e-05, - "loss": 0.1814, - "step": 8518 - }, - { - "epoch": 0.43, - "grad_norm": 1.2698154007000069, - "learning_rate": 1.2618271411585859e-05, - "loss": 0.1871, - "step": 8519 - }, - { - "epoch": 0.43, - "grad_norm": 0.8777129749240363, - "learning_rate": 1.26166818623116e-05, - "loss": 0.2097, - "step": 8520 - }, - { - "epoch": 0.43, - "grad_norm": 1.712414234101878, - "learning_rate": 1.2615092242059697e-05, - "loss": 0.1877, - "step": 8521 - }, - { - "epoch": 0.43, - "grad_norm": 1.0140691144950047, - "learning_rate": 1.2613502550873269e-05, - "loss": 0.1742, - "step": 8522 - }, - { - "epoch": 0.43, - "grad_norm": 1.5850344252998092, - "learning_rate": 1.2611912788795437e-05, - "loss": 0.2046, - "step": 8523 - }, - { - "epoch": 0.43, - "grad_norm": 1.329465016086951, - "learning_rate": 1.261032295586932e-05, - "loss": 0.1894, - "step": 8524 - }, - { - "epoch": 0.43, - "grad_norm": 0.8296339341431521, - "learning_rate": 1.260873305213805e-05, - "loss": 0.202, - "step": 8525 - }, - { - "epoch": 0.43, - "grad_norm": 0.9733137634418483, - "learning_rate": 1.2607143077644746e-05, - "loss": 0.194, - "step": 8526 - }, - { - "epoch": 0.43, - "grad_norm": 1.160550697515113, - "learning_rate": 1.260555303243254e-05, - "loss": 0.2085, - "step": 8527 - }, - { - "epoch": 0.43, - "grad_norm": 0.8414652418885628, - "learning_rate": 1.2603962916544558e-05, - "loss": 0.1943, - "step": 8528 - }, - { - "epoch": 0.43, - "grad_norm": 1.0728029220031154, - "learning_rate": 1.2602372730023938e-05, - "loss": 0.1731, - "step": 8529 - }, - { - "epoch": 0.43, - "grad_norm": 0.8849396662531649, - "learning_rate": 1.2600782472913811e-05, - "loss": 0.1975, - "step": 8530 - }, - { - "epoch": 0.43, - "grad_norm": 0.7267393674010236, - "learning_rate": 1.259919214525731e-05, - "loss": 0.1781, - "step": 8531 - }, - { - "epoch": 0.43, - "grad_norm": 0.8599653618866973, - "learning_rate": 1.2597601747097578e-05, - "loss": 0.212, - "step": 8532 - }, - { - "epoch": 0.43, - "grad_norm": 1.0116049169051977, - "learning_rate": 1.259601127847775e-05, - "loss": 0.1909, - "step": 8533 - }, - { - "epoch": 0.43, - "grad_norm": 1.0711190542073004, - "learning_rate": 1.259442073944097e-05, - "loss": 0.1807, - "step": 8534 - }, - { - "epoch": 0.43, - "grad_norm": 1.0228121141488749, - "learning_rate": 1.259283013003038e-05, - "loss": 0.2351, - "step": 8535 - }, - { - "epoch": 0.43, - "grad_norm": 0.9452815167442381, - "learning_rate": 1.2591239450289127e-05, - "loss": 0.2026, - "step": 8536 - }, - { - "epoch": 0.43, - "grad_norm": 0.9610403379351731, - "learning_rate": 1.2589648700260359e-05, - "loss": 0.221, - "step": 8537 - }, - { - "epoch": 0.43, - "grad_norm": 1.6244814059214097, - "learning_rate": 1.2588057879987223e-05, - "loss": 0.1703, - "step": 8538 - }, - { - "epoch": 0.43, - "grad_norm": 0.9149145552404859, - "learning_rate": 1.2586466989512872e-05, - "loss": 0.2023, - "step": 8539 - }, - { - "epoch": 0.43, - "grad_norm": 0.9812119245902255, - "learning_rate": 1.2584876028880455e-05, - "loss": 0.1755, - "step": 8540 - }, - { - "epoch": 0.43, - "grad_norm": 1.0256757321539571, - "learning_rate": 1.258328499813313e-05, - "loss": 0.1977, - "step": 8541 - }, - { - "epoch": 0.43, - "grad_norm": 1.0537374073758372, - "learning_rate": 1.2581693897314056e-05, - "loss": 0.1901, - "step": 8542 - }, - { - "epoch": 0.43, - "grad_norm": 0.9823156564187766, - "learning_rate": 1.2580102726466388e-05, - "loss": 0.2047, - "step": 8543 - }, - { - "epoch": 0.43, - "grad_norm": 1.1843880734627192, - "learning_rate": 1.2578511485633288e-05, - "loss": 0.1823, - "step": 8544 - }, - { - "epoch": 0.43, - "grad_norm": 2.93633345218317, - "learning_rate": 1.2576920174857917e-05, - "loss": 0.2008, - "step": 8545 - }, - { - "epoch": 0.43, - "grad_norm": 1.040963176406591, - "learning_rate": 1.2575328794183439e-05, - "loss": 0.1999, - "step": 8546 - }, - { - "epoch": 0.43, - "grad_norm": 1.0780583875307697, - "learning_rate": 1.2573737343653026e-05, - "loss": 0.2144, - "step": 8547 - }, - { - "epoch": 0.43, - "grad_norm": 0.9516385201913629, - "learning_rate": 1.257214582330984e-05, - "loss": 0.1737, - "step": 8548 - }, - { - "epoch": 0.43, - "grad_norm": 0.9613205197820011, - "learning_rate": 1.2570554233197054e-05, - "loss": 0.1744, - "step": 8549 - }, - { - "epoch": 0.43, - "grad_norm": 1.019145823439213, - "learning_rate": 1.2568962573357837e-05, - "loss": 0.2127, - "step": 8550 - }, - { - "epoch": 0.43, - "grad_norm": 0.873002514272877, - "learning_rate": 1.256737084383537e-05, - "loss": 0.2042, - "step": 8551 - }, - { - "epoch": 0.43, - "grad_norm": 1.050032016240503, - "learning_rate": 1.2565779044672821e-05, - "loss": 0.1894, - "step": 8552 - }, - { - "epoch": 0.43, - "grad_norm": 0.8068052044940806, - "learning_rate": 1.256418717591337e-05, - "loss": 0.1846, - "step": 8553 - }, - { - "epoch": 0.43, - "grad_norm": 1.3979721822608953, - "learning_rate": 1.25625952376002e-05, - "loss": 0.1842, - "step": 8554 - }, - { - "epoch": 0.44, - "grad_norm": 2.547408364045404, - "learning_rate": 1.2561003229776485e-05, - "loss": 0.182, - "step": 8555 - }, - { - "epoch": 0.44, - "grad_norm": 0.9049443458327565, - "learning_rate": 1.2559411152485414e-05, - "loss": 0.1992, - "step": 8556 - }, - { - "epoch": 0.44, - "grad_norm": 0.8407326410671497, - "learning_rate": 1.2557819005770174e-05, - "loss": 0.1639, - "step": 8557 - }, - { - "epoch": 0.44, - "grad_norm": 0.9925588954179055, - "learning_rate": 1.2556226789673946e-05, - "loss": 0.2002, - "step": 8558 - }, - { - "epoch": 0.44, - "grad_norm": 0.8053222521118775, - "learning_rate": 1.2554634504239923e-05, - "loss": 0.2065, - "step": 8559 - }, - { - "epoch": 0.44, - "grad_norm": 0.8122917580352957, - "learning_rate": 1.2553042149511295e-05, - "loss": 0.1815, - "step": 8560 - }, - { - "epoch": 0.44, - "grad_norm": 0.9270132598078689, - "learning_rate": 1.2551449725531254e-05, - "loss": 0.1895, - "step": 8561 - }, - { - "epoch": 0.44, - "grad_norm": 0.9402093869050032, - "learning_rate": 1.2549857232342995e-05, - "loss": 0.198, - "step": 8562 - }, - { - "epoch": 0.44, - "grad_norm": 0.9951990611606287, - "learning_rate": 1.2548264669989712e-05, - "loss": 0.2181, - "step": 8563 - }, - { - "epoch": 0.44, - "grad_norm": 1.195070825105065, - "learning_rate": 1.2546672038514608e-05, - "loss": 0.1918, - "step": 8564 - }, - { - "epoch": 0.44, - "grad_norm": 0.8772802211101891, - "learning_rate": 1.2545079337960883e-05, - "loss": 0.1969, - "step": 8565 - }, - { - "epoch": 0.44, - "grad_norm": 1.2160331913974665, - "learning_rate": 1.2543486568371736e-05, - "loss": 0.195, - "step": 8566 - }, - { - "epoch": 0.44, - "grad_norm": 2.994132559894767, - "learning_rate": 1.2541893729790374e-05, - "loss": 0.1901, - "step": 8567 - }, - { - "epoch": 0.44, - "grad_norm": 0.7510679233061346, - "learning_rate": 1.2540300822259996e-05, - "loss": 0.1633, - "step": 8568 - }, - { - "epoch": 0.44, - "grad_norm": 1.0775744359597887, - "learning_rate": 1.253870784582382e-05, - "loss": 0.1956, - "step": 8569 - }, - { - "epoch": 0.44, - "grad_norm": 1.0562615754525093, - "learning_rate": 1.2537114800525047e-05, - "loss": 0.182, - "step": 8570 - }, - { - "epoch": 0.44, - "grad_norm": 1.1632556247766093, - "learning_rate": 1.2535521686406892e-05, - "loss": 0.1942, - "step": 8571 - }, - { - "epoch": 0.44, - "grad_norm": 1.1858005068205464, - "learning_rate": 1.253392850351257e-05, - "loss": 0.2178, - "step": 8572 - }, - { - "epoch": 0.44, - "grad_norm": 0.9958119742587856, - "learning_rate": 1.2532335251885295e-05, - "loss": 0.1953, - "step": 8573 - }, - { - "epoch": 0.44, - "grad_norm": 1.1614657700601225, - "learning_rate": 1.253074193156828e-05, - "loss": 0.1933, - "step": 8574 - }, - { - "epoch": 0.44, - "grad_norm": 0.7794605022464707, - "learning_rate": 1.252914854260475e-05, - "loss": 0.1924, - "step": 8575 - }, - { - "epoch": 0.44, - "grad_norm": 8.52489821490543, - "learning_rate": 1.2527555085037919e-05, - "loss": 0.1658, - "step": 8576 - }, - { - "epoch": 0.44, - "grad_norm": 0.8420982181362046, - "learning_rate": 1.2525961558911018e-05, - "loss": 0.1941, - "step": 8577 - }, - { - "epoch": 0.44, - "grad_norm": 1.2260277438965748, - "learning_rate": 1.2524367964267264e-05, - "loss": 0.1826, - "step": 8578 - }, - { - "epoch": 0.44, - "grad_norm": 1.4095294544722203, - "learning_rate": 1.252277430114989e-05, - "loss": 0.2074, - "step": 8579 - }, - { - "epoch": 0.44, - "grad_norm": 1.062831301945892, - "learning_rate": 1.2521180569602117e-05, - "loss": 0.1782, - "step": 8580 - }, - { - "epoch": 0.44, - "grad_norm": 1.1477816960609704, - "learning_rate": 1.2519586769667178e-05, - "loss": 0.1647, - "step": 8581 - }, - { - "epoch": 0.44, - "grad_norm": 0.9151787324161524, - "learning_rate": 1.2517992901388308e-05, - "loss": 0.2288, - "step": 8582 - }, - { - "epoch": 0.44, - "grad_norm": 0.8072999718676849, - "learning_rate": 1.2516398964808735e-05, - "loss": 0.2063, - "step": 8583 - }, - { - "epoch": 0.44, - "grad_norm": 0.9536553411143446, - "learning_rate": 1.2514804959971703e-05, - "loss": 0.1889, - "step": 8584 - }, - { - "epoch": 0.44, - "grad_norm": 1.374498857012934, - "learning_rate": 1.251321088692044e-05, - "loss": 0.191, - "step": 8585 - }, - { - "epoch": 0.44, - "grad_norm": 0.7731476687954281, - "learning_rate": 1.2511616745698192e-05, - "loss": 0.1846, - "step": 8586 - }, - { - "epoch": 0.44, - "grad_norm": 1.0778842872188044, - "learning_rate": 1.2510022536348198e-05, - "loss": 0.1892, - "step": 8587 - }, - { - "epoch": 0.44, - "grad_norm": 0.9253449989932073, - "learning_rate": 1.2508428258913701e-05, - "loss": 0.187, - "step": 8588 - }, - { - "epoch": 0.44, - "grad_norm": 0.793090613678402, - "learning_rate": 1.2506833913437946e-05, - "loss": 0.1968, - "step": 8589 - }, - { - "epoch": 0.44, - "grad_norm": 1.019474888579202, - "learning_rate": 1.2505239499964179e-05, - "loss": 0.1851, - "step": 8590 - }, - { - "epoch": 0.44, - "grad_norm": 0.8630947482479875, - "learning_rate": 1.2503645018535649e-05, - "loss": 0.1793, - "step": 8591 - }, - { - "epoch": 0.44, - "grad_norm": 0.9521707788414885, - "learning_rate": 1.2502050469195609e-05, - "loss": 0.1933, - "step": 8592 - }, - { - "epoch": 0.44, - "grad_norm": 1.2404247773503827, - "learning_rate": 1.2500455851987306e-05, - "loss": 0.1703, - "step": 8593 - }, - { - "epoch": 0.44, - "grad_norm": 1.1472370304032953, - "learning_rate": 1.2498861166953995e-05, - "loss": 0.182, - "step": 8594 - }, - { - "epoch": 0.44, - "grad_norm": 0.9263589657793647, - "learning_rate": 1.2497266414138935e-05, - "loss": 0.2068, - "step": 8595 - }, - { - "epoch": 0.44, - "grad_norm": 1.0346555947683853, - "learning_rate": 1.2495671593585384e-05, - "loss": 0.182, - "step": 8596 - }, - { - "epoch": 0.44, - "grad_norm": 1.2960625843711235, - "learning_rate": 1.2494076705336599e-05, - "loss": 0.1918, - "step": 8597 - }, - { - "epoch": 0.44, - "grad_norm": 0.9689840253768952, - "learning_rate": 1.249248174943584e-05, - "loss": 0.2027, - "step": 8598 - }, - { - "epoch": 0.44, - "grad_norm": 0.8396890836603846, - "learning_rate": 1.2490886725926376e-05, - "loss": 0.1822, - "step": 8599 - }, - { - "epoch": 0.44, - "grad_norm": 0.7729532838421248, - "learning_rate": 1.248929163485147e-05, - "loss": 0.191, - "step": 8600 - }, - { - "epoch": 0.44, - "grad_norm": 1.332863405617956, - "learning_rate": 1.2487696476254385e-05, - "loss": 0.1975, - "step": 8601 - }, - { - "epoch": 0.44, - "grad_norm": 0.8057277936675971, - "learning_rate": 1.2486101250178394e-05, - "loss": 0.1848, - "step": 8602 - }, - { - "epoch": 0.44, - "grad_norm": 0.8889873160686912, - "learning_rate": 1.2484505956666765e-05, - "loss": 0.1798, - "step": 8603 - }, - { - "epoch": 0.44, - "grad_norm": 0.7659955529390862, - "learning_rate": 1.2482910595762774e-05, - "loss": 0.1898, - "step": 8604 - }, - { - "epoch": 0.44, - "grad_norm": 1.3749674199455812, - "learning_rate": 1.2481315167509691e-05, - "loss": 0.2143, - "step": 8605 - }, - { - "epoch": 0.44, - "grad_norm": 1.045463034779189, - "learning_rate": 1.2479719671950794e-05, - "loss": 0.1893, - "step": 8606 - }, - { - "epoch": 0.44, - "grad_norm": 0.957490242738464, - "learning_rate": 1.247812410912936e-05, - "loss": 0.1875, - "step": 8607 - }, - { - "epoch": 0.44, - "grad_norm": 0.8685192921852094, - "learning_rate": 1.2476528479088672e-05, - "loss": 0.1966, - "step": 8608 - }, - { - "epoch": 0.44, - "grad_norm": 0.7153859251377935, - "learning_rate": 1.247493278187201e-05, - "loss": 0.166, - "step": 8609 - }, - { - "epoch": 0.44, - "grad_norm": 0.9957059549307132, - "learning_rate": 1.2473337017522653e-05, - "loss": 0.1893, - "step": 8610 - }, - { - "epoch": 0.44, - "grad_norm": 0.8473339655405635, - "learning_rate": 1.247174118608389e-05, - "loss": 0.2085, - "step": 8611 - }, - { - "epoch": 0.44, - "grad_norm": 0.9887067152293455, - "learning_rate": 1.247014528759901e-05, - "loss": 0.202, - "step": 8612 - }, - { - "epoch": 0.44, - "grad_norm": 0.7880322623898394, - "learning_rate": 1.2468549322111299e-05, - "loss": 0.1973, - "step": 8613 - }, - { - "epoch": 0.44, - "grad_norm": 1.2680939216214728, - "learning_rate": 1.2466953289664047e-05, - "loss": 0.2221, - "step": 8614 - }, - { - "epoch": 0.44, - "grad_norm": 0.7955595735621311, - "learning_rate": 1.246535719030055e-05, - "loss": 0.2079, - "step": 8615 - }, - { - "epoch": 0.44, - "grad_norm": 0.9892586690670219, - "learning_rate": 1.2463761024064093e-05, - "loss": 0.1772, - "step": 8616 - }, - { - "epoch": 0.44, - "grad_norm": 0.8184148596800643, - "learning_rate": 1.2462164790997986e-05, - "loss": 0.1804, - "step": 8617 - }, - { - "epoch": 0.44, - "grad_norm": 1.0393142650583775, - "learning_rate": 1.246056849114552e-05, - "loss": 0.1957, - "step": 8618 - }, - { - "epoch": 0.44, - "grad_norm": 0.7627059001636234, - "learning_rate": 1.2458972124549993e-05, - "loss": 0.1964, - "step": 8619 - }, - { - "epoch": 0.44, - "grad_norm": 0.9186241307525763, - "learning_rate": 1.2457375691254707e-05, - "loss": 0.1814, - "step": 8620 - }, - { - "epoch": 0.44, - "grad_norm": 0.9818593160010094, - "learning_rate": 1.2455779191302968e-05, - "loss": 0.2123, - "step": 8621 - }, - { - "epoch": 0.44, - "grad_norm": 0.9572114580336248, - "learning_rate": 1.2454182624738079e-05, - "loss": 0.2078, - "step": 8622 - }, - { - "epoch": 0.44, - "grad_norm": 0.9285814703568301, - "learning_rate": 1.2452585991603347e-05, - "loss": 0.2018, - "step": 8623 - }, - { - "epoch": 0.44, - "grad_norm": 0.8095708824494355, - "learning_rate": 1.2450989291942084e-05, - "loss": 0.1798, - "step": 8624 - }, - { - "epoch": 0.44, - "grad_norm": 0.8823515473747471, - "learning_rate": 1.2449392525797597e-05, - "loss": 0.1979, - "step": 8625 - }, - { - "epoch": 0.44, - "grad_norm": 0.8613819549391174, - "learning_rate": 1.24477956932132e-05, - "loss": 0.1853, - "step": 8626 - }, - { - "epoch": 0.44, - "grad_norm": 0.8301435562156122, - "learning_rate": 1.2446198794232206e-05, - "loss": 0.2087, - "step": 8627 - }, - { - "epoch": 0.44, - "grad_norm": 0.9817789102752862, - "learning_rate": 1.2444601828897932e-05, - "loss": 0.1995, - "step": 8628 - }, - { - "epoch": 0.44, - "grad_norm": 0.8351663415207485, - "learning_rate": 1.2443004797253692e-05, - "loss": 0.1902, - "step": 8629 - }, - { - "epoch": 0.44, - "grad_norm": 1.132073956766278, - "learning_rate": 1.244140769934281e-05, - "loss": 0.2077, - "step": 8630 - }, - { - "epoch": 0.44, - "grad_norm": 1.7589643983073007, - "learning_rate": 1.243981053520861e-05, - "loss": 0.2104, - "step": 8631 - }, - { - "epoch": 0.44, - "grad_norm": 0.8034555252532624, - "learning_rate": 1.2438213304894408e-05, - "loss": 0.1927, - "step": 8632 - }, - { - "epoch": 0.44, - "grad_norm": 1.1157291738011732, - "learning_rate": 1.2436616008443534e-05, - "loss": 0.1892, - "step": 8633 - }, - { - "epoch": 0.44, - "grad_norm": 0.8209965863930293, - "learning_rate": 1.2435018645899312e-05, - "loss": 0.1695, - "step": 8634 - }, - { - "epoch": 0.44, - "grad_norm": 0.8722595948991724, - "learning_rate": 1.2433421217305072e-05, - "loss": 0.1867, - "step": 8635 - }, - { - "epoch": 0.44, - "grad_norm": 1.0124487539884681, - "learning_rate": 1.2431823722704147e-05, - "loss": 0.1798, - "step": 8636 - }, - { - "epoch": 0.44, - "grad_norm": 0.7754073159722372, - "learning_rate": 1.2430226162139863e-05, - "loss": 0.1993, - "step": 8637 - }, - { - "epoch": 0.44, - "grad_norm": 0.7968838934251788, - "learning_rate": 1.2428628535655557e-05, - "loss": 0.1781, - "step": 8638 - }, - { - "epoch": 0.44, - "grad_norm": 1.0162402030663464, - "learning_rate": 1.2427030843294562e-05, - "loss": 0.2094, - "step": 8639 - }, - { - "epoch": 0.44, - "grad_norm": 0.9488292199973177, - "learning_rate": 1.2425433085100224e-05, - "loss": 0.2202, - "step": 8640 - }, - { - "epoch": 0.44, - "grad_norm": 0.8235359537085621, - "learning_rate": 1.2423835261115875e-05, - "loss": 0.1932, - "step": 8641 - }, - { - "epoch": 0.44, - "grad_norm": 1.0966483166359258, - "learning_rate": 1.2422237371384857e-05, - "loss": 0.2221, - "step": 8642 - }, - { - "epoch": 0.44, - "grad_norm": 0.7067805471174222, - "learning_rate": 1.2420639415950512e-05, - "loss": 0.2121, - "step": 8643 - }, - { - "epoch": 0.44, - "grad_norm": 0.9066505172572226, - "learning_rate": 1.241904139485619e-05, - "loss": 0.1914, - "step": 8644 - }, - { - "epoch": 0.44, - "grad_norm": 0.8482878763826599, - "learning_rate": 1.2417443308145231e-05, - "loss": 0.2082, - "step": 8645 - }, - { - "epoch": 0.44, - "grad_norm": 0.902057632643992, - "learning_rate": 1.2415845155860985e-05, - "loss": 0.2329, - "step": 8646 - }, - { - "epoch": 0.44, - "grad_norm": 0.8031248362414674, - "learning_rate": 1.2414246938046803e-05, - "loss": 0.1731, - "step": 8647 - }, - { - "epoch": 0.44, - "grad_norm": 1.212999570557578, - "learning_rate": 1.2412648654746038e-05, - "loss": 0.1741, - "step": 8648 - }, - { - "epoch": 0.44, - "grad_norm": 1.054499635574648, - "learning_rate": 1.2411050306002042e-05, - "loss": 0.1829, - "step": 8649 - }, - { - "epoch": 0.44, - "grad_norm": 0.9135542379212512, - "learning_rate": 1.240945189185817e-05, - "loss": 0.1867, - "step": 8650 - }, - { - "epoch": 0.44, - "grad_norm": 1.405275321330997, - "learning_rate": 1.2407853412357775e-05, - "loss": 0.1889, - "step": 8651 - }, - { - "epoch": 0.44, - "grad_norm": 1.0048655495585312, - "learning_rate": 1.2406254867544225e-05, - "loss": 0.2119, - "step": 8652 - }, - { - "epoch": 0.44, - "grad_norm": 1.0534382352091345, - "learning_rate": 1.2404656257460875e-05, - "loss": 0.1997, - "step": 8653 - }, - { - "epoch": 0.44, - "grad_norm": 0.8011986524483066, - "learning_rate": 1.2403057582151088e-05, - "loss": 0.2032, - "step": 8654 - }, - { - "epoch": 0.44, - "grad_norm": 1.1453188395860927, - "learning_rate": 1.2401458841658227e-05, - "loss": 0.2022, - "step": 8655 - }, - { - "epoch": 0.44, - "grad_norm": 0.9429234081767248, - "learning_rate": 1.239986003602566e-05, - "loss": 0.1846, - "step": 8656 - }, - { - "epoch": 0.44, - "grad_norm": 1.2492399758575994, - "learning_rate": 1.2398261165296755e-05, - "loss": 0.186, - "step": 8657 - }, - { - "epoch": 0.44, - "grad_norm": 1.1247829996000132, - "learning_rate": 1.239666222951488e-05, - "loss": 0.1907, - "step": 8658 - }, - { - "epoch": 0.44, - "grad_norm": 0.9308794957701302, - "learning_rate": 1.2395063228723405e-05, - "loss": 0.1974, - "step": 8659 - }, - { - "epoch": 0.44, - "grad_norm": 0.8476358733321918, - "learning_rate": 1.2393464162965708e-05, - "loss": 0.2024, - "step": 8660 - }, - { - "epoch": 0.44, - "grad_norm": 0.8318120223195576, - "learning_rate": 1.239186503228516e-05, - "loss": 0.1754, - "step": 8661 - }, - { - "epoch": 0.44, - "grad_norm": 0.9189154775984179, - "learning_rate": 1.2390265836725136e-05, - "loss": 0.1938, - "step": 8662 - }, - { - "epoch": 0.44, - "grad_norm": 0.8411064285076155, - "learning_rate": 1.2388666576329016e-05, - "loss": 0.2287, - "step": 8663 - }, - { - "epoch": 0.44, - "grad_norm": 1.0606276167362687, - "learning_rate": 1.2387067251140178e-05, - "loss": 0.1818, - "step": 8664 - }, - { - "epoch": 0.44, - "grad_norm": 1.6362813671613476, - "learning_rate": 1.238546786120201e-05, - "loss": 0.2011, - "step": 8665 - }, - { - "epoch": 0.44, - "grad_norm": 1.162047843891554, - "learning_rate": 1.238386840655789e-05, - "loss": 0.1743, - "step": 8666 - }, - { - "epoch": 0.44, - "grad_norm": 0.8695016899684824, - "learning_rate": 1.2382268887251207e-05, - "loss": 0.1849, - "step": 8667 - }, - { - "epoch": 0.44, - "grad_norm": 0.9264986670392068, - "learning_rate": 1.2380669303325346e-05, - "loss": 0.1989, - "step": 8668 - }, - { - "epoch": 0.44, - "grad_norm": 1.1435490303453728, - "learning_rate": 1.237906965482369e-05, - "loss": 0.2015, - "step": 8669 - }, - { - "epoch": 0.44, - "grad_norm": 0.7978001269514786, - "learning_rate": 1.2377469941789639e-05, - "loss": 0.1993, - "step": 8670 - }, - { - "epoch": 0.44, - "grad_norm": 1.1432618438727802, - "learning_rate": 1.2375870164266584e-05, - "loss": 0.1714, - "step": 8671 - }, - { - "epoch": 0.44, - "grad_norm": 0.9594113068655842, - "learning_rate": 1.2374270322297912e-05, - "loss": 0.1852, - "step": 8672 - }, - { - "epoch": 0.44, - "grad_norm": 0.7286130896672868, - "learning_rate": 1.2372670415927023e-05, - "loss": 0.1797, - "step": 8673 - }, - { - "epoch": 0.44, - "grad_norm": 0.938152450920805, - "learning_rate": 1.2371070445197319e-05, - "loss": 0.2, - "step": 8674 - }, - { - "epoch": 0.44, - "grad_norm": 0.8560302080257822, - "learning_rate": 1.2369470410152195e-05, - "loss": 0.1802, - "step": 8675 - }, - { - "epoch": 0.44, - "grad_norm": 0.923050463393855, - "learning_rate": 1.236787031083505e-05, - "loss": 0.2041, - "step": 8676 - }, - { - "epoch": 0.44, - "grad_norm": 0.9821030954763804, - "learning_rate": 1.2366270147289292e-05, - "loss": 0.1782, - "step": 8677 - }, - { - "epoch": 0.44, - "grad_norm": 1.9208630447896724, - "learning_rate": 1.2364669919558321e-05, - "loss": 0.1772, - "step": 8678 - }, - { - "epoch": 0.44, - "grad_norm": 0.8104379453709678, - "learning_rate": 1.2363069627685545e-05, - "loss": 0.1839, - "step": 8679 - }, - { - "epoch": 0.44, - "grad_norm": 0.8191806600272565, - "learning_rate": 1.2361469271714372e-05, - "loss": 0.1912, - "step": 8680 - }, - { - "epoch": 0.44, - "grad_norm": 0.9325590163572611, - "learning_rate": 1.235986885168821e-05, - "loss": 0.1951, - "step": 8681 - }, - { - "epoch": 0.44, - "grad_norm": 1.0513257766154047, - "learning_rate": 1.2358268367650472e-05, - "loss": 0.1946, - "step": 8682 - }, - { - "epoch": 0.44, - "grad_norm": 1.2444637911653882, - "learning_rate": 1.2356667819644575e-05, - "loss": 0.1932, - "step": 8683 - }, - { - "epoch": 0.44, - "grad_norm": 0.9103909492122905, - "learning_rate": 1.2355067207713927e-05, - "loss": 0.1942, - "step": 8684 - }, - { - "epoch": 0.44, - "grad_norm": 0.9457849858116818, - "learning_rate": 1.235346653190195e-05, - "loss": 0.2188, - "step": 8685 - }, - { - "epoch": 0.44, - "grad_norm": 0.9842814315695442, - "learning_rate": 1.2351865792252056e-05, - "loss": 0.1956, - "step": 8686 - }, - { - "epoch": 0.44, - "grad_norm": 0.8901122886363906, - "learning_rate": 1.2350264988807674e-05, - "loss": 0.1979, - "step": 8687 - }, - { - "epoch": 0.44, - "grad_norm": 0.7798350694565372, - "learning_rate": 1.2348664121612219e-05, - "loss": 0.2042, - "step": 8688 - }, - { - "epoch": 0.44, - "grad_norm": 0.9090186696452542, - "learning_rate": 1.234706319070912e-05, - "loss": 0.185, - "step": 8689 - }, - { - "epoch": 0.44, - "grad_norm": 0.7624892317314006, - "learning_rate": 1.2345462196141797e-05, - "loss": 0.1861, - "step": 8690 - }, - { - "epoch": 0.44, - "grad_norm": 1.3538646256449431, - "learning_rate": 1.2343861137953678e-05, - "loss": 0.1867, - "step": 8691 - }, - { - "epoch": 0.44, - "grad_norm": 0.9224124338770778, - "learning_rate": 1.2342260016188197e-05, - "loss": 0.1833, - "step": 8692 - }, - { - "epoch": 0.44, - "grad_norm": 0.8073268179435598, - "learning_rate": 1.234065883088878e-05, - "loss": 0.1896, - "step": 8693 - }, - { - "epoch": 0.44, - "grad_norm": 0.8844325994489721, - "learning_rate": 1.2339057582098859e-05, - "loss": 0.1878, - "step": 8694 - }, - { - "epoch": 0.44, - "grad_norm": 0.91483931694918, - "learning_rate": 1.233745626986187e-05, - "loss": 0.2062, - "step": 8695 - }, - { - "epoch": 0.44, - "grad_norm": 1.1966454284281745, - "learning_rate": 1.2335854894221247e-05, - "loss": 0.1903, - "step": 8696 - }, - { - "epoch": 0.44, - "grad_norm": 1.0333757994680617, - "learning_rate": 1.2334253455220429e-05, - "loss": 0.1973, - "step": 8697 - }, - { - "epoch": 0.44, - "grad_norm": 0.8421908931074247, - "learning_rate": 1.2332651952902852e-05, - "loss": 0.1971, - "step": 8698 - }, - { - "epoch": 0.44, - "grad_norm": 0.9574744317950326, - "learning_rate": 1.2331050387311957e-05, - "loss": 0.2113, - "step": 8699 - }, - { - "epoch": 0.44, - "grad_norm": 0.9223770881917542, - "learning_rate": 1.2329448758491195e-05, - "loss": 0.1758, - "step": 8700 - }, - { - "epoch": 0.44, - "grad_norm": 0.9606924406565363, - "learning_rate": 1.2327847066484e-05, - "loss": 0.178, - "step": 8701 - }, - { - "epoch": 0.44, - "grad_norm": 1.2263409336211948, - "learning_rate": 1.2326245311333823e-05, - "loss": 0.1908, - "step": 8702 - }, - { - "epoch": 0.44, - "grad_norm": 2.184016379817194, - "learning_rate": 1.232464349308411e-05, - "loss": 0.1893, - "step": 8703 - }, - { - "epoch": 0.44, - "grad_norm": 0.8384118727319358, - "learning_rate": 1.2323041611778309e-05, - "loss": 0.2002, - "step": 8704 - }, - { - "epoch": 0.44, - "grad_norm": 0.9282092440214317, - "learning_rate": 1.2321439667459876e-05, - "loss": 0.1757, - "step": 8705 - }, - { - "epoch": 0.44, - "grad_norm": 0.7959667284276463, - "learning_rate": 1.2319837660172258e-05, - "loss": 0.1889, - "step": 8706 - }, - { - "epoch": 0.44, - "grad_norm": 0.750400860745341, - "learning_rate": 1.2318235589958916e-05, - "loss": 0.1915, - "step": 8707 - }, - { - "epoch": 0.44, - "grad_norm": 0.7735865678129724, - "learning_rate": 1.2316633456863299e-05, - "loss": 0.1712, - "step": 8708 - }, - { - "epoch": 0.44, - "grad_norm": 0.8724522023035849, - "learning_rate": 1.2315031260928872e-05, - "loss": 0.2235, - "step": 8709 - }, - { - "epoch": 0.44, - "grad_norm": 0.915554410318466, - "learning_rate": 1.2313429002199088e-05, - "loss": 0.2089, - "step": 8710 - }, - { - "epoch": 0.44, - "grad_norm": 0.78490783447735, - "learning_rate": 1.2311826680717416e-05, - "loss": 0.1776, - "step": 8711 - }, - { - "epoch": 0.44, - "grad_norm": 0.8774957388654196, - "learning_rate": 1.231022429652731e-05, - "loss": 0.1975, - "step": 8712 - }, - { - "epoch": 0.44, - "grad_norm": 0.775234896911333, - "learning_rate": 1.2308621849672244e-05, - "loss": 0.1829, - "step": 8713 - }, - { - "epoch": 0.44, - "grad_norm": 0.9202591567715592, - "learning_rate": 1.2307019340195679e-05, - "loss": 0.2139, - "step": 8714 - }, - { - "epoch": 0.44, - "grad_norm": 0.9882359732294717, - "learning_rate": 1.2305416768141082e-05, - "loss": 0.2124, - "step": 8715 - }, - { - "epoch": 0.44, - "grad_norm": 1.443374205415701, - "learning_rate": 1.2303814133551926e-05, - "loss": 0.1838, - "step": 8716 - }, - { - "epoch": 0.44, - "grad_norm": 0.8106286511053232, - "learning_rate": 1.230221143647168e-05, - "loss": 0.1945, - "step": 8717 - }, - { - "epoch": 0.44, - "grad_norm": 1.2442072877651187, - "learning_rate": 1.230060867694382e-05, - "loss": 0.1881, - "step": 8718 - }, - { - "epoch": 0.44, - "grad_norm": 0.9047866963976102, - "learning_rate": 1.229900585501182e-05, - "loss": 0.1872, - "step": 8719 - }, - { - "epoch": 0.44, - "grad_norm": 1.072887532874953, - "learning_rate": 1.2297402970719157e-05, - "loss": 0.2219, - "step": 8720 - }, - { - "epoch": 0.44, - "grad_norm": 0.8320286552867004, - "learning_rate": 1.2295800024109306e-05, - "loss": 0.2063, - "step": 8721 - }, - { - "epoch": 0.44, - "grad_norm": 0.9128948927663452, - "learning_rate": 1.2294197015225751e-05, - "loss": 0.1819, - "step": 8722 - }, - { - "epoch": 0.44, - "grad_norm": 0.7655232053293657, - "learning_rate": 1.2292593944111972e-05, - "loss": 0.1741, - "step": 8723 - }, - { - "epoch": 0.44, - "grad_norm": 1.0419077231983973, - "learning_rate": 1.2290990810811456e-05, - "loss": 0.1952, - "step": 8724 - }, - { - "epoch": 0.44, - "grad_norm": 0.7758100286467379, - "learning_rate": 1.2289387615367684e-05, - "loss": 0.1884, - "step": 8725 - }, - { - "epoch": 0.44, - "grad_norm": 0.7020149836767687, - "learning_rate": 1.2287784357824138e-05, - "loss": 0.2069, - "step": 8726 - }, - { - "epoch": 0.44, - "grad_norm": 0.8239198097590726, - "learning_rate": 1.2286181038224316e-05, - "loss": 0.1886, - "step": 8727 - }, - { - "epoch": 0.44, - "grad_norm": 0.928750257315147, - "learning_rate": 1.2284577656611706e-05, - "loss": 0.1989, - "step": 8728 - }, - { - "epoch": 0.44, - "grad_norm": 1.0055916699279317, - "learning_rate": 1.2282974213029797e-05, - "loss": 0.1863, - "step": 8729 - }, - { - "epoch": 0.44, - "grad_norm": 0.8478829767427825, - "learning_rate": 1.2281370707522083e-05, - "loss": 0.2072, - "step": 8730 - }, - { - "epoch": 0.44, - "grad_norm": 0.765500143011328, - "learning_rate": 1.2279767140132059e-05, - "loss": 0.21, - "step": 8731 - }, - { - "epoch": 0.44, - "grad_norm": 1.8223953910269926, - "learning_rate": 1.2278163510903222e-05, - "loss": 0.2074, - "step": 8732 - }, - { - "epoch": 0.44, - "grad_norm": 1.0654584856574105, - "learning_rate": 1.2276559819879075e-05, - "loss": 0.2128, - "step": 8733 - }, - { - "epoch": 0.44, - "grad_norm": 0.9679552401237536, - "learning_rate": 1.227495606710311e-05, - "loss": 0.2025, - "step": 8734 - }, - { - "epoch": 0.44, - "grad_norm": 0.9519636449630081, - "learning_rate": 1.2273352252618834e-05, - "loss": 0.1731, - "step": 8735 - }, - { - "epoch": 0.44, - "grad_norm": 0.7244513931276524, - "learning_rate": 1.2271748376469753e-05, - "loss": 0.1691, - "step": 8736 - }, - { - "epoch": 0.44, - "grad_norm": 1.253965355212945, - "learning_rate": 1.2270144438699365e-05, - "loss": 0.1948, - "step": 8737 - }, - { - "epoch": 0.44, - "grad_norm": 0.8691465810010405, - "learning_rate": 1.2268540439351183e-05, - "loss": 0.2224, - "step": 8738 - }, - { - "epoch": 0.44, - "grad_norm": 0.9405752587134205, - "learning_rate": 1.226693637846871e-05, - "loss": 0.1693, - "step": 8739 - }, - { - "epoch": 0.44, - "grad_norm": 0.9853075377180287, - "learning_rate": 1.2265332256095463e-05, - "loss": 0.1713, - "step": 8740 - }, - { - "epoch": 0.44, - "grad_norm": 0.899562324586662, - "learning_rate": 1.2263728072274952e-05, - "loss": 0.1964, - "step": 8741 - }, - { - "epoch": 0.44, - "grad_norm": 0.8181308450571411, - "learning_rate": 1.2262123827050686e-05, - "loss": 0.1864, - "step": 8742 - }, - { - "epoch": 0.44, - "grad_norm": 0.8100402362729071, - "learning_rate": 1.2260519520466185e-05, - "loss": 0.2008, - "step": 8743 - }, - { - "epoch": 0.44, - "grad_norm": 0.7879823698755953, - "learning_rate": 1.2258915152564964e-05, - "loss": 0.1935, - "step": 8744 - }, - { - "epoch": 0.44, - "grad_norm": 0.8888512146131431, - "learning_rate": 1.2257310723390541e-05, - "loss": 0.1891, - "step": 8745 - }, - { - "epoch": 0.44, - "grad_norm": 1.0620541290396086, - "learning_rate": 1.2255706232986438e-05, - "loss": 0.184, - "step": 8746 - }, - { - "epoch": 0.44, - "grad_norm": 0.8531629248382205, - "learning_rate": 1.2254101681396177e-05, - "loss": 0.1811, - "step": 8747 - }, - { - "epoch": 0.44, - "grad_norm": 0.7726202393329789, - "learning_rate": 1.2252497068663281e-05, - "loss": 0.2278, - "step": 8748 - }, - { - "epoch": 0.44, - "grad_norm": 1.094567252642259, - "learning_rate": 1.2250892394831272e-05, - "loss": 0.2069, - "step": 8749 - }, - { - "epoch": 0.44, - "grad_norm": 0.9823153214494921, - "learning_rate": 1.2249287659943682e-05, - "loss": 0.2044, - "step": 8750 - }, - { - "epoch": 0.45, - "grad_norm": 1.0498614567686715, - "learning_rate": 1.2247682864044037e-05, - "loss": 0.1827, - "step": 8751 - }, - { - "epoch": 0.45, - "grad_norm": 0.7545538618223903, - "learning_rate": 1.2246078007175866e-05, - "loss": 0.1968, - "step": 8752 - }, - { - "epoch": 0.45, - "grad_norm": 1.061379383257261, - "learning_rate": 1.2244473089382702e-05, - "loss": 0.2101, - "step": 8753 - }, - { - "epoch": 0.45, - "grad_norm": 0.9009511671275275, - "learning_rate": 1.2242868110708079e-05, - "loss": 0.1945, - "step": 8754 - }, - { - "epoch": 0.45, - "grad_norm": 0.790125825791761, - "learning_rate": 1.2241263071195535e-05, - "loss": 0.1818, - "step": 8755 - }, - { - "epoch": 0.45, - "grad_norm": 0.8315887432214394, - "learning_rate": 1.2239657970888598e-05, - "loss": 0.1918, - "step": 8756 - }, - { - "epoch": 0.45, - "grad_norm": 2.2449738158638226, - "learning_rate": 1.2238052809830816e-05, - "loss": 0.1872, - "step": 8757 - }, - { - "epoch": 0.45, - "grad_norm": 0.867437863360933, - "learning_rate": 1.2236447588065723e-05, - "loss": 0.1945, - "step": 8758 - }, - { - "epoch": 0.45, - "grad_norm": 1.0749293306637857, - "learning_rate": 1.2234842305636865e-05, - "loss": 0.2122, - "step": 8759 - }, - { - "epoch": 0.45, - "grad_norm": 1.0562899061895583, - "learning_rate": 1.2233236962587782e-05, - "loss": 0.1708, - "step": 8760 - }, - { - "epoch": 0.45, - "grad_norm": 1.034263512526097, - "learning_rate": 1.223163155896202e-05, - "loss": 0.195, - "step": 8761 - }, - { - "epoch": 0.45, - "grad_norm": 1.5855367010272412, - "learning_rate": 1.2230026094803127e-05, - "loss": 0.2104, - "step": 8762 - }, - { - "epoch": 0.45, - "grad_norm": 0.7252618781768035, - "learning_rate": 1.2228420570154649e-05, - "loss": 0.1752, - "step": 8763 - }, - { - "epoch": 0.45, - "grad_norm": 1.0131144932423213, - "learning_rate": 1.222681498506014e-05, - "loss": 0.2102, - "step": 8764 - }, - { - "epoch": 0.45, - "grad_norm": 0.8044676222681848, - "learning_rate": 1.2225209339563144e-05, - "loss": 0.167, - "step": 8765 - }, - { - "epoch": 0.45, - "grad_norm": 0.867224693956808, - "learning_rate": 1.2223603633707224e-05, - "loss": 0.1935, - "step": 8766 - }, - { - "epoch": 0.45, - "grad_norm": 1.339476780240116, - "learning_rate": 1.2221997867535926e-05, - "loss": 0.1887, - "step": 8767 - }, - { - "epoch": 0.45, - "grad_norm": 0.8640242341151145, - "learning_rate": 1.2220392041092813e-05, - "loss": 0.2016, - "step": 8768 - }, - { - "epoch": 0.45, - "grad_norm": 1.1395164976092698, - "learning_rate": 1.2218786154421439e-05, - "loss": 0.1965, - "step": 8769 - }, - { - "epoch": 0.45, - "grad_norm": 1.0955555345668755, - "learning_rate": 1.2217180207565365e-05, - "loss": 0.1814, - "step": 8770 - }, - { - "epoch": 0.45, - "grad_norm": 1.080909186673661, - "learning_rate": 1.2215574200568155e-05, - "loss": 0.2092, - "step": 8771 - }, - { - "epoch": 0.45, - "grad_norm": 0.868806952837568, - "learning_rate": 1.2213968133473366e-05, - "loss": 0.2031, - "step": 8772 - }, - { - "epoch": 0.45, - "grad_norm": 1.4526469492698382, - "learning_rate": 1.221236200632457e-05, - "loss": 0.1876, - "step": 8773 - }, - { - "epoch": 0.45, - "grad_norm": 2.78784617514379, - "learning_rate": 1.2210755819165325e-05, - "loss": 0.1934, - "step": 8774 - }, - { - "epoch": 0.45, - "grad_norm": 1.1377878874747358, - "learning_rate": 1.2209149572039207e-05, - "loss": 0.2003, - "step": 8775 - }, - { - "epoch": 0.45, - "grad_norm": 1.1566898421232799, - "learning_rate": 1.220754326498978e-05, - "loss": 0.188, - "step": 8776 - }, - { - "epoch": 0.45, - "grad_norm": 1.4577368626043437, - "learning_rate": 1.2205936898060619e-05, - "loss": 0.2484, - "step": 8777 - }, - { - "epoch": 0.45, - "grad_norm": 1.076111363988673, - "learning_rate": 1.2204330471295296e-05, - "loss": 0.1786, - "step": 8778 - }, - { - "epoch": 0.45, - "grad_norm": 1.2501558204678522, - "learning_rate": 1.2202723984737381e-05, - "loss": 0.1708, - "step": 8779 - }, - { - "epoch": 0.45, - "grad_norm": 0.9423711066705222, - "learning_rate": 1.2201117438430456e-05, - "loss": 0.1917, - "step": 8780 - }, - { - "epoch": 0.45, - "grad_norm": 1.069649524013828, - "learning_rate": 1.2199510832418095e-05, - "loss": 0.1663, - "step": 8781 - }, - { - "epoch": 0.45, - "grad_norm": 1.181212300392255, - "learning_rate": 1.2197904166743882e-05, - "loss": 0.1815, - "step": 8782 - }, - { - "epoch": 0.45, - "grad_norm": 0.7986369608289404, - "learning_rate": 1.2196297441451392e-05, - "loss": 0.179, - "step": 8783 - }, - { - "epoch": 0.45, - "grad_norm": 0.9746185125165827, - "learning_rate": 1.2194690656584209e-05, - "loss": 0.2062, - "step": 8784 - }, - { - "epoch": 0.45, - "grad_norm": 0.8134645721588003, - "learning_rate": 1.2193083812185919e-05, - "loss": 0.1861, - "step": 8785 - }, - { - "epoch": 0.45, - "grad_norm": 1.052563619953177, - "learning_rate": 1.2191476908300107e-05, - "loss": 0.1774, - "step": 8786 - }, - { - "epoch": 0.45, - "grad_norm": 0.9659584698227893, - "learning_rate": 1.2189869944970356e-05, - "loss": 0.1747, - "step": 8787 - }, - { - "epoch": 0.45, - "grad_norm": 0.7230070985388103, - "learning_rate": 1.2188262922240263e-05, - "loss": 0.2043, - "step": 8788 - }, - { - "epoch": 0.45, - "grad_norm": 1.0231134064007024, - "learning_rate": 1.2186655840153413e-05, - "loss": 0.1926, - "step": 8789 - }, - { - "epoch": 0.45, - "grad_norm": 0.8286619944444407, - "learning_rate": 1.2185048698753403e-05, - "loss": 0.1684, - "step": 8790 - }, - { - "epoch": 0.45, - "grad_norm": 0.9708291745003088, - "learning_rate": 1.2183441498083821e-05, - "loss": 0.1807, - "step": 8791 - }, - { - "epoch": 0.45, - "grad_norm": 0.7781997192222662, - "learning_rate": 1.2181834238188264e-05, - "loss": 0.1757, - "step": 8792 - }, - { - "epoch": 0.45, - "grad_norm": 0.9225471816554169, - "learning_rate": 1.2180226919110332e-05, - "loss": 0.1986, - "step": 8793 - }, - { - "epoch": 0.45, - "grad_norm": 0.9767934398909636, - "learning_rate": 1.2178619540893621e-05, - "loss": 0.1835, - "step": 8794 - }, - { - "epoch": 0.45, - "grad_norm": 1.0241007879543205, - "learning_rate": 1.2177012103581733e-05, - "loss": 0.1993, - "step": 8795 - }, - { - "epoch": 0.45, - "grad_norm": 0.9077009583754211, - "learning_rate": 1.2175404607218267e-05, - "loss": 0.1816, - "step": 8796 - }, - { - "epoch": 0.45, - "grad_norm": 0.7274573376917196, - "learning_rate": 1.217379705184683e-05, - "loss": 0.1782, - "step": 8797 - }, - { - "epoch": 0.45, - "grad_norm": 1.072798711913059, - "learning_rate": 1.2172189437511023e-05, - "loss": 0.2116, - "step": 8798 - }, - { - "epoch": 0.45, - "grad_norm": 1.2541852954716943, - "learning_rate": 1.2170581764254458e-05, - "loss": 0.1911, - "step": 8799 - }, - { - "epoch": 0.45, - "grad_norm": 1.2076914791500577, - "learning_rate": 1.2168974032120737e-05, - "loss": 0.2009, - "step": 8800 - }, - { - "epoch": 0.45, - "grad_norm": 0.9503860641385818, - "learning_rate": 1.2167366241153475e-05, - "loss": 0.1913, - "step": 8801 - }, - { - "epoch": 0.45, - "grad_norm": 1.0390296904907494, - "learning_rate": 1.2165758391396281e-05, - "loss": 0.1984, - "step": 8802 - }, - { - "epoch": 0.45, - "grad_norm": 1.3874804890400194, - "learning_rate": 1.2164150482892768e-05, - "loss": 0.2067, - "step": 8803 - }, - { - "epoch": 0.45, - "grad_norm": 1.6212319706782996, - "learning_rate": 1.2162542515686551e-05, - "loss": 0.1909, - "step": 8804 - }, - { - "epoch": 0.45, - "grad_norm": 0.9567611095220859, - "learning_rate": 1.2160934489821244e-05, - "loss": 0.1839, - "step": 8805 - }, - { - "epoch": 0.45, - "grad_norm": 1.2799465065164584, - "learning_rate": 1.2159326405340468e-05, - "loss": 0.1921, - "step": 8806 - }, - { - "epoch": 0.45, - "grad_norm": 1.086003363283199, - "learning_rate": 1.2157718262287841e-05, - "loss": 0.1784, - "step": 8807 - }, - { - "epoch": 0.45, - "grad_norm": 0.7401550610848462, - "learning_rate": 1.2156110060706986e-05, - "loss": 0.1891, - "step": 8808 - }, - { - "epoch": 0.45, - "grad_norm": 0.8915999284651739, - "learning_rate": 1.215450180064152e-05, - "loss": 0.1977, - "step": 8809 - }, - { - "epoch": 0.45, - "grad_norm": 1.0195758291399881, - "learning_rate": 1.2152893482135075e-05, - "loss": 0.1806, - "step": 8810 - }, - { - "epoch": 0.45, - "grad_norm": 1.1749497925696335, - "learning_rate": 1.2151285105231273e-05, - "loss": 0.2189, - "step": 8811 - }, - { - "epoch": 0.45, - "grad_norm": 0.8225497459206069, - "learning_rate": 1.214967666997374e-05, - "loss": 0.1824, - "step": 8812 - }, - { - "epoch": 0.45, - "grad_norm": 1.214799801660971, - "learning_rate": 1.2148068176406104e-05, - "loss": 0.178, - "step": 8813 - }, - { - "epoch": 0.45, - "grad_norm": 0.9197690708431241, - "learning_rate": 1.2146459624571998e-05, - "loss": 0.1859, - "step": 8814 - }, - { - "epoch": 0.45, - "grad_norm": 0.9091858741271089, - "learning_rate": 1.2144851014515055e-05, - "loss": 0.1755, - "step": 8815 - }, - { - "epoch": 0.45, - "grad_norm": 1.086043007279395, - "learning_rate": 1.2143242346278908e-05, - "loss": 0.1923, - "step": 8816 - }, - { - "epoch": 0.45, - "grad_norm": 1.3372441168269635, - "learning_rate": 1.214163361990719e-05, - "loss": 0.1931, - "step": 8817 - }, - { - "epoch": 0.45, - "grad_norm": 0.963781956668138, - "learning_rate": 1.2140024835443537e-05, - "loss": 0.2051, - "step": 8818 - }, - { - "epoch": 0.45, - "grad_norm": 1.2980605044900115, - "learning_rate": 1.213841599293159e-05, - "loss": 0.1746, - "step": 8819 - }, - { - "epoch": 0.45, - "grad_norm": 0.8386179927027012, - "learning_rate": 1.2136807092414992e-05, - "loss": 0.1855, - "step": 8820 - }, - { - "epoch": 0.45, - "grad_norm": 0.8201498327266299, - "learning_rate": 1.2135198133937381e-05, - "loss": 0.1677, - "step": 8821 - }, - { - "epoch": 0.45, - "grad_norm": 0.9651597614348006, - "learning_rate": 1.2133589117542395e-05, - "loss": 0.1907, - "step": 8822 - }, - { - "epoch": 0.45, - "grad_norm": 0.9814233612261183, - "learning_rate": 1.2131980043273685e-05, - "loss": 0.2056, - "step": 8823 - }, - { - "epoch": 0.45, - "grad_norm": 0.8653900828234407, - "learning_rate": 1.2130370911174898e-05, - "loss": 0.1729, - "step": 8824 - }, - { - "epoch": 0.45, - "grad_norm": 1.3058003760102206, - "learning_rate": 1.212876172128968e-05, - "loss": 0.1978, - "step": 8825 - }, - { - "epoch": 0.45, - "grad_norm": 1.0536411372349836, - "learning_rate": 1.2127152473661678e-05, - "loss": 0.1949, - "step": 8826 - }, - { - "epoch": 0.45, - "grad_norm": 0.9398760268893137, - "learning_rate": 1.2125543168334546e-05, - "loss": 0.1978, - "step": 8827 - }, - { - "epoch": 0.45, - "grad_norm": 0.9603591809895711, - "learning_rate": 1.2123933805351934e-05, - "loss": 0.2136, - "step": 8828 - }, - { - "epoch": 0.45, - "grad_norm": 0.8674456935843604, - "learning_rate": 1.21223243847575e-05, - "loss": 0.1757, - "step": 8829 - }, - { - "epoch": 0.45, - "grad_norm": 0.959780207158066, - "learning_rate": 1.2120714906594897e-05, - "loss": 0.2059, - "step": 8830 - }, - { - "epoch": 0.45, - "grad_norm": 1.250047070583227, - "learning_rate": 1.211910537090778e-05, - "loss": 0.1826, - "step": 8831 - }, - { - "epoch": 0.45, - "grad_norm": 0.8414534827990336, - "learning_rate": 1.2117495777739815e-05, - "loss": 0.174, - "step": 8832 - }, - { - "epoch": 0.45, - "grad_norm": 1.105316291874254, - "learning_rate": 1.2115886127134653e-05, - "loss": 0.2109, - "step": 8833 - }, - { - "epoch": 0.45, - "grad_norm": 0.8495791290530903, - "learning_rate": 1.2114276419135964e-05, - "loss": 0.1905, - "step": 8834 - }, - { - "epoch": 0.45, - "grad_norm": 1.4008403268681422, - "learning_rate": 1.2112666653787404e-05, - "loss": 0.2, - "step": 8835 - }, - { - "epoch": 0.45, - "grad_norm": 1.0376823697906254, - "learning_rate": 1.2111056831132641e-05, - "loss": 0.1758, - "step": 8836 - }, - { - "epoch": 0.45, - "grad_norm": 0.9164332739485018, - "learning_rate": 1.2109446951215347e-05, - "loss": 0.196, - "step": 8837 - }, - { - "epoch": 0.45, - "grad_norm": 1.0077455559186472, - "learning_rate": 1.2107837014079182e-05, - "loss": 0.2039, - "step": 8838 - }, - { - "epoch": 0.45, - "grad_norm": 0.8600414769402724, - "learning_rate": 1.210622701976782e-05, - "loss": 0.1759, - "step": 8839 - }, - { - "epoch": 0.45, - "grad_norm": 0.83784665366274, - "learning_rate": 1.2104616968324928e-05, - "loss": 0.1779, - "step": 8840 - }, - { - "epoch": 0.45, - "grad_norm": 1.1928432869020684, - "learning_rate": 1.2103006859794184e-05, - "loss": 0.193, - "step": 8841 - }, - { - "epoch": 0.45, - "grad_norm": 0.9301657792242488, - "learning_rate": 1.2101396694219262e-05, - "loss": 0.176, - "step": 8842 - }, - { - "epoch": 0.45, - "grad_norm": 1.1520056763985131, - "learning_rate": 1.2099786471643834e-05, - "loss": 0.1946, - "step": 8843 - }, - { - "epoch": 0.45, - "grad_norm": 1.1447231599498406, - "learning_rate": 1.2098176192111578e-05, - "loss": 0.2055, - "step": 8844 - }, - { - "epoch": 0.45, - "grad_norm": 0.8944520715875532, - "learning_rate": 1.2096565855666178e-05, - "loss": 0.2033, - "step": 8845 - }, - { - "epoch": 0.45, - "grad_norm": 1.1372848444010801, - "learning_rate": 1.209495546235131e-05, - "loss": 0.2061, - "step": 8846 - }, - { - "epoch": 0.45, - "grad_norm": 0.7647781497580564, - "learning_rate": 1.2093345012210656e-05, - "loss": 0.1932, - "step": 8847 - }, - { - "epoch": 0.45, - "grad_norm": 0.8912513425073467, - "learning_rate": 1.20917345052879e-05, - "loss": 0.1796, - "step": 8848 - }, - { - "epoch": 0.45, - "grad_norm": 1.5089662709207687, - "learning_rate": 1.2090123941626726e-05, - "loss": 0.1784, - "step": 8849 - }, - { - "epoch": 0.45, - "grad_norm": 1.0933341421872094, - "learning_rate": 1.2088513321270823e-05, - "loss": 0.1995, - "step": 8850 - }, - { - "epoch": 0.45, - "grad_norm": 0.9191939310469205, - "learning_rate": 1.2086902644263878e-05, - "loss": 0.1889, - "step": 8851 - }, - { - "epoch": 0.45, - "grad_norm": 0.8287536485776805, - "learning_rate": 1.2085291910649585e-05, - "loss": 0.1873, - "step": 8852 - }, - { - "epoch": 0.45, - "grad_norm": 0.9271405890498322, - "learning_rate": 1.2083681120471626e-05, - "loss": 0.1982, - "step": 8853 - }, - { - "epoch": 0.45, - "grad_norm": 0.9672085990082487, - "learning_rate": 1.20820702737737e-05, - "loss": 0.1924, - "step": 8854 - }, - { - "epoch": 0.45, - "grad_norm": 1.050213589768552, - "learning_rate": 1.2080459370599502e-05, - "loss": 0.184, - "step": 8855 - }, - { - "epoch": 0.45, - "grad_norm": 0.9957672183719678, - "learning_rate": 1.2078848410992726e-05, - "loss": 0.1982, - "step": 8856 - }, - { - "epoch": 0.45, - "grad_norm": 0.7800792332036594, - "learning_rate": 1.2077237394997065e-05, - "loss": 0.1702, - "step": 8857 - }, - { - "epoch": 0.45, - "grad_norm": 1.0934036596223062, - "learning_rate": 1.2075626322656227e-05, - "loss": 0.1914, - "step": 8858 - }, - { - "epoch": 0.45, - "grad_norm": 0.8329459545117248, - "learning_rate": 1.2074015194013906e-05, - "loss": 0.1869, - "step": 8859 - }, - { - "epoch": 0.45, - "grad_norm": 0.8139405261848299, - "learning_rate": 1.2072404009113808e-05, - "loss": 0.2056, - "step": 8860 - }, - { - "epoch": 0.45, - "grad_norm": 0.9862417615264362, - "learning_rate": 1.2070792767999633e-05, - "loss": 0.1837, - "step": 8861 - }, - { - "epoch": 0.45, - "grad_norm": 1.0204044344285164, - "learning_rate": 1.2069181470715083e-05, - "loss": 0.2264, - "step": 8862 - }, - { - "epoch": 0.45, - "grad_norm": 2.1435750249832166, - "learning_rate": 1.2067570117303872e-05, - "loss": 0.198, - "step": 8863 - }, - { - "epoch": 0.45, - "grad_norm": 0.9935613757091443, - "learning_rate": 1.2065958707809705e-05, - "loss": 0.2109, - "step": 8864 - }, - { - "epoch": 0.45, - "grad_norm": 1.0114797607164516, - "learning_rate": 1.2064347242276293e-05, - "loss": 0.192, - "step": 8865 - }, - { - "epoch": 0.45, - "grad_norm": 1.118130182359597, - "learning_rate": 1.2062735720747343e-05, - "loss": 0.1888, - "step": 8866 - }, - { - "epoch": 0.45, - "grad_norm": 2.3928760503882156, - "learning_rate": 1.2061124143266571e-05, - "loss": 0.1989, - "step": 8867 - }, - { - "epoch": 0.45, - "grad_norm": 1.020045704762623, - "learning_rate": 1.2059512509877691e-05, - "loss": 0.1943, - "step": 8868 - }, - { - "epoch": 0.45, - "grad_norm": 0.7282918073861949, - "learning_rate": 1.205790082062442e-05, - "loss": 0.1686, - "step": 8869 - }, - { - "epoch": 0.45, - "grad_norm": 0.8817405987482647, - "learning_rate": 1.2056289075550467e-05, - "loss": 0.1743, - "step": 8870 - }, - { - "epoch": 0.45, - "grad_norm": 1.2194815943547086, - "learning_rate": 1.205467727469956e-05, - "loss": 0.1815, - "step": 8871 - }, - { - "epoch": 0.45, - "grad_norm": 1.1515293942216966, - "learning_rate": 1.2053065418115418e-05, - "loss": 0.1901, - "step": 8872 - }, - { - "epoch": 0.45, - "grad_norm": 0.8144674738067087, - "learning_rate": 1.2051453505841757e-05, - "loss": 0.194, - "step": 8873 - }, - { - "epoch": 0.45, - "grad_norm": 0.8586989430386209, - "learning_rate": 1.2049841537922307e-05, - "loss": 0.196, - "step": 8874 - }, - { - "epoch": 0.45, - "grad_norm": 0.7018719551715215, - "learning_rate": 1.2048229514400785e-05, - "loss": 0.1723, - "step": 8875 - }, - { - "epoch": 0.45, - "grad_norm": 1.0214569381837684, - "learning_rate": 1.2046617435320924e-05, - "loss": 0.1901, - "step": 8876 - }, - { - "epoch": 0.45, - "grad_norm": 1.005899395849749, - "learning_rate": 1.2045005300726452e-05, - "loss": 0.1923, - "step": 8877 - }, - { - "epoch": 0.45, - "grad_norm": 1.5862559489013401, - "learning_rate": 1.2043393110661092e-05, - "loss": 0.1911, - "step": 8878 - }, - { - "epoch": 0.45, - "grad_norm": 1.1185356170292566, - "learning_rate": 1.2041780865168577e-05, - "loss": 0.2061, - "step": 8879 - }, - { - "epoch": 0.45, - "grad_norm": 1.0316089428982487, - "learning_rate": 1.2040168564292644e-05, - "loss": 0.19, - "step": 8880 - }, - { - "epoch": 0.45, - "grad_norm": 0.6316760066241547, - "learning_rate": 1.2038556208077026e-05, - "loss": 0.1756, - "step": 8881 - }, - { - "epoch": 0.45, - "grad_norm": 0.9262215986725295, - "learning_rate": 1.2036943796565453e-05, - "loss": 0.1677, - "step": 8882 - }, - { - "epoch": 0.45, - "grad_norm": 0.8161375418600838, - "learning_rate": 1.2035331329801663e-05, - "loss": 0.2222, - "step": 8883 - }, - { - "epoch": 0.45, - "grad_norm": 0.9862382320843694, - "learning_rate": 1.2033718807829395e-05, - "loss": 0.2013, - "step": 8884 - }, - { - "epoch": 0.45, - "grad_norm": 0.924590732113376, - "learning_rate": 1.2032106230692394e-05, - "loss": 0.1705, - "step": 8885 - }, - { - "epoch": 0.45, - "grad_norm": 0.7549429941943393, - "learning_rate": 1.2030493598434392e-05, - "loss": 0.1843, - "step": 8886 - }, - { - "epoch": 0.45, - "grad_norm": 0.8770179406545869, - "learning_rate": 1.2028880911099141e-05, - "loss": 0.199, - "step": 8887 - }, - { - "epoch": 0.45, - "grad_norm": 1.0535531729638505, - "learning_rate": 1.2027268168730378e-05, - "loss": 0.1917, - "step": 8888 - }, - { - "epoch": 0.45, - "grad_norm": 0.9816682275340497, - "learning_rate": 1.202565537137185e-05, - "loss": 0.1873, - "step": 8889 - }, - { - "epoch": 0.45, - "grad_norm": 1.111591525187513, - "learning_rate": 1.202404251906731e-05, - "loss": 0.2159, - "step": 8890 - }, - { - "epoch": 0.45, - "grad_norm": 0.9930935676272202, - "learning_rate": 1.20224296118605e-05, - "loss": 0.1959, - "step": 8891 - }, - { - "epoch": 0.45, - "grad_norm": 1.0744553729919295, - "learning_rate": 1.202081664979517e-05, - "loss": 0.1829, - "step": 8892 - }, - { - "epoch": 0.45, - "grad_norm": 1.0903638199210839, - "learning_rate": 1.2019203632915078e-05, - "loss": 0.193, - "step": 8893 - }, - { - "epoch": 0.45, - "grad_norm": 1.8433993464195626, - "learning_rate": 1.2017590561263973e-05, - "loss": 0.1989, - "step": 8894 - }, - { - "epoch": 0.45, - "grad_norm": 0.9218661790570869, - "learning_rate": 1.2015977434885608e-05, - "loss": 0.1988, - "step": 8895 - }, - { - "epoch": 0.45, - "grad_norm": 1.086711989423081, - "learning_rate": 1.2014364253823742e-05, - "loss": 0.1997, - "step": 8896 - }, - { - "epoch": 0.45, - "grad_norm": 0.9552204008080996, - "learning_rate": 1.2012751018122132e-05, - "loss": 0.1918, - "step": 8897 - }, - { - "epoch": 0.45, - "grad_norm": 1.1356827714958893, - "learning_rate": 1.2011137727824536e-05, - "loss": 0.1838, - "step": 8898 - }, - { - "epoch": 0.45, - "grad_norm": 1.1033408198461252, - "learning_rate": 1.2009524382974717e-05, - "loss": 0.2092, - "step": 8899 - }, - { - "epoch": 0.45, - "grad_norm": 1.026359269650774, - "learning_rate": 1.2007910983616435e-05, - "loss": 0.1791, - "step": 8900 - }, - { - "epoch": 0.45, - "grad_norm": 0.7595638197151642, - "learning_rate": 1.2006297529793456e-05, - "loss": 0.185, - "step": 8901 - }, - { - "epoch": 0.45, - "grad_norm": 1.8070567756184621, - "learning_rate": 1.200468402154954e-05, - "loss": 0.1862, - "step": 8902 - }, - { - "epoch": 0.45, - "grad_norm": 0.9054734943926448, - "learning_rate": 1.2003070458928458e-05, - "loss": 0.215, - "step": 8903 - }, - { - "epoch": 0.45, - "grad_norm": 3.4442105832569023, - "learning_rate": 1.200145684197398e-05, - "loss": 0.1978, - "step": 8904 - }, - { - "epoch": 0.45, - "grad_norm": 1.2466883918482339, - "learning_rate": 1.1999843170729866e-05, - "loss": 0.1729, - "step": 8905 - }, - { - "epoch": 0.45, - "grad_norm": 0.941852164338417, - "learning_rate": 1.1998229445239898e-05, - "loss": 0.1966, - "step": 8906 - }, - { - "epoch": 0.45, - "grad_norm": 1.1718913462872127, - "learning_rate": 1.1996615665547841e-05, - "loss": 0.1927, - "step": 8907 - }, - { - "epoch": 0.45, - "grad_norm": 0.994145976940982, - "learning_rate": 1.1995001831697472e-05, - "loss": 0.2004, - "step": 8908 - }, - { - "epoch": 0.45, - "grad_norm": 0.8267161858948318, - "learning_rate": 1.1993387943732567e-05, - "loss": 0.2053, - "step": 8909 - }, - { - "epoch": 0.45, - "grad_norm": 0.7850285872276042, - "learning_rate": 1.1991774001696896e-05, - "loss": 0.1807, - "step": 8910 - }, - { - "epoch": 0.45, - "grad_norm": 0.9629868160271702, - "learning_rate": 1.1990160005634248e-05, - "loss": 0.1959, - "step": 8911 - }, - { - "epoch": 0.45, - "grad_norm": 1.0503190559300086, - "learning_rate": 1.1988545955588395e-05, - "loss": 0.1844, - "step": 8912 - }, - { - "epoch": 0.45, - "grad_norm": 0.9957325838836562, - "learning_rate": 1.1986931851603122e-05, - "loss": 0.2011, - "step": 8913 - }, - { - "epoch": 0.45, - "grad_norm": 1.0639985315351588, - "learning_rate": 1.1985317693722212e-05, - "loss": 0.1996, - "step": 8914 - }, - { - "epoch": 0.45, - "grad_norm": 0.9629188523199401, - "learning_rate": 1.1983703481989443e-05, - "loss": 0.2051, - "step": 8915 - }, - { - "epoch": 0.45, - "grad_norm": 0.9569474788455568, - "learning_rate": 1.1982089216448607e-05, - "loss": 0.2102, - "step": 8916 - }, - { - "epoch": 0.45, - "grad_norm": 1.0215416358534117, - "learning_rate": 1.198047489714349e-05, - "loss": 0.2206, - "step": 8917 - }, - { - "epoch": 0.45, - "grad_norm": 1.1053432675497292, - "learning_rate": 1.197886052411788e-05, - "loss": 0.2172, - "step": 8918 - }, - { - "epoch": 0.45, - "grad_norm": 1.2538967899336353, - "learning_rate": 1.1977246097415565e-05, - "loss": 0.1804, - "step": 8919 - }, - { - "epoch": 0.45, - "grad_norm": 0.7904544707992178, - "learning_rate": 1.1975631617080339e-05, - "loss": 0.2404, - "step": 8920 - }, - { - "epoch": 0.45, - "grad_norm": 1.010512958427013, - "learning_rate": 1.1974017083155993e-05, - "loss": 0.2069, - "step": 8921 - }, - { - "epoch": 0.45, - "grad_norm": 0.9187475659348551, - "learning_rate": 1.1972402495686323e-05, - "loss": 0.195, - "step": 8922 - }, - { - "epoch": 0.45, - "grad_norm": 1.1264834567954618, - "learning_rate": 1.1970787854715123e-05, - "loss": 0.1797, - "step": 8923 - }, - { - "epoch": 0.45, - "grad_norm": 0.902873139466147, - "learning_rate": 1.1969173160286191e-05, - "loss": 0.1672, - "step": 8924 - }, - { - "epoch": 0.45, - "grad_norm": 3.8147847023995554, - "learning_rate": 1.1967558412443328e-05, - "loss": 0.2024, - "step": 8925 - }, - { - "epoch": 0.45, - "grad_norm": 1.426225608926197, - "learning_rate": 1.1965943611230331e-05, - "loss": 0.1954, - "step": 8926 - }, - { - "epoch": 0.45, - "grad_norm": 0.9450555610864556, - "learning_rate": 1.1964328756691e-05, - "loss": 0.1639, - "step": 8927 - }, - { - "epoch": 0.45, - "grad_norm": 1.239627680378109, - "learning_rate": 1.196271384886914e-05, - "loss": 0.1813, - "step": 8928 - }, - { - "epoch": 0.45, - "grad_norm": 1.110179894405812, - "learning_rate": 1.196109888780856e-05, - "loss": 0.1652, - "step": 8929 - }, - { - "epoch": 0.45, - "grad_norm": 0.8597255073498342, - "learning_rate": 1.1959483873553059e-05, - "loss": 0.1854, - "step": 8930 - }, - { - "epoch": 0.45, - "grad_norm": 1.5145937177785822, - "learning_rate": 1.1957868806146449e-05, - "loss": 0.2137, - "step": 8931 - }, - { - "epoch": 0.45, - "grad_norm": 1.1041447036829297, - "learning_rate": 1.1956253685632534e-05, - "loss": 0.1661, - "step": 8932 - }, - { - "epoch": 0.45, - "grad_norm": 0.9525616525645625, - "learning_rate": 1.1954638512055131e-05, - "loss": 0.2112, - "step": 8933 - }, - { - "epoch": 0.45, - "grad_norm": 1.3595966325094786, - "learning_rate": 1.1953023285458047e-05, - "loss": 0.1958, - "step": 8934 - }, - { - "epoch": 0.45, - "grad_norm": 0.9975591742736065, - "learning_rate": 1.1951408005885098e-05, - "loss": 0.183, - "step": 8935 - }, - { - "epoch": 0.45, - "grad_norm": 0.773323065930237, - "learning_rate": 1.1949792673380094e-05, - "loss": 0.1689, - "step": 8936 - }, - { - "epoch": 0.45, - "grad_norm": 1.0978246199548096, - "learning_rate": 1.194817728798685e-05, - "loss": 0.2031, - "step": 8937 - }, - { - "epoch": 0.45, - "grad_norm": 0.9025386810836243, - "learning_rate": 1.1946561849749192e-05, - "loss": 0.1739, - "step": 8938 - }, - { - "epoch": 0.45, - "grad_norm": 1.0924668110309403, - "learning_rate": 1.1944946358710936e-05, - "loss": 0.1892, - "step": 8939 - }, - { - "epoch": 0.45, - "grad_norm": 0.8475628728718165, - "learning_rate": 1.1943330814915897e-05, - "loss": 0.1962, - "step": 8940 - }, - { - "epoch": 0.45, - "grad_norm": 1.144332901565883, - "learning_rate": 1.1941715218407898e-05, - "loss": 0.1789, - "step": 8941 - }, - { - "epoch": 0.45, - "grad_norm": 0.9641545428843032, - "learning_rate": 1.1940099569230767e-05, - "loss": 0.2029, - "step": 8942 - }, - { - "epoch": 0.45, - "grad_norm": 1.0694630086624335, - "learning_rate": 1.1938483867428326e-05, - "loss": 0.1833, - "step": 8943 - }, - { - "epoch": 0.45, - "grad_norm": 1.6671110565068417, - "learning_rate": 1.19368681130444e-05, - "loss": 0.1973, - "step": 8944 - }, - { - "epoch": 0.45, - "grad_norm": 1.0086089261741662, - "learning_rate": 1.1935252306122812e-05, - "loss": 0.1949, - "step": 8945 - }, - { - "epoch": 0.45, - "grad_norm": 0.8401339480562876, - "learning_rate": 1.1933636446707401e-05, - "loss": 0.159, - "step": 8946 - }, - { - "epoch": 0.45, - "grad_norm": 1.1733262203357395, - "learning_rate": 1.1932020534841992e-05, - "loss": 0.1925, - "step": 8947 - }, - { - "epoch": 0.46, - "grad_norm": 1.2062640371557118, - "learning_rate": 1.1930404570570417e-05, - "loss": 0.2117, - "step": 8948 - }, - { - "epoch": 0.46, - "grad_norm": 1.58050795448441, - "learning_rate": 1.1928788553936507e-05, - "loss": 0.1997, - "step": 8949 - }, - { - "epoch": 0.46, - "grad_norm": 1.2136141398608247, - "learning_rate": 1.19271724849841e-05, - "loss": 0.2107, - "step": 8950 - }, - { - "epoch": 0.46, - "grad_norm": 1.5287755275959543, - "learning_rate": 1.192555636375703e-05, - "loss": 0.1869, - "step": 8951 - }, - { - "epoch": 0.46, - "grad_norm": 0.7695097792624204, - "learning_rate": 1.1923940190299135e-05, - "loss": 0.1838, - "step": 8952 - }, - { - "epoch": 0.46, - "grad_norm": 1.046415308205685, - "learning_rate": 1.1922323964654254e-05, - "loss": 0.1898, - "step": 8953 - }, - { - "epoch": 0.46, - "grad_norm": 0.9872118514792896, - "learning_rate": 1.1920707686866227e-05, - "loss": 0.1898, - "step": 8954 - }, - { - "epoch": 0.46, - "grad_norm": 0.9799211018153227, - "learning_rate": 1.1919091356978894e-05, - "loss": 0.1642, - "step": 8955 - }, - { - "epoch": 0.46, - "grad_norm": 1.155722364648013, - "learning_rate": 1.19174749750361e-05, - "loss": 0.1772, - "step": 8956 - }, - { - "epoch": 0.46, - "grad_norm": 1.2290325356839902, - "learning_rate": 1.1915858541081693e-05, - "loss": 0.1663, - "step": 8957 - }, - { - "epoch": 0.46, - "grad_norm": 1.30211196478441, - "learning_rate": 1.191424205515951e-05, - "loss": 0.2073, - "step": 8958 - }, - { - "epoch": 0.46, - "grad_norm": 1.0336619162587886, - "learning_rate": 1.1912625517313406e-05, - "loss": 0.2175, - "step": 8959 - }, - { - "epoch": 0.46, - "grad_norm": 1.015136790947612, - "learning_rate": 1.1911008927587224e-05, - "loss": 0.1947, - "step": 8960 - }, - { - "epoch": 0.46, - "grad_norm": 1.0525402971892104, - "learning_rate": 1.190939228602482e-05, - "loss": 0.2008, - "step": 8961 - }, - { - "epoch": 0.46, - "grad_norm": 1.0309507319611686, - "learning_rate": 1.190777559267004e-05, - "loss": 0.1712, - "step": 8962 - }, - { - "epoch": 0.46, - "grad_norm": 1.042308202194219, - "learning_rate": 1.190615884756674e-05, - "loss": 0.2174, - "step": 8963 - }, - { - "epoch": 0.46, - "grad_norm": 1.2486820476818925, - "learning_rate": 1.1904542050758774e-05, - "loss": 0.2019, - "step": 8964 - }, - { - "epoch": 0.46, - "grad_norm": 0.9681283258309303, - "learning_rate": 1.1902925202289997e-05, - "loss": 0.1748, - "step": 8965 - }, - { - "epoch": 0.46, - "grad_norm": 0.8824864392862095, - "learning_rate": 1.1901308302204267e-05, - "loss": 0.1827, - "step": 8966 - }, - { - "epoch": 0.46, - "grad_norm": 2.0792711159266326, - "learning_rate": 1.189969135054544e-05, - "loss": 0.1817, - "step": 8967 - }, - { - "epoch": 0.46, - "grad_norm": 0.9004669137625958, - "learning_rate": 1.1898074347357377e-05, - "loss": 0.1905, - "step": 8968 - }, - { - "epoch": 0.46, - "grad_norm": 0.8946191807568328, - "learning_rate": 1.1896457292683945e-05, - "loss": 0.2004, - "step": 8969 - }, - { - "epoch": 0.46, - "grad_norm": 0.9348331603869736, - "learning_rate": 1.1894840186569e-05, - "loss": 0.1854, - "step": 8970 - }, - { - "epoch": 0.46, - "grad_norm": 1.7410801074117348, - "learning_rate": 1.1893223029056406e-05, - "loss": 0.1813, - "step": 8971 - }, - { - "epoch": 0.46, - "grad_norm": 1.1059641875465824, - "learning_rate": 1.1891605820190031e-05, - "loss": 0.1687, - "step": 8972 - }, - { - "epoch": 0.46, - "grad_norm": 0.9228413836557761, - "learning_rate": 1.1889988560013741e-05, - "loss": 0.1917, - "step": 8973 - }, - { - "epoch": 0.46, - "grad_norm": 0.8623997445511834, - "learning_rate": 1.1888371248571409e-05, - "loss": 0.1745, - "step": 8974 - }, - { - "epoch": 0.46, - "grad_norm": 1.3696551052147947, - "learning_rate": 1.1886753885906895e-05, - "loss": 0.2035, - "step": 8975 - }, - { - "epoch": 0.46, - "grad_norm": 1.1247215692096408, - "learning_rate": 1.1885136472064076e-05, - "loss": 0.2072, - "step": 8976 - }, - { - "epoch": 0.46, - "grad_norm": 1.1875189683505865, - "learning_rate": 1.1883519007086824e-05, - "loss": 0.217, - "step": 8977 - }, - { - "epoch": 0.46, - "grad_norm": 1.2625623423472383, - "learning_rate": 1.1881901491019014e-05, - "loss": 0.1774, - "step": 8978 - }, - { - "epoch": 0.46, - "grad_norm": 0.9240639516641106, - "learning_rate": 1.1880283923904518e-05, - "loss": 0.192, - "step": 8979 - }, - { - "epoch": 0.46, - "grad_norm": 1.8776495632201888, - "learning_rate": 1.1878666305787214e-05, - "loss": 0.1982, - "step": 8980 - }, - { - "epoch": 0.46, - "grad_norm": 0.9658463095638202, - "learning_rate": 1.1877048636710981e-05, - "loss": 0.1716, - "step": 8981 - }, - { - "epoch": 0.46, - "grad_norm": 1.2894250472685753, - "learning_rate": 1.1875430916719698e-05, - "loss": 0.1957, - "step": 8982 - }, - { - "epoch": 0.46, - "grad_norm": 1.0529602099603186, - "learning_rate": 1.187381314585725e-05, - "loss": 0.18, - "step": 8983 - }, - { - "epoch": 0.46, - "grad_norm": 1.159065460851313, - "learning_rate": 1.1872195324167508e-05, - "loss": 0.2057, - "step": 8984 - }, - { - "epoch": 0.46, - "grad_norm": 1.0456114107643657, - "learning_rate": 1.1870577451694363e-05, - "loss": 0.1668, - "step": 8985 - }, - { - "epoch": 0.46, - "grad_norm": 0.8648956682920848, - "learning_rate": 1.18689595284817e-05, - "loss": 0.1842, - "step": 8986 - }, - { - "epoch": 0.46, - "grad_norm": 1.0062545746069842, - "learning_rate": 1.1867341554573405e-05, - "loss": 0.1752, - "step": 8987 - }, - { - "epoch": 0.46, - "grad_norm": 0.9052168926450945, - "learning_rate": 1.1865723530013367e-05, - "loss": 0.178, - "step": 8988 - }, - { - "epoch": 0.46, - "grad_norm": 1.1849884410504337, - "learning_rate": 1.1864105454845467e-05, - "loss": 0.1898, - "step": 8989 - }, - { - "epoch": 0.46, - "grad_norm": 1.1768091963894038, - "learning_rate": 1.1862487329113606e-05, - "loss": 0.2171, - "step": 8990 - }, - { - "epoch": 0.46, - "grad_norm": 1.9149338679136048, - "learning_rate": 1.186086915286167e-05, - "loss": 0.191, - "step": 8991 - }, - { - "epoch": 0.46, - "grad_norm": 0.8723532019990747, - "learning_rate": 1.1859250926133554e-05, - "loss": 0.1828, - "step": 8992 - }, - { - "epoch": 0.46, - "grad_norm": 1.4707240199120826, - "learning_rate": 1.185763264897315e-05, - "loss": 0.1637, - "step": 8993 - }, - { - "epoch": 0.46, - "grad_norm": 0.7403666829101415, - "learning_rate": 1.1856014321424356e-05, - "loss": 0.1928, - "step": 8994 - }, - { - "epoch": 0.46, - "grad_norm": 0.9394086856654486, - "learning_rate": 1.185439594353107e-05, - "loss": 0.1723, - "step": 8995 - }, - { - "epoch": 0.46, - "grad_norm": 0.7670705541749999, - "learning_rate": 1.1852777515337186e-05, - "loss": 0.1841, - "step": 8996 - }, - { - "epoch": 0.46, - "grad_norm": 1.1035336169056782, - "learning_rate": 1.185115903688661e-05, - "loss": 0.1886, - "step": 8997 - }, - { - "epoch": 0.46, - "grad_norm": 1.2146454611707889, - "learning_rate": 1.1849540508223238e-05, - "loss": 0.1969, - "step": 8998 - }, - { - "epoch": 0.46, - "grad_norm": 1.0393209712450067, - "learning_rate": 1.1847921929390977e-05, - "loss": 0.1946, - "step": 8999 - }, - { - "epoch": 0.46, - "grad_norm": 1.008870498761671, - "learning_rate": 1.184630330043373e-05, - "loss": 0.1847, - "step": 9000 - }, - { - "epoch": 0.46, - "grad_norm": 0.9349115695325397, - "learning_rate": 1.1844684621395401e-05, - "loss": 0.2054, - "step": 9001 - }, - { - "epoch": 0.46, - "grad_norm": 0.924911367937286, - "learning_rate": 1.1843065892319895e-05, - "loss": 0.1891, - "step": 9002 - }, - { - "epoch": 0.46, - "grad_norm": 1.1563745043827904, - "learning_rate": 1.1841447113251126e-05, - "loss": 0.2084, - "step": 9003 - }, - { - "epoch": 0.46, - "grad_norm": 0.8965980604277103, - "learning_rate": 1.1839828284233e-05, - "loss": 0.1612, - "step": 9004 - }, - { - "epoch": 0.46, - "grad_norm": 0.9034527880843174, - "learning_rate": 1.1838209405309427e-05, - "loss": 0.1886, - "step": 9005 - }, - { - "epoch": 0.46, - "grad_norm": 1.0077625098069756, - "learning_rate": 1.183659047652432e-05, - "loss": 0.1865, - "step": 9006 - }, - { - "epoch": 0.46, - "grad_norm": 0.9196710553785808, - "learning_rate": 1.1834971497921591e-05, - "loss": 0.1953, - "step": 9007 - }, - { - "epoch": 0.46, - "grad_norm": 0.9536834374306841, - "learning_rate": 1.1833352469545158e-05, - "loss": 0.2062, - "step": 9008 - }, - { - "epoch": 0.46, - "grad_norm": 1.2212574071626323, - "learning_rate": 1.1831733391438937e-05, - "loss": 0.1907, - "step": 9009 - }, - { - "epoch": 0.46, - "grad_norm": 1.5537835504342088, - "learning_rate": 1.1830114263646844e-05, - "loss": 0.2087, - "step": 9010 - }, - { - "epoch": 0.46, - "grad_norm": 0.8431492083254475, - "learning_rate": 1.1828495086212794e-05, - "loss": 0.1801, - "step": 9011 - }, - { - "epoch": 0.46, - "grad_norm": 1.0463466456718005, - "learning_rate": 1.1826875859180718e-05, - "loss": 0.186, - "step": 9012 - }, - { - "epoch": 0.46, - "grad_norm": 1.1162356038874017, - "learning_rate": 1.1825256582594526e-05, - "loss": 0.207, - "step": 9013 - }, - { - "epoch": 0.46, - "grad_norm": 0.9375444806124058, - "learning_rate": 1.1823637256498149e-05, - "loss": 0.2037, - "step": 9014 - }, - { - "epoch": 0.46, - "grad_norm": 1.180452055645475, - "learning_rate": 1.1822017880935507e-05, - "loss": 0.1722, - "step": 9015 - }, - { - "epoch": 0.46, - "grad_norm": 0.8274008918640854, - "learning_rate": 1.1820398455950526e-05, - "loss": 0.1889, - "step": 9016 - }, - { - "epoch": 0.46, - "grad_norm": 0.7722948608780377, - "learning_rate": 1.1818778981587135e-05, - "loss": 0.1851, - "step": 9017 - }, - { - "epoch": 0.46, - "grad_norm": 0.8623477671210378, - "learning_rate": 1.1817159457889264e-05, - "loss": 0.1758, - "step": 9018 - }, - { - "epoch": 0.46, - "grad_norm": 0.7589775449226215, - "learning_rate": 1.1815539884900837e-05, - "loss": 0.1954, - "step": 9019 - }, - { - "epoch": 0.46, - "grad_norm": 1.054810457169351, - "learning_rate": 1.1813920262665788e-05, - "loss": 0.1878, - "step": 9020 - }, - { - "epoch": 0.46, - "grad_norm": 1.293935543595433, - "learning_rate": 1.1812300591228052e-05, - "loss": 0.212, - "step": 9021 - }, - { - "epoch": 0.46, - "grad_norm": 0.8558181092026415, - "learning_rate": 1.1810680870631558e-05, - "loss": 0.176, - "step": 9022 - }, - { - "epoch": 0.46, - "grad_norm": 0.7573874055928003, - "learning_rate": 1.1809061100920245e-05, - "loss": 0.1755, - "step": 9023 - }, - { - "epoch": 0.46, - "grad_norm": 1.1342319197752257, - "learning_rate": 1.1807441282138045e-05, - "loss": 0.1882, - "step": 9024 - }, - { - "epoch": 0.46, - "grad_norm": 1.052170888869555, - "learning_rate": 1.1805821414328897e-05, - "loss": 0.1855, - "step": 9025 - }, - { - "epoch": 0.46, - "grad_norm": 0.9040062958108732, - "learning_rate": 1.1804201497536746e-05, - "loss": 0.1781, - "step": 9026 - }, - { - "epoch": 0.46, - "grad_norm": 1.0001531848845238, - "learning_rate": 1.1802581531805525e-05, - "loss": 0.1835, - "step": 9027 - }, - { - "epoch": 0.46, - "grad_norm": 0.9702189858557619, - "learning_rate": 1.1800961517179177e-05, - "loss": 0.1925, - "step": 9028 - }, - { - "epoch": 0.46, - "grad_norm": 0.9391996907219045, - "learning_rate": 1.179934145370165e-05, - "loss": 0.1968, - "step": 9029 - }, - { - "epoch": 0.46, - "grad_norm": 1.169550256959143, - "learning_rate": 1.1797721341416882e-05, - "loss": 0.2095, - "step": 9030 - }, - { - "epoch": 0.46, - "grad_norm": 1.4986948128088586, - "learning_rate": 1.179610118036882e-05, - "loss": 0.1983, - "step": 9031 - }, - { - "epoch": 0.46, - "grad_norm": 1.2257047082649992, - "learning_rate": 1.1794480970601413e-05, - "loss": 0.2061, - "step": 9032 - }, - { - "epoch": 0.46, - "grad_norm": 1.1231876572801334, - "learning_rate": 1.1792860712158608e-05, - "loss": 0.1956, - "step": 9033 - }, - { - "epoch": 0.46, - "grad_norm": 1.3543258643514862, - "learning_rate": 1.1791240405084355e-05, - "loss": 0.1994, - "step": 9034 - }, - { - "epoch": 0.46, - "grad_norm": 0.8771805946884635, - "learning_rate": 1.1789620049422603e-05, - "loss": 0.1673, - "step": 9035 - }, - { - "epoch": 0.46, - "grad_norm": 1.028897040551911, - "learning_rate": 1.1787999645217309e-05, - "loss": 0.2148, - "step": 9036 - }, - { - "epoch": 0.46, - "grad_norm": 1.0308527874745395, - "learning_rate": 1.178637919251242e-05, - "loss": 0.2075, - "step": 9037 - }, - { - "epoch": 0.46, - "grad_norm": 1.2276060915073248, - "learning_rate": 1.1784758691351898e-05, - "loss": 0.1925, - "step": 9038 - }, - { - "epoch": 0.46, - "grad_norm": 1.2425534898000836, - "learning_rate": 1.1783138141779695e-05, - "loss": 0.1499, - "step": 9039 - }, - { - "epoch": 0.46, - "grad_norm": 0.9302029211692885, - "learning_rate": 1.178151754383977e-05, - "loss": 0.1992, - "step": 9040 - }, - { - "epoch": 0.46, - "grad_norm": 1.0435356139538756, - "learning_rate": 1.177989689757608e-05, - "loss": 0.1983, - "step": 9041 - }, - { - "epoch": 0.46, - "grad_norm": 0.9754457914366035, - "learning_rate": 1.1778276203032584e-05, - "loss": 0.1784, - "step": 9042 - }, - { - "epoch": 0.46, - "grad_norm": 2.0053469624790905, - "learning_rate": 1.1776655460253248e-05, - "loss": 0.1921, - "step": 9043 - }, - { - "epoch": 0.46, - "grad_norm": 0.8778961911792674, - "learning_rate": 1.1775034669282034e-05, - "loss": 0.1955, - "step": 9044 - }, - { - "epoch": 0.46, - "grad_norm": 1.1203395252317987, - "learning_rate": 1.1773413830162902e-05, - "loss": 0.1981, - "step": 9045 - }, - { - "epoch": 0.46, - "grad_norm": 1.0830248882097275, - "learning_rate": 1.177179294293982e-05, - "loss": 0.1805, - "step": 9046 - }, - { - "epoch": 0.46, - "grad_norm": 0.9531798772402397, - "learning_rate": 1.1770172007656756e-05, - "loss": 0.195, - "step": 9047 - }, - { - "epoch": 0.46, - "grad_norm": 1.3430591008501616, - "learning_rate": 1.1768551024357673e-05, - "loss": 0.2009, - "step": 9048 - }, - { - "epoch": 0.46, - "grad_norm": 0.9824402645449075, - "learning_rate": 1.1766929993086547e-05, - "loss": 0.2128, - "step": 9049 - }, - { - "epoch": 0.46, - "grad_norm": 1.6385073345023113, - "learning_rate": 1.1765308913887341e-05, - "loss": 0.1901, - "step": 9050 - }, - { - "epoch": 0.46, - "grad_norm": 0.964258379970069, - "learning_rate": 1.1763687786804034e-05, - "loss": 0.1797, - "step": 9051 - }, - { - "epoch": 0.46, - "grad_norm": 1.1455400657359058, - "learning_rate": 1.1762066611880596e-05, - "loss": 0.1977, - "step": 9052 - }, - { - "epoch": 0.46, - "grad_norm": 0.8002007770933686, - "learning_rate": 1.1760445389161002e-05, - "loss": 0.1776, - "step": 9053 - }, - { - "epoch": 0.46, - "grad_norm": 0.943694401307601, - "learning_rate": 1.1758824118689225e-05, - "loss": 0.2099, - "step": 9054 - }, - { - "epoch": 0.46, - "grad_norm": 1.0283652595881423, - "learning_rate": 1.1757202800509245e-05, - "loss": 0.1996, - "step": 9055 - }, - { - "epoch": 0.46, - "grad_norm": 0.9931005687344918, - "learning_rate": 1.1755581434665043e-05, - "loss": 0.18, - "step": 9056 - }, - { - "epoch": 0.46, - "grad_norm": 2.238537129714373, - "learning_rate": 1.1753960021200593e-05, - "loss": 0.1984, - "step": 9057 - }, - { - "epoch": 0.46, - "grad_norm": 0.7553415966956479, - "learning_rate": 1.1752338560159878e-05, - "loss": 0.1678, - "step": 9058 - }, - { - "epoch": 0.46, - "grad_norm": 1.0408684215597828, - "learning_rate": 1.1750717051586882e-05, - "loss": 0.1895, - "step": 9059 - }, - { - "epoch": 0.46, - "grad_norm": 1.0384347110753673, - "learning_rate": 1.1749095495525584e-05, - "loss": 0.1906, - "step": 9060 - }, - { - "epoch": 0.46, - "grad_norm": 1.044673644971104, - "learning_rate": 1.1747473892019977e-05, - "loss": 0.1983, - "step": 9061 - }, - { - "epoch": 0.46, - "grad_norm": 1.109794063699578, - "learning_rate": 1.1745852241114038e-05, - "loss": 0.2028, - "step": 9062 - }, - { - "epoch": 0.46, - "grad_norm": 1.084289158802793, - "learning_rate": 1.1744230542851758e-05, - "loss": 0.1999, - "step": 9063 - }, - { - "epoch": 0.46, - "grad_norm": 0.777145804319658, - "learning_rate": 1.174260879727713e-05, - "loss": 0.1711, - "step": 9064 - }, - { - "epoch": 0.46, - "grad_norm": 1.0703078027644066, - "learning_rate": 1.1740987004434137e-05, - "loss": 0.1918, - "step": 9065 - }, - { - "epoch": 0.46, - "grad_norm": 0.8781926448135073, - "learning_rate": 1.1739365164366775e-05, - "loss": 0.198, - "step": 9066 - }, - { - "epoch": 0.46, - "grad_norm": 1.3489089861431531, - "learning_rate": 1.1737743277119031e-05, - "loss": 0.1865, - "step": 9067 - }, - { - "epoch": 0.46, - "grad_norm": 1.9097810356618725, - "learning_rate": 1.1736121342734903e-05, - "loss": 0.1857, - "step": 9068 - }, - { - "epoch": 0.46, - "grad_norm": 1.0893069302099345, - "learning_rate": 1.173449936125839e-05, - "loss": 0.186, - "step": 9069 - }, - { - "epoch": 0.46, - "grad_norm": 0.8277620769173616, - "learning_rate": 1.1732877332733479e-05, - "loss": 0.1812, - "step": 9070 - }, - { - "epoch": 0.46, - "grad_norm": 1.3249669025073985, - "learning_rate": 1.1731255257204174e-05, - "loss": 0.1903, - "step": 9071 - }, - { - "epoch": 0.46, - "grad_norm": 1.1042809564014204, - "learning_rate": 1.1729633134714475e-05, - "loss": 0.2004, - "step": 9072 - }, - { - "epoch": 0.46, - "grad_norm": 0.9206027756962021, - "learning_rate": 1.1728010965308377e-05, - "loss": 0.1911, - "step": 9073 - }, - { - "epoch": 0.46, - "grad_norm": 1.0423116504989947, - "learning_rate": 1.1726388749029884e-05, - "loss": 0.188, - "step": 9074 - }, - { - "epoch": 0.46, - "grad_norm": 0.7968196186056415, - "learning_rate": 1.1724766485922998e-05, - "loss": 0.1874, - "step": 9075 - }, - { - "epoch": 0.46, - "grad_norm": 0.9243988557355096, - "learning_rate": 1.1723144176031727e-05, - "loss": 0.1881, - "step": 9076 - }, - { - "epoch": 0.46, - "grad_norm": 1.042151183072485, - "learning_rate": 1.1721521819400068e-05, - "loss": 0.1965, - "step": 9077 - }, - { - "epoch": 0.46, - "grad_norm": 0.9206453558272957, - "learning_rate": 1.1719899416072037e-05, - "loss": 0.1782, - "step": 9078 - }, - { - "epoch": 0.46, - "grad_norm": 0.9587520930099989, - "learning_rate": 1.1718276966091638e-05, - "loss": 0.1903, - "step": 9079 - }, - { - "epoch": 0.46, - "grad_norm": 1.2943426276561494, - "learning_rate": 1.1716654469502875e-05, - "loss": 0.218, - "step": 9080 - }, - { - "epoch": 0.46, - "grad_norm": 2.124829320688035, - "learning_rate": 1.1715031926349763e-05, - "loss": 0.1913, - "step": 9081 - }, - { - "epoch": 0.46, - "grad_norm": 0.8456629905306549, - "learning_rate": 1.1713409336676313e-05, - "loss": 0.1845, - "step": 9082 - }, - { - "epoch": 0.46, - "grad_norm": 0.8710014737852795, - "learning_rate": 1.1711786700526541e-05, - "loss": 0.1871, - "step": 9083 - }, - { - "epoch": 0.46, - "grad_norm": 1.2990723070408405, - "learning_rate": 1.1710164017944456e-05, - "loss": 0.1872, - "step": 9084 - }, - { - "epoch": 0.46, - "grad_norm": 1.236419696060247, - "learning_rate": 1.1708541288974074e-05, - "loss": 0.2054, - "step": 9085 - }, - { - "epoch": 0.46, - "grad_norm": 3.0682702113309683, - "learning_rate": 1.1706918513659416e-05, - "loss": 0.1932, - "step": 9086 - }, - { - "epoch": 0.46, - "grad_norm": 0.8068307461764347, - "learning_rate": 1.1705295692044496e-05, - "loss": 0.173, - "step": 9087 - }, - { - "epoch": 0.46, - "grad_norm": 1.3880576279247103, - "learning_rate": 1.1703672824173333e-05, - "loss": 0.1704, - "step": 9088 - }, - { - "epoch": 0.46, - "grad_norm": 1.270121404719117, - "learning_rate": 1.170204991008995e-05, - "loss": 0.1879, - "step": 9089 - }, - { - "epoch": 0.46, - "grad_norm": 1.0435643904778924, - "learning_rate": 1.1700426949838364e-05, - "loss": 0.1885, - "step": 9090 - }, - { - "epoch": 0.46, - "grad_norm": 0.8568140770975008, - "learning_rate": 1.1698803943462604e-05, - "loss": 0.1848, - "step": 9091 - }, - { - "epoch": 0.46, - "grad_norm": 1.0820704614596093, - "learning_rate": 1.169718089100669e-05, - "loss": 0.212, - "step": 9092 - }, - { - "epoch": 0.46, - "grad_norm": 1.0800354792918512, - "learning_rate": 1.1695557792514648e-05, - "loss": 0.1971, - "step": 9093 - }, - { - "epoch": 0.46, - "grad_norm": 1.1750232689570579, - "learning_rate": 1.1693934648030508e-05, - "loss": 0.1914, - "step": 9094 - }, - { - "epoch": 0.46, - "grad_norm": 1.3779916093593438, - "learning_rate": 1.1692311457598291e-05, - "loss": 0.184, - "step": 9095 - }, - { - "epoch": 0.46, - "grad_norm": 0.8684209284055365, - "learning_rate": 1.1690688221262035e-05, - "loss": 0.1927, - "step": 9096 - }, - { - "epoch": 0.46, - "grad_norm": 0.697502018051808, - "learning_rate": 1.1689064939065762e-05, - "loss": 0.1879, - "step": 9097 - }, - { - "epoch": 0.46, - "grad_norm": 1.0772614966661544, - "learning_rate": 1.1687441611053505e-05, - "loss": 0.1772, - "step": 9098 - }, - { - "epoch": 0.46, - "grad_norm": 1.1014332449661124, - "learning_rate": 1.1685818237269302e-05, - "loss": 0.1901, - "step": 9099 - }, - { - "epoch": 0.46, - "grad_norm": 2.013613860724744, - "learning_rate": 1.1684194817757184e-05, - "loss": 0.2189, - "step": 9100 - }, - { - "epoch": 0.46, - "grad_norm": 1.0590001336397885, - "learning_rate": 1.1682571352561187e-05, - "loss": 0.1967, - "step": 9101 - }, - { - "epoch": 0.46, - "grad_norm": 1.1143069368763483, - "learning_rate": 1.1680947841725348e-05, - "loss": 0.1986, - "step": 9102 - }, - { - "epoch": 0.46, - "grad_norm": 0.9463708285613962, - "learning_rate": 1.1679324285293698e-05, - "loss": 0.1896, - "step": 9103 - }, - { - "epoch": 0.46, - "grad_norm": 1.0438085528108987, - "learning_rate": 1.1677700683310286e-05, - "loss": 0.1854, - "step": 9104 - }, - { - "epoch": 0.46, - "grad_norm": 0.92252065973537, - "learning_rate": 1.1676077035819148e-05, - "loss": 0.2096, - "step": 9105 - }, - { - "epoch": 0.46, - "grad_norm": 1.1214717138794612, - "learning_rate": 1.1674453342864327e-05, - "loss": 0.1925, - "step": 9106 - }, - { - "epoch": 0.46, - "grad_norm": 1.0099356972626463, - "learning_rate": 1.1672829604489864e-05, - "loss": 0.1649, - "step": 9107 - }, - { - "epoch": 0.46, - "grad_norm": 1.1449027127665157, - "learning_rate": 1.16712058207398e-05, - "loss": 0.215, - "step": 9108 - }, - { - "epoch": 0.46, - "grad_norm": 0.911336039272148, - "learning_rate": 1.1669581991658187e-05, - "loss": 0.1711, - "step": 9109 - }, - { - "epoch": 0.46, - "grad_norm": 1.4580166182319019, - "learning_rate": 1.1667958117289068e-05, - "loss": 0.2201, - "step": 9110 - }, - { - "epoch": 0.46, - "grad_norm": 1.0857664654881272, - "learning_rate": 1.1666334197676492e-05, - "loss": 0.1829, - "step": 9111 - }, - { - "epoch": 0.46, - "grad_norm": 0.8859738895387417, - "learning_rate": 1.1664710232864505e-05, - "loss": 0.1673, - "step": 9112 - }, - { - "epoch": 0.46, - "grad_norm": 0.7798264118555199, - "learning_rate": 1.1663086222897157e-05, - "loss": 0.2089, - "step": 9113 - }, - { - "epoch": 0.46, - "grad_norm": 0.9975538121325291, - "learning_rate": 1.1661462167818507e-05, - "loss": 0.1873, - "step": 9114 - }, - { - "epoch": 0.46, - "grad_norm": 1.0709575867805985, - "learning_rate": 1.16598380676726e-05, - "loss": 0.1843, - "step": 9115 - }, - { - "epoch": 0.46, - "grad_norm": 0.9233783373327914, - "learning_rate": 1.1658213922503488e-05, - "loss": 0.1582, - "step": 9116 - }, - { - "epoch": 0.46, - "grad_norm": 1.0645080907936217, - "learning_rate": 1.1656589732355233e-05, - "loss": 0.1973, - "step": 9117 - }, - { - "epoch": 0.46, - "grad_norm": 0.9392700054586304, - "learning_rate": 1.1654965497271886e-05, - "loss": 0.1929, - "step": 9118 - }, - { - "epoch": 0.46, - "grad_norm": 1.114398197170294, - "learning_rate": 1.1653341217297507e-05, - "loss": 0.1915, - "step": 9119 - }, - { - "epoch": 0.46, - "grad_norm": 0.9674687619310163, - "learning_rate": 1.1651716892476154e-05, - "loss": 0.1944, - "step": 9120 - }, - { - "epoch": 0.46, - "grad_norm": 0.9367552026693869, - "learning_rate": 1.1650092522851885e-05, - "loss": 0.1898, - "step": 9121 - }, - { - "epoch": 0.46, - "grad_norm": 1.0708496935895828, - "learning_rate": 1.1648468108468767e-05, - "loss": 0.1894, - "step": 9122 - }, - { - "epoch": 0.46, - "grad_norm": 0.8815096620043407, - "learning_rate": 1.1646843649370858e-05, - "loss": 0.1887, - "step": 9123 - }, - { - "epoch": 0.46, - "grad_norm": 0.9908797866048731, - "learning_rate": 1.164521914560222e-05, - "loss": 0.1956, - "step": 9124 - }, - { - "epoch": 0.46, - "grad_norm": 1.0955654895177573, - "learning_rate": 1.164359459720692e-05, - "loss": 0.1917, - "step": 9125 - }, - { - "epoch": 0.46, - "grad_norm": 0.9897343143053348, - "learning_rate": 1.1641970004229025e-05, - "loss": 0.1836, - "step": 9126 - }, - { - "epoch": 0.46, - "grad_norm": 0.7886372445419565, - "learning_rate": 1.16403453667126e-05, - "loss": 0.2013, - "step": 9127 - }, - { - "epoch": 0.46, - "grad_norm": 1.3670924199438759, - "learning_rate": 1.1638720684701714e-05, - "loss": 0.1719, - "step": 9128 - }, - { - "epoch": 0.46, - "grad_norm": 1.6908065901193217, - "learning_rate": 1.1637095958240439e-05, - "loss": 0.2039, - "step": 9129 - }, - { - "epoch": 0.46, - "grad_norm": 1.8688739782273087, - "learning_rate": 1.163547118737284e-05, - "loss": 0.1984, - "step": 9130 - }, - { - "epoch": 0.46, - "grad_norm": 0.8693621221208244, - "learning_rate": 1.1633846372142997e-05, - "loss": 0.1758, - "step": 9131 - }, - { - "epoch": 0.46, - "grad_norm": 1.0033795035288822, - "learning_rate": 1.1632221512594977e-05, - "loss": 0.1829, - "step": 9132 - }, - { - "epoch": 0.46, - "grad_norm": 1.203854492354105, - "learning_rate": 1.1630596608772855e-05, - "loss": 0.1991, - "step": 9133 - }, - { - "epoch": 0.46, - "grad_norm": 0.7739788349490826, - "learning_rate": 1.1628971660720707e-05, - "loss": 0.2161, - "step": 9134 - }, - { - "epoch": 0.46, - "grad_norm": 1.3333093970645196, - "learning_rate": 1.1627346668482614e-05, - "loss": 0.2002, - "step": 9135 - }, - { - "epoch": 0.46, - "grad_norm": 1.3961606707042902, - "learning_rate": 1.1625721632102649e-05, - "loss": 0.181, - "step": 9136 - }, - { - "epoch": 0.46, - "grad_norm": 0.7972776439352925, - "learning_rate": 1.1624096551624893e-05, - "loss": 0.1848, - "step": 9137 - }, - { - "epoch": 0.46, - "grad_norm": 0.8296410166518111, - "learning_rate": 1.1622471427093424e-05, - "loss": 0.1726, - "step": 9138 - }, - { - "epoch": 0.46, - "grad_norm": 0.9506650235484677, - "learning_rate": 1.162084625855233e-05, - "loss": 0.1721, - "step": 9139 - }, - { - "epoch": 0.46, - "grad_norm": 2.7818432041827315, - "learning_rate": 1.1619221046045688e-05, - "loss": 0.1942, - "step": 9140 - }, - { - "epoch": 0.46, - "grad_norm": 1.1575333574213695, - "learning_rate": 1.1617595789617585e-05, - "loss": 0.2027, - "step": 9141 - }, - { - "epoch": 0.46, - "grad_norm": 1.434177466973586, - "learning_rate": 1.1615970489312102e-05, - "loss": 0.194, - "step": 9142 - }, - { - "epoch": 0.46, - "grad_norm": 1.245727117900203, - "learning_rate": 1.1614345145173329e-05, - "loss": 0.2023, - "step": 9143 - }, - { - "epoch": 0.46, - "grad_norm": 1.4528276599772527, - "learning_rate": 1.1612719757245353e-05, - "loss": 0.1841, - "step": 9144 - }, - { - "epoch": 0.47, - "grad_norm": 0.883066228974052, - "learning_rate": 1.1611094325572263e-05, - "loss": 0.1973, - "step": 9145 - }, - { - "epoch": 0.47, - "grad_norm": 1.153096925156812, - "learning_rate": 1.1609468850198149e-05, - "loss": 0.1883, - "step": 9146 - }, - { - "epoch": 0.47, - "grad_norm": 1.2894960675682319, - "learning_rate": 1.1607843331167099e-05, - "loss": 0.2053, - "step": 9147 - }, - { - "epoch": 0.47, - "grad_norm": 0.8747914564760497, - "learning_rate": 1.160621776852321e-05, - "loss": 0.1882, - "step": 9148 - }, - { - "epoch": 0.47, - "grad_norm": 1.1131978812303405, - "learning_rate": 1.1604592162310575e-05, - "loss": 0.2121, - "step": 9149 - }, - { - "epoch": 0.47, - "grad_norm": 1.5693726179321674, - "learning_rate": 1.1602966512573286e-05, - "loss": 0.1881, - "step": 9150 - }, - { - "epoch": 0.47, - "grad_norm": 1.0216723824159144, - "learning_rate": 1.1601340819355437e-05, - "loss": 0.1912, - "step": 9151 - }, - { - "epoch": 0.47, - "grad_norm": 0.7815028235272667, - "learning_rate": 1.159971508270113e-05, - "loss": 0.1954, - "step": 9152 - }, - { - "epoch": 0.47, - "grad_norm": 0.9754311439115084, - "learning_rate": 1.159808930265446e-05, - "loss": 0.1918, - "step": 9153 - }, - { - "epoch": 0.47, - "grad_norm": 1.0278402602039418, - "learning_rate": 1.159646347925953e-05, - "loss": 0.1974, - "step": 9154 - }, - { - "epoch": 0.47, - "grad_norm": 0.9032850279132243, - "learning_rate": 1.1594837612560437e-05, - "loss": 0.1778, - "step": 9155 - }, - { - "epoch": 0.47, - "grad_norm": 0.8774455184882909, - "learning_rate": 1.159321170260128e-05, - "loss": 0.2006, - "step": 9156 - }, - { - "epoch": 0.47, - "grad_norm": 0.9127838115789007, - "learning_rate": 1.159158574942617e-05, - "loss": 0.1733, - "step": 9157 - }, - { - "epoch": 0.47, - "grad_norm": 0.8448337659172754, - "learning_rate": 1.1589959753079203e-05, - "loss": 0.1759, - "step": 9158 - }, - { - "epoch": 0.47, - "grad_norm": 1.2241639207791157, - "learning_rate": 1.1588333713604491e-05, - "loss": 0.2184, - "step": 9159 - }, - { - "epoch": 0.47, - "grad_norm": 0.7723198053308619, - "learning_rate": 1.1586707631046135e-05, - "loss": 0.1973, - "step": 9160 - }, - { - "epoch": 0.47, - "grad_norm": 0.8615544161233657, - "learning_rate": 1.1585081505448246e-05, - "loss": 0.2044, - "step": 9161 - }, - { - "epoch": 0.47, - "grad_norm": 0.8611882561608455, - "learning_rate": 1.1583455336854932e-05, - "loss": 0.1845, - "step": 9162 - }, - { - "epoch": 0.47, - "grad_norm": 1.0851258693274464, - "learning_rate": 1.1581829125310302e-05, - "loss": 0.179, - "step": 9163 - }, - { - "epoch": 0.47, - "grad_norm": 0.7737070155432831, - "learning_rate": 1.1580202870858468e-05, - "loss": 0.1768, - "step": 9164 - }, - { - "epoch": 0.47, - "grad_norm": 0.9287374921183846, - "learning_rate": 1.1578576573543541e-05, - "loss": 0.1976, - "step": 9165 - }, - { - "epoch": 0.47, - "grad_norm": 0.8834705337603755, - "learning_rate": 1.1576950233409638e-05, - "loss": 0.1852, - "step": 9166 - }, - { - "epoch": 0.47, - "grad_norm": 0.8774831452300471, - "learning_rate": 1.1575323850500868e-05, - "loss": 0.1702, - "step": 9167 - }, - { - "epoch": 0.47, - "grad_norm": 2.4809191685656273, - "learning_rate": 1.1573697424861353e-05, - "loss": 0.1834, - "step": 9168 - }, - { - "epoch": 0.47, - "grad_norm": 0.9047569129335316, - "learning_rate": 1.1572070956535202e-05, - "loss": 0.1878, - "step": 9169 - }, - { - "epoch": 0.47, - "grad_norm": 1.0114729627753714, - "learning_rate": 1.1570444445566538e-05, - "loss": 0.1841, - "step": 9170 - }, - { - "epoch": 0.47, - "grad_norm": 1.0533951490813414, - "learning_rate": 1.1568817891999482e-05, - "loss": 0.2076, - "step": 9171 - }, - { - "epoch": 0.47, - "grad_norm": 1.0908645916592143, - "learning_rate": 1.1567191295878152e-05, - "loss": 0.1858, - "step": 9172 - }, - { - "epoch": 0.47, - "grad_norm": 0.9703974422296601, - "learning_rate": 1.1565564657246667e-05, - "loss": 0.2004, - "step": 9173 - }, - { - "epoch": 0.47, - "grad_norm": 1.0118705478228984, - "learning_rate": 1.1563937976149153e-05, - "loss": 0.1953, - "step": 9174 - }, - { - "epoch": 0.47, - "grad_norm": 0.8260904591199736, - "learning_rate": 1.1562311252629736e-05, - "loss": 0.2017, - "step": 9175 - }, - { - "epoch": 0.47, - "grad_norm": 0.7339715507228588, - "learning_rate": 1.1560684486732537e-05, - "loss": 0.1916, - "step": 9176 - }, - { - "epoch": 0.47, - "grad_norm": 0.8509635990034636, - "learning_rate": 1.1559057678501682e-05, - "loss": 0.1829, - "step": 9177 - }, - { - "epoch": 0.47, - "grad_norm": 1.9624278814958298, - "learning_rate": 1.1557430827981297e-05, - "loss": 0.2154, - "step": 9178 - }, - { - "epoch": 0.47, - "grad_norm": 1.479984283272646, - "learning_rate": 1.1555803935215516e-05, - "loss": 0.2187, - "step": 9179 - }, - { - "epoch": 0.47, - "grad_norm": 0.9547459427889696, - "learning_rate": 1.1554177000248466e-05, - "loss": 0.192, - "step": 9180 - }, - { - "epoch": 0.47, - "grad_norm": 1.0713234269482474, - "learning_rate": 1.1552550023124276e-05, - "loss": 0.1993, - "step": 9181 - }, - { - "epoch": 0.47, - "grad_norm": 1.084265326853148, - "learning_rate": 1.155092300388708e-05, - "loss": 0.1896, - "step": 9182 - }, - { - "epoch": 0.47, - "grad_norm": 0.823005443489001, - "learning_rate": 1.1549295942581007e-05, - "loss": 0.1774, - "step": 9183 - }, - { - "epoch": 0.47, - "grad_norm": 1.6002085217547777, - "learning_rate": 1.1547668839250199e-05, - "loss": 0.1919, - "step": 9184 - }, - { - "epoch": 0.47, - "grad_norm": 0.8626993942068936, - "learning_rate": 1.1546041693938784e-05, - "loss": 0.192, - "step": 9185 - }, - { - "epoch": 0.47, - "grad_norm": 1.0077475214317404, - "learning_rate": 1.1544414506690897e-05, - "loss": 0.1848, - "step": 9186 - }, - { - "epoch": 0.47, - "grad_norm": 1.0757353918928187, - "learning_rate": 1.1542787277550683e-05, - "loss": 0.2179, - "step": 9187 - }, - { - "epoch": 0.47, - "grad_norm": 1.08680507436135, - "learning_rate": 1.1541160006562275e-05, - "loss": 0.1883, - "step": 9188 - }, - { - "epoch": 0.47, - "grad_norm": 0.9751665996225382, - "learning_rate": 1.1539532693769818e-05, - "loss": 0.1974, - "step": 9189 - }, - { - "epoch": 0.47, - "grad_norm": 2.48160369171706, - "learning_rate": 1.1537905339217448e-05, - "loss": 0.1883, - "step": 9190 - }, - { - "epoch": 0.47, - "grad_norm": 0.975263185759104, - "learning_rate": 1.1536277942949305e-05, - "loss": 0.2017, - "step": 9191 - }, - { - "epoch": 0.47, - "grad_norm": 0.8240373691871361, - "learning_rate": 1.1534650505009542e-05, - "loss": 0.1775, - "step": 9192 - }, - { - "epoch": 0.47, - "grad_norm": 1.3049402644744645, - "learning_rate": 1.1533023025442294e-05, - "loss": 0.2072, - "step": 9193 - }, - { - "epoch": 0.47, - "grad_norm": 0.9742705420566906, - "learning_rate": 1.1531395504291711e-05, - "loss": 0.1864, - "step": 9194 - }, - { - "epoch": 0.47, - "grad_norm": 0.9472394210632011, - "learning_rate": 1.1529767941601937e-05, - "loss": 0.1836, - "step": 9195 - }, - { - "epoch": 0.47, - "grad_norm": 0.9238803406730269, - "learning_rate": 1.1528140337417121e-05, - "loss": 0.1921, - "step": 9196 - }, - { - "epoch": 0.47, - "grad_norm": 0.8351639923254849, - "learning_rate": 1.1526512691781415e-05, - "loss": 0.1927, - "step": 9197 - }, - { - "epoch": 0.47, - "grad_norm": 1.2967717494314384, - "learning_rate": 1.1524885004738966e-05, - "loss": 0.1871, - "step": 9198 - }, - { - "epoch": 0.47, - "grad_norm": 0.915047277005369, - "learning_rate": 1.1523257276333924e-05, - "loss": 0.2058, - "step": 9199 - }, - { - "epoch": 0.47, - "grad_norm": 0.8859798604811852, - "learning_rate": 1.1521629506610439e-05, - "loss": 0.2082, - "step": 9200 - }, - { - "epoch": 0.47, - "grad_norm": 0.9162438028535452, - "learning_rate": 1.1520001695612675e-05, - "loss": 0.2077, - "step": 9201 - }, - { - "epoch": 0.47, - "grad_norm": 0.8868233681525796, - "learning_rate": 1.1518373843384774e-05, - "loss": 0.1949, - "step": 9202 - }, - { - "epoch": 0.47, - "grad_norm": 0.893103606569373, - "learning_rate": 1.1516745949970897e-05, - "loss": 0.1734, - "step": 9203 - }, - { - "epoch": 0.47, - "grad_norm": 1.0350623362067328, - "learning_rate": 1.15151180154152e-05, - "loss": 0.1957, - "step": 9204 - }, - { - "epoch": 0.47, - "grad_norm": 0.7472797149020382, - "learning_rate": 1.1513490039761843e-05, - "loss": 0.1719, - "step": 9205 - }, - { - "epoch": 0.47, - "grad_norm": 0.8943678903413873, - "learning_rate": 1.1511862023054983e-05, - "loss": 0.1849, - "step": 9206 - }, - { - "epoch": 0.47, - "grad_norm": 2.0615004595864916, - "learning_rate": 1.151023396533878e-05, - "loss": 0.1849, - "step": 9207 - }, - { - "epoch": 0.47, - "grad_norm": 1.0065551689099037, - "learning_rate": 1.1508605866657392e-05, - "loss": 0.2165, - "step": 9208 - }, - { - "epoch": 0.47, - "grad_norm": 0.8278316868482662, - "learning_rate": 1.1506977727054988e-05, - "loss": 0.1768, - "step": 9209 - }, - { - "epoch": 0.47, - "grad_norm": 2.0617148335170974, - "learning_rate": 1.1505349546575728e-05, - "loss": 0.1931, - "step": 9210 - }, - { - "epoch": 0.47, - "grad_norm": 0.8807830721570364, - "learning_rate": 1.1503721325263778e-05, - "loss": 0.2041, - "step": 9211 - }, - { - "epoch": 0.47, - "grad_norm": 1.131425982837395, - "learning_rate": 1.15020930631633e-05, - "loss": 0.1908, - "step": 9212 - }, - { - "epoch": 0.47, - "grad_norm": 1.016980414378633, - "learning_rate": 1.1500464760318462e-05, - "loss": 0.1705, - "step": 9213 - }, - { - "epoch": 0.47, - "grad_norm": 1.3114196960927185, - "learning_rate": 1.1498836416773433e-05, - "loss": 0.2042, - "step": 9214 - }, - { - "epoch": 0.47, - "grad_norm": 0.9548647672267575, - "learning_rate": 1.1497208032572385e-05, - "loss": 0.1688, - "step": 9215 - }, - { - "epoch": 0.47, - "grad_norm": 1.4053452700177105, - "learning_rate": 1.149557960775948e-05, - "loss": 0.2194, - "step": 9216 - }, - { - "epoch": 0.47, - "grad_norm": 0.9455345695137635, - "learning_rate": 1.1493951142378896e-05, - "loss": 0.174, - "step": 9217 - }, - { - "epoch": 0.47, - "grad_norm": 2.914664529750119, - "learning_rate": 1.1492322636474802e-05, - "loss": 0.1867, - "step": 9218 - }, - { - "epoch": 0.47, - "grad_norm": 0.9728464313455532, - "learning_rate": 1.1490694090091375e-05, - "loss": 0.1737, - "step": 9219 - }, - { - "epoch": 0.47, - "grad_norm": 0.8613771294384492, - "learning_rate": 1.1489065503272785e-05, - "loss": 0.1906, - "step": 9220 - }, - { - "epoch": 0.47, - "grad_norm": 0.9827950275426468, - "learning_rate": 1.1487436876063205e-05, - "loss": 0.2, - "step": 9221 - }, - { - "epoch": 0.47, - "grad_norm": 1.253797589125273, - "learning_rate": 1.148580820850682e-05, - "loss": 0.1748, - "step": 9222 - }, - { - "epoch": 0.47, - "grad_norm": 0.92996072269765, - "learning_rate": 1.1484179500647802e-05, - "loss": 0.1813, - "step": 9223 - }, - { - "epoch": 0.47, - "grad_norm": 0.9294885667105391, - "learning_rate": 1.1482550752530332e-05, - "loss": 0.1911, - "step": 9224 - }, - { - "epoch": 0.47, - "grad_norm": 1.4144234683841561, - "learning_rate": 1.148092196419859e-05, - "loss": 0.2001, - "step": 9225 - }, - { - "epoch": 0.47, - "grad_norm": 0.8613645922670157, - "learning_rate": 1.1479293135696755e-05, - "loss": 0.2162, - "step": 9226 - }, - { - "epoch": 0.47, - "grad_norm": 1.0450389190608478, - "learning_rate": 1.1477664267069009e-05, - "loss": 0.1856, - "step": 9227 - }, - { - "epoch": 0.47, - "grad_norm": 0.8930640924927837, - "learning_rate": 1.1476035358359539e-05, - "loss": 0.2097, - "step": 9228 - }, - { - "epoch": 0.47, - "grad_norm": 1.592902215469274, - "learning_rate": 1.1474406409612524e-05, - "loss": 0.1921, - "step": 9229 - }, - { - "epoch": 0.47, - "grad_norm": 0.7904460656923809, - "learning_rate": 1.1472777420872154e-05, - "loss": 0.1891, - "step": 9230 - }, - { - "epoch": 0.47, - "grad_norm": 0.8015309344030134, - "learning_rate": 1.147114839218261e-05, - "loss": 0.1601, - "step": 9231 - }, - { - "epoch": 0.47, - "grad_norm": 0.9705118301556879, - "learning_rate": 1.1469519323588085e-05, - "loss": 0.1639, - "step": 9232 - }, - { - "epoch": 0.47, - "grad_norm": 1.5770038604029533, - "learning_rate": 1.1467890215132767e-05, - "loss": 0.1919, - "step": 9233 - }, - { - "epoch": 0.47, - "grad_norm": 1.1379908559648302, - "learning_rate": 1.146626106686084e-05, - "loss": 0.1739, - "step": 9234 - }, - { - "epoch": 0.47, - "grad_norm": 1.7509321612595246, - "learning_rate": 1.1464631878816502e-05, - "loss": 0.1993, - "step": 9235 - }, - { - "epoch": 0.47, - "grad_norm": 1.1126365561109344, - "learning_rate": 1.1463002651043942e-05, - "loss": 0.1867, - "step": 9236 - }, - { - "epoch": 0.47, - "grad_norm": 0.9436631209359791, - "learning_rate": 1.146137338358735e-05, - "loss": 0.1892, - "step": 9237 - }, - { - "epoch": 0.47, - "grad_norm": 1.293770946685034, - "learning_rate": 1.1459744076490924e-05, - "loss": 0.1957, - "step": 9238 - }, - { - "epoch": 0.47, - "grad_norm": 0.9276318520781688, - "learning_rate": 1.1458114729798855e-05, - "loss": 0.1872, - "step": 9239 - }, - { - "epoch": 0.47, - "grad_norm": 0.6795751167978585, - "learning_rate": 1.1456485343555344e-05, - "loss": 0.1803, - "step": 9240 - }, - { - "epoch": 0.47, - "grad_norm": 0.9264723014647175, - "learning_rate": 1.1454855917804586e-05, - "loss": 0.1718, - "step": 9241 - }, - { - "epoch": 0.47, - "grad_norm": 1.457193211728748, - "learning_rate": 1.145322645259078e-05, - "loss": 0.1898, - "step": 9242 - }, - { - "epoch": 0.47, - "grad_norm": 0.9019050296705318, - "learning_rate": 1.1451596947958122e-05, - "loss": 0.2239, - "step": 9243 - }, - { - "epoch": 0.47, - "grad_norm": 7.865206930755326, - "learning_rate": 1.1449967403950812e-05, - "loss": 0.2017, - "step": 9244 - }, - { - "epoch": 0.47, - "grad_norm": 1.132181096344825, - "learning_rate": 1.1448337820613061e-05, - "loss": 0.1885, - "step": 9245 - }, - { - "epoch": 0.47, - "grad_norm": 1.151784318954177, - "learning_rate": 1.1446708197989061e-05, - "loss": 0.2025, - "step": 9246 - }, - { - "epoch": 0.47, - "grad_norm": 1.0029738228138243, - "learning_rate": 1.144507853612302e-05, - "loss": 0.1855, - "step": 9247 - }, - { - "epoch": 0.47, - "grad_norm": 1.0962947125468563, - "learning_rate": 1.1443448835059141e-05, - "loss": 0.1944, - "step": 9248 - }, - { - "epoch": 0.47, - "grad_norm": 0.8477426463884647, - "learning_rate": 1.144181909484163e-05, - "loss": 0.1753, - "step": 9249 - }, - { - "epoch": 0.47, - "grad_norm": 0.8817457321386621, - "learning_rate": 1.1440189315514698e-05, - "loss": 0.1827, - "step": 9250 - }, - { - "epoch": 0.47, - "grad_norm": 0.9198362005845908, - "learning_rate": 1.143855949712255e-05, - "loss": 0.1758, - "step": 9251 - }, - { - "epoch": 0.47, - "grad_norm": 0.8501168011702994, - "learning_rate": 1.143692963970939e-05, - "loss": 0.1951, - "step": 9252 - }, - { - "epoch": 0.47, - "grad_norm": 1.1709863082016263, - "learning_rate": 1.1435299743319434e-05, - "loss": 0.1998, - "step": 9253 - }, - { - "epoch": 0.47, - "grad_norm": 0.9096201448211303, - "learning_rate": 1.1433669807996894e-05, - "loss": 0.17, - "step": 9254 - }, - { - "epoch": 0.47, - "grad_norm": 1.1348872412188835, - "learning_rate": 1.1432039833785979e-05, - "loss": 0.1884, - "step": 9255 - }, - { - "epoch": 0.47, - "grad_norm": 1.1259494677848945, - "learning_rate": 1.1430409820730902e-05, - "loss": 0.1881, - "step": 9256 - }, - { - "epoch": 0.47, - "grad_norm": 1.1273228094294094, - "learning_rate": 1.1428779768875874e-05, - "loss": 0.1934, - "step": 9257 - }, - { - "epoch": 0.47, - "grad_norm": 1.1033316887349531, - "learning_rate": 1.1427149678265119e-05, - "loss": 0.2156, - "step": 9258 - }, - { - "epoch": 0.47, - "grad_norm": 1.008896724459771, - "learning_rate": 1.1425519548942847e-05, - "loss": 0.2092, - "step": 9259 - }, - { - "epoch": 0.47, - "grad_norm": 1.263669512820033, - "learning_rate": 1.1423889380953277e-05, - "loss": 0.1778, - "step": 9260 - }, - { - "epoch": 0.47, - "grad_norm": 0.9381984779225716, - "learning_rate": 1.1422259174340624e-05, - "loss": 0.1963, - "step": 9261 - }, - { - "epoch": 0.47, - "grad_norm": 1.5088037717808391, - "learning_rate": 1.1420628929149114e-05, - "loss": 0.2038, - "step": 9262 - }, - { - "epoch": 0.47, - "grad_norm": 1.0717096467954101, - "learning_rate": 1.1418998645422963e-05, - "loss": 0.1927, - "step": 9263 - }, - { - "epoch": 0.47, - "grad_norm": 1.193684945593441, - "learning_rate": 1.1417368323206395e-05, - "loss": 0.2046, - "step": 9264 - }, - { - "epoch": 0.47, - "grad_norm": 1.0533682703561398, - "learning_rate": 1.141573796254363e-05, - "loss": 0.1697, - "step": 9265 - }, - { - "epoch": 0.47, - "grad_norm": 1.1840786054105483, - "learning_rate": 1.141410756347889e-05, - "loss": 0.1822, - "step": 9266 - }, - { - "epoch": 0.47, - "grad_norm": 1.749805516924357, - "learning_rate": 1.1412477126056405e-05, - "loss": 0.1979, - "step": 9267 - }, - { - "epoch": 0.47, - "grad_norm": 1.253852439701606, - "learning_rate": 1.14108466503204e-05, - "loss": 0.2176, - "step": 9268 - }, - { - "epoch": 0.47, - "grad_norm": 1.1173364128331582, - "learning_rate": 1.1409216136315097e-05, - "loss": 0.1778, - "step": 9269 - }, - { - "epoch": 0.47, - "grad_norm": 1.035032031791532, - "learning_rate": 1.1407585584084726e-05, - "loss": 0.2161, - "step": 9270 - }, - { - "epoch": 0.47, - "grad_norm": 1.2405865114634105, - "learning_rate": 1.1405954993673522e-05, - "loss": 0.1771, - "step": 9271 - }, - { - "epoch": 0.47, - "grad_norm": 0.9961618122285781, - "learning_rate": 1.1404324365125708e-05, - "loss": 0.193, - "step": 9272 - }, - { - "epoch": 0.47, - "grad_norm": 1.6814547632964016, - "learning_rate": 1.1402693698485512e-05, - "loss": 0.1832, - "step": 9273 - }, - { - "epoch": 0.47, - "grad_norm": 1.2294370918587325, - "learning_rate": 1.1401062993797171e-05, - "loss": 0.1903, - "step": 9274 - }, - { - "epoch": 0.47, - "grad_norm": 1.415889465472221, - "learning_rate": 1.1399432251104918e-05, - "loss": 0.2137, - "step": 9275 - }, - { - "epoch": 0.47, - "grad_norm": 1.0334241329352223, - "learning_rate": 1.1397801470452987e-05, - "loss": 0.196, - "step": 9276 - }, - { - "epoch": 0.47, - "grad_norm": 0.9642912610388481, - "learning_rate": 1.1396170651885613e-05, - "loss": 0.182, - "step": 9277 - }, - { - "epoch": 0.47, - "grad_norm": 1.2648969637697525, - "learning_rate": 1.139453979544703e-05, - "loss": 0.1908, - "step": 9278 - }, - { - "epoch": 0.47, - "grad_norm": 0.8346050383865007, - "learning_rate": 1.1392908901181474e-05, - "loss": 0.176, - "step": 9279 - }, - { - "epoch": 0.47, - "grad_norm": 1.3413644643229863, - "learning_rate": 1.139127796913319e-05, - "loss": 0.1911, - "step": 9280 - }, - { - "epoch": 0.47, - "grad_norm": 0.9490728998996395, - "learning_rate": 1.138964699934641e-05, - "loss": 0.1796, - "step": 9281 - }, - { - "epoch": 0.47, - "grad_norm": 1.0848424621668546, - "learning_rate": 1.1388015991865377e-05, - "loss": 0.1773, - "step": 9282 - }, - { - "epoch": 0.47, - "grad_norm": 1.8101725755414195, - "learning_rate": 1.138638494673433e-05, - "loss": 0.2012, - "step": 9283 - }, - { - "epoch": 0.47, - "grad_norm": 1.3028282362221764, - "learning_rate": 1.1384753863997516e-05, - "loss": 0.1876, - "step": 9284 - }, - { - "epoch": 0.47, - "grad_norm": 1.1140020330220612, - "learning_rate": 1.1383122743699173e-05, - "loss": 0.2109, - "step": 9285 - }, - { - "epoch": 0.47, - "grad_norm": 1.1084320608542635, - "learning_rate": 1.1381491585883548e-05, - "loss": 0.1971, - "step": 9286 - }, - { - "epoch": 0.47, - "grad_norm": 1.3256072713621745, - "learning_rate": 1.1379860390594888e-05, - "loss": 0.2103, - "step": 9287 - }, - { - "epoch": 0.47, - "grad_norm": 0.9168074800231482, - "learning_rate": 1.1378229157877432e-05, - "loss": 0.1789, - "step": 9288 - }, - { - "epoch": 0.47, - "grad_norm": 0.8194316208116729, - "learning_rate": 1.1376597887775438e-05, - "loss": 0.1948, - "step": 9289 - }, - { - "epoch": 0.47, - "grad_norm": 0.98526556813502, - "learning_rate": 1.1374966580333147e-05, - "loss": 0.1984, - "step": 9290 - }, - { - "epoch": 0.47, - "grad_norm": 2.061805734811115, - "learning_rate": 1.1373335235594809e-05, - "loss": 0.2147, - "step": 9291 - }, - { - "epoch": 0.47, - "grad_norm": 0.952970495944534, - "learning_rate": 1.1371703853604672e-05, - "loss": 0.1879, - "step": 9292 - }, - { - "epoch": 0.47, - "grad_norm": 1.0049105878186886, - "learning_rate": 1.1370072434406993e-05, - "loss": 0.2125, - "step": 9293 - }, - { - "epoch": 0.47, - "grad_norm": 1.180288824955952, - "learning_rate": 1.1368440978046022e-05, - "loss": 0.1977, - "step": 9294 - }, - { - "epoch": 0.47, - "grad_norm": 0.9273278457059216, - "learning_rate": 1.1366809484566015e-05, - "loss": 0.1786, - "step": 9295 - }, - { - "epoch": 0.47, - "grad_norm": 0.7298925974661218, - "learning_rate": 1.136517795401122e-05, - "loss": 0.18, - "step": 9296 - }, - { - "epoch": 0.47, - "grad_norm": 0.9384320818110587, - "learning_rate": 1.1363546386425895e-05, - "loss": 0.184, - "step": 9297 - }, - { - "epoch": 0.47, - "grad_norm": 0.7859695205526979, - "learning_rate": 1.13619147818543e-05, - "loss": 0.167, - "step": 9298 - }, - { - "epoch": 0.47, - "grad_norm": 0.9421177588324476, - "learning_rate": 1.1360283140340688e-05, - "loss": 0.1688, - "step": 9299 - }, - { - "epoch": 0.47, - "grad_norm": 1.2079617951803552, - "learning_rate": 1.1358651461929319e-05, - "loss": 0.1966, - "step": 9300 - }, - { - "epoch": 0.47, - "grad_norm": 0.817762563013867, - "learning_rate": 1.1357019746664453e-05, - "loss": 0.1757, - "step": 9301 - }, - { - "epoch": 0.47, - "grad_norm": 2.1866910568283346, - "learning_rate": 1.135538799459035e-05, - "loss": 0.199, - "step": 9302 - }, - { - "epoch": 0.47, - "grad_norm": 0.9677330185793297, - "learning_rate": 1.1353756205751272e-05, - "loss": 0.2217, - "step": 9303 - }, - { - "epoch": 0.47, - "grad_norm": 1.7153143992431847, - "learning_rate": 1.1352124380191479e-05, - "loss": 0.1836, - "step": 9304 - }, - { - "epoch": 0.47, - "grad_norm": 0.9525697878726538, - "learning_rate": 1.1350492517955234e-05, - "loss": 0.1999, - "step": 9305 - }, - { - "epoch": 0.47, - "grad_norm": 0.9008507657210286, - "learning_rate": 1.1348860619086808e-05, - "loss": 0.1982, - "step": 9306 - }, - { - "epoch": 0.47, - "grad_norm": 0.9362961925883473, - "learning_rate": 1.134722868363046e-05, - "loss": 0.1963, - "step": 9307 - }, - { - "epoch": 0.47, - "grad_norm": 0.8781050347037737, - "learning_rate": 1.1345596711630456e-05, - "loss": 0.1846, - "step": 9308 - }, - { - "epoch": 0.47, - "grad_norm": 0.9572232189569388, - "learning_rate": 1.1343964703131065e-05, - "loss": 0.1866, - "step": 9309 - }, - { - "epoch": 0.47, - "grad_norm": 1.3214437628723248, - "learning_rate": 1.1342332658176556e-05, - "loss": 0.1705, - "step": 9310 - }, - { - "epoch": 0.47, - "grad_norm": 1.7267957828656673, - "learning_rate": 1.1340700576811198e-05, - "loss": 0.1847, - "step": 9311 - }, - { - "epoch": 0.47, - "grad_norm": 1.6346922718412682, - "learning_rate": 1.1339068459079262e-05, - "loss": 0.1748, - "step": 9312 - }, - { - "epoch": 0.47, - "grad_norm": 0.7569900182507847, - "learning_rate": 1.1337436305025019e-05, - "loss": 0.177, - "step": 9313 - }, - { - "epoch": 0.47, - "grad_norm": 0.955594887499051, - "learning_rate": 1.1335804114692737e-05, - "loss": 0.2005, - "step": 9314 - }, - { - "epoch": 0.47, - "grad_norm": 2.9007643934387923, - "learning_rate": 1.1334171888126698e-05, - "loss": 0.192, - "step": 9315 - }, - { - "epoch": 0.47, - "grad_norm": 1.1458182580341056, - "learning_rate": 1.1332539625371166e-05, - "loss": 0.179, - "step": 9316 - }, - { - "epoch": 0.47, - "grad_norm": 1.346045821307898, - "learning_rate": 1.1330907326470426e-05, - "loss": 0.1999, - "step": 9317 - }, - { - "epoch": 0.47, - "grad_norm": 1.1606367702267062, - "learning_rate": 1.1329274991468747e-05, - "loss": 0.1847, - "step": 9318 - }, - { - "epoch": 0.47, - "grad_norm": 1.1289045966659277, - "learning_rate": 1.1327642620410408e-05, - "loss": 0.1875, - "step": 9319 - }, - { - "epoch": 0.47, - "grad_norm": 0.8345586647548614, - "learning_rate": 1.1326010213339688e-05, - "loss": 0.1749, - "step": 9320 - }, - { - "epoch": 0.47, - "grad_norm": 1.4714817836575558, - "learning_rate": 1.132437777030087e-05, - "loss": 0.2186, - "step": 9321 - }, - { - "epoch": 0.47, - "grad_norm": 0.7114676751705088, - "learning_rate": 1.1322745291338226e-05, - "loss": 0.1805, - "step": 9322 - }, - { - "epoch": 0.47, - "grad_norm": 1.1326550840352263, - "learning_rate": 1.1321112776496042e-05, - "loss": 0.1794, - "step": 9323 - }, - { - "epoch": 0.47, - "grad_norm": 0.8232979729290985, - "learning_rate": 1.1319480225818602e-05, - "loss": 0.1973, - "step": 9324 - }, - { - "epoch": 0.47, - "grad_norm": 0.9378338038834931, - "learning_rate": 1.1317847639350186e-05, - "loss": 0.1777, - "step": 9325 - }, - { - "epoch": 0.47, - "grad_norm": 0.927375096532382, - "learning_rate": 1.1316215017135076e-05, - "loss": 0.203, - "step": 9326 - }, - { - "epoch": 0.47, - "grad_norm": 1.0456569263599929, - "learning_rate": 1.1314582359217558e-05, - "loss": 0.2025, - "step": 9327 - }, - { - "epoch": 0.47, - "grad_norm": 1.2830682932161301, - "learning_rate": 1.1312949665641923e-05, - "loss": 0.1987, - "step": 9328 - }, - { - "epoch": 0.47, - "grad_norm": 0.80481172348749, - "learning_rate": 1.1311316936452452e-05, - "loss": 0.1925, - "step": 9329 - }, - { - "epoch": 0.47, - "grad_norm": 0.9271681235067764, - "learning_rate": 1.1309684171693435e-05, - "loss": 0.1907, - "step": 9330 - }, - { - "epoch": 0.47, - "grad_norm": 0.8884154679752989, - "learning_rate": 1.1308051371409162e-05, - "loss": 0.1738, - "step": 9331 - }, - { - "epoch": 0.47, - "grad_norm": 1.2122619537768777, - "learning_rate": 1.1306418535643922e-05, - "loss": 0.1828, - "step": 9332 - }, - { - "epoch": 0.47, - "grad_norm": 1.0197330364239467, - "learning_rate": 1.1304785664442003e-05, - "loss": 0.1931, - "step": 9333 - }, - { - "epoch": 0.47, - "grad_norm": 0.9782015436799009, - "learning_rate": 1.1303152757847702e-05, - "loss": 0.2035, - "step": 9334 - }, - { - "epoch": 0.47, - "grad_norm": 0.9159995398951478, - "learning_rate": 1.1301519815905309e-05, - "loss": 0.1963, - "step": 9335 - }, - { - "epoch": 0.47, - "grad_norm": 0.9347505438490339, - "learning_rate": 1.1299886838659114e-05, - "loss": 0.1951, - "step": 9336 - }, - { - "epoch": 0.47, - "grad_norm": 1.0526098163364204, - "learning_rate": 1.1298253826153415e-05, - "loss": 0.1929, - "step": 9337 - }, - { - "epoch": 0.47, - "grad_norm": 1.193015880699807, - "learning_rate": 1.1296620778432512e-05, - "loss": 0.1911, - "step": 9338 - }, - { - "epoch": 0.47, - "grad_norm": 1.4693323716969673, - "learning_rate": 1.1294987695540695e-05, - "loss": 0.1605, - "step": 9339 - }, - { - "epoch": 0.47, - "grad_norm": 1.2914714608353897, - "learning_rate": 1.1293354577522264e-05, - "loss": 0.1874, - "step": 9340 - }, - { - "epoch": 0.48, - "grad_norm": 0.8955534658899678, - "learning_rate": 1.1291721424421518e-05, - "loss": 0.1762, - "step": 9341 - }, - { - "epoch": 0.48, - "grad_norm": 0.9457021966906232, - "learning_rate": 1.1290088236282752e-05, - "loss": 0.1945, - "step": 9342 - }, - { - "epoch": 0.48, - "grad_norm": 0.9463505398747077, - "learning_rate": 1.1288455013150275e-05, - "loss": 0.1811, - "step": 9343 - }, - { - "epoch": 0.48, - "grad_norm": 0.8537062086454555, - "learning_rate": 1.1286821755068375e-05, - "loss": 0.1776, - "step": 9344 - }, - { - "epoch": 0.48, - "grad_norm": 1.1395762597930732, - "learning_rate": 1.128518846208137e-05, - "loss": 0.1853, - "step": 9345 - }, - { - "epoch": 0.48, - "grad_norm": 0.8332298531890091, - "learning_rate": 1.128355513423355e-05, - "loss": 0.1726, - "step": 9346 - }, - { - "epoch": 0.48, - "grad_norm": 1.075619778357324, - "learning_rate": 1.1281921771569229e-05, - "loss": 0.1671, - "step": 9347 - }, - { - "epoch": 0.48, - "grad_norm": 1.1719243199926213, - "learning_rate": 1.1280288374132704e-05, - "loss": 0.1979, - "step": 9348 - }, - { - "epoch": 0.48, - "grad_norm": 1.1759049357909859, - "learning_rate": 1.1278654941968285e-05, - "loss": 0.1915, - "step": 9349 - }, - { - "epoch": 0.48, - "grad_norm": 0.8400856096808293, - "learning_rate": 1.127702147512028e-05, - "loss": 0.1709, - "step": 9350 - }, - { - "epoch": 0.48, - "grad_norm": 1.1444023227495415, - "learning_rate": 1.1275387973632994e-05, - "loss": 0.2173, - "step": 9351 - }, - { - "epoch": 0.48, - "grad_norm": 1.0254301032804167, - "learning_rate": 1.1273754437550738e-05, - "loss": 0.1826, - "step": 9352 - }, - { - "epoch": 0.48, - "grad_norm": 1.30260397154843, - "learning_rate": 1.1272120866917821e-05, - "loss": 0.1936, - "step": 9353 - }, - { - "epoch": 0.48, - "grad_norm": 0.8536684491265273, - "learning_rate": 1.1270487261778554e-05, - "loss": 0.1925, - "step": 9354 - }, - { - "epoch": 0.48, - "grad_norm": 0.9385710112540513, - "learning_rate": 1.1268853622177248e-05, - "loss": 0.1765, - "step": 9355 - }, - { - "epoch": 0.48, - "grad_norm": 9.90856099719088, - "learning_rate": 1.1267219948158215e-05, - "loss": 0.201, - "step": 9356 - }, - { - "epoch": 0.48, - "grad_norm": 0.9892179146824301, - "learning_rate": 1.1265586239765772e-05, - "loss": 0.2016, - "step": 9357 - }, - { - "epoch": 0.48, - "grad_norm": 2.2092280855303295, - "learning_rate": 1.1263952497044225e-05, - "loss": 0.1823, - "step": 9358 - }, - { - "epoch": 0.48, - "grad_norm": 1.1156350393611687, - "learning_rate": 1.1262318720037902e-05, - "loss": 0.1772, - "step": 9359 - }, - { - "epoch": 0.48, - "grad_norm": 0.9048227767853564, - "learning_rate": 1.1260684908791109e-05, - "loss": 0.1869, - "step": 9360 - }, - { - "epoch": 0.48, - "grad_norm": 1.0731084504573483, - "learning_rate": 1.1259051063348167e-05, - "loss": 0.1601, - "step": 9361 - }, - { - "epoch": 0.48, - "grad_norm": 1.4848992993314354, - "learning_rate": 1.1257417183753391e-05, - "loss": 0.1813, - "step": 9362 - }, - { - "epoch": 0.48, - "grad_norm": 0.987609588215139, - "learning_rate": 1.1255783270051105e-05, - "loss": 0.1609, - "step": 9363 - }, - { - "epoch": 0.48, - "grad_norm": 1.1217420741641009, - "learning_rate": 1.125414932228563e-05, - "loss": 0.184, - "step": 9364 - }, - { - "epoch": 0.48, - "grad_norm": 1.0911673067257806, - "learning_rate": 1.1252515340501282e-05, - "loss": 0.1864, - "step": 9365 - }, - { - "epoch": 0.48, - "grad_norm": 1.2569941304967243, - "learning_rate": 1.1250881324742382e-05, - "loss": 0.2255, - "step": 9366 - }, - { - "epoch": 0.48, - "grad_norm": 0.9116639119858985, - "learning_rate": 1.1249247275053256e-05, - "loss": 0.1844, - "step": 9367 - }, - { - "epoch": 0.48, - "grad_norm": 1.083187498682193, - "learning_rate": 1.1247613191478231e-05, - "loss": 0.181, - "step": 9368 - }, - { - "epoch": 0.48, - "grad_norm": 1.2340382247496755, - "learning_rate": 1.1245979074061623e-05, - "loss": 0.1796, - "step": 9369 - }, - { - "epoch": 0.48, - "grad_norm": 1.2310194776736283, - "learning_rate": 1.1244344922847765e-05, - "loss": 0.2008, - "step": 9370 - }, - { - "epoch": 0.48, - "grad_norm": 1.0514258589881065, - "learning_rate": 1.1242710737880979e-05, - "loss": 0.2235, - "step": 9371 - }, - { - "epoch": 0.48, - "grad_norm": 1.0733065946367741, - "learning_rate": 1.1241076519205595e-05, - "loss": 0.1793, - "step": 9372 - }, - { - "epoch": 0.48, - "grad_norm": 1.6731214337021334, - "learning_rate": 1.123944226686594e-05, - "loss": 0.2054, - "step": 9373 - }, - { - "epoch": 0.48, - "grad_norm": 1.0373545401706548, - "learning_rate": 1.1237807980906346e-05, - "loss": 0.1899, - "step": 9374 - }, - { - "epoch": 0.48, - "grad_norm": 0.8567521447141507, - "learning_rate": 1.1236173661371139e-05, - "loss": 0.1926, - "step": 9375 - }, - { - "epoch": 0.48, - "grad_norm": 1.6535204425524637, - "learning_rate": 1.123453930830465e-05, - "loss": 0.1908, - "step": 9376 - }, - { - "epoch": 0.48, - "grad_norm": 1.6849632397852876, - "learning_rate": 1.1232904921751216e-05, - "loss": 0.2385, - "step": 9377 - }, - { - "epoch": 0.48, - "grad_norm": 0.9553081200938266, - "learning_rate": 1.1231270501755162e-05, - "loss": 0.1924, - "step": 9378 - }, - { - "epoch": 0.48, - "grad_norm": 0.9561839368777103, - "learning_rate": 1.1229636048360828e-05, - "loss": 0.2014, - "step": 9379 - }, - { - "epoch": 0.48, - "grad_norm": 0.9692146046944835, - "learning_rate": 1.1228001561612547e-05, - "loss": 0.1691, - "step": 9380 - }, - { - "epoch": 0.48, - "grad_norm": 1.0657126735953355, - "learning_rate": 1.1226367041554655e-05, - "loss": 0.1835, - "step": 9381 - }, - { - "epoch": 0.48, - "grad_norm": 0.9752036307321181, - "learning_rate": 1.1224732488231487e-05, - "loss": 0.1828, - "step": 9382 - }, - { - "epoch": 0.48, - "grad_norm": 0.9659786017577724, - "learning_rate": 1.1223097901687382e-05, - "loss": 0.1909, - "step": 9383 - }, - { - "epoch": 0.48, - "grad_norm": 0.8918016838091298, - "learning_rate": 1.1221463281966673e-05, - "loss": 0.1661, - "step": 9384 - }, - { - "epoch": 0.48, - "grad_norm": 0.8987191636079584, - "learning_rate": 1.1219828629113707e-05, - "loss": 0.1765, - "step": 9385 - }, - { - "epoch": 0.48, - "grad_norm": 1.45173704218604, - "learning_rate": 1.1218193943172821e-05, - "loss": 0.2047, - "step": 9386 - }, - { - "epoch": 0.48, - "grad_norm": 1.6950586457712218, - "learning_rate": 1.1216559224188355e-05, - "loss": 0.2014, - "step": 9387 - }, - { - "epoch": 0.48, - "grad_norm": 1.0394831725837366, - "learning_rate": 1.1214924472204651e-05, - "loss": 0.1983, - "step": 9388 - }, - { - "epoch": 0.48, - "grad_norm": 1.2175066037402202, - "learning_rate": 1.1213289687266052e-05, - "loss": 0.1872, - "step": 9389 - }, - { - "epoch": 0.48, - "grad_norm": 1.3022976481634068, - "learning_rate": 1.1211654869416901e-05, - "loss": 0.2173, - "step": 9390 - }, - { - "epoch": 0.48, - "grad_norm": 1.027564229058217, - "learning_rate": 1.1210020018701546e-05, - "loss": 0.1745, - "step": 9391 - }, - { - "epoch": 0.48, - "grad_norm": 1.1945177307533967, - "learning_rate": 1.1208385135164329e-05, - "loss": 0.2103, - "step": 9392 - }, - { - "epoch": 0.48, - "grad_norm": 0.8846131409121079, - "learning_rate": 1.120675021884959e-05, - "loss": 0.1483, - "step": 9393 - }, - { - "epoch": 0.48, - "grad_norm": 1.012946214427254, - "learning_rate": 1.1205115269801695e-05, - "loss": 0.1726, - "step": 9394 - }, - { - "epoch": 0.48, - "grad_norm": 1.4632348655698826, - "learning_rate": 1.1203480288064974e-05, - "loss": 0.1836, - "step": 9395 - }, - { - "epoch": 0.48, - "grad_norm": 1.2870539131782863, - "learning_rate": 1.1201845273683782e-05, - "loss": 0.1799, - "step": 9396 - }, - { - "epoch": 0.48, - "grad_norm": 0.8808388430584856, - "learning_rate": 1.1200210226702469e-05, - "loss": 0.2151, - "step": 9397 - }, - { - "epoch": 0.48, - "grad_norm": 0.9740500347235727, - "learning_rate": 1.1198575147165384e-05, - "loss": 0.177, - "step": 9398 - }, - { - "epoch": 0.48, - "grad_norm": 1.5208257341292488, - "learning_rate": 1.1196940035116884e-05, - "loss": 0.1716, - "step": 9399 - }, - { - "epoch": 0.48, - "grad_norm": 1.035575690228776, - "learning_rate": 1.1195304890601317e-05, - "loss": 0.2063, - "step": 9400 - }, - { - "epoch": 0.48, - "grad_norm": 1.012321287351004, - "learning_rate": 1.1193669713663039e-05, - "loss": 0.1733, - "step": 9401 - }, - { - "epoch": 0.48, - "grad_norm": 1.3756052531505762, - "learning_rate": 1.1192034504346397e-05, - "loss": 0.1951, - "step": 9402 - }, - { - "epoch": 0.48, - "grad_norm": 2.4966464979842127, - "learning_rate": 1.1190399262695757e-05, - "loss": 0.1959, - "step": 9403 - }, - { - "epoch": 0.48, - "grad_norm": 0.9375503280340816, - "learning_rate": 1.1188763988755467e-05, - "loss": 0.183, - "step": 9404 - }, - { - "epoch": 0.48, - "grad_norm": 1.1260862585486986, - "learning_rate": 1.1187128682569888e-05, - "loss": 0.1945, - "step": 9405 - }, - { - "epoch": 0.48, - "grad_norm": 0.9728000538819264, - "learning_rate": 1.1185493344183375e-05, - "loss": 0.1859, - "step": 9406 - }, - { - "epoch": 0.48, - "grad_norm": 1.0834736784120405, - "learning_rate": 1.1183857973640289e-05, - "loss": 0.1855, - "step": 9407 - }, - { - "epoch": 0.48, - "grad_norm": 0.9348768976215369, - "learning_rate": 1.118222257098499e-05, - "loss": 0.1856, - "step": 9408 - }, - { - "epoch": 0.48, - "grad_norm": 0.9706361044266855, - "learning_rate": 1.1180587136261835e-05, - "loss": 0.1889, - "step": 9409 - }, - { - "epoch": 0.48, - "grad_norm": 0.9726258225414485, - "learning_rate": 1.117895166951519e-05, - "loss": 0.1734, - "step": 9410 - }, - { - "epoch": 0.48, - "grad_norm": 1.0166944674563385, - "learning_rate": 1.1177316170789412e-05, - "loss": 0.1909, - "step": 9411 - }, - { - "epoch": 0.48, - "grad_norm": 0.8494335568980105, - "learning_rate": 1.1175680640128867e-05, - "loss": 0.2172, - "step": 9412 - }, - { - "epoch": 0.48, - "grad_norm": 0.9503780665887606, - "learning_rate": 1.117404507757792e-05, - "loss": 0.1835, - "step": 9413 - }, - { - "epoch": 0.48, - "grad_norm": 1.0304138091835113, - "learning_rate": 1.1172409483180929e-05, - "loss": 0.1669, - "step": 9414 - }, - { - "epoch": 0.48, - "grad_norm": 1.3386853123357887, - "learning_rate": 1.1170773856982268e-05, - "loss": 0.1824, - "step": 9415 - }, - { - "epoch": 0.48, - "grad_norm": 2.192617945116487, - "learning_rate": 1.11691381990263e-05, - "loss": 0.1875, - "step": 9416 - }, - { - "epoch": 0.48, - "grad_norm": 1.044363310431338, - "learning_rate": 1.1167502509357393e-05, - "loss": 0.1805, - "step": 9417 - }, - { - "epoch": 0.48, - "grad_norm": 0.8158167248839474, - "learning_rate": 1.1165866788019912e-05, - "loss": 0.1921, - "step": 9418 - }, - { - "epoch": 0.48, - "grad_norm": 1.4395607267781292, - "learning_rate": 1.1164231035058228e-05, - "loss": 0.2025, - "step": 9419 - }, - { - "epoch": 0.48, - "grad_norm": 1.015506286236725, - "learning_rate": 1.1162595250516715e-05, - "loss": 0.1666, - "step": 9420 - }, - { - "epoch": 0.48, - "grad_norm": 1.5064817417365441, - "learning_rate": 1.116095943443974e-05, - "loss": 0.1904, - "step": 9421 - }, - { - "epoch": 0.48, - "grad_norm": 1.7158616883607665, - "learning_rate": 1.1159323586871673e-05, - "loss": 0.1879, - "step": 9422 - }, - { - "epoch": 0.48, - "grad_norm": 0.8696293588736635, - "learning_rate": 1.1157687707856888e-05, - "loss": 0.1676, - "step": 9423 - }, - { - "epoch": 0.48, - "grad_norm": 1.2717209787363684, - "learning_rate": 1.1156051797439757e-05, - "loss": 0.177, - "step": 9424 - }, - { - "epoch": 0.48, - "grad_norm": 1.0687536577180041, - "learning_rate": 1.1154415855664657e-05, - "loss": 0.2175, - "step": 9425 - }, - { - "epoch": 0.48, - "grad_norm": 1.487045634947814, - "learning_rate": 1.1152779882575964e-05, - "loss": 0.1979, - "step": 9426 - }, - { - "epoch": 0.48, - "grad_norm": 1.247667420377959, - "learning_rate": 1.115114387821805e-05, - "loss": 0.2063, - "step": 9427 - }, - { - "epoch": 0.48, - "grad_norm": 0.9232023822380149, - "learning_rate": 1.1149507842635293e-05, - "loss": 0.193, - "step": 9428 - }, - { - "epoch": 0.48, - "grad_norm": 1.0482885930779677, - "learning_rate": 1.1147871775872072e-05, - "loss": 0.1661, - "step": 9429 - }, - { - "epoch": 0.48, - "grad_norm": 0.9521862144910216, - "learning_rate": 1.1146235677972765e-05, - "loss": 0.1883, - "step": 9430 - }, - { - "epoch": 0.48, - "grad_norm": 0.8533427250808602, - "learning_rate": 1.1144599548981749e-05, - "loss": 0.2021, - "step": 9431 - }, - { - "epoch": 0.48, - "grad_norm": 1.0419278456030308, - "learning_rate": 1.1142963388943405e-05, - "loss": 0.1666, - "step": 9432 - }, - { - "epoch": 0.48, - "grad_norm": 0.7751955248876855, - "learning_rate": 1.1141327197902114e-05, - "loss": 0.163, - "step": 9433 - }, - { - "epoch": 0.48, - "grad_norm": 0.8975275086310701, - "learning_rate": 1.113969097590226e-05, - "loss": 0.1881, - "step": 9434 - }, - { - "epoch": 0.48, - "grad_norm": 0.92585182171884, - "learning_rate": 1.1138054722988223e-05, - "loss": 0.2046, - "step": 9435 - }, - { - "epoch": 0.48, - "grad_norm": 0.7624586845530239, - "learning_rate": 1.1136418439204388e-05, - "loss": 0.1841, - "step": 9436 - }, - { - "epoch": 0.48, - "grad_norm": 6.557712694484254, - "learning_rate": 1.1134782124595136e-05, - "loss": 0.1889, - "step": 9437 - }, - { - "epoch": 0.48, - "grad_norm": 1.0712559355250177, - "learning_rate": 1.113314577920486e-05, - "loss": 0.204, - "step": 9438 - }, - { - "epoch": 0.48, - "grad_norm": 0.9869076526938929, - "learning_rate": 1.1131509403077936e-05, - "loss": 0.1832, - "step": 9439 - }, - { - "epoch": 0.48, - "grad_norm": 1.4004004264938446, - "learning_rate": 1.1129872996258757e-05, - "loss": 0.2028, - "step": 9440 - }, - { - "epoch": 0.48, - "grad_norm": 0.8442967787105335, - "learning_rate": 1.1128236558791708e-05, - "loss": 0.1941, - "step": 9441 - }, - { - "epoch": 0.48, - "grad_norm": 0.9274354478195815, - "learning_rate": 1.112660009072118e-05, - "loss": 0.1768, - "step": 9442 - }, - { - "epoch": 0.48, - "grad_norm": 1.130228477397798, - "learning_rate": 1.1124963592091563e-05, - "loss": 0.1773, - "step": 9443 - }, - { - "epoch": 0.48, - "grad_norm": 0.9728083311241017, - "learning_rate": 1.1123327062947243e-05, - "loss": 0.2137, - "step": 9444 - }, - { - "epoch": 0.48, - "grad_norm": 0.9970451783684262, - "learning_rate": 1.1121690503332613e-05, - "loss": 0.1762, - "step": 9445 - }, - { - "epoch": 0.48, - "grad_norm": 0.8959882886040591, - "learning_rate": 1.1120053913292066e-05, - "loss": 0.1735, - "step": 9446 - }, - { - "epoch": 0.48, - "grad_norm": 1.1404202161163575, - "learning_rate": 1.1118417292869992e-05, - "loss": 0.2025, - "step": 9447 - }, - { - "epoch": 0.48, - "grad_norm": 0.9352040327694172, - "learning_rate": 1.1116780642110785e-05, - "loss": 0.1955, - "step": 9448 - }, - { - "epoch": 0.48, - "grad_norm": 1.3641165860043831, - "learning_rate": 1.1115143961058843e-05, - "loss": 0.182, - "step": 9449 - }, - { - "epoch": 0.48, - "grad_norm": 1.0398020609796403, - "learning_rate": 1.1113507249758553e-05, - "loss": 0.1765, - "step": 9450 - }, - { - "epoch": 0.48, - "grad_norm": 1.6875637408914828, - "learning_rate": 1.111187050825432e-05, - "loss": 0.1832, - "step": 9451 - }, - { - "epoch": 0.48, - "grad_norm": 1.0012695602418653, - "learning_rate": 1.1110233736590535e-05, - "loss": 0.1798, - "step": 9452 - }, - { - "epoch": 0.48, - "grad_norm": 1.137091287242088, - "learning_rate": 1.1108596934811598e-05, - "loss": 0.2062, - "step": 9453 - }, - { - "epoch": 0.48, - "grad_norm": 1.073121214044263, - "learning_rate": 1.1106960102961906e-05, - "loss": 0.1909, - "step": 9454 - }, - { - "epoch": 0.48, - "grad_norm": 0.919479504533419, - "learning_rate": 1.1105323241085856e-05, - "loss": 0.1874, - "step": 9455 - }, - { - "epoch": 0.48, - "grad_norm": 1.0339051802221684, - "learning_rate": 1.1103686349227856e-05, - "loss": 0.1888, - "step": 9456 - }, - { - "epoch": 0.48, - "grad_norm": 0.9920965654107798, - "learning_rate": 1.11020494274323e-05, - "loss": 0.1766, - "step": 9457 - }, - { - "epoch": 0.48, - "grad_norm": 1.091572426986709, - "learning_rate": 1.1100412475743589e-05, - "loss": 0.198, - "step": 9458 - }, - { - "epoch": 0.48, - "grad_norm": 0.9002278553615146, - "learning_rate": 1.1098775494206126e-05, - "loss": 0.1809, - "step": 9459 - }, - { - "epoch": 0.48, - "grad_norm": 0.9662421903169365, - "learning_rate": 1.109713848286432e-05, - "loss": 0.1869, - "step": 9460 - }, - { - "epoch": 0.48, - "grad_norm": 1.0745650451858144, - "learning_rate": 1.1095501441762568e-05, - "loss": 0.1761, - "step": 9461 - }, - { - "epoch": 0.48, - "grad_norm": 0.6982312184914814, - "learning_rate": 1.109386437094528e-05, - "loss": 0.1811, - "step": 9462 - }, - { - "epoch": 0.48, - "grad_norm": 0.8952950825627658, - "learning_rate": 1.1092227270456857e-05, - "loss": 0.1971, - "step": 9463 - }, - { - "epoch": 0.48, - "grad_norm": 1.1015031040313652, - "learning_rate": 1.1090590140341709e-05, - "loss": 0.2004, - "step": 9464 - }, - { - "epoch": 0.48, - "grad_norm": 0.8780746732320548, - "learning_rate": 1.1088952980644242e-05, - "loss": 0.1639, - "step": 9465 - }, - { - "epoch": 0.48, - "grad_norm": 1.0166800530835955, - "learning_rate": 1.1087315791408864e-05, - "loss": 0.1938, - "step": 9466 - }, - { - "epoch": 0.48, - "grad_norm": 0.893028751235322, - "learning_rate": 1.1085678572679978e-05, - "loss": 0.1791, - "step": 9467 - }, - { - "epoch": 0.48, - "grad_norm": 2.1214063958853724, - "learning_rate": 1.1084041324502006e-05, - "loss": 0.1951, - "step": 9468 - }, - { - "epoch": 0.48, - "grad_norm": 0.9458754700189468, - "learning_rate": 1.108240404691935e-05, - "loss": 0.1844, - "step": 9469 - }, - { - "epoch": 0.48, - "grad_norm": 1.4094387971337061, - "learning_rate": 1.1080766739976424e-05, - "loss": 0.2075, - "step": 9470 - }, - { - "epoch": 0.48, - "grad_norm": 0.9267791702679756, - "learning_rate": 1.1079129403717639e-05, - "loss": 0.1891, - "step": 9471 - }, - { - "epoch": 0.48, - "grad_norm": 3.480391044904194, - "learning_rate": 1.1077492038187403e-05, - "loss": 0.2007, - "step": 9472 - }, - { - "epoch": 0.48, - "grad_norm": 1.2714801697325204, - "learning_rate": 1.107585464343014e-05, - "loss": 0.1809, - "step": 9473 - }, - { - "epoch": 0.48, - "grad_norm": 1.0151298523185912, - "learning_rate": 1.1074217219490258e-05, - "loss": 0.1926, - "step": 9474 - }, - { - "epoch": 0.48, - "grad_norm": 1.4766124566730703, - "learning_rate": 1.1072579766412172e-05, - "loss": 0.2004, - "step": 9475 - }, - { - "epoch": 0.48, - "grad_norm": 1.0682720487733304, - "learning_rate": 1.10709422842403e-05, - "loss": 0.1746, - "step": 9476 - }, - { - "epoch": 0.48, - "grad_norm": 1.0623812204356664, - "learning_rate": 1.1069304773019058e-05, - "loss": 0.2052, - "step": 9477 - }, - { - "epoch": 0.48, - "grad_norm": 0.98325888769799, - "learning_rate": 1.1067667232792864e-05, - "loss": 0.1969, - "step": 9478 - }, - { - "epoch": 0.48, - "grad_norm": 1.249394726179517, - "learning_rate": 1.1066029663606138e-05, - "loss": 0.1765, - "step": 9479 - }, - { - "epoch": 0.48, - "grad_norm": 1.1215393168986563, - "learning_rate": 1.1064392065503294e-05, - "loss": 0.2001, - "step": 9480 - }, - { - "epoch": 0.48, - "grad_norm": 0.9950671645201333, - "learning_rate": 1.1062754438528758e-05, - "loss": 0.1866, - "step": 9481 - }, - { - "epoch": 0.48, - "grad_norm": 1.4252768997024146, - "learning_rate": 1.1061116782726947e-05, - "loss": 0.2047, - "step": 9482 - }, - { - "epoch": 0.48, - "grad_norm": 1.6916831687929794, - "learning_rate": 1.1059479098142281e-05, - "loss": 0.1886, - "step": 9483 - }, - { - "epoch": 0.48, - "grad_norm": 1.298294711202152, - "learning_rate": 1.1057841384819185e-05, - "loss": 0.1986, - "step": 9484 - }, - { - "epoch": 0.48, - "grad_norm": 0.9317317237057114, - "learning_rate": 1.1056203642802081e-05, - "loss": 0.1926, - "step": 9485 - }, - { - "epoch": 0.48, - "grad_norm": 0.8458105659558617, - "learning_rate": 1.1054565872135397e-05, - "loss": 0.189, - "step": 9486 - }, - { - "epoch": 0.48, - "grad_norm": 1.2018879195347907, - "learning_rate": 1.1052928072863552e-05, - "loss": 0.1777, - "step": 9487 - }, - { - "epoch": 0.48, - "grad_norm": 0.894558397914112, - "learning_rate": 1.1051290245030975e-05, - "loss": 0.1987, - "step": 9488 - }, - { - "epoch": 0.48, - "grad_norm": 1.1740428506485563, - "learning_rate": 1.1049652388682088e-05, - "loss": 0.1954, - "step": 9489 - }, - { - "epoch": 0.48, - "grad_norm": 0.8405196629069694, - "learning_rate": 1.1048014503861321e-05, - "loss": 0.1678, - "step": 9490 - }, - { - "epoch": 0.48, - "grad_norm": 1.025401225748899, - "learning_rate": 1.1046376590613103e-05, - "loss": 0.1766, - "step": 9491 - }, - { - "epoch": 0.48, - "grad_norm": 1.2048673707118842, - "learning_rate": 1.104473864898186e-05, - "loss": 0.2014, - "step": 9492 - }, - { - "epoch": 0.48, - "grad_norm": 1.2694804452467194, - "learning_rate": 1.1043100679012025e-05, - "loss": 0.1935, - "step": 9493 - }, - { - "epoch": 0.48, - "grad_norm": 0.8965625555710108, - "learning_rate": 1.104146268074802e-05, - "loss": 0.1854, - "step": 9494 - }, - { - "epoch": 0.48, - "grad_norm": 0.9878495481703905, - "learning_rate": 1.1039824654234286e-05, - "loss": 0.1748, - "step": 9495 - }, - { - "epoch": 0.48, - "grad_norm": 1.3167225063683237, - "learning_rate": 1.1038186599515247e-05, - "loss": 0.2079, - "step": 9496 - }, - { - "epoch": 0.48, - "grad_norm": 1.148398782309424, - "learning_rate": 1.1036548516635339e-05, - "loss": 0.1933, - "step": 9497 - }, - { - "epoch": 0.48, - "grad_norm": 0.9509034949490187, - "learning_rate": 1.1034910405638992e-05, - "loss": 0.1888, - "step": 9498 - }, - { - "epoch": 0.48, - "grad_norm": 1.0496257154764759, - "learning_rate": 1.1033272266570645e-05, - "loss": 0.1887, - "step": 9499 - }, - { - "epoch": 0.48, - "grad_norm": 1.035097016626563, - "learning_rate": 1.1031634099474727e-05, - "loss": 0.1916, - "step": 9500 - }, - { - "epoch": 0.48, - "grad_norm": 1.1664136233350906, - "learning_rate": 1.1029995904395676e-05, - "loss": 0.2019, - "step": 9501 - }, - { - "epoch": 0.48, - "grad_norm": 1.132179506013568, - "learning_rate": 1.1028357681377928e-05, - "loss": 0.179, - "step": 9502 - }, - { - "epoch": 0.48, - "grad_norm": 0.9801111290950743, - "learning_rate": 1.1026719430465919e-05, - "loss": 0.2018, - "step": 9503 - }, - { - "epoch": 0.48, - "grad_norm": 1.116829993968936, - "learning_rate": 1.1025081151704089e-05, - "loss": 0.171, - "step": 9504 - }, - { - "epoch": 0.48, - "grad_norm": 0.9850572439318865, - "learning_rate": 1.1023442845136874e-05, - "loss": 0.2012, - "step": 9505 - }, - { - "epoch": 0.48, - "grad_norm": 1.1544031189586472, - "learning_rate": 1.1021804510808715e-05, - "loss": 0.1966, - "step": 9506 - }, - { - "epoch": 0.48, - "grad_norm": 1.1640617416122763, - "learning_rate": 1.102016614876405e-05, - "loss": 0.1639, - "step": 9507 - }, - { - "epoch": 0.48, - "grad_norm": 1.7324786741544342, - "learning_rate": 1.1018527759047319e-05, - "loss": 0.1758, - "step": 9508 - }, - { - "epoch": 0.48, - "grad_norm": 0.9896374147248223, - "learning_rate": 1.1016889341702968e-05, - "loss": 0.1674, - "step": 9509 - }, - { - "epoch": 0.48, - "grad_norm": 1.0471666833332736, - "learning_rate": 1.1015250896775436e-05, - "loss": 0.1844, - "step": 9510 - }, - { - "epoch": 0.48, - "grad_norm": 0.9148938137694808, - "learning_rate": 1.1013612424309163e-05, - "loss": 0.1826, - "step": 9511 - }, - { - "epoch": 0.48, - "grad_norm": 1.0928563897735315, - "learning_rate": 1.1011973924348599e-05, - "loss": 0.2285, - "step": 9512 - }, - { - "epoch": 0.48, - "grad_norm": 1.2076467126569248, - "learning_rate": 1.1010335396938183e-05, - "loss": 0.2117, - "step": 9513 - }, - { - "epoch": 0.48, - "grad_norm": 2.7580399000340794, - "learning_rate": 1.1008696842122364e-05, - "loss": 0.1968, - "step": 9514 - }, - { - "epoch": 0.48, - "grad_norm": 0.9902090228799503, - "learning_rate": 1.1007058259945584e-05, - "loss": 0.1701, - "step": 9515 - }, - { - "epoch": 0.48, - "grad_norm": 0.8934607040812377, - "learning_rate": 1.1005419650452294e-05, - "loss": 0.2102, - "step": 9516 - }, - { - "epoch": 0.48, - "grad_norm": 1.5274316465729678, - "learning_rate": 1.1003781013686939e-05, - "loss": 0.192, - "step": 9517 - }, - { - "epoch": 0.48, - "grad_norm": 1.1934406110291276, - "learning_rate": 1.1002142349693967e-05, - "loss": 0.1596, - "step": 9518 - }, - { - "epoch": 0.48, - "grad_norm": 1.8946649186923104, - "learning_rate": 1.1000503658517827e-05, - "loss": 0.1874, - "step": 9519 - }, - { - "epoch": 0.48, - "grad_norm": 2.2912572338837025, - "learning_rate": 1.0998864940202967e-05, - "loss": 0.2044, - "step": 9520 - }, - { - "epoch": 0.48, - "grad_norm": 1.1351873568499273, - "learning_rate": 1.0997226194793842e-05, - "loss": 0.1764, - "step": 9521 - }, - { - "epoch": 0.48, - "grad_norm": 1.1338616587599282, - "learning_rate": 1.09955874223349e-05, - "loss": 0.1884, - "step": 9522 - }, - { - "epoch": 0.48, - "grad_norm": 0.9169417601362231, - "learning_rate": 1.099394862287059e-05, - "loss": 0.1832, - "step": 9523 - }, - { - "epoch": 0.48, - "grad_norm": 0.8400343124098275, - "learning_rate": 1.099230979644537e-05, - "loss": 0.1825, - "step": 9524 - }, - { - "epoch": 0.48, - "grad_norm": 1.2284716183910207, - "learning_rate": 1.0990670943103688e-05, - "loss": 0.1828, - "step": 9525 - }, - { - "epoch": 0.48, - "grad_norm": 1.2249751854792137, - "learning_rate": 1.0989032062890004e-05, - "loss": 0.1851, - "step": 9526 - }, - { - "epoch": 0.48, - "grad_norm": 1.1007619155064332, - "learning_rate": 1.0987393155848767e-05, - "loss": 0.1899, - "step": 9527 - }, - { - "epoch": 0.48, - "grad_norm": 1.3302949936435415, - "learning_rate": 1.0985754222024437e-05, - "loss": 0.1778, - "step": 9528 - }, - { - "epoch": 0.48, - "grad_norm": 0.7935309274750862, - "learning_rate": 1.0984115261461466e-05, - "loss": 0.1679, - "step": 9529 - }, - { - "epoch": 0.48, - "grad_norm": 0.8740483493541972, - "learning_rate": 1.0982476274204314e-05, - "loss": 0.1912, - "step": 9530 - }, - { - "epoch": 0.48, - "grad_norm": 0.9544592534379195, - "learning_rate": 1.0980837260297437e-05, - "loss": 0.1756, - "step": 9531 - }, - { - "epoch": 0.48, - "grad_norm": 1.4647054361443923, - "learning_rate": 1.0979198219785296e-05, - "loss": 0.1668, - "step": 9532 - }, - { - "epoch": 0.48, - "grad_norm": 2.0763420008421867, - "learning_rate": 1.0977559152712347e-05, - "loss": 0.1942, - "step": 9533 - }, - { - "epoch": 0.48, - "grad_norm": 1.3867599307183982, - "learning_rate": 1.0975920059123051e-05, - "loss": 0.1943, - "step": 9534 - }, - { - "epoch": 0.48, - "grad_norm": 1.4807281378852024, - "learning_rate": 1.0974280939061867e-05, - "loss": 0.1769, - "step": 9535 - }, - { - "epoch": 0.48, - "grad_norm": 1.7477775660421317, - "learning_rate": 1.0972641792573258e-05, - "loss": 0.1705, - "step": 9536 - }, - { - "epoch": 0.48, - "grad_norm": 1.0163715440655499, - "learning_rate": 1.0971002619701682e-05, - "loss": 0.1845, - "step": 9537 - }, - { - "epoch": 0.49, - "grad_norm": 1.0746521485497653, - "learning_rate": 1.096936342049161e-05, - "loss": 0.2001, - "step": 9538 - }, - { - "epoch": 0.49, - "grad_norm": 0.8781367171305492, - "learning_rate": 1.0967724194987498e-05, - "loss": 0.1904, - "step": 9539 - }, - { - "epoch": 0.49, - "grad_norm": 0.765734891397578, - "learning_rate": 1.0966084943233818e-05, - "loss": 0.1884, - "step": 9540 - }, - { - "epoch": 0.49, - "grad_norm": 1.13823561612025, - "learning_rate": 1.0964445665275023e-05, - "loss": 0.1873, - "step": 9541 - }, - { - "epoch": 0.49, - "grad_norm": 0.9539136937544149, - "learning_rate": 1.0962806361155585e-05, - "loss": 0.1784, - "step": 9542 - }, - { - "epoch": 0.49, - "grad_norm": 1.0034189361586376, - "learning_rate": 1.0961167030919973e-05, - "loss": 0.1868, - "step": 9543 - }, - { - "epoch": 0.49, - "grad_norm": 1.2143927037480549, - "learning_rate": 1.095952767461265e-05, - "loss": 0.1821, - "step": 9544 - }, - { - "epoch": 0.49, - "grad_norm": 1.54044962525446, - "learning_rate": 1.0957888292278084e-05, - "loss": 0.1871, - "step": 9545 - }, - { - "epoch": 0.49, - "grad_norm": 0.9402634558591103, - "learning_rate": 1.0956248883960744e-05, - "loss": 0.1947, - "step": 9546 - }, - { - "epoch": 0.49, - "grad_norm": 0.9362298716995432, - "learning_rate": 1.0954609449705097e-05, - "loss": 0.1812, - "step": 9547 - }, - { - "epoch": 0.49, - "grad_norm": 1.1257224446570795, - "learning_rate": 1.095296998955562e-05, - "loss": 0.1759, - "step": 9548 - }, - { - "epoch": 0.49, - "grad_norm": 1.7555764622968162, - "learning_rate": 1.0951330503556776e-05, - "loss": 0.2133, - "step": 9549 - }, - { - "epoch": 0.49, - "grad_norm": 0.8595309424350507, - "learning_rate": 1.0949690991753036e-05, - "loss": 0.1849, - "step": 9550 - }, - { - "epoch": 0.49, - "grad_norm": 1.2556944727905175, - "learning_rate": 1.0948051454188877e-05, - "loss": 0.1932, - "step": 9551 - }, - { - "epoch": 0.49, - "grad_norm": 1.0799262374997667, - "learning_rate": 1.094641189090877e-05, - "loss": 0.1774, - "step": 9552 - }, - { - "epoch": 0.49, - "grad_norm": 1.2042809302790212, - "learning_rate": 1.0944772301957185e-05, - "loss": 0.1714, - "step": 9553 - }, - { - "epoch": 0.49, - "grad_norm": 0.8870239839138336, - "learning_rate": 1.0943132687378597e-05, - "loss": 0.2022, - "step": 9554 - }, - { - "epoch": 0.49, - "grad_norm": 0.7968798872828126, - "learning_rate": 1.0941493047217482e-05, - "loss": 0.1526, - "step": 9555 - }, - { - "epoch": 0.49, - "grad_norm": 0.9582730119266584, - "learning_rate": 1.0939853381518315e-05, - "loss": 0.1779, - "step": 9556 - }, - { - "epoch": 0.49, - "grad_norm": 0.8933870974205196, - "learning_rate": 1.0938213690325572e-05, - "loss": 0.1911, - "step": 9557 - }, - { - "epoch": 0.49, - "grad_norm": 0.8760223525297648, - "learning_rate": 1.093657397368373e-05, - "loss": 0.1833, - "step": 9558 - }, - { - "epoch": 0.49, - "grad_norm": 0.8660757127061413, - "learning_rate": 1.0934934231637267e-05, - "loss": 0.2105, - "step": 9559 - }, - { - "epoch": 0.49, - "grad_norm": 1.0198198870011186, - "learning_rate": 1.0933294464230657e-05, - "loss": 0.1763, - "step": 9560 - }, - { - "epoch": 0.49, - "grad_norm": 0.8730812713789345, - "learning_rate": 1.0931654671508384e-05, - "loss": 0.203, - "step": 9561 - }, - { - "epoch": 0.49, - "grad_norm": 0.8240497451748958, - "learning_rate": 1.0930014853514925e-05, - "loss": 0.1809, - "step": 9562 - }, - { - "epoch": 0.49, - "grad_norm": 1.3428577217343693, - "learning_rate": 1.0928375010294762e-05, - "loss": 0.189, - "step": 9563 - }, - { - "epoch": 0.49, - "grad_norm": 0.9102713113626955, - "learning_rate": 1.092673514189237e-05, - "loss": 0.194, - "step": 9564 - }, - { - "epoch": 0.49, - "grad_norm": 0.8800419886753066, - "learning_rate": 1.092509524835224e-05, - "loss": 0.1863, - "step": 9565 - }, - { - "epoch": 0.49, - "grad_norm": 0.8915238012081395, - "learning_rate": 1.0923455329718849e-05, - "loss": 0.1711, - "step": 9566 - }, - { - "epoch": 0.49, - "grad_norm": 0.8247002058985641, - "learning_rate": 1.0921815386036679e-05, - "loss": 0.1808, - "step": 9567 - }, - { - "epoch": 0.49, - "grad_norm": 0.9421256090529048, - "learning_rate": 1.0920175417350214e-05, - "loss": 0.1706, - "step": 9568 - }, - { - "epoch": 0.49, - "grad_norm": 1.5488011925565883, - "learning_rate": 1.091853542370394e-05, - "loss": 0.1864, - "step": 9569 - }, - { - "epoch": 0.49, - "grad_norm": 1.0941063553037418, - "learning_rate": 1.0916895405142339e-05, - "loss": 0.1973, - "step": 9570 - }, - { - "epoch": 0.49, - "grad_norm": 0.933533004750533, - "learning_rate": 1.09152553617099e-05, - "loss": 0.1759, - "step": 9571 - }, - { - "epoch": 0.49, - "grad_norm": 1.116679573487381, - "learning_rate": 1.0913615293451105e-05, - "loss": 0.1764, - "step": 9572 - }, - { - "epoch": 0.49, - "grad_norm": 1.3262860299047816, - "learning_rate": 1.0911975200410445e-05, - "loss": 0.2024, - "step": 9573 - }, - { - "epoch": 0.49, - "grad_norm": 1.0543549657524613, - "learning_rate": 1.0910335082632406e-05, - "loss": 0.2182, - "step": 9574 - }, - { - "epoch": 0.49, - "grad_norm": 1.1044120478533699, - "learning_rate": 1.0908694940161477e-05, - "loss": 0.1796, - "step": 9575 - }, - { - "epoch": 0.49, - "grad_norm": 0.8634574691411527, - "learning_rate": 1.0907054773042148e-05, - "loss": 0.1866, - "step": 9576 - }, - { - "epoch": 0.49, - "grad_norm": 0.8953783284154978, - "learning_rate": 1.0905414581318902e-05, - "loss": 0.1815, - "step": 9577 - }, - { - "epoch": 0.49, - "grad_norm": 1.0168979420911626, - "learning_rate": 1.090377436503624e-05, - "loss": 0.1585, - "step": 9578 - }, - { - "epoch": 0.49, - "grad_norm": 2.3193192319875138, - "learning_rate": 1.0902134124238644e-05, - "loss": 0.1978, - "step": 9579 - }, - { - "epoch": 0.49, - "grad_norm": 0.9542646773015464, - "learning_rate": 1.090049385897061e-05, - "loss": 0.2019, - "step": 9580 - }, - { - "epoch": 0.49, - "grad_norm": 1.2211461636864696, - "learning_rate": 1.089885356927663e-05, - "loss": 0.1991, - "step": 9581 - }, - { - "epoch": 0.49, - "grad_norm": 1.4789441101008454, - "learning_rate": 1.0897213255201193e-05, - "loss": 0.1985, - "step": 9582 - }, - { - "epoch": 0.49, - "grad_norm": 1.2913650894369069, - "learning_rate": 1.0895572916788799e-05, - "loss": 0.1891, - "step": 9583 - }, - { - "epoch": 0.49, - "grad_norm": 0.8862058384341132, - "learning_rate": 1.089393255408394e-05, - "loss": 0.1904, - "step": 9584 - }, - { - "epoch": 0.49, - "grad_norm": 0.9609850270437007, - "learning_rate": 1.0892292167131107e-05, - "loss": 0.2003, - "step": 9585 - }, - { - "epoch": 0.49, - "grad_norm": 1.068301573582356, - "learning_rate": 1.08906517559748e-05, - "loss": 0.1921, - "step": 9586 - }, - { - "epoch": 0.49, - "grad_norm": 0.9769987088158, - "learning_rate": 1.0889011320659513e-05, - "loss": 0.1679, - "step": 9587 - }, - { - "epoch": 0.49, - "grad_norm": 1.3124652154756535, - "learning_rate": 1.0887370861229744e-05, - "loss": 0.1971, - "step": 9588 - }, - { - "epoch": 0.49, - "grad_norm": 1.2458187192822827, - "learning_rate": 1.0885730377729993e-05, - "loss": 0.1789, - "step": 9589 - }, - { - "epoch": 0.49, - "grad_norm": 0.9722915506603313, - "learning_rate": 1.0884089870204751e-05, - "loss": 0.1861, - "step": 9590 - }, - { - "epoch": 0.49, - "grad_norm": 1.0798640423811054, - "learning_rate": 1.0882449338698521e-05, - "loss": 0.1671, - "step": 9591 - }, - { - "epoch": 0.49, - "grad_norm": 0.9778314506386806, - "learning_rate": 1.0880808783255808e-05, - "loss": 0.185, - "step": 9592 - }, - { - "epoch": 0.49, - "grad_norm": 1.1909601832238668, - "learning_rate": 1.0879168203921105e-05, - "loss": 0.1663, - "step": 9593 - }, - { - "epoch": 0.49, - "grad_norm": 1.354804048495785, - "learning_rate": 1.0877527600738913e-05, - "loss": 0.2059, - "step": 9594 - }, - { - "epoch": 0.49, - "grad_norm": 2.2460384170334757, - "learning_rate": 1.0875886973753735e-05, - "loss": 0.1811, - "step": 9595 - }, - { - "epoch": 0.49, - "grad_norm": 1.4932185049543425, - "learning_rate": 1.0874246323010074e-05, - "loss": 0.2045, - "step": 9596 - }, - { - "epoch": 0.49, - "grad_norm": 1.298711194215064, - "learning_rate": 1.0872605648552435e-05, - "loss": 0.1746, - "step": 9597 - }, - { - "epoch": 0.49, - "grad_norm": 1.322696201426117, - "learning_rate": 1.0870964950425315e-05, - "loss": 0.1721, - "step": 9598 - }, - { - "epoch": 0.49, - "grad_norm": 1.0958001072962307, - "learning_rate": 1.0869324228673222e-05, - "loss": 0.1864, - "step": 9599 - }, - { - "epoch": 0.49, - "grad_norm": 2.75974717231408, - "learning_rate": 1.086768348334066e-05, - "loss": 0.204, - "step": 9600 - }, - { - "epoch": 0.49, - "grad_norm": 0.966607763752332, - "learning_rate": 1.0866042714472136e-05, - "loss": 0.2106, - "step": 9601 - }, - { - "epoch": 0.49, - "grad_norm": 1.0667990702956014, - "learning_rate": 1.0864401922112155e-05, - "loss": 0.1905, - "step": 9602 - }, - { - "epoch": 0.49, - "grad_norm": 0.9604517721025619, - "learning_rate": 1.0862761106305222e-05, - "loss": 0.1987, - "step": 9603 - }, - { - "epoch": 0.49, - "grad_norm": 1.0185280849062157, - "learning_rate": 1.0861120267095846e-05, - "loss": 0.2039, - "step": 9604 - }, - { - "epoch": 0.49, - "grad_norm": 1.1082943502475084, - "learning_rate": 1.0859479404528532e-05, - "loss": 0.1873, - "step": 9605 - }, - { - "epoch": 0.49, - "grad_norm": 1.1452453729289234, - "learning_rate": 1.0857838518647794e-05, - "loss": 0.2176, - "step": 9606 - }, - { - "epoch": 0.49, - "grad_norm": 3.148710275207547, - "learning_rate": 1.0856197609498135e-05, - "loss": 0.1744, - "step": 9607 - }, - { - "epoch": 0.49, - "grad_norm": 1.209646023184379, - "learning_rate": 1.0854556677124066e-05, - "loss": 0.1627, - "step": 9608 - }, - { - "epoch": 0.49, - "grad_norm": 1.1650388011788848, - "learning_rate": 1.08529157215701e-05, - "loss": 0.1956, - "step": 9609 - }, - { - "epoch": 0.49, - "grad_norm": 1.157072205528815, - "learning_rate": 1.085127474288075e-05, - "loss": 0.1939, - "step": 9610 - }, - { - "epoch": 0.49, - "grad_norm": 2.648764821811985, - "learning_rate": 1.0849633741100522e-05, - "loss": 0.1991, - "step": 9611 - }, - { - "epoch": 0.49, - "grad_norm": 1.0288598778364175, - "learning_rate": 1.084799271627393e-05, - "loss": 0.2053, - "step": 9612 - }, - { - "epoch": 0.49, - "grad_norm": 1.1278542729437095, - "learning_rate": 1.0846351668445489e-05, - "loss": 0.1999, - "step": 9613 - }, - { - "epoch": 0.49, - "grad_norm": 1.457359559733764, - "learning_rate": 1.084471059765971e-05, - "loss": 0.2182, - "step": 9614 - }, - { - "epoch": 0.49, - "grad_norm": 1.185454539256383, - "learning_rate": 1.0843069503961112e-05, - "loss": 0.1964, - "step": 9615 - }, - { - "epoch": 0.49, - "grad_norm": 1.1001727164465225, - "learning_rate": 1.0841428387394204e-05, - "loss": 0.1753, - "step": 9616 - }, - { - "epoch": 0.49, - "grad_norm": 1.468981106103501, - "learning_rate": 1.0839787248003499e-05, - "loss": 0.2171, - "step": 9617 - }, - { - "epoch": 0.49, - "grad_norm": 1.2066098512363195, - "learning_rate": 1.0838146085833523e-05, - "loss": 0.1775, - "step": 9618 - }, - { - "epoch": 0.49, - "grad_norm": 1.3883135625315148, - "learning_rate": 1.0836504900928786e-05, - "loss": 0.1777, - "step": 9619 - }, - { - "epoch": 0.49, - "grad_norm": 1.1582429998119048, - "learning_rate": 1.0834863693333805e-05, - "loss": 0.1803, - "step": 9620 - }, - { - "epoch": 0.49, - "grad_norm": 1.076354165273277, - "learning_rate": 1.08332224630931e-05, - "loss": 0.1923, - "step": 9621 - }, - { - "epoch": 0.49, - "grad_norm": 0.9869213870487541, - "learning_rate": 1.083158121025119e-05, - "loss": 0.1957, - "step": 9622 - }, - { - "epoch": 0.49, - "grad_norm": 1.4017559780579356, - "learning_rate": 1.082993993485259e-05, - "loss": 0.2127, - "step": 9623 - }, - { - "epoch": 0.49, - "grad_norm": 0.9471222448724727, - "learning_rate": 1.0828298636941826e-05, - "loss": 0.1712, - "step": 9624 - }, - { - "epoch": 0.49, - "grad_norm": 1.3935679546422635, - "learning_rate": 1.0826657316563412e-05, - "loss": 0.1822, - "step": 9625 - }, - { - "epoch": 0.49, - "grad_norm": 1.1490294573199775, - "learning_rate": 1.082501597376187e-05, - "loss": 0.19, - "step": 9626 - }, - { - "epoch": 0.49, - "grad_norm": 1.1764376635965907, - "learning_rate": 1.0823374608581727e-05, - "loss": 0.1984, - "step": 9627 - }, - { - "epoch": 0.49, - "grad_norm": 1.6176490098619052, - "learning_rate": 1.0821733221067499e-05, - "loss": 0.1924, - "step": 9628 - }, - { - "epoch": 0.49, - "grad_norm": 1.1701847629385655, - "learning_rate": 1.082009181126371e-05, - "loss": 0.1885, - "step": 9629 - }, - { - "epoch": 0.49, - "grad_norm": 1.4873077019656005, - "learning_rate": 1.0818450379214887e-05, - "loss": 0.1864, - "step": 9630 - }, - { - "epoch": 0.49, - "grad_norm": 1.0821228362386008, - "learning_rate": 1.081680892496555e-05, - "loss": 0.1884, - "step": 9631 - }, - { - "epoch": 0.49, - "grad_norm": 1.0979545228674528, - "learning_rate": 1.0815167448560225e-05, - "loss": 0.1843, - "step": 9632 - }, - { - "epoch": 0.49, - "grad_norm": 1.2455347475804985, - "learning_rate": 1.0813525950043435e-05, - "loss": 0.1813, - "step": 9633 - }, - { - "epoch": 0.49, - "grad_norm": 3.978559329533385, - "learning_rate": 1.0811884429459708e-05, - "loss": 0.1883, - "step": 9634 - }, - { - "epoch": 0.49, - "grad_norm": 0.9707061312525472, - "learning_rate": 1.0810242886853572e-05, - "loss": 0.1923, - "step": 9635 - }, - { - "epoch": 0.49, - "grad_norm": 1.4835241358193412, - "learning_rate": 1.0808601322269553e-05, - "loss": 0.1855, - "step": 9636 - }, - { - "epoch": 0.49, - "grad_norm": 1.0247533098985855, - "learning_rate": 1.0806959735752174e-05, - "loss": 0.1938, - "step": 9637 - }, - { - "epoch": 0.49, - "grad_norm": 0.9173282273112893, - "learning_rate": 1.0805318127345968e-05, - "loss": 0.1907, - "step": 9638 - }, - { - "epoch": 0.49, - "grad_norm": 1.2942874387981254, - "learning_rate": 1.0803676497095463e-05, - "loss": 0.1993, - "step": 9639 - }, - { - "epoch": 0.49, - "grad_norm": 1.103891088845872, - "learning_rate": 1.0802034845045189e-05, - "loss": 0.1806, - "step": 9640 - }, - { - "epoch": 0.49, - "grad_norm": 0.8294224003058488, - "learning_rate": 1.0800393171239672e-05, - "loss": 0.1798, - "step": 9641 - }, - { - "epoch": 0.49, - "grad_norm": 1.6864174989057856, - "learning_rate": 1.0798751475723446e-05, - "loss": 0.1904, - "step": 9642 - }, - { - "epoch": 0.49, - "grad_norm": 1.3092171275756168, - "learning_rate": 1.0797109758541038e-05, - "loss": 0.201, - "step": 9643 - }, - { - "epoch": 0.49, - "grad_norm": 0.9974374522277218, - "learning_rate": 1.0795468019736988e-05, - "loss": 0.2134, - "step": 9644 - }, - { - "epoch": 0.49, - "grad_norm": 1.2165807216297406, - "learning_rate": 1.079382625935582e-05, - "loss": 0.175, - "step": 9645 - }, - { - "epoch": 0.49, - "grad_norm": 0.9718849350489902, - "learning_rate": 1.0792184477442072e-05, - "loss": 0.1899, - "step": 9646 - }, - { - "epoch": 0.49, - "grad_norm": 1.4698470200530558, - "learning_rate": 1.079054267404027e-05, - "loss": 0.1963, - "step": 9647 - }, - { - "epoch": 0.49, - "grad_norm": 1.1466382693672115, - "learning_rate": 1.078890084919496e-05, - "loss": 0.181, - "step": 9648 - }, - { - "epoch": 0.49, - "grad_norm": 1.25333634486384, - "learning_rate": 1.0787259002950665e-05, - "loss": 0.2133, - "step": 9649 - }, - { - "epoch": 0.49, - "grad_norm": 1.012534747933004, - "learning_rate": 1.0785617135351927e-05, - "loss": 0.1924, - "step": 9650 - }, - { - "epoch": 0.49, - "grad_norm": 1.1474468277559202, - "learning_rate": 1.0783975246443281e-05, - "loss": 0.209, - "step": 9651 - }, - { - "epoch": 0.49, - "grad_norm": 0.9345414739937334, - "learning_rate": 1.078233333626926e-05, - "loss": 0.1717, - "step": 9652 - }, - { - "epoch": 0.49, - "grad_norm": 1.5527860447119741, - "learning_rate": 1.0780691404874404e-05, - "loss": 0.1913, - "step": 9653 - }, - { - "epoch": 0.49, - "grad_norm": 0.9515322340897505, - "learning_rate": 1.077904945230325e-05, - "loss": 0.1982, - "step": 9654 - }, - { - "epoch": 0.49, - "grad_norm": 0.9235823339102696, - "learning_rate": 1.0777407478600334e-05, - "loss": 0.2162, - "step": 9655 - }, - { - "epoch": 0.49, - "grad_norm": 0.7539908144802698, - "learning_rate": 1.0775765483810199e-05, - "loss": 0.1876, - "step": 9656 - }, - { - "epoch": 0.49, - "grad_norm": 0.9304968353210401, - "learning_rate": 1.0774123467977379e-05, - "loss": 0.1866, - "step": 9657 - }, - { - "epoch": 0.49, - "grad_norm": 1.2604049295529116, - "learning_rate": 1.077248143114642e-05, - "loss": 0.1925, - "step": 9658 - }, - { - "epoch": 0.49, - "grad_norm": 0.9924552531913771, - "learning_rate": 1.0770839373361854e-05, - "loss": 0.1633, - "step": 9659 - }, - { - "epoch": 0.49, - "grad_norm": 4.364045262513066, - "learning_rate": 1.0769197294668228e-05, - "loss": 0.1889, - "step": 9660 - }, - { - "epoch": 0.49, - "grad_norm": 0.9236530495839659, - "learning_rate": 1.0767555195110082e-05, - "loss": 0.204, - "step": 9661 - }, - { - "epoch": 0.49, - "grad_norm": 1.0476753106491055, - "learning_rate": 1.0765913074731957e-05, - "loss": 0.175, - "step": 9662 - }, - { - "epoch": 0.49, - "grad_norm": 1.0026805236194896, - "learning_rate": 1.07642709335784e-05, - "loss": 0.1936, - "step": 9663 - }, - { - "epoch": 0.49, - "grad_norm": 0.9422222095805597, - "learning_rate": 1.0762628771693948e-05, - "loss": 0.2034, - "step": 9664 - }, - { - "epoch": 0.49, - "grad_norm": 0.9433156465199231, - "learning_rate": 1.0760986589123145e-05, - "loss": 0.2009, - "step": 9665 - }, - { - "epoch": 0.49, - "grad_norm": 1.2291693414472664, - "learning_rate": 1.0759344385910541e-05, - "loss": 0.1703, - "step": 9666 - }, - { - "epoch": 0.49, - "grad_norm": 1.0785063042674297, - "learning_rate": 1.0757702162100679e-05, - "loss": 0.1848, - "step": 9667 - }, - { - "epoch": 0.49, - "grad_norm": 0.949233765859445, - "learning_rate": 1.0756059917738102e-05, - "loss": 0.1871, - "step": 9668 - }, - { - "epoch": 0.49, - "grad_norm": 1.0232043255187737, - "learning_rate": 1.0754417652867357e-05, - "loss": 0.1785, - "step": 9669 - }, - { - "epoch": 0.49, - "grad_norm": 0.9992934331116861, - "learning_rate": 1.0752775367532988e-05, - "loss": 0.2028, - "step": 9670 - }, - { - "epoch": 0.49, - "grad_norm": 1.4295122213944318, - "learning_rate": 1.0751133061779545e-05, - "loss": 0.1792, - "step": 9671 - }, - { - "epoch": 0.49, - "grad_norm": 1.18761065388684, - "learning_rate": 1.074949073565158e-05, - "loss": 0.1577, - "step": 9672 - }, - { - "epoch": 0.49, - "grad_norm": 1.7470259053038004, - "learning_rate": 1.0747848389193633e-05, - "loss": 0.1993, - "step": 9673 - }, - { - "epoch": 0.49, - "grad_norm": 0.9904275822503751, - "learning_rate": 1.0746206022450256e-05, - "loss": 0.1931, - "step": 9674 - }, - { - "epoch": 0.49, - "grad_norm": 0.7834337706150454, - "learning_rate": 1.0744563635466e-05, - "loss": 0.1788, - "step": 9675 - }, - { - "epoch": 0.49, - "grad_norm": 0.7895005401380205, - "learning_rate": 1.0742921228285412e-05, - "loss": 0.1897, - "step": 9676 - }, - { - "epoch": 0.49, - "grad_norm": 1.283230030469254, - "learning_rate": 1.0741278800953045e-05, - "loss": 0.2225, - "step": 9677 - }, - { - "epoch": 0.49, - "grad_norm": 1.12162867780618, - "learning_rate": 1.0739636353513446e-05, - "loss": 0.1907, - "step": 9678 - }, - { - "epoch": 0.49, - "grad_norm": 1.0680028023000427, - "learning_rate": 1.0737993886011171e-05, - "loss": 0.2474, - "step": 9679 - }, - { - "epoch": 0.49, - "grad_norm": 0.8863078026342777, - "learning_rate": 1.0736351398490772e-05, - "loss": 0.1842, - "step": 9680 - }, - { - "epoch": 0.49, - "grad_norm": 1.1023494855573912, - "learning_rate": 1.0734708890996797e-05, - "loss": 0.1818, - "step": 9681 - }, - { - "epoch": 0.49, - "grad_norm": 0.9156047317575732, - "learning_rate": 1.0733066363573803e-05, - "loss": 0.1983, - "step": 9682 - }, - { - "epoch": 0.49, - "grad_norm": 1.105766073750055, - "learning_rate": 1.073142381626634e-05, - "loss": 0.1703, - "step": 9683 - }, - { - "epoch": 0.49, - "grad_norm": 1.2012940202707465, - "learning_rate": 1.0729781249118966e-05, - "loss": 0.2002, - "step": 9684 - }, - { - "epoch": 0.49, - "grad_norm": 1.162556841011124, - "learning_rate": 1.0728138662176237e-05, - "loss": 0.2084, - "step": 9685 - }, - { - "epoch": 0.49, - "grad_norm": 0.9256523144338665, - "learning_rate": 1.0726496055482705e-05, - "loss": 0.2, - "step": 9686 - }, - { - "epoch": 0.49, - "grad_norm": 0.9945108882251645, - "learning_rate": 1.0724853429082923e-05, - "loss": 0.187, - "step": 9687 - }, - { - "epoch": 0.49, - "grad_norm": 0.8823018088973252, - "learning_rate": 1.0723210783021454e-05, - "loss": 0.209, - "step": 9688 - }, - { - "epoch": 0.49, - "grad_norm": 1.0012462162192681, - "learning_rate": 1.072156811734285e-05, - "loss": 0.1664, - "step": 9689 - }, - { - "epoch": 0.49, - "grad_norm": 0.9969096264391915, - "learning_rate": 1.0719925432091671e-05, - "loss": 0.207, - "step": 9690 - }, - { - "epoch": 0.49, - "grad_norm": 1.8109754142384766, - "learning_rate": 1.0718282727312475e-05, - "loss": 0.1923, - "step": 9691 - }, - { - "epoch": 0.49, - "grad_norm": 1.2853994775292084, - "learning_rate": 1.0716640003049818e-05, - "loss": 0.1762, - "step": 9692 - }, - { - "epoch": 0.49, - "grad_norm": 1.1245990847029432, - "learning_rate": 1.0714997259348261e-05, - "loss": 0.1988, - "step": 9693 - }, - { - "epoch": 0.49, - "grad_norm": 0.8821098044855635, - "learning_rate": 1.0713354496252364e-05, - "loss": 0.1747, - "step": 9694 - }, - { - "epoch": 0.49, - "grad_norm": 1.3418193495044213, - "learning_rate": 1.0711711713806684e-05, - "loss": 0.2024, - "step": 9695 - }, - { - "epoch": 0.49, - "grad_norm": 1.121844576126626, - "learning_rate": 1.0710068912055784e-05, - "loss": 0.1763, - "step": 9696 - }, - { - "epoch": 0.49, - "grad_norm": 1.469846337002135, - "learning_rate": 1.0708426091044224e-05, - "loss": 0.1865, - "step": 9697 - }, - { - "epoch": 0.49, - "grad_norm": 1.127361301705819, - "learning_rate": 1.0706783250816568e-05, - "loss": 0.1752, - "step": 9698 - }, - { - "epoch": 0.49, - "grad_norm": 1.371137664811588, - "learning_rate": 1.0705140391417377e-05, - "loss": 0.1794, - "step": 9699 - }, - { - "epoch": 0.49, - "grad_norm": 0.9201500615344242, - "learning_rate": 1.070349751289121e-05, - "loss": 0.1792, - "step": 9700 - }, - { - "epoch": 0.49, - "grad_norm": 0.9205893271689509, - "learning_rate": 1.0701854615282635e-05, - "loss": 0.1859, - "step": 9701 - }, - { - "epoch": 0.49, - "grad_norm": 2.3951584715649386, - "learning_rate": 1.0700211698636214e-05, - "loss": 0.1859, - "step": 9702 - }, - { - "epoch": 0.49, - "grad_norm": 1.2693974342116394, - "learning_rate": 1.069856876299651e-05, - "loss": 0.1972, - "step": 9703 - }, - { - "epoch": 0.49, - "grad_norm": 0.9274877290317544, - "learning_rate": 1.0696925808408092e-05, - "loss": 0.1689, - "step": 9704 - }, - { - "epoch": 0.49, - "grad_norm": 1.2619010784277056, - "learning_rate": 1.0695282834915517e-05, - "loss": 0.1971, - "step": 9705 - }, - { - "epoch": 0.49, - "grad_norm": 1.1154770717828937, - "learning_rate": 1.069363984256336e-05, - "loss": 0.195, - "step": 9706 - }, - { - "epoch": 0.49, - "grad_norm": 0.8760721779934151, - "learning_rate": 1.0691996831396181e-05, - "loss": 0.1914, - "step": 9707 - }, - { - "epoch": 0.49, - "grad_norm": 1.1540281001625392, - "learning_rate": 1.0690353801458551e-05, - "loss": 0.2161, - "step": 9708 - }, - { - "epoch": 0.49, - "grad_norm": 1.0251076150045788, - "learning_rate": 1.0688710752795033e-05, - "loss": 0.1751, - "step": 9709 - }, - { - "epoch": 0.49, - "grad_norm": 0.8244986961316821, - "learning_rate": 1.0687067685450199e-05, - "loss": 0.1642, - "step": 9710 - }, - { - "epoch": 0.49, - "grad_norm": 0.814014152897162, - "learning_rate": 1.0685424599468615e-05, - "loss": 0.181, - "step": 9711 - }, - { - "epoch": 0.49, - "grad_norm": 1.2640957549475418, - "learning_rate": 1.068378149489485e-05, - "loss": 0.1815, - "step": 9712 - }, - { - "epoch": 0.49, - "grad_norm": 0.8867530504394332, - "learning_rate": 1.068213837177347e-05, - "loss": 0.1818, - "step": 9713 - }, - { - "epoch": 0.49, - "grad_norm": 0.8642166809082094, - "learning_rate": 1.068049523014905e-05, - "loss": 0.1707, - "step": 9714 - }, - { - "epoch": 0.49, - "grad_norm": 1.013203360223355, - "learning_rate": 1.067885207006616e-05, - "loss": 0.1836, - "step": 9715 - }, - { - "epoch": 0.49, - "grad_norm": 1.2534834176344625, - "learning_rate": 1.0677208891569366e-05, - "loss": 0.1988, - "step": 9716 - }, - { - "epoch": 0.49, - "grad_norm": 1.3915590820409383, - "learning_rate": 1.0675565694703248e-05, - "loss": 0.1985, - "step": 9717 - }, - { - "epoch": 0.49, - "grad_norm": 0.9889276453661713, - "learning_rate": 1.0673922479512366e-05, - "loss": 0.1814, - "step": 9718 - }, - { - "epoch": 0.49, - "grad_norm": 1.1748920823909086, - "learning_rate": 1.0672279246041301e-05, - "loss": 0.1878, - "step": 9719 - }, - { - "epoch": 0.49, - "grad_norm": 1.4392217921353778, - "learning_rate": 1.0670635994334626e-05, - "loss": 0.1757, - "step": 9720 - }, - { - "epoch": 0.49, - "grad_norm": 0.7471315556478759, - "learning_rate": 1.066899272443691e-05, - "loss": 0.1693, - "step": 9721 - }, - { - "epoch": 0.49, - "grad_norm": 1.1259472114454028, - "learning_rate": 1.0667349436392727e-05, - "loss": 0.1794, - "step": 9722 - }, - { - "epoch": 0.49, - "grad_norm": 1.1501304500378, - "learning_rate": 1.0665706130246654e-05, - "loss": 0.2131, - "step": 9723 - }, - { - "epoch": 0.49, - "grad_norm": 1.0239004877819975, - "learning_rate": 1.0664062806043266e-05, - "loss": 0.1724, - "step": 9724 - }, - { - "epoch": 0.49, - "grad_norm": 1.0414264832020326, - "learning_rate": 1.0662419463827136e-05, - "loss": 0.201, - "step": 9725 - }, - { - "epoch": 0.49, - "grad_norm": 1.3522003124483102, - "learning_rate": 1.066077610364284e-05, - "loss": 0.1844, - "step": 9726 - }, - { - "epoch": 0.49, - "grad_norm": 1.1639536779282773, - "learning_rate": 1.0659132725534958e-05, - "loss": 0.1931, - "step": 9727 - }, - { - "epoch": 0.49, - "grad_norm": 0.9436860780724251, - "learning_rate": 1.065748932954806e-05, - "loss": 0.1929, - "step": 9728 - }, - { - "epoch": 0.49, - "grad_norm": 1.273126733844216, - "learning_rate": 1.0655845915726728e-05, - "loss": 0.1848, - "step": 9729 - }, - { - "epoch": 0.49, - "grad_norm": 1.2985323865422416, - "learning_rate": 1.065420248411554e-05, - "loss": 0.1935, - "step": 9730 - }, - { - "epoch": 0.49, - "grad_norm": 1.039552057235806, - "learning_rate": 1.0652559034759069e-05, - "loss": 0.195, - "step": 9731 - }, - { - "epoch": 0.49, - "grad_norm": 0.9724179390813914, - "learning_rate": 1.0650915567701897e-05, - "loss": 0.1773, - "step": 9732 - }, - { - "epoch": 0.49, - "grad_norm": 2.31261878897922, - "learning_rate": 1.0649272082988609e-05, - "loss": 0.2152, - "step": 9733 - }, - { - "epoch": 0.49, - "grad_norm": 1.0956625730563676, - "learning_rate": 1.0647628580663775e-05, - "loss": 0.1902, - "step": 9734 - }, - { - "epoch": 0.5, - "grad_norm": 0.9542691101221298, - "learning_rate": 1.0645985060771978e-05, - "loss": 0.1743, - "step": 9735 - }, - { - "epoch": 0.5, - "grad_norm": 1.1992225194268242, - "learning_rate": 1.0644341523357802e-05, - "loss": 0.1901, - "step": 9736 - }, - { - "epoch": 0.5, - "grad_norm": 0.8999673120258931, - "learning_rate": 1.0642697968465827e-05, - "loss": 0.1845, - "step": 9737 - }, - { - "epoch": 0.5, - "grad_norm": 1.0087462698811394, - "learning_rate": 1.0641054396140631e-05, - "loss": 0.1856, - "step": 9738 - }, - { - "epoch": 0.5, - "grad_norm": 0.8581094685422574, - "learning_rate": 1.06394108064268e-05, - "loss": 0.2202, - "step": 9739 - }, - { - "epoch": 0.5, - "grad_norm": 1.1336111707300922, - "learning_rate": 1.0637767199368911e-05, - "loss": 0.1673, - "step": 9740 - }, - { - "epoch": 0.5, - "grad_norm": 0.814053132649836, - "learning_rate": 1.0636123575011555e-05, - "loss": 0.1984, - "step": 9741 - }, - { - "epoch": 0.5, - "grad_norm": 1.2560613817757404, - "learning_rate": 1.063447993339931e-05, - "loss": 0.1903, - "step": 9742 - }, - { - "epoch": 0.5, - "grad_norm": 1.1206558307710335, - "learning_rate": 1.0632836274576761e-05, - "loss": 0.1904, - "step": 9743 - }, - { - "epoch": 0.5, - "grad_norm": 0.7950750967837613, - "learning_rate": 1.0631192598588493e-05, - "loss": 0.1781, - "step": 9744 - }, - { - "epoch": 0.5, - "grad_norm": 0.7829207386631539, - "learning_rate": 1.062954890547909e-05, - "loss": 0.1681, - "step": 9745 - }, - { - "epoch": 0.5, - "grad_norm": 0.9252872638635755, - "learning_rate": 1.0627905195293135e-05, - "loss": 0.1822, - "step": 9746 - }, - { - "epoch": 0.5, - "grad_norm": 1.1785591768775445, - "learning_rate": 1.0626261468075218e-05, - "loss": 0.1863, - "step": 9747 - }, - { - "epoch": 0.5, - "grad_norm": 0.6476250713917159, - "learning_rate": 1.0624617723869921e-05, - "loss": 0.191, - "step": 9748 - }, - { - "epoch": 0.5, - "grad_norm": 1.0225638341945646, - "learning_rate": 1.0622973962721836e-05, - "loss": 0.1719, - "step": 9749 - }, - { - "epoch": 0.5, - "grad_norm": 1.2016612896236973, - "learning_rate": 1.0621330184675547e-05, - "loss": 0.202, - "step": 9750 - }, - { - "epoch": 0.5, - "grad_norm": 0.9244176686589038, - "learning_rate": 1.061968638977564e-05, - "loss": 0.192, - "step": 9751 - }, - { - "epoch": 0.5, - "grad_norm": 2.185460765223053, - "learning_rate": 1.0618042578066707e-05, - "loss": 0.1725, - "step": 9752 - }, - { - "epoch": 0.5, - "grad_norm": 1.0514921292392578, - "learning_rate": 1.0616398749593331e-05, - "loss": 0.2072, - "step": 9753 - }, - { - "epoch": 0.5, - "grad_norm": 1.1355402752532482, - "learning_rate": 1.0614754904400105e-05, - "loss": 0.1859, - "step": 9754 - }, - { - "epoch": 0.5, - "grad_norm": 0.8676669752768543, - "learning_rate": 1.0613111042531618e-05, - "loss": 0.1521, - "step": 9755 - }, - { - "epoch": 0.5, - "grad_norm": 1.9336257746652679, - "learning_rate": 1.061146716403246e-05, - "loss": 0.1879, - "step": 9756 - }, - { - "epoch": 0.5, - "grad_norm": 1.2460212844758167, - "learning_rate": 1.0609823268947219e-05, - "loss": 0.2039, - "step": 9757 - }, - { - "epoch": 0.5, - "grad_norm": 3.30362751558046, - "learning_rate": 1.0608179357320487e-05, - "loss": 0.2029, - "step": 9758 - }, - { - "epoch": 0.5, - "grad_norm": 1.1747846489666585, - "learning_rate": 1.0606535429196858e-05, - "loss": 0.2088, - "step": 9759 - }, - { - "epoch": 0.5, - "grad_norm": 0.9912826968645417, - "learning_rate": 1.060489148462092e-05, - "loss": 0.203, - "step": 9760 - }, - { - "epoch": 0.5, - "grad_norm": 2.554812461103568, - "learning_rate": 1.0603247523637268e-05, - "loss": 0.1879, - "step": 9761 - }, - { - "epoch": 0.5, - "grad_norm": 1.1857395296609805, - "learning_rate": 1.0601603546290491e-05, - "loss": 0.1882, - "step": 9762 - }, - { - "epoch": 0.5, - "grad_norm": 1.0603146410098505, - "learning_rate": 1.0599959552625186e-05, - "loss": 0.1918, - "step": 9763 - }, - { - "epoch": 0.5, - "grad_norm": 0.896561227828809, - "learning_rate": 1.0598315542685941e-05, - "loss": 0.1835, - "step": 9764 - }, - { - "epoch": 0.5, - "grad_norm": 0.9037701781341697, - "learning_rate": 1.0596671516517356e-05, - "loss": 0.1953, - "step": 9765 - }, - { - "epoch": 0.5, - "grad_norm": 1.0873238212270744, - "learning_rate": 1.059502747416402e-05, - "loss": 0.1801, - "step": 9766 - }, - { - "epoch": 0.5, - "grad_norm": 0.7932512597856124, - "learning_rate": 1.059338341567053e-05, - "loss": 0.1721, - "step": 9767 - }, - { - "epoch": 0.5, - "grad_norm": 0.9210138857139931, - "learning_rate": 1.0591739341081485e-05, - "loss": 0.1906, - "step": 9768 - }, - { - "epoch": 0.5, - "grad_norm": 1.224460682832701, - "learning_rate": 1.0590095250441473e-05, - "loss": 0.181, - "step": 9769 - }, - { - "epoch": 0.5, - "grad_norm": 0.9226760646689801, - "learning_rate": 1.0588451143795093e-05, - "loss": 0.181, - "step": 9770 - }, - { - "epoch": 0.5, - "grad_norm": 0.8404512315313271, - "learning_rate": 1.0586807021186946e-05, - "loss": 0.1965, - "step": 9771 - }, - { - "epoch": 0.5, - "grad_norm": 0.7582090020659907, - "learning_rate": 1.0585162882661624e-05, - "loss": 0.1869, - "step": 9772 - }, - { - "epoch": 0.5, - "grad_norm": 0.8639559963409217, - "learning_rate": 1.0583518728263726e-05, - "loss": 0.1993, - "step": 9773 - }, - { - "epoch": 0.5, - "grad_norm": 0.8684827736925508, - "learning_rate": 1.058187455803785e-05, - "loss": 0.1904, - "step": 9774 - }, - { - "epoch": 0.5, - "grad_norm": 6.543355549440279, - "learning_rate": 1.0580230372028593e-05, - "loss": 0.1883, - "step": 9775 - }, - { - "epoch": 0.5, - "grad_norm": 1.1117833572882903, - "learning_rate": 1.0578586170280554e-05, - "loss": 0.1982, - "step": 9776 - }, - { - "epoch": 0.5, - "grad_norm": 0.9471809713270811, - "learning_rate": 1.0576941952838334e-05, - "loss": 0.2013, - "step": 9777 - }, - { - "epoch": 0.5, - "grad_norm": 1.2033368512094136, - "learning_rate": 1.0575297719746533e-05, - "loss": 0.1684, - "step": 9778 - }, - { - "epoch": 0.5, - "grad_norm": 1.0747091930537156, - "learning_rate": 1.0573653471049745e-05, - "loss": 0.1869, - "step": 9779 - }, - { - "epoch": 0.5, - "grad_norm": 0.9710104972552251, - "learning_rate": 1.0572009206792575e-05, - "loss": 0.1758, - "step": 9780 - }, - { - "epoch": 0.5, - "grad_norm": 1.2635121948298544, - "learning_rate": 1.0570364927019623e-05, - "loss": 0.1872, - "step": 9781 - }, - { - "epoch": 0.5, - "grad_norm": 0.7568544286952503, - "learning_rate": 1.0568720631775491e-05, - "loss": 0.1767, - "step": 9782 - }, - { - "epoch": 0.5, - "grad_norm": 0.8659534324188721, - "learning_rate": 1.0567076321104776e-05, - "loss": 0.1758, - "step": 9783 - }, - { - "epoch": 0.5, - "grad_norm": 1.0559891149766256, - "learning_rate": 1.0565431995052089e-05, - "loss": 0.1908, - "step": 9784 - }, - { - "epoch": 0.5, - "grad_norm": 1.057283250817933, - "learning_rate": 1.0563787653662025e-05, - "loss": 0.2087, - "step": 9785 - }, - { - "epoch": 0.5, - "grad_norm": 0.8860916224318578, - "learning_rate": 1.0562143296979188e-05, - "loss": 0.1736, - "step": 9786 - }, - { - "epoch": 0.5, - "grad_norm": 0.859785432898717, - "learning_rate": 1.0560498925048186e-05, - "loss": 0.1838, - "step": 9787 - }, - { - "epoch": 0.5, - "grad_norm": 1.385780047711686, - "learning_rate": 1.0558854537913614e-05, - "loss": 0.1905, - "step": 9788 - }, - { - "epoch": 0.5, - "grad_norm": 0.9099122713281608, - "learning_rate": 1.0557210135620084e-05, - "loss": 0.1958, - "step": 9789 - }, - { - "epoch": 0.5, - "grad_norm": 1.0201497510398987, - "learning_rate": 1.0555565718212198e-05, - "loss": 0.1751, - "step": 9790 - }, - { - "epoch": 0.5, - "grad_norm": 1.0445946424152273, - "learning_rate": 1.0553921285734559e-05, - "loss": 0.1961, - "step": 9791 - }, - { - "epoch": 0.5, - "grad_norm": 0.7918993501345337, - "learning_rate": 1.0552276838231773e-05, - "loss": 0.166, - "step": 9792 - }, - { - "epoch": 0.5, - "grad_norm": 0.9163235676527428, - "learning_rate": 1.0550632375748448e-05, - "loss": 0.1755, - "step": 9793 - }, - { - "epoch": 0.5, - "grad_norm": 1.0610958812797693, - "learning_rate": 1.0548987898329188e-05, - "loss": 0.2183, - "step": 9794 - }, - { - "epoch": 0.5, - "grad_norm": 1.1659132117607722, - "learning_rate": 1.0547343406018602e-05, - "loss": 0.2157, - "step": 9795 - }, - { - "epoch": 0.5, - "grad_norm": 0.828123363788774, - "learning_rate": 1.0545698898861293e-05, - "loss": 0.1848, - "step": 9796 - }, - { - "epoch": 0.5, - "grad_norm": 0.8330494134593698, - "learning_rate": 1.0544054376901872e-05, - "loss": 0.1919, - "step": 9797 - }, - { - "epoch": 0.5, - "grad_norm": 0.9986865906064578, - "learning_rate": 1.0542409840184946e-05, - "loss": 0.1824, - "step": 9798 - }, - { - "epoch": 0.5, - "grad_norm": 2.431778286746509, - "learning_rate": 1.0540765288755124e-05, - "loss": 0.171, - "step": 9799 - }, - { - "epoch": 0.5, - "grad_norm": 1.3598909405812594, - "learning_rate": 1.053912072265701e-05, - "loss": 0.174, - "step": 9800 - }, - { - "epoch": 0.5, - "grad_norm": 0.8439246989389632, - "learning_rate": 1.0537476141935215e-05, - "loss": 0.1882, - "step": 9801 - }, - { - "epoch": 0.5, - "grad_norm": 1.1056060930525182, - "learning_rate": 1.053583154663435e-05, - "loss": 0.1848, - "step": 9802 - }, - { - "epoch": 0.5, - "grad_norm": 0.8568831388430186, - "learning_rate": 1.0534186936799024e-05, - "loss": 0.19, - "step": 9803 - }, - { - "epoch": 0.5, - "grad_norm": 0.881329670015039, - "learning_rate": 1.053254231247385e-05, - "loss": 0.1883, - "step": 9804 - }, - { - "epoch": 0.5, - "grad_norm": 0.8986303767656837, - "learning_rate": 1.0530897673703431e-05, - "loss": 0.1952, - "step": 9805 - }, - { - "epoch": 0.5, - "grad_norm": 1.053070481720899, - "learning_rate": 1.0529253020532386e-05, - "loss": 0.2114, - "step": 9806 - }, - { - "epoch": 0.5, - "grad_norm": 0.8058192743448597, - "learning_rate": 1.0527608353005324e-05, - "loss": 0.1872, - "step": 9807 - }, - { - "epoch": 0.5, - "grad_norm": 1.0561154218791682, - "learning_rate": 1.0525963671166852e-05, - "loss": 0.1821, - "step": 9808 - }, - { - "epoch": 0.5, - "grad_norm": 0.950268019639858, - "learning_rate": 1.0524318975061589e-05, - "loss": 0.1885, - "step": 9809 - }, - { - "epoch": 0.5, - "grad_norm": 1.0711119481698064, - "learning_rate": 1.0522674264734141e-05, - "loss": 0.1874, - "step": 9810 - }, - { - "epoch": 0.5, - "grad_norm": 1.4547965340786408, - "learning_rate": 1.0521029540229126e-05, - "loss": 0.1782, - "step": 9811 - }, - { - "epoch": 0.5, - "grad_norm": 0.8882553205810025, - "learning_rate": 1.0519384801591155e-05, - "loss": 0.1625, - "step": 9812 - }, - { - "epoch": 0.5, - "grad_norm": 0.9386707906152143, - "learning_rate": 1.0517740048864843e-05, - "loss": 0.2197, - "step": 9813 - }, - { - "epoch": 0.5, - "grad_norm": 1.481738485759149, - "learning_rate": 1.05160952820948e-05, - "loss": 0.1855, - "step": 9814 - }, - { - "epoch": 0.5, - "grad_norm": 1.3158177722380133, - "learning_rate": 1.0514450501325646e-05, - "loss": 0.1794, - "step": 9815 - }, - { - "epoch": 0.5, - "grad_norm": 0.8984572182708918, - "learning_rate": 1.0512805706601994e-05, - "loss": 0.173, - "step": 9816 - }, - { - "epoch": 0.5, - "grad_norm": 1.2354073521381275, - "learning_rate": 1.0511160897968456e-05, - "loss": 0.1829, - "step": 9817 - }, - { - "epoch": 0.5, - "grad_norm": 1.2088642384857178, - "learning_rate": 1.0509516075469648e-05, - "loss": 0.1863, - "step": 9818 - }, - { - "epoch": 0.5, - "grad_norm": 0.9717684108168861, - "learning_rate": 1.0507871239150192e-05, - "loss": 0.1976, - "step": 9819 - }, - { - "epoch": 0.5, - "grad_norm": 1.1378371016917381, - "learning_rate": 1.0506226389054697e-05, - "loss": 0.1918, - "step": 9820 - }, - { - "epoch": 0.5, - "grad_norm": 1.1337727830864175, - "learning_rate": 1.0504581525227784e-05, - "loss": 0.1701, - "step": 9821 - }, - { - "epoch": 0.5, - "grad_norm": 0.9734523957238299, - "learning_rate": 1.0502936647714068e-05, - "loss": 0.1675, - "step": 9822 - }, - { - "epoch": 0.5, - "grad_norm": 0.8085856071065838, - "learning_rate": 1.0501291756558166e-05, - "loss": 0.2105, - "step": 9823 - }, - { - "epoch": 0.5, - "grad_norm": 1.3614299113261774, - "learning_rate": 1.0499646851804698e-05, - "loss": 0.1949, - "step": 9824 - }, - { - "epoch": 0.5, - "grad_norm": 1.0225480331788612, - "learning_rate": 1.049800193349828e-05, - "loss": 0.1803, - "step": 9825 - }, - { - "epoch": 0.5, - "grad_norm": 1.1063594496079652, - "learning_rate": 1.0496357001683535e-05, - "loss": 0.2012, - "step": 9826 - }, - { - "epoch": 0.5, - "grad_norm": 0.8650630444598459, - "learning_rate": 1.0494712056405077e-05, - "loss": 0.1975, - "step": 9827 - }, - { - "epoch": 0.5, - "grad_norm": 1.2071697382974003, - "learning_rate": 1.0493067097707521e-05, - "loss": 0.173, - "step": 9828 - }, - { - "epoch": 0.5, - "grad_norm": 0.8907759542377793, - "learning_rate": 1.0491422125635497e-05, - "loss": 0.1606, - "step": 9829 - }, - { - "epoch": 0.5, - "grad_norm": 1.1407485010753793, - "learning_rate": 1.0489777140233619e-05, - "loss": 0.2085, - "step": 9830 - }, - { - "epoch": 0.5, - "grad_norm": 1.2018164434360048, - "learning_rate": 1.048813214154651e-05, - "loss": 0.1777, - "step": 9831 - }, - { - "epoch": 0.5, - "grad_norm": 0.8739314910355729, - "learning_rate": 1.0486487129618787e-05, - "loss": 0.1964, - "step": 9832 - }, - { - "epoch": 0.5, - "grad_norm": 1.2347078940200111, - "learning_rate": 1.0484842104495077e-05, - "loss": 0.1826, - "step": 9833 - }, - { - "epoch": 0.5, - "grad_norm": 0.9880699813876799, - "learning_rate": 1.0483197066219994e-05, - "loss": 0.1739, - "step": 9834 - }, - { - "epoch": 0.5, - "grad_norm": 1.5484746841825274, - "learning_rate": 1.0481552014838164e-05, - "loss": 0.2093, - "step": 9835 - }, - { - "epoch": 0.5, - "grad_norm": 0.8828003341975913, - "learning_rate": 1.0479906950394205e-05, - "loss": 0.174, - "step": 9836 - }, - { - "epoch": 0.5, - "grad_norm": 0.8714267237009181, - "learning_rate": 1.0478261872932747e-05, - "loss": 0.1865, - "step": 9837 - }, - { - "epoch": 0.5, - "grad_norm": 0.9416663230472567, - "learning_rate": 1.0476616782498408e-05, - "loss": 0.1954, - "step": 9838 - }, - { - "epoch": 0.5, - "grad_norm": 0.8907966044512139, - "learning_rate": 1.0474971679135812e-05, - "loss": 0.18, - "step": 9839 - }, - { - "epoch": 0.5, - "grad_norm": 1.0428955435248508, - "learning_rate": 1.0473326562889583e-05, - "loss": 0.2001, - "step": 9840 - }, - { - "epoch": 0.5, - "grad_norm": 0.8931635815421982, - "learning_rate": 1.047168143380434e-05, - "loss": 0.2256, - "step": 9841 - }, - { - "epoch": 0.5, - "grad_norm": 1.230855516835648, - "learning_rate": 1.0470036291924716e-05, - "loss": 0.1949, - "step": 9842 - }, - { - "epoch": 0.5, - "grad_norm": 0.9159697293692711, - "learning_rate": 1.046839113729533e-05, - "loss": 0.1648, - "step": 9843 - }, - { - "epoch": 0.5, - "grad_norm": 1.0587328554788622, - "learning_rate": 1.0466745969960808e-05, - "loss": 0.1693, - "step": 9844 - }, - { - "epoch": 0.5, - "grad_norm": 1.0236897380683918, - "learning_rate": 1.0465100789965774e-05, - "loss": 0.1741, - "step": 9845 - }, - { - "epoch": 0.5, - "grad_norm": 0.8364371860688766, - "learning_rate": 1.0463455597354857e-05, - "loss": 0.1745, - "step": 9846 - }, - { - "epoch": 0.5, - "grad_norm": 1.2590298353872946, - "learning_rate": 1.0461810392172678e-05, - "loss": 0.1973, - "step": 9847 - }, - { - "epoch": 0.5, - "grad_norm": 1.55764474316235, - "learning_rate": 1.046016517446387e-05, - "loss": 0.2304, - "step": 9848 - }, - { - "epoch": 0.5, - "grad_norm": 1.3950511436078774, - "learning_rate": 1.0458519944273051e-05, - "loss": 0.2017, - "step": 9849 - }, - { - "epoch": 0.5, - "grad_norm": 2.075205875836515, - "learning_rate": 1.0456874701644857e-05, - "loss": 0.1854, - "step": 9850 - }, - { - "epoch": 0.5, - "grad_norm": 1.1534536502690333, - "learning_rate": 1.0455229446623909e-05, - "loss": 0.188, - "step": 9851 - }, - { - "epoch": 0.5, - "grad_norm": 1.105654838126189, - "learning_rate": 1.0453584179254837e-05, - "loss": 0.2011, - "step": 9852 - }, - { - "epoch": 0.5, - "grad_norm": 1.2759741028043035, - "learning_rate": 1.0451938899582263e-05, - "loss": 0.2026, - "step": 9853 - }, - { - "epoch": 0.5, - "grad_norm": 1.3135892552693105, - "learning_rate": 1.0450293607650828e-05, - "loss": 0.1803, - "step": 9854 - }, - { - "epoch": 0.5, - "grad_norm": 1.4361511876030737, - "learning_rate": 1.044864830350515e-05, - "loss": 0.1854, - "step": 9855 - }, - { - "epoch": 0.5, - "grad_norm": 0.7257367038897319, - "learning_rate": 1.0447002987189863e-05, - "loss": 0.1817, - "step": 9856 - }, - { - "epoch": 0.5, - "grad_norm": 1.0461617761421993, - "learning_rate": 1.0445357658749596e-05, - "loss": 0.1761, - "step": 9857 - }, - { - "epoch": 0.5, - "grad_norm": 0.9720243915830117, - "learning_rate": 1.0443712318228973e-05, - "loss": 0.1935, - "step": 9858 - }, - { - "epoch": 0.5, - "grad_norm": 1.109979195338577, - "learning_rate": 1.044206696567263e-05, - "loss": 0.184, - "step": 9859 - }, - { - "epoch": 0.5, - "grad_norm": 1.952114468244844, - "learning_rate": 1.0440421601125196e-05, - "loss": 0.1996, - "step": 9860 - }, - { - "epoch": 0.5, - "grad_norm": 0.832335857365281, - "learning_rate": 1.0438776224631305e-05, - "loss": 0.1811, - "step": 9861 - }, - { - "epoch": 0.5, - "grad_norm": 1.04682268647991, - "learning_rate": 1.043713083623558e-05, - "loss": 0.1811, - "step": 9862 - }, - { - "epoch": 0.5, - "grad_norm": 2.700625011153412, - "learning_rate": 1.0435485435982655e-05, - "loss": 0.1839, - "step": 9863 - }, - { - "epoch": 0.5, - "grad_norm": 0.8833745933497529, - "learning_rate": 1.0433840023917166e-05, - "loss": 0.2041, - "step": 9864 - }, - { - "epoch": 0.5, - "grad_norm": 1.0260558036137535, - "learning_rate": 1.043219460008374e-05, - "loss": 0.1931, - "step": 9865 - }, - { - "epoch": 0.5, - "grad_norm": 1.158223230321419, - "learning_rate": 1.0430549164527012e-05, - "loss": 0.1954, - "step": 9866 - }, - { - "epoch": 0.5, - "grad_norm": 4.840651535607204, - "learning_rate": 1.0428903717291614e-05, - "loss": 0.1628, - "step": 9867 - }, - { - "epoch": 0.5, - "grad_norm": 0.8215057895386293, - "learning_rate": 1.0427258258422179e-05, - "loss": 0.1621, - "step": 9868 - }, - { - "epoch": 0.5, - "grad_norm": 0.9297959300807954, - "learning_rate": 1.042561278796334e-05, - "loss": 0.2035, - "step": 9869 - }, - { - "epoch": 0.5, - "grad_norm": 1.0375582587651102, - "learning_rate": 1.0423967305959727e-05, - "loss": 0.2245, - "step": 9870 - }, - { - "epoch": 0.5, - "grad_norm": 0.8712326630395105, - "learning_rate": 1.0422321812455977e-05, - "loss": 0.1941, - "step": 9871 - }, - { - "epoch": 0.5, - "grad_norm": 1.2359447371119525, - "learning_rate": 1.0420676307496727e-05, - "loss": 0.1969, - "step": 9872 - }, - { - "epoch": 0.5, - "grad_norm": 0.9577458563174123, - "learning_rate": 1.0419030791126608e-05, - "loss": 0.2073, - "step": 9873 - }, - { - "epoch": 0.5, - "grad_norm": 0.9015402021284342, - "learning_rate": 1.0417385263390253e-05, - "loss": 0.1743, - "step": 9874 - }, - { - "epoch": 0.5, - "grad_norm": 0.8146898383568317, - "learning_rate": 1.0415739724332301e-05, - "loss": 0.1706, - "step": 9875 - }, - { - "epoch": 0.5, - "grad_norm": 0.7583665868297698, - "learning_rate": 1.0414094173997382e-05, - "loss": 0.1644, - "step": 9876 - }, - { - "epoch": 0.5, - "grad_norm": 0.7536084868690823, - "learning_rate": 1.0412448612430139e-05, - "loss": 0.1932, - "step": 9877 - }, - { - "epoch": 0.5, - "grad_norm": 0.8179173485214218, - "learning_rate": 1.0410803039675203e-05, - "loss": 0.1711, - "step": 9878 - }, - { - "epoch": 0.5, - "grad_norm": 0.7474447497140159, - "learning_rate": 1.0409157455777212e-05, - "loss": 0.1742, - "step": 9879 - }, - { - "epoch": 0.5, - "grad_norm": 1.3573782407500192, - "learning_rate": 1.0407511860780798e-05, - "loss": 0.2061, - "step": 9880 - }, - { - "epoch": 0.5, - "grad_norm": 1.1693626671955326, - "learning_rate": 1.0405866254730607e-05, - "loss": 0.1809, - "step": 9881 - }, - { - "epoch": 0.5, - "grad_norm": 1.121973631958896, - "learning_rate": 1.0404220637671269e-05, - "loss": 0.2006, - "step": 9882 - }, - { - "epoch": 0.5, - "grad_norm": 0.9838023394438137, - "learning_rate": 1.0402575009647427e-05, - "loss": 0.184, - "step": 9883 - }, - { - "epoch": 0.5, - "grad_norm": 0.8474963997760555, - "learning_rate": 1.040092937070371e-05, - "loss": 0.1793, - "step": 9884 - }, - { - "epoch": 0.5, - "grad_norm": 1.436673796146176, - "learning_rate": 1.0399283720884761e-05, - "loss": 0.1649, - "step": 9885 - }, - { - "epoch": 0.5, - "grad_norm": 0.9966040144415655, - "learning_rate": 1.039763806023522e-05, - "loss": 0.1769, - "step": 9886 - }, - { - "epoch": 0.5, - "grad_norm": 1.6373730020474027, - "learning_rate": 1.0395992388799725e-05, - "loss": 0.1877, - "step": 9887 - }, - { - "epoch": 0.5, - "grad_norm": 2.883731825593106, - "learning_rate": 1.0394346706622915e-05, - "loss": 0.1512, - "step": 9888 - }, - { - "epoch": 0.5, - "grad_norm": 1.316877146392383, - "learning_rate": 1.0392701013749424e-05, - "loss": 0.211, - "step": 9889 - }, - { - "epoch": 0.5, - "grad_norm": 0.9359088500850488, - "learning_rate": 1.0391055310223899e-05, - "loss": 0.1746, - "step": 9890 - }, - { - "epoch": 0.5, - "grad_norm": 1.0825036335565394, - "learning_rate": 1.0389409596090975e-05, - "loss": 0.1844, - "step": 9891 - }, - { - "epoch": 0.5, - "grad_norm": 0.8675212267076646, - "learning_rate": 1.0387763871395298e-05, - "loss": 0.1681, - "step": 9892 - }, - { - "epoch": 0.5, - "grad_norm": 1.252490369140081, - "learning_rate": 1.0386118136181498e-05, - "loss": 0.1764, - "step": 9893 - }, - { - "epoch": 0.5, - "grad_norm": 0.8267953666195103, - "learning_rate": 1.0384472390494225e-05, - "loss": 0.1704, - "step": 9894 - }, - { - "epoch": 0.5, - "grad_norm": 1.1896296090305618, - "learning_rate": 1.0382826634378115e-05, - "loss": 0.1895, - "step": 9895 - }, - { - "epoch": 0.5, - "grad_norm": 0.8499202270123422, - "learning_rate": 1.0381180867877813e-05, - "loss": 0.1709, - "step": 9896 - }, - { - "epoch": 0.5, - "grad_norm": 1.6666646990052674, - "learning_rate": 1.037953509103796e-05, - "loss": 0.1928, - "step": 9897 - }, - { - "epoch": 0.5, - "grad_norm": 1.2907050050197117, - "learning_rate": 1.037788930390319e-05, - "loss": 0.1838, - "step": 9898 - }, - { - "epoch": 0.5, - "grad_norm": 0.9548489444956723, - "learning_rate": 1.0376243506518157e-05, - "loss": 0.1835, - "step": 9899 - }, - { - "epoch": 0.5, - "grad_norm": 1.2427355712972166, - "learning_rate": 1.0374597698927496e-05, - "loss": 0.1887, - "step": 9900 - }, - { - "epoch": 0.5, - "grad_norm": 1.0923649933816408, - "learning_rate": 1.0372951881175854e-05, - "loss": 0.2256, - "step": 9901 - }, - { - "epoch": 0.5, - "grad_norm": 1.4007054267078818, - "learning_rate": 1.0371306053307866e-05, - "loss": 0.1744, - "step": 9902 - }, - { - "epoch": 0.5, - "grad_norm": 0.8349455748931837, - "learning_rate": 1.0369660215368182e-05, - "loss": 0.1902, - "step": 9903 - }, - { - "epoch": 0.5, - "grad_norm": 1.160864993415897, - "learning_rate": 1.0368014367401447e-05, - "loss": 0.1748, - "step": 9904 - }, - { - "epoch": 0.5, - "grad_norm": 1.100514323505627, - "learning_rate": 1.0366368509452298e-05, - "loss": 0.1795, - "step": 9905 - }, - { - "epoch": 0.5, - "grad_norm": 1.3266089140201034, - "learning_rate": 1.0364722641565381e-05, - "loss": 0.1917, - "step": 9906 - }, - { - "epoch": 0.5, - "grad_norm": 1.6110809904091798, - "learning_rate": 1.0363076763785345e-05, - "loss": 0.1889, - "step": 9907 - }, - { - "epoch": 0.5, - "grad_norm": 0.9414698726238333, - "learning_rate": 1.0361430876156831e-05, - "loss": 0.1756, - "step": 9908 - }, - { - "epoch": 0.5, - "grad_norm": 1.2947285498046557, - "learning_rate": 1.0359784978724483e-05, - "loss": 0.2214, - "step": 9909 - }, - { - "epoch": 0.5, - "grad_norm": 1.014018441865324, - "learning_rate": 1.0358139071532949e-05, - "loss": 0.1785, - "step": 9910 - }, - { - "epoch": 0.5, - "grad_norm": 1.1017582642136696, - "learning_rate": 1.0356493154626868e-05, - "loss": 0.1778, - "step": 9911 - }, - { - "epoch": 0.5, - "grad_norm": 1.5116573330006802, - "learning_rate": 1.0354847228050895e-05, - "loss": 0.1851, - "step": 9912 - }, - { - "epoch": 0.5, - "grad_norm": 1.1288381460495436, - "learning_rate": 1.0353201291849668e-05, - "loss": 0.2002, - "step": 9913 - }, - { - "epoch": 0.5, - "grad_norm": 0.8981263323620646, - "learning_rate": 1.0351555346067836e-05, - "loss": 0.1919, - "step": 9914 - }, - { - "epoch": 0.5, - "grad_norm": 0.9193426516734726, - "learning_rate": 1.0349909390750046e-05, - "loss": 0.1826, - "step": 9915 - }, - { - "epoch": 0.5, - "grad_norm": 0.9227102536487921, - "learning_rate": 1.0348263425940945e-05, - "loss": 0.1691, - "step": 9916 - }, - { - "epoch": 0.5, - "grad_norm": 1.1041781716681, - "learning_rate": 1.034661745168518e-05, - "loss": 0.1606, - "step": 9917 - }, - { - "epoch": 0.5, - "grad_norm": 1.2902489109492576, - "learning_rate": 1.0344971468027397e-05, - "loss": 0.1863, - "step": 9918 - }, - { - "epoch": 0.5, - "grad_norm": 0.9399133950621475, - "learning_rate": 1.034332547501224e-05, - "loss": 0.1891, - "step": 9919 - }, - { - "epoch": 0.5, - "grad_norm": 1.2562837223830832, - "learning_rate": 1.034167947268436e-05, - "loss": 0.1842, - "step": 9920 - }, - { - "epoch": 0.5, - "grad_norm": 0.8729064282047535, - "learning_rate": 1.0340033461088408e-05, - "loss": 0.1671, - "step": 9921 - }, - { - "epoch": 0.5, - "grad_norm": 0.8439636188899999, - "learning_rate": 1.0338387440269029e-05, - "loss": 0.1943, - "step": 9922 - }, - { - "epoch": 0.5, - "grad_norm": 1.3023244038905142, - "learning_rate": 1.0336741410270872e-05, - "loss": 0.1724, - "step": 9923 - }, - { - "epoch": 0.5, - "grad_norm": 0.8552753076216781, - "learning_rate": 1.0335095371138582e-05, - "loss": 0.1997, - "step": 9924 - }, - { - "epoch": 0.5, - "grad_norm": 0.9254039051965883, - "learning_rate": 1.0333449322916812e-05, - "loss": 0.2074, - "step": 9925 - }, - { - "epoch": 0.5, - "grad_norm": 0.9900810976488515, - "learning_rate": 1.0331803265650212e-05, - "loss": 0.1736, - "step": 9926 - }, - { - "epoch": 0.5, - "grad_norm": 1.2135277999895162, - "learning_rate": 1.0330157199383428e-05, - "loss": 0.1773, - "step": 9927 - }, - { - "epoch": 0.5, - "grad_norm": 1.1341813241407668, - "learning_rate": 1.0328511124161111e-05, - "loss": 0.1741, - "step": 9928 - }, - { - "epoch": 0.5, - "grad_norm": 1.0080976235152903, - "learning_rate": 1.0326865040027914e-05, - "loss": 0.1836, - "step": 9929 - }, - { - "epoch": 0.5, - "grad_norm": 0.9764000986623692, - "learning_rate": 1.0325218947028483e-05, - "loss": 0.2, - "step": 9930 - }, - { - "epoch": 0.51, - "grad_norm": 1.0610741734776916, - "learning_rate": 1.032357284520747e-05, - "loss": 0.2032, - "step": 9931 - }, - { - "epoch": 0.51, - "grad_norm": 1.168931745220606, - "learning_rate": 1.0321926734609525e-05, - "loss": 0.1873, - "step": 9932 - }, - { - "epoch": 0.51, - "grad_norm": 1.2369314036165708, - "learning_rate": 1.0320280615279297e-05, - "loss": 0.1899, - "step": 9933 - }, - { - "epoch": 0.51, - "grad_norm": 1.7069155450925078, - "learning_rate": 1.031863448726144e-05, - "loss": 0.1916, - "step": 9934 - }, - { - "epoch": 0.51, - "grad_norm": 1.1104411774982685, - "learning_rate": 1.0316988350600608e-05, - "loss": 0.1847, - "step": 9935 - }, - { - "epoch": 0.51, - "grad_norm": 1.295191707646736, - "learning_rate": 1.0315342205341448e-05, - "loss": 0.2246, - "step": 9936 - }, - { - "epoch": 0.51, - "grad_norm": 0.9595382596028376, - "learning_rate": 1.031369605152861e-05, - "loss": 0.1805, - "step": 9937 - }, - { - "epoch": 0.51, - "grad_norm": 1.3007792169988164, - "learning_rate": 1.031204988920675e-05, - "loss": 0.1858, - "step": 9938 - }, - { - "epoch": 0.51, - "grad_norm": 1.3047610591721563, - "learning_rate": 1.031040371842052e-05, - "loss": 0.1952, - "step": 9939 - }, - { - "epoch": 0.51, - "grad_norm": 2.094211112565609, - "learning_rate": 1.0308757539214573e-05, - "loss": 0.1874, - "step": 9940 - }, - { - "epoch": 0.51, - "grad_norm": 2.831448138141734, - "learning_rate": 1.0307111351633556e-05, - "loss": 0.1948, - "step": 9941 - }, - { - "epoch": 0.51, - "grad_norm": 1.1926966402724417, - "learning_rate": 1.030546515572213e-05, - "loss": 0.1982, - "step": 9942 - }, - { - "epoch": 0.51, - "grad_norm": 1.4148961719797137, - "learning_rate": 1.0303818951524941e-05, - "loss": 0.1655, - "step": 9943 - }, - { - "epoch": 0.51, - "grad_norm": 2.1868873442922836, - "learning_rate": 1.0302172739086651e-05, - "loss": 0.193, - "step": 9944 - }, - { - "epoch": 0.51, - "grad_norm": 1.0994668253712192, - "learning_rate": 1.0300526518451906e-05, - "loss": 0.2119, - "step": 9945 - }, - { - "epoch": 0.51, - "grad_norm": 0.7749159270480126, - "learning_rate": 1.0298880289665359e-05, - "loss": 0.1655, - "step": 9946 - }, - { - "epoch": 0.51, - "grad_norm": 1.2181962641100348, - "learning_rate": 1.029723405277167e-05, - "loss": 0.1956, - "step": 9947 - }, - { - "epoch": 0.51, - "grad_norm": 1.3402104542718751, - "learning_rate": 1.029558780781549e-05, - "loss": 0.1954, - "step": 9948 - }, - { - "epoch": 0.51, - "grad_norm": 1.0498104781581756, - "learning_rate": 1.0293941554841475e-05, - "loss": 0.1621, - "step": 9949 - }, - { - "epoch": 0.51, - "grad_norm": 0.8955502875619714, - "learning_rate": 1.0292295293894279e-05, - "loss": 0.1815, - "step": 9950 - }, - { - "epoch": 0.51, - "grad_norm": 1.4261451905653821, - "learning_rate": 1.0290649025018553e-05, - "loss": 0.1885, - "step": 9951 - }, - { - "epoch": 0.51, - "grad_norm": 1.1050297705764032, - "learning_rate": 1.0289002748258961e-05, - "loss": 0.1985, - "step": 9952 - }, - { - "epoch": 0.51, - "grad_norm": 0.9725277146089406, - "learning_rate": 1.0287356463660152e-05, - "loss": 0.1828, - "step": 9953 - }, - { - "epoch": 0.51, - "grad_norm": 1.2985263149088095, - "learning_rate": 1.0285710171266778e-05, - "loss": 0.1933, - "step": 9954 - }, - { - "epoch": 0.51, - "grad_norm": 0.8226489022264653, - "learning_rate": 1.0284063871123504e-05, - "loss": 0.1784, - "step": 9955 - }, - { - "epoch": 0.51, - "grad_norm": 2.5280430717726388, - "learning_rate": 1.0282417563274982e-05, - "loss": 0.1858, - "step": 9956 - }, - { - "epoch": 0.51, - "grad_norm": 1.4197035876160895, - "learning_rate": 1.0280771247765865e-05, - "loss": 0.1906, - "step": 9957 - }, - { - "epoch": 0.51, - "grad_norm": 1.1898992638116423, - "learning_rate": 1.0279124924640813e-05, - "loss": 0.1845, - "step": 9958 - }, - { - "epoch": 0.51, - "grad_norm": 1.301609927287, - "learning_rate": 1.027747859394448e-05, - "loss": 0.1846, - "step": 9959 - }, - { - "epoch": 0.51, - "grad_norm": 1.03226421368799, - "learning_rate": 1.0275832255721527e-05, - "loss": 0.1687, - "step": 9960 - }, - { - "epoch": 0.51, - "grad_norm": 1.1475592463345812, - "learning_rate": 1.0274185910016608e-05, - "loss": 0.2076, - "step": 9961 - }, - { - "epoch": 0.51, - "grad_norm": 0.857957466398941, - "learning_rate": 1.0272539556874381e-05, - "loss": 0.1826, - "step": 9962 - }, - { - "epoch": 0.51, - "grad_norm": 0.8451749170398627, - "learning_rate": 1.0270893196339499e-05, - "loss": 0.1783, - "step": 9963 - }, - { - "epoch": 0.51, - "grad_norm": 1.0211113370636866, - "learning_rate": 1.026924682845663e-05, - "loss": 0.1852, - "step": 9964 - }, - { - "epoch": 0.51, - "grad_norm": 1.3551642322630637, - "learning_rate": 1.0267600453270422e-05, - "loss": 0.1822, - "step": 9965 - }, - { - "epoch": 0.51, - "grad_norm": 0.9282330658954546, - "learning_rate": 1.0265954070825536e-05, - "loss": 0.1837, - "step": 9966 - }, - { - "epoch": 0.51, - "grad_norm": 1.555358636297375, - "learning_rate": 1.0264307681166634e-05, - "loss": 0.1748, - "step": 9967 - }, - { - "epoch": 0.51, - "grad_norm": 1.5972130118339416, - "learning_rate": 1.0262661284338367e-05, - "loss": 0.1762, - "step": 9968 - }, - { - "epoch": 0.51, - "grad_norm": 1.001103276739541, - "learning_rate": 1.02610148803854e-05, - "loss": 0.1952, - "step": 9969 - }, - { - "epoch": 0.51, - "grad_norm": 1.1031313216687382, - "learning_rate": 1.025936846935239e-05, - "loss": 0.2099, - "step": 9970 - }, - { - "epoch": 0.51, - "grad_norm": 1.2314792299673958, - "learning_rate": 1.0257722051283998e-05, - "loss": 0.1816, - "step": 9971 - }, - { - "epoch": 0.51, - "grad_norm": 1.061943164826423, - "learning_rate": 1.0256075626224876e-05, - "loss": 0.1715, - "step": 9972 - }, - { - "epoch": 0.51, - "grad_norm": 0.9307765106478149, - "learning_rate": 1.0254429194219694e-05, - "loss": 0.1957, - "step": 9973 - }, - { - "epoch": 0.51, - "grad_norm": 0.9256565304503027, - "learning_rate": 1.02527827553131e-05, - "loss": 0.2083, - "step": 9974 - }, - { - "epoch": 0.51, - "grad_norm": 1.4219555269572766, - "learning_rate": 1.0251136309549764e-05, - "loss": 0.1665, - "step": 9975 - }, - { - "epoch": 0.51, - "grad_norm": 1.1394062648630405, - "learning_rate": 1.0249489856974335e-05, - "loss": 0.1743, - "step": 9976 - }, - { - "epoch": 0.51, - "grad_norm": 0.8884799606782805, - "learning_rate": 1.0247843397631485e-05, - "loss": 0.2081, - "step": 9977 - }, - { - "epoch": 0.51, - "grad_norm": 1.0280662741511235, - "learning_rate": 1.0246196931565869e-05, - "loss": 0.1822, - "step": 9978 - }, - { - "epoch": 0.51, - "grad_norm": 2.1389930656547747, - "learning_rate": 1.0244550458822145e-05, - "loss": 0.1817, - "step": 9979 - }, - { - "epoch": 0.51, - "grad_norm": 0.8778846451344755, - "learning_rate": 1.0242903979444976e-05, - "loss": 0.1585, - "step": 9980 - }, - { - "epoch": 0.51, - "grad_norm": 0.9142638715956067, - "learning_rate": 1.0241257493479022e-05, - "loss": 0.2058, - "step": 9981 - }, - { - "epoch": 0.51, - "grad_norm": 1.18773644917128, - "learning_rate": 1.0239611000968948e-05, - "loss": 0.2521, - "step": 9982 - }, - { - "epoch": 0.51, - "grad_norm": 1.0356077541986568, - "learning_rate": 1.023796450195941e-05, - "loss": 0.1737, - "step": 9983 - }, - { - "epoch": 0.51, - "grad_norm": 1.1727486639573697, - "learning_rate": 1.0236317996495074e-05, - "loss": 0.1765, - "step": 9984 - }, - { - "epoch": 0.51, - "grad_norm": 0.9419796126948489, - "learning_rate": 1.0234671484620595e-05, - "loss": 0.1902, - "step": 9985 - }, - { - "epoch": 0.51, - "grad_norm": 0.7801012010400616, - "learning_rate": 1.023302496638064e-05, - "loss": 0.1929, - "step": 9986 - }, - { - "epoch": 0.51, - "grad_norm": 1.013663718762519, - "learning_rate": 1.023137844181987e-05, - "loss": 0.1936, - "step": 9987 - }, - { - "epoch": 0.51, - "grad_norm": 1.386473113559732, - "learning_rate": 1.022973191098295e-05, - "loss": 0.1757, - "step": 9988 - }, - { - "epoch": 0.51, - "grad_norm": 0.9898436861099714, - "learning_rate": 1.0228085373914534e-05, - "loss": 0.1833, - "step": 9989 - }, - { - "epoch": 0.51, - "grad_norm": 0.8509169739451241, - "learning_rate": 1.022643883065929e-05, - "loss": 0.1759, - "step": 9990 - }, - { - "epoch": 0.51, - "grad_norm": 0.8572529282667672, - "learning_rate": 1.0224792281261883e-05, - "loss": 0.1923, - "step": 9991 - }, - { - "epoch": 0.51, - "grad_norm": 2.52936893410723, - "learning_rate": 1.0223145725766972e-05, - "loss": 0.215, - "step": 9992 - }, - { - "epoch": 0.51, - "grad_norm": 1.7402502741709267, - "learning_rate": 1.022149916421922e-05, - "loss": 0.1867, - "step": 9993 - }, - { - "epoch": 0.51, - "grad_norm": 1.4300609361053354, - "learning_rate": 1.0219852596663287e-05, - "loss": 0.1933, - "step": 9994 - }, - { - "epoch": 0.51, - "grad_norm": 1.0778884603823187, - "learning_rate": 1.0218206023143843e-05, - "loss": 0.1901, - "step": 9995 - }, - { - "epoch": 0.51, - "grad_norm": 1.0155397155807473, - "learning_rate": 1.0216559443705549e-05, - "loss": 0.1654, - "step": 9996 - }, - { - "epoch": 0.51, - "grad_norm": 1.2284066582722302, - "learning_rate": 1.0214912858393069e-05, - "loss": 0.182, - "step": 9997 - }, - { - "epoch": 0.51, - "grad_norm": 1.0145416710485446, - "learning_rate": 1.0213266267251063e-05, - "loss": 0.1879, - "step": 9998 - }, - { - "epoch": 0.51, - "grad_norm": 1.19001894105872, - "learning_rate": 1.0211619670324196e-05, - "loss": 0.1792, - "step": 9999 - }, - { - "epoch": 0.51, - "grad_norm": 0.970080459098825, - "learning_rate": 1.0209973067657138e-05, - "loss": 0.1814, - "step": 10000 - }, - { - "epoch": 0.51, - "grad_norm": 1.0951474234469627, - "learning_rate": 1.0208326459294544e-05, - "loss": 0.189, - "step": 10001 - }, - { - "epoch": 0.51, - "grad_norm": 0.8388440522398718, - "learning_rate": 1.0206679845281086e-05, - "loss": 0.2007, - "step": 10002 - }, - { - "epoch": 0.51, - "grad_norm": 1.0514825151087455, - "learning_rate": 1.0205033225661425e-05, - "loss": 0.1738, - "step": 10003 - }, - { - "epoch": 0.51, - "grad_norm": 0.9795950508541672, - "learning_rate": 1.0203386600480225e-05, - "loss": 0.1931, - "step": 10004 - }, - { - "epoch": 0.51, - "grad_norm": 3.335975532830077, - "learning_rate": 1.0201739969782154e-05, - "loss": 0.1905, - "step": 10005 - }, - { - "epoch": 0.51, - "grad_norm": 0.9444491493237841, - "learning_rate": 1.0200093333611877e-05, - "loss": 0.202, - "step": 10006 - }, - { - "epoch": 0.51, - "grad_norm": 1.8810868878895413, - "learning_rate": 1.0198446692014052e-05, - "loss": 0.1995, - "step": 10007 - }, - { - "epoch": 0.51, - "grad_norm": 0.9449853646973742, - "learning_rate": 1.019680004503335e-05, - "loss": 0.1927, - "step": 10008 - }, - { - "epoch": 0.51, - "grad_norm": 3.285770560807395, - "learning_rate": 1.0195153392714439e-05, - "loss": 0.2011, - "step": 10009 - }, - { - "epoch": 0.51, - "grad_norm": 1.4409911905410049, - "learning_rate": 1.019350673510198e-05, - "loss": 0.1609, - "step": 10010 - }, - { - "epoch": 0.51, - "grad_norm": 1.1450798008075653, - "learning_rate": 1.0191860072240638e-05, - "loss": 0.1909, - "step": 10011 - }, - { - "epoch": 0.51, - "grad_norm": 1.2757295834893168, - "learning_rate": 1.019021340417508e-05, - "loss": 0.1918, - "step": 10012 - }, - { - "epoch": 0.51, - "grad_norm": 0.9584243701572146, - "learning_rate": 1.0188566730949977e-05, - "loss": 0.1855, - "step": 10013 - }, - { - "epoch": 0.51, - "grad_norm": 1.1818591305464108, - "learning_rate": 1.0186920052609988e-05, - "loss": 0.1811, - "step": 10014 - }, - { - "epoch": 0.51, - "grad_norm": 1.3379051075950539, - "learning_rate": 1.0185273369199781e-05, - "loss": 0.2011, - "step": 10015 - }, - { - "epoch": 0.51, - "grad_norm": 0.7753559912686392, - "learning_rate": 1.0183626680764023e-05, - "loss": 0.1962, - "step": 10016 - }, - { - "epoch": 0.51, - "grad_norm": 0.9930898004506734, - "learning_rate": 1.0181979987347383e-05, - "loss": 0.178, - "step": 10017 - }, - { - "epoch": 0.51, - "grad_norm": 0.9906395873333926, - "learning_rate": 1.0180333288994526e-05, - "loss": 0.1679, - "step": 10018 - }, - { - "epoch": 0.51, - "grad_norm": 1.1841086133641656, - "learning_rate": 1.0178686585750117e-05, - "loss": 0.2037, - "step": 10019 - }, - { - "epoch": 0.51, - "grad_norm": 0.9662665276637551, - "learning_rate": 1.0177039877658825e-05, - "loss": 0.1944, - "step": 10020 - }, - { - "epoch": 0.51, - "grad_norm": 1.092277086848292, - "learning_rate": 1.0175393164765315e-05, - "loss": 0.1799, - "step": 10021 - }, - { - "epoch": 0.51, - "grad_norm": 0.9675325735824981, - "learning_rate": 1.0173746447114257e-05, - "loss": 0.1645, - "step": 10022 - }, - { - "epoch": 0.51, - "grad_norm": 1.1363807412961018, - "learning_rate": 1.017209972475032e-05, - "loss": 0.1859, - "step": 10023 - }, - { - "epoch": 0.51, - "grad_norm": 1.1286346321271528, - "learning_rate": 1.0170452997718161e-05, - "loss": 0.1827, - "step": 10024 - }, - { - "epoch": 0.51, - "grad_norm": 1.0493532996425716, - "learning_rate": 1.0168806266062459e-05, - "loss": 0.1824, - "step": 10025 - }, - { - "epoch": 0.51, - "grad_norm": 2.5678128545444037, - "learning_rate": 1.0167159529827876e-05, - "loss": 0.1938, - "step": 10026 - }, - { - "epoch": 0.51, - "grad_norm": 0.9157850060664193, - "learning_rate": 1.0165512789059084e-05, - "loss": 0.2014, - "step": 10027 - }, - { - "epoch": 0.51, - "grad_norm": 1.1309156120661967, - "learning_rate": 1.0163866043800748e-05, - "loss": 0.2063, - "step": 10028 - }, - { - "epoch": 0.51, - "grad_norm": 1.25804053030035, - "learning_rate": 1.0162219294097531e-05, - "loss": 0.1726, - "step": 10029 - }, - { - "epoch": 0.51, - "grad_norm": 1.3146522587286733, - "learning_rate": 1.0160572539994111e-05, - "loss": 0.1736, - "step": 10030 - }, - { - "epoch": 0.51, - "grad_norm": 1.140177136353688, - "learning_rate": 1.015892578153515e-05, - "loss": 0.1942, - "step": 10031 - }, - { - "epoch": 0.51, - "grad_norm": 0.9510605440053564, - "learning_rate": 1.015727901876532e-05, - "loss": 0.1731, - "step": 10032 - }, - { - "epoch": 0.51, - "grad_norm": 0.9945450978518271, - "learning_rate": 1.0155632251729289e-05, - "loss": 0.2, - "step": 10033 - }, - { - "epoch": 0.51, - "grad_norm": 0.894224695698637, - "learning_rate": 1.015398548047172e-05, - "loss": 0.1977, - "step": 10034 - }, - { - "epoch": 0.51, - "grad_norm": 1.142454133947765, - "learning_rate": 1.0152338705037288e-05, - "loss": 0.1869, - "step": 10035 - }, - { - "epoch": 0.51, - "grad_norm": 0.976074582797507, - "learning_rate": 1.0150691925470661e-05, - "loss": 0.1678, - "step": 10036 - }, - { - "epoch": 0.51, - "grad_norm": 1.110742325414474, - "learning_rate": 1.0149045141816507e-05, - "loss": 0.1991, - "step": 10037 - }, - { - "epoch": 0.51, - "grad_norm": 2.5073131663612216, - "learning_rate": 1.0147398354119493e-05, - "loss": 0.2022, - "step": 10038 - }, - { - "epoch": 0.51, - "grad_norm": 1.1136933742481399, - "learning_rate": 1.0145751562424293e-05, - "loss": 0.1838, - "step": 10039 - }, - { - "epoch": 0.51, - "grad_norm": 0.8396311826471067, - "learning_rate": 1.0144104766775574e-05, - "loss": 0.1648, - "step": 10040 - }, - { - "epoch": 0.51, - "grad_norm": 1.5889142219711996, - "learning_rate": 1.0142457967218004e-05, - "loss": 0.1527, - "step": 10041 - }, - { - "epoch": 0.51, - "grad_norm": 1.5526245240475558, - "learning_rate": 1.0140811163796251e-05, - "loss": 0.1834, - "step": 10042 - }, - { - "epoch": 0.51, - "grad_norm": 1.4435183341553837, - "learning_rate": 1.0139164356554991e-05, - "loss": 0.18, - "step": 10043 - }, - { - "epoch": 0.51, - "grad_norm": 0.8552306532750134, - "learning_rate": 1.0137517545538889e-05, - "loss": 0.2122, - "step": 10044 - }, - { - "epoch": 0.51, - "grad_norm": 1.0819198266646604, - "learning_rate": 1.0135870730792614e-05, - "loss": 0.1571, - "step": 10045 - }, - { - "epoch": 0.51, - "grad_norm": 1.2645867726469782, - "learning_rate": 1.0134223912360841e-05, - "loss": 0.1843, - "step": 10046 - }, - { - "epoch": 0.51, - "grad_norm": 0.9493612997745103, - "learning_rate": 1.013257709028823e-05, - "loss": 0.1805, - "step": 10047 - }, - { - "epoch": 0.51, - "grad_norm": 0.9482335058184064, - "learning_rate": 1.0130930264619464e-05, - "loss": 0.1744, - "step": 10048 - }, - { - "epoch": 0.51, - "grad_norm": 1.0100789781159738, - "learning_rate": 1.0129283435399209e-05, - "loss": 0.2131, - "step": 10049 - }, - { - "epoch": 0.51, - "grad_norm": 1.210160492224197, - "learning_rate": 1.0127636602672129e-05, - "loss": 0.1801, - "step": 10050 - }, - { - "epoch": 0.51, - "grad_norm": 1.0121260518070379, - "learning_rate": 1.01259897664829e-05, - "loss": 0.1851, - "step": 10051 - }, - { - "epoch": 0.51, - "grad_norm": 1.0048530916729317, - "learning_rate": 1.0124342926876191e-05, - "loss": 0.193, - "step": 10052 - }, - { - "epoch": 0.51, - "grad_norm": 1.5781470869665752, - "learning_rate": 1.0122696083896675e-05, - "loss": 0.1884, - "step": 10053 - }, - { - "epoch": 0.51, - "grad_norm": 1.3276050210602262, - "learning_rate": 1.012104923758902e-05, - "loss": 0.178, - "step": 10054 - }, - { - "epoch": 0.51, - "grad_norm": 1.7500154198232776, - "learning_rate": 1.0119402387997896e-05, - "loss": 0.1751, - "step": 10055 - }, - { - "epoch": 0.51, - "grad_norm": 1.1186441647606769, - "learning_rate": 1.0117755535167976e-05, - "loss": 0.1866, - "step": 10056 - }, - { - "epoch": 0.51, - "grad_norm": 1.0804238144692095, - "learning_rate": 1.0116108679143932e-05, - "loss": 0.2126, - "step": 10057 - }, - { - "epoch": 0.51, - "grad_norm": 0.8679185570128787, - "learning_rate": 1.0114461819970435e-05, - "loss": 0.1635, - "step": 10058 - }, - { - "epoch": 0.51, - "grad_norm": 2.012499264568714, - "learning_rate": 1.0112814957692151e-05, - "loss": 0.1922, - "step": 10059 - }, - { - "epoch": 0.51, - "grad_norm": 1.2950140590211783, - "learning_rate": 1.0111168092353755e-05, - "loss": 0.1999, - "step": 10060 - }, - { - "epoch": 0.51, - "grad_norm": 1.4073424409836586, - "learning_rate": 1.010952122399992e-05, - "loss": 0.1851, - "step": 10061 - }, - { - "epoch": 0.51, - "grad_norm": 0.9568381123647998, - "learning_rate": 1.0107874352675318e-05, - "loss": 0.1821, - "step": 10062 - }, - { - "epoch": 0.51, - "grad_norm": 1.262395675575379, - "learning_rate": 1.0106227478424616e-05, - "loss": 0.2034, - "step": 10063 - }, - { - "epoch": 0.51, - "grad_norm": 0.9072944589352504, - "learning_rate": 1.0104580601292484e-05, - "loss": 0.1892, - "step": 10064 - }, - { - "epoch": 0.51, - "grad_norm": 0.9383135405528775, - "learning_rate": 1.01029337213236e-05, - "loss": 0.1723, - "step": 10065 - }, - { - "epoch": 0.51, - "grad_norm": 1.0190912617277366, - "learning_rate": 1.0101286838562634e-05, - "loss": 0.1561, - "step": 10066 - }, - { - "epoch": 0.51, - "grad_norm": 1.0876522637918589, - "learning_rate": 1.0099639953054256e-05, - "loss": 0.2164, - "step": 10067 - }, - { - "epoch": 0.51, - "grad_norm": 1.1657087864947426, - "learning_rate": 1.0097993064843138e-05, - "loss": 0.1862, - "step": 10068 - }, - { - "epoch": 0.51, - "grad_norm": 1.273406700391757, - "learning_rate": 1.0096346173973951e-05, - "loss": 0.2154, - "step": 10069 - }, - { - "epoch": 0.51, - "grad_norm": 1.1349999528339405, - "learning_rate": 1.0094699280491371e-05, - "loss": 0.1691, - "step": 10070 - }, - { - "epoch": 0.51, - "grad_norm": 1.8161335476433862, - "learning_rate": 1.009305238444007e-05, - "loss": 0.1859, - "step": 10071 - }, - { - "epoch": 0.51, - "grad_norm": 1.1468136125885018, - "learning_rate": 1.0091405485864714e-05, - "loss": 0.2021, - "step": 10072 - }, - { - "epoch": 0.51, - "grad_norm": 2.5859126246115993, - "learning_rate": 1.008975858480998e-05, - "loss": 0.1969, - "step": 10073 - }, - { - "epoch": 0.51, - "grad_norm": 1.07772020244796, - "learning_rate": 1.0088111681320539e-05, - "loss": 0.17, - "step": 10074 - }, - { - "epoch": 0.51, - "grad_norm": 1.367254327526161, - "learning_rate": 1.0086464775441064e-05, - "loss": 0.1844, - "step": 10075 - }, - { - "epoch": 0.51, - "grad_norm": 1.5431365797665315, - "learning_rate": 1.008481786721623e-05, - "loss": 0.2131, - "step": 10076 - }, - { - "epoch": 0.51, - "grad_norm": 1.2134258991202571, - "learning_rate": 1.0083170956690702e-05, - "loss": 0.1739, - "step": 10077 - }, - { - "epoch": 0.51, - "grad_norm": 1.1734516351523943, - "learning_rate": 1.008152404390916e-05, - "loss": 0.2067, - "step": 10078 - }, - { - "epoch": 0.51, - "grad_norm": 1.3435819692760993, - "learning_rate": 1.0079877128916274e-05, - "loss": 0.1844, - "step": 10079 - }, - { - "epoch": 0.51, - "grad_norm": 1.1947122193617454, - "learning_rate": 1.0078230211756714e-05, - "loss": 0.2014, - "step": 10080 - }, - { - "epoch": 0.51, - "grad_norm": 1.1594082609931562, - "learning_rate": 1.0076583292475157e-05, - "loss": 0.1865, - "step": 10081 - }, - { - "epoch": 0.51, - "grad_norm": 1.0904335945044985, - "learning_rate": 1.007493637111627e-05, - "loss": 0.1975, - "step": 10082 - }, - { - "epoch": 0.51, - "grad_norm": 0.9743470552569026, - "learning_rate": 1.0073289447724735e-05, - "loss": 0.202, - "step": 10083 - }, - { - "epoch": 0.51, - "grad_norm": 0.9636925739935388, - "learning_rate": 1.0071642522345217e-05, - "loss": 0.1914, - "step": 10084 - }, - { - "epoch": 0.51, - "grad_norm": 1.1185462840749154, - "learning_rate": 1.0069995595022393e-05, - "loss": 0.1728, - "step": 10085 - }, - { - "epoch": 0.51, - "grad_norm": 1.2161215893688821, - "learning_rate": 1.006834866580093e-05, - "loss": 0.1995, - "step": 10086 - }, - { - "epoch": 0.51, - "grad_norm": 1.5347187922548848, - "learning_rate": 1.006670173472551e-05, - "loss": 0.195, - "step": 10087 - }, - { - "epoch": 0.51, - "grad_norm": 0.8850629354087978, - "learning_rate": 1.00650548018408e-05, - "loss": 0.1853, - "step": 10088 - }, - { - "epoch": 0.51, - "grad_norm": 1.2386088666481518, - "learning_rate": 1.0063407867191478e-05, - "loss": 0.2176, - "step": 10089 - }, - { - "epoch": 0.51, - "grad_norm": 1.797387745492623, - "learning_rate": 1.006176093082221e-05, - "loss": 0.1852, - "step": 10090 - }, - { - "epoch": 0.51, - "grad_norm": 1.150179340161504, - "learning_rate": 1.0060113992777674e-05, - "loss": 0.1699, - "step": 10091 - }, - { - "epoch": 0.51, - "grad_norm": 1.0130362875047425, - "learning_rate": 1.0058467053102544e-05, - "loss": 0.2271, - "step": 10092 - }, - { - "epoch": 0.51, - "grad_norm": 1.491247780734237, - "learning_rate": 1.0056820111841495e-05, - "loss": 0.1868, - "step": 10093 - }, - { - "epoch": 0.51, - "grad_norm": 0.9510571415665885, - "learning_rate": 1.0055173169039192e-05, - "loss": 0.2148, - "step": 10094 - }, - { - "epoch": 0.51, - "grad_norm": 1.4031686577019125, - "learning_rate": 1.0053526224740313e-05, - "loss": 0.1746, - "step": 10095 - }, - { - "epoch": 0.51, - "grad_norm": 0.783505650323615, - "learning_rate": 1.0051879278989536e-05, - "loss": 0.1739, - "step": 10096 - }, - { - "epoch": 0.51, - "grad_norm": 1.7477764660744681, - "learning_rate": 1.0050232331831528e-05, - "loss": 0.1789, - "step": 10097 - }, - { - "epoch": 0.51, - "grad_norm": 1.8995038115576395, - "learning_rate": 1.0048585383310967e-05, - "loss": 0.2128, - "step": 10098 - }, - { - "epoch": 0.51, - "grad_norm": 1.8440069820748402, - "learning_rate": 1.0046938433472522e-05, - "loss": 0.1614, - "step": 10099 - }, - { - "epoch": 0.51, - "grad_norm": 1.0316875891218211, - "learning_rate": 1.0045291482360871e-05, - "loss": 0.1923, - "step": 10100 - }, - { - "epoch": 0.51, - "grad_norm": 1.3853684601406886, - "learning_rate": 1.0043644530020686e-05, - "loss": 0.1897, - "step": 10101 - }, - { - "epoch": 0.51, - "grad_norm": 1.0621006982820664, - "learning_rate": 1.0041997576496643e-05, - "loss": 0.1831, - "step": 10102 - }, - { - "epoch": 0.51, - "grad_norm": 1.0929803339188837, - "learning_rate": 1.004035062183341e-05, - "loss": 0.1871, - "step": 10103 - }, - { - "epoch": 0.51, - "grad_norm": 1.5969371607412692, - "learning_rate": 1.0038703666075665e-05, - "loss": 0.1802, - "step": 10104 - }, - { - "epoch": 0.51, - "grad_norm": 1.2339497000753874, - "learning_rate": 1.003705670926808e-05, - "loss": 0.1894, - "step": 10105 - }, - { - "epoch": 0.51, - "grad_norm": 1.408984839071695, - "learning_rate": 1.0035409751455332e-05, - "loss": 0.1927, - "step": 10106 - }, - { - "epoch": 0.51, - "grad_norm": 0.9577016281366301, - "learning_rate": 1.0033762792682092e-05, - "loss": 0.1802, - "step": 10107 - }, - { - "epoch": 0.51, - "grad_norm": 1.0593468957036303, - "learning_rate": 1.0032115832993032e-05, - "loss": 0.1736, - "step": 10108 - }, - { - "epoch": 0.51, - "grad_norm": 1.1893297903859361, - "learning_rate": 1.003046887243283e-05, - "loss": 0.1927, - "step": 10109 - }, - { - "epoch": 0.51, - "grad_norm": 0.7841198800976856, - "learning_rate": 1.0028821911046158e-05, - "loss": 0.1733, - "step": 10110 - }, - { - "epoch": 0.51, - "grad_norm": 0.9635994815974888, - "learning_rate": 1.0027174948877692e-05, - "loss": 0.1769, - "step": 10111 - }, - { - "epoch": 0.51, - "grad_norm": 1.4976506517758972, - "learning_rate": 1.0025527985972102e-05, - "loss": 0.1903, - "step": 10112 - }, - { - "epoch": 0.51, - "grad_norm": 0.9258277904885904, - "learning_rate": 1.0023881022374062e-05, - "loss": 0.181, - "step": 10113 - }, - { - "epoch": 0.51, - "grad_norm": 1.1322482761758057, - "learning_rate": 1.0022234058128251e-05, - "loss": 0.185, - "step": 10114 - }, - { - "epoch": 0.51, - "grad_norm": 1.697952043975562, - "learning_rate": 1.0020587093279339e-05, - "loss": 0.1945, - "step": 10115 - }, - { - "epoch": 0.51, - "grad_norm": 1.0221196595404018, - "learning_rate": 1.0018940127872001e-05, - "loss": 0.173, - "step": 10116 - }, - { - "epoch": 0.51, - "grad_norm": 2.525650165385635, - "learning_rate": 1.001729316195091e-05, - "loss": 0.196, - "step": 10117 - }, - { - "epoch": 0.51, - "grad_norm": 0.9281390262973999, - "learning_rate": 1.001564619556074e-05, - "loss": 0.1758, - "step": 10118 - }, - { - "epoch": 0.51, - "grad_norm": 1.5021408002983778, - "learning_rate": 1.001399922874617e-05, - "loss": 0.2149, - "step": 10119 - }, - { - "epoch": 0.51, - "grad_norm": 1.338027793563955, - "learning_rate": 1.0012352261551868e-05, - "loss": 0.1987, - "step": 10120 - }, - { - "epoch": 0.51, - "grad_norm": 1.034500217978134, - "learning_rate": 1.001070529402251e-05, - "loss": 0.1678, - "step": 10121 - }, - { - "epoch": 0.51, - "grad_norm": 0.9757847773691674, - "learning_rate": 1.0009058326202768e-05, - "loss": 0.1821, - "step": 10122 - }, - { - "epoch": 0.51, - "grad_norm": 0.9283672375689633, - "learning_rate": 1.000741135813732e-05, - "loss": 0.2065, - "step": 10123 - }, - { - "epoch": 0.51, - "grad_norm": 1.0360793799716828, - "learning_rate": 1.000576438987084e-05, - "loss": 0.188, - "step": 10124 - }, - { - "epoch": 0.51, - "grad_norm": 0.9095239168298347, - "learning_rate": 1.0004117421448e-05, - "loss": 0.1953, - "step": 10125 - }, - { - "epoch": 0.51, - "grad_norm": 1.0856311188231922, - "learning_rate": 1.0002470452913473e-05, - "loss": 0.184, - "step": 10126 - }, - { - "epoch": 0.51, - "grad_norm": 1.0522283892013424, - "learning_rate": 1.0000823484311937e-05, - "loss": 0.1797, - "step": 10127 - }, - { - "epoch": 0.52, - "grad_norm": 0.8729040385760526, - "learning_rate": 9.999176515688066e-06, - "loss": 0.1783, - "step": 10128 - }, - { - "epoch": 0.52, - "grad_norm": 1.5443301439658077, - "learning_rate": 9.997529547086527e-06, - "loss": 0.1928, - "step": 10129 - }, - { - "epoch": 0.52, - "grad_norm": 1.0450069976328333, - "learning_rate": 9.995882578552002e-06, - "loss": 0.1769, - "step": 10130 - }, - { - "epoch": 0.52, - "grad_norm": 2.594677740582822, - "learning_rate": 9.99423561012916e-06, - "loss": 0.1859, - "step": 10131 - }, - { - "epoch": 0.52, - "grad_norm": 0.9298369881994603, - "learning_rate": 9.992588641862682e-06, - "loss": 0.183, - "step": 10132 - }, - { - "epoch": 0.52, - "grad_norm": 1.1359824341091553, - "learning_rate": 9.990941673797234e-06, - "loss": 0.2131, - "step": 10133 - }, - { - "epoch": 0.52, - "grad_norm": 1.1726229751635113, - "learning_rate": 9.989294705977494e-06, - "loss": 0.2187, - "step": 10134 - }, - { - "epoch": 0.52, - "grad_norm": 1.0697105168059207, - "learning_rate": 9.987647738448134e-06, - "loss": 0.1792, - "step": 10135 - }, - { - "epoch": 0.52, - "grad_norm": 1.0446155555654533, - "learning_rate": 9.986000771253835e-06, - "loss": 0.1842, - "step": 10136 - }, - { - "epoch": 0.52, - "grad_norm": 1.5252893644381011, - "learning_rate": 9.984353804439264e-06, - "loss": 0.2099, - "step": 10137 - }, - { - "epoch": 0.52, - "grad_norm": 0.991401095277735, - "learning_rate": 9.982706838049094e-06, - "loss": 0.1812, - "step": 10138 - }, - { - "epoch": 0.52, - "grad_norm": 0.9456998564950463, - "learning_rate": 9.981059872128004e-06, - "loss": 0.1749, - "step": 10139 - }, - { - "epoch": 0.52, - "grad_norm": 1.141972747906058, - "learning_rate": 9.979412906720663e-06, - "loss": 0.1747, - "step": 10140 - }, - { - "epoch": 0.52, - "grad_norm": 0.9011930047903997, - "learning_rate": 9.977765941871754e-06, - "loss": 0.1748, - "step": 10141 - }, - { - "epoch": 0.52, - "grad_norm": 1.135559019401279, - "learning_rate": 9.976118977625941e-06, - "loss": 0.1878, - "step": 10142 - }, - { - "epoch": 0.52, - "grad_norm": 1.4727396649384408, - "learning_rate": 9.974472014027903e-06, - "loss": 0.1737, - "step": 10143 - }, - { - "epoch": 0.52, - "grad_norm": 0.8580717686639748, - "learning_rate": 9.97282505112231e-06, - "loss": 0.21, - "step": 10144 - }, - { - "epoch": 0.52, - "grad_norm": 1.09660598116125, - "learning_rate": 9.971178088953845e-06, - "loss": 0.1681, - "step": 10145 - }, - { - "epoch": 0.52, - "grad_norm": 0.8839707672338685, - "learning_rate": 9.969531127567172e-06, - "loss": 0.1792, - "step": 10146 - }, - { - "epoch": 0.52, - "grad_norm": 0.970215974749392, - "learning_rate": 9.96788416700697e-06, - "loss": 0.177, - "step": 10147 - }, - { - "epoch": 0.52, - "grad_norm": 1.26868888668654, - "learning_rate": 9.96623720731791e-06, - "loss": 0.1784, - "step": 10148 - }, - { - "epoch": 0.52, - "grad_norm": 2.656729429434053, - "learning_rate": 9.964590248544671e-06, - "loss": 0.2014, - "step": 10149 - }, - { - "epoch": 0.52, - "grad_norm": 0.93392320181895, - "learning_rate": 9.96294329073192e-06, - "loss": 0.1887, - "step": 10150 - }, - { - "epoch": 0.52, - "grad_norm": 0.8589463763497676, - "learning_rate": 9.961296333924338e-06, - "loss": 0.1678, - "step": 10151 - }, - { - "epoch": 0.52, - "grad_norm": 1.0833683363223623, - "learning_rate": 9.959649378166593e-06, - "loss": 0.1936, - "step": 10152 - }, - { - "epoch": 0.52, - "grad_norm": 0.9254068495035073, - "learning_rate": 9.95800242350336e-06, - "loss": 0.1791, - "step": 10153 - }, - { - "epoch": 0.52, - "grad_norm": 1.2301348039871198, - "learning_rate": 9.95635546997932e-06, - "loss": 0.1916, - "step": 10154 - }, - { - "epoch": 0.52, - "grad_norm": 0.8856938637562976, - "learning_rate": 9.95470851763913e-06, - "loss": 0.1649, - "step": 10155 - }, - { - "epoch": 0.52, - "grad_norm": 2.540285019478901, - "learning_rate": 9.953061566527481e-06, - "loss": 0.1785, - "step": 10156 - }, - { - "epoch": 0.52, - "grad_norm": 0.9488229772162609, - "learning_rate": 9.951414616689037e-06, - "loss": 0.1971, - "step": 10157 - }, - { - "epoch": 0.52, - "grad_norm": 1.4195672508913053, - "learning_rate": 9.949767668168477e-06, - "loss": 0.1716, - "step": 10158 - }, - { - "epoch": 0.52, - "grad_norm": 0.9390328203362874, - "learning_rate": 9.948120721010467e-06, - "loss": 0.1847, - "step": 10159 - }, - { - "epoch": 0.52, - "grad_norm": 0.9321828642929739, - "learning_rate": 9.94647377525969e-06, - "loss": 0.1757, - "step": 10160 - }, - { - "epoch": 0.52, - "grad_norm": 1.4279965604940652, - "learning_rate": 9.94482683096081e-06, - "loss": 0.201, - "step": 10161 - }, - { - "epoch": 0.52, - "grad_norm": 1.0650861327001324, - "learning_rate": 9.943179888158512e-06, - "loss": 0.1886, - "step": 10162 - }, - { - "epoch": 0.52, - "grad_norm": 2.259419552091082, - "learning_rate": 9.941532946897456e-06, - "loss": 0.1867, - "step": 10163 - }, - { - "epoch": 0.52, - "grad_norm": 1.0238322413879728, - "learning_rate": 9.93988600722233e-06, - "loss": 0.1822, - "step": 10164 - }, - { - "epoch": 0.52, - "grad_norm": 2.9864417236171614, - "learning_rate": 9.938239069177792e-06, - "loss": 0.1855, - "step": 10165 - }, - { - "epoch": 0.52, - "grad_norm": 0.8899000559027274, - "learning_rate": 9.936592132808526e-06, - "loss": 0.1858, - "step": 10166 - }, - { - "epoch": 0.52, - "grad_norm": 1.0665160725544522, - "learning_rate": 9.9349451981592e-06, - "loss": 0.1728, - "step": 10167 - }, - { - "epoch": 0.52, - "grad_norm": 1.0003805091844538, - "learning_rate": 9.933298265274493e-06, - "loss": 0.1756, - "step": 10168 - }, - { - "epoch": 0.52, - "grad_norm": 0.7563077112028692, - "learning_rate": 9.93165133419907e-06, - "loss": 0.1651, - "step": 10169 - }, - { - "epoch": 0.52, - "grad_norm": 0.9976909236680422, - "learning_rate": 9.93000440497761e-06, - "loss": 0.2197, - "step": 10170 - }, - { - "epoch": 0.52, - "grad_norm": 1.0635056769523115, - "learning_rate": 9.928357477654783e-06, - "loss": 0.1755, - "step": 10171 - }, - { - "epoch": 0.52, - "grad_norm": 0.9026074106361105, - "learning_rate": 9.926710552275268e-06, - "loss": 0.1995, - "step": 10172 - }, - { - "epoch": 0.52, - "grad_norm": 0.8673749012582641, - "learning_rate": 9.925063628883731e-06, - "loss": 0.1817, - "step": 10173 - }, - { - "epoch": 0.52, - "grad_norm": 0.9998411376931166, - "learning_rate": 9.923416707524845e-06, - "loss": 0.205, - "step": 10174 - }, - { - "epoch": 0.52, - "grad_norm": 1.334736412034857, - "learning_rate": 9.921769788243291e-06, - "loss": 0.1721, - "step": 10175 - }, - { - "epoch": 0.52, - "grad_norm": 0.853685111169905, - "learning_rate": 9.92012287108373e-06, - "loss": 0.1825, - "step": 10176 - }, - { - "epoch": 0.52, - "grad_norm": 1.5061052824171897, - "learning_rate": 9.918475956090845e-06, - "loss": 0.1907, - "step": 10177 - }, - { - "epoch": 0.52, - "grad_norm": 1.649723100823968, - "learning_rate": 9.9168290433093e-06, - "loss": 0.1581, - "step": 10178 - }, - { - "epoch": 0.52, - "grad_norm": 0.9177755922432216, - "learning_rate": 9.915182132783773e-06, - "loss": 0.1698, - "step": 10179 - }, - { - "epoch": 0.52, - "grad_norm": 1.171004729759711, - "learning_rate": 9.913535224558936e-06, - "loss": 0.1862, - "step": 10180 - }, - { - "epoch": 0.52, - "grad_norm": 1.1815972737473774, - "learning_rate": 9.911888318679463e-06, - "loss": 0.2017, - "step": 10181 - }, - { - "epoch": 0.52, - "grad_norm": 0.9401379104944637, - "learning_rate": 9.910241415190022e-06, - "loss": 0.1664, - "step": 10182 - }, - { - "epoch": 0.52, - "grad_norm": 1.086417801036447, - "learning_rate": 9.908594514135288e-06, - "loss": 0.2047, - "step": 10183 - }, - { - "epoch": 0.52, - "grad_norm": 1.1246348855935053, - "learning_rate": 9.906947615559932e-06, - "loss": 0.182, - "step": 10184 - }, - { - "epoch": 0.52, - "grad_norm": 1.5114210520282993, - "learning_rate": 9.90530071950863e-06, - "loss": 0.2235, - "step": 10185 - }, - { - "epoch": 0.52, - "grad_norm": 1.7888776269891093, - "learning_rate": 9.903653826026049e-06, - "loss": 0.1705, - "step": 10186 - }, - { - "epoch": 0.52, - "grad_norm": 1.3258985075992884, - "learning_rate": 9.902006935156863e-06, - "loss": 0.1849, - "step": 10187 - }, - { - "epoch": 0.52, - "grad_norm": 1.954590283479778, - "learning_rate": 9.900360046945746e-06, - "loss": 0.184, - "step": 10188 - }, - { - "epoch": 0.52, - "grad_norm": 0.9657271524800654, - "learning_rate": 9.89871316143737e-06, - "loss": 0.1725, - "step": 10189 - }, - { - "epoch": 0.52, - "grad_norm": 0.910098204363335, - "learning_rate": 9.897066278676405e-06, - "loss": 0.2054, - "step": 10190 - }, - { - "epoch": 0.52, - "grad_norm": 0.980882661006259, - "learning_rate": 9.89541939870752e-06, - "loss": 0.169, - "step": 10191 - }, - { - "epoch": 0.52, - "grad_norm": 1.3256139950041805, - "learning_rate": 9.893772521575391e-06, - "loss": 0.1984, - "step": 10192 - }, - { - "epoch": 0.52, - "grad_norm": 1.2102877874991573, - "learning_rate": 9.892125647324686e-06, - "loss": 0.1972, - "step": 10193 - }, - { - "epoch": 0.52, - "grad_norm": 1.2624640998421532, - "learning_rate": 9.890478776000084e-06, - "loss": 0.1975, - "step": 10194 - }, - { - "epoch": 0.52, - "grad_norm": 1.6185245911520465, - "learning_rate": 9.888831907646246e-06, - "loss": 0.1702, - "step": 10195 - }, - { - "epoch": 0.52, - "grad_norm": 1.3108016567585763, - "learning_rate": 9.887185042307852e-06, - "loss": 0.1929, - "step": 10196 - }, - { - "epoch": 0.52, - "grad_norm": 1.1164020898665021, - "learning_rate": 9.885538180029568e-06, - "loss": 0.1819, - "step": 10197 - }, - { - "epoch": 0.52, - "grad_norm": 1.3538769640540862, - "learning_rate": 9.883891320856071e-06, - "loss": 0.2121, - "step": 10198 - }, - { - "epoch": 0.52, - "grad_norm": 2.724159505917361, - "learning_rate": 9.882244464832026e-06, - "loss": 0.1805, - "step": 10199 - }, - { - "epoch": 0.52, - "grad_norm": 1.2116009207840774, - "learning_rate": 9.880597612002106e-06, - "loss": 0.1754, - "step": 10200 - }, - { - "epoch": 0.52, - "grad_norm": 1.0692218955637767, - "learning_rate": 9.878950762410981e-06, - "loss": 0.1719, - "step": 10201 - }, - { - "epoch": 0.52, - "grad_norm": 1.1384005316524284, - "learning_rate": 9.877303916103328e-06, - "loss": 0.2003, - "step": 10202 - }, - { - "epoch": 0.52, - "grad_norm": 1.7804404480331262, - "learning_rate": 9.87565707312381e-06, - "loss": 0.1939, - "step": 10203 - }, - { - "epoch": 0.52, - "grad_norm": 1.1253730710614271, - "learning_rate": 9.874010233517103e-06, - "loss": 0.1829, - "step": 10204 - }, - { - "epoch": 0.52, - "grad_norm": 1.0966039460231016, - "learning_rate": 9.872363397327873e-06, - "loss": 0.1832, - "step": 10205 - }, - { - "epoch": 0.52, - "grad_norm": 0.9516794217543744, - "learning_rate": 9.870716564600796e-06, - "loss": 0.1649, - "step": 10206 - }, - { - "epoch": 0.52, - "grad_norm": 1.4001621379783895, - "learning_rate": 9.869069735380539e-06, - "loss": 0.2184, - "step": 10207 - }, - { - "epoch": 0.52, - "grad_norm": 0.9562249033797356, - "learning_rate": 9.86742290971177e-06, - "loss": 0.1564, - "step": 10208 - }, - { - "epoch": 0.52, - "grad_norm": 1.1783580116509145, - "learning_rate": 9.865776087639166e-06, - "loss": 0.1931, - "step": 10209 - }, - { - "epoch": 0.52, - "grad_norm": 0.9452027253631481, - "learning_rate": 9.864129269207388e-06, - "loss": 0.1644, - "step": 10210 - }, - { - "epoch": 0.52, - "grad_norm": 0.7783206095949607, - "learning_rate": 9.862482454461116e-06, - "loss": 0.1727, - "step": 10211 - }, - { - "epoch": 0.52, - "grad_norm": 1.2040093366723683, - "learning_rate": 9.860835643445012e-06, - "loss": 0.1946, - "step": 10212 - }, - { - "epoch": 0.52, - "grad_norm": 1.0919123989202537, - "learning_rate": 9.85918883620375e-06, - "loss": 0.1927, - "step": 10213 - }, - { - "epoch": 0.52, - "grad_norm": 1.2210304943276447, - "learning_rate": 9.857542032781998e-06, - "loss": 0.1658, - "step": 10214 - }, - { - "epoch": 0.52, - "grad_norm": 1.403600247600213, - "learning_rate": 9.855895233224431e-06, - "loss": 0.1764, - "step": 10215 - }, - { - "epoch": 0.52, - "grad_norm": 2.6703789557349276, - "learning_rate": 9.854248437575709e-06, - "loss": 0.1786, - "step": 10216 - }, - { - "epoch": 0.52, - "grad_norm": 1.2725040333955278, - "learning_rate": 9.852601645880509e-06, - "loss": 0.1944, - "step": 10217 - }, - { - "epoch": 0.52, - "grad_norm": 1.2734027828974808, - "learning_rate": 9.850954858183496e-06, - "loss": 0.1755, - "step": 10218 - }, - { - "epoch": 0.52, - "grad_norm": 1.5556163072912323, - "learning_rate": 9.84930807452934e-06, - "loss": 0.2188, - "step": 10219 - }, - { - "epoch": 0.52, - "grad_norm": 0.8223797292168257, - "learning_rate": 9.847661294962712e-06, - "loss": 0.1837, - "step": 10220 - }, - { - "epoch": 0.52, - "grad_norm": 0.9725737606009888, - "learning_rate": 9.846014519528284e-06, - "loss": 0.1765, - "step": 10221 - }, - { - "epoch": 0.52, - "grad_norm": 0.9305960044566103, - "learning_rate": 9.844367748270715e-06, - "loss": 0.1787, - "step": 10222 - }, - { - "epoch": 0.52, - "grad_norm": 0.9975408276525014, - "learning_rate": 9.842720981234682e-06, - "loss": 0.1891, - "step": 10223 - }, - { - "epoch": 0.52, - "grad_norm": 1.8328704239311866, - "learning_rate": 9.841074218464852e-06, - "loss": 0.2045, - "step": 10224 - }, - { - "epoch": 0.52, - "grad_norm": 0.8011822959442132, - "learning_rate": 9.839427460005892e-06, - "loss": 0.1606, - "step": 10225 - }, - { - "epoch": 0.52, - "grad_norm": 1.1040336320983504, - "learning_rate": 9.83778070590247e-06, - "loss": 0.1841, - "step": 10226 - }, - { - "epoch": 0.52, - "grad_norm": 1.6536014897279836, - "learning_rate": 9.836133956199256e-06, - "loss": 0.2025, - "step": 10227 - }, - { - "epoch": 0.52, - "grad_norm": 1.1487464384278228, - "learning_rate": 9.834487210940921e-06, - "loss": 0.1803, - "step": 10228 - }, - { - "epoch": 0.52, - "grad_norm": 1.167115620085271, - "learning_rate": 9.832840470172125e-06, - "loss": 0.169, - "step": 10229 - }, - { - "epoch": 0.52, - "grad_norm": 1.1368296793900705, - "learning_rate": 9.831193733937546e-06, - "loss": 0.1966, - "step": 10230 - }, - { - "epoch": 0.52, - "grad_norm": 2.248366853893879, - "learning_rate": 9.829547002281842e-06, - "loss": 0.1962, - "step": 10231 - }, - { - "epoch": 0.52, - "grad_norm": 1.2296604069185608, - "learning_rate": 9.827900275249686e-06, - "loss": 0.1698, - "step": 10232 - }, - { - "epoch": 0.52, - "grad_norm": 1.0772480915238765, - "learning_rate": 9.826253552885744e-06, - "loss": 0.1702, - "step": 10233 - }, - { - "epoch": 0.52, - "grad_norm": 1.1237664542437968, - "learning_rate": 9.824606835234689e-06, - "loss": 0.1807, - "step": 10234 - }, - { - "epoch": 0.52, - "grad_norm": 1.1427706105579902, - "learning_rate": 9.822960122341178e-06, - "loss": 0.162, - "step": 10235 - }, - { - "epoch": 0.52, - "grad_norm": 1.1936009566155101, - "learning_rate": 9.821313414249885e-06, - "loss": 0.1701, - "step": 10236 - }, - { - "epoch": 0.52, - "grad_norm": 0.858311831063239, - "learning_rate": 9.819666711005475e-06, - "loss": 0.1671, - "step": 10237 - }, - { - "epoch": 0.52, - "grad_norm": 0.9974203917111358, - "learning_rate": 9.818020012652619e-06, - "loss": 0.1792, - "step": 10238 - }, - { - "epoch": 0.52, - "grad_norm": 1.048164162370106, - "learning_rate": 9.816373319235978e-06, - "loss": 0.1773, - "step": 10239 - }, - { - "epoch": 0.52, - "grad_norm": 1.1626993038494857, - "learning_rate": 9.81472663080022e-06, - "loss": 0.2068, - "step": 10240 - }, - { - "epoch": 0.52, - "grad_norm": 1.1444195275217797, - "learning_rate": 9.813079947390014e-06, - "loss": 0.176, - "step": 10241 - }, - { - "epoch": 0.52, - "grad_norm": 2.142794849853422, - "learning_rate": 9.811433269050028e-06, - "loss": 0.1936, - "step": 10242 - }, - { - "epoch": 0.52, - "grad_norm": 1.73344991210697, - "learning_rate": 9.809786595824922e-06, - "loss": 0.1756, - "step": 10243 - }, - { - "epoch": 0.52, - "grad_norm": 0.9409173951911256, - "learning_rate": 9.808139927759363e-06, - "loss": 0.1816, - "step": 10244 - }, - { - "epoch": 0.52, - "grad_norm": 1.0067367796209783, - "learning_rate": 9.806493264898025e-06, - "loss": 0.1731, - "step": 10245 - }, - { - "epoch": 0.52, - "grad_norm": 1.0266266779009294, - "learning_rate": 9.804846607285564e-06, - "loss": 0.1842, - "step": 10246 - }, - { - "epoch": 0.52, - "grad_norm": 1.1987923485278358, - "learning_rate": 9.803199954966653e-06, - "loss": 0.1718, - "step": 10247 - }, - { - "epoch": 0.52, - "grad_norm": 1.6993453303103312, - "learning_rate": 9.801553307985951e-06, - "loss": 0.1612, - "step": 10248 - }, - { - "epoch": 0.52, - "grad_norm": 1.1169837375895257, - "learning_rate": 9.799906666388129e-06, - "loss": 0.2179, - "step": 10249 - }, - { - "epoch": 0.52, - "grad_norm": 1.2233213875080469, - "learning_rate": 9.798260030217846e-06, - "loss": 0.1806, - "step": 10250 - }, - { - "epoch": 0.52, - "grad_norm": 0.8535516549948462, - "learning_rate": 9.796613399519777e-06, - "loss": 0.1985, - "step": 10251 - }, - { - "epoch": 0.52, - "grad_norm": 1.042384382904542, - "learning_rate": 9.794966774338576e-06, - "loss": 0.1733, - "step": 10252 - }, - { - "epoch": 0.52, - "grad_norm": 1.2915414484488394, - "learning_rate": 9.793320154718916e-06, - "loss": 0.2085, - "step": 10253 - }, - { - "epoch": 0.52, - "grad_norm": 1.0400857471966334, - "learning_rate": 9.791673540705455e-06, - "loss": 0.187, - "step": 10254 - }, - { - "epoch": 0.52, - "grad_norm": 1.0354907674864533, - "learning_rate": 9.790026932342867e-06, - "loss": 0.1905, - "step": 10255 - }, - { - "epoch": 0.52, - "grad_norm": 1.156361371396123, - "learning_rate": 9.788380329675804e-06, - "loss": 0.1737, - "step": 10256 - }, - { - "epoch": 0.52, - "grad_norm": 1.130435978970632, - "learning_rate": 9.78673373274894e-06, - "loss": 0.1822, - "step": 10257 - }, - { - "epoch": 0.52, - "grad_norm": 1.005638120384302, - "learning_rate": 9.785087141606933e-06, - "loss": 0.1739, - "step": 10258 - }, - { - "epoch": 0.52, - "grad_norm": 1.1380722013661921, - "learning_rate": 9.783440556294453e-06, - "loss": 0.1632, - "step": 10259 - }, - { - "epoch": 0.52, - "grad_norm": 0.9394729324116381, - "learning_rate": 9.781793976856162e-06, - "loss": 0.1935, - "step": 10260 - }, - { - "epoch": 0.52, - "grad_norm": 1.0403813908247483, - "learning_rate": 9.780147403336715e-06, - "loss": 0.1996, - "step": 10261 - }, - { - "epoch": 0.52, - "grad_norm": 1.0395031212258359, - "learning_rate": 9.778500835780787e-06, - "loss": 0.1701, - "step": 10262 - }, - { - "epoch": 0.52, - "grad_norm": 1.4688923849207338, - "learning_rate": 9.776854274233033e-06, - "loss": 0.216, - "step": 10263 - }, - { - "epoch": 0.52, - "grad_norm": 0.9303142358262178, - "learning_rate": 9.775207718738122e-06, - "loss": 0.1751, - "step": 10264 - }, - { - "epoch": 0.52, - "grad_norm": 0.9180433229028818, - "learning_rate": 9.773561169340711e-06, - "loss": 0.1701, - "step": 10265 - }, - { - "epoch": 0.52, - "grad_norm": 4.3860890328689495, - "learning_rate": 9.771914626085469e-06, - "loss": 0.182, - "step": 10266 - }, - { - "epoch": 0.52, - "grad_norm": 0.9648158711671928, - "learning_rate": 9.770268089017053e-06, - "loss": 0.1796, - "step": 10267 - }, - { - "epoch": 0.52, - "grad_norm": 1.2075022464405123, - "learning_rate": 9.768621558180132e-06, - "loss": 0.1843, - "step": 10268 - }, - { - "epoch": 0.52, - "grad_norm": 1.0368552296049038, - "learning_rate": 9.766975033619361e-06, - "loss": 0.2178, - "step": 10269 - }, - { - "epoch": 0.52, - "grad_norm": 0.9632831147646977, - "learning_rate": 9.765328515379407e-06, - "loss": 0.192, - "step": 10270 - }, - { - "epoch": 0.52, - "grad_norm": 1.0815652516872132, - "learning_rate": 9.763682003504928e-06, - "loss": 0.2028, - "step": 10271 - }, - { - "epoch": 0.52, - "grad_norm": 1.0663126245248298, - "learning_rate": 9.762035498040594e-06, - "loss": 0.1806, - "step": 10272 - }, - { - "epoch": 0.52, - "grad_norm": 1.0079893330839749, - "learning_rate": 9.760388999031052e-06, - "loss": 0.1645, - "step": 10273 - }, - { - "epoch": 0.52, - "grad_norm": 0.8802844478953326, - "learning_rate": 9.758742506520981e-06, - "loss": 0.2002, - "step": 10274 - }, - { - "epoch": 0.52, - "grad_norm": 1.0872710320559473, - "learning_rate": 9.757096020555026e-06, - "loss": 0.1872, - "step": 10275 - }, - { - "epoch": 0.52, - "grad_norm": 1.3758322155268397, - "learning_rate": 9.755449541177858e-06, - "loss": 0.1975, - "step": 10276 - }, - { - "epoch": 0.52, - "grad_norm": 0.8157296114345972, - "learning_rate": 9.753803068434138e-06, - "loss": 0.1757, - "step": 10277 - }, - { - "epoch": 0.52, - "grad_norm": 0.8569053998828822, - "learning_rate": 9.752156602368518e-06, - "loss": 0.1921, - "step": 10278 - }, - { - "epoch": 0.52, - "grad_norm": 1.086718255160991, - "learning_rate": 9.750510143025667e-06, - "loss": 0.183, - "step": 10279 - }, - { - "epoch": 0.52, - "grad_norm": 0.8547794002110998, - "learning_rate": 9.74886369045024e-06, - "loss": 0.1925, - "step": 10280 - }, - { - "epoch": 0.52, - "grad_norm": 0.9958419750913367, - "learning_rate": 9.747217244686904e-06, - "loss": 0.1836, - "step": 10281 - }, - { - "epoch": 0.52, - "grad_norm": 0.9492194733753996, - "learning_rate": 9.745570805780312e-06, - "loss": 0.1913, - "step": 10282 - }, - { - "epoch": 0.52, - "grad_norm": 1.0321429448228026, - "learning_rate": 9.743924373775125e-06, - "loss": 0.1981, - "step": 10283 - }, - { - "epoch": 0.52, - "grad_norm": 1.2903368325471654, - "learning_rate": 9.742277948716004e-06, - "loss": 0.1851, - "step": 10284 - }, - { - "epoch": 0.52, - "grad_norm": 0.9680758698483248, - "learning_rate": 9.740631530647611e-06, - "loss": 0.1756, - "step": 10285 - }, - { - "epoch": 0.52, - "grad_norm": 1.1418268403416956, - "learning_rate": 9.7389851196146e-06, - "loss": 0.1651, - "step": 10286 - }, - { - "epoch": 0.52, - "grad_norm": 1.0744824585428467, - "learning_rate": 9.737338715661635e-06, - "loss": 0.1815, - "step": 10287 - }, - { - "epoch": 0.52, - "grad_norm": 0.9499836138535693, - "learning_rate": 9.735692318833368e-06, - "loss": 0.1643, - "step": 10288 - }, - { - "epoch": 0.52, - "grad_norm": 1.033330852504397, - "learning_rate": 9.734045929174465e-06, - "loss": 0.1745, - "step": 10289 - }, - { - "epoch": 0.52, - "grad_norm": 0.9317581030504224, - "learning_rate": 9.732399546729578e-06, - "loss": 0.181, - "step": 10290 - }, - { - "epoch": 0.52, - "grad_norm": 0.936147738733036, - "learning_rate": 9.730753171543374e-06, - "loss": 0.1947, - "step": 10291 - }, - { - "epoch": 0.52, - "grad_norm": 1.4341538894034087, - "learning_rate": 9.729106803660501e-06, - "loss": 0.1622, - "step": 10292 - }, - { - "epoch": 0.52, - "grad_norm": 1.0503544146668324, - "learning_rate": 9.727460443125622e-06, - "loss": 0.1868, - "step": 10293 - }, - { - "epoch": 0.52, - "grad_norm": 0.9132717870250818, - "learning_rate": 9.725814089983398e-06, - "loss": 0.1837, - "step": 10294 - }, - { - "epoch": 0.52, - "grad_norm": 1.2074972029352926, - "learning_rate": 9.724167744278475e-06, - "loss": 0.1876, - "step": 10295 - }, - { - "epoch": 0.52, - "grad_norm": 1.166491148740018, - "learning_rate": 9.722521406055521e-06, - "loss": 0.2105, - "step": 10296 - }, - { - "epoch": 0.52, - "grad_norm": 1.1476688881483905, - "learning_rate": 9.720875075359188e-06, - "loss": 0.201, - "step": 10297 - }, - { - "epoch": 0.52, - "grad_norm": 1.0005395496337184, - "learning_rate": 9.71922875223414e-06, - "loss": 0.1653, - "step": 10298 - }, - { - "epoch": 0.52, - "grad_norm": 1.045700555276377, - "learning_rate": 9.717582436725021e-06, - "loss": 0.1933, - "step": 10299 - }, - { - "epoch": 0.52, - "grad_norm": 1.1122857347006354, - "learning_rate": 9.715936128876501e-06, - "loss": 0.2016, - "step": 10300 - }, - { - "epoch": 0.52, - "grad_norm": 0.8552952996571777, - "learning_rate": 9.714289828733223e-06, - "loss": 0.1983, - "step": 10301 - }, - { - "epoch": 0.52, - "grad_norm": 0.9366334544382264, - "learning_rate": 9.712643536339853e-06, - "loss": 0.1673, - "step": 10302 - }, - { - "epoch": 0.52, - "grad_norm": 0.8599452455983897, - "learning_rate": 9.71099725174104e-06, - "loss": 0.177, - "step": 10303 - }, - { - "epoch": 0.52, - "grad_norm": 1.4254728191793864, - "learning_rate": 9.709350974981449e-06, - "loss": 0.1746, - "step": 10304 - }, - { - "epoch": 0.52, - "grad_norm": 1.2572506240427659, - "learning_rate": 9.707704706105724e-06, - "loss": 0.2239, - "step": 10305 - }, - { - "epoch": 0.52, - "grad_norm": 0.8240594846353297, - "learning_rate": 9.706058445158527e-06, - "loss": 0.162, - "step": 10306 - }, - { - "epoch": 0.52, - "grad_norm": 1.3421053745298535, - "learning_rate": 9.704412192184511e-06, - "loss": 0.1903, - "step": 10307 - }, - { - "epoch": 0.52, - "grad_norm": 0.707243750628533, - "learning_rate": 9.702765947228333e-06, - "loss": 0.1605, - "step": 10308 - }, - { - "epoch": 0.52, - "grad_norm": 1.4382999861183035, - "learning_rate": 9.701119710334641e-06, - "loss": 0.2078, - "step": 10309 - }, - { - "epoch": 0.52, - "grad_norm": 0.9283704283606212, - "learning_rate": 9.699473481548097e-06, - "loss": 0.2208, - "step": 10310 - }, - { - "epoch": 0.52, - "grad_norm": 0.9548298747791736, - "learning_rate": 9.69782726091335e-06, - "loss": 0.185, - "step": 10311 - }, - { - "epoch": 0.52, - "grad_norm": 1.2026902730032376, - "learning_rate": 9.69618104847506e-06, - "loss": 0.2108, - "step": 10312 - }, - { - "epoch": 0.52, - "grad_norm": 1.2489949232747564, - "learning_rate": 9.694534844277876e-06, - "loss": 0.1893, - "step": 10313 - }, - { - "epoch": 0.52, - "grad_norm": 0.9972214462379292, - "learning_rate": 9.692888648366447e-06, - "loss": 0.21, - "step": 10314 - }, - { - "epoch": 0.52, - "grad_norm": 1.1213588878823344, - "learning_rate": 9.691242460785433e-06, - "loss": 0.1822, - "step": 10315 - }, - { - "epoch": 0.52, - "grad_norm": 0.9237740150032636, - "learning_rate": 9.689596281579481e-06, - "loss": 0.1859, - "step": 10316 - }, - { - "epoch": 0.52, - "grad_norm": 1.0459244960203853, - "learning_rate": 9.687950110793254e-06, - "loss": 0.1757, - "step": 10317 - }, - { - "epoch": 0.52, - "grad_norm": 0.9527148976547666, - "learning_rate": 9.686303948471393e-06, - "loss": 0.1902, - "step": 10318 - }, - { - "epoch": 0.52, - "grad_norm": 1.399354081487811, - "learning_rate": 9.684657794658557e-06, - "loss": 0.1742, - "step": 10319 - }, - { - "epoch": 0.52, - "grad_norm": 1.2006200307552037, - "learning_rate": 9.683011649399393e-06, - "loss": 0.1801, - "step": 10320 - }, - { - "epoch": 0.52, - "grad_norm": 0.8273043295743692, - "learning_rate": 9.681365512738561e-06, - "loss": 0.2077, - "step": 10321 - }, - { - "epoch": 0.52, - "grad_norm": 0.9695511459683863, - "learning_rate": 9.679719384720705e-06, - "loss": 0.1791, - "step": 10322 - }, - { - "epoch": 0.52, - "grad_norm": 1.4961861053556897, - "learning_rate": 9.678073265390478e-06, - "loss": 0.1728, - "step": 10323 - }, - { - "epoch": 0.52, - "grad_norm": 1.1475693874767454, - "learning_rate": 9.676427154792532e-06, - "loss": 0.1927, - "step": 10324 - }, - { - "epoch": 0.53, - "grad_norm": 1.1376021528687463, - "learning_rate": 9.67478105297152e-06, - "loss": 0.1805, - "step": 10325 - }, - { - "epoch": 0.53, - "grad_norm": 1.4852948187095072, - "learning_rate": 9.673134959972087e-06, - "loss": 0.1786, - "step": 10326 - }, - { - "epoch": 0.53, - "grad_norm": 1.3038353556527462, - "learning_rate": 9.671488875838892e-06, - "loss": 0.1658, - "step": 10327 - }, - { - "epoch": 0.53, - "grad_norm": 1.2454552539516948, - "learning_rate": 9.669842800616573e-06, - "loss": 0.1997, - "step": 10328 - }, - { - "epoch": 0.53, - "grad_norm": 1.1513530264433791, - "learning_rate": 9.66819673434979e-06, - "loss": 0.176, - "step": 10329 - }, - { - "epoch": 0.53, - "grad_norm": 1.0618655964419974, - "learning_rate": 9.666550677083193e-06, - "loss": 0.1856, - "step": 10330 - }, - { - "epoch": 0.53, - "grad_norm": 1.8207853176957545, - "learning_rate": 9.664904628861423e-06, - "loss": 0.165, - "step": 10331 - }, - { - "epoch": 0.53, - "grad_norm": 1.4835936664284424, - "learning_rate": 9.663258589729133e-06, - "loss": 0.1818, - "step": 10332 - }, - { - "epoch": 0.53, - "grad_norm": 1.148356495795242, - "learning_rate": 9.661612559730974e-06, - "loss": 0.1886, - "step": 10333 - }, - { - "epoch": 0.53, - "grad_norm": 0.977431153226433, - "learning_rate": 9.659966538911597e-06, - "loss": 0.1764, - "step": 10334 - }, - { - "epoch": 0.53, - "grad_norm": 1.3184864457409595, - "learning_rate": 9.658320527315642e-06, - "loss": 0.1875, - "step": 10335 - }, - { - "epoch": 0.53, - "grad_norm": 1.0881433063606767, - "learning_rate": 9.656674524987764e-06, - "loss": 0.1858, - "step": 10336 - }, - { - "epoch": 0.53, - "grad_norm": 0.9988224231568393, - "learning_rate": 9.655028531972607e-06, - "loss": 0.2006, - "step": 10337 - }, - { - "epoch": 0.53, - "grad_norm": 1.0873306012414692, - "learning_rate": 9.653382548314824e-06, - "loss": 0.1902, - "step": 10338 - }, - { - "epoch": 0.53, - "grad_norm": 1.6293296419133951, - "learning_rate": 9.651736574059056e-06, - "loss": 0.1736, - "step": 10339 - }, - { - "epoch": 0.53, - "grad_norm": 0.9422063308430143, - "learning_rate": 9.650090609249957e-06, - "loss": 0.1897, - "step": 10340 - }, - { - "epoch": 0.53, - "grad_norm": 1.098181699439507, - "learning_rate": 9.648444653932166e-06, - "loss": 0.2025, - "step": 10341 - }, - { - "epoch": 0.53, - "grad_norm": 1.043494769008611, - "learning_rate": 9.646798708150335e-06, - "loss": 0.1811, - "step": 10342 - }, - { - "epoch": 0.53, - "grad_norm": 1.4141343490160387, - "learning_rate": 9.645152771949107e-06, - "loss": 0.1732, - "step": 10343 - }, - { - "epoch": 0.53, - "grad_norm": 0.9833433273445842, - "learning_rate": 9.643506845373134e-06, - "loss": 0.1736, - "step": 10344 - }, - { - "epoch": 0.53, - "grad_norm": 1.4078488952573975, - "learning_rate": 9.641860928467054e-06, - "loss": 0.1871, - "step": 10345 - }, - { - "epoch": 0.53, - "grad_norm": 1.254584100159465, - "learning_rate": 9.64021502127552e-06, - "loss": 0.2344, - "step": 10346 - }, - { - "epoch": 0.53, - "grad_norm": 0.9609613365024053, - "learning_rate": 9.638569123843174e-06, - "loss": 0.1756, - "step": 10347 - }, - { - "epoch": 0.53, - "grad_norm": 0.863047354823533, - "learning_rate": 9.636923236214658e-06, - "loss": 0.1634, - "step": 10348 - }, - { - "epoch": 0.53, - "grad_norm": 0.9746403408105337, - "learning_rate": 9.635277358434622e-06, - "loss": 0.191, - "step": 10349 - }, - { - "epoch": 0.53, - "grad_norm": 0.9928457396463447, - "learning_rate": 9.633631490547705e-06, - "loss": 0.1771, - "step": 10350 - }, - { - "epoch": 0.53, - "grad_norm": 1.1389667439345696, - "learning_rate": 9.63198563259856e-06, - "loss": 0.1779, - "step": 10351 - }, - { - "epoch": 0.53, - "grad_norm": 0.8675214941337062, - "learning_rate": 9.63033978463182e-06, - "loss": 0.1908, - "step": 10352 - }, - { - "epoch": 0.53, - "grad_norm": 1.8474049487338804, - "learning_rate": 9.628693946692137e-06, - "loss": 0.1869, - "step": 10353 - }, - { - "epoch": 0.53, - "grad_norm": 0.9039438034001152, - "learning_rate": 9.62704811882415e-06, - "loss": 0.198, - "step": 10354 - }, - { - "epoch": 0.53, - "grad_norm": 1.2251513400924823, - "learning_rate": 9.625402301072508e-06, - "loss": 0.1837, - "step": 10355 - }, - { - "epoch": 0.53, - "grad_norm": 0.7946568640432952, - "learning_rate": 9.623756493481845e-06, - "loss": 0.169, - "step": 10356 - }, - { - "epoch": 0.53, - "grad_norm": 1.0808439840321342, - "learning_rate": 9.622110696096812e-06, - "loss": 0.2229, - "step": 10357 - }, - { - "epoch": 0.53, - "grad_norm": 1.1578573480507568, - "learning_rate": 9.620464908962044e-06, - "loss": 0.2092, - "step": 10358 - }, - { - "epoch": 0.53, - "grad_norm": 1.2304337056025627, - "learning_rate": 9.618819132122188e-06, - "loss": 0.1985, - "step": 10359 - }, - { - "epoch": 0.53, - "grad_norm": 0.9198798137925053, - "learning_rate": 9.617173365621885e-06, - "loss": 0.2165, - "step": 10360 - }, - { - "epoch": 0.53, - "grad_norm": 1.3088003472110072, - "learning_rate": 9.615527609505778e-06, - "loss": 0.2178, - "step": 10361 - }, - { - "epoch": 0.53, - "grad_norm": 1.1875781433128594, - "learning_rate": 9.613881863818504e-06, - "loss": 0.2038, - "step": 10362 - }, - { - "epoch": 0.53, - "grad_norm": 0.7732911230002019, - "learning_rate": 9.612236128604707e-06, - "loss": 0.1597, - "step": 10363 - }, - { - "epoch": 0.53, - "grad_norm": 0.8891492803152726, - "learning_rate": 9.610590403909028e-06, - "loss": 0.1673, - "step": 10364 - }, - { - "epoch": 0.53, - "grad_norm": 1.4303661276989428, - "learning_rate": 9.608944689776104e-06, - "loss": 0.2045, - "step": 10365 - }, - { - "epoch": 0.53, - "grad_norm": 1.1698005064544776, - "learning_rate": 9.607298986250578e-06, - "loss": 0.1826, - "step": 10366 - }, - { - "epoch": 0.53, - "grad_norm": 0.9063023615216049, - "learning_rate": 9.605653293377088e-06, - "loss": 0.1959, - "step": 10367 - }, - { - "epoch": 0.53, - "grad_norm": 0.9339563685741792, - "learning_rate": 9.60400761120028e-06, - "loss": 0.1899, - "step": 10368 - }, - { - "epoch": 0.53, - "grad_norm": 0.9848466830667983, - "learning_rate": 9.60236193976478e-06, - "loss": 0.1742, - "step": 10369 - }, - { - "epoch": 0.53, - "grad_norm": 1.9967791172095308, - "learning_rate": 9.600716279115244e-06, - "loss": 0.182, - "step": 10370 - }, - { - "epoch": 0.53, - "grad_norm": 1.1842568174382346, - "learning_rate": 9.599070629296295e-06, - "loss": 0.1643, - "step": 10371 - }, - { - "epoch": 0.53, - "grad_norm": 1.6531054022617186, - "learning_rate": 9.597424990352578e-06, - "loss": 0.2011, - "step": 10372 - }, - { - "epoch": 0.53, - "grad_norm": 0.7400670830777677, - "learning_rate": 9.595779362328731e-06, - "loss": 0.156, - "step": 10373 - }, - { - "epoch": 0.53, - "grad_norm": 0.9571451657640488, - "learning_rate": 9.594133745269396e-06, - "loss": 0.1706, - "step": 10374 - }, - { - "epoch": 0.53, - "grad_norm": 1.1804292443357665, - "learning_rate": 9.5924881392192e-06, - "loss": 0.1502, - "step": 10375 - }, - { - "epoch": 0.53, - "grad_norm": 2.3734033967550605, - "learning_rate": 9.59084254422279e-06, - "loss": 0.1662, - "step": 10376 - }, - { - "epoch": 0.53, - "grad_norm": 1.1294070061228543, - "learning_rate": 9.589196960324797e-06, - "loss": 0.2031, - "step": 10377 - }, - { - "epoch": 0.53, - "grad_norm": 1.1415644059540622, - "learning_rate": 9.587551387569863e-06, - "loss": 0.1832, - "step": 10378 - }, - { - "epoch": 0.53, - "grad_norm": 0.9893916259689024, - "learning_rate": 9.585905826002618e-06, - "loss": 0.1943, - "step": 10379 - }, - { - "epoch": 0.53, - "grad_norm": 0.9395348895068868, - "learning_rate": 9.584260275667702e-06, - "loss": 0.1786, - "step": 10380 - }, - { - "epoch": 0.53, - "grad_norm": 0.9656296619115283, - "learning_rate": 9.582614736609746e-06, - "loss": 0.1878, - "step": 10381 - }, - { - "epoch": 0.53, - "grad_norm": 0.9396833635719322, - "learning_rate": 9.580969208873396e-06, - "loss": 0.1971, - "step": 10382 - }, - { - "epoch": 0.53, - "grad_norm": 0.850830522329186, - "learning_rate": 9.579323692503278e-06, - "loss": 0.1804, - "step": 10383 - }, - { - "epoch": 0.53, - "grad_norm": 0.9138329218506234, - "learning_rate": 9.577678187544024e-06, - "loss": 0.1811, - "step": 10384 - }, - { - "epoch": 0.53, - "grad_norm": 0.895827065989639, - "learning_rate": 9.576032694040278e-06, - "loss": 0.19, - "step": 10385 - }, - { - "epoch": 0.53, - "grad_norm": 0.9258072443873068, - "learning_rate": 9.574387212036664e-06, - "loss": 0.1925, - "step": 10386 - }, - { - "epoch": 0.53, - "grad_norm": 0.8878547805058776, - "learning_rate": 9.572741741577826e-06, - "loss": 0.2078, - "step": 10387 - }, - { - "epoch": 0.53, - "grad_norm": 1.1455362422134288, - "learning_rate": 9.571096282708388e-06, - "loss": 0.1865, - "step": 10388 - }, - { - "epoch": 0.53, - "grad_norm": 1.6733140804840556, - "learning_rate": 9.56945083547299e-06, - "loss": 0.1918, - "step": 10389 - }, - { - "epoch": 0.53, - "grad_norm": 1.1029868972832708, - "learning_rate": 9.56780539991626e-06, - "loss": 0.184, - "step": 10390 - }, - { - "epoch": 0.53, - "grad_norm": 1.2399755222533273, - "learning_rate": 9.566159976082838e-06, - "loss": 0.1819, - "step": 10391 - }, - { - "epoch": 0.53, - "grad_norm": 0.95713725716782, - "learning_rate": 9.564514564017345e-06, - "loss": 0.1873, - "step": 10392 - }, - { - "epoch": 0.53, - "grad_norm": 1.241490567498554, - "learning_rate": 9.562869163764423e-06, - "loss": 0.2063, - "step": 10393 - }, - { - "epoch": 0.53, - "grad_norm": 1.162189024921771, - "learning_rate": 9.561223775368697e-06, - "loss": 0.1756, - "step": 10394 - }, - { - "epoch": 0.53, - "grad_norm": 0.9746192496771294, - "learning_rate": 9.559578398874805e-06, - "loss": 0.189, - "step": 10395 - }, - { - "epoch": 0.53, - "grad_norm": 0.997149034861936, - "learning_rate": 9.55793303432737e-06, - "loss": 0.1811, - "step": 10396 - }, - { - "epoch": 0.53, - "grad_norm": 0.8424462531241881, - "learning_rate": 9.55628768177103e-06, - "loss": 0.1777, - "step": 10397 - }, - { - "epoch": 0.53, - "grad_norm": 0.8116625462094226, - "learning_rate": 9.554642341250408e-06, - "loss": 0.1795, - "step": 10398 - }, - { - "epoch": 0.53, - "grad_norm": 0.9758897036252532, - "learning_rate": 9.552997012810138e-06, - "loss": 0.1524, - "step": 10399 - }, - { - "epoch": 0.53, - "grad_norm": 1.7726637556307456, - "learning_rate": 9.551351696494854e-06, - "loss": 0.1866, - "step": 10400 - }, - { - "epoch": 0.53, - "grad_norm": 1.4435038850679889, - "learning_rate": 9.549706392349175e-06, - "loss": 0.1796, - "step": 10401 - }, - { - "epoch": 0.53, - "grad_norm": 1.0780415380104225, - "learning_rate": 9.548061100417739e-06, - "loss": 0.1979, - "step": 10402 - }, - { - "epoch": 0.53, - "grad_norm": 1.0693027700309574, - "learning_rate": 9.546415820745168e-06, - "loss": 0.1951, - "step": 10403 - }, - { - "epoch": 0.53, - "grad_norm": 0.9908854052260064, - "learning_rate": 9.544770553376098e-06, - "loss": 0.1904, - "step": 10404 - }, - { - "epoch": 0.53, - "grad_norm": 1.6473184583869245, - "learning_rate": 9.543125298355147e-06, - "loss": 0.1923, - "step": 10405 - }, - { - "epoch": 0.53, - "grad_norm": 1.3924433832198697, - "learning_rate": 9.54148005572695e-06, - "loss": 0.1897, - "step": 10406 - }, - { - "epoch": 0.53, - "grad_norm": 1.111984621097495, - "learning_rate": 9.539834825536131e-06, - "loss": 0.1845, - "step": 10407 - }, - { - "epoch": 0.53, - "grad_norm": 1.1519537417134282, - "learning_rate": 9.538189607827324e-06, - "loss": 0.1888, - "step": 10408 - }, - { - "epoch": 0.53, - "grad_norm": 1.07458036048325, - "learning_rate": 9.536544402645144e-06, - "loss": 0.1847, - "step": 10409 - }, - { - "epoch": 0.53, - "grad_norm": 1.5093378512311653, - "learning_rate": 9.53489921003423e-06, - "loss": 0.1835, - "step": 10410 - }, - { - "epoch": 0.53, - "grad_norm": 1.0330251895942397, - "learning_rate": 9.533254030039193e-06, - "loss": 0.1596, - "step": 10411 - }, - { - "epoch": 0.53, - "grad_norm": 1.6626992170533568, - "learning_rate": 9.531608862704672e-06, - "loss": 0.2001, - "step": 10412 - }, - { - "epoch": 0.53, - "grad_norm": 1.496480785595012, - "learning_rate": 9.529963708075284e-06, - "loss": 0.1882, - "step": 10413 - }, - { - "epoch": 0.53, - "grad_norm": 0.9989953762051174, - "learning_rate": 9.528318566195661e-06, - "loss": 0.1707, - "step": 10414 - }, - { - "epoch": 0.53, - "grad_norm": 1.35912861959528, - "learning_rate": 9.52667343711042e-06, - "loss": 0.2268, - "step": 10415 - }, - { - "epoch": 0.53, - "grad_norm": 1.3312356143026387, - "learning_rate": 9.525028320864191e-06, - "loss": 0.1828, - "step": 10416 - }, - { - "epoch": 0.53, - "grad_norm": 0.9451334639642954, - "learning_rate": 9.523383217501596e-06, - "loss": 0.1897, - "step": 10417 - }, - { - "epoch": 0.53, - "grad_norm": 1.2835826239197037, - "learning_rate": 9.521738127067254e-06, - "loss": 0.1893, - "step": 10418 - }, - { - "epoch": 0.53, - "grad_norm": 1.9477317307320914, - "learning_rate": 9.520093049605796e-06, - "loss": 0.1738, - "step": 10419 - }, - { - "epoch": 0.53, - "grad_norm": 0.9035321903439242, - "learning_rate": 9.51844798516184e-06, - "loss": 0.1919, - "step": 10420 - }, - { - "epoch": 0.53, - "grad_norm": 0.7218146957856297, - "learning_rate": 9.516802933780011e-06, - "loss": 0.1565, - "step": 10421 - }, - { - "epoch": 0.53, - "grad_norm": 1.0801580770780828, - "learning_rate": 9.515157895504927e-06, - "loss": 0.192, - "step": 10422 - }, - { - "epoch": 0.53, - "grad_norm": 0.9582296167426243, - "learning_rate": 9.513512870381216e-06, - "loss": 0.1733, - "step": 10423 - }, - { - "epoch": 0.53, - "grad_norm": 1.0867462991915926, - "learning_rate": 9.511867858453493e-06, - "loss": 0.2014, - "step": 10424 - }, - { - "epoch": 0.53, - "grad_norm": 0.7490081315040854, - "learning_rate": 9.510222859766383e-06, - "loss": 0.1687, - "step": 10425 - }, - { - "epoch": 0.53, - "grad_norm": 0.9441063005416657, - "learning_rate": 9.508577874364503e-06, - "loss": 0.1992, - "step": 10426 - }, - { - "epoch": 0.53, - "grad_norm": 2.211252729767083, - "learning_rate": 9.506932902292482e-06, - "loss": 0.182, - "step": 10427 - }, - { - "epoch": 0.53, - "grad_norm": 1.384841114501458, - "learning_rate": 9.505287943594928e-06, - "loss": 0.175, - "step": 10428 - }, - { - "epoch": 0.53, - "grad_norm": 1.2205290375607247, - "learning_rate": 9.503642998316469e-06, - "loss": 0.1751, - "step": 10429 - }, - { - "epoch": 0.53, - "grad_norm": 0.8274716728200241, - "learning_rate": 9.501998066501718e-06, - "loss": 0.1955, - "step": 10430 - }, - { - "epoch": 0.53, - "grad_norm": 1.0254602672883348, - "learning_rate": 9.500353148195305e-06, - "loss": 0.2029, - "step": 10431 - }, - { - "epoch": 0.53, - "grad_norm": 0.7968841718287084, - "learning_rate": 9.498708243441834e-06, - "loss": 0.1899, - "step": 10432 - }, - { - "epoch": 0.53, - "grad_norm": 1.0826837399121725, - "learning_rate": 9.497063352285934e-06, - "loss": 0.1851, - "step": 10433 - }, - { - "epoch": 0.53, - "grad_norm": 0.9069755430051439, - "learning_rate": 9.495418474772221e-06, - "loss": 0.2015, - "step": 10434 - }, - { - "epoch": 0.53, - "grad_norm": 1.271043067333602, - "learning_rate": 9.493773610945305e-06, - "loss": 0.194, - "step": 10435 - }, - { - "epoch": 0.53, - "grad_norm": 1.1972613171697342, - "learning_rate": 9.492128760849813e-06, - "loss": 0.2168, - "step": 10436 - }, - { - "epoch": 0.53, - "grad_norm": 1.0838621602061793, - "learning_rate": 9.490483924530353e-06, - "loss": 0.1731, - "step": 10437 - }, - { - "epoch": 0.53, - "grad_norm": 0.7529980716882423, - "learning_rate": 9.488839102031549e-06, - "loss": 0.1725, - "step": 10438 - }, - { - "epoch": 0.53, - "grad_norm": 0.8989904055035346, - "learning_rate": 9.48719429339801e-06, - "loss": 0.1859, - "step": 10439 - }, - { - "epoch": 0.53, - "grad_norm": 0.9278387486832459, - "learning_rate": 9.485549498674357e-06, - "loss": 0.191, - "step": 10440 - }, - { - "epoch": 0.53, - "grad_norm": 1.4556072385973486, - "learning_rate": 9.483904717905202e-06, - "loss": 0.1897, - "step": 10441 - }, - { - "epoch": 0.53, - "grad_norm": 0.9878938604682453, - "learning_rate": 9.48225995113516e-06, - "loss": 0.1697, - "step": 10442 - }, - { - "epoch": 0.53, - "grad_norm": 1.2287388076396861, - "learning_rate": 9.480615198408846e-06, - "loss": 0.1862, - "step": 10443 - }, - { - "epoch": 0.53, - "grad_norm": 0.8831594096054374, - "learning_rate": 9.478970459770878e-06, - "loss": 0.201, - "step": 10444 - }, - { - "epoch": 0.53, - "grad_norm": 0.9367663375726483, - "learning_rate": 9.47732573526586e-06, - "loss": 0.1834, - "step": 10445 - }, - { - "epoch": 0.53, - "grad_norm": 0.7777173866626321, - "learning_rate": 9.475681024938415e-06, - "loss": 0.2014, - "step": 10446 - }, - { - "epoch": 0.53, - "grad_norm": 1.17639472112245, - "learning_rate": 9.474036328833148e-06, - "loss": 0.1915, - "step": 10447 - }, - { - "epoch": 0.53, - "grad_norm": 0.910047139114023, - "learning_rate": 9.472391646994681e-06, - "loss": 0.1648, - "step": 10448 - }, - { - "epoch": 0.53, - "grad_norm": 1.044503529177657, - "learning_rate": 9.470746979467614e-06, - "loss": 0.1794, - "step": 10449 - }, - { - "epoch": 0.53, - "grad_norm": 0.9275408983661686, - "learning_rate": 9.46910232629657e-06, - "loss": 0.1785, - "step": 10450 - }, - { - "epoch": 0.53, - "grad_norm": 1.0062888448818028, - "learning_rate": 9.467457687526156e-06, - "loss": 0.213, - "step": 10451 - }, - { - "epoch": 0.53, - "grad_norm": 1.0758056837608503, - "learning_rate": 9.465813063200978e-06, - "loss": 0.1883, - "step": 10452 - }, - { - "epoch": 0.53, - "grad_norm": 1.200194759153906, - "learning_rate": 9.464168453365655e-06, - "loss": 0.1687, - "step": 10453 - }, - { - "epoch": 0.53, - "grad_norm": 0.8561814641014569, - "learning_rate": 9.462523858064788e-06, - "loss": 0.1801, - "step": 10454 - }, - { - "epoch": 0.53, - "grad_norm": 2.4085649439814016, - "learning_rate": 9.460879277342995e-06, - "loss": 0.1891, - "step": 10455 - }, - { - "epoch": 0.53, - "grad_norm": 1.072865594506947, - "learning_rate": 9.459234711244881e-06, - "loss": 0.1901, - "step": 10456 - }, - { - "epoch": 0.53, - "grad_norm": 1.4323014820101265, - "learning_rate": 9.457590159815058e-06, - "loss": 0.1858, - "step": 10457 - }, - { - "epoch": 0.53, - "grad_norm": 1.4154831275527877, - "learning_rate": 9.45594562309813e-06, - "loss": 0.2139, - "step": 10458 - }, - { - "epoch": 0.53, - "grad_norm": 1.0001665481648387, - "learning_rate": 9.454301101138708e-06, - "loss": 0.1876, - "step": 10459 - }, - { - "epoch": 0.53, - "grad_norm": 1.9027308619514212, - "learning_rate": 9.452656593981398e-06, - "loss": 0.1831, - "step": 10460 - }, - { - "epoch": 0.53, - "grad_norm": 0.9316900980594415, - "learning_rate": 9.451012101670814e-06, - "loss": 0.1858, - "step": 10461 - }, - { - "epoch": 0.53, - "grad_norm": 0.7836745774854093, - "learning_rate": 9.449367624251554e-06, - "loss": 0.1624, - "step": 10462 - }, - { - "epoch": 0.53, - "grad_norm": 1.230737222455105, - "learning_rate": 9.447723161768228e-06, - "loss": 0.1664, - "step": 10463 - }, - { - "epoch": 0.53, - "grad_norm": 0.8627584376712686, - "learning_rate": 9.446078714265441e-06, - "loss": 0.1978, - "step": 10464 - }, - { - "epoch": 0.53, - "grad_norm": 1.0735143941794252, - "learning_rate": 9.444434281787806e-06, - "loss": 0.1821, - "step": 10465 - }, - { - "epoch": 0.53, - "grad_norm": 1.113359953970941, - "learning_rate": 9.442789864379918e-06, - "loss": 0.1847, - "step": 10466 - }, - { - "epoch": 0.53, - "grad_norm": 1.2387280858722214, - "learning_rate": 9.44114546208639e-06, - "loss": 0.1765, - "step": 10467 - }, - { - "epoch": 0.53, - "grad_norm": 0.8166548705579885, - "learning_rate": 9.439501074951817e-06, - "loss": 0.1841, - "step": 10468 - }, - { - "epoch": 0.53, - "grad_norm": 0.9275349732677751, - "learning_rate": 9.437856703020813e-06, - "loss": 0.1949, - "step": 10469 - }, - { - "epoch": 0.53, - "grad_norm": 0.9684713113188496, - "learning_rate": 9.436212346337981e-06, - "loss": 0.2019, - "step": 10470 - }, - { - "epoch": 0.53, - "grad_norm": 2.752915393134308, - "learning_rate": 9.434568004947914e-06, - "loss": 0.1811, - "step": 10471 - }, - { - "epoch": 0.53, - "grad_norm": 1.0686608805981916, - "learning_rate": 9.432923678895225e-06, - "loss": 0.1788, - "step": 10472 - }, - { - "epoch": 0.53, - "grad_norm": 1.3131991830901886, - "learning_rate": 9.431279368224512e-06, - "loss": 0.1957, - "step": 10473 - }, - { - "epoch": 0.53, - "grad_norm": 0.8220619801982747, - "learning_rate": 9.429635072980382e-06, - "loss": 0.1781, - "step": 10474 - }, - { - "epoch": 0.53, - "grad_norm": 0.7944177343908115, - "learning_rate": 9.427990793207428e-06, - "loss": 0.1783, - "step": 10475 - }, - { - "epoch": 0.53, - "grad_norm": 0.8865612068771639, - "learning_rate": 9.426346528950258e-06, - "loss": 0.1681, - "step": 10476 - }, - { - "epoch": 0.53, - "grad_norm": 1.0013291908495037, - "learning_rate": 9.42470228025347e-06, - "loss": 0.1789, - "step": 10477 - }, - { - "epoch": 0.53, - "grad_norm": 1.1030495223038712, - "learning_rate": 9.423058047161668e-06, - "loss": 0.1759, - "step": 10478 - }, - { - "epoch": 0.53, - "grad_norm": 1.2086008136565232, - "learning_rate": 9.421413829719446e-06, - "loss": 0.191, - "step": 10479 - }, - { - "epoch": 0.53, - "grad_norm": 1.0555368232810498, - "learning_rate": 9.41976962797141e-06, - "loss": 0.2003, - "step": 10480 - }, - { - "epoch": 0.53, - "grad_norm": 0.8284304837222289, - "learning_rate": 9.418125441962151e-06, - "loss": 0.1916, - "step": 10481 - }, - { - "epoch": 0.53, - "grad_norm": 1.1149723921087935, - "learning_rate": 9.416481271736276e-06, - "loss": 0.178, - "step": 10482 - }, - { - "epoch": 0.53, - "grad_norm": 1.2590214185530384, - "learning_rate": 9.414837117338376e-06, - "loss": 0.2095, - "step": 10483 - }, - { - "epoch": 0.53, - "grad_norm": 0.9191664131662469, - "learning_rate": 9.413192978813057e-06, - "loss": 0.183, - "step": 10484 - }, - { - "epoch": 0.53, - "grad_norm": 1.0947807191380714, - "learning_rate": 9.411548856204907e-06, - "loss": 0.1784, - "step": 10485 - }, - { - "epoch": 0.53, - "grad_norm": 0.8485057552267062, - "learning_rate": 9.409904749558529e-06, - "loss": 0.1741, - "step": 10486 - }, - { - "epoch": 0.53, - "grad_norm": 1.2353579300556639, - "learning_rate": 9.408260658918522e-06, - "loss": 0.1711, - "step": 10487 - }, - { - "epoch": 0.53, - "grad_norm": 0.9047841484421405, - "learning_rate": 9.406616584329473e-06, - "loss": 0.1886, - "step": 10488 - }, - { - "epoch": 0.53, - "grad_norm": 1.0537218642384858, - "learning_rate": 9.404972525835984e-06, - "loss": 0.17, - "step": 10489 - }, - { - "epoch": 0.53, - "grad_norm": 2.490393840000147, - "learning_rate": 9.403328483482647e-06, - "loss": 0.1835, - "step": 10490 - }, - { - "epoch": 0.53, - "grad_norm": 0.9113168576797979, - "learning_rate": 9.401684457314064e-06, - "loss": 0.1985, - "step": 10491 - }, - { - "epoch": 0.53, - "grad_norm": 0.8407452044189376, - "learning_rate": 9.400040447374818e-06, - "loss": 0.1784, - "step": 10492 - }, - { - "epoch": 0.53, - "grad_norm": 1.0300144394706507, - "learning_rate": 9.398396453709514e-06, - "loss": 0.1661, - "step": 10493 - }, - { - "epoch": 0.53, - "grad_norm": 1.0476008663127399, - "learning_rate": 9.396752476362735e-06, - "loss": 0.1865, - "step": 10494 - }, - { - "epoch": 0.53, - "grad_norm": 1.6707348934781523, - "learning_rate": 9.395108515379082e-06, - "loss": 0.2134, - "step": 10495 - }, - { - "epoch": 0.53, - "grad_norm": 2.4262919869218256, - "learning_rate": 9.393464570803142e-06, - "loss": 0.2053, - "step": 10496 - }, - { - "epoch": 0.53, - "grad_norm": 1.1980144717024208, - "learning_rate": 9.391820642679515e-06, - "loss": 0.1852, - "step": 10497 - }, - { - "epoch": 0.53, - "grad_norm": 1.0619071454147688, - "learning_rate": 9.390176731052783e-06, - "loss": 0.1835, - "step": 10498 - }, - { - "epoch": 0.53, - "grad_norm": 0.9156702670758646, - "learning_rate": 9.388532835967543e-06, - "loss": 0.1979, - "step": 10499 - }, - { - "epoch": 0.53, - "grad_norm": 1.1991320317620187, - "learning_rate": 9.386888957468383e-06, - "loss": 0.1743, - "step": 10500 - }, - { - "epoch": 0.53, - "grad_norm": 1.0832346660178478, - "learning_rate": 9.385245095599897e-06, - "loss": 0.1608, - "step": 10501 - }, - { - "epoch": 0.53, - "grad_norm": 1.1136661258607057, - "learning_rate": 9.38360125040667e-06, - "loss": 0.1903, - "step": 10502 - }, - { - "epoch": 0.53, - "grad_norm": 1.9346059659450856, - "learning_rate": 9.381957421933296e-06, - "loss": 0.1833, - "step": 10503 - }, - { - "epoch": 0.53, - "grad_norm": 1.0498713030587017, - "learning_rate": 9.380313610224364e-06, - "loss": 0.1831, - "step": 10504 - }, - { - "epoch": 0.53, - "grad_norm": 1.1507233448862686, - "learning_rate": 9.378669815324456e-06, - "loss": 0.1679, - "step": 10505 - }, - { - "epoch": 0.53, - "grad_norm": 0.7585534369008222, - "learning_rate": 9.377026037278169e-06, - "loss": 0.1746, - "step": 10506 - }, - { - "epoch": 0.53, - "grad_norm": 1.1627401646562339, - "learning_rate": 9.37538227613008e-06, - "loss": 0.1868, - "step": 10507 - }, - { - "epoch": 0.53, - "grad_norm": 0.9497167071824306, - "learning_rate": 9.373738531924787e-06, - "loss": 0.1733, - "step": 10508 - }, - { - "epoch": 0.53, - "grad_norm": 0.9222271876528074, - "learning_rate": 9.372094804706867e-06, - "loss": 0.1955, - "step": 10509 - }, - { - "epoch": 0.53, - "grad_norm": 0.982110484488253, - "learning_rate": 9.370451094520915e-06, - "loss": 0.1801, - "step": 10510 - }, - { - "epoch": 0.53, - "grad_norm": 1.0602814609731621, - "learning_rate": 9.36880740141151e-06, - "loss": 0.1606, - "step": 10511 - }, - { - "epoch": 0.53, - "grad_norm": 1.2839696823036988, - "learning_rate": 9.367163725423242e-06, - "loss": 0.177, - "step": 10512 - }, - { - "epoch": 0.53, - "grad_norm": 1.2200605917065472, - "learning_rate": 9.365520066600691e-06, - "loss": 0.1706, - "step": 10513 - }, - { - "epoch": 0.53, - "grad_norm": 0.8684543483198988, - "learning_rate": 9.363876424988447e-06, - "loss": 0.19, - "step": 10514 - }, - { - "epoch": 0.53, - "grad_norm": 1.4733424165761082, - "learning_rate": 9.362232800631087e-06, - "loss": 0.1808, - "step": 10515 - }, - { - "epoch": 0.53, - "grad_norm": 1.332430221388429, - "learning_rate": 9.360589193573203e-06, - "loss": 0.1703, - "step": 10516 - }, - { - "epoch": 0.53, - "grad_norm": 1.0983837973789459, - "learning_rate": 9.358945603859369e-06, - "loss": 0.1682, - "step": 10517 - }, - { - "epoch": 0.53, - "grad_norm": 0.9061642429714712, - "learning_rate": 9.357302031534178e-06, - "loss": 0.1752, - "step": 10518 - }, - { - "epoch": 0.53, - "grad_norm": 1.0453790873171436, - "learning_rate": 9.355658476642198e-06, - "loss": 0.191, - "step": 10519 - }, - { - "epoch": 0.53, - "grad_norm": 0.8765708013616752, - "learning_rate": 9.354014939228024e-06, - "loss": 0.1748, - "step": 10520 - }, - { - "epoch": 0.54, - "grad_norm": 0.890012437695401, - "learning_rate": 9.35237141933623e-06, - "loss": 0.2308, - "step": 10521 - }, - { - "epoch": 0.54, - "grad_norm": 0.9096191796737242, - "learning_rate": 9.350727917011395e-06, - "loss": 0.1793, - "step": 10522 - }, - { - "epoch": 0.54, - "grad_norm": 1.5613106494417954, - "learning_rate": 9.349084432298106e-06, - "loss": 0.1811, - "step": 10523 - }, - { - "epoch": 0.54, - "grad_norm": 1.0860020915804065, - "learning_rate": 9.347440965240934e-06, - "loss": 0.2029, - "step": 10524 - }, - { - "epoch": 0.54, - "grad_norm": 0.8887090919405828, - "learning_rate": 9.345797515884466e-06, - "loss": 0.174, - "step": 10525 - }, - { - "epoch": 0.54, - "grad_norm": 0.8486337984116933, - "learning_rate": 9.344154084273275e-06, - "loss": 0.1791, - "step": 10526 - }, - { - "epoch": 0.54, - "grad_norm": 1.006827054252546, - "learning_rate": 9.342510670451944e-06, - "loss": 0.1654, - "step": 10527 - }, - { - "epoch": 0.54, - "grad_norm": 0.9545599629514354, - "learning_rate": 9.340867274465047e-06, - "loss": 0.1948, - "step": 10528 - }, - { - "epoch": 0.54, - "grad_norm": 1.047854515192047, - "learning_rate": 9.339223896357163e-06, - "loss": 0.1808, - "step": 10529 - }, - { - "epoch": 0.54, - "grad_norm": 1.1177400791380077, - "learning_rate": 9.337580536172864e-06, - "loss": 0.1798, - "step": 10530 - }, - { - "epoch": 0.54, - "grad_norm": 0.8060106213647832, - "learning_rate": 9.335937193956736e-06, - "loss": 0.1807, - "step": 10531 - }, - { - "epoch": 0.54, - "grad_norm": 0.884147369338201, - "learning_rate": 9.334293869753346e-06, - "loss": 0.1824, - "step": 10532 - }, - { - "epoch": 0.54, - "grad_norm": 1.082017160418502, - "learning_rate": 9.332650563607276e-06, - "loss": 0.205, - "step": 10533 - }, - { - "epoch": 0.54, - "grad_norm": 0.8124867757454877, - "learning_rate": 9.331007275563093e-06, - "loss": 0.1854, - "step": 10534 - }, - { - "epoch": 0.54, - "grad_norm": 0.8725530897469803, - "learning_rate": 9.329364005665377e-06, - "loss": 0.1858, - "step": 10535 - }, - { - "epoch": 0.54, - "grad_norm": 0.8761616764100726, - "learning_rate": 9.327720753958699e-06, - "loss": 0.2076, - "step": 10536 - }, - { - "epoch": 0.54, - "grad_norm": 1.0565332273224135, - "learning_rate": 9.326077520487637e-06, - "loss": 0.2011, - "step": 10537 - }, - { - "epoch": 0.54, - "grad_norm": 1.5342916385910432, - "learning_rate": 9.324434305296757e-06, - "loss": 0.1785, - "step": 10538 - }, - { - "epoch": 0.54, - "grad_norm": 0.9888427341066531, - "learning_rate": 9.322791108430636e-06, - "loss": 0.2056, - "step": 10539 - }, - { - "epoch": 0.54, - "grad_norm": 0.8702642528267842, - "learning_rate": 9.321147929933847e-06, - "loss": 0.1675, - "step": 10540 - }, - { - "epoch": 0.54, - "grad_norm": 1.2638876117441158, - "learning_rate": 9.319504769850953e-06, - "loss": 0.1782, - "step": 10541 - }, - { - "epoch": 0.54, - "grad_norm": 0.9326731432920613, - "learning_rate": 9.317861628226535e-06, - "loss": 0.1902, - "step": 10542 - }, - { - "epoch": 0.54, - "grad_norm": 1.0811358970036962, - "learning_rate": 9.316218505105155e-06, - "loss": 0.1943, - "step": 10543 - }, - { - "epoch": 0.54, - "grad_norm": 0.9911777388698443, - "learning_rate": 9.314575400531391e-06, - "loss": 0.2022, - "step": 10544 - }, - { - "epoch": 0.54, - "grad_norm": 1.1818692996375626, - "learning_rate": 9.312932314549804e-06, - "loss": 0.1933, - "step": 10545 - }, - { - "epoch": 0.54, - "grad_norm": 0.9087683719340895, - "learning_rate": 9.311289247204972e-06, - "loss": 0.187, - "step": 10546 - }, - { - "epoch": 0.54, - "grad_norm": 1.2647180730801626, - "learning_rate": 9.309646198541454e-06, - "loss": 0.1762, - "step": 10547 - }, - { - "epoch": 0.54, - "grad_norm": 1.0285760495995913, - "learning_rate": 9.308003168603822e-06, - "loss": 0.1709, - "step": 10548 - }, - { - "epoch": 0.54, - "grad_norm": 1.4024991467330072, - "learning_rate": 9.306360157436642e-06, - "loss": 0.1795, - "step": 10549 - }, - { - "epoch": 0.54, - "grad_norm": 0.9153367260426736, - "learning_rate": 9.304717165084486e-06, - "loss": 0.1668, - "step": 10550 - }, - { - "epoch": 0.54, - "grad_norm": 0.9666848999268505, - "learning_rate": 9.303074191591912e-06, - "loss": 0.2107, - "step": 10551 - }, - { - "epoch": 0.54, - "grad_norm": 1.443604240907049, - "learning_rate": 9.301431237003492e-06, - "loss": 0.1799, - "step": 10552 - }, - { - "epoch": 0.54, - "grad_norm": 1.1412227625228761, - "learning_rate": 9.299788301363786e-06, - "loss": 0.1912, - "step": 10553 - }, - { - "epoch": 0.54, - "grad_norm": 1.074255400598678, - "learning_rate": 9.298145384717369e-06, - "loss": 0.1648, - "step": 10554 - }, - { - "epoch": 0.54, - "grad_norm": 1.278035111588236, - "learning_rate": 9.296502487108792e-06, - "loss": 0.2043, - "step": 10555 - }, - { - "epoch": 0.54, - "grad_norm": 0.9894725590022767, - "learning_rate": 9.294859608582626e-06, - "loss": 0.1744, - "step": 10556 - }, - { - "epoch": 0.54, - "grad_norm": 0.9126589381137777, - "learning_rate": 9.293216749183437e-06, - "loss": 0.1606, - "step": 10557 - }, - { - "epoch": 0.54, - "grad_norm": 0.9971284910584742, - "learning_rate": 9.291573908955777e-06, - "loss": 0.1872, - "step": 10558 - }, - { - "epoch": 0.54, - "grad_norm": 0.9356317290539621, - "learning_rate": 9.289931087944221e-06, - "loss": 0.1674, - "step": 10559 - }, - { - "epoch": 0.54, - "grad_norm": 0.9180737699979528, - "learning_rate": 9.28828828619332e-06, - "loss": 0.1911, - "step": 10560 - }, - { - "epoch": 0.54, - "grad_norm": 1.3102951476006044, - "learning_rate": 9.286645503747641e-06, - "loss": 0.1943, - "step": 10561 - }, - { - "epoch": 0.54, - "grad_norm": 1.0486085362353716, - "learning_rate": 9.28500274065174e-06, - "loss": 0.172, - "step": 10562 - }, - { - "epoch": 0.54, - "grad_norm": 1.0143367843255702, - "learning_rate": 9.283359996950187e-06, - "loss": 0.1923, - "step": 10563 - }, - { - "epoch": 0.54, - "grad_norm": 1.0226800184238658, - "learning_rate": 9.281717272687527e-06, - "loss": 0.1905, - "step": 10564 - }, - { - "epoch": 0.54, - "grad_norm": 1.0633593771594727, - "learning_rate": 9.28007456790833e-06, - "loss": 0.1949, - "step": 10565 - }, - { - "epoch": 0.54, - "grad_norm": 0.9299341580114174, - "learning_rate": 9.27843188265715e-06, - "loss": 0.1804, - "step": 10566 - }, - { - "epoch": 0.54, - "grad_norm": 1.169669224468857, - "learning_rate": 9.276789216978549e-06, - "loss": 0.2227, - "step": 10567 - }, - { - "epoch": 0.54, - "grad_norm": 0.9097009835886758, - "learning_rate": 9.275146570917077e-06, - "loss": 0.1884, - "step": 10568 - }, - { - "epoch": 0.54, - "grad_norm": 0.9145517605087363, - "learning_rate": 9.273503944517298e-06, - "loss": 0.1675, - "step": 10569 - }, - { - "epoch": 0.54, - "grad_norm": 0.8695056976150293, - "learning_rate": 9.271861337823763e-06, - "loss": 0.1663, - "step": 10570 - }, - { - "epoch": 0.54, - "grad_norm": 0.912319748951526, - "learning_rate": 9.270218750881036e-06, - "loss": 0.1891, - "step": 10571 - }, - { - "epoch": 0.54, - "grad_norm": 1.1769578406569654, - "learning_rate": 9.26857618373366e-06, - "loss": 0.2083, - "step": 10572 - }, - { - "epoch": 0.54, - "grad_norm": 1.1584981841543633, - "learning_rate": 9.2669336364262e-06, - "loss": 0.1728, - "step": 10573 - }, - { - "epoch": 0.54, - "grad_norm": 0.7834513467328376, - "learning_rate": 9.265291109003208e-06, - "loss": 0.1634, - "step": 10574 - }, - { - "epoch": 0.54, - "grad_norm": 1.5024126986383335, - "learning_rate": 9.263648601509231e-06, - "loss": 0.1773, - "step": 10575 - }, - { - "epoch": 0.54, - "grad_norm": 0.9605455781620283, - "learning_rate": 9.262006113988832e-06, - "loss": 0.1835, - "step": 10576 - }, - { - "epoch": 0.54, - "grad_norm": 0.7979609875885121, - "learning_rate": 9.260363646486556e-06, - "loss": 0.185, - "step": 10577 - }, - { - "epoch": 0.54, - "grad_norm": 1.0930640857959548, - "learning_rate": 9.25872119904696e-06, - "loss": 0.1911, - "step": 10578 - }, - { - "epoch": 0.54, - "grad_norm": 1.5730311189725499, - "learning_rate": 9.257078771714591e-06, - "loss": 0.1897, - "step": 10579 - }, - { - "epoch": 0.54, - "grad_norm": 1.00160813983151, - "learning_rate": 9.255436364534005e-06, - "loss": 0.2044, - "step": 10580 - }, - { - "epoch": 0.54, - "grad_norm": 0.907234305672958, - "learning_rate": 9.253793977549747e-06, - "loss": 0.1529, - "step": 10581 - }, - { - "epoch": 0.54, - "grad_norm": 1.7767435462070758, - "learning_rate": 9.25215161080637e-06, - "loss": 0.1958, - "step": 10582 - }, - { - "epoch": 0.54, - "grad_norm": 0.9855417730617082, - "learning_rate": 9.250509264348422e-06, - "loss": 0.2006, - "step": 10583 - }, - { - "epoch": 0.54, - "grad_norm": 0.8226800135543453, - "learning_rate": 9.248866938220456e-06, - "loss": 0.1796, - "step": 10584 - }, - { - "epoch": 0.54, - "grad_norm": 0.8673771835954791, - "learning_rate": 9.247224632467014e-06, - "loss": 0.1813, - "step": 10585 - }, - { - "epoch": 0.54, - "grad_norm": 1.6398020717643, - "learning_rate": 9.245582347132646e-06, - "loss": 0.1856, - "step": 10586 - }, - { - "epoch": 0.54, - "grad_norm": 0.9801202936924129, - "learning_rate": 9.2439400822619e-06, - "loss": 0.1629, - "step": 10587 - }, - { - "epoch": 0.54, - "grad_norm": 1.09510375553041, - "learning_rate": 9.242297837899325e-06, - "loss": 0.1768, - "step": 10588 - }, - { - "epoch": 0.54, - "grad_norm": 1.005381026821629, - "learning_rate": 9.240655614089459e-06, - "loss": 0.2058, - "step": 10589 - }, - { - "epoch": 0.54, - "grad_norm": 1.3320793772483235, - "learning_rate": 9.239013410876856e-06, - "loss": 0.1891, - "step": 10590 - }, - { - "epoch": 0.54, - "grad_norm": 1.1738236438179857, - "learning_rate": 9.237371228306057e-06, - "loss": 0.202, - "step": 10591 - }, - { - "epoch": 0.54, - "grad_norm": 1.1494644621728747, - "learning_rate": 9.235729066421604e-06, - "loss": 0.1738, - "step": 10592 - }, - { - "epoch": 0.54, - "grad_norm": 0.9827935431430217, - "learning_rate": 9.234086925268046e-06, - "loss": 0.1813, - "step": 10593 - }, - { - "epoch": 0.54, - "grad_norm": 1.1216711618319328, - "learning_rate": 9.232444804889921e-06, - "loss": 0.1901, - "step": 10594 - }, - { - "epoch": 0.54, - "grad_norm": 1.3244857919711657, - "learning_rate": 9.230802705331776e-06, - "loss": 0.184, - "step": 10595 - }, - { - "epoch": 0.54, - "grad_norm": 0.9137910573447837, - "learning_rate": 9.229160626638148e-06, - "loss": 0.1817, - "step": 10596 - }, - { - "epoch": 0.54, - "grad_norm": 0.9236684179786859, - "learning_rate": 9.227518568853587e-06, - "loss": 0.1825, - "step": 10597 - }, - { - "epoch": 0.54, - "grad_norm": 1.135558348639019, - "learning_rate": 9.225876532022623e-06, - "loss": 0.1815, - "step": 10598 - }, - { - "epoch": 0.54, - "grad_norm": 0.9099245816546617, - "learning_rate": 9.224234516189803e-06, - "loss": 0.1802, - "step": 10599 - }, - { - "epoch": 0.54, - "grad_norm": 0.936960906132101, - "learning_rate": 9.222592521399666e-06, - "loss": 0.1844, - "step": 10600 - }, - { - "epoch": 0.54, - "grad_norm": 1.196191487570183, - "learning_rate": 9.220950547696754e-06, - "loss": 0.1801, - "step": 10601 - }, - { - "epoch": 0.54, - "grad_norm": 1.1492432241366455, - "learning_rate": 9.219308595125598e-06, - "loss": 0.1677, - "step": 10602 - }, - { - "epoch": 0.54, - "grad_norm": 1.2051869978190075, - "learning_rate": 9.217666663730744e-06, - "loss": 0.1814, - "step": 10603 - }, - { - "epoch": 0.54, - "grad_norm": 1.0243526918755632, - "learning_rate": 9.216024753556722e-06, - "loss": 0.1958, - "step": 10604 - }, - { - "epoch": 0.54, - "grad_norm": 1.084683646176466, - "learning_rate": 9.214382864648075e-06, - "loss": 0.1562, - "step": 10605 - }, - { - "epoch": 0.54, - "grad_norm": 1.080449899563939, - "learning_rate": 9.212740997049335e-06, - "loss": 0.2075, - "step": 10606 - }, - { - "epoch": 0.54, - "grad_norm": 1.2832964653669452, - "learning_rate": 9.211099150805046e-06, - "loss": 0.1761, - "step": 10607 - }, - { - "epoch": 0.54, - "grad_norm": 1.7528709193932426, - "learning_rate": 9.209457325959731e-06, - "loss": 0.1813, - "step": 10608 - }, - { - "epoch": 0.54, - "grad_norm": 0.8444403300450413, - "learning_rate": 9.207815522557932e-06, - "loss": 0.1839, - "step": 10609 - }, - { - "epoch": 0.54, - "grad_norm": 0.9972227451408476, - "learning_rate": 9.206173740644186e-06, - "loss": 0.1934, - "step": 10610 - }, - { - "epoch": 0.54, - "grad_norm": 3.8715618480349923, - "learning_rate": 9.204531980263017e-06, - "loss": 0.191, - "step": 10611 - }, - { - "epoch": 0.54, - "grad_norm": 1.2879592708261964, - "learning_rate": 9.202890241458963e-06, - "loss": 0.1668, - "step": 10612 - }, - { - "epoch": 0.54, - "grad_norm": 0.815992453516756, - "learning_rate": 9.201248524276557e-06, - "loss": 0.1838, - "step": 10613 - }, - { - "epoch": 0.54, - "grad_norm": 1.3697672435436143, - "learning_rate": 9.199606828760333e-06, - "loss": 0.2018, - "step": 10614 - }, - { - "epoch": 0.54, - "grad_norm": 1.0653803940723892, - "learning_rate": 9.197965154954815e-06, - "loss": 0.1773, - "step": 10615 - }, - { - "epoch": 0.54, - "grad_norm": 0.7669012746424213, - "learning_rate": 9.196323502904542e-06, - "loss": 0.1711, - "step": 10616 - }, - { - "epoch": 0.54, - "grad_norm": 1.2937877619242906, - "learning_rate": 9.194681872654034e-06, - "loss": 0.1799, - "step": 10617 - }, - { - "epoch": 0.54, - "grad_norm": 1.490122996501818, - "learning_rate": 9.19304026424783e-06, - "loss": 0.1531, - "step": 10618 - }, - { - "epoch": 0.54, - "grad_norm": 1.3258244978870801, - "learning_rate": 9.191398677730449e-06, - "loss": 0.1871, - "step": 10619 - }, - { - "epoch": 0.54, - "grad_norm": 0.9765260196003005, - "learning_rate": 9.189757113146431e-06, - "loss": 0.1832, - "step": 10620 - }, - { - "epoch": 0.54, - "grad_norm": 1.1123303040511696, - "learning_rate": 9.188115570540292e-06, - "loss": 0.1659, - "step": 10621 - }, - { - "epoch": 0.54, - "grad_norm": 1.132217710576086, - "learning_rate": 9.186474049956568e-06, - "loss": 0.1827, - "step": 10622 - }, - { - "epoch": 0.54, - "grad_norm": 1.0677326613521958, - "learning_rate": 9.184832551439777e-06, - "loss": 0.1999, - "step": 10623 - }, - { - "epoch": 0.54, - "grad_norm": 0.8733102154009217, - "learning_rate": 9.183191075034455e-06, - "loss": 0.1573, - "step": 10624 - }, - { - "epoch": 0.54, - "grad_norm": 1.4879165809895192, - "learning_rate": 9.181549620785115e-06, - "loss": 0.1958, - "step": 10625 - }, - { - "epoch": 0.54, - "grad_norm": 0.989982301785905, - "learning_rate": 9.179908188736291e-06, - "loss": 0.1876, - "step": 10626 - }, - { - "epoch": 0.54, - "grad_norm": 0.9870998274556386, - "learning_rate": 9.178266778932506e-06, - "loss": 0.1604, - "step": 10627 - }, - { - "epoch": 0.54, - "grad_norm": 1.0754699021323235, - "learning_rate": 9.176625391418277e-06, - "loss": 0.1792, - "step": 10628 - }, - { - "epoch": 0.54, - "grad_norm": 0.8355366004814154, - "learning_rate": 9.174984026238134e-06, - "loss": 0.1895, - "step": 10629 - }, - { - "epoch": 0.54, - "grad_norm": 1.4989380803523193, - "learning_rate": 9.173342683436593e-06, - "loss": 0.2049, - "step": 10630 - }, - { - "epoch": 0.54, - "grad_norm": 0.7914223255584208, - "learning_rate": 9.171701363058179e-06, - "loss": 0.1691, - "step": 10631 - }, - { - "epoch": 0.54, - "grad_norm": 0.9393068625759057, - "learning_rate": 9.17006006514741e-06, - "loss": 0.1749, - "step": 10632 - }, - { - "epoch": 0.54, - "grad_norm": 1.1462009659205226, - "learning_rate": 9.168418789748815e-06, - "loss": 0.1781, - "step": 10633 - }, - { - "epoch": 0.54, - "grad_norm": 1.3736892729869854, - "learning_rate": 9.166777536906901e-06, - "loss": 0.1726, - "step": 10634 - }, - { - "epoch": 0.54, - "grad_norm": 1.1734054679774246, - "learning_rate": 9.165136306666196e-06, - "loss": 0.1598, - "step": 10635 - }, - { - "epoch": 0.54, - "grad_norm": 0.8581227962415139, - "learning_rate": 9.163495099071214e-06, - "loss": 0.1763, - "step": 10636 - }, - { - "epoch": 0.54, - "grad_norm": 0.9346363358837523, - "learning_rate": 9.161853914166479e-06, - "loss": 0.2053, - "step": 10637 - }, - { - "epoch": 0.54, - "grad_norm": 1.0907314457043735, - "learning_rate": 9.1602127519965e-06, - "loss": 0.1876, - "step": 10638 - }, - { - "epoch": 0.54, - "grad_norm": 0.9318701442009211, - "learning_rate": 9.1585716126058e-06, - "loss": 0.1759, - "step": 10639 - }, - { - "epoch": 0.54, - "grad_norm": 0.9744826376394506, - "learning_rate": 9.15693049603889e-06, - "loss": 0.2064, - "step": 10640 - }, - { - "epoch": 0.54, - "grad_norm": 1.0207950872373368, - "learning_rate": 9.155289402340291e-06, - "loss": 0.1799, - "step": 10641 - }, - { - "epoch": 0.54, - "grad_norm": 0.9647089007127575, - "learning_rate": 9.153648331554511e-06, - "loss": 0.1964, - "step": 10642 - }, - { - "epoch": 0.54, - "grad_norm": 1.1810311399011428, - "learning_rate": 9.152007283726073e-06, - "loss": 0.1655, - "step": 10643 - }, - { - "epoch": 0.54, - "grad_norm": 1.67131220083229, - "learning_rate": 9.150366258899483e-06, - "loss": 0.2026, - "step": 10644 - }, - { - "epoch": 0.54, - "grad_norm": 1.0390141748363309, - "learning_rate": 9.148725257119253e-06, - "loss": 0.1978, - "step": 10645 - }, - { - "epoch": 0.54, - "grad_norm": 2.4274642279887897, - "learning_rate": 9.147084278429903e-06, - "loss": 0.1734, - "step": 10646 - }, - { - "epoch": 0.54, - "grad_norm": 1.1536905388237146, - "learning_rate": 9.145443322875937e-06, - "loss": 0.1664, - "step": 10647 - }, - { - "epoch": 0.54, - "grad_norm": 1.0672186751727026, - "learning_rate": 9.14380239050187e-06, - "loss": 0.1857, - "step": 10648 - }, - { - "epoch": 0.54, - "grad_norm": 1.1406716880450503, - "learning_rate": 9.14216148135221e-06, - "loss": 0.1912, - "step": 10649 - }, - { - "epoch": 0.54, - "grad_norm": 1.6124381866654078, - "learning_rate": 9.140520595471473e-06, - "loss": 0.1919, - "step": 10650 - }, - { - "epoch": 0.54, - "grad_norm": 0.9507612008810503, - "learning_rate": 9.138879732904157e-06, - "loss": 0.2101, - "step": 10651 - }, - { - "epoch": 0.54, - "grad_norm": 0.8238385961207768, - "learning_rate": 9.137238893694781e-06, - "loss": 0.2056, - "step": 10652 - }, - { - "epoch": 0.54, - "grad_norm": 0.9904799908904333, - "learning_rate": 9.135598077887846e-06, - "loss": 0.192, - "step": 10653 - }, - { - "epoch": 0.54, - "grad_norm": 1.0593204896708142, - "learning_rate": 9.133957285527868e-06, - "loss": 0.1807, - "step": 10654 - }, - { - "epoch": 0.54, - "grad_norm": 0.8334129482294383, - "learning_rate": 9.132316516659341e-06, - "loss": 0.1912, - "step": 10655 - }, - { - "epoch": 0.54, - "grad_norm": 2.878014703805649, - "learning_rate": 9.130675771326783e-06, - "loss": 0.1927, - "step": 10656 - }, - { - "epoch": 0.54, - "grad_norm": 1.0126816393076499, - "learning_rate": 9.129035049574688e-06, - "loss": 0.2007, - "step": 10657 - }, - { - "epoch": 0.54, - "grad_norm": 1.280017082713366, - "learning_rate": 9.12739435144757e-06, - "loss": 0.1823, - "step": 10658 - }, - { - "epoch": 0.54, - "grad_norm": 1.2509973026115213, - "learning_rate": 9.125753676989926e-06, - "loss": 0.1979, - "step": 10659 - }, - { - "epoch": 0.54, - "grad_norm": 1.090298495463318, - "learning_rate": 9.124113026246268e-06, - "loss": 0.1854, - "step": 10660 - }, - { - "epoch": 0.54, - "grad_norm": 1.5921016493384281, - "learning_rate": 9.122472399261092e-06, - "loss": 0.1974, - "step": 10661 - }, - { - "epoch": 0.54, - "grad_norm": 0.9171169378239742, - "learning_rate": 9.1208317960789e-06, - "loss": 0.197, - "step": 10662 - }, - { - "epoch": 0.54, - "grad_norm": 0.8770555727852963, - "learning_rate": 9.119191216744197e-06, - "loss": 0.2032, - "step": 10663 - }, - { - "epoch": 0.54, - "grad_norm": 1.3314627171471414, - "learning_rate": 9.11755066130148e-06, - "loss": 0.1925, - "step": 10664 - }, - { - "epoch": 0.54, - "grad_norm": 0.8738649746145589, - "learning_rate": 9.115910129795252e-06, - "loss": 0.184, - "step": 10665 - }, - { - "epoch": 0.54, - "grad_norm": 0.9780393428276124, - "learning_rate": 9.11426962227001e-06, - "loss": 0.1919, - "step": 10666 - }, - { - "epoch": 0.54, - "grad_norm": 0.9004068467150715, - "learning_rate": 9.112629138770259e-06, - "loss": 0.1904, - "step": 10667 - }, - { - "epoch": 0.54, - "grad_norm": 1.0289144423469088, - "learning_rate": 9.110988679340488e-06, - "loss": 0.1721, - "step": 10668 - }, - { - "epoch": 0.54, - "grad_norm": 1.1838230125708527, - "learning_rate": 9.109348244025204e-06, - "loss": 0.1929, - "step": 10669 - }, - { - "epoch": 0.54, - "grad_norm": 0.9743941893483662, - "learning_rate": 9.107707832868896e-06, - "loss": 0.1949, - "step": 10670 - }, - { - "epoch": 0.54, - "grad_norm": 0.8473092924176169, - "learning_rate": 9.106067445916064e-06, - "loss": 0.1724, - "step": 10671 - }, - { - "epoch": 0.54, - "grad_norm": 0.957361674100075, - "learning_rate": 9.104427083211201e-06, - "loss": 0.1669, - "step": 10672 - }, - { - "epoch": 0.54, - "grad_norm": 0.7641459772145157, - "learning_rate": 9.10278674479881e-06, - "loss": 0.166, - "step": 10673 - }, - { - "epoch": 0.54, - "grad_norm": 0.9649873289264107, - "learning_rate": 9.101146430723373e-06, - "loss": 0.157, - "step": 10674 - }, - { - "epoch": 0.54, - "grad_norm": 1.695599698503498, - "learning_rate": 9.099506141029393e-06, - "loss": 0.1863, - "step": 10675 - }, - { - "epoch": 0.54, - "grad_norm": 1.041754790008622, - "learning_rate": 9.097865875761356e-06, - "loss": 0.1821, - "step": 10676 - }, - { - "epoch": 0.54, - "grad_norm": 1.4290400240424552, - "learning_rate": 9.096225634963764e-06, - "loss": 0.1835, - "step": 10677 - }, - { - "epoch": 0.54, - "grad_norm": 1.2286969932742677, - "learning_rate": 9.094585418681098e-06, - "loss": 0.1774, - "step": 10678 - }, - { - "epoch": 0.54, - "grad_norm": 0.8841463674321048, - "learning_rate": 9.092945226957856e-06, - "loss": 0.1698, - "step": 10679 - }, - { - "epoch": 0.54, - "grad_norm": 1.219517297991463, - "learning_rate": 9.091305059838528e-06, - "loss": 0.1917, - "step": 10680 - }, - { - "epoch": 0.54, - "grad_norm": 1.2140141811496525, - "learning_rate": 9.089664917367597e-06, - "loss": 0.1842, - "step": 10681 - }, - { - "epoch": 0.54, - "grad_norm": 1.2792334097251197, - "learning_rate": 9.088024799589557e-06, - "loss": 0.2023, - "step": 10682 - }, - { - "epoch": 0.54, - "grad_norm": 1.0618023545637925, - "learning_rate": 9.086384706548897e-06, - "loss": 0.1664, - "step": 10683 - }, - { - "epoch": 0.54, - "grad_norm": 4.191493755751196, - "learning_rate": 9.084744638290105e-06, - "loss": 0.1846, - "step": 10684 - }, - { - "epoch": 0.54, - "grad_norm": 0.9933607568905328, - "learning_rate": 9.083104594857663e-06, - "loss": 0.1936, - "step": 10685 - }, - { - "epoch": 0.54, - "grad_norm": 2.8200920801777767, - "learning_rate": 9.081464576296066e-06, - "loss": 0.1865, - "step": 10686 - }, - { - "epoch": 0.54, - "grad_norm": 0.971900461440888, - "learning_rate": 9.079824582649788e-06, - "loss": 0.1748, - "step": 10687 - }, - { - "epoch": 0.54, - "grad_norm": 0.8686920317236623, - "learning_rate": 9.078184613963324e-06, - "loss": 0.1749, - "step": 10688 - }, - { - "epoch": 0.54, - "grad_norm": 1.0556078077071391, - "learning_rate": 9.076544670281153e-06, - "loss": 0.1644, - "step": 10689 - }, - { - "epoch": 0.54, - "grad_norm": 0.8479512987307262, - "learning_rate": 9.074904751647762e-06, - "loss": 0.1655, - "step": 10690 - }, - { - "epoch": 0.54, - "grad_norm": 2.9580535758655278, - "learning_rate": 9.073264858107628e-06, - "loss": 0.2055, - "step": 10691 - }, - { - "epoch": 0.54, - "grad_norm": 1.0938096694712256, - "learning_rate": 9.071624989705241e-06, - "loss": 0.1681, - "step": 10692 - }, - { - "epoch": 0.54, - "grad_norm": 1.4343843624625687, - "learning_rate": 9.069985146485075e-06, - "loss": 0.1951, - "step": 10693 - }, - { - "epoch": 0.54, - "grad_norm": 0.9902205037582185, - "learning_rate": 9.068345328491619e-06, - "loss": 0.1865, - "step": 10694 - }, - { - "epoch": 0.54, - "grad_norm": 0.9372712378232515, - "learning_rate": 9.066705535769345e-06, - "loss": 0.1802, - "step": 10695 - }, - { - "epoch": 0.54, - "grad_norm": 1.4848712846231706, - "learning_rate": 9.065065768362736e-06, - "loss": 0.1754, - "step": 10696 - }, - { - "epoch": 0.54, - "grad_norm": 1.5807359672516952, - "learning_rate": 9.063426026316275e-06, - "loss": 0.1838, - "step": 10697 - }, - { - "epoch": 0.54, - "grad_norm": 1.059155620869402, - "learning_rate": 9.061786309674431e-06, - "loss": 0.1754, - "step": 10698 - }, - { - "epoch": 0.54, - "grad_norm": 0.9541798586769945, - "learning_rate": 9.060146618481692e-06, - "loss": 0.1656, - "step": 10699 - }, - { - "epoch": 0.54, - "grad_norm": 1.81118910909562, - "learning_rate": 9.058506952782522e-06, - "loss": 0.2161, - "step": 10700 - }, - { - "epoch": 0.54, - "grad_norm": 1.0132204443059734, - "learning_rate": 9.056867312621408e-06, - "loss": 0.1715, - "step": 10701 - }, - { - "epoch": 0.54, - "grad_norm": 1.0257858478307955, - "learning_rate": 9.05522769804282e-06, - "loss": 0.193, - "step": 10702 - }, - { - "epoch": 0.54, - "grad_norm": 0.8758821720127123, - "learning_rate": 9.053588109091236e-06, - "loss": 0.1795, - "step": 10703 - }, - { - "epoch": 0.54, - "grad_norm": 1.459217006890036, - "learning_rate": 9.051948545811125e-06, - "loss": 0.1789, - "step": 10704 - }, - { - "epoch": 0.54, - "grad_norm": 1.0812146827278828, - "learning_rate": 9.050309008246965e-06, - "loss": 0.2045, - "step": 10705 - }, - { - "epoch": 0.54, - "grad_norm": 2.7067803049987202, - "learning_rate": 9.048669496443226e-06, - "loss": 0.197, - "step": 10706 - }, - { - "epoch": 0.54, - "grad_norm": 1.0480730399539124, - "learning_rate": 9.047030010444382e-06, - "loss": 0.1772, - "step": 10707 - }, - { - "epoch": 0.54, - "grad_norm": 0.9990838162286354, - "learning_rate": 9.045390550294901e-06, - "loss": 0.1904, - "step": 10708 - }, - { - "epoch": 0.54, - "grad_norm": 0.8913607947233958, - "learning_rate": 9.043751116039257e-06, - "loss": 0.182, - "step": 10709 - }, - { - "epoch": 0.54, - "grad_norm": 1.0511457664452766, - "learning_rate": 9.042111707721916e-06, - "loss": 0.1594, - "step": 10710 - }, - { - "epoch": 0.54, - "grad_norm": 1.0732583932499873, - "learning_rate": 9.040472325387352e-06, - "loss": 0.2012, - "step": 10711 - }, - { - "epoch": 0.54, - "grad_norm": 1.903939596274699, - "learning_rate": 9.038832969080029e-06, - "loss": 0.1879, - "step": 10712 - }, - { - "epoch": 0.54, - "grad_norm": 2.200570819981873, - "learning_rate": 9.037193638844417e-06, - "loss": 0.1818, - "step": 10713 - }, - { - "epoch": 0.54, - "grad_norm": 0.7451402376048245, - "learning_rate": 9.035554334724982e-06, - "loss": 0.1894, - "step": 10714 - }, - { - "epoch": 0.54, - "grad_norm": 1.0144393292155092, - "learning_rate": 9.033915056766187e-06, - "loss": 0.1624, - "step": 10715 - }, - { - "epoch": 0.54, - "grad_norm": 1.1598482461764674, - "learning_rate": 9.032275805012505e-06, - "loss": 0.187, - "step": 10716 - }, - { - "epoch": 0.54, - "grad_norm": 1.4088383790140577, - "learning_rate": 9.030636579508392e-06, - "loss": 0.1993, - "step": 10717 - }, - { - "epoch": 0.55, - "grad_norm": 0.9323936061752953, - "learning_rate": 9.02899738029832e-06, - "loss": 0.1877, - "step": 10718 - }, - { - "epoch": 0.55, - "grad_norm": 1.6577111888773488, - "learning_rate": 9.027358207426745e-06, - "loss": 0.2049, - "step": 10719 - }, - { - "epoch": 0.55, - "grad_norm": 0.9143010624235737, - "learning_rate": 9.025719060938138e-06, - "loss": 0.1708, - "step": 10720 - }, - { - "epoch": 0.55, - "grad_norm": 0.8855990663781108, - "learning_rate": 9.024079940876954e-06, - "loss": 0.199, - "step": 10721 - }, - { - "epoch": 0.55, - "grad_norm": 0.9750804804995725, - "learning_rate": 9.022440847287656e-06, - "loss": 0.2279, - "step": 10722 - }, - { - "epoch": 0.55, - "grad_norm": 0.8803501296190209, - "learning_rate": 9.020801780214705e-06, - "loss": 0.1813, - "step": 10723 - }, - { - "epoch": 0.55, - "grad_norm": 0.9742924036790803, - "learning_rate": 9.019162739702564e-06, - "loss": 0.1951, - "step": 10724 - }, - { - "epoch": 0.55, - "grad_norm": 1.0582094217135745, - "learning_rate": 9.017523725795688e-06, - "loss": 0.1769, - "step": 10725 - }, - { - "epoch": 0.55, - "grad_norm": 0.9670077934281341, - "learning_rate": 9.015884738538537e-06, - "loss": 0.1735, - "step": 10726 - }, - { - "epoch": 0.55, - "grad_norm": 0.9456609790651568, - "learning_rate": 9.014245777975565e-06, - "loss": 0.1664, - "step": 10727 - }, - { - "epoch": 0.55, - "grad_norm": 1.5450763527086255, - "learning_rate": 9.012606844151235e-06, - "loss": 0.2163, - "step": 10728 - }, - { - "epoch": 0.55, - "grad_norm": 0.8390513214365491, - "learning_rate": 9.010967937109997e-06, - "loss": 0.1823, - "step": 10729 - }, - { - "epoch": 0.55, - "grad_norm": 1.0167610566569403, - "learning_rate": 9.009329056896314e-06, - "loss": 0.2059, - "step": 10730 - }, - { - "epoch": 0.55, - "grad_norm": 0.8110446302914665, - "learning_rate": 9.007690203554636e-06, - "loss": 0.1828, - "step": 10731 - }, - { - "epoch": 0.55, - "grad_norm": 0.9537201974786012, - "learning_rate": 9.006051377129412e-06, - "loss": 0.1662, - "step": 10732 - }, - { - "epoch": 0.55, - "grad_norm": 0.8364630993085369, - "learning_rate": 9.004412577665107e-06, - "loss": 0.166, - "step": 10733 - }, - { - "epoch": 0.55, - "grad_norm": 0.9991791474471617, - "learning_rate": 9.002773805206161e-06, - "loss": 0.198, - "step": 10734 - }, - { - "epoch": 0.55, - "grad_norm": 1.328538844649469, - "learning_rate": 9.001135059797036e-06, - "loss": 0.1678, - "step": 10735 - }, - { - "epoch": 0.55, - "grad_norm": 1.333354113863571, - "learning_rate": 8.999496341482176e-06, - "loss": 0.1917, - "step": 10736 - }, - { - "epoch": 0.55, - "grad_norm": 1.1206005428077606, - "learning_rate": 8.997857650306038e-06, - "loss": 0.209, - "step": 10737 - }, - { - "epoch": 0.55, - "grad_norm": 1.1991656192292084, - "learning_rate": 8.996218986313063e-06, - "loss": 0.2033, - "step": 10738 - }, - { - "epoch": 0.55, - "grad_norm": 11.08974914382019, - "learning_rate": 8.994580349547711e-06, - "loss": 0.1793, - "step": 10739 - }, - { - "epoch": 0.55, - "grad_norm": 0.8497449718022965, - "learning_rate": 8.992941740054418e-06, - "loss": 0.1951, - "step": 10740 - }, - { - "epoch": 0.55, - "grad_norm": 1.2854953153089266, - "learning_rate": 8.99130315787764e-06, - "loss": 0.1944, - "step": 10741 - }, - { - "epoch": 0.55, - "grad_norm": 1.316795993709575, - "learning_rate": 8.989664603061818e-06, - "loss": 0.1785, - "step": 10742 - }, - { - "epoch": 0.55, - "grad_norm": 0.858883046960564, - "learning_rate": 8.988026075651406e-06, - "loss": 0.1789, - "step": 10743 - }, - { - "epoch": 0.55, - "grad_norm": 0.7604592648249685, - "learning_rate": 8.986387575690839e-06, - "loss": 0.1655, - "step": 10744 - }, - { - "epoch": 0.55, - "grad_norm": 0.9078061178716357, - "learning_rate": 8.984749103224568e-06, - "loss": 0.2136, - "step": 10745 - }, - { - "epoch": 0.55, - "grad_norm": 0.7417197418190933, - "learning_rate": 8.983110658297033e-06, - "loss": 0.1641, - "step": 10746 - }, - { - "epoch": 0.55, - "grad_norm": 1.0208350376886899, - "learning_rate": 8.981472240952683e-06, - "loss": 0.1941, - "step": 10747 - }, - { - "epoch": 0.55, - "grad_norm": 1.0770493927619436, - "learning_rate": 8.979833851235952e-06, - "loss": 0.181, - "step": 10748 - }, - { - "epoch": 0.55, - "grad_norm": 1.2738105750757516, - "learning_rate": 8.978195489191288e-06, - "loss": 0.1891, - "step": 10749 - }, - { - "epoch": 0.55, - "grad_norm": 1.3508481124158724, - "learning_rate": 8.97655715486313e-06, - "loss": 0.1934, - "step": 10750 - }, - { - "epoch": 0.55, - "grad_norm": 0.9695221805853889, - "learning_rate": 8.974918848295914e-06, - "loss": 0.1796, - "step": 10751 - }, - { - "epoch": 0.55, - "grad_norm": 1.2229861774074742, - "learning_rate": 8.973280569534086e-06, - "loss": 0.1776, - "step": 10752 - }, - { - "epoch": 0.55, - "grad_norm": 0.9182531427775426, - "learning_rate": 8.971642318622076e-06, - "loss": 0.1921, - "step": 10753 - }, - { - "epoch": 0.55, - "grad_norm": 0.8634715877601445, - "learning_rate": 8.970004095604329e-06, - "loss": 0.177, - "step": 10754 - }, - { - "epoch": 0.55, - "grad_norm": 0.8793938254743472, - "learning_rate": 8.968365900525275e-06, - "loss": 0.1857, - "step": 10755 - }, - { - "epoch": 0.55, - "grad_norm": 0.9715431874467615, - "learning_rate": 8.966727733429362e-06, - "loss": 0.1889, - "step": 10756 - }, - { - "epoch": 0.55, - "grad_norm": 0.9027070073199415, - "learning_rate": 8.96508959436101e-06, - "loss": 0.1761, - "step": 10757 - }, - { - "epoch": 0.55, - "grad_norm": 1.1654803110367036, - "learning_rate": 8.963451483364665e-06, - "loss": 0.1877, - "step": 10758 - }, - { - "epoch": 0.55, - "grad_norm": 1.002974639763267, - "learning_rate": 8.961813400484753e-06, - "loss": 0.186, - "step": 10759 - }, - { - "epoch": 0.55, - "grad_norm": 0.9182051316480477, - "learning_rate": 8.960175345765718e-06, - "loss": 0.1783, - "step": 10760 - }, - { - "epoch": 0.55, - "grad_norm": 1.1405988075873255, - "learning_rate": 8.95853731925198e-06, - "loss": 0.1648, - "step": 10761 - }, - { - "epoch": 0.55, - "grad_norm": 0.7103698934238166, - "learning_rate": 8.956899320987977e-06, - "loss": 0.1845, - "step": 10762 - }, - { - "epoch": 0.55, - "grad_norm": 0.9802152870441576, - "learning_rate": 8.955261351018138e-06, - "loss": 0.1906, - "step": 10763 - }, - { - "epoch": 0.55, - "grad_norm": 1.1017980194807244, - "learning_rate": 8.953623409386898e-06, - "loss": 0.1808, - "step": 10764 - }, - { - "epoch": 0.55, - "grad_norm": 0.8795915647215737, - "learning_rate": 8.951985496138679e-06, - "loss": 0.1646, - "step": 10765 - }, - { - "epoch": 0.55, - "grad_norm": 0.8785209703760559, - "learning_rate": 8.950347611317915e-06, - "loss": 0.2085, - "step": 10766 - }, - { - "epoch": 0.55, - "grad_norm": 0.8129306072891198, - "learning_rate": 8.94870975496903e-06, - "loss": 0.1803, - "step": 10767 - }, - { - "epoch": 0.55, - "grad_norm": 3.5239210463731796, - "learning_rate": 8.94707192713645e-06, - "loss": 0.1723, - "step": 10768 - }, - { - "epoch": 0.55, - "grad_norm": 0.9476611032628168, - "learning_rate": 8.945434127864608e-06, - "loss": 0.1719, - "step": 10769 - }, - { - "epoch": 0.55, - "grad_norm": 0.7272317358149221, - "learning_rate": 8.94379635719792e-06, - "loss": 0.1572, - "step": 10770 - }, - { - "epoch": 0.55, - "grad_norm": 4.484549886429762, - "learning_rate": 8.942158615180818e-06, - "loss": 0.1828, - "step": 10771 - }, - { - "epoch": 0.55, - "grad_norm": 1.1519687675896289, - "learning_rate": 8.940520901857722e-06, - "loss": 0.1835, - "step": 10772 - }, - { - "epoch": 0.55, - "grad_norm": 0.9810758635443829, - "learning_rate": 8.93888321727306e-06, - "loss": 0.1785, - "step": 10773 - }, - { - "epoch": 0.55, - "grad_norm": 0.7676471358143412, - "learning_rate": 8.937245561471247e-06, - "loss": 0.1815, - "step": 10774 - }, - { - "epoch": 0.55, - "grad_norm": 1.3673114707940612, - "learning_rate": 8.935607934496708e-06, - "loss": 0.1937, - "step": 10775 - }, - { - "epoch": 0.55, - "grad_norm": 1.047631526141729, - "learning_rate": 8.933970336393864e-06, - "loss": 0.1663, - "step": 10776 - }, - { - "epoch": 0.55, - "grad_norm": 1.0940264402701274, - "learning_rate": 8.932332767207138e-06, - "loss": 0.1758, - "step": 10777 - }, - { - "epoch": 0.55, - "grad_norm": 0.7164913330577038, - "learning_rate": 8.930695226980942e-06, - "loss": 0.1674, - "step": 10778 - }, - { - "epoch": 0.55, - "grad_norm": 1.2698096854172805, - "learning_rate": 8.929057715759703e-06, - "loss": 0.1714, - "step": 10779 - }, - { - "epoch": 0.55, - "grad_norm": 0.939943834510151, - "learning_rate": 8.92742023358783e-06, - "loss": 0.1808, - "step": 10780 - }, - { - "epoch": 0.55, - "grad_norm": 5.2860861831418395, - "learning_rate": 8.925782780509745e-06, - "loss": 0.1863, - "step": 10781 - }, - { - "epoch": 0.55, - "grad_norm": 3.6427009088574174, - "learning_rate": 8.92414535656986e-06, - "loss": 0.1753, - "step": 10782 - }, - { - "epoch": 0.55, - "grad_norm": 1.307681135247615, - "learning_rate": 8.922507961812599e-06, - "loss": 0.1795, - "step": 10783 - }, - { - "epoch": 0.55, - "grad_norm": 1.0184505253136333, - "learning_rate": 8.920870596282368e-06, - "loss": 0.1885, - "step": 10784 - }, - { - "epoch": 0.55, - "grad_norm": 1.040277807833205, - "learning_rate": 8.91923326002358e-06, - "loss": 0.1727, - "step": 10785 - }, - { - "epoch": 0.55, - "grad_norm": 1.0954583122249455, - "learning_rate": 8.917595953080656e-06, - "loss": 0.1815, - "step": 10786 - }, - { - "epoch": 0.55, - "grad_norm": 0.9721497354157779, - "learning_rate": 8.915958675497997e-06, - "loss": 0.1791, - "step": 10787 - }, - { - "epoch": 0.55, - "grad_norm": 0.9360953372361804, - "learning_rate": 8.914321427320024e-06, - "loss": 0.1773, - "step": 10788 - }, - { - "epoch": 0.55, - "grad_norm": 1.4024322278896943, - "learning_rate": 8.91268420859114e-06, - "loss": 0.1794, - "step": 10789 - }, - { - "epoch": 0.55, - "grad_norm": 0.9425720861101381, - "learning_rate": 8.911047019355763e-06, - "loss": 0.1712, - "step": 10790 - }, - { - "epoch": 0.55, - "grad_norm": 1.639149419515541, - "learning_rate": 8.909409859658293e-06, - "loss": 0.1579, - "step": 10791 - }, - { - "epoch": 0.55, - "grad_norm": 1.0818029150587538, - "learning_rate": 8.907772729543145e-06, - "loss": 0.2021, - "step": 10792 - }, - { - "epoch": 0.55, - "grad_norm": 1.409368473961371, - "learning_rate": 8.90613562905472e-06, - "loss": 0.17, - "step": 10793 - }, - { - "epoch": 0.55, - "grad_norm": 0.9131587173741051, - "learning_rate": 8.904498558237434e-06, - "loss": 0.1664, - "step": 10794 - }, - { - "epoch": 0.55, - "grad_norm": 0.9603316547360553, - "learning_rate": 8.902861517135682e-06, - "loss": 0.1652, - "step": 10795 - }, - { - "epoch": 0.55, - "grad_norm": 1.1837257062991657, - "learning_rate": 8.901224505793876e-06, - "loss": 0.1775, - "step": 10796 - }, - { - "epoch": 0.55, - "grad_norm": 0.8485619861393137, - "learning_rate": 8.899587524256414e-06, - "loss": 0.1721, - "step": 10797 - }, - { - "epoch": 0.55, - "grad_norm": 1.0226003384133822, - "learning_rate": 8.897950572567704e-06, - "loss": 0.1849, - "step": 10798 - }, - { - "epoch": 0.55, - "grad_norm": 0.9032907278532869, - "learning_rate": 8.896313650772144e-06, - "loss": 0.1971, - "step": 10799 - }, - { - "epoch": 0.55, - "grad_norm": 1.1700811482162825, - "learning_rate": 8.894676758914145e-06, - "loss": 0.1821, - "step": 10800 - }, - { - "epoch": 0.55, - "grad_norm": 1.3275915038076012, - "learning_rate": 8.893039897038097e-06, - "loss": 0.2052, - "step": 10801 - }, - { - "epoch": 0.55, - "grad_norm": 1.1586253791165513, - "learning_rate": 8.891403065188404e-06, - "loss": 0.1714, - "step": 10802 - }, - { - "epoch": 0.55, - "grad_norm": 1.0172789986237956, - "learning_rate": 8.88976626340947e-06, - "loss": 0.1748, - "step": 10803 - }, - { - "epoch": 0.55, - "grad_norm": 0.9277282769430485, - "learning_rate": 8.888129491745684e-06, - "loss": 0.1624, - "step": 10804 - }, - { - "epoch": 0.55, - "grad_norm": 1.7708823030251715, - "learning_rate": 8.88649275024145e-06, - "loss": 0.2086, - "step": 10805 - }, - { - "epoch": 0.55, - "grad_norm": 1.458422843658293, - "learning_rate": 8.88485603894116e-06, - "loss": 0.2087, - "step": 10806 - }, - { - "epoch": 0.55, - "grad_norm": 3.906221351091835, - "learning_rate": 8.883219357889218e-06, - "loss": 0.1811, - "step": 10807 - }, - { - "epoch": 0.55, - "grad_norm": 0.9681472452182767, - "learning_rate": 8.88158270713001e-06, - "loss": 0.1948, - "step": 10808 - }, - { - "epoch": 0.55, - "grad_norm": 1.0084461542273477, - "learning_rate": 8.87994608670794e-06, - "loss": 0.1857, - "step": 10809 - }, - { - "epoch": 0.55, - "grad_norm": 1.0272427474370724, - "learning_rate": 8.87830949666739e-06, - "loss": 0.1802, - "step": 10810 - }, - { - "epoch": 0.55, - "grad_norm": 1.0135686742121741, - "learning_rate": 8.87667293705276e-06, - "loss": 0.2147, - "step": 10811 - }, - { - "epoch": 0.55, - "grad_norm": 0.9696081518257766, - "learning_rate": 8.875036407908439e-06, - "loss": 0.1948, - "step": 10812 - }, - { - "epoch": 0.55, - "grad_norm": 1.0369553923642154, - "learning_rate": 8.873399909278821e-06, - "loss": 0.2007, - "step": 10813 - }, - { - "epoch": 0.55, - "grad_norm": 1.083568254291667, - "learning_rate": 8.871763441208292e-06, - "loss": 0.1672, - "step": 10814 - }, - { - "epoch": 0.55, - "grad_norm": 0.9319992047044777, - "learning_rate": 8.870127003741245e-06, - "loss": 0.2072, - "step": 10815 - }, - { - "epoch": 0.55, - "grad_norm": 1.0894873647954664, - "learning_rate": 8.868490596922064e-06, - "loss": 0.1849, - "step": 10816 - }, - { - "epoch": 0.55, - "grad_norm": 0.8786214420077622, - "learning_rate": 8.866854220795144e-06, - "loss": 0.1663, - "step": 10817 - }, - { - "epoch": 0.55, - "grad_norm": 0.7965908011134126, - "learning_rate": 8.865217875404864e-06, - "loss": 0.1566, - "step": 10818 - }, - { - "epoch": 0.55, - "grad_norm": 1.122771993981457, - "learning_rate": 8.863581560795614e-06, - "loss": 0.1663, - "step": 10819 - }, - { - "epoch": 0.55, - "grad_norm": 1.5355491727641055, - "learning_rate": 8.861945277011782e-06, - "loss": 0.1811, - "step": 10820 - }, - { - "epoch": 0.55, - "grad_norm": 1.0149352880199478, - "learning_rate": 8.860309024097744e-06, - "loss": 0.1896, - "step": 10821 - }, - { - "epoch": 0.55, - "grad_norm": 1.15441128359665, - "learning_rate": 8.85867280209789e-06, - "loss": 0.19, - "step": 10822 - }, - { - "epoch": 0.55, - "grad_norm": 1.0242201683658139, - "learning_rate": 8.857036611056599e-06, - "loss": 0.1854, - "step": 10823 - }, - { - "epoch": 0.55, - "grad_norm": 1.0740247347675482, - "learning_rate": 8.855400451018256e-06, - "loss": 0.2088, - "step": 10824 - }, - { - "epoch": 0.55, - "grad_norm": 1.7181041405240665, - "learning_rate": 8.853764322027239e-06, - "loss": 0.1843, - "step": 10825 - }, - { - "epoch": 0.55, - "grad_norm": 1.096691445469932, - "learning_rate": 8.852128224127931e-06, - "loss": 0.1879, - "step": 10826 - }, - { - "epoch": 0.55, - "grad_norm": 1.0328893655865132, - "learning_rate": 8.850492157364709e-06, - "loss": 0.1928, - "step": 10827 - }, - { - "epoch": 0.55, - "grad_norm": 1.1989050714425344, - "learning_rate": 8.848856121781953e-06, - "loss": 0.1722, - "step": 10828 - }, - { - "epoch": 0.55, - "grad_norm": 0.9972744783098172, - "learning_rate": 8.847220117424035e-06, - "loss": 0.1822, - "step": 10829 - }, - { - "epoch": 0.55, - "grad_norm": 1.022753486245912, - "learning_rate": 8.845584144335344e-06, - "loss": 0.1798, - "step": 10830 - }, - { - "epoch": 0.55, - "grad_norm": 1.1575173533564185, - "learning_rate": 8.843948202560243e-06, - "loss": 0.1672, - "step": 10831 - }, - { - "epoch": 0.55, - "grad_norm": 1.1022591065507295, - "learning_rate": 8.842312292143115e-06, - "loss": 0.1953, - "step": 10832 - }, - { - "epoch": 0.55, - "grad_norm": 1.074404331661858, - "learning_rate": 8.840676413128329e-06, - "loss": 0.2035, - "step": 10833 - }, - { - "epoch": 0.55, - "grad_norm": 0.9233624295011634, - "learning_rate": 8.839040565560264e-06, - "loss": 0.178, - "step": 10834 - }, - { - "epoch": 0.55, - "grad_norm": 1.1496136891813986, - "learning_rate": 8.837404749483285e-06, - "loss": 0.1718, - "step": 10835 - }, - { - "epoch": 0.55, - "grad_norm": 0.9570721283259703, - "learning_rate": 8.835768964941773e-06, - "loss": 0.194, - "step": 10836 - }, - { - "epoch": 0.55, - "grad_norm": 1.000049498068047, - "learning_rate": 8.834133211980091e-06, - "loss": 0.1732, - "step": 10837 - }, - { - "epoch": 0.55, - "grad_norm": 0.9731055950131926, - "learning_rate": 8.83249749064261e-06, - "loss": 0.1644, - "step": 10838 - }, - { - "epoch": 0.55, - "grad_norm": 0.8763466169368689, - "learning_rate": 8.830861800973705e-06, - "loss": 0.1772, - "step": 10839 - }, - { - "epoch": 0.55, - "grad_norm": 0.9514424877296412, - "learning_rate": 8.829226143017735e-06, - "loss": 0.1578, - "step": 10840 - }, - { - "epoch": 0.55, - "grad_norm": 1.072765787949337, - "learning_rate": 8.827590516819073e-06, - "loss": 0.1659, - "step": 10841 - }, - { - "epoch": 0.55, - "grad_norm": 1.076068814527915, - "learning_rate": 8.825954922422084e-06, - "loss": 0.1831, - "step": 10842 - }, - { - "epoch": 0.55, - "grad_norm": 1.0256703193394343, - "learning_rate": 8.824319359871138e-06, - "loss": 0.1759, - "step": 10843 - }, - { - "epoch": 0.55, - "grad_norm": 0.9485220998311064, - "learning_rate": 8.822683829210591e-06, - "loss": 0.1959, - "step": 10844 - }, - { - "epoch": 0.55, - "grad_norm": 1.8247569049585812, - "learning_rate": 8.821048330484814e-06, - "loss": 0.1611, - "step": 10845 - }, - { - "epoch": 0.55, - "grad_norm": 0.9920482972212759, - "learning_rate": 8.819412863738165e-06, - "loss": 0.1807, - "step": 10846 - }, - { - "epoch": 0.55, - "grad_norm": 1.1803199349485702, - "learning_rate": 8.817777429015013e-06, - "loss": 0.1949, - "step": 10847 - }, - { - "epoch": 0.55, - "grad_norm": 1.8361111010536968, - "learning_rate": 8.816142026359711e-06, - "loss": 0.1904, - "step": 10848 - }, - { - "epoch": 0.55, - "grad_norm": 1.0204752253006832, - "learning_rate": 8.814506655816628e-06, - "loss": 0.2107, - "step": 10849 - }, - { - "epoch": 0.55, - "grad_norm": 0.8683096109613782, - "learning_rate": 8.812871317430114e-06, - "loss": 0.1856, - "step": 10850 - }, - { - "epoch": 0.55, - "grad_norm": 1.1192019964063356, - "learning_rate": 8.811236011244535e-06, - "loss": 0.1803, - "step": 10851 - }, - { - "epoch": 0.55, - "grad_norm": 0.8915818411611265, - "learning_rate": 8.809600737304245e-06, - "loss": 0.1713, - "step": 10852 - }, - { - "epoch": 0.55, - "grad_norm": 1.2922478281929404, - "learning_rate": 8.807965495653605e-06, - "loss": 0.1756, - "step": 10853 - }, - { - "epoch": 0.55, - "grad_norm": 1.1438370119106243, - "learning_rate": 8.806330286336966e-06, - "loss": 0.1737, - "step": 10854 - }, - { - "epoch": 0.55, - "grad_norm": 0.9674863155197395, - "learning_rate": 8.804695109398686e-06, - "loss": 0.1739, - "step": 10855 - }, - { - "epoch": 0.55, - "grad_norm": 0.8716670976435485, - "learning_rate": 8.803059964883121e-06, - "loss": 0.185, - "step": 10856 - }, - { - "epoch": 0.55, - "grad_norm": 0.8751877818085911, - "learning_rate": 8.801424852834617e-06, - "loss": 0.1864, - "step": 10857 - }, - { - "epoch": 0.55, - "grad_norm": 1.2810384916662814, - "learning_rate": 8.799789773297536e-06, - "loss": 0.2092, - "step": 10858 - }, - { - "epoch": 0.55, - "grad_norm": 1.715194154870013, - "learning_rate": 8.79815472631622e-06, - "loss": 0.1817, - "step": 10859 - }, - { - "epoch": 0.55, - "grad_norm": 0.957211967304257, - "learning_rate": 8.796519711935032e-06, - "loss": 0.1881, - "step": 10860 - }, - { - "epoch": 0.55, - "grad_norm": 1.204919477838909, - "learning_rate": 8.794884730198309e-06, - "loss": 0.1823, - "step": 10861 - }, - { - "epoch": 0.55, - "grad_norm": 1.1796530871354738, - "learning_rate": 8.793249781150411e-06, - "loss": 0.216, - "step": 10862 - }, - { - "epoch": 0.55, - "grad_norm": 1.4713416941020465, - "learning_rate": 8.791614864835676e-06, - "loss": 0.1788, - "step": 10863 - }, - { - "epoch": 0.55, - "grad_norm": 0.9662566873754121, - "learning_rate": 8.789979981298457e-06, - "loss": 0.1884, - "step": 10864 - }, - { - "epoch": 0.55, - "grad_norm": 1.356337540589256, - "learning_rate": 8.788345130583099e-06, - "loss": 0.1663, - "step": 10865 - }, - { - "epoch": 0.55, - "grad_norm": 1.7815317896632867, - "learning_rate": 8.786710312733952e-06, - "loss": 0.1656, - "step": 10866 - }, - { - "epoch": 0.55, - "grad_norm": 1.3871753218471037, - "learning_rate": 8.78507552779535e-06, - "loss": 0.1613, - "step": 10867 - }, - { - "epoch": 0.55, - "grad_norm": 1.5491548784204685, - "learning_rate": 8.783440775811646e-06, - "loss": 0.1772, - "step": 10868 - }, - { - "epoch": 0.55, - "grad_norm": 1.3689465680987378, - "learning_rate": 8.781806056827179e-06, - "loss": 0.1688, - "step": 10869 - }, - { - "epoch": 0.55, - "grad_norm": 1.2208364624605414, - "learning_rate": 8.780171370886295e-06, - "loss": 0.1632, - "step": 10870 - }, - { - "epoch": 0.55, - "grad_norm": 1.1374577018670873, - "learning_rate": 8.778536718033329e-06, - "loss": 0.2031, - "step": 10871 - }, - { - "epoch": 0.55, - "grad_norm": 1.064865021837396, - "learning_rate": 8.776902098312622e-06, - "loss": 0.1848, - "step": 10872 - }, - { - "epoch": 0.55, - "grad_norm": 1.1104820656639132, - "learning_rate": 8.775267511768518e-06, - "loss": 0.1866, - "step": 10873 - }, - { - "epoch": 0.55, - "grad_norm": 0.9836540178033396, - "learning_rate": 8.773632958445348e-06, - "loss": 0.182, - "step": 10874 - }, - { - "epoch": 0.55, - "grad_norm": 0.7715029190728462, - "learning_rate": 8.771998438387458e-06, - "loss": 0.1629, - "step": 10875 - }, - { - "epoch": 0.55, - "grad_norm": 0.9027018301738213, - "learning_rate": 8.770363951639175e-06, - "loss": 0.1903, - "step": 10876 - }, - { - "epoch": 0.55, - "grad_norm": 1.0277633203112935, - "learning_rate": 8.768729498244841e-06, - "loss": 0.1688, - "step": 10877 - }, - { - "epoch": 0.55, - "grad_norm": 1.2082709822081161, - "learning_rate": 8.767095078248788e-06, - "loss": 0.1884, - "step": 10878 - }, - { - "epoch": 0.55, - "grad_norm": 1.4968536355503077, - "learning_rate": 8.765460691695353e-06, - "loss": 0.1846, - "step": 10879 - }, - { - "epoch": 0.55, - "grad_norm": 0.8723340832563421, - "learning_rate": 8.763826338628865e-06, - "loss": 0.1659, - "step": 10880 - }, - { - "epoch": 0.55, - "grad_norm": 1.0983506797438705, - "learning_rate": 8.762192019093658e-06, - "loss": 0.1557, - "step": 10881 - }, - { - "epoch": 0.55, - "grad_norm": 2.665668563682617, - "learning_rate": 8.76055773313406e-06, - "loss": 0.1948, - "step": 10882 - }, - { - "epoch": 0.55, - "grad_norm": 1.1360990318938495, - "learning_rate": 8.758923480794407e-06, - "loss": 0.1858, - "step": 10883 - }, - { - "epoch": 0.55, - "grad_norm": 1.0668625114928212, - "learning_rate": 8.757289262119021e-06, - "loss": 0.1843, - "step": 10884 - }, - { - "epoch": 0.55, - "grad_norm": 1.5359141009129476, - "learning_rate": 8.755655077152237e-06, - "loss": 0.1967, - "step": 10885 - }, - { - "epoch": 0.55, - "grad_norm": 1.0503386454467547, - "learning_rate": 8.754020925938375e-06, - "loss": 0.1732, - "step": 10886 - }, - { - "epoch": 0.55, - "grad_norm": 1.0101469154598328, - "learning_rate": 8.752386808521774e-06, - "loss": 0.1775, - "step": 10887 - }, - { - "epoch": 0.55, - "grad_norm": 1.1936552339879511, - "learning_rate": 8.750752724946744e-06, - "loss": 0.1859, - "step": 10888 - }, - { - "epoch": 0.55, - "grad_norm": 1.083771152185385, - "learning_rate": 8.749118675257622e-06, - "loss": 0.1906, - "step": 10889 - }, - { - "epoch": 0.55, - "grad_norm": 1.1874420465891897, - "learning_rate": 8.747484659498725e-06, - "loss": 0.1761, - "step": 10890 - }, - { - "epoch": 0.55, - "grad_norm": 0.9178432426616299, - "learning_rate": 8.745850677714373e-06, - "loss": 0.1913, - "step": 10891 - }, - { - "epoch": 0.55, - "grad_norm": 0.9437458995593613, - "learning_rate": 8.744216729948898e-06, - "loss": 0.1849, - "step": 10892 - }, - { - "epoch": 0.55, - "grad_norm": 1.0195680018807618, - "learning_rate": 8.74258281624661e-06, - "loss": 0.1661, - "step": 10893 - }, - { - "epoch": 0.55, - "grad_norm": 0.9040885233318499, - "learning_rate": 8.740948936651838e-06, - "loss": 0.1879, - "step": 10894 - }, - { - "epoch": 0.55, - "grad_norm": 1.2327915094808635, - "learning_rate": 8.739315091208893e-06, - "loss": 0.1977, - "step": 10895 - }, - { - "epoch": 0.55, - "grad_norm": 1.2229661061013206, - "learning_rate": 8.737681279962103e-06, - "loss": 0.1607, - "step": 10896 - }, - { - "epoch": 0.55, - "grad_norm": 1.8743363590350692, - "learning_rate": 8.736047502955776e-06, - "loss": 0.1775, - "step": 10897 - }, - { - "epoch": 0.55, - "grad_norm": 1.1094227460806871, - "learning_rate": 8.734413760234233e-06, - "loss": 0.1764, - "step": 10898 - }, - { - "epoch": 0.55, - "grad_norm": 1.3084970668744595, - "learning_rate": 8.732780051841785e-06, - "loss": 0.1815, - "step": 10899 - }, - { - "epoch": 0.55, - "grad_norm": 1.6092401166985617, - "learning_rate": 8.731146377822755e-06, - "loss": 0.1936, - "step": 10900 - }, - { - "epoch": 0.55, - "grad_norm": 1.0475620352949029, - "learning_rate": 8.729512738221448e-06, - "loss": 0.2011, - "step": 10901 - }, - { - "epoch": 0.55, - "grad_norm": 0.9645061349451889, - "learning_rate": 8.72787913308218e-06, - "loss": 0.1767, - "step": 10902 - }, - { - "epoch": 0.55, - "grad_norm": 1.0792427949313164, - "learning_rate": 8.726245562449261e-06, - "loss": 0.1862, - "step": 10903 - }, - { - "epoch": 0.55, - "grad_norm": 1.1064164071055667, - "learning_rate": 8.724612026367008e-06, - "loss": 0.188, - "step": 10904 - }, - { - "epoch": 0.55, - "grad_norm": 2.2077951842578907, - "learning_rate": 8.72297852487972e-06, - "loss": 0.2033, - "step": 10905 - }, - { - "epoch": 0.55, - "grad_norm": 0.9631462825847468, - "learning_rate": 8.721345058031718e-06, - "loss": 0.1773, - "step": 10906 - }, - { - "epoch": 0.55, - "grad_norm": 1.0126229462231389, - "learning_rate": 8.7197116258673e-06, - "loss": 0.1764, - "step": 10907 - }, - { - "epoch": 0.55, - "grad_norm": 1.111625593594014, - "learning_rate": 8.718078228430775e-06, - "loss": 0.1739, - "step": 10908 - }, - { - "epoch": 0.55, - "grad_norm": 1.136754878674941, - "learning_rate": 8.716444865766454e-06, - "loss": 0.1699, - "step": 10909 - }, - { - "epoch": 0.55, - "grad_norm": 3.0117881332904743, - "learning_rate": 8.714811537918634e-06, - "loss": 0.2098, - "step": 10910 - }, - { - "epoch": 0.55, - "grad_norm": 0.9200787097045166, - "learning_rate": 8.713178244931626e-06, - "loss": 0.1763, - "step": 10911 - }, - { - "epoch": 0.55, - "grad_norm": 1.1680809124305545, - "learning_rate": 8.71154498684973e-06, - "loss": 0.1767, - "step": 10912 - }, - { - "epoch": 0.55, - "grad_norm": 0.994348721844077, - "learning_rate": 8.709911763717251e-06, - "loss": 0.1888, - "step": 10913 - }, - { - "epoch": 0.55, - "grad_norm": 0.848085721687801, - "learning_rate": 8.708278575578485e-06, - "loss": 0.158, - "step": 10914 - }, - { - "epoch": 0.56, - "grad_norm": 1.2588476989472734, - "learning_rate": 8.706645422477739e-06, - "loss": 0.1978, - "step": 10915 - }, - { - "epoch": 0.56, - "grad_norm": 1.8154000665521712, - "learning_rate": 8.705012304459306e-06, - "loss": 0.1759, - "step": 10916 - }, - { - "epoch": 0.56, - "grad_norm": 1.0771918217834016, - "learning_rate": 8.703379221567491e-06, - "loss": 0.1771, - "step": 10917 - }, - { - "epoch": 0.56, - "grad_norm": 1.1485676136008025, - "learning_rate": 8.701746173846583e-06, - "loss": 0.1665, - "step": 10918 - }, - { - "epoch": 0.56, - "grad_norm": 1.4594060433552434, - "learning_rate": 8.70011316134089e-06, - "loss": 0.2013, - "step": 10919 - }, - { - "epoch": 0.56, - "grad_norm": 1.1035150344192732, - "learning_rate": 8.698480184094695e-06, - "loss": 0.1896, - "step": 10920 - }, - { - "epoch": 0.56, - "grad_norm": 0.79687252374784, - "learning_rate": 8.696847242152301e-06, - "loss": 0.1588, - "step": 10921 - }, - { - "epoch": 0.56, - "grad_norm": 1.8751652874434868, - "learning_rate": 8.695214335557997e-06, - "loss": 0.1703, - "step": 10922 - }, - { - "epoch": 0.56, - "grad_norm": 2.283108327990949, - "learning_rate": 8.693581464356083e-06, - "loss": 0.1891, - "step": 10923 - }, - { - "epoch": 0.56, - "grad_norm": 1.30940533827719, - "learning_rate": 8.691948628590841e-06, - "loss": 0.1774, - "step": 10924 - }, - { - "epoch": 0.56, - "grad_norm": 1.2612497366281696, - "learning_rate": 8.690315828306566e-06, - "loss": 0.1899, - "step": 10925 - }, - { - "epoch": 0.56, - "grad_norm": 1.1632980454401722, - "learning_rate": 8.688683063547551e-06, - "loss": 0.1785, - "step": 10926 - }, - { - "epoch": 0.56, - "grad_norm": 1.2054669236060747, - "learning_rate": 8.68705033435808e-06, - "loss": 0.18, - "step": 10927 - }, - { - "epoch": 0.56, - "grad_norm": 0.8880601277908344, - "learning_rate": 8.685417640782444e-06, - "loss": 0.1753, - "step": 10928 - }, - { - "epoch": 0.56, - "grad_norm": 1.2596056413723435, - "learning_rate": 8.683784982864925e-06, - "loss": 0.1873, - "step": 10929 - }, - { - "epoch": 0.56, - "grad_norm": 1.0722207155737384, - "learning_rate": 8.682152360649819e-06, - "loss": 0.174, - "step": 10930 - }, - { - "epoch": 0.56, - "grad_norm": 1.0243942383285416, - "learning_rate": 8.6805197741814e-06, - "loss": 0.1703, - "step": 10931 - }, - { - "epoch": 0.56, - "grad_norm": 1.04686861248303, - "learning_rate": 8.678887223503962e-06, - "loss": 0.1897, - "step": 10932 - }, - { - "epoch": 0.56, - "grad_norm": 0.99105527675413, - "learning_rate": 8.677254708661775e-06, - "loss": 0.1561, - "step": 10933 - }, - { - "epoch": 0.56, - "grad_norm": 1.2436201373640272, - "learning_rate": 8.675622229699134e-06, - "loss": 0.1735, - "step": 10934 - }, - { - "epoch": 0.56, - "grad_norm": 1.0608032324133017, - "learning_rate": 8.67398978666031e-06, - "loss": 0.1764, - "step": 10935 - }, - { - "epoch": 0.56, - "grad_norm": 1.0845256537351942, - "learning_rate": 8.672357379589595e-06, - "loss": 0.1746, - "step": 10936 - }, - { - "epoch": 0.56, - "grad_norm": 1.1726740536790237, - "learning_rate": 8.670725008531255e-06, - "loss": 0.203, - "step": 10937 - }, - { - "epoch": 0.56, - "grad_norm": 0.8982383138239131, - "learning_rate": 8.669092673529577e-06, - "loss": 0.1936, - "step": 10938 - }, - { - "epoch": 0.56, - "grad_norm": 1.1467761252995048, - "learning_rate": 8.667460374628834e-06, - "loss": 0.2081, - "step": 10939 - }, - { - "epoch": 0.56, - "grad_norm": 1.279402231030666, - "learning_rate": 8.665828111873307e-06, - "loss": 0.1743, - "step": 10940 - }, - { - "epoch": 0.56, - "grad_norm": 1.3727098270038447, - "learning_rate": 8.664195885307265e-06, - "loss": 0.1823, - "step": 10941 - }, - { - "epoch": 0.56, - "grad_norm": 1.6401924818960485, - "learning_rate": 8.662563694974983e-06, - "loss": 0.1907, - "step": 10942 - }, - { - "epoch": 0.56, - "grad_norm": 1.123455051565522, - "learning_rate": 8.660931540920743e-06, - "loss": 0.1908, - "step": 10943 - }, - { - "epoch": 0.56, - "grad_norm": 0.838274020948395, - "learning_rate": 8.659299423188803e-06, - "loss": 0.1799, - "step": 10944 - }, - { - "epoch": 0.56, - "grad_norm": 1.0452364543364976, - "learning_rate": 8.657667341823449e-06, - "loss": 0.1792, - "step": 10945 - }, - { - "epoch": 0.56, - "grad_norm": 0.8190999827722713, - "learning_rate": 8.656035296868938e-06, - "loss": 0.1693, - "step": 10946 - }, - { - "epoch": 0.56, - "grad_norm": 1.4522244007392222, - "learning_rate": 8.654403288369548e-06, - "loss": 0.166, - "step": 10947 - }, - { - "epoch": 0.56, - "grad_norm": 1.3287989010272494, - "learning_rate": 8.652771316369544e-06, - "loss": 0.1847, - "step": 10948 - }, - { - "epoch": 0.56, - "grad_norm": 1.0890163794301415, - "learning_rate": 8.651139380913197e-06, - "loss": 0.1977, - "step": 10949 - }, - { - "epoch": 0.56, - "grad_norm": 1.0666571494962853, - "learning_rate": 8.649507482044766e-06, - "loss": 0.1751, - "step": 10950 - }, - { - "epoch": 0.56, - "grad_norm": 0.8103740471153004, - "learning_rate": 8.647875619808523e-06, - "loss": 0.1765, - "step": 10951 - }, - { - "epoch": 0.56, - "grad_norm": 1.0313607099358908, - "learning_rate": 8.64624379424873e-06, - "loss": 0.1665, - "step": 10952 - }, - { - "epoch": 0.56, - "grad_norm": 1.0468097800952842, - "learning_rate": 8.644612005409654e-06, - "loss": 0.1795, - "step": 10953 - }, - { - "epoch": 0.56, - "grad_norm": 1.1057397636613135, - "learning_rate": 8.642980253335547e-06, - "loss": 0.2016, - "step": 10954 - }, - { - "epoch": 0.56, - "grad_norm": 0.8451430566992679, - "learning_rate": 8.641348538070683e-06, - "loss": 0.175, - "step": 10955 - }, - { - "epoch": 0.56, - "grad_norm": 0.7806195927151729, - "learning_rate": 8.639716859659312e-06, - "loss": 0.1901, - "step": 10956 - }, - { - "epoch": 0.56, - "grad_norm": 0.8798299824358545, - "learning_rate": 8.638085218145704e-06, - "loss": 0.1769, - "step": 10957 - }, - { - "epoch": 0.56, - "grad_norm": 1.5650750606191544, - "learning_rate": 8.63645361357411e-06, - "loss": 0.1877, - "step": 10958 - }, - { - "epoch": 0.56, - "grad_norm": 1.1279481372195475, - "learning_rate": 8.634822045988784e-06, - "loss": 0.1831, - "step": 10959 - }, - { - "epoch": 0.56, - "grad_norm": 1.2830148866132574, - "learning_rate": 8.633190515433992e-06, - "loss": 0.1901, - "step": 10960 - }, - { - "epoch": 0.56, - "grad_norm": 1.2855953567844727, - "learning_rate": 8.63155902195398e-06, - "loss": 0.1865, - "step": 10961 - }, - { - "epoch": 0.56, - "grad_norm": 0.8777387129501347, - "learning_rate": 8.62992756559301e-06, - "loss": 0.1688, - "step": 10962 - }, - { - "epoch": 0.56, - "grad_norm": 0.8700630299582908, - "learning_rate": 8.628296146395331e-06, - "loss": 0.1704, - "step": 10963 - }, - { - "epoch": 0.56, - "grad_norm": 1.103315427349816, - "learning_rate": 8.626664764405196e-06, - "loss": 0.1724, - "step": 10964 - }, - { - "epoch": 0.56, - "grad_norm": 1.0764734465086485, - "learning_rate": 8.625033419666856e-06, - "loss": 0.1882, - "step": 10965 - }, - { - "epoch": 0.56, - "grad_norm": 1.0634970577767235, - "learning_rate": 8.623402112224567e-06, - "loss": 0.1972, - "step": 10966 - }, - { - "epoch": 0.56, - "grad_norm": 1.0115594755172888, - "learning_rate": 8.621770842122569e-06, - "loss": 0.2047, - "step": 10967 - }, - { - "epoch": 0.56, - "grad_norm": 1.0603757146365727, - "learning_rate": 8.620139609405115e-06, - "loss": 0.1647, - "step": 10968 - }, - { - "epoch": 0.56, - "grad_norm": 1.0003635283977106, - "learning_rate": 8.618508414116452e-06, - "loss": 0.1721, - "step": 10969 - }, - { - "epoch": 0.56, - "grad_norm": 1.1566118633153082, - "learning_rate": 8.616877256300829e-06, - "loss": 0.1855, - "step": 10970 - }, - { - "epoch": 0.56, - "grad_norm": 0.8767666422857754, - "learning_rate": 8.615246136002486e-06, - "loss": 0.1422, - "step": 10971 - }, - { - "epoch": 0.56, - "grad_norm": 0.9756034260032039, - "learning_rate": 8.613615053265673e-06, - "loss": 0.1666, - "step": 10972 - }, - { - "epoch": 0.56, - "grad_norm": 1.054608091695435, - "learning_rate": 8.611984008134626e-06, - "loss": 0.2045, - "step": 10973 - }, - { - "epoch": 0.56, - "grad_norm": 1.0026363374092324, - "learning_rate": 8.610353000653592e-06, - "loss": 0.1857, - "step": 10974 - }, - { - "epoch": 0.56, - "grad_norm": 1.0371375238714557, - "learning_rate": 8.608722030866812e-06, - "loss": 0.1849, - "step": 10975 - }, - { - "epoch": 0.56, - "grad_norm": 0.9701005160752207, - "learning_rate": 8.607091098818528e-06, - "loss": 0.1735, - "step": 10976 - }, - { - "epoch": 0.56, - "grad_norm": 1.0497538225267664, - "learning_rate": 8.605460204552975e-06, - "loss": 0.1926, - "step": 10977 - }, - { - "epoch": 0.56, - "grad_norm": 0.9234896721451861, - "learning_rate": 8.60382934811439e-06, - "loss": 0.1928, - "step": 10978 - }, - { - "epoch": 0.56, - "grad_norm": 0.9119449965995741, - "learning_rate": 8.602198529547016e-06, - "loss": 0.1795, - "step": 10979 - }, - { - "epoch": 0.56, - "grad_norm": 0.98527998422671, - "learning_rate": 8.600567748895083e-06, - "loss": 0.1869, - "step": 10980 - }, - { - "epoch": 0.56, - "grad_norm": 1.1053717143578772, - "learning_rate": 8.598937006202832e-06, - "loss": 0.1964, - "step": 10981 - }, - { - "epoch": 0.56, - "grad_norm": 1.468621052173339, - "learning_rate": 8.59730630151449e-06, - "loss": 0.1734, - "step": 10982 - }, - { - "epoch": 0.56, - "grad_norm": 0.952452351528307, - "learning_rate": 8.595675634874299e-06, - "loss": 0.1841, - "step": 10983 - }, - { - "epoch": 0.56, - "grad_norm": 1.1959355986594147, - "learning_rate": 8.594045006326481e-06, - "loss": 0.194, - "step": 10984 - }, - { - "epoch": 0.56, - "grad_norm": 0.9707998389931917, - "learning_rate": 8.592414415915275e-06, - "loss": 0.1864, - "step": 10985 - }, - { - "epoch": 0.56, - "grad_norm": 1.382033789392686, - "learning_rate": 8.590783863684904e-06, - "loss": 0.2151, - "step": 10986 - }, - { - "epoch": 0.56, - "grad_norm": 1.5081807414030657, - "learning_rate": 8.589153349679602e-06, - "loss": 0.1716, - "step": 10987 - }, - { - "epoch": 0.56, - "grad_norm": 1.261638494448365, - "learning_rate": 8.587522873943595e-06, - "loss": 0.1997, - "step": 10988 - }, - { - "epoch": 0.56, - "grad_norm": 1.462716210009533, - "learning_rate": 8.585892436521113e-06, - "loss": 0.188, - "step": 10989 - }, - { - "epoch": 0.56, - "grad_norm": 1.4971491727761224, - "learning_rate": 8.584262037456374e-06, - "loss": 0.1831, - "step": 10990 - }, - { - "epoch": 0.56, - "grad_norm": 0.9349659575444129, - "learning_rate": 8.582631676793609e-06, - "loss": 0.1834, - "step": 10991 - }, - { - "epoch": 0.56, - "grad_norm": 0.9905856958238987, - "learning_rate": 8.581001354577037e-06, - "loss": 0.1975, - "step": 10992 - }, - { - "epoch": 0.56, - "grad_norm": 0.9028068531467937, - "learning_rate": 8.57937107085089e-06, - "loss": 0.1784, - "step": 10993 - }, - { - "epoch": 0.56, - "grad_norm": 0.9150736772460405, - "learning_rate": 8.577740825659379e-06, - "loss": 0.1729, - "step": 10994 - }, - { - "epoch": 0.56, - "grad_norm": 0.9936047580820794, - "learning_rate": 8.576110619046726e-06, - "loss": 0.2087, - "step": 10995 - }, - { - "epoch": 0.56, - "grad_norm": 1.2040788857862377, - "learning_rate": 8.574480451057158e-06, - "loss": 0.1729, - "step": 10996 - }, - { - "epoch": 0.56, - "grad_norm": 0.8647923487542004, - "learning_rate": 8.572850321734884e-06, - "loss": 0.1611, - "step": 10997 - }, - { - "epoch": 0.56, - "grad_norm": 1.0258661585027546, - "learning_rate": 8.571220231124129e-06, - "loss": 0.1889, - "step": 10998 - }, - { - "epoch": 0.56, - "grad_norm": 0.9404572152675427, - "learning_rate": 8.569590179269103e-06, - "loss": 0.1859, - "step": 10999 - }, - { - "epoch": 0.56, - "grad_norm": 1.3241112379396596, - "learning_rate": 8.567960166214026e-06, - "loss": 0.1805, - "step": 11000 - }, - { - "epoch": 0.56, - "grad_norm": 1.2502395125362873, - "learning_rate": 8.566330192003108e-06, - "loss": 0.1694, - "step": 11001 - }, - { - "epoch": 0.56, - "grad_norm": 1.3002683332018317, - "learning_rate": 8.564700256680568e-06, - "loss": 0.168, - "step": 11002 - }, - { - "epoch": 0.56, - "grad_norm": 1.159540772474298, - "learning_rate": 8.563070360290611e-06, - "loss": 0.1874, - "step": 11003 - }, - { - "epoch": 0.56, - "grad_norm": 0.9030555139116054, - "learning_rate": 8.561440502877454e-06, - "loss": 0.1761, - "step": 11004 - }, - { - "epoch": 0.56, - "grad_norm": 0.89022901095595, - "learning_rate": 8.559810684485302e-06, - "loss": 0.1863, - "step": 11005 - }, - { - "epoch": 0.56, - "grad_norm": 1.2859747477095844, - "learning_rate": 8.558180905158372e-06, - "loss": 0.1866, - "step": 11006 - }, - { - "epoch": 0.56, - "grad_norm": 2.1070866647981865, - "learning_rate": 8.55655116494086e-06, - "loss": 0.2022, - "step": 11007 - }, - { - "epoch": 0.56, - "grad_norm": 1.0914719993337787, - "learning_rate": 8.554921463876983e-06, - "loss": 0.1655, - "step": 11008 - }, - { - "epoch": 0.56, - "grad_norm": 1.2357096687659934, - "learning_rate": 8.55329180201094e-06, - "loss": 0.203, - "step": 11009 - }, - { - "epoch": 0.56, - "grad_norm": 1.1989549010584921, - "learning_rate": 8.551662179386944e-06, - "loss": 0.1722, - "step": 11010 - }, - { - "epoch": 0.56, - "grad_norm": 2.088420664118368, - "learning_rate": 8.55003259604919e-06, - "loss": 0.1696, - "step": 11011 - }, - { - "epoch": 0.56, - "grad_norm": 1.0460955577335986, - "learning_rate": 8.54840305204188e-06, - "loss": 0.1639, - "step": 11012 - }, - { - "epoch": 0.56, - "grad_norm": 0.8399521974271892, - "learning_rate": 8.546773547409227e-06, - "loss": 0.168, - "step": 11013 - }, - { - "epoch": 0.56, - "grad_norm": 1.1979374294097083, - "learning_rate": 8.545144082195417e-06, - "loss": 0.1817, - "step": 11014 - }, - { - "epoch": 0.56, - "grad_norm": 1.3236855265368384, - "learning_rate": 8.54351465644466e-06, - "loss": 0.1872, - "step": 11015 - }, - { - "epoch": 0.56, - "grad_norm": 2.563503817505762, - "learning_rate": 8.541885270201146e-06, - "loss": 0.1858, - "step": 11016 - }, - { - "epoch": 0.56, - "grad_norm": 0.9908741381061342, - "learning_rate": 8.540255923509081e-06, - "loss": 0.2116, - "step": 11017 - }, - { - "epoch": 0.56, - "grad_norm": 1.184467292305344, - "learning_rate": 8.538626616412651e-06, - "loss": 0.1809, - "step": 11018 - }, - { - "epoch": 0.56, - "grad_norm": 2.2502518651558443, - "learning_rate": 8.536997348956065e-06, - "loss": 0.1994, - "step": 11019 - }, - { - "epoch": 0.56, - "grad_norm": 1.1074915593033396, - "learning_rate": 8.5353681211835e-06, - "loss": 0.1774, - "step": 11020 - }, - { - "epoch": 0.56, - "grad_norm": 1.3377825391510998, - "learning_rate": 8.533738933139162e-06, - "loss": 0.2249, - "step": 11021 - }, - { - "epoch": 0.56, - "grad_norm": 0.8265730358711466, - "learning_rate": 8.532109784867235e-06, - "loss": 0.1889, - "step": 11022 - }, - { - "epoch": 0.56, - "grad_norm": 1.147024717062173, - "learning_rate": 8.530480676411919e-06, - "loss": 0.1922, - "step": 11023 - }, - { - "epoch": 0.56, - "grad_norm": 0.9714762838124623, - "learning_rate": 8.52885160781739e-06, - "loss": 0.1623, - "step": 11024 - }, - { - "epoch": 0.56, - "grad_norm": 1.4244966383242854, - "learning_rate": 8.52722257912785e-06, - "loss": 0.1986, - "step": 11025 - }, - { - "epoch": 0.56, - "grad_norm": 1.406073801878377, - "learning_rate": 8.525593590387476e-06, - "loss": 0.1853, - "step": 11026 - }, - { - "epoch": 0.56, - "grad_norm": 1.9625221752761264, - "learning_rate": 8.523964641640466e-06, - "loss": 0.1741, - "step": 11027 - }, - { - "epoch": 0.56, - "grad_norm": 1.6320938086216492, - "learning_rate": 8.522335732930996e-06, - "loss": 0.1879, - "step": 11028 - }, - { - "epoch": 0.56, - "grad_norm": 0.8104481171462272, - "learning_rate": 8.52070686430325e-06, - "loss": 0.167, - "step": 11029 - }, - { - "epoch": 0.56, - "grad_norm": 2.0081096113409695, - "learning_rate": 8.519078035801415e-06, - "loss": 0.1695, - "step": 11030 - }, - { - "epoch": 0.56, - "grad_norm": 0.8971930904927773, - "learning_rate": 8.51744924746967e-06, - "loss": 0.1832, - "step": 11031 - }, - { - "epoch": 0.56, - "grad_norm": 0.9465995537342011, - "learning_rate": 8.515820499352203e-06, - "loss": 0.1698, - "step": 11032 - }, - { - "epoch": 0.56, - "grad_norm": 1.2279426320906355, - "learning_rate": 8.514191791493183e-06, - "loss": 0.1648, - "step": 11033 - }, - { - "epoch": 0.56, - "grad_norm": 1.096622128478326, - "learning_rate": 8.512563123936796e-06, - "loss": 0.1885, - "step": 11034 - }, - { - "epoch": 0.56, - "grad_norm": 0.9072936887908979, - "learning_rate": 8.510934496727218e-06, - "loss": 0.186, - "step": 11035 - }, - { - "epoch": 0.56, - "grad_norm": 1.2281823546434798, - "learning_rate": 8.50930590990863e-06, - "loss": 0.1806, - "step": 11036 - }, - { - "epoch": 0.56, - "grad_norm": 0.7826850654708606, - "learning_rate": 8.5076773635252e-06, - "loss": 0.1985, - "step": 11037 - }, - { - "epoch": 0.56, - "grad_norm": 0.7724179175255349, - "learning_rate": 8.506048857621106e-06, - "loss": 0.1779, - "step": 11038 - }, - { - "epoch": 0.56, - "grad_norm": 1.1463623013180657, - "learning_rate": 8.50442039224052e-06, - "loss": 0.1776, - "step": 11039 - }, - { - "epoch": 0.56, - "grad_norm": 0.8442033414257292, - "learning_rate": 8.50279196742762e-06, - "loss": 0.1574, - "step": 11040 - }, - { - "epoch": 0.56, - "grad_norm": 1.7464494523210246, - "learning_rate": 8.501163583226567e-06, - "loss": 0.1837, - "step": 11041 - }, - { - "epoch": 0.56, - "grad_norm": 2.03783109296676, - "learning_rate": 8.499535239681541e-06, - "loss": 0.1846, - "step": 11042 - }, - { - "epoch": 0.56, - "grad_norm": 1.186059856182854, - "learning_rate": 8.497906936836704e-06, - "loss": 0.1718, - "step": 11043 - }, - { - "epoch": 0.56, - "grad_norm": 1.6469341544249463, - "learning_rate": 8.496278674736226e-06, - "loss": 0.1484, - "step": 11044 - }, - { - "epoch": 0.56, - "grad_norm": 1.039933904642872, - "learning_rate": 8.494650453424272e-06, - "loss": 0.2039, - "step": 11045 - }, - { - "epoch": 0.56, - "grad_norm": 1.0120314170011517, - "learning_rate": 8.493022272945014e-06, - "loss": 0.1794, - "step": 11046 - }, - { - "epoch": 0.56, - "grad_norm": 1.0318104615833152, - "learning_rate": 8.49139413334261e-06, - "loss": 0.1917, - "step": 11047 - }, - { - "epoch": 0.56, - "grad_norm": 1.042770661802242, - "learning_rate": 8.489766034661225e-06, - "loss": 0.2006, - "step": 11048 - }, - { - "epoch": 0.56, - "grad_norm": 1.0643753590812406, - "learning_rate": 8.488137976945023e-06, - "loss": 0.2061, - "step": 11049 - }, - { - "epoch": 0.56, - "grad_norm": 0.9171635729586125, - "learning_rate": 8.48650996023816e-06, - "loss": 0.1804, - "step": 11050 - }, - { - "epoch": 0.56, - "grad_norm": 0.8872809410093716, - "learning_rate": 8.484881984584803e-06, - "loss": 0.1678, - "step": 11051 - }, - { - "epoch": 0.56, - "grad_norm": 0.8891633300766486, - "learning_rate": 8.483254050029105e-06, - "loss": 0.1966, - "step": 11052 - }, - { - "epoch": 0.56, - "grad_norm": 1.0598293338182732, - "learning_rate": 8.481626156615231e-06, - "loss": 0.1981, - "step": 11053 - }, - { - "epoch": 0.56, - "grad_norm": 1.0567734225104917, - "learning_rate": 8.479998304387329e-06, - "loss": 0.1826, - "step": 11054 - }, - { - "epoch": 0.56, - "grad_norm": 0.7260869610454411, - "learning_rate": 8.478370493389563e-06, - "loss": 0.184, - "step": 11055 - }, - { - "epoch": 0.56, - "grad_norm": 1.1388976808393028, - "learning_rate": 8.47674272366608e-06, - "loss": 0.1711, - "step": 11056 - }, - { - "epoch": 0.56, - "grad_norm": 0.9298321738607229, - "learning_rate": 8.475114995261038e-06, - "loss": 0.1799, - "step": 11057 - }, - { - "epoch": 0.56, - "grad_norm": 1.6325973917833867, - "learning_rate": 8.473487308218585e-06, - "loss": 0.2003, - "step": 11058 - }, - { - "epoch": 0.56, - "grad_norm": 0.8499000723973356, - "learning_rate": 8.47185966258288e-06, - "loss": 0.1757, - "step": 11059 - }, - { - "epoch": 0.56, - "grad_norm": 0.8913225991349657, - "learning_rate": 8.470232058398063e-06, - "loss": 0.1657, - "step": 11060 - }, - { - "epoch": 0.56, - "grad_norm": 1.1076538002812075, - "learning_rate": 8.468604495708292e-06, - "loss": 0.1799, - "step": 11061 - }, - { - "epoch": 0.56, - "grad_norm": 0.9903752364490356, - "learning_rate": 8.466976974557706e-06, - "loss": 0.1719, - "step": 11062 - }, - { - "epoch": 0.56, - "grad_norm": 0.8559784269414673, - "learning_rate": 8.465349494990461e-06, - "loss": 0.1761, - "step": 11063 - }, - { - "epoch": 0.56, - "grad_norm": 1.3156245006771254, - "learning_rate": 8.463722057050696e-06, - "loss": 0.1677, - "step": 11064 - }, - { - "epoch": 0.56, - "grad_norm": 0.943210871044513, - "learning_rate": 8.462094660782555e-06, - "loss": 0.1644, - "step": 11065 - }, - { - "epoch": 0.56, - "grad_norm": 0.9269504655641544, - "learning_rate": 8.460467306230187e-06, - "loss": 0.2061, - "step": 11066 - }, - { - "epoch": 0.56, - "grad_norm": 0.8738119366074828, - "learning_rate": 8.458839993437726e-06, - "loss": 0.2071, - "step": 11067 - }, - { - "epoch": 0.56, - "grad_norm": 0.8343929887803908, - "learning_rate": 8.457212722449322e-06, - "loss": 0.1685, - "step": 11068 - }, - { - "epoch": 0.56, - "grad_norm": 0.7622864170582623, - "learning_rate": 8.455585493309107e-06, - "loss": 0.1819, - "step": 11069 - }, - { - "epoch": 0.56, - "grad_norm": 1.8684169737198935, - "learning_rate": 8.453958306061223e-06, - "loss": 0.1858, - "step": 11070 - }, - { - "epoch": 0.56, - "grad_norm": 1.294786211809637, - "learning_rate": 8.452331160749804e-06, - "loss": 0.1853, - "step": 11071 - }, - { - "epoch": 0.56, - "grad_norm": 0.8926249284110674, - "learning_rate": 8.450704057418996e-06, - "loss": 0.206, - "step": 11072 - }, - { - "epoch": 0.56, - "grad_norm": 1.3117790446383297, - "learning_rate": 8.449076996112924e-06, - "loss": 0.1691, - "step": 11073 - }, - { - "epoch": 0.56, - "grad_norm": 0.869685388091908, - "learning_rate": 8.447449976875726e-06, - "loss": 0.1864, - "step": 11074 - }, - { - "epoch": 0.56, - "grad_norm": 0.7568117156894011, - "learning_rate": 8.445822999751536e-06, - "loss": 0.1803, - "step": 11075 - }, - { - "epoch": 0.56, - "grad_norm": 0.7174381127014311, - "learning_rate": 8.444196064784487e-06, - "loss": 0.1794, - "step": 11076 - }, - { - "epoch": 0.56, - "grad_norm": 0.9146953380622976, - "learning_rate": 8.442569172018703e-06, - "loss": 0.2146, - "step": 11077 - }, - { - "epoch": 0.56, - "grad_norm": 1.3439061263436667, - "learning_rate": 8.440942321498322e-06, - "loss": 0.1911, - "step": 11078 - }, - { - "epoch": 0.56, - "grad_norm": 0.794186945195168, - "learning_rate": 8.439315513267465e-06, - "loss": 0.1677, - "step": 11079 - }, - { - "epoch": 0.56, - "grad_norm": 1.1085470147484742, - "learning_rate": 8.437688747370267e-06, - "loss": 0.1719, - "step": 11080 - }, - { - "epoch": 0.56, - "grad_norm": 0.7518099221003346, - "learning_rate": 8.43606202385085e-06, - "loss": 0.1733, - "step": 11081 - }, - { - "epoch": 0.56, - "grad_norm": 0.9856665916024756, - "learning_rate": 8.434435342753335e-06, - "loss": 0.1834, - "step": 11082 - }, - { - "epoch": 0.56, - "grad_norm": 4.82839232903437, - "learning_rate": 8.432808704121854e-06, - "loss": 0.207, - "step": 11083 - }, - { - "epoch": 0.56, - "grad_norm": 1.3434290434642995, - "learning_rate": 8.431182108000522e-06, - "loss": 0.1664, - "step": 11084 - }, - { - "epoch": 0.56, - "grad_norm": 1.0161057228711095, - "learning_rate": 8.429555554433466e-06, - "loss": 0.1824, - "step": 11085 - }, - { - "epoch": 0.56, - "grad_norm": 0.8666489308505075, - "learning_rate": 8.427929043464802e-06, - "loss": 0.1797, - "step": 11086 - }, - { - "epoch": 0.56, - "grad_norm": 1.0791313722788873, - "learning_rate": 8.426302575138652e-06, - "loss": 0.1925, - "step": 11087 - }, - { - "epoch": 0.56, - "grad_norm": 0.9458821418898986, - "learning_rate": 8.424676149499133e-06, - "loss": 0.1704, - "step": 11088 - }, - { - "epoch": 0.56, - "grad_norm": 1.266642190112638, - "learning_rate": 8.423049766590368e-06, - "loss": 0.1862, - "step": 11089 - }, - { - "epoch": 0.56, - "grad_norm": 1.0161348196522118, - "learning_rate": 8.42142342645646e-06, - "loss": 0.1985, - "step": 11090 - }, - { - "epoch": 0.56, - "grad_norm": 0.8030952803424762, - "learning_rate": 8.419797129141535e-06, - "loss": 0.174, - "step": 11091 - }, - { - "epoch": 0.56, - "grad_norm": 1.9562934368345417, - "learning_rate": 8.418170874689698e-06, - "loss": 0.1998, - "step": 11092 - }, - { - "epoch": 0.56, - "grad_norm": 0.7236144403554686, - "learning_rate": 8.416544663145073e-06, - "loss": 0.1681, - "step": 11093 - }, - { - "epoch": 0.56, - "grad_norm": 0.7208411969746172, - "learning_rate": 8.414918494551756e-06, - "loss": 0.1795, - "step": 11094 - }, - { - "epoch": 0.56, - "grad_norm": 1.0597353018878433, - "learning_rate": 8.41329236895387e-06, - "loss": 0.1828, - "step": 11095 - }, - { - "epoch": 0.56, - "grad_norm": 1.9124523581849326, - "learning_rate": 8.411666286395512e-06, - "loss": 0.1724, - "step": 11096 - }, - { - "epoch": 0.56, - "grad_norm": 0.9149754997881748, - "learning_rate": 8.410040246920799e-06, - "loss": 0.1765, - "step": 11097 - }, - { - "epoch": 0.56, - "grad_norm": 1.8623594055291297, - "learning_rate": 8.408414250573836e-06, - "loss": 0.1808, - "step": 11098 - }, - { - "epoch": 0.56, - "grad_norm": 1.6064872872688194, - "learning_rate": 8.406788297398722e-06, - "loss": 0.2015, - "step": 11099 - }, - { - "epoch": 0.56, - "grad_norm": 0.8449701891957053, - "learning_rate": 8.40516238743957e-06, - "loss": 0.1716, - "step": 11100 - }, - { - "epoch": 0.56, - "grad_norm": 0.8100056755848879, - "learning_rate": 8.403536520740474e-06, - "loss": 0.1733, - "step": 11101 - }, - { - "epoch": 0.56, - "grad_norm": 1.958813365387284, - "learning_rate": 8.401910697345545e-06, - "loss": 0.2077, - "step": 11102 - }, - { - "epoch": 0.56, - "grad_norm": 0.7220376893487891, - "learning_rate": 8.400284917298873e-06, - "loss": 0.1976, - "step": 11103 - }, - { - "epoch": 0.56, - "grad_norm": 1.2678557390705776, - "learning_rate": 8.398659180644566e-06, - "loss": 0.2011, - "step": 11104 - }, - { - "epoch": 0.56, - "grad_norm": 1.1575418812541884, - "learning_rate": 8.397033487426717e-06, - "loss": 0.1832, - "step": 11105 - }, - { - "epoch": 0.56, - "grad_norm": 0.8109308744591895, - "learning_rate": 8.395407837689429e-06, - "loss": 0.186, - "step": 11106 - }, - { - "epoch": 0.56, - "grad_norm": 0.8894855499170428, - "learning_rate": 8.393782231476791e-06, - "loss": 0.1618, - "step": 11107 - }, - { - "epoch": 0.56, - "grad_norm": 0.9600703395707851, - "learning_rate": 8.392156668832904e-06, - "loss": 0.1905, - "step": 11108 - }, - { - "epoch": 0.56, - "grad_norm": 0.7495019141844758, - "learning_rate": 8.390531149801855e-06, - "loss": 0.1779, - "step": 11109 - }, - { - "epoch": 0.56, - "grad_norm": 0.8162713651475454, - "learning_rate": 8.38890567442774e-06, - "loss": 0.1886, - "step": 11110 - }, - { - "epoch": 0.57, - "grad_norm": 0.8336747419583592, - "learning_rate": 8.387280242754647e-06, - "loss": 0.1572, - "step": 11111 - }, - { - "epoch": 0.57, - "grad_norm": 1.2507564505391258, - "learning_rate": 8.385654854826674e-06, - "loss": 0.1887, - "step": 11112 - }, - { - "epoch": 0.57, - "grad_norm": 1.3709963705184494, - "learning_rate": 8.384029510687901e-06, - "loss": 0.1749, - "step": 11113 - }, - { - "epoch": 0.57, - "grad_norm": 1.2214279227277296, - "learning_rate": 8.38240421038242e-06, - "loss": 0.1783, - "step": 11114 - }, - { - "epoch": 0.57, - "grad_norm": 1.1108161355604176, - "learning_rate": 8.380778953954314e-06, - "loss": 0.1769, - "step": 11115 - }, - { - "epoch": 0.57, - "grad_norm": 0.8516403521844824, - "learning_rate": 8.379153741447674e-06, - "loss": 0.1869, - "step": 11116 - }, - { - "epoch": 0.57, - "grad_norm": 1.5159161379968633, - "learning_rate": 8.377528572906577e-06, - "loss": 0.1771, - "step": 11117 - }, - { - "epoch": 0.57, - "grad_norm": 2.1689390073254047, - "learning_rate": 8.375903448375109e-06, - "loss": 0.1902, - "step": 11118 - }, - { - "epoch": 0.57, - "grad_norm": 0.9519725613019046, - "learning_rate": 8.374278367897356e-06, - "loss": 0.1678, - "step": 11119 - }, - { - "epoch": 0.57, - "grad_norm": 1.1234418790420935, - "learning_rate": 8.37265333151739e-06, - "loss": 0.19, - "step": 11120 - }, - { - "epoch": 0.57, - "grad_norm": 0.8878798498587716, - "learning_rate": 8.371028339279294e-06, - "loss": 0.1852, - "step": 11121 - }, - { - "epoch": 0.57, - "grad_norm": 0.8025824103558893, - "learning_rate": 8.369403391227147e-06, - "loss": 0.1799, - "step": 11122 - }, - { - "epoch": 0.57, - "grad_norm": 0.9236287372839385, - "learning_rate": 8.367778487405028e-06, - "loss": 0.1707, - "step": 11123 - }, - { - "epoch": 0.57, - "grad_norm": 0.91283308205594, - "learning_rate": 8.366153627857007e-06, - "loss": 0.1595, - "step": 11124 - }, - { - "epoch": 0.57, - "grad_norm": 0.9898467211961541, - "learning_rate": 8.364528812627164e-06, - "loss": 0.1914, - "step": 11125 - }, - { - "epoch": 0.57, - "grad_norm": 1.496393384975324, - "learning_rate": 8.362904041759565e-06, - "loss": 0.1833, - "step": 11126 - }, - { - "epoch": 0.57, - "grad_norm": 1.1537680813326736, - "learning_rate": 8.361279315298289e-06, - "loss": 0.1994, - "step": 11127 - }, - { - "epoch": 0.57, - "grad_norm": 1.041084597870637, - "learning_rate": 8.359654633287402e-06, - "loss": 0.1973, - "step": 11128 - }, - { - "epoch": 0.57, - "grad_norm": 1.0126117950716194, - "learning_rate": 8.358029995770979e-06, - "loss": 0.1643, - "step": 11129 - }, - { - "epoch": 0.57, - "grad_norm": 1.0569021104004925, - "learning_rate": 8.356405402793081e-06, - "loss": 0.1698, - "step": 11130 - }, - { - "epoch": 0.57, - "grad_norm": 1.026689494861531, - "learning_rate": 8.354780854397782e-06, - "loss": 0.1609, - "step": 11131 - }, - { - "epoch": 0.57, - "grad_norm": 1.0032381083929853, - "learning_rate": 8.353156350629144e-06, - "loss": 0.1742, - "step": 11132 - }, - { - "epoch": 0.57, - "grad_norm": 0.9080894368846892, - "learning_rate": 8.351531891531235e-06, - "loss": 0.1905, - "step": 11133 - }, - { - "epoch": 0.57, - "grad_norm": 0.8909433565269967, - "learning_rate": 8.349907477148117e-06, - "loss": 0.1918, - "step": 11134 - }, - { - "epoch": 0.57, - "grad_norm": 0.9240676936852567, - "learning_rate": 8.348283107523848e-06, - "loss": 0.1654, - "step": 11135 - }, - { - "epoch": 0.57, - "grad_norm": 1.0737051026902602, - "learning_rate": 8.346658782702497e-06, - "loss": 0.1875, - "step": 11136 - }, - { - "epoch": 0.57, - "grad_norm": 1.2452176936417192, - "learning_rate": 8.345034502728117e-06, - "loss": 0.1878, - "step": 11137 - }, - { - "epoch": 0.57, - "grad_norm": 1.0940724955406431, - "learning_rate": 8.343410267644772e-06, - "loss": 0.1712, - "step": 11138 - }, - { - "epoch": 0.57, - "grad_norm": 1.2688468526066765, - "learning_rate": 8.341786077496513e-06, - "loss": 0.1796, - "step": 11139 - }, - { - "epoch": 0.57, - "grad_norm": 1.1625892788590617, - "learning_rate": 8.340161932327405e-06, - "loss": 0.1892, - "step": 11140 - }, - { - "epoch": 0.57, - "grad_norm": 0.8787948953079177, - "learning_rate": 8.338537832181494e-06, - "loss": 0.1679, - "step": 11141 - }, - { - "epoch": 0.57, - "grad_norm": 1.433149178929281, - "learning_rate": 8.336913777102844e-06, - "loss": 0.1955, - "step": 11142 - }, - { - "epoch": 0.57, - "grad_norm": 1.0963719115563995, - "learning_rate": 8.335289767135497e-06, - "loss": 0.1678, - "step": 11143 - }, - { - "epoch": 0.57, - "grad_norm": 0.8733510208261461, - "learning_rate": 8.333665802323511e-06, - "loss": 0.1749, - "step": 11144 - }, - { - "epoch": 0.57, - "grad_norm": 1.0572328492621077, - "learning_rate": 8.33204188271093e-06, - "loss": 0.1763, - "step": 11145 - }, - { - "epoch": 0.57, - "grad_norm": 0.9655645976323488, - "learning_rate": 8.330418008341814e-06, - "loss": 0.1817, - "step": 11146 - }, - { - "epoch": 0.57, - "grad_norm": 1.0939906086573947, - "learning_rate": 8.328794179260199e-06, - "loss": 0.181, - "step": 11147 - }, - { - "epoch": 0.57, - "grad_norm": 1.243143463741493, - "learning_rate": 8.327170395510137e-06, - "loss": 0.182, - "step": 11148 - }, - { - "epoch": 0.57, - "grad_norm": 1.0128360439150017, - "learning_rate": 8.325546657135673e-06, - "loss": 0.1882, - "step": 11149 - }, - { - "epoch": 0.57, - "grad_norm": 1.2114392175763768, - "learning_rate": 8.323922964180853e-06, - "loss": 0.1819, - "step": 11150 - }, - { - "epoch": 0.57, - "grad_norm": 1.111825839568996, - "learning_rate": 8.322299316689717e-06, - "loss": 0.1881, - "step": 11151 - }, - { - "epoch": 0.57, - "grad_norm": 0.9514854567930829, - "learning_rate": 8.320675714706304e-06, - "loss": 0.1885, - "step": 11152 - }, - { - "epoch": 0.57, - "grad_norm": 1.729532871086949, - "learning_rate": 8.319052158274659e-06, - "loss": 0.1857, - "step": 11153 - }, - { - "epoch": 0.57, - "grad_norm": 0.8476108959359467, - "learning_rate": 8.317428647438816e-06, - "loss": 0.1958, - "step": 11154 - }, - { - "epoch": 0.57, - "grad_norm": 1.4892354448303768, - "learning_rate": 8.31580518224282e-06, - "loss": 0.1747, - "step": 11155 - }, - { - "epoch": 0.57, - "grad_norm": 0.9187311605693036, - "learning_rate": 8.3141817627307e-06, - "loss": 0.1769, - "step": 11156 - }, - { - "epoch": 0.57, - "grad_norm": 1.072172900218075, - "learning_rate": 8.312558388946497e-06, - "loss": 0.1733, - "step": 11157 - }, - { - "epoch": 0.57, - "grad_norm": 1.1420718748445113, - "learning_rate": 8.310935060934242e-06, - "loss": 0.1699, - "step": 11158 - }, - { - "epoch": 0.57, - "grad_norm": 0.913240919562509, - "learning_rate": 8.30931177873797e-06, - "loss": 0.1633, - "step": 11159 - }, - { - "epoch": 0.57, - "grad_norm": 1.051073321740484, - "learning_rate": 8.307688542401709e-06, - "loss": 0.1867, - "step": 11160 - }, - { - "epoch": 0.57, - "grad_norm": 0.8171469423370246, - "learning_rate": 8.306065351969494e-06, - "loss": 0.1793, - "step": 11161 - }, - { - "epoch": 0.57, - "grad_norm": 0.8922136251925066, - "learning_rate": 8.30444220748535e-06, - "loss": 0.2011, - "step": 11162 - }, - { - "epoch": 0.57, - "grad_norm": 0.9938900223149604, - "learning_rate": 8.302819108993311e-06, - "loss": 0.1852, - "step": 11163 - }, - { - "epoch": 0.57, - "grad_norm": 1.4287792786208626, - "learning_rate": 8.301196056537398e-06, - "loss": 0.1674, - "step": 11164 - }, - { - "epoch": 0.57, - "grad_norm": 0.7941065747802549, - "learning_rate": 8.299573050161637e-06, - "loss": 0.1573, - "step": 11165 - }, - { - "epoch": 0.57, - "grad_norm": 1.8134429355374344, - "learning_rate": 8.297950089910053e-06, - "loss": 0.1637, - "step": 11166 - }, - { - "epoch": 0.57, - "grad_norm": 1.0627650354860654, - "learning_rate": 8.29632717582667e-06, - "loss": 0.1735, - "step": 11167 - }, - { - "epoch": 0.57, - "grad_norm": 1.1882568419001993, - "learning_rate": 8.29470430795551e-06, - "loss": 0.2061, - "step": 11168 - }, - { - "epoch": 0.57, - "grad_norm": 0.9156524069604196, - "learning_rate": 8.293081486340587e-06, - "loss": 0.1754, - "step": 11169 - }, - { - "epoch": 0.57, - "grad_norm": 0.9397394528457855, - "learning_rate": 8.291458711025928e-06, - "loss": 0.1683, - "step": 11170 - }, - { - "epoch": 0.57, - "grad_norm": 0.9827543442383088, - "learning_rate": 8.289835982055546e-06, - "loss": 0.1969, - "step": 11171 - }, - { - "epoch": 0.57, - "grad_norm": 1.001792440456909, - "learning_rate": 8.288213299473464e-06, - "loss": 0.1875, - "step": 11172 - }, - { - "epoch": 0.57, - "grad_norm": 1.083625888509629, - "learning_rate": 8.286590663323689e-06, - "loss": 0.1876, - "step": 11173 - }, - { - "epoch": 0.57, - "grad_norm": 1.220390786988613, - "learning_rate": 8.28496807365024e-06, - "loss": 0.1822, - "step": 11174 - }, - { - "epoch": 0.57, - "grad_norm": 0.9251997557124158, - "learning_rate": 8.283345530497128e-06, - "loss": 0.1911, - "step": 11175 - }, - { - "epoch": 0.57, - "grad_norm": 0.9765254071523586, - "learning_rate": 8.281723033908369e-06, - "loss": 0.1863, - "step": 11176 - }, - { - "epoch": 0.57, - "grad_norm": 1.4798687261341534, - "learning_rate": 8.280100583927965e-06, - "loss": 0.1853, - "step": 11177 - }, - { - "epoch": 0.57, - "grad_norm": 0.813600417515956, - "learning_rate": 8.278478180599935e-06, - "loss": 0.1628, - "step": 11178 - }, - { - "epoch": 0.57, - "grad_norm": 1.0435886610526612, - "learning_rate": 8.276855823968278e-06, - "loss": 0.1875, - "step": 11179 - }, - { - "epoch": 0.57, - "grad_norm": 0.8112724641517984, - "learning_rate": 8.275233514077003e-06, - "loss": 0.1714, - "step": 11180 - }, - { - "epoch": 0.57, - "grad_norm": 1.3049418762007479, - "learning_rate": 8.273611250970118e-06, - "loss": 0.1742, - "step": 11181 - }, - { - "epoch": 0.57, - "grad_norm": 1.2920212964599433, - "learning_rate": 8.271989034691628e-06, - "loss": 0.1938, - "step": 11182 - }, - { - "epoch": 0.57, - "grad_norm": 0.9233125397139785, - "learning_rate": 8.270366865285528e-06, - "loss": 0.1923, - "step": 11183 - }, - { - "epoch": 0.57, - "grad_norm": 1.1376499852517181, - "learning_rate": 8.268744742795827e-06, - "loss": 0.1899, - "step": 11184 - }, - { - "epoch": 0.57, - "grad_norm": 1.094168903904755, - "learning_rate": 8.26712266726652e-06, - "loss": 0.1805, - "step": 11185 - }, - { - "epoch": 0.57, - "grad_norm": 0.9333248112316166, - "learning_rate": 8.265500638741615e-06, - "loss": 0.1801, - "step": 11186 - }, - { - "epoch": 0.57, - "grad_norm": 0.823286028623516, - "learning_rate": 8.263878657265099e-06, - "loss": 0.1779, - "step": 11187 - }, - { - "epoch": 0.57, - "grad_norm": 0.7560114879876358, - "learning_rate": 8.262256722880972e-06, - "loss": 0.1864, - "step": 11188 - }, - { - "epoch": 0.57, - "grad_norm": 1.336291099600056, - "learning_rate": 8.260634835633232e-06, - "loss": 0.1729, - "step": 11189 - }, - { - "epoch": 0.57, - "grad_norm": 0.7182598354778278, - "learning_rate": 8.259012995565868e-06, - "loss": 0.1624, - "step": 11190 - }, - { - "epoch": 0.57, - "grad_norm": 0.7813488338610487, - "learning_rate": 8.257391202722877e-06, - "loss": 0.1972, - "step": 11191 - }, - { - "epoch": 0.57, - "grad_norm": 1.3591205075647002, - "learning_rate": 8.255769457148245e-06, - "loss": 0.197, - "step": 11192 - }, - { - "epoch": 0.57, - "grad_norm": 0.7968283715538477, - "learning_rate": 8.254147758885967e-06, - "loss": 0.1754, - "step": 11193 - }, - { - "epoch": 0.57, - "grad_norm": 0.7972409968258353, - "learning_rate": 8.252526107980027e-06, - "loss": 0.177, - "step": 11194 - }, - { - "epoch": 0.57, - "grad_norm": 1.092037423266726, - "learning_rate": 8.25090450447442e-06, - "loss": 0.198, - "step": 11195 - }, - { - "epoch": 0.57, - "grad_norm": 0.993110992833045, - "learning_rate": 8.249282948413123e-06, - "loss": 0.2043, - "step": 11196 - }, - { - "epoch": 0.57, - "grad_norm": 2.0829880769116116, - "learning_rate": 8.247661439840126e-06, - "loss": 0.1982, - "step": 11197 - }, - { - "epoch": 0.57, - "grad_norm": 1.15991828521091, - "learning_rate": 8.246039978799409e-06, - "loss": 0.1755, - "step": 11198 - }, - { - "epoch": 0.57, - "grad_norm": 0.8749820174230736, - "learning_rate": 8.244418565334962e-06, - "loss": 0.1856, - "step": 11199 - }, - { - "epoch": 0.57, - "grad_norm": 1.0155370756575854, - "learning_rate": 8.242797199490757e-06, - "loss": 0.1975, - "step": 11200 - }, - { - "epoch": 0.57, - "grad_norm": 0.954844012432073, - "learning_rate": 8.241175881310776e-06, - "loss": 0.15, - "step": 11201 - }, - { - "epoch": 0.57, - "grad_norm": 1.393593559664671, - "learning_rate": 8.239554610839e-06, - "loss": 0.1904, - "step": 11202 - }, - { - "epoch": 0.57, - "grad_norm": 1.0309112780357765, - "learning_rate": 8.237933388119409e-06, - "loss": 0.1784, - "step": 11203 - }, - { - "epoch": 0.57, - "grad_norm": 1.2242804853714373, - "learning_rate": 8.236312213195972e-06, - "loss": 0.1842, - "step": 11204 - }, - { - "epoch": 0.57, - "grad_norm": 1.092592283605871, - "learning_rate": 8.234691086112662e-06, - "loss": 0.1838, - "step": 11205 - }, - { - "epoch": 0.57, - "grad_norm": 0.7873812308441436, - "learning_rate": 8.23307000691346e-06, - "loss": 0.1828, - "step": 11206 - }, - { - "epoch": 0.57, - "grad_norm": 1.1405892274453442, - "learning_rate": 8.231448975642329e-06, - "loss": 0.2055, - "step": 11207 - }, - { - "epoch": 0.57, - "grad_norm": 0.9035227624336404, - "learning_rate": 8.229827992343251e-06, - "loss": 0.1863, - "step": 11208 - }, - { - "epoch": 0.57, - "grad_norm": 0.9515723670404533, - "learning_rate": 8.228207057060184e-06, - "loss": 0.1696, - "step": 11209 - }, - { - "epoch": 0.57, - "grad_norm": 1.2242351733938421, - "learning_rate": 8.226586169837103e-06, - "loss": 0.213, - "step": 11210 - }, - { - "epoch": 0.57, - "grad_norm": 1.0159569573403826, - "learning_rate": 8.224965330717967e-06, - "loss": 0.1848, - "step": 11211 - }, - { - "epoch": 0.57, - "grad_norm": 0.8746281789128181, - "learning_rate": 8.223344539746755e-06, - "loss": 0.1847, - "step": 11212 - }, - { - "epoch": 0.57, - "grad_norm": 0.9441233355578249, - "learning_rate": 8.221723796967416e-06, - "loss": 0.1997, - "step": 11213 - }, - { - "epoch": 0.57, - "grad_norm": 0.9518524036191406, - "learning_rate": 8.220103102423923e-06, - "loss": 0.1789, - "step": 11214 - }, - { - "epoch": 0.57, - "grad_norm": 0.779774266436401, - "learning_rate": 8.21848245616023e-06, - "loss": 0.1759, - "step": 11215 - }, - { - "epoch": 0.57, - "grad_norm": 2.3450116809336112, - "learning_rate": 8.216861858220307e-06, - "loss": 0.1888, - "step": 11216 - }, - { - "epoch": 0.57, - "grad_norm": 1.229616077127556, - "learning_rate": 8.215241308648102e-06, - "loss": 0.194, - "step": 11217 - }, - { - "epoch": 0.57, - "grad_norm": 0.9018917733087243, - "learning_rate": 8.213620807487583e-06, - "loss": 0.168, - "step": 11218 - }, - { - "epoch": 0.57, - "grad_norm": 1.079239171539652, - "learning_rate": 8.212000354782695e-06, - "loss": 0.19, - "step": 11219 - }, - { - "epoch": 0.57, - "grad_norm": 0.8430855813690791, - "learning_rate": 8.210379950577398e-06, - "loss": 0.1706, - "step": 11220 - }, - { - "epoch": 0.57, - "grad_norm": 0.8230309276074811, - "learning_rate": 8.208759594915652e-06, - "loss": 0.1828, - "step": 11221 - }, - { - "epoch": 0.57, - "grad_norm": 1.050508723669258, - "learning_rate": 8.207139287841397e-06, - "loss": 0.1818, - "step": 11222 - }, - { - "epoch": 0.57, - "grad_norm": 0.8161318295355628, - "learning_rate": 8.205519029398592e-06, - "loss": 0.1982, - "step": 11223 - }, - { - "epoch": 0.57, - "grad_norm": 0.8558635578387, - "learning_rate": 8.203898819631183e-06, - "loss": 0.1853, - "step": 11224 - }, - { - "epoch": 0.57, - "grad_norm": 0.8714033130034283, - "learning_rate": 8.202278658583125e-06, - "loss": 0.1865, - "step": 11225 - }, - { - "epoch": 0.57, - "grad_norm": 0.8827239327377759, - "learning_rate": 8.200658546298354e-06, - "loss": 0.1877, - "step": 11226 - }, - { - "epoch": 0.57, - "grad_norm": 1.2222996881129682, - "learning_rate": 8.199038482820824e-06, - "loss": 0.1913, - "step": 11227 - }, - { - "epoch": 0.57, - "grad_norm": 0.8298858935965866, - "learning_rate": 8.197418468194476e-06, - "loss": 0.179, - "step": 11228 - }, - { - "epoch": 0.57, - "grad_norm": 0.8088596354338242, - "learning_rate": 8.195798502463256e-06, - "loss": 0.1849, - "step": 11229 - }, - { - "epoch": 0.57, - "grad_norm": 1.3270774863738242, - "learning_rate": 8.194178585671102e-06, - "loss": 0.1787, - "step": 11230 - }, - { - "epoch": 0.57, - "grad_norm": 1.1797488127169842, - "learning_rate": 8.192558717861956e-06, - "loss": 0.1669, - "step": 11231 - }, - { - "epoch": 0.57, - "grad_norm": 0.790298626038553, - "learning_rate": 8.190938899079756e-06, - "loss": 0.1583, - "step": 11232 - }, - { - "epoch": 0.57, - "grad_norm": 0.7010232969764986, - "learning_rate": 8.189319129368445e-06, - "loss": 0.1859, - "step": 11233 - }, - { - "epoch": 0.57, - "grad_norm": 0.9080372020704749, - "learning_rate": 8.18769940877195e-06, - "loss": 0.1892, - "step": 11234 - }, - { - "epoch": 0.57, - "grad_norm": 1.0889779620400961, - "learning_rate": 8.186079737334214e-06, - "loss": 0.1793, - "step": 11235 - }, - { - "epoch": 0.57, - "grad_norm": 0.91466018716544, - "learning_rate": 8.184460115099165e-06, - "loss": 0.1735, - "step": 11236 - }, - { - "epoch": 0.57, - "grad_norm": 0.8871193225037902, - "learning_rate": 8.182840542110739e-06, - "loss": 0.1565, - "step": 11237 - }, - { - "epoch": 0.57, - "grad_norm": 1.0285746494638235, - "learning_rate": 8.181221018412868e-06, - "loss": 0.1718, - "step": 11238 - }, - { - "epoch": 0.57, - "grad_norm": 1.029761854841097, - "learning_rate": 8.179601544049475e-06, - "loss": 0.187, - "step": 11239 - }, - { - "epoch": 0.57, - "grad_norm": 1.1239798590547327, - "learning_rate": 8.177982119064497e-06, - "loss": 0.1579, - "step": 11240 - }, - { - "epoch": 0.57, - "grad_norm": 1.275798523162937, - "learning_rate": 8.176362743501853e-06, - "loss": 0.1851, - "step": 11241 - }, - { - "epoch": 0.57, - "grad_norm": 1.1377935943260051, - "learning_rate": 8.174743417405479e-06, - "loss": 0.1923, - "step": 11242 - }, - { - "epoch": 0.57, - "grad_norm": 0.7527062986335642, - "learning_rate": 8.173124140819285e-06, - "loss": 0.1969, - "step": 11243 - }, - { - "epoch": 0.57, - "grad_norm": 1.0965192679867608, - "learning_rate": 8.171504913787208e-06, - "loss": 0.1695, - "step": 11244 - }, - { - "epoch": 0.57, - "grad_norm": 0.8068912443926077, - "learning_rate": 8.169885736353158e-06, - "loss": 0.1896, - "step": 11245 - }, - { - "epoch": 0.57, - "grad_norm": 0.8361653287998111, - "learning_rate": 8.168266608561068e-06, - "loss": 0.1828, - "step": 11246 - }, - { - "epoch": 0.57, - "grad_norm": 1.2573793588577753, - "learning_rate": 8.166647530454843e-06, - "loss": 0.1468, - "step": 11247 - }, - { - "epoch": 0.57, - "grad_norm": 0.9078758393689891, - "learning_rate": 8.165028502078412e-06, - "loss": 0.1715, - "step": 11248 - }, - { - "epoch": 0.57, - "grad_norm": 0.7759762634961216, - "learning_rate": 8.163409523475682e-06, - "loss": 0.1895, - "step": 11249 - }, - { - "epoch": 0.57, - "grad_norm": 0.8809090715288981, - "learning_rate": 8.161790594690577e-06, - "loss": 0.1923, - "step": 11250 - }, - { - "epoch": 0.57, - "grad_norm": 1.07713204744277, - "learning_rate": 8.160171715767002e-06, - "loss": 0.1645, - "step": 11251 - }, - { - "epoch": 0.57, - "grad_norm": 0.8341872450950087, - "learning_rate": 8.158552886748878e-06, - "loss": 0.1677, - "step": 11252 - }, - { - "epoch": 0.57, - "grad_norm": 1.642327976245687, - "learning_rate": 8.156934107680106e-06, - "loss": 0.1772, - "step": 11253 - }, - { - "epoch": 0.57, - "grad_norm": 0.822319012757017, - "learning_rate": 8.155315378604602e-06, - "loss": 0.1802, - "step": 11254 - }, - { - "epoch": 0.57, - "grad_norm": 1.0442418900022932, - "learning_rate": 8.153696699566272e-06, - "loss": 0.1798, - "step": 11255 - }, - { - "epoch": 0.57, - "grad_norm": 0.8988307675062374, - "learning_rate": 8.152078070609027e-06, - "loss": 0.1967, - "step": 11256 - }, - { - "epoch": 0.57, - "grad_norm": 1.1779903856225626, - "learning_rate": 8.150459491776765e-06, - "loss": 0.1847, - "step": 11257 - }, - { - "epoch": 0.57, - "grad_norm": 0.8690379175414268, - "learning_rate": 8.148840963113392e-06, - "loss": 0.1668, - "step": 11258 - }, - { - "epoch": 0.57, - "grad_norm": 7.08253893833268, - "learning_rate": 8.147222484662817e-06, - "loss": 0.1828, - "step": 11259 - }, - { - "epoch": 0.57, - "grad_norm": 2.802304156833683, - "learning_rate": 8.145604056468935e-06, - "loss": 0.177, - "step": 11260 - }, - { - "epoch": 0.57, - "grad_norm": 0.8294670880912641, - "learning_rate": 8.14398567857565e-06, - "loss": 0.1621, - "step": 11261 - }, - { - "epoch": 0.57, - "grad_norm": 0.7540643717119795, - "learning_rate": 8.142367351026853e-06, - "loss": 0.1481, - "step": 11262 - }, - { - "epoch": 0.57, - "grad_norm": 1.26321843148518, - "learning_rate": 8.140749073866449e-06, - "loss": 0.1903, - "step": 11263 - }, - { - "epoch": 0.57, - "grad_norm": 1.3476642102613676, - "learning_rate": 8.139130847138332e-06, - "loss": 0.1826, - "step": 11264 - }, - { - "epoch": 0.57, - "grad_norm": 1.2458429990147137, - "learning_rate": 8.137512670886397e-06, - "loss": 0.1746, - "step": 11265 - }, - { - "epoch": 0.57, - "grad_norm": 8.834594923921182, - "learning_rate": 8.135894545154533e-06, - "loss": 0.2108, - "step": 11266 - }, - { - "epoch": 0.57, - "grad_norm": 1.129814446121592, - "learning_rate": 8.134276469986638e-06, - "loss": 0.1707, - "step": 11267 - }, - { - "epoch": 0.57, - "grad_norm": 1.136819679803226, - "learning_rate": 8.132658445426595e-06, - "loss": 0.1569, - "step": 11268 - }, - { - "epoch": 0.57, - "grad_norm": 0.9788466534491648, - "learning_rate": 8.131040471518302e-06, - "loss": 0.173, - "step": 11269 - }, - { - "epoch": 0.57, - "grad_norm": 1.0440416991646695, - "learning_rate": 8.129422548305637e-06, - "loss": 0.1741, - "step": 11270 - }, - { - "epoch": 0.57, - "grad_norm": 1.1616215131566432, - "learning_rate": 8.127804675832494e-06, - "loss": 0.1861, - "step": 11271 - }, - { - "epoch": 0.57, - "grad_norm": 1.1600212126434577, - "learning_rate": 8.126186854142752e-06, - "loss": 0.2059, - "step": 11272 - }, - { - "epoch": 0.57, - "grad_norm": 1.6623548564513533, - "learning_rate": 8.124569083280303e-06, - "loss": 0.1855, - "step": 11273 - }, - { - "epoch": 0.57, - "grad_norm": 1.185514362249397, - "learning_rate": 8.122951363289022e-06, - "loss": 0.1988, - "step": 11274 - }, - { - "epoch": 0.57, - "grad_norm": 2.536505725783757, - "learning_rate": 8.12133369421279e-06, - "loss": 0.1921, - "step": 11275 - }, - { - "epoch": 0.57, - "grad_norm": 1.047302910115353, - "learning_rate": 8.119716076095485e-06, - "loss": 0.2041, - "step": 11276 - }, - { - "epoch": 0.57, - "grad_norm": 1.1227914996846082, - "learning_rate": 8.118098508980989e-06, - "loss": 0.1765, - "step": 11277 - }, - { - "epoch": 0.57, - "grad_norm": 0.8278666307866703, - "learning_rate": 8.116480992913181e-06, - "loss": 0.1863, - "step": 11278 - }, - { - "epoch": 0.57, - "grad_norm": 1.2470534589076179, - "learning_rate": 8.114863527935929e-06, - "loss": 0.1943, - "step": 11279 - }, - { - "epoch": 0.57, - "grad_norm": 0.7763928476800777, - "learning_rate": 8.11324611409311e-06, - "loss": 0.1893, - "step": 11280 - }, - { - "epoch": 0.57, - "grad_norm": 0.7925818183893052, - "learning_rate": 8.111628751428595e-06, - "loss": 0.1634, - "step": 11281 - }, - { - "epoch": 0.57, - "grad_norm": 0.8936196251979709, - "learning_rate": 8.110011439986262e-06, - "loss": 0.163, - "step": 11282 - }, - { - "epoch": 0.57, - "grad_norm": 1.1152052160492107, - "learning_rate": 8.10839417980997e-06, - "loss": 0.1821, - "step": 11283 - }, - { - "epoch": 0.57, - "grad_norm": 0.8475383954634401, - "learning_rate": 8.106776970943597e-06, - "loss": 0.1888, - "step": 11284 - }, - { - "epoch": 0.57, - "grad_norm": 0.9449434158508707, - "learning_rate": 8.105159813431002e-06, - "loss": 0.1883, - "step": 11285 - }, - { - "epoch": 0.57, - "grad_norm": 1.0582156036341694, - "learning_rate": 8.103542707316058e-06, - "loss": 0.1877, - "step": 11286 - }, - { - "epoch": 0.57, - "grad_norm": 0.6783740262877755, - "learning_rate": 8.10192565264262e-06, - "loss": 0.1699, - "step": 11287 - }, - { - "epoch": 0.57, - "grad_norm": 1.2497198731430568, - "learning_rate": 8.100308649454563e-06, - "loss": 0.1756, - "step": 11288 - }, - { - "epoch": 0.57, - "grad_norm": 1.7127747891379077, - "learning_rate": 8.098691697795737e-06, - "loss": 0.1665, - "step": 11289 - }, - { - "epoch": 0.57, - "grad_norm": 0.8696899217535935, - "learning_rate": 8.097074797710007e-06, - "loss": 0.1796, - "step": 11290 - }, - { - "epoch": 0.57, - "grad_norm": 0.8810934002107135, - "learning_rate": 8.095457949241233e-06, - "loss": 0.1798, - "step": 11291 - }, - { - "epoch": 0.57, - "grad_norm": 1.086874398334323, - "learning_rate": 8.093841152433265e-06, - "loss": 0.1778, - "step": 11292 - }, - { - "epoch": 0.57, - "grad_norm": 1.2700382193260717, - "learning_rate": 8.092224407329965e-06, - "loss": 0.1766, - "step": 11293 - }, - { - "epoch": 0.57, - "grad_norm": 1.0137503311295675, - "learning_rate": 8.090607713975182e-06, - "loss": 0.1728, - "step": 11294 - }, - { - "epoch": 0.57, - "grad_norm": 1.280165089528958, - "learning_rate": 8.08899107241278e-06, - "loss": 0.1919, - "step": 11295 - }, - { - "epoch": 0.57, - "grad_norm": 1.2938704499783362, - "learning_rate": 8.087374482686598e-06, - "loss": 0.2076, - "step": 11296 - }, - { - "epoch": 0.57, - "grad_norm": 1.1512383258265375, - "learning_rate": 8.085757944840493e-06, - "loss": 0.2066, - "step": 11297 - }, - { - "epoch": 0.57, - "grad_norm": 0.7807085489114645, - "learning_rate": 8.08414145891831e-06, - "loss": 0.2115, - "step": 11298 - }, - { - "epoch": 0.57, - "grad_norm": 1.05569501529721, - "learning_rate": 8.082525024963902e-06, - "loss": 0.1771, - "step": 11299 - }, - { - "epoch": 0.57, - "grad_norm": 0.9034811271517589, - "learning_rate": 8.080908643021107e-06, - "loss": 0.2346, - "step": 11300 - }, - { - "epoch": 0.57, - "grad_norm": 1.0660529540221337, - "learning_rate": 8.079292313133778e-06, - "loss": 0.1686, - "step": 11301 - }, - { - "epoch": 0.57, - "grad_norm": 1.2568586287865866, - "learning_rate": 8.077676035345748e-06, - "loss": 0.1725, - "step": 11302 - }, - { - "epoch": 0.57, - "grad_norm": 0.8573594071467343, - "learning_rate": 8.076059809700866e-06, - "loss": 0.1938, - "step": 11303 - }, - { - "epoch": 0.57, - "grad_norm": 1.0561172525104656, - "learning_rate": 8.07444363624297e-06, - "loss": 0.1567, - "step": 11304 - }, - { - "epoch": 0.57, - "grad_norm": 0.9525099461969996, - "learning_rate": 8.072827515015902e-06, - "loss": 0.1808, - "step": 11305 - }, - { - "epoch": 0.57, - "grad_norm": 1.1728442563294836, - "learning_rate": 8.071211446063495e-06, - "loss": 0.1774, - "step": 11306 - }, - { - "epoch": 0.57, - "grad_norm": 1.2680203679857898, - "learning_rate": 8.069595429429586e-06, - "loss": 0.1642, - "step": 11307 - }, - { - "epoch": 0.58, - "grad_norm": 0.9485894628866848, - "learning_rate": 8.067979465158013e-06, - "loss": 0.1728, - "step": 11308 - }, - { - "epoch": 0.58, - "grad_norm": 0.9960977651544729, - "learning_rate": 8.0663635532926e-06, - "loss": 0.1747, - "step": 11309 - }, - { - "epoch": 0.58, - "grad_norm": 1.3899001437254705, - "learning_rate": 8.06474769387719e-06, - "loss": 0.1401, - "step": 11310 - }, - { - "epoch": 0.58, - "grad_norm": 2.8600698725128217, - "learning_rate": 8.063131886955605e-06, - "loss": 0.1904, - "step": 11311 - }, - { - "epoch": 0.58, - "grad_norm": 0.9555677688751293, - "learning_rate": 8.061516132571679e-06, - "loss": 0.1964, - "step": 11312 - }, - { - "epoch": 0.58, - "grad_norm": 0.9597259602120783, - "learning_rate": 8.059900430769234e-06, - "loss": 0.185, - "step": 11313 - }, - { - "epoch": 0.58, - "grad_norm": 1.111605309098347, - "learning_rate": 8.058284781592107e-06, - "loss": 0.1626, - "step": 11314 - }, - { - "epoch": 0.58, - "grad_norm": 1.2225140069439981, - "learning_rate": 8.056669185084108e-06, - "loss": 0.1598, - "step": 11315 - }, - { - "epoch": 0.58, - "grad_norm": 1.2846536877935064, - "learning_rate": 8.05505364128907e-06, - "loss": 0.1752, - "step": 11316 - }, - { - "epoch": 0.58, - "grad_norm": 0.8394495233121696, - "learning_rate": 8.053438150250808e-06, - "loss": 0.172, - "step": 11317 - }, - { - "epoch": 0.58, - "grad_norm": 0.9968223731163737, - "learning_rate": 8.051822712013151e-06, - "loss": 0.1919, - "step": 11318 - }, - { - "epoch": 0.58, - "grad_norm": 1.123603368114008, - "learning_rate": 8.05020732661991e-06, - "loss": 0.1887, - "step": 11319 - }, - { - "epoch": 0.58, - "grad_norm": 0.7884087579364176, - "learning_rate": 8.048591994114906e-06, - "loss": 0.1702, - "step": 11320 - }, - { - "epoch": 0.58, - "grad_norm": 1.4011551740812698, - "learning_rate": 8.046976714541953e-06, - "loss": 0.164, - "step": 11321 - }, - { - "epoch": 0.58, - "grad_norm": 0.8102925771158213, - "learning_rate": 8.045361487944872e-06, - "loss": 0.2049, - "step": 11322 - }, - { - "epoch": 0.58, - "grad_norm": 0.7791722073468371, - "learning_rate": 8.043746314367466e-06, - "loss": 0.1966, - "step": 11323 - }, - { - "epoch": 0.58, - "grad_norm": 1.2138756039240277, - "learning_rate": 8.042131193853553e-06, - "loss": 0.1648, - "step": 11324 - }, - { - "epoch": 0.58, - "grad_norm": 1.1604943883973953, - "learning_rate": 8.04051612644694e-06, - "loss": 0.1767, - "step": 11325 - }, - { - "epoch": 0.58, - "grad_norm": 0.7364800594337658, - "learning_rate": 8.038901112191443e-06, - "loss": 0.157, - "step": 11326 - }, - { - "epoch": 0.58, - "grad_norm": 0.9303579297888586, - "learning_rate": 8.037286151130864e-06, - "loss": 0.1788, - "step": 11327 - }, - { - "epoch": 0.58, - "grad_norm": 0.9261735222055252, - "learning_rate": 8.035671243309005e-06, - "loss": 0.2088, - "step": 11328 - }, - { - "epoch": 0.58, - "grad_norm": 1.146395985557706, - "learning_rate": 8.034056388769676e-06, - "loss": 0.1908, - "step": 11329 - }, - { - "epoch": 0.58, - "grad_norm": 0.8834531220910166, - "learning_rate": 8.032441587556676e-06, - "loss": 0.182, - "step": 11330 - }, - { - "epoch": 0.58, - "grad_norm": 0.7483513003591301, - "learning_rate": 8.030826839713814e-06, - "loss": 0.1804, - "step": 11331 - }, - { - "epoch": 0.58, - "grad_norm": 0.9241777250512747, - "learning_rate": 8.02921214528488e-06, - "loss": 0.1819, - "step": 11332 - }, - { - "epoch": 0.58, - "grad_norm": 0.8444863698741362, - "learning_rate": 8.02759750431368e-06, - "loss": 0.1852, - "step": 11333 - }, - { - "epoch": 0.58, - "grad_norm": 1.3926107355585513, - "learning_rate": 8.025982916844008e-06, - "loss": 0.1818, - "step": 11334 - }, - { - "epoch": 0.58, - "grad_norm": 0.8435975489819109, - "learning_rate": 8.024368382919665e-06, - "loss": 0.1855, - "step": 11335 - }, - { - "epoch": 0.58, - "grad_norm": 1.3527644387003557, - "learning_rate": 8.022753902584436e-06, - "loss": 0.206, - "step": 11336 - }, - { - "epoch": 0.58, - "grad_norm": 0.9316082958199031, - "learning_rate": 8.021139475882122e-06, - "loss": 0.1661, - "step": 11337 - }, - { - "epoch": 0.58, - "grad_norm": 0.7054655059974458, - "learning_rate": 8.01952510285651e-06, - "loss": 0.1804, - "step": 11338 - }, - { - "epoch": 0.58, - "grad_norm": 0.908661234462497, - "learning_rate": 8.017910783551394e-06, - "loss": 0.176, - "step": 11339 - }, - { - "epoch": 0.58, - "grad_norm": 0.7743444896095891, - "learning_rate": 8.016296518010558e-06, - "loss": 0.1833, - "step": 11340 - }, - { - "epoch": 0.58, - "grad_norm": 0.9433594583525929, - "learning_rate": 8.014682306277792e-06, - "loss": 0.1766, - "step": 11341 - }, - { - "epoch": 0.58, - "grad_norm": 1.122567249465266, - "learning_rate": 8.013068148396878e-06, - "loss": 0.1981, - "step": 11342 - }, - { - "epoch": 0.58, - "grad_norm": 0.8714340710402263, - "learning_rate": 8.011454044411606e-06, - "loss": 0.1819, - "step": 11343 - }, - { - "epoch": 0.58, - "grad_norm": 1.572982517943364, - "learning_rate": 8.009839994365757e-06, - "loss": 0.1771, - "step": 11344 - }, - { - "epoch": 0.58, - "grad_norm": 1.033687137192266, - "learning_rate": 8.008225998303107e-06, - "loss": 0.1911, - "step": 11345 - }, - { - "epoch": 0.58, - "grad_norm": 1.0309772673882498, - "learning_rate": 8.00661205626744e-06, - "loss": 0.2027, - "step": 11346 - }, - { - "epoch": 0.58, - "grad_norm": 0.9153445694050915, - "learning_rate": 8.004998168302531e-06, - "loss": 0.1847, - "step": 11347 - }, - { - "epoch": 0.58, - "grad_norm": 0.8893563536780098, - "learning_rate": 8.003384334452165e-06, - "loss": 0.1967, - "step": 11348 - }, - { - "epoch": 0.58, - "grad_norm": 0.9472506688885662, - "learning_rate": 8.001770554760107e-06, - "loss": 0.1759, - "step": 11349 - }, - { - "epoch": 0.58, - "grad_norm": 0.8856873542075918, - "learning_rate": 8.000156829270136e-06, - "loss": 0.1713, - "step": 11350 - }, - { - "epoch": 0.58, - "grad_norm": 0.8354670246876854, - "learning_rate": 7.998543158026025e-06, - "loss": 0.1784, - "step": 11351 - }, - { - "epoch": 0.58, - "grad_norm": 1.8998962124671674, - "learning_rate": 7.996929541071545e-06, - "loss": 0.189, - "step": 11352 - }, - { - "epoch": 0.58, - "grad_norm": 0.6963210550002428, - "learning_rate": 7.995315978450462e-06, - "loss": 0.1902, - "step": 11353 - }, - { - "epoch": 0.58, - "grad_norm": 1.7976868012803862, - "learning_rate": 7.993702470206547e-06, - "loss": 0.1982, - "step": 11354 - }, - { - "epoch": 0.58, - "grad_norm": 0.8559801835244099, - "learning_rate": 7.992089016383565e-06, - "loss": 0.1885, - "step": 11355 - }, - { - "epoch": 0.58, - "grad_norm": 0.9255002415299407, - "learning_rate": 7.990475617025286e-06, - "loss": 0.1789, - "step": 11356 - }, - { - "epoch": 0.58, - "grad_norm": 1.0647958227251417, - "learning_rate": 7.988862272175464e-06, - "loss": 0.1602, - "step": 11357 - }, - { - "epoch": 0.58, - "grad_norm": 1.0094849945715152, - "learning_rate": 7.987248981877872e-06, - "loss": 0.1838, - "step": 11358 - }, - { - "epoch": 0.58, - "grad_norm": 0.7584797503213979, - "learning_rate": 7.985635746176261e-06, - "loss": 0.1643, - "step": 11359 - }, - { - "epoch": 0.58, - "grad_norm": 0.9401437229638838, - "learning_rate": 7.984022565114396e-06, - "loss": 0.2103, - "step": 11360 - }, - { - "epoch": 0.58, - "grad_norm": 0.9411254100791999, - "learning_rate": 7.982409438736034e-06, - "loss": 0.1814, - "step": 11361 - }, - { - "epoch": 0.58, - "grad_norm": 2.467728278191582, - "learning_rate": 7.980796367084925e-06, - "loss": 0.1642, - "step": 11362 - }, - { - "epoch": 0.58, - "grad_norm": 0.7544167417792813, - "learning_rate": 7.979183350204833e-06, - "loss": 0.1721, - "step": 11363 - }, - { - "epoch": 0.58, - "grad_norm": 0.7551276841736471, - "learning_rate": 7.977570388139503e-06, - "loss": 0.1781, - "step": 11364 - }, - { - "epoch": 0.58, - "grad_norm": 0.9763581032326041, - "learning_rate": 7.975957480932695e-06, - "loss": 0.1714, - "step": 11365 - }, - { - "epoch": 0.58, - "grad_norm": 0.7418208277674712, - "learning_rate": 7.974344628628151e-06, - "loss": 0.1668, - "step": 11366 - }, - { - "epoch": 0.58, - "grad_norm": 0.8384863204541823, - "learning_rate": 7.972731831269624e-06, - "loss": 0.1927, - "step": 11367 - }, - { - "epoch": 0.58, - "grad_norm": 0.8651724563330945, - "learning_rate": 7.97111908890086e-06, - "loss": 0.1944, - "step": 11368 - }, - { - "epoch": 0.58, - "grad_norm": 1.0535952841849294, - "learning_rate": 7.96950640156561e-06, - "loss": 0.2104, - "step": 11369 - }, - { - "epoch": 0.58, - "grad_norm": 4.437367487342895, - "learning_rate": 7.967893769307608e-06, - "loss": 0.1829, - "step": 11370 - }, - { - "epoch": 0.58, - "grad_norm": 1.0217015755872205, - "learning_rate": 7.966281192170607e-06, - "loss": 0.1721, - "step": 11371 - }, - { - "epoch": 0.58, - "grad_norm": 0.9469785199313246, - "learning_rate": 7.964668670198339e-06, - "loss": 0.1768, - "step": 11372 - }, - { - "epoch": 0.58, - "grad_norm": 0.8351899718021233, - "learning_rate": 7.963056203434552e-06, - "loss": 0.1773, - "step": 11373 - }, - { - "epoch": 0.58, - "grad_norm": 0.8950493743963949, - "learning_rate": 7.961443791922975e-06, - "loss": 0.1957, - "step": 11374 - }, - { - "epoch": 0.58, - "grad_norm": 0.5883478193156267, - "learning_rate": 7.959831435707357e-06, - "loss": 0.1735, - "step": 11375 - }, - { - "epoch": 0.58, - "grad_norm": 1.0007651701611708, - "learning_rate": 7.958219134831423e-06, - "loss": 0.1675, - "step": 11376 - }, - { - "epoch": 0.58, - "grad_norm": 1.183435973584841, - "learning_rate": 7.95660688933891e-06, - "loss": 0.161, - "step": 11377 - }, - { - "epoch": 0.58, - "grad_norm": 0.8598891102521062, - "learning_rate": 7.954994699273555e-06, - "loss": 0.1785, - "step": 11378 - }, - { - "epoch": 0.58, - "grad_norm": 1.0475770668796385, - "learning_rate": 7.953382564679078e-06, - "loss": 0.1632, - "step": 11379 - }, - { - "epoch": 0.58, - "grad_norm": 0.7656142930701677, - "learning_rate": 7.951770485599218e-06, - "loss": 0.1835, - "step": 11380 - }, - { - "epoch": 0.58, - "grad_norm": 0.7597755941350266, - "learning_rate": 7.950158462077697e-06, - "loss": 0.175, - "step": 11381 - }, - { - "epoch": 0.58, - "grad_norm": 1.1075597327894078, - "learning_rate": 7.948546494158247e-06, - "loss": 0.1563, - "step": 11382 - }, - { - "epoch": 0.58, - "grad_norm": 0.919981907436329, - "learning_rate": 7.946934581884585e-06, - "loss": 0.1861, - "step": 11383 - }, - { - "epoch": 0.58, - "grad_norm": 0.8663663674300841, - "learning_rate": 7.945322725300444e-06, - "loss": 0.1851, - "step": 11384 - }, - { - "epoch": 0.58, - "grad_norm": 1.0200009715386287, - "learning_rate": 7.943710924449535e-06, - "loss": 0.1677, - "step": 11385 - }, - { - "epoch": 0.58, - "grad_norm": 0.7627219820305977, - "learning_rate": 7.942099179375585e-06, - "loss": 0.1654, - "step": 11386 - }, - { - "epoch": 0.58, - "grad_norm": 1.0960879832055765, - "learning_rate": 7.940487490122309e-06, - "loss": 0.1573, - "step": 11387 - }, - { - "epoch": 0.58, - "grad_norm": 0.9655957129166881, - "learning_rate": 7.93887585673343e-06, - "loss": 0.1622, - "step": 11388 - }, - { - "epoch": 0.58, - "grad_norm": 1.0674479446431646, - "learning_rate": 7.937264279252657e-06, - "loss": 0.1907, - "step": 11389 - }, - { - "epoch": 0.58, - "grad_norm": 0.7380337556095736, - "learning_rate": 7.935652757723709e-06, - "loss": 0.1808, - "step": 11390 - }, - { - "epoch": 0.58, - "grad_norm": 1.027981513065775, - "learning_rate": 7.934041292190293e-06, - "loss": 0.2013, - "step": 11391 - }, - { - "epoch": 0.58, - "grad_norm": 0.7875171759545012, - "learning_rate": 7.93242988269613e-06, - "loss": 0.1519, - "step": 11392 - }, - { - "epoch": 0.58, - "grad_norm": 1.210993758514946, - "learning_rate": 7.930818529284917e-06, - "loss": 0.1916, - "step": 11393 - }, - { - "epoch": 0.58, - "grad_norm": 0.7811719689055883, - "learning_rate": 7.92920723200037e-06, - "loss": 0.1937, - "step": 11394 - }, - { - "epoch": 0.58, - "grad_norm": 0.8521424976568306, - "learning_rate": 7.927595990886194e-06, - "loss": 0.1843, - "step": 11395 - }, - { - "epoch": 0.58, - "grad_norm": 1.190810739178779, - "learning_rate": 7.925984805986096e-06, - "loss": 0.1903, - "step": 11396 - }, - { - "epoch": 0.58, - "grad_norm": 0.8073776952125132, - "learning_rate": 7.924373677343778e-06, - "loss": 0.1905, - "step": 11397 - }, - { - "epoch": 0.58, - "grad_norm": 0.9438162305535253, - "learning_rate": 7.922762605002938e-06, - "loss": 0.1794, - "step": 11398 - }, - { - "epoch": 0.58, - "grad_norm": 1.0413964995074405, - "learning_rate": 7.92115158900728e-06, - "loss": 0.1932, - "step": 11399 - }, - { - "epoch": 0.58, - "grad_norm": 0.8659254048488056, - "learning_rate": 7.9195406294005e-06, - "loss": 0.1756, - "step": 11400 - }, - { - "epoch": 0.58, - "grad_norm": 0.8148011974376181, - "learning_rate": 7.917929726226305e-06, - "loss": 0.1778, - "step": 11401 - }, - { - "epoch": 0.58, - "grad_norm": 0.8350748355467905, - "learning_rate": 7.916318879528377e-06, - "loss": 0.1828, - "step": 11402 - }, - { - "epoch": 0.58, - "grad_norm": 1.0380626632884626, - "learning_rate": 7.91470808935042e-06, - "loss": 0.1881, - "step": 11403 - }, - { - "epoch": 0.58, - "grad_norm": 0.924415556504601, - "learning_rate": 7.913097355736122e-06, - "loss": 0.1809, - "step": 11404 - }, - { - "epoch": 0.58, - "grad_norm": 0.7720068220086019, - "learning_rate": 7.91148667872918e-06, - "loss": 0.1791, - "step": 11405 - }, - { - "epoch": 0.58, - "grad_norm": 0.8069146836895251, - "learning_rate": 7.909876058373275e-06, - "loss": 0.1721, - "step": 11406 - }, - { - "epoch": 0.58, - "grad_norm": 0.7883066391198847, - "learning_rate": 7.908265494712105e-06, - "loss": 0.2014, - "step": 11407 - }, - { - "epoch": 0.58, - "grad_norm": 1.8683896446322161, - "learning_rate": 7.906654987789346e-06, - "loss": 0.1462, - "step": 11408 - }, - { - "epoch": 0.58, - "grad_norm": 0.7508193646444697, - "learning_rate": 7.905044537648693e-06, - "loss": 0.1703, - "step": 11409 - }, - { - "epoch": 0.58, - "grad_norm": 1.5055920321230043, - "learning_rate": 7.903434144333824e-06, - "loss": 0.1907, - "step": 11410 - }, - { - "epoch": 0.58, - "grad_norm": 0.888206103381393, - "learning_rate": 7.901823807888423e-06, - "loss": 0.1638, - "step": 11411 - }, - { - "epoch": 0.58, - "grad_norm": 0.7553488596562018, - "learning_rate": 7.900213528356167e-06, - "loss": 0.1638, - "step": 11412 - }, - { - "epoch": 0.58, - "grad_norm": 1.0805557840987123, - "learning_rate": 7.898603305780741e-06, - "loss": 0.1854, - "step": 11413 - }, - { - "epoch": 0.58, - "grad_norm": 0.9065778378349452, - "learning_rate": 7.89699314020582e-06, - "loss": 0.1778, - "step": 11414 - }, - { - "epoch": 0.58, - "grad_norm": 0.9282067156258447, - "learning_rate": 7.895383031675074e-06, - "loss": 0.1867, - "step": 11415 - }, - { - "epoch": 0.58, - "grad_norm": 0.9695888508795255, - "learning_rate": 7.893772980232186e-06, - "loss": 0.1935, - "step": 11416 - }, - { - "epoch": 0.58, - "grad_norm": 0.8176310094625524, - "learning_rate": 7.89216298592082e-06, - "loss": 0.1776, - "step": 11417 - }, - { - "epoch": 0.58, - "grad_norm": 0.8476605704866567, - "learning_rate": 7.89055304878466e-06, - "loss": 0.1779, - "step": 11418 - }, - { - "epoch": 0.58, - "grad_norm": 0.7864485731998299, - "learning_rate": 7.88894316886736e-06, - "loss": 0.1586, - "step": 11419 - }, - { - "epoch": 0.58, - "grad_norm": 0.8945343470837589, - "learning_rate": 7.8873333462126e-06, - "loss": 0.1903, - "step": 11420 - }, - { - "epoch": 0.58, - "grad_norm": 1.0553523338334387, - "learning_rate": 7.885723580864039e-06, - "loss": 0.1942, - "step": 11421 - }, - { - "epoch": 0.58, - "grad_norm": 1.4852096499603038, - "learning_rate": 7.884113872865352e-06, - "loss": 0.1909, - "step": 11422 - }, - { - "epoch": 0.58, - "grad_norm": 2.918085514806113, - "learning_rate": 7.882504222260187e-06, - "loss": 0.2025, - "step": 11423 - }, - { - "epoch": 0.58, - "grad_norm": 0.7621380179939279, - "learning_rate": 7.880894629092222e-06, - "loss": 0.1904, - "step": 11424 - }, - { - "epoch": 0.58, - "grad_norm": 0.8141253588089128, - "learning_rate": 7.879285093405105e-06, - "loss": 0.198, - "step": 11425 - }, - { - "epoch": 0.58, - "grad_norm": 0.9091609362931211, - "learning_rate": 7.877675615242502e-06, - "loss": 0.1942, - "step": 11426 - }, - { - "epoch": 0.58, - "grad_norm": 0.779397878754563, - "learning_rate": 7.876066194648066e-06, - "loss": 0.1855, - "step": 11427 - }, - { - "epoch": 0.58, - "grad_norm": 0.894861345972049, - "learning_rate": 7.874456831665457e-06, - "loss": 0.1745, - "step": 11428 - }, - { - "epoch": 0.58, - "grad_norm": 0.8408203658866416, - "learning_rate": 7.872847526338324e-06, - "loss": 0.1742, - "step": 11429 - }, - { - "epoch": 0.58, - "grad_norm": 0.811850660399226, - "learning_rate": 7.871238278710322e-06, - "loss": 0.1826, - "step": 11430 - }, - { - "epoch": 0.58, - "grad_norm": 0.8983352155401033, - "learning_rate": 7.869629088825105e-06, - "loss": 0.187, - "step": 11431 - }, - { - "epoch": 0.58, - "grad_norm": 0.727772267058997, - "learning_rate": 7.868019956726318e-06, - "loss": 0.1926, - "step": 11432 - }, - { - "epoch": 0.58, - "grad_norm": 0.9780031004620156, - "learning_rate": 7.866410882457609e-06, - "loss": 0.1567, - "step": 11433 - }, - { - "epoch": 0.58, - "grad_norm": 0.9214707777852454, - "learning_rate": 7.864801866062624e-06, - "loss": 0.1843, - "step": 11434 - }, - { - "epoch": 0.58, - "grad_norm": 1.1192018977233658, - "learning_rate": 7.863192907585013e-06, - "loss": 0.1722, - "step": 11435 - }, - { - "epoch": 0.58, - "grad_norm": 0.7960680607754248, - "learning_rate": 7.861584007068411e-06, - "loss": 0.1832, - "step": 11436 - }, - { - "epoch": 0.58, - "grad_norm": 0.9839711925450026, - "learning_rate": 7.859975164556468e-06, - "loss": 0.17, - "step": 11437 - }, - { - "epoch": 0.58, - "grad_norm": 1.0211951625381592, - "learning_rate": 7.858366380092814e-06, - "loss": 0.1893, - "step": 11438 - }, - { - "epoch": 0.58, - "grad_norm": 2.425161321405098, - "learning_rate": 7.856757653721097e-06, - "loss": 0.1898, - "step": 11439 - }, - { - "epoch": 0.58, - "grad_norm": 0.8515906228888953, - "learning_rate": 7.855148985484946e-06, - "loss": 0.1855, - "step": 11440 - }, - { - "epoch": 0.58, - "grad_norm": 0.8177327295630341, - "learning_rate": 7.853540375428006e-06, - "loss": 0.1662, - "step": 11441 - }, - { - "epoch": 0.58, - "grad_norm": 0.8708247841613003, - "learning_rate": 7.851931823593897e-06, - "loss": 0.1933, - "step": 11442 - }, - { - "epoch": 0.58, - "grad_norm": 0.7436403301338063, - "learning_rate": 7.850323330026264e-06, - "loss": 0.1819, - "step": 11443 - }, - { - "epoch": 0.58, - "grad_norm": 0.8529954048960217, - "learning_rate": 7.848714894768729e-06, - "loss": 0.1752, - "step": 11444 - }, - { - "epoch": 0.58, - "grad_norm": 0.86165304634647, - "learning_rate": 7.847106517864927e-06, - "loss": 0.1675, - "step": 11445 - }, - { - "epoch": 0.58, - "grad_norm": 0.795950397385831, - "learning_rate": 7.84549819935848e-06, - "loss": 0.1692, - "step": 11446 - }, - { - "epoch": 0.58, - "grad_norm": 0.8796257453564453, - "learning_rate": 7.843889939293017e-06, - "loss": 0.1604, - "step": 11447 - }, - { - "epoch": 0.58, - "grad_norm": 0.8178018265488591, - "learning_rate": 7.842281737712164e-06, - "loss": 0.1763, - "step": 11448 - }, - { - "epoch": 0.58, - "grad_norm": 1.2412486625603991, - "learning_rate": 7.840673594659535e-06, - "loss": 0.1635, - "step": 11449 - }, - { - "epoch": 0.58, - "grad_norm": 5.138558639655599, - "learning_rate": 7.839065510178763e-06, - "loss": 0.151, - "step": 11450 - }, - { - "epoch": 0.58, - "grad_norm": 0.8575207245711235, - "learning_rate": 7.837457484313452e-06, - "loss": 0.1722, - "step": 11451 - }, - { - "epoch": 0.58, - "grad_norm": 1.1377203906398663, - "learning_rate": 7.835849517107237e-06, - "loss": 0.1826, - "step": 11452 - }, - { - "epoch": 0.58, - "grad_norm": 0.9724400883078111, - "learning_rate": 7.834241608603722e-06, - "loss": 0.1759, - "step": 11453 - }, - { - "epoch": 0.58, - "grad_norm": 3.823551349347765, - "learning_rate": 7.83263375884653e-06, - "loss": 0.1969, - "step": 11454 - }, - { - "epoch": 0.58, - "grad_norm": 0.8901574635583533, - "learning_rate": 7.831025967879265e-06, - "loss": 0.1827, - "step": 11455 - }, - { - "epoch": 0.58, - "grad_norm": 0.9064639578916673, - "learning_rate": 7.829418235745547e-06, - "loss": 0.1743, - "step": 11456 - }, - { - "epoch": 0.58, - "grad_norm": 1.1197538740125437, - "learning_rate": 7.827810562488978e-06, - "loss": 0.1735, - "step": 11457 - }, - { - "epoch": 0.58, - "grad_norm": 0.7804009424828282, - "learning_rate": 7.826202948153174e-06, - "loss": 0.1559, - "step": 11458 - }, - { - "epoch": 0.58, - "grad_norm": 0.784302834791359, - "learning_rate": 7.824595392781735e-06, - "loss": 0.1955, - "step": 11459 - }, - { - "epoch": 0.58, - "grad_norm": 0.955136017244413, - "learning_rate": 7.822987896418269e-06, - "loss": 0.1802, - "step": 11460 - }, - { - "epoch": 0.58, - "grad_norm": 1.11692735362752, - "learning_rate": 7.821380459106379e-06, - "loss": 0.1784, - "step": 11461 - }, - { - "epoch": 0.58, - "grad_norm": 0.686381091202176, - "learning_rate": 7.81977308088967e-06, - "loss": 0.1852, - "step": 11462 - }, - { - "epoch": 0.58, - "grad_norm": 0.9285742766920333, - "learning_rate": 7.818165761811736e-06, - "loss": 0.1724, - "step": 11463 - }, - { - "epoch": 0.58, - "grad_norm": 1.5463402825839065, - "learning_rate": 7.81655850191618e-06, - "loss": 0.1889, - "step": 11464 - }, - { - "epoch": 0.58, - "grad_norm": 0.8834470298152093, - "learning_rate": 7.814951301246597e-06, - "loss": 0.2023, - "step": 11465 - }, - { - "epoch": 0.58, - "grad_norm": 0.9091701075412179, - "learning_rate": 7.813344159846588e-06, - "loss": 0.1946, - "step": 11466 - }, - { - "epoch": 0.58, - "grad_norm": 0.8394475643175278, - "learning_rate": 7.811737077759742e-06, - "loss": 0.1687, - "step": 11467 - }, - { - "epoch": 0.58, - "grad_norm": 0.6724614209465645, - "learning_rate": 7.810130055029646e-06, - "loss": 0.1806, - "step": 11468 - }, - { - "epoch": 0.58, - "grad_norm": 0.9378148453117767, - "learning_rate": 7.808523091699898e-06, - "loss": 0.1773, - "step": 11469 - }, - { - "epoch": 0.58, - "grad_norm": 2.5997825594312163, - "learning_rate": 7.806916187814084e-06, - "loss": 0.1728, - "step": 11470 - }, - { - "epoch": 0.58, - "grad_norm": 0.7187489821039615, - "learning_rate": 7.805309343415796e-06, - "loss": 0.1724, - "step": 11471 - }, - { - "epoch": 0.58, - "grad_norm": 0.7144109041965934, - "learning_rate": 7.803702558548611e-06, - "loss": 0.1796, - "step": 11472 - }, - { - "epoch": 0.58, - "grad_norm": 0.9252592793590034, - "learning_rate": 7.802095833256121e-06, - "loss": 0.2001, - "step": 11473 - }, - { - "epoch": 0.58, - "grad_norm": 0.786306724546287, - "learning_rate": 7.800489167581903e-06, - "loss": 0.1819, - "step": 11474 - }, - { - "epoch": 0.58, - "grad_norm": 0.9248066516176838, - "learning_rate": 7.798882561569546e-06, - "loss": 0.1855, - "step": 11475 - }, - { - "epoch": 0.58, - "grad_norm": 1.1746878367894527, - "learning_rate": 7.797276015262619e-06, - "loss": 0.1822, - "step": 11476 - }, - { - "epoch": 0.58, - "grad_norm": 0.9795575556369396, - "learning_rate": 7.795669528704707e-06, - "loss": 0.1721, - "step": 11477 - }, - { - "epoch": 0.58, - "grad_norm": 0.9468774083981828, - "learning_rate": 7.794063101939381e-06, - "loss": 0.1758, - "step": 11478 - }, - { - "epoch": 0.58, - "grad_norm": 0.8166886570894248, - "learning_rate": 7.792456735010223e-06, - "loss": 0.1815, - "step": 11479 - }, - { - "epoch": 0.58, - "grad_norm": 0.9671715919770977, - "learning_rate": 7.790850427960795e-06, - "loss": 0.201, - "step": 11480 - }, - { - "epoch": 0.58, - "grad_norm": 1.357756688548844, - "learning_rate": 7.789244180834679e-06, - "loss": 0.1946, - "step": 11481 - }, - { - "epoch": 0.58, - "grad_norm": 0.8662586324346514, - "learning_rate": 7.787637993675434e-06, - "loss": 0.1871, - "step": 11482 - }, - { - "epoch": 0.58, - "grad_norm": 1.066321292841425, - "learning_rate": 7.786031866526636e-06, - "loss": 0.193, - "step": 11483 - }, - { - "epoch": 0.58, - "grad_norm": 0.8948737952595948, - "learning_rate": 7.784425799431852e-06, - "loss": 0.1686, - "step": 11484 - }, - { - "epoch": 0.58, - "grad_norm": 1.1269647961952738, - "learning_rate": 7.782819792434638e-06, - "loss": 0.174, - "step": 11485 - }, - { - "epoch": 0.58, - "grad_norm": 1.2486855535449515, - "learning_rate": 7.781213845578564e-06, - "loss": 0.181, - "step": 11486 - }, - { - "epoch": 0.58, - "grad_norm": 1.2255911988677148, - "learning_rate": 7.779607958907189e-06, - "loss": 0.2061, - "step": 11487 - }, - { - "epoch": 0.58, - "grad_norm": 0.6697046944408509, - "learning_rate": 7.778002132464077e-06, - "loss": 0.1637, - "step": 11488 - }, - { - "epoch": 0.58, - "grad_norm": 1.1222438318590247, - "learning_rate": 7.77639636629278e-06, - "loss": 0.2002, - "step": 11489 - }, - { - "epoch": 0.58, - "grad_norm": 0.8560548327618978, - "learning_rate": 7.774790660436857e-06, - "loss": 0.1879, - "step": 11490 - }, - { - "epoch": 0.58, - "grad_norm": 1.0547481846929991, - "learning_rate": 7.773185014939863e-06, - "loss": 0.2034, - "step": 11491 - }, - { - "epoch": 0.58, - "grad_norm": 0.9656593366959516, - "learning_rate": 7.771579429845353e-06, - "loss": 0.1883, - "step": 11492 - }, - { - "epoch": 0.58, - "grad_norm": 0.933937310893644, - "learning_rate": 7.769973905196875e-06, - "loss": 0.1685, - "step": 11493 - }, - { - "epoch": 0.58, - "grad_norm": 1.0471187930082608, - "learning_rate": 7.768368441037983e-06, - "loss": 0.1507, - "step": 11494 - }, - { - "epoch": 0.58, - "grad_norm": 0.8376089379719395, - "learning_rate": 7.766763037412219e-06, - "loss": 0.1825, - "step": 11495 - }, - { - "epoch": 0.58, - "grad_norm": 0.9080888688549174, - "learning_rate": 7.765157694363138e-06, - "loss": 0.2009, - "step": 11496 - }, - { - "epoch": 0.58, - "grad_norm": 0.8024492902468562, - "learning_rate": 7.763552411934277e-06, - "loss": 0.1785, - "step": 11497 - }, - { - "epoch": 0.58, - "grad_norm": 0.7860320019469207, - "learning_rate": 7.761947190169188e-06, - "loss": 0.1718, - "step": 11498 - }, - { - "epoch": 0.58, - "grad_norm": 1.0387294443548292, - "learning_rate": 7.760342029111403e-06, - "loss": 0.1829, - "step": 11499 - }, - { - "epoch": 0.58, - "grad_norm": 0.9896381467940631, - "learning_rate": 7.758736928804469e-06, - "loss": 0.1924, - "step": 11500 - }, - { - "epoch": 0.58, - "grad_norm": 1.0018665717164907, - "learning_rate": 7.757131889291925e-06, - "loss": 0.1735, - "step": 11501 - }, - { - "epoch": 0.58, - "grad_norm": 0.9894239106183639, - "learning_rate": 7.7555269106173e-06, - "loss": 0.1751, - "step": 11502 - }, - { - "epoch": 0.58, - "grad_norm": 0.8985708137642188, - "learning_rate": 7.753921992824139e-06, - "loss": 0.1964, - "step": 11503 - }, - { - "epoch": 0.58, - "grad_norm": 1.1661604244942374, - "learning_rate": 7.752317135955966e-06, - "loss": 0.1713, - "step": 11504 - }, - { - "epoch": 0.59, - "grad_norm": 0.8612006619090127, - "learning_rate": 7.750712340056323e-06, - "loss": 0.1987, - "step": 11505 - }, - { - "epoch": 0.59, - "grad_norm": 0.7680521604795602, - "learning_rate": 7.74910760516873e-06, - "loss": 0.1889, - "step": 11506 - }, - { - "epoch": 0.59, - "grad_norm": 0.9533786515303825, - "learning_rate": 7.747502931336726e-06, - "loss": 0.1727, - "step": 11507 - }, - { - "epoch": 0.59, - "grad_norm": 0.7931110934670622, - "learning_rate": 7.745898318603826e-06, - "loss": 0.1815, - "step": 11508 - }, - { - "epoch": 0.59, - "grad_norm": 1.9576609653751136, - "learning_rate": 7.744293767013564e-06, - "loss": 0.1833, - "step": 11509 - }, - { - "epoch": 0.59, - "grad_norm": 0.9853274013225376, - "learning_rate": 7.742689276609459e-06, - "loss": 0.1607, - "step": 11510 - }, - { - "epoch": 0.59, - "grad_norm": 0.8886805308841748, - "learning_rate": 7.74108484743504e-06, - "loss": 0.1788, - "step": 11511 - }, - { - "epoch": 0.59, - "grad_norm": 0.9910320281706113, - "learning_rate": 7.739480479533818e-06, - "loss": 0.1914, - "step": 11512 - }, - { - "epoch": 0.59, - "grad_norm": 0.9545227317871207, - "learning_rate": 7.737876172949317e-06, - "loss": 0.1613, - "step": 11513 - }, - { - "epoch": 0.59, - "grad_norm": 0.7746215432805033, - "learning_rate": 7.73627192772505e-06, - "loss": 0.1784, - "step": 11514 - }, - { - "epoch": 0.59, - "grad_norm": 1.1258108748177553, - "learning_rate": 7.73466774390454e-06, - "loss": 0.1493, - "step": 11515 - }, - { - "epoch": 0.59, - "grad_norm": 1.1555646260024954, - "learning_rate": 7.73306362153129e-06, - "loss": 0.1875, - "step": 11516 - }, - { - "epoch": 0.59, - "grad_norm": 1.0687324728634058, - "learning_rate": 7.73145956064882e-06, - "loss": 0.1546, - "step": 11517 - }, - { - "epoch": 0.59, - "grad_norm": 1.106528469273153, - "learning_rate": 7.72985556130064e-06, - "loss": 0.1887, - "step": 11518 - }, - { - "epoch": 0.59, - "grad_norm": 0.837873803762906, - "learning_rate": 7.728251623530253e-06, - "loss": 0.1703, - "step": 11519 - }, - { - "epoch": 0.59, - "grad_norm": 0.9792314296285167, - "learning_rate": 7.726647747381171e-06, - "loss": 0.1769, - "step": 11520 - }, - { - "epoch": 0.59, - "grad_norm": 1.036453018636925, - "learning_rate": 7.725043932896895e-06, - "loss": 0.1763, - "step": 11521 - }, - { - "epoch": 0.59, - "grad_norm": 1.6120548814324298, - "learning_rate": 7.723440180120932e-06, - "loss": 0.1884, - "step": 11522 - }, - { - "epoch": 0.59, - "grad_norm": 0.952343846906782, - "learning_rate": 7.72183648909678e-06, - "loss": 0.1594, - "step": 11523 - }, - { - "epoch": 0.59, - "grad_norm": 0.8464787482722366, - "learning_rate": 7.720232859867946e-06, - "loss": 0.1709, - "step": 11524 - }, - { - "epoch": 0.59, - "grad_norm": 0.84795764242471, - "learning_rate": 7.71862929247792e-06, - "loss": 0.1867, - "step": 11525 - }, - { - "epoch": 0.59, - "grad_norm": 1.0405235420112218, - "learning_rate": 7.717025786970208e-06, - "loss": 0.176, - "step": 11526 - }, - { - "epoch": 0.59, - "grad_norm": 1.781721864117291, - "learning_rate": 7.715422343388296e-06, - "loss": 0.1801, - "step": 11527 - }, - { - "epoch": 0.59, - "grad_norm": 0.9009036342142364, - "learning_rate": 7.713818961775686e-06, - "loss": 0.1775, - "step": 11528 - }, - { - "epoch": 0.59, - "grad_norm": 0.889676087190506, - "learning_rate": 7.712215642175862e-06, - "loss": 0.1806, - "step": 11529 - }, - { - "epoch": 0.59, - "grad_norm": 1.537611419687043, - "learning_rate": 7.710612384632321e-06, - "loss": 0.1986, - "step": 11530 - }, - { - "epoch": 0.59, - "grad_norm": 0.9885073435263368, - "learning_rate": 7.709009189188546e-06, - "loss": 0.2099, - "step": 11531 - }, - { - "epoch": 0.59, - "grad_norm": 1.157845104013614, - "learning_rate": 7.70740605588803e-06, - "loss": 0.1774, - "step": 11532 - }, - { - "epoch": 0.59, - "grad_norm": 1.5695577270079757, - "learning_rate": 7.70580298477425e-06, - "loss": 0.1729, - "step": 11533 - }, - { - "epoch": 0.59, - "grad_norm": 0.7935269584611251, - "learning_rate": 7.704199975890698e-06, - "loss": 0.1811, - "step": 11534 - }, - { - "epoch": 0.59, - "grad_norm": 1.302641620541749, - "learning_rate": 7.702597029280848e-06, - "loss": 0.1637, - "step": 11535 - }, - { - "epoch": 0.59, - "grad_norm": 1.0267924968478008, - "learning_rate": 7.700994144988183e-06, - "loss": 0.1992, - "step": 11536 - }, - { - "epoch": 0.59, - "grad_norm": 1.0705791649139917, - "learning_rate": 7.699391323056184e-06, - "loss": 0.1867, - "step": 11537 - }, - { - "epoch": 0.59, - "grad_norm": 1.0443080061831171, - "learning_rate": 7.697788563528323e-06, - "loss": 0.1743, - "step": 11538 - }, - { - "epoch": 0.59, - "grad_norm": 4.91455270222627, - "learning_rate": 7.696185866448079e-06, - "loss": 0.1781, - "step": 11539 - }, - { - "epoch": 0.59, - "grad_norm": 0.9676560323614188, - "learning_rate": 7.694583231858921e-06, - "loss": 0.1898, - "step": 11540 - }, - { - "epoch": 0.59, - "grad_norm": 0.8959806001854843, - "learning_rate": 7.692980659804327e-06, - "loss": 0.1763, - "step": 11541 - }, - { - "epoch": 0.59, - "grad_norm": 1.0203472241064597, - "learning_rate": 7.691378150327759e-06, - "loss": 0.1764, - "step": 11542 - }, - { - "epoch": 0.59, - "grad_norm": 1.1974668086090456, - "learning_rate": 7.689775703472691e-06, - "loss": 0.1866, - "step": 11543 - }, - { - "epoch": 0.59, - "grad_norm": 1.7475706747831443, - "learning_rate": 7.688173319282586e-06, - "loss": 0.1976, - "step": 11544 - }, - { - "epoch": 0.59, - "grad_norm": 0.935656524326181, - "learning_rate": 7.686570997800914e-06, - "loss": 0.1605, - "step": 11545 - }, - { - "epoch": 0.59, - "grad_norm": 0.8084136464393837, - "learning_rate": 7.68496873907113e-06, - "loss": 0.2016, - "step": 11546 - }, - { - "epoch": 0.59, - "grad_norm": 0.9290286115022512, - "learning_rate": 7.683366543136703e-06, - "loss": 0.1894, - "step": 11547 - }, - { - "epoch": 0.59, - "grad_norm": 1.4974991685353187, - "learning_rate": 7.681764410041087e-06, - "loss": 0.1957, - "step": 11548 - }, - { - "epoch": 0.59, - "grad_norm": 0.8232639356455898, - "learning_rate": 7.680162339827744e-06, - "loss": 0.1841, - "step": 11549 - }, - { - "epoch": 0.59, - "grad_norm": 0.9511264667655934, - "learning_rate": 7.678560332540126e-06, - "loss": 0.1775, - "step": 11550 - }, - { - "epoch": 0.59, - "grad_norm": 0.9769977386312791, - "learning_rate": 7.676958388221693e-06, - "loss": 0.1797, - "step": 11551 - }, - { - "epoch": 0.59, - "grad_norm": 1.167911556068729, - "learning_rate": 7.675356506915892e-06, - "loss": 0.1962, - "step": 11552 - }, - { - "epoch": 0.59, - "grad_norm": 0.8463201079565178, - "learning_rate": 7.67375468866618e-06, - "loss": 0.1771, - "step": 11553 - }, - { - "epoch": 0.59, - "grad_norm": 1.2272626867654062, - "learning_rate": 7.672152933516005e-06, - "loss": 0.1973, - "step": 11554 - }, - { - "epoch": 0.59, - "grad_norm": 2.094380301002933, - "learning_rate": 7.670551241508809e-06, - "loss": 0.1678, - "step": 11555 - }, - { - "epoch": 0.59, - "grad_norm": 0.915630672766507, - "learning_rate": 7.668949612688044e-06, - "loss": 0.1837, - "step": 11556 - }, - { - "epoch": 0.59, - "grad_norm": 0.7441259642618644, - "learning_rate": 7.667348047097151e-06, - "loss": 0.1744, - "step": 11557 - }, - { - "epoch": 0.59, - "grad_norm": 0.761767486300642, - "learning_rate": 7.665746544779577e-06, - "loss": 0.172, - "step": 11558 - }, - { - "epoch": 0.59, - "grad_norm": 0.6978167876222314, - "learning_rate": 7.664145105778755e-06, - "loss": 0.1646, - "step": 11559 - }, - { - "epoch": 0.59, - "grad_norm": 0.8980322487996942, - "learning_rate": 7.662543730138136e-06, - "loss": 0.1795, - "step": 11560 - }, - { - "epoch": 0.59, - "grad_norm": 1.1376196282122772, - "learning_rate": 7.660942417901145e-06, - "loss": 0.1729, - "step": 11561 - }, - { - "epoch": 0.59, - "grad_norm": 1.372232682551164, - "learning_rate": 7.659341169111222e-06, - "loss": 0.1562, - "step": 11562 - }, - { - "epoch": 0.59, - "grad_norm": 0.9663702113761, - "learning_rate": 7.657739983811803e-06, - "loss": 0.2076, - "step": 11563 - }, - { - "epoch": 0.59, - "grad_norm": 1.0129999221068482, - "learning_rate": 7.656138862046323e-06, - "loss": 0.1767, - "step": 11564 - }, - { - "epoch": 0.59, - "grad_norm": 0.9004518055692233, - "learning_rate": 7.654537803858205e-06, - "loss": 0.186, - "step": 11565 - }, - { - "epoch": 0.59, - "grad_norm": 0.8210445742427821, - "learning_rate": 7.652936809290883e-06, - "loss": 0.2024, - "step": 11566 - }, - { - "epoch": 0.59, - "grad_norm": 0.9394699808210847, - "learning_rate": 7.65133587838778e-06, - "loss": 0.1847, - "step": 11567 - }, - { - "epoch": 0.59, - "grad_norm": 0.9609354948526633, - "learning_rate": 7.649735011192329e-06, - "loss": 0.1912, - "step": 11568 - }, - { - "epoch": 0.59, - "grad_norm": 1.654273035810828, - "learning_rate": 7.648134207747944e-06, - "loss": 0.1791, - "step": 11569 - }, - { - "epoch": 0.59, - "grad_norm": 0.8138671952721602, - "learning_rate": 7.646533468098054e-06, - "loss": 0.1994, - "step": 11570 - }, - { - "epoch": 0.59, - "grad_norm": 0.9362668301075935, - "learning_rate": 7.644932792286078e-06, - "loss": 0.2046, - "step": 11571 - }, - { - "epoch": 0.59, - "grad_norm": 0.8749552721286621, - "learning_rate": 7.64333218035543e-06, - "loss": 0.1914, - "step": 11572 - }, - { - "epoch": 0.59, - "grad_norm": 0.8649832225609275, - "learning_rate": 7.64173163234953e-06, - "loss": 0.1579, - "step": 11573 - }, - { - "epoch": 0.59, - "grad_norm": 1.171430802862261, - "learning_rate": 7.640131148311791e-06, - "loss": 0.19, - "step": 11574 - }, - { - "epoch": 0.59, - "grad_norm": 1.2259477037304825, - "learning_rate": 7.638530728285633e-06, - "loss": 0.1808, - "step": 11575 - }, - { - "epoch": 0.59, - "grad_norm": 1.4841824061106357, - "learning_rate": 7.636930372314457e-06, - "loss": 0.19, - "step": 11576 - }, - { - "epoch": 0.59, - "grad_norm": 0.9639021119081354, - "learning_rate": 7.635330080441684e-06, - "loss": 0.1811, - "step": 11577 - }, - { - "epoch": 0.59, - "grad_norm": 0.9701950679898839, - "learning_rate": 7.633729852710711e-06, - "loss": 0.1714, - "step": 11578 - }, - { - "epoch": 0.59, - "grad_norm": 1.0768229910505707, - "learning_rate": 7.632129689164951e-06, - "loss": 0.2228, - "step": 11579 - }, - { - "epoch": 0.59, - "grad_norm": 1.3353356856633012, - "learning_rate": 7.630529589847807e-06, - "loss": 0.1921, - "step": 11580 - }, - { - "epoch": 0.59, - "grad_norm": 1.3622609087993482, - "learning_rate": 7.628929554802683e-06, - "loss": 0.1911, - "step": 11581 - }, - { - "epoch": 0.59, - "grad_norm": 0.8975720323589307, - "learning_rate": 7.627329584072976e-06, - "loss": 0.1796, - "step": 11582 - }, - { - "epoch": 0.59, - "grad_norm": 0.9448305446099222, - "learning_rate": 7.625729677702089e-06, - "loss": 0.197, - "step": 11583 - }, - { - "epoch": 0.59, - "grad_norm": 1.414447297300965, - "learning_rate": 7.624129835733418e-06, - "loss": 0.1847, - "step": 11584 - }, - { - "epoch": 0.59, - "grad_norm": 1.1403047850573143, - "learning_rate": 7.622530058210363e-06, - "loss": 0.1705, - "step": 11585 - }, - { - "epoch": 0.59, - "grad_norm": 0.7769183214743727, - "learning_rate": 7.62093034517631e-06, - "loss": 0.1736, - "step": 11586 - }, - { - "epoch": 0.59, - "grad_norm": 0.8832212847030111, - "learning_rate": 7.619330696674658e-06, - "loss": 0.1749, - "step": 11587 - }, - { - "epoch": 0.59, - "grad_norm": 1.0324852829353546, - "learning_rate": 7.6177311127487984e-06, - "loss": 0.17, - "step": 11588 - }, - { - "epoch": 0.59, - "grad_norm": 1.0803947078290193, - "learning_rate": 7.616131593442111e-06, - "loss": 0.1657, - "step": 11589 - }, - { - "epoch": 0.59, - "grad_norm": 1.3593432430994428, - "learning_rate": 7.614532138797994e-06, - "loss": 0.2016, - "step": 11590 - }, - { - "epoch": 0.59, - "grad_norm": 1.1694049472944021, - "learning_rate": 7.6129327488598225e-06, - "loss": 0.1627, - "step": 11591 - }, - { - "epoch": 0.59, - "grad_norm": 0.9088413784789771, - "learning_rate": 7.611333423670988e-06, - "loss": 0.184, - "step": 11592 - }, - { - "epoch": 0.59, - "grad_norm": 1.2524174806149122, - "learning_rate": 7.609734163274867e-06, - "loss": 0.1803, - "step": 11593 - }, - { - "epoch": 0.59, - "grad_norm": 0.8086586822961561, - "learning_rate": 7.608134967714846e-06, - "loss": 0.1657, - "step": 11594 - }, - { - "epoch": 0.59, - "grad_norm": 1.0821545601165399, - "learning_rate": 7.606535837034295e-06, - "loss": 0.1814, - "step": 11595 - }, - { - "epoch": 0.59, - "grad_norm": 1.1909757173673818, - "learning_rate": 7.604936771276596e-06, - "loss": 0.1925, - "step": 11596 - }, - { - "epoch": 0.59, - "grad_norm": 1.1089161855517706, - "learning_rate": 7.603337770485122e-06, - "loss": 0.2052, - "step": 11597 - }, - { - "epoch": 0.59, - "grad_norm": 0.9012722774072845, - "learning_rate": 7.601738834703249e-06, - "loss": 0.1869, - "step": 11598 - }, - { - "epoch": 0.59, - "grad_norm": 0.7300428580712354, - "learning_rate": 7.600139963974341e-06, - "loss": 0.1819, - "step": 11599 - }, - { - "epoch": 0.59, - "grad_norm": 0.9574724234455277, - "learning_rate": 7.598541158341774e-06, - "loss": 0.1799, - "step": 11600 - }, - { - "epoch": 0.59, - "grad_norm": 1.2387333692896372, - "learning_rate": 7.5969424178489134e-06, - "loss": 0.17, - "step": 11601 - }, - { - "epoch": 0.59, - "grad_norm": 0.907897336158818, - "learning_rate": 7.5953437425391284e-06, - "loss": 0.1887, - "step": 11602 - }, - { - "epoch": 0.59, - "grad_norm": 1.4548819371977963, - "learning_rate": 7.593745132455776e-06, - "loss": 0.1931, - "step": 11603 - }, - { - "epoch": 0.59, - "grad_norm": 1.0247766606758641, - "learning_rate": 7.592146587642227e-06, - "loss": 0.1961, - "step": 11604 - }, - { - "epoch": 0.59, - "grad_norm": 1.237579976507833, - "learning_rate": 7.5905481081418365e-06, - "loss": 0.1764, - "step": 11605 - }, - { - "epoch": 0.59, - "grad_norm": 0.9641963466945946, - "learning_rate": 7.588949693997962e-06, - "loss": 0.1806, - "step": 11606 - }, - { - "epoch": 0.59, - "grad_norm": 1.0322631441623458, - "learning_rate": 7.587351345253968e-06, - "loss": 0.1658, - "step": 11607 - }, - { - "epoch": 0.59, - "grad_norm": 0.8648225298624148, - "learning_rate": 7.585753061953199e-06, - "loss": 0.1559, - "step": 11608 - }, - { - "epoch": 0.59, - "grad_norm": 0.8683653108191068, - "learning_rate": 7.584154844139019e-06, - "loss": 0.1815, - "step": 11609 - }, - { - "epoch": 0.59, - "grad_norm": 2.291889450017652, - "learning_rate": 7.582556691854772e-06, - "loss": 0.1659, - "step": 11610 - }, - { - "epoch": 0.59, - "grad_norm": 1.074740180118149, - "learning_rate": 7.580958605143816e-06, - "loss": 0.1688, - "step": 11611 - }, - { - "epoch": 0.59, - "grad_norm": 1.8152071906384597, - "learning_rate": 7.579360584049489e-06, - "loss": 0.1947, - "step": 11612 - }, - { - "epoch": 0.59, - "grad_norm": 1.1801412874720607, - "learning_rate": 7.577762628615146e-06, - "loss": 0.1859, - "step": 11613 - }, - { - "epoch": 0.59, - "grad_norm": 1.034604716980372, - "learning_rate": 7.576164738884126e-06, - "loss": 0.1831, - "step": 11614 - }, - { - "epoch": 0.59, - "grad_norm": 0.9786064729118367, - "learning_rate": 7.574566914899779e-06, - "loss": 0.1892, - "step": 11615 - }, - { - "epoch": 0.59, - "grad_norm": 1.1607546685118113, - "learning_rate": 7.572969156705437e-06, - "loss": 0.1854, - "step": 11616 - }, - { - "epoch": 0.59, - "grad_norm": 0.8754173581044404, - "learning_rate": 7.571371464344448e-06, - "loss": 0.1663, - "step": 11617 - }, - { - "epoch": 0.59, - "grad_norm": 0.9708501178409704, - "learning_rate": 7.5697738378601406e-06, - "loss": 0.2012, - "step": 11618 - }, - { - "epoch": 0.59, - "grad_norm": 1.0361952174520443, - "learning_rate": 7.568176277295858e-06, - "loss": 0.1848, - "step": 11619 - }, - { - "epoch": 0.59, - "grad_norm": 1.31195992549576, - "learning_rate": 7.566578782694928e-06, - "loss": 0.1803, - "step": 11620 - }, - { - "epoch": 0.59, - "grad_norm": 1.0422363977834639, - "learning_rate": 7.564981354100691e-06, - "loss": 0.1711, - "step": 11621 - }, - { - "epoch": 0.59, - "grad_norm": 1.7208745670437828, - "learning_rate": 7.563383991556468e-06, - "loss": 0.1681, - "step": 11622 - }, - { - "epoch": 0.59, - "grad_norm": 1.4976767554787114, - "learning_rate": 7.561786695105593e-06, - "loss": 0.1692, - "step": 11623 - }, - { - "epoch": 0.59, - "grad_norm": 1.1242864380247015, - "learning_rate": 7.5601894647913955e-06, - "loss": 0.1749, - "step": 11624 - }, - { - "epoch": 0.59, - "grad_norm": 1.1508597414092376, - "learning_rate": 7.55859230065719e-06, - "loss": 0.165, - "step": 11625 - }, - { - "epoch": 0.59, - "grad_norm": 0.9724639392749773, - "learning_rate": 7.556995202746311e-06, - "loss": 0.1606, - "step": 11626 - }, - { - "epoch": 0.59, - "grad_norm": 0.7715827756096528, - "learning_rate": 7.555398171102072e-06, - "loss": 0.1771, - "step": 11627 - }, - { - "epoch": 0.59, - "grad_norm": 1.3122835713518486, - "learning_rate": 7.5538012057677985e-06, - "loss": 0.1746, - "step": 11628 - }, - { - "epoch": 0.59, - "grad_norm": 1.5803497435595275, - "learning_rate": 7.5522043067868034e-06, - "loss": 0.1704, - "step": 11629 - }, - { - "epoch": 0.59, - "grad_norm": 1.1103465528131011, - "learning_rate": 7.550607474202407e-06, - "loss": 0.1813, - "step": 11630 - }, - { - "epoch": 0.59, - "grad_norm": 0.998855733673789, - "learning_rate": 7.549010708057919e-06, - "loss": 0.1905, - "step": 11631 - }, - { - "epoch": 0.59, - "grad_norm": 2.5212565968715075, - "learning_rate": 7.5474140083966544e-06, - "loss": 0.1947, - "step": 11632 - }, - { - "epoch": 0.59, - "grad_norm": 0.973467654139691, - "learning_rate": 7.545817375261921e-06, - "loss": 0.1931, - "step": 11633 - }, - { - "epoch": 0.59, - "grad_norm": 0.941498013978441, - "learning_rate": 7.544220808697036e-06, - "loss": 0.1799, - "step": 11634 - }, - { - "epoch": 0.59, - "grad_norm": 0.7971826901881703, - "learning_rate": 7.542624308745294e-06, - "loss": 0.1502, - "step": 11635 - }, - { - "epoch": 0.59, - "grad_norm": 0.8618617274901741, - "learning_rate": 7.541027875450011e-06, - "loss": 0.1794, - "step": 11636 - }, - { - "epoch": 0.59, - "grad_norm": 0.8212372318307198, - "learning_rate": 7.539431508854482e-06, - "loss": 0.1771, - "step": 11637 - }, - { - "epoch": 0.59, - "grad_norm": 0.8450855924204931, - "learning_rate": 7.537835209002015e-06, - "loss": 0.1869, - "step": 11638 - }, - { - "epoch": 0.59, - "grad_norm": 2.1731059901473806, - "learning_rate": 7.536238975935906e-06, - "loss": 0.1832, - "step": 11639 - }, - { - "epoch": 0.59, - "grad_norm": 0.8971279390004603, - "learning_rate": 7.534642809699455e-06, - "loss": 0.1556, - "step": 11640 - }, - { - "epoch": 0.59, - "grad_norm": 1.279437219873534, - "learning_rate": 7.533046710335959e-06, - "loss": 0.2015, - "step": 11641 - }, - { - "epoch": 0.59, - "grad_norm": 1.3048828184889434, - "learning_rate": 7.531450677888706e-06, - "loss": 0.192, - "step": 11642 - }, - { - "epoch": 0.59, - "grad_norm": 1.0784834754808177, - "learning_rate": 7.529854712400996e-06, - "loss": 0.1835, - "step": 11643 - }, - { - "epoch": 0.59, - "grad_norm": 1.8167241931927995, - "learning_rate": 7.528258813916113e-06, - "loss": 0.175, - "step": 11644 - }, - { - "epoch": 0.59, - "grad_norm": 0.9433414492128455, - "learning_rate": 7.5266629824773506e-06, - "loss": 0.1663, - "step": 11645 - }, - { - "epoch": 0.59, - "grad_norm": 0.9127783252817138, - "learning_rate": 7.525067218127994e-06, - "loss": 0.1837, - "step": 11646 - }, - { - "epoch": 0.59, - "grad_norm": 0.8962957098263203, - "learning_rate": 7.523471520911332e-06, - "loss": 0.1634, - "step": 11647 - }, - { - "epoch": 0.59, - "grad_norm": 0.9029306025815423, - "learning_rate": 7.521875890870641e-06, - "loss": 0.1654, - "step": 11648 - }, - { - "epoch": 0.59, - "grad_norm": 1.472572365862214, - "learning_rate": 7.520280328049209e-06, - "loss": 0.1952, - "step": 11649 - }, - { - "epoch": 0.59, - "grad_norm": 0.8529342448124863, - "learning_rate": 7.518684832490311e-06, - "loss": 0.1789, - "step": 11650 - }, - { - "epoch": 0.59, - "grad_norm": 1.0128886629264706, - "learning_rate": 7.51708940423723e-06, - "loss": 0.1848, - "step": 11651 - }, - { - "epoch": 0.59, - "grad_norm": 0.8367934406403794, - "learning_rate": 7.5154940433332354e-06, - "loss": 0.1677, - "step": 11652 - }, - { - "epoch": 0.59, - "grad_norm": 1.7232024685942742, - "learning_rate": 7.513898749821607e-06, - "loss": 0.189, - "step": 11653 - }, - { - "epoch": 0.59, - "grad_norm": 0.9930819245270107, - "learning_rate": 7.5123035237456145e-06, - "loss": 0.19, - "step": 11654 - }, - { - "epoch": 0.59, - "grad_norm": 0.813383634612531, - "learning_rate": 7.510708365148534e-06, - "loss": 0.1614, - "step": 11655 - }, - { - "epoch": 0.59, - "grad_norm": 1.0880834403732438, - "learning_rate": 7.509113274073624e-06, - "loss": 0.1766, - "step": 11656 - }, - { - "epoch": 0.59, - "grad_norm": 1.0095368000735225, - "learning_rate": 7.507518250564162e-06, - "loss": 0.1605, - "step": 11657 - }, - { - "epoch": 0.59, - "grad_norm": 0.8563653106348561, - "learning_rate": 7.505923294663407e-06, - "loss": 0.1922, - "step": 11658 - }, - { - "epoch": 0.59, - "grad_norm": 0.9991615525590314, - "learning_rate": 7.5043284064146195e-06, - "loss": 0.1761, - "step": 11659 - }, - { - "epoch": 0.59, - "grad_norm": 0.8596434111528071, - "learning_rate": 7.50273358586107e-06, - "loss": 0.1658, - "step": 11660 - }, - { - "epoch": 0.59, - "grad_norm": 0.8708077223179779, - "learning_rate": 7.501138833046009e-06, - "loss": 0.17, - "step": 11661 - }, - { - "epoch": 0.59, - "grad_norm": 0.9135953852571289, - "learning_rate": 7.499544148012701e-06, - "loss": 0.1691, - "step": 11662 - }, - { - "epoch": 0.59, - "grad_norm": 1.1459883181004817, - "learning_rate": 7.4979495308043956e-06, - "loss": 0.1632, - "step": 11663 - }, - { - "epoch": 0.59, - "grad_norm": 1.0935777455155868, - "learning_rate": 7.496354981464355e-06, - "loss": 0.1817, - "step": 11664 - }, - { - "epoch": 0.59, - "grad_norm": 0.9206013159913617, - "learning_rate": 7.494760500035824e-06, - "loss": 0.1793, - "step": 11665 - }, - { - "epoch": 0.59, - "grad_norm": 0.9309447002221981, - "learning_rate": 7.493166086562057e-06, - "loss": 0.1669, - "step": 11666 - }, - { - "epoch": 0.59, - "grad_norm": 0.847841347921362, - "learning_rate": 7.4915717410862985e-06, - "loss": 0.1515, - "step": 11667 - }, - { - "epoch": 0.59, - "grad_norm": 0.8058277967072249, - "learning_rate": 7.489977463651805e-06, - "loss": 0.1861, - "step": 11668 - }, - { - "epoch": 0.59, - "grad_norm": 1.0099173797801007, - "learning_rate": 7.488383254301809e-06, - "loss": 0.1436, - "step": 11669 - }, - { - "epoch": 0.59, - "grad_norm": 1.146285183247257, - "learning_rate": 7.4867891130795625e-06, - "loss": 0.1771, - "step": 11670 - }, - { - "epoch": 0.59, - "grad_norm": 0.9109887240430162, - "learning_rate": 7.4851950400283e-06, - "loss": 0.1819, - "step": 11671 - }, - { - "epoch": 0.59, - "grad_norm": 1.0952192354772308, - "learning_rate": 7.483601035191265e-06, - "loss": 0.1899, - "step": 11672 - }, - { - "epoch": 0.59, - "grad_norm": 0.7876174516574952, - "learning_rate": 7.482007098611694e-06, - "loss": 0.1723, - "step": 11673 - }, - { - "epoch": 0.59, - "grad_norm": 0.9015007081376896, - "learning_rate": 7.480413230332826e-06, - "loss": 0.189, - "step": 11674 - }, - { - "epoch": 0.59, - "grad_norm": 1.1568448543002259, - "learning_rate": 7.478819430397888e-06, - "loss": 0.1847, - "step": 11675 - }, - { - "epoch": 0.59, - "grad_norm": 1.333752114037828, - "learning_rate": 7.4772256988501145e-06, - "loss": 0.1753, - "step": 11676 - }, - { - "epoch": 0.59, - "grad_norm": 0.8048652598854352, - "learning_rate": 7.4756320357327406e-06, - "loss": 0.1843, - "step": 11677 - }, - { - "epoch": 0.59, - "grad_norm": 0.8734669456812948, - "learning_rate": 7.474038441088987e-06, - "loss": 0.1565, - "step": 11678 - }, - { - "epoch": 0.59, - "grad_norm": 1.6532642783039464, - "learning_rate": 7.472444914962084e-06, - "loss": 0.1814, - "step": 11679 - }, - { - "epoch": 0.59, - "grad_norm": 1.0401268716166456, - "learning_rate": 7.470851457395254e-06, - "loss": 0.182, - "step": 11680 - }, - { - "epoch": 0.59, - "grad_norm": 1.4617086415693403, - "learning_rate": 7.469258068431724e-06, - "loss": 0.2092, - "step": 11681 - }, - { - "epoch": 0.59, - "grad_norm": 0.8619882782489962, - "learning_rate": 7.467664748114709e-06, - "loss": 0.1808, - "step": 11682 - }, - { - "epoch": 0.59, - "grad_norm": 0.9972063938232998, - "learning_rate": 7.466071496487432e-06, - "loss": 0.1763, - "step": 11683 - }, - { - "epoch": 0.59, - "grad_norm": 0.8998667580089821, - "learning_rate": 7.4644783135931076e-06, - "loss": 0.2041, - "step": 11684 - }, - { - "epoch": 0.59, - "grad_norm": 0.8611236553868576, - "learning_rate": 7.462885199474956e-06, - "loss": 0.1838, - "step": 11685 - }, - { - "epoch": 0.59, - "grad_norm": 1.5463735071999285, - "learning_rate": 7.461292154176183e-06, - "loss": 0.164, - "step": 11686 - }, - { - "epoch": 0.59, - "grad_norm": 0.9550680424020984, - "learning_rate": 7.459699177740006e-06, - "loss": 0.1831, - "step": 11687 - }, - { - "epoch": 0.59, - "grad_norm": 1.0858276043205346, - "learning_rate": 7.4581062702096295e-06, - "loss": 0.1742, - "step": 11688 - }, - { - "epoch": 0.59, - "grad_norm": 1.1960381092670782, - "learning_rate": 7.456513431628266e-06, - "loss": 0.2053, - "step": 11689 - }, - { - "epoch": 0.59, - "grad_norm": 1.127318310682017, - "learning_rate": 7.454920662039118e-06, - "loss": 0.1765, - "step": 11690 - }, - { - "epoch": 0.59, - "grad_norm": 0.8470517961431754, - "learning_rate": 7.4533279614853935e-06, - "loss": 0.1777, - "step": 11691 - }, - { - "epoch": 0.59, - "grad_norm": 0.9298438108234138, - "learning_rate": 7.451735330010288e-06, - "loss": 0.1779, - "step": 11692 - }, - { - "epoch": 0.59, - "grad_norm": 0.8895116893439401, - "learning_rate": 7.450142767657009e-06, - "loss": 0.1914, - "step": 11693 - }, - { - "epoch": 0.59, - "grad_norm": 1.2369497075642366, - "learning_rate": 7.448550274468752e-06, - "loss": 0.1621, - "step": 11694 - }, - { - "epoch": 0.59, - "grad_norm": 1.7270373270555748, - "learning_rate": 7.4469578504887094e-06, - "loss": 0.1751, - "step": 11695 - }, - { - "epoch": 0.59, - "grad_norm": 1.0363877318541614, - "learning_rate": 7.445365495760082e-06, - "loss": 0.1832, - "step": 11696 - }, - { - "epoch": 0.59, - "grad_norm": 1.473420292278499, - "learning_rate": 7.443773210326057e-06, - "loss": 0.1829, - "step": 11697 - }, - { - "epoch": 0.59, - "grad_norm": 1.0751573517871695, - "learning_rate": 7.4421809942298305e-06, - "loss": 0.1953, - "step": 11698 - }, - { - "epoch": 0.59, - "grad_norm": 0.9729007669559803, - "learning_rate": 7.440588847514587e-06, - "loss": 0.1871, - "step": 11699 - }, - { - "epoch": 0.59, - "grad_norm": 0.9134498944853251, - "learning_rate": 7.43899677022352e-06, - "loss": 0.1819, - "step": 11700 - }, - { - "epoch": 0.6, - "grad_norm": 0.9035168835614964, - "learning_rate": 7.437404762399805e-06, - "loss": 0.1806, - "step": 11701 - }, - { - "epoch": 0.6, - "grad_norm": 1.1679300875833856, - "learning_rate": 7.435812824086632e-06, - "loss": 0.1791, - "step": 11702 - }, - { - "epoch": 0.6, - "grad_norm": 0.8328750754816019, - "learning_rate": 7.434220955327181e-06, - "loss": 0.1596, - "step": 11703 - }, - { - "epoch": 0.6, - "grad_norm": 1.354004614535836, - "learning_rate": 7.4326291561646345e-06, - "loss": 0.1971, - "step": 11704 - }, - { - "epoch": 0.6, - "grad_norm": 1.2102840579710277, - "learning_rate": 7.4310374266421625e-06, - "loss": 0.1502, - "step": 11705 - }, - { - "epoch": 0.6, - "grad_norm": 1.0356127604610017, - "learning_rate": 7.429445766802949e-06, - "loss": 0.1783, - "step": 11706 - }, - { - "epoch": 0.6, - "grad_norm": 0.8936017296139833, - "learning_rate": 7.427854176690161e-06, - "loss": 0.1605, - "step": 11707 - }, - { - "epoch": 0.6, - "grad_norm": 1.4480314333799575, - "learning_rate": 7.426262656346979e-06, - "loss": 0.1862, - "step": 11708 - }, - { - "epoch": 0.6, - "grad_norm": 0.7651554481125494, - "learning_rate": 7.424671205816562e-06, - "loss": 0.1647, - "step": 11709 - }, - { - "epoch": 0.6, - "grad_norm": 1.125730464461837, - "learning_rate": 7.4230798251420865e-06, - "loss": 0.2053, - "step": 11710 - }, - { - "epoch": 0.6, - "grad_norm": 1.0174053546119461, - "learning_rate": 7.421488514366719e-06, - "loss": 0.1772, - "step": 11711 - }, - { - "epoch": 0.6, - "grad_norm": 1.0451093555191666, - "learning_rate": 7.419897273533616e-06, - "loss": 0.1855, - "step": 11712 - }, - { - "epoch": 0.6, - "grad_norm": 0.770060246482943, - "learning_rate": 7.41830610268595e-06, - "loss": 0.1818, - "step": 11713 - }, - { - "epoch": 0.6, - "grad_norm": 1.2007239777086642, - "learning_rate": 7.416715001866873e-06, - "loss": 0.184, - "step": 11714 - }, - { - "epoch": 0.6, - "grad_norm": 1.1565457242610033, - "learning_rate": 7.415123971119549e-06, - "loss": 0.1896, - "step": 11715 - }, - { - "epoch": 0.6, - "grad_norm": 0.9928207158486638, - "learning_rate": 7.4135330104871315e-06, - "loss": 0.1813, - "step": 11716 - }, - { - "epoch": 0.6, - "grad_norm": 1.1910867213681666, - "learning_rate": 7.411942120012781e-06, - "loss": 0.1823, - "step": 11717 - }, - { - "epoch": 0.6, - "grad_norm": 1.1848318570575225, - "learning_rate": 7.4103512997396434e-06, - "loss": 0.1878, - "step": 11718 - }, - { - "epoch": 0.6, - "grad_norm": 1.2032769546753332, - "learning_rate": 7.408760549710874e-06, - "loss": 0.1879, - "step": 11719 - }, - { - "epoch": 0.6, - "grad_norm": 0.7872865465004285, - "learning_rate": 7.40716986996962e-06, - "loss": 0.1565, - "step": 11720 - }, - { - "epoch": 0.6, - "grad_norm": 2.1778944733024477, - "learning_rate": 7.405579260559033e-06, - "loss": 0.1751, - "step": 11721 - }, - { - "epoch": 0.6, - "grad_norm": 2.6036289736231404, - "learning_rate": 7.4039887215222515e-06, - "loss": 0.1724, - "step": 11722 - }, - { - "epoch": 0.6, - "grad_norm": 0.7619375542330458, - "learning_rate": 7.402398252902425e-06, - "loss": 0.169, - "step": 11723 - }, - { - "epoch": 0.6, - "grad_norm": 1.0087057949594154, - "learning_rate": 7.400807854742689e-06, - "loss": 0.1545, - "step": 11724 - }, - { - "epoch": 0.6, - "grad_norm": 1.008949785204754, - "learning_rate": 7.399217527086192e-06, - "loss": 0.1866, - "step": 11725 - }, - { - "epoch": 0.6, - "grad_norm": 1.3812962714038624, - "learning_rate": 7.397627269976062e-06, - "loss": 0.1737, - "step": 11726 - }, - { - "epoch": 0.6, - "grad_norm": 1.0063596374669785, - "learning_rate": 7.396037083455444e-06, - "loss": 0.1723, - "step": 11727 - }, - { - "epoch": 0.6, - "grad_norm": 0.9817553872537692, - "learning_rate": 7.394446967567464e-06, - "loss": 0.1541, - "step": 11728 - }, - { - "epoch": 0.6, - "grad_norm": 0.9842802734124819, - "learning_rate": 7.392856922355256e-06, - "loss": 0.1935, - "step": 11729 - }, - { - "epoch": 0.6, - "grad_norm": 0.932910795695672, - "learning_rate": 7.3912669478619555e-06, - "loss": 0.1928, - "step": 11730 - }, - { - "epoch": 0.6, - "grad_norm": 0.9222028155289357, - "learning_rate": 7.389677044130682e-06, - "loss": 0.1671, - "step": 11731 - }, - { - "epoch": 0.6, - "grad_norm": 1.0197399095709354, - "learning_rate": 7.3880872112045685e-06, - "loss": 0.1622, - "step": 11732 - }, - { - "epoch": 0.6, - "grad_norm": 1.3319641346176132, - "learning_rate": 7.386497449126735e-06, - "loss": 0.1827, - "step": 11733 - }, - { - "epoch": 0.6, - "grad_norm": 1.352246925301863, - "learning_rate": 7.384907757940309e-06, - "loss": 0.1698, - "step": 11734 - }, - { - "epoch": 0.6, - "grad_norm": 1.2899917207740446, - "learning_rate": 7.383318137688403e-06, - "loss": 0.1954, - "step": 11735 - }, - { - "epoch": 0.6, - "grad_norm": 0.8328991342166945, - "learning_rate": 7.381728588414143e-06, - "loss": 0.1813, - "step": 11736 - }, - { - "epoch": 0.6, - "grad_norm": 1.0932700968247486, - "learning_rate": 7.380139110160642e-06, - "loss": 0.1883, - "step": 11737 - }, - { - "epoch": 0.6, - "grad_norm": 0.8564260352071182, - "learning_rate": 7.378549702971018e-06, - "loss": 0.1905, - "step": 11738 - }, - { - "epoch": 0.6, - "grad_norm": 1.2383818779988518, - "learning_rate": 7.3769603668883794e-06, - "loss": 0.1784, - "step": 11739 - }, - { - "epoch": 0.6, - "grad_norm": 0.9750897662967867, - "learning_rate": 7.375371101955842e-06, - "loss": 0.1606, - "step": 11740 - }, - { - "epoch": 0.6, - "grad_norm": 1.0507731167395518, - "learning_rate": 7.373781908216507e-06, - "loss": 0.174, - "step": 11741 - }, - { - "epoch": 0.6, - "grad_norm": 2.124368626038594, - "learning_rate": 7.372192785713489e-06, - "loss": 0.1719, - "step": 11742 - }, - { - "epoch": 0.6, - "grad_norm": 0.7837162544033184, - "learning_rate": 7.370603734489887e-06, - "loss": 0.159, - "step": 11743 - }, - { - "epoch": 0.6, - "grad_norm": 0.8844333450966607, - "learning_rate": 7.3690147545888124e-06, - "loss": 0.2085, - "step": 11744 - }, - { - "epoch": 0.6, - "grad_norm": 1.2703046460006664, - "learning_rate": 7.36742584605336e-06, - "loss": 0.1899, - "step": 11745 - }, - { - "epoch": 0.6, - "grad_norm": 1.0418363545100195, - "learning_rate": 7.365837008926626e-06, - "loss": 0.1729, - "step": 11746 - }, - { - "epoch": 0.6, - "grad_norm": 0.9182324287203589, - "learning_rate": 7.364248243251717e-06, - "loss": 0.1685, - "step": 11747 - }, - { - "epoch": 0.6, - "grad_norm": 1.4520427465418861, - "learning_rate": 7.362659549071719e-06, - "loss": 0.203, - "step": 11748 - }, - { - "epoch": 0.6, - "grad_norm": 0.8872792416800015, - "learning_rate": 7.361070926429732e-06, - "loss": 0.1743, - "step": 11749 - }, - { - "epoch": 0.6, - "grad_norm": 0.9247274282638314, - "learning_rate": 7.359482375368843e-06, - "loss": 0.1945, - "step": 11750 - }, - { - "epoch": 0.6, - "grad_norm": 1.0557085759243954, - "learning_rate": 7.357893895932148e-06, - "loss": 0.1891, - "step": 11751 - }, - { - "epoch": 0.6, - "grad_norm": 1.8863500229256194, - "learning_rate": 7.356305488162725e-06, - "loss": 0.1874, - "step": 11752 - }, - { - "epoch": 0.6, - "grad_norm": 0.6322796010243121, - "learning_rate": 7.3547171521036705e-06, - "loss": 0.1572, - "step": 11753 - }, - { - "epoch": 0.6, - "grad_norm": 1.1802599977822752, - "learning_rate": 7.353128887798058e-06, - "loss": 0.1852, - "step": 11754 - }, - { - "epoch": 0.6, - "grad_norm": 0.9031348770048434, - "learning_rate": 7.351540695288977e-06, - "loss": 0.1597, - "step": 11755 - }, - { - "epoch": 0.6, - "grad_norm": 1.3039431244166906, - "learning_rate": 7.349952574619501e-06, - "loss": 0.1914, - "step": 11756 - }, - { - "epoch": 0.6, - "grad_norm": 0.8685734518172754, - "learning_rate": 7.3483645258327145e-06, - "loss": 0.1813, - "step": 11757 - }, - { - "epoch": 0.6, - "grad_norm": 1.0207187065630585, - "learning_rate": 7.346776548971687e-06, - "loss": 0.198, - "step": 11758 - }, - { - "epoch": 0.6, - "grad_norm": 1.0524126214529708, - "learning_rate": 7.345188644079497e-06, - "loss": 0.1833, - "step": 11759 - }, - { - "epoch": 0.6, - "grad_norm": 0.8143507974511598, - "learning_rate": 7.3436008111992145e-06, - "loss": 0.1725, - "step": 11760 - }, - { - "epoch": 0.6, - "grad_norm": 0.8899011291190664, - "learning_rate": 7.342013050373913e-06, - "loss": 0.1763, - "step": 11761 - }, - { - "epoch": 0.6, - "grad_norm": 0.9923921534932216, - "learning_rate": 7.340425361646653e-06, - "loss": 0.1843, - "step": 11762 - }, - { - "epoch": 0.6, - "grad_norm": 0.7734285672321267, - "learning_rate": 7.338837745060508e-06, - "loss": 0.1713, - "step": 11763 - }, - { - "epoch": 0.6, - "grad_norm": 0.9661992269342103, - "learning_rate": 7.337250200658541e-06, - "loss": 0.1829, - "step": 11764 - }, - { - "epoch": 0.6, - "grad_norm": 0.9162980784153151, - "learning_rate": 7.335662728483808e-06, - "loss": 0.1751, - "step": 11765 - }, - { - "epoch": 0.6, - "grad_norm": 2.6337898949008385, - "learning_rate": 7.33407532857938e-06, - "loss": 0.1695, - "step": 11766 - }, - { - "epoch": 0.6, - "grad_norm": 1.1236715789822076, - "learning_rate": 7.332488000988303e-06, - "loss": 0.1855, - "step": 11767 - }, - { - "epoch": 0.6, - "grad_norm": 1.157821899167246, - "learning_rate": 7.330900745753643e-06, - "loss": 0.1811, - "step": 11768 - }, - { - "epoch": 0.6, - "grad_norm": 0.9014010739784609, - "learning_rate": 7.329313562918449e-06, - "loss": 0.1748, - "step": 11769 - }, - { - "epoch": 0.6, - "grad_norm": 1.13217901184811, - "learning_rate": 7.327726452525779e-06, - "loss": 0.1707, - "step": 11770 - }, - { - "epoch": 0.6, - "grad_norm": 0.9580859428605121, - "learning_rate": 7.326139414618676e-06, - "loss": 0.1931, - "step": 11771 - }, - { - "epoch": 0.6, - "grad_norm": 0.9840830862799712, - "learning_rate": 7.324552449240194e-06, - "loss": 0.1843, - "step": 11772 - }, - { - "epoch": 0.6, - "grad_norm": 0.9225628334085767, - "learning_rate": 7.322965556433377e-06, - "loss": 0.1967, - "step": 11773 - }, - { - "epoch": 0.6, - "grad_norm": 0.8297351049262989, - "learning_rate": 7.321378736241274e-06, - "loss": 0.1662, - "step": 11774 - }, - { - "epoch": 0.6, - "grad_norm": 0.9391575526978314, - "learning_rate": 7.31979198870692e-06, - "loss": 0.1803, - "step": 11775 - }, - { - "epoch": 0.6, - "grad_norm": 0.8288981321997082, - "learning_rate": 7.318205313873361e-06, - "loss": 0.1572, - "step": 11776 - }, - { - "epoch": 0.6, - "grad_norm": 1.0773086929648976, - "learning_rate": 7.316618711783634e-06, - "loss": 0.2063, - "step": 11777 - }, - { - "epoch": 0.6, - "grad_norm": 1.1221116623307208, - "learning_rate": 7.315032182480779e-06, - "loss": 0.181, - "step": 11778 - }, - { - "epoch": 0.6, - "grad_norm": 1.0846276680180476, - "learning_rate": 7.313445726007824e-06, - "loss": 0.1942, - "step": 11779 - }, - { - "epoch": 0.6, - "grad_norm": 0.9901471340187721, - "learning_rate": 7.311859342407809e-06, - "loss": 0.1945, - "step": 11780 - }, - { - "epoch": 0.6, - "grad_norm": 0.8943804861069643, - "learning_rate": 7.310273031723759e-06, - "loss": 0.1569, - "step": 11781 - }, - { - "epoch": 0.6, - "grad_norm": 0.8785188472162674, - "learning_rate": 7.3086867939987025e-06, - "loss": 0.1808, - "step": 11782 - }, - { - "epoch": 0.6, - "grad_norm": 1.2553735501788523, - "learning_rate": 7.307100629275674e-06, - "loss": 0.1856, - "step": 11783 - }, - { - "epoch": 0.6, - "grad_norm": 0.7430195608217605, - "learning_rate": 7.305514537597689e-06, - "loss": 0.1745, - "step": 11784 - }, - { - "epoch": 0.6, - "grad_norm": 0.8762282857033288, - "learning_rate": 7.303928519007776e-06, - "loss": 0.1854, - "step": 11785 - }, - { - "epoch": 0.6, - "grad_norm": 0.973383706689411, - "learning_rate": 7.302342573548952e-06, - "loss": 0.2029, - "step": 11786 - }, - { - "epoch": 0.6, - "grad_norm": 1.2048069280922016, - "learning_rate": 7.300756701264242e-06, - "loss": 0.1754, - "step": 11787 - }, - { - "epoch": 0.6, - "grad_norm": 1.330264833832681, - "learning_rate": 7.299170902196655e-06, - "loss": 0.18, - "step": 11788 - }, - { - "epoch": 0.6, - "grad_norm": 0.8528049614191048, - "learning_rate": 7.297585176389212e-06, - "loss": 0.1892, - "step": 11789 - }, - { - "epoch": 0.6, - "grad_norm": 0.9450155315439861, - "learning_rate": 7.295999523884921e-06, - "loss": 0.168, - "step": 11790 - }, - { - "epoch": 0.6, - "grad_norm": 0.9239769701781485, - "learning_rate": 7.294413944726801e-06, - "loss": 0.1762, - "step": 11791 - }, - { - "epoch": 0.6, - "grad_norm": 1.1140158903085968, - "learning_rate": 7.292828438957851e-06, - "loss": 0.1669, - "step": 11792 - }, - { - "epoch": 0.6, - "grad_norm": 0.7712235884304885, - "learning_rate": 7.291243006621084e-06, - "loss": 0.1739, - "step": 11793 - }, - { - "epoch": 0.6, - "grad_norm": 1.0059461187022227, - "learning_rate": 7.289657647759501e-06, - "loss": 0.163, - "step": 11794 - }, - { - "epoch": 0.6, - "grad_norm": 0.9109098709361532, - "learning_rate": 7.288072362416112e-06, - "loss": 0.1922, - "step": 11795 - }, - { - "epoch": 0.6, - "grad_norm": 0.9489883402080502, - "learning_rate": 7.28648715063391e-06, - "loss": 0.16, - "step": 11796 - }, - { - "epoch": 0.6, - "grad_norm": 1.0745977552488903, - "learning_rate": 7.2849020124559015e-06, - "loss": 0.1783, - "step": 11797 - }, - { - "epoch": 0.6, - "grad_norm": 0.9793700759659624, - "learning_rate": 7.283316947925075e-06, - "loss": 0.2013, - "step": 11798 - }, - { - "epoch": 0.6, - "grad_norm": 0.9171954822057149, - "learning_rate": 7.28173195708443e-06, - "loss": 0.1914, - "step": 11799 - }, - { - "epoch": 0.6, - "grad_norm": 0.9517807304858662, - "learning_rate": 7.280147039976963e-06, - "loss": 0.1757, - "step": 11800 - }, - { - "epoch": 0.6, - "grad_norm": 0.9207643617133459, - "learning_rate": 7.278562196645656e-06, - "loss": 0.1978, - "step": 11801 - }, - { - "epoch": 0.6, - "grad_norm": 1.2926951372931075, - "learning_rate": 7.276977427133505e-06, - "loss": 0.2026, - "step": 11802 - }, - { - "epoch": 0.6, - "grad_norm": 1.1629358351213168, - "learning_rate": 7.275392731483495e-06, - "loss": 0.1937, - "step": 11803 - }, - { - "epoch": 0.6, - "grad_norm": 0.9957458561200179, - "learning_rate": 7.273808109738614e-06, - "loss": 0.1602, - "step": 11804 - }, - { - "epoch": 0.6, - "grad_norm": 1.4993122787846052, - "learning_rate": 7.272223561941837e-06, - "loss": 0.1736, - "step": 11805 - }, - { - "epoch": 0.6, - "grad_norm": 1.3441130422411014, - "learning_rate": 7.270639088136154e-06, - "loss": 0.1756, - "step": 11806 - }, - { - "epoch": 0.6, - "grad_norm": 0.9196103667338753, - "learning_rate": 7.269054688364535e-06, - "loss": 0.1838, - "step": 11807 - }, - { - "epoch": 0.6, - "grad_norm": 1.0711928579048098, - "learning_rate": 7.2674703626699685e-06, - "loss": 0.1731, - "step": 11808 - }, - { - "epoch": 0.6, - "grad_norm": 1.1727203349150228, - "learning_rate": 7.265886111095417e-06, - "loss": 0.2018, - "step": 11809 - }, - { - "epoch": 0.6, - "grad_norm": 1.19263133780406, - "learning_rate": 7.264301933683864e-06, - "loss": 0.1866, - "step": 11810 - }, - { - "epoch": 0.6, - "grad_norm": 0.9406769046267162, - "learning_rate": 7.262717830478272e-06, - "loss": 0.1979, - "step": 11811 - }, - { - "epoch": 0.6, - "grad_norm": 0.9433538747251087, - "learning_rate": 7.261133801521614e-06, - "loss": 0.1738, - "step": 11812 - }, - { - "epoch": 0.6, - "grad_norm": 0.9999985459271858, - "learning_rate": 7.259549846856855e-06, - "loss": 0.1768, - "step": 11813 - }, - { - "epoch": 0.6, - "grad_norm": 2.089501907253873, - "learning_rate": 7.257965966526966e-06, - "loss": 0.1793, - "step": 11814 - }, - { - "epoch": 0.6, - "grad_norm": 1.1513213292828732, - "learning_rate": 7.256382160574902e-06, - "loss": 0.1967, - "step": 11815 - }, - { - "epoch": 0.6, - "grad_norm": 0.9955636307423217, - "learning_rate": 7.254798429043626e-06, - "loss": 0.1881, - "step": 11816 - }, - { - "epoch": 0.6, - "grad_norm": 0.9804159920200639, - "learning_rate": 7.2532147719761e-06, - "loss": 0.1697, - "step": 11817 - }, - { - "epoch": 0.6, - "grad_norm": 1.1111216413748202, - "learning_rate": 7.251631189415275e-06, - "loss": 0.1902, - "step": 11818 - }, - { - "epoch": 0.6, - "grad_norm": 1.0571395710658018, - "learning_rate": 7.250047681404113e-06, - "loss": 0.2079, - "step": 11819 - }, - { - "epoch": 0.6, - "grad_norm": 2.1265894734074346, - "learning_rate": 7.248464247985558e-06, - "loss": 0.192, - "step": 11820 - }, - { - "epoch": 0.6, - "grad_norm": 0.8806117017761279, - "learning_rate": 7.246880889202572e-06, - "loss": 0.155, - "step": 11821 - }, - { - "epoch": 0.6, - "grad_norm": 0.9507389103704897, - "learning_rate": 7.245297605098093e-06, - "loss": 0.1478, - "step": 11822 - }, - { - "epoch": 0.6, - "grad_norm": 1.2828201375748278, - "learning_rate": 7.243714395715076e-06, - "loss": 0.1679, - "step": 11823 - }, - { - "epoch": 0.6, - "grad_norm": 2.137428365187284, - "learning_rate": 7.242131261096457e-06, - "loss": 0.1591, - "step": 11824 - }, - { - "epoch": 0.6, - "grad_norm": 0.9450990752849141, - "learning_rate": 7.240548201285186e-06, - "loss": 0.1715, - "step": 11825 - }, - { - "epoch": 0.6, - "grad_norm": 0.9348710156084757, - "learning_rate": 7.238965216324199e-06, - "loss": 0.1897, - "step": 11826 - }, - { - "epoch": 0.6, - "grad_norm": 1.2180133790398777, - "learning_rate": 7.2373823062564396e-06, - "loss": 0.1676, - "step": 11827 - }, - { - "epoch": 0.6, - "grad_norm": 1.4145821638061544, - "learning_rate": 7.235799471124838e-06, - "loss": 0.1753, - "step": 11828 - }, - { - "epoch": 0.6, - "grad_norm": 1.479901347156181, - "learning_rate": 7.234216710972333e-06, - "loss": 0.1736, - "step": 11829 - }, - { - "epoch": 0.6, - "grad_norm": 1.0622603323670472, - "learning_rate": 7.232634025841853e-06, - "loss": 0.1667, - "step": 11830 - }, - { - "epoch": 0.6, - "grad_norm": 0.8733703542173635, - "learning_rate": 7.231051415776338e-06, - "loss": 0.1768, - "step": 11831 - }, - { - "epoch": 0.6, - "grad_norm": 0.7926624980613768, - "learning_rate": 7.2294688808187045e-06, - "loss": 0.1545, - "step": 11832 - }, - { - "epoch": 0.6, - "grad_norm": 0.8767977208712247, - "learning_rate": 7.227886421011886e-06, - "loss": 0.1706, - "step": 11833 - }, - { - "epoch": 0.6, - "grad_norm": 0.9092722362527742, - "learning_rate": 7.226304036398808e-06, - "loss": 0.179, - "step": 11834 - }, - { - "epoch": 0.6, - "grad_norm": 0.9596541254080619, - "learning_rate": 7.224721727022384e-06, - "loss": 0.1998, - "step": 11835 - }, - { - "epoch": 0.6, - "grad_norm": 1.2707631192094582, - "learning_rate": 7.223139492925544e-06, - "loss": 0.1838, - "step": 11836 - }, - { - "epoch": 0.6, - "grad_norm": 1.4641264206313687, - "learning_rate": 7.221557334151199e-06, - "loss": 0.1954, - "step": 11837 - }, - { - "epoch": 0.6, - "grad_norm": 1.0628876393856026, - "learning_rate": 7.21997525074227e-06, - "loss": 0.1831, - "step": 11838 - }, - { - "epoch": 0.6, - "grad_norm": 0.8944603679621254, - "learning_rate": 7.218393242741667e-06, - "loss": 0.1661, - "step": 11839 - }, - { - "epoch": 0.6, - "grad_norm": 0.8294708528212483, - "learning_rate": 7.2168113101923085e-06, - "loss": 0.1595, - "step": 11840 - }, - { - "epoch": 0.6, - "grad_norm": 0.9545116326224699, - "learning_rate": 7.215229453137097e-06, - "loss": 0.1907, - "step": 11841 - }, - { - "epoch": 0.6, - "grad_norm": 1.5609981833288196, - "learning_rate": 7.213647671618945e-06, - "loss": 0.2064, - "step": 11842 - }, - { - "epoch": 0.6, - "grad_norm": 1.2486096471058774, - "learning_rate": 7.212065965680755e-06, - "loss": 0.1733, - "step": 11843 - }, - { - "epoch": 0.6, - "grad_norm": 0.9830128638983774, - "learning_rate": 7.210484335365438e-06, - "loss": 0.1777, - "step": 11844 - }, - { - "epoch": 0.6, - "grad_norm": 1.1222461529512562, - "learning_rate": 7.208902780715888e-06, - "loss": 0.1849, - "step": 11845 - }, - { - "epoch": 0.6, - "grad_norm": 0.9584478418686001, - "learning_rate": 7.207321301775008e-06, - "loss": 0.1651, - "step": 11846 - }, - { - "epoch": 0.6, - "grad_norm": 1.0400600781793397, - "learning_rate": 7.205739898585693e-06, - "loss": 0.2062, - "step": 11847 - }, - { - "epoch": 0.6, - "grad_norm": 2.101326132606511, - "learning_rate": 7.204158571190847e-06, - "loss": 0.1983, - "step": 11848 - }, - { - "epoch": 0.6, - "grad_norm": 0.9634348795913377, - "learning_rate": 7.202577319633353e-06, - "loss": 0.1736, - "step": 11849 - }, - { - "epoch": 0.6, - "grad_norm": 0.8806079665225696, - "learning_rate": 7.200996143956111e-06, - "loss": 0.1777, - "step": 11850 - }, - { - "epoch": 0.6, - "grad_norm": 1.1346657628354975, - "learning_rate": 7.199415044202004e-06, - "loss": 0.1747, - "step": 11851 - }, - { - "epoch": 0.6, - "grad_norm": 1.7454248060467574, - "learning_rate": 7.1978340204139205e-06, - "loss": 0.229, - "step": 11852 - }, - { - "epoch": 0.6, - "grad_norm": 0.8133745687679493, - "learning_rate": 7.196253072634751e-06, - "loss": 0.1764, - "step": 11853 - }, - { - "epoch": 0.6, - "grad_norm": 0.9192064226925258, - "learning_rate": 7.194672200907373e-06, - "loss": 0.1787, - "step": 11854 - }, - { - "epoch": 0.6, - "grad_norm": 0.9325417312120747, - "learning_rate": 7.193091405274671e-06, - "loss": 0.1826, - "step": 11855 - }, - { - "epoch": 0.6, - "grad_norm": 0.8704646661650539, - "learning_rate": 7.19151068577952e-06, - "loss": 0.1806, - "step": 11856 - }, - { - "epoch": 0.6, - "grad_norm": 0.9427144139646895, - "learning_rate": 7.189930042464806e-06, - "loss": 0.1696, - "step": 11857 - }, - { - "epoch": 0.6, - "grad_norm": 0.7537092950958617, - "learning_rate": 7.188349475373393e-06, - "loss": 0.16, - "step": 11858 - }, - { - "epoch": 0.6, - "grad_norm": 1.35349247055404, - "learning_rate": 7.186768984548162e-06, - "loss": 0.1975, - "step": 11859 - }, - { - "epoch": 0.6, - "grad_norm": 1.1981338252651514, - "learning_rate": 7.185188570031979e-06, - "loss": 0.19, - "step": 11860 - }, - { - "epoch": 0.6, - "grad_norm": 1.001442490909649, - "learning_rate": 7.18360823186772e-06, - "loss": 0.1824, - "step": 11861 - }, - { - "epoch": 0.6, - "grad_norm": 0.8589437176538642, - "learning_rate": 7.182027970098242e-06, - "loss": 0.1514, - "step": 11862 - }, - { - "epoch": 0.6, - "grad_norm": 1.3932418492827867, - "learning_rate": 7.180447784766418e-06, - "loss": 0.1654, - "step": 11863 - }, - { - "epoch": 0.6, - "grad_norm": 1.680823262999588, - "learning_rate": 7.178867675915104e-06, - "loss": 0.1665, - "step": 11864 - }, - { - "epoch": 0.6, - "grad_norm": 0.8770714709369166, - "learning_rate": 7.177287643587164e-06, - "loss": 0.1608, - "step": 11865 - }, - { - "epoch": 0.6, - "grad_norm": 1.4125927927249728, - "learning_rate": 7.175707687825455e-06, - "loss": 0.1637, - "step": 11866 - }, - { - "epoch": 0.6, - "grad_norm": 0.9644948475142574, - "learning_rate": 7.1741278086728395e-06, - "loss": 0.1644, - "step": 11867 - }, - { - "epoch": 0.6, - "grad_norm": 1.0163483995642684, - "learning_rate": 7.172548006172164e-06, - "loss": 0.202, - "step": 11868 - }, - { - "epoch": 0.6, - "grad_norm": 1.0437266979742186, - "learning_rate": 7.170968280366281e-06, - "loss": 0.1642, - "step": 11869 - }, - { - "epoch": 0.6, - "grad_norm": 1.8267503919071744, - "learning_rate": 7.1693886312980475e-06, - "loss": 0.1618, - "step": 11870 - }, - { - "epoch": 0.6, - "grad_norm": 1.1865281005301913, - "learning_rate": 7.1678090590103035e-06, - "loss": 0.1923, - "step": 11871 - }, - { - "epoch": 0.6, - "grad_norm": 0.9358919918321995, - "learning_rate": 7.166229563545901e-06, - "loss": 0.1808, - "step": 11872 - }, - { - "epoch": 0.6, - "grad_norm": 2.050465910789463, - "learning_rate": 7.164650144947679e-06, - "loss": 0.1753, - "step": 11873 - }, - { - "epoch": 0.6, - "grad_norm": 1.0196472656738296, - "learning_rate": 7.163070803258486e-06, - "loss": 0.2121, - "step": 11874 - }, - { - "epoch": 0.6, - "grad_norm": 2.553112700593638, - "learning_rate": 7.161491538521154e-06, - "loss": 0.1763, - "step": 11875 - }, - { - "epoch": 0.6, - "grad_norm": 1.012673181882685, - "learning_rate": 7.159912350778528e-06, - "loss": 0.1572, - "step": 11876 - }, - { - "epoch": 0.6, - "grad_norm": 1.216975617994192, - "learning_rate": 7.158333240073436e-06, - "loss": 0.1983, - "step": 11877 - }, - { - "epoch": 0.6, - "grad_norm": 0.9765534866537904, - "learning_rate": 7.156754206448718e-06, - "loss": 0.1688, - "step": 11878 - }, - { - "epoch": 0.6, - "grad_norm": 0.7845748811610675, - "learning_rate": 7.1551752499472005e-06, - "loss": 0.19, - "step": 11879 - }, - { - "epoch": 0.6, - "grad_norm": 1.424597105206769, - "learning_rate": 7.153596370611719e-06, - "loss": 0.1788, - "step": 11880 - }, - { - "epoch": 0.6, - "grad_norm": 1.3585442671939263, - "learning_rate": 7.152017568485092e-06, - "loss": 0.1835, - "step": 11881 - }, - { - "epoch": 0.6, - "grad_norm": 2.7360441143193666, - "learning_rate": 7.150438843610152e-06, - "loss": 0.189, - "step": 11882 - }, - { - "epoch": 0.6, - "grad_norm": 0.9452059160457844, - "learning_rate": 7.148860196029717e-06, - "loss": 0.2015, - "step": 11883 - }, - { - "epoch": 0.6, - "grad_norm": 1.2493024368049896, - "learning_rate": 7.147281625786615e-06, - "loss": 0.1752, - "step": 11884 - }, - { - "epoch": 0.6, - "grad_norm": 0.883847817100227, - "learning_rate": 7.145703132923657e-06, - "loss": 0.1823, - "step": 11885 - }, - { - "epoch": 0.6, - "grad_norm": 1.906830011163742, - "learning_rate": 7.144124717483661e-06, - "loss": 0.175, - "step": 11886 - }, - { - "epoch": 0.6, - "grad_norm": 0.9714690367328279, - "learning_rate": 7.1425463795094476e-06, - "loss": 0.163, - "step": 11887 - }, - { - "epoch": 0.6, - "grad_norm": 1.1441987658528388, - "learning_rate": 7.14096811904382e-06, - "loss": 0.1728, - "step": 11888 - }, - { - "epoch": 0.6, - "grad_norm": 0.9804796842368632, - "learning_rate": 7.139389936129599e-06, - "loss": 0.1925, - "step": 11889 - }, - { - "epoch": 0.6, - "grad_norm": 1.2599611555433587, - "learning_rate": 7.1378118308095835e-06, - "loss": 0.1869, - "step": 11890 - }, - { - "epoch": 0.6, - "grad_norm": 1.0853613521821157, - "learning_rate": 7.136233803126584e-06, - "loss": 0.1751, - "step": 11891 - }, - { - "epoch": 0.6, - "grad_norm": 2.6086541355655184, - "learning_rate": 7.1346558531234046e-06, - "loss": 0.1936, - "step": 11892 - }, - { - "epoch": 0.6, - "grad_norm": 1.1234407378780253, - "learning_rate": 7.133077980842851e-06, - "loss": 0.1855, - "step": 11893 - }, - { - "epoch": 0.6, - "grad_norm": 1.1006285198696308, - "learning_rate": 7.1315001863277135e-06, - "loss": 0.1831, - "step": 11894 - }, - { - "epoch": 0.6, - "grad_norm": 1.128971427493001, - "learning_rate": 7.129922469620798e-06, - "loss": 0.1685, - "step": 11895 - }, - { - "epoch": 0.6, - "grad_norm": 0.878266359471762, - "learning_rate": 7.128344830764895e-06, - "loss": 0.1826, - "step": 11896 - }, - { - "epoch": 0.6, - "grad_norm": 1.4104581272003494, - "learning_rate": 7.126767269802806e-06, - "loss": 0.1746, - "step": 11897 - }, - { - "epoch": 0.61, - "grad_norm": 1.1426165349149782, - "learning_rate": 7.125189786777312e-06, - "loss": 0.1707, - "step": 11898 - }, - { - "epoch": 0.61, - "grad_norm": 1.3657276142785477, - "learning_rate": 7.123612381731211e-06, - "loss": 0.1663, - "step": 11899 - }, - { - "epoch": 0.61, - "grad_norm": 1.014592637511237, - "learning_rate": 7.122035054707283e-06, - "loss": 0.1808, - "step": 11900 - }, - { - "epoch": 0.61, - "grad_norm": 1.059580390073793, - "learning_rate": 7.1204578057483206e-06, - "loss": 0.1851, - "step": 11901 - }, - { - "epoch": 0.61, - "grad_norm": 1.1218977907688872, - "learning_rate": 7.1188806348971e-06, - "loss": 0.192, - "step": 11902 - }, - { - "epoch": 0.61, - "grad_norm": 0.8440398569371811, - "learning_rate": 7.117303542196406e-06, - "loss": 0.1728, - "step": 11903 - }, - { - "epoch": 0.61, - "grad_norm": 1.0808151942873958, - "learning_rate": 7.1157265276890195e-06, - "loss": 0.1944, - "step": 11904 - }, - { - "epoch": 0.61, - "grad_norm": 1.194100575050271, - "learning_rate": 7.114149591417709e-06, - "loss": 0.1842, - "step": 11905 - }, - { - "epoch": 0.61, - "grad_norm": 0.9826936837280463, - "learning_rate": 7.112572733425257e-06, - "loss": 0.1748, - "step": 11906 - }, - { - "epoch": 0.61, - "grad_norm": 1.234040095182392, - "learning_rate": 7.11099595375443e-06, - "loss": 0.1875, - "step": 11907 - }, - { - "epoch": 0.61, - "grad_norm": 0.9664874950364423, - "learning_rate": 7.1094192524480025e-06, - "loss": 0.1562, - "step": 11908 - }, - { - "epoch": 0.61, - "grad_norm": 0.7664885752023356, - "learning_rate": 7.107842629548738e-06, - "loss": 0.1667, - "step": 11909 - }, - { - "epoch": 0.61, - "grad_norm": 0.8510699508661388, - "learning_rate": 7.106266085099412e-06, - "loss": 0.1942, - "step": 11910 - }, - { - "epoch": 0.61, - "grad_norm": 1.783986982922015, - "learning_rate": 7.104689619142775e-06, - "loss": 0.1705, - "step": 11911 - }, - { - "epoch": 0.61, - "grad_norm": 0.7713147450056765, - "learning_rate": 7.103113231721599e-06, - "loss": 0.1825, - "step": 11912 - }, - { - "epoch": 0.61, - "grad_norm": 0.8175255022774114, - "learning_rate": 7.101536922878638e-06, - "loss": 0.1721, - "step": 11913 - }, - { - "epoch": 0.61, - "grad_norm": 1.5835071361004465, - "learning_rate": 7.0999606926566554e-06, - "loss": 0.1853, - "step": 11914 - }, - { - "epoch": 0.61, - "grad_norm": 1.4009658683843862, - "learning_rate": 7.0983845410984e-06, - "loss": 0.187, - "step": 11915 - }, - { - "epoch": 0.61, - "grad_norm": 1.0056035099506464, - "learning_rate": 7.096808468246629e-06, - "loss": 0.1735, - "step": 11916 - }, - { - "epoch": 0.61, - "grad_norm": 1.0911482613971564, - "learning_rate": 7.095232474144089e-06, - "loss": 0.1802, - "step": 11917 - }, - { - "epoch": 0.61, - "grad_norm": 0.9992152632875951, - "learning_rate": 7.0936565588335386e-06, - "loss": 0.1792, - "step": 11918 - }, - { - "epoch": 0.61, - "grad_norm": 1.3219752435746845, - "learning_rate": 7.092080722357713e-06, - "loss": 0.1683, - "step": 11919 - }, - { - "epoch": 0.61, - "grad_norm": 1.1595662144721943, - "learning_rate": 7.090504964759366e-06, - "loss": 0.1739, - "step": 11920 - }, - { - "epoch": 0.61, - "grad_norm": 1.0274332516187044, - "learning_rate": 7.0889292860812344e-06, - "loss": 0.1694, - "step": 11921 - }, - { - "epoch": 0.61, - "grad_norm": 0.7438566646687913, - "learning_rate": 7.087353686366059e-06, - "loss": 0.1743, - "step": 11922 - }, - { - "epoch": 0.61, - "grad_norm": 0.7802668225695544, - "learning_rate": 7.085778165656581e-06, - "loss": 0.1644, - "step": 11923 - }, - { - "epoch": 0.61, - "grad_norm": 1.044161670020728, - "learning_rate": 7.084202723995533e-06, - "loss": 0.1855, - "step": 11924 - }, - { - "epoch": 0.61, - "grad_norm": 0.8374455063926431, - "learning_rate": 7.082627361425652e-06, - "loss": 0.1736, - "step": 11925 - }, - { - "epoch": 0.61, - "grad_norm": 2.0659359053226045, - "learning_rate": 7.081052077989668e-06, - "loss": 0.1615, - "step": 11926 - }, - { - "epoch": 0.61, - "grad_norm": 1.0140527059862043, - "learning_rate": 7.0794768737303135e-06, - "loss": 0.1732, - "step": 11927 - }, - { - "epoch": 0.61, - "grad_norm": 0.9475497891387962, - "learning_rate": 7.07790174869031e-06, - "loss": 0.1554, - "step": 11928 - }, - { - "epoch": 0.61, - "grad_norm": 0.8568709849395552, - "learning_rate": 7.076326702912388e-06, - "loss": 0.1957, - "step": 11929 - }, - { - "epoch": 0.61, - "grad_norm": 1.0945804443320943, - "learning_rate": 7.0747517364392694e-06, - "loss": 0.1911, - "step": 11930 - }, - { - "epoch": 0.61, - "grad_norm": 1.042478240027438, - "learning_rate": 7.073176849313678e-06, - "loss": 0.193, - "step": 11931 - }, - { - "epoch": 0.61, - "grad_norm": 0.9780085983785852, - "learning_rate": 7.071602041578325e-06, - "loss": 0.2077, - "step": 11932 - }, - { - "epoch": 0.61, - "grad_norm": 0.8456134232105421, - "learning_rate": 7.0700273132759374e-06, - "loss": 0.1859, - "step": 11933 - }, - { - "epoch": 0.61, - "grad_norm": 1.0665183972480694, - "learning_rate": 7.068452664449219e-06, - "loss": 0.189, - "step": 11934 - }, - { - "epoch": 0.61, - "grad_norm": 7.7600552381034, - "learning_rate": 7.066878095140892e-06, - "loss": 0.1883, - "step": 11935 - }, - { - "epoch": 0.61, - "grad_norm": 1.7283221691703692, - "learning_rate": 7.065303605393659e-06, - "loss": 0.1785, - "step": 11936 - }, - { - "epoch": 0.61, - "grad_norm": 3.944965829965286, - "learning_rate": 7.0637291952502355e-06, - "loss": 0.191, - "step": 11937 - }, - { - "epoch": 0.61, - "grad_norm": 1.2512234638902588, - "learning_rate": 7.062154864753321e-06, - "loss": 0.1475, - "step": 11938 - }, - { - "epoch": 0.61, - "grad_norm": 0.8827605647103475, - "learning_rate": 7.0605806139456205e-06, - "loss": 0.1713, - "step": 11939 - }, - { - "epoch": 0.61, - "grad_norm": 0.9519568727584321, - "learning_rate": 7.05900644286984e-06, - "loss": 0.1674, - "step": 11940 - }, - { - "epoch": 0.61, - "grad_norm": 1.0050025496707462, - "learning_rate": 7.057432351568671e-06, - "loss": 0.1691, - "step": 11941 - }, - { - "epoch": 0.61, - "grad_norm": 1.1691513647351677, - "learning_rate": 7.05585834008482e-06, - "loss": 0.1958, - "step": 11942 - }, - { - "epoch": 0.61, - "grad_norm": 1.082537787781271, - "learning_rate": 7.054284408460974e-06, - "loss": 0.1719, - "step": 11943 - }, - { - "epoch": 0.61, - "grad_norm": 1.0093555069161197, - "learning_rate": 7.052710556739835e-06, - "loss": 0.1721, - "step": 11944 - }, - { - "epoch": 0.61, - "grad_norm": 1.367301110642045, - "learning_rate": 7.051136784964083e-06, - "loss": 0.1933, - "step": 11945 - }, - { - "epoch": 0.61, - "grad_norm": 0.8675198631297651, - "learning_rate": 7.049563093176418e-06, - "loss": 0.1958, - "step": 11946 - }, - { - "epoch": 0.61, - "grad_norm": 0.8474305343227716, - "learning_rate": 7.047989481419516e-06, - "loss": 0.1806, - "step": 11947 - }, - { - "epoch": 0.61, - "grad_norm": 1.2477633360015177, - "learning_rate": 7.0464159497360675e-06, - "loss": 0.2078, - "step": 11948 - }, - { - "epoch": 0.61, - "grad_norm": 1.37049127199201, - "learning_rate": 7.044842498168752e-06, - "loss": 0.1967, - "step": 11949 - }, - { - "epoch": 0.61, - "grad_norm": 1.081084129031886, - "learning_rate": 7.043269126760255e-06, - "loss": 0.1803, - "step": 11950 - }, - { - "epoch": 0.61, - "grad_norm": 1.0286902605759753, - "learning_rate": 7.041695835553245e-06, - "loss": 0.1862, - "step": 11951 - }, - { - "epoch": 0.61, - "grad_norm": 0.9401373508312021, - "learning_rate": 7.040122624590405e-06, - "loss": 0.1669, - "step": 11952 - }, - { - "epoch": 0.61, - "grad_norm": 1.520605516020862, - "learning_rate": 7.038549493914404e-06, - "loss": 0.1609, - "step": 11953 - }, - { - "epoch": 0.61, - "grad_norm": 1.1363660276509426, - "learning_rate": 7.03697644356792e-06, - "loss": 0.1696, - "step": 11954 - }, - { - "epoch": 0.61, - "grad_norm": 1.269973237982814, - "learning_rate": 7.035403473593614e-06, - "loss": 0.1637, - "step": 11955 - }, - { - "epoch": 0.61, - "grad_norm": 1.0499017769975487, - "learning_rate": 7.0338305840341535e-06, - "loss": 0.1809, - "step": 11956 - }, - { - "epoch": 0.61, - "grad_norm": 0.9581650638544686, - "learning_rate": 7.032257774932212e-06, - "loss": 0.202, - "step": 11957 - }, - { - "epoch": 0.61, - "grad_norm": 1.1165249053239028, - "learning_rate": 7.030685046330441e-06, - "loss": 0.1592, - "step": 11958 - }, - { - "epoch": 0.61, - "grad_norm": 3.303437766903637, - "learning_rate": 7.02911239827151e-06, - "loss": 0.1623, - "step": 11959 - }, - { - "epoch": 0.61, - "grad_norm": 0.8286811962313174, - "learning_rate": 7.027539830798069e-06, - "loss": 0.1495, - "step": 11960 - }, - { - "epoch": 0.61, - "grad_norm": 2.4931590119544556, - "learning_rate": 7.02596734395278e-06, - "loss": 0.1829, - "step": 11961 - }, - { - "epoch": 0.61, - "grad_norm": 0.9899743456092847, - "learning_rate": 7.024394937778293e-06, - "loss": 0.178, - "step": 11962 - }, - { - "epoch": 0.61, - "grad_norm": 1.1519840371806078, - "learning_rate": 7.022822612317265e-06, - "loss": 0.2048, - "step": 11963 - }, - { - "epoch": 0.61, - "grad_norm": 0.9024290437544673, - "learning_rate": 7.021250367612338e-06, - "loss": 0.1552, - "step": 11964 - }, - { - "epoch": 0.61, - "grad_norm": 0.9192970344585016, - "learning_rate": 7.019678203706164e-06, - "loss": 0.1918, - "step": 11965 - }, - { - "epoch": 0.61, - "grad_norm": 0.9435428055972597, - "learning_rate": 7.018106120641386e-06, - "loss": 0.1798, - "step": 11966 - }, - { - "epoch": 0.61, - "grad_norm": 0.9971047001488691, - "learning_rate": 7.016534118460652e-06, - "loss": 0.1695, - "step": 11967 - }, - { - "epoch": 0.61, - "grad_norm": 1.1748885021929922, - "learning_rate": 7.014962197206594e-06, - "loss": 0.175, - "step": 11968 - }, - { - "epoch": 0.61, - "grad_norm": 0.9969796770369592, - "learning_rate": 7.013390356921858e-06, - "loss": 0.1897, - "step": 11969 - }, - { - "epoch": 0.61, - "grad_norm": 0.9595108683100719, - "learning_rate": 7.011818597649074e-06, - "loss": 0.1694, - "step": 11970 - }, - { - "epoch": 0.61, - "grad_norm": 0.9267782372098969, - "learning_rate": 7.010246919430884e-06, - "loss": 0.1956, - "step": 11971 - }, - { - "epoch": 0.61, - "grad_norm": 0.8777993144052069, - "learning_rate": 7.008675322309911e-06, - "loss": 0.1847, - "step": 11972 - }, - { - "epoch": 0.61, - "grad_norm": 1.7337740983158976, - "learning_rate": 7.0071038063287935e-06, - "loss": 0.1991, - "step": 11973 - }, - { - "epoch": 0.61, - "grad_norm": 1.0782367779242605, - "learning_rate": 7.005532371530152e-06, - "loss": 0.201, - "step": 11974 - }, - { - "epoch": 0.61, - "grad_norm": 1.501456289799915, - "learning_rate": 7.003961017956611e-06, - "loss": 0.1869, - "step": 11975 - }, - { - "epoch": 0.61, - "grad_norm": 1.1395341072149203, - "learning_rate": 7.002389745650801e-06, - "loss": 0.2017, - "step": 11976 - }, - { - "epoch": 0.61, - "grad_norm": 1.1050083080002921, - "learning_rate": 7.000818554655335e-06, - "loss": 0.1697, - "step": 11977 - }, - { - "epoch": 0.61, - "grad_norm": 0.7721561404794662, - "learning_rate": 6.9992474450128375e-06, - "loss": 0.1542, - "step": 11978 - }, - { - "epoch": 0.61, - "grad_norm": 1.043825789826784, - "learning_rate": 6.997676416765919e-06, - "loss": 0.1771, - "step": 11979 - }, - { - "epoch": 0.61, - "grad_norm": 3.473082441806016, - "learning_rate": 6.996105469957204e-06, - "loss": 0.1672, - "step": 11980 - }, - { - "epoch": 0.61, - "grad_norm": 1.1440472364717835, - "learning_rate": 6.994534604629291e-06, - "loss": 0.1782, - "step": 11981 - }, - { - "epoch": 0.61, - "grad_norm": 0.9716464255364722, - "learning_rate": 6.9929638208247994e-06, - "loss": 0.189, - "step": 11982 - }, - { - "epoch": 0.61, - "grad_norm": 1.069848684646463, - "learning_rate": 6.991393118586333e-06, - "loss": 0.199, - "step": 11983 - }, - { - "epoch": 0.61, - "grad_norm": 0.909313878941909, - "learning_rate": 6.989822497956501e-06, - "loss": 0.182, - "step": 11984 - }, - { - "epoch": 0.61, - "grad_norm": 1.029764795486539, - "learning_rate": 6.9882519589779005e-06, - "loss": 0.194, - "step": 11985 - }, - { - "epoch": 0.61, - "grad_norm": 0.9275625368461713, - "learning_rate": 6.986681501693139e-06, - "loss": 0.1921, - "step": 11986 - }, - { - "epoch": 0.61, - "grad_norm": 1.0466489063396667, - "learning_rate": 6.985111126144808e-06, - "loss": 0.1705, - "step": 11987 - }, - { - "epoch": 0.61, - "grad_norm": 0.7498953008113175, - "learning_rate": 6.983540832375511e-06, - "loss": 0.1567, - "step": 11988 - }, - { - "epoch": 0.61, - "grad_norm": 2.3164636095920277, - "learning_rate": 6.981970620427836e-06, - "loss": 0.1582, - "step": 11989 - }, - { - "epoch": 0.61, - "grad_norm": 0.9719462276135705, - "learning_rate": 6.980400490344383e-06, - "loss": 0.2122, - "step": 11990 - }, - { - "epoch": 0.61, - "grad_norm": 0.8601857009168751, - "learning_rate": 6.9788304421677355e-06, - "loss": 0.1746, - "step": 11991 - }, - { - "epoch": 0.61, - "grad_norm": 0.861866966346215, - "learning_rate": 6.97726047594048e-06, - "loss": 0.1765, - "step": 11992 - }, - { - "epoch": 0.61, - "grad_norm": 0.9007785415154862, - "learning_rate": 6.975690591705211e-06, - "loss": 0.162, - "step": 11993 - }, - { - "epoch": 0.61, - "grad_norm": 0.9620423411317283, - "learning_rate": 6.974120789504499e-06, - "loss": 0.1736, - "step": 11994 - }, - { - "epoch": 0.61, - "grad_norm": 4.686444458854959, - "learning_rate": 6.972551069380935e-06, - "loss": 0.1866, - "step": 11995 - }, - { - "epoch": 0.61, - "grad_norm": 1.3545266700952874, - "learning_rate": 6.9709814313770905e-06, - "loss": 0.1644, - "step": 11996 - }, - { - "epoch": 0.61, - "grad_norm": 1.1660286325084908, - "learning_rate": 6.969411875535552e-06, - "loss": 0.1799, - "step": 11997 - }, - { - "epoch": 0.61, - "grad_norm": 1.0370267507532889, - "learning_rate": 6.967842401898882e-06, - "loss": 0.2005, - "step": 11998 - }, - { - "epoch": 0.61, - "grad_norm": 0.7969908814107881, - "learning_rate": 6.966273010509663e-06, - "loss": 0.1664, - "step": 11999 - }, - { - "epoch": 0.61, - "grad_norm": 1.3401157501995602, - "learning_rate": 6.964703701410455e-06, - "loss": 0.1846, - "step": 12000 - }, - { - "epoch": 0.61, - "grad_norm": 1.0017537274961412, - "learning_rate": 6.963134474643834e-06, - "loss": 0.2044, - "step": 12001 - }, - { - "epoch": 0.61, - "grad_norm": 0.9056146251290355, - "learning_rate": 6.961565330252358e-06, - "loss": 0.1871, - "step": 12002 - }, - { - "epoch": 0.61, - "grad_norm": 1.2843447159022305, - "learning_rate": 6.959996268278599e-06, - "loss": 0.1831, - "step": 12003 - }, - { - "epoch": 0.61, - "grad_norm": 0.9759637968679855, - "learning_rate": 6.958427288765108e-06, - "loss": 0.1687, - "step": 12004 - }, - { - "epoch": 0.61, - "grad_norm": 1.1690450666793533, - "learning_rate": 6.956858391754453e-06, - "loss": 0.2056, - "step": 12005 - }, - { - "epoch": 0.61, - "grad_norm": 1.412385050354641, - "learning_rate": 6.955289577289181e-06, - "loss": 0.1845, - "step": 12006 - }, - { - "epoch": 0.61, - "grad_norm": 0.9436818805180692, - "learning_rate": 6.953720845411858e-06, - "loss": 0.1676, - "step": 12007 - }, - { - "epoch": 0.61, - "grad_norm": 1.3523117395487558, - "learning_rate": 6.952152196165025e-06, - "loss": 0.1984, - "step": 12008 - }, - { - "epoch": 0.61, - "grad_norm": 0.8942514384754394, - "learning_rate": 6.950583629591235e-06, - "loss": 0.1808, - "step": 12009 - }, - { - "epoch": 0.61, - "grad_norm": 0.9647460971315064, - "learning_rate": 6.94901514573304e-06, - "loss": 0.1716, - "step": 12010 - }, - { - "epoch": 0.61, - "grad_norm": 0.7947737276649559, - "learning_rate": 6.9474467446329775e-06, - "loss": 0.1601, - "step": 12011 - }, - { - "epoch": 0.61, - "grad_norm": 1.2135854992453965, - "learning_rate": 6.9458784263335965e-06, - "loss": 0.1749, - "step": 12012 - }, - { - "epoch": 0.61, - "grad_norm": 0.8258705553834466, - "learning_rate": 6.944310190877433e-06, - "loss": 0.1806, - "step": 12013 - }, - { - "epoch": 0.61, - "grad_norm": 1.0624409171900406, - "learning_rate": 6.942742038307033e-06, - "loss": 0.1654, - "step": 12014 - }, - { - "epoch": 0.61, - "grad_norm": 0.8998384943681688, - "learning_rate": 6.941173968664923e-06, - "loss": 0.1806, - "step": 12015 - }, - { - "epoch": 0.61, - "grad_norm": 1.0775549450326525, - "learning_rate": 6.939605981993647e-06, - "loss": 0.1928, - "step": 12016 - }, - { - "epoch": 0.61, - "grad_norm": 1.0758381169200557, - "learning_rate": 6.938038078335727e-06, - "loss": 0.1669, - "step": 12017 - }, - { - "epoch": 0.61, - "grad_norm": 1.0478686096447207, - "learning_rate": 6.936470257733699e-06, - "loss": 0.1935, - "step": 12018 - }, - { - "epoch": 0.61, - "grad_norm": 1.0007796945466119, - "learning_rate": 6.9349025202300865e-06, - "loss": 0.163, - "step": 12019 - }, - { - "epoch": 0.61, - "grad_norm": 1.062954767058354, - "learning_rate": 6.93333486586742e-06, - "loss": 0.174, - "step": 12020 - }, - { - "epoch": 0.61, - "grad_norm": 1.1895420775002619, - "learning_rate": 6.931767294688214e-06, - "loss": 0.1786, - "step": 12021 - }, - { - "epoch": 0.61, - "grad_norm": 1.150659466077958, - "learning_rate": 6.930199806734996e-06, - "loss": 0.1693, - "step": 12022 - }, - { - "epoch": 0.61, - "grad_norm": 0.9054373439655421, - "learning_rate": 6.92863240205028e-06, - "loss": 0.1825, - "step": 12023 - }, - { - "epoch": 0.61, - "grad_norm": 1.0779083796299886, - "learning_rate": 6.927065080676587e-06, - "loss": 0.1796, - "step": 12024 - }, - { - "epoch": 0.61, - "grad_norm": 1.6111483127425923, - "learning_rate": 6.9254978426564256e-06, - "loss": 0.1789, - "step": 12025 - }, - { - "epoch": 0.61, - "grad_norm": 1.7561342831981865, - "learning_rate": 6.923930688032308e-06, - "loss": 0.1653, - "step": 12026 - }, - { - "epoch": 0.61, - "grad_norm": 1.252568257892285, - "learning_rate": 6.922363616846746e-06, - "loss": 0.1928, - "step": 12027 - }, - { - "epoch": 0.61, - "grad_norm": 1.3861545130349302, - "learning_rate": 6.920796629142242e-06, - "loss": 0.1789, - "step": 12028 - }, - { - "epoch": 0.61, - "grad_norm": 1.126739769588076, - "learning_rate": 6.9192297249613074e-06, - "loss": 0.1963, - "step": 12029 - }, - { - "epoch": 0.61, - "grad_norm": 1.376665325309207, - "learning_rate": 6.9176629043464364e-06, - "loss": 0.1705, - "step": 12030 - }, - { - "epoch": 0.61, - "grad_norm": 1.0270356795638427, - "learning_rate": 6.916096167340134e-06, - "loss": 0.1797, - "step": 12031 - }, - { - "epoch": 0.61, - "grad_norm": 0.872751930898111, - "learning_rate": 6.9145295139848954e-06, - "loss": 0.1788, - "step": 12032 - }, - { - "epoch": 0.61, - "grad_norm": 1.3292159171289397, - "learning_rate": 6.9129629443232235e-06, - "loss": 0.184, - "step": 12033 - }, - { - "epoch": 0.61, - "grad_norm": 0.9125351804266222, - "learning_rate": 6.911396458397602e-06, - "loss": 0.1744, - "step": 12034 - }, - { - "epoch": 0.61, - "grad_norm": 0.8528246667288966, - "learning_rate": 6.909830056250527e-06, - "loss": 0.1622, - "step": 12035 - }, - { - "epoch": 0.61, - "grad_norm": 1.270843585328278, - "learning_rate": 6.9082637379244844e-06, - "loss": 0.2054, - "step": 12036 - }, - { - "epoch": 0.61, - "grad_norm": 0.9391457519420248, - "learning_rate": 6.906697503461968e-06, - "loss": 0.1799, - "step": 12037 - }, - { - "epoch": 0.61, - "grad_norm": 0.7632331087196969, - "learning_rate": 6.90513135290545e-06, - "loss": 0.1588, - "step": 12038 - }, - { - "epoch": 0.61, - "grad_norm": 1.0727942068343892, - "learning_rate": 6.903565286297422e-06, - "loss": 0.1679, - "step": 12039 - }, - { - "epoch": 0.61, - "grad_norm": 1.0133257925068448, - "learning_rate": 6.901999303680359e-06, - "loss": 0.177, - "step": 12040 - }, - { - "epoch": 0.61, - "grad_norm": 0.8623895125847567, - "learning_rate": 6.900433405096744e-06, - "loss": 0.1623, - "step": 12041 - }, - { - "epoch": 0.61, - "grad_norm": 0.9602670825421511, - "learning_rate": 6.898867590589047e-06, - "loss": 0.1617, - "step": 12042 - }, - { - "epoch": 0.61, - "grad_norm": 0.9567573291126384, - "learning_rate": 6.897301860199738e-06, - "loss": 0.1533, - "step": 12043 - }, - { - "epoch": 0.61, - "grad_norm": 0.9042572859837555, - "learning_rate": 6.895736213971293e-06, - "loss": 0.1654, - "step": 12044 - }, - { - "epoch": 0.61, - "grad_norm": 1.330908865010921, - "learning_rate": 6.8941706519461785e-06, - "loss": 0.1989, - "step": 12045 - }, - { - "epoch": 0.61, - "grad_norm": 1.0736449432758612, - "learning_rate": 6.892605174166862e-06, - "loss": 0.1762, - "step": 12046 - }, - { - "epoch": 0.61, - "grad_norm": 0.8609311260815152, - "learning_rate": 6.891039780675803e-06, - "loss": 0.1609, - "step": 12047 - }, - { - "epoch": 0.61, - "grad_norm": 1.502432617527464, - "learning_rate": 6.889474471515467e-06, - "loss": 0.1671, - "step": 12048 - }, - { - "epoch": 0.61, - "grad_norm": 1.5916558758401025, - "learning_rate": 6.887909246728311e-06, - "loss": 0.1678, - "step": 12049 - }, - { - "epoch": 0.61, - "grad_norm": 1.7377823963807983, - "learning_rate": 6.886344106356795e-06, - "loss": 0.1733, - "step": 12050 - }, - { - "epoch": 0.61, - "grad_norm": 0.8830484258834155, - "learning_rate": 6.8847790504433664e-06, - "loss": 0.162, - "step": 12051 - }, - { - "epoch": 0.61, - "grad_norm": 1.1390348137066064, - "learning_rate": 6.883214079030485e-06, - "loss": 0.1737, - "step": 12052 - }, - { - "epoch": 0.61, - "grad_norm": 0.8768182692622531, - "learning_rate": 6.881649192160596e-06, - "loss": 0.1553, - "step": 12053 - }, - { - "epoch": 0.61, - "grad_norm": 1.3875687021614835, - "learning_rate": 6.880084389876153e-06, - "loss": 0.1863, - "step": 12054 - }, - { - "epoch": 0.61, - "grad_norm": 0.9544048864076122, - "learning_rate": 6.878519672219592e-06, - "loss": 0.1617, - "step": 12055 - }, - { - "epoch": 0.61, - "grad_norm": 1.0780909741086129, - "learning_rate": 6.8769550392333665e-06, - "loss": 0.1881, - "step": 12056 - }, - { - "epoch": 0.61, - "grad_norm": 0.954038343043023, - "learning_rate": 6.875390490959907e-06, - "loss": 0.1743, - "step": 12057 - }, - { - "epoch": 0.61, - "grad_norm": 1.1336834996926124, - "learning_rate": 6.873826027441659e-06, - "loss": 0.1784, - "step": 12058 - }, - { - "epoch": 0.61, - "grad_norm": 0.9306362669905568, - "learning_rate": 6.872261648721055e-06, - "loss": 0.1879, - "step": 12059 - }, - { - "epoch": 0.61, - "grad_norm": 1.197695678667343, - "learning_rate": 6.870697354840534e-06, - "loss": 0.1962, - "step": 12060 - }, - { - "epoch": 0.61, - "grad_norm": 0.8779976121881782, - "learning_rate": 6.869133145842521e-06, - "loss": 0.1757, - "step": 12061 - }, - { - "epoch": 0.61, - "grad_norm": 1.0792172420476411, - "learning_rate": 6.8675690217694476e-06, - "loss": 0.1962, - "step": 12062 - }, - { - "epoch": 0.61, - "grad_norm": 1.0307635970176123, - "learning_rate": 6.866004982663746e-06, - "loss": 0.1805, - "step": 12063 - }, - { - "epoch": 0.61, - "grad_norm": 0.8749574919822711, - "learning_rate": 6.864441028567831e-06, - "loss": 0.1726, - "step": 12064 - }, - { - "epoch": 0.61, - "grad_norm": 1.898665738913217, - "learning_rate": 6.862877159524133e-06, - "loss": 0.1558, - "step": 12065 - }, - { - "epoch": 0.61, - "grad_norm": 1.4824044475075975, - "learning_rate": 6.861313375575067e-06, - "loss": 0.1659, - "step": 12066 - }, - { - "epoch": 0.61, - "grad_norm": 0.9629904483904526, - "learning_rate": 6.859749676763056e-06, - "loss": 0.1581, - "step": 12067 - }, - { - "epoch": 0.61, - "grad_norm": 0.8272828329013648, - "learning_rate": 6.85818606313051e-06, - "loss": 0.1689, - "step": 12068 - }, - { - "epoch": 0.61, - "grad_norm": 1.6118105822053284, - "learning_rate": 6.856622534719848e-06, - "loss": 0.1891, - "step": 12069 - }, - { - "epoch": 0.61, - "grad_norm": 1.7255331847586153, - "learning_rate": 6.855059091573472e-06, - "loss": 0.2084, - "step": 12070 - }, - { - "epoch": 0.61, - "grad_norm": 1.1749647430370458, - "learning_rate": 6.853495733733799e-06, - "loss": 0.2064, - "step": 12071 - }, - { - "epoch": 0.61, - "grad_norm": 0.8938854197590528, - "learning_rate": 6.851932461243229e-06, - "loss": 0.1925, - "step": 12072 - }, - { - "epoch": 0.61, - "grad_norm": 1.0110531702751793, - "learning_rate": 6.850369274144174e-06, - "loss": 0.1726, - "step": 12073 - }, - { - "epoch": 0.61, - "grad_norm": 0.6937841164262705, - "learning_rate": 6.848806172479025e-06, - "loss": 0.1823, - "step": 12074 - }, - { - "epoch": 0.61, - "grad_norm": 0.9360324682026385, - "learning_rate": 6.847243156290191e-06, - "loss": 0.173, - "step": 12075 - }, - { - "epoch": 0.61, - "grad_norm": 2.5232523179973194, - "learning_rate": 6.8456802256200596e-06, - "loss": 0.1734, - "step": 12076 - }, - { - "epoch": 0.61, - "grad_norm": 1.0482809026447173, - "learning_rate": 6.844117380511036e-06, - "loss": 0.1811, - "step": 12077 - }, - { - "epoch": 0.61, - "grad_norm": 0.8719802085341933, - "learning_rate": 6.842554621005504e-06, - "loss": 0.1792, - "step": 12078 - }, - { - "epoch": 0.61, - "grad_norm": 1.224889727426434, - "learning_rate": 6.840991947145854e-06, - "loss": 0.1858, - "step": 12079 - }, - { - "epoch": 0.61, - "grad_norm": 1.0220060453992144, - "learning_rate": 6.839429358974481e-06, - "loss": 0.1755, - "step": 12080 - }, - { - "epoch": 0.61, - "grad_norm": 0.9639989927823014, - "learning_rate": 6.837866856533761e-06, - "loss": 0.1683, - "step": 12081 - }, - { - "epoch": 0.61, - "grad_norm": 1.2119181742534815, - "learning_rate": 6.836304439866084e-06, - "loss": 0.1741, - "step": 12082 - }, - { - "epoch": 0.61, - "grad_norm": 1.02227900587153, - "learning_rate": 6.834742109013823e-06, - "loss": 0.1968, - "step": 12083 - }, - { - "epoch": 0.61, - "grad_norm": 1.3070559048308237, - "learning_rate": 6.833179864019366e-06, - "loss": 0.1761, - "step": 12084 - }, - { - "epoch": 0.61, - "grad_norm": 1.1660429668454133, - "learning_rate": 6.83161770492508e-06, - "loss": 0.1861, - "step": 12085 - }, - { - "epoch": 0.61, - "grad_norm": 0.8861079705636326, - "learning_rate": 6.830055631773347e-06, - "loss": 0.1627, - "step": 12086 - }, - { - "epoch": 0.61, - "grad_norm": 0.838978038894428, - "learning_rate": 6.82849364460653e-06, - "loss": 0.1684, - "step": 12087 - }, - { - "epoch": 0.61, - "grad_norm": 0.8761185023942043, - "learning_rate": 6.826931743467004e-06, - "loss": 0.1795, - "step": 12088 - }, - { - "epoch": 0.61, - "grad_norm": 0.8362184895394403, - "learning_rate": 6.825369928397132e-06, - "loss": 0.1841, - "step": 12089 - }, - { - "epoch": 0.61, - "grad_norm": 1.3908870975417034, - "learning_rate": 6.8238081994392836e-06, - "loss": 0.1791, - "step": 12090 - }, - { - "epoch": 0.61, - "grad_norm": 0.8640804279105561, - "learning_rate": 6.822246556635814e-06, - "loss": 0.1689, - "step": 12091 - }, - { - "epoch": 0.61, - "grad_norm": 1.0446730178037433, - "learning_rate": 6.820685000029087e-06, - "loss": 0.1835, - "step": 12092 - }, - { - "epoch": 0.61, - "grad_norm": 1.0395863481397598, - "learning_rate": 6.819123529661458e-06, - "loss": 0.1669, - "step": 12093 - }, - { - "epoch": 0.62, - "grad_norm": 0.9949242524187777, - "learning_rate": 6.817562145575285e-06, - "loss": 0.1808, - "step": 12094 - }, - { - "epoch": 0.62, - "grad_norm": 1.2442467941307531, - "learning_rate": 6.81600084781292e-06, - "loss": 0.1768, - "step": 12095 - }, - { - "epoch": 0.62, - "grad_norm": 0.9768991335555441, - "learning_rate": 6.814439636416708e-06, - "loss": 0.1734, - "step": 12096 - }, - { - "epoch": 0.62, - "grad_norm": 1.1049467939463726, - "learning_rate": 6.812878511429002e-06, - "loss": 0.2294, - "step": 12097 - }, - { - "epoch": 0.62, - "grad_norm": 0.8864407710560207, - "learning_rate": 6.811317472892145e-06, - "loss": 0.1815, - "step": 12098 - }, - { - "epoch": 0.62, - "grad_norm": 1.648388743148969, - "learning_rate": 6.809756520848486e-06, - "loss": 0.1769, - "step": 12099 - }, - { - "epoch": 0.62, - "grad_norm": 0.7739231396867304, - "learning_rate": 6.808195655340357e-06, - "loss": 0.1562, - "step": 12100 - }, - { - "epoch": 0.62, - "grad_norm": 0.9479706936152373, - "learning_rate": 6.806634876410103e-06, - "loss": 0.1788, - "step": 12101 - }, - { - "epoch": 0.62, - "grad_norm": 1.5049638635512383, - "learning_rate": 6.805074184100056e-06, - "loss": 0.1851, - "step": 12102 - }, - { - "epoch": 0.62, - "grad_norm": 1.1612459449559867, - "learning_rate": 6.803513578452557e-06, - "loss": 0.1992, - "step": 12103 - }, - { - "epoch": 0.62, - "grad_norm": 0.8635805305573898, - "learning_rate": 6.8019530595099294e-06, - "loss": 0.1798, - "step": 12104 - }, - { - "epoch": 0.62, - "grad_norm": 0.9803848651176261, - "learning_rate": 6.800392627314506e-06, - "loss": 0.1819, - "step": 12105 - }, - { - "epoch": 0.62, - "grad_norm": 0.934349171865883, - "learning_rate": 6.798832281908612e-06, - "loss": 0.1826, - "step": 12106 - }, - { - "epoch": 0.62, - "grad_norm": 1.284915985657181, - "learning_rate": 6.797272023334578e-06, - "loss": 0.17, - "step": 12107 - }, - { - "epoch": 0.62, - "grad_norm": 0.967496996176986, - "learning_rate": 6.7957118516347156e-06, - "loss": 0.1915, - "step": 12108 - }, - { - "epoch": 0.62, - "grad_norm": 1.028295447950258, - "learning_rate": 6.794151766851356e-06, - "loss": 0.1713, - "step": 12109 - }, - { - "epoch": 0.62, - "grad_norm": 0.9704153265494159, - "learning_rate": 6.792591769026804e-06, - "loss": 0.1794, - "step": 12110 - }, - { - "epoch": 0.62, - "grad_norm": 1.5479240050610468, - "learning_rate": 6.791031858203385e-06, - "loss": 0.17, - "step": 12111 - }, - { - "epoch": 0.62, - "grad_norm": 1.0705127091317566, - "learning_rate": 6.789472034423412e-06, - "loss": 0.1824, - "step": 12112 - }, - { - "epoch": 0.62, - "grad_norm": 1.0649028556132054, - "learning_rate": 6.787912297729184e-06, - "loss": 0.1883, - "step": 12113 - }, - { - "epoch": 0.62, - "grad_norm": 0.9102264829131282, - "learning_rate": 6.786352648163022e-06, - "loss": 0.1838, - "step": 12114 - }, - { - "epoch": 0.62, - "grad_norm": 1.0511014344801506, - "learning_rate": 6.7847930857672205e-06, - "loss": 0.1784, - "step": 12115 - }, - { - "epoch": 0.62, - "grad_norm": 1.0186815963205775, - "learning_rate": 6.783233610584095e-06, - "loss": 0.1676, - "step": 12116 - }, - { - "epoch": 0.62, - "grad_norm": 0.8880854123514019, - "learning_rate": 6.781674222655934e-06, - "loss": 0.1794, - "step": 12117 - }, - { - "epoch": 0.62, - "grad_norm": 0.9295879693635356, - "learning_rate": 6.780114922025043e-06, - "loss": 0.1856, - "step": 12118 - }, - { - "epoch": 0.62, - "grad_norm": 0.9006839537874907, - "learning_rate": 6.778555708733715e-06, - "loss": 0.1743, - "step": 12119 - }, - { - "epoch": 0.62, - "grad_norm": 0.9975869625126415, - "learning_rate": 6.7769965828242505e-06, - "loss": 0.1928, - "step": 12120 - }, - { - "epoch": 0.62, - "grad_norm": 1.1032710418317961, - "learning_rate": 6.7754375443389294e-06, - "loss": 0.1958, - "step": 12121 - }, - { - "epoch": 0.62, - "grad_norm": 1.0261541172084292, - "learning_rate": 6.773878593320052e-06, - "loss": 0.1651, - "step": 12122 - }, - { - "epoch": 0.62, - "grad_norm": 0.7787786381676972, - "learning_rate": 6.772319729809895e-06, - "loss": 0.1636, - "step": 12123 - }, - { - "epoch": 0.62, - "grad_norm": 1.6696086999669617, - "learning_rate": 6.770760953850754e-06, - "loss": 0.1811, - "step": 12124 - }, - { - "epoch": 0.62, - "grad_norm": 1.4360424505617724, - "learning_rate": 6.769202265484899e-06, - "loss": 0.1558, - "step": 12125 - }, - { - "epoch": 0.62, - "grad_norm": 1.1170506912862783, - "learning_rate": 6.767643664754619e-06, - "loss": 0.1812, - "step": 12126 - }, - { - "epoch": 0.62, - "grad_norm": 1.2946728030781767, - "learning_rate": 6.766085151702184e-06, - "loss": 0.1774, - "step": 12127 - }, - { - "epoch": 0.62, - "grad_norm": 0.9326024358102776, - "learning_rate": 6.764526726369873e-06, - "loss": 0.1827, - "step": 12128 - }, - { - "epoch": 0.62, - "grad_norm": 1.076727177272974, - "learning_rate": 6.762968388799958e-06, - "loss": 0.1913, - "step": 12129 - }, - { - "epoch": 0.62, - "grad_norm": 0.9799196474439371, - "learning_rate": 6.7614101390347095e-06, - "loss": 0.1742, - "step": 12130 - }, - { - "epoch": 0.62, - "grad_norm": 0.9460898423948457, - "learning_rate": 6.759851977116392e-06, - "loss": 0.1788, - "step": 12131 - }, - { - "epoch": 0.62, - "grad_norm": 0.9967772399446618, - "learning_rate": 6.758293903087272e-06, - "loss": 0.1802, - "step": 12132 - }, - { - "epoch": 0.62, - "grad_norm": 0.9750470589847444, - "learning_rate": 6.756735916989616e-06, - "loss": 0.169, - "step": 12133 - }, - { - "epoch": 0.62, - "grad_norm": 0.9776038603721809, - "learning_rate": 6.755178018865678e-06, - "loss": 0.1588, - "step": 12134 - }, - { - "epoch": 0.62, - "grad_norm": 0.8506390814787281, - "learning_rate": 6.753620208757721e-06, - "loss": 0.1761, - "step": 12135 - }, - { - "epoch": 0.62, - "grad_norm": 0.8690918817459329, - "learning_rate": 6.7520624867079965e-06, - "loss": 0.1851, - "step": 12136 - }, - { - "epoch": 0.62, - "grad_norm": 0.9535446340652028, - "learning_rate": 6.7505048527587656e-06, - "loss": 0.1634, - "step": 12137 - }, - { - "epoch": 0.62, - "grad_norm": 1.3906783266482012, - "learning_rate": 6.748947306952269e-06, - "loss": 0.179, - "step": 12138 - }, - { - "epoch": 0.62, - "grad_norm": 1.0931209334496226, - "learning_rate": 6.747389849330765e-06, - "loss": 0.1685, - "step": 12139 - }, - { - "epoch": 0.62, - "grad_norm": 1.2113979580898313, - "learning_rate": 6.745832479936492e-06, - "loss": 0.186, - "step": 12140 - }, - { - "epoch": 0.62, - "grad_norm": 1.0226123145274468, - "learning_rate": 6.744275198811698e-06, - "loss": 0.2114, - "step": 12141 - }, - { - "epoch": 0.62, - "grad_norm": 0.8793336866514591, - "learning_rate": 6.742718005998621e-06, - "loss": 0.1767, - "step": 12142 - }, - { - "epoch": 0.62, - "grad_norm": 1.0782116318894777, - "learning_rate": 6.741160901539506e-06, - "loss": 0.1711, - "step": 12143 - }, - { - "epoch": 0.62, - "grad_norm": 1.1707774557584145, - "learning_rate": 6.7396038854765825e-06, - "loss": 0.1655, - "step": 12144 - }, - { - "epoch": 0.62, - "grad_norm": 1.104390856186119, - "learning_rate": 6.738046957852089e-06, - "loss": 0.1661, - "step": 12145 - }, - { - "epoch": 0.62, - "grad_norm": 0.9243443440720327, - "learning_rate": 6.736490118708253e-06, - "loss": 0.1777, - "step": 12146 - }, - { - "epoch": 0.62, - "grad_norm": 1.7956070831990771, - "learning_rate": 6.7349333680873134e-06, - "loss": 0.1696, - "step": 12147 - }, - { - "epoch": 0.62, - "grad_norm": 1.0515919332880705, - "learning_rate": 6.733376706031486e-06, - "loss": 0.1781, - "step": 12148 - }, - { - "epoch": 0.62, - "grad_norm": 0.857621003636688, - "learning_rate": 6.731820132582999e-06, - "loss": 0.1746, - "step": 12149 - }, - { - "epoch": 0.62, - "grad_norm": 1.1617687067828353, - "learning_rate": 6.730263647784079e-06, - "loss": 0.1528, - "step": 12150 - }, - { - "epoch": 0.62, - "grad_norm": 2.0959333069840698, - "learning_rate": 6.728707251676939e-06, - "loss": 0.1886, - "step": 12151 - }, - { - "epoch": 0.62, - "grad_norm": 1.1623054341465726, - "learning_rate": 6.727150944303804e-06, - "loss": 0.1776, - "step": 12152 - }, - { - "epoch": 0.62, - "grad_norm": 1.200230087745242, - "learning_rate": 6.7255947257068785e-06, - "loss": 0.1949, - "step": 12153 - }, - { - "epoch": 0.62, - "grad_norm": 0.8042497751515356, - "learning_rate": 6.724038595928385e-06, - "loss": 0.1624, - "step": 12154 - }, - { - "epoch": 0.62, - "grad_norm": 1.3636760843144802, - "learning_rate": 6.722482555010528e-06, - "loss": 0.1907, - "step": 12155 - }, - { - "epoch": 0.62, - "grad_norm": 0.8964440433985537, - "learning_rate": 6.720926602995519e-06, - "loss": 0.177, - "step": 12156 - }, - { - "epoch": 0.62, - "grad_norm": 0.782888558585591, - "learning_rate": 6.719370739925557e-06, - "loss": 0.1531, - "step": 12157 - }, - { - "epoch": 0.62, - "grad_norm": 1.4599766269420338, - "learning_rate": 6.717814965842852e-06, - "loss": 0.1639, - "step": 12158 - }, - { - "epoch": 0.62, - "grad_norm": 2.032260911561945, - "learning_rate": 6.716259280789599e-06, - "loss": 0.2493, - "step": 12159 - }, - { - "epoch": 0.62, - "grad_norm": 1.4459519392693336, - "learning_rate": 6.714703684808004e-06, - "loss": 0.1685, - "step": 12160 - }, - { - "epoch": 0.62, - "grad_norm": 0.8809204338200619, - "learning_rate": 6.7131481779402505e-06, - "loss": 0.178, - "step": 12161 - }, - { - "epoch": 0.62, - "grad_norm": 2.0338654307609594, - "learning_rate": 6.7115927602285424e-06, - "loss": 0.1912, - "step": 12162 - }, - { - "epoch": 0.62, - "grad_norm": 1.0140484819462559, - "learning_rate": 6.710037431715063e-06, - "loss": 0.1967, - "step": 12163 - }, - { - "epoch": 0.62, - "grad_norm": 1.0985829362573525, - "learning_rate": 6.70848219244201e-06, - "loss": 0.1755, - "step": 12164 - }, - { - "epoch": 0.62, - "grad_norm": 0.7797192565191957, - "learning_rate": 6.706927042451561e-06, - "loss": 0.1654, - "step": 12165 - }, - { - "epoch": 0.62, - "grad_norm": 1.0983960363940297, - "learning_rate": 6.7053719817859e-06, - "loss": 0.1785, - "step": 12166 - }, - { - "epoch": 0.62, - "grad_norm": 0.75585709682769, - "learning_rate": 6.7038170104872106e-06, - "loss": 0.1605, - "step": 12167 - }, - { - "epoch": 0.62, - "grad_norm": 1.2958472121653062, - "learning_rate": 6.70226212859767e-06, - "loss": 0.217, - "step": 12168 - }, - { - "epoch": 0.62, - "grad_norm": 0.8712282797124832, - "learning_rate": 6.700707336159458e-06, - "loss": 0.1797, - "step": 12169 - }, - { - "epoch": 0.62, - "grad_norm": 0.9107466535849766, - "learning_rate": 6.699152633214743e-06, - "loss": 0.18, - "step": 12170 - }, - { - "epoch": 0.62, - "grad_norm": 0.8591545415978744, - "learning_rate": 6.697598019805701e-06, - "loss": 0.175, - "step": 12171 - }, - { - "epoch": 0.62, - "grad_norm": 1.1046279818870088, - "learning_rate": 6.696043495974498e-06, - "loss": 0.1778, - "step": 12172 - }, - { - "epoch": 0.62, - "grad_norm": 1.0874204812001573, - "learning_rate": 6.6944890617633055e-06, - "loss": 0.1929, - "step": 12173 - }, - { - "epoch": 0.62, - "grad_norm": 1.1738304949725218, - "learning_rate": 6.6929347172142785e-06, - "loss": 0.208, - "step": 12174 - }, - { - "epoch": 0.62, - "grad_norm": 1.1878166769378955, - "learning_rate": 6.691380462369588e-06, - "loss": 0.1731, - "step": 12175 - }, - { - "epoch": 0.62, - "grad_norm": 0.9056200496531202, - "learning_rate": 6.689826297271384e-06, - "loss": 0.1787, - "step": 12176 - }, - { - "epoch": 0.62, - "grad_norm": 1.1374102316252948, - "learning_rate": 6.6882722219618355e-06, - "loss": 0.1701, - "step": 12177 - }, - { - "epoch": 0.62, - "grad_norm": 0.695640372186796, - "learning_rate": 6.686718236483086e-06, - "loss": 0.1782, - "step": 12178 - }, - { - "epoch": 0.62, - "grad_norm": 1.3815010096582772, - "learning_rate": 6.685164340877295e-06, - "loss": 0.1695, - "step": 12179 - }, - { - "epoch": 0.62, - "grad_norm": 1.083543432973144, - "learning_rate": 6.683610535186604e-06, - "loss": 0.1866, - "step": 12180 - }, - { - "epoch": 0.62, - "grad_norm": 1.2725443888858214, - "learning_rate": 6.682056819453168e-06, - "loss": 0.1651, - "step": 12181 - }, - { - "epoch": 0.62, - "grad_norm": 0.8442178166320784, - "learning_rate": 6.680503193719129e-06, - "loss": 0.1743, - "step": 12182 - }, - { - "epoch": 0.62, - "grad_norm": 1.0029030646090114, - "learning_rate": 6.678949658026625e-06, - "loss": 0.1923, - "step": 12183 - }, - { - "epoch": 0.62, - "grad_norm": 2.3476567787546982, - "learning_rate": 6.677396212417801e-06, - "loss": 0.1663, - "step": 12184 - }, - { - "epoch": 0.62, - "grad_norm": 1.0698221213216408, - "learning_rate": 6.675842856934789e-06, - "loss": 0.1724, - "step": 12185 - }, - { - "epoch": 0.62, - "grad_norm": 1.0245881277985018, - "learning_rate": 6.674289591619732e-06, - "loss": 0.1664, - "step": 12186 - }, - { - "epoch": 0.62, - "grad_norm": 0.81864411656269, - "learning_rate": 6.672736416514754e-06, - "loss": 0.1814, - "step": 12187 - }, - { - "epoch": 0.62, - "grad_norm": 1.1444151316489966, - "learning_rate": 6.671183331661991e-06, - "loss": 0.1682, - "step": 12188 - }, - { - "epoch": 0.62, - "grad_norm": 1.0227672142427702, - "learning_rate": 6.669630337103565e-06, - "loss": 0.194, - "step": 12189 - }, - { - "epoch": 0.62, - "grad_norm": 1.1132674106865061, - "learning_rate": 6.66807743288161e-06, - "loss": 0.1802, - "step": 12190 - }, - { - "epoch": 0.62, - "grad_norm": 9.422171075788219, - "learning_rate": 6.666524619038237e-06, - "loss": 0.1802, - "step": 12191 - }, - { - "epoch": 0.62, - "grad_norm": 1.1871758296843817, - "learning_rate": 6.664971895615578e-06, - "loss": 0.1834, - "step": 12192 - }, - { - "epoch": 0.62, - "grad_norm": 2.460493666235359, - "learning_rate": 6.663419262655739e-06, - "loss": 0.1733, - "step": 12193 - }, - { - "epoch": 0.62, - "grad_norm": 0.7359353917510435, - "learning_rate": 6.6618667202008435e-06, - "loss": 0.1693, - "step": 12194 - }, - { - "epoch": 0.62, - "grad_norm": 1.112629516499026, - "learning_rate": 6.660314268293e-06, - "loss": 0.1551, - "step": 12195 - }, - { - "epoch": 0.62, - "grad_norm": 1.0067294238530116, - "learning_rate": 6.6587619069743236e-06, - "loss": 0.1956, - "step": 12196 - }, - { - "epoch": 0.62, - "grad_norm": 1.1353157521263597, - "learning_rate": 6.6572096362869165e-06, - "loss": 0.1654, - "step": 12197 - }, - { - "epoch": 0.62, - "grad_norm": 1.0968623503957977, - "learning_rate": 6.655657456272888e-06, - "loss": 0.2003, - "step": 12198 - }, - { - "epoch": 0.62, - "grad_norm": 0.9659530346298896, - "learning_rate": 6.6541053669743375e-06, - "loss": 0.1741, - "step": 12199 - }, - { - "epoch": 0.62, - "grad_norm": 1.1324711606541336, - "learning_rate": 6.6525533684333724e-06, - "loss": 0.1946, - "step": 12200 - }, - { - "epoch": 0.62, - "grad_norm": 1.06053526847367, - "learning_rate": 6.6510014606920845e-06, - "loss": 0.1825, - "step": 12201 - }, - { - "epoch": 0.62, - "grad_norm": 0.936716622629329, - "learning_rate": 6.64944964379257e-06, - "loss": 0.1448, - "step": 12202 - }, - { - "epoch": 0.62, - "grad_norm": 0.8731618283716971, - "learning_rate": 6.647897917776925e-06, - "loss": 0.1658, - "step": 12203 - }, - { - "epoch": 0.62, - "grad_norm": 0.9600048699949525, - "learning_rate": 6.646346282687235e-06, - "loss": 0.179, - "step": 12204 - }, - { - "epoch": 0.62, - "grad_norm": 1.387098874350567, - "learning_rate": 6.644794738565597e-06, - "loss": 0.1876, - "step": 12205 - }, - { - "epoch": 0.62, - "grad_norm": 0.8351157077754774, - "learning_rate": 6.643243285454086e-06, - "loss": 0.1786, - "step": 12206 - }, - { - "epoch": 0.62, - "grad_norm": 0.8903763554787368, - "learning_rate": 6.641691923394792e-06, - "loss": 0.1825, - "step": 12207 - }, - { - "epoch": 0.62, - "grad_norm": 0.9163699840386397, - "learning_rate": 6.640140652429793e-06, - "loss": 0.1649, - "step": 12208 - }, - { - "epoch": 0.62, - "grad_norm": 1.1858028194793624, - "learning_rate": 6.6385894726011725e-06, - "loss": 0.166, - "step": 12209 - }, - { - "epoch": 0.62, - "grad_norm": 1.400037704545918, - "learning_rate": 6.637038383950998e-06, - "loss": 0.1737, - "step": 12210 - }, - { - "epoch": 0.62, - "grad_norm": 0.9340838632121498, - "learning_rate": 6.63548738652135e-06, - "loss": 0.1666, - "step": 12211 - }, - { - "epoch": 0.62, - "grad_norm": 1.4298851897685383, - "learning_rate": 6.633936480354294e-06, - "loss": 0.1829, - "step": 12212 - }, - { - "epoch": 0.62, - "grad_norm": 1.153294646208856, - "learning_rate": 6.632385665491905e-06, - "loss": 0.1828, - "step": 12213 - }, - { - "epoch": 0.62, - "grad_norm": 1.5212721972224903, - "learning_rate": 6.630834941976241e-06, - "loss": 0.1757, - "step": 12214 - }, - { - "epoch": 0.62, - "grad_norm": 1.8293599964046527, - "learning_rate": 6.629284309849373e-06, - "loss": 0.1726, - "step": 12215 - }, - { - "epoch": 0.62, - "grad_norm": 1.1039891440162553, - "learning_rate": 6.627733769153355e-06, - "loss": 0.182, - "step": 12216 - }, - { - "epoch": 0.62, - "grad_norm": 0.9536046109231258, - "learning_rate": 6.626183319930253e-06, - "loss": 0.1731, - "step": 12217 - }, - { - "epoch": 0.62, - "grad_norm": 1.04469142650679, - "learning_rate": 6.624632962222119e-06, - "loss": 0.1766, - "step": 12218 - }, - { - "epoch": 0.62, - "grad_norm": 0.9295249528325686, - "learning_rate": 6.6230826960710035e-06, - "loss": 0.1728, - "step": 12219 - }, - { - "epoch": 0.62, - "grad_norm": 1.0219966420634277, - "learning_rate": 6.621532521518962e-06, - "loss": 0.1728, - "step": 12220 - }, - { - "epoch": 0.62, - "grad_norm": 1.3289858108783676, - "learning_rate": 6.619982438608039e-06, - "loss": 0.1957, - "step": 12221 - }, - { - "epoch": 0.62, - "grad_norm": 1.2373544417696125, - "learning_rate": 6.618432447380288e-06, - "loss": 0.1997, - "step": 12222 - }, - { - "epoch": 0.62, - "grad_norm": 0.9934359092593344, - "learning_rate": 6.616882547877743e-06, - "loss": 0.1508, - "step": 12223 - }, - { - "epoch": 0.62, - "grad_norm": 0.8810613838875165, - "learning_rate": 6.615332740142454e-06, - "loss": 0.1403, - "step": 12224 - }, - { - "epoch": 0.62, - "grad_norm": 0.9879691489052049, - "learning_rate": 6.613783024216451e-06, - "loss": 0.172, - "step": 12225 - }, - { - "epoch": 0.62, - "grad_norm": 0.9526821220696458, - "learning_rate": 6.612233400141781e-06, - "loss": 0.1726, - "step": 12226 - }, - { - "epoch": 0.62, - "grad_norm": 1.3838159800652556, - "learning_rate": 6.610683867960466e-06, - "loss": 0.1858, - "step": 12227 - }, - { - "epoch": 0.62, - "grad_norm": 1.4162296287964915, - "learning_rate": 6.6091344277145456e-06, - "loss": 0.2036, - "step": 12228 - }, - { - "epoch": 0.62, - "grad_norm": 1.1086836490155574, - "learning_rate": 6.6075850794460414e-06, - "loss": 0.1688, - "step": 12229 - }, - { - "epoch": 0.62, - "grad_norm": 1.8066030135254134, - "learning_rate": 6.60603582319699e-06, - "loss": 0.1664, - "step": 12230 - }, - { - "epoch": 0.62, - "grad_norm": 1.4957728270277422, - "learning_rate": 6.604486659009404e-06, - "loss": 0.1833, - "step": 12231 - }, - { - "epoch": 0.62, - "grad_norm": 1.0987554878137216, - "learning_rate": 6.602937586925309e-06, - "loss": 0.188, - "step": 12232 - }, - { - "epoch": 0.62, - "grad_norm": 1.1942177727237888, - "learning_rate": 6.6013886069867235e-06, - "loss": 0.1785, - "step": 12233 - }, - { - "epoch": 0.62, - "grad_norm": 0.8794121473028255, - "learning_rate": 6.599839719235668e-06, - "loss": 0.1964, - "step": 12234 - }, - { - "epoch": 0.62, - "grad_norm": 1.3678191518713683, - "learning_rate": 6.598290923714152e-06, - "loss": 0.1797, - "step": 12235 - }, - { - "epoch": 0.62, - "grad_norm": 0.9275715467464166, - "learning_rate": 6.596742220464183e-06, - "loss": 0.1843, - "step": 12236 - }, - { - "epoch": 0.62, - "grad_norm": 0.7509251617436092, - "learning_rate": 6.595193609527774e-06, - "loss": 0.1704, - "step": 12237 - }, - { - "epoch": 0.62, - "grad_norm": 2.261131799563364, - "learning_rate": 6.593645090946932e-06, - "loss": 0.1765, - "step": 12238 - }, - { - "epoch": 0.62, - "grad_norm": 1.393015058725475, - "learning_rate": 6.592096664763661e-06, - "loss": 0.1702, - "step": 12239 - }, - { - "epoch": 0.62, - "grad_norm": 0.956100047072734, - "learning_rate": 6.590548331019957e-06, - "loss": 0.1815, - "step": 12240 - }, - { - "epoch": 0.62, - "grad_norm": 0.9956111845289233, - "learning_rate": 6.589000089757822e-06, - "loss": 0.1566, - "step": 12241 - }, - { - "epoch": 0.62, - "grad_norm": 0.7385208387149487, - "learning_rate": 6.587451941019253e-06, - "loss": 0.1718, - "step": 12242 - }, - { - "epoch": 0.62, - "grad_norm": 0.7324993874826935, - "learning_rate": 6.585903884846245e-06, - "loss": 0.1692, - "step": 12243 - }, - { - "epoch": 0.62, - "grad_norm": 0.8834736171907606, - "learning_rate": 6.584355921280785e-06, - "loss": 0.1764, - "step": 12244 - }, - { - "epoch": 0.62, - "grad_norm": 1.1075738748603206, - "learning_rate": 6.582808050364864e-06, - "loss": 0.1816, - "step": 12245 - }, - { - "epoch": 0.62, - "grad_norm": 0.8171360429587897, - "learning_rate": 6.581260272140466e-06, - "loss": 0.1796, - "step": 12246 - }, - { - "epoch": 0.62, - "grad_norm": 1.085648007323475, - "learning_rate": 6.579712586649581e-06, - "loss": 0.1715, - "step": 12247 - }, - { - "epoch": 0.62, - "grad_norm": 1.0691469016265858, - "learning_rate": 6.5781649939341794e-06, - "loss": 0.1849, - "step": 12248 - }, - { - "epoch": 0.62, - "grad_norm": 0.9259858332783487, - "learning_rate": 6.5766174940362505e-06, - "loss": 0.1606, - "step": 12249 - }, - { - "epoch": 0.62, - "grad_norm": 1.446395498306882, - "learning_rate": 6.575070086997762e-06, - "loss": 0.1877, - "step": 12250 - }, - { - "epoch": 0.62, - "grad_norm": 0.9671137700828732, - "learning_rate": 6.573522772860692e-06, - "loss": 0.1925, - "step": 12251 - }, - { - "epoch": 0.62, - "grad_norm": 0.8860157833790498, - "learning_rate": 6.571975551667014e-06, - "loss": 0.1821, - "step": 12252 - }, - { - "epoch": 0.62, - "grad_norm": 1.2918582948810673, - "learning_rate": 6.570428423458687e-06, - "loss": 0.1804, - "step": 12253 - }, - { - "epoch": 0.62, - "grad_norm": 1.3403658730770927, - "learning_rate": 6.568881388277685e-06, - "loss": 0.1773, - "step": 12254 - }, - { - "epoch": 0.62, - "grad_norm": 1.3153845570882179, - "learning_rate": 6.567334446165967e-06, - "loss": 0.1775, - "step": 12255 - }, - { - "epoch": 0.62, - "grad_norm": 0.7505597276119826, - "learning_rate": 6.565787597165501e-06, - "loss": 0.1549, - "step": 12256 - }, - { - "epoch": 0.62, - "grad_norm": 1.7234390957600532, - "learning_rate": 6.5642408413182345e-06, - "loss": 0.1667, - "step": 12257 - }, - { - "epoch": 0.62, - "grad_norm": 0.9668169901742122, - "learning_rate": 6.5626941786661335e-06, - "loss": 0.1609, - "step": 12258 - }, - { - "epoch": 0.62, - "grad_norm": 0.9568058195100089, - "learning_rate": 6.5611476092511435e-06, - "loss": 0.194, - "step": 12259 - }, - { - "epoch": 0.62, - "grad_norm": 2.6035064737348144, - "learning_rate": 6.559601133115223e-06, - "loss": 0.202, - "step": 12260 - }, - { - "epoch": 0.62, - "grad_norm": 0.96974975639801, - "learning_rate": 6.558054750300313e-06, - "loss": 0.1978, - "step": 12261 - }, - { - "epoch": 0.62, - "grad_norm": 0.79531616090137, - "learning_rate": 6.556508460848365e-06, - "loss": 0.1568, - "step": 12262 - }, - { - "epoch": 0.62, - "grad_norm": 1.071490924175172, - "learning_rate": 6.554962264801316e-06, - "loss": 0.1729, - "step": 12263 - }, - { - "epoch": 0.62, - "grad_norm": 1.036724040066627, - "learning_rate": 6.553416162201114e-06, - "loss": 0.1712, - "step": 12264 - }, - { - "epoch": 0.62, - "grad_norm": 1.3034074952249906, - "learning_rate": 6.55187015308969e-06, - "loss": 0.2014, - "step": 12265 - }, - { - "epoch": 0.62, - "grad_norm": 0.8093630274081415, - "learning_rate": 6.550324237508986e-06, - "loss": 0.1779, - "step": 12266 - }, - { - "epoch": 0.62, - "grad_norm": 1.093760841897314, - "learning_rate": 6.5487784155009285e-06, - "loss": 0.1687, - "step": 12267 - }, - { - "epoch": 0.62, - "grad_norm": 0.9507641187044447, - "learning_rate": 6.547232687107453e-06, - "loss": 0.1768, - "step": 12268 - }, - { - "epoch": 0.62, - "grad_norm": 0.9106248716503401, - "learning_rate": 6.5456870523704845e-06, - "loss": 0.1669, - "step": 12269 - }, - { - "epoch": 0.62, - "grad_norm": 0.8055247993376253, - "learning_rate": 6.544141511331954e-06, - "loss": 0.1783, - "step": 12270 - }, - { - "epoch": 0.62, - "grad_norm": 0.9886344291566992, - "learning_rate": 6.542596064033777e-06, - "loss": 0.1875, - "step": 12271 - }, - { - "epoch": 0.62, - "grad_norm": 1.0897904038840873, - "learning_rate": 6.541050710517875e-06, - "loss": 0.1544, - "step": 12272 - }, - { - "epoch": 0.62, - "grad_norm": 0.99314961842094, - "learning_rate": 6.539505450826174e-06, - "loss": 0.1726, - "step": 12273 - }, - { - "epoch": 0.62, - "grad_norm": 0.8909882895410705, - "learning_rate": 6.537960285000577e-06, - "loss": 0.1981, - "step": 12274 - }, - { - "epoch": 0.62, - "grad_norm": 1.2303960200796495, - "learning_rate": 6.536415213083007e-06, - "loss": 0.1855, - "step": 12275 - }, - { - "epoch": 0.62, - "grad_norm": 0.8916355656067946, - "learning_rate": 6.534870235115367e-06, - "loss": 0.1833, - "step": 12276 - }, - { - "epoch": 0.62, - "grad_norm": 0.8624614446017977, - "learning_rate": 6.533325351139569e-06, - "loss": 0.1747, - "step": 12277 - }, - { - "epoch": 0.62, - "grad_norm": 1.103022254689305, - "learning_rate": 6.531780561197514e-06, - "loss": 0.1913, - "step": 12278 - }, - { - "epoch": 0.62, - "grad_norm": 1.3641657032827077, - "learning_rate": 6.530235865331112e-06, - "loss": 0.1624, - "step": 12279 - }, - { - "epoch": 0.62, - "grad_norm": 0.8736367851666924, - "learning_rate": 6.528691263582254e-06, - "loss": 0.1776, - "step": 12280 - }, - { - "epoch": 0.62, - "grad_norm": 0.9389189226683222, - "learning_rate": 6.527146755992844e-06, - "loss": 0.1697, - "step": 12281 - }, - { - "epoch": 0.62, - "grad_norm": 1.536533177111427, - "learning_rate": 6.525602342604771e-06, - "loss": 0.1688, - "step": 12282 - }, - { - "epoch": 0.62, - "grad_norm": 1.0136640445014258, - "learning_rate": 6.524058023459936e-06, - "loss": 0.2022, - "step": 12283 - }, - { - "epoch": 0.62, - "grad_norm": 0.9594188684445367, - "learning_rate": 6.522513798600219e-06, - "loss": 0.1913, - "step": 12284 - }, - { - "epoch": 0.62, - "grad_norm": 0.8637535389066199, - "learning_rate": 6.520969668067514e-06, - "loss": 0.1587, - "step": 12285 - }, - { - "epoch": 0.62, - "grad_norm": 0.7712698643365918, - "learning_rate": 6.5194256319036996e-06, - "loss": 0.1583, - "step": 12286 - }, - { - "epoch": 0.62, - "grad_norm": 1.1062185087587382, - "learning_rate": 6.517881690150667e-06, - "loss": 0.1824, - "step": 12287 - }, - { - "epoch": 0.62, - "grad_norm": 0.8197071505996901, - "learning_rate": 6.516337842850291e-06, - "loss": 0.1703, - "step": 12288 - }, - { - "epoch": 0.62, - "grad_norm": 1.5756187693803567, - "learning_rate": 6.514794090044443e-06, - "loss": 0.175, - "step": 12289 - }, - { - "epoch": 0.62, - "grad_norm": 1.7986011584614001, - "learning_rate": 6.513250431775003e-06, - "loss": 0.1599, - "step": 12290 - }, - { - "epoch": 0.63, - "grad_norm": 0.876400412985864, - "learning_rate": 6.511706868083842e-06, - "loss": 0.1826, - "step": 12291 - }, - { - "epoch": 0.63, - "grad_norm": 1.3253744757454275, - "learning_rate": 6.510163399012832e-06, - "loss": 0.2196, - "step": 12292 - }, - { - "epoch": 0.63, - "grad_norm": 0.8228314003915462, - "learning_rate": 6.508620024603833e-06, - "loss": 0.1873, - "step": 12293 - }, - { - "epoch": 0.63, - "grad_norm": 1.182334128744673, - "learning_rate": 6.507076744898715e-06, - "loss": 0.1665, - "step": 12294 - }, - { - "epoch": 0.63, - "grad_norm": 1.0461347391662688, - "learning_rate": 6.505533559939335e-06, - "loss": 0.1864, - "step": 12295 - }, - { - "epoch": 0.63, - "grad_norm": 0.896008704215264, - "learning_rate": 6.50399046976756e-06, - "loss": 0.2103, - "step": 12296 - }, - { - "epoch": 0.63, - "grad_norm": 1.2823807091290649, - "learning_rate": 6.502447474425235e-06, - "loss": 0.1633, - "step": 12297 - }, - { - "epoch": 0.63, - "grad_norm": 0.9199393892955969, - "learning_rate": 6.5009045739542235e-06, - "loss": 0.1988, - "step": 12298 - }, - { - "epoch": 0.63, - "grad_norm": 1.0069335458303679, - "learning_rate": 6.499361768396371e-06, - "loss": 0.1854, - "step": 12299 - }, - { - "epoch": 0.63, - "grad_norm": 1.0795233172269108, - "learning_rate": 6.497819057793531e-06, - "loss": 0.1732, - "step": 12300 - }, - { - "epoch": 0.63, - "grad_norm": 0.9021808424439925, - "learning_rate": 6.496276442187543e-06, - "loss": 0.1833, - "step": 12301 - }, - { - "epoch": 0.63, - "grad_norm": 0.965671577262932, - "learning_rate": 6.49473392162026e-06, - "loss": 0.1763, - "step": 12302 - }, - { - "epoch": 0.63, - "grad_norm": 1.0642907663234293, - "learning_rate": 6.493191496133513e-06, - "loss": 0.1626, - "step": 12303 - }, - { - "epoch": 0.63, - "grad_norm": 0.9872091817808939, - "learning_rate": 6.491649165769145e-06, - "loss": 0.1797, - "step": 12304 - }, - { - "epoch": 0.63, - "grad_norm": 0.829784055595061, - "learning_rate": 6.4901069305689955e-06, - "loss": 0.1834, - "step": 12305 - }, - { - "epoch": 0.63, - "grad_norm": 1.2547817572673163, - "learning_rate": 6.488564790574889e-06, - "loss": 0.1705, - "step": 12306 - }, - { - "epoch": 0.63, - "grad_norm": 1.2223911861358498, - "learning_rate": 6.487022745828663e-06, - "loss": 0.1894, - "step": 12307 - }, - { - "epoch": 0.63, - "grad_norm": 2.015859280347554, - "learning_rate": 6.485480796372141e-06, - "loss": 0.1863, - "step": 12308 - }, - { - "epoch": 0.63, - "grad_norm": 1.0186987150755753, - "learning_rate": 6.483938942247155e-06, - "loss": 0.1735, - "step": 12309 - }, - { - "epoch": 0.63, - "grad_norm": 0.9826918667646444, - "learning_rate": 6.482397183495519e-06, - "loss": 0.1746, - "step": 12310 - }, - { - "epoch": 0.63, - "grad_norm": 1.171723404689601, - "learning_rate": 6.4808555201590614e-06, - "loss": 0.1505, - "step": 12311 - }, - { - "epoch": 0.63, - "grad_norm": 1.1341544628466593, - "learning_rate": 6.479313952279594e-06, - "loss": 0.181, - "step": 12312 - }, - { - "epoch": 0.63, - "grad_norm": 0.9730907582567121, - "learning_rate": 6.47777247989894e-06, - "loss": 0.1756, - "step": 12313 - }, - { - "epoch": 0.63, - "grad_norm": 1.165375281334116, - "learning_rate": 6.476231103058901e-06, - "loss": 0.1869, - "step": 12314 - }, - { - "epoch": 0.63, - "grad_norm": 1.2957891090340417, - "learning_rate": 6.474689821801295e-06, - "loss": 0.1717, - "step": 12315 - }, - { - "epoch": 0.63, - "grad_norm": 0.8354411998000495, - "learning_rate": 6.473148636167925e-06, - "loss": 0.1796, - "step": 12316 - }, - { - "epoch": 0.63, - "grad_norm": 1.0039260067099076, - "learning_rate": 6.471607546200598e-06, - "loss": 0.1761, - "step": 12317 - }, - { - "epoch": 0.63, - "grad_norm": 0.9107910951230012, - "learning_rate": 6.470066551941114e-06, - "loss": 0.1643, - "step": 12318 - }, - { - "epoch": 0.63, - "grad_norm": 3.013960752228629, - "learning_rate": 6.468525653431279e-06, - "loss": 0.1653, - "step": 12319 - }, - { - "epoch": 0.63, - "grad_norm": 0.8127586630711379, - "learning_rate": 6.466984850712881e-06, - "loss": 0.1688, - "step": 12320 - }, - { - "epoch": 0.63, - "grad_norm": 0.8394294595716062, - "learning_rate": 6.4654441438277194e-06, - "loss": 0.1725, - "step": 12321 - }, - { - "epoch": 0.63, - "grad_norm": 1.2239762650728694, - "learning_rate": 6.463903532817587e-06, - "loss": 0.1498, - "step": 12322 - }, - { - "epoch": 0.63, - "grad_norm": 1.5568944801202331, - "learning_rate": 6.462363017724267e-06, - "loss": 0.182, - "step": 12323 - }, - { - "epoch": 0.63, - "grad_norm": 0.9073127865426454, - "learning_rate": 6.460822598589554e-06, - "loss": 0.1526, - "step": 12324 - }, - { - "epoch": 0.63, - "grad_norm": 1.2372909375299115, - "learning_rate": 6.459282275455223e-06, - "loss": 0.1721, - "step": 12325 - }, - { - "epoch": 0.63, - "grad_norm": 0.8625107471525778, - "learning_rate": 6.457742048363066e-06, - "loss": 0.1461, - "step": 12326 - }, - { - "epoch": 0.63, - "grad_norm": 2.449375047735124, - "learning_rate": 6.456201917354852e-06, - "loss": 0.1865, - "step": 12327 - }, - { - "epoch": 0.63, - "grad_norm": 0.952600062618889, - "learning_rate": 6.454661882472364e-06, - "loss": 0.1612, - "step": 12328 - }, - { - "epoch": 0.63, - "grad_norm": 1.0525552911764944, - "learning_rate": 6.45312194375737e-06, - "loss": 0.1621, - "step": 12329 - }, - { - "epoch": 0.63, - "grad_norm": 1.1407915087173905, - "learning_rate": 6.451582101251645e-06, - "loss": 0.2157, - "step": 12330 - }, - { - "epoch": 0.63, - "grad_norm": 0.9388793084022125, - "learning_rate": 6.450042354996954e-06, - "loss": 0.1724, - "step": 12331 - }, - { - "epoch": 0.63, - "grad_norm": 2.195697188482703, - "learning_rate": 6.448502705035069e-06, - "loss": 0.1737, - "step": 12332 - }, - { - "epoch": 0.63, - "grad_norm": 26.04404863008669, - "learning_rate": 6.446963151407743e-06, - "loss": 0.1697, - "step": 12333 - }, - { - "epoch": 0.63, - "grad_norm": 1.0686488893519888, - "learning_rate": 6.445423694156746e-06, - "loss": 0.1445, - "step": 12334 - }, - { - "epoch": 0.63, - "grad_norm": 1.2770191889968054, - "learning_rate": 6.44388433332383e-06, - "loss": 0.1686, - "step": 12335 - }, - { - "epoch": 0.63, - "grad_norm": 0.9588144049117461, - "learning_rate": 6.442345068950755e-06, - "loss": 0.1631, - "step": 12336 - }, - { - "epoch": 0.63, - "grad_norm": 1.0817090132952616, - "learning_rate": 6.440805901079268e-06, - "loss": 0.2015, - "step": 12337 - }, - { - "epoch": 0.63, - "grad_norm": 1.1930738953388942, - "learning_rate": 6.4392668297511244e-06, - "loss": 0.1689, - "step": 12338 - }, - { - "epoch": 0.63, - "grad_norm": 1.5876164763829481, - "learning_rate": 6.4377278550080664e-06, - "loss": 0.1798, - "step": 12339 - }, - { - "epoch": 0.63, - "grad_norm": 1.1346038126888687, - "learning_rate": 6.436188976891846e-06, - "loss": 0.1722, - "step": 12340 - }, - { - "epoch": 0.63, - "grad_norm": 0.8509113581377679, - "learning_rate": 6.434650195444199e-06, - "loss": 0.1723, - "step": 12341 - }, - { - "epoch": 0.63, - "grad_norm": 1.4524280102048681, - "learning_rate": 6.433111510706864e-06, - "loss": 0.1895, - "step": 12342 - }, - { - "epoch": 0.63, - "grad_norm": 1.2121512139484962, - "learning_rate": 6.431572922721585e-06, - "loss": 0.1904, - "step": 12343 - }, - { - "epoch": 0.63, - "grad_norm": 1.0653587248529126, - "learning_rate": 6.430034431530088e-06, - "loss": 0.1584, - "step": 12344 - }, - { - "epoch": 0.63, - "grad_norm": 0.9241859336426226, - "learning_rate": 6.428496037174112e-06, - "loss": 0.1727, - "step": 12345 - }, - { - "epoch": 0.63, - "grad_norm": 1.7452389512234971, - "learning_rate": 6.42695773969538e-06, - "loss": 0.1622, - "step": 12346 - }, - { - "epoch": 0.63, - "grad_norm": 1.2684237230799293, - "learning_rate": 6.425419539135622e-06, - "loss": 0.1634, - "step": 12347 - }, - { - "epoch": 0.63, - "grad_norm": 1.237057056550504, - "learning_rate": 6.42388143553656e-06, - "loss": 0.1799, - "step": 12348 - }, - { - "epoch": 0.63, - "grad_norm": 0.9888821411178348, - "learning_rate": 6.422343428939919e-06, - "loss": 0.1728, - "step": 12349 - }, - { - "epoch": 0.63, - "grad_norm": 1.4023727794262242, - "learning_rate": 6.420805519387412e-06, - "loss": 0.1853, - "step": 12350 - }, - { - "epoch": 0.63, - "grad_norm": 1.2704004212987643, - "learning_rate": 6.419267706920758e-06, - "loss": 0.1814, - "step": 12351 - }, - { - "epoch": 0.63, - "grad_norm": 0.9049368121777028, - "learning_rate": 6.417729991581668e-06, - "loss": 0.1794, - "step": 12352 - }, - { - "epoch": 0.63, - "grad_norm": 1.0791248063852146, - "learning_rate": 6.4161923734118594e-06, - "loss": 0.178, - "step": 12353 - }, - { - "epoch": 0.63, - "grad_norm": 1.0154705817518106, - "learning_rate": 6.41465485245303e-06, - "loss": 0.1718, - "step": 12354 - }, - { - "epoch": 0.63, - "grad_norm": 0.8059650877129417, - "learning_rate": 6.413117428746892e-06, - "loss": 0.1903, - "step": 12355 - }, - { - "epoch": 0.63, - "grad_norm": 0.8510180569734818, - "learning_rate": 6.4115801023351444e-06, - "loss": 0.1621, - "step": 12356 - }, - { - "epoch": 0.63, - "grad_norm": 1.0253418963036458, - "learning_rate": 6.410042873259494e-06, - "loss": 0.147, - "step": 12357 - }, - { - "epoch": 0.63, - "grad_norm": 1.1512407414739831, - "learning_rate": 6.408505741561633e-06, - "loss": 0.1798, - "step": 12358 - }, - { - "epoch": 0.63, - "grad_norm": 0.9846029617594317, - "learning_rate": 6.406968707283253e-06, - "loss": 0.1722, - "step": 12359 - }, - { - "epoch": 0.63, - "grad_norm": 1.2493645732998286, - "learning_rate": 6.405431770466051e-06, - "loss": 0.1677, - "step": 12360 - }, - { - "epoch": 0.63, - "grad_norm": 0.9348559490222239, - "learning_rate": 6.403894931151714e-06, - "loss": 0.1615, - "step": 12361 - }, - { - "epoch": 0.63, - "grad_norm": 0.9569479373941275, - "learning_rate": 6.4023581893819345e-06, - "loss": 0.1838, - "step": 12362 - }, - { - "epoch": 0.63, - "grad_norm": 0.7611388126812884, - "learning_rate": 6.4008215451983864e-06, - "loss": 0.1756, - "step": 12363 - }, - { - "epoch": 0.63, - "grad_norm": 1.2822095789309997, - "learning_rate": 6.399284998642761e-06, - "loss": 0.2031, - "step": 12364 - }, - { - "epoch": 0.63, - "grad_norm": 0.7669714612070854, - "learning_rate": 6.39774854975673e-06, - "loss": 0.17, - "step": 12365 - }, - { - "epoch": 0.63, - "grad_norm": 0.9943848290476993, - "learning_rate": 6.396212198581978e-06, - "loss": 0.1678, - "step": 12366 - }, - { - "epoch": 0.63, - "grad_norm": 2.708205758868274, - "learning_rate": 6.394675945160169e-06, - "loss": 0.1662, - "step": 12367 - }, - { - "epoch": 0.63, - "grad_norm": 1.5553428840830763, - "learning_rate": 6.39313978953298e-06, - "loss": 0.1749, - "step": 12368 - }, - { - "epoch": 0.63, - "grad_norm": 0.8969706376294244, - "learning_rate": 6.391603731742078e-06, - "loss": 0.1733, - "step": 12369 - }, - { - "epoch": 0.63, - "grad_norm": 1.1488854694275947, - "learning_rate": 6.390067771829132e-06, - "loss": 0.1677, - "step": 12370 - }, - { - "epoch": 0.63, - "grad_norm": 0.9769116755166073, - "learning_rate": 6.3885319098357966e-06, - "loss": 0.1584, - "step": 12371 - }, - { - "epoch": 0.63, - "grad_norm": 0.8587655311387853, - "learning_rate": 6.386996145803741e-06, - "loss": 0.1848, - "step": 12372 - }, - { - "epoch": 0.63, - "grad_norm": 1.6151992229384966, - "learning_rate": 6.385460479774616e-06, - "loss": 0.1781, - "step": 12373 - }, - { - "epoch": 0.63, - "grad_norm": 1.161128554908505, - "learning_rate": 6.383924911790081e-06, - "loss": 0.1978, - "step": 12374 - }, - { - "epoch": 0.63, - "grad_norm": 4.102166422327023, - "learning_rate": 6.3823894418917895e-06, - "loss": 0.1716, - "step": 12375 - }, - { - "epoch": 0.63, - "grad_norm": 0.9866300969961047, - "learning_rate": 6.380854070121385e-06, - "loss": 0.1741, - "step": 12376 - }, - { - "epoch": 0.63, - "grad_norm": 0.8744583736841051, - "learning_rate": 6.37931879652052e-06, - "loss": 0.1776, - "step": 12377 - }, - { - "epoch": 0.63, - "grad_norm": 0.9033550694190644, - "learning_rate": 6.377783621130834e-06, - "loss": 0.1815, - "step": 12378 - }, - { - "epoch": 0.63, - "grad_norm": 1.7952117507679426, - "learning_rate": 6.376248543993977e-06, - "loss": 0.1769, - "step": 12379 - }, - { - "epoch": 0.63, - "grad_norm": 1.0221285316727067, - "learning_rate": 6.374713565151579e-06, - "loss": 0.1809, - "step": 12380 - }, - { - "epoch": 0.63, - "grad_norm": 0.9361376699622568, - "learning_rate": 6.373178684645283e-06, - "loss": 0.1831, - "step": 12381 - }, - { - "epoch": 0.63, - "grad_norm": 1.000278952974784, - "learning_rate": 6.371643902516715e-06, - "loss": 0.2024, - "step": 12382 - }, - { - "epoch": 0.63, - "grad_norm": 0.8337946910176021, - "learning_rate": 6.3701092188075176e-06, - "loss": 0.1733, - "step": 12383 - }, - { - "epoch": 0.63, - "grad_norm": 1.068934840910689, - "learning_rate": 6.368574633559308e-06, - "loss": 0.1867, - "step": 12384 - }, - { - "epoch": 0.63, - "grad_norm": 1.0862813904471362, - "learning_rate": 6.367040146813721e-06, - "loss": 0.1759, - "step": 12385 - }, - { - "epoch": 0.63, - "grad_norm": 0.9875804752664659, - "learning_rate": 6.365505758612371e-06, - "loss": 0.1781, - "step": 12386 - }, - { - "epoch": 0.63, - "grad_norm": 1.0816791746408998, - "learning_rate": 6.363971468996883e-06, - "loss": 0.1785, - "step": 12387 - }, - { - "epoch": 0.63, - "grad_norm": 1.2488081898099574, - "learning_rate": 6.362437278008875e-06, - "loss": 0.1764, - "step": 12388 - }, - { - "epoch": 0.63, - "grad_norm": 1.1154057711008427, - "learning_rate": 6.360903185689964e-06, - "loss": 0.1629, - "step": 12389 - }, - { - "epoch": 0.63, - "grad_norm": 0.7958664729630673, - "learning_rate": 6.359369192081756e-06, - "loss": 0.164, - "step": 12390 - }, - { - "epoch": 0.63, - "grad_norm": 0.8140188803014982, - "learning_rate": 6.357835297225865e-06, - "loss": 0.1648, - "step": 12391 - }, - { - "epoch": 0.63, - "grad_norm": 0.9086263255788167, - "learning_rate": 6.356301501163901e-06, - "loss": 0.1498, - "step": 12392 - }, - { - "epoch": 0.63, - "grad_norm": 1.1784567810825564, - "learning_rate": 6.3547678039374595e-06, - "loss": 0.1702, - "step": 12393 - }, - { - "epoch": 0.63, - "grad_norm": 0.9426996480766275, - "learning_rate": 6.35323420558815e-06, - "loss": 0.1861, - "step": 12394 - }, - { - "epoch": 0.63, - "grad_norm": 0.8746147919053582, - "learning_rate": 6.351700706157565e-06, - "loss": 0.1816, - "step": 12395 - }, - { - "epoch": 0.63, - "grad_norm": 0.9674403403160589, - "learning_rate": 6.350167305687309e-06, - "loss": 0.1854, - "step": 12396 - }, - { - "epoch": 0.63, - "grad_norm": 2.3616418400384807, - "learning_rate": 6.348634004218969e-06, - "loss": 0.1557, - "step": 12397 - }, - { - "epoch": 0.63, - "grad_norm": 1.191532683707872, - "learning_rate": 6.3471008017941396e-06, - "loss": 0.1812, - "step": 12398 - }, - { - "epoch": 0.63, - "grad_norm": 0.9497595078806705, - "learning_rate": 6.345567698454405e-06, - "loss": 0.1836, - "step": 12399 - }, - { - "epoch": 0.63, - "grad_norm": 1.061604308953469, - "learning_rate": 6.344034694241353e-06, - "loss": 0.1713, - "step": 12400 - }, - { - "epoch": 0.63, - "grad_norm": 0.9765112821432018, - "learning_rate": 6.342501789196565e-06, - "loss": 0.1707, - "step": 12401 - }, - { - "epoch": 0.63, - "grad_norm": 0.9497694404575935, - "learning_rate": 6.340968983361629e-06, - "loss": 0.1837, - "step": 12402 - }, - { - "epoch": 0.63, - "grad_norm": 1.1070548884603109, - "learning_rate": 6.339436276778108e-06, - "loss": 0.1638, - "step": 12403 - }, - { - "epoch": 0.63, - "grad_norm": 1.5135514613785872, - "learning_rate": 6.33790366948759e-06, - "loss": 0.1823, - "step": 12404 - }, - { - "epoch": 0.63, - "grad_norm": 0.9890899244681153, - "learning_rate": 6.3363711615316384e-06, - "loss": 0.1621, - "step": 12405 - }, - { - "epoch": 0.63, - "grad_norm": 2.085404642674482, - "learning_rate": 6.334838752951829e-06, - "loss": 0.2035, - "step": 12406 - }, - { - "epoch": 0.63, - "grad_norm": 0.821864051302577, - "learning_rate": 6.333306443789723e-06, - "loss": 0.1637, - "step": 12407 - }, - { - "epoch": 0.63, - "grad_norm": 1.018056615727074, - "learning_rate": 6.331774234086888e-06, - "loss": 0.1798, - "step": 12408 - }, - { - "epoch": 0.63, - "grad_norm": 1.1267528713054624, - "learning_rate": 6.330242123884882e-06, - "loss": 0.1932, - "step": 12409 - }, - { - "epoch": 0.63, - "grad_norm": 0.9204954399604649, - "learning_rate": 6.328710113225271e-06, - "loss": 0.1607, - "step": 12410 - }, - { - "epoch": 0.63, - "grad_norm": 1.134368920284439, - "learning_rate": 6.327178202149604e-06, - "loss": 0.1712, - "step": 12411 - }, - { - "epoch": 0.63, - "grad_norm": 0.8948304924405651, - "learning_rate": 6.325646390699432e-06, - "loss": 0.1707, - "step": 12412 - }, - { - "epoch": 0.63, - "grad_norm": 1.148432251960856, - "learning_rate": 6.324114678916312e-06, - "loss": 0.1651, - "step": 12413 - }, - { - "epoch": 0.63, - "grad_norm": 1.3243261950780123, - "learning_rate": 6.322583066841787e-06, - "loss": 0.2044, - "step": 12414 - }, - { - "epoch": 0.63, - "grad_norm": 1.0739468116695186, - "learning_rate": 6.321051554517406e-06, - "loss": 0.1714, - "step": 12415 - }, - { - "epoch": 0.63, - "grad_norm": 0.8085880976560276, - "learning_rate": 6.3195201419847075e-06, - "loss": 0.1876, - "step": 12416 - }, - { - "epoch": 0.63, - "grad_norm": 1.2755854149125554, - "learning_rate": 6.3179888292852345e-06, - "loss": 0.1897, - "step": 12417 - }, - { - "epoch": 0.63, - "grad_norm": 0.8972934946635283, - "learning_rate": 6.316457616460521e-06, - "loss": 0.175, - "step": 12418 - }, - { - "epoch": 0.63, - "grad_norm": 0.8943975090876413, - "learning_rate": 6.314926503552106e-06, - "loss": 0.1985, - "step": 12419 - }, - { - "epoch": 0.63, - "grad_norm": 1.0943262305329209, - "learning_rate": 6.313395490601513e-06, - "loss": 0.182, - "step": 12420 - }, - { - "epoch": 0.63, - "grad_norm": 0.9752335207142642, - "learning_rate": 6.311864577650278e-06, - "loss": 0.1536, - "step": 12421 - }, - { - "epoch": 0.63, - "grad_norm": 1.255788513507667, - "learning_rate": 6.310333764739922e-06, - "loss": 0.1692, - "step": 12422 - }, - { - "epoch": 0.63, - "grad_norm": 0.8946464195463422, - "learning_rate": 6.308803051911977e-06, - "loss": 0.1734, - "step": 12423 - }, - { - "epoch": 0.63, - "grad_norm": 1.0774757219799522, - "learning_rate": 6.307272439207952e-06, - "loss": 0.2096, - "step": 12424 - }, - { - "epoch": 0.63, - "grad_norm": 0.8735326158800345, - "learning_rate": 6.305741926669376e-06, - "loss": 0.1648, - "step": 12425 - }, - { - "epoch": 0.63, - "grad_norm": 0.7804412362106942, - "learning_rate": 6.304211514337755e-06, - "loss": 0.1792, - "step": 12426 - }, - { - "epoch": 0.63, - "grad_norm": 0.8648967530320314, - "learning_rate": 6.302681202254605e-06, - "loss": 0.1727, - "step": 12427 - }, - { - "epoch": 0.63, - "grad_norm": 0.951635997434637, - "learning_rate": 6.30115099046144e-06, - "loss": 0.1663, - "step": 12428 - }, - { - "epoch": 0.63, - "grad_norm": 0.8540674273278183, - "learning_rate": 6.299620878999759e-06, - "loss": 0.1723, - "step": 12429 - }, - { - "epoch": 0.63, - "grad_norm": 0.8448351618492159, - "learning_rate": 6.298090867911073e-06, - "loss": 0.1673, - "step": 12430 - }, - { - "epoch": 0.63, - "grad_norm": 0.8999928356265918, - "learning_rate": 6.296560957236879e-06, - "loss": 0.1645, - "step": 12431 - }, - { - "epoch": 0.63, - "grad_norm": 1.0263325239331658, - "learning_rate": 6.295031147018682e-06, - "loss": 0.1971, - "step": 12432 - }, - { - "epoch": 0.63, - "grad_norm": 0.8343603797823569, - "learning_rate": 6.293501437297971e-06, - "loss": 0.1734, - "step": 12433 - }, - { - "epoch": 0.63, - "grad_norm": 1.2727832738429121, - "learning_rate": 6.291971828116244e-06, - "loss": 0.1658, - "step": 12434 - }, - { - "epoch": 0.63, - "grad_norm": 0.9658412176768761, - "learning_rate": 6.290442319514989e-06, - "loss": 0.1793, - "step": 12435 - }, - { - "epoch": 0.63, - "grad_norm": 1.2050351172057916, - "learning_rate": 6.288912911535701e-06, - "loss": 0.174, - "step": 12436 - }, - { - "epoch": 0.63, - "grad_norm": 1.2826664564756307, - "learning_rate": 6.2873836042198546e-06, - "loss": 0.1864, - "step": 12437 - }, - { - "epoch": 0.63, - "grad_norm": 0.7933695499610162, - "learning_rate": 6.285854397608941e-06, - "loss": 0.1513, - "step": 12438 - }, - { - "epoch": 0.63, - "grad_norm": 0.8704491729302477, - "learning_rate": 6.284325291744433e-06, - "loss": 0.1652, - "step": 12439 - }, - { - "epoch": 0.63, - "grad_norm": 0.8751594756844792, - "learning_rate": 6.282796286667814e-06, - "loss": 0.179, - "step": 12440 - }, - { - "epoch": 0.63, - "grad_norm": 1.152411953728966, - "learning_rate": 6.281267382420553e-06, - "loss": 0.191, - "step": 12441 - }, - { - "epoch": 0.63, - "grad_norm": 1.0783429025431217, - "learning_rate": 6.2797385790441275e-06, - "loss": 0.1865, - "step": 12442 - }, - { - "epoch": 0.63, - "grad_norm": 1.165853509016558, - "learning_rate": 6.278209876580002e-06, - "loss": 0.1671, - "step": 12443 - }, - { - "epoch": 0.63, - "grad_norm": 1.4369216323054037, - "learning_rate": 6.2766812750696425e-06, - "loss": 0.1707, - "step": 12444 - }, - { - "epoch": 0.63, - "grad_norm": 1.1509816814159848, - "learning_rate": 6.275152774554518e-06, - "loss": 0.1796, - "step": 12445 - }, - { - "epoch": 0.63, - "grad_norm": 1.0754526489308998, - "learning_rate": 6.273624375076079e-06, - "loss": 0.178, - "step": 12446 - }, - { - "epoch": 0.63, - "grad_norm": 0.9512043301201804, - "learning_rate": 6.272096076675794e-06, - "loss": 0.1706, - "step": 12447 - }, - { - "epoch": 0.63, - "grad_norm": 0.9603013115681072, - "learning_rate": 6.2705678793951085e-06, - "loss": 0.2025, - "step": 12448 - }, - { - "epoch": 0.63, - "grad_norm": 1.0168091469068452, - "learning_rate": 6.269039783275486e-06, - "loss": 0.1876, - "step": 12449 - }, - { - "epoch": 0.63, - "grad_norm": 1.3743891386632616, - "learning_rate": 6.267511788358365e-06, - "loss": 0.1839, - "step": 12450 - }, - { - "epoch": 0.63, - "grad_norm": 0.8779440589301847, - "learning_rate": 6.265983894685199e-06, - "loss": 0.1658, - "step": 12451 - }, - { - "epoch": 0.63, - "grad_norm": 0.9168517762241354, - "learning_rate": 6.264456102297431e-06, - "loss": 0.1808, - "step": 12452 - }, - { - "epoch": 0.63, - "grad_norm": 1.478232498156621, - "learning_rate": 6.262928411236504e-06, - "loss": 0.1785, - "step": 12453 - }, - { - "epoch": 0.63, - "grad_norm": 1.850537886066106, - "learning_rate": 6.261400821543853e-06, - "loss": 0.1629, - "step": 12454 - }, - { - "epoch": 0.63, - "grad_norm": 0.945733684470051, - "learning_rate": 6.259873333260917e-06, - "loss": 0.175, - "step": 12455 - }, - { - "epoch": 0.63, - "grad_norm": 1.1153704099256443, - "learning_rate": 6.258345946429127e-06, - "loss": 0.1927, - "step": 12456 - }, - { - "epoch": 0.63, - "grad_norm": 1.0169887154193404, - "learning_rate": 6.256818661089914e-06, - "loss": 0.1717, - "step": 12457 - }, - { - "epoch": 0.63, - "grad_norm": 1.5532608557851424, - "learning_rate": 6.255291477284706e-06, - "loss": 0.1806, - "step": 12458 - }, - { - "epoch": 0.63, - "grad_norm": 0.8137842106602222, - "learning_rate": 6.253764395054931e-06, - "loss": 0.1781, - "step": 12459 - }, - { - "epoch": 0.63, - "grad_norm": 0.778704510268674, - "learning_rate": 6.252237414442006e-06, - "loss": 0.1645, - "step": 12460 - }, - { - "epoch": 0.63, - "grad_norm": 0.9806884124431697, - "learning_rate": 6.250710535487354e-06, - "loss": 0.1584, - "step": 12461 - }, - { - "epoch": 0.63, - "grad_norm": 0.7972196638544355, - "learning_rate": 6.249183758232391e-06, - "loss": 0.1791, - "step": 12462 - }, - { - "epoch": 0.63, - "grad_norm": 0.8762787480998664, - "learning_rate": 6.247657082718528e-06, - "loss": 0.1787, - "step": 12463 - }, - { - "epoch": 0.63, - "grad_norm": 1.5548117723420225, - "learning_rate": 6.246130508987181e-06, - "loss": 0.1589, - "step": 12464 - }, - { - "epoch": 0.63, - "grad_norm": 0.9578502567917115, - "learning_rate": 6.244604037079754e-06, - "loss": 0.1862, - "step": 12465 - }, - { - "epoch": 0.63, - "grad_norm": 1.1923637270836631, - "learning_rate": 6.2430776670376565e-06, - "loss": 0.1628, - "step": 12466 - }, - { - "epoch": 0.63, - "grad_norm": 1.132564443816655, - "learning_rate": 6.241551398902288e-06, - "loss": 0.1805, - "step": 12467 - }, - { - "epoch": 0.63, - "grad_norm": 1.297131069014862, - "learning_rate": 6.240025232715052e-06, - "loss": 0.1896, - "step": 12468 - }, - { - "epoch": 0.63, - "grad_norm": 1.0555852137876773, - "learning_rate": 6.2384991685173415e-06, - "loss": 0.1655, - "step": 12469 - }, - { - "epoch": 0.63, - "grad_norm": 1.2255988170345553, - "learning_rate": 6.236973206350554e-06, - "loss": 0.1969, - "step": 12470 - }, - { - "epoch": 0.63, - "grad_norm": 0.857306568176004, - "learning_rate": 6.23544734625608e-06, - "loss": 0.1773, - "step": 12471 - }, - { - "epoch": 0.63, - "grad_norm": 1.4411426640641447, - "learning_rate": 6.233921588275313e-06, - "loss": 0.1704, - "step": 12472 - }, - { - "epoch": 0.63, - "grad_norm": 1.081017069593761, - "learning_rate": 6.232395932449632e-06, - "loss": 0.179, - "step": 12473 - }, - { - "epoch": 0.63, - "grad_norm": 0.9309147223998671, - "learning_rate": 6.230870378820426e-06, - "loss": 0.1675, - "step": 12474 - }, - { - "epoch": 0.63, - "grad_norm": 1.3926729763150256, - "learning_rate": 6.22934492742907e-06, - "loss": 0.1871, - "step": 12475 - }, - { - "epoch": 0.63, - "grad_norm": 1.05205076758149, - "learning_rate": 6.2278195783169525e-06, - "loss": 0.1799, - "step": 12476 - }, - { - "epoch": 0.63, - "grad_norm": 0.9209143818079015, - "learning_rate": 6.226294331525437e-06, - "loss": 0.1982, - "step": 12477 - }, - { - "epoch": 0.63, - "grad_norm": 0.9332230467328521, - "learning_rate": 6.224769187095903e-06, - "loss": 0.1717, - "step": 12478 - }, - { - "epoch": 0.63, - "grad_norm": 0.8431590555877424, - "learning_rate": 6.223244145069715e-06, - "loss": 0.1714, - "step": 12479 - }, - { - "epoch": 0.63, - "grad_norm": 0.9699148561201477, - "learning_rate": 6.221719205488248e-06, - "loss": 0.1887, - "step": 12480 - }, - { - "epoch": 0.63, - "grad_norm": 0.8557638152178924, - "learning_rate": 6.220194368392862e-06, - "loss": 0.1801, - "step": 12481 - }, - { - "epoch": 0.63, - "grad_norm": 1.0081281462198912, - "learning_rate": 6.218669633824911e-06, - "loss": 0.1779, - "step": 12482 - }, - { - "epoch": 0.63, - "grad_norm": 0.9963855583977796, - "learning_rate": 6.2171450018257625e-06, - "loss": 0.1647, - "step": 12483 - }, - { - "epoch": 0.63, - "grad_norm": 0.812626859159139, - "learning_rate": 6.2156204724367674e-06, - "loss": 0.1848, - "step": 12484 - }, - { - "epoch": 0.63, - "grad_norm": 1.1443046290186372, - "learning_rate": 6.214096045699285e-06, - "loss": 0.1837, - "step": 12485 - }, - { - "epoch": 0.63, - "grad_norm": 1.0078120851075847, - "learning_rate": 6.212571721654658e-06, - "loss": 0.1919, - "step": 12486 - }, - { - "epoch": 0.63, - "grad_norm": 0.9909980525748541, - "learning_rate": 6.211047500344239e-06, - "loss": 0.1795, - "step": 12487 - }, - { - "epoch": 0.64, - "grad_norm": 0.9126111150104824, - "learning_rate": 6.209523381809366e-06, - "loss": 0.1545, - "step": 12488 - }, - { - "epoch": 0.64, - "grad_norm": 0.9009539115573664, - "learning_rate": 6.207999366091392e-06, - "loss": 0.1921, - "step": 12489 - }, - { - "epoch": 0.64, - "grad_norm": 0.7706876863747594, - "learning_rate": 6.206475453231644e-06, - "loss": 0.1635, - "step": 12490 - }, - { - "epoch": 0.64, - "grad_norm": 1.0875709472808894, - "learning_rate": 6.204951643271466e-06, - "loss": 0.1667, - "step": 12491 - }, - { - "epoch": 0.64, - "grad_norm": 1.060787182075934, - "learning_rate": 6.2034279362521866e-06, - "loss": 0.1804, - "step": 12492 - }, - { - "epoch": 0.64, - "grad_norm": 1.0740900720219337, - "learning_rate": 6.201904332215143e-06, - "loss": 0.1743, - "step": 12493 - }, - { - "epoch": 0.64, - "grad_norm": 0.9956019682721083, - "learning_rate": 6.200380831201655e-06, - "loss": 0.1902, - "step": 12494 - }, - { - "epoch": 0.64, - "grad_norm": 0.8991276352608935, - "learning_rate": 6.198857433253056e-06, - "loss": 0.1836, - "step": 12495 - }, - { - "epoch": 0.64, - "grad_norm": 1.0371989188888475, - "learning_rate": 6.19733413841066e-06, - "loss": 0.1809, - "step": 12496 - }, - { - "epoch": 0.64, - "grad_norm": 1.694860645984785, - "learning_rate": 6.1958109467157925e-06, - "loss": 0.1632, - "step": 12497 - }, - { - "epoch": 0.64, - "grad_norm": 1.22687686411913, - "learning_rate": 6.1942878582097685e-06, - "loss": 0.1845, - "step": 12498 - }, - { - "epoch": 0.64, - "grad_norm": 0.976504010769984, - "learning_rate": 6.192764872933899e-06, - "loss": 0.1771, - "step": 12499 - }, - { - "epoch": 0.64, - "grad_norm": 1.0657364619208907, - "learning_rate": 6.191241990929498e-06, - "loss": 0.1724, - "step": 12500 - }, - { - "epoch": 0.64, - "grad_norm": 1.0860099335201034, - "learning_rate": 6.1897192122378714e-06, - "loss": 0.1886, - "step": 12501 - }, - { - "epoch": 0.64, - "grad_norm": 0.9996622761610917, - "learning_rate": 6.18819653690033e-06, - "loss": 0.1871, - "step": 12502 - }, - { - "epoch": 0.64, - "grad_norm": 1.1164634795618917, - "learning_rate": 6.18667396495817e-06, - "loss": 0.1976, - "step": 12503 - }, - { - "epoch": 0.64, - "grad_norm": 0.8602778593863528, - "learning_rate": 6.185151496452695e-06, - "loss": 0.1791, - "step": 12504 - }, - { - "epoch": 0.64, - "grad_norm": 1.145776520803997, - "learning_rate": 6.1836291314252e-06, - "loss": 0.1649, - "step": 12505 - }, - { - "epoch": 0.64, - "grad_norm": 1.152848083543941, - "learning_rate": 6.182106869916984e-06, - "loss": 0.1775, - "step": 12506 - }, - { - "epoch": 0.64, - "grad_norm": 1.6093748241813428, - "learning_rate": 6.180584711969331e-06, - "loss": 0.1949, - "step": 12507 - }, - { - "epoch": 0.64, - "grad_norm": 1.1685262483998686, - "learning_rate": 6.179062657623536e-06, - "loss": 0.2048, - "step": 12508 - }, - { - "epoch": 0.64, - "grad_norm": 1.0364475450915975, - "learning_rate": 6.17754070692088e-06, - "loss": 0.1723, - "step": 12509 - }, - { - "epoch": 0.64, - "grad_norm": 1.137829764269063, - "learning_rate": 6.17601885990265e-06, - "loss": 0.1862, - "step": 12510 - }, - { - "epoch": 0.64, - "grad_norm": 0.9867987488422779, - "learning_rate": 6.174497116610121e-06, - "loss": 0.1683, - "step": 12511 - }, - { - "epoch": 0.64, - "grad_norm": 1.1026191481551983, - "learning_rate": 6.1729754770845795e-06, - "loss": 0.1665, - "step": 12512 - }, - { - "epoch": 0.64, - "grad_norm": 1.263035244463893, - "learning_rate": 6.171453941367289e-06, - "loss": 0.1601, - "step": 12513 - }, - { - "epoch": 0.64, - "grad_norm": 0.9018081110097101, - "learning_rate": 6.1699325094995284e-06, - "loss": 0.1554, - "step": 12514 - }, - { - "epoch": 0.64, - "grad_norm": 0.8358416616204396, - "learning_rate": 6.168411181522569e-06, - "loss": 0.1863, - "step": 12515 - }, - { - "epoch": 0.64, - "grad_norm": 1.3526473813417075, - "learning_rate": 6.1668899574776665e-06, - "loss": 0.1791, - "step": 12516 - }, - { - "epoch": 0.64, - "grad_norm": 1.074541065702537, - "learning_rate": 6.165368837406094e-06, - "loss": 0.1742, - "step": 12517 - }, - { - "epoch": 0.64, - "grad_norm": 1.266699220462641, - "learning_rate": 6.1638478213491045e-06, - "loss": 0.1811, - "step": 12518 - }, - { - "epoch": 0.64, - "grad_norm": 1.1978464666007855, - "learning_rate": 6.162326909347964e-06, - "loss": 0.1596, - "step": 12519 - }, - { - "epoch": 0.64, - "grad_norm": 0.9428725976666876, - "learning_rate": 6.160806101443919e-06, - "loss": 0.2014, - "step": 12520 - }, - { - "epoch": 0.64, - "grad_norm": 2.2711091819725837, - "learning_rate": 6.159285397678231e-06, - "loss": 0.1745, - "step": 12521 - }, - { - "epoch": 0.64, - "grad_norm": 0.8918084712482156, - "learning_rate": 6.157764798092139e-06, - "loss": 0.169, - "step": 12522 - }, - { - "epoch": 0.64, - "grad_norm": 1.1336763768588909, - "learning_rate": 6.156244302726894e-06, - "loss": 0.1734, - "step": 12523 - }, - { - "epoch": 0.64, - "grad_norm": 0.8880467909572677, - "learning_rate": 6.154723911623739e-06, - "loss": 0.1854, - "step": 12524 - }, - { - "epoch": 0.64, - "grad_norm": 2.2989210515788927, - "learning_rate": 6.153203624823918e-06, - "loss": 0.1713, - "step": 12525 - }, - { - "epoch": 0.64, - "grad_norm": 0.845146306048577, - "learning_rate": 6.151683442368662e-06, - "loss": 0.1709, - "step": 12526 - }, - { - "epoch": 0.64, - "grad_norm": 0.9185256844844901, - "learning_rate": 6.150163364299213e-06, - "loss": 0.1749, - "step": 12527 - }, - { - "epoch": 0.64, - "grad_norm": 0.797325566838219, - "learning_rate": 6.148643390656797e-06, - "loss": 0.1799, - "step": 12528 - }, - { - "epoch": 0.64, - "grad_norm": 1.075427656446058, - "learning_rate": 6.147123521482652e-06, - "loss": 0.2035, - "step": 12529 - }, - { - "epoch": 0.64, - "grad_norm": 1.4026009435662508, - "learning_rate": 6.145603756817994e-06, - "loss": 0.174, - "step": 12530 - }, - { - "epoch": 0.64, - "grad_norm": 1.1020490093254394, - "learning_rate": 6.144084096704054e-06, - "loss": 0.1807, - "step": 12531 - }, - { - "epoch": 0.64, - "grad_norm": 1.7151474986946946, - "learning_rate": 6.142564541182052e-06, - "loss": 0.1773, - "step": 12532 - }, - { - "epoch": 0.64, - "grad_norm": 0.8427975962075497, - "learning_rate": 6.141045090293203e-06, - "loss": 0.1482, - "step": 12533 - }, - { - "epoch": 0.64, - "grad_norm": 1.4389259546810442, - "learning_rate": 6.1395257440787246e-06, - "loss": 0.1911, - "step": 12534 - }, - { - "epoch": 0.64, - "grad_norm": 1.1012280045114835, - "learning_rate": 6.1380065025798275e-06, - "loss": 0.1771, - "step": 12535 - }, - { - "epoch": 0.64, - "grad_norm": 0.9123329869617892, - "learning_rate": 6.136487365837723e-06, - "loss": 0.1756, - "step": 12536 - }, - { - "epoch": 0.64, - "grad_norm": 0.8711802443983307, - "learning_rate": 6.134968333893614e-06, - "loss": 0.1823, - "step": 12537 - }, - { - "epoch": 0.64, - "grad_norm": 1.8182388385244617, - "learning_rate": 6.133449406788712e-06, - "loss": 0.1746, - "step": 12538 - }, - { - "epoch": 0.64, - "grad_norm": 0.7628933579636543, - "learning_rate": 6.13193058456421e-06, - "loss": 0.1636, - "step": 12539 - }, - { - "epoch": 0.64, - "grad_norm": 1.0452787874927696, - "learning_rate": 6.13041186726131e-06, - "loss": 0.1752, - "step": 12540 - }, - { - "epoch": 0.64, - "grad_norm": 1.1334682158832199, - "learning_rate": 6.128893254921204e-06, - "loss": 0.1732, - "step": 12541 - }, - { - "epoch": 0.64, - "grad_norm": 0.9651552788366533, - "learning_rate": 6.127374747585093e-06, - "loss": 0.1817, - "step": 12542 - }, - { - "epoch": 0.64, - "grad_norm": 1.003239206354539, - "learning_rate": 6.125856345294156e-06, - "loss": 0.1699, - "step": 12543 - }, - { - "epoch": 0.64, - "grad_norm": 1.0489086733374817, - "learning_rate": 6.124338048089586e-06, - "loss": 0.1975, - "step": 12544 - }, - { - "epoch": 0.64, - "grad_norm": 1.6214492669460177, - "learning_rate": 6.122819856012564e-06, - "loss": 0.1545, - "step": 12545 - }, - { - "epoch": 0.64, - "grad_norm": 0.9810971851715937, - "learning_rate": 6.121301769104277e-06, - "loss": 0.1707, - "step": 12546 - }, - { - "epoch": 0.64, - "grad_norm": 0.9002404769464839, - "learning_rate": 6.119783787405893e-06, - "loss": 0.1655, - "step": 12547 - }, - { - "epoch": 0.64, - "grad_norm": 0.9440022409126835, - "learning_rate": 6.118265910958599e-06, - "loss": 0.1719, - "step": 12548 - }, - { - "epoch": 0.64, - "grad_norm": 0.8769209904214279, - "learning_rate": 6.116748139803554e-06, - "loss": 0.1809, - "step": 12549 - }, - { - "epoch": 0.64, - "grad_norm": 1.039009549593771, - "learning_rate": 6.115230473981939e-06, - "loss": 0.18, - "step": 12550 - }, - { - "epoch": 0.64, - "grad_norm": 0.8954688446998333, - "learning_rate": 6.113712913534919e-06, - "loss": 0.1879, - "step": 12551 - }, - { - "epoch": 0.64, - "grad_norm": 0.8868688380705804, - "learning_rate": 6.1121954585036525e-06, - "loss": 0.1731, - "step": 12552 - }, - { - "epoch": 0.64, - "grad_norm": 0.9068755428729511, - "learning_rate": 6.110678108929304e-06, - "loss": 0.1735, - "step": 12553 - }, - { - "epoch": 0.64, - "grad_norm": 0.8247271190545595, - "learning_rate": 6.109160864853031e-06, - "loss": 0.1609, - "step": 12554 - }, - { - "epoch": 0.64, - "grad_norm": 0.8450076184849792, - "learning_rate": 6.107643726315993e-06, - "loss": 0.1572, - "step": 12555 - }, - { - "epoch": 0.64, - "grad_norm": 0.8092402376512072, - "learning_rate": 6.106126693359334e-06, - "loss": 0.1653, - "step": 12556 - }, - { - "epoch": 0.64, - "grad_norm": 0.8671305954230574, - "learning_rate": 6.104609766024211e-06, - "loss": 0.1621, - "step": 12557 - }, - { - "epoch": 0.64, - "grad_norm": 0.9829882238021675, - "learning_rate": 6.103092944351766e-06, - "loss": 0.1917, - "step": 12558 - }, - { - "epoch": 0.64, - "grad_norm": 0.9452635175929243, - "learning_rate": 6.1015762283831485e-06, - "loss": 0.225, - "step": 12559 - }, - { - "epoch": 0.64, - "grad_norm": 0.9586078615730995, - "learning_rate": 6.100059618159493e-06, - "loss": 0.1717, - "step": 12560 - }, - { - "epoch": 0.64, - "grad_norm": 1.0276062367385321, - "learning_rate": 6.098543113721942e-06, - "loss": 0.1911, - "step": 12561 - }, - { - "epoch": 0.64, - "grad_norm": 0.928138926530368, - "learning_rate": 6.097026715111627e-06, - "loss": 0.1668, - "step": 12562 - }, - { - "epoch": 0.64, - "grad_norm": 1.5221052583071613, - "learning_rate": 6.095510422369687e-06, - "loss": 0.1712, - "step": 12563 - }, - { - "epoch": 0.64, - "grad_norm": 1.0930100736865827, - "learning_rate": 6.093994235537244e-06, - "loss": 0.1807, - "step": 12564 - }, - { - "epoch": 0.64, - "grad_norm": 0.8970169138993683, - "learning_rate": 6.092478154655431e-06, - "loss": 0.1762, - "step": 12565 - }, - { - "epoch": 0.64, - "grad_norm": 0.8136805000756253, - "learning_rate": 6.090962179765365e-06, - "loss": 0.1645, - "step": 12566 - }, - { - "epoch": 0.64, - "grad_norm": 1.0157663073300447, - "learning_rate": 6.089446310908174e-06, - "loss": 0.1983, - "step": 12567 - }, - { - "epoch": 0.64, - "grad_norm": 1.4187437187365042, - "learning_rate": 6.087930548124973e-06, - "loss": 0.1778, - "step": 12568 - }, - { - "epoch": 0.64, - "grad_norm": 0.8998068081604149, - "learning_rate": 6.086414891456873e-06, - "loss": 0.2043, - "step": 12569 - }, - { - "epoch": 0.64, - "grad_norm": 1.23602924999552, - "learning_rate": 6.084899340944993e-06, - "loss": 0.1731, - "step": 12570 - }, - { - "epoch": 0.64, - "grad_norm": 1.0015833512001684, - "learning_rate": 6.083383896630437e-06, - "loss": 0.181, - "step": 12571 - }, - { - "epoch": 0.64, - "grad_norm": 1.1870337585167698, - "learning_rate": 6.081868558554318e-06, - "loss": 0.1765, - "step": 12572 - }, - { - "epoch": 0.64, - "grad_norm": 1.041099892707607, - "learning_rate": 6.080353326757732e-06, - "loss": 0.1851, - "step": 12573 - }, - { - "epoch": 0.64, - "grad_norm": 0.9109856770529133, - "learning_rate": 6.078838201281785e-06, - "loss": 0.1775, - "step": 12574 - }, - { - "epoch": 0.64, - "grad_norm": 0.8825147559118742, - "learning_rate": 6.077323182167572e-06, - "loss": 0.1584, - "step": 12575 - }, - { - "epoch": 0.64, - "grad_norm": 0.9183584383098351, - "learning_rate": 6.075808269456191e-06, - "loss": 0.1842, - "step": 12576 - }, - { - "epoch": 0.64, - "grad_norm": 0.8299857824314143, - "learning_rate": 6.074293463188731e-06, - "loss": 0.1685, - "step": 12577 - }, - { - "epoch": 0.64, - "grad_norm": 0.815838704444408, - "learning_rate": 6.072778763406285e-06, - "loss": 0.171, - "step": 12578 - }, - { - "epoch": 0.64, - "grad_norm": 0.8010028661546602, - "learning_rate": 6.071264170149933e-06, - "loss": 0.1635, - "step": 12579 - }, - { - "epoch": 0.64, - "grad_norm": 1.0617631073565157, - "learning_rate": 6.069749683460765e-06, - "loss": 0.1699, - "step": 12580 - }, - { - "epoch": 0.64, - "grad_norm": 0.8113835607544837, - "learning_rate": 6.068235303379857e-06, - "loss": 0.1667, - "step": 12581 - }, - { - "epoch": 0.64, - "grad_norm": 0.7552633298934263, - "learning_rate": 6.066721029948291e-06, - "loss": 0.1869, - "step": 12582 - }, - { - "epoch": 0.64, - "grad_norm": 0.8411484353367596, - "learning_rate": 6.065206863207136e-06, - "loss": 0.1598, - "step": 12583 - }, - { - "epoch": 0.64, - "grad_norm": 1.1801608714988914, - "learning_rate": 6.06369280319747e-06, - "loss": 0.1732, - "step": 12584 - }, - { - "epoch": 0.64, - "grad_norm": 0.9325254706005588, - "learning_rate": 6.062178849960359e-06, - "loss": 0.1748, - "step": 12585 - }, - { - "epoch": 0.64, - "grad_norm": 0.8197909597762826, - "learning_rate": 6.060665003536868e-06, - "loss": 0.1739, - "step": 12586 - }, - { - "epoch": 0.64, - "grad_norm": 0.8392306554506301, - "learning_rate": 6.059151263968061e-06, - "loss": 0.1668, - "step": 12587 - }, - { - "epoch": 0.64, - "grad_norm": 0.9631135744680915, - "learning_rate": 6.057637631294997e-06, - "loss": 0.1712, - "step": 12588 - }, - { - "epoch": 0.64, - "grad_norm": 0.9695296619769307, - "learning_rate": 6.0561241055587385e-06, - "loss": 0.1689, - "step": 12589 - }, - { - "epoch": 0.64, - "grad_norm": 0.9120100410242605, - "learning_rate": 6.054610686800333e-06, - "loss": 0.1876, - "step": 12590 - }, - { - "epoch": 0.64, - "grad_norm": 0.7689129787731763, - "learning_rate": 6.053097375060839e-06, - "loss": 0.1474, - "step": 12591 - }, - { - "epoch": 0.64, - "grad_norm": 1.12835739084911, - "learning_rate": 6.051584170381298e-06, - "loss": 0.1648, - "step": 12592 - }, - { - "epoch": 0.64, - "grad_norm": 0.7884327382625383, - "learning_rate": 6.050071072802761e-06, - "loss": 0.154, - "step": 12593 - }, - { - "epoch": 0.64, - "grad_norm": 0.9505681884985597, - "learning_rate": 6.048558082366269e-06, - "loss": 0.173, - "step": 12594 - }, - { - "epoch": 0.64, - "grad_norm": 1.1454112915904329, - "learning_rate": 6.047045199112865e-06, - "loss": 0.1908, - "step": 12595 - }, - { - "epoch": 0.64, - "grad_norm": 1.2481260398457952, - "learning_rate": 6.045532423083578e-06, - "loss": 0.1882, - "step": 12596 - }, - { - "epoch": 0.64, - "grad_norm": 1.0597575897436244, - "learning_rate": 6.04401975431945e-06, - "loss": 0.1596, - "step": 12597 - }, - { - "epoch": 0.64, - "grad_norm": 0.9179478487130808, - "learning_rate": 6.042507192861509e-06, - "loss": 0.1782, - "step": 12598 - }, - { - "epoch": 0.64, - "grad_norm": 1.1493989123689738, - "learning_rate": 6.040994738750788e-06, - "loss": 0.1578, - "step": 12599 - }, - { - "epoch": 0.64, - "grad_norm": 1.1523802744773493, - "learning_rate": 6.039482392028302e-06, - "loss": 0.162, - "step": 12600 - }, - { - "epoch": 0.64, - "grad_norm": 0.9364003989788462, - "learning_rate": 6.037970152735083e-06, - "loss": 0.1816, - "step": 12601 - }, - { - "epoch": 0.64, - "grad_norm": 0.9241871003791564, - "learning_rate": 6.036458020912151e-06, - "loss": 0.1792, - "step": 12602 - }, - { - "epoch": 0.64, - "grad_norm": 0.8505269578328596, - "learning_rate": 6.034945996600512e-06, - "loss": 0.1693, - "step": 12603 - }, - { - "epoch": 0.64, - "grad_norm": 1.070369815758006, - "learning_rate": 6.033434079841192e-06, - "loss": 0.1704, - "step": 12604 - }, - { - "epoch": 0.64, - "grad_norm": 0.7797409791480164, - "learning_rate": 6.031922270675193e-06, - "loss": 0.1754, - "step": 12605 - }, - { - "epoch": 0.64, - "grad_norm": 0.9061342005549365, - "learning_rate": 6.0304105691435285e-06, - "loss": 0.1869, - "step": 12606 - }, - { - "epoch": 0.64, - "grad_norm": 1.2881616547284978, - "learning_rate": 6.028898975287199e-06, - "loss": 0.1554, - "step": 12607 - }, - { - "epoch": 0.64, - "grad_norm": 1.3061192645383484, - "learning_rate": 6.027387489147214e-06, - "loss": 0.1809, - "step": 12608 - }, - { - "epoch": 0.64, - "grad_norm": 1.1680155757501567, - "learning_rate": 6.025876110764563e-06, - "loss": 0.1547, - "step": 12609 - }, - { - "epoch": 0.64, - "grad_norm": 0.8902388411726355, - "learning_rate": 6.02436484018025e-06, - "loss": 0.1789, - "step": 12610 - }, - { - "epoch": 0.64, - "grad_norm": 0.8750040119051603, - "learning_rate": 6.022853677435262e-06, - "loss": 0.1615, - "step": 12611 - }, - { - "epoch": 0.64, - "grad_norm": 1.0092482811801622, - "learning_rate": 6.021342622570597e-06, - "loss": 0.1881, - "step": 12612 - }, - { - "epoch": 0.64, - "grad_norm": 1.0436717957655843, - "learning_rate": 6.019831675627235e-06, - "loss": 0.184, - "step": 12613 - }, - { - "epoch": 0.64, - "grad_norm": 1.0653181286038373, - "learning_rate": 6.018320836646164e-06, - "loss": 0.1948, - "step": 12614 - }, - { - "epoch": 0.64, - "grad_norm": 1.280309670639057, - "learning_rate": 6.016810105668365e-06, - "loss": 0.1689, - "step": 12615 - }, - { - "epoch": 0.64, - "grad_norm": 1.231377345198479, - "learning_rate": 6.015299482734819e-06, - "loss": 0.1624, - "step": 12616 - }, - { - "epoch": 0.64, - "grad_norm": 0.7934969383349629, - "learning_rate": 6.013788967886496e-06, - "loss": 0.1635, - "step": 12617 - }, - { - "epoch": 0.64, - "grad_norm": 0.9639688222591635, - "learning_rate": 6.012278561164377e-06, - "loss": 0.2132, - "step": 12618 - }, - { - "epoch": 0.64, - "grad_norm": 0.8849522972008587, - "learning_rate": 6.010768262609425e-06, - "loss": 0.162, - "step": 12619 - }, - { - "epoch": 0.64, - "grad_norm": 1.0277230971550373, - "learning_rate": 6.009258072262607e-06, - "loss": 0.1852, - "step": 12620 - }, - { - "epoch": 0.64, - "grad_norm": 1.0009072609625371, - "learning_rate": 6.0077479901648935e-06, - "loss": 0.149, - "step": 12621 - }, - { - "epoch": 0.64, - "grad_norm": 1.136258000812448, - "learning_rate": 6.006238016357238e-06, - "loss": 0.1721, - "step": 12622 - }, - { - "epoch": 0.64, - "grad_norm": 0.8486374392042341, - "learning_rate": 6.0047281508806035e-06, - "loss": 0.1714, - "step": 12623 - }, - { - "epoch": 0.64, - "grad_norm": 0.9263199151783211, - "learning_rate": 6.00321839377594e-06, - "loss": 0.1878, - "step": 12624 - }, - { - "epoch": 0.64, - "grad_norm": 0.8905235912213837, - "learning_rate": 6.001708745084209e-06, - "loss": 0.1812, - "step": 12625 - }, - { - "epoch": 0.64, - "grad_norm": 1.0831038098071641, - "learning_rate": 6.000199204846348e-06, - "loss": 0.1646, - "step": 12626 - }, - { - "epoch": 0.64, - "grad_norm": 0.742740153937068, - "learning_rate": 5.998689773103314e-06, - "loss": 0.1742, - "step": 12627 - }, - { - "epoch": 0.64, - "grad_norm": 0.8225453319335321, - "learning_rate": 5.997180449896043e-06, - "loss": 0.1609, - "step": 12628 - }, - { - "epoch": 0.64, - "grad_norm": 0.9766197525212346, - "learning_rate": 5.995671235265483e-06, - "loss": 0.2027, - "step": 12629 - }, - { - "epoch": 0.64, - "grad_norm": 0.8413232819910352, - "learning_rate": 5.994162129252561e-06, - "loss": 0.2092, - "step": 12630 - }, - { - "epoch": 0.64, - "grad_norm": 3.6884841068191743, - "learning_rate": 5.992653131898223e-06, - "loss": 0.1658, - "step": 12631 - }, - { - "epoch": 0.64, - "grad_norm": 1.094459052333219, - "learning_rate": 5.991144243243392e-06, - "loss": 0.1712, - "step": 12632 - }, - { - "epoch": 0.64, - "grad_norm": 1.5800840062814814, - "learning_rate": 5.989635463329e-06, - "loss": 0.1637, - "step": 12633 - }, - { - "epoch": 0.64, - "grad_norm": 0.8334496722945175, - "learning_rate": 5.988126792195972e-06, - "loss": 0.161, - "step": 12634 - }, - { - "epoch": 0.64, - "grad_norm": 1.0395517635284837, - "learning_rate": 5.986618229885234e-06, - "loss": 0.1679, - "step": 12635 - }, - { - "epoch": 0.64, - "grad_norm": 1.1216746978677126, - "learning_rate": 5.985109776437699e-06, - "loss": 0.1818, - "step": 12636 - }, - { - "epoch": 0.64, - "grad_norm": 1.139123745295039, - "learning_rate": 5.983601431894291e-06, - "loss": 0.1953, - "step": 12637 - }, - { - "epoch": 0.64, - "grad_norm": 0.8956866938954258, - "learning_rate": 5.982093196295924e-06, - "loss": 0.1775, - "step": 12638 - }, - { - "epoch": 0.64, - "grad_norm": 0.939937101816068, - "learning_rate": 5.9805850696835e-06, - "loss": 0.1668, - "step": 12639 - }, - { - "epoch": 0.64, - "grad_norm": 0.9370917688385507, - "learning_rate": 5.979077052097936e-06, - "loss": 0.1718, - "step": 12640 - }, - { - "epoch": 0.64, - "grad_norm": 0.9764839246852787, - "learning_rate": 5.977569143580132e-06, - "loss": 0.1749, - "step": 12641 - }, - { - "epoch": 0.64, - "grad_norm": 0.857365213898431, - "learning_rate": 5.976061344170995e-06, - "loss": 0.1691, - "step": 12642 - }, - { - "epoch": 0.64, - "grad_norm": 1.000499061221936, - "learning_rate": 5.974553653911419e-06, - "loss": 0.1784, - "step": 12643 - }, - { - "epoch": 0.64, - "grad_norm": 0.8623887963384143, - "learning_rate": 5.973046072842305e-06, - "loss": 0.1772, - "step": 12644 - }, - { - "epoch": 0.64, - "grad_norm": 0.749589177560146, - "learning_rate": 5.971538601004542e-06, - "loss": 0.1734, - "step": 12645 - }, - { - "epoch": 0.64, - "grad_norm": 2.1602944806291715, - "learning_rate": 5.970031238439023e-06, - "loss": 0.1871, - "step": 12646 - }, - { - "epoch": 0.64, - "grad_norm": 0.8914992617167609, - "learning_rate": 5.968523985186632e-06, - "loss": 0.1541, - "step": 12647 - }, - { - "epoch": 0.64, - "grad_norm": 0.7662373818745853, - "learning_rate": 5.967016841288258e-06, - "loss": 0.1553, - "step": 12648 - }, - { - "epoch": 0.64, - "grad_norm": 2.1534363779308747, - "learning_rate": 5.965509806784777e-06, - "loss": 0.1768, - "step": 12649 - }, - { - "epoch": 0.64, - "grad_norm": 1.753453688553895, - "learning_rate": 5.964002881717073e-06, - "loss": 0.1986, - "step": 12650 - }, - { - "epoch": 0.64, - "grad_norm": 1.079062233365335, - "learning_rate": 5.962496066126018e-06, - "loss": 0.1727, - "step": 12651 - }, - { - "epoch": 0.64, - "grad_norm": 1.7864402552268221, - "learning_rate": 5.960989360052487e-06, - "loss": 0.1631, - "step": 12652 - }, - { - "epoch": 0.64, - "grad_norm": 1.3834907670938499, - "learning_rate": 5.959482763537344e-06, - "loss": 0.2107, - "step": 12653 - }, - { - "epoch": 0.64, - "grad_norm": 0.8888693044438032, - "learning_rate": 5.9579762766214624e-06, - "loss": 0.166, - "step": 12654 - }, - { - "epoch": 0.64, - "grad_norm": 1.041141009502544, - "learning_rate": 5.956469899345704e-06, - "loss": 0.1655, - "step": 12655 - }, - { - "epoch": 0.64, - "grad_norm": 1.14499034697284, - "learning_rate": 5.954963631750923e-06, - "loss": 0.1748, - "step": 12656 - }, - { - "epoch": 0.64, - "grad_norm": 0.7490457820595138, - "learning_rate": 5.953457473877988e-06, - "loss": 0.1484, - "step": 12657 - }, - { - "epoch": 0.64, - "grad_norm": 1.1030787733107046, - "learning_rate": 5.9519514257677416e-06, - "loss": 0.1658, - "step": 12658 - }, - { - "epoch": 0.64, - "grad_norm": 1.1598241699401937, - "learning_rate": 5.950445487461045e-06, - "loss": 0.1905, - "step": 12659 - }, - { - "epoch": 0.64, - "grad_norm": 1.3936158789622728, - "learning_rate": 5.94893965899874e-06, - "loss": 0.1799, - "step": 12660 - }, - { - "epoch": 0.64, - "grad_norm": 0.9178583971284034, - "learning_rate": 5.947433940421681e-06, - "loss": 0.1729, - "step": 12661 - }, - { - "epoch": 0.64, - "grad_norm": 0.8496088616896511, - "learning_rate": 5.9459283317707e-06, - "loss": 0.1826, - "step": 12662 - }, - { - "epoch": 0.64, - "grad_norm": 0.973870769727679, - "learning_rate": 5.944422833086645e-06, - "loss": 0.169, - "step": 12663 - }, - { - "epoch": 0.64, - "grad_norm": 0.7730557051735637, - "learning_rate": 5.942917444410346e-06, - "loss": 0.1772, - "step": 12664 - }, - { - "epoch": 0.64, - "grad_norm": 0.8075041620874908, - "learning_rate": 5.941412165782645e-06, - "loss": 0.1764, - "step": 12665 - }, - { - "epoch": 0.64, - "grad_norm": 0.8686461335552085, - "learning_rate": 5.939906997244364e-06, - "loss": 0.1511, - "step": 12666 - }, - { - "epoch": 0.64, - "grad_norm": 0.94060984040844, - "learning_rate": 5.938401938836339e-06, - "loss": 0.1719, - "step": 12667 - }, - { - "epoch": 0.64, - "grad_norm": 1.0649589557653525, - "learning_rate": 5.936896990599388e-06, - "loss": 0.2042, - "step": 12668 - }, - { - "epoch": 0.64, - "grad_norm": 1.4109876552900762, - "learning_rate": 5.9353921525743394e-06, - "loss": 0.1642, - "step": 12669 - }, - { - "epoch": 0.64, - "grad_norm": 0.9844463011964707, - "learning_rate": 5.933887424802003e-06, - "loss": 0.174, - "step": 12670 - }, - { - "epoch": 0.64, - "grad_norm": 0.8399276919431213, - "learning_rate": 5.9323828073232025e-06, - "loss": 0.1947, - "step": 12671 - }, - { - "epoch": 0.64, - "grad_norm": 1.3259786476616642, - "learning_rate": 5.930878300178751e-06, - "loss": 0.1677, - "step": 12672 - }, - { - "epoch": 0.64, - "grad_norm": 1.0698628567492015, - "learning_rate": 5.929373903409451e-06, - "loss": 0.1983, - "step": 12673 - }, - { - "epoch": 0.64, - "grad_norm": 0.9322445915079713, - "learning_rate": 5.9278696170561175e-06, - "loss": 0.1894, - "step": 12674 - }, - { - "epoch": 0.64, - "grad_norm": 1.0557079540673695, - "learning_rate": 5.926365441159547e-06, - "loss": 0.1894, - "step": 12675 - }, - { - "epoch": 0.64, - "grad_norm": 0.8609547739005059, - "learning_rate": 5.924861375760547e-06, - "loss": 0.1706, - "step": 12676 - }, - { - "epoch": 0.64, - "grad_norm": 1.1008738440055796, - "learning_rate": 5.923357420899908e-06, - "loss": 0.1648, - "step": 12677 - }, - { - "epoch": 0.64, - "grad_norm": 1.2090115629624052, - "learning_rate": 5.921853576618435e-06, - "loss": 0.1855, - "step": 12678 - }, - { - "epoch": 0.64, - "grad_norm": 1.6107429435083536, - "learning_rate": 5.920349842956909e-06, - "loss": 0.1756, - "step": 12679 - }, - { - "epoch": 0.64, - "grad_norm": 0.9722248729245764, - "learning_rate": 5.918846219956126e-06, - "loss": 0.1689, - "step": 12680 - }, - { - "epoch": 0.64, - "grad_norm": 0.8651383834716352, - "learning_rate": 5.917342707656868e-06, - "loss": 0.1806, - "step": 12681 - }, - { - "epoch": 0.64, - "grad_norm": 1.0928700656041868, - "learning_rate": 5.915839306099924e-06, - "loss": 0.1755, - "step": 12682 - }, - { - "epoch": 0.64, - "grad_norm": 1.1338462339538051, - "learning_rate": 5.9143360153260655e-06, - "loss": 0.1763, - "step": 12683 - }, - { - "epoch": 0.65, - "grad_norm": 0.9910656380198765, - "learning_rate": 5.912832835376074e-06, - "loss": 0.1552, - "step": 12684 - }, - { - "epoch": 0.65, - "grad_norm": 1.0793574090034495, - "learning_rate": 5.911329766290723e-06, - "loss": 0.1802, - "step": 12685 - }, - { - "epoch": 0.65, - "grad_norm": 0.9980755060477906, - "learning_rate": 5.9098268081107855e-06, - "loss": 0.1837, - "step": 12686 - }, - { - "epoch": 0.65, - "grad_norm": 1.013160068361227, - "learning_rate": 5.9083239608770225e-06, - "loss": 0.1778, - "step": 12687 - }, - { - "epoch": 0.65, - "grad_norm": 1.8906471754734917, - "learning_rate": 5.9068212246302084e-06, - "loss": 0.1656, - "step": 12688 - }, - { - "epoch": 0.65, - "grad_norm": 0.8351175530599811, - "learning_rate": 5.9053185994110975e-06, - "loss": 0.1603, - "step": 12689 - }, - { - "epoch": 0.65, - "grad_norm": 1.2711286630467495, - "learning_rate": 5.903816085260447e-06, - "loss": 0.1776, - "step": 12690 - }, - { - "epoch": 0.65, - "grad_norm": 0.928024545479977, - "learning_rate": 5.902313682219023e-06, - "loss": 0.1774, - "step": 12691 - }, - { - "epoch": 0.65, - "grad_norm": 1.6213176678927754, - "learning_rate": 5.9008113903275675e-06, - "loss": 0.1622, - "step": 12692 - }, - { - "epoch": 0.65, - "grad_norm": 0.9402086897119339, - "learning_rate": 5.899309209626836e-06, - "loss": 0.1659, - "step": 12693 - }, - { - "epoch": 0.65, - "grad_norm": 1.0009811336498147, - "learning_rate": 5.8978071401575724e-06, - "loss": 0.1802, - "step": 12694 - }, - { - "epoch": 0.65, - "grad_norm": 0.9628070707067192, - "learning_rate": 5.896305181960524e-06, - "loss": 0.1425, - "step": 12695 - }, - { - "epoch": 0.65, - "grad_norm": 0.7368779807655511, - "learning_rate": 5.894803335076427e-06, - "loss": 0.1902, - "step": 12696 - }, - { - "epoch": 0.65, - "grad_norm": 0.9819880577436897, - "learning_rate": 5.8933015995460215e-06, - "loss": 0.1744, - "step": 12697 - }, - { - "epoch": 0.65, - "grad_norm": 1.1420654284111416, - "learning_rate": 5.8917999754100415e-06, - "loss": 0.1839, - "step": 12698 - }, - { - "epoch": 0.65, - "grad_norm": 0.8692782871985733, - "learning_rate": 5.890298462709224e-06, - "loss": 0.175, - "step": 12699 - }, - { - "epoch": 0.65, - "grad_norm": 1.945136366281455, - "learning_rate": 5.888797061484288e-06, - "loss": 0.1802, - "step": 12700 - }, - { - "epoch": 0.65, - "grad_norm": 0.8843237227460644, - "learning_rate": 5.887295771775968e-06, - "loss": 0.1718, - "step": 12701 - }, - { - "epoch": 0.65, - "grad_norm": 1.2556922365483774, - "learning_rate": 5.885794593624978e-06, - "loss": 0.1818, - "step": 12702 - }, - { - "epoch": 0.65, - "grad_norm": 0.9307613969627148, - "learning_rate": 5.884293527072045e-06, - "loss": 0.1816, - "step": 12703 - }, - { - "epoch": 0.65, - "grad_norm": 0.9934432917494341, - "learning_rate": 5.88279257215788e-06, - "loss": 0.1676, - "step": 12704 - }, - { - "epoch": 0.65, - "grad_norm": 0.9168098448675442, - "learning_rate": 5.881291728923202e-06, - "loss": 0.1724, - "step": 12705 - }, - { - "epoch": 0.65, - "grad_norm": 0.9078438921079398, - "learning_rate": 5.8797909974087166e-06, - "loss": 0.1746, - "step": 12706 - }, - { - "epoch": 0.65, - "grad_norm": 0.9363191791158435, - "learning_rate": 5.878290377655134e-06, - "loss": 0.1869, - "step": 12707 - }, - { - "epoch": 0.65, - "grad_norm": 0.8066846892597944, - "learning_rate": 5.876789869703159e-06, - "loss": 0.1675, - "step": 12708 - }, - { - "epoch": 0.65, - "grad_norm": 1.1161164446921086, - "learning_rate": 5.875289473593489e-06, - "loss": 0.1872, - "step": 12709 - }, - { - "epoch": 0.65, - "grad_norm": 0.8283708904113805, - "learning_rate": 5.8737891893668255e-06, - "loss": 0.1833, - "step": 12710 - }, - { - "epoch": 0.65, - "grad_norm": 0.9024395515456646, - "learning_rate": 5.872289017063861e-06, - "loss": 0.1809, - "step": 12711 - }, - { - "epoch": 0.65, - "grad_norm": 1.864448371808424, - "learning_rate": 5.8707889567252965e-06, - "loss": 0.1474, - "step": 12712 - }, - { - "epoch": 0.65, - "grad_norm": 0.9419293887724072, - "learning_rate": 5.869289008391809e-06, - "loss": 0.1964, - "step": 12713 - }, - { - "epoch": 0.65, - "grad_norm": 2.14700717152764, - "learning_rate": 5.8677891721040945e-06, - "loss": 0.1757, - "step": 12714 - }, - { - "epoch": 0.65, - "grad_norm": 0.8869622133560018, - "learning_rate": 5.866289447902829e-06, - "loss": 0.2008, - "step": 12715 - }, - { - "epoch": 0.65, - "grad_norm": 1.4162474657019513, - "learning_rate": 5.864789835828697e-06, - "loss": 0.1503, - "step": 12716 - }, - { - "epoch": 0.65, - "grad_norm": 1.0489769046663078, - "learning_rate": 5.863290335922371e-06, - "loss": 0.1688, - "step": 12717 - }, - { - "epoch": 0.65, - "grad_norm": 1.1416131733336279, - "learning_rate": 5.861790948224535e-06, - "loss": 0.2004, - "step": 12718 - }, - { - "epoch": 0.65, - "grad_norm": 0.7944055566838395, - "learning_rate": 5.860291672775847e-06, - "loss": 0.1544, - "step": 12719 - }, - { - "epoch": 0.65, - "grad_norm": 1.1178239682169353, - "learning_rate": 5.858792509616984e-06, - "loss": 0.1741, - "step": 12720 - }, - { - "epoch": 0.65, - "grad_norm": 1.9122154797112343, - "learning_rate": 5.857293458788607e-06, - "loss": 0.1751, - "step": 12721 - }, - { - "epoch": 0.65, - "grad_norm": 0.9993588874074452, - "learning_rate": 5.855794520331382e-06, - "loss": 0.1545, - "step": 12722 - }, - { - "epoch": 0.65, - "grad_norm": 0.9449544422557149, - "learning_rate": 5.854295694285961e-06, - "loss": 0.1748, - "step": 12723 - }, - { - "epoch": 0.65, - "grad_norm": 1.0191948675121945, - "learning_rate": 5.852796980693005e-06, - "loss": 0.1866, - "step": 12724 - }, - { - "epoch": 0.65, - "grad_norm": 0.9340845236893027, - "learning_rate": 5.8512983795931665e-06, - "loss": 0.1717, - "step": 12725 - }, - { - "epoch": 0.65, - "grad_norm": 0.9371937668222649, - "learning_rate": 5.8497998910270915e-06, - "loss": 0.1793, - "step": 12726 - }, - { - "epoch": 0.65, - "grad_norm": 0.8167577931936453, - "learning_rate": 5.848301515035433e-06, - "loss": 0.1629, - "step": 12727 - }, - { - "epoch": 0.65, - "grad_norm": 0.9070997042355996, - "learning_rate": 5.846803251658824e-06, - "loss": 0.1653, - "step": 12728 - }, - { - "epoch": 0.65, - "grad_norm": 1.2774767895726513, - "learning_rate": 5.8453051009379145e-06, - "loss": 0.1733, - "step": 12729 - }, - { - "epoch": 0.65, - "grad_norm": 0.753620675806103, - "learning_rate": 5.843807062913338e-06, - "loss": 0.1849, - "step": 12730 - }, - { - "epoch": 0.65, - "grad_norm": 0.9565505103888103, - "learning_rate": 5.842309137625732e-06, - "loss": 0.1838, - "step": 12731 - }, - { - "epoch": 0.65, - "grad_norm": 1.5290954314169458, - "learning_rate": 5.840811325115723e-06, - "loss": 0.1738, - "step": 12732 - }, - { - "epoch": 0.65, - "grad_norm": 0.979728759438856, - "learning_rate": 5.8393136254239424e-06, - "loss": 0.1738, - "step": 12733 - }, - { - "epoch": 0.65, - "grad_norm": 0.8586160518040348, - "learning_rate": 5.837816038591016e-06, - "loss": 0.1518, - "step": 12734 - }, - { - "epoch": 0.65, - "grad_norm": 1.0986488341282836, - "learning_rate": 5.836318564657561e-06, - "loss": 0.1572, - "step": 12735 - }, - { - "epoch": 0.65, - "grad_norm": 0.970494590695129, - "learning_rate": 5.8348212036642004e-06, - "loss": 0.206, - "step": 12736 - }, - { - "epoch": 0.65, - "grad_norm": 1.1964209550960125, - "learning_rate": 5.833323955651555e-06, - "loss": 0.1912, - "step": 12737 - }, - { - "epoch": 0.65, - "grad_norm": 1.7420502057662117, - "learning_rate": 5.831826820660228e-06, - "loss": 0.1594, - "step": 12738 - }, - { - "epoch": 0.65, - "grad_norm": 0.9275542125230787, - "learning_rate": 5.8303297987308384e-06, - "loss": 0.1778, - "step": 12739 - }, - { - "epoch": 0.65, - "grad_norm": 1.4329471083008936, - "learning_rate": 5.828832889903983e-06, - "loss": 0.1749, - "step": 12740 - }, - { - "epoch": 0.65, - "grad_norm": 1.0909640951660313, - "learning_rate": 5.827336094220278e-06, - "loss": 0.1742, - "step": 12741 - }, - { - "epoch": 0.65, - "grad_norm": 0.8744495369447978, - "learning_rate": 5.825839411720314e-06, - "loss": 0.1972, - "step": 12742 - }, - { - "epoch": 0.65, - "grad_norm": 1.1558371119444353, - "learning_rate": 5.824342842444689e-06, - "loss": 0.1768, - "step": 12743 - }, - { - "epoch": 0.65, - "grad_norm": 1.0440422547177555, - "learning_rate": 5.822846386434e-06, - "loss": 0.1891, - "step": 12744 - }, - { - "epoch": 0.65, - "grad_norm": 0.9694946099458643, - "learning_rate": 5.82135004372884e-06, - "loss": 0.1823, - "step": 12745 - }, - { - "epoch": 0.65, - "grad_norm": 0.8690001007512806, - "learning_rate": 5.819853814369798e-06, - "loss": 0.1734, - "step": 12746 - }, - { - "epoch": 0.65, - "grad_norm": 0.9975380839997835, - "learning_rate": 5.818357698397455e-06, - "loss": 0.1795, - "step": 12747 - }, - { - "epoch": 0.65, - "grad_norm": 2.109368298211608, - "learning_rate": 5.816861695852398e-06, - "loss": 0.1872, - "step": 12748 - }, - { - "epoch": 0.65, - "grad_norm": 0.9314307134735961, - "learning_rate": 5.815365806775201e-06, - "loss": 0.1967, - "step": 12749 - }, - { - "epoch": 0.65, - "grad_norm": 1.1428904285367345, - "learning_rate": 5.813870031206448e-06, - "loss": 0.1875, - "step": 12750 - }, - { - "epoch": 0.65, - "grad_norm": 0.9960408697173141, - "learning_rate": 5.812374369186701e-06, - "loss": 0.1655, - "step": 12751 - }, - { - "epoch": 0.65, - "grad_norm": 1.2568265105055656, - "learning_rate": 5.8108788207565355e-06, - "loss": 0.1743, - "step": 12752 - }, - { - "epoch": 0.65, - "grad_norm": 0.8431837679769912, - "learning_rate": 5.8093833859565196e-06, - "loss": 0.1824, - "step": 12753 - }, - { - "epoch": 0.65, - "grad_norm": 1.1127045480558082, - "learning_rate": 5.80788806482722e-06, - "loss": 0.1508, - "step": 12754 - }, - { - "epoch": 0.65, - "grad_norm": 1.0746524208411485, - "learning_rate": 5.806392857409189e-06, - "loss": 0.213, - "step": 12755 - }, - { - "epoch": 0.65, - "grad_norm": 0.9716316543874394, - "learning_rate": 5.8048977637429925e-06, - "loss": 0.1939, - "step": 12756 - }, - { - "epoch": 0.65, - "grad_norm": 1.3417170022179468, - "learning_rate": 5.803402783869178e-06, - "loss": 0.1571, - "step": 12757 - }, - { - "epoch": 0.65, - "grad_norm": 0.9285050897401939, - "learning_rate": 5.801907917828303e-06, - "loss": 0.1817, - "step": 12758 - }, - { - "epoch": 0.65, - "grad_norm": 1.824448289024012, - "learning_rate": 5.800413165660913e-06, - "loss": 0.1817, - "step": 12759 - }, - { - "epoch": 0.65, - "grad_norm": 3.9595828542615563, - "learning_rate": 5.798918527407549e-06, - "loss": 0.1864, - "step": 12760 - }, - { - "epoch": 0.65, - "grad_norm": 0.9974578852520701, - "learning_rate": 5.797424003108758e-06, - "loss": 0.1507, - "step": 12761 - }, - { - "epoch": 0.65, - "grad_norm": 0.9464195385603775, - "learning_rate": 5.795929592805077e-06, - "loss": 0.16, - "step": 12762 - }, - { - "epoch": 0.65, - "grad_norm": 1.1374103732685328, - "learning_rate": 5.794435296537049e-06, - "loss": 0.1781, - "step": 12763 - }, - { - "epoch": 0.65, - "grad_norm": 1.1827366485411164, - "learning_rate": 5.7929411143451955e-06, - "loss": 0.1668, - "step": 12764 - }, - { - "epoch": 0.65, - "grad_norm": 1.168608591288615, - "learning_rate": 5.791447046270055e-06, - "loss": 0.1834, - "step": 12765 - }, - { - "epoch": 0.65, - "grad_norm": 0.9011955344686472, - "learning_rate": 5.78995309235215e-06, - "loss": 0.1653, - "step": 12766 - }, - { - "epoch": 0.65, - "grad_norm": 1.4317286725861893, - "learning_rate": 5.788459252632008e-06, - "loss": 0.1901, - "step": 12767 - }, - { - "epoch": 0.65, - "grad_norm": 1.3077605328902828, - "learning_rate": 5.7869655271501415e-06, - "loss": 0.1947, - "step": 12768 - }, - { - "epoch": 0.65, - "grad_norm": 0.8625789708204211, - "learning_rate": 5.785471915947078e-06, - "loss": 0.1683, - "step": 12769 - }, - { - "epoch": 0.65, - "grad_norm": 0.9455906607045814, - "learning_rate": 5.783978419063323e-06, - "loss": 0.1848, - "step": 12770 - }, - { - "epoch": 0.65, - "grad_norm": 1.2640924961109428, - "learning_rate": 5.782485036539391e-06, - "loss": 0.1574, - "step": 12771 - }, - { - "epoch": 0.65, - "grad_norm": 1.0937487504731338, - "learning_rate": 5.7809917684157915e-06, - "loss": 0.1685, - "step": 12772 - }, - { - "epoch": 0.65, - "grad_norm": 1.0232287650417144, - "learning_rate": 5.779498614733032e-06, - "loss": 0.1958, - "step": 12773 - }, - { - "epoch": 0.65, - "grad_norm": 0.9582307803099454, - "learning_rate": 5.778005575531606e-06, - "loss": 0.173, - "step": 12774 - }, - { - "epoch": 0.65, - "grad_norm": 1.08070120123374, - "learning_rate": 5.7765126508520216e-06, - "loss": 0.1669, - "step": 12775 - }, - { - "epoch": 0.65, - "grad_norm": 2.898585807582454, - "learning_rate": 5.775019840734768e-06, - "loss": 0.2019, - "step": 12776 - }, - { - "epoch": 0.65, - "grad_norm": 0.8173973148695457, - "learning_rate": 5.773527145220341e-06, - "loss": 0.1813, - "step": 12777 - }, - { - "epoch": 0.65, - "grad_norm": 1.1002081814255371, - "learning_rate": 5.772034564349227e-06, - "loss": 0.166, - "step": 12778 - }, - { - "epoch": 0.65, - "grad_norm": 0.9156571269819482, - "learning_rate": 5.770542098161913e-06, - "loss": 0.1703, - "step": 12779 - }, - { - "epoch": 0.65, - "grad_norm": 0.9475744155041969, - "learning_rate": 5.769049746698889e-06, - "loss": 0.1599, - "step": 12780 - }, - { - "epoch": 0.65, - "grad_norm": 1.027253670713997, - "learning_rate": 5.767557510000624e-06, - "loss": 0.1714, - "step": 12781 - }, - { - "epoch": 0.65, - "grad_norm": 0.9115685276161519, - "learning_rate": 5.7660653881076045e-06, - "loss": 0.1624, - "step": 12782 - }, - { - "epoch": 0.65, - "grad_norm": 0.9189978581613581, - "learning_rate": 5.7645733810602975e-06, - "loss": 0.1615, - "step": 12783 - }, - { - "epoch": 0.65, - "grad_norm": 1.6394894428716966, - "learning_rate": 5.76308148889918e-06, - "loss": 0.2053, - "step": 12784 - }, - { - "epoch": 0.65, - "grad_norm": 1.3596295229586994, - "learning_rate": 5.761589711664714e-06, - "loss": 0.1913, - "step": 12785 - }, - { - "epoch": 0.65, - "grad_norm": 0.8407133594687236, - "learning_rate": 5.760098049397369e-06, - "loss": 0.1632, - "step": 12786 - }, - { - "epoch": 0.65, - "grad_norm": 0.9167228532336161, - "learning_rate": 5.7586065021376e-06, - "loss": 0.1623, - "step": 12787 - }, - { - "epoch": 0.65, - "grad_norm": 1.0360576474570997, - "learning_rate": 5.7571150699258695e-06, - "loss": 0.2142, - "step": 12788 - }, - { - "epoch": 0.65, - "grad_norm": 1.1085304668570894, - "learning_rate": 5.7556237528026325e-06, - "loss": 0.1695, - "step": 12789 - }, - { - "epoch": 0.65, - "grad_norm": 1.0896331639110117, - "learning_rate": 5.754132550808345e-06, - "loss": 0.1871, - "step": 12790 - }, - { - "epoch": 0.65, - "grad_norm": 1.0283976013604725, - "learning_rate": 5.752641463983446e-06, - "loss": 0.1893, - "step": 12791 - }, - { - "epoch": 0.65, - "grad_norm": 0.9949917672413399, - "learning_rate": 5.751150492368394e-06, - "loss": 0.17, - "step": 12792 - }, - { - "epoch": 0.65, - "grad_norm": 1.0405359463683412, - "learning_rate": 5.749659636003619e-06, - "loss": 0.1585, - "step": 12793 - }, - { - "epoch": 0.65, - "grad_norm": 1.6514389831675638, - "learning_rate": 5.748168894929571e-06, - "loss": 0.1664, - "step": 12794 - }, - { - "epoch": 0.65, - "grad_norm": 0.9551165337416043, - "learning_rate": 5.746678269186682e-06, - "loss": 0.1687, - "step": 12795 - }, - { - "epoch": 0.65, - "grad_norm": 0.7885972333474208, - "learning_rate": 5.7451877588153805e-06, - "loss": 0.1671, - "step": 12796 - }, - { - "epoch": 0.65, - "grad_norm": 0.9364830001912637, - "learning_rate": 5.743697363856103e-06, - "loss": 0.1723, - "step": 12797 - }, - { - "epoch": 0.65, - "grad_norm": 1.2316441009063175, - "learning_rate": 5.742207084349274e-06, - "loss": 0.1726, - "step": 12798 - }, - { - "epoch": 0.65, - "grad_norm": 0.9264953776930172, - "learning_rate": 5.740716920335321e-06, - "loss": 0.1784, - "step": 12799 - }, - { - "epoch": 0.65, - "grad_norm": 1.0204461207639197, - "learning_rate": 5.739226871854659e-06, - "loss": 0.1939, - "step": 12800 - }, - { - "epoch": 0.65, - "grad_norm": 1.665770547649148, - "learning_rate": 5.737736938947713e-06, - "loss": 0.1666, - "step": 12801 - }, - { - "epoch": 0.65, - "grad_norm": 0.8495805423139131, - "learning_rate": 5.73624712165489e-06, - "loss": 0.1623, - "step": 12802 - }, - { - "epoch": 0.65, - "grad_norm": 1.0323761081415743, - "learning_rate": 5.734757420016608e-06, - "loss": 0.1841, - "step": 12803 - }, - { - "epoch": 0.65, - "grad_norm": 1.0378100442313927, - "learning_rate": 5.733267834073267e-06, - "loss": 0.1752, - "step": 12804 - }, - { - "epoch": 0.65, - "grad_norm": 1.1876253510119588, - "learning_rate": 5.731778363865278e-06, - "loss": 0.1599, - "step": 12805 - }, - { - "epoch": 0.65, - "grad_norm": 1.041679902340334, - "learning_rate": 5.730289009433041e-06, - "loss": 0.1814, - "step": 12806 - }, - { - "epoch": 0.65, - "grad_norm": 0.9306180346304497, - "learning_rate": 5.7287997708169615e-06, - "loss": 0.1777, - "step": 12807 - }, - { - "epoch": 0.65, - "grad_norm": 0.8422275117744921, - "learning_rate": 5.7273106480574245e-06, - "loss": 0.1842, - "step": 12808 - }, - { - "epoch": 0.65, - "grad_norm": 0.9099700432256908, - "learning_rate": 5.725821641194831e-06, - "loss": 0.1839, - "step": 12809 - }, - { - "epoch": 0.65, - "grad_norm": 1.0335046237387424, - "learning_rate": 5.724332750269563e-06, - "loss": 0.1808, - "step": 12810 - }, - { - "epoch": 0.65, - "grad_norm": 0.7342455579777192, - "learning_rate": 5.722843975322015e-06, - "loss": 0.1691, - "step": 12811 - }, - { - "epoch": 0.65, - "grad_norm": 1.021904470086725, - "learning_rate": 5.721355316392566e-06, - "loss": 0.1678, - "step": 12812 - }, - { - "epoch": 0.65, - "grad_norm": 0.8413994432957763, - "learning_rate": 5.719866773521592e-06, - "loss": 0.1739, - "step": 12813 - }, - { - "epoch": 0.65, - "grad_norm": 0.7656323654203047, - "learning_rate": 5.718378346749473e-06, - "loss": 0.173, - "step": 12814 - }, - { - "epoch": 0.65, - "grad_norm": 0.8695568552072197, - "learning_rate": 5.716890036116582e-06, - "loss": 0.1816, - "step": 12815 - }, - { - "epoch": 0.65, - "grad_norm": 0.637543553189384, - "learning_rate": 5.715401841663296e-06, - "loss": 0.1538, - "step": 12816 - }, - { - "epoch": 0.65, - "grad_norm": 0.8986297573930118, - "learning_rate": 5.713913763429972e-06, - "loss": 0.1765, - "step": 12817 - }, - { - "epoch": 0.65, - "grad_norm": 1.165151118458408, - "learning_rate": 5.712425801456984e-06, - "loss": 0.1858, - "step": 12818 - }, - { - "epoch": 0.65, - "grad_norm": 1.2275823408117625, - "learning_rate": 5.710937955784686e-06, - "loss": 0.1808, - "step": 12819 - }, - { - "epoch": 0.65, - "grad_norm": 1.0067028818244466, - "learning_rate": 5.709450226453439e-06, - "loss": 0.2044, - "step": 12820 - }, - { - "epoch": 0.65, - "grad_norm": 0.7775459709638857, - "learning_rate": 5.707962613503595e-06, - "loss": 0.1662, - "step": 12821 - }, - { - "epoch": 0.65, - "grad_norm": 2.4527849280844114, - "learning_rate": 5.706475116975512e-06, - "loss": 0.2129, - "step": 12822 - }, - { - "epoch": 0.65, - "grad_norm": 0.9436189868348397, - "learning_rate": 5.704987736909529e-06, - "loss": 0.1751, - "step": 12823 - }, - { - "epoch": 0.65, - "grad_norm": 1.0299563360182766, - "learning_rate": 5.703500473345995e-06, - "loss": 0.1849, - "step": 12824 - }, - { - "epoch": 0.65, - "grad_norm": 1.3373956988323639, - "learning_rate": 5.702013326325256e-06, - "loss": 0.1731, - "step": 12825 - }, - { - "epoch": 0.65, - "grad_norm": 0.6450668643735773, - "learning_rate": 5.700526295887649e-06, - "loss": 0.1551, - "step": 12826 - }, - { - "epoch": 0.65, - "grad_norm": 1.4047761777979644, - "learning_rate": 5.699039382073508e-06, - "loss": 0.159, - "step": 12827 - }, - { - "epoch": 0.65, - "grad_norm": 0.9549652310257992, - "learning_rate": 5.69755258492317e-06, - "loss": 0.1792, - "step": 12828 - }, - { - "epoch": 0.65, - "grad_norm": 0.8410795021030328, - "learning_rate": 5.6960659044769596e-06, - "loss": 0.1677, - "step": 12829 - }, - { - "epoch": 0.65, - "grad_norm": 1.8490038369881001, - "learning_rate": 5.694579340775202e-06, - "loss": 0.1716, - "step": 12830 - }, - { - "epoch": 0.65, - "grad_norm": 1.0215088608250185, - "learning_rate": 5.693092893858223e-06, - "loss": 0.1724, - "step": 12831 - }, - { - "epoch": 0.65, - "grad_norm": 0.801362536481239, - "learning_rate": 5.691606563766341e-06, - "loss": 0.1882, - "step": 12832 - }, - { - "epoch": 0.65, - "grad_norm": 1.1402567424615575, - "learning_rate": 5.6901203505398805e-06, - "loss": 0.1853, - "step": 12833 - }, - { - "epoch": 0.65, - "grad_norm": 1.134326759007839, - "learning_rate": 5.688634254219143e-06, - "loss": 0.1587, - "step": 12834 - }, - { - "epoch": 0.65, - "grad_norm": 0.9233574208843645, - "learning_rate": 5.687148274844449e-06, - "loss": 0.167, - "step": 12835 - }, - { - "epoch": 0.65, - "grad_norm": 0.7866757718859764, - "learning_rate": 5.6856624124560985e-06, - "loss": 0.1697, - "step": 12836 - }, - { - "epoch": 0.65, - "grad_norm": 1.0403016153270666, - "learning_rate": 5.684176667094403e-06, - "loss": 0.1639, - "step": 12837 - }, - { - "epoch": 0.65, - "grad_norm": 1.605615224634009, - "learning_rate": 5.682691038799655e-06, - "loss": 0.1811, - "step": 12838 - }, - { - "epoch": 0.65, - "grad_norm": 0.9004022258891372, - "learning_rate": 5.68120552761216e-06, - "loss": 0.2044, - "step": 12839 - }, - { - "epoch": 0.65, - "grad_norm": 0.9103926230618025, - "learning_rate": 5.6797201335722064e-06, - "loss": 0.181, - "step": 12840 - }, - { - "epoch": 0.65, - "grad_norm": 0.9655309677333925, - "learning_rate": 5.678234856720086e-06, - "loss": 0.1693, - "step": 12841 - }, - { - "epoch": 0.65, - "grad_norm": 0.9890053890231713, - "learning_rate": 5.67674969709609e-06, - "loss": 0.1832, - "step": 12842 - }, - { - "epoch": 0.65, - "grad_norm": 1.561738973816005, - "learning_rate": 5.675264654740506e-06, - "loss": 0.1944, - "step": 12843 - }, - { - "epoch": 0.65, - "grad_norm": 1.112333814532282, - "learning_rate": 5.67377972969361e-06, - "loss": 0.1898, - "step": 12844 - }, - { - "epoch": 0.65, - "grad_norm": 0.8642933030164457, - "learning_rate": 5.672294921995687e-06, - "loss": 0.186, - "step": 12845 - }, - { - "epoch": 0.65, - "grad_norm": 0.9086013400978419, - "learning_rate": 5.670810231687004e-06, - "loss": 0.16, - "step": 12846 - }, - { - "epoch": 0.65, - "grad_norm": 2.064045502304253, - "learning_rate": 5.669325658807843e-06, - "loss": 0.1797, - "step": 12847 - }, - { - "epoch": 0.65, - "grad_norm": 1.3122020336260767, - "learning_rate": 5.667841203398463e-06, - "loss": 0.1961, - "step": 12848 - }, - { - "epoch": 0.65, - "grad_norm": 1.319368124499809, - "learning_rate": 5.666356865499134e-06, - "loss": 0.1906, - "step": 12849 - }, - { - "epoch": 0.65, - "grad_norm": 1.129651978672975, - "learning_rate": 5.664872645150126e-06, - "loss": 0.1678, - "step": 12850 - }, - { - "epoch": 0.65, - "grad_norm": 0.8886433832662072, - "learning_rate": 5.663388542391687e-06, - "loss": 0.1729, - "step": 12851 - }, - { - "epoch": 0.65, - "grad_norm": 0.8454505714582762, - "learning_rate": 5.661904557264083e-06, - "loss": 0.1826, - "step": 12852 - }, - { - "epoch": 0.65, - "grad_norm": 0.909024630136565, - "learning_rate": 5.6604206898075595e-06, - "loss": 0.1766, - "step": 12853 - }, - { - "epoch": 0.65, - "grad_norm": 0.7422197954953109, - "learning_rate": 5.658936940062373e-06, - "loss": 0.1818, - "step": 12854 - }, - { - "epoch": 0.65, - "grad_norm": 0.9900409014516947, - "learning_rate": 5.657453308068763e-06, - "loss": 0.1645, - "step": 12855 - }, - { - "epoch": 0.65, - "grad_norm": 1.0872223642690009, - "learning_rate": 5.655969793866982e-06, - "loss": 0.1686, - "step": 12856 - }, - { - "epoch": 0.65, - "grad_norm": 1.1858898783551484, - "learning_rate": 5.654486397497262e-06, - "loss": 0.172, - "step": 12857 - }, - { - "epoch": 0.65, - "grad_norm": 1.69732530121704, - "learning_rate": 5.653003118999843e-06, - "loss": 0.1616, - "step": 12858 - }, - { - "epoch": 0.65, - "grad_norm": 1.0433921844175187, - "learning_rate": 5.651519958414961e-06, - "loss": 0.1705, - "step": 12859 - }, - { - "epoch": 0.65, - "grad_norm": 1.003929183034557, - "learning_rate": 5.650036915782849e-06, - "loss": 0.1941, - "step": 12860 - }, - { - "epoch": 0.65, - "grad_norm": 0.7799695862625812, - "learning_rate": 5.648553991143728e-06, - "loss": 0.1883, - "step": 12861 - }, - { - "epoch": 0.65, - "grad_norm": 0.9482306796546718, - "learning_rate": 5.647071184537829e-06, - "loss": 0.1592, - "step": 12862 - }, - { - "epoch": 0.65, - "grad_norm": 1.9391228858816858, - "learning_rate": 5.6455884960053655e-06, - "loss": 0.1697, - "step": 12863 - }, - { - "epoch": 0.65, - "grad_norm": 1.0291871921144702, - "learning_rate": 5.6441059255865645e-06, - "loss": 0.1647, - "step": 12864 - }, - { - "epoch": 0.65, - "grad_norm": 0.9809468989705948, - "learning_rate": 5.642623473321638e-06, - "loss": 0.179, - "step": 12865 - }, - { - "epoch": 0.65, - "grad_norm": 1.0666735024576603, - "learning_rate": 5.64114113925079e-06, - "loss": 0.1894, - "step": 12866 - }, - { - "epoch": 0.65, - "grad_norm": 0.9172747178146132, - "learning_rate": 5.639658923414235e-06, - "loss": 0.1778, - "step": 12867 - }, - { - "epoch": 0.65, - "grad_norm": 0.9556114312786075, - "learning_rate": 5.638176825852178e-06, - "loss": 0.1634, - "step": 12868 - }, - { - "epoch": 0.65, - "grad_norm": 0.8090140618805673, - "learning_rate": 5.636694846604825e-06, - "loss": 0.1591, - "step": 12869 - }, - { - "epoch": 0.65, - "grad_norm": 0.8509532820931227, - "learning_rate": 5.635212985712366e-06, - "loss": 0.1727, - "step": 12870 - }, - { - "epoch": 0.65, - "grad_norm": 6.221724279621428, - "learning_rate": 5.633731243215007e-06, - "loss": 0.1786, - "step": 12871 - }, - { - "epoch": 0.65, - "grad_norm": 0.9225094148693288, - "learning_rate": 5.63224961915293e-06, - "loss": 0.1659, - "step": 12872 - }, - { - "epoch": 0.65, - "grad_norm": 0.789810703620753, - "learning_rate": 5.6307681135663315e-06, - "loss": 0.1734, - "step": 12873 - }, - { - "epoch": 0.65, - "grad_norm": 0.8656698646274308, - "learning_rate": 5.629286726495393e-06, - "loss": 0.1606, - "step": 12874 - }, - { - "epoch": 0.65, - "grad_norm": 0.9659152400307496, - "learning_rate": 5.627805457980298e-06, - "loss": 0.1709, - "step": 12875 - }, - { - "epoch": 0.65, - "grad_norm": 1.0158763651419542, - "learning_rate": 5.626324308061226e-06, - "loss": 0.183, - "step": 12876 - }, - { - "epoch": 0.65, - "grad_norm": 0.9541796052338082, - "learning_rate": 5.624843276778358e-06, - "loss": 0.1869, - "step": 12877 - }, - { - "epoch": 0.65, - "grad_norm": 0.7038071148065027, - "learning_rate": 5.62336236417186e-06, - "loss": 0.1458, - "step": 12878 - }, - { - "epoch": 0.65, - "grad_norm": 0.8309038062333706, - "learning_rate": 5.621881570281909e-06, - "loss": 0.1536, - "step": 12879 - }, - { - "epoch": 0.65, - "grad_norm": 3.1898632042402766, - "learning_rate": 5.6204008951486636e-06, - "loss": 0.1907, - "step": 12880 - }, - { - "epoch": 0.66, - "grad_norm": 0.9013225322317349, - "learning_rate": 5.618920338812295e-06, - "loss": 0.1781, - "step": 12881 - }, - { - "epoch": 0.66, - "grad_norm": 0.8402968879388498, - "learning_rate": 5.61743990131296e-06, - "loss": 0.1772, - "step": 12882 - }, - { - "epoch": 0.66, - "grad_norm": 1.2585658510983027, - "learning_rate": 5.615959582690812e-06, - "loss": 0.2075, - "step": 12883 - }, - { - "epoch": 0.66, - "grad_norm": 1.0642373692264568, - "learning_rate": 5.614479382986007e-06, - "loss": 0.1954, - "step": 12884 - }, - { - "epoch": 0.66, - "grad_norm": 0.8121988348561141, - "learning_rate": 5.612999302238696e-06, - "loss": 0.1607, - "step": 12885 - }, - { - "epoch": 0.66, - "grad_norm": 0.9981702401051303, - "learning_rate": 5.611519340489031e-06, - "loss": 0.1698, - "step": 12886 - }, - { - "epoch": 0.66, - "grad_norm": 1.01944572613643, - "learning_rate": 5.610039497777149e-06, - "loss": 0.1731, - "step": 12887 - }, - { - "epoch": 0.66, - "grad_norm": 1.2615462260108041, - "learning_rate": 5.608559774143196e-06, - "loss": 0.1783, - "step": 12888 - }, - { - "epoch": 0.66, - "grad_norm": 1.2403582568494242, - "learning_rate": 5.607080169627304e-06, - "loss": 0.1756, - "step": 12889 - }, - { - "epoch": 0.66, - "grad_norm": 0.8311748159017751, - "learning_rate": 5.6056006842696145e-06, - "loss": 0.1722, - "step": 12890 - }, - { - "epoch": 0.66, - "grad_norm": 1.5292143098094482, - "learning_rate": 5.60412131811025e-06, - "loss": 0.1706, - "step": 12891 - }, - { - "epoch": 0.66, - "grad_norm": 1.1798060723173425, - "learning_rate": 5.6026420711893485e-06, - "loss": 0.2004, - "step": 12892 - }, - { - "epoch": 0.66, - "grad_norm": 1.9015841379843128, - "learning_rate": 5.601162943547023e-06, - "loss": 0.16, - "step": 12893 - }, - { - "epoch": 0.66, - "grad_norm": 1.1723385310959136, - "learning_rate": 5.599683935223402e-06, - "loss": 0.152, - "step": 12894 - }, - { - "epoch": 0.66, - "grad_norm": 1.1839672034771838, - "learning_rate": 5.598205046258603e-06, - "loss": 0.1623, - "step": 12895 - }, - { - "epoch": 0.66, - "grad_norm": 2.3569216140387987, - "learning_rate": 5.596726276692745e-06, - "loss": 0.1868, - "step": 12896 - }, - { - "epoch": 0.66, - "grad_norm": 0.9215860827965177, - "learning_rate": 5.5952476265659315e-06, - "loss": 0.1629, - "step": 12897 - }, - { - "epoch": 0.66, - "grad_norm": 1.2795995011922863, - "learning_rate": 5.593769095918278e-06, - "loss": 0.189, - "step": 12898 - }, - { - "epoch": 0.66, - "grad_norm": 1.0940029725833822, - "learning_rate": 5.592290684789887e-06, - "loss": 0.1557, - "step": 12899 - }, - { - "epoch": 0.66, - "grad_norm": 0.846845827744864, - "learning_rate": 5.5908123932208565e-06, - "loss": 0.1829, - "step": 12900 - }, - { - "epoch": 0.66, - "grad_norm": 1.0259373068826925, - "learning_rate": 5.589334221251289e-06, - "loss": 0.1777, - "step": 12901 - }, - { - "epoch": 0.66, - "grad_norm": 0.9493271016656901, - "learning_rate": 5.587856168921279e-06, - "loss": 0.1728, - "step": 12902 - }, - { - "epoch": 0.66, - "grad_norm": 1.137369310293687, - "learning_rate": 5.586378236270925e-06, - "loss": 0.1685, - "step": 12903 - }, - { - "epoch": 0.66, - "grad_norm": 0.8186890126068809, - "learning_rate": 5.584900423340306e-06, - "loss": 0.177, - "step": 12904 - }, - { - "epoch": 0.66, - "grad_norm": 4.218637687925129, - "learning_rate": 5.5834227301695166e-06, - "loss": 0.2081, - "step": 12905 - }, - { - "epoch": 0.66, - "grad_norm": 1.6991846261385979, - "learning_rate": 5.581945156798629e-06, - "loss": 0.1579, - "step": 12906 - }, - { - "epoch": 0.66, - "grad_norm": 1.0576293401743173, - "learning_rate": 5.580467703267736e-06, - "loss": 0.1876, - "step": 12907 - }, - { - "epoch": 0.66, - "grad_norm": 0.8104066484939919, - "learning_rate": 5.578990369616899e-06, - "loss": 0.1455, - "step": 12908 - }, - { - "epoch": 0.66, - "grad_norm": 0.8376631152931486, - "learning_rate": 5.577513155886204e-06, - "loss": 0.1837, - "step": 12909 - }, - { - "epoch": 0.66, - "grad_norm": 0.8523762348499792, - "learning_rate": 5.576036062115709e-06, - "loss": 0.1626, - "step": 12910 - }, - { - "epoch": 0.66, - "grad_norm": 0.9641752481361844, - "learning_rate": 5.574559088345487e-06, - "loss": 0.1724, - "step": 12911 - }, - { - "epoch": 0.66, - "grad_norm": 0.9164492673667283, - "learning_rate": 5.573082234615599e-06, - "loss": 0.1857, - "step": 12912 - }, - { - "epoch": 0.66, - "grad_norm": 2.019011994175837, - "learning_rate": 5.57160550096611e-06, - "loss": 0.1662, - "step": 12913 - }, - { - "epoch": 0.66, - "grad_norm": 0.9286318752863383, - "learning_rate": 5.570128887437067e-06, - "loss": 0.1789, - "step": 12914 - }, - { - "epoch": 0.66, - "grad_norm": 1.2982218911285703, - "learning_rate": 5.568652394068532e-06, - "loss": 0.1752, - "step": 12915 - }, - { - "epoch": 0.66, - "grad_norm": 1.1241713257522084, - "learning_rate": 5.567176020900549e-06, - "loss": 0.1662, - "step": 12916 - }, - { - "epoch": 0.66, - "grad_norm": 1.113905632611933, - "learning_rate": 5.565699767973169e-06, - "loss": 0.1792, - "step": 12917 - }, - { - "epoch": 0.66, - "grad_norm": 0.8693497758333975, - "learning_rate": 5.564223635326433e-06, - "loss": 0.1824, - "step": 12918 - }, - { - "epoch": 0.66, - "grad_norm": 1.2387170423762486, - "learning_rate": 5.562747623000379e-06, - "loss": 0.1758, - "step": 12919 - }, - { - "epoch": 0.66, - "grad_norm": 1.1359411489145923, - "learning_rate": 5.561271731035045e-06, - "loss": 0.2008, - "step": 12920 - }, - { - "epoch": 0.66, - "grad_norm": 0.9097943534585743, - "learning_rate": 5.559795959470467e-06, - "loss": 0.1707, - "step": 12921 - }, - { - "epoch": 0.66, - "grad_norm": 1.0803842029847395, - "learning_rate": 5.558320308346677e-06, - "loss": 0.1905, - "step": 12922 - }, - { - "epoch": 0.66, - "grad_norm": 1.2140417100881817, - "learning_rate": 5.556844777703697e-06, - "loss": 0.1703, - "step": 12923 - }, - { - "epoch": 0.66, - "grad_norm": 1.0870153760009866, - "learning_rate": 5.5553693675815565e-06, - "loss": 0.1776, - "step": 12924 - }, - { - "epoch": 0.66, - "grad_norm": 1.008402963959758, - "learning_rate": 5.55389407802027e-06, - "loss": 0.1832, - "step": 12925 - }, - { - "epoch": 0.66, - "grad_norm": 0.943314946152503, - "learning_rate": 5.55241890905986e-06, - "loss": 0.1818, - "step": 12926 - }, - { - "epoch": 0.66, - "grad_norm": 0.9804005130777163, - "learning_rate": 5.5509438607403355e-06, - "loss": 0.1663, - "step": 12927 - }, - { - "epoch": 0.66, - "grad_norm": 1.6517596718905303, - "learning_rate": 5.549468933101709e-06, - "loss": 0.1787, - "step": 12928 - }, - { - "epoch": 0.66, - "grad_norm": 1.2459658247423453, - "learning_rate": 5.547994126183991e-06, - "loss": 0.1752, - "step": 12929 - }, - { - "epoch": 0.66, - "grad_norm": 0.9106347033122705, - "learning_rate": 5.546519440027186e-06, - "loss": 0.1606, - "step": 12930 - }, - { - "epoch": 0.66, - "grad_norm": 1.641041700811944, - "learning_rate": 5.545044874671289e-06, - "loss": 0.1646, - "step": 12931 - }, - { - "epoch": 0.66, - "grad_norm": 0.9307740229504946, - "learning_rate": 5.543570430156307e-06, - "loss": 0.1647, - "step": 12932 - }, - { - "epoch": 0.66, - "grad_norm": 0.8617543708343491, - "learning_rate": 5.542096106522224e-06, - "loss": 0.1474, - "step": 12933 - }, - { - "epoch": 0.66, - "grad_norm": 0.7869774137012839, - "learning_rate": 5.540621903809038e-06, - "loss": 0.1718, - "step": 12934 - }, - { - "epoch": 0.66, - "grad_norm": 0.8044176966116101, - "learning_rate": 5.539147822056736e-06, - "loss": 0.165, - "step": 12935 - }, - { - "epoch": 0.66, - "grad_norm": 0.885086608903983, - "learning_rate": 5.537673861305297e-06, - "loss": 0.1713, - "step": 12936 - }, - { - "epoch": 0.66, - "grad_norm": 0.9198934470644922, - "learning_rate": 5.536200021594707e-06, - "loss": 0.1562, - "step": 12937 - }, - { - "epoch": 0.66, - "grad_norm": 1.415345934333553, - "learning_rate": 5.534726302964944e-06, - "loss": 0.1745, - "step": 12938 - }, - { - "epoch": 0.66, - "grad_norm": 1.1173915430681507, - "learning_rate": 5.533252705455985e-06, - "loss": 0.1775, - "step": 12939 - }, - { - "epoch": 0.66, - "grad_norm": 1.3559267057784843, - "learning_rate": 5.531779229107797e-06, - "loss": 0.1655, - "step": 12940 - }, - { - "epoch": 0.66, - "grad_norm": 0.8986687901180408, - "learning_rate": 5.530305873960351e-06, - "loss": 0.1875, - "step": 12941 - }, - { - "epoch": 0.66, - "grad_norm": 0.9091720492697619, - "learning_rate": 5.528832640053607e-06, - "loss": 0.1675, - "step": 12942 - }, - { - "epoch": 0.66, - "grad_norm": 1.0879286275960187, - "learning_rate": 5.527359527427536e-06, - "loss": 0.1863, - "step": 12943 - }, - { - "epoch": 0.66, - "grad_norm": 1.1110812546096838, - "learning_rate": 5.525886536122085e-06, - "loss": 0.1794, - "step": 12944 - }, - { - "epoch": 0.66, - "grad_norm": 1.0303150589230612, - "learning_rate": 5.524413666177216e-06, - "loss": 0.1943, - "step": 12945 - }, - { - "epoch": 0.66, - "grad_norm": 0.8707871663246685, - "learning_rate": 5.522940917632878e-06, - "loss": 0.1703, - "step": 12946 - }, - { - "epoch": 0.66, - "grad_norm": 0.8765865925819567, - "learning_rate": 5.521468290529023e-06, - "loss": 0.1785, - "step": 12947 - }, - { - "epoch": 0.66, - "grad_norm": 0.8522416033424861, - "learning_rate": 5.5199957849055905e-06, - "loss": 0.1682, - "step": 12948 - }, - { - "epoch": 0.66, - "grad_norm": 1.2651317087868765, - "learning_rate": 5.51852340080253e-06, - "loss": 0.1779, - "step": 12949 - }, - { - "epoch": 0.66, - "grad_norm": 1.036138580945707, - "learning_rate": 5.517051138259771e-06, - "loss": 0.1651, - "step": 12950 - }, - { - "epoch": 0.66, - "grad_norm": 1.5286022891139401, - "learning_rate": 5.515578997317257e-06, - "loss": 0.1616, - "step": 12951 - }, - { - "epoch": 0.66, - "grad_norm": 0.9511716474931488, - "learning_rate": 5.514106978014917e-06, - "loss": 0.1965, - "step": 12952 - }, - { - "epoch": 0.66, - "grad_norm": 0.8597682366979487, - "learning_rate": 5.512635080392673e-06, - "loss": 0.1596, - "step": 12953 - }, - { - "epoch": 0.66, - "grad_norm": 1.0870180016132283, - "learning_rate": 5.511163304490456e-06, - "loss": 0.1469, - "step": 12954 - }, - { - "epoch": 0.66, - "grad_norm": 1.0480633504288328, - "learning_rate": 5.50969165034819e-06, - "loss": 0.1846, - "step": 12955 - }, - { - "epoch": 0.66, - "grad_norm": 3.5381871452442013, - "learning_rate": 5.508220118005794e-06, - "loss": 0.1666, - "step": 12956 - }, - { - "epoch": 0.66, - "grad_norm": 0.8148043083133922, - "learning_rate": 5.5067487075031764e-06, - "loss": 0.1735, - "step": 12957 - }, - { - "epoch": 0.66, - "grad_norm": 1.0187217086868456, - "learning_rate": 5.505277418880259e-06, - "loss": 0.1874, - "step": 12958 - }, - { - "epoch": 0.66, - "grad_norm": 1.4282204424031621, - "learning_rate": 5.503806252176941e-06, - "loss": 0.169, - "step": 12959 - }, - { - "epoch": 0.66, - "grad_norm": 0.9937963162216736, - "learning_rate": 5.502335207433136e-06, - "loss": 0.1993, - "step": 12960 - }, - { - "epoch": 0.66, - "grad_norm": 0.930548165625113, - "learning_rate": 5.500864284688739e-06, - "loss": 0.1654, - "step": 12961 - }, - { - "epoch": 0.66, - "grad_norm": 0.8048369428436289, - "learning_rate": 5.499393483983657e-06, - "loss": 0.1646, - "step": 12962 - }, - { - "epoch": 0.66, - "grad_norm": 1.2232545060032005, - "learning_rate": 5.497922805357776e-06, - "loss": 0.1819, - "step": 12963 - }, - { - "epoch": 0.66, - "grad_norm": 0.8363692909755749, - "learning_rate": 5.496452248850994e-06, - "loss": 0.153, - "step": 12964 - }, - { - "epoch": 0.66, - "grad_norm": 0.8803241764374603, - "learning_rate": 5.494981814503199e-06, - "loss": 0.1801, - "step": 12965 - }, - { - "epoch": 0.66, - "grad_norm": 1.189162147818141, - "learning_rate": 5.49351150235428e-06, - "loss": 0.181, - "step": 12966 - }, - { - "epoch": 0.66, - "grad_norm": 2.0669972255809883, - "learning_rate": 5.492041312444112e-06, - "loss": 0.1623, - "step": 12967 - }, - { - "epoch": 0.66, - "grad_norm": 1.7783963693138267, - "learning_rate": 5.490571244812582e-06, - "loss": 0.1841, - "step": 12968 - }, - { - "epoch": 0.66, - "grad_norm": 1.0361244849682094, - "learning_rate": 5.489101299499562e-06, - "loss": 0.1925, - "step": 12969 - }, - { - "epoch": 0.66, - "grad_norm": 0.9770182854110955, - "learning_rate": 5.487631476544921e-06, - "loss": 0.15, - "step": 12970 - }, - { - "epoch": 0.66, - "grad_norm": 1.600128640473354, - "learning_rate": 5.48616177598853e-06, - "loss": 0.1938, - "step": 12971 - }, - { - "epoch": 0.66, - "grad_norm": 1.070133379570775, - "learning_rate": 5.484692197870256e-06, - "loss": 0.193, - "step": 12972 - }, - { - "epoch": 0.66, - "grad_norm": 1.1204297316426883, - "learning_rate": 5.483222742229964e-06, - "loss": 0.1566, - "step": 12973 - }, - { - "epoch": 0.66, - "grad_norm": 3.001429122190217, - "learning_rate": 5.4817534091075084e-06, - "loss": 0.1688, - "step": 12974 - }, - { - "epoch": 0.66, - "grad_norm": 1.0830367798031313, - "learning_rate": 5.480284198542749e-06, - "loss": 0.1836, - "step": 12975 - }, - { - "epoch": 0.66, - "grad_norm": 1.0061740479549004, - "learning_rate": 5.4788151105755326e-06, - "loss": 0.1923, - "step": 12976 - }, - { - "epoch": 0.66, - "grad_norm": 1.008732350438919, - "learning_rate": 5.477346145245717e-06, - "loss": 0.1805, - "step": 12977 - }, - { - "epoch": 0.66, - "grad_norm": 1.1567289166163461, - "learning_rate": 5.475877302593135e-06, - "loss": 0.1672, - "step": 12978 - }, - { - "epoch": 0.66, - "grad_norm": 0.9044833916082834, - "learning_rate": 5.4744085826576445e-06, - "loss": 0.1732, - "step": 12979 - }, - { - "epoch": 0.66, - "grad_norm": 0.7896497231676506, - "learning_rate": 5.472939985479071e-06, - "loss": 0.1773, - "step": 12980 - }, - { - "epoch": 0.66, - "grad_norm": 1.1109312907063378, - "learning_rate": 5.471471511097257e-06, - "loss": 0.1797, - "step": 12981 - }, - { - "epoch": 0.66, - "grad_norm": 0.9273909805501587, - "learning_rate": 5.470003159552033e-06, - "loss": 0.158, - "step": 12982 - }, - { - "epoch": 0.66, - "grad_norm": 0.9094058333615452, - "learning_rate": 5.468534930883234e-06, - "loss": 0.1557, - "step": 12983 - }, - { - "epoch": 0.66, - "grad_norm": 1.0902517874089719, - "learning_rate": 5.467066825130676e-06, - "loss": 0.1632, - "step": 12984 - }, - { - "epoch": 0.66, - "grad_norm": 1.0784363867926698, - "learning_rate": 5.465598842334192e-06, - "loss": 0.1722, - "step": 12985 - }, - { - "epoch": 0.66, - "grad_norm": 1.0900263158696295, - "learning_rate": 5.46413098253359e-06, - "loss": 0.168, - "step": 12986 - }, - { - "epoch": 0.66, - "grad_norm": 1.8354048699959953, - "learning_rate": 5.462663245768696e-06, - "loss": 0.1683, - "step": 12987 - }, - { - "epoch": 0.66, - "grad_norm": 0.8168123012717525, - "learning_rate": 5.461195632079317e-06, - "loss": 0.1612, - "step": 12988 - }, - { - "epoch": 0.66, - "grad_norm": 1.2364602564754394, - "learning_rate": 5.459728141505259e-06, - "loss": 0.1733, - "step": 12989 - }, - { - "epoch": 0.66, - "grad_norm": 1.041956726976596, - "learning_rate": 5.458260774086332e-06, - "loss": 0.1809, - "step": 12990 - }, - { - "epoch": 0.66, - "grad_norm": 0.8923402773137696, - "learning_rate": 5.4567935298623385e-06, - "loss": 0.188, - "step": 12991 - }, - { - "epoch": 0.66, - "grad_norm": 0.9256412258996756, - "learning_rate": 5.45532640887308e-06, - "loss": 0.203, - "step": 12992 - }, - { - "epoch": 0.66, - "grad_norm": 0.9424634298516746, - "learning_rate": 5.453859411158347e-06, - "loss": 0.1721, - "step": 12993 - }, - { - "epoch": 0.66, - "grad_norm": 3.748854710324556, - "learning_rate": 5.452392536757936e-06, - "loss": 0.1591, - "step": 12994 - }, - { - "epoch": 0.66, - "grad_norm": 1.107341764985314, - "learning_rate": 5.450925785711632e-06, - "loss": 0.1865, - "step": 12995 - }, - { - "epoch": 0.66, - "grad_norm": 1.000456094721113, - "learning_rate": 5.449459158059226e-06, - "loss": 0.1719, - "step": 12996 - }, - { - "epoch": 0.66, - "grad_norm": 0.8533555961879594, - "learning_rate": 5.447992653840494e-06, - "loss": 0.1715, - "step": 12997 - }, - { - "epoch": 0.66, - "grad_norm": 1.0126316604351642, - "learning_rate": 5.4465262730952186e-06, - "loss": 0.1931, - "step": 12998 - }, - { - "epoch": 0.66, - "grad_norm": 0.8904401314425945, - "learning_rate": 5.445060015863175e-06, - "loss": 0.1852, - "step": 12999 - }, - { - "epoch": 0.66, - "grad_norm": 2.886172356588746, - "learning_rate": 5.443593882184139e-06, - "loss": 0.1705, - "step": 13000 - }, - { - "epoch": 0.66, - "grad_norm": 0.9153670957628168, - "learning_rate": 5.442127872097873e-06, - "loss": 0.1782, - "step": 13001 - }, - { - "epoch": 0.66, - "grad_norm": 1.2129372270740568, - "learning_rate": 5.440661985644149e-06, - "loss": 0.165, - "step": 13002 - }, - { - "epoch": 0.66, - "grad_norm": 0.8697255131527631, - "learning_rate": 5.439196222862724e-06, - "loss": 0.1648, - "step": 13003 - }, - { - "epoch": 0.66, - "grad_norm": 1.1582811645269684, - "learning_rate": 5.437730583793362e-06, - "loss": 0.1658, - "step": 13004 - }, - { - "epoch": 0.66, - "grad_norm": 1.3861422946046051, - "learning_rate": 5.436265068475815e-06, - "loss": 0.1846, - "step": 13005 - }, - { - "epoch": 0.66, - "grad_norm": 0.7321198610302062, - "learning_rate": 5.4347996769498315e-06, - "loss": 0.1696, - "step": 13006 - }, - { - "epoch": 0.66, - "grad_norm": 0.9401040464943988, - "learning_rate": 5.433334409255165e-06, - "loss": 0.1754, - "step": 13007 - }, - { - "epoch": 0.66, - "grad_norm": 2.078890431543232, - "learning_rate": 5.431869265431562e-06, - "loss": 0.1951, - "step": 13008 - }, - { - "epoch": 0.66, - "grad_norm": 1.0439385899204332, - "learning_rate": 5.430404245518766e-06, - "loss": 0.1818, - "step": 13009 - }, - { - "epoch": 0.66, - "grad_norm": 0.9609589053298132, - "learning_rate": 5.4289393495565076e-06, - "loss": 0.1804, - "step": 13010 - }, - { - "epoch": 0.66, - "grad_norm": 1.1166850002485198, - "learning_rate": 5.427474577584534e-06, - "loss": 0.1579, - "step": 13011 - }, - { - "epoch": 0.66, - "grad_norm": 0.9587875712844113, - "learning_rate": 5.426009929642566e-06, - "loss": 0.1869, - "step": 13012 - }, - { - "epoch": 0.66, - "grad_norm": 1.0499277335217805, - "learning_rate": 5.424545405770341e-06, - "loss": 0.1932, - "step": 13013 - }, - { - "epoch": 0.66, - "grad_norm": 0.9020401891335598, - "learning_rate": 5.423081006007576e-06, - "loss": 0.1855, - "step": 13014 - }, - { - "epoch": 0.66, - "grad_norm": 1.0372741435503523, - "learning_rate": 5.421616730394e-06, - "loss": 0.1919, - "step": 13015 - }, - { - "epoch": 0.66, - "grad_norm": 1.9728797046954707, - "learning_rate": 5.420152578969327e-06, - "loss": 0.1659, - "step": 13016 - }, - { - "epoch": 0.66, - "grad_norm": 1.0922675199537346, - "learning_rate": 5.4186885517732724e-06, - "loss": 0.1897, - "step": 13017 - }, - { - "epoch": 0.66, - "grad_norm": 0.9800919632695411, - "learning_rate": 5.417224648845551e-06, - "loss": 0.2087, - "step": 13018 - }, - { - "epoch": 0.66, - "grad_norm": 0.946962467462363, - "learning_rate": 5.415760870225873e-06, - "loss": 0.1881, - "step": 13019 - }, - { - "epoch": 0.66, - "grad_norm": 2.648603475752819, - "learning_rate": 5.414297215953937e-06, - "loss": 0.1628, - "step": 13020 - }, - { - "epoch": 0.66, - "grad_norm": 1.042906718375941, - "learning_rate": 5.41283368606945e-06, - "loss": 0.1598, - "step": 13021 - }, - { - "epoch": 0.66, - "grad_norm": 1.1437852574832168, - "learning_rate": 5.411370280612109e-06, - "loss": 0.1707, - "step": 13022 - }, - { - "epoch": 0.66, - "grad_norm": 1.109094609556804, - "learning_rate": 5.4099069996216055e-06, - "loss": 0.1531, - "step": 13023 - }, - { - "epoch": 0.66, - "grad_norm": 1.1181170488361203, - "learning_rate": 5.408443843137634e-06, - "loss": 0.1735, - "step": 13024 - }, - { - "epoch": 0.66, - "grad_norm": 1.0072555196252104, - "learning_rate": 5.406980811199881e-06, - "loss": 0.1584, - "step": 13025 - }, - { - "epoch": 0.66, - "grad_norm": 1.9447327044724418, - "learning_rate": 5.405517903848039e-06, - "loss": 0.1512, - "step": 13026 - }, - { - "epoch": 0.66, - "grad_norm": 1.0406129068479135, - "learning_rate": 5.404055121121778e-06, - "loss": 0.1772, - "step": 13027 - }, - { - "epoch": 0.66, - "grad_norm": 0.9813397660614563, - "learning_rate": 5.402592463060785e-06, - "loss": 0.1917, - "step": 13028 - }, - { - "epoch": 0.66, - "grad_norm": 0.9043884637346777, - "learning_rate": 5.401129929704727e-06, - "loss": 0.1743, - "step": 13029 - }, - { - "epoch": 0.66, - "grad_norm": 3.7330322694414013, - "learning_rate": 5.399667521093285e-06, - "loss": 0.1953, - "step": 13030 - }, - { - "epoch": 0.66, - "grad_norm": 1.2142528716335321, - "learning_rate": 5.398205237266116e-06, - "loss": 0.1697, - "step": 13031 - }, - { - "epoch": 0.66, - "grad_norm": 0.9287096481877403, - "learning_rate": 5.396743078262895e-06, - "loss": 0.1736, - "step": 13032 - }, - { - "epoch": 0.66, - "grad_norm": 1.2355905776550489, - "learning_rate": 5.395281044123273e-06, - "loss": 0.1645, - "step": 13033 - }, - { - "epoch": 0.66, - "grad_norm": 0.8630831827981413, - "learning_rate": 5.393819134886913e-06, - "loss": 0.1682, - "step": 13034 - }, - { - "epoch": 0.66, - "grad_norm": 2.0966432650161853, - "learning_rate": 5.392357350593469e-06, - "loss": 0.1745, - "step": 13035 - }, - { - "epoch": 0.66, - "grad_norm": 0.8858228134039371, - "learning_rate": 5.390895691282596e-06, - "loss": 0.1785, - "step": 13036 - }, - { - "epoch": 0.66, - "grad_norm": 0.9077285942791732, - "learning_rate": 5.389434156993935e-06, - "loss": 0.169, - "step": 13037 - }, - { - "epoch": 0.66, - "grad_norm": 0.9508245618541968, - "learning_rate": 5.387972747767136e-06, - "loss": 0.1893, - "step": 13038 - }, - { - "epoch": 0.66, - "grad_norm": 1.127200876715937, - "learning_rate": 5.386511463641836e-06, - "loss": 0.1729, - "step": 13039 - }, - { - "epoch": 0.66, - "grad_norm": 1.5310594407487175, - "learning_rate": 5.38505030465767e-06, - "loss": 0.1898, - "step": 13040 - }, - { - "epoch": 0.66, - "grad_norm": 0.938278714469896, - "learning_rate": 5.383589270854279e-06, - "loss": 0.1894, - "step": 13041 - }, - { - "epoch": 0.66, - "grad_norm": 1.0370258145919886, - "learning_rate": 5.382128362271285e-06, - "loss": 0.1794, - "step": 13042 - }, - { - "epoch": 0.66, - "grad_norm": 2.064224882974358, - "learning_rate": 5.380667578948321e-06, - "loss": 0.1656, - "step": 13043 - }, - { - "epoch": 0.66, - "grad_norm": 0.9352410513608885, - "learning_rate": 5.379206920925009e-06, - "loss": 0.1976, - "step": 13044 - }, - { - "epoch": 0.66, - "grad_norm": 0.7886476725295988, - "learning_rate": 5.3777463882409744e-06, - "loss": 0.1864, - "step": 13045 - }, - { - "epoch": 0.66, - "grad_norm": 1.079848051901478, - "learning_rate": 5.376285980935827e-06, - "loss": 0.1585, - "step": 13046 - }, - { - "epoch": 0.66, - "grad_norm": 0.9483693289000759, - "learning_rate": 5.374825699049186e-06, - "loss": 0.1597, - "step": 13047 - }, - { - "epoch": 0.66, - "grad_norm": 1.109683010946804, - "learning_rate": 5.3733655426206564e-06, - "loss": 0.1881, - "step": 13048 - }, - { - "epoch": 0.66, - "grad_norm": 0.9538865416720401, - "learning_rate": 5.371905511689852e-06, - "loss": 0.1738, - "step": 13049 - }, - { - "epoch": 0.66, - "grad_norm": 0.8736303533856425, - "learning_rate": 5.3704456062963674e-06, - "loss": 0.1677, - "step": 13050 - }, - { - "epoch": 0.66, - "grad_norm": 0.9141910495008178, - "learning_rate": 5.368985826479807e-06, - "loss": 0.1821, - "step": 13051 - }, - { - "epoch": 0.66, - "grad_norm": 0.9491642343236588, - "learning_rate": 5.367526172279768e-06, - "loss": 0.1619, - "step": 13052 - }, - { - "epoch": 0.66, - "grad_norm": 1.1240984076090763, - "learning_rate": 5.366066643735847e-06, - "loss": 0.1568, - "step": 13053 - }, - { - "epoch": 0.66, - "grad_norm": 1.389968272416857, - "learning_rate": 5.364607240887626e-06, - "loss": 0.1866, - "step": 13054 - }, - { - "epoch": 0.66, - "grad_norm": 3.3602336034440015, - "learning_rate": 5.3631479637747e-06, - "loss": 0.1631, - "step": 13055 - }, - { - "epoch": 0.66, - "grad_norm": 2.0299834130513226, - "learning_rate": 5.361688812436642e-06, - "loss": 0.1609, - "step": 13056 - }, - { - "epoch": 0.66, - "grad_norm": 1.2674930173145462, - "learning_rate": 5.360229786913042e-06, - "loss": 0.1657, - "step": 13057 - }, - { - "epoch": 0.66, - "grad_norm": 1.13060064340207, - "learning_rate": 5.3587708872434705e-06, - "loss": 0.1746, - "step": 13058 - }, - { - "epoch": 0.66, - "grad_norm": 0.8379752729010114, - "learning_rate": 5.357312113467497e-06, - "loss": 0.1534, - "step": 13059 - }, - { - "epoch": 0.66, - "grad_norm": 0.9557520113683214, - "learning_rate": 5.355853465624695e-06, - "loss": 0.1711, - "step": 13060 - }, - { - "epoch": 0.66, - "grad_norm": 0.9239994296030585, - "learning_rate": 5.354394943754631e-06, - "loss": 0.1737, - "step": 13061 - }, - { - "epoch": 0.66, - "grad_norm": 1.321773416829171, - "learning_rate": 5.352936547896868e-06, - "loss": 0.1484, - "step": 13062 - }, - { - "epoch": 0.66, - "grad_norm": 0.9572197883433509, - "learning_rate": 5.351478278090962e-06, - "loss": 0.1839, - "step": 13063 - }, - { - "epoch": 0.66, - "grad_norm": 1.5778148636154194, - "learning_rate": 5.350020134376472e-06, - "loss": 0.1724, - "step": 13064 - }, - { - "epoch": 0.66, - "grad_norm": 0.9079286587968833, - "learning_rate": 5.348562116792946e-06, - "loss": 0.1738, - "step": 13065 - }, - { - "epoch": 0.66, - "grad_norm": 0.9419826033561322, - "learning_rate": 5.34710422537994e-06, - "loss": 0.1763, - "step": 13066 - }, - { - "epoch": 0.66, - "grad_norm": 1.116714619089233, - "learning_rate": 5.345646460176989e-06, - "loss": 0.1757, - "step": 13067 - }, - { - "epoch": 0.66, - "grad_norm": 1.1682603017157274, - "learning_rate": 5.344188821223642e-06, - "loss": 0.1545, - "step": 13068 - }, - { - "epoch": 0.66, - "grad_norm": 0.9448118760115939, - "learning_rate": 5.342731308559435e-06, - "loss": 0.163, - "step": 13069 - }, - { - "epoch": 0.66, - "grad_norm": 1.2298396046807265, - "learning_rate": 5.341273922223908e-06, - "loss": 0.1825, - "step": 13070 - }, - { - "epoch": 0.66, - "grad_norm": 1.1640638526000524, - "learning_rate": 5.339816662256587e-06, - "loss": 0.1556, - "step": 13071 - }, - { - "epoch": 0.66, - "grad_norm": 1.1082203563147006, - "learning_rate": 5.338359528697005e-06, - "loss": 0.1632, - "step": 13072 - }, - { - "epoch": 0.66, - "grad_norm": 1.2232992504838813, - "learning_rate": 5.3369025215846796e-06, - "loss": 0.1789, - "step": 13073 - }, - { - "epoch": 0.66, - "grad_norm": 0.9051337295054136, - "learning_rate": 5.3354456409591405e-06, - "loss": 0.1649, - "step": 13074 - }, - { - "epoch": 0.66, - "grad_norm": 0.9972396031450846, - "learning_rate": 5.333988886859903e-06, - "loss": 0.1574, - "step": 13075 - }, - { - "epoch": 0.66, - "grad_norm": 0.7576046547142246, - "learning_rate": 5.332532259326476e-06, - "loss": 0.1912, - "step": 13076 - }, - { - "epoch": 0.66, - "grad_norm": 0.9144689190373975, - "learning_rate": 5.331075758398375e-06, - "loss": 0.1796, - "step": 13077 - }, - { - "epoch": 0.67, - "grad_norm": 1.7592614649841753, - "learning_rate": 5.329619384115108e-06, - "loss": 0.1616, - "step": 13078 - }, - { - "epoch": 0.67, - "grad_norm": 1.0801539664317343, - "learning_rate": 5.328163136516184e-06, - "loss": 0.1411, - "step": 13079 - }, - { - "epoch": 0.67, - "grad_norm": 1.0975411316744548, - "learning_rate": 5.326707015641093e-06, - "loss": 0.1754, - "step": 13080 - }, - { - "epoch": 0.67, - "grad_norm": 2.636575145510722, - "learning_rate": 5.325251021529343e-06, - "loss": 0.1778, - "step": 13081 - }, - { - "epoch": 0.67, - "grad_norm": 1.4382356623470685, - "learning_rate": 5.323795154220419e-06, - "loss": 0.1736, - "step": 13082 - }, - { - "epoch": 0.67, - "grad_norm": 1.6580238322303211, - "learning_rate": 5.322339413753819e-06, - "loss": 0.1732, - "step": 13083 - }, - { - "epoch": 0.67, - "grad_norm": 1.0080504503860495, - "learning_rate": 5.3208838001690236e-06, - "loss": 0.1907, - "step": 13084 - }, - { - "epoch": 0.67, - "grad_norm": 0.7317639621699024, - "learning_rate": 5.319428313505523e-06, - "loss": 0.1658, - "step": 13085 - }, - { - "epoch": 0.67, - "grad_norm": 1.4234701788945343, - "learning_rate": 5.317972953802789e-06, - "loss": 0.205, - "step": 13086 - }, - { - "epoch": 0.67, - "grad_norm": 0.8881240899721385, - "learning_rate": 5.316517721100304e-06, - "loss": 0.1756, - "step": 13087 - }, - { - "epoch": 0.67, - "grad_norm": 1.0114148632119997, - "learning_rate": 5.31506261543754e-06, - "loss": 0.1618, - "step": 13088 - }, - { - "epoch": 0.67, - "grad_norm": 1.2493343913754495, - "learning_rate": 5.3136076368539706e-06, - "loss": 0.1857, - "step": 13089 - }, - { - "epoch": 0.67, - "grad_norm": 1.011576010409811, - "learning_rate": 5.312152785389056e-06, - "loss": 0.1718, - "step": 13090 - }, - { - "epoch": 0.67, - "grad_norm": 1.1400992672593633, - "learning_rate": 5.310698061082264e-06, - "loss": 0.187, - "step": 13091 - }, - { - "epoch": 0.67, - "grad_norm": 0.8391929239457808, - "learning_rate": 5.309243463973054e-06, - "loss": 0.1662, - "step": 13092 - }, - { - "epoch": 0.67, - "grad_norm": 1.0521217755250742, - "learning_rate": 5.307788994100876e-06, - "loss": 0.189, - "step": 13093 - }, - { - "epoch": 0.67, - "grad_norm": 0.8679850090640902, - "learning_rate": 5.306334651505185e-06, - "loss": 0.1759, - "step": 13094 - }, - { - "epoch": 0.67, - "grad_norm": 1.5800351297499289, - "learning_rate": 5.304880436225432e-06, - "loss": 0.2178, - "step": 13095 - }, - { - "epoch": 0.67, - "grad_norm": 1.1295677367762624, - "learning_rate": 5.303426348301066e-06, - "loss": 0.157, - "step": 13096 - }, - { - "epoch": 0.67, - "grad_norm": 0.9239596443120076, - "learning_rate": 5.3019723877715235e-06, - "loss": 0.1811, - "step": 13097 - }, - { - "epoch": 0.67, - "grad_norm": 0.9849840091548583, - "learning_rate": 5.300518554676247e-06, - "loss": 0.1619, - "step": 13098 - }, - { - "epoch": 0.67, - "grad_norm": 0.9109699828134366, - "learning_rate": 5.299064849054667e-06, - "loss": 0.1476, - "step": 13099 - }, - { - "epoch": 0.67, - "grad_norm": 1.3790099251749453, - "learning_rate": 5.297611270946223e-06, - "loss": 0.1761, - "step": 13100 - }, - { - "epoch": 0.67, - "grad_norm": 1.226670570464689, - "learning_rate": 5.296157820390335e-06, - "loss": 0.1539, - "step": 13101 - }, - { - "epoch": 0.67, - "grad_norm": 0.7468050795649048, - "learning_rate": 5.294704497426435e-06, - "loss": 0.1555, - "step": 13102 - }, - { - "epoch": 0.67, - "grad_norm": 0.9152207253205684, - "learning_rate": 5.293251302093938e-06, - "loss": 0.1565, - "step": 13103 - }, - { - "epoch": 0.67, - "grad_norm": 1.1090519116839588, - "learning_rate": 5.291798234432264e-06, - "loss": 0.1788, - "step": 13104 - }, - { - "epoch": 0.67, - "grad_norm": 1.0360390926658547, - "learning_rate": 5.2903452944808294e-06, - "loss": 0.2028, - "step": 13105 - }, - { - "epoch": 0.67, - "grad_norm": 0.7909596590664957, - "learning_rate": 5.28889248227905e-06, - "loss": 0.1699, - "step": 13106 - }, - { - "epoch": 0.67, - "grad_norm": 1.618341463656754, - "learning_rate": 5.287439797866323e-06, - "loss": 0.1906, - "step": 13107 - }, - { - "epoch": 0.67, - "grad_norm": 0.9067444923206541, - "learning_rate": 5.2859872412820625e-06, - "loss": 0.1802, - "step": 13108 - }, - { - "epoch": 0.67, - "grad_norm": 1.4808531136665637, - "learning_rate": 5.284534812565663e-06, - "loss": 0.1836, - "step": 13109 - }, - { - "epoch": 0.67, - "grad_norm": 0.8096508917230821, - "learning_rate": 5.283082511756519e-06, - "loss": 0.173, - "step": 13110 - }, - { - "epoch": 0.67, - "grad_norm": 0.9257012351305746, - "learning_rate": 5.281630338894032e-06, - "loss": 0.1756, - "step": 13111 - }, - { - "epoch": 0.67, - "grad_norm": 1.064130585477865, - "learning_rate": 5.280178294017586e-06, - "loss": 0.1563, - "step": 13112 - }, - { - "epoch": 0.67, - "grad_norm": 1.3097633680081981, - "learning_rate": 5.27872637716657e-06, - "loss": 0.1749, - "step": 13113 - }, - { - "epoch": 0.67, - "grad_norm": 1.4768187093469949, - "learning_rate": 5.277274588380368e-06, - "loss": 0.1736, - "step": 13114 - }, - { - "epoch": 0.67, - "grad_norm": 0.9706272896638604, - "learning_rate": 5.275822927698362e-06, - "loss": 0.2017, - "step": 13115 - }, - { - "epoch": 0.67, - "grad_norm": 1.6035603029810908, - "learning_rate": 5.274371395159923e-06, - "loss": 0.1456, - "step": 13116 - }, - { - "epoch": 0.67, - "grad_norm": 1.7868879807142999, - "learning_rate": 5.27291999080443e-06, - "loss": 0.172, - "step": 13117 - }, - { - "epoch": 0.67, - "grad_norm": 1.16407422123123, - "learning_rate": 5.271468714671247e-06, - "loss": 0.1899, - "step": 13118 - }, - { - "epoch": 0.67, - "grad_norm": 1.121016751150056, - "learning_rate": 5.2700175667997456e-06, - "loss": 0.1558, - "step": 13119 - }, - { - "epoch": 0.67, - "grad_norm": 1.0911689136172678, - "learning_rate": 5.2685665472292805e-06, - "loss": 0.1773, - "step": 13120 - }, - { - "epoch": 0.67, - "grad_norm": 0.7623012469449829, - "learning_rate": 5.267115655999214e-06, - "loss": 0.1694, - "step": 13121 - }, - { - "epoch": 0.67, - "grad_norm": 1.2615801018605743, - "learning_rate": 5.265664893148904e-06, - "loss": 0.1792, - "step": 13122 - }, - { - "epoch": 0.67, - "grad_norm": 1.2063061781367201, - "learning_rate": 5.264214258717705e-06, - "loss": 0.1627, - "step": 13123 - }, - { - "epoch": 0.67, - "grad_norm": 1.36952437005172, - "learning_rate": 5.2627637527449575e-06, - "loss": 0.1762, - "step": 13124 - }, - { - "epoch": 0.67, - "grad_norm": 0.9686427549608618, - "learning_rate": 5.2613133752700145e-06, - "loss": 0.1918, - "step": 13125 - }, - { - "epoch": 0.67, - "grad_norm": 1.341735433036232, - "learning_rate": 5.2598631263322145e-06, - "loss": 0.1809, - "step": 13126 - }, - { - "epoch": 0.67, - "grad_norm": 1.3729701286309794, - "learning_rate": 5.25841300597089e-06, - "loss": 0.1642, - "step": 13127 - }, - { - "epoch": 0.67, - "grad_norm": 0.9975682349986622, - "learning_rate": 5.256963014225385e-06, - "loss": 0.1864, - "step": 13128 - }, - { - "epoch": 0.67, - "grad_norm": 1.4672529989764542, - "learning_rate": 5.255513151135022e-06, - "loss": 0.2066, - "step": 13129 - }, - { - "epoch": 0.67, - "grad_norm": 0.7928599134134791, - "learning_rate": 5.2540634167391325e-06, - "loss": 0.195, - "step": 13130 - }, - { - "epoch": 0.67, - "grad_norm": 1.0659403602991226, - "learning_rate": 5.252613811077042e-06, - "loss": 0.1718, - "step": 13131 - }, - { - "epoch": 0.67, - "grad_norm": 1.20100561882506, - "learning_rate": 5.251164334188073e-06, - "loss": 0.1737, - "step": 13132 - }, - { - "epoch": 0.67, - "grad_norm": 1.405450961930424, - "learning_rate": 5.249714986111536e-06, - "loss": 0.1619, - "step": 13133 - }, - { - "epoch": 0.67, - "grad_norm": 1.3184257732901865, - "learning_rate": 5.248265766886752e-06, - "loss": 0.1524, - "step": 13134 - }, - { - "epoch": 0.67, - "grad_norm": 1.032123083377175, - "learning_rate": 5.246816676553024e-06, - "loss": 0.1774, - "step": 13135 - }, - { - "epoch": 0.67, - "grad_norm": 2.3472235154009833, - "learning_rate": 5.245367715149665e-06, - "loss": 0.1861, - "step": 13136 - }, - { - "epoch": 0.67, - "grad_norm": 4.587312343510043, - "learning_rate": 5.243918882715973e-06, - "loss": 0.1769, - "step": 13137 - }, - { - "epoch": 0.67, - "grad_norm": 1.1996450448267364, - "learning_rate": 5.242470179291253e-06, - "loss": 0.1543, - "step": 13138 - }, - { - "epoch": 0.67, - "grad_norm": 0.9134390388709754, - "learning_rate": 5.241021604914793e-06, - "loss": 0.178, - "step": 13139 - }, - { - "epoch": 0.67, - "grad_norm": 0.897598360509561, - "learning_rate": 5.2395731596258925e-06, - "loss": 0.1726, - "step": 13140 - }, - { - "epoch": 0.67, - "grad_norm": 1.2008256831395474, - "learning_rate": 5.238124843463839e-06, - "loss": 0.1792, - "step": 13141 - }, - { - "epoch": 0.67, - "grad_norm": 1.025255671697507, - "learning_rate": 5.236676656467921e-06, - "loss": 0.1699, - "step": 13142 - }, - { - "epoch": 0.67, - "grad_norm": 1.1622225323396218, - "learning_rate": 5.235228598677413e-06, - "loss": 0.1631, - "step": 13143 - }, - { - "epoch": 0.67, - "grad_norm": 0.9028964025869091, - "learning_rate": 5.233780670131603e-06, - "loss": 0.1655, - "step": 13144 - }, - { - "epoch": 0.67, - "grad_norm": 0.947513699797315, - "learning_rate": 5.232332870869763e-06, - "loss": 0.1798, - "step": 13145 - }, - { - "epoch": 0.67, - "grad_norm": 0.9029104736554413, - "learning_rate": 5.2308852009311576e-06, - "loss": 0.1928, - "step": 13146 - }, - { - "epoch": 0.67, - "grad_norm": 1.686357294204009, - "learning_rate": 5.229437660355061e-06, - "loss": 0.1809, - "step": 13147 - }, - { - "epoch": 0.67, - "grad_norm": 0.9317309818471727, - "learning_rate": 5.227990249180737e-06, - "loss": 0.1723, - "step": 13148 - }, - { - "epoch": 0.67, - "grad_norm": 1.0209870204447664, - "learning_rate": 5.226542967447452e-06, - "loss": 0.1816, - "step": 13149 - }, - { - "epoch": 0.67, - "grad_norm": 1.167574722120494, - "learning_rate": 5.225095815194453e-06, - "loss": 0.1733, - "step": 13150 - }, - { - "epoch": 0.67, - "grad_norm": 0.9041556488319442, - "learning_rate": 5.223648792461005e-06, - "loss": 0.1856, - "step": 13151 - }, - { - "epoch": 0.67, - "grad_norm": 1.0624582657746249, - "learning_rate": 5.222201899286349e-06, - "loss": 0.1697, - "step": 13152 - }, - { - "epoch": 0.67, - "grad_norm": 1.0023796482199938, - "learning_rate": 5.2207551357097395e-06, - "loss": 0.1599, - "step": 13153 - }, - { - "epoch": 0.67, - "grad_norm": 0.8018090044150327, - "learning_rate": 5.219308501770415e-06, - "loss": 0.1589, - "step": 13154 - }, - { - "epoch": 0.67, - "grad_norm": 1.6515551581906176, - "learning_rate": 5.217861997507618e-06, - "loss": 0.192, - "step": 13155 - }, - { - "epoch": 0.67, - "grad_norm": 0.8821744422221828, - "learning_rate": 5.2164156229605835e-06, - "loss": 0.1587, - "step": 13156 - }, - { - "epoch": 0.67, - "grad_norm": 26.084781713598552, - "learning_rate": 5.214969378168544e-06, - "loss": 0.1689, - "step": 13157 - }, - { - "epoch": 0.67, - "grad_norm": 1.2620184943781705, - "learning_rate": 5.213523263170731e-06, - "loss": 0.1788, - "step": 13158 - }, - { - "epoch": 0.67, - "grad_norm": 0.9441269351658541, - "learning_rate": 5.2120772780063735e-06, - "loss": 0.1803, - "step": 13159 - }, - { - "epoch": 0.67, - "grad_norm": 0.8263735589892514, - "learning_rate": 5.210631422714686e-06, - "loss": 0.177, - "step": 13160 - }, - { - "epoch": 0.67, - "grad_norm": 0.8343292258524222, - "learning_rate": 5.209185697334895e-06, - "loss": 0.1935, - "step": 13161 - }, - { - "epoch": 0.67, - "grad_norm": 0.7870435131244253, - "learning_rate": 5.207740101906215e-06, - "loss": 0.1649, - "step": 13162 - }, - { - "epoch": 0.67, - "grad_norm": 1.8376019067077287, - "learning_rate": 5.20629463646785e-06, - "loss": 0.1656, - "step": 13163 - }, - { - "epoch": 0.67, - "grad_norm": 1.1280017623926835, - "learning_rate": 5.2048493010590125e-06, - "loss": 0.1756, - "step": 13164 - }, - { - "epoch": 0.67, - "grad_norm": 1.1939937173125184, - "learning_rate": 5.20340409571891e-06, - "loss": 0.1779, - "step": 13165 - }, - { - "epoch": 0.67, - "grad_norm": 1.0052027725381725, - "learning_rate": 5.201959020486746e-06, - "loss": 0.1924, - "step": 13166 - }, - { - "epoch": 0.67, - "grad_norm": 1.968058426905328, - "learning_rate": 5.20051407540171e-06, - "loss": 0.1891, - "step": 13167 - }, - { - "epoch": 0.67, - "grad_norm": 0.9770179576061162, - "learning_rate": 5.199069260503006e-06, - "loss": 0.1449, - "step": 13168 - }, - { - "epoch": 0.67, - "grad_norm": 1.2173618399010426, - "learning_rate": 5.197624575829815e-06, - "loss": 0.2029, - "step": 13169 - }, - { - "epoch": 0.67, - "grad_norm": 3.1799476798417223, - "learning_rate": 5.196180021421332e-06, - "loss": 0.1988, - "step": 13170 - }, - { - "epoch": 0.67, - "grad_norm": 1.5495054811517865, - "learning_rate": 5.194735597316733e-06, - "loss": 0.1916, - "step": 13171 - }, - { - "epoch": 0.67, - "grad_norm": 0.9055829113764653, - "learning_rate": 5.193291303555208e-06, - "loss": 0.1645, - "step": 13172 - }, - { - "epoch": 0.67, - "grad_norm": 3.360427470770462, - "learning_rate": 5.191847140175923e-06, - "loss": 0.1538, - "step": 13173 - }, - { - "epoch": 0.67, - "grad_norm": 1.2417065582611897, - "learning_rate": 5.190403107218056e-06, - "loss": 0.1882, - "step": 13174 - }, - { - "epoch": 0.67, - "grad_norm": 1.0556889798216127, - "learning_rate": 5.188959204720776e-06, - "loss": 0.1933, - "step": 13175 - }, - { - "epoch": 0.67, - "grad_norm": 2.736411415481473, - "learning_rate": 5.1875154327232534e-06, - "loss": 0.1805, - "step": 13176 - }, - { - "epoch": 0.67, - "grad_norm": 0.9560392244403962, - "learning_rate": 5.186071791264642e-06, - "loss": 0.1685, - "step": 13177 - }, - { - "epoch": 0.67, - "grad_norm": 0.854930259475486, - "learning_rate": 5.1846282803841095e-06, - "loss": 0.1833, - "step": 13178 - }, - { - "epoch": 0.67, - "grad_norm": 0.8505934212998812, - "learning_rate": 5.183184900120807e-06, - "loss": 0.1485, - "step": 13179 - }, - { - "epoch": 0.67, - "grad_norm": 1.1737667148898814, - "learning_rate": 5.181741650513883e-06, - "loss": 0.1629, - "step": 13180 - }, - { - "epoch": 0.67, - "grad_norm": 1.2484661380408568, - "learning_rate": 5.180298531602491e-06, - "loss": 0.1718, - "step": 13181 - }, - { - "epoch": 0.67, - "grad_norm": 1.2998613299876511, - "learning_rate": 5.178855543425771e-06, - "loss": 0.1617, - "step": 13182 - }, - { - "epoch": 0.67, - "grad_norm": 0.9322637885141669, - "learning_rate": 5.177412686022866e-06, - "loss": 0.185, - "step": 13183 - }, - { - "epoch": 0.67, - "grad_norm": 1.215022711148766, - "learning_rate": 5.1759699594329135e-06, - "loss": 0.1681, - "step": 13184 - }, - { - "epoch": 0.67, - "grad_norm": 1.4889001656465048, - "learning_rate": 5.1745273636950545e-06, - "loss": 0.1709, - "step": 13185 - }, - { - "epoch": 0.67, - "grad_norm": 1.0293914136113915, - "learning_rate": 5.173084898848408e-06, - "loss": 0.1693, - "step": 13186 - }, - { - "epoch": 0.67, - "grad_norm": 1.6068083591065103, - "learning_rate": 5.17164256493211e-06, - "loss": 0.1796, - "step": 13187 - }, - { - "epoch": 0.67, - "grad_norm": 1.0840075598306749, - "learning_rate": 5.170200361985277e-06, - "loss": 0.1684, - "step": 13188 - }, - { - "epoch": 0.67, - "grad_norm": 1.0189090314842295, - "learning_rate": 5.168758290047035e-06, - "loss": 0.1501, - "step": 13189 - }, - { - "epoch": 0.67, - "grad_norm": 0.9682687528749686, - "learning_rate": 5.167316349156495e-06, - "loss": 0.1988, - "step": 13190 - }, - { - "epoch": 0.67, - "grad_norm": 1.0825631032236098, - "learning_rate": 5.16587453935277e-06, - "loss": 0.1704, - "step": 13191 - }, - { - "epoch": 0.67, - "grad_norm": 1.2869878171587137, - "learning_rate": 5.164432860674972e-06, - "loss": 0.1723, - "step": 13192 - }, - { - "epoch": 0.67, - "grad_norm": 1.1248074085681445, - "learning_rate": 5.162991313162209e-06, - "loss": 0.1684, - "step": 13193 - }, - { - "epoch": 0.67, - "grad_norm": 1.7456815987690477, - "learning_rate": 5.161549896853577e-06, - "loss": 0.2443, - "step": 13194 - }, - { - "epoch": 0.67, - "grad_norm": 0.8367021529866165, - "learning_rate": 5.16010861178818e-06, - "loss": 0.1567, - "step": 13195 - }, - { - "epoch": 0.67, - "grad_norm": 1.1886731283547356, - "learning_rate": 5.158667458005111e-06, - "loss": 0.1787, - "step": 13196 - }, - { - "epoch": 0.67, - "grad_norm": 0.9412907591588243, - "learning_rate": 5.157226435543456e-06, - "loss": 0.1637, - "step": 13197 - }, - { - "epoch": 0.67, - "grad_norm": 1.008260727572345, - "learning_rate": 5.155785544442313e-06, - "loss": 0.1919, - "step": 13198 - }, - { - "epoch": 0.67, - "grad_norm": 1.4114423893472183, - "learning_rate": 5.154344784740757e-06, - "loss": 0.1833, - "step": 13199 - }, - { - "epoch": 0.67, - "grad_norm": 0.8491369164972039, - "learning_rate": 5.15290415647787e-06, - "loss": 0.1701, - "step": 13200 - }, - { - "epoch": 0.67, - "grad_norm": 0.9168263593528301, - "learning_rate": 5.1514636596927325e-06, - "loss": 0.1723, - "step": 13201 - }, - { - "epoch": 0.67, - "grad_norm": 0.7817434712222572, - "learning_rate": 5.150023294424422e-06, - "loss": 0.175, - "step": 13202 - }, - { - "epoch": 0.67, - "grad_norm": 0.8371772526988961, - "learning_rate": 5.148583060711999e-06, - "loss": 0.1654, - "step": 13203 - }, - { - "epoch": 0.67, - "grad_norm": 0.8143341031199058, - "learning_rate": 5.147142958594538e-06, - "loss": 0.1649, - "step": 13204 - }, - { - "epoch": 0.67, - "grad_norm": 1.6114298088143855, - "learning_rate": 5.145702988111095e-06, - "loss": 0.1901, - "step": 13205 - }, - { - "epoch": 0.67, - "grad_norm": 1.0781789312399832, - "learning_rate": 5.144263149300737e-06, - "loss": 0.1656, - "step": 13206 - }, - { - "epoch": 0.67, - "grad_norm": 0.7097433857989437, - "learning_rate": 5.142823442202511e-06, - "loss": 0.1549, - "step": 13207 - }, - { - "epoch": 0.67, - "grad_norm": 1.7835889847646613, - "learning_rate": 5.141383866855476e-06, - "loss": 0.191, - "step": 13208 - }, - { - "epoch": 0.67, - "grad_norm": 1.6006004708707962, - "learning_rate": 5.139944423298675e-06, - "loss": 0.1878, - "step": 13209 - }, - { - "epoch": 0.67, - "grad_norm": 2.797442171502045, - "learning_rate": 5.138505111571157e-06, - "loss": 0.1586, - "step": 13210 - }, - { - "epoch": 0.67, - "grad_norm": 0.8320194440956922, - "learning_rate": 5.137065931711962e-06, - "loss": 0.1753, - "step": 13211 - }, - { - "epoch": 0.67, - "grad_norm": 0.9070023242298215, - "learning_rate": 5.135626883760132e-06, - "loss": 0.1673, - "step": 13212 - }, - { - "epoch": 0.67, - "grad_norm": 0.9764642294741012, - "learning_rate": 5.134187967754694e-06, - "loss": 0.1603, - "step": 13213 - }, - { - "epoch": 0.67, - "grad_norm": 0.9232328856993673, - "learning_rate": 5.132749183734684e-06, - "loss": 0.1797, - "step": 13214 - }, - { - "epoch": 0.67, - "grad_norm": 1.220088273132286, - "learning_rate": 5.131310531739129e-06, - "loss": 0.1743, - "step": 13215 - }, - { - "epoch": 0.67, - "grad_norm": 1.1545015554406468, - "learning_rate": 5.129872011807046e-06, - "loss": 0.1894, - "step": 13216 - }, - { - "epoch": 0.67, - "grad_norm": 1.115588263462691, - "learning_rate": 5.128433623977461e-06, - "loss": 0.1794, - "step": 13217 - }, - { - "epoch": 0.67, - "grad_norm": 1.0539783738675352, - "learning_rate": 5.126995368289389e-06, - "loss": 0.1848, - "step": 13218 - }, - { - "epoch": 0.67, - "grad_norm": 1.5524460056178087, - "learning_rate": 5.125557244781847e-06, - "loss": 0.1635, - "step": 13219 - }, - { - "epoch": 0.67, - "grad_norm": 1.1596802547778315, - "learning_rate": 5.1241192534938355e-06, - "loss": 0.1671, - "step": 13220 - }, - { - "epoch": 0.67, - "grad_norm": 1.0529195047149937, - "learning_rate": 5.122681394464368e-06, - "loss": 0.1995, - "step": 13221 - }, - { - "epoch": 0.67, - "grad_norm": 1.0346863465392042, - "learning_rate": 5.12124366773244e-06, - "loss": 0.1613, - "step": 13222 - }, - { - "epoch": 0.67, - "grad_norm": 1.0604730200598846, - "learning_rate": 5.119806073337057e-06, - "loss": 0.1567, - "step": 13223 - }, - { - "epoch": 0.67, - "grad_norm": 0.8402646199803202, - "learning_rate": 5.118368611317205e-06, - "loss": 0.1591, - "step": 13224 - }, - { - "epoch": 0.67, - "grad_norm": 0.9687228752658418, - "learning_rate": 5.116931281711886e-06, - "loss": 0.1733, - "step": 13225 - }, - { - "epoch": 0.67, - "grad_norm": 1.3458085182463966, - "learning_rate": 5.115494084560076e-06, - "loss": 0.1732, - "step": 13226 - }, - { - "epoch": 0.67, - "grad_norm": 0.8374535462370374, - "learning_rate": 5.114057019900764e-06, - "loss": 0.1602, - "step": 13227 - }, - { - "epoch": 0.67, - "grad_norm": 0.9107738081284421, - "learning_rate": 5.112620087772933e-06, - "loss": 0.1593, - "step": 13228 - }, - { - "epoch": 0.67, - "grad_norm": 0.8671955054266972, - "learning_rate": 5.111183288215562e-06, - "loss": 0.1573, - "step": 13229 - }, - { - "epoch": 0.67, - "grad_norm": 1.3376215180747362, - "learning_rate": 5.1097466212676175e-06, - "loss": 0.1686, - "step": 13230 - }, - { - "epoch": 0.67, - "grad_norm": 0.8096623233751569, - "learning_rate": 5.108310086968075e-06, - "loss": 0.1765, - "step": 13231 - }, - { - "epoch": 0.67, - "grad_norm": 0.9195070385921977, - "learning_rate": 5.106873685355897e-06, - "loss": 0.1723, - "step": 13232 - }, - { - "epoch": 0.67, - "grad_norm": 0.8939499616027785, - "learning_rate": 5.105437416470043e-06, - "loss": 0.1765, - "step": 13233 - }, - { - "epoch": 0.67, - "grad_norm": 0.8221497410960499, - "learning_rate": 5.10400128034948e-06, - "loss": 0.1832, - "step": 13234 - }, - { - "epoch": 0.67, - "grad_norm": 0.9561186940936613, - "learning_rate": 5.102565277033155e-06, - "loss": 0.1513, - "step": 13235 - }, - { - "epoch": 0.67, - "grad_norm": 0.872333575342079, - "learning_rate": 5.101129406560023e-06, - "loss": 0.1904, - "step": 13236 - }, - { - "epoch": 0.67, - "grad_norm": 1.6989686795106012, - "learning_rate": 5.099693668969033e-06, - "loss": 0.1791, - "step": 13237 - }, - { - "epoch": 0.67, - "grad_norm": 10.59026638608574, - "learning_rate": 5.098258064299132e-06, - "loss": 0.1643, - "step": 13238 - }, - { - "epoch": 0.67, - "grad_norm": 1.1675096098809408, - "learning_rate": 5.096822592589254e-06, - "loss": 0.1732, - "step": 13239 - }, - { - "epoch": 0.67, - "grad_norm": 1.0224568286169364, - "learning_rate": 5.095387253878346e-06, - "loss": 0.2192, - "step": 13240 - }, - { - "epoch": 0.67, - "grad_norm": 1.4791810466091335, - "learning_rate": 5.09395204820533e-06, - "loss": 0.1682, - "step": 13241 - }, - { - "epoch": 0.67, - "grad_norm": 1.503518493377402, - "learning_rate": 5.092516975609146e-06, - "loss": 0.1833, - "step": 13242 - }, - { - "epoch": 0.67, - "grad_norm": 0.9971546165789382, - "learning_rate": 5.091082036128712e-06, - "loss": 0.173, - "step": 13243 - }, - { - "epoch": 0.67, - "grad_norm": 1.0692436202332236, - "learning_rate": 5.0896472298029555e-06, - "loss": 0.1666, - "step": 13244 - }, - { - "epoch": 0.67, - "grad_norm": 1.4054718829745738, - "learning_rate": 5.088212556670795e-06, - "loss": 0.1733, - "step": 13245 - }, - { - "epoch": 0.67, - "grad_norm": 0.8824887512289968, - "learning_rate": 5.08677801677115e-06, - "loss": 0.1785, - "step": 13246 - }, - { - "epoch": 0.67, - "grad_norm": 1.1406611285731336, - "learning_rate": 5.085343610142926e-06, - "loss": 0.1765, - "step": 13247 - }, - { - "epoch": 0.67, - "grad_norm": 1.2010164866264619, - "learning_rate": 5.083909336825037e-06, - "loss": 0.1716, - "step": 13248 - }, - { - "epoch": 0.67, - "grad_norm": 1.1463388525562261, - "learning_rate": 5.0824751968563845e-06, - "loss": 0.1839, - "step": 13249 - }, - { - "epoch": 0.67, - "grad_norm": 1.8175246259217428, - "learning_rate": 5.0810411902758675e-06, - "loss": 0.1701, - "step": 13250 - }, - { - "epoch": 0.67, - "grad_norm": 1.3857489048773455, - "learning_rate": 5.0796073171223884e-06, - "loss": 0.1901, - "step": 13251 - }, - { - "epoch": 0.67, - "grad_norm": 1.0770489342555571, - "learning_rate": 5.078173577434836e-06, - "loss": 0.156, - "step": 13252 - }, - { - "epoch": 0.67, - "grad_norm": 1.0569178307565479, - "learning_rate": 5.076739971252103e-06, - "loss": 0.1667, - "step": 13253 - }, - { - "epoch": 0.67, - "grad_norm": 0.8588401381650115, - "learning_rate": 5.0753064986130765e-06, - "loss": 0.1783, - "step": 13254 - }, - { - "epoch": 0.67, - "grad_norm": 1.0696363407064873, - "learning_rate": 5.073873159556643e-06, - "loss": 0.1749, - "step": 13255 - }, - { - "epoch": 0.67, - "grad_norm": 2.0143686351956207, - "learning_rate": 5.072439954121675e-06, - "loss": 0.1777, - "step": 13256 - }, - { - "epoch": 0.67, - "grad_norm": 1.5402276037649723, - "learning_rate": 5.0710068823470535e-06, - "loss": 0.1656, - "step": 13257 - }, - { - "epoch": 0.67, - "grad_norm": 1.0663757615170488, - "learning_rate": 5.069573944271646e-06, - "loss": 0.1785, - "step": 13258 - }, - { - "epoch": 0.67, - "grad_norm": 1.0825248239599943, - "learning_rate": 5.068141139934328e-06, - "loss": 0.1927, - "step": 13259 - }, - { - "epoch": 0.67, - "grad_norm": 1.0449836392755059, - "learning_rate": 5.066708469373958e-06, - "loss": 0.17, - "step": 13260 - }, - { - "epoch": 0.67, - "grad_norm": 1.2647662251334955, - "learning_rate": 5.065275932629401e-06, - "loss": 0.1636, - "step": 13261 - }, - { - "epoch": 0.67, - "grad_norm": 0.8529323655944855, - "learning_rate": 5.063843529739509e-06, - "loss": 0.1719, - "step": 13262 - }, - { - "epoch": 0.67, - "grad_norm": 2.026661229104386, - "learning_rate": 5.062411260743141e-06, - "loss": 0.1884, - "step": 13263 - }, - { - "epoch": 0.67, - "grad_norm": 1.2294290210006409, - "learning_rate": 5.060979125679147e-06, - "loss": 0.1672, - "step": 13264 - }, - { - "epoch": 0.67, - "grad_norm": 2.0393714847360274, - "learning_rate": 5.0595471245863745e-06, - "loss": 0.1797, - "step": 13265 - }, - { - "epoch": 0.67, - "grad_norm": 1.209605688386021, - "learning_rate": 5.058115257503667e-06, - "loss": 0.1614, - "step": 13266 - }, - { - "epoch": 0.67, - "grad_norm": 1.0857832204663802, - "learning_rate": 5.056683524469859e-06, - "loss": 0.1927, - "step": 13267 - }, - { - "epoch": 0.67, - "grad_norm": 0.9911732545186153, - "learning_rate": 5.055251925523792e-06, - "loss": 0.1655, - "step": 13268 - }, - { - "epoch": 0.67, - "grad_norm": 1.624387785149277, - "learning_rate": 5.0538204607042925e-06, - "loss": 0.1607, - "step": 13269 - }, - { - "epoch": 0.67, - "grad_norm": 0.9864496852717262, - "learning_rate": 5.052389130050193e-06, - "loss": 0.1615, - "step": 13270 - }, - { - "epoch": 0.67, - "grad_norm": 1.2432216270091936, - "learning_rate": 5.050957933600317e-06, - "loss": 0.1989, - "step": 13271 - }, - { - "epoch": 0.67, - "grad_norm": 1.0533200560620144, - "learning_rate": 5.049526871393491e-06, - "loss": 0.171, - "step": 13272 - }, - { - "epoch": 0.67, - "grad_norm": 0.7796060605558551, - "learning_rate": 5.048095943468524e-06, - "loss": 0.1742, - "step": 13273 - }, - { - "epoch": 0.68, - "grad_norm": 0.9142403208282665, - "learning_rate": 5.046665149864238e-06, - "loss": 0.1552, - "step": 13274 - }, - { - "epoch": 0.68, - "grad_norm": 0.9049380952582469, - "learning_rate": 5.045234490619435e-06, - "loss": 0.1755, - "step": 13275 - }, - { - "epoch": 0.68, - "grad_norm": 1.2962081391225797, - "learning_rate": 5.043803965772932e-06, - "loss": 0.1896, - "step": 13276 - }, - { - "epoch": 0.68, - "grad_norm": 1.101749651971222, - "learning_rate": 5.042373575363522e-06, - "loss": 0.1798, - "step": 13277 - }, - { - "epoch": 0.68, - "grad_norm": 0.951802944183445, - "learning_rate": 5.040943319430012e-06, - "loss": 0.1939, - "step": 13278 - }, - { - "epoch": 0.68, - "grad_norm": 1.0555114435186326, - "learning_rate": 5.03951319801119e-06, - "loss": 0.1626, - "step": 13279 - }, - { - "epoch": 0.68, - "grad_norm": 2.350767178690501, - "learning_rate": 5.038083211145854e-06, - "loss": 0.1766, - "step": 13280 - }, - { - "epoch": 0.68, - "grad_norm": 1.1201796798396992, - "learning_rate": 5.03665335887279e-06, - "loss": 0.2076, - "step": 13281 - }, - { - "epoch": 0.68, - "grad_norm": 0.9327089524446213, - "learning_rate": 5.035223641230789e-06, - "loss": 0.189, - "step": 13282 - }, - { - "epoch": 0.68, - "grad_norm": 0.8261115940786845, - "learning_rate": 5.033794058258623e-06, - "loss": 0.1659, - "step": 13283 - }, - { - "epoch": 0.68, - "grad_norm": 1.4459582919012035, - "learning_rate": 5.0323646099950775e-06, - "loss": 0.1576, - "step": 13284 - }, - { - "epoch": 0.68, - "grad_norm": 0.8754760385383721, - "learning_rate": 5.030935296478922e-06, - "loss": 0.1734, - "step": 13285 - }, - { - "epoch": 0.68, - "grad_norm": 1.0686975505660197, - "learning_rate": 5.029506117748924e-06, - "loss": 0.1728, - "step": 13286 - }, - { - "epoch": 0.68, - "grad_norm": 1.282629243662813, - "learning_rate": 5.0280770738438535e-06, - "loss": 0.1656, - "step": 13287 - }, - { - "epoch": 0.68, - "grad_norm": 1.013316881874263, - "learning_rate": 5.026648164802472e-06, - "loss": 0.1848, - "step": 13288 - }, - { - "epoch": 0.68, - "grad_norm": 1.0771000819802043, - "learning_rate": 5.025219390663545e-06, - "loss": 0.1599, - "step": 13289 - }, - { - "epoch": 0.68, - "grad_norm": 1.3450360397275072, - "learning_rate": 5.023790751465818e-06, - "loss": 0.1638, - "step": 13290 - }, - { - "epoch": 0.68, - "grad_norm": 1.1537028984901865, - "learning_rate": 5.022362247248052e-06, - "loss": 0.2023, - "step": 13291 - }, - { - "epoch": 0.68, - "grad_norm": 0.9600143888983569, - "learning_rate": 5.020933878048988e-06, - "loss": 0.1778, - "step": 13292 - }, - { - "epoch": 0.68, - "grad_norm": 1.347995113401063, - "learning_rate": 5.0195056439073775e-06, - "loss": 0.2065, - "step": 13293 - }, - { - "epoch": 0.68, - "grad_norm": 0.8692116195281121, - "learning_rate": 5.018077544861954e-06, - "loss": 0.1683, - "step": 13294 - }, - { - "epoch": 0.68, - "grad_norm": 1.4525317319667932, - "learning_rate": 5.016649580951462e-06, - "loss": 0.1714, - "step": 13295 - }, - { - "epoch": 0.68, - "grad_norm": 1.0646484388308648, - "learning_rate": 5.015221752214627e-06, - "loss": 0.1597, - "step": 13296 - }, - { - "epoch": 0.68, - "grad_norm": 1.0205582628493528, - "learning_rate": 5.013794058690185e-06, - "loss": 0.1797, - "step": 13297 - }, - { - "epoch": 0.68, - "grad_norm": 0.8514951899742292, - "learning_rate": 5.01236650041686e-06, - "loss": 0.1743, - "step": 13298 - }, - { - "epoch": 0.68, - "grad_norm": 0.8689347769419848, - "learning_rate": 5.010939077433378e-06, - "loss": 0.1852, - "step": 13299 - }, - { - "epoch": 0.68, - "grad_norm": 0.9292572474186874, - "learning_rate": 5.009511789778454e-06, - "loss": 0.1802, - "step": 13300 - }, - { - "epoch": 0.68, - "grad_norm": 3.692359441421138, - "learning_rate": 5.008084637490807e-06, - "loss": 0.1784, - "step": 13301 - }, - { - "epoch": 0.68, - "grad_norm": 1.2508345137841275, - "learning_rate": 5.006657620609147e-06, - "loss": 0.1911, - "step": 13302 - }, - { - "epoch": 0.68, - "grad_norm": 0.9286787172036365, - "learning_rate": 5.005230739172175e-06, - "loss": 0.1892, - "step": 13303 - }, - { - "epoch": 0.68, - "grad_norm": 0.867410754178937, - "learning_rate": 5.003803993218608e-06, - "loss": 0.1789, - "step": 13304 - }, - { - "epoch": 0.68, - "grad_norm": 1.1372677523807884, - "learning_rate": 5.002377382787135e-06, - "loss": 0.1997, - "step": 13305 - }, - { - "epoch": 0.68, - "grad_norm": 0.8645106706756592, - "learning_rate": 5.000950907916457e-06, - "loss": 0.1848, - "step": 13306 - }, - { - "epoch": 0.68, - "grad_norm": 0.9028957991781709, - "learning_rate": 4.999524568645268e-06, - "loss": 0.1791, - "step": 13307 - }, - { - "epoch": 0.68, - "grad_norm": 1.653303769921554, - "learning_rate": 4.998098365012263e-06, - "loss": 0.1944, - "step": 13308 - }, - { - "epoch": 0.68, - "grad_norm": 3.581598586863471, - "learning_rate": 4.9966722970561165e-06, - "loss": 0.1645, - "step": 13309 - }, - { - "epoch": 0.68, - "grad_norm": 1.0273155328412358, - "learning_rate": 4.995246364815522e-06, - "loss": 0.1739, - "step": 13310 - }, - { - "epoch": 0.68, - "grad_norm": 1.233151772861096, - "learning_rate": 4.993820568329147e-06, - "loss": 0.1814, - "step": 13311 - }, - { - "epoch": 0.68, - "grad_norm": 0.8748578904892358, - "learning_rate": 4.992394907635677e-06, - "loss": 0.1754, - "step": 13312 - }, - { - "epoch": 0.68, - "grad_norm": 1.3279718652025159, - "learning_rate": 4.990969382773773e-06, - "loss": 0.1784, - "step": 13313 - }, - { - "epoch": 0.68, - "grad_norm": 0.8481820795080676, - "learning_rate": 4.989543993782109e-06, - "loss": 0.1647, - "step": 13314 - }, - { - "epoch": 0.68, - "grad_norm": 0.8523082068024254, - "learning_rate": 4.9881187406993455e-06, - "loss": 0.1654, - "step": 13315 - }, - { - "epoch": 0.68, - "grad_norm": 1.0876009925692094, - "learning_rate": 4.98669362356415e-06, - "loss": 0.1692, - "step": 13316 - }, - { - "epoch": 0.68, - "grad_norm": 1.2189864873069765, - "learning_rate": 4.985268642415167e-06, - "loss": 0.2011, - "step": 13317 - }, - { - "epoch": 0.68, - "grad_norm": 1.0037139617465891, - "learning_rate": 4.98384379729106e-06, - "loss": 0.1682, - "step": 13318 - }, - { - "epoch": 0.68, - "grad_norm": 1.1469562922992695, - "learning_rate": 4.982419088230473e-06, - "loss": 0.1668, - "step": 13319 - }, - { - "epoch": 0.68, - "grad_norm": 0.8763162550492711, - "learning_rate": 4.98099451527205e-06, - "loss": 0.188, - "step": 13320 - }, - { - "epoch": 0.68, - "grad_norm": 0.9031762114085561, - "learning_rate": 4.9795700784544355e-06, - "loss": 0.1561, - "step": 13321 - }, - { - "epoch": 0.68, - "grad_norm": 1.4303154523574806, - "learning_rate": 4.978145777816264e-06, - "loss": 0.1728, - "step": 13322 - }, - { - "epoch": 0.68, - "grad_norm": 1.063977785990633, - "learning_rate": 4.9767216133961705e-06, - "loss": 0.1552, - "step": 13323 - }, - { - "epoch": 0.68, - "grad_norm": 1.1334620235258557, - "learning_rate": 4.975297585232788e-06, - "loss": 0.1845, - "step": 13324 - }, - { - "epoch": 0.68, - "grad_norm": 1.0121310102918408, - "learning_rate": 4.973873693364746e-06, - "loss": 0.1838, - "step": 13325 - }, - { - "epoch": 0.68, - "grad_norm": 0.8874720693087358, - "learning_rate": 4.972449937830659e-06, - "loss": 0.1392, - "step": 13326 - }, - { - "epoch": 0.68, - "grad_norm": 0.8405872199516397, - "learning_rate": 4.971026318669156e-06, - "loss": 0.2046, - "step": 13327 - }, - { - "epoch": 0.68, - "grad_norm": 0.9140677436739643, - "learning_rate": 4.9696028359188444e-06, - "loss": 0.1817, - "step": 13328 - }, - { - "epoch": 0.68, - "grad_norm": 1.122027630302311, - "learning_rate": 4.968179489618345e-06, - "loss": 0.1768, - "step": 13329 - }, - { - "epoch": 0.68, - "grad_norm": 0.9179887064369858, - "learning_rate": 4.966756279806255e-06, - "loss": 0.1599, - "step": 13330 - }, - { - "epoch": 0.68, - "grad_norm": 1.2229010455954328, - "learning_rate": 4.9653332065211905e-06, - "loss": 0.1692, - "step": 13331 - }, - { - "epoch": 0.68, - "grad_norm": 0.9100285797496191, - "learning_rate": 4.963910269801743e-06, - "loss": 0.1708, - "step": 13332 - }, - { - "epoch": 0.68, - "grad_norm": 1.108220029993447, - "learning_rate": 4.962487469686513e-06, - "loss": 0.1887, - "step": 13333 - }, - { - "epoch": 0.68, - "grad_norm": 1.1734441516261298, - "learning_rate": 4.961064806214096e-06, - "loss": 0.1804, - "step": 13334 - }, - { - "epoch": 0.68, - "grad_norm": 1.071162594836847, - "learning_rate": 4.959642279423085e-06, - "loss": 0.1738, - "step": 13335 - }, - { - "epoch": 0.68, - "grad_norm": 1.1985132657468434, - "learning_rate": 4.958219889352061e-06, - "loss": 0.1652, - "step": 13336 - }, - { - "epoch": 0.68, - "grad_norm": 0.8772492189650118, - "learning_rate": 4.956797636039603e-06, - "loss": 0.1721, - "step": 13337 - }, - { - "epoch": 0.68, - "grad_norm": 1.2597169048117127, - "learning_rate": 4.955375519524299e-06, - "loss": 0.2005, - "step": 13338 - }, - { - "epoch": 0.68, - "grad_norm": 2.4108297618878187, - "learning_rate": 4.953953539844715e-06, - "loss": 0.1738, - "step": 13339 - }, - { - "epoch": 0.68, - "grad_norm": 0.8353302923538877, - "learning_rate": 4.952531697039424e-06, - "loss": 0.1796, - "step": 13340 - }, - { - "epoch": 0.68, - "grad_norm": 3.45598031294388, - "learning_rate": 4.951109991146999e-06, - "loss": 0.1664, - "step": 13341 - }, - { - "epoch": 0.68, - "grad_norm": 3.890865755028747, - "learning_rate": 4.949688422206003e-06, - "loss": 0.1665, - "step": 13342 - }, - { - "epoch": 0.68, - "grad_norm": 1.035408609462023, - "learning_rate": 4.9482669902549896e-06, - "loss": 0.1791, - "step": 13343 - }, - { - "epoch": 0.68, - "grad_norm": 1.0615178010266604, - "learning_rate": 4.946845695332524e-06, - "loss": 0.1934, - "step": 13344 - }, - { - "epoch": 0.68, - "grad_norm": 1.1510058838639408, - "learning_rate": 4.945424537477149e-06, - "loss": 0.1732, - "step": 13345 - }, - { - "epoch": 0.68, - "grad_norm": 0.8626483346119986, - "learning_rate": 4.944003516727424e-06, - "loss": 0.164, - "step": 13346 - }, - { - "epoch": 0.68, - "grad_norm": 0.9788758357224554, - "learning_rate": 4.942582633121885e-06, - "loss": 0.1781, - "step": 13347 - }, - { - "epoch": 0.68, - "grad_norm": 1.2100884087094943, - "learning_rate": 4.941161886699082e-06, - "loss": 0.1607, - "step": 13348 - }, - { - "epoch": 0.68, - "grad_norm": 0.7699738187325713, - "learning_rate": 4.939741277497545e-06, - "loss": 0.1842, - "step": 13349 - }, - { - "epoch": 0.68, - "grad_norm": 0.88949621414364, - "learning_rate": 4.938320805555811e-06, - "loss": 0.1579, - "step": 13350 - }, - { - "epoch": 0.68, - "grad_norm": 0.9083718699578254, - "learning_rate": 4.9369004709124115e-06, - "loss": 0.1797, - "step": 13351 - }, - { - "epoch": 0.68, - "grad_norm": 3.122080049815317, - "learning_rate": 4.935480273605876e-06, - "loss": 0.1666, - "step": 13352 - }, - { - "epoch": 0.68, - "grad_norm": 1.1730433331504413, - "learning_rate": 4.93406021367472e-06, - "loss": 0.1693, - "step": 13353 - }, - { - "epoch": 0.68, - "grad_norm": 1.1792075254193641, - "learning_rate": 4.932640291157471e-06, - "loss": 0.1599, - "step": 13354 - }, - { - "epoch": 0.68, - "grad_norm": 0.9867221227698497, - "learning_rate": 4.931220506092641e-06, - "loss": 0.1657, - "step": 13355 - }, - { - "epoch": 0.68, - "grad_norm": 0.9272440666065497, - "learning_rate": 4.929800858518736e-06, - "loss": 0.1678, - "step": 13356 - }, - { - "epoch": 0.68, - "grad_norm": 1.3593501320246062, - "learning_rate": 4.928381348474274e-06, - "loss": 0.1864, - "step": 13357 - }, - { - "epoch": 0.68, - "grad_norm": 1.2641556486628795, - "learning_rate": 4.926961975997749e-06, - "loss": 0.2181, - "step": 13358 - }, - { - "epoch": 0.68, - "grad_norm": 1.0468224571302156, - "learning_rate": 4.925542741127669e-06, - "loss": 0.177, - "step": 13359 - }, - { - "epoch": 0.68, - "grad_norm": 1.0279548360084887, - "learning_rate": 4.9241236439025275e-06, - "loss": 0.1573, - "step": 13360 - }, - { - "epoch": 0.68, - "grad_norm": 1.4210161107545294, - "learning_rate": 4.9227046843608224e-06, - "loss": 0.1559, - "step": 13361 - }, - { - "epoch": 0.68, - "grad_norm": 1.0368899818961608, - "learning_rate": 4.921285862541037e-06, - "loss": 0.1776, - "step": 13362 - }, - { - "epoch": 0.68, - "grad_norm": 0.8609128185278291, - "learning_rate": 4.919867178481662e-06, - "loss": 0.1789, - "step": 13363 - }, - { - "epoch": 0.68, - "grad_norm": 1.0819398376435012, - "learning_rate": 4.9184486322211734e-06, - "loss": 0.1582, - "step": 13364 - }, - { - "epoch": 0.68, - "grad_norm": 2.186110252214916, - "learning_rate": 4.917030223798057e-06, - "loss": 0.1507, - "step": 13365 - }, - { - "epoch": 0.68, - "grad_norm": 0.8508139881204698, - "learning_rate": 4.915611953250778e-06, - "loss": 0.179, - "step": 13366 - }, - { - "epoch": 0.68, - "grad_norm": 2.8510715693991586, - "learning_rate": 4.914193820617813e-06, - "loss": 0.1841, - "step": 13367 - }, - { - "epoch": 0.68, - "grad_norm": 0.8557222228620209, - "learning_rate": 4.912775825937627e-06, - "loss": 0.181, - "step": 13368 - }, - { - "epoch": 0.68, - "grad_norm": 1.0329111875129768, - "learning_rate": 4.911357969248688e-06, - "loss": 0.1757, - "step": 13369 - }, - { - "epoch": 0.68, - "grad_norm": 1.0033361006799728, - "learning_rate": 4.909940250589448e-06, - "loss": 0.174, - "step": 13370 - }, - { - "epoch": 0.68, - "grad_norm": 0.9562282908872947, - "learning_rate": 4.90852266999837e-06, - "loss": 0.1636, - "step": 13371 - }, - { - "epoch": 0.68, - "grad_norm": 1.6260398737326067, - "learning_rate": 4.907105227513902e-06, - "loss": 0.1776, - "step": 13372 - }, - { - "epoch": 0.68, - "grad_norm": 0.9791958386788515, - "learning_rate": 4.905687923174488e-06, - "loss": 0.149, - "step": 13373 - }, - { - "epoch": 0.68, - "grad_norm": 0.8983703155733312, - "learning_rate": 4.904270757018581e-06, - "loss": 0.1563, - "step": 13374 - }, - { - "epoch": 0.68, - "grad_norm": 0.8223754890980843, - "learning_rate": 4.902853729084615e-06, - "loss": 0.1608, - "step": 13375 - }, - { - "epoch": 0.68, - "grad_norm": 0.8900805982829235, - "learning_rate": 4.9014368394110275e-06, - "loss": 0.1645, - "step": 13376 - }, - { - "epoch": 0.68, - "grad_norm": 1.17004263604402, - "learning_rate": 4.900020088036254e-06, - "loss": 0.157, - "step": 13377 - }, - { - "epoch": 0.68, - "grad_norm": 1.2806837428129272, - "learning_rate": 4.898603474998729e-06, - "loss": 0.1875, - "step": 13378 - }, - { - "epoch": 0.68, - "grad_norm": 0.894743260473036, - "learning_rate": 4.897187000336867e-06, - "loss": 0.1517, - "step": 13379 - }, - { - "epoch": 0.68, - "grad_norm": 1.0252274864395192, - "learning_rate": 4.895770664089101e-06, - "loss": 0.199, - "step": 13380 - }, - { - "epoch": 0.68, - "grad_norm": 1.1158333518389476, - "learning_rate": 4.89435446629384e-06, - "loss": 0.1824, - "step": 13381 - }, - { - "epoch": 0.68, - "grad_norm": 0.9805177074634117, - "learning_rate": 4.892938406989507e-06, - "loss": 0.1725, - "step": 13382 - }, - { - "epoch": 0.68, - "grad_norm": 0.998938905422329, - "learning_rate": 4.891522486214503e-06, - "loss": 0.1641, - "step": 13383 - }, - { - "epoch": 0.68, - "grad_norm": 0.9151046573689942, - "learning_rate": 4.89010670400724e-06, - "loss": 0.1815, - "step": 13384 - }, - { - "epoch": 0.68, - "grad_norm": 1.1280548607462288, - "learning_rate": 4.888691060406122e-06, - "loss": 0.1789, - "step": 13385 - }, - { - "epoch": 0.68, - "grad_norm": 1.0898656353463203, - "learning_rate": 4.887275555449552e-06, - "loss": 0.1374, - "step": 13386 - }, - { - "epoch": 0.68, - "grad_norm": 1.0457794371285571, - "learning_rate": 4.885860189175917e-06, - "loss": 0.1746, - "step": 13387 - }, - { - "epoch": 0.68, - "grad_norm": 0.8664565229805467, - "learning_rate": 4.884444961623616e-06, - "loss": 0.1848, - "step": 13388 - }, - { - "epoch": 0.68, - "grad_norm": 1.4847731662977808, - "learning_rate": 4.8830298728310355e-06, - "loss": 0.2005, - "step": 13389 - }, - { - "epoch": 0.68, - "grad_norm": 0.9869762959474485, - "learning_rate": 4.881614922836555e-06, - "loss": 0.1632, - "step": 13390 - }, - { - "epoch": 0.68, - "grad_norm": 1.1011081086417145, - "learning_rate": 4.880200111678563e-06, - "loss": 0.1541, - "step": 13391 - }, - { - "epoch": 0.68, - "grad_norm": 0.8733096551389045, - "learning_rate": 4.878785439395427e-06, - "loss": 0.1746, - "step": 13392 - }, - { - "epoch": 0.68, - "grad_norm": 1.7062110329950042, - "learning_rate": 4.8773709060255256e-06, - "loss": 0.1809, - "step": 13393 - }, - { - "epoch": 0.68, - "grad_norm": 1.4984101977385287, - "learning_rate": 4.8759565116072285e-06, - "loss": 0.196, - "step": 13394 - }, - { - "epoch": 0.68, - "grad_norm": 0.9800320483035345, - "learning_rate": 4.874542256178903e-06, - "loss": 0.1675, - "step": 13395 - }, - { - "epoch": 0.68, - "grad_norm": 1.0454689807754207, - "learning_rate": 4.873128139778906e-06, - "loss": 0.1897, - "step": 13396 - }, - { - "epoch": 0.68, - "grad_norm": 0.9859535430018859, - "learning_rate": 4.8717141624456e-06, - "loss": 0.1584, - "step": 13397 - }, - { - "epoch": 0.68, - "grad_norm": 1.0988300467051104, - "learning_rate": 4.870300324217334e-06, - "loss": 0.1787, - "step": 13398 - }, - { - "epoch": 0.68, - "grad_norm": 0.9376020847074387, - "learning_rate": 4.868886625132465e-06, - "loss": 0.1755, - "step": 13399 - }, - { - "epoch": 0.68, - "grad_norm": 0.7033458031810654, - "learning_rate": 4.867473065229332e-06, - "loss": 0.135, - "step": 13400 - }, - { - "epoch": 0.68, - "grad_norm": 1.063844911772989, - "learning_rate": 4.866059644546287e-06, - "loss": 0.1879, - "step": 13401 - }, - { - "epoch": 0.68, - "grad_norm": 1.5097869097695407, - "learning_rate": 4.864646363121659e-06, - "loss": 0.182, - "step": 13402 - }, - { - "epoch": 0.68, - "grad_norm": 1.369818989617672, - "learning_rate": 4.863233220993789e-06, - "loss": 0.1657, - "step": 13403 - }, - { - "epoch": 0.68, - "grad_norm": 0.9667709687107531, - "learning_rate": 4.861820218201009e-06, - "loss": 0.1761, - "step": 13404 - }, - { - "epoch": 0.68, - "grad_norm": 1.1342468133599284, - "learning_rate": 4.860407354781647e-06, - "loss": 0.1535, - "step": 13405 - }, - { - "epoch": 0.68, - "grad_norm": 1.1973300465476358, - "learning_rate": 4.858994630774028e-06, - "loss": 0.1678, - "step": 13406 - }, - { - "epoch": 0.68, - "grad_norm": 0.8670715244476761, - "learning_rate": 4.857582046216465e-06, - "loss": 0.1865, - "step": 13407 - }, - { - "epoch": 0.68, - "grad_norm": 0.9690513997511496, - "learning_rate": 4.856169601147285e-06, - "loss": 0.1735, - "step": 13408 - }, - { - "epoch": 0.68, - "grad_norm": 1.0865856176368371, - "learning_rate": 4.8547572956047894e-06, - "loss": 0.1875, - "step": 13409 - }, - { - "epoch": 0.68, - "grad_norm": 1.0067725107937646, - "learning_rate": 4.8533451296272934e-06, - "loss": 0.1698, - "step": 13410 - }, - { - "epoch": 0.68, - "grad_norm": 0.8611937224662363, - "learning_rate": 4.8519331032531015e-06, - "loss": 0.1621, - "step": 13411 - }, - { - "epoch": 0.68, - "grad_norm": 1.2179372902706707, - "learning_rate": 4.85052121652052e-06, - "loss": 0.1737, - "step": 13412 - }, - { - "epoch": 0.68, - "grad_norm": 1.0646786227946774, - "learning_rate": 4.849109469467835e-06, - "loss": 0.1744, - "step": 13413 - }, - { - "epoch": 0.68, - "grad_norm": 1.3153447789011714, - "learning_rate": 4.847697862133351e-06, - "loss": 0.1599, - "step": 13414 - }, - { - "epoch": 0.68, - "grad_norm": 1.139802471031512, - "learning_rate": 4.846286394555352e-06, - "loss": 0.1461, - "step": 13415 - }, - { - "epoch": 0.68, - "grad_norm": 0.8781096489001784, - "learning_rate": 4.844875066772126e-06, - "loss": 0.1754, - "step": 13416 - }, - { - "epoch": 0.68, - "grad_norm": 0.9085196484189163, - "learning_rate": 4.843463878821955e-06, - "loss": 0.1776, - "step": 13417 - }, - { - "epoch": 0.68, - "grad_norm": 0.9693730021548259, - "learning_rate": 4.842052830743118e-06, - "loss": 0.1638, - "step": 13418 - }, - { - "epoch": 0.68, - "grad_norm": 0.911581147326447, - "learning_rate": 4.840641922573888e-06, - "loss": 0.1603, - "step": 13419 - }, - { - "epoch": 0.68, - "grad_norm": 2.1832359251033444, - "learning_rate": 4.839231154352535e-06, - "loss": 0.181, - "step": 13420 - }, - { - "epoch": 0.68, - "grad_norm": 0.9944004541687458, - "learning_rate": 4.837820526117329e-06, - "loss": 0.1786, - "step": 13421 - }, - { - "epoch": 0.68, - "grad_norm": 0.9643622262243208, - "learning_rate": 4.836410037906537e-06, - "loss": 0.1863, - "step": 13422 - }, - { - "epoch": 0.68, - "grad_norm": 1.798376813456311, - "learning_rate": 4.834999689758412e-06, - "loss": 0.1683, - "step": 13423 - }, - { - "epoch": 0.68, - "grad_norm": 1.0119099256446815, - "learning_rate": 4.833589481711214e-06, - "loss": 0.1666, - "step": 13424 - }, - { - "epoch": 0.68, - "grad_norm": 1.1561921433167999, - "learning_rate": 4.832179413803193e-06, - "loss": 0.1566, - "step": 13425 - }, - { - "epoch": 0.68, - "grad_norm": 1.135528996676528, - "learning_rate": 4.830769486072594e-06, - "loss": 0.191, - "step": 13426 - }, - { - "epoch": 0.68, - "grad_norm": 1.0686848952484496, - "learning_rate": 4.829359698557669e-06, - "loss": 0.201, - "step": 13427 - }, - { - "epoch": 0.68, - "grad_norm": 0.9368366479450847, - "learning_rate": 4.827950051296651e-06, - "loss": 0.1854, - "step": 13428 - }, - { - "epoch": 0.68, - "grad_norm": 1.055241925769985, - "learning_rate": 4.826540544327778e-06, - "loss": 0.1735, - "step": 13429 - }, - { - "epoch": 0.68, - "grad_norm": 1.4929546877284332, - "learning_rate": 4.825131177689286e-06, - "loss": 0.1832, - "step": 13430 - }, - { - "epoch": 0.68, - "grad_norm": 0.9835157565703031, - "learning_rate": 4.8237219514194064e-06, - "loss": 0.1614, - "step": 13431 - }, - { - "epoch": 0.68, - "grad_norm": 0.8935105225693067, - "learning_rate": 4.8223128655563574e-06, - "loss": 0.1619, - "step": 13432 - }, - { - "epoch": 0.68, - "grad_norm": 0.9941601481300318, - "learning_rate": 4.820903920138369e-06, - "loss": 0.1813, - "step": 13433 - }, - { - "epoch": 0.68, - "grad_norm": 1.2681225658498505, - "learning_rate": 4.819495115203651e-06, - "loss": 0.1727, - "step": 13434 - }, - { - "epoch": 0.68, - "grad_norm": 1.0083145685072834, - "learning_rate": 4.818086450790423e-06, - "loss": 0.1838, - "step": 13435 - }, - { - "epoch": 0.68, - "grad_norm": 1.002803369604112, - "learning_rate": 4.816677926936889e-06, - "loss": 0.1649, - "step": 13436 - }, - { - "epoch": 0.68, - "grad_norm": 0.8970907695821947, - "learning_rate": 4.815269543681259e-06, - "loss": 0.1644, - "step": 13437 - }, - { - "epoch": 0.68, - "grad_norm": 1.7536868504864365, - "learning_rate": 4.813861301061737e-06, - "loss": 0.168, - "step": 13438 - }, - { - "epoch": 0.68, - "grad_norm": 1.287016327629495, - "learning_rate": 4.812453199116522e-06, - "loss": 0.1568, - "step": 13439 - }, - { - "epoch": 0.68, - "grad_norm": 2.365945925124035, - "learning_rate": 4.811045237883803e-06, - "loss": 0.1613, - "step": 13440 - }, - { - "epoch": 0.68, - "grad_norm": 0.8182433301573657, - "learning_rate": 4.80963741740178e-06, - "loss": 0.1671, - "step": 13441 - }, - { - "epoch": 0.68, - "grad_norm": 1.2762692421010002, - "learning_rate": 4.808229737708635e-06, - "loss": 0.183, - "step": 13442 - }, - { - "epoch": 0.68, - "grad_norm": 1.1055784334758063, - "learning_rate": 4.806822198842548e-06, - "loss": 0.1614, - "step": 13443 - }, - { - "epoch": 0.68, - "grad_norm": 1.6299468631717564, - "learning_rate": 4.805414800841706e-06, - "loss": 0.182, - "step": 13444 - }, - { - "epoch": 0.68, - "grad_norm": 0.9921407678152764, - "learning_rate": 4.804007543744277e-06, - "loss": 0.1731, - "step": 13445 - }, - { - "epoch": 0.68, - "grad_norm": 1.226900393560294, - "learning_rate": 4.802600427588437e-06, - "loss": 0.1875, - "step": 13446 - }, - { - "epoch": 0.68, - "grad_norm": 0.8771639659433222, - "learning_rate": 4.801193452412353e-06, - "loss": 0.1828, - "step": 13447 - }, - { - "epoch": 0.68, - "grad_norm": 1.2076103968998635, - "learning_rate": 4.799786618254194e-06, - "loss": 0.1824, - "step": 13448 - }, - { - "epoch": 0.68, - "grad_norm": 1.165591143836002, - "learning_rate": 4.798379925152113e-06, - "loss": 0.1688, - "step": 13449 - }, - { - "epoch": 0.68, - "grad_norm": 0.9737739548462891, - "learning_rate": 4.796973373144276e-06, - "loss": 0.1849, - "step": 13450 - }, - { - "epoch": 0.68, - "grad_norm": 0.9713615342015935, - "learning_rate": 4.795566962268824e-06, - "loss": 0.1916, - "step": 13451 - }, - { - "epoch": 0.68, - "grad_norm": 1.033946714466998, - "learning_rate": 4.794160692563917e-06, - "loss": 0.2029, - "step": 13452 - }, - { - "epoch": 0.68, - "grad_norm": 1.2749100365224257, - "learning_rate": 4.792754564067691e-06, - "loss": 0.1633, - "step": 13453 - }, - { - "epoch": 0.68, - "grad_norm": 0.806561978231977, - "learning_rate": 4.791348576818296e-06, - "loss": 0.162, - "step": 13454 - }, - { - "epoch": 0.68, - "grad_norm": 1.6382894362878446, - "learning_rate": 4.78994273085386e-06, - "loss": 0.1802, - "step": 13455 - }, - { - "epoch": 0.68, - "grad_norm": 0.9880680606393522, - "learning_rate": 4.788537026212523e-06, - "loss": 0.1718, - "step": 13456 - }, - { - "epoch": 0.68, - "grad_norm": 1.3179714810536076, - "learning_rate": 4.7871314629324125e-06, - "loss": 0.1637, - "step": 13457 - }, - { - "epoch": 0.68, - "grad_norm": 0.8938350389586178, - "learning_rate": 4.78572604105166e-06, - "loss": 0.1625, - "step": 13458 - }, - { - "epoch": 0.68, - "grad_norm": 1.2134710981285515, - "learning_rate": 4.784320760608384e-06, - "loss": 0.1706, - "step": 13459 - }, - { - "epoch": 0.68, - "grad_norm": 1.4017759179084095, - "learning_rate": 4.782915621640697e-06, - "loss": 0.1752, - "step": 13460 - }, - { - "epoch": 0.68, - "grad_norm": 1.1175978308846175, - "learning_rate": 4.781510624186723e-06, - "loss": 0.1664, - "step": 13461 - }, - { - "epoch": 0.68, - "grad_norm": 1.0035509318285942, - "learning_rate": 4.780105768284563e-06, - "loss": 0.1715, - "step": 13462 - }, - { - "epoch": 0.68, - "grad_norm": 0.7894408453332361, - "learning_rate": 4.778701053972329e-06, - "loss": 0.1549, - "step": 13463 - }, - { - "epoch": 0.68, - "grad_norm": 6.307745294733831, - "learning_rate": 4.777296481288125e-06, - "loss": 0.1649, - "step": 13464 - }, - { - "epoch": 0.68, - "grad_norm": 1.050140125564269, - "learning_rate": 4.775892050270051e-06, - "loss": 0.1903, - "step": 13465 - }, - { - "epoch": 0.68, - "grad_norm": 0.8613949065020686, - "learning_rate": 4.774487760956198e-06, - "loss": 0.169, - "step": 13466 - }, - { - "epoch": 0.68, - "grad_norm": 0.7835237230319492, - "learning_rate": 4.773083613384663e-06, - "loss": 0.1403, - "step": 13467 - }, - { - "epoch": 0.68, - "grad_norm": 1.3627385906428595, - "learning_rate": 4.771679607593526e-06, - "loss": 0.1839, - "step": 13468 - }, - { - "epoch": 0.68, - "grad_norm": 1.0347857730772498, - "learning_rate": 4.770275743620879e-06, - "loss": 0.183, - "step": 13469 - }, - { - "epoch": 0.68, - "grad_norm": 0.9220173253856068, - "learning_rate": 4.768872021504795e-06, - "loss": 0.1678, - "step": 13470 - }, - { - "epoch": 0.69, - "grad_norm": 1.468227027681512, - "learning_rate": 4.767468441283355e-06, - "loss": 0.1754, - "step": 13471 - }, - { - "epoch": 0.69, - "grad_norm": 0.953328021389925, - "learning_rate": 4.766065002994626e-06, - "loss": 0.2112, - "step": 13472 - }, - { - "epoch": 0.69, - "grad_norm": 0.8236174244301655, - "learning_rate": 4.764661706676679e-06, - "loss": 0.1659, - "step": 13473 - }, - { - "epoch": 0.69, - "grad_norm": 1.07470258531206, - "learning_rate": 4.763258552367579e-06, - "loss": 0.159, - "step": 13474 - }, - { - "epoch": 0.69, - "grad_norm": 1.4178247476696861, - "learning_rate": 4.761855540105391e-06, - "loss": 0.1845, - "step": 13475 - }, - { - "epoch": 0.69, - "grad_norm": 1.0258835616788802, - "learning_rate": 4.760452669928167e-06, - "loss": 0.1675, - "step": 13476 - }, - { - "epoch": 0.69, - "grad_norm": 0.8569243751405146, - "learning_rate": 4.759049941873957e-06, - "loss": 0.1865, - "step": 13477 - }, - { - "epoch": 0.69, - "grad_norm": 0.9533173851533268, - "learning_rate": 4.757647355980816e-06, - "loss": 0.1824, - "step": 13478 - }, - { - "epoch": 0.69, - "grad_norm": 0.775607340301293, - "learning_rate": 4.756244912286782e-06, - "loss": 0.1543, - "step": 13479 - }, - { - "epoch": 0.69, - "grad_norm": 0.8975477515315292, - "learning_rate": 4.754842610829908e-06, - "loss": 0.1811, - "step": 13480 - }, - { - "epoch": 0.69, - "grad_norm": 0.8611261096224713, - "learning_rate": 4.753440451648218e-06, - "loss": 0.1522, - "step": 13481 - }, - { - "epoch": 0.69, - "grad_norm": 1.2630671689125583, - "learning_rate": 4.752038434779752e-06, - "loss": 0.1542, - "step": 13482 - }, - { - "epoch": 0.69, - "grad_norm": 1.3134315148743594, - "learning_rate": 4.750636560262542e-06, - "loss": 0.1737, - "step": 13483 - }, - { - "epoch": 0.69, - "grad_norm": 1.6569673148435342, - "learning_rate": 4.749234828134614e-06, - "loss": 0.1738, - "step": 13484 - }, - { - "epoch": 0.69, - "grad_norm": 0.843456888349568, - "learning_rate": 4.7478332384339834e-06, - "loss": 0.1788, - "step": 13485 - }, - { - "epoch": 0.69, - "grad_norm": 1.1360067662500801, - "learning_rate": 4.746431791198678e-06, - "loss": 0.1787, - "step": 13486 - }, - { - "epoch": 0.69, - "grad_norm": 0.9441431557652228, - "learning_rate": 4.745030486466702e-06, - "loss": 0.1614, - "step": 13487 - }, - { - "epoch": 0.69, - "grad_norm": 1.217989906596319, - "learning_rate": 4.743629324276076e-06, - "loss": 0.1607, - "step": 13488 - }, - { - "epoch": 0.69, - "grad_norm": 1.2598036556797265, - "learning_rate": 4.742228304664797e-06, - "loss": 0.1863, - "step": 13489 - }, - { - "epoch": 0.69, - "grad_norm": 0.9377746170865727, - "learning_rate": 4.740827427670871e-06, - "loss": 0.1719, - "step": 13490 - }, - { - "epoch": 0.69, - "grad_norm": 0.7620754681797829, - "learning_rate": 4.7394266933322995e-06, - "loss": 0.155, - "step": 13491 - }, - { - "epoch": 0.69, - "grad_norm": 1.446752341984604, - "learning_rate": 4.73802610168708e-06, - "loss": 0.1637, - "step": 13492 - }, - { - "epoch": 0.69, - "grad_norm": 1.3227834294404783, - "learning_rate": 4.736625652773195e-06, - "loss": 0.1758, - "step": 13493 - }, - { - "epoch": 0.69, - "grad_norm": 1.2196491773894043, - "learning_rate": 4.735225346628641e-06, - "loss": 0.1779, - "step": 13494 - }, - { - "epoch": 0.69, - "grad_norm": 0.972599455443251, - "learning_rate": 4.733825183291396e-06, - "loss": 0.1582, - "step": 13495 - }, - { - "epoch": 0.69, - "grad_norm": 2.1493289407215332, - "learning_rate": 4.7324251627994375e-06, - "loss": 0.1861, - "step": 13496 - }, - { - "epoch": 0.69, - "grad_norm": 1.213066587740853, - "learning_rate": 4.731025285190748e-06, - "loss": 0.1705, - "step": 13497 - }, - { - "epoch": 0.69, - "grad_norm": 0.9963341122388324, - "learning_rate": 4.729625550503291e-06, - "loss": 0.1576, - "step": 13498 - }, - { - "epoch": 0.69, - "grad_norm": 0.928854673356536, - "learning_rate": 4.728225958775038e-06, - "loss": 0.1631, - "step": 13499 - }, - { - "epoch": 0.69, - "grad_norm": 1.1516730033725135, - "learning_rate": 4.726826510043953e-06, - "loss": 0.1566, - "step": 13500 - }, - { - "epoch": 0.69, - "grad_norm": 1.0721723247090889, - "learning_rate": 4.725427204348002e-06, - "loss": 0.1706, - "step": 13501 - }, - { - "epoch": 0.69, - "grad_norm": 1.0402834885592727, - "learning_rate": 4.724028041725132e-06, - "loss": 0.1372, - "step": 13502 - }, - { - "epoch": 0.69, - "grad_norm": 1.2585969099620888, - "learning_rate": 4.722629022213303e-06, - "loss": 0.1784, - "step": 13503 - }, - { - "epoch": 0.69, - "grad_norm": 1.211687513437, - "learning_rate": 4.721230145850456e-06, - "loss": 0.1699, - "step": 13504 - }, - { - "epoch": 0.69, - "grad_norm": 0.8774157171124775, - "learning_rate": 4.7198314126745424e-06, - "loss": 0.1476, - "step": 13505 - }, - { - "epoch": 0.69, - "grad_norm": 1.1104455447501893, - "learning_rate": 4.718432822723498e-06, - "loss": 0.1737, - "step": 13506 - }, - { - "epoch": 0.69, - "grad_norm": 0.8279150288922937, - "learning_rate": 4.7170343760352595e-06, - "loss": 0.1626, - "step": 13507 - }, - { - "epoch": 0.69, - "grad_norm": 1.3927560254857045, - "learning_rate": 4.715636072647763e-06, - "loss": 0.1732, - "step": 13508 - }, - { - "epoch": 0.69, - "grad_norm": 0.9894871996707575, - "learning_rate": 4.714237912598941e-06, - "loss": 0.1672, - "step": 13509 - }, - { - "epoch": 0.69, - "grad_norm": 1.001968051076667, - "learning_rate": 4.7128398959267095e-06, - "loss": 0.1654, - "step": 13510 - }, - { - "epoch": 0.69, - "grad_norm": 1.2122031088329057, - "learning_rate": 4.711442022668998e-06, - "loss": 0.1674, - "step": 13511 - }, - { - "epoch": 0.69, - "grad_norm": 0.7976776552459861, - "learning_rate": 4.710044292863721e-06, - "loss": 0.1469, - "step": 13512 - }, - { - "epoch": 0.69, - "grad_norm": 0.9277944585899132, - "learning_rate": 4.7086467065487875e-06, - "loss": 0.1499, - "step": 13513 - }, - { - "epoch": 0.69, - "grad_norm": 1.2169821076074399, - "learning_rate": 4.707249263762115e-06, - "loss": 0.1595, - "step": 13514 - }, - { - "epoch": 0.69, - "grad_norm": 1.6496269445057359, - "learning_rate": 4.7058519645416004e-06, - "loss": 0.1722, - "step": 13515 - }, - { - "epoch": 0.69, - "grad_norm": 1.0799766772357269, - "learning_rate": 4.7044548089251505e-06, - "loss": 0.161, - "step": 13516 - }, - { - "epoch": 0.69, - "grad_norm": 1.7183745714562393, - "learning_rate": 4.703057796950663e-06, - "loss": 0.1632, - "step": 13517 - }, - { - "epoch": 0.69, - "grad_norm": 1.2890048452136667, - "learning_rate": 4.701660928656036e-06, - "loss": 0.1841, - "step": 13518 - }, - { - "epoch": 0.69, - "grad_norm": 1.299031005709721, - "learning_rate": 4.7002642040791526e-06, - "loss": 0.1627, - "step": 13519 - }, - { - "epoch": 0.69, - "grad_norm": 1.0199202670304646, - "learning_rate": 4.698867623257905e-06, - "loss": 0.1509, - "step": 13520 - }, - { - "epoch": 0.69, - "grad_norm": 1.0061548078404354, - "learning_rate": 4.697471186230168e-06, - "loss": 0.1723, - "step": 13521 - }, - { - "epoch": 0.69, - "grad_norm": 1.6402715115473485, - "learning_rate": 4.696074893033828e-06, - "loss": 0.1612, - "step": 13522 - }, - { - "epoch": 0.69, - "grad_norm": 0.9338752918297215, - "learning_rate": 4.694678743706754e-06, - "loss": 0.1874, - "step": 13523 - }, - { - "epoch": 0.69, - "grad_norm": 1.3107696335607468, - "learning_rate": 4.693282738286822e-06, - "loss": 0.1601, - "step": 13524 - }, - { - "epoch": 0.69, - "grad_norm": 1.0287906491985137, - "learning_rate": 4.6918868768118906e-06, - "loss": 0.1608, - "step": 13525 - }, - { - "epoch": 0.69, - "grad_norm": 1.3655937539532998, - "learning_rate": 4.690491159319829e-06, - "loss": 0.1741, - "step": 13526 - }, - { - "epoch": 0.69, - "grad_norm": 1.0816495827850183, - "learning_rate": 4.689095585848494e-06, - "loss": 0.1555, - "step": 13527 - }, - { - "epoch": 0.69, - "grad_norm": 1.2177066010221815, - "learning_rate": 4.687700156435745e-06, - "loss": 0.1835, - "step": 13528 - }, - { - "epoch": 0.69, - "grad_norm": 0.8894544378116847, - "learning_rate": 4.686304871119429e-06, - "loss": 0.1577, - "step": 13529 - }, - { - "epoch": 0.69, - "grad_norm": 1.4856797398142272, - "learning_rate": 4.68490972993739e-06, - "loss": 0.1922, - "step": 13530 - }, - { - "epoch": 0.69, - "grad_norm": 0.842186615856397, - "learning_rate": 4.683514732927479e-06, - "loss": 0.1726, - "step": 13531 - }, - { - "epoch": 0.69, - "grad_norm": 1.0661252290878187, - "learning_rate": 4.682119880127526e-06, - "loss": 0.181, - "step": 13532 - }, - { - "epoch": 0.69, - "grad_norm": 1.1548888139770868, - "learning_rate": 4.680725171575373e-06, - "loss": 0.1751, - "step": 13533 - }, - { - "epoch": 0.69, - "grad_norm": 0.9473059849013943, - "learning_rate": 4.679330607308849e-06, - "loss": 0.195, - "step": 13534 - }, - { - "epoch": 0.69, - "grad_norm": 1.4304758217482247, - "learning_rate": 4.677936187365787e-06, - "loss": 0.1815, - "step": 13535 - }, - { - "epoch": 0.69, - "grad_norm": 0.9854967453610628, - "learning_rate": 4.676541911784004e-06, - "loss": 0.1773, - "step": 13536 - }, - { - "epoch": 0.69, - "grad_norm": 1.2275356983406849, - "learning_rate": 4.675147780601324e-06, - "loss": 0.1621, - "step": 13537 - }, - { - "epoch": 0.69, - "grad_norm": 0.8949481564151959, - "learning_rate": 4.673753793855559e-06, - "loss": 0.173, - "step": 13538 - }, - { - "epoch": 0.69, - "grad_norm": 1.0624308576100148, - "learning_rate": 4.672359951584526e-06, - "loss": 0.1774, - "step": 13539 - }, - { - "epoch": 0.69, - "grad_norm": 0.9060232614534275, - "learning_rate": 4.670966253826027e-06, - "loss": 0.1626, - "step": 13540 - }, - { - "epoch": 0.69, - "grad_norm": 0.8764413985940274, - "learning_rate": 4.669572700617872e-06, - "loss": 0.1696, - "step": 13541 - }, - { - "epoch": 0.69, - "grad_norm": 1.1743336547666958, - "learning_rate": 4.6681792919978565e-06, - "loss": 0.1732, - "step": 13542 - }, - { - "epoch": 0.69, - "grad_norm": 0.9143877752093164, - "learning_rate": 4.666786028003778e-06, - "loss": 0.1569, - "step": 13543 - }, - { - "epoch": 0.69, - "grad_norm": 1.0888796487708368, - "learning_rate": 4.66539290867343e-06, - "loss": 0.157, - "step": 13544 - }, - { - "epoch": 0.69, - "grad_norm": 1.25089617021715, - "learning_rate": 4.6639999340446045e-06, - "loss": 0.1631, - "step": 13545 - }, - { - "epoch": 0.69, - "grad_norm": 1.4660317954025943, - "learning_rate": 4.662607104155081e-06, - "loss": 0.1864, - "step": 13546 - }, - { - "epoch": 0.69, - "grad_norm": 0.7761331672489337, - "learning_rate": 4.661214419042639e-06, - "loss": 0.1929, - "step": 13547 - }, - { - "epoch": 0.69, - "grad_norm": 1.0568586982203227, - "learning_rate": 4.65982187874506e-06, - "loss": 0.1751, - "step": 13548 - }, - { - "epoch": 0.69, - "grad_norm": 0.8537179892331328, - "learning_rate": 4.658429483300111e-06, - "loss": 0.1648, - "step": 13549 - }, - { - "epoch": 0.69, - "grad_norm": 1.3181985905550566, - "learning_rate": 4.6570372327455686e-06, - "loss": 0.1788, - "step": 13550 - }, - { - "epoch": 0.69, - "grad_norm": 1.8328073860100116, - "learning_rate": 4.6556451271191875e-06, - "loss": 0.1745, - "step": 13551 - }, - { - "epoch": 0.69, - "grad_norm": 1.2966089598592383, - "learning_rate": 4.6542531664587355e-06, - "loss": 0.1703, - "step": 13552 - }, - { - "epoch": 0.69, - "grad_norm": 1.1042830729817357, - "learning_rate": 4.652861350801967e-06, - "loss": 0.1756, - "step": 13553 - }, - { - "epoch": 0.69, - "grad_norm": 0.8945608077369172, - "learning_rate": 4.651469680186641e-06, - "loss": 0.1402, - "step": 13554 - }, - { - "epoch": 0.69, - "grad_norm": 1.672402165651304, - "learning_rate": 4.650078154650498e-06, - "loss": 0.1837, - "step": 13555 - }, - { - "epoch": 0.69, - "grad_norm": 1.071037699984741, - "learning_rate": 4.648686774231291e-06, - "loss": 0.1725, - "step": 13556 - }, - { - "epoch": 0.69, - "grad_norm": 0.8953953526421431, - "learning_rate": 4.647295538966754e-06, - "loss": 0.1629, - "step": 13557 - }, - { - "epoch": 0.69, - "grad_norm": 1.0473247996790758, - "learning_rate": 4.645904448894632e-06, - "loss": 0.1598, - "step": 13558 - }, - { - "epoch": 0.69, - "grad_norm": 0.9166293227738351, - "learning_rate": 4.644513504052649e-06, - "loss": 0.1726, - "step": 13559 - }, - { - "epoch": 0.69, - "grad_norm": 0.8631086924575336, - "learning_rate": 4.643122704478541e-06, - "loss": 0.1433, - "step": 13560 - }, - { - "epoch": 0.69, - "grad_norm": 1.6481431810406317, - "learning_rate": 4.641732050210032e-06, - "loss": 0.1535, - "step": 13561 - }, - { - "epoch": 0.69, - "grad_norm": 2.250059379509954, - "learning_rate": 4.640341541284847e-06, - "loss": 0.1824, - "step": 13562 - }, - { - "epoch": 0.69, - "grad_norm": 0.8370577808863652, - "learning_rate": 4.638951177740697e-06, - "loss": 0.1811, - "step": 13563 - }, - { - "epoch": 0.69, - "grad_norm": 1.0688233014826511, - "learning_rate": 4.637560959615302e-06, - "loss": 0.1542, - "step": 13564 - }, - { - "epoch": 0.69, - "grad_norm": 1.165288677744048, - "learning_rate": 4.636170886946371e-06, - "loss": 0.1886, - "step": 13565 - }, - { - "epoch": 0.69, - "grad_norm": 0.9040930500201682, - "learning_rate": 4.634780959771602e-06, - "loss": 0.1591, - "step": 13566 - }, - { - "epoch": 0.69, - "grad_norm": 1.0498273932508568, - "learning_rate": 4.633391178128707e-06, - "loss": 0.1724, - "step": 13567 - }, - { - "epoch": 0.69, - "grad_norm": 1.5616589553427944, - "learning_rate": 4.632001542055375e-06, - "loss": 0.1837, - "step": 13568 - }, - { - "epoch": 0.69, - "grad_norm": 1.1889390923160186, - "learning_rate": 4.630612051589305e-06, - "loss": 0.1698, - "step": 13569 - }, - { - "epoch": 0.69, - "grad_norm": 1.1318211001505287, - "learning_rate": 4.629222706768186e-06, - "loss": 0.1799, - "step": 13570 - }, - { - "epoch": 0.69, - "grad_norm": 1.3790972285522614, - "learning_rate": 4.6278335076297085e-06, - "loss": 0.1619, - "step": 13571 - }, - { - "epoch": 0.69, - "grad_norm": 1.7173679176378776, - "learning_rate": 4.626444454211547e-06, - "loss": 0.1688, - "step": 13572 - }, - { - "epoch": 0.69, - "grad_norm": 1.1035216925111442, - "learning_rate": 4.6250555465513866e-06, - "loss": 0.1538, - "step": 13573 - }, - { - "epoch": 0.69, - "grad_norm": 0.9786599408298793, - "learning_rate": 4.623666784686895e-06, - "loss": 0.1866, - "step": 13574 - }, - { - "epoch": 0.69, - "grad_norm": 1.421386305458509, - "learning_rate": 4.6222781686557485e-06, - "loss": 0.1817, - "step": 13575 - }, - { - "epoch": 0.69, - "grad_norm": 1.4270252922303626, - "learning_rate": 4.620889698495606e-06, - "loss": 0.1885, - "step": 13576 - }, - { - "epoch": 0.69, - "grad_norm": 1.2922180581849296, - "learning_rate": 4.619501374244138e-06, - "loss": 0.1724, - "step": 13577 - }, - { - "epoch": 0.69, - "grad_norm": 0.8167681017098267, - "learning_rate": 4.618113195938997e-06, - "loss": 0.1858, - "step": 13578 - }, - { - "epoch": 0.69, - "grad_norm": 1.004602648431441, - "learning_rate": 4.616725163617838e-06, - "loss": 0.1783, - "step": 13579 - }, - { - "epoch": 0.69, - "grad_norm": 0.984136134542311, - "learning_rate": 4.615337277318313e-06, - "loss": 0.1699, - "step": 13580 - }, - { - "epoch": 0.69, - "grad_norm": 0.8063277723505651, - "learning_rate": 4.613949537078074e-06, - "loss": 0.1626, - "step": 13581 - }, - { - "epoch": 0.69, - "grad_norm": 1.6604415687659848, - "learning_rate": 4.612561942934757e-06, - "loss": 0.1848, - "step": 13582 - }, - { - "epoch": 0.69, - "grad_norm": 1.60353535338271, - "learning_rate": 4.611174494925998e-06, - "loss": 0.1684, - "step": 13583 - }, - { - "epoch": 0.69, - "grad_norm": 1.5032736790328078, - "learning_rate": 4.609787193089438e-06, - "loss": 0.1871, - "step": 13584 - }, - { - "epoch": 0.69, - "grad_norm": 2.9766809056291623, - "learning_rate": 4.608400037462702e-06, - "loss": 0.1659, - "step": 13585 - }, - { - "epoch": 0.69, - "grad_norm": 0.8504763258567766, - "learning_rate": 4.607013028083419e-06, - "loss": 0.1633, - "step": 13586 - }, - { - "epoch": 0.69, - "grad_norm": 0.9512887618479131, - "learning_rate": 4.605626164989212e-06, - "loss": 0.1586, - "step": 13587 - }, - { - "epoch": 0.69, - "grad_norm": 0.9440239043274677, - "learning_rate": 4.604239448217704e-06, - "loss": 0.1931, - "step": 13588 - }, - { - "epoch": 0.69, - "grad_norm": 0.94407399820329, - "learning_rate": 4.602852877806502e-06, - "loss": 0.1728, - "step": 13589 - }, - { - "epoch": 0.69, - "grad_norm": 0.8781022672939047, - "learning_rate": 4.601466453793224e-06, - "loss": 0.16, - "step": 13590 - }, - { - "epoch": 0.69, - "grad_norm": 1.02587555658374, - "learning_rate": 4.60008017621547e-06, - "loss": 0.1712, - "step": 13591 - }, - { - "epoch": 0.69, - "grad_norm": 0.8299062732482788, - "learning_rate": 4.598694045110851e-06, - "loss": 0.1637, - "step": 13592 - }, - { - "epoch": 0.69, - "grad_norm": 1.6775253264414962, - "learning_rate": 4.597308060516956e-06, - "loss": 0.178, - "step": 13593 - }, - { - "epoch": 0.69, - "grad_norm": 1.0173767614248344, - "learning_rate": 4.595922222471388e-06, - "loss": 0.1702, - "step": 13594 - }, - { - "epoch": 0.69, - "grad_norm": 1.15134970328035, - "learning_rate": 4.5945365310117325e-06, - "loss": 0.2012, - "step": 13595 - }, - { - "epoch": 0.69, - "grad_norm": 1.2736440643117173, - "learning_rate": 4.593150986175578e-06, - "loss": 0.1611, - "step": 13596 - }, - { - "epoch": 0.69, - "grad_norm": 0.9160802750515318, - "learning_rate": 4.59176558800051e-06, - "loss": 0.1854, - "step": 13597 - }, - { - "epoch": 0.69, - "grad_norm": 1.0618565771701247, - "learning_rate": 4.590380336524108e-06, - "loss": 0.1863, - "step": 13598 - }, - { - "epoch": 0.69, - "grad_norm": 1.4095872357508041, - "learning_rate": 4.588995231783946e-06, - "loss": 0.1574, - "step": 13599 - }, - { - "epoch": 0.69, - "grad_norm": 1.4633830074142442, - "learning_rate": 4.5876102738175895e-06, - "loss": 0.1711, - "step": 13600 - }, - { - "epoch": 0.69, - "grad_norm": 1.1496702197396802, - "learning_rate": 4.586225462662615e-06, - "loss": 0.1694, - "step": 13601 - }, - { - "epoch": 0.69, - "grad_norm": 1.2995727514266986, - "learning_rate": 4.584840798356574e-06, - "loss": 0.1774, - "step": 13602 - }, - { - "epoch": 0.69, - "grad_norm": 1.2066523787933985, - "learning_rate": 4.583456280937035e-06, - "loss": 0.178, - "step": 13603 - }, - { - "epoch": 0.69, - "grad_norm": 1.0492721475579148, - "learning_rate": 4.582071910441549e-06, - "loss": 0.159, - "step": 13604 - }, - { - "epoch": 0.69, - "grad_norm": 1.9537120737237095, - "learning_rate": 4.580687686907673e-06, - "loss": 0.1438, - "step": 13605 - }, - { - "epoch": 0.69, - "grad_norm": 2.1519038592765245, - "learning_rate": 4.579303610372945e-06, - "loss": 0.165, - "step": 13606 - }, - { - "epoch": 0.69, - "grad_norm": 0.9498047286022816, - "learning_rate": 4.577919680874917e-06, - "loss": 0.1709, - "step": 13607 - }, - { - "epoch": 0.69, - "grad_norm": 1.250374359466655, - "learning_rate": 4.57653589845112e-06, - "loss": 0.1443, - "step": 13608 - }, - { - "epoch": 0.69, - "grad_norm": 0.918021914540636, - "learning_rate": 4.575152263139096e-06, - "loss": 0.1468, - "step": 13609 - }, - { - "epoch": 0.69, - "grad_norm": 1.416579009119526, - "learning_rate": 4.573768774976371e-06, - "loss": 0.1792, - "step": 13610 - }, - { - "epoch": 0.69, - "grad_norm": 1.2171797058734255, - "learning_rate": 4.572385434000477e-06, - "loss": 0.1675, - "step": 13611 - }, - { - "epoch": 0.69, - "grad_norm": 1.3487422467380303, - "learning_rate": 4.5710022402489316e-06, - "loss": 0.1774, - "step": 13612 - }, - { - "epoch": 0.69, - "grad_norm": 0.8390122329378098, - "learning_rate": 4.569619193759257e-06, - "loss": 0.1526, - "step": 13613 - }, - { - "epoch": 0.69, - "grad_norm": 1.0920505118260386, - "learning_rate": 4.5682362945689684e-06, - "loss": 0.1821, - "step": 13614 - }, - { - "epoch": 0.69, - "grad_norm": 1.2520630500194392, - "learning_rate": 4.5668535427155816e-06, - "loss": 0.1818, - "step": 13615 - }, - { - "epoch": 0.69, - "grad_norm": 0.9270273436084522, - "learning_rate": 4.565470938236598e-06, - "loss": 0.1543, - "step": 13616 - }, - { - "epoch": 0.69, - "grad_norm": 1.0954459642480405, - "learning_rate": 4.5640884811695185e-06, - "loss": 0.1644, - "step": 13617 - }, - { - "epoch": 0.69, - "grad_norm": 0.939343554571579, - "learning_rate": 4.5627061715518504e-06, - "loss": 0.1832, - "step": 13618 - }, - { - "epoch": 0.69, - "grad_norm": 0.9068057167860933, - "learning_rate": 4.561324009421081e-06, - "loss": 0.1756, - "step": 13619 - }, - { - "epoch": 0.69, - "grad_norm": 1.21653027952336, - "learning_rate": 4.5599419948147075e-06, - "loss": 0.1592, - "step": 13620 - }, - { - "epoch": 0.69, - "grad_norm": 0.9564302335647423, - "learning_rate": 4.558560127770212e-06, - "loss": 0.1713, - "step": 13621 - }, - { - "epoch": 0.69, - "grad_norm": 0.8784787604845598, - "learning_rate": 4.55717840832508e-06, - "loss": 0.155, - "step": 13622 - }, - { - "epoch": 0.69, - "grad_norm": 0.8785266321659584, - "learning_rate": 4.5557968365167905e-06, - "loss": 0.1812, - "step": 13623 - }, - { - "epoch": 0.69, - "grad_norm": 1.9026291345633957, - "learning_rate": 4.5544154123828246e-06, - "loss": 0.1774, - "step": 13624 - }, - { - "epoch": 0.69, - "grad_norm": 1.1734358448640543, - "learning_rate": 4.553034135960643e-06, - "loss": 0.1724, - "step": 13625 - }, - { - "epoch": 0.69, - "grad_norm": 1.3709598780484173, - "learning_rate": 4.551653007287722e-06, - "loss": 0.155, - "step": 13626 - }, - { - "epoch": 0.69, - "grad_norm": 1.0501572052064885, - "learning_rate": 4.550272026401518e-06, - "loss": 0.1617, - "step": 13627 - }, - { - "epoch": 0.69, - "grad_norm": 1.106664806576892, - "learning_rate": 4.548891193339496e-06, - "loss": 0.1843, - "step": 13628 - }, - { - "epoch": 0.69, - "grad_norm": 1.4397585434339168, - "learning_rate": 4.5475105081391045e-06, - "loss": 0.1819, - "step": 13629 - }, - { - "epoch": 0.69, - "grad_norm": 6.77510083667638, - "learning_rate": 4.546129970837799e-06, - "loss": 0.1791, - "step": 13630 - }, - { - "epoch": 0.69, - "grad_norm": 0.8667437504260953, - "learning_rate": 4.544749581473026e-06, - "loss": 0.1788, - "step": 13631 - }, - { - "epoch": 0.69, - "grad_norm": 1.0402234284927807, - "learning_rate": 4.543369340082232e-06, - "loss": 0.1657, - "step": 13632 - }, - { - "epoch": 0.69, - "grad_norm": 1.2141343623878411, - "learning_rate": 4.54198924670285e-06, - "loss": 0.1783, - "step": 13633 - }, - { - "epoch": 0.69, - "grad_norm": 0.9137213467530735, - "learning_rate": 4.540609301372321e-06, - "loss": 0.1905, - "step": 13634 - }, - { - "epoch": 0.69, - "grad_norm": 1.0482645074998238, - "learning_rate": 4.539229504128073e-06, - "loss": 0.1715, - "step": 13635 - }, - { - "epoch": 0.69, - "grad_norm": 1.3001918628637983, - "learning_rate": 4.53784985500753e-06, - "loss": 0.1828, - "step": 13636 - }, - { - "epoch": 0.69, - "grad_norm": 1.4116670746126074, - "learning_rate": 4.536470354048121e-06, - "loss": 0.1869, - "step": 13637 - }, - { - "epoch": 0.69, - "grad_norm": 0.8045872138382979, - "learning_rate": 4.535091001287259e-06, - "loss": 0.1484, - "step": 13638 - }, - { - "epoch": 0.69, - "grad_norm": 1.0206620077990305, - "learning_rate": 4.533711796762362e-06, - "loss": 0.1993, - "step": 13639 - }, - { - "epoch": 0.69, - "grad_norm": 1.4379884999398949, - "learning_rate": 4.532332740510842e-06, - "loss": 0.1645, - "step": 13640 - }, - { - "epoch": 0.69, - "grad_norm": 1.6563946729632992, - "learning_rate": 4.530953832570109e-06, - "loss": 0.1724, - "step": 13641 - }, - { - "epoch": 0.69, - "grad_norm": 1.1491454430323254, - "learning_rate": 4.5295750729775565e-06, - "loss": 0.1714, - "step": 13642 - }, - { - "epoch": 0.69, - "grad_norm": 0.805474854958942, - "learning_rate": 4.528196461770596e-06, - "loss": 0.1534, - "step": 13643 - }, - { - "epoch": 0.69, - "grad_norm": 0.9306763153096663, - "learning_rate": 4.526817998986609e-06, - "loss": 0.1653, - "step": 13644 - }, - { - "epoch": 0.69, - "grad_norm": 1.0826607768088683, - "learning_rate": 4.5254396846629975e-06, - "loss": 0.1562, - "step": 13645 - }, - { - "epoch": 0.69, - "grad_norm": 1.0587769923842847, - "learning_rate": 4.5240615188371404e-06, - "loss": 0.1681, - "step": 13646 - }, - { - "epoch": 0.69, - "grad_norm": 1.0287482429144468, - "learning_rate": 4.522683501546428e-06, - "loss": 0.1685, - "step": 13647 - }, - { - "epoch": 0.69, - "grad_norm": 0.9475189022165657, - "learning_rate": 4.52130563282823e-06, - "loss": 0.1533, - "step": 13648 - }, - { - "epoch": 0.69, - "grad_norm": 0.9559323826822024, - "learning_rate": 4.519927912719927e-06, - "loss": 0.2016, - "step": 13649 - }, - { - "epoch": 0.69, - "grad_norm": 0.9539906601319694, - "learning_rate": 4.518550341258888e-06, - "loss": 0.2025, - "step": 13650 - }, - { - "epoch": 0.69, - "grad_norm": 0.7717629165163703, - "learning_rate": 4.517172918482485e-06, - "loss": 0.1606, - "step": 13651 - }, - { - "epoch": 0.69, - "grad_norm": 1.5207736020799518, - "learning_rate": 4.515795644428076e-06, - "loss": 0.191, - "step": 13652 - }, - { - "epoch": 0.69, - "grad_norm": 2.051008527005148, - "learning_rate": 4.514418519133017e-06, - "loss": 0.1737, - "step": 13653 - }, - { - "epoch": 0.69, - "grad_norm": 1.3983662117819151, - "learning_rate": 4.513041542634668e-06, - "loss": 0.1852, - "step": 13654 - }, - { - "epoch": 0.69, - "grad_norm": 1.1120072938260044, - "learning_rate": 4.511664714970374e-06, - "loss": 0.2028, - "step": 13655 - }, - { - "epoch": 0.69, - "grad_norm": 1.0610598459717966, - "learning_rate": 4.510288036177485e-06, - "loss": 0.1831, - "step": 13656 - }, - { - "epoch": 0.69, - "grad_norm": 1.2201168795069803, - "learning_rate": 4.508911506293343e-06, - "loss": 0.1497, - "step": 13657 - }, - { - "epoch": 0.69, - "grad_norm": 1.019804582026071, - "learning_rate": 4.50753512535529e-06, - "loss": 0.1689, - "step": 13658 - }, - { - "epoch": 0.69, - "grad_norm": 1.5116796780430295, - "learning_rate": 4.5061588934006525e-06, - "loss": 0.169, - "step": 13659 - }, - { - "epoch": 0.69, - "grad_norm": 0.9856392022738341, - "learning_rate": 4.50478281046677e-06, - "loss": 0.152, - "step": 13660 - }, - { - "epoch": 0.69, - "grad_norm": 1.6570900891806821, - "learning_rate": 4.50340687659096e-06, - "loss": 0.1617, - "step": 13661 - }, - { - "epoch": 0.69, - "grad_norm": 1.283983702559894, - "learning_rate": 4.502031091810553e-06, - "loss": 0.1772, - "step": 13662 - }, - { - "epoch": 0.69, - "grad_norm": 0.8647829813983922, - "learning_rate": 4.500655456162859e-06, - "loss": 0.1605, - "step": 13663 - }, - { - "epoch": 0.69, - "grad_norm": 0.8856314954674944, - "learning_rate": 4.4992799696852e-06, - "loss": 0.1602, - "step": 13664 - }, - { - "epoch": 0.69, - "grad_norm": 0.8324634660473998, - "learning_rate": 4.497904632414879e-06, - "loss": 0.1672, - "step": 13665 - }, - { - "epoch": 0.69, - "grad_norm": 1.2142499210913824, - "learning_rate": 4.496529444389206e-06, - "loss": 0.1883, - "step": 13666 - }, - { - "epoch": 0.69, - "grad_norm": 1.0431450995646872, - "learning_rate": 4.495154405645482e-06, - "loss": 0.1901, - "step": 13667 - }, - { - "epoch": 0.7, - "grad_norm": 1.0691190749628912, - "learning_rate": 4.493779516221009e-06, - "loss": 0.1844, - "step": 13668 - }, - { - "epoch": 0.7, - "grad_norm": 1.7628766475524442, - "learning_rate": 4.492404776153078e-06, - "loss": 0.1729, - "step": 13669 - }, - { - "epoch": 0.7, - "grad_norm": 0.9033434819234581, - "learning_rate": 4.491030185478976e-06, - "loss": 0.1811, - "step": 13670 - }, - { - "epoch": 0.7, - "grad_norm": 1.296385993849624, - "learning_rate": 4.489655744235994e-06, - "loss": 0.1705, - "step": 13671 - }, - { - "epoch": 0.7, - "grad_norm": 1.2469644716777546, - "learning_rate": 4.488281452461407e-06, - "loss": 0.1694, - "step": 13672 - }, - { - "epoch": 0.7, - "grad_norm": 0.8311193064293749, - "learning_rate": 4.4869073101925024e-06, - "loss": 0.1622, - "step": 13673 - }, - { - "epoch": 0.7, - "grad_norm": 1.8013546615744547, - "learning_rate": 4.4855333174665425e-06, - "loss": 0.1701, - "step": 13674 - }, - { - "epoch": 0.7, - "grad_norm": 3.628093865865405, - "learning_rate": 4.484159474320804e-06, - "loss": 0.1751, - "step": 13675 - }, - { - "epoch": 0.7, - "grad_norm": 1.3232699202577978, - "learning_rate": 4.482785780792551e-06, - "loss": 0.169, - "step": 13676 - }, - { - "epoch": 0.7, - "grad_norm": 1.5385823443408615, - "learning_rate": 4.481412236919049e-06, - "loss": 0.1629, - "step": 13677 - }, - { - "epoch": 0.7, - "grad_norm": 0.9031756800976332, - "learning_rate": 4.480038842737548e-06, - "loss": 0.1479, - "step": 13678 - }, - { - "epoch": 0.7, - "grad_norm": 1.257354476225968, - "learning_rate": 4.47866559828531e-06, - "loss": 0.1869, - "step": 13679 - }, - { - "epoch": 0.7, - "grad_norm": 0.9714070942255482, - "learning_rate": 4.477292503599574e-06, - "loss": 0.1822, - "step": 13680 - }, - { - "epoch": 0.7, - "grad_norm": 1.1593533989119276, - "learning_rate": 4.475919558717596e-06, - "loss": 0.1904, - "step": 13681 - }, - { - "epoch": 0.7, - "grad_norm": 1.5104136020876684, - "learning_rate": 4.474546763676607e-06, - "loss": 0.1784, - "step": 13682 - }, - { - "epoch": 0.7, - "grad_norm": 1.2655799392585738, - "learning_rate": 4.47317411851385e-06, - "loss": 0.1744, - "step": 13683 - }, - { - "epoch": 0.7, - "grad_norm": 1.2385869323659042, - "learning_rate": 4.471801623266558e-06, - "loss": 0.1445, - "step": 13684 - }, - { - "epoch": 0.7, - "grad_norm": 0.7638791201707295, - "learning_rate": 4.470429277971961e-06, - "loss": 0.1627, - "step": 13685 - }, - { - "epoch": 0.7, - "grad_norm": 1.0250060836714399, - "learning_rate": 4.469057082667283e-06, - "loss": 0.196, - "step": 13686 - }, - { - "epoch": 0.7, - "grad_norm": 2.077268257744968, - "learning_rate": 4.46768503738974e-06, - "loss": 0.158, - "step": 13687 - }, - { - "epoch": 0.7, - "grad_norm": 0.9390479740377202, - "learning_rate": 4.466313142176557e-06, - "loss": 0.17, - "step": 13688 - }, - { - "epoch": 0.7, - "grad_norm": 0.799024685884593, - "learning_rate": 4.464941397064938e-06, - "loss": 0.1583, - "step": 13689 - }, - { - "epoch": 0.7, - "grad_norm": 1.49710107221133, - "learning_rate": 4.4635698020921016e-06, - "loss": 0.183, - "step": 13690 - }, - { - "epoch": 0.7, - "grad_norm": 2.5396832424734, - "learning_rate": 4.462198357295242e-06, - "loss": 0.1738, - "step": 13691 - }, - { - "epoch": 0.7, - "grad_norm": 0.8603738965306661, - "learning_rate": 4.460827062711564e-06, - "loss": 0.1618, - "step": 13692 - }, - { - "epoch": 0.7, - "grad_norm": 0.8633705184933675, - "learning_rate": 4.459455918378266e-06, - "loss": 0.1624, - "step": 13693 - }, - { - "epoch": 0.7, - "grad_norm": 1.315956016828563, - "learning_rate": 4.458084924332543e-06, - "loss": 0.1703, - "step": 13694 - }, - { - "epoch": 0.7, - "grad_norm": 0.860348062193069, - "learning_rate": 4.456714080611575e-06, - "loss": 0.1672, - "step": 13695 - }, - { - "epoch": 0.7, - "grad_norm": 0.9190060904210984, - "learning_rate": 4.455343387252555e-06, - "loss": 0.1822, - "step": 13696 - }, - { - "epoch": 0.7, - "grad_norm": 0.7940459239761528, - "learning_rate": 4.453972844292654e-06, - "loss": 0.1737, - "step": 13697 - }, - { - "epoch": 0.7, - "grad_norm": 1.097721016626529, - "learning_rate": 4.452602451769058e-06, - "loss": 0.1628, - "step": 13698 - }, - { - "epoch": 0.7, - "grad_norm": 1.0759386647310396, - "learning_rate": 4.4512322097189295e-06, - "loss": 0.167, - "step": 13699 - }, - { - "epoch": 0.7, - "grad_norm": 1.3701935230414497, - "learning_rate": 4.449862118179444e-06, - "loss": 0.1584, - "step": 13700 - }, - { - "epoch": 0.7, - "grad_norm": 0.9541634410686334, - "learning_rate": 4.44849217718776e-06, - "loss": 0.1795, - "step": 13701 - }, - { - "epoch": 0.7, - "grad_norm": 0.9153389222149987, - "learning_rate": 4.447122386781038e-06, - "loss": 0.1872, - "step": 13702 - }, - { - "epoch": 0.7, - "grad_norm": 0.9417906727918101, - "learning_rate": 4.445752746996438e-06, - "loss": 0.1725, - "step": 13703 - }, - { - "epoch": 0.7, - "grad_norm": 2.1049319774043718, - "learning_rate": 4.4443832578711055e-06, - "loss": 0.1989, - "step": 13704 - }, - { - "epoch": 0.7, - "grad_norm": 0.9056218269926907, - "learning_rate": 4.443013919442194e-06, - "loss": 0.1826, - "step": 13705 - }, - { - "epoch": 0.7, - "grad_norm": 0.984890726452985, - "learning_rate": 4.4416447317468405e-06, - "loss": 0.1716, - "step": 13706 - }, - { - "epoch": 0.7, - "grad_norm": 1.0207518181035065, - "learning_rate": 4.440275694822192e-06, - "loss": 0.166, - "step": 13707 - }, - { - "epoch": 0.7, - "grad_norm": 1.0303227419266743, - "learning_rate": 4.438906808705374e-06, - "loss": 0.1637, - "step": 13708 - }, - { - "epoch": 0.7, - "grad_norm": 1.0069749895417077, - "learning_rate": 4.437538073433524e-06, - "loss": 0.1834, - "step": 13709 - }, - { - "epoch": 0.7, - "grad_norm": 1.11647684150671, - "learning_rate": 4.436169489043768e-06, - "loss": 0.2011, - "step": 13710 - }, - { - "epoch": 0.7, - "grad_norm": 1.034624038537078, - "learning_rate": 4.434801055573232e-06, - "loss": 0.1871, - "step": 13711 - }, - { - "epoch": 0.7, - "grad_norm": 1.5391380793012959, - "learning_rate": 4.433432773059028e-06, - "loss": 0.155, - "step": 13712 - }, - { - "epoch": 0.7, - "grad_norm": 1.0803455955807053, - "learning_rate": 4.432064641538279e-06, - "loss": 0.1773, - "step": 13713 - }, - { - "epoch": 0.7, - "grad_norm": 0.9100790739475726, - "learning_rate": 4.430696661048086e-06, - "loss": 0.1683, - "step": 13714 - }, - { - "epoch": 0.7, - "grad_norm": 1.0718263998332211, - "learning_rate": 4.429328831625565e-06, - "loss": 0.1694, - "step": 13715 - }, - { - "epoch": 0.7, - "grad_norm": 1.1892483023347649, - "learning_rate": 4.427961153307811e-06, - "loss": 0.1673, - "step": 13716 - }, - { - "epoch": 0.7, - "grad_norm": 1.207942887979543, - "learning_rate": 4.426593626131928e-06, - "loss": 0.1533, - "step": 13717 - }, - { - "epoch": 0.7, - "grad_norm": 0.888967603029378, - "learning_rate": 4.425226250135005e-06, - "loss": 0.1736, - "step": 13718 - }, - { - "epoch": 0.7, - "grad_norm": 1.6935255519516728, - "learning_rate": 4.4238590253541335e-06, - "loss": 0.183, - "step": 13719 - }, - { - "epoch": 0.7, - "grad_norm": 3.4630156900562032, - "learning_rate": 4.422491951826402e-06, - "loss": 0.1638, - "step": 13720 - }, - { - "epoch": 0.7, - "grad_norm": 1.2770553329985392, - "learning_rate": 4.421125029588895e-06, - "loss": 0.166, - "step": 13721 - }, - { - "epoch": 0.7, - "grad_norm": 1.5433447524383987, - "learning_rate": 4.419758258678687e-06, - "loss": 0.173, - "step": 13722 - }, - { - "epoch": 0.7, - "grad_norm": 1.0737467851992872, - "learning_rate": 4.418391639132847e-06, - "loss": 0.1707, - "step": 13723 - }, - { - "epoch": 0.7, - "grad_norm": 0.9946446709389992, - "learning_rate": 4.4170251709884526e-06, - "loss": 0.1785, - "step": 13724 - }, - { - "epoch": 0.7, - "grad_norm": 1.4318930340410951, - "learning_rate": 4.4156588542825625e-06, - "loss": 0.1586, - "step": 13725 - }, - { - "epoch": 0.7, - "grad_norm": 3.510405383262583, - "learning_rate": 4.414292689052241e-06, - "loss": 0.1779, - "step": 13726 - }, - { - "epoch": 0.7, - "grad_norm": 0.9444011738801847, - "learning_rate": 4.412926675334546e-06, - "loss": 0.1636, - "step": 13727 - }, - { - "epoch": 0.7, - "grad_norm": 1.2076138811368429, - "learning_rate": 4.411560813166535e-06, - "loss": 0.1708, - "step": 13728 - }, - { - "epoch": 0.7, - "grad_norm": 2.370268298456576, - "learning_rate": 4.410195102585247e-06, - "loss": 0.1799, - "step": 13729 - }, - { - "epoch": 0.7, - "grad_norm": 0.9673169171945579, - "learning_rate": 4.408829543627737e-06, - "loss": 0.1746, - "step": 13730 - }, - { - "epoch": 0.7, - "grad_norm": 0.8898649802675859, - "learning_rate": 4.407464136331039e-06, - "loss": 0.169, - "step": 13731 - }, - { - "epoch": 0.7, - "grad_norm": 1.8779739505312871, - "learning_rate": 4.406098880732195e-06, - "loss": 0.1844, - "step": 13732 - }, - { - "epoch": 0.7, - "grad_norm": 1.0552422789470997, - "learning_rate": 4.404733776868231e-06, - "loss": 0.1786, - "step": 13733 - }, - { - "epoch": 0.7, - "grad_norm": 1.3731759447314973, - "learning_rate": 4.403368824776183e-06, - "loss": 0.1718, - "step": 13734 - }, - { - "epoch": 0.7, - "grad_norm": 2.8969742110147156, - "learning_rate": 4.402004024493069e-06, - "loss": 0.1735, - "step": 13735 - }, - { - "epoch": 0.7, - "grad_norm": 1.8454532379142259, - "learning_rate": 4.4006393760559105e-06, - "loss": 0.1647, - "step": 13736 - }, - { - "epoch": 0.7, - "grad_norm": 1.719264383388367, - "learning_rate": 4.399274879501726e-06, - "loss": 0.1765, - "step": 13737 - }, - { - "epoch": 0.7, - "grad_norm": 1.1110548054408023, - "learning_rate": 4.39791053486753e-06, - "loss": 0.1862, - "step": 13738 - }, - { - "epoch": 0.7, - "grad_norm": 2.8295730102929992, - "learning_rate": 4.396546342190327e-06, - "loss": 0.1518, - "step": 13739 - }, - { - "epoch": 0.7, - "grad_norm": 0.904612012615748, - "learning_rate": 4.3951823015071186e-06, - "loss": 0.1683, - "step": 13740 - }, - { - "epoch": 0.7, - "grad_norm": 1.0040002316631793, - "learning_rate": 4.393818412854909e-06, - "loss": 0.1811, - "step": 13741 - }, - { - "epoch": 0.7, - "grad_norm": 1.2752164591376416, - "learning_rate": 4.392454676270687e-06, - "loss": 0.1604, - "step": 13742 - }, - { - "epoch": 0.7, - "grad_norm": 1.0096690577838259, - "learning_rate": 4.391091091791455e-06, - "loss": 0.1636, - "step": 13743 - }, - { - "epoch": 0.7, - "grad_norm": 1.3945369984716742, - "learning_rate": 4.389727659454189e-06, - "loss": 0.1741, - "step": 13744 - }, - { - "epoch": 0.7, - "grad_norm": 0.8191888837385805, - "learning_rate": 4.388364379295878e-06, - "loss": 0.1619, - "step": 13745 - }, - { - "epoch": 0.7, - "grad_norm": 1.4411158575624718, - "learning_rate": 4.387001251353499e-06, - "loss": 0.1612, - "step": 13746 - }, - { - "epoch": 0.7, - "grad_norm": 1.6639282345204542, - "learning_rate": 4.3856382756640315e-06, - "loss": 0.1821, - "step": 13747 - }, - { - "epoch": 0.7, - "grad_norm": 0.9922552014129387, - "learning_rate": 4.3842754522644394e-06, - "loss": 0.1863, - "step": 13748 - }, - { - "epoch": 0.7, - "grad_norm": 1.1021385598823878, - "learning_rate": 4.382912781191697e-06, - "loss": 0.1895, - "step": 13749 - }, - { - "epoch": 0.7, - "grad_norm": 1.0179098567527967, - "learning_rate": 4.381550262482759e-06, - "loss": 0.1586, - "step": 13750 - }, - { - "epoch": 0.7, - "grad_norm": 0.999589172516189, - "learning_rate": 4.380187896174591e-06, - "loss": 0.191, - "step": 13751 - }, - { - "epoch": 0.7, - "grad_norm": 1.0142183508137548, - "learning_rate": 4.37882568230414e-06, - "loss": 0.1714, - "step": 13752 - }, - { - "epoch": 0.7, - "grad_norm": 1.7396820691494044, - "learning_rate": 4.37746362090836e-06, - "loss": 0.1879, - "step": 13753 - }, - { - "epoch": 0.7, - "grad_norm": 1.7150075562181797, - "learning_rate": 4.376101712024197e-06, - "loss": 0.1967, - "step": 13754 - }, - { - "epoch": 0.7, - "grad_norm": 0.9169604172984819, - "learning_rate": 4.374739955688595e-06, - "loss": 0.1517, - "step": 13755 - }, - { - "epoch": 0.7, - "grad_norm": 1.5748245219247363, - "learning_rate": 4.373378351938491e-06, - "loss": 0.1449, - "step": 13756 - }, - { - "epoch": 0.7, - "grad_norm": 0.9426798859246521, - "learning_rate": 4.372016900810813e-06, - "loss": 0.1728, - "step": 13757 - }, - { - "epoch": 0.7, - "grad_norm": 0.9292767642651203, - "learning_rate": 4.370655602342497e-06, - "loss": 0.1665, - "step": 13758 - }, - { - "epoch": 0.7, - "grad_norm": 0.9681826868018988, - "learning_rate": 4.369294456570463e-06, - "loss": 0.1758, - "step": 13759 - }, - { - "epoch": 0.7, - "grad_norm": 1.1272505686047567, - "learning_rate": 4.3679334635316395e-06, - "loss": 0.183, - "step": 13760 - }, - { - "epoch": 0.7, - "grad_norm": 1.3020722492673074, - "learning_rate": 4.366572623262934e-06, - "loss": 0.1595, - "step": 13761 - }, - { - "epoch": 0.7, - "grad_norm": 2.7390029556224693, - "learning_rate": 4.365211935801264e-06, - "loss": 0.1916, - "step": 13762 - }, - { - "epoch": 0.7, - "grad_norm": 1.075142392697466, - "learning_rate": 4.363851401183539e-06, - "loss": 0.1826, - "step": 13763 - }, - { - "epoch": 0.7, - "grad_norm": 1.937147420005713, - "learning_rate": 4.3624910194466675e-06, - "loss": 0.1843, - "step": 13764 - }, - { - "epoch": 0.7, - "grad_norm": 10.87810639949517, - "learning_rate": 4.361130790627541e-06, - "loss": 0.1532, - "step": 13765 - }, - { - "epoch": 0.7, - "grad_norm": 1.308672685903403, - "learning_rate": 4.3597707147630645e-06, - "loss": 0.1758, - "step": 13766 - }, - { - "epoch": 0.7, - "grad_norm": 1.514058633514923, - "learning_rate": 4.358410791890122e-06, - "loss": 0.1718, - "step": 13767 - }, - { - "epoch": 0.7, - "grad_norm": 1.2272785034178764, - "learning_rate": 4.357051022045608e-06, - "loss": 0.171, - "step": 13768 - }, - { - "epoch": 0.7, - "grad_norm": 0.8642054250805404, - "learning_rate": 4.355691405266401e-06, - "loss": 0.1507, - "step": 13769 - }, - { - "epoch": 0.7, - "grad_norm": 1.0457190185214238, - "learning_rate": 4.354331941589387e-06, - "loss": 0.1679, - "step": 13770 - }, - { - "epoch": 0.7, - "grad_norm": 1.2799911203581829, - "learning_rate": 4.352972631051435e-06, - "loss": 0.1879, - "step": 13771 - }, - { - "epoch": 0.7, - "grad_norm": 1.6635745340448762, - "learning_rate": 4.351613473689419e-06, - "loss": 0.1617, - "step": 13772 - }, - { - "epoch": 0.7, - "grad_norm": 0.8447529424528541, - "learning_rate": 4.350254469540209e-06, - "loss": 0.1517, - "step": 13773 - }, - { - "epoch": 0.7, - "grad_norm": 1.5466512179947478, - "learning_rate": 4.348895618640663e-06, - "loss": 0.1748, - "step": 13774 - }, - { - "epoch": 0.7, - "grad_norm": 0.8568594087610589, - "learning_rate": 4.347536921027646e-06, - "loss": 0.1654, - "step": 13775 - }, - { - "epoch": 0.7, - "grad_norm": 1.1145052751272784, - "learning_rate": 4.346178376738006e-06, - "loss": 0.193, - "step": 13776 - }, - { - "epoch": 0.7, - "grad_norm": 0.8522218692305981, - "learning_rate": 4.344819985808601e-06, - "loss": 0.1649, - "step": 13777 - }, - { - "epoch": 0.7, - "grad_norm": 1.0989297052512519, - "learning_rate": 4.343461748276267e-06, - "loss": 0.1849, - "step": 13778 - }, - { - "epoch": 0.7, - "grad_norm": 1.2810290352563842, - "learning_rate": 4.342103664177856e-06, - "loss": 0.172, - "step": 13779 - }, - { - "epoch": 0.7, - "grad_norm": 2.72999235370836, - "learning_rate": 4.3407457335502e-06, - "loss": 0.1643, - "step": 13780 - }, - { - "epoch": 0.7, - "grad_norm": 0.9451376817683268, - "learning_rate": 4.339387956430141e-06, - "loss": 0.1646, - "step": 13781 - }, - { - "epoch": 0.7, - "grad_norm": 1.321991051912222, - "learning_rate": 4.3380303328545e-06, - "loss": 0.1706, - "step": 13782 - }, - { - "epoch": 0.7, - "grad_norm": 1.1961186962090051, - "learning_rate": 4.336672862860107e-06, - "loss": 0.185, - "step": 13783 - }, - { - "epoch": 0.7, - "grad_norm": 0.9379183664300133, - "learning_rate": 4.335315546483781e-06, - "loss": 0.1697, - "step": 13784 - }, - { - "epoch": 0.7, - "grad_norm": 0.9396690803575051, - "learning_rate": 4.333958383762345e-06, - "loss": 0.1493, - "step": 13785 - }, - { - "epoch": 0.7, - "grad_norm": 1.143837166113807, - "learning_rate": 4.332601374732602e-06, - "loss": 0.1744, - "step": 13786 - }, - { - "epoch": 0.7, - "grad_norm": 0.9022516815447942, - "learning_rate": 4.331244519431371e-06, - "loss": 0.1565, - "step": 13787 - }, - { - "epoch": 0.7, - "grad_norm": 1.034186295116794, - "learning_rate": 4.329887817895451e-06, - "loss": 0.1756, - "step": 13788 - }, - { - "epoch": 0.7, - "grad_norm": 0.9630885441659184, - "learning_rate": 4.328531270161642e-06, - "loss": 0.1712, - "step": 13789 - }, - { - "epoch": 0.7, - "grad_norm": 1.0721431552611012, - "learning_rate": 4.327174876266743e-06, - "loss": 0.1594, - "step": 13790 - }, - { - "epoch": 0.7, - "grad_norm": 1.225581281392638, - "learning_rate": 4.325818636247549e-06, - "loss": 0.1556, - "step": 13791 - }, - { - "epoch": 0.7, - "grad_norm": 1.0627692078353024, - "learning_rate": 4.324462550140847e-06, - "loss": 0.1749, - "step": 13792 - }, - { - "epoch": 0.7, - "grad_norm": 0.9279764998021093, - "learning_rate": 4.323106617983414e-06, - "loss": 0.1599, - "step": 13793 - }, - { - "epoch": 0.7, - "grad_norm": 1.1209255773185283, - "learning_rate": 4.321750839812038e-06, - "loss": 0.1773, - "step": 13794 - }, - { - "epoch": 0.7, - "grad_norm": 1.2147773049589803, - "learning_rate": 4.320395215663488e-06, - "loss": 0.1608, - "step": 13795 - }, - { - "epoch": 0.7, - "grad_norm": 1.355886040723802, - "learning_rate": 4.319039745574543e-06, - "loss": 0.1587, - "step": 13796 - }, - { - "epoch": 0.7, - "grad_norm": 0.889548101640906, - "learning_rate": 4.317684429581961e-06, - "loss": 0.1651, - "step": 13797 - }, - { - "epoch": 0.7, - "grad_norm": 0.9541565186730563, - "learning_rate": 4.316329267722509e-06, - "loss": 0.1481, - "step": 13798 - }, - { - "epoch": 0.7, - "grad_norm": 0.9623375218408832, - "learning_rate": 4.314974260032948e-06, - "loss": 0.1656, - "step": 13799 - }, - { - "epoch": 0.7, - "grad_norm": 0.8715958990471303, - "learning_rate": 4.313619406550034e-06, - "loss": 0.167, - "step": 13800 - }, - { - "epoch": 0.7, - "grad_norm": 0.9084593575369279, - "learning_rate": 4.3122647073105114e-06, - "loss": 0.182, - "step": 13801 - }, - { - "epoch": 0.7, - "grad_norm": 1.3108811802318745, - "learning_rate": 4.310910162351134e-06, - "loss": 0.1538, - "step": 13802 - }, - { - "epoch": 0.7, - "grad_norm": 1.3121975048987127, - "learning_rate": 4.309555771708637e-06, - "loss": 0.1871, - "step": 13803 - }, - { - "epoch": 0.7, - "grad_norm": 1.0499623681533017, - "learning_rate": 4.308201535419762e-06, - "loss": 0.1691, - "step": 13804 - }, - { - "epoch": 0.7, - "grad_norm": 1.0081547694953512, - "learning_rate": 4.306847453521241e-06, - "loss": 0.1726, - "step": 13805 - }, - { - "epoch": 0.7, - "grad_norm": 1.5058682236514365, - "learning_rate": 4.305493526049803e-06, - "loss": 0.1697, - "step": 13806 - }, - { - "epoch": 0.7, - "grad_norm": 0.9462720026788233, - "learning_rate": 4.304139753042174e-06, - "loss": 0.186, - "step": 13807 - }, - { - "epoch": 0.7, - "grad_norm": 0.8945622051574326, - "learning_rate": 4.3027861345350805e-06, - "loss": 0.1733, - "step": 13808 - }, - { - "epoch": 0.7, - "grad_norm": 0.9428319169479271, - "learning_rate": 4.301432670565235e-06, - "loss": 0.1587, - "step": 13809 - }, - { - "epoch": 0.7, - "grad_norm": 0.8628655152606859, - "learning_rate": 4.300079361169347e-06, - "loss": 0.1919, - "step": 13810 - }, - { - "epoch": 0.7, - "grad_norm": 0.9129736896773106, - "learning_rate": 4.2987262063841316e-06, - "loss": 0.1778, - "step": 13811 - }, - { - "epoch": 0.7, - "grad_norm": 1.0113974153294598, - "learning_rate": 4.297373206246286e-06, - "loss": 0.1683, - "step": 13812 - }, - { - "epoch": 0.7, - "grad_norm": 1.0648951024839919, - "learning_rate": 4.296020360792518e-06, - "loss": 0.1514, - "step": 13813 - }, - { - "epoch": 0.7, - "grad_norm": 2.073923935323422, - "learning_rate": 4.2946676700595155e-06, - "loss": 0.16, - "step": 13814 - }, - { - "epoch": 0.7, - "grad_norm": 1.3478803045424044, - "learning_rate": 4.293315134083975e-06, - "loss": 0.195, - "step": 13815 - }, - { - "epoch": 0.7, - "grad_norm": 1.5146714760895041, - "learning_rate": 4.291962752902584e-06, - "loss": 0.1863, - "step": 13816 - }, - { - "epoch": 0.7, - "grad_norm": 6.2258691029557385, - "learning_rate": 4.2906105265520295e-06, - "loss": 0.1881, - "step": 13817 - }, - { - "epoch": 0.7, - "grad_norm": 1.4088017704480014, - "learning_rate": 4.289258455068983e-06, - "loss": 0.1969, - "step": 13818 - }, - { - "epoch": 0.7, - "grad_norm": 0.9642670080882169, - "learning_rate": 4.287906538490128e-06, - "loss": 0.1634, - "step": 13819 - }, - { - "epoch": 0.7, - "grad_norm": 2.0240549553314455, - "learning_rate": 4.286554776852125e-06, - "loss": 0.1666, - "step": 13820 - }, - { - "epoch": 0.7, - "grad_norm": 1.1796958409744949, - "learning_rate": 4.285203170191652e-06, - "loss": 0.1694, - "step": 13821 - }, - { - "epoch": 0.7, - "grad_norm": 0.9443459190506037, - "learning_rate": 4.283851718545362e-06, - "loss": 0.1679, - "step": 13822 - }, - { - "epoch": 0.7, - "grad_norm": 0.9646991551786035, - "learning_rate": 4.282500421949917e-06, - "loss": 0.1596, - "step": 13823 - }, - { - "epoch": 0.7, - "grad_norm": 1.2808704572171308, - "learning_rate": 4.2811492804419695e-06, - "loss": 0.1662, - "step": 13824 - }, - { - "epoch": 0.7, - "grad_norm": 1.347312322023975, - "learning_rate": 4.279798294058176e-06, - "loss": 0.1772, - "step": 13825 - }, - { - "epoch": 0.7, - "grad_norm": 1.784693189124559, - "learning_rate": 4.278447462835176e-06, - "loss": 0.186, - "step": 13826 - }, - { - "epoch": 0.7, - "grad_norm": 1.4600284370723755, - "learning_rate": 4.277096786809608e-06, - "loss": 0.1915, - "step": 13827 - }, - { - "epoch": 0.7, - "grad_norm": 1.2121081339526243, - "learning_rate": 4.275746266018117e-06, - "loss": 0.1782, - "step": 13828 - }, - { - "epoch": 0.7, - "grad_norm": 1.0950423022369236, - "learning_rate": 4.274395900497328e-06, - "loss": 0.1945, - "step": 13829 - }, - { - "epoch": 0.7, - "grad_norm": 1.1221579992549777, - "learning_rate": 4.273045690283878e-06, - "loss": 0.1806, - "step": 13830 - }, - { - "epoch": 0.7, - "grad_norm": 1.0292814625435251, - "learning_rate": 4.2716956354143826e-06, - "loss": 0.1599, - "step": 13831 - }, - { - "epoch": 0.7, - "grad_norm": 1.0910953828917187, - "learning_rate": 4.2703457359254665e-06, - "loss": 0.1721, - "step": 13832 - }, - { - "epoch": 0.7, - "grad_norm": 0.9996097948773663, - "learning_rate": 4.268995991853746e-06, - "loss": 0.1676, - "step": 13833 - }, - { - "epoch": 0.7, - "grad_norm": 1.1543389514703013, - "learning_rate": 4.267646403235836e-06, - "loss": 0.1964, - "step": 13834 - }, - { - "epoch": 0.7, - "grad_norm": 1.5130531821660502, - "learning_rate": 4.266296970108339e-06, - "loss": 0.1613, - "step": 13835 - }, - { - "epoch": 0.7, - "grad_norm": 0.8845460390820609, - "learning_rate": 4.264947692507863e-06, - "loss": 0.1708, - "step": 13836 - }, - { - "epoch": 0.7, - "grad_norm": 0.9841852132957352, - "learning_rate": 4.263598570471003e-06, - "loss": 0.1861, - "step": 13837 - }, - { - "epoch": 0.7, - "grad_norm": 0.9041726518279823, - "learning_rate": 4.262249604034356e-06, - "loss": 0.1699, - "step": 13838 - }, - { - "epoch": 0.7, - "grad_norm": 1.0345479895144427, - "learning_rate": 4.260900793234511e-06, - "loss": 0.1663, - "step": 13839 - }, - { - "epoch": 0.7, - "grad_norm": 0.8941284391965324, - "learning_rate": 4.259552138108061e-06, - "loss": 0.1577, - "step": 13840 - }, - { - "epoch": 0.7, - "grad_norm": 0.9192186532010812, - "learning_rate": 4.258203638691578e-06, - "loss": 0.1976, - "step": 13841 - }, - { - "epoch": 0.7, - "grad_norm": 0.9715839954624031, - "learning_rate": 4.256855295021646e-06, - "loss": 0.1523, - "step": 13842 - }, - { - "epoch": 0.7, - "grad_norm": 0.9026243535838973, - "learning_rate": 4.255507107134842e-06, - "loss": 0.1917, - "step": 13843 - }, - { - "epoch": 0.7, - "grad_norm": 0.8797153394563227, - "learning_rate": 4.2541590750677285e-06, - "loss": 0.1531, - "step": 13844 - }, - { - "epoch": 0.7, - "grad_norm": 0.9451326989379703, - "learning_rate": 4.252811198856878e-06, - "loss": 0.1708, - "step": 13845 - }, - { - "epoch": 0.7, - "grad_norm": 1.0833224058149244, - "learning_rate": 4.251463478538846e-06, - "loss": 0.2052, - "step": 13846 - }, - { - "epoch": 0.7, - "grad_norm": 1.3103829188590623, - "learning_rate": 4.250115914150194e-06, - "loss": 0.1823, - "step": 13847 - }, - { - "epoch": 0.7, - "grad_norm": 0.94943048980084, - "learning_rate": 4.2487685057274695e-06, - "loss": 0.1938, - "step": 13848 - }, - { - "epoch": 0.7, - "grad_norm": 1.559673614053583, - "learning_rate": 4.247421253307225e-06, - "loss": 0.1897, - "step": 13849 - }, - { - "epoch": 0.7, - "grad_norm": 0.9774268887965165, - "learning_rate": 4.246074156926002e-06, - "loss": 0.1596, - "step": 13850 - }, - { - "epoch": 0.7, - "grad_norm": 1.4407846971193052, - "learning_rate": 4.244727216620348e-06, - "loss": 0.1621, - "step": 13851 - }, - { - "epoch": 0.7, - "grad_norm": 1.0361937161045285, - "learning_rate": 4.2433804324267895e-06, - "loss": 0.1804, - "step": 13852 - }, - { - "epoch": 0.7, - "grad_norm": 1.0412298692371762, - "learning_rate": 4.242033804381864e-06, - "loss": 0.1917, - "step": 13853 - }, - { - "epoch": 0.7, - "grad_norm": 0.9114379786274113, - "learning_rate": 4.240687332522094e-06, - "loss": 0.1873, - "step": 13854 - }, - { - "epoch": 0.7, - "grad_norm": 1.6741798793588893, - "learning_rate": 4.239341016884008e-06, - "loss": 0.1743, - "step": 13855 - }, - { - "epoch": 0.7, - "grad_norm": 0.9870271064861393, - "learning_rate": 4.237994857504121e-06, - "loss": 0.1615, - "step": 13856 - }, - { - "epoch": 0.7, - "grad_norm": 1.4861495315901574, - "learning_rate": 4.236648854418951e-06, - "loss": 0.1603, - "step": 13857 - }, - { - "epoch": 0.7, - "grad_norm": 1.035190561600465, - "learning_rate": 4.2353030076650025e-06, - "loss": 0.1593, - "step": 13858 - }, - { - "epoch": 0.7, - "grad_norm": 1.1910756738313253, - "learning_rate": 4.233957317278786e-06, - "loss": 0.16, - "step": 13859 - }, - { - "epoch": 0.7, - "grad_norm": 1.5194077790731053, - "learning_rate": 4.232611783296804e-06, - "loss": 0.1933, - "step": 13860 - }, - { - "epoch": 0.7, - "grad_norm": 0.9646802011211995, - "learning_rate": 4.2312664057555556e-06, - "loss": 0.1781, - "step": 13861 - }, - { - "epoch": 0.7, - "grad_norm": 1.4234201229578838, - "learning_rate": 4.229921184691531e-06, - "loss": 0.1787, - "step": 13862 - }, - { - "epoch": 0.7, - "grad_norm": 1.4329485564112754, - "learning_rate": 4.228576120141218e-06, - "loss": 0.1733, - "step": 13863 - }, - { - "epoch": 0.71, - "grad_norm": 0.7625806077048197, - "learning_rate": 4.2272312121411065e-06, - "loss": 0.1707, - "step": 13864 - }, - { - "epoch": 0.71, - "grad_norm": 1.0778457587652996, - "learning_rate": 4.225886460727671e-06, - "loss": 0.1932, - "step": 13865 - }, - { - "epoch": 0.71, - "grad_norm": 1.398154670499273, - "learning_rate": 4.224541865937395e-06, - "loss": 0.158, - "step": 13866 - }, - { - "epoch": 0.71, - "grad_norm": 1.156600814601921, - "learning_rate": 4.2231974278067436e-06, - "loss": 0.1709, - "step": 13867 - }, - { - "epoch": 0.71, - "grad_norm": 1.1124745144642676, - "learning_rate": 4.221853146372188e-06, - "loss": 0.195, - "step": 13868 - }, - { - "epoch": 0.71, - "grad_norm": 0.9143329997002765, - "learning_rate": 4.220509021670193e-06, - "loss": 0.1533, - "step": 13869 - }, - { - "epoch": 0.71, - "grad_norm": 1.2854654668501229, - "learning_rate": 4.21916505373722e-06, - "loss": 0.1698, - "step": 13870 - }, - { - "epoch": 0.71, - "grad_norm": 1.7644388099664177, - "learning_rate": 4.2178212426097175e-06, - "loss": 0.1726, - "step": 13871 - }, - { - "epoch": 0.71, - "grad_norm": 2.4484549246924208, - "learning_rate": 4.216477588324144e-06, - "loss": 0.1723, - "step": 13872 - }, - { - "epoch": 0.71, - "grad_norm": 1.8021444948614584, - "learning_rate": 4.215134090916939e-06, - "loss": 0.1833, - "step": 13873 - }, - { - "epoch": 0.71, - "grad_norm": 0.9335939453621243, - "learning_rate": 4.213790750424553e-06, - "loss": 0.1612, - "step": 13874 - }, - { - "epoch": 0.71, - "grad_norm": 1.0977518688232104, - "learning_rate": 4.212447566883415e-06, - "loss": 0.1641, - "step": 13875 - }, - { - "epoch": 0.71, - "grad_norm": 1.220894291615603, - "learning_rate": 4.211104540329964e-06, - "loss": 0.1739, - "step": 13876 - }, - { - "epoch": 0.71, - "grad_norm": 0.9058485613346287, - "learning_rate": 4.209761670800631e-06, - "loss": 0.1364, - "step": 13877 - }, - { - "epoch": 0.71, - "grad_norm": 3.94948120515998, - "learning_rate": 4.208418958331841e-06, - "loss": 0.1656, - "step": 13878 - }, - { - "epoch": 0.71, - "grad_norm": 1.0817260499443833, - "learning_rate": 4.207076402960015e-06, - "loss": 0.1685, - "step": 13879 - }, - { - "epoch": 0.71, - "grad_norm": 1.4737839935785986, - "learning_rate": 4.205734004721565e-06, - "loss": 0.1674, - "step": 13880 - }, - { - "epoch": 0.71, - "grad_norm": 1.2886994072038556, - "learning_rate": 4.204391763652911e-06, - "loss": 0.1618, - "step": 13881 - }, - { - "epoch": 0.71, - "grad_norm": 1.0956350599041518, - "learning_rate": 4.2030496797904526e-06, - "loss": 0.1589, - "step": 13882 - }, - { - "epoch": 0.71, - "grad_norm": 1.5778971792081893, - "learning_rate": 4.2017077531706056e-06, - "loss": 0.1754, - "step": 13883 - }, - { - "epoch": 0.71, - "grad_norm": 1.280024551622798, - "learning_rate": 4.200365983829757e-06, - "loss": 0.1679, - "step": 13884 - }, - { - "epoch": 0.71, - "grad_norm": 1.0989316304641512, - "learning_rate": 4.19902437180431e-06, - "loss": 0.179, - "step": 13885 - }, - { - "epoch": 0.71, - "grad_norm": 2.0262960765847735, - "learning_rate": 4.197682917130654e-06, - "loss": 0.1701, - "step": 13886 - }, - { - "epoch": 0.71, - "grad_norm": 1.3649534886612573, - "learning_rate": 4.196341619845182e-06, - "loss": 0.1682, - "step": 13887 - }, - { - "epoch": 0.71, - "grad_norm": 0.9768879481607011, - "learning_rate": 4.195000479984264e-06, - "loss": 0.1555, - "step": 13888 - }, - { - "epoch": 0.71, - "grad_norm": 1.2674514431589723, - "learning_rate": 4.193659497584293e-06, - "loss": 0.1718, - "step": 13889 - }, - { - "epoch": 0.71, - "grad_norm": 0.9301726568541404, - "learning_rate": 4.192318672681631e-06, - "loss": 0.1667, - "step": 13890 - }, - { - "epoch": 0.71, - "grad_norm": 1.6447104967367812, - "learning_rate": 4.190978005312657e-06, - "loss": 0.1883, - "step": 13891 - }, - { - "epoch": 0.71, - "grad_norm": 1.0557492480294897, - "learning_rate": 4.189637495513729e-06, - "loss": 0.1608, - "step": 13892 - }, - { - "epoch": 0.71, - "grad_norm": 1.1176874894671438, - "learning_rate": 4.188297143321215e-06, - "loss": 0.1801, - "step": 13893 - }, - { - "epoch": 0.71, - "grad_norm": 0.9404386916814441, - "learning_rate": 4.186956948771467e-06, - "loss": 0.1617, - "step": 13894 - }, - { - "epoch": 0.71, - "grad_norm": 1.1853230444085585, - "learning_rate": 4.1856169119008384e-06, - "loss": 0.19, - "step": 13895 - }, - { - "epoch": 0.71, - "grad_norm": 1.2015223083588877, - "learning_rate": 4.184277032745685e-06, - "loss": 0.1627, - "step": 13896 - }, - { - "epoch": 0.71, - "grad_norm": 0.8969995228287397, - "learning_rate": 4.18293731134234e-06, - "loss": 0.1409, - "step": 13897 - }, - { - "epoch": 0.71, - "grad_norm": 2.7125517882768655, - "learning_rate": 4.181597747727154e-06, - "loss": 0.1916, - "step": 13898 - }, - { - "epoch": 0.71, - "grad_norm": 1.090728912294368, - "learning_rate": 4.180258341936454e-06, - "loss": 0.1636, - "step": 13899 - }, - { - "epoch": 0.71, - "grad_norm": 1.5086909915665916, - "learning_rate": 4.178919094006578e-06, - "loss": 0.1764, - "step": 13900 - }, - { - "epoch": 0.71, - "grad_norm": 1.0281672216497417, - "learning_rate": 4.1775800039738465e-06, - "loss": 0.1694, - "step": 13901 - }, - { - "epoch": 0.71, - "grad_norm": 0.9611875987842856, - "learning_rate": 4.176241071874587e-06, - "loss": 0.1667, - "step": 13902 - }, - { - "epoch": 0.71, - "grad_norm": 1.1189889394442891, - "learning_rate": 4.174902297745118e-06, - "loss": 0.1845, - "step": 13903 - }, - { - "epoch": 0.71, - "grad_norm": 1.0347532611945578, - "learning_rate": 4.173563681621756e-06, - "loss": 0.1723, - "step": 13904 - }, - { - "epoch": 0.71, - "grad_norm": 1.4840919450107504, - "learning_rate": 4.1722252235408045e-06, - "loss": 0.1786, - "step": 13905 - }, - { - "epoch": 0.71, - "grad_norm": 1.7848343757219844, - "learning_rate": 4.170886923538576e-06, - "loss": 0.1658, - "step": 13906 - }, - { - "epoch": 0.71, - "grad_norm": 1.1563399247561783, - "learning_rate": 4.169548781651367e-06, - "loss": 0.1772, - "step": 13907 - }, - { - "epoch": 0.71, - "grad_norm": 1.231024214373564, - "learning_rate": 4.168210797915479e-06, - "loss": 0.1483, - "step": 13908 - }, - { - "epoch": 0.71, - "grad_norm": 1.2235509447303758, - "learning_rate": 4.1668729723671994e-06, - "loss": 0.163, - "step": 13909 - }, - { - "epoch": 0.71, - "grad_norm": 0.9748722624842394, - "learning_rate": 4.165535305042822e-06, - "loss": 0.1544, - "step": 13910 - }, - { - "epoch": 0.71, - "grad_norm": 1.1483576747353885, - "learning_rate": 4.164197795978628e-06, - "loss": 0.1721, - "step": 13911 - }, - { - "epoch": 0.71, - "grad_norm": 1.011189049498867, - "learning_rate": 4.162860445210897e-06, - "loss": 0.1866, - "step": 13912 - }, - { - "epoch": 0.71, - "grad_norm": 1.3078625368117252, - "learning_rate": 4.16152325277591e-06, - "loss": 0.1789, - "step": 13913 - }, - { - "epoch": 0.71, - "grad_norm": 0.9965484103475903, - "learning_rate": 4.16018621870993e-06, - "loss": 0.2024, - "step": 13914 - }, - { - "epoch": 0.71, - "grad_norm": 1.021616309600682, - "learning_rate": 4.158849343049233e-06, - "loss": 0.1891, - "step": 13915 - }, - { - "epoch": 0.71, - "grad_norm": 1.1905640579670889, - "learning_rate": 4.157512625830074e-06, - "loss": 0.1679, - "step": 13916 - }, - { - "epoch": 0.71, - "grad_norm": 0.9737795035987967, - "learning_rate": 4.156176067088717e-06, - "loss": 0.1768, - "step": 13917 - }, - { - "epoch": 0.71, - "grad_norm": 0.9511517663436382, - "learning_rate": 4.154839666861413e-06, - "loss": 0.1708, - "step": 13918 - }, - { - "epoch": 0.71, - "grad_norm": 1.1916910017235933, - "learning_rate": 4.153503425184415e-06, - "loss": 0.1807, - "step": 13919 - }, - { - "epoch": 0.71, - "grad_norm": 1.2415657986375965, - "learning_rate": 4.152167342093965e-06, - "loss": 0.1531, - "step": 13920 - }, - { - "epoch": 0.71, - "grad_norm": 1.4514494878822788, - "learning_rate": 4.150831417626304e-06, - "loss": 0.1882, - "step": 13921 - }, - { - "epoch": 0.71, - "grad_norm": 0.9120073512123998, - "learning_rate": 4.149495651817673e-06, - "loss": 0.1664, - "step": 13922 - }, - { - "epoch": 0.71, - "grad_norm": 0.9726956622707574, - "learning_rate": 4.148160044704306e-06, - "loss": 0.1741, - "step": 13923 - }, - { - "epoch": 0.71, - "grad_norm": 0.9938501765351079, - "learning_rate": 4.1468245963224245e-06, - "loss": 0.1629, - "step": 13924 - }, - { - "epoch": 0.71, - "grad_norm": 1.102826239226895, - "learning_rate": 4.1454893067082605e-06, - "loss": 0.1704, - "step": 13925 - }, - { - "epoch": 0.71, - "grad_norm": 0.8498416122497809, - "learning_rate": 4.1441541758980256e-06, - "loss": 0.1589, - "step": 13926 - }, - { - "epoch": 0.71, - "grad_norm": 1.5947741621857945, - "learning_rate": 4.142819203927942e-06, - "loss": 0.2137, - "step": 13927 - }, - { - "epoch": 0.71, - "grad_norm": 0.994381854280893, - "learning_rate": 4.141484390834216e-06, - "loss": 0.1654, - "step": 13928 - }, - { - "epoch": 0.71, - "grad_norm": 0.9255592604899666, - "learning_rate": 4.140149736653056e-06, - "loss": 0.1751, - "step": 13929 - }, - { - "epoch": 0.71, - "grad_norm": 1.5580640598570554, - "learning_rate": 4.138815241420666e-06, - "loss": 0.1859, - "step": 13930 - }, - { - "epoch": 0.71, - "grad_norm": 0.8700202663936708, - "learning_rate": 4.137480905173248e-06, - "loss": 0.1566, - "step": 13931 - }, - { - "epoch": 0.71, - "grad_norm": 1.5028060839405217, - "learning_rate": 4.13614672794699e-06, - "loss": 0.1481, - "step": 13932 - }, - { - "epoch": 0.71, - "grad_norm": 1.2787088758138214, - "learning_rate": 4.13481270977808e-06, - "loss": 0.1806, - "step": 13933 - }, - { - "epoch": 0.71, - "grad_norm": 1.0328407095066583, - "learning_rate": 4.133478850702711e-06, - "loss": 0.1802, - "step": 13934 - }, - { - "epoch": 0.71, - "grad_norm": 0.9213280279429286, - "learning_rate": 4.1321451507570555e-06, - "loss": 0.1738, - "step": 13935 - }, - { - "epoch": 0.71, - "grad_norm": 1.13141968709934, - "learning_rate": 4.130811609977297e-06, - "loss": 0.1698, - "step": 13936 - }, - { - "epoch": 0.71, - "grad_norm": 0.9087622452263413, - "learning_rate": 4.1294782283996024e-06, - "loss": 0.189, - "step": 13937 - }, - { - "epoch": 0.71, - "grad_norm": 0.7743718454839085, - "learning_rate": 4.128145006060141e-06, - "loss": 0.1608, - "step": 13938 - }, - { - "epoch": 0.71, - "grad_norm": 1.008167328082802, - "learning_rate": 4.12681194299508e-06, - "loss": 0.1724, - "step": 13939 - }, - { - "epoch": 0.71, - "grad_norm": 1.3185350228359154, - "learning_rate": 4.12547903924058e-06, - "loss": 0.1587, - "step": 13940 - }, - { - "epoch": 0.71, - "grad_norm": 1.2549184563659543, - "learning_rate": 4.124146294832788e-06, - "loss": 0.1831, - "step": 13941 - }, - { - "epoch": 0.71, - "grad_norm": 1.252213411815056, - "learning_rate": 4.122813709807864e-06, - "loss": 0.177, - "step": 13942 - }, - { - "epoch": 0.71, - "grad_norm": 1.0130254442875402, - "learning_rate": 4.121481284201946e-06, - "loss": 0.1875, - "step": 13943 - }, - { - "epoch": 0.71, - "grad_norm": 1.0870914766560396, - "learning_rate": 4.120149018051184e-06, - "loss": 0.1803, - "step": 13944 - }, - { - "epoch": 0.71, - "grad_norm": 1.0470809970557806, - "learning_rate": 4.118816911391709e-06, - "loss": 0.1668, - "step": 13945 - }, - { - "epoch": 0.71, - "grad_norm": 0.943477282672622, - "learning_rate": 4.117484964259657e-06, - "loss": 0.1635, - "step": 13946 - }, - { - "epoch": 0.71, - "grad_norm": 1.101166936528451, - "learning_rate": 4.116153176691158e-06, - "loss": 0.1645, - "step": 13947 - }, - { - "epoch": 0.71, - "grad_norm": 3.306797221498104, - "learning_rate": 4.1148215487223385e-06, - "loss": 0.1637, - "step": 13948 - }, - { - "epoch": 0.71, - "grad_norm": 0.9415598236086736, - "learning_rate": 4.1134900803893185e-06, - "loss": 0.1946, - "step": 13949 - }, - { - "epoch": 0.71, - "grad_norm": 4.200965620690179, - "learning_rate": 4.1121587717282085e-06, - "loss": 0.1801, - "step": 13950 - }, - { - "epoch": 0.71, - "grad_norm": 1.141433023565065, - "learning_rate": 4.110827622775128e-06, - "loss": 0.1821, - "step": 13951 - }, - { - "epoch": 0.71, - "grad_norm": 1.305752632035842, - "learning_rate": 4.1094966335661765e-06, - "loss": 0.1645, - "step": 13952 - }, - { - "epoch": 0.71, - "grad_norm": 1.1922977922503717, - "learning_rate": 4.108165804137466e-06, - "loss": 0.1694, - "step": 13953 - }, - { - "epoch": 0.71, - "grad_norm": 2.0802621937995167, - "learning_rate": 4.106835134525087e-06, - "loss": 0.1686, - "step": 13954 - }, - { - "epoch": 0.71, - "grad_norm": 1.0049350043646883, - "learning_rate": 4.105504624765137e-06, - "loss": 0.176, - "step": 13955 - }, - { - "epoch": 0.71, - "grad_norm": 1.762105872660763, - "learning_rate": 4.104174274893709e-06, - "loss": 0.168, - "step": 13956 - }, - { - "epoch": 0.71, - "grad_norm": 1.30451208977044, - "learning_rate": 4.102844084946889e-06, - "loss": 0.1608, - "step": 13957 - }, - { - "epoch": 0.71, - "grad_norm": 1.106594812369558, - "learning_rate": 4.101514054960752e-06, - "loss": 0.1842, - "step": 13958 - }, - { - "epoch": 0.71, - "grad_norm": 1.0997050675526354, - "learning_rate": 4.1001841849713845e-06, - "loss": 0.1939, - "step": 13959 - }, - { - "epoch": 0.71, - "grad_norm": 1.0702076106090137, - "learning_rate": 4.098854475014849e-06, - "loss": 0.2071, - "step": 13960 - }, - { - "epoch": 0.71, - "grad_norm": 0.9761023790628669, - "learning_rate": 4.097524925127224e-06, - "loss": 0.1787, - "step": 13961 - }, - { - "epoch": 0.71, - "grad_norm": 1.0409487600060539, - "learning_rate": 4.096195535344565e-06, - "loss": 0.1671, - "step": 13962 - }, - { - "epoch": 0.71, - "grad_norm": 1.1941185796143974, - "learning_rate": 4.0948663057029395e-06, - "loss": 0.1842, - "step": 13963 - }, - { - "epoch": 0.71, - "grad_norm": 1.0081634661008763, - "learning_rate": 4.093537236238394e-06, - "loss": 0.1734, - "step": 13964 - }, - { - "epoch": 0.71, - "grad_norm": 2.1712157900084783, - "learning_rate": 4.092208326986986e-06, - "loss": 0.1735, - "step": 13965 - }, - { - "epoch": 0.71, - "grad_norm": 1.493318392656283, - "learning_rate": 4.090879577984763e-06, - "loss": 0.1534, - "step": 13966 - }, - { - "epoch": 0.71, - "grad_norm": 0.76818294481959, - "learning_rate": 4.089550989267763e-06, - "loss": 0.1598, - "step": 13967 - }, - { - "epoch": 0.71, - "grad_norm": 2.3472354685096652, - "learning_rate": 4.0882225608720295e-06, - "loss": 0.1624, - "step": 13968 - }, - { - "epoch": 0.71, - "grad_norm": 0.9148780436687676, - "learning_rate": 4.086894292833589e-06, - "loss": 0.1638, - "step": 13969 - }, - { - "epoch": 0.71, - "grad_norm": 1.7247586560020707, - "learning_rate": 4.085566185188478e-06, - "loss": 0.1865, - "step": 13970 - }, - { - "epoch": 0.71, - "grad_norm": 0.8492698486093321, - "learning_rate": 4.084238237972715e-06, - "loss": 0.169, - "step": 13971 - }, - { - "epoch": 0.71, - "grad_norm": 1.1954560908592897, - "learning_rate": 4.082910451222325e-06, - "loss": 0.1825, - "step": 13972 - }, - { - "epoch": 0.71, - "grad_norm": 1.0239596161957574, - "learning_rate": 4.0815828249733226e-06, - "loss": 0.1693, - "step": 13973 - }, - { - "epoch": 0.71, - "grad_norm": 0.9171027277912834, - "learning_rate": 4.080255359261723e-06, - "loss": 0.1612, - "step": 13974 - }, - { - "epoch": 0.71, - "grad_norm": 0.9608743201810395, - "learning_rate": 4.078928054123529e-06, - "loss": 0.1791, - "step": 13975 - }, - { - "epoch": 0.71, - "grad_norm": 0.9961691965383497, - "learning_rate": 4.077600909594748e-06, - "loss": 0.1706, - "step": 13976 - }, - { - "epoch": 0.71, - "grad_norm": 1.336294130271254, - "learning_rate": 4.0762739257113734e-06, - "loss": 0.1851, - "step": 13977 - }, - { - "epoch": 0.71, - "grad_norm": 1.0139466768340508, - "learning_rate": 4.074947102509408e-06, - "loss": 0.1655, - "step": 13978 - }, - { - "epoch": 0.71, - "grad_norm": 1.2893579384169815, - "learning_rate": 4.073620440024832e-06, - "loss": 0.1941, - "step": 13979 - }, - { - "epoch": 0.71, - "grad_norm": 0.9832599128423222, - "learning_rate": 4.072293938293641e-06, - "loss": 0.1756, - "step": 13980 - }, - { - "epoch": 0.71, - "grad_norm": 1.0238164537959138, - "learning_rate": 4.070967597351808e-06, - "loss": 0.1858, - "step": 13981 - }, - { - "epoch": 0.71, - "grad_norm": 1.0183251618475806, - "learning_rate": 4.069641417235314e-06, - "loss": 0.1804, - "step": 13982 - }, - { - "epoch": 0.71, - "grad_norm": 0.9810496013205331, - "learning_rate": 4.068315397980135e-06, - "loss": 0.1718, - "step": 13983 - }, - { - "epoch": 0.71, - "grad_norm": 1.082129763356115, - "learning_rate": 4.066989539622232e-06, - "loss": 0.1605, - "step": 13984 - }, - { - "epoch": 0.71, - "grad_norm": 0.7518739008925989, - "learning_rate": 4.065663842197576e-06, - "loss": 0.1535, - "step": 13985 - }, - { - "epoch": 0.71, - "grad_norm": 1.1024784812536153, - "learning_rate": 4.06433830574212e-06, - "loss": 0.1693, - "step": 13986 - }, - { - "epoch": 0.71, - "grad_norm": 1.4415271687351996, - "learning_rate": 4.0630129302918266e-06, - "loss": 0.145, - "step": 13987 - }, - { - "epoch": 0.71, - "grad_norm": 1.139274381955693, - "learning_rate": 4.06168771588264e-06, - "loss": 0.1895, - "step": 13988 - }, - { - "epoch": 0.71, - "grad_norm": 1.269776868117072, - "learning_rate": 4.0603626625505125e-06, - "loss": 0.1632, - "step": 13989 - }, - { - "epoch": 0.71, - "grad_norm": 0.9230653132887967, - "learning_rate": 4.059037770331379e-06, - "loss": 0.168, - "step": 13990 - }, - { - "epoch": 0.71, - "grad_norm": 1.0253563738907223, - "learning_rate": 4.057713039261182e-06, - "loss": 0.1611, - "step": 13991 - }, - { - "epoch": 0.71, - "grad_norm": 1.298699424162711, - "learning_rate": 4.056388469375853e-06, - "loss": 0.1808, - "step": 13992 - }, - { - "epoch": 0.71, - "grad_norm": 1.1550292532198296, - "learning_rate": 4.055064060711328e-06, - "loss": 0.1677, - "step": 13993 - }, - { - "epoch": 0.71, - "grad_norm": 1.1411457383366315, - "learning_rate": 4.0537398133035225e-06, - "loss": 0.1628, - "step": 13994 - }, - { - "epoch": 0.71, - "grad_norm": 0.9678233487865195, - "learning_rate": 4.0524157271883635e-06, - "loss": 0.1843, - "step": 13995 - }, - { - "epoch": 0.71, - "grad_norm": 1.0057253640193635, - "learning_rate": 4.05109180240176e-06, - "loss": 0.1664, - "step": 13996 - }, - { - "epoch": 0.71, - "grad_norm": 1.8006213175726598, - "learning_rate": 4.049768038979631e-06, - "loss": 0.1781, - "step": 13997 - }, - { - "epoch": 0.71, - "grad_norm": 1.2773225084954483, - "learning_rate": 4.0484444369578764e-06, - "loss": 0.1591, - "step": 13998 - }, - { - "epoch": 0.71, - "grad_norm": 1.18123877918416, - "learning_rate": 4.047120996372403e-06, - "loss": 0.1706, - "step": 13999 - }, - { - "epoch": 0.71, - "grad_norm": 1.6529702566124738, - "learning_rate": 4.045797717259109e-06, - "loss": 0.1905, - "step": 14000 - }, - { - "epoch": 0.71, - "grad_norm": 1.3302918737464733, - "learning_rate": 4.044474599653891e-06, - "loss": 0.1764, - "step": 14001 - }, - { - "epoch": 0.71, - "grad_norm": 1.9162525803200723, - "learning_rate": 4.0431516435926365e-06, - "loss": 0.1754, - "step": 14002 - }, - { - "epoch": 0.71, - "grad_norm": 1.316548792491148, - "learning_rate": 4.0418288491112255e-06, - "loss": 0.1725, - "step": 14003 - }, - { - "epoch": 0.71, - "grad_norm": 0.9532510067195563, - "learning_rate": 4.0405062162455474e-06, - "loss": 0.184, - "step": 14004 - }, - { - "epoch": 0.71, - "grad_norm": 1.2904459935716295, - "learning_rate": 4.0391837450314715e-06, - "loss": 0.1592, - "step": 14005 - }, - { - "epoch": 0.71, - "grad_norm": 0.831784096015568, - "learning_rate": 4.037861435504877e-06, - "loss": 0.1922, - "step": 14006 - }, - { - "epoch": 0.71, - "grad_norm": 0.9590948888956858, - "learning_rate": 4.0365392877016255e-06, - "loss": 0.1639, - "step": 14007 - }, - { - "epoch": 0.71, - "grad_norm": 0.7950257845294156, - "learning_rate": 4.03521730165758e-06, - "loss": 0.1488, - "step": 14008 - }, - { - "epoch": 0.71, - "grad_norm": 0.8696092538942773, - "learning_rate": 4.033895477408604e-06, - "loss": 0.1593, - "step": 14009 - }, - { - "epoch": 0.71, - "grad_norm": 1.1425549882601498, - "learning_rate": 4.032573814990553e-06, - "loss": 0.1876, - "step": 14010 - }, - { - "epoch": 0.71, - "grad_norm": 0.9434310996016758, - "learning_rate": 4.03125231443927e-06, - "loss": 0.1984, - "step": 14011 - }, - { - "epoch": 0.71, - "grad_norm": 1.0730244990456292, - "learning_rate": 4.02993097579061e-06, - "loss": 0.181, - "step": 14012 - }, - { - "epoch": 0.71, - "grad_norm": 1.322629645251612, - "learning_rate": 4.028609799080405e-06, - "loss": 0.1739, - "step": 14013 - }, - { - "epoch": 0.71, - "grad_norm": 1.5905595740156597, - "learning_rate": 4.0272887843445005e-06, - "loss": 0.1468, - "step": 14014 - }, - { - "epoch": 0.71, - "grad_norm": 1.4081861558909625, - "learning_rate": 4.025967931618722e-06, - "loss": 0.1725, - "step": 14015 - }, - { - "epoch": 0.71, - "grad_norm": 0.9231296337146756, - "learning_rate": 4.024647240938904e-06, - "loss": 0.1671, - "step": 14016 - }, - { - "epoch": 0.71, - "grad_norm": 1.0159252032635526, - "learning_rate": 4.0233267123408626e-06, - "loss": 0.1688, - "step": 14017 - }, - { - "epoch": 0.71, - "grad_norm": 0.9687980046675957, - "learning_rate": 4.022006345860422e-06, - "loss": 0.1691, - "step": 14018 - }, - { - "epoch": 0.71, - "grad_norm": 1.656629955728825, - "learning_rate": 4.020686141533401e-06, - "loss": 0.1753, - "step": 14019 - }, - { - "epoch": 0.71, - "grad_norm": 1.0112391428165952, - "learning_rate": 4.019366099395602e-06, - "loss": 0.1519, - "step": 14020 - }, - { - "epoch": 0.71, - "grad_norm": 0.8022414046323324, - "learning_rate": 4.01804621948284e-06, - "loss": 0.163, - "step": 14021 - }, - { - "epoch": 0.71, - "grad_norm": 0.8621081733595416, - "learning_rate": 4.0167265018309075e-06, - "loss": 0.162, - "step": 14022 - }, - { - "epoch": 0.71, - "grad_norm": 0.9482586410500872, - "learning_rate": 4.01540694647561e-06, - "loss": 0.1671, - "step": 14023 - }, - { - "epoch": 0.71, - "grad_norm": 0.8599979611209102, - "learning_rate": 4.014087553452734e-06, - "loss": 0.1569, - "step": 14024 - }, - { - "epoch": 0.71, - "grad_norm": 1.224541117890549, - "learning_rate": 4.012768322798072e-06, - "loss": 0.1801, - "step": 14025 - }, - { - "epoch": 0.71, - "grad_norm": 1.1503432737597172, - "learning_rate": 4.011449254547406e-06, - "loss": 0.1693, - "step": 14026 - }, - { - "epoch": 0.71, - "grad_norm": 0.8289679623687345, - "learning_rate": 4.010130348736522e-06, - "loss": 0.1473, - "step": 14027 - }, - { - "epoch": 0.71, - "grad_norm": 0.9223129355750407, - "learning_rate": 4.008811605401186e-06, - "loss": 0.1674, - "step": 14028 - }, - { - "epoch": 0.71, - "grad_norm": 1.1534141512590803, - "learning_rate": 4.007493024577177e-06, - "loss": 0.1977, - "step": 14029 - }, - { - "epoch": 0.71, - "grad_norm": 0.8860937852779077, - "learning_rate": 4.006174606300255e-06, - "loss": 0.1612, - "step": 14030 - }, - { - "epoch": 0.71, - "grad_norm": 0.8518361547364689, - "learning_rate": 4.00485635060619e-06, - "loss": 0.1679, - "step": 14031 - }, - { - "epoch": 0.71, - "grad_norm": 0.9889257574197703, - "learning_rate": 4.0035382575307306e-06, - "loss": 0.1586, - "step": 14032 - }, - { - "epoch": 0.71, - "grad_norm": 1.1061626270404616, - "learning_rate": 4.0022203271096375e-06, - "loss": 0.1824, - "step": 14033 - }, - { - "epoch": 0.71, - "grad_norm": 0.9897553297519481, - "learning_rate": 4.000902559378654e-06, - "loss": 0.1967, - "step": 14034 - }, - { - "epoch": 0.71, - "grad_norm": 0.8693727300851246, - "learning_rate": 3.999584954373528e-06, - "loss": 0.1628, - "step": 14035 - }, - { - "epoch": 0.71, - "grad_norm": 1.0355612812061994, - "learning_rate": 3.998267512130001e-06, - "loss": 0.1976, - "step": 14036 - }, - { - "epoch": 0.71, - "grad_norm": 0.8755614439478262, - "learning_rate": 3.996950232683804e-06, - "loss": 0.1737, - "step": 14037 - }, - { - "epoch": 0.71, - "grad_norm": 1.8701663683866392, - "learning_rate": 3.995633116070675e-06, - "loss": 0.1635, - "step": 14038 - }, - { - "epoch": 0.71, - "grad_norm": 1.364662616775321, - "learning_rate": 3.994316162326333e-06, - "loss": 0.1468, - "step": 14039 - }, - { - "epoch": 0.71, - "grad_norm": 1.0885834864885342, - "learning_rate": 3.992999371486508e-06, - "loss": 0.1716, - "step": 14040 - }, - { - "epoch": 0.71, - "grad_norm": 1.1753816593544923, - "learning_rate": 3.99168274358691e-06, - "loss": 0.1937, - "step": 14041 - }, - { - "epoch": 0.71, - "grad_norm": 1.0768423153042457, - "learning_rate": 3.990366278663258e-06, - "loss": 0.1579, - "step": 14042 - }, - { - "epoch": 0.71, - "grad_norm": 0.9510173102034292, - "learning_rate": 3.989049976751259e-06, - "loss": 0.1694, - "step": 14043 - }, - { - "epoch": 0.71, - "grad_norm": 1.0030856247148077, - "learning_rate": 3.987733837886622e-06, - "loss": 0.1723, - "step": 14044 - }, - { - "epoch": 0.71, - "grad_norm": 0.9019917277961534, - "learning_rate": 3.986417862105043e-06, - "loss": 0.174, - "step": 14045 - }, - { - "epoch": 0.71, - "grad_norm": 0.9420535261557448, - "learning_rate": 3.985102049442221e-06, - "loss": 0.1612, - "step": 14046 - }, - { - "epoch": 0.71, - "grad_norm": 0.9970341470186542, - "learning_rate": 3.983786399933842e-06, - "loss": 0.1801, - "step": 14047 - }, - { - "epoch": 0.71, - "grad_norm": 1.1198941603246906, - "learning_rate": 3.982470913615602e-06, - "loss": 0.1944, - "step": 14048 - }, - { - "epoch": 0.71, - "grad_norm": 1.031101344226233, - "learning_rate": 3.981155590523173e-06, - "loss": 0.1442, - "step": 14049 - }, - { - "epoch": 0.71, - "grad_norm": 2.4822485364807862, - "learning_rate": 3.979840430692242e-06, - "loss": 0.2225, - "step": 14050 - }, - { - "epoch": 0.71, - "grad_norm": 0.9894481492357582, - "learning_rate": 3.978525434158476e-06, - "loss": 0.1638, - "step": 14051 - }, - { - "epoch": 0.71, - "grad_norm": 1.0964076308612705, - "learning_rate": 3.977210600957548e-06, - "loss": 0.1754, - "step": 14052 - }, - { - "epoch": 0.71, - "grad_norm": 1.6665709589666882, - "learning_rate": 3.975895931125126e-06, - "loss": 0.1697, - "step": 14053 - }, - { - "epoch": 0.71, - "grad_norm": 2.000045807372957, - "learning_rate": 3.9745814246968654e-06, - "loss": 0.1786, - "step": 14054 - }, - { - "epoch": 0.71, - "grad_norm": 0.8953795174978475, - "learning_rate": 3.973267081708425e-06, - "loss": 0.1689, - "step": 14055 - }, - { - "epoch": 0.71, - "grad_norm": 3.3286514942564485, - "learning_rate": 3.971952902195453e-06, - "loss": 0.1663, - "step": 14056 - }, - { - "epoch": 0.71, - "grad_norm": 1.1203493039554666, - "learning_rate": 3.970638886193603e-06, - "loss": 0.1955, - "step": 14057 - }, - { - "epoch": 0.71, - "grad_norm": 0.8888136001531051, - "learning_rate": 3.969325033738509e-06, - "loss": 0.1964, - "step": 14058 - }, - { - "epoch": 0.71, - "grad_norm": 0.795777988010517, - "learning_rate": 3.968011344865819e-06, - "loss": 0.1531, - "step": 14059 - }, - { - "epoch": 0.71, - "grad_norm": 0.7770520518118822, - "learning_rate": 3.9666978196111575e-06, - "loss": 0.1632, - "step": 14060 - }, - { - "epoch": 0.72, - "grad_norm": 0.8263785189625037, - "learning_rate": 3.965384458010157e-06, - "loss": 0.1834, - "step": 14061 - }, - { - "epoch": 0.72, - "grad_norm": 0.854320039391229, - "learning_rate": 3.964071260098446e-06, - "loss": 0.1508, - "step": 14062 - }, - { - "epoch": 0.72, - "grad_norm": 1.3807512839982112, - "learning_rate": 3.962758225911646e-06, - "loss": 0.1575, - "step": 14063 - }, - { - "epoch": 0.72, - "grad_norm": 1.1555728871664732, - "learning_rate": 3.961445355485366e-06, - "loss": 0.1758, - "step": 14064 - }, - { - "epoch": 0.72, - "grad_norm": 0.8123345828753771, - "learning_rate": 3.960132648855226e-06, - "loss": 0.1758, - "step": 14065 - }, - { - "epoch": 0.72, - "grad_norm": 0.9460660456671849, - "learning_rate": 3.958820106056826e-06, - "loss": 0.156, - "step": 14066 - }, - { - "epoch": 0.72, - "grad_norm": 1.0004887583666053, - "learning_rate": 3.957507727125775e-06, - "loss": 0.1608, - "step": 14067 - }, - { - "epoch": 0.72, - "grad_norm": 1.3035912785558115, - "learning_rate": 3.956195512097664e-06, - "loss": 0.1651, - "step": 14068 - }, - { - "epoch": 0.72, - "grad_norm": 1.0046179848207974, - "learning_rate": 3.954883461008091e-06, - "loss": 0.1829, - "step": 14069 - }, - { - "epoch": 0.72, - "grad_norm": 0.990047289247749, - "learning_rate": 3.953571573892646e-06, - "loss": 0.1758, - "step": 14070 - }, - { - "epoch": 0.72, - "grad_norm": 0.8181189145002394, - "learning_rate": 3.9522598507869166e-06, - "loss": 0.1539, - "step": 14071 - }, - { - "epoch": 0.72, - "grad_norm": 0.8046299088957247, - "learning_rate": 3.95094829172648e-06, - "loss": 0.1725, - "step": 14072 - }, - { - "epoch": 0.72, - "grad_norm": 1.5116421607576094, - "learning_rate": 3.949636896746911e-06, - "loss": 0.1514, - "step": 14073 - }, - { - "epoch": 0.72, - "grad_norm": 1.1942340373605995, - "learning_rate": 3.948325665883785e-06, - "loss": 0.1796, - "step": 14074 - }, - { - "epoch": 0.72, - "grad_norm": 1.0544986056562093, - "learning_rate": 3.947014599172664e-06, - "loss": 0.1786, - "step": 14075 - }, - { - "epoch": 0.72, - "grad_norm": 1.336394823926345, - "learning_rate": 3.945703696649117e-06, - "loss": 0.1519, - "step": 14076 - }, - { - "epoch": 0.72, - "grad_norm": 0.944020747222741, - "learning_rate": 3.944392958348696e-06, - "loss": 0.1733, - "step": 14077 - }, - { - "epoch": 0.72, - "grad_norm": 1.8344168292689214, - "learning_rate": 3.943082384306958e-06, - "loss": 0.1558, - "step": 14078 - }, - { - "epoch": 0.72, - "grad_norm": 0.9462982215603182, - "learning_rate": 3.941771974559453e-06, - "loss": 0.1849, - "step": 14079 - }, - { - "epoch": 0.72, - "grad_norm": 1.1478756672396018, - "learning_rate": 3.940461729141728e-06, - "loss": 0.1716, - "step": 14080 - }, - { - "epoch": 0.72, - "grad_norm": 1.291699795537521, - "learning_rate": 3.939151648089317e-06, - "loss": 0.138, - "step": 14081 - }, - { - "epoch": 0.72, - "grad_norm": 1.4757223942660185, - "learning_rate": 3.937841731437765e-06, - "loss": 0.176, - "step": 14082 - }, - { - "epoch": 0.72, - "grad_norm": 0.9795969693563553, - "learning_rate": 3.936531979222593e-06, - "loss": 0.1532, - "step": 14083 - }, - { - "epoch": 0.72, - "grad_norm": 1.7695703125447169, - "learning_rate": 3.935222391479339e-06, - "loss": 0.1525, - "step": 14084 - }, - { - "epoch": 0.72, - "grad_norm": 0.8807274886889117, - "learning_rate": 3.933912968243515e-06, - "loss": 0.1697, - "step": 14085 - }, - { - "epoch": 0.72, - "grad_norm": 1.17677908764498, - "learning_rate": 3.9326037095506486e-06, - "loss": 0.1619, - "step": 14086 - }, - { - "epoch": 0.72, - "grad_norm": 0.8695497729024724, - "learning_rate": 3.931294615436245e-06, - "loss": 0.1745, - "step": 14087 - }, - { - "epoch": 0.72, - "grad_norm": 1.015175994237581, - "learning_rate": 3.929985685935819e-06, - "loss": 0.1685, - "step": 14088 - }, - { - "epoch": 0.72, - "grad_norm": 1.6135670353009806, - "learning_rate": 3.928676921084877e-06, - "loss": 0.177, - "step": 14089 - }, - { - "epoch": 0.72, - "grad_norm": 1.0624513862242058, - "learning_rate": 3.9273683209189115e-06, - "loss": 0.1712, - "step": 14090 - }, - { - "epoch": 0.72, - "grad_norm": 0.9178673325700115, - "learning_rate": 3.926059885473429e-06, - "loss": 0.1617, - "step": 14091 - }, - { - "epoch": 0.72, - "grad_norm": 1.0849630238704142, - "learning_rate": 3.9247516147839105e-06, - "loss": 0.1735, - "step": 14092 - }, - { - "epoch": 0.72, - "grad_norm": 0.9923777691551956, - "learning_rate": 3.923443508885851e-06, - "loss": 0.1583, - "step": 14093 - }, - { - "epoch": 0.72, - "grad_norm": 1.2803548963051437, - "learning_rate": 3.922135567814726e-06, - "loss": 0.1708, - "step": 14094 - }, - { - "epoch": 0.72, - "grad_norm": 0.9130041610458673, - "learning_rate": 3.920827791606018e-06, - "loss": 0.1616, - "step": 14095 - }, - { - "epoch": 0.72, - "grad_norm": 1.0466901162138669, - "learning_rate": 3.919520180295199e-06, - "loss": 0.1615, - "step": 14096 - }, - { - "epoch": 0.72, - "grad_norm": 1.0599024632729042, - "learning_rate": 3.918212733917742e-06, - "loss": 0.1568, - "step": 14097 - }, - { - "epoch": 0.72, - "grad_norm": 1.1164968866332436, - "learning_rate": 3.9169054525091045e-06, - "loss": 0.1734, - "step": 14098 - }, - { - "epoch": 0.72, - "grad_norm": 0.9097229462116942, - "learning_rate": 3.915598336104754e-06, - "loss": 0.1647, - "step": 14099 - }, - { - "epoch": 0.72, - "grad_norm": 0.9607652166214575, - "learning_rate": 3.914291384740139e-06, - "loss": 0.1484, - "step": 14100 - }, - { - "epoch": 0.72, - "grad_norm": 1.1161152426737857, - "learning_rate": 3.912984598450716e-06, - "loss": 0.1711, - "step": 14101 - }, - { - "epoch": 0.72, - "grad_norm": 1.0838039029190736, - "learning_rate": 3.9116779772719274e-06, - "loss": 0.1828, - "step": 14102 - }, - { - "epoch": 0.72, - "grad_norm": 1.396753142997566, - "learning_rate": 3.91037152123922e-06, - "loss": 0.1637, - "step": 14103 - }, - { - "epoch": 0.72, - "grad_norm": 1.299828907754022, - "learning_rate": 3.9090652303880265e-06, - "loss": 0.1842, - "step": 14104 - }, - { - "epoch": 0.72, - "grad_norm": 1.5028157315567607, - "learning_rate": 3.907759104753782e-06, - "loss": 0.1568, - "step": 14105 - }, - { - "epoch": 0.72, - "grad_norm": 1.1985317318332738, - "learning_rate": 3.90645314437192e-06, - "loss": 0.1913, - "step": 14106 - }, - { - "epoch": 0.72, - "grad_norm": 1.251572368953231, - "learning_rate": 3.905147349277857e-06, - "loss": 0.1689, - "step": 14107 - }, - { - "epoch": 0.72, - "grad_norm": 1.3102618168442373, - "learning_rate": 3.9038417195070196e-06, - "loss": 0.1952, - "step": 14108 - }, - { - "epoch": 0.72, - "grad_norm": 1.0520810466721868, - "learning_rate": 3.902536255094816e-06, - "loss": 0.1655, - "step": 14109 - }, - { - "epoch": 0.72, - "grad_norm": 0.7655259367920346, - "learning_rate": 3.901230956076665e-06, - "loss": 0.165, - "step": 14110 - }, - { - "epoch": 0.72, - "grad_norm": 1.2413659929656236, - "learning_rate": 3.899925822487965e-06, - "loss": 0.1652, - "step": 14111 - }, - { - "epoch": 0.72, - "grad_norm": 0.9601156709495291, - "learning_rate": 3.898620854364126e-06, - "loss": 0.182, - "step": 14112 - }, - { - "epoch": 0.72, - "grad_norm": 1.0182150466816644, - "learning_rate": 3.897316051740536e-06, - "loss": 0.1802, - "step": 14113 - }, - { - "epoch": 0.72, - "grad_norm": 0.9465330640666286, - "learning_rate": 3.896011414652593e-06, - "loss": 0.1558, - "step": 14114 - }, - { - "epoch": 0.72, - "grad_norm": 0.8495037191629528, - "learning_rate": 3.894706943135686e-06, - "loss": 0.1619, - "step": 14115 - }, - { - "epoch": 0.72, - "grad_norm": 3.749177420998462, - "learning_rate": 3.893402637225201e-06, - "loss": 0.1738, - "step": 14116 - }, - { - "epoch": 0.72, - "grad_norm": 1.0153253780913176, - "learning_rate": 3.892098496956511e-06, - "loss": 0.1617, - "step": 14117 - }, - { - "epoch": 0.72, - "grad_norm": 1.2844861536349312, - "learning_rate": 3.890794522364998e-06, - "loss": 0.1844, - "step": 14118 - }, - { - "epoch": 0.72, - "grad_norm": 1.1477401726044283, - "learning_rate": 3.8894907134860236e-06, - "loss": 0.1761, - "step": 14119 - }, - { - "epoch": 0.72, - "grad_norm": 0.9576356068272893, - "learning_rate": 3.888187070354964e-06, - "loss": 0.1847, - "step": 14120 - }, - { - "epoch": 0.72, - "grad_norm": 0.8866710011707144, - "learning_rate": 3.886883593007171e-06, - "loss": 0.1629, - "step": 14121 - }, - { - "epoch": 0.72, - "grad_norm": 0.853715620160077, - "learning_rate": 3.885580281478007e-06, - "loss": 0.1636, - "step": 14122 - }, - { - "epoch": 0.72, - "grad_norm": 1.0522413642603576, - "learning_rate": 3.8842771358028254e-06, - "loss": 0.1828, - "step": 14123 - }, - { - "epoch": 0.72, - "grad_norm": 0.7705310953095824, - "learning_rate": 3.882974156016968e-06, - "loss": 0.146, - "step": 14124 - }, - { - "epoch": 0.72, - "grad_norm": 0.9935591406214743, - "learning_rate": 3.881671342155786e-06, - "loss": 0.1927, - "step": 14125 - }, - { - "epoch": 0.72, - "grad_norm": 1.0289400849388903, - "learning_rate": 3.880368694254612e-06, - "loss": 0.1944, - "step": 14126 - }, - { - "epoch": 0.72, - "grad_norm": 1.075369092240746, - "learning_rate": 3.879066212348786e-06, - "loss": 0.1874, - "step": 14127 - }, - { - "epoch": 0.72, - "grad_norm": 1.1412571748621356, - "learning_rate": 3.877763896473629e-06, - "loss": 0.1736, - "step": 14128 - }, - { - "epoch": 0.72, - "grad_norm": 0.8994203625034884, - "learning_rate": 3.876461746664478e-06, - "loss": 0.1513, - "step": 14129 - }, - { - "epoch": 0.72, - "grad_norm": 0.8771349317185486, - "learning_rate": 3.875159762956644e-06, - "loss": 0.178, - "step": 14130 - }, - { - "epoch": 0.72, - "grad_norm": 1.0027658517118203, - "learning_rate": 3.873857945385447e-06, - "loss": 0.1679, - "step": 14131 - }, - { - "epoch": 0.72, - "grad_norm": 0.87315016577878, - "learning_rate": 3.8725562939862e-06, - "loss": 0.1711, - "step": 14132 - }, - { - "epoch": 0.72, - "grad_norm": 0.8900586829521311, - "learning_rate": 3.871254808794213e-06, - "loss": 0.1691, - "step": 14133 - }, - { - "epoch": 0.72, - "grad_norm": 0.8966203204273094, - "learning_rate": 3.869953489844781e-06, - "loss": 0.1407, - "step": 14134 - }, - { - "epoch": 0.72, - "grad_norm": 0.998730627617709, - "learning_rate": 3.868652337173211e-06, - "loss": 0.1883, - "step": 14135 - }, - { - "epoch": 0.72, - "grad_norm": 1.7719003043359163, - "learning_rate": 3.8673513508147885e-06, - "loss": 0.161, - "step": 14136 - }, - { - "epoch": 0.72, - "grad_norm": 0.8156765100824103, - "learning_rate": 3.866050530804811e-06, - "loss": 0.141, - "step": 14137 - }, - { - "epoch": 0.72, - "grad_norm": 0.8042549971576102, - "learning_rate": 3.864749877178556e-06, - "loss": 0.1637, - "step": 14138 - }, - { - "epoch": 0.72, - "grad_norm": 0.8790603683540212, - "learning_rate": 3.86344938997131e-06, - "loss": 0.1742, - "step": 14139 - }, - { - "epoch": 0.72, - "grad_norm": 1.54209637857875, - "learning_rate": 3.862149069218343e-06, - "loss": 0.176, - "step": 14140 - }, - { - "epoch": 0.72, - "grad_norm": 1.0170286994576827, - "learning_rate": 3.8608489149549286e-06, - "loss": 0.1591, - "step": 14141 - }, - { - "epoch": 0.72, - "grad_norm": 0.9255760660296493, - "learning_rate": 3.8595489272163375e-06, - "loss": 0.181, - "step": 14142 - }, - { - "epoch": 0.72, - "grad_norm": 0.9384850020253076, - "learning_rate": 3.858249106037826e-06, - "loss": 0.1547, - "step": 14143 - }, - { - "epoch": 0.72, - "grad_norm": 1.1219354082933488, - "learning_rate": 3.856949451454658e-06, - "loss": 0.1847, - "step": 14144 - }, - { - "epoch": 0.72, - "grad_norm": 1.3725720945269748, - "learning_rate": 3.855649963502078e-06, - "loss": 0.2026, - "step": 14145 - }, - { - "epoch": 0.72, - "grad_norm": 2.826362384690696, - "learning_rate": 3.854350642215344e-06, - "loss": 0.2124, - "step": 14146 - }, - { - "epoch": 0.72, - "grad_norm": 1.0420657943593228, - "learning_rate": 3.853051487629693e-06, - "loss": 0.1555, - "step": 14147 - }, - { - "epoch": 0.72, - "grad_norm": 1.1153510092050685, - "learning_rate": 3.851752499780368e-06, - "loss": 0.1651, - "step": 14148 - }, - { - "epoch": 0.72, - "grad_norm": 0.9459742492520735, - "learning_rate": 3.8504536787026025e-06, - "loss": 0.1678, - "step": 14149 - }, - { - "epoch": 0.72, - "grad_norm": 1.0397145526569862, - "learning_rate": 3.8491550244316326e-06, - "loss": 0.171, - "step": 14150 - }, - { - "epoch": 0.72, - "grad_norm": 0.9655573592566931, - "learning_rate": 3.847856537002677e-06, - "loss": 0.1736, - "step": 14151 - }, - { - "epoch": 0.72, - "grad_norm": 0.9220548799507059, - "learning_rate": 3.846558216450962e-06, - "loss": 0.1524, - "step": 14152 - }, - { - "epoch": 0.72, - "grad_norm": 1.0456814200250961, - "learning_rate": 3.845260062811701e-06, - "loss": 0.1764, - "step": 14153 - }, - { - "epoch": 0.72, - "grad_norm": 0.9412885631888973, - "learning_rate": 3.843962076120111e-06, - "loss": 0.1722, - "step": 14154 - }, - { - "epoch": 0.72, - "grad_norm": 0.8769782231890678, - "learning_rate": 3.842664256411393e-06, - "loss": 0.1711, - "step": 14155 - }, - { - "epoch": 0.72, - "grad_norm": 0.9941968030192654, - "learning_rate": 3.841366603720761e-06, - "loss": 0.1581, - "step": 14156 - }, - { - "epoch": 0.72, - "grad_norm": 0.81692305819075, - "learning_rate": 3.840069118083403e-06, - "loss": 0.1729, - "step": 14157 - }, - { - "epoch": 0.72, - "grad_norm": 1.5179566558436612, - "learning_rate": 3.838771799534518e-06, - "loss": 0.1832, - "step": 14158 - }, - { - "epoch": 0.72, - "grad_norm": 0.9901563215065169, - "learning_rate": 3.837474648109298e-06, - "loss": 0.1637, - "step": 14159 - }, - { - "epoch": 0.72, - "grad_norm": 1.6951338937784872, - "learning_rate": 3.836177663842925e-06, - "loss": 0.1659, - "step": 14160 - }, - { - "epoch": 0.72, - "grad_norm": 0.9560269248745517, - "learning_rate": 3.834880846770584e-06, - "loss": 0.1683, - "step": 14161 - }, - { - "epoch": 0.72, - "grad_norm": 1.1981393142829408, - "learning_rate": 3.833584196927443e-06, - "loss": 0.1925, - "step": 14162 - }, - { - "epoch": 0.72, - "grad_norm": 0.737439399910469, - "learning_rate": 3.8322877143486835e-06, - "loss": 0.1732, - "step": 14163 - }, - { - "epoch": 0.72, - "grad_norm": 0.9970214572536786, - "learning_rate": 3.830991399069466e-06, - "loss": 0.1662, - "step": 14164 - }, - { - "epoch": 0.72, - "grad_norm": 0.9038070805127413, - "learning_rate": 3.829695251124953e-06, - "loss": 0.1659, - "step": 14165 - }, - { - "epoch": 0.72, - "grad_norm": 0.8343905841105865, - "learning_rate": 3.828399270550306e-06, - "loss": 0.1765, - "step": 14166 - }, - { - "epoch": 0.72, - "grad_norm": 0.7514551033421455, - "learning_rate": 3.827103457380681e-06, - "loss": 0.1569, - "step": 14167 - }, - { - "epoch": 0.72, - "grad_norm": 1.1434878349475175, - "learning_rate": 3.82580781165122e-06, - "loss": 0.1675, - "step": 14168 - }, - { - "epoch": 0.72, - "grad_norm": 1.0057817293164968, - "learning_rate": 3.824512333397073e-06, - "loss": 0.185, - "step": 14169 - }, - { - "epoch": 0.72, - "grad_norm": 1.2729546710261415, - "learning_rate": 3.823217022653376e-06, - "loss": 0.1975, - "step": 14170 - }, - { - "epoch": 0.72, - "grad_norm": 0.81564027380783, - "learning_rate": 3.821921879455268e-06, - "loss": 0.1631, - "step": 14171 - }, - { - "epoch": 0.72, - "grad_norm": 0.8976442491961399, - "learning_rate": 3.820626903837875e-06, - "loss": 0.1569, - "step": 14172 - }, - { - "epoch": 0.72, - "grad_norm": 0.9607703608651237, - "learning_rate": 3.81933209583633e-06, - "loss": 0.1783, - "step": 14173 - }, - { - "epoch": 0.72, - "grad_norm": 0.98830563137594, - "learning_rate": 3.818037455485748e-06, - "loss": 0.195, - "step": 14174 - }, - { - "epoch": 0.72, - "grad_norm": 0.967548832430743, - "learning_rate": 3.816742982821249e-06, - "loss": 0.1873, - "step": 14175 - }, - { - "epoch": 0.72, - "grad_norm": 0.9725562983600868, - "learning_rate": 3.815448677877949e-06, - "loss": 0.1637, - "step": 14176 - }, - { - "epoch": 0.72, - "grad_norm": 0.9534466925619756, - "learning_rate": 3.8141545406909486e-06, - "loss": 0.2168, - "step": 14177 - }, - { - "epoch": 0.72, - "grad_norm": 0.9022686390988373, - "learning_rate": 3.8128605712953606e-06, - "loss": 0.1771, - "step": 14178 - }, - { - "epoch": 0.72, - "grad_norm": 0.7895997669461096, - "learning_rate": 3.811566769726275e-06, - "loss": 0.1788, - "step": 14179 - }, - { - "epoch": 0.72, - "grad_norm": 1.026042586429769, - "learning_rate": 3.810273136018793e-06, - "loss": 0.1561, - "step": 14180 - }, - { - "epoch": 0.72, - "grad_norm": 0.7893571675039319, - "learning_rate": 3.8089796702079996e-06, - "loss": 0.1781, - "step": 14181 - }, - { - "epoch": 0.72, - "grad_norm": 1.6919757359686876, - "learning_rate": 3.8076863723289847e-06, - "loss": 0.1631, - "step": 14182 - }, - { - "epoch": 0.72, - "grad_norm": 0.862175568391283, - "learning_rate": 3.8063932424168236e-06, - "loss": 0.1673, - "step": 14183 - }, - { - "epoch": 0.72, - "grad_norm": 0.8485517752972715, - "learning_rate": 3.8051002805065964e-06, - "loss": 0.1934, - "step": 14184 - }, - { - "epoch": 0.72, - "grad_norm": 1.9233190973713077, - "learning_rate": 3.803807486633373e-06, - "loss": 0.1653, - "step": 14185 - }, - { - "epoch": 0.72, - "grad_norm": 1.1319349294113454, - "learning_rate": 3.802514860832225e-06, - "loss": 0.1909, - "step": 14186 - }, - { - "epoch": 0.72, - "grad_norm": 0.8653577480393039, - "learning_rate": 3.8012224031382084e-06, - "loss": 0.1779, - "step": 14187 - }, - { - "epoch": 0.72, - "grad_norm": 1.1929076075384082, - "learning_rate": 3.7999301135863875e-06, - "loss": 0.1731, - "step": 14188 - }, - { - "epoch": 0.72, - "grad_norm": 0.9413376467983017, - "learning_rate": 3.7986379922118087e-06, - "loss": 0.1551, - "step": 14189 - }, - { - "epoch": 0.72, - "grad_norm": 0.9052220902235042, - "learning_rate": 3.797346039049529e-06, - "loss": 0.1828, - "step": 14190 - }, - { - "epoch": 0.72, - "grad_norm": 0.9246863882687407, - "learning_rate": 3.7960542541345836e-06, - "loss": 0.1973, - "step": 14191 - }, - { - "epoch": 0.72, - "grad_norm": 0.8486898931706738, - "learning_rate": 3.7947626375020173e-06, - "loss": 0.1641, - "step": 14192 - }, - { - "epoch": 0.72, - "grad_norm": 1.2499576012040685, - "learning_rate": 3.793471189186869e-06, - "loss": 0.1742, - "step": 14193 - }, - { - "epoch": 0.72, - "grad_norm": 0.8500478491305098, - "learning_rate": 3.792179909224162e-06, - "loss": 0.1686, - "step": 14194 - }, - { - "epoch": 0.72, - "grad_norm": 0.9980383111334427, - "learning_rate": 3.7908887976489284e-06, - "loss": 0.2013, - "step": 14195 - }, - { - "epoch": 0.72, - "grad_norm": 1.557717958905487, - "learning_rate": 3.789597854496183e-06, - "loss": 0.1621, - "step": 14196 - }, - { - "epoch": 0.72, - "grad_norm": 1.172579101340141, - "learning_rate": 3.7883070798009503e-06, - "loss": 0.1838, - "step": 14197 - }, - { - "epoch": 0.72, - "grad_norm": 1.5335676877341493, - "learning_rate": 3.7870164735982363e-06, - "loss": 0.1741, - "step": 14198 - }, - { - "epoch": 0.72, - "grad_norm": 1.236872824666607, - "learning_rate": 3.7857260359230543e-06, - "loss": 0.1668, - "step": 14199 - }, - { - "epoch": 0.72, - "grad_norm": 1.0325130618195033, - "learning_rate": 3.7844357668104005e-06, - "loss": 0.1567, - "step": 14200 - }, - { - "epoch": 0.72, - "grad_norm": 1.1342609581997496, - "learning_rate": 3.7831456662952783e-06, - "loss": 0.1772, - "step": 14201 - }, - { - "epoch": 0.72, - "grad_norm": 1.4083831164497156, - "learning_rate": 3.7818557344126807e-06, - "loss": 0.1642, - "step": 14202 - }, - { - "epoch": 0.72, - "grad_norm": 0.8579470681672271, - "learning_rate": 3.7805659711976007e-06, - "loss": 0.1697, - "step": 14203 - }, - { - "epoch": 0.72, - "grad_norm": 1.0276154408565137, - "learning_rate": 3.779276376685017e-06, - "loss": 0.1614, - "step": 14204 - }, - { - "epoch": 0.72, - "grad_norm": 1.1292194703813447, - "learning_rate": 3.7779869509099166e-06, - "loss": 0.1716, - "step": 14205 - }, - { - "epoch": 0.72, - "grad_norm": 0.9358439759067537, - "learning_rate": 3.7766976939072673e-06, - "loss": 0.173, - "step": 14206 - }, - { - "epoch": 0.72, - "grad_norm": 0.7796355039284816, - "learning_rate": 3.7754086057120486e-06, - "loss": 0.1449, - "step": 14207 - }, - { - "epoch": 0.72, - "grad_norm": 1.056734691141821, - "learning_rate": 3.77411968635922e-06, - "loss": 0.1628, - "step": 14208 - }, - { - "epoch": 0.72, - "grad_norm": 0.9221052651830727, - "learning_rate": 3.772830935883749e-06, - "loss": 0.1563, - "step": 14209 - }, - { - "epoch": 0.72, - "grad_norm": 1.0631227692867489, - "learning_rate": 3.7715423543205875e-06, - "loss": 0.1683, - "step": 14210 - }, - { - "epoch": 0.72, - "grad_norm": 0.7410763969252387, - "learning_rate": 3.7702539417046923e-06, - "loss": 0.1603, - "step": 14211 - }, - { - "epoch": 0.72, - "grad_norm": 0.9452639562381826, - "learning_rate": 3.7689656980710132e-06, - "loss": 0.189, - "step": 14212 - }, - { - "epoch": 0.72, - "grad_norm": 1.6908879201876998, - "learning_rate": 3.7676776234544876e-06, - "loss": 0.1638, - "step": 14213 - }, - { - "epoch": 0.72, - "grad_norm": 1.0159422655316006, - "learning_rate": 3.7663897178900634e-06, - "loss": 0.1612, - "step": 14214 - }, - { - "epoch": 0.72, - "grad_norm": 0.870214529701722, - "learning_rate": 3.7651019814126656e-06, - "loss": 0.1632, - "step": 14215 - }, - { - "epoch": 0.72, - "grad_norm": 1.3077317836042965, - "learning_rate": 3.763814414057233e-06, - "loss": 0.1788, - "step": 14216 - }, - { - "epoch": 0.72, - "grad_norm": 2.0002530570640173, - "learning_rate": 3.7625270158586824e-06, - "loss": 0.1536, - "step": 14217 - }, - { - "epoch": 0.72, - "grad_norm": 1.3738436004166406, - "learning_rate": 3.761239786851939e-06, - "loss": 0.1561, - "step": 14218 - }, - { - "epoch": 0.72, - "grad_norm": 1.354949019454525, - "learning_rate": 3.7599527270719183e-06, - "loss": 0.1437, - "step": 14219 - }, - { - "epoch": 0.72, - "grad_norm": 1.5472545692675839, - "learning_rate": 3.7586658365535367e-06, - "loss": 0.1568, - "step": 14220 - }, - { - "epoch": 0.72, - "grad_norm": 0.9656835326636025, - "learning_rate": 3.757379115331693e-06, - "loss": 0.1802, - "step": 14221 - }, - { - "epoch": 0.72, - "grad_norm": 0.9776349541649698, - "learning_rate": 3.756092563441297e-06, - "loss": 0.1656, - "step": 14222 - }, - { - "epoch": 0.72, - "grad_norm": 0.8852204987916643, - "learning_rate": 3.754806180917239e-06, - "loss": 0.1921, - "step": 14223 - }, - { - "epoch": 0.72, - "grad_norm": 0.9167565711720689, - "learning_rate": 3.75351996779442e-06, - "loss": 0.1775, - "step": 14224 - }, - { - "epoch": 0.72, - "grad_norm": 0.8952438830379588, - "learning_rate": 3.752233924107721e-06, - "loss": 0.16, - "step": 14225 - }, - { - "epoch": 0.72, - "grad_norm": 1.2955265466290538, - "learning_rate": 3.7509480498920325e-06, - "loss": 0.1617, - "step": 14226 - }, - { - "epoch": 0.72, - "grad_norm": 0.9378117311047315, - "learning_rate": 3.749662345182229e-06, - "loss": 0.1589, - "step": 14227 - }, - { - "epoch": 0.72, - "grad_norm": 1.1886155582753781, - "learning_rate": 3.7483768100131857e-06, - "loss": 0.1915, - "step": 14228 - }, - { - "epoch": 0.72, - "grad_norm": 1.0826107082122094, - "learning_rate": 3.7470914444197793e-06, - "loss": 0.1709, - "step": 14229 - }, - { - "epoch": 0.72, - "grad_norm": 1.0393811372304311, - "learning_rate": 3.745806248436866e-06, - "loss": 0.1554, - "step": 14230 - }, - { - "epoch": 0.72, - "grad_norm": 1.236921987649103, - "learning_rate": 3.7445212220993167e-06, - "loss": 0.1815, - "step": 14231 - }, - { - "epoch": 0.72, - "grad_norm": 1.2104292988068672, - "learning_rate": 3.743236365441978e-06, - "loss": 0.2004, - "step": 14232 - }, - { - "epoch": 0.72, - "grad_norm": 1.6439370614882838, - "learning_rate": 3.7419516784997102e-06, - "loss": 0.1716, - "step": 14233 - }, - { - "epoch": 0.72, - "grad_norm": 0.8458498080848184, - "learning_rate": 3.740667161307352e-06, - "loss": 0.1576, - "step": 14234 - }, - { - "epoch": 0.72, - "grad_norm": 1.1550376497930346, - "learning_rate": 3.7393828138997543e-06, - "loss": 0.1706, - "step": 14235 - }, - { - "epoch": 0.72, - "grad_norm": 1.3056922913399747, - "learning_rate": 3.7380986363117488e-06, - "loss": 0.1802, - "step": 14236 - }, - { - "epoch": 0.72, - "grad_norm": 1.0611836357952285, - "learning_rate": 3.7368146285781716e-06, - "loss": 0.1553, - "step": 14237 - }, - { - "epoch": 0.72, - "grad_norm": 1.5617880165016602, - "learning_rate": 3.73553079073385e-06, - "loss": 0.173, - "step": 14238 - }, - { - "epoch": 0.72, - "grad_norm": 1.1187420114648672, - "learning_rate": 3.7342471228136148e-06, - "loss": 0.1655, - "step": 14239 - }, - { - "epoch": 0.72, - "grad_norm": 0.8656618888307698, - "learning_rate": 3.732963624852275e-06, - "loss": 0.1645, - "step": 14240 - }, - { - "epoch": 0.72, - "grad_norm": 1.0527586977075543, - "learning_rate": 3.7316802968846555e-06, - "loss": 0.1905, - "step": 14241 - }, - { - "epoch": 0.72, - "grad_norm": 0.9653891403786222, - "learning_rate": 3.7303971389455584e-06, - "loss": 0.1785, - "step": 14242 - }, - { - "epoch": 0.72, - "grad_norm": 0.8007572110573549, - "learning_rate": 3.7291141510697957e-06, - "loss": 0.1631, - "step": 14243 - }, - { - "epoch": 0.72, - "grad_norm": 1.0979067825574946, - "learning_rate": 3.7278313332921634e-06, - "loss": 0.168, - "step": 14244 - }, - { - "epoch": 0.72, - "grad_norm": 1.0376817166504873, - "learning_rate": 3.72654868564746e-06, - "loss": 0.2146, - "step": 14245 - }, - { - "epoch": 0.72, - "grad_norm": 0.870826236426534, - "learning_rate": 3.7252662081704806e-06, - "loss": 0.1576, - "step": 14246 - }, - { - "epoch": 0.72, - "grad_norm": 1.1024186957206004, - "learning_rate": 3.7239839008960066e-06, - "loss": 0.1845, - "step": 14247 - }, - { - "epoch": 0.72, - "grad_norm": 0.9461416873285, - "learning_rate": 3.722701763858828e-06, - "loss": 0.1605, - "step": 14248 - }, - { - "epoch": 0.72, - "grad_norm": 0.8203311355747611, - "learning_rate": 3.721419797093715e-06, - "loss": 0.1727, - "step": 14249 - }, - { - "epoch": 0.72, - "grad_norm": 0.9325230397827498, - "learning_rate": 3.720138000635447e-06, - "loss": 0.1751, - "step": 14250 - }, - { - "epoch": 0.72, - "grad_norm": 1.115454803046617, - "learning_rate": 3.718856374518788e-06, - "loss": 0.1702, - "step": 14251 - }, - { - "epoch": 0.72, - "grad_norm": 0.8502188295421723, - "learning_rate": 3.717574918778507e-06, - "loss": 0.1666, - "step": 14252 - }, - { - "epoch": 0.72, - "grad_norm": 1.1228808366873253, - "learning_rate": 3.7162936334493594e-06, - "loss": 0.1701, - "step": 14253 - }, - { - "epoch": 0.72, - "grad_norm": 0.8689147468875956, - "learning_rate": 3.7150125185661e-06, - "loss": 0.1704, - "step": 14254 - }, - { - "epoch": 0.72, - "grad_norm": 0.9488898299242475, - "learning_rate": 3.7137315741634825e-06, - "loss": 0.1882, - "step": 14255 - }, - { - "epoch": 0.72, - "grad_norm": 1.0817092141613454, - "learning_rate": 3.7124508002762537e-06, - "loss": 0.1599, - "step": 14256 - }, - { - "epoch": 0.72, - "grad_norm": 1.166908832486912, - "learning_rate": 3.711170196939149e-06, - "loss": 0.1724, - "step": 14257 - }, - { - "epoch": 0.73, - "grad_norm": 0.8814828284152649, - "learning_rate": 3.7098897641869113e-06, - "loss": 0.1781, - "step": 14258 - }, - { - "epoch": 0.73, - "grad_norm": 0.9455121000726662, - "learning_rate": 3.7086095020542655e-06, - "loss": 0.1781, - "step": 14259 - }, - { - "epoch": 0.73, - "grad_norm": 1.1232332362340367, - "learning_rate": 3.7073294105759462e-06, - "loss": 0.1884, - "step": 14260 - }, - { - "epoch": 0.73, - "grad_norm": 0.8402922663453951, - "learning_rate": 3.706049489786667e-06, - "loss": 0.1541, - "step": 14261 - }, - { - "epoch": 0.73, - "grad_norm": 0.9859218401322686, - "learning_rate": 3.704769739721156e-06, - "loss": 0.1641, - "step": 14262 - }, - { - "epoch": 0.73, - "grad_norm": 1.110810403466431, - "learning_rate": 3.703490160414117e-06, - "loss": 0.187, - "step": 14263 - }, - { - "epoch": 0.73, - "grad_norm": 1.0070633935527822, - "learning_rate": 3.7022107519002635e-06, - "loss": 0.1602, - "step": 14264 - }, - { - "epoch": 0.73, - "grad_norm": 2.5144713975719286, - "learning_rate": 3.7009315142143033e-06, - "loss": 0.1777, - "step": 14265 - }, - { - "epoch": 0.73, - "grad_norm": 1.3375853269180695, - "learning_rate": 3.6996524473909268e-06, - "loss": 0.1619, - "step": 14266 - }, - { - "epoch": 0.73, - "grad_norm": 1.0919991003667495, - "learning_rate": 3.6983735514648376e-06, - "loss": 0.174, - "step": 14267 - }, - { - "epoch": 0.73, - "grad_norm": 1.1546100421700132, - "learning_rate": 3.697094826470717e-06, - "loss": 0.176, - "step": 14268 - }, - { - "epoch": 0.73, - "grad_norm": 0.8533756977482386, - "learning_rate": 3.6958162724432612e-06, - "loss": 0.1706, - "step": 14269 - }, - { - "epoch": 0.73, - "grad_norm": 0.8114168158613203, - "learning_rate": 3.6945378894171392e-06, - "loss": 0.174, - "step": 14270 - }, - { - "epoch": 0.73, - "grad_norm": 1.139833257653544, - "learning_rate": 3.6932596774270346e-06, - "loss": 0.1579, - "step": 14271 - }, - { - "epoch": 0.73, - "grad_norm": 1.011143112623799, - "learning_rate": 3.6919816365076165e-06, - "loss": 0.1912, - "step": 14272 - }, - { - "epoch": 0.73, - "grad_norm": 1.599161063664042, - "learning_rate": 3.6907037666935565e-06, - "loss": 0.18, - "step": 14273 - }, - { - "epoch": 0.73, - "grad_norm": 2.111474925295134, - "learning_rate": 3.6894260680195105e-06, - "loss": 0.1687, - "step": 14274 - }, - { - "epoch": 0.73, - "grad_norm": 2.071490665473469, - "learning_rate": 3.688148540520141e-06, - "loss": 0.1563, - "step": 14275 - }, - { - "epoch": 0.73, - "grad_norm": 0.9001205884583297, - "learning_rate": 3.6868711842300964e-06, - "loss": 0.1532, - "step": 14276 - }, - { - "epoch": 0.73, - "grad_norm": 1.2590964346673659, - "learning_rate": 3.6855939991840305e-06, - "loss": 0.1857, - "step": 14277 - }, - { - "epoch": 0.73, - "grad_norm": 1.4732035688485705, - "learning_rate": 3.6843169854165807e-06, - "loss": 0.176, - "step": 14278 - }, - { - "epoch": 0.73, - "grad_norm": 1.284662221148433, - "learning_rate": 3.683040142962393e-06, - "loss": 0.1924, - "step": 14279 - }, - { - "epoch": 0.73, - "grad_norm": 1.2777057044689084, - "learning_rate": 3.6817634718560947e-06, - "loss": 0.1679, - "step": 14280 - }, - { - "epoch": 0.73, - "grad_norm": 0.9109142054246077, - "learning_rate": 3.6804869721323187e-06, - "loss": 0.1671, - "step": 14281 - }, - { - "epoch": 0.73, - "grad_norm": 1.0592463944026813, - "learning_rate": 3.6792106438256937e-06, - "loss": 0.1739, - "step": 14282 - }, - { - "epoch": 0.73, - "grad_norm": 0.810647659156529, - "learning_rate": 3.6779344869708344e-06, - "loss": 0.1507, - "step": 14283 - }, - { - "epoch": 0.73, - "grad_norm": 0.9415308598837328, - "learning_rate": 3.6766585016023624e-06, - "loss": 0.1671, - "step": 14284 - }, - { - "epoch": 0.73, - "grad_norm": 1.1358383503657274, - "learning_rate": 3.6753826877548817e-06, - "loss": 0.1595, - "step": 14285 - }, - { - "epoch": 0.73, - "grad_norm": 0.9346530761399574, - "learning_rate": 3.674107045463007e-06, - "loss": 0.191, - "step": 14286 - }, - { - "epoch": 0.73, - "grad_norm": 1.466131046517711, - "learning_rate": 3.672831574761332e-06, - "loss": 0.1527, - "step": 14287 - }, - { - "epoch": 0.73, - "grad_norm": 1.2540878334454677, - "learning_rate": 3.671556275684458e-06, - "loss": 0.1754, - "step": 14288 - }, - { - "epoch": 0.73, - "grad_norm": 1.2123331434905684, - "learning_rate": 3.6702811482669776e-06, - "loss": 0.1581, - "step": 14289 - }, - { - "epoch": 0.73, - "grad_norm": 0.8083395757328803, - "learning_rate": 3.6690061925434817e-06, - "loss": 0.1594, - "step": 14290 - }, - { - "epoch": 0.73, - "grad_norm": 0.9007273446819845, - "learning_rate": 3.667731408548547e-06, - "loss": 0.1777, - "step": 14291 - }, - { - "epoch": 0.73, - "grad_norm": 1.1764483153180103, - "learning_rate": 3.6664567963167598e-06, - "loss": 0.1855, - "step": 14292 - }, - { - "epoch": 0.73, - "grad_norm": 1.0257463654360448, - "learning_rate": 3.6651823558826847e-06, - "loss": 0.1775, - "step": 14293 - }, - { - "epoch": 0.73, - "grad_norm": 1.708378143859542, - "learning_rate": 3.6639080872809007e-06, - "loss": 0.1595, - "step": 14294 - }, - { - "epoch": 0.73, - "grad_norm": 1.063857693334117, - "learning_rate": 3.662633990545964e-06, - "loss": 0.1604, - "step": 14295 - }, - { - "epoch": 0.73, - "grad_norm": 1.015383341678732, - "learning_rate": 3.6613600657124416e-06, - "loss": 0.1715, - "step": 14296 - }, - { - "epoch": 0.73, - "grad_norm": 1.2290575016512464, - "learning_rate": 3.6600863128148823e-06, - "loss": 0.1729, - "step": 14297 - }, - { - "epoch": 0.73, - "grad_norm": 0.937283228394862, - "learning_rate": 3.6588127318878398e-06, - "loss": 0.1839, - "step": 14298 - }, - { - "epoch": 0.73, - "grad_norm": 1.0972501980810836, - "learning_rate": 3.657539322965863e-06, - "loss": 0.151, - "step": 14299 - }, - { - "epoch": 0.73, - "grad_norm": 1.2100849301088383, - "learning_rate": 3.6562660860834866e-06, - "loss": 0.1771, - "step": 14300 - }, - { - "epoch": 0.73, - "grad_norm": 0.9269535849349061, - "learning_rate": 3.654993021275255e-06, - "loss": 0.1659, - "step": 14301 - }, - { - "epoch": 0.73, - "grad_norm": 0.989396120662106, - "learning_rate": 3.6537201285756927e-06, - "loss": 0.1663, - "step": 14302 - }, - { - "epoch": 0.73, - "grad_norm": 0.9574458380861439, - "learning_rate": 3.652447408019334e-06, - "loss": 0.1689, - "step": 14303 - }, - { - "epoch": 0.73, - "grad_norm": 1.7094279189074484, - "learning_rate": 3.651174859640694e-06, - "loss": 0.1878, - "step": 14304 - }, - { - "epoch": 0.73, - "grad_norm": 1.1206309986972018, - "learning_rate": 3.6499024834742967e-06, - "loss": 0.1781, - "step": 14305 - }, - { - "epoch": 0.73, - "grad_norm": 1.9028885348627, - "learning_rate": 3.6486302795546515e-06, - "loss": 0.1713, - "step": 14306 - }, - { - "epoch": 0.73, - "grad_norm": 0.9185930338847872, - "learning_rate": 3.6473582479162684e-06, - "loss": 0.1493, - "step": 14307 - }, - { - "epoch": 0.73, - "grad_norm": 0.8589657424822319, - "learning_rate": 3.6460863885936514e-06, - "loss": 0.1684, - "step": 14308 - }, - { - "epoch": 0.73, - "grad_norm": 0.9463367078690121, - "learning_rate": 3.644814701621303e-06, - "loss": 0.1705, - "step": 14309 - }, - { - "epoch": 0.73, - "grad_norm": 0.9896721370691746, - "learning_rate": 3.6435431870337123e-06, - "loss": 0.1743, - "step": 14310 - }, - { - "epoch": 0.73, - "grad_norm": 1.1614616682775758, - "learning_rate": 3.642271844865375e-06, - "loss": 0.1781, - "step": 14311 - }, - { - "epoch": 0.73, - "grad_norm": 0.9824090286288907, - "learning_rate": 3.641000675150769e-06, - "loss": 0.1646, - "step": 14312 - }, - { - "epoch": 0.73, - "grad_norm": 0.7977130453754009, - "learning_rate": 3.639729677924382e-06, - "loss": 0.1507, - "step": 14313 - }, - { - "epoch": 0.73, - "grad_norm": 0.8735573137115935, - "learning_rate": 3.638458853220683e-06, - "loss": 0.1643, - "step": 14314 - }, - { - "epoch": 0.73, - "grad_norm": 0.9780952797888448, - "learning_rate": 3.637188201074149e-06, - "loss": 0.1755, - "step": 14315 - }, - { - "epoch": 0.73, - "grad_norm": 0.9590069219781341, - "learning_rate": 3.635917721519245e-06, - "loss": 0.1515, - "step": 14316 - }, - { - "epoch": 0.73, - "grad_norm": 0.7895245555191475, - "learning_rate": 3.634647414590431e-06, - "loss": 0.1827, - "step": 14317 - }, - { - "epoch": 0.73, - "grad_norm": 0.844476378491929, - "learning_rate": 3.6333772803221677e-06, - "loss": 0.1677, - "step": 14318 - }, - { - "epoch": 0.73, - "grad_norm": 1.05269931311854, - "learning_rate": 3.632107318748903e-06, - "loss": 0.1904, - "step": 14319 - }, - { - "epoch": 0.73, - "grad_norm": 1.636618686759367, - "learning_rate": 3.630837529905089e-06, - "loss": 0.1683, - "step": 14320 - }, - { - "epoch": 0.73, - "grad_norm": 0.8617969918168314, - "learning_rate": 3.6295679138251637e-06, - "loss": 0.1592, - "step": 14321 - }, - { - "epoch": 0.73, - "grad_norm": 0.9486332624925874, - "learning_rate": 3.628298470543572e-06, - "loss": 0.1892, - "step": 14322 - }, - { - "epoch": 0.73, - "grad_norm": 1.4453375339022392, - "learning_rate": 3.6270292000947417e-06, - "loss": 0.1761, - "step": 14323 - }, - { - "epoch": 0.73, - "grad_norm": 1.146975579000541, - "learning_rate": 3.625760102513103e-06, - "loss": 0.1698, - "step": 14324 - }, - { - "epoch": 0.73, - "grad_norm": 1.6093006977976625, - "learning_rate": 3.6244911778330826e-06, - "loss": 0.1641, - "step": 14325 - }, - { - "epoch": 0.73, - "grad_norm": 1.1152540194062124, - "learning_rate": 3.6232224260891012e-06, - "loss": 0.1529, - "step": 14326 - }, - { - "epoch": 0.73, - "grad_norm": 0.8843950514675752, - "learning_rate": 3.621953847315569e-06, - "loss": 0.1527, - "step": 14327 - }, - { - "epoch": 0.73, - "grad_norm": 1.096107420351601, - "learning_rate": 3.620685441546903e-06, - "loss": 0.1987, - "step": 14328 - }, - { - "epoch": 0.73, - "grad_norm": 1.1006170106896094, - "learning_rate": 3.6194172088175005e-06, - "loss": 0.1886, - "step": 14329 - }, - { - "epoch": 0.73, - "grad_norm": 1.026145337870011, - "learning_rate": 3.6181491491617706e-06, - "loss": 0.1496, - "step": 14330 - }, - { - "epoch": 0.73, - "grad_norm": 1.029899205795693, - "learning_rate": 3.6168812626141e-06, - "loss": 0.1987, - "step": 14331 - }, - { - "epoch": 0.73, - "grad_norm": 1.0966232569516159, - "learning_rate": 3.6156135492088915e-06, - "loss": 0.1475, - "step": 14332 - }, - { - "epoch": 0.73, - "grad_norm": 1.1319156136893418, - "learning_rate": 3.6143460089805214e-06, - "loss": 0.1609, - "step": 14333 - }, - { - "epoch": 0.73, - "grad_norm": 1.3430977218507882, - "learning_rate": 3.613078641963377e-06, - "loss": 0.1658, - "step": 14334 - }, - { - "epoch": 0.73, - "grad_norm": 0.8104722022322258, - "learning_rate": 3.611811448191839e-06, - "loss": 0.1578, - "step": 14335 - }, - { - "epoch": 0.73, - "grad_norm": 1.3388835404316106, - "learning_rate": 3.610544427700272e-06, - "loss": 0.1761, - "step": 14336 - }, - { - "epoch": 0.73, - "grad_norm": 0.8960803836041868, - "learning_rate": 3.6092775805230516e-06, - "loss": 0.1594, - "step": 14337 - }, - { - "epoch": 0.73, - "grad_norm": 0.8458899384765411, - "learning_rate": 3.6080109066945357e-06, - "loss": 0.1749, - "step": 14338 - }, - { - "epoch": 0.73, - "grad_norm": 0.8441196825783069, - "learning_rate": 3.6067444062490875e-06, - "loss": 0.18, - "step": 14339 - }, - { - "epoch": 0.73, - "grad_norm": 0.7732150398842218, - "learning_rate": 3.6054780792210542e-06, - "loss": 0.1627, - "step": 14340 - }, - { - "epoch": 0.73, - "grad_norm": 1.1086405766696839, - "learning_rate": 3.6042119256447904e-06, - "loss": 0.1675, - "step": 14341 - }, - { - "epoch": 0.73, - "grad_norm": 0.8720343885082179, - "learning_rate": 3.602945945554639e-06, - "loss": 0.1747, - "step": 14342 - }, - { - "epoch": 0.73, - "grad_norm": 0.8623553602417626, - "learning_rate": 3.6016801389849434e-06, - "loss": 0.1829, - "step": 14343 - }, - { - "epoch": 0.73, - "grad_norm": 1.2966082489742936, - "learning_rate": 3.6004145059700313e-06, - "loss": 0.1676, - "step": 14344 - }, - { - "epoch": 0.73, - "grad_norm": 1.157181184083306, - "learning_rate": 3.5991490465442413e-06, - "loss": 0.1957, - "step": 14345 - }, - { - "epoch": 0.73, - "grad_norm": 0.8556719271462281, - "learning_rate": 3.5978837607418914e-06, - "loss": 0.1431, - "step": 14346 - }, - { - "epoch": 0.73, - "grad_norm": 0.9223430518682103, - "learning_rate": 3.5966186485973097e-06, - "loss": 0.1711, - "step": 14347 - }, - { - "epoch": 0.73, - "grad_norm": 1.5919130161900463, - "learning_rate": 3.5953537101448053e-06, - "loss": 0.1598, - "step": 14348 - }, - { - "epoch": 0.73, - "grad_norm": 2.5950698902412257, - "learning_rate": 3.5940889454186965e-06, - "loss": 0.1786, - "step": 14349 - }, - { - "epoch": 0.73, - "grad_norm": 1.05794885305146, - "learning_rate": 3.5928243544532835e-06, - "loss": 0.1447, - "step": 14350 - }, - { - "epoch": 0.73, - "grad_norm": 0.9346209259921906, - "learning_rate": 3.5915599372828725e-06, - "loss": 0.1607, - "step": 14351 - }, - { - "epoch": 0.73, - "grad_norm": 1.0338066235146515, - "learning_rate": 3.590295693941763e-06, - "loss": 0.1827, - "step": 14352 - }, - { - "epoch": 0.73, - "grad_norm": 1.0959471715990214, - "learning_rate": 3.5890316244642408e-06, - "loss": 0.1626, - "step": 14353 - }, - { - "epoch": 0.73, - "grad_norm": 2.165534101280814, - "learning_rate": 3.5877677288846023e-06, - "loss": 0.158, - "step": 14354 - }, - { - "epoch": 0.73, - "grad_norm": 0.947660632419828, - "learning_rate": 3.5865040072371228e-06, - "loss": 0.1776, - "step": 14355 - }, - { - "epoch": 0.73, - "grad_norm": 0.8461107495185389, - "learning_rate": 3.5852404595560876e-06, - "loss": 0.1931, - "step": 14356 - }, - { - "epoch": 0.73, - "grad_norm": 2.1027775274177167, - "learning_rate": 3.5839770858757627e-06, - "loss": 0.1729, - "step": 14357 - }, - { - "epoch": 0.73, - "grad_norm": 0.8846669049565868, - "learning_rate": 3.5827138862304266e-06, - "loss": 0.1689, - "step": 14358 - }, - { - "epoch": 0.73, - "grad_norm": 1.2012854026999105, - "learning_rate": 3.581450860654335e-06, - "loss": 0.1838, - "step": 14359 - }, - { - "epoch": 0.73, - "grad_norm": 1.24401535072047, - "learning_rate": 3.580188009181751e-06, - "loss": 0.1878, - "step": 14360 - }, - { - "epoch": 0.73, - "grad_norm": 1.4983760604381224, - "learning_rate": 3.57892533184693e-06, - "loss": 0.1877, - "step": 14361 - }, - { - "epoch": 0.73, - "grad_norm": 1.0436754053513289, - "learning_rate": 3.577662828684125e-06, - "loss": 0.1759, - "step": 14362 - }, - { - "epoch": 0.73, - "grad_norm": 1.258726118552222, - "learning_rate": 3.576400499727576e-06, - "loss": 0.19, - "step": 14363 - }, - { - "epoch": 0.73, - "grad_norm": 0.9488113239079438, - "learning_rate": 3.5751383450115298e-06, - "loss": 0.1769, - "step": 14364 - }, - { - "epoch": 0.73, - "grad_norm": 0.7623195980440788, - "learning_rate": 3.5738763645702145e-06, - "loss": 0.1558, - "step": 14365 - }, - { - "epoch": 0.73, - "grad_norm": 0.855507636654625, - "learning_rate": 3.572614558437869e-06, - "loss": 0.1645, - "step": 14366 - }, - { - "epoch": 0.73, - "grad_norm": 1.3798623658098415, - "learning_rate": 3.5713529266487145e-06, - "loss": 0.1922, - "step": 14367 - }, - { - "epoch": 0.73, - "grad_norm": 0.9842413518768752, - "learning_rate": 3.5700914692369738e-06, - "loss": 0.1842, - "step": 14368 - }, - { - "epoch": 0.73, - "grad_norm": 1.7150008532521748, - "learning_rate": 3.568830186236869e-06, - "loss": 0.1782, - "step": 14369 - }, - { - "epoch": 0.73, - "grad_norm": 1.4899325092697677, - "learning_rate": 3.5675690776826055e-06, - "loss": 0.187, - "step": 14370 - }, - { - "epoch": 0.73, - "grad_norm": 1.0130933708959058, - "learning_rate": 3.5663081436083967e-06, - "loss": 0.1641, - "step": 14371 - }, - { - "epoch": 0.73, - "grad_norm": 0.913539667921112, - "learning_rate": 3.5650473840484402e-06, - "loss": 0.1573, - "step": 14372 - }, - { - "epoch": 0.73, - "grad_norm": 1.0861222144561535, - "learning_rate": 3.56378679903694e-06, - "loss": 0.1931, - "step": 14373 - }, - { - "epoch": 0.73, - "grad_norm": 1.0768559467476906, - "learning_rate": 3.562526388608083e-06, - "loss": 0.1692, - "step": 14374 - }, - { - "epoch": 0.73, - "grad_norm": 1.1798169488505947, - "learning_rate": 3.5612661527960646e-06, - "loss": 0.165, - "step": 14375 - }, - { - "epoch": 0.73, - "grad_norm": 0.9430744626611828, - "learning_rate": 3.560006091635062e-06, - "loss": 0.1657, - "step": 14376 - }, - { - "epoch": 0.73, - "grad_norm": 1.0014914667902954, - "learning_rate": 3.558746205159258e-06, - "loss": 0.1796, - "step": 14377 - }, - { - "epoch": 0.73, - "grad_norm": 0.9003099615219436, - "learning_rate": 3.5574864934028275e-06, - "loss": 0.1637, - "step": 14378 - }, - { - "epoch": 0.73, - "grad_norm": 1.1470960761940354, - "learning_rate": 3.556226956399943e-06, - "loss": 0.1825, - "step": 14379 - }, - { - "epoch": 0.73, - "grad_norm": 1.7413078096043346, - "learning_rate": 3.554967594184762e-06, - "loss": 0.1853, - "step": 14380 - }, - { - "epoch": 0.73, - "grad_norm": 0.7265083010133702, - "learning_rate": 3.553708406791453e-06, - "loss": 0.1709, - "step": 14381 - }, - { - "epoch": 0.73, - "grad_norm": 1.1558964925446717, - "learning_rate": 3.552449394254165e-06, - "loss": 0.1857, - "step": 14382 - }, - { - "epoch": 0.73, - "grad_norm": 1.263887901976451, - "learning_rate": 3.5511905566070537e-06, - "loss": 0.16, - "step": 14383 - }, - { - "epoch": 0.73, - "grad_norm": 0.9311776746938797, - "learning_rate": 3.549931893884259e-06, - "loss": 0.1575, - "step": 14384 - }, - { - "epoch": 0.73, - "grad_norm": 0.8371777695890396, - "learning_rate": 3.5486734061199266e-06, - "loss": 0.1647, - "step": 14385 - }, - { - "epoch": 0.73, - "grad_norm": 0.9259804564620615, - "learning_rate": 3.5474150933481955e-06, - "loss": 0.1845, - "step": 14386 - }, - { - "epoch": 0.73, - "grad_norm": 1.4184095276211304, - "learning_rate": 3.5461569556031915e-06, - "loss": 0.1696, - "step": 14387 - }, - { - "epoch": 0.73, - "grad_norm": 0.7718611178275522, - "learning_rate": 3.544898992919048e-06, - "loss": 0.1823, - "step": 14388 - }, - { - "epoch": 0.73, - "grad_norm": 0.9339948773745854, - "learning_rate": 3.543641205329881e-06, - "loss": 0.165, - "step": 14389 - }, - { - "epoch": 0.73, - "grad_norm": 0.7963601667531222, - "learning_rate": 3.5423835928698126e-06, - "loss": 0.152, - "step": 14390 - }, - { - "epoch": 0.73, - "grad_norm": 0.9641537768577647, - "learning_rate": 3.5411261555729513e-06, - "loss": 0.1606, - "step": 14391 - }, - { - "epoch": 0.73, - "grad_norm": 1.0070060877029965, - "learning_rate": 3.5398688934734125e-06, - "loss": 0.175, - "step": 14392 - }, - { - "epoch": 0.73, - "grad_norm": 1.0048912561155683, - "learning_rate": 3.53861180660529e-06, - "loss": 0.1551, - "step": 14393 - }, - { - "epoch": 0.73, - "grad_norm": 1.0342423921380977, - "learning_rate": 3.5373548950026882e-06, - "loss": 0.1561, - "step": 14394 - }, - { - "epoch": 0.73, - "grad_norm": 1.1324394346700175, - "learning_rate": 3.536098158699699e-06, - "loss": 0.1861, - "step": 14395 - }, - { - "epoch": 0.73, - "grad_norm": 0.8373933752901664, - "learning_rate": 3.5348415977304165e-06, - "loss": 0.148, - "step": 14396 - }, - { - "epoch": 0.73, - "grad_norm": 1.4163575087438194, - "learning_rate": 3.5335852121289172e-06, - "loss": 0.1624, - "step": 14397 - }, - { - "epoch": 0.73, - "grad_norm": 1.0223524984740415, - "learning_rate": 3.5323290019292867e-06, - "loss": 0.1646, - "step": 14398 - }, - { - "epoch": 0.73, - "grad_norm": 1.0621961867309728, - "learning_rate": 3.531072967165595e-06, - "loss": 0.1641, - "step": 14399 - }, - { - "epoch": 0.73, - "grad_norm": 1.0916386681798416, - "learning_rate": 3.529817107871918e-06, - "loss": 0.2014, - "step": 14400 - }, - { - "epoch": 0.73, - "grad_norm": 1.125081982430693, - "learning_rate": 3.5285614240823128e-06, - "loss": 0.1665, - "step": 14401 - }, - { - "epoch": 0.73, - "grad_norm": 1.0002037186679038, - "learning_rate": 3.5273059158308487e-06, - "loss": 0.1696, - "step": 14402 - }, - { - "epoch": 0.73, - "grad_norm": 1.114430281565667, - "learning_rate": 3.5260505831515736e-06, - "loss": 0.1829, - "step": 14403 - }, - { - "epoch": 0.73, - "grad_norm": 0.9266988557304616, - "learning_rate": 3.5247954260785422e-06, - "loss": 0.15, - "step": 14404 - }, - { - "epoch": 0.73, - "grad_norm": 0.9081102708912858, - "learning_rate": 3.523540444645804e-06, - "loss": 0.169, - "step": 14405 - }, - { - "epoch": 0.73, - "grad_norm": 0.8531204158047896, - "learning_rate": 3.522285638887394e-06, - "loss": 0.1861, - "step": 14406 - }, - { - "epoch": 0.73, - "grad_norm": 0.8542913650553401, - "learning_rate": 3.5210310088373544e-06, - "loss": 0.1799, - "step": 14407 - }, - { - "epoch": 0.73, - "grad_norm": 1.2123191910112574, - "learning_rate": 3.5197765545297124e-06, - "loss": 0.1628, - "step": 14408 - }, - { - "epoch": 0.73, - "grad_norm": 1.0883468660358517, - "learning_rate": 3.5185222759984993e-06, - "loss": 0.1844, - "step": 14409 - }, - { - "epoch": 0.73, - "grad_norm": 0.9928291607993566, - "learning_rate": 3.5172681732777335e-06, - "loss": 0.1583, - "step": 14410 - }, - { - "epoch": 0.73, - "grad_norm": 0.8216944156970188, - "learning_rate": 3.5160142464014336e-06, - "loss": 0.155, - "step": 14411 - }, - { - "epoch": 0.73, - "grad_norm": 1.0854388217429343, - "learning_rate": 3.514760495403614e-06, - "loss": 0.1634, - "step": 14412 - }, - { - "epoch": 0.73, - "grad_norm": 0.8440810900533375, - "learning_rate": 3.5135069203182858e-06, - "loss": 0.1536, - "step": 14413 - }, - { - "epoch": 0.73, - "grad_norm": 0.8591150792858231, - "learning_rate": 3.512253521179445e-06, - "loss": 0.1564, - "step": 14414 - }, - { - "epoch": 0.73, - "grad_norm": 2.701393035668696, - "learning_rate": 3.511000298021098e-06, - "loss": 0.1776, - "step": 14415 - }, - { - "epoch": 0.73, - "grad_norm": 1.0878208543064773, - "learning_rate": 3.5097472508772302e-06, - "loss": 0.1722, - "step": 14416 - }, - { - "epoch": 0.73, - "grad_norm": 0.9959521739940412, - "learning_rate": 3.508494379781838e-06, - "loss": 0.1483, - "step": 14417 - }, - { - "epoch": 0.73, - "grad_norm": 1.2945797851190817, - "learning_rate": 3.5072416847688993e-06, - "loss": 0.1735, - "step": 14418 - }, - { - "epoch": 0.73, - "grad_norm": 0.8609757094301027, - "learning_rate": 3.505989165872401e-06, - "loss": 0.1569, - "step": 14419 - }, - { - "epoch": 0.73, - "grad_norm": 0.9284580215713893, - "learning_rate": 3.504736823126309e-06, - "loss": 0.1708, - "step": 14420 - }, - { - "epoch": 0.73, - "grad_norm": 0.8923394895343267, - "learning_rate": 3.5034846565645973e-06, - "loss": 0.1718, - "step": 14421 - }, - { - "epoch": 0.73, - "grad_norm": 1.4204732071601351, - "learning_rate": 3.5022326662212347e-06, - "loss": 0.1712, - "step": 14422 - }, - { - "epoch": 0.73, - "grad_norm": 1.4474184116828497, - "learning_rate": 3.5009808521301746e-06, - "loss": 0.1592, - "step": 14423 - }, - { - "epoch": 0.73, - "grad_norm": 0.8142217738799143, - "learning_rate": 3.49972921432538e-06, - "loss": 0.1545, - "step": 14424 - }, - { - "epoch": 0.73, - "grad_norm": 0.9384311497554553, - "learning_rate": 3.4984777528407944e-06, - "loss": 0.1645, - "step": 14425 - }, - { - "epoch": 0.73, - "grad_norm": 0.947750884547131, - "learning_rate": 3.4972264677103694e-06, - "loss": 0.1805, - "step": 14426 - }, - { - "epoch": 0.73, - "grad_norm": 0.8440441374202798, - "learning_rate": 3.495975358968041e-06, - "loss": 0.1713, - "step": 14427 - }, - { - "epoch": 0.73, - "grad_norm": 1.4807376614383012, - "learning_rate": 3.4947244266477507e-06, - "loss": 0.1677, - "step": 14428 - }, - { - "epoch": 0.73, - "grad_norm": 0.9859634373605151, - "learning_rate": 3.493473670783426e-06, - "loss": 0.1568, - "step": 14429 - }, - { - "epoch": 0.73, - "grad_norm": 0.8734431242446054, - "learning_rate": 3.492223091408994e-06, - "loss": 0.1804, - "step": 14430 - }, - { - "epoch": 0.73, - "grad_norm": 1.2104883056005962, - "learning_rate": 3.4909726885583782e-06, - "loss": 0.1562, - "step": 14431 - }, - { - "epoch": 0.73, - "grad_norm": 1.1914531731251647, - "learning_rate": 3.4897224622655e-06, - "loss": 0.1766, - "step": 14432 - }, - { - "epoch": 0.73, - "grad_norm": 1.219156978564906, - "learning_rate": 3.4884724125642646e-06, - "loss": 0.1753, - "step": 14433 - }, - { - "epoch": 0.73, - "grad_norm": 0.880056487254269, - "learning_rate": 3.487222539488586e-06, - "loss": 0.1731, - "step": 14434 - }, - { - "epoch": 0.73, - "grad_norm": 0.9074694159953327, - "learning_rate": 3.4859728430723595e-06, - "loss": 0.161, - "step": 14435 - }, - { - "epoch": 0.73, - "grad_norm": 0.8519788084340726, - "learning_rate": 3.4847233233494916e-06, - "loss": 0.1958, - "step": 14436 - }, - { - "epoch": 0.73, - "grad_norm": 1.034003312618856, - "learning_rate": 3.4834739803538686e-06, - "loss": 0.1773, - "step": 14437 - }, - { - "epoch": 0.73, - "grad_norm": 0.9177306598389084, - "learning_rate": 3.4822248141193816e-06, - "loss": 0.1585, - "step": 14438 - }, - { - "epoch": 0.73, - "grad_norm": 0.8999315788165344, - "learning_rate": 3.4809758246799173e-06, - "loss": 0.166, - "step": 14439 - }, - { - "epoch": 0.73, - "grad_norm": 0.800926136705932, - "learning_rate": 3.479727012069349e-06, - "loss": 0.1541, - "step": 14440 - }, - { - "epoch": 0.73, - "grad_norm": 1.5135594002739012, - "learning_rate": 3.478478376321558e-06, - "loss": 0.1798, - "step": 14441 - }, - { - "epoch": 0.73, - "grad_norm": 1.6799241455933929, - "learning_rate": 3.4772299174704048e-06, - "loss": 0.1728, - "step": 14442 - }, - { - "epoch": 0.73, - "grad_norm": 0.9344063548247316, - "learning_rate": 3.475981635549763e-06, - "loss": 0.1697, - "step": 14443 - }, - { - "epoch": 0.73, - "grad_norm": 0.9087585500193363, - "learning_rate": 3.4747335305934836e-06, - "loss": 0.1703, - "step": 14444 - }, - { - "epoch": 0.73, - "grad_norm": 0.874986045657428, - "learning_rate": 3.47348560263543e-06, - "loss": 0.1694, - "step": 14445 - }, - { - "epoch": 0.73, - "grad_norm": 0.8998223181947868, - "learning_rate": 3.4722378517094436e-06, - "loss": 0.1631, - "step": 14446 - }, - { - "epoch": 0.73, - "grad_norm": 0.950896245133745, - "learning_rate": 3.4709902778493742e-06, - "loss": 0.185, - "step": 14447 - }, - { - "epoch": 0.73, - "grad_norm": 1.5040341211529928, - "learning_rate": 3.4697428810890634e-06, - "loss": 0.1536, - "step": 14448 - }, - { - "epoch": 0.73, - "grad_norm": 1.430844340350886, - "learning_rate": 3.4684956614623476e-06, - "loss": 0.1675, - "step": 14449 - }, - { - "epoch": 0.73, - "grad_norm": 1.3461237917573672, - "learning_rate": 3.4672486190030543e-06, - "loss": 0.1732, - "step": 14450 - }, - { - "epoch": 0.73, - "grad_norm": 0.9382187760505567, - "learning_rate": 3.466001753745013e-06, - "loss": 0.1739, - "step": 14451 - }, - { - "epoch": 0.73, - "grad_norm": 1.571179375982628, - "learning_rate": 3.4647550657220407e-06, - "loss": 0.1646, - "step": 14452 - }, - { - "epoch": 0.73, - "grad_norm": 1.1049373626773231, - "learning_rate": 3.463508554967959e-06, - "loss": 0.1782, - "step": 14453 - }, - { - "epoch": 0.74, - "grad_norm": 1.371196296814422, - "learning_rate": 3.462262221516575e-06, - "loss": 0.1718, - "step": 14454 - }, - { - "epoch": 0.74, - "grad_norm": 1.4900025372462213, - "learning_rate": 3.4610160654016987e-06, - "loss": 0.1915, - "step": 14455 - }, - { - "epoch": 0.74, - "grad_norm": 1.7389687154100937, - "learning_rate": 3.4597700866571294e-06, - "loss": 0.1759, - "step": 14456 - }, - { - "epoch": 0.74, - "grad_norm": 0.9413972328791125, - "learning_rate": 3.4585242853166657e-06, - "loss": 0.1771, - "step": 14457 - }, - { - "epoch": 0.74, - "grad_norm": 0.9685053769263471, - "learning_rate": 3.457278661414103e-06, - "loss": 0.1851, - "step": 14458 - }, - { - "epoch": 0.74, - "grad_norm": 1.1409849072794902, - "learning_rate": 3.456033214983222e-06, - "loss": 0.1857, - "step": 14459 - }, - { - "epoch": 0.74, - "grad_norm": 1.1307581819553465, - "learning_rate": 3.454787946057814e-06, - "loss": 0.1539, - "step": 14460 - }, - { - "epoch": 0.74, - "grad_norm": 1.105159104219784, - "learning_rate": 3.45354285467165e-06, - "loss": 0.1663, - "step": 14461 - }, - { - "epoch": 0.74, - "grad_norm": 1.0820023214794368, - "learning_rate": 3.452297940858508e-06, - "loss": 0.1862, - "step": 14462 - }, - { - "epoch": 0.74, - "grad_norm": 1.7615077258540803, - "learning_rate": 3.4510532046521505e-06, - "loss": 0.1792, - "step": 14463 - }, - { - "epoch": 0.74, - "grad_norm": 1.0554686371860205, - "learning_rate": 3.4498086460863455e-06, - "loss": 0.1387, - "step": 14464 - }, - { - "epoch": 0.74, - "grad_norm": 0.9781012241290394, - "learning_rate": 3.4485642651948516e-06, - "loss": 0.1652, - "step": 14465 - }, - { - "epoch": 0.74, - "grad_norm": 0.85615413732514, - "learning_rate": 3.4473200620114245e-06, - "loss": 0.1726, - "step": 14466 - }, - { - "epoch": 0.74, - "grad_norm": 1.0215857868340161, - "learning_rate": 3.4460760365698078e-06, - "loss": 0.1717, - "step": 14467 - }, - { - "epoch": 0.74, - "grad_norm": 1.1543118477931984, - "learning_rate": 3.444832188903752e-06, - "loss": 0.1546, - "step": 14468 - }, - { - "epoch": 0.74, - "grad_norm": 0.7340101051610519, - "learning_rate": 3.4435885190469886e-06, - "loss": 0.1799, - "step": 14469 - }, - { - "epoch": 0.74, - "grad_norm": 0.8684807031919314, - "learning_rate": 3.4423450270332626e-06, - "loss": 0.1616, - "step": 14470 - }, - { - "epoch": 0.74, - "grad_norm": 1.0967064609498811, - "learning_rate": 3.4411017128962932e-06, - "loss": 0.1609, - "step": 14471 - }, - { - "epoch": 0.74, - "grad_norm": 0.902430777464027, - "learning_rate": 3.4398585766698146e-06, - "loss": 0.1776, - "step": 14472 - }, - { - "epoch": 0.74, - "grad_norm": 0.9113227208746828, - "learning_rate": 3.4386156183875384e-06, - "loss": 0.1683, - "step": 14473 - }, - { - "epoch": 0.74, - "grad_norm": 0.8535988903463739, - "learning_rate": 3.437372838083184e-06, - "loss": 0.1612, - "step": 14474 - }, - { - "epoch": 0.74, - "grad_norm": 1.1787012390630862, - "learning_rate": 3.4361302357904657e-06, - "loss": 0.1583, - "step": 14475 - }, - { - "epoch": 0.74, - "grad_norm": 1.3044864076379246, - "learning_rate": 3.4348878115430827e-06, - "loss": 0.1637, - "step": 14476 - }, - { - "epoch": 0.74, - "grad_norm": 1.1927482457591698, - "learning_rate": 3.4336455653747414e-06, - "loss": 0.1669, - "step": 14477 - }, - { - "epoch": 0.74, - "grad_norm": 0.8094448998728605, - "learning_rate": 3.432403497319132e-06, - "loss": 0.1597, - "step": 14478 - }, - { - "epoch": 0.74, - "grad_norm": 1.0774702938327079, - "learning_rate": 3.4311616074099517e-06, - "loss": 0.1621, - "step": 14479 - }, - { - "epoch": 0.74, - "grad_norm": 0.9542580107319009, - "learning_rate": 3.429919895680881e-06, - "loss": 0.1632, - "step": 14480 - }, - { - "epoch": 0.74, - "grad_norm": 0.9611707086909844, - "learning_rate": 3.428678362165607e-06, - "loss": 0.1443, - "step": 14481 - }, - { - "epoch": 0.74, - "grad_norm": 1.116401520493127, - "learning_rate": 3.4274370068978013e-06, - "loss": 0.1587, - "step": 14482 - }, - { - "epoch": 0.74, - "grad_norm": 2.304449259136624, - "learning_rate": 3.4261958299111363e-06, - "loss": 0.1763, - "step": 14483 - }, - { - "epoch": 0.74, - "grad_norm": 1.1314091816500065, - "learning_rate": 3.424954831239282e-06, - "loss": 0.1945, - "step": 14484 - }, - { - "epoch": 0.74, - "grad_norm": 1.0671220035516777, - "learning_rate": 3.4237140109159015e-06, - "loss": 0.2072, - "step": 14485 - }, - { - "epoch": 0.74, - "grad_norm": 1.1149443924818006, - "learning_rate": 3.422473368974648e-06, - "loss": 0.1663, - "step": 14486 - }, - { - "epoch": 0.74, - "grad_norm": 0.7938422789239769, - "learning_rate": 3.4212329054491775e-06, - "loss": 0.1876, - "step": 14487 - }, - { - "epoch": 0.74, - "grad_norm": 0.8671460207650743, - "learning_rate": 3.419992620373134e-06, - "loss": 0.1636, - "step": 14488 - }, - { - "epoch": 0.74, - "grad_norm": 1.2237197498575294, - "learning_rate": 3.418752513780166e-06, - "loss": 0.2031, - "step": 14489 - }, - { - "epoch": 0.74, - "grad_norm": 0.7849908273815244, - "learning_rate": 3.4175125857039027e-06, - "loss": 0.1407, - "step": 14490 - }, - { - "epoch": 0.74, - "grad_norm": 1.1118225123332441, - "learning_rate": 3.416272836177984e-06, - "loss": 0.1758, - "step": 14491 - }, - { - "epoch": 0.74, - "grad_norm": 1.4168430278920894, - "learning_rate": 3.4150332652360386e-06, - "loss": 0.169, - "step": 14492 - }, - { - "epoch": 0.74, - "grad_norm": 0.8217197474909118, - "learning_rate": 3.413793872911685e-06, - "loss": 0.1589, - "step": 14493 - }, - { - "epoch": 0.74, - "grad_norm": 1.0719618045102857, - "learning_rate": 3.4125546592385483e-06, - "loss": 0.1777, - "step": 14494 - }, - { - "epoch": 0.74, - "grad_norm": 1.0332384870154756, - "learning_rate": 3.4113156242502345e-06, - "loss": 0.1622, - "step": 14495 - }, - { - "epoch": 0.74, - "grad_norm": 1.4227475903179927, - "learning_rate": 3.4100767679803605e-06, - "loss": 0.1686, - "step": 14496 - }, - { - "epoch": 0.74, - "grad_norm": 1.2006207316704358, - "learning_rate": 3.4088380904625217e-06, - "loss": 0.1633, - "step": 14497 - }, - { - "epoch": 0.74, - "grad_norm": 1.0980208062719765, - "learning_rate": 3.4075995917303263e-06, - "loss": 0.175, - "step": 14498 - }, - { - "epoch": 0.74, - "grad_norm": 1.3096078988390378, - "learning_rate": 3.4063612718173613e-06, - "loss": 0.1759, - "step": 14499 - }, - { - "epoch": 0.74, - "grad_norm": 1.1581398109088064, - "learning_rate": 3.4051231307572187e-06, - "loss": 0.1498, - "step": 14500 - }, - { - "epoch": 0.74, - "grad_norm": 0.8573471344318443, - "learning_rate": 3.403885168583484e-06, - "loss": 0.1721, - "step": 14501 - }, - { - "epoch": 0.74, - "grad_norm": 0.8507531056569723, - "learning_rate": 3.4026473853297394e-06, - "loss": 0.15, - "step": 14502 - }, - { - "epoch": 0.74, - "grad_norm": 1.1802770854199611, - "learning_rate": 3.4014097810295542e-06, - "loss": 0.1805, - "step": 14503 - }, - { - "epoch": 0.74, - "grad_norm": 1.322535646862338, - "learning_rate": 3.4001723557165046e-06, - "loss": 0.1681, - "step": 14504 - }, - { - "epoch": 0.74, - "grad_norm": 0.8631282651600433, - "learning_rate": 3.3989351094241496e-06, - "loss": 0.1683, - "step": 14505 - }, - { - "epoch": 0.74, - "grad_norm": 5.157037757388056, - "learning_rate": 3.3976980421860563e-06, - "loss": 0.1839, - "step": 14506 - }, - { - "epoch": 0.74, - "grad_norm": 1.4696535664183503, - "learning_rate": 3.396461154035772e-06, - "loss": 0.1608, - "step": 14507 - }, - { - "epoch": 0.74, - "grad_norm": 0.9476656230693181, - "learning_rate": 3.3952244450068527e-06, - "loss": 0.1616, - "step": 14508 - }, - { - "epoch": 0.74, - "grad_norm": 1.1449168374763314, - "learning_rate": 3.393987915132846e-06, - "loss": 0.1748, - "step": 14509 - }, - { - "epoch": 0.74, - "grad_norm": 0.9197974944526394, - "learning_rate": 3.3927515644472876e-06, - "loss": 0.1768, - "step": 14510 - }, - { - "epoch": 0.74, - "grad_norm": 0.86307723423347, - "learning_rate": 3.3915153929837186e-06, - "loss": 0.174, - "step": 14511 - }, - { - "epoch": 0.74, - "grad_norm": 0.9776784552407588, - "learning_rate": 3.3902794007756655e-06, - "loss": 0.1774, - "step": 14512 - }, - { - "epoch": 0.74, - "grad_norm": 1.0365769962900846, - "learning_rate": 3.38904358785666e-06, - "loss": 0.1589, - "step": 14513 - }, - { - "epoch": 0.74, - "grad_norm": 0.917243783214414, - "learning_rate": 3.3878079542602172e-06, - "loss": 0.1795, - "step": 14514 - }, - { - "epoch": 0.74, - "grad_norm": 0.9496799831594078, - "learning_rate": 3.38657250001986e-06, - "loss": 0.1648, - "step": 14515 - }, - { - "epoch": 0.74, - "grad_norm": 4.645315205313346, - "learning_rate": 3.3853372251690943e-06, - "loss": 0.1571, - "step": 14516 - }, - { - "epoch": 0.74, - "grad_norm": 1.0114640248745363, - "learning_rate": 3.38410212974143e-06, - "loss": 0.1705, - "step": 14517 - }, - { - "epoch": 0.74, - "grad_norm": 1.1624848313084417, - "learning_rate": 3.382867213770369e-06, - "loss": 0.1802, - "step": 14518 - }, - { - "epoch": 0.74, - "grad_norm": 0.9069656129089444, - "learning_rate": 3.3816324772894116e-06, - "loss": 0.1599, - "step": 14519 - }, - { - "epoch": 0.74, - "grad_norm": 1.1188970379680807, - "learning_rate": 3.380397920332045e-06, - "loss": 0.1778, - "step": 14520 - }, - { - "epoch": 0.74, - "grad_norm": 0.8696674808427313, - "learning_rate": 3.3791635429317602e-06, - "loss": 0.1655, - "step": 14521 - }, - { - "epoch": 0.74, - "grad_norm": 1.2652170450369873, - "learning_rate": 3.377929345122036e-06, - "loss": 0.152, - "step": 14522 - }, - { - "epoch": 0.74, - "grad_norm": 0.8047940293282426, - "learning_rate": 3.3766953269363555e-06, - "loss": 0.1647, - "step": 14523 - }, - { - "epoch": 0.74, - "grad_norm": 1.0025267726557285, - "learning_rate": 3.375461488408185e-06, - "loss": 0.1743, - "step": 14524 - }, - { - "epoch": 0.74, - "grad_norm": 0.9846755541578239, - "learning_rate": 3.3742278295709996e-06, - "loss": 0.1684, - "step": 14525 - }, - { - "epoch": 0.74, - "grad_norm": 1.4390956632534884, - "learning_rate": 3.372994350458254e-06, - "loss": 0.1666, - "step": 14526 - }, - { - "epoch": 0.74, - "grad_norm": 1.097163868094141, - "learning_rate": 3.3717610511034116e-06, - "loss": 0.1888, - "step": 14527 - }, - { - "epoch": 0.74, - "grad_norm": 1.4382002789137311, - "learning_rate": 3.370527931539929e-06, - "loss": 0.156, - "step": 14528 - }, - { - "epoch": 0.74, - "grad_norm": 0.8815583376865935, - "learning_rate": 3.3692949918012464e-06, - "loss": 0.1724, - "step": 14529 - }, - { - "epoch": 0.74, - "grad_norm": 3.213918545715467, - "learning_rate": 3.3680622319208158e-06, - "loss": 0.1833, - "step": 14530 - }, - { - "epoch": 0.74, - "grad_norm": 1.6967342650065715, - "learning_rate": 3.3668296519320676e-06, - "loss": 0.1699, - "step": 14531 - }, - { - "epoch": 0.74, - "grad_norm": 1.0196662112296344, - "learning_rate": 3.3655972518684433e-06, - "loss": 0.1757, - "step": 14532 - }, - { - "epoch": 0.74, - "grad_norm": 1.9265938286137334, - "learning_rate": 3.3643650317633645e-06, - "loss": 0.1703, - "step": 14533 - }, - { - "epoch": 0.74, - "grad_norm": 1.0321780931122735, - "learning_rate": 3.36313299165026e-06, - "loss": 0.1564, - "step": 14534 - }, - { - "epoch": 0.74, - "grad_norm": 1.3583160837462, - "learning_rate": 3.361901131562547e-06, - "loss": 0.1786, - "step": 14535 - }, - { - "epoch": 0.74, - "grad_norm": 1.0028582835706312, - "learning_rate": 3.3606694515336457e-06, - "loss": 0.1837, - "step": 14536 - }, - { - "epoch": 0.74, - "grad_norm": 1.2658375346336457, - "learning_rate": 3.3594379515969555e-06, - "loss": 0.1654, - "step": 14537 - }, - { - "epoch": 0.74, - "grad_norm": 0.9410506696252773, - "learning_rate": 3.3582066317858898e-06, - "loss": 0.1474, - "step": 14538 - }, - { - "epoch": 0.74, - "grad_norm": 1.256438474519618, - "learning_rate": 3.3569754921338416e-06, - "loss": 0.1898, - "step": 14539 - }, - { - "epoch": 0.74, - "grad_norm": 1.0700580104442847, - "learning_rate": 3.355744532674211e-06, - "loss": 0.1682, - "step": 14540 - }, - { - "epoch": 0.74, - "grad_norm": 0.9123242811741862, - "learning_rate": 3.3545137534403814e-06, - "loss": 0.1786, - "step": 14541 - }, - { - "epoch": 0.74, - "grad_norm": 0.9746035844674514, - "learning_rate": 3.3532831544657464e-06, - "loss": 0.1773, - "step": 14542 - }, - { - "epoch": 0.74, - "grad_norm": 1.647138127338118, - "learning_rate": 3.3520527357836764e-06, - "loss": 0.1675, - "step": 14543 - }, - { - "epoch": 0.74, - "grad_norm": 0.9351064187096528, - "learning_rate": 3.3508224974275517e-06, - "loss": 0.1534, - "step": 14544 - }, - { - "epoch": 0.74, - "grad_norm": 1.317826242356967, - "learning_rate": 3.3495924394307466e-06, - "loss": 0.1794, - "step": 14545 - }, - { - "epoch": 0.74, - "grad_norm": 0.8461449815181312, - "learning_rate": 3.348362561826618e-06, - "loss": 0.1895, - "step": 14546 - }, - { - "epoch": 0.74, - "grad_norm": 0.9737778528397362, - "learning_rate": 3.3471328646485345e-06, - "loss": 0.1576, - "step": 14547 - }, - { - "epoch": 0.74, - "grad_norm": 0.890239126549413, - "learning_rate": 3.3459033479298444e-06, - "loss": 0.1635, - "step": 14548 - }, - { - "epoch": 0.74, - "grad_norm": 1.1067314555766055, - "learning_rate": 3.3446740117039045e-06, - "loss": 0.1694, - "step": 14549 - }, - { - "epoch": 0.74, - "grad_norm": 0.7909680510638762, - "learning_rate": 3.3434448560040544e-06, - "loss": 0.1624, - "step": 14550 - }, - { - "epoch": 0.74, - "grad_norm": 1.8687447740217409, - "learning_rate": 3.342215880863643e-06, - "loss": 0.1763, - "step": 14551 - }, - { - "epoch": 0.74, - "grad_norm": 0.7685163826835121, - "learning_rate": 3.3409870863159977e-06, - "loss": 0.1599, - "step": 14552 - }, - { - "epoch": 0.74, - "grad_norm": 2.4860196312297997, - "learning_rate": 3.3397584723944542e-06, - "loss": 0.1447, - "step": 14553 - }, - { - "epoch": 0.74, - "grad_norm": 1.0828676969745032, - "learning_rate": 3.3385300391323384e-06, - "loss": 0.1508, - "step": 14554 - }, - { - "epoch": 0.74, - "grad_norm": 0.9196225158622006, - "learning_rate": 3.3373017865629742e-06, - "loss": 0.1647, - "step": 14555 - }, - { - "epoch": 0.74, - "grad_norm": 1.1366321195647922, - "learning_rate": 3.336073714719673e-06, - "loss": 0.1741, - "step": 14556 - }, - { - "epoch": 0.74, - "grad_norm": 0.9609351328524763, - "learning_rate": 3.3348458236357517e-06, - "loss": 0.1374, - "step": 14557 - }, - { - "epoch": 0.74, - "grad_norm": 1.5918165884053737, - "learning_rate": 3.33361811334451e-06, - "loss": 0.1787, - "step": 14558 - }, - { - "epoch": 0.74, - "grad_norm": 1.6087642687901853, - "learning_rate": 3.332390583879257e-06, - "loss": 0.1831, - "step": 14559 - }, - { - "epoch": 0.74, - "grad_norm": 0.954159681884898, - "learning_rate": 3.3311632352732826e-06, - "loss": 0.1633, - "step": 14560 - }, - { - "epoch": 0.74, - "grad_norm": 1.2349214257106662, - "learning_rate": 3.3299360675598826e-06, - "loss": 0.1772, - "step": 14561 - }, - { - "epoch": 0.74, - "grad_norm": 1.3786250169409255, - "learning_rate": 3.3287090807723466e-06, - "loss": 0.1524, - "step": 14562 - }, - { - "epoch": 0.74, - "grad_norm": 1.9314633192128212, - "learning_rate": 3.3274822749439506e-06, - "loss": 0.1697, - "step": 14563 - }, - { - "epoch": 0.74, - "grad_norm": 1.3280040060596827, - "learning_rate": 3.3262556501079777e-06, - "loss": 0.1545, - "step": 14564 - }, - { - "epoch": 0.74, - "grad_norm": 0.7767872809005165, - "learning_rate": 3.325029206297694e-06, - "loss": 0.1468, - "step": 14565 - }, - { - "epoch": 0.74, - "grad_norm": 1.264531729338008, - "learning_rate": 3.3238029435463727e-06, - "loss": 0.1636, - "step": 14566 - }, - { - "epoch": 0.74, - "grad_norm": 1.2727697010218713, - "learning_rate": 3.3225768618872712e-06, - "loss": 0.1635, - "step": 14567 - }, - { - "epoch": 0.74, - "grad_norm": 1.0529185194920503, - "learning_rate": 3.321350961353652e-06, - "loss": 0.1579, - "step": 14568 - }, - { - "epoch": 0.74, - "grad_norm": 1.050110099196728, - "learning_rate": 3.320125241978762e-06, - "loss": 0.1659, - "step": 14569 - }, - { - "epoch": 0.74, - "grad_norm": 1.080844483099251, - "learning_rate": 3.3188997037958535e-06, - "loss": 0.1584, - "step": 14570 - }, - { - "epoch": 0.74, - "grad_norm": 1.0033168561510555, - "learning_rate": 3.3176743468381665e-06, - "loss": 0.1654, - "step": 14571 - }, - { - "epoch": 0.74, - "grad_norm": 1.4874031465916626, - "learning_rate": 3.3164491711389434e-06, - "loss": 0.1753, - "step": 14572 - }, - { - "epoch": 0.74, - "grad_norm": 1.5101430635489643, - "learning_rate": 3.3152241767314117e-06, - "loss": 0.1632, - "step": 14573 - }, - { - "epoch": 0.74, - "grad_norm": 0.7342798075701323, - "learning_rate": 3.3139993636488042e-06, - "loss": 0.1567, - "step": 14574 - }, - { - "epoch": 0.74, - "grad_norm": 1.040576258566937, - "learning_rate": 3.3127747319243385e-06, - "loss": 0.1759, - "step": 14575 - }, - { - "epoch": 0.74, - "grad_norm": 1.0946480073485154, - "learning_rate": 3.31155028159124e-06, - "loss": 0.1689, - "step": 14576 - }, - { - "epoch": 0.74, - "grad_norm": 1.0050106610475777, - "learning_rate": 3.3103260126827143e-06, - "loss": 0.185, - "step": 14577 - }, - { - "epoch": 0.74, - "grad_norm": 1.086747451695257, - "learning_rate": 3.3091019252319755e-06, - "loss": 0.1706, - "step": 14578 - }, - { - "epoch": 0.74, - "grad_norm": 1.041621202794767, - "learning_rate": 3.3078780192722225e-06, - "loss": 0.1531, - "step": 14579 - }, - { - "epoch": 0.74, - "grad_norm": 1.0667084003881273, - "learning_rate": 3.3066542948366564e-06, - "loss": 0.1624, - "step": 14580 - }, - { - "epoch": 0.74, - "grad_norm": 1.0626094125015544, - "learning_rate": 3.3054307519584737e-06, - "loss": 0.1639, - "step": 14581 - }, - { - "epoch": 0.74, - "grad_norm": 0.9810072837276894, - "learning_rate": 3.304207390670856e-06, - "loss": 0.1596, - "step": 14582 - }, - { - "epoch": 0.74, - "grad_norm": 1.2386279164336178, - "learning_rate": 3.302984211006995e-06, - "loss": 0.175, - "step": 14583 - }, - { - "epoch": 0.74, - "grad_norm": 1.2024887775229773, - "learning_rate": 3.301761213000062e-06, - "loss": 0.1549, - "step": 14584 - }, - { - "epoch": 0.74, - "grad_norm": 1.162501635769063, - "learning_rate": 3.3005383966832383e-06, - "loss": 0.1699, - "step": 14585 - }, - { - "epoch": 0.74, - "grad_norm": 1.0021653115090114, - "learning_rate": 3.2993157620896844e-06, - "loss": 0.1644, - "step": 14586 - }, - { - "epoch": 0.74, - "grad_norm": 0.9026003490311508, - "learning_rate": 3.2980933092525704e-06, - "loss": 0.1762, - "step": 14587 - }, - { - "epoch": 0.74, - "grad_norm": 1.0016934769153616, - "learning_rate": 3.296871038205053e-06, - "loss": 0.1795, - "step": 14588 - }, - { - "epoch": 0.74, - "grad_norm": 1.1099327248410022, - "learning_rate": 3.2956489489802902e-06, - "loss": 0.176, - "step": 14589 - }, - { - "epoch": 0.74, - "grad_norm": 0.9360943590709028, - "learning_rate": 3.2944270416114256e-06, - "loss": 0.1513, - "step": 14590 - }, - { - "epoch": 0.74, - "grad_norm": 1.0377900767546822, - "learning_rate": 3.29320531613161e-06, - "loss": 0.1851, - "step": 14591 - }, - { - "epoch": 0.74, - "grad_norm": 1.1138509631808082, - "learning_rate": 3.2919837725739745e-06, - "loss": 0.1656, - "step": 14592 - }, - { - "epoch": 0.74, - "grad_norm": 0.8037614164539251, - "learning_rate": 3.290762410971663e-06, - "loss": 0.1843, - "step": 14593 - }, - { - "epoch": 0.74, - "grad_norm": 1.1417182624558486, - "learning_rate": 3.289541231357796e-06, - "loss": 0.1875, - "step": 14594 - }, - { - "epoch": 0.74, - "grad_norm": 1.1424511213698094, - "learning_rate": 3.288320233765504e-06, - "loss": 0.1674, - "step": 14595 - }, - { - "epoch": 0.74, - "grad_norm": 0.8267506888422055, - "learning_rate": 3.2870994182279036e-06, - "loss": 0.1669, - "step": 14596 - }, - { - "epoch": 0.74, - "grad_norm": 2.109347917510044, - "learning_rate": 3.2858787847781093e-06, - "loss": 0.1737, - "step": 14597 - }, - { - "epoch": 0.74, - "grad_norm": 2.4723720577426307, - "learning_rate": 3.2846583334492354e-06, - "loss": 0.1934, - "step": 14598 - }, - { - "epoch": 0.74, - "grad_norm": 1.4229642799860578, - "learning_rate": 3.2834380642743813e-06, - "loss": 0.183, - "step": 14599 - }, - { - "epoch": 0.74, - "grad_norm": 0.8882285331677954, - "learning_rate": 3.282217977286651e-06, - "loss": 0.1626, - "step": 14600 - }, - { - "epoch": 0.74, - "grad_norm": 1.040414192870245, - "learning_rate": 3.280998072519135e-06, - "loss": 0.1632, - "step": 14601 - }, - { - "epoch": 0.74, - "grad_norm": 1.5818449754137551, - "learning_rate": 3.2797783500049297e-06, - "loss": 0.1594, - "step": 14602 - }, - { - "epoch": 0.74, - "grad_norm": 1.0808796494097017, - "learning_rate": 3.2785588097771115e-06, - "loss": 0.1792, - "step": 14603 - }, - { - "epoch": 0.74, - "grad_norm": 1.8474299748024428, - "learning_rate": 3.277339451868766e-06, - "loss": 0.1518, - "step": 14604 - }, - { - "epoch": 0.74, - "grad_norm": 1.9284468258094691, - "learning_rate": 3.2761202763129674e-06, - "loss": 0.1717, - "step": 14605 - }, - { - "epoch": 0.74, - "grad_norm": 1.0873153366497863, - "learning_rate": 3.2749012831427897e-06, - "loss": 0.189, - "step": 14606 - }, - { - "epoch": 0.74, - "grad_norm": 1.30278093395129, - "learning_rate": 3.2736824723912907e-06, - "loss": 0.1631, - "step": 14607 - }, - { - "epoch": 0.74, - "grad_norm": 1.0590908505037824, - "learning_rate": 3.272463844091538e-06, - "loss": 0.1829, - "step": 14608 - }, - { - "epoch": 0.74, - "grad_norm": 0.9810362215460501, - "learning_rate": 3.2712453982765802e-06, - "loss": 0.1513, - "step": 14609 - }, - { - "epoch": 0.74, - "grad_norm": 1.8938383648760326, - "learning_rate": 3.270027134979474e-06, - "loss": 0.1808, - "step": 14610 - }, - { - "epoch": 0.74, - "grad_norm": 1.5913105125941047, - "learning_rate": 3.2688090542332597e-06, - "loss": 0.1599, - "step": 14611 - }, - { - "epoch": 0.74, - "grad_norm": 0.8290832642919869, - "learning_rate": 3.2675911560709826e-06, - "loss": 0.1719, - "step": 14612 - }, - { - "epoch": 0.74, - "grad_norm": 1.0200858042748173, - "learning_rate": 3.266373440525672e-06, - "loss": 0.1687, - "step": 14613 - }, - { - "epoch": 0.74, - "grad_norm": 0.9597410293776677, - "learning_rate": 3.265155907630363e-06, - "loss": 0.1932, - "step": 14614 - }, - { - "epoch": 0.74, - "grad_norm": 0.9905982909574356, - "learning_rate": 3.2639385574180825e-06, - "loss": 0.1726, - "step": 14615 - }, - { - "epoch": 0.74, - "grad_norm": 1.0516067557843038, - "learning_rate": 3.262721389921847e-06, - "loss": 0.1368, - "step": 14616 - }, - { - "epoch": 0.74, - "grad_norm": 1.488569177243947, - "learning_rate": 3.261504405174677e-06, - "loss": 0.1905, - "step": 14617 - }, - { - "epoch": 0.74, - "grad_norm": 1.0496740397502935, - "learning_rate": 3.2602876032095767e-06, - "loss": 0.2094, - "step": 14618 - }, - { - "epoch": 0.74, - "grad_norm": 1.2266278443699472, - "learning_rate": 3.2590709840595604e-06, - "loss": 0.1694, - "step": 14619 - }, - { - "epoch": 0.74, - "grad_norm": 3.8910979222441915, - "learning_rate": 3.2578545477576195e-06, - "loss": 0.1935, - "step": 14620 - }, - { - "epoch": 0.74, - "grad_norm": 1.1269281838780711, - "learning_rate": 3.256638294336759e-06, - "loss": 0.1769, - "step": 14621 - }, - { - "epoch": 0.74, - "grad_norm": 1.469746087556023, - "learning_rate": 3.2554222238299627e-06, - "loss": 0.1743, - "step": 14622 - }, - { - "epoch": 0.74, - "grad_norm": 1.5128575182766357, - "learning_rate": 3.2542063362702194e-06, - "loss": 0.1768, - "step": 14623 - }, - { - "epoch": 0.74, - "grad_norm": 0.9485174659917036, - "learning_rate": 3.25299063169051e-06, - "loss": 0.184, - "step": 14624 - }, - { - "epoch": 0.74, - "grad_norm": 1.819632029538112, - "learning_rate": 3.251775110123814e-06, - "loss": 0.1994, - "step": 14625 - }, - { - "epoch": 0.74, - "grad_norm": 1.1297795644354593, - "learning_rate": 3.250559771603097e-06, - "loss": 0.1556, - "step": 14626 - }, - { - "epoch": 0.74, - "grad_norm": 1.5246558499353544, - "learning_rate": 3.2493446161613297e-06, - "loss": 0.1624, - "step": 14627 - }, - { - "epoch": 0.74, - "grad_norm": 1.1439968605434216, - "learning_rate": 3.248129643831467e-06, - "loss": 0.1548, - "step": 14628 - }, - { - "epoch": 0.74, - "grad_norm": 0.9945889104991872, - "learning_rate": 3.2469148546464734e-06, - "loss": 0.1439, - "step": 14629 - }, - { - "epoch": 0.74, - "grad_norm": 1.3193732383010004, - "learning_rate": 3.245700248639292e-06, - "loss": 0.1741, - "step": 14630 - }, - { - "epoch": 0.74, - "grad_norm": 0.8404263760961086, - "learning_rate": 3.2444858258428733e-06, - "loss": 0.1655, - "step": 14631 - }, - { - "epoch": 0.74, - "grad_norm": 0.9678957974759184, - "learning_rate": 3.243271586290161e-06, - "loss": 0.1731, - "step": 14632 - }, - { - "epoch": 0.74, - "grad_norm": 1.043645278047243, - "learning_rate": 3.2420575300140855e-06, - "loss": 0.1608, - "step": 14633 - }, - { - "epoch": 0.74, - "grad_norm": 1.1111114593266385, - "learning_rate": 3.2408436570475844e-06, - "loss": 0.1684, - "step": 14634 - }, - { - "epoch": 0.74, - "grad_norm": 1.237932636905411, - "learning_rate": 3.2396299674235777e-06, - "loss": 0.1937, - "step": 14635 - }, - { - "epoch": 0.74, - "grad_norm": 0.9429836293705041, - "learning_rate": 3.2384164611749924e-06, - "loss": 0.176, - "step": 14636 - }, - { - "epoch": 0.74, - "grad_norm": 1.7866774192846304, - "learning_rate": 3.23720313833474e-06, - "loss": 0.1658, - "step": 14637 - }, - { - "epoch": 0.74, - "grad_norm": 1.1895483659518495, - "learning_rate": 3.235989998935738e-06, - "loss": 0.1748, - "step": 14638 - }, - { - "epoch": 0.74, - "grad_norm": 1.0754191928378574, - "learning_rate": 3.234777043010886e-06, - "loss": 0.175, - "step": 14639 - }, - { - "epoch": 0.74, - "grad_norm": 1.2658732859500361, - "learning_rate": 3.2335642705930883e-06, - "loss": 0.1662, - "step": 14640 - }, - { - "epoch": 0.74, - "grad_norm": 1.0116684018540003, - "learning_rate": 3.2323516817152424e-06, - "loss": 0.1697, - "step": 14641 - }, - { - "epoch": 0.74, - "grad_norm": 1.2918276885913262, - "learning_rate": 3.2311392764102422e-06, - "loss": 0.1726, - "step": 14642 - }, - { - "epoch": 0.74, - "grad_norm": 0.8766387636913678, - "learning_rate": 3.2299270547109684e-06, - "loss": 0.1728, - "step": 14643 - }, - { - "epoch": 0.74, - "grad_norm": 1.032418759945567, - "learning_rate": 3.228715016650308e-06, - "loss": 0.1813, - "step": 14644 - }, - { - "epoch": 0.74, - "grad_norm": 0.966008159927298, - "learning_rate": 3.227503162261133e-06, - "loss": 0.157, - "step": 14645 - }, - { - "epoch": 0.74, - "grad_norm": 1.2640327901478297, - "learning_rate": 3.22629149157632e-06, - "loss": 0.1604, - "step": 14646 - }, - { - "epoch": 0.74, - "grad_norm": 1.5646018269615116, - "learning_rate": 3.2250800046287303e-06, - "loss": 0.1951, - "step": 14647 - }, - { - "epoch": 0.74, - "grad_norm": 1.1532003300998375, - "learning_rate": 3.2238687014512306e-06, - "loss": 0.1482, - "step": 14648 - }, - { - "epoch": 0.74, - "grad_norm": 1.0329973344003411, - "learning_rate": 3.2226575820766727e-06, - "loss": 0.1594, - "step": 14649 - }, - { - "epoch": 0.74, - "grad_norm": 0.8869975421624843, - "learning_rate": 3.22144664653791e-06, - "loss": 0.1643, - "step": 14650 - }, - { - "epoch": 0.75, - "grad_norm": 2.0117567487631582, - "learning_rate": 3.220235894867794e-06, - "loss": 0.1664, - "step": 14651 - }, - { - "epoch": 0.75, - "grad_norm": 0.8690142384583647, - "learning_rate": 3.219025327099158e-06, - "loss": 0.1813, - "step": 14652 - }, - { - "epoch": 0.75, - "grad_norm": 1.1503442008963496, - "learning_rate": 3.2178149432648465e-06, - "loss": 0.1901, - "step": 14653 - }, - { - "epoch": 0.75, - "grad_norm": 1.072701341953258, - "learning_rate": 3.2166047433976843e-06, - "loss": 0.1462, - "step": 14654 - }, - { - "epoch": 0.75, - "grad_norm": 0.8609032986106758, - "learning_rate": 3.215394727530504e-06, - "loss": 0.1581, - "step": 14655 - }, - { - "epoch": 0.75, - "grad_norm": 0.8603901676502527, - "learning_rate": 3.214184895696123e-06, - "loss": 0.176, - "step": 14656 - }, - { - "epoch": 0.75, - "grad_norm": 0.8059552025787837, - "learning_rate": 3.21297524792736e-06, - "loss": 0.162, - "step": 14657 - }, - { - "epoch": 0.75, - "grad_norm": 0.9996015945561221, - "learning_rate": 3.211765784257026e-06, - "loss": 0.1677, - "step": 14658 - }, - { - "epoch": 0.75, - "grad_norm": 1.2233505941974974, - "learning_rate": 3.210556504717932e-06, - "loss": 0.1809, - "step": 14659 - }, - { - "epoch": 0.75, - "grad_norm": 1.0029193160617993, - "learning_rate": 3.2093474093428733e-06, - "loss": 0.1721, - "step": 14660 - }, - { - "epoch": 0.75, - "grad_norm": 1.1138558257840987, - "learning_rate": 3.2081384981646534e-06, - "loss": 0.1602, - "step": 14661 - }, - { - "epoch": 0.75, - "grad_norm": 0.8493704927249281, - "learning_rate": 3.2069297712160563e-06, - "loss": 0.1696, - "step": 14662 - }, - { - "epoch": 0.75, - "grad_norm": 0.9300782059342856, - "learning_rate": 3.2057212285298767e-06, - "loss": 0.1746, - "step": 14663 - }, - { - "epoch": 0.75, - "grad_norm": 1.6489511279917344, - "learning_rate": 3.2045128701388883e-06, - "loss": 0.1831, - "step": 14664 - }, - { - "epoch": 0.75, - "grad_norm": 0.6715377433329404, - "learning_rate": 3.2033046960758763e-06, - "loss": 0.148, - "step": 14665 - }, - { - "epoch": 0.75, - "grad_norm": 1.0328666976068182, - "learning_rate": 3.202096706373604e-06, - "loss": 0.1697, - "step": 14666 - }, - { - "epoch": 0.75, - "grad_norm": 1.553987915627782, - "learning_rate": 3.2008889010648438e-06, - "loss": 0.1515, - "step": 14667 - }, - { - "epoch": 0.75, - "grad_norm": 1.7387038180708394, - "learning_rate": 3.1996812801823585e-06, - "loss": 0.1754, - "step": 14668 - }, - { - "epoch": 0.75, - "grad_norm": 1.0705040882814145, - "learning_rate": 3.1984738437588992e-06, - "loss": 0.1509, - "step": 14669 - }, - { - "epoch": 0.75, - "grad_norm": 0.6639713788036837, - "learning_rate": 3.197266591827225e-06, - "loss": 0.1562, - "step": 14670 - }, - { - "epoch": 0.75, - "grad_norm": 1.0838213880179155, - "learning_rate": 3.1960595244200745e-06, - "loss": 0.1564, - "step": 14671 - }, - { - "epoch": 0.75, - "grad_norm": 1.1254132382331623, - "learning_rate": 3.1948526415701973e-06, - "loss": 0.1587, - "step": 14672 - }, - { - "epoch": 0.75, - "grad_norm": 1.1114188821514774, - "learning_rate": 3.1936459433103238e-06, - "loss": 0.1504, - "step": 14673 - }, - { - "epoch": 0.75, - "grad_norm": 0.8106975038011655, - "learning_rate": 3.192439429673192e-06, - "loss": 0.1733, - "step": 14674 - }, - { - "epoch": 0.75, - "grad_norm": 1.287735468603681, - "learning_rate": 3.191233100691521e-06, - "loss": 0.1759, - "step": 14675 - }, - { - "epoch": 0.75, - "grad_norm": 0.9826916736715666, - "learning_rate": 3.1900269563980368e-06, - "loss": 0.1738, - "step": 14676 - }, - { - "epoch": 0.75, - "grad_norm": 0.9326441042038448, - "learning_rate": 3.1888209968254567e-06, - "loss": 0.1624, - "step": 14677 - }, - { - "epoch": 0.75, - "grad_norm": 1.230860934384176, - "learning_rate": 3.1876152220064948e-06, - "loss": 0.1465, - "step": 14678 - }, - { - "epoch": 0.75, - "grad_norm": 1.252938672283592, - "learning_rate": 3.186409631973851e-06, - "loss": 0.1899, - "step": 14679 - }, - { - "epoch": 0.75, - "grad_norm": 0.9513008216253451, - "learning_rate": 3.1852042267602344e-06, - "loss": 0.169, - "step": 14680 - }, - { - "epoch": 0.75, - "grad_norm": 1.3737216365826161, - "learning_rate": 3.183999006398335e-06, - "loss": 0.1629, - "step": 14681 - }, - { - "epoch": 0.75, - "grad_norm": 1.0258943551734003, - "learning_rate": 3.1827939709208512e-06, - "loss": 0.1769, - "step": 14682 - }, - { - "epoch": 0.75, - "grad_norm": 1.1094208877179543, - "learning_rate": 3.181589120360462e-06, - "loss": 0.1736, - "step": 14683 - }, - { - "epoch": 0.75, - "grad_norm": 0.8428413399334154, - "learning_rate": 3.180384454749853e-06, - "loss": 0.1562, - "step": 14684 - }, - { - "epoch": 0.75, - "grad_norm": 1.0872615303852258, - "learning_rate": 3.1791799741217046e-06, - "loss": 0.1958, - "step": 14685 - }, - { - "epoch": 0.75, - "grad_norm": 1.4317740619625547, - "learning_rate": 3.17797567850868e-06, - "loss": 0.1848, - "step": 14686 - }, - { - "epoch": 0.75, - "grad_norm": 1.2333218938900938, - "learning_rate": 3.176771567943455e-06, - "loss": 0.1678, - "step": 14687 - }, - { - "epoch": 0.75, - "grad_norm": 2.200898823296642, - "learning_rate": 3.1755676424586835e-06, - "loss": 0.1623, - "step": 14688 - }, - { - "epoch": 0.75, - "grad_norm": 1.0968842840505326, - "learning_rate": 3.1743639020870277e-06, - "loss": 0.164, - "step": 14689 - }, - { - "epoch": 0.75, - "grad_norm": 1.1702189181308977, - "learning_rate": 3.173160346861134e-06, - "loss": 0.1793, - "step": 14690 - }, - { - "epoch": 0.75, - "grad_norm": 1.7023827975981258, - "learning_rate": 3.1719569768136536e-06, - "loss": 0.1598, - "step": 14691 - }, - { - "epoch": 0.75, - "grad_norm": 1.1005884174638634, - "learning_rate": 3.1707537919772236e-06, - "loss": 0.1612, - "step": 14692 - }, - { - "epoch": 0.75, - "grad_norm": 0.8893579964151115, - "learning_rate": 3.1695507923844813e-06, - "loss": 0.1508, - "step": 14693 - }, - { - "epoch": 0.75, - "grad_norm": 1.226655951675619, - "learning_rate": 3.1683479780680616e-06, - "loss": 0.1635, - "step": 14694 - }, - { - "epoch": 0.75, - "grad_norm": 0.947454141417125, - "learning_rate": 3.1671453490605906e-06, - "loss": 0.1847, - "step": 14695 - }, - { - "epoch": 0.75, - "grad_norm": 1.0113457917496576, - "learning_rate": 3.1659429053946853e-06, - "loss": 0.1553, - "step": 14696 - }, - { - "epoch": 0.75, - "grad_norm": 1.088003262151018, - "learning_rate": 3.1647406471029684e-06, - "loss": 0.1665, - "step": 14697 - }, - { - "epoch": 0.75, - "grad_norm": 1.1241668611924707, - "learning_rate": 3.1635385742180435e-06, - "loss": 0.1697, - "step": 14698 - }, - { - "epoch": 0.75, - "grad_norm": 0.8074782348500469, - "learning_rate": 3.1623366867725238e-06, - "loss": 0.1667, - "step": 14699 - }, - { - "epoch": 0.75, - "grad_norm": 0.9719281670469927, - "learning_rate": 3.1611349847990083e-06, - "loss": 0.1702, - "step": 14700 - }, - { - "epoch": 0.75, - "grad_norm": 1.115648697125075, - "learning_rate": 3.15993346833009e-06, - "loss": 0.1717, - "step": 14701 - }, - { - "epoch": 0.75, - "grad_norm": 1.1447856245133403, - "learning_rate": 3.1587321373983616e-06, - "loss": 0.1681, - "step": 14702 - }, - { - "epoch": 0.75, - "grad_norm": 1.1970897169384256, - "learning_rate": 3.1575309920364106e-06, - "loss": 0.1498, - "step": 14703 - }, - { - "epoch": 0.75, - "grad_norm": 1.4211130522484572, - "learning_rate": 3.156330032276821e-06, - "loss": 0.1797, - "step": 14704 - }, - { - "epoch": 0.75, - "grad_norm": 1.123241466425086, - "learning_rate": 3.1551292581521632e-06, - "loss": 0.1764, - "step": 14705 - }, - { - "epoch": 0.75, - "grad_norm": 1.1792557421404262, - "learning_rate": 3.1539286696950135e-06, - "loss": 0.1903, - "step": 14706 - }, - { - "epoch": 0.75, - "grad_norm": 1.0185275875438167, - "learning_rate": 3.152728266937932e-06, - "loss": 0.1541, - "step": 14707 - }, - { - "epoch": 0.75, - "grad_norm": 1.1295736663003781, - "learning_rate": 3.151528049913487e-06, - "loss": 0.183, - "step": 14708 - }, - { - "epoch": 0.75, - "grad_norm": 1.0700529459342265, - "learning_rate": 3.150328018654226e-06, - "loss": 0.1689, - "step": 14709 - }, - { - "epoch": 0.75, - "grad_norm": 1.331060876225935, - "learning_rate": 3.149128173192706e-06, - "loss": 0.1866, - "step": 14710 - }, - { - "epoch": 0.75, - "grad_norm": 1.011138547228524, - "learning_rate": 3.14792851356147e-06, - "loss": 0.1602, - "step": 14711 - }, - { - "epoch": 0.75, - "grad_norm": 0.8101396967808473, - "learning_rate": 3.1467290397930637e-06, - "loss": 0.1807, - "step": 14712 - }, - { - "epoch": 0.75, - "grad_norm": 0.8430158304766958, - "learning_rate": 3.1455297519200157e-06, - "loss": 0.1553, - "step": 14713 - }, - { - "epoch": 0.75, - "grad_norm": 1.0986957333952236, - "learning_rate": 3.144330649974864e-06, - "loss": 0.172, - "step": 14714 - }, - { - "epoch": 0.75, - "grad_norm": 0.9840712424089405, - "learning_rate": 3.1431317339901267e-06, - "loss": 0.1609, - "step": 14715 - }, - { - "epoch": 0.75, - "grad_norm": 1.1991225020841343, - "learning_rate": 3.1419330039983333e-06, - "loss": 0.1853, - "step": 14716 - }, - { - "epoch": 0.75, - "grad_norm": 1.1482875122879654, - "learning_rate": 3.14073446003199e-06, - "loss": 0.1649, - "step": 14717 - }, - { - "epoch": 0.75, - "grad_norm": 1.1271257346502312, - "learning_rate": 3.1395361021236148e-06, - "loss": 0.1825, - "step": 14718 - }, - { - "epoch": 0.75, - "grad_norm": 1.0659969335638813, - "learning_rate": 3.1383379303057084e-06, - "loss": 0.1443, - "step": 14719 - }, - { - "epoch": 0.75, - "grad_norm": 1.0191410707488946, - "learning_rate": 3.137139944610772e-06, - "loss": 0.1733, - "step": 14720 - }, - { - "epoch": 0.75, - "grad_norm": 0.76103460182858, - "learning_rate": 3.1359421450713056e-06, - "loss": 0.1548, - "step": 14721 - }, - { - "epoch": 0.75, - "grad_norm": 0.8902470007480779, - "learning_rate": 3.1347445317197935e-06, - "loss": 0.1549, - "step": 14722 - }, - { - "epoch": 0.75, - "grad_norm": 1.2089112463254226, - "learning_rate": 3.1335471045887255e-06, - "loss": 0.175, - "step": 14723 - }, - { - "epoch": 0.75, - "grad_norm": 0.973603976260908, - "learning_rate": 3.1323498637105787e-06, - "loss": 0.1607, - "step": 14724 - }, - { - "epoch": 0.75, - "grad_norm": 0.9537143962114913, - "learning_rate": 3.1311528091178324e-06, - "loss": 0.149, - "step": 14725 - }, - { - "epoch": 0.75, - "grad_norm": 0.9696065408943665, - "learning_rate": 3.12995594084295e-06, - "loss": 0.1678, - "step": 14726 - }, - { - "epoch": 0.75, - "grad_norm": 0.9559168981759395, - "learning_rate": 3.1287592589184025e-06, - "loss": 0.139, - "step": 14727 - }, - { - "epoch": 0.75, - "grad_norm": 0.9479267688834492, - "learning_rate": 3.127562763376647e-06, - "loss": 0.194, - "step": 14728 - }, - { - "epoch": 0.75, - "grad_norm": 0.9450990399734578, - "learning_rate": 3.1263664542501427e-06, - "loss": 0.2034, - "step": 14729 - }, - { - "epoch": 0.75, - "grad_norm": 0.861102551017935, - "learning_rate": 3.1251703315713333e-06, - "loss": 0.1589, - "step": 14730 - }, - { - "epoch": 0.75, - "grad_norm": 1.0664891017098699, - "learning_rate": 3.123974395372671e-06, - "loss": 0.1531, - "step": 14731 - }, - { - "epoch": 0.75, - "grad_norm": 1.0335841585823917, - "learning_rate": 3.1227786456865883e-06, - "loss": 0.1716, - "step": 14732 - }, - { - "epoch": 0.75, - "grad_norm": 2.00491646111731, - "learning_rate": 3.121583082545526e-06, - "loss": 0.1683, - "step": 14733 - }, - { - "epoch": 0.75, - "grad_norm": 1.206580417310946, - "learning_rate": 3.1203877059819077e-06, - "loss": 0.1858, - "step": 14734 - }, - { - "epoch": 0.75, - "grad_norm": 1.1377213446369543, - "learning_rate": 3.1191925160281644e-06, - "loss": 0.1572, - "step": 14735 - }, - { - "epoch": 0.75, - "grad_norm": 1.539961066898607, - "learning_rate": 3.1179975127167105e-06, - "loss": 0.1629, - "step": 14736 - }, - { - "epoch": 0.75, - "grad_norm": 0.9933739792302864, - "learning_rate": 3.1168026960799624e-06, - "loss": 0.1745, - "step": 14737 - }, - { - "epoch": 0.75, - "grad_norm": 3.090709860866361, - "learning_rate": 3.115608066150333e-06, - "loss": 0.1733, - "step": 14738 - }, - { - "epoch": 0.75, - "grad_norm": 1.1758233601106887, - "learning_rate": 3.1144136229602205e-06, - "loss": 0.1798, - "step": 14739 - }, - { - "epoch": 0.75, - "grad_norm": 0.906908514722467, - "learning_rate": 3.1132193665420306e-06, - "loss": 0.1616, - "step": 14740 - }, - { - "epoch": 0.75, - "grad_norm": 1.0543997428468896, - "learning_rate": 3.112025296928152e-06, - "loss": 0.178, - "step": 14741 - }, - { - "epoch": 0.75, - "grad_norm": 1.1950618467976908, - "learning_rate": 3.110831414150978e-06, - "loss": 0.184, - "step": 14742 - }, - { - "epoch": 0.75, - "grad_norm": 1.2784627442075167, - "learning_rate": 3.1096377182428885e-06, - "loss": 0.1458, - "step": 14743 - }, - { - "epoch": 0.75, - "grad_norm": 1.032175497273516, - "learning_rate": 3.1084442092362675e-06, - "loss": 0.1568, - "step": 14744 - }, - { - "epoch": 0.75, - "grad_norm": 1.1464911325739189, - "learning_rate": 3.1072508871634843e-06, - "loss": 0.1475, - "step": 14745 - }, - { - "epoch": 0.75, - "grad_norm": 1.1112318755193693, - "learning_rate": 3.1060577520569103e-06, - "loss": 0.1764, - "step": 14746 - }, - { - "epoch": 0.75, - "grad_norm": 1.084391657633532, - "learning_rate": 3.104864803948908e-06, - "loss": 0.1764, - "step": 14747 - }, - { - "epoch": 0.75, - "grad_norm": 1.0758458176930714, - "learning_rate": 3.1036720428718413e-06, - "loss": 0.1891, - "step": 14748 - }, - { - "epoch": 0.75, - "grad_norm": 1.1294542286974087, - "learning_rate": 3.1024794688580562e-06, - "loss": 0.176, - "step": 14749 - }, - { - "epoch": 0.75, - "grad_norm": 0.9721057798346413, - "learning_rate": 3.1012870819399087e-06, - "loss": 0.1683, - "step": 14750 - }, - { - "epoch": 0.75, - "grad_norm": 1.0807184805474042, - "learning_rate": 3.1000948821497356e-06, - "loss": 0.1756, - "step": 14751 - }, - { - "epoch": 0.75, - "grad_norm": 0.8688973454793225, - "learning_rate": 3.098902869519882e-06, - "loss": 0.1598, - "step": 14752 - }, - { - "epoch": 0.75, - "grad_norm": 0.6859639098812923, - "learning_rate": 3.097711044082674e-06, - "loss": 0.1586, - "step": 14753 - }, - { - "epoch": 0.75, - "grad_norm": 0.9853578787351401, - "learning_rate": 3.096519405870444e-06, - "loss": 0.1573, - "step": 14754 - }, - { - "epoch": 0.75, - "grad_norm": 0.9852364539415878, - "learning_rate": 3.095327954915519e-06, - "loss": 0.1824, - "step": 14755 - }, - { - "epoch": 0.75, - "grad_norm": 0.9889064845105907, - "learning_rate": 3.0941366912502092e-06, - "loss": 0.1829, - "step": 14756 - }, - { - "epoch": 0.75, - "grad_norm": 1.2036898306677362, - "learning_rate": 3.092945614906835e-06, - "loss": 0.1591, - "step": 14757 - }, - { - "epoch": 0.75, - "grad_norm": 1.0620697656612537, - "learning_rate": 3.0917547259176973e-06, - "loss": 0.1856, - "step": 14758 - }, - { - "epoch": 0.75, - "grad_norm": 1.2864163935068007, - "learning_rate": 3.090564024315107e-06, - "loss": 0.1736, - "step": 14759 - }, - { - "epoch": 0.75, - "grad_norm": 1.189074879260085, - "learning_rate": 3.089373510131354e-06, - "loss": 0.1811, - "step": 14760 - }, - { - "epoch": 0.75, - "grad_norm": 1.345609459317833, - "learning_rate": 3.0881831833987387e-06, - "loss": 0.1497, - "step": 14761 - }, - { - "epoch": 0.75, - "grad_norm": 0.8882311019034608, - "learning_rate": 3.086993044149541e-06, - "loss": 0.1618, - "step": 14762 - }, - { - "epoch": 0.75, - "grad_norm": 1.0149783463440294, - "learning_rate": 3.0858030924160477e-06, - "loss": 0.1901, - "step": 14763 - }, - { - "epoch": 0.75, - "grad_norm": 0.8790084933384287, - "learning_rate": 3.084613328230536e-06, - "loss": 0.1813, - "step": 14764 - }, - { - "epoch": 0.75, - "grad_norm": 0.8785959266232123, - "learning_rate": 3.0834237516252817e-06, - "loss": 0.1542, - "step": 14765 - }, - { - "epoch": 0.75, - "grad_norm": 1.1549721382274187, - "learning_rate": 3.082234362632546e-06, - "loss": 0.1645, - "step": 14766 - }, - { - "epoch": 0.75, - "grad_norm": 0.9054447399940857, - "learning_rate": 3.081045161284596e-06, - "loss": 0.1561, - "step": 14767 - }, - { - "epoch": 0.75, - "grad_norm": 1.0804171216263059, - "learning_rate": 3.0798561476136845e-06, - "loss": 0.1687, - "step": 14768 - }, - { - "epoch": 0.75, - "grad_norm": 1.0504575861996284, - "learning_rate": 3.078667321652069e-06, - "loss": 0.1494, - "step": 14769 - }, - { - "epoch": 0.75, - "grad_norm": 1.6349417730106812, - "learning_rate": 3.0774786834319923e-06, - "loss": 0.1834, - "step": 14770 - }, - { - "epoch": 0.75, - "grad_norm": 1.4004228489340422, - "learning_rate": 3.076290232985696e-06, - "loss": 0.1997, - "step": 14771 - }, - { - "epoch": 0.75, - "grad_norm": 1.1630110956367583, - "learning_rate": 3.0751019703454164e-06, - "loss": 0.1626, - "step": 14772 - }, - { - "epoch": 0.75, - "grad_norm": 0.9623326057353061, - "learning_rate": 3.0739138955433878e-06, - "loss": 0.1676, - "step": 14773 - }, - { - "epoch": 0.75, - "grad_norm": 1.006629466980831, - "learning_rate": 3.072726008611839e-06, - "loss": 0.1617, - "step": 14774 - }, - { - "epoch": 0.75, - "grad_norm": 1.2838886926972721, - "learning_rate": 3.0715383095829853e-06, - "loss": 0.1548, - "step": 14775 - }, - { - "epoch": 0.75, - "grad_norm": 1.0454608588604146, - "learning_rate": 3.070350798489049e-06, - "loss": 0.1815, - "step": 14776 - }, - { - "epoch": 0.75, - "grad_norm": 0.821807555146335, - "learning_rate": 3.069163475362237e-06, - "loss": 0.1492, - "step": 14777 - }, - { - "epoch": 0.75, - "grad_norm": 0.8491864566705534, - "learning_rate": 3.0679763402347584e-06, - "loss": 0.1792, - "step": 14778 - }, - { - "epoch": 0.75, - "grad_norm": 1.3916599840610384, - "learning_rate": 3.0667893931388117e-06, - "loss": 0.1748, - "step": 14779 - }, - { - "epoch": 0.75, - "grad_norm": 1.0579779934680873, - "learning_rate": 3.0656026341065927e-06, - "loss": 0.141, - "step": 14780 - }, - { - "epoch": 0.75, - "grad_norm": 1.0207426330020186, - "learning_rate": 3.0644160631702945e-06, - "loss": 0.1737, - "step": 14781 - }, - { - "epoch": 0.75, - "grad_norm": 0.8869670421806797, - "learning_rate": 3.0632296803621064e-06, - "loss": 0.1637, - "step": 14782 - }, - { - "epoch": 0.75, - "grad_norm": 0.9987869626382742, - "learning_rate": 3.0620434857142e-06, - "loss": 0.1618, - "step": 14783 - }, - { - "epoch": 0.75, - "grad_norm": 0.9076548216511201, - "learning_rate": 3.0608574792587607e-06, - "loss": 0.168, - "step": 14784 - }, - { - "epoch": 0.75, - "grad_norm": 0.8639606682030742, - "learning_rate": 3.059671661027951e-06, - "loss": 0.1503, - "step": 14785 - }, - { - "epoch": 0.75, - "grad_norm": 0.902688952256046, - "learning_rate": 3.0584860310539423e-06, - "loss": 0.15, - "step": 14786 - }, - { - "epoch": 0.75, - "grad_norm": 0.830731620498615, - "learning_rate": 3.057300589368891e-06, - "loss": 0.1611, - "step": 14787 - }, - { - "epoch": 0.75, - "grad_norm": 1.3496131252289139, - "learning_rate": 3.0561153360049513e-06, - "loss": 0.1781, - "step": 14788 - }, - { - "epoch": 0.75, - "grad_norm": 1.1472205477231077, - "learning_rate": 3.0549302709942753e-06, - "loss": 0.1688, - "step": 14789 - }, - { - "epoch": 0.75, - "grad_norm": 1.2001411056709832, - "learning_rate": 3.0537453943690076e-06, - "loss": 0.1766, - "step": 14790 - }, - { - "epoch": 0.75, - "grad_norm": 0.8679364218872742, - "learning_rate": 3.0525607061612918e-06, - "loss": 0.1603, - "step": 14791 - }, - { - "epoch": 0.75, - "grad_norm": 0.8910635946636004, - "learning_rate": 3.051376206403256e-06, - "loss": 0.1595, - "step": 14792 - }, - { - "epoch": 0.75, - "grad_norm": 0.9196162615395724, - "learning_rate": 3.050191895127036e-06, - "loss": 0.1735, - "step": 14793 - }, - { - "epoch": 0.75, - "grad_norm": 0.8051345823549313, - "learning_rate": 3.0490077723647504e-06, - "loss": 0.1763, - "step": 14794 - }, - { - "epoch": 0.75, - "grad_norm": 1.781799246737779, - "learning_rate": 3.047823838148525e-06, - "loss": 0.1682, - "step": 14795 - }, - { - "epoch": 0.75, - "grad_norm": 1.2158586699703775, - "learning_rate": 3.0466400925104665e-06, - "loss": 0.1896, - "step": 14796 - }, - { - "epoch": 0.75, - "grad_norm": 1.6484925242012631, - "learning_rate": 3.0454565354826926e-06, - "loss": 0.1647, - "step": 14797 - }, - { - "epoch": 0.75, - "grad_norm": 1.1769066884903636, - "learning_rate": 3.044273167097299e-06, - "loss": 0.1789, - "step": 14798 - }, - { - "epoch": 0.75, - "grad_norm": 0.9788467179153428, - "learning_rate": 3.0430899873863897e-06, - "loss": 0.1666, - "step": 14799 - }, - { - "epoch": 0.75, - "grad_norm": 1.161712363023313, - "learning_rate": 3.041906996382056e-06, - "loss": 0.1538, - "step": 14800 - }, - { - "epoch": 0.75, - "grad_norm": 1.5549606547832509, - "learning_rate": 3.0407241941163923e-06, - "loss": 0.159, - "step": 14801 - }, - { - "epoch": 0.75, - "grad_norm": 1.505694443799723, - "learning_rate": 3.0395415806214735e-06, - "loss": 0.193, - "step": 14802 - }, - { - "epoch": 0.75, - "grad_norm": 0.8924218114947182, - "learning_rate": 3.0383591559293867e-06, - "loss": 0.1531, - "step": 14803 - }, - { - "epoch": 0.75, - "grad_norm": 0.9852185215373485, - "learning_rate": 3.0371769200721977e-06, - "loss": 0.1806, - "step": 14804 - }, - { - "epoch": 0.75, - "grad_norm": 3.2306235996053707, - "learning_rate": 3.035994873081981e-06, - "loss": 0.1845, - "step": 14805 - }, - { - "epoch": 0.75, - "grad_norm": 0.906085883851588, - "learning_rate": 3.0348130149907928e-06, - "loss": 0.1791, - "step": 14806 - }, - { - "epoch": 0.75, - "grad_norm": 1.6349173864537299, - "learning_rate": 3.0336313458306964e-06, - "loss": 0.1894, - "step": 14807 - }, - { - "epoch": 0.75, - "grad_norm": 0.8587567982498606, - "learning_rate": 3.0324498656337453e-06, - "loss": 0.1749, - "step": 14808 - }, - { - "epoch": 0.75, - "grad_norm": 0.9227183523843092, - "learning_rate": 3.0312685744319824e-06, - "loss": 0.162, - "step": 14809 - }, - { - "epoch": 0.75, - "grad_norm": 1.015572029239059, - "learning_rate": 3.030087472257456e-06, - "loss": 0.1698, - "step": 14810 - }, - { - "epoch": 0.75, - "grad_norm": 0.9089076200824454, - "learning_rate": 3.0289065591421974e-06, - "loss": 0.1732, - "step": 14811 - }, - { - "epoch": 0.75, - "grad_norm": 0.9013864412772916, - "learning_rate": 3.027725835118245e-06, - "loss": 0.1719, - "step": 14812 - }, - { - "epoch": 0.75, - "grad_norm": 0.9075117656813344, - "learning_rate": 3.0265453002176203e-06, - "loss": 0.189, - "step": 14813 - }, - { - "epoch": 0.75, - "grad_norm": 0.9636418655739848, - "learning_rate": 3.0253649544723517e-06, - "loss": 0.1516, - "step": 14814 - }, - { - "epoch": 0.75, - "grad_norm": 0.9890103632519298, - "learning_rate": 3.024184797914449e-06, - "loss": 0.1742, - "step": 14815 - }, - { - "epoch": 0.75, - "grad_norm": 0.9107581267039456, - "learning_rate": 3.0230048305759274e-06, - "loss": 0.1547, - "step": 14816 - }, - { - "epoch": 0.75, - "grad_norm": 1.351786077343159, - "learning_rate": 3.021825052488795e-06, - "loss": 0.1514, - "step": 14817 - }, - { - "epoch": 0.75, - "grad_norm": 1.066387189414217, - "learning_rate": 3.0206454636850546e-06, - "loss": 0.1591, - "step": 14818 - }, - { - "epoch": 0.75, - "grad_norm": 1.0678313553067573, - "learning_rate": 3.0194660641966965e-06, - "loss": 0.1619, - "step": 14819 - }, - { - "epoch": 0.75, - "grad_norm": 0.8015416936308438, - "learning_rate": 3.01828685405572e-06, - "loss": 0.1536, - "step": 14820 - }, - { - "epoch": 0.75, - "grad_norm": 1.1124837008765287, - "learning_rate": 3.0171078332941028e-06, - "loss": 0.1707, - "step": 14821 - }, - { - "epoch": 0.75, - "grad_norm": 0.9763316659811963, - "learning_rate": 3.015929001943834e-06, - "loss": 0.1748, - "step": 14822 - }, - { - "epoch": 0.75, - "grad_norm": 0.9133392214693982, - "learning_rate": 3.014750360036881e-06, - "loss": 0.138, - "step": 14823 - }, - { - "epoch": 0.75, - "grad_norm": 1.1592682615722274, - "learning_rate": 3.0135719076052195e-06, - "loss": 0.187, - "step": 14824 - }, - { - "epoch": 0.75, - "grad_norm": 1.6634091590839288, - "learning_rate": 3.012393644680819e-06, - "loss": 0.1678, - "step": 14825 - }, - { - "epoch": 0.75, - "grad_norm": 0.8682168822734787, - "learning_rate": 3.0112155712956305e-06, - "loss": 0.1535, - "step": 14826 - }, - { - "epoch": 0.75, - "grad_norm": 0.9829329473504432, - "learning_rate": 3.0100376874816183e-06, - "loss": 0.1736, - "step": 14827 - }, - { - "epoch": 0.75, - "grad_norm": 0.9083554632551712, - "learning_rate": 3.0088599932707263e-06, - "loss": 0.1657, - "step": 14828 - }, - { - "epoch": 0.75, - "grad_norm": 0.9103203674379348, - "learning_rate": 3.007682488694904e-06, - "loss": 0.1599, - "step": 14829 - }, - { - "epoch": 0.75, - "grad_norm": 0.8617723421597876, - "learning_rate": 3.0065051737860863e-06, - "loss": 0.1631, - "step": 14830 - }, - { - "epoch": 0.75, - "grad_norm": 1.5295785801478599, - "learning_rate": 3.0053280485762136e-06, - "loss": 0.168, - "step": 14831 - }, - { - "epoch": 0.75, - "grad_norm": 1.010684464650659, - "learning_rate": 3.00415111309721e-06, - "loss": 0.1756, - "step": 14832 - }, - { - "epoch": 0.75, - "grad_norm": 1.1799490977267986, - "learning_rate": 3.0029743673810032e-06, - "loss": 0.2012, - "step": 14833 - }, - { - "epoch": 0.75, - "grad_norm": 1.353078753744399, - "learning_rate": 3.0017978114595103e-06, - "loss": 0.1659, - "step": 14834 - }, - { - "epoch": 0.75, - "grad_norm": 3.3560744486295873, - "learning_rate": 3.000621445364651e-06, - "loss": 0.167, - "step": 14835 - }, - { - "epoch": 0.75, - "grad_norm": 1.3890085822404705, - "learning_rate": 2.999445269128327e-06, - "loss": 0.1639, - "step": 14836 - }, - { - "epoch": 0.75, - "grad_norm": 1.0976117458451753, - "learning_rate": 2.9982692827824487e-06, - "loss": 0.1728, - "step": 14837 - }, - { - "epoch": 0.75, - "grad_norm": 1.079496472102237, - "learning_rate": 2.9970934863589086e-06, - "loss": 0.1678, - "step": 14838 - }, - { - "epoch": 0.75, - "grad_norm": 0.842838928379633, - "learning_rate": 2.9959178798896062e-06, - "loss": 0.1422, - "step": 14839 - }, - { - "epoch": 0.75, - "grad_norm": 0.9306792323418289, - "learning_rate": 2.994742463406427e-06, - "loss": 0.1736, - "step": 14840 - }, - { - "epoch": 0.75, - "grad_norm": 0.8812207056004026, - "learning_rate": 2.9935672369412507e-06, - "loss": 0.1755, - "step": 14841 - }, - { - "epoch": 0.75, - "grad_norm": 1.0940408765288119, - "learning_rate": 2.9923922005259588e-06, - "loss": 0.1793, - "step": 14842 - }, - { - "epoch": 0.75, - "grad_norm": 0.8373831947473562, - "learning_rate": 2.991217354192425e-06, - "loss": 0.1765, - "step": 14843 - }, - { - "epoch": 0.75, - "grad_norm": 2.3787908438981957, - "learning_rate": 2.9900426979725183e-06, - "loss": 0.1743, - "step": 14844 - }, - { - "epoch": 0.75, - "grad_norm": 1.1502864105435349, - "learning_rate": 2.9888682318980975e-06, - "loss": 0.1636, - "step": 14845 - }, - { - "epoch": 0.75, - "grad_norm": 1.044664524211976, - "learning_rate": 2.9876939560010244e-06, - "loss": 0.1583, - "step": 14846 - }, - { - "epoch": 0.75, - "grad_norm": 0.9738137676351397, - "learning_rate": 2.986519870313146e-06, - "loss": 0.172, - "step": 14847 - }, - { - "epoch": 0.76, - "grad_norm": 1.8642222069852814, - "learning_rate": 2.985345974866315e-06, - "loss": 0.1423, - "step": 14848 - }, - { - "epoch": 0.76, - "grad_norm": 0.810328271150909, - "learning_rate": 2.984172269692367e-06, - "loss": 0.1637, - "step": 14849 - }, - { - "epoch": 0.76, - "grad_norm": 1.1340679026784688, - "learning_rate": 2.9829987548231432e-06, - "loss": 0.1611, - "step": 14850 - }, - { - "epoch": 0.76, - "grad_norm": 1.070376228632435, - "learning_rate": 2.9818254302904735e-06, - "loss": 0.1681, - "step": 14851 - }, - { - "epoch": 0.76, - "grad_norm": 1.0497269808800511, - "learning_rate": 2.9806522961261896e-06, - "loss": 0.1623, - "step": 14852 - }, - { - "epoch": 0.76, - "grad_norm": 1.4840784587493108, - "learning_rate": 2.979479352362106e-06, - "loss": 0.1548, - "step": 14853 - }, - { - "epoch": 0.76, - "grad_norm": 0.9688261626228124, - "learning_rate": 2.9783065990300432e-06, - "loss": 0.1758, - "step": 14854 - }, - { - "epoch": 0.76, - "grad_norm": 0.9197866418393205, - "learning_rate": 2.9771340361618075e-06, - "loss": 0.1787, - "step": 14855 - }, - { - "epoch": 0.76, - "grad_norm": 1.6718879704798661, - "learning_rate": 2.975961663789212e-06, - "loss": 0.1573, - "step": 14856 - }, - { - "epoch": 0.76, - "grad_norm": 0.9578131340707566, - "learning_rate": 2.9747894819440514e-06, - "loss": 0.172, - "step": 14857 - }, - { - "epoch": 0.76, - "grad_norm": 0.930379079874052, - "learning_rate": 2.9736174906581216e-06, - "loss": 0.1655, - "step": 14858 - }, - { - "epoch": 0.76, - "grad_norm": 0.7535419036472633, - "learning_rate": 2.9724456899632126e-06, - "loss": 0.1555, - "step": 14859 - }, - { - "epoch": 0.76, - "grad_norm": 1.2210499593610467, - "learning_rate": 2.971274079891112e-06, - "loss": 0.1867, - "step": 14860 - }, - { - "epoch": 0.76, - "grad_norm": 1.1231733960892714, - "learning_rate": 2.970102660473603e-06, - "loss": 0.1519, - "step": 14861 - }, - { - "epoch": 0.76, - "grad_norm": 1.111326106758271, - "learning_rate": 2.9689314317424513e-06, - "loss": 0.1765, - "step": 14862 - }, - { - "epoch": 0.76, - "grad_norm": 4.061444520286897, - "learning_rate": 2.9677603937294364e-06, - "loss": 0.1544, - "step": 14863 - }, - { - "epoch": 0.76, - "grad_norm": 1.0349430214778528, - "learning_rate": 2.966589546466314e-06, - "loss": 0.1518, - "step": 14864 - }, - { - "epoch": 0.76, - "grad_norm": 1.0634051726805134, - "learning_rate": 2.96541888998485e-06, - "loss": 0.1696, - "step": 14865 - }, - { - "epoch": 0.76, - "grad_norm": 1.1166888090548637, - "learning_rate": 2.964248424316795e-06, - "loss": 0.1612, - "step": 14866 - }, - { - "epoch": 0.76, - "grad_norm": 1.3298552807639807, - "learning_rate": 2.9630781494938997e-06, - "loss": 0.1833, - "step": 14867 - }, - { - "epoch": 0.76, - "grad_norm": 1.236808824422805, - "learning_rate": 2.961908065547905e-06, - "loss": 0.1529, - "step": 14868 - }, - { - "epoch": 0.76, - "grad_norm": 1.4039050352062317, - "learning_rate": 2.960738172510551e-06, - "loss": 0.1812, - "step": 14869 - }, - { - "epoch": 0.76, - "grad_norm": 1.1263685791612776, - "learning_rate": 2.959568470413572e-06, - "loss": 0.1675, - "step": 14870 - }, - { - "epoch": 0.76, - "grad_norm": 1.2335040738408105, - "learning_rate": 2.9583989592886985e-06, - "loss": 0.1848, - "step": 14871 - }, - { - "epoch": 0.76, - "grad_norm": 1.0305781290114908, - "learning_rate": 2.957229639167648e-06, - "loss": 0.1641, - "step": 14872 - }, - { - "epoch": 0.76, - "grad_norm": 0.8071061008183066, - "learning_rate": 2.956060510082145e-06, - "loss": 0.1619, - "step": 14873 - }, - { - "epoch": 0.76, - "grad_norm": 1.0075024836831434, - "learning_rate": 2.954891572063895e-06, - "loss": 0.1607, - "step": 14874 - }, - { - "epoch": 0.76, - "grad_norm": 1.1847496166015081, - "learning_rate": 2.9537228251446125e-06, - "loss": 0.1713, - "step": 14875 - }, - { - "epoch": 0.76, - "grad_norm": 1.177493790578264, - "learning_rate": 2.9525542693559926e-06, - "loss": 0.1785, - "step": 14876 - }, - { - "epoch": 0.76, - "grad_norm": 1.0309264603516655, - "learning_rate": 2.9513859047297366e-06, - "loss": 0.178, - "step": 14877 - }, - { - "epoch": 0.76, - "grad_norm": 1.493847712149435, - "learning_rate": 2.9502177312975387e-06, - "loss": 0.1551, - "step": 14878 - }, - { - "epoch": 0.76, - "grad_norm": 1.1904513588353383, - "learning_rate": 2.9490497490910806e-06, - "loss": 0.1753, - "step": 14879 - }, - { - "epoch": 0.76, - "grad_norm": 0.8749388948121452, - "learning_rate": 2.9478819581420493e-06, - "loss": 0.1617, - "step": 14880 - }, - { - "epoch": 0.76, - "grad_norm": 1.2930194430877306, - "learning_rate": 2.9467143584821145e-06, - "loss": 0.1655, - "step": 14881 - }, - { - "epoch": 0.76, - "grad_norm": 0.9587201065916398, - "learning_rate": 2.9455469501429557e-06, - "loss": 0.1638, - "step": 14882 - }, - { - "epoch": 0.76, - "grad_norm": 1.031977326446882, - "learning_rate": 2.9443797331562295e-06, - "loss": 0.1904, - "step": 14883 - }, - { - "epoch": 0.76, - "grad_norm": 1.1442108789360643, - "learning_rate": 2.9432127075536056e-06, - "loss": 0.1624, - "step": 14884 - }, - { - "epoch": 0.76, - "grad_norm": 1.5933491740422148, - "learning_rate": 2.942045873366731e-06, - "loss": 0.169, - "step": 14885 - }, - { - "epoch": 0.76, - "grad_norm": 1.243282759250556, - "learning_rate": 2.9408792306272625e-06, - "loss": 0.17, - "step": 14886 - }, - { - "epoch": 0.76, - "grad_norm": 0.8418987826724236, - "learning_rate": 2.9397127793668435e-06, - "loss": 0.1578, - "step": 14887 - }, - { - "epoch": 0.76, - "grad_norm": 0.9680157927451623, - "learning_rate": 2.938546519617116e-06, - "loss": 0.1646, - "step": 14888 - }, - { - "epoch": 0.76, - "grad_norm": 1.483610722538024, - "learning_rate": 2.93738045140971e-06, - "loss": 0.1769, - "step": 14889 - }, - { - "epoch": 0.76, - "grad_norm": 0.7884359484807628, - "learning_rate": 2.9362145747762626e-06, - "loss": 0.1601, - "step": 14890 - }, - { - "epoch": 0.76, - "grad_norm": 1.121494941591859, - "learning_rate": 2.9350488897483897e-06, - "loss": 0.1894, - "step": 14891 - }, - { - "epoch": 0.76, - "grad_norm": 1.2684480232810904, - "learning_rate": 2.9338833963577184e-06, - "loss": 0.1585, - "step": 14892 - }, - { - "epoch": 0.76, - "grad_norm": 1.3305637146146216, - "learning_rate": 2.932718094635858e-06, - "loss": 0.2072, - "step": 14893 - }, - { - "epoch": 0.76, - "grad_norm": 1.3770407432912448, - "learning_rate": 2.9315529846144162e-06, - "loss": 0.169, - "step": 14894 - }, - { - "epoch": 0.76, - "grad_norm": 0.9470699738751776, - "learning_rate": 2.9303880663249985e-06, - "loss": 0.1744, - "step": 14895 - }, - { - "epoch": 0.76, - "grad_norm": 1.0590649066073299, - "learning_rate": 2.9292233397992043e-06, - "loss": 0.1538, - "step": 14896 - }, - { - "epoch": 0.76, - "grad_norm": 1.0300673069287118, - "learning_rate": 2.9280588050686287e-06, - "loss": 0.1912, - "step": 14897 - }, - { - "epoch": 0.76, - "grad_norm": 1.0014920304678387, - "learning_rate": 2.9268944621648554e-06, - "loss": 0.1973, - "step": 14898 - }, - { - "epoch": 0.76, - "grad_norm": 1.6558249390820992, - "learning_rate": 2.925730311119471e-06, - "loss": 0.1602, - "step": 14899 - }, - { - "epoch": 0.76, - "grad_norm": 1.1327455208047599, - "learning_rate": 2.92456635196405e-06, - "loss": 0.1705, - "step": 14900 - }, - { - "epoch": 0.76, - "grad_norm": 1.0468497436593192, - "learning_rate": 2.9234025847301685e-06, - "loss": 0.1679, - "step": 14901 - }, - { - "epoch": 0.76, - "grad_norm": 0.8848502370869115, - "learning_rate": 2.922239009449388e-06, - "loss": 0.1557, - "step": 14902 - }, - { - "epoch": 0.76, - "grad_norm": 8.23931656859147, - "learning_rate": 2.9210756261532746e-06, - "loss": 0.1499, - "step": 14903 - }, - { - "epoch": 0.76, - "grad_norm": 1.5678878010715138, - "learning_rate": 2.919912434873385e-06, - "loss": 0.1665, - "step": 14904 - }, - { - "epoch": 0.76, - "grad_norm": 0.8378693122157262, - "learning_rate": 2.918749435641274e-06, - "loss": 0.1657, - "step": 14905 - }, - { - "epoch": 0.76, - "grad_norm": 0.856513967247774, - "learning_rate": 2.91758662848848e-06, - "loss": 0.1677, - "step": 14906 - }, - { - "epoch": 0.76, - "grad_norm": 1.5243921986143543, - "learning_rate": 2.9164240134465527e-06, - "loss": 0.1661, - "step": 14907 - }, - { - "epoch": 0.76, - "grad_norm": 1.8660583832399535, - "learning_rate": 2.9152615905470216e-06, - "loss": 0.1686, - "step": 14908 - }, - { - "epoch": 0.76, - "grad_norm": 1.1635873514860704, - "learning_rate": 2.9140993598214217e-06, - "loss": 0.1904, - "step": 14909 - }, - { - "epoch": 0.76, - "grad_norm": 2.3164720376059806, - "learning_rate": 2.912937321301278e-06, - "loss": 0.1663, - "step": 14910 - }, - { - "epoch": 0.76, - "grad_norm": 1.0820820698019624, - "learning_rate": 2.911775475018106e-06, - "loss": 0.1674, - "step": 14911 - }, - { - "epoch": 0.76, - "grad_norm": 0.9985268905374507, - "learning_rate": 2.910613821003425e-06, - "loss": 0.1577, - "step": 14912 - }, - { - "epoch": 0.76, - "grad_norm": 1.1918499534603288, - "learning_rate": 2.9094523592887446e-06, - "loss": 0.182, - "step": 14913 - }, - { - "epoch": 0.76, - "grad_norm": 1.2000467515295787, - "learning_rate": 2.9082910899055717e-06, - "loss": 0.1647, - "step": 14914 - }, - { - "epoch": 0.76, - "grad_norm": 1.425609422227406, - "learning_rate": 2.9071300128854007e-06, - "loss": 0.1677, - "step": 14915 - }, - { - "epoch": 0.76, - "grad_norm": 2.4507515531337645, - "learning_rate": 2.9059691282597325e-06, - "loss": 0.1733, - "step": 14916 - }, - { - "epoch": 0.76, - "grad_norm": 1.3031539416406344, - "learning_rate": 2.9048084360600494e-06, - "loss": 0.1608, - "step": 14917 - }, - { - "epoch": 0.76, - "grad_norm": 0.8005481370728588, - "learning_rate": 2.9036479363178405e-06, - "loss": 0.1748, - "step": 14918 - }, - { - "epoch": 0.76, - "grad_norm": 1.4637440704208484, - "learning_rate": 2.9024876290645787e-06, - "loss": 0.1587, - "step": 14919 - }, - { - "epoch": 0.76, - "grad_norm": 1.022621538622618, - "learning_rate": 2.9013275143317453e-06, - "loss": 0.1641, - "step": 14920 - }, - { - "epoch": 0.76, - "grad_norm": 1.5938994367208572, - "learning_rate": 2.9001675921507998e-06, - "loss": 0.1825, - "step": 14921 - }, - { - "epoch": 0.76, - "grad_norm": 1.5829965650281022, - "learning_rate": 2.8990078625532104e-06, - "loss": 0.147, - "step": 14922 - }, - { - "epoch": 0.76, - "grad_norm": 1.609257440006581, - "learning_rate": 2.8978483255704325e-06, - "loss": 0.1568, - "step": 14923 - }, - { - "epoch": 0.76, - "grad_norm": 1.4008987644802413, - "learning_rate": 2.8966889812339237e-06, - "loss": 0.1795, - "step": 14924 - }, - { - "epoch": 0.76, - "grad_norm": 1.3581868558224195, - "learning_rate": 2.8955298295751245e-06, - "loss": 0.1589, - "step": 14925 - }, - { - "epoch": 0.76, - "grad_norm": 0.9135404012632948, - "learning_rate": 2.8943708706254824e-06, - "loss": 0.1602, - "step": 14926 - }, - { - "epoch": 0.76, - "grad_norm": 1.0471223644901613, - "learning_rate": 2.893212104416432e-06, - "loss": 0.1847, - "step": 14927 - }, - { - "epoch": 0.76, - "grad_norm": 0.853204413537538, - "learning_rate": 2.8920535309794018e-06, - "loss": 0.1418, - "step": 14928 - }, - { - "epoch": 0.76, - "grad_norm": 1.1207432193111928, - "learning_rate": 2.8908951503458217e-06, - "loss": 0.1736, - "step": 14929 - }, - { - "epoch": 0.76, - "grad_norm": 1.4981305908868932, - "learning_rate": 2.8897369625471105e-06, - "loss": 0.1729, - "step": 14930 - }, - { - "epoch": 0.76, - "grad_norm": 1.1182424351020628, - "learning_rate": 2.8885789676146903e-06, - "loss": 0.1718, - "step": 14931 - }, - { - "epoch": 0.76, - "grad_norm": 1.354916786961543, - "learning_rate": 2.887421165579963e-06, - "loss": 0.1484, - "step": 14932 - }, - { - "epoch": 0.76, - "grad_norm": 1.5863329120631235, - "learning_rate": 2.8862635564743424e-06, - "loss": 0.1692, - "step": 14933 - }, - { - "epoch": 0.76, - "grad_norm": 0.9166221221801297, - "learning_rate": 2.8851061403292213e-06, - "loss": 0.1578, - "step": 14934 - }, - { - "epoch": 0.76, - "grad_norm": 2.035905778944065, - "learning_rate": 2.8839489171760015e-06, - "loss": 0.1755, - "step": 14935 - }, - { - "epoch": 0.76, - "grad_norm": 0.8688123045023516, - "learning_rate": 2.882791887046066e-06, - "loss": 0.1514, - "step": 14936 - }, - { - "epoch": 0.76, - "grad_norm": 0.9109323250386723, - "learning_rate": 2.8816350499708044e-06, - "loss": 0.1676, - "step": 14937 - }, - { - "epoch": 0.76, - "grad_norm": 1.116979008834176, - "learning_rate": 2.8804784059815914e-06, - "loss": 0.1597, - "step": 14938 - }, - { - "epoch": 0.76, - "grad_norm": 0.9080606464625794, - "learning_rate": 2.879321955109805e-06, - "loss": 0.1694, - "step": 14939 - }, - { - "epoch": 0.76, - "grad_norm": 1.4528067891010092, - "learning_rate": 2.878165697386812e-06, - "loss": 0.1516, - "step": 14940 - }, - { - "epoch": 0.76, - "grad_norm": 1.0721172933704726, - "learning_rate": 2.87700963284398e-06, - "loss": 0.1821, - "step": 14941 - }, - { - "epoch": 0.76, - "grad_norm": 1.0226958352661308, - "learning_rate": 2.87585376151266e-06, - "loss": 0.1745, - "step": 14942 - }, - { - "epoch": 0.76, - "grad_norm": 0.973993706553113, - "learning_rate": 2.8746980834242133e-06, - "loss": 0.1899, - "step": 14943 - }, - { - "epoch": 0.76, - "grad_norm": 1.0773078735796286, - "learning_rate": 2.8735425986099796e-06, - "loss": 0.1737, - "step": 14944 - }, - { - "epoch": 0.76, - "grad_norm": 0.9840464557781372, - "learning_rate": 2.87238730710131e-06, - "loss": 0.172, - "step": 14945 - }, - { - "epoch": 0.76, - "grad_norm": 0.9383012071885413, - "learning_rate": 2.871232208929533e-06, - "loss": 0.1812, - "step": 14946 - }, - { - "epoch": 0.76, - "grad_norm": 1.2462761175413661, - "learning_rate": 2.8700773041259844e-06, - "loss": 0.1845, - "step": 14947 - }, - { - "epoch": 0.76, - "grad_norm": 1.32041091829035, - "learning_rate": 2.8689225927219956e-06, - "loss": 0.1949, - "step": 14948 - }, - { - "epoch": 0.76, - "grad_norm": 1.1985684244279526, - "learning_rate": 2.8677680747488812e-06, - "loss": 0.1804, - "step": 14949 - }, - { - "epoch": 0.76, - "grad_norm": 1.1156791419115983, - "learning_rate": 2.8666137502379632e-06, - "loss": 0.199, - "step": 14950 - }, - { - "epoch": 0.76, - "grad_norm": 1.149339106701358, - "learning_rate": 2.8654596192205476e-06, - "loss": 0.1742, - "step": 14951 - }, - { - "epoch": 0.76, - "grad_norm": 0.9928395353613638, - "learning_rate": 2.8643056817279448e-06, - "loss": 0.1598, - "step": 14952 - }, - { - "epoch": 0.76, - "grad_norm": 1.0961845244112949, - "learning_rate": 2.863151937791452e-06, - "loss": 0.1732, - "step": 14953 - }, - { - "epoch": 0.76, - "grad_norm": 0.8936819931072454, - "learning_rate": 2.8619983874423672e-06, - "loss": 0.1709, - "step": 14954 - }, - { - "epoch": 0.76, - "grad_norm": 1.083003245432732, - "learning_rate": 2.8608450307119772e-06, - "loss": 0.1599, - "step": 14955 - }, - { - "epoch": 0.76, - "grad_norm": 1.0890360019663619, - "learning_rate": 2.8596918676315687e-06, - "loss": 0.1804, - "step": 14956 - }, - { - "epoch": 0.76, - "grad_norm": 0.921592315801085, - "learning_rate": 2.8585388982324226e-06, - "loss": 0.1718, - "step": 14957 - }, - { - "epoch": 0.76, - "grad_norm": 1.0191836834087407, - "learning_rate": 2.8573861225458143e-06, - "loss": 0.1588, - "step": 14958 - }, - { - "epoch": 0.76, - "grad_norm": 1.049360441030582, - "learning_rate": 2.8562335406030074e-06, - "loss": 0.1789, - "step": 14959 - }, - { - "epoch": 0.76, - "grad_norm": 1.021851023037416, - "learning_rate": 2.8550811524352727e-06, - "loss": 0.1371, - "step": 14960 - }, - { - "epoch": 0.76, - "grad_norm": 1.0470103475321244, - "learning_rate": 2.8539289580738627e-06, - "loss": 0.1542, - "step": 14961 - }, - { - "epoch": 0.76, - "grad_norm": 1.033767752087917, - "learning_rate": 2.8527769575500363e-06, - "loss": 0.1691, - "step": 14962 - }, - { - "epoch": 0.76, - "grad_norm": 1.7569499057857303, - "learning_rate": 2.851625150895039e-06, - "loss": 0.1773, - "step": 14963 - }, - { - "epoch": 0.76, - "grad_norm": 1.9601401947005221, - "learning_rate": 2.850473538140108e-06, - "loss": 0.1709, - "step": 14964 - }, - { - "epoch": 0.76, - "grad_norm": 0.9617429520069206, - "learning_rate": 2.8493221193164886e-06, - "loss": 0.1505, - "step": 14965 - }, - { - "epoch": 0.76, - "grad_norm": 1.6305919387900003, - "learning_rate": 2.84817089445541e-06, - "loss": 0.1857, - "step": 14966 - }, - { - "epoch": 0.76, - "grad_norm": 0.992601166343874, - "learning_rate": 2.847019863588102e-06, - "loss": 0.1686, - "step": 14967 - }, - { - "epoch": 0.76, - "grad_norm": 1.213736998481106, - "learning_rate": 2.845869026745781e-06, - "loss": 0.196, - "step": 14968 - }, - { - "epoch": 0.76, - "grad_norm": 0.9976238049114304, - "learning_rate": 2.8447183839596705e-06, - "loss": 0.1558, - "step": 14969 - }, - { - "epoch": 0.76, - "grad_norm": 0.8547754154366524, - "learning_rate": 2.8435679352609747e-06, - "loss": 0.1787, - "step": 14970 - }, - { - "epoch": 0.76, - "grad_norm": 2.2194540168258508, - "learning_rate": 2.8424176806809068e-06, - "loss": 0.1417, - "step": 14971 - }, - { - "epoch": 0.76, - "grad_norm": 1.14402890444547, - "learning_rate": 2.8412676202506596e-06, - "loss": 0.1685, - "step": 14972 - }, - { - "epoch": 0.76, - "grad_norm": 0.9557704528886504, - "learning_rate": 2.8401177540014323e-06, - "loss": 0.173, - "step": 14973 - }, - { - "epoch": 0.76, - "grad_norm": 0.920417194706065, - "learning_rate": 2.838968081964416e-06, - "loss": 0.1653, - "step": 14974 - }, - { - "epoch": 0.76, - "grad_norm": 1.1526702838319869, - "learning_rate": 2.8378186041707977e-06, - "loss": 0.1799, - "step": 14975 - }, - { - "epoch": 0.76, - "grad_norm": 1.0949852077610864, - "learning_rate": 2.8366693206517503e-06, - "loss": 0.1584, - "step": 14976 - }, - { - "epoch": 0.76, - "grad_norm": 1.2539237468301876, - "learning_rate": 2.835520231438457e-06, - "loss": 0.1617, - "step": 14977 - }, - { - "epoch": 0.76, - "grad_norm": 1.015585176023755, - "learning_rate": 2.834371336562077e-06, - "loss": 0.1508, - "step": 14978 - }, - { - "epoch": 0.76, - "grad_norm": 1.0288380937596868, - "learning_rate": 2.833222636053784e-06, - "loss": 0.2067, - "step": 14979 - }, - { - "epoch": 0.76, - "grad_norm": 1.4883629291388756, - "learning_rate": 2.8320741299447306e-06, - "loss": 0.1725, - "step": 14980 - }, - { - "epoch": 0.76, - "grad_norm": 0.8305302577491667, - "learning_rate": 2.8309258182660693e-06, - "loss": 0.1578, - "step": 14981 - }, - { - "epoch": 0.76, - "grad_norm": 1.9041171256480882, - "learning_rate": 2.829777701048949e-06, - "loss": 0.1483, - "step": 14982 - }, - { - "epoch": 0.76, - "grad_norm": 1.1448825644487437, - "learning_rate": 2.828629778324514e-06, - "loss": 0.1614, - "step": 14983 - }, - { - "epoch": 0.76, - "grad_norm": 1.3660210089224283, - "learning_rate": 2.827482050123905e-06, - "loss": 0.1702, - "step": 14984 - }, - { - "epoch": 0.76, - "grad_norm": 1.3638108245767226, - "learning_rate": 2.8263345164782473e-06, - "loss": 0.1741, - "step": 14985 - }, - { - "epoch": 0.76, - "grad_norm": 1.2411375513006744, - "learning_rate": 2.8251871774186736e-06, - "loss": 0.1566, - "step": 14986 - }, - { - "epoch": 0.76, - "grad_norm": 0.9342897147302336, - "learning_rate": 2.8240400329762994e-06, - "loss": 0.1805, - "step": 14987 - }, - { - "epoch": 0.76, - "grad_norm": 1.0304143141046265, - "learning_rate": 2.8228930831822486e-06, - "loss": 0.1634, - "step": 14988 - }, - { - "epoch": 0.76, - "grad_norm": 0.775467905274122, - "learning_rate": 2.821746328067625e-06, - "loss": 0.1691, - "step": 14989 - }, - { - "epoch": 0.76, - "grad_norm": 0.9910104080265708, - "learning_rate": 2.82059976766354e-06, - "loss": 0.1616, - "step": 14990 - }, - { - "epoch": 0.76, - "grad_norm": 1.6828123750215387, - "learning_rate": 2.81945340200109e-06, - "loss": 0.1768, - "step": 14991 - }, - { - "epoch": 0.76, - "grad_norm": 1.0531142469858463, - "learning_rate": 2.818307231111371e-06, - "loss": 0.197, - "step": 14992 - }, - { - "epoch": 0.76, - "grad_norm": 1.2279946254505785, - "learning_rate": 2.8171612550254746e-06, - "loss": 0.1581, - "step": 14993 - }, - { - "epoch": 0.76, - "grad_norm": 1.2522759008118522, - "learning_rate": 2.816015473774487e-06, - "loss": 0.1314, - "step": 14994 - }, - { - "epoch": 0.76, - "grad_norm": 10.995273571793144, - "learning_rate": 2.814869887389483e-06, - "loss": 0.172, - "step": 14995 - }, - { - "epoch": 0.76, - "grad_norm": 1.0064004224764271, - "learning_rate": 2.813724495901543e-06, - "loss": 0.1722, - "step": 14996 - }, - { - "epoch": 0.76, - "grad_norm": 1.0028392290053534, - "learning_rate": 2.812579299341731e-06, - "loss": 0.1667, - "step": 14997 - }, - { - "epoch": 0.76, - "grad_norm": 0.9677404251213896, - "learning_rate": 2.811434297741108e-06, - "loss": 0.1836, - "step": 14998 - }, - { - "epoch": 0.76, - "grad_norm": 0.9845281773459438, - "learning_rate": 2.8102894911307367e-06, - "loss": 0.1618, - "step": 14999 - }, - { - "epoch": 0.76, - "grad_norm": 0.8616214642095336, - "learning_rate": 2.809144879541669e-06, - "loss": 0.162, - "step": 15000 - }, - { - "epoch": 0.76, - "grad_norm": 1.6859524501238814, - "learning_rate": 2.808000463004954e-06, - "loss": 0.1528, - "step": 15001 - }, - { - "epoch": 0.76, - "grad_norm": 2.160218092080421, - "learning_rate": 2.8068562415516308e-06, - "loss": 0.1746, - "step": 15002 - }, - { - "epoch": 0.76, - "grad_norm": 1.1401600807234633, - "learning_rate": 2.8057122152127413e-06, - "loss": 0.1789, - "step": 15003 - }, - { - "epoch": 0.76, - "grad_norm": 1.032889345595397, - "learning_rate": 2.804568384019312e-06, - "loss": 0.1571, - "step": 15004 - }, - { - "epoch": 0.76, - "grad_norm": 0.8592553578925399, - "learning_rate": 2.8034247480023735e-06, - "loss": 0.1717, - "step": 15005 - }, - { - "epoch": 0.76, - "grad_norm": 1.1253400311781885, - "learning_rate": 2.8022813071929434e-06, - "loss": 0.1502, - "step": 15006 - }, - { - "epoch": 0.76, - "grad_norm": 0.9738577653061913, - "learning_rate": 2.8011380616220407e-06, - "loss": 0.1847, - "step": 15007 - }, - { - "epoch": 0.76, - "grad_norm": 1.3238903583814505, - "learning_rate": 2.7999950113206732e-06, - "loss": 0.1883, - "step": 15008 - }, - { - "epoch": 0.76, - "grad_norm": 0.8885027709396675, - "learning_rate": 2.798852156319847e-06, - "loss": 0.1562, - "step": 15009 - }, - { - "epoch": 0.76, - "grad_norm": 1.0721210227832059, - "learning_rate": 2.7977094966505624e-06, - "loss": 0.1961, - "step": 15010 - }, - { - "epoch": 0.76, - "grad_norm": 1.1721695304850344, - "learning_rate": 2.7965670323438178e-06, - "loss": 0.179, - "step": 15011 - }, - { - "epoch": 0.76, - "grad_norm": 1.3159953959620643, - "learning_rate": 2.7954247634305965e-06, - "loss": 0.1562, - "step": 15012 - }, - { - "epoch": 0.76, - "grad_norm": 0.9882891878751142, - "learning_rate": 2.7942826899418886e-06, - "loss": 0.1814, - "step": 15013 - }, - { - "epoch": 0.76, - "grad_norm": 1.071625820697916, - "learning_rate": 2.7931408119086668e-06, - "loss": 0.1998, - "step": 15014 - }, - { - "epoch": 0.76, - "grad_norm": 1.1167208938383686, - "learning_rate": 2.79199912936191e-06, - "loss": 0.1751, - "step": 15015 - }, - { - "epoch": 0.76, - "grad_norm": 1.0858374521812366, - "learning_rate": 2.790857642332584e-06, - "loss": 0.1723, - "step": 15016 - }, - { - "epoch": 0.76, - "grad_norm": 0.9124234370105371, - "learning_rate": 2.789716350851649e-06, - "loss": 0.1581, - "step": 15017 - }, - { - "epoch": 0.76, - "grad_norm": 1.3363800719743475, - "learning_rate": 2.7885752549500644e-06, - "loss": 0.1549, - "step": 15018 - }, - { - "epoch": 0.76, - "grad_norm": 1.0255205464470902, - "learning_rate": 2.7874343546587846e-06, - "loss": 0.1846, - "step": 15019 - }, - { - "epoch": 0.76, - "grad_norm": 0.9317828134518765, - "learning_rate": 2.7862936500087566e-06, - "loss": 0.164, - "step": 15020 - }, - { - "epoch": 0.76, - "grad_norm": 0.8632663766608207, - "learning_rate": 2.7851531410309194e-06, - "loss": 0.1741, - "step": 15021 - }, - { - "epoch": 0.76, - "grad_norm": 1.1484226086902631, - "learning_rate": 2.784012827756213e-06, - "loss": 0.1755, - "step": 15022 - }, - { - "epoch": 0.76, - "grad_norm": 0.8400458929925192, - "learning_rate": 2.7828727102155627e-06, - "loss": 0.1696, - "step": 15023 - }, - { - "epoch": 0.76, - "grad_norm": 0.9106478377375347, - "learning_rate": 2.7817327884399014e-06, - "loss": 0.1882, - "step": 15024 - }, - { - "epoch": 0.76, - "grad_norm": 1.3584652549796459, - "learning_rate": 2.7805930624601427e-06, - "loss": 0.147, - "step": 15025 - }, - { - "epoch": 0.76, - "grad_norm": 0.9858682454425882, - "learning_rate": 2.779453532307206e-06, - "loss": 0.1864, - "step": 15026 - }, - { - "epoch": 0.76, - "grad_norm": 1.9690793197636596, - "learning_rate": 2.778314198011999e-06, - "loss": 0.1641, - "step": 15027 - }, - { - "epoch": 0.76, - "grad_norm": 0.9040955497405057, - "learning_rate": 2.7771750596054305e-06, - "loss": 0.1608, - "step": 15028 - }, - { - "epoch": 0.76, - "grad_norm": 1.2710265204702358, - "learning_rate": 2.7760361171183934e-06, - "loss": 0.1689, - "step": 15029 - }, - { - "epoch": 0.76, - "grad_norm": 1.1442824752872833, - "learning_rate": 2.774897370581787e-06, - "loss": 0.1542, - "step": 15030 - }, - { - "epoch": 0.76, - "grad_norm": 1.0493120502774447, - "learning_rate": 2.7737588200264953e-06, - "loss": 0.1577, - "step": 15031 - }, - { - "epoch": 0.76, - "grad_norm": 1.0022591482167709, - "learning_rate": 2.7726204654834067e-06, - "loss": 0.1754, - "step": 15032 - }, - { - "epoch": 0.76, - "grad_norm": 2.844231737981002, - "learning_rate": 2.7714823069833964e-06, - "loss": 0.1736, - "step": 15033 - }, - { - "epoch": 0.76, - "grad_norm": 1.4792458478526416, - "learning_rate": 2.770344344557333e-06, - "loss": 0.1683, - "step": 15034 - }, - { - "epoch": 0.76, - "grad_norm": 1.2239895492597026, - "learning_rate": 2.7692065782360876e-06, - "loss": 0.1632, - "step": 15035 - }, - { - "epoch": 0.76, - "grad_norm": 0.991586953334937, - "learning_rate": 2.7680690080505234e-06, - "loss": 0.1479, - "step": 15036 - }, - { - "epoch": 0.76, - "grad_norm": 1.2104851538907433, - "learning_rate": 2.7669316340314977e-06, - "loss": 0.1594, - "step": 15037 - }, - { - "epoch": 0.76, - "grad_norm": 0.9131732170375813, - "learning_rate": 2.765794456209857e-06, - "loss": 0.1563, - "step": 15038 - }, - { - "epoch": 0.76, - "grad_norm": 2.228509649935169, - "learning_rate": 2.7646574746164533e-06, - "loss": 0.1604, - "step": 15039 - }, - { - "epoch": 0.76, - "grad_norm": 0.9544290582199312, - "learning_rate": 2.763520689282122e-06, - "loss": 0.1653, - "step": 15040 - }, - { - "epoch": 0.76, - "grad_norm": 0.9012892594587696, - "learning_rate": 2.762384100237703e-06, - "loss": 0.1657, - "step": 15041 - }, - { - "epoch": 0.76, - "grad_norm": 1.0570949666178724, - "learning_rate": 2.761247707514021e-06, - "loss": 0.1753, - "step": 15042 - }, - { - "epoch": 0.76, - "grad_norm": 1.0871063071718943, - "learning_rate": 2.7601115111419043e-06, - "loss": 0.1647, - "step": 15043 - }, - { - "epoch": 0.77, - "grad_norm": 0.8618285709761413, - "learning_rate": 2.758975511152171e-06, - "loss": 0.1569, - "step": 15044 - }, - { - "epoch": 0.77, - "grad_norm": 1.547419540473778, - "learning_rate": 2.7578397075756404e-06, - "loss": 0.1648, - "step": 15045 - }, - { - "epoch": 0.77, - "grad_norm": 1.0384641122566953, - "learning_rate": 2.756704100443113e-06, - "loss": 0.1722, - "step": 15046 - }, - { - "epoch": 0.77, - "grad_norm": 0.8121363605603122, - "learning_rate": 2.7555686897853983e-06, - "loss": 0.1751, - "step": 15047 - }, - { - "epoch": 0.77, - "grad_norm": 0.7748124221997982, - "learning_rate": 2.754433475633289e-06, - "loss": 0.1717, - "step": 15048 - }, - { - "epoch": 0.77, - "grad_norm": 1.2459613623991952, - "learning_rate": 2.753298458017585e-06, - "loss": 0.1689, - "step": 15049 - }, - { - "epoch": 0.77, - "grad_norm": 0.7775055960696998, - "learning_rate": 2.7521636369690687e-06, - "loss": 0.1628, - "step": 15050 - }, - { - "epoch": 0.77, - "grad_norm": 1.0486466173882525, - "learning_rate": 2.7510290125185203e-06, - "loss": 0.172, - "step": 15051 - }, - { - "epoch": 0.77, - "grad_norm": 1.0057226924940363, - "learning_rate": 2.7498945846967197e-06, - "loss": 0.1626, - "step": 15052 - }, - { - "epoch": 0.77, - "grad_norm": 0.7624894425040479, - "learning_rate": 2.7487603535344375e-06, - "loss": 0.1775, - "step": 15053 - }, - { - "epoch": 0.77, - "grad_norm": 1.1466543189425418, - "learning_rate": 2.747626319062444e-06, - "loss": 0.1886, - "step": 15054 - }, - { - "epoch": 0.77, - "grad_norm": 0.8899801858769728, - "learning_rate": 2.7464924813114926e-06, - "loss": 0.1682, - "step": 15055 - }, - { - "epoch": 0.77, - "grad_norm": 1.07558876667753, - "learning_rate": 2.7453588403123453e-06, - "loss": 0.1724, - "step": 15056 - }, - { - "epoch": 0.77, - "grad_norm": 1.0008619053830783, - "learning_rate": 2.7442253960957466e-06, - "loss": 0.1681, - "step": 15057 - }, - { - "epoch": 0.77, - "grad_norm": 1.0007558626762345, - "learning_rate": 2.743092148692447e-06, - "loss": 0.1731, - "step": 15058 - }, - { - "epoch": 0.77, - "grad_norm": 1.289803694977765, - "learning_rate": 2.741959098133179e-06, - "loss": 0.1628, - "step": 15059 - }, - { - "epoch": 0.77, - "grad_norm": 1.4436849600973654, - "learning_rate": 2.7408262444486844e-06, - "loss": 0.1721, - "step": 15060 - }, - { - "epoch": 0.77, - "grad_norm": 0.9500321666703038, - "learning_rate": 2.739693587669684e-06, - "loss": 0.1584, - "step": 15061 - }, - { - "epoch": 0.77, - "grad_norm": 0.902479170382834, - "learning_rate": 2.7385611278269054e-06, - "loss": 0.1546, - "step": 15062 - }, - { - "epoch": 0.77, - "grad_norm": 0.9355144421315881, - "learning_rate": 2.737428864951066e-06, - "loss": 0.1703, - "step": 15063 - }, - { - "epoch": 0.77, - "grad_norm": 0.8297513558457941, - "learning_rate": 2.736296799072883e-06, - "loss": 0.1633, - "step": 15064 - }, - { - "epoch": 0.77, - "grad_norm": 0.9871101636958638, - "learning_rate": 2.7351649302230553e-06, - "loss": 0.157, - "step": 15065 - }, - { - "epoch": 0.77, - "grad_norm": 1.167113858545876, - "learning_rate": 2.7340332584322927e-06, - "loss": 0.1538, - "step": 15066 - }, - { - "epoch": 0.77, - "grad_norm": 0.8959136261904793, - "learning_rate": 2.7329017837312875e-06, - "loss": 0.1492, - "step": 15067 - }, - { - "epoch": 0.77, - "grad_norm": 1.0774540888808917, - "learning_rate": 2.7317705061507306e-06, - "loss": 0.166, - "step": 15068 - }, - { - "epoch": 0.77, - "grad_norm": 1.1236019697314357, - "learning_rate": 2.7306394257213078e-06, - "loss": 0.1596, - "step": 15069 - }, - { - "epoch": 0.77, - "grad_norm": 1.474374951856886, - "learning_rate": 2.729508542473702e-06, - "loss": 0.1653, - "step": 15070 - }, - { - "epoch": 0.77, - "grad_norm": 1.7946612833079612, - "learning_rate": 2.72837785643859e-06, - "loss": 0.18, - "step": 15071 - }, - { - "epoch": 0.77, - "grad_norm": 1.8050299194968318, - "learning_rate": 2.727247367646637e-06, - "loss": 0.1701, - "step": 15072 - }, - { - "epoch": 0.77, - "grad_norm": 0.9506576876624673, - "learning_rate": 2.726117076128513e-06, - "loss": 0.1817, - "step": 15073 - }, - { - "epoch": 0.77, - "grad_norm": 1.2612952414807954, - "learning_rate": 2.72498698191487e-06, - "loss": 0.1674, - "step": 15074 - }, - { - "epoch": 0.77, - "grad_norm": 1.3330606571640877, - "learning_rate": 2.72385708503637e-06, - "loss": 0.1822, - "step": 15075 - }, - { - "epoch": 0.77, - "grad_norm": 1.2579774240785415, - "learning_rate": 2.7227273855236535e-06, - "loss": 0.1622, - "step": 15076 - }, - { - "epoch": 0.77, - "grad_norm": 0.8701657202529226, - "learning_rate": 2.721597883407372e-06, - "loss": 0.1991, - "step": 15077 - }, - { - "epoch": 0.77, - "grad_norm": 2.8020324879684333, - "learning_rate": 2.720468578718155e-06, - "loss": 0.1824, - "step": 15078 - }, - { - "epoch": 0.77, - "grad_norm": 1.0579987620792803, - "learning_rate": 2.7193394714866396e-06, - "loss": 0.1741, - "step": 15079 - }, - { - "epoch": 0.77, - "grad_norm": 1.2534880239050814, - "learning_rate": 2.7182105617434516e-06, - "loss": 0.1839, - "step": 15080 - }, - { - "epoch": 0.77, - "grad_norm": 1.070027748514126, - "learning_rate": 2.7170818495192163e-06, - "loss": 0.1591, - "step": 15081 - }, - { - "epoch": 0.77, - "grad_norm": 1.145745299916052, - "learning_rate": 2.7159533348445455e-06, - "loss": 0.1667, - "step": 15082 - }, - { - "epoch": 0.77, - "grad_norm": 1.0809253146050162, - "learning_rate": 2.7148250177500534e-06, - "loss": 0.1813, - "step": 15083 - }, - { - "epoch": 0.77, - "grad_norm": 1.0653540590236674, - "learning_rate": 2.7136968982663427e-06, - "loss": 0.1828, - "step": 15084 - }, - { - "epoch": 0.77, - "grad_norm": 1.5754998020627489, - "learning_rate": 2.7125689764240173e-06, - "loss": 0.1576, - "step": 15085 - }, - { - "epoch": 0.77, - "grad_norm": 1.740473150238454, - "learning_rate": 2.711441252253669e-06, - "loss": 0.1617, - "step": 15086 - }, - { - "epoch": 0.77, - "grad_norm": 1.5122081253911168, - "learning_rate": 2.7103137257858867e-06, - "loss": 0.1749, - "step": 15087 - }, - { - "epoch": 0.77, - "grad_norm": 2.131401733962826, - "learning_rate": 2.7091863970512564e-06, - "loss": 0.1797, - "step": 15088 - }, - { - "epoch": 0.77, - "grad_norm": 1.0242405027137653, - "learning_rate": 2.708059266080356e-06, - "loss": 0.1844, - "step": 15089 - }, - { - "epoch": 0.77, - "grad_norm": 0.8234663848030147, - "learning_rate": 2.7069323329037632e-06, - "loss": 0.1545, - "step": 15090 - }, - { - "epoch": 0.77, - "grad_norm": 1.363966608963242, - "learning_rate": 2.7058055975520405e-06, - "loss": 0.1817, - "step": 15091 - }, - { - "epoch": 0.77, - "grad_norm": 1.4655481719724168, - "learning_rate": 2.704679060055755e-06, - "loss": 0.1859, - "step": 15092 - }, - { - "epoch": 0.77, - "grad_norm": 1.3100547717657192, - "learning_rate": 2.703552720445459e-06, - "loss": 0.1922, - "step": 15093 - }, - { - "epoch": 0.77, - "grad_norm": 0.968730998319128, - "learning_rate": 2.702426578751711e-06, - "loss": 0.1686, - "step": 15094 - }, - { - "epoch": 0.77, - "grad_norm": 2.464987708079324, - "learning_rate": 2.701300635005052e-06, - "loss": 0.1654, - "step": 15095 - }, - { - "epoch": 0.77, - "grad_norm": 1.1518904234958036, - "learning_rate": 2.7001748892360247e-06, - "loss": 0.1543, - "step": 15096 - }, - { - "epoch": 0.77, - "grad_norm": 1.1980341783397113, - "learning_rate": 2.6990493414751652e-06, - "loss": 0.152, - "step": 15097 - }, - { - "epoch": 0.77, - "grad_norm": 1.0117771613701103, - "learning_rate": 2.697923991753009e-06, - "loss": 0.1548, - "step": 15098 - }, - { - "epoch": 0.77, - "grad_norm": 1.3844556881797818, - "learning_rate": 2.696798840100072e-06, - "loss": 0.1779, - "step": 15099 - }, - { - "epoch": 0.77, - "grad_norm": 1.2282852794525476, - "learning_rate": 2.6956738865468832e-06, - "loss": 0.1638, - "step": 15100 - }, - { - "epoch": 0.77, - "grad_norm": 0.9476410124228332, - "learning_rate": 2.6945491311239504e-06, - "loss": 0.1522, - "step": 15101 - }, - { - "epoch": 0.77, - "grad_norm": 0.8403668096547272, - "learning_rate": 2.693424573861787e-06, - "loss": 0.1584, - "step": 15102 - }, - { - "epoch": 0.77, - "grad_norm": 1.0560591291079178, - "learning_rate": 2.692300214790895e-06, - "loss": 0.1808, - "step": 15103 - }, - { - "epoch": 0.77, - "grad_norm": 0.9424701654290392, - "learning_rate": 2.6911760539417698e-06, - "loss": 0.1649, - "step": 15104 - }, - { - "epoch": 0.77, - "grad_norm": 1.54019391223232, - "learning_rate": 2.690052091344907e-06, - "loss": 0.1624, - "step": 15105 - }, - { - "epoch": 0.77, - "grad_norm": 1.1614143182985948, - "learning_rate": 2.688928327030793e-06, - "loss": 0.175, - "step": 15106 - }, - { - "epoch": 0.77, - "grad_norm": 1.184879417210331, - "learning_rate": 2.6878047610299152e-06, - "loss": 0.1578, - "step": 15107 - }, - { - "epoch": 0.77, - "grad_norm": 1.531118791624129, - "learning_rate": 2.686681393372743e-06, - "loss": 0.1774, - "step": 15108 - }, - { - "epoch": 0.77, - "grad_norm": 0.9591536192176581, - "learning_rate": 2.6855582240897536e-06, - "loss": 0.1653, - "step": 15109 - }, - { - "epoch": 0.77, - "grad_norm": 1.0411427116028142, - "learning_rate": 2.6844352532114084e-06, - "loss": 0.1697, - "step": 15110 - }, - { - "epoch": 0.77, - "grad_norm": 0.956752950405443, - "learning_rate": 2.6833124807681722e-06, - "loss": 0.1704, - "step": 15111 - }, - { - "epoch": 0.77, - "grad_norm": 1.111203752316407, - "learning_rate": 2.6821899067904956e-06, - "loss": 0.1614, - "step": 15112 - }, - { - "epoch": 0.77, - "grad_norm": 0.9663017267242348, - "learning_rate": 2.6810675313088343e-06, - "loss": 0.1689, - "step": 15113 - }, - { - "epoch": 0.77, - "grad_norm": 1.0200424958227927, - "learning_rate": 2.6799453543536256e-06, - "loss": 0.1824, - "step": 15114 - }, - { - "epoch": 0.77, - "grad_norm": 1.0230622487319652, - "learning_rate": 2.678823375955314e-06, - "loss": 0.1733, - "step": 15115 - }, - { - "epoch": 0.77, - "grad_norm": 0.9057815915974087, - "learning_rate": 2.677701596144331e-06, - "loss": 0.1597, - "step": 15116 - }, - { - "epoch": 0.77, - "grad_norm": 1.3771708626699875, - "learning_rate": 2.6765800149511088e-06, - "loss": 0.1747, - "step": 15117 - }, - { - "epoch": 0.77, - "grad_norm": 1.141733838833967, - "learning_rate": 2.6754586324060637e-06, - "loss": 0.1738, - "step": 15118 - }, - { - "epoch": 0.77, - "grad_norm": 1.0691457001073312, - "learning_rate": 2.6743374485396212e-06, - "loss": 0.1564, - "step": 15119 - }, - { - "epoch": 0.77, - "grad_norm": 1.082285544415921, - "learning_rate": 2.673216463382189e-06, - "loss": 0.1542, - "step": 15120 - }, - { - "epoch": 0.77, - "grad_norm": 2.0461481068809784, - "learning_rate": 2.67209567696417e-06, - "loss": 0.1884, - "step": 15121 - }, - { - "epoch": 0.77, - "grad_norm": 1.5995436617467997, - "learning_rate": 2.6709750893159705e-06, - "loss": 0.1822, - "step": 15122 - }, - { - "epoch": 0.77, - "grad_norm": 1.1283157762236709, - "learning_rate": 2.6698547004679853e-06, - "loss": 0.1556, - "step": 15123 - }, - { - "epoch": 0.77, - "grad_norm": 1.2046144453330978, - "learning_rate": 2.6687345104506092e-06, - "loss": 0.152, - "step": 15124 - }, - { - "epoch": 0.77, - "grad_norm": 1.1382689858156538, - "learning_rate": 2.6676145192942194e-06, - "loss": 0.1657, - "step": 15125 - }, - { - "epoch": 0.77, - "grad_norm": 0.9200882739660786, - "learning_rate": 2.666494727029203e-06, - "loss": 0.1579, - "step": 15126 - }, - { - "epoch": 0.77, - "grad_norm": 1.029441048435042, - "learning_rate": 2.6653751336859292e-06, - "loss": 0.1602, - "step": 15127 - }, - { - "epoch": 0.77, - "grad_norm": 0.9313604753550834, - "learning_rate": 2.6642557392947722e-06, - "loss": 0.1747, - "step": 15128 - }, - { - "epoch": 0.77, - "grad_norm": 1.6679795449211656, - "learning_rate": 2.663136543886089e-06, - "loss": 0.1757, - "step": 15129 - }, - { - "epoch": 0.77, - "grad_norm": 1.0950242043797351, - "learning_rate": 2.6620175474902444e-06, - "loss": 0.1506, - "step": 15130 - }, - { - "epoch": 0.77, - "grad_norm": 1.1122224424154379, - "learning_rate": 2.660898750137585e-06, - "loss": 0.1647, - "step": 15131 - }, - { - "epoch": 0.77, - "grad_norm": 1.0499158991950026, - "learning_rate": 2.659780151858462e-06, - "loss": 0.1696, - "step": 15132 - }, - { - "epoch": 0.77, - "grad_norm": 0.8062873238846927, - "learning_rate": 2.658661752683217e-06, - "loss": 0.1594, - "step": 15133 - }, - { - "epoch": 0.77, - "grad_norm": 1.2888923453349863, - "learning_rate": 2.657543552642189e-06, - "loss": 0.1772, - "step": 15134 - }, - { - "epoch": 0.77, - "grad_norm": 2.6795161267833896, - "learning_rate": 2.6564255517657044e-06, - "loss": 0.1526, - "step": 15135 - }, - { - "epoch": 0.77, - "grad_norm": 0.9699837684539419, - "learning_rate": 2.655307750084094e-06, - "loss": 0.1669, - "step": 15136 - }, - { - "epoch": 0.77, - "grad_norm": 0.8597368366303423, - "learning_rate": 2.6541901476276767e-06, - "loss": 0.1964, - "step": 15137 - }, - { - "epoch": 0.77, - "grad_norm": 1.2499292435659783, - "learning_rate": 2.653072744426762e-06, - "loss": 0.1752, - "step": 15138 - }, - { - "epoch": 0.77, - "grad_norm": 1.031377290048498, - "learning_rate": 2.6519555405116683e-06, - "loss": 0.1867, - "step": 15139 - }, - { - "epoch": 0.77, - "grad_norm": 0.8232779349067384, - "learning_rate": 2.650838535912692e-06, - "loss": 0.1527, - "step": 15140 - }, - { - "epoch": 0.77, - "grad_norm": 1.0424697846561075, - "learning_rate": 2.6497217306601365e-06, - "loss": 0.1901, - "step": 15141 - }, - { - "epoch": 0.77, - "grad_norm": 1.1272349519057292, - "learning_rate": 2.6486051247842935e-06, - "loss": 0.2006, - "step": 15142 - }, - { - "epoch": 0.77, - "grad_norm": 0.801356139954553, - "learning_rate": 2.647488718315454e-06, - "loss": 0.1599, - "step": 15143 - }, - { - "epoch": 0.77, - "grad_norm": 1.3660574893840298, - "learning_rate": 2.6463725112838968e-06, - "loss": 0.1848, - "step": 15144 - }, - { - "epoch": 0.77, - "grad_norm": 0.8537934209396962, - "learning_rate": 2.645256503719902e-06, - "loss": 0.1759, - "step": 15145 - }, - { - "epoch": 0.77, - "grad_norm": 1.1754110127965345, - "learning_rate": 2.6441406956537376e-06, - "loss": 0.1752, - "step": 15146 - }, - { - "epoch": 0.77, - "grad_norm": 0.7957917440483896, - "learning_rate": 2.643025087115676e-06, - "loss": 0.1595, - "step": 15147 - }, - { - "epoch": 0.77, - "grad_norm": 1.0393249746050452, - "learning_rate": 2.6419096781359698e-06, - "loss": 0.193, - "step": 15148 - }, - { - "epoch": 0.77, - "grad_norm": 1.287822483747699, - "learning_rate": 2.6407944687448804e-06, - "loss": 0.1492, - "step": 15149 - }, - { - "epoch": 0.77, - "grad_norm": 0.7834088365545043, - "learning_rate": 2.639679458972657e-06, - "loss": 0.1561, - "step": 15150 - }, - { - "epoch": 0.77, - "grad_norm": 5.253358728554841, - "learning_rate": 2.6385646488495466e-06, - "loss": 0.1622, - "step": 15151 - }, - { - "epoch": 0.77, - "grad_norm": 1.4703873122591984, - "learning_rate": 2.637450038405782e-06, - "loss": 0.1869, - "step": 15152 - }, - { - "epoch": 0.77, - "grad_norm": 0.8843757015526899, - "learning_rate": 2.6363356276716046e-06, - "loss": 0.1897, - "step": 15153 - }, - { - "epoch": 0.77, - "grad_norm": 2.082280409237539, - "learning_rate": 2.6352214166772363e-06, - "loss": 0.1589, - "step": 15154 - }, - { - "epoch": 0.77, - "grad_norm": 0.9841338075804467, - "learning_rate": 2.634107405452906e-06, - "loss": 0.1781, - "step": 15155 - }, - { - "epoch": 0.77, - "grad_norm": 0.9614771366458215, - "learning_rate": 2.6329935940288286e-06, - "loss": 0.1715, - "step": 15156 - }, - { - "epoch": 0.77, - "grad_norm": 1.2490440362855904, - "learning_rate": 2.6318799824352125e-06, - "loss": 0.1555, - "step": 15157 - }, - { - "epoch": 0.77, - "grad_norm": 1.14649656482353, - "learning_rate": 2.6307665707022678e-06, - "loss": 0.1588, - "step": 15158 - }, - { - "epoch": 0.77, - "grad_norm": 1.8545450488293231, - "learning_rate": 2.629653358860197e-06, - "loss": 0.1686, - "step": 15159 - }, - { - "epoch": 0.77, - "grad_norm": 0.7601598800170505, - "learning_rate": 2.628540346939198e-06, - "loss": 0.1584, - "step": 15160 - }, - { - "epoch": 0.77, - "grad_norm": 0.9209688037861604, - "learning_rate": 2.6274275349694544e-06, - "loss": 0.1715, - "step": 15161 - }, - { - "epoch": 0.77, - "grad_norm": 1.9961783908841748, - "learning_rate": 2.6263149229811592e-06, - "loss": 0.17, - "step": 15162 - }, - { - "epoch": 0.77, - "grad_norm": 1.0209861385594814, - "learning_rate": 2.6252025110044852e-06, - "loss": 0.1623, - "step": 15163 - }, - { - "epoch": 0.77, - "grad_norm": 1.4066217365859481, - "learning_rate": 2.6240902990696126e-06, - "loss": 0.1661, - "step": 15164 - }, - { - "epoch": 0.77, - "grad_norm": 0.7784038130987568, - "learning_rate": 2.6229782872067042e-06, - "loss": 0.183, - "step": 15165 - }, - { - "epoch": 0.77, - "grad_norm": 1.2715081433842295, - "learning_rate": 2.6218664754459267e-06, - "loss": 0.1943, - "step": 15166 - }, - { - "epoch": 0.77, - "grad_norm": 1.08772074648383, - "learning_rate": 2.6207548638174374e-06, - "loss": 0.1765, - "step": 15167 - }, - { - "epoch": 0.77, - "grad_norm": 1.6357270332436424, - "learning_rate": 2.6196434523513916e-06, - "loss": 0.1594, - "step": 15168 - }, - { - "epoch": 0.77, - "grad_norm": 1.4580920469900076, - "learning_rate": 2.6185322410779312e-06, - "loss": 0.1907, - "step": 15169 - }, - { - "epoch": 0.77, - "grad_norm": 1.5075212882245206, - "learning_rate": 2.617421230027205e-06, - "loss": 0.1922, - "step": 15170 - }, - { - "epoch": 0.77, - "grad_norm": 1.527004105493696, - "learning_rate": 2.616310419229341e-06, - "loss": 0.1636, - "step": 15171 - }, - { - "epoch": 0.77, - "grad_norm": 1.4319575105625417, - "learning_rate": 2.615199808714476e-06, - "loss": 0.1791, - "step": 15172 - }, - { - "epoch": 0.77, - "grad_norm": 1.545697916364387, - "learning_rate": 2.614089398512735e-06, - "loss": 0.1862, - "step": 15173 - }, - { - "epoch": 0.77, - "grad_norm": 1.0144438909510543, - "learning_rate": 2.6129791886542323e-06, - "loss": 0.1704, - "step": 15174 - }, - { - "epoch": 0.77, - "grad_norm": 1.1318400246614295, - "learning_rate": 2.6118691791690865e-06, - "loss": 0.1844, - "step": 15175 - }, - { - "epoch": 0.77, - "grad_norm": 2.9289726534512144, - "learning_rate": 2.6107593700874056e-06, - "loss": 0.1372, - "step": 15176 - }, - { - "epoch": 0.77, - "grad_norm": 1.1153456205612922, - "learning_rate": 2.609649761439298e-06, - "loss": 0.1693, - "step": 15177 - }, - { - "epoch": 0.77, - "grad_norm": 0.9627905346417706, - "learning_rate": 2.6085403532548547e-06, - "loss": 0.1608, - "step": 15178 - }, - { - "epoch": 0.77, - "grad_norm": 0.9862059209427578, - "learning_rate": 2.6074311455641756e-06, - "loss": 0.1863, - "step": 15179 - }, - { - "epoch": 0.77, - "grad_norm": 0.9294597573359676, - "learning_rate": 2.60632213839734e-06, - "loss": 0.1724, - "step": 15180 - }, - { - "epoch": 0.77, - "grad_norm": 0.899107516158811, - "learning_rate": 2.6052133317844387e-06, - "loss": 0.164, - "step": 15181 - }, - { - "epoch": 0.77, - "grad_norm": 1.3657017149652035, - "learning_rate": 2.6041047257555384e-06, - "loss": 0.1706, - "step": 15182 - }, - { - "epoch": 0.77, - "grad_norm": 8.480391779689135, - "learning_rate": 2.6029963203407195e-06, - "loss": 0.1561, - "step": 15183 - }, - { - "epoch": 0.77, - "grad_norm": 1.596005320977434, - "learning_rate": 2.6018881155700403e-06, - "loss": 0.1809, - "step": 15184 - }, - { - "epoch": 0.77, - "grad_norm": 3.873634574087102, - "learning_rate": 2.600780111473563e-06, - "loss": 0.1833, - "step": 15185 - }, - { - "epoch": 0.77, - "grad_norm": 4.740690382810113, - "learning_rate": 2.5996723080813433e-06, - "loss": 0.1672, - "step": 15186 - }, - { - "epoch": 0.77, - "grad_norm": 1.025542092337058, - "learning_rate": 2.5985647054234332e-06, - "loss": 0.1653, - "step": 15187 - }, - { - "epoch": 0.77, - "grad_norm": 1.02212783368464, - "learning_rate": 2.597457303529871e-06, - "loss": 0.1616, - "step": 15188 - }, - { - "epoch": 0.77, - "grad_norm": 1.548952516964932, - "learning_rate": 2.5963501024307005e-06, - "loss": 0.189, - "step": 15189 - }, - { - "epoch": 0.77, - "grad_norm": 1.3022275673627424, - "learning_rate": 2.595243102155951e-06, - "loss": 0.1571, - "step": 15190 - }, - { - "epoch": 0.77, - "grad_norm": 1.0470981966654658, - "learning_rate": 2.594136302735648e-06, - "loss": 0.1625, - "step": 15191 - }, - { - "epoch": 0.77, - "grad_norm": 1.5030610793104051, - "learning_rate": 2.5930297041998152e-06, - "loss": 0.1643, - "step": 15192 - }, - { - "epoch": 0.77, - "grad_norm": 0.8238779173041473, - "learning_rate": 2.591923306578471e-06, - "loss": 0.1498, - "step": 15193 - }, - { - "epoch": 0.77, - "grad_norm": 1.0571831014548954, - "learning_rate": 2.590817109901629e-06, - "loss": 0.1685, - "step": 15194 - }, - { - "epoch": 0.77, - "grad_norm": 0.8950212936038725, - "learning_rate": 2.589711114199287e-06, - "loss": 0.1648, - "step": 15195 - }, - { - "epoch": 0.77, - "grad_norm": 1.0896129472676792, - "learning_rate": 2.5886053195014537e-06, - "loss": 0.1841, - "step": 15196 - }, - { - "epoch": 0.77, - "grad_norm": 1.1069006521388893, - "learning_rate": 2.587499725838116e-06, - "loss": 0.1755, - "step": 15197 - }, - { - "epoch": 0.77, - "grad_norm": 1.305433030656861, - "learning_rate": 2.5863943332392703e-06, - "loss": 0.1801, - "step": 15198 - }, - { - "epoch": 0.77, - "grad_norm": 2.0925570729945107, - "learning_rate": 2.5852891417348933e-06, - "loss": 0.148, - "step": 15199 - }, - { - "epoch": 0.77, - "grad_norm": 1.1185001788765023, - "learning_rate": 2.5841841513549703e-06, - "loss": 0.1791, - "step": 15200 - }, - { - "epoch": 0.77, - "grad_norm": 1.0426008164602087, - "learning_rate": 2.583079362129469e-06, - "loss": 0.1624, - "step": 15201 - }, - { - "epoch": 0.77, - "grad_norm": 0.985214885876121, - "learning_rate": 2.5819747740883584e-06, - "loss": 0.176, - "step": 15202 - }, - { - "epoch": 0.77, - "grad_norm": 1.5640615652580436, - "learning_rate": 2.5808703872616014e-06, - "loss": 0.1742, - "step": 15203 - }, - { - "epoch": 0.77, - "grad_norm": 1.2960441256428383, - "learning_rate": 2.5797662016791556e-06, - "loss": 0.153, - "step": 15204 - }, - { - "epoch": 0.77, - "grad_norm": 1.1077933440093228, - "learning_rate": 2.5786622173709695e-06, - "loss": 0.1634, - "step": 15205 - }, - { - "epoch": 0.77, - "grad_norm": 0.9797945234196949, - "learning_rate": 2.5775584343669926e-06, - "loss": 0.1636, - "step": 15206 - }, - { - "epoch": 0.77, - "grad_norm": 0.9108251801362562, - "learning_rate": 2.576454852697161e-06, - "loss": 0.1462, - "step": 15207 - }, - { - "epoch": 0.77, - "grad_norm": 0.8377769081012556, - "learning_rate": 2.5753514723914098e-06, - "loss": 0.1479, - "step": 15208 - }, - { - "epoch": 0.77, - "grad_norm": 1.0428186371239592, - "learning_rate": 2.574248293479671e-06, - "loss": 0.1864, - "step": 15209 - }, - { - "epoch": 0.77, - "grad_norm": 1.4175156519849, - "learning_rate": 2.573145315991864e-06, - "loss": 0.1769, - "step": 15210 - }, - { - "epoch": 0.77, - "grad_norm": 1.1093744001581627, - "learning_rate": 2.5720425399579095e-06, - "loss": 0.1659, - "step": 15211 - }, - { - "epoch": 0.77, - "grad_norm": 1.180261198591606, - "learning_rate": 2.5709399654077204e-06, - "loss": 0.1641, - "step": 15212 - }, - { - "epoch": 0.77, - "grad_norm": 1.423271440155294, - "learning_rate": 2.5698375923712083e-06, - "loss": 0.1562, - "step": 15213 - }, - { - "epoch": 0.77, - "grad_norm": 1.1382874794726652, - "learning_rate": 2.568735420878268e-06, - "loss": 0.1581, - "step": 15214 - }, - { - "epoch": 0.77, - "grad_norm": 1.5057172478871992, - "learning_rate": 2.567633450958801e-06, - "loss": 0.1454, - "step": 15215 - }, - { - "epoch": 0.77, - "grad_norm": 1.190316189265075, - "learning_rate": 2.5665316826426946e-06, - "loss": 0.1836, - "step": 15216 - }, - { - "epoch": 0.77, - "grad_norm": 0.8338288755160992, - "learning_rate": 2.5654301159598384e-06, - "loss": 0.1512, - "step": 15217 - }, - { - "epoch": 0.77, - "grad_norm": 1.0685886262029858, - "learning_rate": 2.564328750940107e-06, - "loss": 0.1578, - "step": 15218 - }, - { - "epoch": 0.77, - "grad_norm": 1.0227346756920659, - "learning_rate": 2.5632275876133794e-06, - "loss": 0.1693, - "step": 15219 - }, - { - "epoch": 0.77, - "grad_norm": 1.0084711151912433, - "learning_rate": 2.562126626009522e-06, - "loss": 0.1963, - "step": 15220 - }, - { - "epoch": 0.77, - "grad_norm": 1.0826122914850413, - "learning_rate": 2.561025866158404e-06, - "loss": 0.1828, - "step": 15221 - }, - { - "epoch": 0.77, - "grad_norm": 1.2022230055717948, - "learning_rate": 2.5599253080898767e-06, - "loss": 0.1574, - "step": 15222 - }, - { - "epoch": 0.77, - "grad_norm": 1.5532502806186688, - "learning_rate": 2.558824951833798e-06, - "loss": 0.1625, - "step": 15223 - }, - { - "epoch": 0.77, - "grad_norm": 2.2549004826501027, - "learning_rate": 2.5577247974200103e-06, - "loss": 0.1733, - "step": 15224 - }, - { - "epoch": 0.77, - "grad_norm": 1.0977056913219776, - "learning_rate": 2.55662484487836e-06, - "loss": 0.1581, - "step": 15225 - }, - { - "epoch": 0.77, - "grad_norm": 1.0576616231310711, - "learning_rate": 2.555525094238682e-06, - "loss": 0.1915, - "step": 15226 - }, - { - "epoch": 0.77, - "grad_norm": 0.8492059396337894, - "learning_rate": 2.5544255455308032e-06, - "loss": 0.1763, - "step": 15227 - }, - { - "epoch": 0.77, - "grad_norm": 0.9742120885664564, - "learning_rate": 2.5533261987845525e-06, - "loss": 0.1606, - "step": 15228 - }, - { - "epoch": 0.77, - "grad_norm": 1.0710840678114584, - "learning_rate": 2.552227054029749e-06, - "loss": 0.1594, - "step": 15229 - }, - { - "epoch": 0.77, - "grad_norm": 1.1422554132137888, - "learning_rate": 2.5511281112962096e-06, - "loss": 0.1949, - "step": 15230 - }, - { - "epoch": 0.77, - "grad_norm": 0.9458474201377018, - "learning_rate": 2.550029370613738e-06, - "loss": 0.1586, - "step": 15231 - }, - { - "epoch": 0.77, - "grad_norm": 1.362543961001248, - "learning_rate": 2.548930832012143e-06, - "loss": 0.1677, - "step": 15232 - }, - { - "epoch": 0.77, - "grad_norm": 1.4492155243197786, - "learning_rate": 2.5478324955212186e-06, - "loss": 0.1613, - "step": 15233 - }, - { - "epoch": 0.77, - "grad_norm": 1.5213788798995165, - "learning_rate": 2.5467343611707607e-06, - "loss": 0.1697, - "step": 15234 - }, - { - "epoch": 0.77, - "grad_norm": 0.8861019921569067, - "learning_rate": 2.545636428990551e-06, - "loss": 0.1524, - "step": 15235 - }, - { - "epoch": 0.77, - "grad_norm": 1.0214376716215812, - "learning_rate": 2.5445386990103773e-06, - "loss": 0.1632, - "step": 15236 - }, - { - "epoch": 0.77, - "grad_norm": 2.4462903199411787, - "learning_rate": 2.5434411712600095e-06, - "loss": 0.1953, - "step": 15237 - }, - { - "epoch": 0.77, - "grad_norm": 1.088113114275449, - "learning_rate": 2.542343845769222e-06, - "loss": 0.1591, - "step": 15238 - }, - { - "epoch": 0.77, - "grad_norm": 0.9099559658873935, - "learning_rate": 2.5412467225677774e-06, - "loss": 0.1473, - "step": 15239 - }, - { - "epoch": 0.77, - "grad_norm": 1.2840902837839696, - "learning_rate": 2.540149801685441e-06, - "loss": 0.1561, - "step": 15240 - }, - { - "epoch": 0.78, - "grad_norm": 1.2896921273460544, - "learning_rate": 2.5390530831519587e-06, - "loss": 0.1589, - "step": 15241 - }, - { - "epoch": 0.78, - "grad_norm": 1.233985557375335, - "learning_rate": 2.5379565669970864e-06, - "loss": 0.1873, - "step": 15242 - }, - { - "epoch": 0.78, - "grad_norm": 0.9574844819931793, - "learning_rate": 2.5368602532505637e-06, - "loss": 0.1647, - "step": 15243 - }, - { - "epoch": 0.78, - "grad_norm": 1.1470495999146328, - "learning_rate": 2.535764141942124e-06, - "loss": 0.1885, - "step": 15244 - }, - { - "epoch": 0.78, - "grad_norm": 0.7780938539703632, - "learning_rate": 2.534668233101505e-06, - "loss": 0.1521, - "step": 15245 - }, - { - "epoch": 0.78, - "grad_norm": 1.4385410709228255, - "learning_rate": 2.533572526758431e-06, - "loss": 0.1688, - "step": 15246 - }, - { - "epoch": 0.78, - "grad_norm": 1.0239198319044815, - "learning_rate": 2.5324770229426276e-06, - "loss": 0.1568, - "step": 15247 - }, - { - "epoch": 0.78, - "grad_norm": 2.158525030798928, - "learning_rate": 2.5313817216838034e-06, - "loss": 0.1787, - "step": 15248 - }, - { - "epoch": 0.78, - "grad_norm": 1.4164698569917662, - "learning_rate": 2.530286623011675e-06, - "loss": 0.1805, - "step": 15249 - }, - { - "epoch": 0.78, - "grad_norm": 1.924334069260275, - "learning_rate": 2.5291917269559408e-06, - "loss": 0.175, - "step": 15250 - }, - { - "epoch": 0.78, - "grad_norm": 0.894267709513381, - "learning_rate": 2.528097033546305e-06, - "loss": 0.1582, - "step": 15251 - }, - { - "epoch": 0.78, - "grad_norm": 0.8781609591285007, - "learning_rate": 2.527002542812457e-06, - "loss": 0.1582, - "step": 15252 - }, - { - "epoch": 0.78, - "grad_norm": 1.542183566342674, - "learning_rate": 2.5259082547840907e-06, - "loss": 0.1541, - "step": 15253 - }, - { - "epoch": 0.78, - "grad_norm": 5.65101598504308, - "learning_rate": 2.524814169490881e-06, - "loss": 0.1781, - "step": 15254 - }, - { - "epoch": 0.78, - "grad_norm": 0.8947350917702382, - "learning_rate": 2.52372028696251e-06, - "loss": 0.1769, - "step": 15255 - }, - { - "epoch": 0.78, - "grad_norm": 0.9696609784115476, - "learning_rate": 2.5226266072286475e-06, - "loss": 0.1721, - "step": 15256 - }, - { - "epoch": 0.78, - "grad_norm": 1.4945032623101626, - "learning_rate": 2.521533130318965e-06, - "loss": 0.1764, - "step": 15257 - }, - { - "epoch": 0.78, - "grad_norm": 1.0840645808247387, - "learning_rate": 2.520439856263115e-06, - "loss": 0.1657, - "step": 15258 - }, - { - "epoch": 0.78, - "grad_norm": 4.211028713539488, - "learning_rate": 2.5193467850907583e-06, - "loss": 0.1807, - "step": 15259 - }, - { - "epoch": 0.78, - "grad_norm": 0.8345107762432861, - "learning_rate": 2.5182539168315435e-06, - "loss": 0.1776, - "step": 15260 - }, - { - "epoch": 0.78, - "grad_norm": 1.1090353856444384, - "learning_rate": 2.517161251515111e-06, - "loss": 0.1765, - "step": 15261 - }, - { - "epoch": 0.78, - "grad_norm": 0.9632084661239485, - "learning_rate": 2.516068789171102e-06, - "loss": 0.1592, - "step": 15262 - }, - { - "epoch": 0.78, - "grad_norm": 2.075643005645192, - "learning_rate": 2.5149765298291508e-06, - "loss": 0.1472, - "step": 15263 - }, - { - "epoch": 0.78, - "grad_norm": 0.9688207506122695, - "learning_rate": 2.513884473518885e-06, - "loss": 0.1789, - "step": 15264 - }, - { - "epoch": 0.78, - "grad_norm": 0.9284611694669284, - "learning_rate": 2.512792620269924e-06, - "loss": 0.153, - "step": 15265 - }, - { - "epoch": 0.78, - "grad_norm": 1.2757413064160035, - "learning_rate": 2.5117009701118888e-06, - "loss": 0.1467, - "step": 15266 - }, - { - "epoch": 0.78, - "grad_norm": 1.9680738544022398, - "learning_rate": 2.5106095230743844e-06, - "loss": 0.1595, - "step": 15267 - }, - { - "epoch": 0.78, - "grad_norm": 1.0877239157747474, - "learning_rate": 2.5095182791870234e-06, - "loss": 0.1698, - "step": 15268 - }, - { - "epoch": 0.78, - "grad_norm": 1.2268524665114693, - "learning_rate": 2.5084272384793985e-06, - "loss": 0.1561, - "step": 15269 - }, - { - "epoch": 0.78, - "grad_norm": 1.36790180905005, - "learning_rate": 2.5073364009811107e-06, - "loss": 0.1757, - "step": 15270 - }, - { - "epoch": 0.78, - "grad_norm": 1.0109270739079395, - "learning_rate": 2.5062457667217433e-06, - "loss": 0.161, - "step": 15271 - }, - { - "epoch": 0.78, - "grad_norm": 1.7892760497472147, - "learning_rate": 2.505155335730883e-06, - "loss": 0.1766, - "step": 15272 - }, - { - "epoch": 0.78, - "grad_norm": 1.6048470029419701, - "learning_rate": 2.5040651080381084e-06, - "loss": 0.1753, - "step": 15273 - }, - { - "epoch": 0.78, - "grad_norm": 1.0007696327568976, - "learning_rate": 2.5029750836729926e-06, - "loss": 0.1457, - "step": 15274 - }, - { - "epoch": 0.78, - "grad_norm": 2.0214733625354664, - "learning_rate": 2.501885262665099e-06, - "loss": 0.1636, - "step": 15275 - }, - { - "epoch": 0.78, - "grad_norm": 1.5284937432774757, - "learning_rate": 2.500795645043994e-06, - "loss": 0.1556, - "step": 15276 - }, - { - "epoch": 0.78, - "grad_norm": 1.1343197681393309, - "learning_rate": 2.4997062308392304e-06, - "loss": 0.1648, - "step": 15277 - }, - { - "epoch": 0.78, - "grad_norm": 1.261112057405962, - "learning_rate": 2.498617020080356e-06, - "loss": 0.174, - "step": 15278 - }, - { - "epoch": 0.78, - "grad_norm": 1.3837587883415603, - "learning_rate": 2.4975280127969214e-06, - "loss": 0.1876, - "step": 15279 - }, - { - "epoch": 0.78, - "grad_norm": 0.9992224350374982, - "learning_rate": 2.496439209018461e-06, - "loss": 0.1656, - "step": 15280 - }, - { - "epoch": 0.78, - "grad_norm": 1.1779324775338382, - "learning_rate": 2.4953506087745107e-06, - "loss": 0.1844, - "step": 15281 - }, - { - "epoch": 0.78, - "grad_norm": 0.9112312928556008, - "learning_rate": 2.494262212094598e-06, - "loss": 0.1557, - "step": 15282 - }, - { - "epoch": 0.78, - "grad_norm": 0.9344392435604768, - "learning_rate": 2.4931740190082497e-06, - "loss": 0.1636, - "step": 15283 - }, - { - "epoch": 0.78, - "grad_norm": 0.8794078901771586, - "learning_rate": 2.4920860295449787e-06, - "loss": 0.1581, - "step": 15284 - }, - { - "epoch": 0.78, - "grad_norm": 1.134697289302735, - "learning_rate": 2.4909982437342993e-06, - "loss": 0.181, - "step": 15285 - }, - { - "epoch": 0.78, - "grad_norm": 1.0548278226712828, - "learning_rate": 2.4899106616057155e-06, - "loss": 0.1757, - "step": 15286 - }, - { - "epoch": 0.78, - "grad_norm": 9.676475877009159, - "learning_rate": 2.4888232831887304e-06, - "loss": 0.1624, - "step": 15287 - }, - { - "epoch": 0.78, - "grad_norm": 0.7828993584768532, - "learning_rate": 2.487736108512836e-06, - "loss": 0.1836, - "step": 15288 - }, - { - "epoch": 0.78, - "grad_norm": 1.020287114138541, - "learning_rate": 2.486649137607524e-06, - "loss": 0.1586, - "step": 15289 - }, - { - "epoch": 0.78, - "grad_norm": 1.2777417930524249, - "learning_rate": 2.485562370502279e-06, - "loss": 0.1821, - "step": 15290 - }, - { - "epoch": 0.78, - "grad_norm": 1.0818007196529444, - "learning_rate": 2.4844758072265806e-06, - "loss": 0.1737, - "step": 15291 - }, - { - "epoch": 0.78, - "grad_norm": 1.4445018677484833, - "learning_rate": 2.4833894478098983e-06, - "loss": 0.1685, - "step": 15292 - }, - { - "epoch": 0.78, - "grad_norm": 0.980705724826226, - "learning_rate": 2.4823032922817045e-06, - "loss": 0.1708, - "step": 15293 - }, - { - "epoch": 0.78, - "grad_norm": 0.9967433356885528, - "learning_rate": 2.481217340671457e-06, - "loss": 0.1614, - "step": 15294 - }, - { - "epoch": 0.78, - "grad_norm": 0.9265582135084066, - "learning_rate": 2.4801315930086147e-06, - "loss": 0.1866, - "step": 15295 - }, - { - "epoch": 0.78, - "grad_norm": 1.4209191915362267, - "learning_rate": 2.479046049322629e-06, - "loss": 0.1861, - "step": 15296 - }, - { - "epoch": 0.78, - "grad_norm": 0.8959157659091018, - "learning_rate": 2.4779607096429403e-06, - "loss": 0.1587, - "step": 15297 - }, - { - "epoch": 0.78, - "grad_norm": 1.0865709667746095, - "learning_rate": 2.4768755739989925e-06, - "loss": 0.1525, - "step": 15298 - }, - { - "epoch": 0.78, - "grad_norm": 0.992508724360173, - "learning_rate": 2.475790642420219e-06, - "loss": 0.134, - "step": 15299 - }, - { - "epoch": 0.78, - "grad_norm": 1.3318596943250995, - "learning_rate": 2.474705914936053e-06, - "loss": 0.1858, - "step": 15300 - }, - { - "epoch": 0.78, - "grad_norm": 0.9430394381293802, - "learning_rate": 2.473621391575911e-06, - "loss": 0.1697, - "step": 15301 - }, - { - "epoch": 0.78, - "grad_norm": 0.8667635085832349, - "learning_rate": 2.4725370723692164e-06, - "loss": 0.1514, - "step": 15302 - }, - { - "epoch": 0.78, - "grad_norm": 1.5783046632381894, - "learning_rate": 2.471452957345376e-06, - "loss": 0.166, - "step": 15303 - }, - { - "epoch": 0.78, - "grad_norm": 0.949053055901867, - "learning_rate": 2.4703690465338025e-06, - "loss": 0.1693, - "step": 15304 - }, - { - "epoch": 0.78, - "grad_norm": 1.099586983786691, - "learning_rate": 2.469285339963892e-06, - "loss": 0.1668, - "step": 15305 - }, - { - "epoch": 0.78, - "grad_norm": 1.146907604484705, - "learning_rate": 2.468201837665043e-06, - "loss": 0.1638, - "step": 15306 - }, - { - "epoch": 0.78, - "grad_norm": 1.6703895998487162, - "learning_rate": 2.467118539666643e-06, - "loss": 0.1559, - "step": 15307 - }, - { - "epoch": 0.78, - "grad_norm": 1.0693170607150535, - "learning_rate": 2.4660354459980775e-06, - "loss": 0.1422, - "step": 15308 - }, - { - "epoch": 0.78, - "grad_norm": 0.870622931041626, - "learning_rate": 2.4649525566887267e-06, - "loss": 0.1573, - "step": 15309 - }, - { - "epoch": 0.78, - "grad_norm": 1.496630248263848, - "learning_rate": 2.4638698717679653e-06, - "loss": 0.1695, - "step": 15310 - }, - { - "epoch": 0.78, - "grad_norm": 1.133075124349076, - "learning_rate": 2.462787391265157e-06, - "loss": 0.1695, - "step": 15311 - }, - { - "epoch": 0.78, - "grad_norm": 1.0358289180935836, - "learning_rate": 2.4617051152096696e-06, - "loss": 0.1568, - "step": 15312 - }, - { - "epoch": 0.78, - "grad_norm": 1.1225526395435053, - "learning_rate": 2.4606230436308554e-06, - "loss": 0.1659, - "step": 15313 - }, - { - "epoch": 0.78, - "grad_norm": 1.0468975380099528, - "learning_rate": 2.4595411765580645e-06, - "loss": 0.1858, - "step": 15314 - }, - { - "epoch": 0.78, - "grad_norm": 0.9816839310890371, - "learning_rate": 2.4584595140206457e-06, - "loss": 0.1598, - "step": 15315 - }, - { - "epoch": 0.78, - "grad_norm": 1.085821885409933, - "learning_rate": 2.4573780560479387e-06, - "loss": 0.1631, - "step": 15316 - }, - { - "epoch": 0.78, - "grad_norm": 1.0087642673161445, - "learning_rate": 2.4562968026692803e-06, - "loss": 0.1661, - "step": 15317 - }, - { - "epoch": 0.78, - "grad_norm": 1.0371578322321766, - "learning_rate": 2.4552157539139944e-06, - "loss": 0.1793, - "step": 15318 - }, - { - "epoch": 0.78, - "grad_norm": 0.9130220149814539, - "learning_rate": 2.45413490981141e-06, - "loss": 0.171, - "step": 15319 - }, - { - "epoch": 0.78, - "grad_norm": 1.290014807266559, - "learning_rate": 2.45305427039084e-06, - "loss": 0.1791, - "step": 15320 - }, - { - "epoch": 0.78, - "grad_norm": 0.9635165312055891, - "learning_rate": 2.4519738356816015e-06, - "loss": 0.1666, - "step": 15321 - }, - { - "epoch": 0.78, - "grad_norm": 1.2303516824006075, - "learning_rate": 2.450893605712996e-06, - "loss": 0.1817, - "step": 15322 - }, - { - "epoch": 0.78, - "grad_norm": 1.3058539158515092, - "learning_rate": 2.449813580514332e-06, - "loss": 0.1724, - "step": 15323 - }, - { - "epoch": 0.78, - "grad_norm": 0.9524281611944755, - "learning_rate": 2.4487337601148975e-06, - "loss": 0.1874, - "step": 15324 - }, - { - "epoch": 0.78, - "grad_norm": 1.073158181893844, - "learning_rate": 2.447654144543986e-06, - "loss": 0.1632, - "step": 15325 - }, - { - "epoch": 0.78, - "grad_norm": 0.9513488743035756, - "learning_rate": 2.446574733830882e-06, - "loss": 0.1662, - "step": 15326 - }, - { - "epoch": 0.78, - "grad_norm": 0.8947551127078637, - "learning_rate": 2.4454955280048688e-06, - "loss": 0.1641, - "step": 15327 - }, - { - "epoch": 0.78, - "grad_norm": 0.8500710551120902, - "learning_rate": 2.4444165270952126e-06, - "loss": 0.1786, - "step": 15328 - }, - { - "epoch": 0.78, - "grad_norm": 1.6713358847496242, - "learning_rate": 2.4433377311311878e-06, - "loss": 0.1814, - "step": 15329 - }, - { - "epoch": 0.78, - "grad_norm": 1.3139186587490201, - "learning_rate": 2.4422591401420537e-06, - "loss": 0.1675, - "step": 15330 - }, - { - "epoch": 0.78, - "grad_norm": 1.3539006655141135, - "learning_rate": 2.4411807541570643e-06, - "loss": 0.198, - "step": 15331 - }, - { - "epoch": 0.78, - "grad_norm": 0.8974215267445775, - "learning_rate": 2.440102573205477e-06, - "loss": 0.1586, - "step": 15332 - }, - { - "epoch": 0.78, - "grad_norm": 1.1506955324100798, - "learning_rate": 2.4390245973165316e-06, - "loss": 0.1703, - "step": 15333 - }, - { - "epoch": 0.78, - "grad_norm": 1.2501650244300122, - "learning_rate": 2.4379468265194707e-06, - "loss": 0.1611, - "step": 15334 - }, - { - "epoch": 0.78, - "grad_norm": 1.033876510720954, - "learning_rate": 2.4368692608435294e-06, - "loss": 0.1792, - "step": 15335 - }, - { - "epoch": 0.78, - "grad_norm": 0.7951661193570492, - "learning_rate": 2.4357919003179396e-06, - "loss": 0.1471, - "step": 15336 - }, - { - "epoch": 0.78, - "grad_norm": 0.9520345764506051, - "learning_rate": 2.434714744971919e-06, - "loss": 0.1776, - "step": 15337 - }, - { - "epoch": 0.78, - "grad_norm": 1.0833822675819067, - "learning_rate": 2.4336377948346912e-06, - "loss": 0.1885, - "step": 15338 - }, - { - "epoch": 0.78, - "grad_norm": 2.1029213485645695, - "learning_rate": 2.432561049935462e-06, - "loss": 0.1834, - "step": 15339 - }, - { - "epoch": 0.78, - "grad_norm": 1.4361044589948972, - "learning_rate": 2.4314845103034456e-06, - "loss": 0.176, - "step": 15340 - }, - { - "epoch": 0.78, - "grad_norm": 0.8442285067210624, - "learning_rate": 2.4304081759678357e-06, - "loss": 0.1595, - "step": 15341 - }, - { - "epoch": 0.78, - "grad_norm": 0.9579393533005156, - "learning_rate": 2.429332046957832e-06, - "loss": 0.1709, - "step": 15342 - }, - { - "epoch": 0.78, - "grad_norm": 1.9511031711141813, - "learning_rate": 2.4282561233026236e-06, - "loss": 0.2082, - "step": 15343 - }, - { - "epoch": 0.78, - "grad_norm": 1.189125025522632, - "learning_rate": 2.4271804050313984e-06, - "loss": 0.1759, - "step": 15344 - }, - { - "epoch": 0.78, - "grad_norm": 0.8696093166445392, - "learning_rate": 2.42610489217333e-06, - "loss": 0.153, - "step": 15345 - }, - { - "epoch": 0.78, - "grad_norm": 0.7661160288872305, - "learning_rate": 2.4250295847575967e-06, - "loss": 0.1701, - "step": 15346 - }, - { - "epoch": 0.78, - "grad_norm": 1.0094608338108795, - "learning_rate": 2.4239544828133632e-06, - "loss": 0.1876, - "step": 15347 - }, - { - "epoch": 0.78, - "grad_norm": 1.1947365085635253, - "learning_rate": 2.422879586369791e-06, - "loss": 0.172, - "step": 15348 - }, - { - "epoch": 0.78, - "grad_norm": 1.02607706559597, - "learning_rate": 2.421804895456039e-06, - "loss": 0.1691, - "step": 15349 - }, - { - "epoch": 0.78, - "grad_norm": 1.1105200321284465, - "learning_rate": 2.420730410101255e-06, - "loss": 0.1751, - "step": 15350 - }, - { - "epoch": 0.78, - "grad_norm": 2.1219152032536965, - "learning_rate": 2.419656130334588e-06, - "loss": 0.1913, - "step": 15351 - }, - { - "epoch": 0.78, - "grad_norm": 1.4192179540898853, - "learning_rate": 2.4185820561851747e-06, - "loss": 0.1603, - "step": 15352 - }, - { - "epoch": 0.78, - "grad_norm": 1.6876340282880435, - "learning_rate": 2.417508187682156e-06, - "loss": 0.1619, - "step": 15353 - }, - { - "epoch": 0.78, - "grad_norm": 0.9298730731346014, - "learning_rate": 2.4164345248546517e-06, - "loss": 0.1627, - "step": 15354 - }, - { - "epoch": 0.78, - "grad_norm": 1.0761477246483009, - "learning_rate": 2.415361067731793e-06, - "loss": 0.1488, - "step": 15355 - }, - { - "epoch": 0.78, - "grad_norm": 0.9804762037101074, - "learning_rate": 2.41428781634269e-06, - "loss": 0.1548, - "step": 15356 - }, - { - "epoch": 0.78, - "grad_norm": 1.0872119575860812, - "learning_rate": 2.413214770716462e-06, - "loss": 0.1627, - "step": 15357 - }, - { - "epoch": 0.78, - "grad_norm": 0.9356716591209069, - "learning_rate": 2.412141930882208e-06, - "loss": 0.1662, - "step": 15358 - }, - { - "epoch": 0.78, - "grad_norm": 1.5485828606207372, - "learning_rate": 2.4110692968690364e-06, - "loss": 0.1539, - "step": 15359 - }, - { - "epoch": 0.78, - "grad_norm": 1.2746909556741446, - "learning_rate": 2.409996868706036e-06, - "loss": 0.1875, - "step": 15360 - }, - { - "epoch": 0.78, - "grad_norm": 1.020526020568585, - "learning_rate": 2.4089246464222995e-06, - "loss": 0.1603, - "step": 15361 - }, - { - "epoch": 0.78, - "grad_norm": 1.1005090547533583, - "learning_rate": 2.4078526300469097e-06, - "loss": 0.1756, - "step": 15362 - }, - { - "epoch": 0.78, - "grad_norm": 1.0681842201569682, - "learning_rate": 2.4067808196089493e-06, - "loss": 0.1727, - "step": 15363 - }, - { - "epoch": 0.78, - "grad_norm": 1.3422597996378893, - "learning_rate": 2.4057092151374885e-06, - "loss": 0.1712, - "step": 15364 - }, - { - "epoch": 0.78, - "grad_norm": 0.9132433065779902, - "learning_rate": 2.40463781666159e-06, - "loss": 0.1484, - "step": 15365 - }, - { - "epoch": 0.78, - "grad_norm": 1.0703530978743458, - "learning_rate": 2.403566624210324e-06, - "loss": 0.1645, - "step": 15366 - }, - { - "epoch": 0.78, - "grad_norm": 1.0893874090214328, - "learning_rate": 2.4024956378127396e-06, - "loss": 0.1551, - "step": 15367 - }, - { - "epoch": 0.78, - "grad_norm": 0.896536719043762, - "learning_rate": 2.401424857497889e-06, - "loss": 0.1692, - "step": 15368 - }, - { - "epoch": 0.78, - "grad_norm": 1.1534646592762958, - "learning_rate": 2.400354283294819e-06, - "loss": 0.1708, - "step": 15369 - }, - { - "epoch": 0.78, - "grad_norm": 1.20144166772448, - "learning_rate": 2.399283915232571e-06, - "loss": 0.1728, - "step": 15370 - }, - { - "epoch": 0.78, - "grad_norm": 0.8001018929783066, - "learning_rate": 2.398213753340174e-06, - "loss": 0.159, - "step": 15371 - }, - { - "epoch": 0.78, - "grad_norm": 1.1613880996230987, - "learning_rate": 2.3971437976466604e-06, - "loss": 0.1494, - "step": 15372 - }, - { - "epoch": 0.78, - "grad_norm": 1.0741129775209934, - "learning_rate": 2.3960740481810475e-06, - "loss": 0.1668, - "step": 15373 - }, - { - "epoch": 0.78, - "grad_norm": 0.998500376985148, - "learning_rate": 2.3950045049723593e-06, - "loss": 0.1767, - "step": 15374 - }, - { - "epoch": 0.78, - "grad_norm": 4.446899401008942, - "learning_rate": 2.3939351680495994e-06, - "loss": 0.1734, - "step": 15375 - }, - { - "epoch": 0.78, - "grad_norm": 0.9632523661077028, - "learning_rate": 2.392866037441781e-06, - "loss": 0.1463, - "step": 15376 - }, - { - "epoch": 0.78, - "grad_norm": 1.0273977255365927, - "learning_rate": 2.3917971131778982e-06, - "loss": 0.1737, - "step": 15377 - }, - { - "epoch": 0.78, - "grad_norm": 1.2251126007220232, - "learning_rate": 2.3907283952869485e-06, - "loss": 0.1656, - "step": 15378 - }, - { - "epoch": 0.78, - "grad_norm": 1.41623658769232, - "learning_rate": 2.389659883797921e-06, - "loss": 0.1625, - "step": 15379 - }, - { - "epoch": 0.78, - "grad_norm": 1.2970703730778923, - "learning_rate": 2.3885915787398016e-06, - "loss": 0.1631, - "step": 15380 - }, - { - "epoch": 0.78, - "grad_norm": 1.010114637458024, - "learning_rate": 2.3875234801415626e-06, - "loss": 0.1665, - "step": 15381 - }, - { - "epoch": 0.78, - "grad_norm": 1.2164200120028743, - "learning_rate": 2.3864555880321828e-06, - "loss": 0.1599, - "step": 15382 - }, - { - "epoch": 0.78, - "grad_norm": 1.4672333643271673, - "learning_rate": 2.3853879024406244e-06, - "loss": 0.1615, - "step": 15383 - }, - { - "epoch": 0.78, - "grad_norm": 1.120252041381067, - "learning_rate": 2.3843204233958463e-06, - "loss": 0.1483, - "step": 15384 - }, - { - "epoch": 0.78, - "grad_norm": 1.0045893183847037, - "learning_rate": 2.3832531509268076e-06, - "loss": 0.1928, - "step": 15385 - }, - { - "epoch": 0.78, - "grad_norm": 1.1425096056549606, - "learning_rate": 2.382186085062457e-06, - "loss": 0.1675, - "step": 15386 - }, - { - "epoch": 0.78, - "grad_norm": 1.3341631701663308, - "learning_rate": 2.3811192258317416e-06, - "loss": 0.1693, - "step": 15387 - }, - { - "epoch": 0.78, - "grad_norm": 1.257601081796507, - "learning_rate": 2.3800525732635946e-06, - "loss": 0.1686, - "step": 15388 - }, - { - "epoch": 0.78, - "grad_norm": 1.000073423100724, - "learning_rate": 2.3789861273869553e-06, - "loss": 0.1693, - "step": 15389 - }, - { - "epoch": 0.78, - "grad_norm": 1.0257732649799796, - "learning_rate": 2.3779198882307443e-06, - "loss": 0.1654, - "step": 15390 - }, - { - "epoch": 0.78, - "grad_norm": 1.5535326206303897, - "learning_rate": 2.3768538558238895e-06, - "loss": 0.1524, - "step": 15391 - }, - { - "epoch": 0.78, - "grad_norm": 1.2975656393098935, - "learning_rate": 2.375788030195303e-06, - "loss": 0.1834, - "step": 15392 - }, - { - "epoch": 0.78, - "grad_norm": 2.3739833482123904, - "learning_rate": 2.3747224113738985e-06, - "loss": 0.146, - "step": 15393 - }, - { - "epoch": 0.78, - "grad_norm": 0.9476331047559899, - "learning_rate": 2.373656999388576e-06, - "loss": 0.1535, - "step": 15394 - }, - { - "epoch": 0.78, - "grad_norm": 1.4355877710988458, - "learning_rate": 2.3725917942682397e-06, - "loss": 0.1889, - "step": 15395 - }, - { - "epoch": 0.78, - "grad_norm": 0.9183881432091104, - "learning_rate": 2.3715267960417798e-06, - "loss": 0.1549, - "step": 15396 - }, - { - "epoch": 0.78, - "grad_norm": 1.3000475050786378, - "learning_rate": 2.370462004738091e-06, - "loss": 0.1591, - "step": 15397 - }, - { - "epoch": 0.78, - "grad_norm": 0.8743533785508302, - "learning_rate": 2.3693974203860472e-06, - "loss": 0.1637, - "step": 15398 - }, - { - "epoch": 0.78, - "grad_norm": 1.416431709572786, - "learning_rate": 2.3683330430145333e-06, - "loss": 0.1899, - "step": 15399 - }, - { - "epoch": 0.78, - "grad_norm": 1.5467261188083323, - "learning_rate": 2.367268872652416e-06, - "loss": 0.156, - "step": 15400 - }, - { - "epoch": 0.78, - "grad_norm": 0.9294656354998612, - "learning_rate": 2.366204909328559e-06, - "loss": 0.1539, - "step": 15401 - }, - { - "epoch": 0.78, - "grad_norm": 0.8225038847893258, - "learning_rate": 2.3651411530718272e-06, - "loss": 0.1698, - "step": 15402 - }, - { - "epoch": 0.78, - "grad_norm": 1.262942076782041, - "learning_rate": 2.36407760391107e-06, - "loss": 0.1768, - "step": 15403 - }, - { - "epoch": 0.78, - "grad_norm": 1.0086611381985109, - "learning_rate": 2.3630142618751405e-06, - "loss": 0.1811, - "step": 15404 - }, - { - "epoch": 0.78, - "grad_norm": 0.9710681600261067, - "learning_rate": 2.3619511269928784e-06, - "loss": 0.1837, - "step": 15405 - }, - { - "epoch": 0.78, - "grad_norm": 1.3854709074678964, - "learning_rate": 2.360888199293128e-06, - "loss": 0.1789, - "step": 15406 - }, - { - "epoch": 0.78, - "grad_norm": 0.9299119097883016, - "learning_rate": 2.3598254788047136e-06, - "loss": 0.1539, - "step": 15407 - }, - { - "epoch": 0.78, - "grad_norm": 2.011282534568099, - "learning_rate": 2.358762965556467e-06, - "loss": 0.1656, - "step": 15408 - }, - { - "epoch": 0.78, - "grad_norm": 1.2551793735265222, - "learning_rate": 2.3577006595772032e-06, - "loss": 0.1673, - "step": 15409 - }, - { - "epoch": 0.78, - "grad_norm": 1.0943491917388624, - "learning_rate": 2.3566385608957443e-06, - "loss": 0.1731, - "step": 15410 - }, - { - "epoch": 0.78, - "grad_norm": 1.3107629112380317, - "learning_rate": 2.355576669540893e-06, - "loss": 0.1575, - "step": 15411 - }, - { - "epoch": 0.78, - "grad_norm": 0.8515648899316381, - "learning_rate": 2.354514985541456e-06, - "loss": 0.1599, - "step": 15412 - }, - { - "epoch": 0.78, - "grad_norm": 1.293967723582203, - "learning_rate": 2.353453508926232e-06, - "loss": 0.1566, - "step": 15413 - }, - { - "epoch": 0.78, - "grad_norm": 0.9095448026517428, - "learning_rate": 2.3523922397240163e-06, - "loss": 0.1534, - "step": 15414 - }, - { - "epoch": 0.78, - "grad_norm": 0.8032411398733617, - "learning_rate": 2.3513311779635904e-06, - "loss": 0.1514, - "step": 15415 - }, - { - "epoch": 0.78, - "grad_norm": 1.1203727977935276, - "learning_rate": 2.3502703236737412e-06, - "loss": 0.1597, - "step": 15416 - }, - { - "epoch": 0.78, - "grad_norm": 1.3115090337752493, - "learning_rate": 2.3492096768832417e-06, - "loss": 0.1593, - "step": 15417 - }, - { - "epoch": 0.78, - "grad_norm": 1.154189297651346, - "learning_rate": 2.348149237620858e-06, - "loss": 0.1696, - "step": 15418 - }, - { - "epoch": 0.78, - "grad_norm": 1.738661265681747, - "learning_rate": 2.3470890059153616e-06, - "loss": 0.16, - "step": 15419 - }, - { - "epoch": 0.78, - "grad_norm": 0.8918477045125891, - "learning_rate": 2.3460289817955063e-06, - "loss": 0.154, - "step": 15420 - }, - { - "epoch": 0.78, - "grad_norm": 1.7360933710102129, - "learning_rate": 2.3449691652900464e-06, - "loss": 0.1658, - "step": 15421 - }, - { - "epoch": 0.78, - "grad_norm": 1.0560603543571885, - "learning_rate": 2.3439095564277305e-06, - "loss": 0.1629, - "step": 15422 - }, - { - "epoch": 0.78, - "grad_norm": 1.9505114766577443, - "learning_rate": 2.342850155237303e-06, - "loss": 0.1818, - "step": 15423 - }, - { - "epoch": 0.78, - "grad_norm": 1.0034429408534218, - "learning_rate": 2.341790961747494e-06, - "loss": 0.1752, - "step": 15424 - }, - { - "epoch": 0.78, - "grad_norm": 2.088320359735791, - "learning_rate": 2.340731975987042e-06, - "loss": 0.1705, - "step": 15425 - }, - { - "epoch": 0.78, - "grad_norm": 0.9125680830719255, - "learning_rate": 2.3396731979846634e-06, - "loss": 0.1506, - "step": 15426 - }, - { - "epoch": 0.78, - "grad_norm": 0.830417739612693, - "learning_rate": 2.3386146277690858e-06, - "loss": 0.1519, - "step": 15427 - }, - { - "epoch": 0.78, - "grad_norm": 0.849114943252066, - "learning_rate": 2.3375562653690166e-06, - "loss": 0.1603, - "step": 15428 - }, - { - "epoch": 0.78, - "grad_norm": 1.3629818306238608, - "learning_rate": 2.336498110813168e-06, - "loss": 0.1476, - "step": 15429 - }, - { - "epoch": 0.78, - "grad_norm": 0.823546883376828, - "learning_rate": 2.3354401641302395e-06, - "loss": 0.1539, - "step": 15430 - }, - { - "epoch": 0.78, - "grad_norm": 0.978023459235194, - "learning_rate": 2.3343824253489277e-06, - "loss": 0.154, - "step": 15431 - }, - { - "epoch": 0.78, - "grad_norm": 1.0018849611636187, - "learning_rate": 2.333324894497927e-06, - "loss": 0.1513, - "step": 15432 - }, - { - "epoch": 0.78, - "grad_norm": 0.9875086947887675, - "learning_rate": 2.332267571605924e-06, - "loss": 0.1709, - "step": 15433 - }, - { - "epoch": 0.78, - "grad_norm": 1.5938673758138624, - "learning_rate": 2.331210456701597e-06, - "loss": 0.1562, - "step": 15434 - }, - { - "epoch": 0.78, - "grad_norm": 1.4053685858854916, - "learning_rate": 2.330153549813615e-06, - "loss": 0.1774, - "step": 15435 - }, - { - "epoch": 0.78, - "grad_norm": 4.934543246171554, - "learning_rate": 2.329096850970656e-06, - "loss": 0.1549, - "step": 15436 - }, - { - "epoch": 0.78, - "grad_norm": 1.3750873777918644, - "learning_rate": 2.3280403602013735e-06, - "loss": 0.1707, - "step": 15437 - }, - { - "epoch": 0.79, - "grad_norm": 0.974035012887203, - "learning_rate": 2.326984077534431e-06, - "loss": 0.1733, - "step": 15438 - }, - { - "epoch": 0.79, - "grad_norm": 1.003644320415384, - "learning_rate": 2.3259280029984775e-06, - "loss": 0.1602, - "step": 15439 - }, - { - "epoch": 0.79, - "grad_norm": 3.3928202333175244, - "learning_rate": 2.324872136622164e-06, - "loss": 0.1667, - "step": 15440 - }, - { - "epoch": 0.79, - "grad_norm": 1.158804952368038, - "learning_rate": 2.3238164784341242e-06, - "loss": 0.1555, - "step": 15441 - }, - { - "epoch": 0.79, - "grad_norm": 0.9716905368598698, - "learning_rate": 2.3227610284629985e-06, - "loss": 0.1649, - "step": 15442 - }, - { - "epoch": 0.79, - "grad_norm": 1.4020372115262854, - "learning_rate": 2.3217057867374114e-06, - "loss": 0.1764, - "step": 15443 - }, - { - "epoch": 0.79, - "grad_norm": 1.0717520983846267, - "learning_rate": 2.32065075328599e-06, - "loss": 0.1576, - "step": 15444 - }, - { - "epoch": 0.79, - "grad_norm": 1.1663936158767128, - "learning_rate": 2.319595928137349e-06, - "loss": 0.1636, - "step": 15445 - }, - { - "epoch": 0.79, - "grad_norm": 1.1732575843595243, - "learning_rate": 2.318541311320105e-06, - "loss": 0.1934, - "step": 15446 - }, - { - "epoch": 0.79, - "grad_norm": 0.9486028250086229, - "learning_rate": 2.317486902862859e-06, - "loss": 0.1571, - "step": 15447 - }, - { - "epoch": 0.79, - "grad_norm": 1.1498421923735032, - "learning_rate": 2.3164327027942147e-06, - "loss": 0.1772, - "step": 15448 - }, - { - "epoch": 0.79, - "grad_norm": 0.9350401407713013, - "learning_rate": 2.3153787111427673e-06, - "loss": 0.1614, - "step": 15449 - }, - { - "epoch": 0.79, - "grad_norm": 1.5593140843812545, - "learning_rate": 2.3143249279371085e-06, - "loss": 0.1494, - "step": 15450 - }, - { - "epoch": 0.79, - "grad_norm": 1.3508993582556552, - "learning_rate": 2.313271353205818e-06, - "loss": 0.1521, - "step": 15451 - }, - { - "epoch": 0.79, - "grad_norm": 0.787726912758565, - "learning_rate": 2.3122179869774784e-06, - "loss": 0.1542, - "step": 15452 - }, - { - "epoch": 0.79, - "grad_norm": 0.8663480465259822, - "learning_rate": 2.311164829280661e-06, - "loss": 0.1509, - "step": 15453 - }, - { - "epoch": 0.79, - "grad_norm": 1.2976235796846125, - "learning_rate": 2.3101118801439283e-06, - "loss": 0.1611, - "step": 15454 - }, - { - "epoch": 0.79, - "grad_norm": 0.6973010981091752, - "learning_rate": 2.3090591395958485e-06, - "loss": 0.1373, - "step": 15455 - }, - { - "epoch": 0.79, - "grad_norm": 1.1117049975822124, - "learning_rate": 2.3080066076649697e-06, - "loss": 0.1694, - "step": 15456 - }, - { - "epoch": 0.79, - "grad_norm": 1.1801660374439589, - "learning_rate": 2.3069542843798476e-06, - "loss": 0.1734, - "step": 15457 - }, - { - "epoch": 0.79, - "grad_norm": 0.9972892773542359, - "learning_rate": 2.3059021697690254e-06, - "loss": 0.1578, - "step": 15458 - }, - { - "epoch": 0.79, - "grad_norm": 0.9699548310098502, - "learning_rate": 2.3048502638610427e-06, - "loss": 0.164, - "step": 15459 - }, - { - "epoch": 0.79, - "grad_norm": 0.9651201995600788, - "learning_rate": 2.3037985666844297e-06, - "loss": 0.1616, - "step": 15460 - }, - { - "epoch": 0.79, - "grad_norm": 0.9133523025137816, - "learning_rate": 2.3027470782677173e-06, - "loss": 0.1678, - "step": 15461 - }, - { - "epoch": 0.79, - "grad_norm": 1.0927839593337292, - "learning_rate": 2.3016957986394228e-06, - "loss": 0.17, - "step": 15462 - }, - { - "epoch": 0.79, - "grad_norm": 0.8773806402542051, - "learning_rate": 2.3006447278280676e-06, - "loss": 0.1647, - "step": 15463 - }, - { - "epoch": 0.79, - "grad_norm": 1.3811845661656832, - "learning_rate": 2.299593865862155e-06, - "loss": 0.1737, - "step": 15464 - }, - { - "epoch": 0.79, - "grad_norm": 0.9285844656456148, - "learning_rate": 2.2985432127701945e-06, - "loss": 0.1498, - "step": 15465 - }, - { - "epoch": 0.79, - "grad_norm": 0.8726490367071329, - "learning_rate": 2.2974927685806848e-06, - "loss": 0.1588, - "step": 15466 - }, - { - "epoch": 0.79, - "grad_norm": 0.927809791960326, - "learning_rate": 2.296442533322121e-06, - "loss": 0.1699, - "step": 15467 - }, - { - "epoch": 0.79, - "grad_norm": 0.9565631359875368, - "learning_rate": 2.2953925070229865e-06, - "loss": 0.1768, - "step": 15468 - }, - { - "epoch": 0.79, - "grad_norm": 1.552914352956403, - "learning_rate": 2.2943426897117672e-06, - "loss": 0.1757, - "step": 15469 - }, - { - "epoch": 0.79, - "grad_norm": 1.2338495932171418, - "learning_rate": 2.2932930814169383e-06, - "loss": 0.1785, - "step": 15470 - }, - { - "epoch": 0.79, - "grad_norm": 1.3102501076279853, - "learning_rate": 2.292243682166967e-06, - "loss": 0.16, - "step": 15471 - }, - { - "epoch": 0.79, - "grad_norm": 1.131636846718695, - "learning_rate": 2.291194491990324e-06, - "loss": 0.1681, - "step": 15472 - }, - { - "epoch": 0.79, - "grad_norm": 1.0837521584107503, - "learning_rate": 2.2901455109154626e-06, - "loss": 0.1696, - "step": 15473 - }, - { - "epoch": 0.79, - "grad_norm": 1.1508754181295, - "learning_rate": 2.2890967389708396e-06, - "loss": 0.1715, - "step": 15474 - }, - { - "epoch": 0.79, - "grad_norm": 0.9192566541574264, - "learning_rate": 2.2880481761849037e-06, - "loss": 0.1358, - "step": 15475 - }, - { - "epoch": 0.79, - "grad_norm": 1.054010923229116, - "learning_rate": 2.286999822586099e-06, - "loss": 0.1643, - "step": 15476 - }, - { - "epoch": 0.79, - "grad_norm": 0.9484038897437859, - "learning_rate": 2.285951678202857e-06, - "loss": 0.1605, - "step": 15477 - }, - { - "epoch": 0.79, - "grad_norm": 0.8489375942887908, - "learning_rate": 2.2849037430636135e-06, - "loss": 0.1565, - "step": 15478 - }, - { - "epoch": 0.79, - "grad_norm": 1.4204227390337094, - "learning_rate": 2.2838560171967906e-06, - "loss": 0.1644, - "step": 15479 - }, - { - "epoch": 0.79, - "grad_norm": 0.9216879840577646, - "learning_rate": 2.28280850063081e-06, - "loss": 0.1564, - "step": 15480 - }, - { - "epoch": 0.79, - "grad_norm": 0.9092868562804686, - "learning_rate": 2.281761193394083e-06, - "loss": 0.1472, - "step": 15481 - }, - { - "epoch": 0.79, - "grad_norm": 1.920977308430054, - "learning_rate": 2.2807140955150198e-06, - "loss": 0.1597, - "step": 15482 - }, - { - "epoch": 0.79, - "grad_norm": 0.9738559801843604, - "learning_rate": 2.2796672070220217e-06, - "loss": 0.1674, - "step": 15483 - }, - { - "epoch": 0.79, - "grad_norm": 1.3205100337184514, - "learning_rate": 2.27862052794349e-06, - "loss": 0.1505, - "step": 15484 - }, - { - "epoch": 0.79, - "grad_norm": 1.0861368779297695, - "learning_rate": 2.27757405830781e-06, - "loss": 0.1811, - "step": 15485 - }, - { - "epoch": 0.79, - "grad_norm": 1.8969086125838965, - "learning_rate": 2.276527798143372e-06, - "loss": 0.1669, - "step": 15486 - }, - { - "epoch": 0.79, - "grad_norm": 0.9619151407615854, - "learning_rate": 2.275481747478554e-06, - "loss": 0.1779, - "step": 15487 - }, - { - "epoch": 0.79, - "grad_norm": 1.2675080890008574, - "learning_rate": 2.2744359063417276e-06, - "loss": 0.1436, - "step": 15488 - }, - { - "epoch": 0.79, - "grad_norm": 0.945079506316101, - "learning_rate": 2.2733902747612656e-06, - "loss": 0.1646, - "step": 15489 - }, - { - "epoch": 0.79, - "grad_norm": 2.6382623359251274, - "learning_rate": 2.2723448527655267e-06, - "loss": 0.1597, - "step": 15490 - }, - { - "epoch": 0.79, - "grad_norm": 2.530052373076095, - "learning_rate": 2.27129964038287e-06, - "loss": 0.1808, - "step": 15491 - }, - { - "epoch": 0.79, - "grad_norm": 3.0969313221348065, - "learning_rate": 2.2702546376416467e-06, - "loss": 0.1987, - "step": 15492 - }, - { - "epoch": 0.79, - "grad_norm": 0.909649025671053, - "learning_rate": 2.269209844570206e-06, - "loss": 0.1699, - "step": 15493 - }, - { - "epoch": 0.79, - "grad_norm": 1.3532387603007856, - "learning_rate": 2.268165261196882e-06, - "loss": 0.1581, - "step": 15494 - }, - { - "epoch": 0.79, - "grad_norm": 0.7315958100266694, - "learning_rate": 2.267120887550015e-06, - "loss": 0.16, - "step": 15495 - }, - { - "epoch": 0.79, - "grad_norm": 0.9825168423437397, - "learning_rate": 2.2660767236579275e-06, - "loss": 0.1693, - "step": 15496 - }, - { - "epoch": 0.79, - "grad_norm": 0.8804275441419434, - "learning_rate": 2.265032769548948e-06, - "loss": 0.1865, - "step": 15497 - }, - { - "epoch": 0.79, - "grad_norm": 1.3730261395856402, - "learning_rate": 2.26398902525139e-06, - "loss": 0.1503, - "step": 15498 - }, - { - "epoch": 0.79, - "grad_norm": 1.0981105723529896, - "learning_rate": 2.2629454907935687e-06, - "loss": 0.1803, - "step": 15499 - }, - { - "epoch": 0.79, - "grad_norm": 1.1567464441134347, - "learning_rate": 2.2619021662037855e-06, - "loss": 0.1713, - "step": 15500 - }, - { - "epoch": 0.79, - "grad_norm": 1.029235331388223, - "learning_rate": 2.2608590515103425e-06, - "loss": 0.1663, - "step": 15501 - }, - { - "epoch": 0.79, - "grad_norm": 2.2843708418108912, - "learning_rate": 2.2598161467415357e-06, - "loss": 0.1569, - "step": 15502 - }, - { - "epoch": 0.79, - "grad_norm": 1.1181951782321329, - "learning_rate": 2.2587734519256556e-06, - "loss": 0.1742, - "step": 15503 - }, - { - "epoch": 0.79, - "grad_norm": 1.0213400485525141, - "learning_rate": 2.257730967090982e-06, - "loss": 0.1643, - "step": 15504 - }, - { - "epoch": 0.79, - "grad_norm": 1.5573104906756543, - "learning_rate": 2.2566886922657917e-06, - "loss": 0.1647, - "step": 15505 - }, - { - "epoch": 0.79, - "grad_norm": 0.8940242870876924, - "learning_rate": 2.2556466274783596e-06, - "loss": 0.1733, - "step": 15506 - }, - { - "epoch": 0.79, - "grad_norm": 1.2743541925464599, - "learning_rate": 2.2546047727569475e-06, - "loss": 0.1742, - "step": 15507 - }, - { - "epoch": 0.79, - "grad_norm": 0.939656053009, - "learning_rate": 2.253563128129819e-06, - "loss": 0.1604, - "step": 15508 - }, - { - "epoch": 0.79, - "grad_norm": 1.5790346633369725, - "learning_rate": 2.252521693625228e-06, - "loss": 0.1426, - "step": 15509 - }, - { - "epoch": 0.79, - "grad_norm": 1.7542193003523812, - "learning_rate": 2.2514804692714264e-06, - "loss": 0.1828, - "step": 15510 - }, - { - "epoch": 0.79, - "grad_norm": 1.0661220233266782, - "learning_rate": 2.2504394550966513e-06, - "loss": 0.1616, - "step": 15511 - }, - { - "epoch": 0.79, - "grad_norm": 1.2734750540730406, - "learning_rate": 2.249398651129148e-06, - "loss": 0.1979, - "step": 15512 - }, - { - "epoch": 0.79, - "grad_norm": 1.3328836438750213, - "learning_rate": 2.2483580573971396e-06, - "loss": 0.1572, - "step": 15513 - }, - { - "epoch": 0.79, - "grad_norm": 1.0606100818038617, - "learning_rate": 2.2473176739288603e-06, - "loss": 0.1738, - "step": 15514 - }, - { - "epoch": 0.79, - "grad_norm": 1.0724834883126513, - "learning_rate": 2.246277500752524e-06, - "loss": 0.1526, - "step": 15515 - }, - { - "epoch": 0.79, - "grad_norm": 1.0180285929417798, - "learning_rate": 2.245237537896351e-06, - "loss": 0.1372, - "step": 15516 - }, - { - "epoch": 0.79, - "grad_norm": 1.3299070440761946, - "learning_rate": 2.2441977853885454e-06, - "loss": 0.1568, - "step": 15517 - }, - { - "epoch": 0.79, - "grad_norm": 1.8112916247301079, - "learning_rate": 2.2431582432573127e-06, - "loss": 0.1701, - "step": 15518 - }, - { - "epoch": 0.79, - "grad_norm": 1.9341991066548048, - "learning_rate": 2.2421189115308506e-06, - "loss": 0.1682, - "step": 15519 - }, - { - "epoch": 0.79, - "grad_norm": 0.9881390734593312, - "learning_rate": 2.241079790237355e-06, - "loss": 0.1502, - "step": 15520 - }, - { - "epoch": 0.79, - "grad_norm": 1.0181825157115165, - "learning_rate": 2.2400408794050045e-06, - "loss": 0.1626, - "step": 15521 - }, - { - "epoch": 0.79, - "grad_norm": 1.4345164270616522, - "learning_rate": 2.2390021790619863e-06, - "loss": 0.1739, - "step": 15522 - }, - { - "epoch": 0.79, - "grad_norm": 0.9341993239393256, - "learning_rate": 2.237963689236472e-06, - "loss": 0.1921, - "step": 15523 - }, - { - "epoch": 0.79, - "grad_norm": 0.9828588397695067, - "learning_rate": 2.2369254099566283e-06, - "loss": 0.1603, - "step": 15524 - }, - { - "epoch": 0.79, - "grad_norm": 1.2853311671086198, - "learning_rate": 2.2358873412506254e-06, - "loss": 0.1998, - "step": 15525 - }, - { - "epoch": 0.79, - "grad_norm": 0.9950469959633863, - "learning_rate": 2.2348494831466127e-06, - "loss": 0.1758, - "step": 15526 - }, - { - "epoch": 0.79, - "grad_norm": 1.3029094611737277, - "learning_rate": 2.2338118356727466e-06, - "loss": 0.1748, - "step": 15527 - }, - { - "epoch": 0.79, - "grad_norm": 0.810663812794333, - "learning_rate": 2.2327743988571738e-06, - "loss": 0.1567, - "step": 15528 - }, - { - "epoch": 0.79, - "grad_norm": 0.9306164030658635, - "learning_rate": 2.231737172728037e-06, - "loss": 0.1562, - "step": 15529 - }, - { - "epoch": 0.79, - "grad_norm": 1.5012657439900254, - "learning_rate": 2.2307001573134646e-06, - "loss": 0.1914, - "step": 15530 - }, - { - "epoch": 0.79, - "grad_norm": 1.0339311943769462, - "learning_rate": 2.2296633526415924e-06, - "loss": 0.1805, - "step": 15531 - }, - { - "epoch": 0.79, - "grad_norm": 0.8261741916565691, - "learning_rate": 2.2286267587405376e-06, - "loss": 0.182, - "step": 15532 - }, - { - "epoch": 0.79, - "grad_norm": 0.9142063944559767, - "learning_rate": 2.227590375638423e-06, - "loss": 0.1617, - "step": 15533 - }, - { - "epoch": 0.79, - "grad_norm": 1.3244697895988153, - "learning_rate": 2.226554203363357e-06, - "loss": 0.154, - "step": 15534 - }, - { - "epoch": 0.79, - "grad_norm": 1.3274544121566074, - "learning_rate": 2.225518241943446e-06, - "loss": 0.1635, - "step": 15535 - }, - { - "epoch": 0.79, - "grad_norm": 0.9677118097166073, - "learning_rate": 2.2244824914067932e-06, - "loss": 0.1533, - "step": 15536 - }, - { - "epoch": 0.79, - "grad_norm": 1.3979123867254049, - "learning_rate": 2.2234469517814937e-06, - "loss": 0.1597, - "step": 15537 - }, - { - "epoch": 0.79, - "grad_norm": 0.757339040284125, - "learning_rate": 2.2224116230956326e-06, - "loss": 0.1425, - "step": 15538 - }, - { - "epoch": 0.79, - "grad_norm": 1.0385628676593655, - "learning_rate": 2.2213765053772984e-06, - "loss": 0.1956, - "step": 15539 - }, - { - "epoch": 0.79, - "grad_norm": 1.1618767087057638, - "learning_rate": 2.220341598654565e-06, - "loss": 0.1637, - "step": 15540 - }, - { - "epoch": 0.79, - "grad_norm": 1.0568778997025638, - "learning_rate": 2.2193069029555035e-06, - "loss": 0.1813, - "step": 15541 - }, - { - "epoch": 0.79, - "grad_norm": 0.8628483075554302, - "learning_rate": 2.2182724183081837e-06, - "loss": 0.1805, - "step": 15542 - }, - { - "epoch": 0.79, - "grad_norm": 1.6281039310208947, - "learning_rate": 2.217238144740662e-06, - "loss": 0.1737, - "step": 15543 - }, - { - "epoch": 0.79, - "grad_norm": 1.0431107402561968, - "learning_rate": 2.216204082280995e-06, - "loss": 0.1733, - "step": 15544 - }, - { - "epoch": 0.79, - "grad_norm": 0.8442010781512328, - "learning_rate": 2.215170230957231e-06, - "loss": 0.1601, - "step": 15545 - }, - { - "epoch": 0.79, - "grad_norm": 0.8923269661973011, - "learning_rate": 2.2141365907974176e-06, - "loss": 0.1776, - "step": 15546 - }, - { - "epoch": 0.79, - "grad_norm": 1.2454688797031142, - "learning_rate": 2.213103161829586e-06, - "loss": 0.1488, - "step": 15547 - }, - { - "epoch": 0.79, - "grad_norm": 1.0118447767791443, - "learning_rate": 2.212069944081774e-06, - "loss": 0.1654, - "step": 15548 - }, - { - "epoch": 0.79, - "grad_norm": 1.570171035100308, - "learning_rate": 2.2110369375820016e-06, - "loss": 0.1827, - "step": 15549 - }, - { - "epoch": 0.79, - "grad_norm": 5.126571649081451, - "learning_rate": 2.2100041423582954e-06, - "loss": 0.1923, - "step": 15550 - }, - { - "epoch": 0.79, - "grad_norm": 0.8934819284786086, - "learning_rate": 2.208971558438664e-06, - "loss": 0.1508, - "step": 15551 - }, - { - "epoch": 0.79, - "grad_norm": 0.9617945397982444, - "learning_rate": 2.2079391858511214e-06, - "loss": 0.174, - "step": 15552 - }, - { - "epoch": 0.79, - "grad_norm": 1.322771889638723, - "learning_rate": 2.2069070246236658e-06, - "loss": 0.1791, - "step": 15553 - }, - { - "epoch": 0.79, - "grad_norm": 0.9423969982106858, - "learning_rate": 2.2058750747842974e-06, - "loss": 0.1592, - "step": 15554 - }, - { - "epoch": 0.79, - "grad_norm": 1.1030627892681681, - "learning_rate": 2.2048433363610077e-06, - "loss": 0.1518, - "step": 15555 - }, - { - "epoch": 0.79, - "grad_norm": 1.1999783035530458, - "learning_rate": 2.203811809381785e-06, - "loss": 0.1547, - "step": 15556 - }, - { - "epoch": 0.79, - "grad_norm": 1.0678737575178097, - "learning_rate": 2.2027804938746087e-06, - "loss": 0.154, - "step": 15557 - }, - { - "epoch": 0.79, - "grad_norm": 1.2888427274702388, - "learning_rate": 2.201749389867448e-06, - "loss": 0.1794, - "step": 15558 - }, - { - "epoch": 0.79, - "grad_norm": 0.8089560589472962, - "learning_rate": 2.200718497388279e-06, - "loss": 0.1611, - "step": 15559 - }, - { - "epoch": 0.79, - "grad_norm": 1.0591013646114105, - "learning_rate": 2.199687816465058e-06, - "loss": 0.1696, - "step": 15560 - }, - { - "epoch": 0.79, - "grad_norm": 1.0265503614760638, - "learning_rate": 2.1986573471257456e-06, - "loss": 0.1726, - "step": 15561 - }, - { - "epoch": 0.79, - "grad_norm": 1.0804107453865288, - "learning_rate": 2.1976270893982934e-06, - "loss": 0.1647, - "step": 15562 - }, - { - "epoch": 0.79, - "grad_norm": 0.9829531661161499, - "learning_rate": 2.19659704331065e-06, - "loss": 0.1648, - "step": 15563 - }, - { - "epoch": 0.79, - "grad_norm": 1.1559582416470937, - "learning_rate": 2.19556720889075e-06, - "loss": 0.1708, - "step": 15564 - }, - { - "epoch": 0.79, - "grad_norm": 1.511957859694274, - "learning_rate": 2.194537586166532e-06, - "loss": 0.1703, - "step": 15565 - }, - { - "epoch": 0.79, - "grad_norm": 0.9399930014089914, - "learning_rate": 2.1935081751659214e-06, - "loss": 0.1495, - "step": 15566 - }, - { - "epoch": 0.79, - "grad_norm": 1.1744683179840691, - "learning_rate": 2.192478975916844e-06, - "loss": 0.1646, - "step": 15567 - }, - { - "epoch": 0.79, - "grad_norm": 1.052596780486219, - "learning_rate": 2.191449988447213e-06, - "loss": 0.1721, - "step": 15568 - }, - { - "epoch": 0.79, - "grad_norm": 0.9301386245281534, - "learning_rate": 2.1904212127849455e-06, - "loss": 0.1756, - "step": 15569 - }, - { - "epoch": 0.79, - "grad_norm": 1.2471355427079205, - "learning_rate": 2.18939264895794e-06, - "loss": 0.146, - "step": 15570 - }, - { - "epoch": 0.79, - "grad_norm": 0.8480913113661026, - "learning_rate": 2.188364296994101e-06, - "loss": 0.1825, - "step": 15571 - }, - { - "epoch": 0.79, - "grad_norm": 1.8866841429424297, - "learning_rate": 2.1873361569213204e-06, - "loss": 0.1692, - "step": 15572 - }, - { - "epoch": 0.79, - "grad_norm": 0.9416588430775136, - "learning_rate": 2.186308228767492e-06, - "loss": 0.1639, - "step": 15573 - }, - { - "epoch": 0.79, - "grad_norm": 0.925607189488996, - "learning_rate": 2.1852805125604924e-06, - "loss": 0.1766, - "step": 15574 - }, - { - "epoch": 0.79, - "grad_norm": 1.0407514437395915, - "learning_rate": 2.184253008328199e-06, - "loss": 0.1669, - "step": 15575 - }, - { - "epoch": 0.79, - "grad_norm": 0.9420561629019543, - "learning_rate": 2.1832257160984873e-06, - "loss": 0.1662, - "step": 15576 - }, - { - "epoch": 0.79, - "grad_norm": 1.22011044368606, - "learning_rate": 2.182198635899215e-06, - "loss": 0.166, - "step": 15577 - }, - { - "epoch": 0.79, - "grad_norm": 1.6176134255385397, - "learning_rate": 2.18117176775825e-06, - "loss": 0.1818, - "step": 15578 - }, - { - "epoch": 0.79, - "grad_norm": 0.8881455447897848, - "learning_rate": 2.18014511170344e-06, - "loss": 0.1482, - "step": 15579 - }, - { - "epoch": 0.79, - "grad_norm": 1.196847111229772, - "learning_rate": 2.179118667762635e-06, - "loss": 0.1732, - "step": 15580 - }, - { - "epoch": 0.79, - "grad_norm": 0.7944044035893635, - "learning_rate": 2.178092435963678e-06, - "loss": 0.1418, - "step": 15581 - }, - { - "epoch": 0.79, - "grad_norm": 1.813756883174713, - "learning_rate": 2.177066416334409e-06, - "loss": 0.163, - "step": 15582 - }, - { - "epoch": 0.79, - "grad_norm": 0.7979800318997707, - "learning_rate": 2.176040608902652e-06, - "loss": 0.1461, - "step": 15583 - }, - { - "epoch": 0.79, - "grad_norm": 1.1091856405851366, - "learning_rate": 2.175015013696238e-06, - "loss": 0.155, - "step": 15584 - }, - { - "epoch": 0.79, - "grad_norm": 1.0806486704666542, - "learning_rate": 2.173989630742981e-06, - "loss": 0.1778, - "step": 15585 - }, - { - "epoch": 0.79, - "grad_norm": 1.2480394070848813, - "learning_rate": 2.172964460070699e-06, - "loss": 0.1795, - "step": 15586 - }, - { - "epoch": 0.79, - "grad_norm": 1.0284664104629266, - "learning_rate": 2.1719395017071966e-06, - "loss": 0.1626, - "step": 15587 - }, - { - "epoch": 0.79, - "grad_norm": 1.7285129265356551, - "learning_rate": 2.170914755680277e-06, - "loss": 0.1747, - "step": 15588 - }, - { - "epoch": 0.79, - "grad_norm": 1.0249807644458409, - "learning_rate": 2.1698902220177365e-06, - "loss": 0.16, - "step": 15589 - }, - { - "epoch": 0.79, - "grad_norm": 1.4069729783476428, - "learning_rate": 2.16886590074737e-06, - "loss": 0.1767, - "step": 15590 - }, - { - "epoch": 0.79, - "grad_norm": 1.1186356411264524, - "learning_rate": 2.167841791896954e-06, - "loss": 0.1698, - "step": 15591 - }, - { - "epoch": 0.79, - "grad_norm": 1.4428332295754163, - "learning_rate": 2.1668178954942754e-06, - "loss": 0.1612, - "step": 15592 - }, - { - "epoch": 0.79, - "grad_norm": 0.8692424415989399, - "learning_rate": 2.1657942115671037e-06, - "loss": 0.1542, - "step": 15593 - }, - { - "epoch": 0.79, - "grad_norm": 0.892577770113712, - "learning_rate": 2.164770740143203e-06, - "loss": 0.1696, - "step": 15594 - }, - { - "epoch": 0.79, - "grad_norm": 1.1374430428559237, - "learning_rate": 2.163747481250342e-06, - "loss": 0.1705, - "step": 15595 - }, - { - "epoch": 0.79, - "grad_norm": 1.096025517966074, - "learning_rate": 2.1627244349162702e-06, - "loss": 0.1597, - "step": 15596 - }, - { - "epoch": 0.79, - "grad_norm": 1.4958794914876075, - "learning_rate": 2.161701601168741e-06, - "loss": 0.1549, - "step": 15597 - }, - { - "epoch": 0.79, - "grad_norm": 1.4401233627884298, - "learning_rate": 2.1606789800354978e-06, - "loss": 0.156, - "step": 15598 - }, - { - "epoch": 0.79, - "grad_norm": 1.243981167898369, - "learning_rate": 2.1596565715442843e-06, - "loss": 0.1524, - "step": 15599 - }, - { - "epoch": 0.79, - "grad_norm": 1.12636656464376, - "learning_rate": 2.1586343757228247e-06, - "loss": 0.1605, - "step": 15600 - }, - { - "epoch": 0.79, - "grad_norm": 1.5292957878929483, - "learning_rate": 2.1576123925988548e-06, - "loss": 0.1592, - "step": 15601 - }, - { - "epoch": 0.79, - "grad_norm": 0.8964219413195446, - "learning_rate": 2.1565906222000877e-06, - "loss": 0.1647, - "step": 15602 - }, - { - "epoch": 0.79, - "grad_norm": 1.2008320785613633, - "learning_rate": 2.155569064554246e-06, - "loss": 0.1705, - "step": 15603 - }, - { - "epoch": 0.79, - "grad_norm": 1.5424164419925088, - "learning_rate": 2.154547719689034e-06, - "loss": 0.1898, - "step": 15604 - }, - { - "epoch": 0.79, - "grad_norm": 0.9392973979831717, - "learning_rate": 2.1535265876321574e-06, - "loss": 0.1622, - "step": 15605 - }, - { - "epoch": 0.79, - "grad_norm": 0.9378226880420547, - "learning_rate": 2.1525056684113166e-06, - "loss": 0.164, - "step": 15606 - }, - { - "epoch": 0.79, - "grad_norm": 1.1582115722775614, - "learning_rate": 2.1514849620542045e-06, - "loss": 0.1742, - "step": 15607 - }, - { - "epoch": 0.79, - "grad_norm": 0.9833527156186123, - "learning_rate": 2.1504644685885044e-06, - "loss": 0.1541, - "step": 15608 - }, - { - "epoch": 0.79, - "grad_norm": 1.1202416352338174, - "learning_rate": 2.1494441880419005e-06, - "loss": 0.1652, - "step": 15609 - }, - { - "epoch": 0.79, - "grad_norm": 1.2100725249448117, - "learning_rate": 2.1484241204420676e-06, - "loss": 0.1691, - "step": 15610 - }, - { - "epoch": 0.79, - "grad_norm": 1.2245008690966699, - "learning_rate": 2.1474042658166703e-06, - "loss": 0.1777, - "step": 15611 - }, - { - "epoch": 0.79, - "grad_norm": 0.8136968098830883, - "learning_rate": 2.1463846241933785e-06, - "loss": 0.1627, - "step": 15612 - }, - { - "epoch": 0.79, - "grad_norm": 1.0309363959270292, - "learning_rate": 2.1453651955998445e-06, - "loss": 0.1667, - "step": 15613 - }, - { - "epoch": 0.79, - "grad_norm": 1.2038948066066886, - "learning_rate": 2.1443459800637234e-06, - "loss": 0.1653, - "step": 15614 - }, - { - "epoch": 0.79, - "grad_norm": 0.8752988126685001, - "learning_rate": 2.143326977612662e-06, - "loss": 0.1613, - "step": 15615 - }, - { - "epoch": 0.79, - "grad_norm": 1.81493830010666, - "learning_rate": 2.1423081882743026e-06, - "loss": 0.1677, - "step": 15616 - }, - { - "epoch": 0.79, - "grad_norm": 0.8819559453162542, - "learning_rate": 2.1412896120762738e-06, - "loss": 0.1824, - "step": 15617 - }, - { - "epoch": 0.79, - "grad_norm": 0.8360825523913156, - "learning_rate": 2.1402712490462106e-06, - "loss": 0.1689, - "step": 15618 - }, - { - "epoch": 0.79, - "grad_norm": 0.8747309523646191, - "learning_rate": 2.139253099211732e-06, - "loss": 0.1606, - "step": 15619 - }, - { - "epoch": 0.79, - "grad_norm": 1.0068570576272733, - "learning_rate": 2.1382351626004595e-06, - "loss": 0.1643, - "step": 15620 - }, - { - "epoch": 0.79, - "grad_norm": 1.0247923129436445, - "learning_rate": 2.1372174392400003e-06, - "loss": 0.1668, - "step": 15621 - }, - { - "epoch": 0.79, - "grad_norm": 1.0541891741832206, - "learning_rate": 2.1361999291579636e-06, - "loss": 0.167, - "step": 15622 - }, - { - "epoch": 0.79, - "grad_norm": 1.006406001594581, - "learning_rate": 2.135182632381946e-06, - "loss": 0.1552, - "step": 15623 - }, - { - "epoch": 0.79, - "grad_norm": 1.0631465203649262, - "learning_rate": 2.134165548939543e-06, - "loss": 0.1555, - "step": 15624 - }, - { - "epoch": 0.79, - "grad_norm": 1.2124724084787541, - "learning_rate": 2.1331486788583444e-06, - "loss": 0.1926, - "step": 15625 - }, - { - "epoch": 0.79, - "grad_norm": 1.1730304656747461, - "learning_rate": 2.132132022165935e-06, - "loss": 0.1615, - "step": 15626 - }, - { - "epoch": 0.79, - "grad_norm": 0.9980917211125898, - "learning_rate": 2.1311155788898884e-06, - "loss": 0.1673, - "step": 15627 - }, - { - "epoch": 0.79, - "grad_norm": 1.3509935622005758, - "learning_rate": 2.130099349057774e-06, - "loss": 0.1931, - "step": 15628 - }, - { - "epoch": 0.79, - "grad_norm": 1.2673298856578072, - "learning_rate": 2.1290833326971617e-06, - "loss": 0.1688, - "step": 15629 - }, - { - "epoch": 0.79, - "grad_norm": 0.95908081207994, - "learning_rate": 2.128067529835606e-06, - "loss": 0.155, - "step": 15630 - }, - { - "epoch": 0.79, - "grad_norm": 1.0957765186131665, - "learning_rate": 2.1270519405006618e-06, - "loss": 0.1418, - "step": 15631 - }, - { - "epoch": 0.79, - "grad_norm": 0.9286845139617024, - "learning_rate": 2.12603656471988e-06, - "loss": 0.154, - "step": 15632 - }, - { - "epoch": 0.79, - "grad_norm": 0.8510502467796793, - "learning_rate": 2.1250214025208028e-06, - "loss": 0.1514, - "step": 15633 - }, - { - "epoch": 0.8, - "grad_norm": 1.1681591596259013, - "learning_rate": 2.1240064539309637e-06, - "loss": 0.1829, - "step": 15634 - }, - { - "epoch": 0.8, - "grad_norm": 1.076521627852123, - "learning_rate": 2.122991718977896e-06, - "loss": 0.1688, - "step": 15635 - }, - { - "epoch": 0.8, - "grad_norm": 1.7132103153256815, - "learning_rate": 2.121977197689119e-06, - "loss": 0.1528, - "step": 15636 - }, - { - "epoch": 0.8, - "grad_norm": 0.9492644200091807, - "learning_rate": 2.1209628900921597e-06, - "loss": 0.1833, - "step": 15637 - }, - { - "epoch": 0.8, - "grad_norm": 0.8755613060414379, - "learning_rate": 2.1199487962145236e-06, - "loss": 0.1654, - "step": 15638 - }, - { - "epoch": 0.8, - "grad_norm": 1.0228870344740009, - "learning_rate": 2.1189349160837247e-06, - "loss": 0.17, - "step": 15639 - }, - { - "epoch": 0.8, - "grad_norm": 0.9319147718614745, - "learning_rate": 2.1179212497272582e-06, - "loss": 0.1625, - "step": 15640 - }, - { - "epoch": 0.8, - "grad_norm": 1.2706661375506798, - "learning_rate": 2.116907797172624e-06, - "loss": 0.1656, - "step": 15641 - }, - { - "epoch": 0.8, - "grad_norm": 1.0564370189070695, - "learning_rate": 2.11589455844731e-06, - "loss": 0.1666, - "step": 15642 - }, - { - "epoch": 0.8, - "grad_norm": 0.8366015802686906, - "learning_rate": 2.1148815335788044e-06, - "loss": 0.1643, - "step": 15643 - }, - { - "epoch": 0.8, - "grad_norm": 1.4744670244805906, - "learning_rate": 2.113868722594582e-06, - "loss": 0.1693, - "step": 15644 - }, - { - "epoch": 0.8, - "grad_norm": 1.3168260345926481, - "learning_rate": 2.1128561255221138e-06, - "loss": 0.1713, - "step": 15645 - }, - { - "epoch": 0.8, - "grad_norm": 0.9985770754055425, - "learning_rate": 2.1118437423888715e-06, - "loss": 0.1794, - "step": 15646 - }, - { - "epoch": 0.8, - "grad_norm": 1.281413149778363, - "learning_rate": 2.1108315732223104e-06, - "loss": 0.1713, - "step": 15647 - }, - { - "epoch": 0.8, - "grad_norm": 0.9125070660623015, - "learning_rate": 2.109819618049891e-06, - "loss": 0.1653, - "step": 15648 - }, - { - "epoch": 0.8, - "grad_norm": 1.8057245462136853, - "learning_rate": 2.108807876899057e-06, - "loss": 0.1752, - "step": 15649 - }, - { - "epoch": 0.8, - "grad_norm": 0.8487713219507667, - "learning_rate": 2.1077963497972555e-06, - "loss": 0.1809, - "step": 15650 - }, - { - "epoch": 0.8, - "grad_norm": 0.9196659372204345, - "learning_rate": 2.1067850367719247e-06, - "loss": 0.1755, - "step": 15651 - }, - { - "epoch": 0.8, - "grad_norm": 0.99716980725985, - "learning_rate": 2.105773937850497e-06, - "loss": 0.175, - "step": 15652 - }, - { - "epoch": 0.8, - "grad_norm": 0.8829025384108097, - "learning_rate": 2.1047630530603946e-06, - "loss": 0.1684, - "step": 15653 - }, - { - "epoch": 0.8, - "grad_norm": 6.110652541716581, - "learning_rate": 2.103752382429043e-06, - "loss": 0.1611, - "step": 15654 - }, - { - "epoch": 0.8, - "grad_norm": 0.9720341752038881, - "learning_rate": 2.102741925983851e-06, - "loss": 0.1536, - "step": 15655 - }, - { - "epoch": 0.8, - "grad_norm": 0.9920772371908424, - "learning_rate": 2.101731683752234e-06, - "loss": 0.1868, - "step": 15656 - }, - { - "epoch": 0.8, - "grad_norm": 1.7231021520074439, - "learning_rate": 2.100721655761587e-06, - "loss": 0.182, - "step": 15657 - }, - { - "epoch": 0.8, - "grad_norm": 1.021432371471618, - "learning_rate": 2.0997118420393125e-06, - "loss": 0.171, - "step": 15658 - }, - { - "epoch": 0.8, - "grad_norm": 0.9649623834705489, - "learning_rate": 2.0987022426128e-06, - "loss": 0.1707, - "step": 15659 - }, - { - "epoch": 0.8, - "grad_norm": 1.258458441032225, - "learning_rate": 2.097692857509439e-06, - "loss": 0.1984, - "step": 15660 - }, - { - "epoch": 0.8, - "grad_norm": 0.9717680207982854, - "learning_rate": 2.096683686756602e-06, - "loss": 0.1566, - "step": 15661 - }, - { - "epoch": 0.8, - "grad_norm": 0.7280588099862542, - "learning_rate": 2.0956747303816694e-06, - "loss": 0.1463, - "step": 15662 - }, - { - "epoch": 0.8, - "grad_norm": 0.9012205407601845, - "learning_rate": 2.0946659884120056e-06, - "loss": 0.1622, - "step": 15663 - }, - { - "epoch": 0.8, - "grad_norm": 1.584582682612466, - "learning_rate": 2.0936574608749717e-06, - "loss": 0.164, - "step": 15664 - }, - { - "epoch": 0.8, - "grad_norm": 0.8224010780566432, - "learning_rate": 2.0926491477979272e-06, - "loss": 0.1481, - "step": 15665 - }, - { - "epoch": 0.8, - "grad_norm": 1.2102346578962973, - "learning_rate": 2.0916410492082195e-06, - "loss": 0.1545, - "step": 15666 - }, - { - "epoch": 0.8, - "grad_norm": 1.4038159209736356, - "learning_rate": 2.0906331651331945e-06, - "loss": 0.1589, - "step": 15667 - }, - { - "epoch": 0.8, - "grad_norm": 0.8479165441994736, - "learning_rate": 2.0896254956001916e-06, - "loss": 0.1943, - "step": 15668 - }, - { - "epoch": 0.8, - "grad_norm": 0.9723482193483798, - "learning_rate": 2.0886180406365465e-06, - "loss": 0.2076, - "step": 15669 - }, - { - "epoch": 0.8, - "grad_norm": 1.3228687636011576, - "learning_rate": 2.0876108002695817e-06, - "loss": 0.1931, - "step": 15670 - }, - { - "epoch": 0.8, - "grad_norm": 1.0600971579261, - "learning_rate": 2.0866037745266232e-06, - "loss": 0.1512, - "step": 15671 - }, - { - "epoch": 0.8, - "grad_norm": 1.2101345515538404, - "learning_rate": 2.0855969634349816e-06, - "loss": 0.1677, - "step": 15672 - }, - { - "epoch": 0.8, - "grad_norm": 1.2727403325036004, - "learning_rate": 2.0845903670219714e-06, - "loss": 0.1886, - "step": 15673 - }, - { - "epoch": 0.8, - "grad_norm": 1.1157062298942677, - "learning_rate": 2.083583985314893e-06, - "loss": 0.1569, - "step": 15674 - }, - { - "epoch": 0.8, - "grad_norm": 2.1086577491811576, - "learning_rate": 2.0825778183410485e-06, - "loss": 0.1888, - "step": 15675 - }, - { - "epoch": 0.8, - "grad_norm": 1.043001791637282, - "learning_rate": 2.0815718661277253e-06, - "loss": 0.1628, - "step": 15676 - }, - { - "epoch": 0.8, - "grad_norm": 0.8982340742383359, - "learning_rate": 2.0805661287022115e-06, - "loss": 0.1587, - "step": 15677 - }, - { - "epoch": 0.8, - "grad_norm": 0.7720902538988256, - "learning_rate": 2.0795606060917896e-06, - "loss": 0.1491, - "step": 15678 - }, - { - "epoch": 0.8, - "grad_norm": 1.4247042004798163, - "learning_rate": 2.0785552983237366e-06, - "loss": 0.1671, - "step": 15679 - }, - { - "epoch": 0.8, - "grad_norm": 1.8891588378758972, - "learning_rate": 2.077550205425317e-06, - "loss": 0.1702, - "step": 15680 - }, - { - "epoch": 0.8, - "grad_norm": 1.2919121709259531, - "learning_rate": 2.0765453274237944e-06, - "loss": 0.1652, - "step": 15681 - }, - { - "epoch": 0.8, - "grad_norm": 0.960395602437291, - "learning_rate": 2.0755406643464293e-06, - "loss": 0.1513, - "step": 15682 - }, - { - "epoch": 0.8, - "grad_norm": 1.094433177420648, - "learning_rate": 2.07453621622047e-06, - "loss": 0.1857, - "step": 15683 - }, - { - "epoch": 0.8, - "grad_norm": 1.192579997836186, - "learning_rate": 2.0735319830731614e-06, - "loss": 0.1708, - "step": 15684 - }, - { - "epoch": 0.8, - "grad_norm": 1.2940041677953567, - "learning_rate": 2.0725279649317463e-06, - "loss": 0.1768, - "step": 15685 - }, - { - "epoch": 0.8, - "grad_norm": 1.0753642049005883, - "learning_rate": 2.0715241618234603e-06, - "loss": 0.1733, - "step": 15686 - }, - { - "epoch": 0.8, - "grad_norm": 0.8004753323947372, - "learning_rate": 2.0705205737755276e-06, - "loss": 0.1839, - "step": 15687 - }, - { - "epoch": 0.8, - "grad_norm": 1.0962965286334978, - "learning_rate": 2.069517200815173e-06, - "loss": 0.1646, - "step": 15688 - }, - { - "epoch": 0.8, - "grad_norm": 1.1776350758720255, - "learning_rate": 2.0685140429696114e-06, - "loss": 0.1776, - "step": 15689 - }, - { - "epoch": 0.8, - "grad_norm": 1.1486790236667523, - "learning_rate": 2.0675111002660566e-06, - "loss": 0.1659, - "step": 15690 - }, - { - "epoch": 0.8, - "grad_norm": 1.0319557213802002, - "learning_rate": 2.066508372731708e-06, - "loss": 0.1483, - "step": 15691 - }, - { - "epoch": 0.8, - "grad_norm": 0.9726333520511419, - "learning_rate": 2.0655058603937704e-06, - "loss": 0.1565, - "step": 15692 - }, - { - "epoch": 0.8, - "grad_norm": 1.0562199934217975, - "learning_rate": 2.0645035632794317e-06, - "loss": 0.1657, - "step": 15693 - }, - { - "epoch": 0.8, - "grad_norm": 0.9933074570339296, - "learning_rate": 2.0635014814158826e-06, - "loss": 0.1601, - "step": 15694 - }, - { - "epoch": 0.8, - "grad_norm": 1.1359255460270459, - "learning_rate": 2.0624996148303043e-06, - "loss": 0.19, - "step": 15695 - }, - { - "epoch": 0.8, - "grad_norm": 0.909456397171364, - "learning_rate": 2.0614979635498743e-06, - "loss": 0.1661, - "step": 15696 - }, - { - "epoch": 0.8, - "grad_norm": 0.8667047580297196, - "learning_rate": 2.060496527601761e-06, - "loss": 0.1555, - "step": 15697 - }, - { - "epoch": 0.8, - "grad_norm": 1.7733186418000007, - "learning_rate": 2.059495307013125e-06, - "loss": 0.1288, - "step": 15698 - }, - { - "epoch": 0.8, - "grad_norm": 1.2790866004491492, - "learning_rate": 2.0584943018111304e-06, - "loss": 0.1741, - "step": 15699 - }, - { - "epoch": 0.8, - "grad_norm": 1.0679954503666529, - "learning_rate": 2.0574935120229224e-06, - "loss": 0.1689, - "step": 15700 - }, - { - "epoch": 0.8, - "grad_norm": 0.9779793058532171, - "learning_rate": 2.0564929376756526e-06, - "loss": 0.1613, - "step": 15701 - }, - { - "epoch": 0.8, - "grad_norm": 0.9095580126414525, - "learning_rate": 2.0554925787964596e-06, - "loss": 0.1584, - "step": 15702 - }, - { - "epoch": 0.8, - "grad_norm": 0.7880611948799627, - "learning_rate": 2.0544924354124828e-06, - "loss": 0.1481, - "step": 15703 - }, - { - "epoch": 0.8, - "grad_norm": 0.94216252208585, - "learning_rate": 2.053492507550845e-06, - "loss": 0.1826, - "step": 15704 - }, - { - "epoch": 0.8, - "grad_norm": 0.9309447877817286, - "learning_rate": 2.052492795238673e-06, - "loss": 0.1599, - "step": 15705 - }, - { - "epoch": 0.8, - "grad_norm": 1.2542500450284721, - "learning_rate": 2.051493298503081e-06, - "loss": 0.179, - "step": 15706 - }, - { - "epoch": 0.8, - "grad_norm": 1.0676784313529195, - "learning_rate": 2.050494017371185e-06, - "loss": 0.1487, - "step": 15707 - }, - { - "epoch": 0.8, - "grad_norm": 1.2173517816166688, - "learning_rate": 2.0494949518700846e-06, - "loss": 0.1743, - "step": 15708 - }, - { - "epoch": 0.8, - "grad_norm": 1.1361457952523206, - "learning_rate": 2.0484961020268857e-06, - "loss": 0.1756, - "step": 15709 - }, - { - "epoch": 0.8, - "grad_norm": 0.8451480774651444, - "learning_rate": 2.0474974678686764e-06, - "loss": 0.1673, - "step": 15710 - }, - { - "epoch": 0.8, - "grad_norm": 0.9615371002031146, - "learning_rate": 2.046499049422548e-06, - "loss": 0.1523, - "step": 15711 - }, - { - "epoch": 0.8, - "grad_norm": 0.8893607417171246, - "learning_rate": 2.045500846715581e-06, - "loss": 0.1546, - "step": 15712 - }, - { - "epoch": 0.8, - "grad_norm": 0.9308432802784401, - "learning_rate": 2.0445028597748564e-06, - "loss": 0.1748, - "step": 15713 - }, - { - "epoch": 0.8, - "grad_norm": 1.1209141683077861, - "learning_rate": 2.04350508862744e-06, - "loss": 0.1768, - "step": 15714 - }, - { - "epoch": 0.8, - "grad_norm": 0.9681851429622521, - "learning_rate": 2.042507533300395e-06, - "loss": 0.1701, - "step": 15715 - }, - { - "epoch": 0.8, - "grad_norm": 1.0106823838571959, - "learning_rate": 2.041510193820786e-06, - "loss": 0.1589, - "step": 15716 - }, - { - "epoch": 0.8, - "grad_norm": 1.1698606792797457, - "learning_rate": 2.0405130702156596e-06, - "loss": 0.163, - "step": 15717 - }, - { - "epoch": 0.8, - "grad_norm": 1.3727999721815387, - "learning_rate": 2.0395161625120684e-06, - "loss": 0.1619, - "step": 15718 - }, - { - "epoch": 0.8, - "grad_norm": 1.2642484204300222, - "learning_rate": 2.038519470737048e-06, - "loss": 0.1729, - "step": 15719 - }, - { - "epoch": 0.8, - "grad_norm": 1.390230059267973, - "learning_rate": 2.0375229949176367e-06, - "loss": 0.1794, - "step": 15720 - }, - { - "epoch": 0.8, - "grad_norm": 0.9163587083719319, - "learning_rate": 2.036526735080865e-06, - "loss": 0.1682, - "step": 15721 - }, - { - "epoch": 0.8, - "grad_norm": 1.9447935253499224, - "learning_rate": 2.0355306912537565e-06, - "loss": 0.1703, - "step": 15722 - }, - { - "epoch": 0.8, - "grad_norm": 1.3922281759361557, - "learning_rate": 2.0345348634633254e-06, - "loss": 0.1785, - "step": 15723 - }, - { - "epoch": 0.8, - "grad_norm": 1.0678235780247454, - "learning_rate": 2.0335392517365895e-06, - "loss": 0.1396, - "step": 15724 - }, - { - "epoch": 0.8, - "grad_norm": 1.2132485465777194, - "learning_rate": 2.032543856100548e-06, - "loss": 0.1861, - "step": 15725 - }, - { - "epoch": 0.8, - "grad_norm": 1.0154765236215866, - "learning_rate": 2.0315486765822067e-06, - "loss": 0.1645, - "step": 15726 - }, - { - "epoch": 0.8, - "grad_norm": 1.070807295078891, - "learning_rate": 2.0305537132085562e-06, - "loss": 0.1616, - "step": 15727 - }, - { - "epoch": 0.8, - "grad_norm": 1.8395267663742878, - "learning_rate": 2.0295589660065853e-06, - "loss": 0.1606, - "step": 15728 - }, - { - "epoch": 0.8, - "grad_norm": 1.0895675484709622, - "learning_rate": 2.0285644350032786e-06, - "loss": 0.1621, - "step": 15729 - }, - { - "epoch": 0.8, - "grad_norm": 4.234470650985866, - "learning_rate": 2.027570120225614e-06, - "loss": 0.1496, - "step": 15730 - }, - { - "epoch": 0.8, - "grad_norm": 0.9022044363184193, - "learning_rate": 2.026576021700557e-06, - "loss": 0.158, - "step": 15731 - }, - { - "epoch": 0.8, - "grad_norm": 1.0723980217946327, - "learning_rate": 2.025582139455078e-06, - "loss": 0.1712, - "step": 15732 - }, - { - "epoch": 0.8, - "grad_norm": 0.9710404594344169, - "learning_rate": 2.024588473516135e-06, - "loss": 0.1716, - "step": 15733 - }, - { - "epoch": 0.8, - "grad_norm": 1.0359442328774089, - "learning_rate": 2.023595023910677e-06, - "loss": 0.1672, - "step": 15734 - }, - { - "epoch": 0.8, - "grad_norm": 1.0489456256074758, - "learning_rate": 2.0226017906656568e-06, - "loss": 0.1577, - "step": 15735 - }, - { - "epoch": 0.8, - "grad_norm": 1.0072651874558733, - "learning_rate": 2.0216087738080116e-06, - "loss": 0.1693, - "step": 15736 - }, - { - "epoch": 0.8, - "grad_norm": 1.1457513882633208, - "learning_rate": 2.020615973364679e-06, - "loss": 0.1623, - "step": 15737 - }, - { - "epoch": 0.8, - "grad_norm": 1.297135541941941, - "learning_rate": 2.0196233893625883e-06, - "loss": 0.1638, - "step": 15738 - }, - { - "epoch": 0.8, - "grad_norm": 0.971115124295936, - "learning_rate": 2.0186310218286674e-06, - "loss": 0.1912, - "step": 15739 - }, - { - "epoch": 0.8, - "grad_norm": 1.2845202111648972, - "learning_rate": 2.0176388707898274e-06, - "loss": 0.1634, - "step": 15740 - }, - { - "epoch": 0.8, - "grad_norm": 0.8963316976105339, - "learning_rate": 2.0166469362729868e-06, - "loss": 0.147, - "step": 15741 - }, - { - "epoch": 0.8, - "grad_norm": 0.9617179150805765, - "learning_rate": 2.015655218305046e-06, - "loss": 0.1708, - "step": 15742 - }, - { - "epoch": 0.8, - "grad_norm": 1.0783035229158529, - "learning_rate": 2.0146637169129114e-06, - "loss": 0.1705, - "step": 15743 - }, - { - "epoch": 0.8, - "grad_norm": 0.989808277406126, - "learning_rate": 2.0136724321234714e-06, - "loss": 0.1549, - "step": 15744 - }, - { - "epoch": 0.8, - "grad_norm": 1.1378458311050972, - "learning_rate": 2.012681363963621e-06, - "loss": 0.1559, - "step": 15745 - }, - { - "epoch": 0.8, - "grad_norm": 1.1096384684415859, - "learning_rate": 2.011690512460237e-06, - "loss": 0.1661, - "step": 15746 - }, - { - "epoch": 0.8, - "grad_norm": 1.1269780749629759, - "learning_rate": 2.010699877640199e-06, - "loss": 0.146, - "step": 15747 - }, - { - "epoch": 0.8, - "grad_norm": 1.1432157339065054, - "learning_rate": 2.0097094595303766e-06, - "loss": 0.1725, - "step": 15748 - }, - { - "epoch": 0.8, - "grad_norm": 0.6841558682447949, - "learning_rate": 2.008719258157641e-06, - "loss": 0.1405, - "step": 15749 - }, - { - "epoch": 0.8, - "grad_norm": 1.4822552614993239, - "learning_rate": 2.0077292735488465e-06, - "loss": 0.1702, - "step": 15750 - }, - { - "epoch": 0.8, - "grad_norm": 0.872921064671971, - "learning_rate": 2.0067395057308436e-06, - "loss": 0.1593, - "step": 15751 - }, - { - "epoch": 0.8, - "grad_norm": 1.4210836201174981, - "learning_rate": 2.005749954730486e-06, - "loss": 0.1776, - "step": 15752 - }, - { - "epoch": 0.8, - "grad_norm": 1.0048219241176233, - "learning_rate": 2.0047606205746095e-06, - "loss": 0.1663, - "step": 15753 - }, - { - "epoch": 0.8, - "grad_norm": 2.1412716222867774, - "learning_rate": 2.0037715032900527e-06, - "loss": 0.1612, - "step": 15754 - }, - { - "epoch": 0.8, - "grad_norm": 1.0694424370520152, - "learning_rate": 2.002782602903647e-06, - "loss": 0.1815, - "step": 15755 - }, - { - "epoch": 0.8, - "grad_norm": 1.0157618690035999, - "learning_rate": 2.0017939194422153e-06, - "loss": 0.1572, - "step": 15756 - }, - { - "epoch": 0.8, - "grad_norm": 1.0320628363686533, - "learning_rate": 2.000805452932574e-06, - "loss": 0.16, - "step": 15757 - }, - { - "epoch": 0.8, - "grad_norm": 0.9266500851117392, - "learning_rate": 1.9998172034015382e-06, - "loss": 0.1642, - "step": 15758 - }, - { - "epoch": 0.8, - "grad_norm": 1.2331866837650636, - "learning_rate": 1.9988291708759112e-06, - "loss": 0.1876, - "step": 15759 - }, - { - "epoch": 0.8, - "grad_norm": 1.195498850218461, - "learning_rate": 1.997841355382497e-06, - "loss": 0.1913, - "step": 15760 - }, - { - "epoch": 0.8, - "grad_norm": 1.292371384434633, - "learning_rate": 1.996853756948085e-06, - "loss": 0.1433, - "step": 15761 - }, - { - "epoch": 0.8, - "grad_norm": 0.8263446800583641, - "learning_rate": 1.99586637559947e-06, - "loss": 0.1749, - "step": 15762 - }, - { - "epoch": 0.8, - "grad_norm": 0.9366195331034552, - "learning_rate": 1.9948792113634286e-06, - "loss": 0.1765, - "step": 15763 - }, - { - "epoch": 0.8, - "grad_norm": 0.9960600752360786, - "learning_rate": 1.9938922642667412e-06, - "loss": 0.1725, - "step": 15764 - }, - { - "epoch": 0.8, - "grad_norm": 0.813794636115855, - "learning_rate": 1.992905534336178e-06, - "loss": 0.1622, - "step": 15765 - }, - { - "epoch": 0.8, - "grad_norm": 1.0309233878437345, - "learning_rate": 1.9919190215985073e-06, - "loss": 0.1848, - "step": 15766 - }, - { - "epoch": 0.8, - "grad_norm": 1.122958722281624, - "learning_rate": 1.9909327260804857e-06, - "loss": 0.1691, - "step": 15767 - }, - { - "epoch": 0.8, - "grad_norm": 0.999950954499327, - "learning_rate": 1.9899466478088624e-06, - "loss": 0.172, - "step": 15768 - }, - { - "epoch": 0.8, - "grad_norm": 2.0435904214356926, - "learning_rate": 1.988960786810392e-06, - "loss": 0.1712, - "step": 15769 - }, - { - "epoch": 0.8, - "grad_norm": 1.0331564521315622, - "learning_rate": 1.98797514311181e-06, - "loss": 0.1449, - "step": 15770 - }, - { - "epoch": 0.8, - "grad_norm": 1.1759836053908204, - "learning_rate": 1.9869897167398576e-06, - "loss": 0.1635, - "step": 15771 - }, - { - "epoch": 0.8, - "grad_norm": 0.959731234772625, - "learning_rate": 1.9860045077212596e-06, - "loss": 0.1487, - "step": 15772 - }, - { - "epoch": 0.8, - "grad_norm": 1.0413370355997298, - "learning_rate": 1.9850195160827413e-06, - "loss": 0.1479, - "step": 15773 - }, - { - "epoch": 0.8, - "grad_norm": 1.0646452471085828, - "learning_rate": 1.984034741851022e-06, - "loss": 0.1884, - "step": 15774 - }, - { - "epoch": 0.8, - "grad_norm": 0.9575156754587291, - "learning_rate": 1.983050185052815e-06, - "loss": 0.1631, - "step": 15775 - }, - { - "epoch": 0.8, - "grad_norm": 1.0866603608279728, - "learning_rate": 1.982065845714821e-06, - "loss": 0.1566, - "step": 15776 - }, - { - "epoch": 0.8, - "grad_norm": 0.7649475499030911, - "learning_rate": 1.981081723863748e-06, - "loss": 0.1656, - "step": 15777 - }, - { - "epoch": 0.8, - "grad_norm": 1.4407044728748781, - "learning_rate": 1.9800978195262833e-06, - "loss": 0.1635, - "step": 15778 - }, - { - "epoch": 0.8, - "grad_norm": 1.2012243933420368, - "learning_rate": 1.9791141327291207e-06, - "loss": 0.1667, - "step": 15779 - }, - { - "epoch": 0.8, - "grad_norm": 0.7866904569914929, - "learning_rate": 1.9781306634989373e-06, - "loss": 0.1476, - "step": 15780 - }, - { - "epoch": 0.8, - "grad_norm": 1.4004951623252793, - "learning_rate": 1.9771474118624123e-06, - "loss": 0.1752, - "step": 15781 - }, - { - "epoch": 0.8, - "grad_norm": 1.1472073072957027, - "learning_rate": 1.976164377846218e-06, - "loss": 0.166, - "step": 15782 - }, - { - "epoch": 0.8, - "grad_norm": 0.9439095519046051, - "learning_rate": 1.97518156147702e-06, - "loss": 0.1813, - "step": 15783 - }, - { - "epoch": 0.8, - "grad_norm": 1.2137528617256548, - "learning_rate": 1.974198962781475e-06, - "loss": 0.1753, - "step": 15784 - }, - { - "epoch": 0.8, - "grad_norm": 0.939358251629492, - "learning_rate": 1.973216581786235e-06, - "loss": 0.1601, - "step": 15785 - }, - { - "epoch": 0.8, - "grad_norm": 1.0757297630588072, - "learning_rate": 1.972234418517951e-06, - "loss": 0.1591, - "step": 15786 - }, - { - "epoch": 0.8, - "grad_norm": 1.4805360150023117, - "learning_rate": 1.971252473003259e-06, - "loss": 0.1767, - "step": 15787 - }, - { - "epoch": 0.8, - "grad_norm": 1.0135733620995198, - "learning_rate": 1.9702707452687988e-06, - "loss": 0.14, - "step": 15788 - }, - { - "epoch": 0.8, - "grad_norm": 1.087953767363629, - "learning_rate": 1.9692892353411963e-06, - "loss": 0.1531, - "step": 15789 - }, - { - "epoch": 0.8, - "grad_norm": 1.67015170683124, - "learning_rate": 1.9683079432470774e-06, - "loss": 0.1513, - "step": 15790 - }, - { - "epoch": 0.8, - "grad_norm": 0.9810071214994488, - "learning_rate": 1.967326869013059e-06, - "loss": 0.1476, - "step": 15791 - }, - { - "epoch": 0.8, - "grad_norm": 1.0675283797681412, - "learning_rate": 1.966346012665754e-06, - "loss": 0.1575, - "step": 15792 - }, - { - "epoch": 0.8, - "grad_norm": 1.121737893971538, - "learning_rate": 1.965365374231766e-06, - "loss": 0.1748, - "step": 15793 - }, - { - "epoch": 0.8, - "grad_norm": 1.2965253386253484, - "learning_rate": 1.964384953737699e-06, - "loss": 0.1592, - "step": 15794 - }, - { - "epoch": 0.8, - "grad_norm": 0.8148539120516977, - "learning_rate": 1.9634047512101405e-06, - "loss": 0.182, - "step": 15795 - }, - { - "epoch": 0.8, - "grad_norm": 1.1682681957562062, - "learning_rate": 1.9624247666756844e-06, - "loss": 0.1777, - "step": 15796 - }, - { - "epoch": 0.8, - "grad_norm": 0.8664849282242465, - "learning_rate": 1.9614450001609085e-06, - "loss": 0.1577, - "step": 15797 - }, - { - "epoch": 0.8, - "grad_norm": 1.082203974334217, - "learning_rate": 1.960465451692394e-06, - "loss": 0.1579, - "step": 15798 - }, - { - "epoch": 0.8, - "grad_norm": 0.9962982984962556, - "learning_rate": 1.9594861212967055e-06, - "loss": 0.1612, - "step": 15799 - }, - { - "epoch": 0.8, - "grad_norm": 1.0910307551966145, - "learning_rate": 1.958507009000409e-06, - "loss": 0.1689, - "step": 15800 - }, - { - "epoch": 0.8, - "grad_norm": 0.846953543876322, - "learning_rate": 1.957528114830065e-06, - "loss": 0.1661, - "step": 15801 - }, - { - "epoch": 0.8, - "grad_norm": 1.0591308349877249, - "learning_rate": 1.9565494388122273e-06, - "loss": 0.1529, - "step": 15802 - }, - { - "epoch": 0.8, - "grad_norm": 1.0679178149141837, - "learning_rate": 1.95557098097344e-06, - "loss": 0.1597, - "step": 15803 - }, - { - "epoch": 0.8, - "grad_norm": 1.315835514316853, - "learning_rate": 1.9545927413402423e-06, - "loss": 0.1676, - "step": 15804 - }, - { - "epoch": 0.8, - "grad_norm": 0.9562754514141059, - "learning_rate": 1.953614719939173e-06, - "loss": 0.1579, - "step": 15805 - }, - { - "epoch": 0.8, - "grad_norm": 1.0302795637065507, - "learning_rate": 1.9526369167967575e-06, - "loss": 0.1626, - "step": 15806 - }, - { - "epoch": 0.8, - "grad_norm": 0.9573846588142638, - "learning_rate": 1.951659331939518e-06, - "loss": 0.1702, - "step": 15807 - }, - { - "epoch": 0.8, - "grad_norm": 1.5062813400680592, - "learning_rate": 1.9506819653939757e-06, - "loss": 0.1699, - "step": 15808 - }, - { - "epoch": 0.8, - "grad_norm": 1.898213287994842, - "learning_rate": 1.9497048171866417e-06, - "loss": 0.1678, - "step": 15809 - }, - { - "epoch": 0.8, - "grad_norm": 0.8511959910939554, - "learning_rate": 1.948727887344016e-06, - "loss": 0.155, - "step": 15810 - }, - { - "epoch": 0.8, - "grad_norm": 0.9784540549217013, - "learning_rate": 1.947751175892605e-06, - "loss": 0.1587, - "step": 15811 - }, - { - "epoch": 0.8, - "grad_norm": 0.9690023808596446, - "learning_rate": 1.946774682858894e-06, - "loss": 0.1456, - "step": 15812 - }, - { - "epoch": 0.8, - "grad_norm": 1.155590744200502, - "learning_rate": 1.9457984082693783e-06, - "loss": 0.1587, - "step": 15813 - }, - { - "epoch": 0.8, - "grad_norm": 1.1814968608821823, - "learning_rate": 1.9448223521505328e-06, - "loss": 0.1735, - "step": 15814 - }, - { - "epoch": 0.8, - "grad_norm": 1.0409770722575613, - "learning_rate": 1.9438465145288377e-06, - "loss": 0.1674, - "step": 15815 - }, - { - "epoch": 0.8, - "grad_norm": 1.612792088595669, - "learning_rate": 1.9428708954307595e-06, - "loss": 0.1713, - "step": 15816 - }, - { - "epoch": 0.8, - "grad_norm": 0.9555743960260334, - "learning_rate": 1.941895494882763e-06, - "loss": 0.1508, - "step": 15817 - }, - { - "epoch": 0.8, - "grad_norm": 1.4384926799369615, - "learning_rate": 1.940920312911306e-06, - "loss": 0.1838, - "step": 15818 - }, - { - "epoch": 0.8, - "grad_norm": 1.0606797339626892, - "learning_rate": 1.9399453495428434e-06, - "loss": 0.1627, - "step": 15819 - }, - { - "epoch": 0.8, - "grad_norm": 0.8377247178765734, - "learning_rate": 1.9389706048038183e-06, - "loss": 0.1799, - "step": 15820 - }, - { - "epoch": 0.8, - "grad_norm": 1.329021242357886, - "learning_rate": 1.9379960787206686e-06, - "loss": 0.1749, - "step": 15821 - }, - { - "epoch": 0.8, - "grad_norm": 1.2807704941709444, - "learning_rate": 1.9370217713198326e-06, - "loss": 0.1577, - "step": 15822 - }, - { - "epoch": 0.8, - "grad_norm": 1.2696927151281323, - "learning_rate": 1.936047682627734e-06, - "loss": 0.1498, - "step": 15823 - }, - { - "epoch": 0.8, - "grad_norm": 2.34286567311086, - "learning_rate": 1.9350738126707978e-06, - "loss": 0.1778, - "step": 15824 - }, - { - "epoch": 0.8, - "grad_norm": 1.102952604803538, - "learning_rate": 1.9341001614754407e-06, - "loss": 0.1737, - "step": 15825 - }, - { - "epoch": 0.8, - "grad_norm": 1.2905620541568963, - "learning_rate": 1.9331267290680744e-06, - "loss": 0.1787, - "step": 15826 - }, - { - "epoch": 0.8, - "grad_norm": 1.7752783882287622, - "learning_rate": 1.932153515475098e-06, - "loss": 0.1553, - "step": 15827 - }, - { - "epoch": 0.8, - "grad_norm": 1.389049775025545, - "learning_rate": 1.9311805207229162e-06, - "loss": 0.1576, - "step": 15828 - }, - { - "epoch": 0.8, - "grad_norm": 1.203796518234086, - "learning_rate": 1.9302077448379154e-06, - "loss": 0.1906, - "step": 15829 - }, - { - "epoch": 0.8, - "grad_norm": 0.7810701067378404, - "learning_rate": 1.929235187846488e-06, - "loss": 0.1539, - "step": 15830 - }, - { - "epoch": 0.81, - "grad_norm": 1.1441633726858527, - "learning_rate": 1.9282628497750098e-06, - "loss": 0.1673, - "step": 15831 - }, - { - "epoch": 0.81, - "grad_norm": 1.3399102124466988, - "learning_rate": 1.92729073064986e-06, - "loss": 0.194, - "step": 15832 - }, - { - "epoch": 0.81, - "grad_norm": 0.9996400920561656, - "learning_rate": 1.9263188304974023e-06, - "loss": 0.1648, - "step": 15833 - }, - { - "epoch": 0.81, - "grad_norm": 0.816567835157871, - "learning_rate": 1.9253471493440036e-06, - "loss": 0.1708, - "step": 15834 - }, - { - "epoch": 0.81, - "grad_norm": 0.8033366766000336, - "learning_rate": 1.924375687216018e-06, - "loss": 0.1448, - "step": 15835 - }, - { - "epoch": 0.81, - "grad_norm": 1.0534861733157275, - "learning_rate": 1.9234044441398016e-06, - "loss": 0.169, - "step": 15836 - }, - { - "epoch": 0.81, - "grad_norm": 0.8614388275474186, - "learning_rate": 1.922433420141695e-06, - "loss": 0.1566, - "step": 15837 - }, - { - "epoch": 0.81, - "grad_norm": 0.9897933960131479, - "learning_rate": 1.921462615248035e-06, - "loss": 0.1361, - "step": 15838 - }, - { - "epoch": 0.81, - "grad_norm": 1.0033910146002347, - "learning_rate": 1.9204920294851613e-06, - "loss": 0.168, - "step": 15839 - }, - { - "epoch": 0.81, - "grad_norm": 3.3643061597353467, - "learning_rate": 1.9195216628793956e-06, - "loss": 0.162, - "step": 15840 - }, - { - "epoch": 0.81, - "grad_norm": 1.3976734137083604, - "learning_rate": 1.9185515154570633e-06, - "loss": 0.18, - "step": 15841 - }, - { - "epoch": 0.81, - "grad_norm": 1.1903452435943236, - "learning_rate": 1.9175815872444748e-06, - "loss": 0.1673, - "step": 15842 - }, - { - "epoch": 0.81, - "grad_norm": 1.8597857795206585, - "learning_rate": 1.916611878267942e-06, - "loss": 0.1857, - "step": 15843 - }, - { - "epoch": 0.81, - "grad_norm": 1.2959628492227406, - "learning_rate": 1.915642388553769e-06, - "loss": 0.1528, - "step": 15844 - }, - { - "epoch": 0.81, - "grad_norm": 1.3997789619905479, - "learning_rate": 1.9146731181282554e-06, - "loss": 0.1762, - "step": 15845 - }, - { - "epoch": 0.81, - "grad_norm": 1.0091633763978645, - "learning_rate": 1.9137040670176878e-06, - "loss": 0.1719, - "step": 15846 - }, - { - "epoch": 0.81, - "grad_norm": 1.299180758820901, - "learning_rate": 1.9127352352483563e-06, - "loss": 0.1726, - "step": 15847 - }, - { - "epoch": 0.81, - "grad_norm": 0.9054257427731401, - "learning_rate": 1.911766622846536e-06, - "loss": 0.1776, - "step": 15848 - }, - { - "epoch": 0.81, - "grad_norm": 1.592958273371159, - "learning_rate": 1.9107982298385052e-06, - "loss": 0.1783, - "step": 15849 - }, - { - "epoch": 0.81, - "grad_norm": 1.15568910717821, - "learning_rate": 1.9098300562505266e-06, - "loss": 0.164, - "step": 15850 - }, - { - "epoch": 0.81, - "grad_norm": 1.2317553042477192, - "learning_rate": 1.908862102108865e-06, - "loss": 0.1848, - "step": 15851 - }, - { - "epoch": 0.81, - "grad_norm": 1.065679268129052, - "learning_rate": 1.9078943674397753e-06, - "loss": 0.1531, - "step": 15852 - }, - { - "epoch": 0.81, - "grad_norm": 0.8677385153275745, - "learning_rate": 1.9069268522695105e-06, - "loss": 0.1704, - "step": 15853 - }, - { - "epoch": 0.81, - "grad_norm": 1.015003102554297, - "learning_rate": 1.9059595566243127e-06, - "loss": 0.1577, - "step": 15854 - }, - { - "epoch": 0.81, - "grad_norm": 1.5993881937232388, - "learning_rate": 1.9049924805304165e-06, - "loss": 0.1687, - "step": 15855 - }, - { - "epoch": 0.81, - "grad_norm": 0.9648294924172559, - "learning_rate": 1.9040256240140587e-06, - "loss": 0.1761, - "step": 15856 - }, - { - "epoch": 0.81, - "grad_norm": 1.129006993288202, - "learning_rate": 1.9030589871014604e-06, - "loss": 0.1539, - "step": 15857 - }, - { - "epoch": 0.81, - "grad_norm": 1.0888111506199036, - "learning_rate": 1.9020925698188465e-06, - "loss": 0.1651, - "step": 15858 - }, - { - "epoch": 0.81, - "grad_norm": 1.05744231234916, - "learning_rate": 1.9011263721924277e-06, - "loss": 0.1602, - "step": 15859 - }, - { - "epoch": 0.81, - "grad_norm": 2.8526338413193164, - "learning_rate": 1.9001603942484127e-06, - "loss": 0.172, - "step": 15860 - }, - { - "epoch": 0.81, - "grad_norm": 1.3523800109758868, - "learning_rate": 1.8991946360130043e-06, - "loss": 0.1401, - "step": 15861 - }, - { - "epoch": 0.81, - "grad_norm": 1.515723016981549, - "learning_rate": 1.8982290975124019e-06, - "loss": 0.1701, - "step": 15862 - }, - { - "epoch": 0.81, - "grad_norm": 1.7267760543292379, - "learning_rate": 1.8972637787727898e-06, - "loss": 0.1779, - "step": 15863 - }, - { - "epoch": 0.81, - "grad_norm": 1.773154795440011, - "learning_rate": 1.8962986798203587e-06, - "loss": 0.1682, - "step": 15864 - }, - { - "epoch": 0.81, - "grad_norm": 1.0148911097830382, - "learning_rate": 1.8953338006812805e-06, - "loss": 0.1721, - "step": 15865 - }, - { - "epoch": 0.81, - "grad_norm": 1.122399862038846, - "learning_rate": 1.8943691413817334e-06, - "loss": 0.1736, - "step": 15866 - }, - { - "epoch": 0.81, - "grad_norm": 1.1388130015125641, - "learning_rate": 1.8934047019478785e-06, - "loss": 0.149, - "step": 15867 - }, - { - "epoch": 0.81, - "grad_norm": 1.1411525588589864, - "learning_rate": 1.8924404824058816e-06, - "loss": 0.1763, - "step": 15868 - }, - { - "epoch": 0.81, - "grad_norm": 2.338725253391659, - "learning_rate": 1.8914764827818921e-06, - "loss": 0.1514, - "step": 15869 - }, - { - "epoch": 0.81, - "grad_norm": 1.542120667256205, - "learning_rate": 1.8905127031020598e-06, - "loss": 0.1648, - "step": 15870 - }, - { - "epoch": 0.81, - "grad_norm": 0.957055240444867, - "learning_rate": 1.8895491433925328e-06, - "loss": 0.1638, - "step": 15871 - }, - { - "epoch": 0.81, - "grad_norm": 0.8326227194676635, - "learning_rate": 1.8885858036794401e-06, - "loss": 0.1607, - "step": 15872 - }, - { - "epoch": 0.81, - "grad_norm": 1.0202307760657778, - "learning_rate": 1.8876226839889177e-06, - "loss": 0.1709, - "step": 15873 - }, - { - "epoch": 0.81, - "grad_norm": 1.1402323985518499, - "learning_rate": 1.8866597843470858e-06, - "loss": 0.1642, - "step": 15874 - }, - { - "epoch": 0.81, - "grad_norm": 1.010208304292357, - "learning_rate": 1.8856971047800687e-06, - "loss": 0.1636, - "step": 15875 - }, - { - "epoch": 0.81, - "grad_norm": 26.547268409354285, - "learning_rate": 1.8847346453139726e-06, - "loss": 0.1706, - "step": 15876 - }, - { - "epoch": 0.81, - "grad_norm": 2.9852515941330995, - "learning_rate": 1.8837724059749074e-06, - "loss": 0.159, - "step": 15877 - }, - { - "epoch": 0.81, - "grad_norm": 1.0533560381335083, - "learning_rate": 1.8828103867889747e-06, - "loss": 0.1958, - "step": 15878 - }, - { - "epoch": 0.81, - "grad_norm": 1.3430404155874531, - "learning_rate": 1.8818485877822712e-06, - "loss": 0.1859, - "step": 15879 - }, - { - "epoch": 0.81, - "grad_norm": 0.8794215939911043, - "learning_rate": 1.8808870089808806e-06, - "loss": 0.1685, - "step": 15880 - }, - { - "epoch": 0.81, - "grad_norm": 1.037638249002718, - "learning_rate": 1.879925650410892e-06, - "loss": 0.1607, - "step": 15881 - }, - { - "epoch": 0.81, - "grad_norm": 0.8697582929524619, - "learning_rate": 1.8789645120983746e-06, - "loss": 0.1792, - "step": 15882 - }, - { - "epoch": 0.81, - "grad_norm": 0.8982466851373708, - "learning_rate": 1.8780035940694075e-06, - "loss": 0.1577, - "step": 15883 - }, - { - "epoch": 0.81, - "grad_norm": 1.061375290924336, - "learning_rate": 1.8770428963500475e-06, - "loss": 0.1484, - "step": 15884 - }, - { - "epoch": 0.81, - "grad_norm": 1.4971804168207266, - "learning_rate": 1.8760824189663618e-06, - "loss": 0.1775, - "step": 15885 - }, - { - "epoch": 0.81, - "grad_norm": 0.9099341373713296, - "learning_rate": 1.875122161944396e-06, - "loss": 0.1559, - "step": 15886 - }, - { - "epoch": 0.81, - "grad_norm": 1.0462905697176088, - "learning_rate": 1.8741621253102005e-06, - "loss": 0.1675, - "step": 15887 - }, - { - "epoch": 0.81, - "grad_norm": 0.7935935303208741, - "learning_rate": 1.8732023090898165e-06, - "loss": 0.1369, - "step": 15888 - }, - { - "epoch": 0.81, - "grad_norm": 0.9045384382704341, - "learning_rate": 1.8722427133092813e-06, - "loss": 0.173, - "step": 15889 - }, - { - "epoch": 0.81, - "grad_norm": 1.1845902438296145, - "learning_rate": 1.8712833379946217e-06, - "loss": 0.1821, - "step": 15890 - }, - { - "epoch": 0.81, - "grad_norm": 0.7466157451415563, - "learning_rate": 1.8703241831718578e-06, - "loss": 0.1655, - "step": 15891 - }, - { - "epoch": 0.81, - "grad_norm": 0.8891584234780563, - "learning_rate": 1.8693652488670121e-06, - "loss": 0.1685, - "step": 15892 - }, - { - "epoch": 0.81, - "grad_norm": 0.973723244657113, - "learning_rate": 1.868406535106091e-06, - "loss": 0.1593, - "step": 15893 - }, - { - "epoch": 0.81, - "grad_norm": 1.1918050991675817, - "learning_rate": 1.8674480419151041e-06, - "loss": 0.1669, - "step": 15894 - }, - { - "epoch": 0.81, - "grad_norm": 0.8854809913998016, - "learning_rate": 1.8664897693200456e-06, - "loss": 0.1583, - "step": 15895 - }, - { - "epoch": 0.81, - "grad_norm": 1.0964839487023017, - "learning_rate": 1.8655317173469122e-06, - "loss": 0.1854, - "step": 15896 - }, - { - "epoch": 0.81, - "grad_norm": 0.8412304856386966, - "learning_rate": 1.86457388602169e-06, - "loss": 0.1756, - "step": 15897 - }, - { - "epoch": 0.81, - "grad_norm": 3.5080930598938767, - "learning_rate": 1.8636162753703636e-06, - "loss": 0.1666, - "step": 15898 - }, - { - "epoch": 0.81, - "grad_norm": 1.05984641476992, - "learning_rate": 1.862658885418902e-06, - "loss": 0.1487, - "step": 15899 - }, - { - "epoch": 0.81, - "grad_norm": 1.0047321045727262, - "learning_rate": 1.8617017161932815e-06, - "loss": 0.1705, - "step": 15900 - }, - { - "epoch": 0.81, - "grad_norm": 0.9856203454509121, - "learning_rate": 1.8607447677194578e-06, - "loss": 0.1615, - "step": 15901 - }, - { - "epoch": 0.81, - "grad_norm": 1.7206410988649752, - "learning_rate": 1.8597880400233959e-06, - "loss": 0.1529, - "step": 15902 - }, - { - "epoch": 0.81, - "grad_norm": 1.1406816086087803, - "learning_rate": 1.8588315331310392e-06, - "loss": 0.1806, - "step": 15903 - }, - { - "epoch": 0.81, - "grad_norm": 1.4328388461061061, - "learning_rate": 1.8578752470683381e-06, - "loss": 0.1686, - "step": 15904 - }, - { - "epoch": 0.81, - "grad_norm": 1.15321917515873, - "learning_rate": 1.8569191818612298e-06, - "loss": 0.1626, - "step": 15905 - }, - { - "epoch": 0.81, - "grad_norm": 1.175305560323787, - "learning_rate": 1.8559633375356511e-06, - "loss": 0.1803, - "step": 15906 - }, - { - "epoch": 0.81, - "grad_norm": 1.026447711263733, - "learning_rate": 1.8550077141175282e-06, - "loss": 0.1706, - "step": 15907 - }, - { - "epoch": 0.81, - "grad_norm": 0.8598763378612653, - "learning_rate": 1.8540523116327769e-06, - "loss": 0.1587, - "step": 15908 - }, - { - "epoch": 0.81, - "grad_norm": 0.9602544825284762, - "learning_rate": 1.8530971301073208e-06, - "loss": 0.1769, - "step": 15909 - }, - { - "epoch": 0.81, - "grad_norm": 1.1276646643628279, - "learning_rate": 1.8521421695670617e-06, - "loss": 0.1819, - "step": 15910 - }, - { - "epoch": 0.81, - "grad_norm": 1.3784525456178116, - "learning_rate": 1.8511874300379095e-06, - "loss": 0.1564, - "step": 15911 - }, - { - "epoch": 0.81, - "grad_norm": 0.9188987056733893, - "learning_rate": 1.8502329115457551e-06, - "loss": 0.185, - "step": 15912 - }, - { - "epoch": 0.81, - "grad_norm": 0.8884175776293851, - "learning_rate": 1.8492786141164943e-06, - "loss": 0.1616, - "step": 15913 - }, - { - "epoch": 0.81, - "grad_norm": 0.864036007568502, - "learning_rate": 1.8483245377760106e-06, - "loss": 0.163, - "step": 15914 - }, - { - "epoch": 0.81, - "grad_norm": 1.175478431383708, - "learning_rate": 1.847370682550187e-06, - "loss": 0.1706, - "step": 15915 - }, - { - "epoch": 0.81, - "grad_norm": 0.8877435476808375, - "learning_rate": 1.8464170484648924e-06, - "loss": 0.155, - "step": 15916 - }, - { - "epoch": 0.81, - "grad_norm": 1.2440300952858039, - "learning_rate": 1.8454636355459977e-06, - "loss": 0.152, - "step": 15917 - }, - { - "epoch": 0.81, - "grad_norm": 1.0778220695867444, - "learning_rate": 1.8445104438193595e-06, - "loss": 0.1672, - "step": 15918 - }, - { - "epoch": 0.81, - "grad_norm": 1.217954161449602, - "learning_rate": 1.8435574733108397e-06, - "loss": 0.1745, - "step": 15919 - }, - { - "epoch": 0.81, - "grad_norm": 0.905783275261828, - "learning_rate": 1.8426047240462807e-06, - "loss": 0.157, - "step": 15920 - }, - { - "epoch": 0.81, - "grad_norm": 1.0235188483867, - "learning_rate": 1.84165219605153e-06, - "loss": 0.1628, - "step": 15921 - }, - { - "epoch": 0.81, - "grad_norm": 0.7664994271883586, - "learning_rate": 1.840699889352423e-06, - "loss": 0.165, - "step": 15922 - }, - { - "epoch": 0.81, - "grad_norm": 1.55969940166033, - "learning_rate": 1.8397478039747962e-06, - "loss": 0.1829, - "step": 15923 - }, - { - "epoch": 0.81, - "grad_norm": 0.9627591745734583, - "learning_rate": 1.83879593994447e-06, - "loss": 0.1737, - "step": 15924 - }, - { - "epoch": 0.81, - "grad_norm": 1.1817190829880238, - "learning_rate": 1.8378442972872635e-06, - "loss": 0.164, - "step": 15925 - }, - { - "epoch": 0.81, - "grad_norm": 1.2039858241344097, - "learning_rate": 1.8368928760289928e-06, - "loss": 0.1579, - "step": 15926 - }, - { - "epoch": 0.81, - "grad_norm": 0.9183717007030037, - "learning_rate": 1.8359416761954629e-06, - "loss": 0.1791, - "step": 15927 - }, - { - "epoch": 0.81, - "grad_norm": 1.0886857154910026, - "learning_rate": 1.8349906978124776e-06, - "loss": 0.1548, - "step": 15928 - }, - { - "epoch": 0.81, - "grad_norm": 1.0074054922942874, - "learning_rate": 1.8340399409058284e-06, - "loss": 0.1753, - "step": 15929 - }, - { - "epoch": 0.81, - "grad_norm": 1.2258662949395527, - "learning_rate": 1.833089405501307e-06, - "loss": 0.1464, - "step": 15930 - }, - { - "epoch": 0.81, - "grad_norm": 0.9872561371738571, - "learning_rate": 1.8321390916246961e-06, - "loss": 0.1626, - "step": 15931 - }, - { - "epoch": 0.81, - "grad_norm": 1.3703066745943606, - "learning_rate": 1.8311889993017772e-06, - "loss": 0.1834, - "step": 15932 - }, - { - "epoch": 0.81, - "grad_norm": 1.1405886391351094, - "learning_rate": 1.8302391285583153e-06, - "loss": 0.1906, - "step": 15933 - }, - { - "epoch": 0.81, - "grad_norm": 1.6044683790640693, - "learning_rate": 1.829289479420081e-06, - "loss": 0.152, - "step": 15934 - }, - { - "epoch": 0.81, - "grad_norm": 1.029769477626035, - "learning_rate": 1.828340051912828e-06, - "loss": 0.1515, - "step": 15935 - }, - { - "epoch": 0.81, - "grad_norm": 1.3723365923561242, - "learning_rate": 1.8273908460623157e-06, - "loss": 0.1474, - "step": 15936 - }, - { - "epoch": 0.81, - "grad_norm": 1.248137118999691, - "learning_rate": 1.8264418618942859e-06, - "loss": 0.1438, - "step": 15937 - }, - { - "epoch": 0.81, - "grad_norm": 1.280726460462142, - "learning_rate": 1.8254930994344845e-06, - "loss": 0.1709, - "step": 15938 - }, - { - "epoch": 0.81, - "grad_norm": 1.1191582109101215, - "learning_rate": 1.8245445587086419e-06, - "loss": 0.1723, - "step": 15939 - }, - { - "epoch": 0.81, - "grad_norm": 1.2899830299526782, - "learning_rate": 1.82359623974249e-06, - "loss": 0.193, - "step": 15940 - }, - { - "epoch": 0.81, - "grad_norm": 1.0323262769938613, - "learning_rate": 1.8226481425617549e-06, - "loss": 0.1539, - "step": 15941 - }, - { - "epoch": 0.81, - "grad_norm": 1.5728960046365268, - "learning_rate": 1.821700267192148e-06, - "loss": 0.1543, - "step": 15942 - }, - { - "epoch": 0.81, - "grad_norm": 0.888435356901117, - "learning_rate": 1.8207526136593857e-06, - "loss": 0.1849, - "step": 15943 - }, - { - "epoch": 0.81, - "grad_norm": 1.2068742130827914, - "learning_rate": 1.8198051819891672e-06, - "loss": 0.1574, - "step": 15944 - }, - { - "epoch": 0.81, - "grad_norm": 0.8943393072237987, - "learning_rate": 1.8188579722071985e-06, - "loss": 0.1575, - "step": 15945 - }, - { - "epoch": 0.81, - "grad_norm": 1.218001554855009, - "learning_rate": 1.8179109843391673e-06, - "loss": 0.1686, - "step": 15946 - }, - { - "epoch": 0.81, - "grad_norm": 1.0580225505444398, - "learning_rate": 1.8169642184107628e-06, - "loss": 0.1771, - "step": 15947 - }, - { - "epoch": 0.81, - "grad_norm": 0.9884038518855041, - "learning_rate": 1.8160176744476654e-06, - "loss": 0.1588, - "step": 15948 - }, - { - "epoch": 0.81, - "grad_norm": 1.0281074804685377, - "learning_rate": 1.8150713524755536e-06, - "loss": 0.163, - "step": 15949 - }, - { - "epoch": 0.81, - "grad_norm": 0.8649855464767741, - "learning_rate": 1.8141252525200914e-06, - "loss": 0.1706, - "step": 15950 - }, - { - "epoch": 0.81, - "grad_norm": 1.5950583604159652, - "learning_rate": 1.813179374606946e-06, - "loss": 0.1543, - "step": 15951 - }, - { - "epoch": 0.81, - "grad_norm": 1.2086613904361754, - "learning_rate": 1.81223371876177e-06, - "loss": 0.1489, - "step": 15952 - }, - { - "epoch": 0.81, - "grad_norm": 2.7095853537929204, - "learning_rate": 1.8112882850102198e-06, - "loss": 0.1523, - "step": 15953 - }, - { - "epoch": 0.81, - "grad_norm": 0.9331269166408543, - "learning_rate": 1.8103430733779348e-06, - "loss": 0.1722, - "step": 15954 - }, - { - "epoch": 0.81, - "grad_norm": 0.8453545466230363, - "learning_rate": 1.809398083890559e-06, - "loss": 0.1549, - "step": 15955 - }, - { - "epoch": 0.81, - "grad_norm": 0.7681451063904245, - "learning_rate": 1.8084533165737195e-06, - "loss": 0.1457, - "step": 15956 - }, - { - "epoch": 0.81, - "grad_norm": 0.9178846288166791, - "learning_rate": 1.807508771453047e-06, - "loss": 0.1501, - "step": 15957 - }, - { - "epoch": 0.81, - "grad_norm": 1.0385827144853221, - "learning_rate": 1.8065644485541622e-06, - "loss": 0.1582, - "step": 15958 - }, - { - "epoch": 0.81, - "grad_norm": 0.8697295581069294, - "learning_rate": 1.8056203479026812e-06, - "loss": 0.1473, - "step": 15959 - }, - { - "epoch": 0.81, - "grad_norm": 1.2051188420799237, - "learning_rate": 1.8046764695242115e-06, - "loss": 0.1587, - "step": 15960 - }, - { - "epoch": 0.81, - "grad_norm": 1.0327908075621928, - "learning_rate": 1.8037328134443532e-06, - "loss": 0.175, - "step": 15961 - }, - { - "epoch": 0.81, - "grad_norm": 0.9823285672773258, - "learning_rate": 1.8027893796887075e-06, - "loss": 0.1754, - "step": 15962 - }, - { - "epoch": 0.81, - "grad_norm": 0.7991140537880537, - "learning_rate": 1.8018461682828603e-06, - "loss": 0.1396, - "step": 15963 - }, - { - "epoch": 0.81, - "grad_norm": 1.347432924466073, - "learning_rate": 1.8009031792524012e-06, - "loss": 0.1891, - "step": 15964 - }, - { - "epoch": 0.81, - "grad_norm": 1.6531415450426012, - "learning_rate": 1.7999604126229043e-06, - "loss": 0.1518, - "step": 15965 - }, - { - "epoch": 0.81, - "grad_norm": 1.0830412428308247, - "learning_rate": 1.7990178684199444e-06, - "loss": 0.1662, - "step": 15966 - }, - { - "epoch": 0.81, - "grad_norm": 0.9304754757162109, - "learning_rate": 1.798075546669088e-06, - "loss": 0.1515, - "step": 15967 - }, - { - "epoch": 0.81, - "grad_norm": 1.7648335000274984, - "learning_rate": 1.797133447395898e-06, - "loss": 0.1643, - "step": 15968 - }, - { - "epoch": 0.81, - "grad_norm": 1.5204642842384637, - "learning_rate": 1.7961915706259236e-06, - "loss": 0.1595, - "step": 15969 - }, - { - "epoch": 0.81, - "grad_norm": 1.230914612772511, - "learning_rate": 1.7952499163847192e-06, - "loss": 0.1545, - "step": 15970 - }, - { - "epoch": 0.81, - "grad_norm": 1.048495018382882, - "learning_rate": 1.794308484697822e-06, - "loss": 0.1423, - "step": 15971 - }, - { - "epoch": 0.81, - "grad_norm": 1.19952605245712, - "learning_rate": 1.7933672755907727e-06, - "loss": 0.1515, - "step": 15972 - }, - { - "epoch": 0.81, - "grad_norm": 1.2280860890863525, - "learning_rate": 1.7924262890890964e-06, - "loss": 0.1407, - "step": 15973 - }, - { - "epoch": 0.81, - "grad_norm": 1.0566678871977504, - "learning_rate": 1.7914855252183217e-06, - "loss": 0.193, - "step": 15974 - }, - { - "epoch": 0.81, - "grad_norm": 1.2322299380808484, - "learning_rate": 1.7905449840039645e-06, - "loss": 0.1444, - "step": 15975 - }, - { - "epoch": 0.81, - "grad_norm": 1.1100249588562576, - "learning_rate": 1.7896046654715427e-06, - "loss": 0.1737, - "step": 15976 - }, - { - "epoch": 0.81, - "grad_norm": 1.1424919104979556, - "learning_rate": 1.7886645696465566e-06, - "loss": 0.1688, - "step": 15977 - }, - { - "epoch": 0.81, - "grad_norm": 0.9677874819880176, - "learning_rate": 1.787724696554506e-06, - "loss": 0.1354, - "step": 15978 - }, - { - "epoch": 0.81, - "grad_norm": 1.060213074988549, - "learning_rate": 1.7867850462208892e-06, - "loss": 0.1783, - "step": 15979 - }, - { - "epoch": 0.81, - "grad_norm": 0.8748895283896404, - "learning_rate": 1.785845618671188e-06, - "loss": 0.1462, - "step": 15980 - }, - { - "epoch": 0.81, - "grad_norm": 1.25618502996236, - "learning_rate": 1.7849064139308925e-06, - "loss": 0.1594, - "step": 15981 - }, - { - "epoch": 0.81, - "grad_norm": 1.127720954067132, - "learning_rate": 1.7839674320254718e-06, - "loss": 0.1536, - "step": 15982 - }, - { - "epoch": 0.81, - "grad_norm": 1.0095621148613478, - "learning_rate": 1.783028672980398e-06, - "loss": 0.1686, - "step": 15983 - }, - { - "epoch": 0.81, - "grad_norm": 1.0674805735190684, - "learning_rate": 1.7820901368211362e-06, - "loss": 0.1575, - "step": 15984 - }, - { - "epoch": 0.81, - "grad_norm": 0.9566168211011928, - "learning_rate": 1.7811518235731461e-06, - "loss": 0.1596, - "step": 15985 - }, - { - "epoch": 0.81, - "grad_norm": 1.3033046485908206, - "learning_rate": 1.780213733261874e-06, - "loss": 0.1595, - "step": 15986 - }, - { - "epoch": 0.81, - "grad_norm": 0.9332843058050407, - "learning_rate": 1.7792758659127706e-06, - "loss": 0.172, - "step": 15987 - }, - { - "epoch": 0.81, - "grad_norm": 1.0017301526569287, - "learning_rate": 1.7783382215512724e-06, - "loss": 0.1769, - "step": 15988 - }, - { - "epoch": 0.81, - "grad_norm": 1.2771970862900306, - "learning_rate": 1.7774008002028164e-06, - "loss": 0.1475, - "step": 15989 - }, - { - "epoch": 0.81, - "grad_norm": 0.9754875288852934, - "learning_rate": 1.7764636018928249e-06, - "loss": 0.1619, - "step": 15990 - }, - { - "epoch": 0.81, - "grad_norm": 2.4876323017582416, - "learning_rate": 1.7755266266467264e-06, - "loss": 0.1752, - "step": 15991 - }, - { - "epoch": 0.81, - "grad_norm": 1.1379979426252034, - "learning_rate": 1.7745898744899292e-06, - "loss": 0.162, - "step": 15992 - }, - { - "epoch": 0.81, - "grad_norm": 0.8945533397316004, - "learning_rate": 1.7736533454478466e-06, - "loss": 0.166, - "step": 15993 - }, - { - "epoch": 0.81, - "grad_norm": 2.474577884249597, - "learning_rate": 1.7727170395458838e-06, - "loss": 0.1563, - "step": 15994 - }, - { - "epoch": 0.81, - "grad_norm": 1.1805580502190653, - "learning_rate": 1.7717809568094334e-06, - "loss": 0.1552, - "step": 15995 - }, - { - "epoch": 0.81, - "grad_norm": 0.8905053860600621, - "learning_rate": 1.7708450972638923e-06, - "loss": 0.1592, - "step": 15996 - }, - { - "epoch": 0.81, - "grad_norm": 1.1618938816030313, - "learning_rate": 1.76990946093464e-06, - "loss": 0.1844, - "step": 15997 - }, - { - "epoch": 0.81, - "grad_norm": 2.2644217815426884, - "learning_rate": 1.7689740478470608e-06, - "loss": 0.1661, - "step": 15998 - }, - { - "epoch": 0.81, - "grad_norm": 0.9554434364928839, - "learning_rate": 1.768038858026523e-06, - "loss": 0.163, - "step": 15999 - }, - { - "epoch": 0.81, - "grad_norm": 1.1010955555027215, - "learning_rate": 1.7671038914983963e-06, - "loss": 0.1675, - "step": 16000 - }, - { - "epoch": 0.81, - "grad_norm": 0.8927567601335374, - "learning_rate": 1.7661691482880416e-06, - "loss": 0.1633, - "step": 16001 - }, - { - "epoch": 0.81, - "grad_norm": 1.202918742731831, - "learning_rate": 1.7652346284208167e-06, - "loss": 0.1921, - "step": 16002 - }, - { - "epoch": 0.81, - "grad_norm": 1.2242544638925155, - "learning_rate": 1.7643003319220642e-06, - "loss": 0.1741, - "step": 16003 - }, - { - "epoch": 0.81, - "grad_norm": 0.8326302787576414, - "learning_rate": 1.763366258817133e-06, - "loss": 0.1455, - "step": 16004 - }, - { - "epoch": 0.81, - "grad_norm": 1.3261346371041038, - "learning_rate": 1.762432409131355e-06, - "loss": 0.1838, - "step": 16005 - }, - { - "epoch": 0.81, - "grad_norm": 1.3388318854708003, - "learning_rate": 1.7614987828900654e-06, - "loss": 0.1504, - "step": 16006 - }, - { - "epoch": 0.81, - "grad_norm": 1.1517921068209622, - "learning_rate": 1.760565380118584e-06, - "loss": 0.1813, - "step": 16007 - }, - { - "epoch": 0.81, - "grad_norm": 0.9223647551231623, - "learning_rate": 1.7596322008422351e-06, - "loss": 0.1501, - "step": 16008 - }, - { - "epoch": 0.81, - "grad_norm": 1.0249862314487033, - "learning_rate": 1.7586992450863261e-06, - "loss": 0.1602, - "step": 16009 - }, - { - "epoch": 0.81, - "grad_norm": 1.3558351653151626, - "learning_rate": 1.7577665128761645e-06, - "loss": 0.1563, - "step": 16010 - }, - { - "epoch": 0.81, - "grad_norm": 1.0748815541391956, - "learning_rate": 1.7568340042370546e-06, - "loss": 0.1664, - "step": 16011 - }, - { - "epoch": 0.81, - "grad_norm": 1.0504259349670237, - "learning_rate": 1.755901719194285e-06, - "loss": 0.1619, - "step": 16012 - }, - { - "epoch": 0.81, - "grad_norm": 1.0688802098767785, - "learning_rate": 1.7549696577731502e-06, - "loss": 0.1584, - "step": 16013 - }, - { - "epoch": 0.81, - "grad_norm": 1.414884983836496, - "learning_rate": 1.754037819998926e-06, - "loss": 0.1652, - "step": 16014 - }, - { - "epoch": 0.81, - "grad_norm": 1.3920470329979084, - "learning_rate": 1.753106205896895e-06, - "loss": 0.1533, - "step": 16015 - }, - { - "epoch": 0.81, - "grad_norm": 1.166883450478275, - "learning_rate": 1.75217481549232e-06, - "loss": 0.1838, - "step": 16016 - }, - { - "epoch": 0.81, - "grad_norm": 1.0275778044647248, - "learning_rate": 1.7512436488104723e-06, - "loss": 0.1607, - "step": 16017 - }, - { - "epoch": 0.81, - "grad_norm": 1.5038447361122222, - "learning_rate": 1.7503127058766046e-06, - "loss": 0.1896, - "step": 16018 - }, - { - "epoch": 0.81, - "grad_norm": 0.9306661523910239, - "learning_rate": 1.74938198671597e-06, - "loss": 0.1754, - "step": 16019 - }, - { - "epoch": 0.81, - "grad_norm": 1.4884420143779518, - "learning_rate": 1.7484514913538154e-06, - "loss": 0.1673, - "step": 16020 - }, - { - "epoch": 0.81, - "grad_norm": 0.9758314584022224, - "learning_rate": 1.7475212198153823e-06, - "loss": 0.1535, - "step": 16021 - }, - { - "epoch": 0.81, - "grad_norm": 1.2001369707687648, - "learning_rate": 1.7465911721259e-06, - "loss": 0.178, - "step": 16022 - }, - { - "epoch": 0.81, - "grad_norm": 1.2680078302782236, - "learning_rate": 1.7456613483106e-06, - "loss": 0.1624, - "step": 16023 - }, - { - "epoch": 0.81, - "grad_norm": 0.9690828344942196, - "learning_rate": 1.7447317483947002e-06, - "loss": 0.1319, - "step": 16024 - }, - { - "epoch": 0.81, - "grad_norm": 1.2521543439308118, - "learning_rate": 1.7438023724034215e-06, - "loss": 0.1739, - "step": 16025 - }, - { - "epoch": 0.81, - "grad_norm": 0.8606284544479783, - "learning_rate": 1.7428732203619659e-06, - "loss": 0.1684, - "step": 16026 - }, - { - "epoch": 0.82, - "grad_norm": 1.2639439074812617, - "learning_rate": 1.741944292295541e-06, - "loss": 0.2058, - "step": 16027 - }, - { - "epoch": 0.82, - "grad_norm": 0.9448353201257036, - "learning_rate": 1.7410155882293434e-06, - "loss": 0.148, - "step": 16028 - }, - { - "epoch": 0.82, - "grad_norm": 1.6066023830778167, - "learning_rate": 1.7400871081885672e-06, - "loss": 0.1708, - "step": 16029 - }, - { - "epoch": 0.82, - "grad_norm": 2.043947156009396, - "learning_rate": 1.7391588521983948e-06, - "loss": 0.162, - "step": 16030 - }, - { - "epoch": 0.82, - "grad_norm": 1.0654314480734575, - "learning_rate": 1.7382308202840027e-06, - "loss": 0.1717, - "step": 16031 - }, - { - "epoch": 0.82, - "grad_norm": 0.9800410320580712, - "learning_rate": 1.737303012470568e-06, - "loss": 0.1872, - "step": 16032 - }, - { - "epoch": 0.82, - "grad_norm": 1.0186507793410637, - "learning_rate": 1.7363754287832535e-06, - "loss": 0.174, - "step": 16033 - }, - { - "epoch": 0.82, - "grad_norm": 1.2723809059140396, - "learning_rate": 1.7354480692472253e-06, - "loss": 0.1666, - "step": 16034 - }, - { - "epoch": 0.82, - "grad_norm": 0.9192403831092457, - "learning_rate": 1.7345209338876324e-06, - "loss": 0.1606, - "step": 16035 - }, - { - "epoch": 0.82, - "grad_norm": 0.8880642758230413, - "learning_rate": 1.7335940227296254e-06, - "loss": 0.1649, - "step": 16036 - }, - { - "epoch": 0.82, - "grad_norm": 1.191038489713937, - "learning_rate": 1.7326673357983482e-06, - "loss": 0.1748, - "step": 16037 - }, - { - "epoch": 0.82, - "grad_norm": 1.0796781843628873, - "learning_rate": 1.7317408731189378e-06, - "loss": 0.1598, - "step": 16038 - }, - { - "epoch": 0.82, - "grad_norm": 1.8984590128115462, - "learning_rate": 1.7308146347165212e-06, - "loss": 0.1815, - "step": 16039 - }, - { - "epoch": 0.82, - "grad_norm": 0.9255649584515351, - "learning_rate": 1.729888620616228e-06, - "loss": 0.1813, - "step": 16040 - }, - { - "epoch": 0.82, - "grad_norm": 1.1961173565730827, - "learning_rate": 1.7289628308431694e-06, - "loss": 0.1753, - "step": 16041 - }, - { - "epoch": 0.82, - "grad_norm": 1.193927705094369, - "learning_rate": 1.7280372654224642e-06, - "loss": 0.1873, - "step": 16042 - }, - { - "epoch": 0.82, - "grad_norm": 0.8192238004138188, - "learning_rate": 1.7271119243792135e-06, - "loss": 0.1591, - "step": 16043 - }, - { - "epoch": 0.82, - "grad_norm": 0.964331006140522, - "learning_rate": 1.726186807738518e-06, - "loss": 0.1526, - "step": 16044 - }, - { - "epoch": 0.82, - "grad_norm": 1.8146540744504316, - "learning_rate": 1.7252619155254734e-06, - "loss": 0.1507, - "step": 16045 - }, - { - "epoch": 0.82, - "grad_norm": 0.8120205092019961, - "learning_rate": 1.7243372477651688e-06, - "loss": 0.1432, - "step": 16046 - }, - { - "epoch": 0.82, - "grad_norm": 0.9761030170080665, - "learning_rate": 1.7234128044826836e-06, - "loss": 0.1664, - "step": 16047 - }, - { - "epoch": 0.82, - "grad_norm": 1.1021142028744921, - "learning_rate": 1.7224885857030916e-06, - "loss": 0.1598, - "step": 16048 - }, - { - "epoch": 0.82, - "grad_norm": 1.8608872145849567, - "learning_rate": 1.7215645914514668e-06, - "loss": 0.1622, - "step": 16049 - }, - { - "epoch": 0.82, - "grad_norm": 1.107725886336896, - "learning_rate": 1.7206408217528669e-06, - "loss": 0.1642, - "step": 16050 - }, - { - "epoch": 0.82, - "grad_norm": 1.0829423681796864, - "learning_rate": 1.7197172766323556e-06, - "loss": 0.1549, - "step": 16051 - }, - { - "epoch": 0.82, - "grad_norm": 1.2654030905626539, - "learning_rate": 1.718793956114978e-06, - "loss": 0.1551, - "step": 16052 - }, - { - "epoch": 0.82, - "grad_norm": 1.7045139028307472, - "learning_rate": 1.717870860225782e-06, - "loss": 0.1527, - "step": 16053 - }, - { - "epoch": 0.82, - "grad_norm": 1.109320034642993, - "learning_rate": 1.7169479889898065e-06, - "loss": 0.175, - "step": 16054 - }, - { - "epoch": 0.82, - "grad_norm": 1.471551674901941, - "learning_rate": 1.7160253424320872e-06, - "loss": 0.1926, - "step": 16055 - }, - { - "epoch": 0.82, - "grad_norm": 1.8560448881427307, - "learning_rate": 1.7151029205776459e-06, - "loss": 0.181, - "step": 16056 - }, - { - "epoch": 0.82, - "grad_norm": 1.2365487452671833, - "learning_rate": 1.7141807234515085e-06, - "loss": 0.1671, - "step": 16057 - }, - { - "epoch": 0.82, - "grad_norm": 0.8256655601366389, - "learning_rate": 1.7132587510786846e-06, - "loss": 0.1469, - "step": 16058 - }, - { - "epoch": 0.82, - "grad_norm": 1.1887788220025923, - "learning_rate": 1.7123370034841869e-06, - "loss": 0.1688, - "step": 16059 - }, - { - "epoch": 0.82, - "grad_norm": 1.3846537570490054, - "learning_rate": 1.7114154806930138e-06, - "loss": 0.1707, - "step": 16060 - }, - { - "epoch": 0.82, - "grad_norm": 1.0174836563541019, - "learning_rate": 1.7104941827301668e-06, - "loss": 0.1723, - "step": 16061 - }, - { - "epoch": 0.82, - "grad_norm": 1.233727975926785, - "learning_rate": 1.7095731096206313e-06, - "loss": 0.182, - "step": 16062 - }, - { - "epoch": 0.82, - "grad_norm": 0.9338202871197535, - "learning_rate": 1.7086522613893918e-06, - "loss": 0.1656, - "step": 16063 - }, - { - "epoch": 0.82, - "grad_norm": 1.5330592853721912, - "learning_rate": 1.7077316380614317e-06, - "loss": 0.1568, - "step": 16064 - }, - { - "epoch": 0.82, - "grad_norm": 1.663600549880783, - "learning_rate": 1.7068112396617164e-06, - "loss": 0.1489, - "step": 16065 - }, - { - "epoch": 0.82, - "grad_norm": 1.2309622613220172, - "learning_rate": 1.7058910662152173e-06, - "loss": 0.1612, - "step": 16066 - }, - { - "epoch": 0.82, - "grad_norm": 1.2666130678712084, - "learning_rate": 1.7049711177468896e-06, - "loss": 0.1541, - "step": 16067 - }, - { - "epoch": 0.82, - "grad_norm": 0.9627847158187706, - "learning_rate": 1.7040513942816905e-06, - "loss": 0.1571, - "step": 16068 - }, - { - "epoch": 0.82, - "grad_norm": 2.041748171121705, - "learning_rate": 1.703131895844563e-06, - "loss": 0.1844, - "step": 16069 - }, - { - "epoch": 0.82, - "grad_norm": 1.0538437416835476, - "learning_rate": 1.7022126224604529e-06, - "loss": 0.1931, - "step": 16070 - }, - { - "epoch": 0.82, - "grad_norm": 0.8196593035973092, - "learning_rate": 1.7012935741542925e-06, - "loss": 0.1474, - "step": 16071 - }, - { - "epoch": 0.82, - "grad_norm": 1.5544017080113712, - "learning_rate": 1.700374750951016e-06, - "loss": 0.1739, - "step": 16072 - }, - { - "epoch": 0.82, - "grad_norm": 1.6594564457538314, - "learning_rate": 1.6994561528755404e-06, - "loss": 0.1629, - "step": 16073 - }, - { - "epoch": 0.82, - "grad_norm": 1.716203363591545, - "learning_rate": 1.698537779952788e-06, - "loss": 0.1574, - "step": 16074 - }, - { - "epoch": 0.82, - "grad_norm": 2.4843481543008163, - "learning_rate": 1.6976196322076655e-06, - "loss": 0.1672, - "step": 16075 - }, - { - "epoch": 0.82, - "grad_norm": 1.2056535835474138, - "learning_rate": 1.6967017096650807e-06, - "loss": 0.1832, - "step": 16076 - }, - { - "epoch": 0.82, - "grad_norm": 0.9487721823636202, - "learning_rate": 1.6957840123499292e-06, - "loss": 0.1626, - "step": 16077 - }, - { - "epoch": 0.82, - "grad_norm": 0.8886952662815739, - "learning_rate": 1.6948665402871067e-06, - "loss": 0.1518, - "step": 16078 - }, - { - "epoch": 0.82, - "grad_norm": 1.4301649119446862, - "learning_rate": 1.6939492935014966e-06, - "loss": 0.1574, - "step": 16079 - }, - { - "epoch": 0.82, - "grad_norm": 1.0618141135360368, - "learning_rate": 1.6930322720179816e-06, - "loss": 0.1476, - "step": 16080 - }, - { - "epoch": 0.82, - "grad_norm": 1.076064486726175, - "learning_rate": 1.6921154758614378e-06, - "loss": 0.164, - "step": 16081 - }, - { - "epoch": 0.82, - "grad_norm": 1.3984045430593135, - "learning_rate": 1.6911989050567279e-06, - "loss": 0.1682, - "step": 16082 - }, - { - "epoch": 0.82, - "grad_norm": 0.9973740840239897, - "learning_rate": 1.6902825596287198e-06, - "loss": 0.1669, - "step": 16083 - }, - { - "epoch": 0.82, - "grad_norm": 1.190269199690928, - "learning_rate": 1.6893664396022646e-06, - "loss": 0.1423, - "step": 16084 - }, - { - "epoch": 0.82, - "grad_norm": 1.473604726310278, - "learning_rate": 1.688450545002216e-06, - "loss": 0.1795, - "step": 16085 - }, - { - "epoch": 0.82, - "grad_norm": 1.0436960291378419, - "learning_rate": 1.6875348758534127e-06, - "loss": 0.1685, - "step": 16086 - }, - { - "epoch": 0.82, - "grad_norm": 1.3857159745313614, - "learning_rate": 1.6866194321806984e-06, - "loss": 0.1651, - "step": 16087 - }, - { - "epoch": 0.82, - "grad_norm": 0.8033380877779788, - "learning_rate": 1.6857042140088996e-06, - "loss": 0.1655, - "step": 16088 - }, - { - "epoch": 0.82, - "grad_norm": 0.9083806621123651, - "learning_rate": 1.6847892213628436e-06, - "loss": 0.1639, - "step": 16089 - }, - { - "epoch": 0.82, - "grad_norm": 1.0781586943612742, - "learning_rate": 1.6838744542673492e-06, - "loss": 0.1603, - "step": 16090 - }, - { - "epoch": 0.82, - "grad_norm": 0.9771201956290885, - "learning_rate": 1.6829599127472318e-06, - "loss": 0.1743, - "step": 16091 - }, - { - "epoch": 0.82, - "grad_norm": 1.060528233189165, - "learning_rate": 1.6820455968272953e-06, - "loss": 0.1684, - "step": 16092 - }, - { - "epoch": 0.82, - "grad_norm": 0.8894300227873709, - "learning_rate": 1.681131506532343e-06, - "loss": 0.1536, - "step": 16093 - }, - { - "epoch": 0.82, - "grad_norm": 1.3933629571467805, - "learning_rate": 1.6802176418871664e-06, - "loss": 0.1716, - "step": 16094 - }, - { - "epoch": 0.82, - "grad_norm": 0.9816041566511617, - "learning_rate": 1.6793040029165596e-06, - "loss": 0.1823, - "step": 16095 - }, - { - "epoch": 0.82, - "grad_norm": 1.2379254151826307, - "learning_rate": 1.6783905896452978e-06, - "loss": 0.1749, - "step": 16096 - }, - { - "epoch": 0.82, - "grad_norm": 1.0753028944850174, - "learning_rate": 1.677477402098162e-06, - "loss": 0.1633, - "step": 16097 - }, - { - "epoch": 0.82, - "grad_norm": 0.9176788598790777, - "learning_rate": 1.6765644402999216e-06, - "loss": 0.1544, - "step": 16098 - }, - { - "epoch": 0.82, - "grad_norm": 1.2709621204145147, - "learning_rate": 1.675651704275344e-06, - "loss": 0.1679, - "step": 16099 - }, - { - "epoch": 0.82, - "grad_norm": 1.1446384656419066, - "learning_rate": 1.674739194049183e-06, - "loss": 0.1589, - "step": 16100 - }, - { - "epoch": 0.82, - "grad_norm": 1.02180679059322, - "learning_rate": 1.673826909646189e-06, - "loss": 0.1534, - "step": 16101 - }, - { - "epoch": 0.82, - "grad_norm": 1.2679799246233314, - "learning_rate": 1.6729148510911142e-06, - "loss": 0.1636, - "step": 16102 - }, - { - "epoch": 0.82, - "grad_norm": 0.9948990950616705, - "learning_rate": 1.672003018408691e-06, - "loss": 0.1496, - "step": 16103 - }, - { - "epoch": 0.82, - "grad_norm": 0.900105132623221, - "learning_rate": 1.6710914116236588e-06, - "loss": 0.1585, - "step": 16104 - }, - { - "epoch": 0.82, - "grad_norm": 0.9949865804409496, - "learning_rate": 1.6701800307607397e-06, - "loss": 0.1584, - "step": 16105 - }, - { - "epoch": 0.82, - "grad_norm": 1.156298996343876, - "learning_rate": 1.6692688758446574e-06, - "loss": 0.174, - "step": 16106 - }, - { - "epoch": 0.82, - "grad_norm": 1.9389002868080762, - "learning_rate": 1.6683579469001287e-06, - "loss": 0.1793, - "step": 16107 - }, - { - "epoch": 0.82, - "grad_norm": 0.9776814651404512, - "learning_rate": 1.6674472439518629e-06, - "loss": 0.1497, - "step": 16108 - }, - { - "epoch": 0.82, - "grad_norm": 1.0709974373621516, - "learning_rate": 1.6665367670245592e-06, - "loss": 0.1841, - "step": 16109 - }, - { - "epoch": 0.82, - "grad_norm": 0.8930890109533807, - "learning_rate": 1.6656265161429186e-06, - "loss": 0.1682, - "step": 16110 - }, - { - "epoch": 0.82, - "grad_norm": 0.8296255055125633, - "learning_rate": 1.664716491331626e-06, - "loss": 0.1513, - "step": 16111 - }, - { - "epoch": 0.82, - "grad_norm": 0.9478955423947181, - "learning_rate": 1.6638066926153728e-06, - "loss": 0.1401, - "step": 16112 - }, - { - "epoch": 0.82, - "grad_norm": 0.9566375845034522, - "learning_rate": 1.6628971200188316e-06, - "loss": 0.1619, - "step": 16113 - }, - { - "epoch": 0.82, - "grad_norm": 0.7919743312019453, - "learning_rate": 1.66198777356668e-06, - "loss": 0.1513, - "step": 16114 - }, - { - "epoch": 0.82, - "grad_norm": 1.0115909183001128, - "learning_rate": 1.6610786532835776e-06, - "loss": 0.1706, - "step": 16115 - }, - { - "epoch": 0.82, - "grad_norm": 0.92772352473913, - "learning_rate": 1.660169759194188e-06, - "loss": 0.1635, - "step": 16116 - }, - { - "epoch": 0.82, - "grad_norm": 1.1785822029161883, - "learning_rate": 1.6592610913231665e-06, - "loss": 0.1683, - "step": 16117 - }, - { - "epoch": 0.82, - "grad_norm": 0.9009097664645848, - "learning_rate": 1.6583526496951573e-06, - "loss": 0.1604, - "step": 16118 - }, - { - "epoch": 0.82, - "grad_norm": 1.8481746888363388, - "learning_rate": 1.657444434334805e-06, - "loss": 0.1726, - "step": 16119 - }, - { - "epoch": 0.82, - "grad_norm": 1.0707301830080818, - "learning_rate": 1.656536445266742e-06, - "loss": 0.185, - "step": 16120 - }, - { - "epoch": 0.82, - "grad_norm": 1.1814006659495366, - "learning_rate": 1.655628682515602e-06, - "loss": 0.1763, - "step": 16121 - }, - { - "epoch": 0.82, - "grad_norm": 1.2437639723178673, - "learning_rate": 1.654721146106002e-06, - "loss": 0.1796, - "step": 16122 - }, - { - "epoch": 0.82, - "grad_norm": 1.7788299770768288, - "learning_rate": 1.6538138360625633e-06, - "loss": 0.1792, - "step": 16123 - }, - { - "epoch": 0.82, - "grad_norm": 1.0035397369570327, - "learning_rate": 1.652906752409894e-06, - "loss": 0.1646, - "step": 16124 - }, - { - "epoch": 0.82, - "grad_norm": 1.0281512783310551, - "learning_rate": 1.6519998951726045e-06, - "loss": 0.1858, - "step": 16125 - }, - { - "epoch": 0.82, - "grad_norm": 0.8408852550415522, - "learning_rate": 1.6510932643752863e-06, - "loss": 0.1609, - "step": 16126 - }, - { - "epoch": 0.82, - "grad_norm": 1.0078754787295523, - "learning_rate": 1.6501868600425374e-06, - "loss": 0.1689, - "step": 16127 - }, - { - "epoch": 0.82, - "grad_norm": 0.8816314300651446, - "learning_rate": 1.6492806821989393e-06, - "loss": 0.141, - "step": 16128 - }, - { - "epoch": 0.82, - "grad_norm": 1.0799580233562474, - "learning_rate": 1.6483747308690768e-06, - "loss": 0.1734, - "step": 16129 - }, - { - "epoch": 0.82, - "grad_norm": 1.128032995538658, - "learning_rate": 1.6474690060775178e-06, - "loss": 0.1487, - "step": 16130 - }, - { - "epoch": 0.82, - "grad_norm": 1.2278270712864054, - "learning_rate": 1.6465635078488372e-06, - "loss": 0.1667, - "step": 16131 - }, - { - "epoch": 0.82, - "grad_norm": 0.894278260754333, - "learning_rate": 1.6456582362075911e-06, - "loss": 0.1604, - "step": 16132 - }, - { - "epoch": 0.82, - "grad_norm": 0.8471791910866014, - "learning_rate": 1.6447531911783365e-06, - "loss": 0.1685, - "step": 16133 - }, - { - "epoch": 0.82, - "grad_norm": 1.4038276898923099, - "learning_rate": 1.6438483727856268e-06, - "loss": 0.166, - "step": 16134 - }, - { - "epoch": 0.82, - "grad_norm": 0.8586239073799874, - "learning_rate": 1.6429437810539982e-06, - "loss": 0.1607, - "step": 16135 - }, - { - "epoch": 0.82, - "grad_norm": 3.769070945114241, - "learning_rate": 1.6420394160079955e-06, - "loss": 0.1649, - "step": 16136 - }, - { - "epoch": 0.82, - "grad_norm": 1.3100985933899716, - "learning_rate": 1.6411352776721423e-06, - "loss": 0.2015, - "step": 16137 - }, - { - "epoch": 0.82, - "grad_norm": 1.2512570673071481, - "learning_rate": 1.640231366070969e-06, - "loss": 0.1608, - "step": 16138 - }, - { - "epoch": 0.82, - "grad_norm": 0.8822105121244441, - "learning_rate": 1.6393276812289905e-06, - "loss": 0.1631, - "step": 16139 - }, - { - "epoch": 0.82, - "grad_norm": 0.8806674958039984, - "learning_rate": 1.6384242231707203e-06, - "loss": 0.1791, - "step": 16140 - }, - { - "epoch": 0.82, - "grad_norm": 0.8521437055834901, - "learning_rate": 1.6375209919206657e-06, - "loss": 0.156, - "step": 16141 - }, - { - "epoch": 0.82, - "grad_norm": 0.863487539470905, - "learning_rate": 1.6366179875033284e-06, - "loss": 0.1528, - "step": 16142 - }, - { - "epoch": 0.82, - "grad_norm": 1.3976787853176753, - "learning_rate": 1.6357152099431984e-06, - "loss": 0.169, - "step": 16143 - }, - { - "epoch": 0.82, - "grad_norm": 1.139814567658281, - "learning_rate": 1.6348126592647684e-06, - "loss": 0.192, - "step": 16144 - }, - { - "epoch": 0.82, - "grad_norm": 1.1335595840636001, - "learning_rate": 1.6339103354925146e-06, - "loss": 0.1688, - "step": 16145 - }, - { - "epoch": 0.82, - "grad_norm": 0.8440126829948659, - "learning_rate": 1.6330082386509182e-06, - "loss": 0.1534, - "step": 16146 - }, - { - "epoch": 0.82, - "grad_norm": 0.8508049215827809, - "learning_rate": 1.6321063687644435e-06, - "loss": 0.1768, - "step": 16147 - }, - { - "epoch": 0.82, - "grad_norm": 1.282254408840473, - "learning_rate": 1.631204725857558e-06, - "loss": 0.1668, - "step": 16148 - }, - { - "epoch": 0.82, - "grad_norm": 1.0052396010512652, - "learning_rate": 1.6303033099547149e-06, - "loss": 0.1659, - "step": 16149 - }, - { - "epoch": 0.82, - "grad_norm": 0.9286821198774534, - "learning_rate": 1.629402121080368e-06, - "loss": 0.1649, - "step": 16150 - }, - { - "epoch": 0.82, - "grad_norm": 1.7312340130080666, - "learning_rate": 1.6285011592589628e-06, - "loss": 0.153, - "step": 16151 - }, - { - "epoch": 0.82, - "grad_norm": 1.0912525370551018, - "learning_rate": 1.6276004245149346e-06, - "loss": 0.1601, - "step": 16152 - }, - { - "epoch": 0.82, - "grad_norm": 1.2664795393962827, - "learning_rate": 1.6266999168727204e-06, - "loss": 0.1802, - "step": 16153 - }, - { - "epoch": 0.82, - "grad_norm": 1.0799495507810202, - "learning_rate": 1.6257996363567408e-06, - "loss": 0.1593, - "step": 16154 - }, - { - "epoch": 0.82, - "grad_norm": 0.9822469573275092, - "learning_rate": 1.6248995829914216e-06, - "loss": 0.1661, - "step": 16155 - }, - { - "epoch": 0.82, - "grad_norm": 0.8909355640364796, - "learning_rate": 1.6239997568011723e-06, - "loss": 0.1796, - "step": 16156 - }, - { - "epoch": 0.82, - "grad_norm": 0.9762448493305782, - "learning_rate": 1.6231001578104045e-06, - "loss": 0.1688, - "step": 16157 - }, - { - "epoch": 0.82, - "grad_norm": 0.8456224303249009, - "learning_rate": 1.6222007860435153e-06, - "loss": 0.1545, - "step": 16158 - }, - { - "epoch": 0.82, - "grad_norm": 1.097065634578124, - "learning_rate": 1.621301641524904e-06, - "loss": 0.1919, - "step": 16159 - }, - { - "epoch": 0.82, - "grad_norm": 1.2272594527185567, - "learning_rate": 1.6204027242789577e-06, - "loss": 0.1747, - "step": 16160 - }, - { - "epoch": 0.82, - "grad_norm": 1.4027279958659618, - "learning_rate": 1.619504034330064e-06, - "loss": 0.1336, - "step": 16161 - }, - { - "epoch": 0.82, - "grad_norm": 1.18531326701348, - "learning_rate": 1.618605571702595e-06, - "loss": 0.1574, - "step": 16162 - }, - { - "epoch": 0.82, - "grad_norm": 1.2249771958511868, - "learning_rate": 1.617707336420925e-06, - "loss": 0.1596, - "step": 16163 - }, - { - "epoch": 0.82, - "grad_norm": 1.0243959933280622, - "learning_rate": 1.6168093285094144e-06, - "loss": 0.1816, - "step": 16164 - }, - { - "epoch": 0.82, - "grad_norm": 1.1661606544666774, - "learning_rate": 1.6159115479924259e-06, - "loss": 0.1564, - "step": 16165 - }, - { - "epoch": 0.82, - "grad_norm": 1.535577913582053, - "learning_rate": 1.615013994894309e-06, - "loss": 0.1746, - "step": 16166 - }, - { - "epoch": 0.82, - "grad_norm": 1.085710298402728, - "learning_rate": 1.6141166692394106e-06, - "loss": 0.134, - "step": 16167 - }, - { - "epoch": 0.82, - "grad_norm": 1.0799809442745136, - "learning_rate": 1.6132195710520716e-06, - "loss": 0.1563, - "step": 16168 - }, - { - "epoch": 0.82, - "grad_norm": 1.2622437343108193, - "learning_rate": 1.6123227003566267e-06, - "loss": 0.1582, - "step": 16169 - }, - { - "epoch": 0.82, - "grad_norm": 1.0677073332003861, - "learning_rate": 1.6114260571774031e-06, - "loss": 0.1644, - "step": 16170 - }, - { - "epoch": 0.82, - "grad_norm": 1.2140411169479177, - "learning_rate": 1.6105296415387194e-06, - "loss": 0.1616, - "step": 16171 - }, - { - "epoch": 0.82, - "grad_norm": 0.9562483027738354, - "learning_rate": 1.609633453464895e-06, - "loss": 0.1614, - "step": 16172 - }, - { - "epoch": 0.82, - "grad_norm": 1.2325748339085705, - "learning_rate": 1.6087374929802346e-06, - "loss": 0.1424, - "step": 16173 - }, - { - "epoch": 0.82, - "grad_norm": 1.0057048813635803, - "learning_rate": 1.6078417601090457e-06, - "loss": 0.1744, - "step": 16174 - }, - { - "epoch": 0.82, - "grad_norm": 1.0863081131932661, - "learning_rate": 1.606946254875621e-06, - "loss": 0.1825, - "step": 16175 - }, - { - "epoch": 0.82, - "grad_norm": 1.0069948602074392, - "learning_rate": 1.6060509773042533e-06, - "loss": 0.1572, - "step": 16176 - }, - { - "epoch": 0.82, - "grad_norm": 0.8300856535641151, - "learning_rate": 1.6051559274192275e-06, - "loss": 0.183, - "step": 16177 - }, - { - "epoch": 0.82, - "grad_norm": 0.8124015039311869, - "learning_rate": 1.604261105244823e-06, - "loss": 0.1743, - "step": 16178 - }, - { - "epoch": 0.82, - "grad_norm": 1.0108753370218027, - "learning_rate": 1.6033665108053075e-06, - "loss": 0.1524, - "step": 16179 - }, - { - "epoch": 0.82, - "grad_norm": 1.1061628747426537, - "learning_rate": 1.6024721441249525e-06, - "loss": 0.1644, - "step": 16180 - }, - { - "epoch": 0.82, - "grad_norm": 1.137413127806049, - "learning_rate": 1.6015780052280128e-06, - "loss": 0.1657, - "step": 16181 - }, - { - "epoch": 0.82, - "grad_norm": 1.2868473563497702, - "learning_rate": 1.6006840941387458e-06, - "loss": 0.1818, - "step": 16182 - }, - { - "epoch": 0.82, - "grad_norm": 1.2457052080057864, - "learning_rate": 1.5997904108813944e-06, - "loss": 0.1723, - "step": 16183 - }, - { - "epoch": 0.82, - "grad_norm": 1.2843137230704258, - "learning_rate": 1.5988969554802058e-06, - "loss": 0.1634, - "step": 16184 - }, - { - "epoch": 0.82, - "grad_norm": 0.8464560307722944, - "learning_rate": 1.5980037279594097e-06, - "loss": 0.1469, - "step": 16185 - }, - { - "epoch": 0.82, - "grad_norm": 1.0749915701214785, - "learning_rate": 1.5971107283432363e-06, - "loss": 0.1841, - "step": 16186 - }, - { - "epoch": 0.82, - "grad_norm": 1.1146209065453505, - "learning_rate": 1.5962179566559112e-06, - "loss": 0.1895, - "step": 16187 - }, - { - "epoch": 0.82, - "grad_norm": 0.9494386154442932, - "learning_rate": 1.5953254129216467e-06, - "loss": 0.1494, - "step": 16188 - }, - { - "epoch": 0.82, - "grad_norm": 1.082046499912269, - "learning_rate": 1.594433097164657e-06, - "loss": 0.1761, - "step": 16189 - }, - { - "epoch": 0.82, - "grad_norm": 2.0094160078152505, - "learning_rate": 1.593541009409143e-06, - "loss": 0.1646, - "step": 16190 - }, - { - "epoch": 0.82, - "grad_norm": 1.3228427682959791, - "learning_rate": 1.592649149679305e-06, - "loss": 0.1611, - "step": 16191 - }, - { - "epoch": 0.82, - "grad_norm": 1.2135503694029435, - "learning_rate": 1.5917575179993328e-06, - "loss": 0.1604, - "step": 16192 - }, - { - "epoch": 0.82, - "grad_norm": 1.4004802440515558, - "learning_rate": 1.5908661143934112e-06, - "loss": 0.1517, - "step": 16193 - }, - { - "epoch": 0.82, - "grad_norm": 0.8070297986487432, - "learning_rate": 1.5899749388857222e-06, - "loss": 0.1672, - "step": 16194 - }, - { - "epoch": 0.82, - "grad_norm": 0.9741897108941053, - "learning_rate": 1.5890839915004398e-06, - "loss": 0.1729, - "step": 16195 - }, - { - "epoch": 0.82, - "grad_norm": 0.948612681485556, - "learning_rate": 1.5881932722617277e-06, - "loss": 0.1833, - "step": 16196 - }, - { - "epoch": 0.82, - "grad_norm": 2.751015180435459, - "learning_rate": 1.5873027811937491e-06, - "loss": 0.1689, - "step": 16197 - }, - { - "epoch": 0.82, - "grad_norm": 0.8035718788803883, - "learning_rate": 1.5864125183206569e-06, - "loss": 0.1714, - "step": 16198 - }, - { - "epoch": 0.82, - "grad_norm": 0.8762858502283523, - "learning_rate": 1.5855224836666016e-06, - "loss": 0.1465, - "step": 16199 - }, - { - "epoch": 0.82, - "grad_norm": 1.2879099555445566, - "learning_rate": 1.584632677255723e-06, - "loss": 0.1748, - "step": 16200 - }, - { - "epoch": 0.82, - "grad_norm": 1.0847853329598949, - "learning_rate": 1.5837430991121594e-06, - "loss": 0.1581, - "step": 16201 - }, - { - "epoch": 0.82, - "grad_norm": 1.1504659150239045, - "learning_rate": 1.5828537492600382e-06, - "loss": 0.1577, - "step": 16202 - }, - { - "epoch": 0.82, - "grad_norm": 2.439135514835605, - "learning_rate": 1.5819646277234834e-06, - "loss": 0.166, - "step": 16203 - }, - { - "epoch": 0.82, - "grad_norm": 1.593461322594299, - "learning_rate": 1.581075734526617e-06, - "loss": 0.1686, - "step": 16204 - }, - { - "epoch": 0.82, - "grad_norm": 1.0218168743453442, - "learning_rate": 1.580187069693544e-06, - "loss": 0.1435, - "step": 16205 - }, - { - "epoch": 0.82, - "grad_norm": 1.13250068726926, - "learning_rate": 1.5792986332483739e-06, - "loss": 0.17, - "step": 16206 - }, - { - "epoch": 0.82, - "grad_norm": 1.1497848504486927, - "learning_rate": 1.578410425215202e-06, - "loss": 0.1874, - "step": 16207 - }, - { - "epoch": 0.82, - "grad_norm": 0.9860777098312422, - "learning_rate": 1.577522445618126e-06, - "loss": 0.1784, - "step": 16208 - }, - { - "epoch": 0.82, - "grad_norm": 1.0653169406941323, - "learning_rate": 1.576634694481227e-06, - "loss": 0.1772, - "step": 16209 - }, - { - "epoch": 0.82, - "grad_norm": 1.3828187860236651, - "learning_rate": 1.575747171828589e-06, - "loss": 0.1707, - "step": 16210 - }, - { - "epoch": 0.82, - "grad_norm": 1.0105506162693179, - "learning_rate": 1.5748598776842838e-06, - "loss": 0.175, - "step": 16211 - }, - { - "epoch": 0.82, - "grad_norm": 1.0796724874430454, - "learning_rate": 1.5739728120723795e-06, - "loss": 0.1764, - "step": 16212 - }, - { - "epoch": 0.82, - "grad_norm": 0.8717548247982144, - "learning_rate": 1.573085975016938e-06, - "loss": 0.1559, - "step": 16213 - }, - { - "epoch": 0.82, - "grad_norm": 0.9807085943349985, - "learning_rate": 1.5721993665420187e-06, - "loss": 0.1687, - "step": 16214 - }, - { - "epoch": 0.82, - "grad_norm": 1.243518809958635, - "learning_rate": 1.5713129866716647e-06, - "loss": 0.1695, - "step": 16215 - }, - { - "epoch": 0.82, - "grad_norm": 1.0002427361868018, - "learning_rate": 1.5704268354299246e-06, - "loss": 0.1571, - "step": 16216 - }, - { - "epoch": 0.82, - "grad_norm": 1.0513130593254636, - "learning_rate": 1.56954091284083e-06, - "loss": 0.1683, - "step": 16217 - }, - { - "epoch": 0.82, - "grad_norm": 1.1471167807414897, - "learning_rate": 1.5686552189284177e-06, - "loss": 0.172, - "step": 16218 - }, - { - "epoch": 0.82, - "grad_norm": 0.8267405172081425, - "learning_rate": 1.5677697537167048e-06, - "loss": 0.1513, - "step": 16219 - }, - { - "epoch": 0.82, - "grad_norm": 0.7890938430215125, - "learning_rate": 1.5668845172297143e-06, - "loss": 0.1588, - "step": 16220 - }, - { - "epoch": 0.82, - "grad_norm": 0.8541591590053499, - "learning_rate": 1.5659995094914603e-06, - "loss": 0.1799, - "step": 16221 - }, - { - "epoch": 0.82, - "grad_norm": 1.0495622903964184, - "learning_rate": 1.565114730525944e-06, - "loss": 0.1556, - "step": 16222 - }, - { - "epoch": 0.82, - "grad_norm": 1.0599533152387104, - "learning_rate": 1.564230180357168e-06, - "loss": 0.169, - "step": 16223 - }, - { - "epoch": 0.83, - "grad_norm": 1.1758205426339616, - "learning_rate": 1.5633458590091233e-06, - "loss": 0.1818, - "step": 16224 - }, - { - "epoch": 0.83, - "grad_norm": 1.3566302422746803, - "learning_rate": 1.5624617665058005e-06, - "loss": 0.1603, - "step": 16225 - }, - { - "epoch": 0.83, - "grad_norm": 1.0060617851367233, - "learning_rate": 1.5615779028711775e-06, - "loss": 0.1576, - "step": 16226 - }, - { - "epoch": 0.83, - "grad_norm": 1.6034621808942282, - "learning_rate": 1.5606942681292326e-06, - "loss": 0.1638, - "step": 16227 - }, - { - "epoch": 0.83, - "grad_norm": 1.0066324518206973, - "learning_rate": 1.55981086230393e-06, - "loss": 0.1641, - "step": 16228 - }, - { - "epoch": 0.83, - "grad_norm": 1.0712368190292123, - "learning_rate": 1.5589276854192336e-06, - "loss": 0.1839, - "step": 16229 - }, - { - "epoch": 0.83, - "grad_norm": 0.9567927807699808, - "learning_rate": 1.5580447374991003e-06, - "loss": 0.1688, - "step": 16230 - }, - { - "epoch": 0.83, - "grad_norm": 1.3476202419921466, - "learning_rate": 1.557162018567484e-06, - "loss": 0.1629, - "step": 16231 - }, - { - "epoch": 0.83, - "grad_norm": 0.8950686877683366, - "learning_rate": 1.5562795286483212e-06, - "loss": 0.1457, - "step": 16232 - }, - { - "epoch": 0.83, - "grad_norm": 0.9267746323940856, - "learning_rate": 1.555397267765556e-06, - "loss": 0.1918, - "step": 16233 - }, - { - "epoch": 0.83, - "grad_norm": 1.0000652875899563, - "learning_rate": 1.5545152359431149e-06, - "loss": 0.1721, - "step": 16234 - }, - { - "epoch": 0.83, - "grad_norm": 0.9567611863206363, - "learning_rate": 1.5536334332049274e-06, - "loss": 0.1734, - "step": 16235 - }, - { - "epoch": 0.83, - "grad_norm": 1.0972798771214651, - "learning_rate": 1.5527518595749068e-06, - "loss": 0.1634, - "step": 16236 - }, - { - "epoch": 0.83, - "grad_norm": 1.130250553330175, - "learning_rate": 1.551870515076972e-06, - "loss": 0.172, - "step": 16237 - }, - { - "epoch": 0.83, - "grad_norm": 1.0199759185793813, - "learning_rate": 1.5509893997350245e-06, - "loss": 0.1778, - "step": 16238 - }, - { - "epoch": 0.83, - "grad_norm": 1.033979382796841, - "learning_rate": 1.5501085135729666e-06, - "loss": 0.1737, - "step": 16239 - }, - { - "epoch": 0.83, - "grad_norm": 1.348123338689501, - "learning_rate": 1.5492278566146945e-06, - "loss": 0.1746, - "step": 16240 - }, - { - "epoch": 0.83, - "grad_norm": 1.066130979899349, - "learning_rate": 1.5483474288840927e-06, - "loss": 0.196, - "step": 16241 - }, - { - "epoch": 0.83, - "grad_norm": 0.9158187549060963, - "learning_rate": 1.5474672304050454e-06, - "loss": 0.1575, - "step": 16242 - }, - { - "epoch": 0.83, - "grad_norm": 1.1187810995278062, - "learning_rate": 1.5465872612014255e-06, - "loss": 0.172, - "step": 16243 - }, - { - "epoch": 0.83, - "grad_norm": 2.019799657851233, - "learning_rate": 1.545707521297105e-06, - "loss": 0.1769, - "step": 16244 - }, - { - "epoch": 0.83, - "grad_norm": 1.0459380016808018, - "learning_rate": 1.5448280107159442e-06, - "loss": 0.1888, - "step": 16245 - }, - { - "epoch": 0.83, - "grad_norm": 1.187399423034828, - "learning_rate": 1.5439487294818002e-06, - "loss": 0.1361, - "step": 16246 - }, - { - "epoch": 0.83, - "grad_norm": 1.0301682592357948, - "learning_rate": 1.543069677618525e-06, - "loss": 0.1436, - "step": 16247 - }, - { - "epoch": 0.83, - "grad_norm": 1.1966512262807896, - "learning_rate": 1.5421908551499653e-06, - "loss": 0.1637, - "step": 16248 - }, - { - "epoch": 0.83, - "grad_norm": 1.800092552120167, - "learning_rate": 1.5413122620999533e-06, - "loss": 0.161, - "step": 16249 - }, - { - "epoch": 0.83, - "grad_norm": 0.8528766108510961, - "learning_rate": 1.540433898492326e-06, - "loss": 0.1634, - "step": 16250 - }, - { - "epoch": 0.83, - "grad_norm": 0.9868816636905955, - "learning_rate": 1.539555764350905e-06, - "loss": 0.167, - "step": 16251 - }, - { - "epoch": 0.83, - "grad_norm": 0.856926643725097, - "learning_rate": 1.5386778596995144e-06, - "loss": 0.1434, - "step": 16252 - }, - { - "epoch": 0.83, - "grad_norm": 1.1659977976533071, - "learning_rate": 1.5378001845619616e-06, - "loss": 0.1716, - "step": 16253 - }, - { - "epoch": 0.83, - "grad_norm": 1.4201134946411582, - "learning_rate": 1.53692273896206e-06, - "loss": 0.1706, - "step": 16254 - }, - { - "epoch": 0.83, - "grad_norm": 1.1415900722887473, - "learning_rate": 1.5360455229236049e-06, - "loss": 0.1813, - "step": 16255 - }, - { - "epoch": 0.83, - "grad_norm": 1.1716332672969776, - "learning_rate": 1.5351685364703916e-06, - "loss": 0.1597, - "step": 16256 - }, - { - "epoch": 0.83, - "grad_norm": 0.9427550839380094, - "learning_rate": 1.5342917796262136e-06, - "loss": 0.1502, - "step": 16257 - }, - { - "epoch": 0.83, - "grad_norm": 0.8506175397006018, - "learning_rate": 1.533415252414846e-06, - "loss": 0.1593, - "step": 16258 - }, - { - "epoch": 0.83, - "grad_norm": 0.7706683930731257, - "learning_rate": 1.5325389548600711e-06, - "loss": 0.1607, - "step": 16259 - }, - { - "epoch": 0.83, - "grad_norm": 1.1888829820220763, - "learning_rate": 1.531662886985652e-06, - "loss": 0.1752, - "step": 16260 - }, - { - "epoch": 0.83, - "grad_norm": 1.6787971741140937, - "learning_rate": 1.5307870488153586e-06, - "loss": 0.1642, - "step": 16261 - }, - { - "epoch": 0.83, - "grad_norm": 0.936195924156837, - "learning_rate": 1.529911440372942e-06, - "loss": 0.1665, - "step": 16262 - }, - { - "epoch": 0.83, - "grad_norm": 4.5813626304960975, - "learning_rate": 1.5290360616821564e-06, - "loss": 0.167, - "step": 16263 - }, - { - "epoch": 0.83, - "grad_norm": 0.9985641711965435, - "learning_rate": 1.5281609127667451e-06, - "loss": 0.1679, - "step": 16264 - }, - { - "epoch": 0.83, - "grad_norm": 2.9953769021880143, - "learning_rate": 1.5272859936504513e-06, - "loss": 0.1587, - "step": 16265 - }, - { - "epoch": 0.83, - "grad_norm": 0.899874692531944, - "learning_rate": 1.5264113043569994e-06, - "loss": 0.1555, - "step": 16266 - }, - { - "epoch": 0.83, - "grad_norm": 1.1861633024929679, - "learning_rate": 1.5255368449101226e-06, - "loss": 0.18, - "step": 16267 - }, - { - "epoch": 0.83, - "grad_norm": 1.033925166327829, - "learning_rate": 1.5246626153335364e-06, - "loss": 0.1426, - "step": 16268 - }, - { - "epoch": 0.83, - "grad_norm": 2.049961418295712, - "learning_rate": 1.5237886156509563e-06, - "loss": 0.1673, - "step": 16269 - }, - { - "epoch": 0.83, - "grad_norm": 1.0985176283197147, - "learning_rate": 1.5229148458860865e-06, - "loss": 0.1601, - "step": 16270 - }, - { - "epoch": 0.83, - "grad_norm": 0.8934168171267731, - "learning_rate": 1.5220413060626327e-06, - "loss": 0.1644, - "step": 16271 - }, - { - "epoch": 0.83, - "grad_norm": 0.9981608465218564, - "learning_rate": 1.5211679962042858e-06, - "loss": 0.1549, - "step": 16272 - }, - { - "epoch": 0.83, - "grad_norm": 0.8914455120721887, - "learning_rate": 1.5202949163347348e-06, - "loss": 0.1378, - "step": 16273 - }, - { - "epoch": 0.83, - "grad_norm": 1.6112780017756578, - "learning_rate": 1.519422066477666e-06, - "loss": 0.1838, - "step": 16274 - }, - { - "epoch": 0.83, - "grad_norm": 0.8425598172314763, - "learning_rate": 1.5185494466567508e-06, - "loss": 0.1734, - "step": 16275 - }, - { - "epoch": 0.83, - "grad_norm": 1.0261225190109788, - "learning_rate": 1.5176770568956623e-06, - "loss": 0.1726, - "step": 16276 - }, - { - "epoch": 0.83, - "grad_norm": 1.1873182046443194, - "learning_rate": 1.5168048972180605e-06, - "loss": 0.1688, - "step": 16277 - }, - { - "epoch": 0.83, - "grad_norm": 1.3521035987276042, - "learning_rate": 1.5159329676476075e-06, - "loss": 0.1623, - "step": 16278 - }, - { - "epoch": 0.83, - "grad_norm": 1.2750024973858918, - "learning_rate": 1.5150612682079502e-06, - "loss": 0.1844, - "step": 16279 - }, - { - "epoch": 0.83, - "grad_norm": 1.3258702215505207, - "learning_rate": 1.5141897989227372e-06, - "loss": 0.1674, - "step": 16280 - }, - { - "epoch": 0.83, - "grad_norm": 0.9289221783042039, - "learning_rate": 1.513318559815603e-06, - "loss": 0.1771, - "step": 16281 - }, - { - "epoch": 0.83, - "grad_norm": 1.074167772829532, - "learning_rate": 1.512447550910181e-06, - "loss": 0.1811, - "step": 16282 - }, - { - "epoch": 0.83, - "grad_norm": 0.9460771136548177, - "learning_rate": 1.5115767722301e-06, - "loss": 0.1523, - "step": 16283 - }, - { - "epoch": 0.83, - "grad_norm": 0.925950211310057, - "learning_rate": 1.51070622379898e-06, - "loss": 0.1527, - "step": 16284 - }, - { - "epoch": 0.83, - "grad_norm": 1.0862374172733744, - "learning_rate": 1.509835905640431e-06, - "loss": 0.1697, - "step": 16285 - }, - { - "epoch": 0.83, - "grad_norm": 1.2381593824256332, - "learning_rate": 1.5089658177780653e-06, - "loss": 0.1697, - "step": 16286 - }, - { - "epoch": 0.83, - "grad_norm": 1.3238266766665978, - "learning_rate": 1.5080959602354783e-06, - "loss": 0.1588, - "step": 16287 - }, - { - "epoch": 0.83, - "grad_norm": 3.170935256463546, - "learning_rate": 1.5072263330362713e-06, - "loss": 0.1631, - "step": 16288 - }, - { - "epoch": 0.83, - "grad_norm": 1.085728386526513, - "learning_rate": 1.5063569362040265e-06, - "loss": 0.1783, - "step": 16289 - }, - { - "epoch": 0.83, - "grad_norm": 0.8514572170467741, - "learning_rate": 1.5054877697623305e-06, - "loss": 0.1727, - "step": 16290 - }, - { - "epoch": 0.83, - "grad_norm": 1.031428838828624, - "learning_rate": 1.50461883373476e-06, - "loss": 0.1678, - "step": 16291 - }, - { - "epoch": 0.83, - "grad_norm": 1.1681429510390187, - "learning_rate": 1.5037501281448819e-06, - "loss": 0.1705, - "step": 16292 - }, - { - "epoch": 0.83, - "grad_norm": 1.383254386252376, - "learning_rate": 1.5028816530162627e-06, - "loss": 0.1611, - "step": 16293 - }, - { - "epoch": 0.83, - "grad_norm": 1.0569013766570596, - "learning_rate": 1.5020134083724568e-06, - "loss": 0.1687, - "step": 16294 - }, - { - "epoch": 0.83, - "grad_norm": 0.900329262047711, - "learning_rate": 1.5011453942370191e-06, - "loss": 0.1653, - "step": 16295 - }, - { - "epoch": 0.83, - "grad_norm": 1.2811474221674615, - "learning_rate": 1.5002776106334904e-06, - "loss": 0.1677, - "step": 16296 - }, - { - "epoch": 0.83, - "grad_norm": 1.1071821925985452, - "learning_rate": 1.4994100575854143e-06, - "loss": 0.1496, - "step": 16297 - }, - { - "epoch": 0.83, - "grad_norm": 0.7783188030566939, - "learning_rate": 1.4985427351163184e-06, - "loss": 0.1746, - "step": 16298 - }, - { - "epoch": 0.83, - "grad_norm": 1.3811377289457276, - "learning_rate": 1.4976756432497309e-06, - "loss": 0.1598, - "step": 16299 - }, - { - "epoch": 0.83, - "grad_norm": 0.7740242905028717, - "learning_rate": 1.4968087820091714e-06, - "loss": 0.1797, - "step": 16300 - }, - { - "epoch": 0.83, - "grad_norm": 1.2751691210341451, - "learning_rate": 1.495942151418156e-06, - "loss": 0.176, - "step": 16301 - }, - { - "epoch": 0.83, - "grad_norm": 0.8551080958388871, - "learning_rate": 1.495075751500188e-06, - "loss": 0.1475, - "step": 16302 - }, - { - "epoch": 0.83, - "grad_norm": 1.1155180459959604, - "learning_rate": 1.4942095822787738e-06, - "loss": 0.1723, - "step": 16303 - }, - { - "epoch": 0.83, - "grad_norm": 0.9582474774941909, - "learning_rate": 1.4933436437774017e-06, - "loss": 0.1623, - "step": 16304 - }, - { - "epoch": 0.83, - "grad_norm": 1.1581963477224246, - "learning_rate": 1.4924779360195662e-06, - "loss": 0.18, - "step": 16305 - }, - { - "epoch": 0.83, - "grad_norm": 1.1922788818545937, - "learning_rate": 1.4916124590287451e-06, - "loss": 0.1693, - "step": 16306 - }, - { - "epoch": 0.83, - "grad_norm": 1.7897651228922262, - "learning_rate": 1.4907472128284185e-06, - "loss": 0.1646, - "step": 16307 - }, - { - "epoch": 0.83, - "grad_norm": 1.2555341889630993, - "learning_rate": 1.489882197442053e-06, - "loss": 0.187, - "step": 16308 - }, - { - "epoch": 0.83, - "grad_norm": 0.7526073707918681, - "learning_rate": 1.4890174128931123e-06, - "loss": 0.1391, - "step": 16309 - }, - { - "epoch": 0.83, - "grad_norm": 1.70102190020427, - "learning_rate": 1.4881528592050576e-06, - "loss": 0.164, - "step": 16310 - }, - { - "epoch": 0.83, - "grad_norm": 1.1421019175027005, - "learning_rate": 1.4872885364013357e-06, - "loss": 0.1652, - "step": 16311 - }, - { - "epoch": 0.83, - "grad_norm": 1.3468381523610138, - "learning_rate": 1.4864244445053956e-06, - "loss": 0.1671, - "step": 16312 - }, - { - "epoch": 0.83, - "grad_norm": 0.8954079863094079, - "learning_rate": 1.4855605835406695e-06, - "loss": 0.1435, - "step": 16313 - }, - { - "epoch": 0.83, - "grad_norm": 1.6833123666950103, - "learning_rate": 1.4846969535305967e-06, - "loss": 0.179, - "step": 16314 - }, - { - "epoch": 0.83, - "grad_norm": 1.1499521503608294, - "learning_rate": 1.4838335544985982e-06, - "loss": 0.1595, - "step": 16315 - }, - { - "epoch": 0.83, - "grad_norm": 1.255100924441286, - "learning_rate": 1.4829703864680945e-06, - "loss": 0.1686, - "step": 16316 - }, - { - "epoch": 0.83, - "grad_norm": 1.269949061927699, - "learning_rate": 1.482107449462501e-06, - "loss": 0.174, - "step": 16317 - }, - { - "epoch": 0.83, - "grad_norm": 1.0630711296146427, - "learning_rate": 1.4812447435052258e-06, - "loss": 0.1379, - "step": 16318 - }, - { - "epoch": 0.83, - "grad_norm": 1.1507195675909738, - "learning_rate": 1.4803822686196657e-06, - "loss": 0.1676, - "step": 16319 - }, - { - "epoch": 0.83, - "grad_norm": 1.0780534179912902, - "learning_rate": 1.4795200248292207e-06, - "loss": 0.1674, - "step": 16320 - }, - { - "epoch": 0.83, - "grad_norm": 1.1352401978328097, - "learning_rate": 1.4786580121572736e-06, - "loss": 0.1763, - "step": 16321 - }, - { - "epoch": 0.83, - "grad_norm": 1.2704926559864493, - "learning_rate": 1.477796230627211e-06, - "loss": 0.1644, - "step": 16322 - }, - { - "epoch": 0.83, - "grad_norm": 1.3269094679941507, - "learning_rate": 1.476934680262405e-06, - "loss": 0.1408, - "step": 16323 - }, - { - "epoch": 0.83, - "grad_norm": 0.9223432723723342, - "learning_rate": 1.4760733610862298e-06, - "loss": 0.1999, - "step": 16324 - }, - { - "epoch": 0.83, - "grad_norm": 1.0167153221285, - "learning_rate": 1.475212273122043e-06, - "loss": 0.154, - "step": 16325 - }, - { - "epoch": 0.83, - "grad_norm": 1.1643049382379205, - "learning_rate": 1.474351416393206e-06, - "loss": 0.1609, - "step": 16326 - }, - { - "epoch": 0.83, - "grad_norm": 1.3266734784664354, - "learning_rate": 1.47349079092307e-06, - "loss": 0.1696, - "step": 16327 - }, - { - "epoch": 0.83, - "grad_norm": 0.965325287003813, - "learning_rate": 1.4726303967349754e-06, - "loss": 0.1585, - "step": 16328 - }, - { - "epoch": 0.83, - "grad_norm": 1.8263306753742028, - "learning_rate": 1.4717702338522654e-06, - "loss": 0.2012, - "step": 16329 - }, - { - "epoch": 0.83, - "grad_norm": 1.655930629079804, - "learning_rate": 1.4709103022982673e-06, - "loss": 0.2196, - "step": 16330 - }, - { - "epoch": 0.83, - "grad_norm": 0.8648245666271341, - "learning_rate": 1.4700506020963113e-06, - "loss": 0.1682, - "step": 16331 - }, - { - "epoch": 0.83, - "grad_norm": 1.1845845429985948, - "learning_rate": 1.4691911332697118e-06, - "loss": 0.1737, - "step": 16332 - }, - { - "epoch": 0.83, - "grad_norm": 1.0154675997457938, - "learning_rate": 1.468331895841787e-06, - "loss": 0.1662, - "step": 16333 - }, - { - "epoch": 0.83, - "grad_norm": 1.0646982504956448, - "learning_rate": 1.4674728898358391e-06, - "loss": 0.1612, - "step": 16334 - }, - { - "epoch": 0.83, - "grad_norm": 0.929233799062197, - "learning_rate": 1.466614115275171e-06, - "loss": 0.1785, - "step": 16335 - }, - { - "epoch": 0.83, - "grad_norm": 0.8890200594074377, - "learning_rate": 1.4657555721830775e-06, - "loss": 0.1637, - "step": 16336 - }, - { - "epoch": 0.83, - "grad_norm": 1.407647389509816, - "learning_rate": 1.4648972605828482e-06, - "loss": 0.1699, - "step": 16337 - }, - { - "epoch": 0.83, - "grad_norm": 0.9429287949318823, - "learning_rate": 1.4640391804977605e-06, - "loss": 0.1524, - "step": 16338 - }, - { - "epoch": 0.83, - "grad_norm": 1.389627543861334, - "learning_rate": 1.4631813319510945e-06, - "loss": 0.1587, - "step": 16339 - }, - { - "epoch": 0.83, - "grad_norm": 1.8792147979013831, - "learning_rate": 1.462323714966114e-06, - "loss": 0.1748, - "step": 16340 - }, - { - "epoch": 0.83, - "grad_norm": 1.1173411674101408, - "learning_rate": 1.461466329566088e-06, - "loss": 0.1689, - "step": 16341 - }, - { - "epoch": 0.83, - "grad_norm": 1.1589594154477718, - "learning_rate": 1.460609175774268e-06, - "loss": 0.1778, - "step": 16342 - }, - { - "epoch": 0.83, - "grad_norm": 3.0335774583431516, - "learning_rate": 1.4597522536139052e-06, - "loss": 0.143, - "step": 16343 - }, - { - "epoch": 0.83, - "grad_norm": 1.2014210515526043, - "learning_rate": 1.458895563108248e-06, - "loss": 0.1636, - "step": 16344 - }, - { - "epoch": 0.83, - "grad_norm": 1.2414386216197213, - "learning_rate": 1.4580391042805287e-06, - "loss": 0.1932, - "step": 16345 - }, - { - "epoch": 0.83, - "grad_norm": 1.2599250563452244, - "learning_rate": 1.4571828771539843e-06, - "loss": 0.1674, - "step": 16346 - }, - { - "epoch": 0.83, - "grad_norm": 1.1791440023288944, - "learning_rate": 1.4563268817518327e-06, - "loss": 0.1776, - "step": 16347 - }, - { - "epoch": 0.83, - "grad_norm": 1.0322052855841803, - "learning_rate": 1.4554711180973003e-06, - "loss": 0.1613, - "step": 16348 - }, - { - "epoch": 0.83, - "grad_norm": 2.2190450616956596, - "learning_rate": 1.4546155862135946e-06, - "loss": 0.1624, - "step": 16349 - }, - { - "epoch": 0.83, - "grad_norm": 1.610583375351424, - "learning_rate": 1.4537602861239253e-06, - "loss": 0.1605, - "step": 16350 - }, - { - "epoch": 0.83, - "grad_norm": 1.11298369910105, - "learning_rate": 1.452905217851489e-06, - "loss": 0.1929, - "step": 16351 - }, - { - "epoch": 0.83, - "grad_norm": 0.9145769935297287, - "learning_rate": 1.452050381419481e-06, - "loss": 0.1699, - "step": 16352 - }, - { - "epoch": 0.83, - "grad_norm": 1.458729223003803, - "learning_rate": 1.4511957768510897e-06, - "loss": 0.1594, - "step": 16353 - }, - { - "epoch": 0.83, - "grad_norm": 1.1644354573343156, - "learning_rate": 1.4503414041694985e-06, - "loss": 0.1862, - "step": 16354 - }, - { - "epoch": 0.83, - "grad_norm": 0.9109531000011147, - "learning_rate": 1.4494872633978763e-06, - "loss": 0.1455, - "step": 16355 - }, - { - "epoch": 0.83, - "grad_norm": 1.853462469807387, - "learning_rate": 1.4486333545593978e-06, - "loss": 0.1534, - "step": 16356 - }, - { - "epoch": 0.83, - "grad_norm": 1.6192208694248555, - "learning_rate": 1.4477796776772202e-06, - "loss": 0.1783, - "step": 16357 - }, - { - "epoch": 0.83, - "grad_norm": 1.1859687710840425, - "learning_rate": 1.4469262327745038e-06, - "loss": 0.1566, - "step": 16358 - }, - { - "epoch": 0.83, - "grad_norm": 1.1934652652801543, - "learning_rate": 1.4460730198743945e-06, - "loss": 0.1648, - "step": 16359 - }, - { - "epoch": 0.83, - "grad_norm": 0.8857142797389227, - "learning_rate": 1.445220039000037e-06, - "loss": 0.1575, - "step": 16360 - }, - { - "epoch": 0.83, - "grad_norm": 0.8879561711602736, - "learning_rate": 1.444367290174573e-06, - "loss": 0.1495, - "step": 16361 - }, - { - "epoch": 0.83, - "grad_norm": 0.9377892019061446, - "learning_rate": 1.4435147734211252e-06, - "loss": 0.1924, - "step": 16362 - }, - { - "epoch": 0.83, - "grad_norm": 1.100278086878258, - "learning_rate": 1.442662488762826e-06, - "loss": 0.1658, - "step": 16363 - }, - { - "epoch": 0.83, - "grad_norm": 0.9895730310143408, - "learning_rate": 1.441810436222788e-06, - "loss": 0.1758, - "step": 16364 - }, - { - "epoch": 0.83, - "grad_norm": 1.2782203378181645, - "learning_rate": 1.4409586158241272e-06, - "loss": 0.1713, - "step": 16365 - }, - { - "epoch": 0.83, - "grad_norm": 1.0976176848106227, - "learning_rate": 1.4401070275899442e-06, - "loss": 0.1675, - "step": 16366 - }, - { - "epoch": 0.83, - "grad_norm": 0.9710678328379625, - "learning_rate": 1.4392556715433447e-06, - "loss": 0.1641, - "step": 16367 - }, - { - "epoch": 0.83, - "grad_norm": 1.3605252867567799, - "learning_rate": 1.438404547707417e-06, - "loss": 0.1721, - "step": 16368 - }, - { - "epoch": 0.83, - "grad_norm": 1.1191812979827758, - "learning_rate": 1.4375536561052483e-06, - "loss": 0.167, - "step": 16369 - }, - { - "epoch": 0.83, - "grad_norm": 0.9856198196977122, - "learning_rate": 1.4367029967599211e-06, - "loss": 0.1886, - "step": 16370 - }, - { - "epoch": 0.83, - "grad_norm": 1.1024483254210755, - "learning_rate": 1.4358525696945104e-06, - "loss": 0.1536, - "step": 16371 - }, - { - "epoch": 0.83, - "grad_norm": 0.8868452274281557, - "learning_rate": 1.4350023749320807e-06, - "loss": 0.1692, - "step": 16372 - }, - { - "epoch": 0.83, - "grad_norm": 1.0929098488958067, - "learning_rate": 1.4341524124956974e-06, - "loss": 0.1664, - "step": 16373 - }, - { - "epoch": 0.83, - "grad_norm": 1.1365025851174004, - "learning_rate": 1.4333026824084116e-06, - "loss": 0.1412, - "step": 16374 - }, - { - "epoch": 0.83, - "grad_norm": 1.0641312250521482, - "learning_rate": 1.4324531846932766e-06, - "loss": 0.1577, - "step": 16375 - }, - { - "epoch": 0.83, - "grad_norm": 1.1004909593029202, - "learning_rate": 1.4316039193733299e-06, - "loss": 0.1544, - "step": 16376 - }, - { - "epoch": 0.83, - "grad_norm": 1.044385625193077, - "learning_rate": 1.4307548864716137e-06, - "loss": 0.1674, - "step": 16377 - }, - { - "epoch": 0.83, - "grad_norm": 2.4788242272864243, - "learning_rate": 1.4299060860111536e-06, - "loss": 0.1409, - "step": 16378 - }, - { - "epoch": 0.83, - "grad_norm": 0.976932708881975, - "learning_rate": 1.4290575180149735e-06, - "loss": 0.155, - "step": 16379 - }, - { - "epoch": 0.83, - "grad_norm": 1.4220348323583278, - "learning_rate": 1.4282091825060963e-06, - "loss": 0.1681, - "step": 16380 - }, - { - "epoch": 0.83, - "grad_norm": 1.172753518856644, - "learning_rate": 1.4273610795075255e-06, - "loss": 0.168, - "step": 16381 - }, - { - "epoch": 0.83, - "grad_norm": 1.0594369986964254, - "learning_rate": 1.4265132090422718e-06, - "loss": 0.1767, - "step": 16382 - }, - { - "epoch": 0.83, - "grad_norm": 0.9537846912091068, - "learning_rate": 1.42566557113333e-06, - "loss": 0.1582, - "step": 16383 - }, - { - "epoch": 0.83, - "grad_norm": 1.1865624630619633, - "learning_rate": 1.4248181658036964e-06, - "loss": 0.1691, - "step": 16384 - }, - { - "epoch": 0.83, - "grad_norm": 1.086406895496996, - "learning_rate": 1.4239709930763513e-06, - "loss": 0.1747, - "step": 16385 - }, - { - "epoch": 0.83, - "grad_norm": 1.1570947276293209, - "learning_rate": 1.4231240529742774e-06, - "loss": 0.1715, - "step": 16386 - }, - { - "epoch": 0.83, - "grad_norm": 0.9852028749587856, - "learning_rate": 1.4222773455204486e-06, - "loss": 0.1396, - "step": 16387 - }, - { - "epoch": 0.83, - "grad_norm": 1.275628718580738, - "learning_rate": 1.4214308707378333e-06, - "loss": 0.176, - "step": 16388 - }, - { - "epoch": 0.83, - "grad_norm": 1.2817426416927773, - "learning_rate": 1.4205846286493875e-06, - "loss": 0.1856, - "step": 16389 - }, - { - "epoch": 0.83, - "grad_norm": 1.0207781425195546, - "learning_rate": 1.4197386192780715e-06, - "loss": 0.1574, - "step": 16390 - }, - { - "epoch": 0.83, - "grad_norm": 0.991995452101157, - "learning_rate": 1.4188928426468263e-06, - "loss": 0.1583, - "step": 16391 - }, - { - "epoch": 0.83, - "grad_norm": 1.4650315505964862, - "learning_rate": 1.418047298778601e-06, - "loss": 0.1735, - "step": 16392 - }, - { - "epoch": 0.83, - "grad_norm": 0.9804090300175904, - "learning_rate": 1.4172019876963249e-06, - "loss": 0.1867, - "step": 16393 - }, - { - "epoch": 0.83, - "grad_norm": 1.0081826340989326, - "learning_rate": 1.4163569094229311e-06, - "loss": 0.1488, - "step": 16394 - }, - { - "epoch": 0.83, - "grad_norm": 1.0979656503210717, - "learning_rate": 1.4155120639813392e-06, - "loss": 0.1768, - "step": 16395 - }, - { - "epoch": 0.83, - "grad_norm": 3.648498219436163, - "learning_rate": 1.414667451394468e-06, - "loss": 0.1893, - "step": 16396 - }, - { - "epoch": 0.83, - "grad_norm": 0.8907662091834141, - "learning_rate": 1.4138230716852285e-06, - "loss": 0.189, - "step": 16397 - }, - { - "epoch": 0.83, - "grad_norm": 1.169590311793009, - "learning_rate": 1.4129789248765214e-06, - "loss": 0.1481, - "step": 16398 - }, - { - "epoch": 0.83, - "grad_norm": 0.8896912266362786, - "learning_rate": 1.4121350109912479e-06, - "loss": 0.1561, - "step": 16399 - }, - { - "epoch": 0.83, - "grad_norm": 1.0289256443235757, - "learning_rate": 1.4112913300522946e-06, - "loss": 0.1538, - "step": 16400 - }, - { - "epoch": 0.83, - "grad_norm": 2.1660382935985822, - "learning_rate": 1.4104478820825518e-06, - "loss": 0.153, - "step": 16401 - }, - { - "epoch": 0.83, - "grad_norm": 1.0220030347435074, - "learning_rate": 1.4096046671048935e-06, - "loss": 0.1428, - "step": 16402 - }, - { - "epoch": 0.83, - "grad_norm": 0.7746742346123936, - "learning_rate": 1.4087616851421959e-06, - "loss": 0.1324, - "step": 16403 - }, - { - "epoch": 0.83, - "grad_norm": 1.068496271057554, - "learning_rate": 1.4079189362173196e-06, - "loss": 0.1699, - "step": 16404 - }, - { - "epoch": 0.83, - "grad_norm": 1.0600589036312207, - "learning_rate": 1.4070764203531283e-06, - "loss": 0.155, - "step": 16405 - }, - { - "epoch": 0.83, - "grad_norm": 1.0528632347878315, - "learning_rate": 1.4062341375724742e-06, - "loss": 0.1697, - "step": 16406 - }, - { - "epoch": 0.83, - "grad_norm": 1.084378023728591, - "learning_rate": 1.4053920878982074e-06, - "loss": 0.1613, - "step": 16407 - }, - { - "epoch": 0.83, - "grad_norm": 1.3434346377155673, - "learning_rate": 1.4045502713531623e-06, - "loss": 0.1797, - "step": 16408 - }, - { - "epoch": 0.83, - "grad_norm": 1.131817937825781, - "learning_rate": 1.4037086879601803e-06, - "loss": 0.162, - "step": 16409 - }, - { - "epoch": 0.83, - "grad_norm": 0.779090735975815, - "learning_rate": 1.4028673377420821e-06, - "loss": 0.1587, - "step": 16410 - }, - { - "epoch": 0.83, - "grad_norm": 1.631104786572298, - "learning_rate": 1.402026220721695e-06, - "loss": 0.1926, - "step": 16411 - }, - { - "epoch": 0.83, - "grad_norm": 1.1120761340682184, - "learning_rate": 1.4011853369218308e-06, - "loss": 0.1774, - "step": 16412 - }, - { - "epoch": 0.83, - "grad_norm": 0.8984433358696586, - "learning_rate": 1.4003446863653004e-06, - "loss": 0.1529, - "step": 16413 - }, - { - "epoch": 0.83, - "grad_norm": 0.9389321223117489, - "learning_rate": 1.3995042690749072e-06, - "loss": 0.1876, - "step": 16414 - }, - { - "epoch": 0.83, - "grad_norm": 1.002293730084705, - "learning_rate": 1.3986640850734444e-06, - "loss": 0.1472, - "step": 16415 - }, - { - "epoch": 0.83, - "grad_norm": 0.9460194544172318, - "learning_rate": 1.3978241343837073e-06, - "loss": 0.178, - "step": 16416 - }, - { - "epoch": 0.83, - "grad_norm": 0.9586613850300995, - "learning_rate": 1.396984417028473e-06, - "loss": 0.1549, - "step": 16417 - }, - { - "epoch": 0.83, - "grad_norm": 1.3849114738939412, - "learning_rate": 1.3961449330305255e-06, - "loss": 0.16, - "step": 16418 - }, - { - "epoch": 0.83, - "grad_norm": 1.1897351890626975, - "learning_rate": 1.3953056824126298e-06, - "loss": 0.1598, - "step": 16419 - }, - { - "epoch": 0.83, - "grad_norm": 1.1119253441096333, - "learning_rate": 1.3944666651975559e-06, - "loss": 0.1643, - "step": 16420 - }, - { - "epoch": 0.84, - "grad_norm": 0.9219902980961129, - "learning_rate": 1.3936278814080572e-06, - "loss": 0.1604, - "step": 16421 - }, - { - "epoch": 0.84, - "grad_norm": 1.2686997809339151, - "learning_rate": 1.3927893310668883e-06, - "loss": 0.1657, - "step": 16422 - }, - { - "epoch": 0.84, - "grad_norm": 1.1503442654242535, - "learning_rate": 1.3919510141967951e-06, - "loss": 0.1586, - "step": 16423 - }, - { - "epoch": 0.84, - "grad_norm": 1.0609452628827456, - "learning_rate": 1.3911129308205196e-06, - "loss": 0.1798, - "step": 16424 - }, - { - "epoch": 0.84, - "grad_norm": 0.9015507948091649, - "learning_rate": 1.390275080960789e-06, - "loss": 0.1503, - "step": 16425 - }, - { - "epoch": 0.84, - "grad_norm": 1.1521551730803656, - "learning_rate": 1.3894374646403363e-06, - "loss": 0.1621, - "step": 16426 - }, - { - "epoch": 0.84, - "grad_norm": 1.111762810036442, - "learning_rate": 1.3886000818818758e-06, - "loss": 0.1691, - "step": 16427 - }, - { - "epoch": 0.84, - "grad_norm": 1.0080781136444183, - "learning_rate": 1.3877629327081266e-06, - "loss": 0.1465, - "step": 16428 - }, - { - "epoch": 0.84, - "grad_norm": 1.299355270870439, - "learning_rate": 1.3869260171417919e-06, - "loss": 0.166, - "step": 16429 - }, - { - "epoch": 0.84, - "grad_norm": 1.1388745378816476, - "learning_rate": 1.3860893352055782e-06, - "loss": 0.1801, - "step": 16430 - }, - { - "epoch": 0.84, - "grad_norm": 1.1660929089576133, - "learning_rate": 1.3852528869221759e-06, - "loss": 0.153, - "step": 16431 - }, - { - "epoch": 0.84, - "grad_norm": 1.2058432239074868, - "learning_rate": 1.3844166723142748e-06, - "loss": 0.1906, - "step": 16432 - }, - { - "epoch": 0.84, - "grad_norm": 1.496859015835789, - "learning_rate": 1.3835806914045602e-06, - "loss": 0.1554, - "step": 16433 - }, - { - "epoch": 0.84, - "grad_norm": 2.102976200306153, - "learning_rate": 1.3827449442157049e-06, - "loss": 0.1776, - "step": 16434 - }, - { - "epoch": 0.84, - "grad_norm": 0.8612700307948432, - "learning_rate": 1.3819094307703807e-06, - "loss": 0.162, - "step": 16435 - }, - { - "epoch": 0.84, - "grad_norm": 1.2124658733496405, - "learning_rate": 1.3810741510912485e-06, - "loss": 0.1533, - "step": 16436 - }, - { - "epoch": 0.84, - "grad_norm": 1.2087711356790787, - "learning_rate": 1.380239105200969e-06, - "loss": 0.1724, - "step": 16437 - }, - { - "epoch": 0.84, - "grad_norm": 0.8197129714176736, - "learning_rate": 1.3794042931221873e-06, - "loss": 0.1483, - "step": 16438 - }, - { - "epoch": 0.84, - "grad_norm": 1.2090348799586166, - "learning_rate": 1.3785697148775522e-06, - "loss": 0.1589, - "step": 16439 - }, - { - "epoch": 0.84, - "grad_norm": 0.9053956920846441, - "learning_rate": 1.3777353704897002e-06, - "loss": 0.1658, - "step": 16440 - }, - { - "epoch": 0.84, - "grad_norm": 0.970638279591743, - "learning_rate": 1.376901259981266e-06, - "loss": 0.1563, - "step": 16441 - }, - { - "epoch": 0.84, - "grad_norm": 1.8254486572617799, - "learning_rate": 1.3760673833748684e-06, - "loss": 0.1497, - "step": 16442 - }, - { - "epoch": 0.84, - "grad_norm": 1.0612951183055963, - "learning_rate": 1.3752337406931338e-06, - "loss": 0.1393, - "step": 16443 - }, - { - "epoch": 0.84, - "grad_norm": 1.129746663017258, - "learning_rate": 1.3744003319586685e-06, - "loss": 0.162, - "step": 16444 - }, - { - "epoch": 0.84, - "grad_norm": 1.166170052999444, - "learning_rate": 1.3735671571940835e-06, - "loss": 0.1663, - "step": 16445 - }, - { - "epoch": 0.84, - "grad_norm": 0.9240405167721508, - "learning_rate": 1.3727342164219736e-06, - "loss": 0.1682, - "step": 16446 - }, - { - "epoch": 0.84, - "grad_norm": 1.8770970913366118, - "learning_rate": 1.371901509664939e-06, - "loss": 0.1839, - "step": 16447 - }, - { - "epoch": 0.84, - "grad_norm": 1.3908562774318896, - "learning_rate": 1.3710690369455605e-06, - "loss": 0.1514, - "step": 16448 - }, - { - "epoch": 0.84, - "grad_norm": 1.5732484290072952, - "learning_rate": 1.3702367982864218e-06, - "loss": 0.1551, - "step": 16449 - }, - { - "epoch": 0.84, - "grad_norm": 1.0855678285096475, - "learning_rate": 1.3694047937100985e-06, - "loss": 0.1643, - "step": 16450 - }, - { - "epoch": 0.84, - "grad_norm": 1.2534297475243648, - "learning_rate": 1.368573023239157e-06, - "loss": 0.1929, - "step": 16451 - }, - { - "epoch": 0.84, - "grad_norm": 0.9189340263405366, - "learning_rate": 1.3677414868961615e-06, - "loss": 0.1644, - "step": 16452 - }, - { - "epoch": 0.84, - "grad_norm": 1.0245879139658436, - "learning_rate": 1.3669101847036625e-06, - "loss": 0.1689, - "step": 16453 - }, - { - "epoch": 0.84, - "grad_norm": 1.392745604846661, - "learning_rate": 1.3660791166842158e-06, - "loss": 0.1812, - "step": 16454 - }, - { - "epoch": 0.84, - "grad_norm": 1.4550152590012073, - "learning_rate": 1.3652482828603575e-06, - "loss": 0.155, - "step": 16455 - }, - { - "epoch": 0.84, - "grad_norm": 1.0442879814309745, - "learning_rate": 1.3644176832546296e-06, - "loss": 0.184, - "step": 16456 - }, - { - "epoch": 0.84, - "grad_norm": 1.1630175865305374, - "learning_rate": 1.3635873178895587e-06, - "loss": 0.1712, - "step": 16457 - }, - { - "epoch": 0.84, - "grad_norm": 1.1786859632173177, - "learning_rate": 1.3627571867876689e-06, - "loss": 0.1613, - "step": 16458 - }, - { - "epoch": 0.84, - "grad_norm": 0.825323167134559, - "learning_rate": 1.3619272899714776e-06, - "loss": 0.1654, - "step": 16459 - }, - { - "epoch": 0.84, - "grad_norm": 1.2754532680519124, - "learning_rate": 1.3610976274634991e-06, - "loss": 0.1724, - "step": 16460 - }, - { - "epoch": 0.84, - "grad_norm": 1.1863324872811745, - "learning_rate": 1.3602681992862333e-06, - "loss": 0.1731, - "step": 16461 - }, - { - "epoch": 0.84, - "grad_norm": 0.9164563337067313, - "learning_rate": 1.359439005462183e-06, - "loss": 0.1614, - "step": 16462 - }, - { - "epoch": 0.84, - "grad_norm": 1.4849730806306138, - "learning_rate": 1.3586100460138352e-06, - "loss": 0.1425, - "step": 16463 - }, - { - "epoch": 0.84, - "grad_norm": 0.9652478128597369, - "learning_rate": 1.3577813209636803e-06, - "loss": 0.1739, - "step": 16464 - }, - { - "epoch": 0.84, - "grad_norm": 1.041259915146661, - "learning_rate": 1.3569528303341927e-06, - "loss": 0.1763, - "step": 16465 - }, - { - "epoch": 0.84, - "grad_norm": 1.0776118097768106, - "learning_rate": 1.356124574147848e-06, - "loss": 0.1636, - "step": 16466 - }, - { - "epoch": 0.84, - "grad_norm": 1.4982116007172273, - "learning_rate": 1.3552965524271144e-06, - "loss": 0.1715, - "step": 16467 - }, - { - "epoch": 0.84, - "grad_norm": 1.091139820413839, - "learning_rate": 1.3544687651944476e-06, - "loss": 0.1667, - "step": 16468 - }, - { - "epoch": 0.84, - "grad_norm": 1.0532924603589213, - "learning_rate": 1.3536412124723075e-06, - "loss": 0.1559, - "step": 16469 - }, - { - "epoch": 0.84, - "grad_norm": 1.7324418230947063, - "learning_rate": 1.3528138942831337e-06, - "loss": 0.1611, - "step": 16470 - }, - { - "epoch": 0.84, - "grad_norm": 1.2857408269894968, - "learning_rate": 1.351986810649375e-06, - "loss": 0.1643, - "step": 16471 - }, - { - "epoch": 0.84, - "grad_norm": 1.007133004425682, - "learning_rate": 1.35115996159346e-06, - "loss": 0.1704, - "step": 16472 - }, - { - "epoch": 0.84, - "grad_norm": 0.9087690813649096, - "learning_rate": 1.3503333471378211e-06, - "loss": 0.168, - "step": 16473 - }, - { - "epoch": 0.84, - "grad_norm": 1.3235653985397124, - "learning_rate": 1.3495069673048778e-06, - "loss": 0.1738, - "step": 16474 - }, - { - "epoch": 0.84, - "grad_norm": 1.0334826159258361, - "learning_rate": 1.3486808221170455e-06, - "loss": 0.1568, - "step": 16475 - }, - { - "epoch": 0.84, - "grad_norm": 1.1220531438873298, - "learning_rate": 1.3478549115967344e-06, - "loss": 0.1461, - "step": 16476 - }, - { - "epoch": 0.84, - "grad_norm": 1.7370009339817567, - "learning_rate": 1.3470292357663506e-06, - "loss": 0.1513, - "step": 16477 - }, - { - "epoch": 0.84, - "grad_norm": 1.0200812436144857, - "learning_rate": 1.3462037946482842e-06, - "loss": 0.1608, - "step": 16478 - }, - { - "epoch": 0.84, - "grad_norm": 0.8658443215049889, - "learning_rate": 1.3453785882649317e-06, - "loss": 0.1708, - "step": 16479 - }, - { - "epoch": 0.84, - "grad_norm": 0.9038392745163156, - "learning_rate": 1.3445536166386708e-06, - "loss": 0.1551, - "step": 16480 - }, - { - "epoch": 0.84, - "grad_norm": 0.8945414551418993, - "learning_rate": 1.3437288797918858e-06, - "loss": 0.1659, - "step": 16481 - }, - { - "epoch": 0.84, - "grad_norm": 1.2513579274863056, - "learning_rate": 1.3429043777469397e-06, - "loss": 0.1573, - "step": 16482 - }, - { - "epoch": 0.84, - "grad_norm": 1.032128921002778, - "learning_rate": 1.3420801105262026e-06, - "loss": 0.1572, - "step": 16483 - }, - { - "epoch": 0.84, - "grad_norm": 0.9609598056949419, - "learning_rate": 1.3412560781520334e-06, - "loss": 0.1575, - "step": 16484 - }, - { - "epoch": 0.84, - "grad_norm": 0.9334010175995588, - "learning_rate": 1.3404322806467796e-06, - "loss": 0.1288, - "step": 16485 - }, - { - "epoch": 0.84, - "grad_norm": 1.3460503358341827, - "learning_rate": 1.339608718032791e-06, - "loss": 0.1533, - "step": 16486 - }, - { - "epoch": 0.84, - "grad_norm": 1.8418329982109378, - "learning_rate": 1.3387853903324032e-06, - "loss": 0.1772, - "step": 16487 - }, - { - "epoch": 0.84, - "grad_norm": 1.667359050992339, - "learning_rate": 1.337962297567954e-06, - "loss": 0.1493, - "step": 16488 - }, - { - "epoch": 0.84, - "grad_norm": 1.0445679883208367, - "learning_rate": 1.3371394397617644e-06, - "loss": 0.1637, - "step": 16489 - }, - { - "epoch": 0.84, - "grad_norm": 0.8907911590724815, - "learning_rate": 1.3363168169361574e-06, - "loss": 0.1643, - "step": 16490 - }, - { - "epoch": 0.84, - "grad_norm": 1.0557707181235574, - "learning_rate": 1.3354944291134452e-06, - "loss": 0.1616, - "step": 16491 - }, - { - "epoch": 0.84, - "grad_norm": 0.8614216558652774, - "learning_rate": 1.3346722763159358e-06, - "loss": 0.1483, - "step": 16492 - }, - { - "epoch": 0.84, - "grad_norm": 1.469119758561671, - "learning_rate": 1.3338503585659302e-06, - "loss": 0.1802, - "step": 16493 - }, - { - "epoch": 0.84, - "grad_norm": 0.8268722157076108, - "learning_rate": 1.3330286758857258e-06, - "loss": 0.1535, - "step": 16494 - }, - { - "epoch": 0.84, - "grad_norm": 0.978780111321717, - "learning_rate": 1.3322072282976051e-06, - "loss": 0.1771, - "step": 16495 - }, - { - "epoch": 0.84, - "grad_norm": 0.7792839909962566, - "learning_rate": 1.3313860158238556e-06, - "loss": 0.156, - "step": 16496 - }, - { - "epoch": 0.84, - "grad_norm": 1.130327790411797, - "learning_rate": 1.3305650384867475e-06, - "loss": 0.1761, - "step": 16497 - }, - { - "epoch": 0.84, - "grad_norm": 0.9698512769898063, - "learning_rate": 1.329744296308555e-06, - "loss": 0.1744, - "step": 16498 - }, - { - "epoch": 0.84, - "grad_norm": 1.7540272536679848, - "learning_rate": 1.3289237893115348e-06, - "loss": 0.1715, - "step": 16499 - }, - { - "epoch": 0.84, - "grad_norm": 0.9543124083561036, - "learning_rate": 1.3281035175179503e-06, - "loss": 0.1716, - "step": 16500 - }, - { - "epoch": 0.84, - "grad_norm": 1.0794734550478802, - "learning_rate": 1.3272834809500446e-06, - "loss": 0.176, - "step": 16501 - }, - { - "epoch": 0.84, - "grad_norm": 0.986549355968139, - "learning_rate": 1.3264636796300646e-06, - "loss": 0.1717, - "step": 16502 - }, - { - "epoch": 0.84, - "grad_norm": 0.9899451300872415, - "learning_rate": 1.325644113580249e-06, - "loss": 0.1564, - "step": 16503 - }, - { - "epoch": 0.84, - "grad_norm": 1.1125736143872642, - "learning_rate": 1.3248247828228244e-06, - "loss": 0.1646, - "step": 16504 - }, - { - "epoch": 0.84, - "grad_norm": 1.1028198395225064, - "learning_rate": 1.324005687380021e-06, - "loss": 0.1556, - "step": 16505 - }, - { - "epoch": 0.84, - "grad_norm": 1.9268461161989021, - "learning_rate": 1.32318682727405e-06, - "loss": 0.1824, - "step": 16506 - }, - { - "epoch": 0.84, - "grad_norm": 1.2747726043601, - "learning_rate": 1.322368202527129e-06, - "loss": 0.1638, - "step": 16507 - }, - { - "epoch": 0.84, - "grad_norm": 9.851572183953692, - "learning_rate": 1.321549813161458e-06, - "loss": 0.1739, - "step": 16508 - }, - { - "epoch": 0.84, - "grad_norm": 1.2012203043166474, - "learning_rate": 1.3207316591992392e-06, - "loss": 0.1578, - "step": 16509 - }, - { - "epoch": 0.84, - "grad_norm": 2.631785569122105, - "learning_rate": 1.3199137406626639e-06, - "loss": 0.1582, - "step": 16510 - }, - { - "epoch": 0.84, - "grad_norm": 1.8110969696411472, - "learning_rate": 1.319096057573921e-06, - "loss": 0.1816, - "step": 16511 - }, - { - "epoch": 0.84, - "grad_norm": 2.4847912761305304, - "learning_rate": 1.3182786099551848e-06, - "loss": 0.1669, - "step": 16512 - }, - { - "epoch": 0.84, - "grad_norm": 1.0646581402910877, - "learning_rate": 1.3174613978286355e-06, - "loss": 0.1706, - "step": 16513 - }, - { - "epoch": 0.84, - "grad_norm": 1.5209956250155796, - "learning_rate": 1.3166444212164331e-06, - "loss": 0.1956, - "step": 16514 - }, - { - "epoch": 0.84, - "grad_norm": 1.0073009213634985, - "learning_rate": 1.3158276801407432e-06, - "loss": 0.1748, - "step": 16515 - }, - { - "epoch": 0.84, - "grad_norm": 0.9904598112902595, - "learning_rate": 1.3150111746237159e-06, - "loss": 0.1339, - "step": 16516 - }, - { - "epoch": 0.84, - "grad_norm": 1.4137658796525223, - "learning_rate": 1.3141949046875025e-06, - "loss": 0.1742, - "step": 16517 - }, - { - "epoch": 0.84, - "grad_norm": 1.1149818676663423, - "learning_rate": 1.3133788703542417e-06, - "loss": 0.1771, - "step": 16518 - }, - { - "epoch": 0.84, - "grad_norm": 0.8472715236112703, - "learning_rate": 1.3125630716460692e-06, - "loss": 0.1746, - "step": 16519 - }, - { - "epoch": 0.84, - "grad_norm": 1.1760004963805697, - "learning_rate": 1.3117475085851173e-06, - "loss": 0.1757, - "step": 16520 - }, - { - "epoch": 0.84, - "grad_norm": 1.0916298607265162, - "learning_rate": 1.3109321811935017e-06, - "loss": 0.1651, - "step": 16521 - }, - { - "epoch": 0.84, - "grad_norm": 1.0303127188530168, - "learning_rate": 1.3101170894933436e-06, - "loss": 0.1641, - "step": 16522 - }, - { - "epoch": 0.84, - "grad_norm": 0.9849398321369848, - "learning_rate": 1.3093022335067485e-06, - "loss": 0.1786, - "step": 16523 - }, - { - "epoch": 0.84, - "grad_norm": 0.926636407257029, - "learning_rate": 1.3084876132558233e-06, - "loss": 0.1583, - "step": 16524 - }, - { - "epoch": 0.84, - "grad_norm": 1.0249330189845316, - "learning_rate": 1.3076732287626603e-06, - "loss": 0.1666, - "step": 16525 - }, - { - "epoch": 0.84, - "grad_norm": 0.8608635334435888, - "learning_rate": 1.306859080049353e-06, - "loss": 0.163, - "step": 16526 - }, - { - "epoch": 0.84, - "grad_norm": 1.077394151696253, - "learning_rate": 1.3060451671379837e-06, - "loss": 0.1793, - "step": 16527 - }, - { - "epoch": 0.84, - "grad_norm": 0.9431214779085131, - "learning_rate": 1.3052314900506292e-06, - "loss": 0.1718, - "step": 16528 - }, - { - "epoch": 0.84, - "grad_norm": 1.0518179327905288, - "learning_rate": 1.3044180488093616e-06, - "loss": 0.1525, - "step": 16529 - }, - { - "epoch": 0.84, - "grad_norm": 1.3463257700345999, - "learning_rate": 1.303604843436248e-06, - "loss": 0.1587, - "step": 16530 - }, - { - "epoch": 0.84, - "grad_norm": 1.0130415107260289, - "learning_rate": 1.3027918739533429e-06, - "loss": 0.1584, - "step": 16531 - }, - { - "epoch": 0.84, - "grad_norm": 1.2403872414203043, - "learning_rate": 1.3019791403826998e-06, - "loss": 0.158, - "step": 16532 - }, - { - "epoch": 0.84, - "grad_norm": 0.9707677779447362, - "learning_rate": 1.3011666427463631e-06, - "loss": 0.1448, - "step": 16533 - }, - { - "epoch": 0.84, - "grad_norm": 1.4342116326299943, - "learning_rate": 1.3003543810663744e-06, - "loss": 0.1634, - "step": 16534 - }, - { - "epoch": 0.84, - "grad_norm": 0.8825279913771847, - "learning_rate": 1.2995423553647623e-06, - "loss": 0.1418, - "step": 16535 - }, - { - "epoch": 0.84, - "grad_norm": 1.8585049453063847, - "learning_rate": 1.2987305656635541e-06, - "loss": 0.1763, - "step": 16536 - }, - { - "epoch": 0.84, - "grad_norm": 0.8345181790410855, - "learning_rate": 1.297919011984774e-06, - "loss": 0.192, - "step": 16537 - }, - { - "epoch": 0.84, - "grad_norm": 1.0837015041442009, - "learning_rate": 1.2971076943504302e-06, - "loss": 0.1513, - "step": 16538 - }, - { - "epoch": 0.84, - "grad_norm": 1.3773116668194152, - "learning_rate": 1.296296612782534e-06, - "loss": 0.1754, - "step": 16539 - }, - { - "epoch": 0.84, - "grad_norm": 1.0696309455674147, - "learning_rate": 1.2954857673030807e-06, - "loss": 0.1589, - "step": 16540 - }, - { - "epoch": 0.84, - "grad_norm": 0.9033196322937592, - "learning_rate": 1.2946751579340699e-06, - "loss": 0.1849, - "step": 16541 - }, - { - "epoch": 0.84, - "grad_norm": 0.9471419142153938, - "learning_rate": 1.293864784697486e-06, - "loss": 0.1392, - "step": 16542 - }, - { - "epoch": 0.84, - "grad_norm": 0.8082640463443478, - "learning_rate": 1.2930546476153128e-06, - "loss": 0.1456, - "step": 16543 - }, - { - "epoch": 0.84, - "grad_norm": 1.1840965734810238, - "learning_rate": 1.2922447467095222e-06, - "loss": 0.1835, - "step": 16544 - }, - { - "epoch": 0.84, - "grad_norm": 1.5552907721291551, - "learning_rate": 1.2914350820020837e-06, - "loss": 0.1479, - "step": 16545 - }, - { - "epoch": 0.84, - "grad_norm": 1.2297661172025343, - "learning_rate": 1.290625653514962e-06, - "loss": 0.157, - "step": 16546 - }, - { - "epoch": 0.84, - "grad_norm": 0.8249973525995244, - "learning_rate": 1.2898164612701125e-06, - "loss": 0.1489, - "step": 16547 - }, - { - "epoch": 0.84, - "grad_norm": 1.5170565974582284, - "learning_rate": 1.2890075052894812e-06, - "loss": 0.1853, - "step": 16548 - }, - { - "epoch": 0.84, - "grad_norm": 0.9236007876873691, - "learning_rate": 1.2881987855950162e-06, - "loss": 0.1636, - "step": 16549 - }, - { - "epoch": 0.84, - "grad_norm": 0.9777282906763574, - "learning_rate": 1.2873903022086487e-06, - "loss": 0.155, - "step": 16550 - }, - { - "epoch": 0.84, - "grad_norm": 1.1129280526793448, - "learning_rate": 1.2865820551523134e-06, - "loss": 0.1848, - "step": 16551 - }, - { - "epoch": 0.84, - "grad_norm": 0.995012290601466, - "learning_rate": 1.2857740444479306e-06, - "loss": 0.1923, - "step": 16552 - }, - { - "epoch": 0.84, - "grad_norm": 1.014297480330186, - "learning_rate": 1.2849662701174204e-06, - "loss": 0.1802, - "step": 16553 - }, - { - "epoch": 0.84, - "grad_norm": 1.3572272500601252, - "learning_rate": 1.28415873218269e-06, - "loss": 0.1404, - "step": 16554 - }, - { - "epoch": 0.84, - "grad_norm": 1.5575915436378995, - "learning_rate": 1.2833514306656468e-06, - "loss": 0.1473, - "step": 16555 - }, - { - "epoch": 0.84, - "grad_norm": 1.1382318316054767, - "learning_rate": 1.2825443655881897e-06, - "loss": 0.1705, - "step": 16556 - }, - { - "epoch": 0.84, - "grad_norm": 0.9193602383948707, - "learning_rate": 1.2817375369722074e-06, - "loss": 0.1576, - "step": 16557 - }, - { - "epoch": 0.84, - "grad_norm": 0.8721215551729223, - "learning_rate": 1.2809309448395891e-06, - "loss": 0.1705, - "step": 16558 - }, - { - "epoch": 0.84, - "grad_norm": 1.222139036368183, - "learning_rate": 1.2801245892122095e-06, - "loss": 0.169, - "step": 16559 - }, - { - "epoch": 0.84, - "grad_norm": 1.0006821267173502, - "learning_rate": 1.2793184701119444e-06, - "loss": 0.1468, - "step": 16560 - }, - { - "epoch": 0.84, - "grad_norm": 0.8754388516894127, - "learning_rate": 1.2785125875606563e-06, - "loss": 0.1743, - "step": 16561 - }, - { - "epoch": 0.84, - "grad_norm": 0.8966656140349384, - "learning_rate": 1.277706941580208e-06, - "loss": 0.1775, - "step": 16562 - }, - { - "epoch": 0.84, - "grad_norm": 1.2780190755664806, - "learning_rate": 1.2769015321924506e-06, - "loss": 0.1712, - "step": 16563 - }, - { - "epoch": 0.84, - "grad_norm": 0.9360785334190713, - "learning_rate": 1.2760963594192332e-06, - "loss": 0.1488, - "step": 16564 - }, - { - "epoch": 0.84, - "grad_norm": 0.910441612685864, - "learning_rate": 1.2752914232823942e-06, - "loss": 0.1678, - "step": 16565 - }, - { - "epoch": 0.84, - "grad_norm": 1.5132061912551134, - "learning_rate": 1.2744867238037695e-06, - "loss": 0.1779, - "step": 16566 - }, - { - "epoch": 0.84, - "grad_norm": 1.0221427405596983, - "learning_rate": 1.2736822610051825e-06, - "loss": 0.1525, - "step": 16567 - }, - { - "epoch": 0.84, - "grad_norm": 0.9288590580525012, - "learning_rate": 1.2728780349084603e-06, - "loss": 0.1559, - "step": 16568 - }, - { - "epoch": 0.84, - "grad_norm": 1.1899146088344454, - "learning_rate": 1.272074045535412e-06, - "loss": 0.1549, - "step": 16569 - }, - { - "epoch": 0.84, - "grad_norm": 1.185011658713729, - "learning_rate": 1.271270292907849e-06, - "loss": 0.1754, - "step": 16570 - }, - { - "epoch": 0.84, - "grad_norm": 1.110425755207929, - "learning_rate": 1.270466777047572e-06, - "loss": 0.1713, - "step": 16571 - }, - { - "epoch": 0.84, - "grad_norm": 0.8477743760141506, - "learning_rate": 1.2696634979763757e-06, - "loss": 0.145, - "step": 16572 - }, - { - "epoch": 0.84, - "grad_norm": 0.876659589143047, - "learning_rate": 1.2688604557160523e-06, - "loss": 0.1719, - "step": 16573 - }, - { - "epoch": 0.84, - "grad_norm": 1.931464501268706, - "learning_rate": 1.26805765028838e-06, - "loss": 0.1742, - "step": 16574 - }, - { - "epoch": 0.84, - "grad_norm": 1.0268481379315793, - "learning_rate": 1.2672550817151397e-06, - "loss": 0.1778, - "step": 16575 - }, - { - "epoch": 0.84, - "grad_norm": 0.9313058948325292, - "learning_rate": 1.2664527500180956e-06, - "loss": 0.1481, - "step": 16576 - }, - { - "epoch": 0.84, - "grad_norm": 1.041165067742634, - "learning_rate": 1.2656506552190163e-06, - "loss": 0.1793, - "step": 16577 - }, - { - "epoch": 0.84, - "grad_norm": 1.3093192642434583, - "learning_rate": 1.264848797339655e-06, - "loss": 0.1745, - "step": 16578 - }, - { - "epoch": 0.84, - "grad_norm": 1.1592795347577627, - "learning_rate": 1.2640471764017625e-06, - "loss": 0.165, - "step": 16579 - }, - { - "epoch": 0.84, - "grad_norm": 1.5147033431066657, - "learning_rate": 1.2632457924270835e-06, - "loss": 0.1658, - "step": 16580 - }, - { - "epoch": 0.84, - "grad_norm": 0.9477516005638641, - "learning_rate": 1.2624446454373596e-06, - "loss": 0.162, - "step": 16581 - }, - { - "epoch": 0.84, - "grad_norm": 1.6145234740898102, - "learning_rate": 1.2616437354543142e-06, - "loss": 0.1783, - "step": 16582 - }, - { - "epoch": 0.84, - "grad_norm": 0.9709781790920385, - "learning_rate": 1.2608430624996793e-06, - "loss": 0.1613, - "step": 16583 - }, - { - "epoch": 0.84, - "grad_norm": 1.0059251190145087, - "learning_rate": 1.2600426265951671e-06, - "loss": 0.1655, - "step": 16584 - }, - { - "epoch": 0.84, - "grad_norm": 0.852242224906641, - "learning_rate": 1.2592424277624948e-06, - "loss": 0.1798, - "step": 16585 - }, - { - "epoch": 0.84, - "grad_norm": 0.9307410637537811, - "learning_rate": 1.2584424660233641e-06, - "loss": 0.1717, - "step": 16586 - }, - { - "epoch": 0.84, - "grad_norm": 2.3261276227772267, - "learning_rate": 1.2576427413994764e-06, - "loss": 0.1568, - "step": 16587 - }, - { - "epoch": 0.84, - "grad_norm": 1.1057900784109742, - "learning_rate": 1.2568432539125207e-06, - "loss": 0.1714, - "step": 16588 - }, - { - "epoch": 0.84, - "grad_norm": 1.0409802751492843, - "learning_rate": 1.256044003584186e-06, - "loss": 0.1449, - "step": 16589 - }, - { - "epoch": 0.84, - "grad_norm": 1.189379870667062, - "learning_rate": 1.255244990436153e-06, - "loss": 0.1788, - "step": 16590 - }, - { - "epoch": 0.84, - "grad_norm": 0.9420416050470736, - "learning_rate": 1.2544462144900926e-06, - "loss": 0.1716, - "step": 16591 - }, - { - "epoch": 0.84, - "grad_norm": 0.7131804242727195, - "learning_rate": 1.253647675767674e-06, - "loss": 0.1671, - "step": 16592 - }, - { - "epoch": 0.84, - "grad_norm": 0.8651428576530652, - "learning_rate": 1.2528493742905533e-06, - "loss": 0.1541, - "step": 16593 - }, - { - "epoch": 0.84, - "grad_norm": 0.9329106635611252, - "learning_rate": 1.252051310080391e-06, - "loss": 0.1493, - "step": 16594 - }, - { - "epoch": 0.84, - "grad_norm": 0.8348058446150314, - "learning_rate": 1.2512534831588285e-06, - "loss": 0.1701, - "step": 16595 - }, - { - "epoch": 0.84, - "grad_norm": 1.3710651965857372, - "learning_rate": 1.2504558935475108e-06, - "loss": 0.1533, - "step": 16596 - }, - { - "epoch": 0.84, - "grad_norm": 1.540429321961662, - "learning_rate": 1.2496585412680696e-06, - "loss": 0.1754, - "step": 16597 - }, - { - "epoch": 0.84, - "grad_norm": 0.9723673657195523, - "learning_rate": 1.2488614263421338e-06, - "loss": 0.1668, - "step": 16598 - }, - { - "epoch": 0.84, - "grad_norm": 0.8723989382271515, - "learning_rate": 1.248064548791328e-06, - "loss": 0.2018, - "step": 16599 - }, - { - "epoch": 0.84, - "grad_norm": 1.1434520514035937, - "learning_rate": 1.2472679086372662e-06, - "loss": 0.1568, - "step": 16600 - }, - { - "epoch": 0.84, - "grad_norm": 1.3246782154021164, - "learning_rate": 1.2464715059015553e-06, - "loss": 0.1793, - "step": 16601 - }, - { - "epoch": 0.84, - "grad_norm": 2.537808161865273, - "learning_rate": 1.2456753406058008e-06, - "loss": 0.1464, - "step": 16602 - }, - { - "epoch": 0.84, - "grad_norm": 1.4533241913453714, - "learning_rate": 1.2448794127715947e-06, - "loss": 0.1396, - "step": 16603 - }, - { - "epoch": 0.84, - "grad_norm": 0.9080860744875678, - "learning_rate": 1.2440837224205316e-06, - "loss": 0.1399, - "step": 16604 - }, - { - "epoch": 0.84, - "grad_norm": 1.470295143980769, - "learning_rate": 1.243288269574191e-06, - "loss": 0.1974, - "step": 16605 - }, - { - "epoch": 0.84, - "grad_norm": 2.04915953083232, - "learning_rate": 1.242493054254149e-06, - "loss": 0.1341, - "step": 16606 - }, - { - "epoch": 0.84, - "grad_norm": 1.044190308506685, - "learning_rate": 1.2416980764819807e-06, - "loss": 0.1829, - "step": 16607 - }, - { - "epoch": 0.84, - "grad_norm": 0.867774746271164, - "learning_rate": 1.2409033362792444e-06, - "loss": 0.1518, - "step": 16608 - }, - { - "epoch": 0.84, - "grad_norm": 1.054702853097598, - "learning_rate": 1.2401088336675015e-06, - "loss": 0.1585, - "step": 16609 - }, - { - "epoch": 0.84, - "grad_norm": 0.9045444257729103, - "learning_rate": 1.2393145686682995e-06, - "loss": 0.1741, - "step": 16610 - }, - { - "epoch": 0.84, - "grad_norm": 1.4045243451592733, - "learning_rate": 1.2385205413031865e-06, - "loss": 0.179, - "step": 16611 - }, - { - "epoch": 0.84, - "grad_norm": 0.8820536894164042, - "learning_rate": 1.2377267515936964e-06, - "loss": 0.1542, - "step": 16612 - }, - { - "epoch": 0.84, - "grad_norm": 1.015566468698118, - "learning_rate": 1.2369331995613664e-06, - "loss": 0.1619, - "step": 16613 - }, - { - "epoch": 0.84, - "grad_norm": 1.103506609341633, - "learning_rate": 1.2361398852277151e-06, - "loss": 0.1456, - "step": 16614 - }, - { - "epoch": 0.84, - "grad_norm": 0.9577327705290578, - "learning_rate": 1.2353468086142639e-06, - "loss": 0.1849, - "step": 16615 - }, - { - "epoch": 0.84, - "grad_norm": 1.0037833100661795, - "learning_rate": 1.2345539697425269e-06, - "loss": 0.1584, - "step": 16616 - }, - { - "epoch": 0.85, - "grad_norm": 0.8532619138403739, - "learning_rate": 1.2337613686340099e-06, - "loss": 0.16, - "step": 16617 - }, - { - "epoch": 0.85, - "grad_norm": 1.0251008349032038, - "learning_rate": 1.2329690053102085e-06, - "loss": 0.1654, - "step": 16618 - }, - { - "epoch": 0.85, - "grad_norm": 0.8976002504409775, - "learning_rate": 1.2321768797926203e-06, - "loss": 0.1532, - "step": 16619 - }, - { - "epoch": 0.85, - "grad_norm": 1.1929559472997977, - "learning_rate": 1.2313849921027277e-06, - "loss": 0.1759, - "step": 16620 - }, - { - "epoch": 0.85, - "grad_norm": 1.2062388730124642, - "learning_rate": 1.2305933422620143e-06, - "loss": 0.1706, - "step": 16621 - }, - { - "epoch": 0.85, - "grad_norm": 0.9055888863766589, - "learning_rate": 1.2298019302919505e-06, - "loss": 0.1615, - "step": 16622 - }, - { - "epoch": 0.85, - "grad_norm": 1.9225015025746164, - "learning_rate": 1.2290107562140053e-06, - "loss": 0.1593, - "step": 16623 - }, - { - "epoch": 0.85, - "grad_norm": 1.2805442002710812, - "learning_rate": 1.2282198200496377e-06, - "loss": 0.1569, - "step": 16624 - }, - { - "epoch": 0.85, - "grad_norm": 1.4657779384979428, - "learning_rate": 1.2274291218203027e-06, - "loss": 0.167, - "step": 16625 - }, - { - "epoch": 0.85, - "grad_norm": 1.0700714587634568, - "learning_rate": 1.22663866154745e-06, - "loss": 0.1695, - "step": 16626 - }, - { - "epoch": 0.85, - "grad_norm": 0.8939815710264033, - "learning_rate": 1.225848439252517e-06, - "loss": 0.1686, - "step": 16627 - }, - { - "epoch": 0.85, - "grad_norm": 1.144969292046293, - "learning_rate": 1.2250584549569433e-06, - "loss": 0.1475, - "step": 16628 - }, - { - "epoch": 0.85, - "grad_norm": 0.9233843808729649, - "learning_rate": 1.2242687086821525e-06, - "loss": 0.1464, - "step": 16629 - }, - { - "epoch": 0.85, - "grad_norm": 2.6290684852204285, - "learning_rate": 1.2234792004495699e-06, - "loss": 0.1749, - "step": 16630 - }, - { - "epoch": 0.85, - "grad_norm": 0.856185263973189, - "learning_rate": 1.2226899302806083e-06, - "loss": 0.1517, - "step": 16631 - }, - { - "epoch": 0.85, - "grad_norm": 1.0364409409561266, - "learning_rate": 1.2219008981966785e-06, - "loss": 0.1678, - "step": 16632 - }, - { - "epoch": 0.85, - "grad_norm": 1.130986837273496, - "learning_rate": 1.221112104219182e-06, - "loss": 0.1577, - "step": 16633 - }, - { - "epoch": 0.85, - "grad_norm": 0.9271609905857692, - "learning_rate": 1.2203235483695176e-06, - "loss": 0.1397, - "step": 16634 - }, - { - "epoch": 0.85, - "grad_norm": 1.3835412288620175, - "learning_rate": 1.2195352306690711e-06, - "loss": 0.1585, - "step": 16635 - }, - { - "epoch": 0.85, - "grad_norm": 2.924188954337587, - "learning_rate": 1.218747151139229e-06, - "loss": 0.1469, - "step": 16636 - }, - { - "epoch": 0.85, - "grad_norm": 1.1342662068725529, - "learning_rate": 1.2179593098013642e-06, - "loss": 0.1648, - "step": 16637 - }, - { - "epoch": 0.85, - "grad_norm": 1.391040675182654, - "learning_rate": 1.2171717066768518e-06, - "loss": 0.1583, - "step": 16638 - }, - { - "epoch": 0.85, - "grad_norm": 1.3017551674300645, - "learning_rate": 1.2163843417870503e-06, - "loss": 0.1599, - "step": 16639 - }, - { - "epoch": 0.85, - "grad_norm": 1.2893603271246814, - "learning_rate": 1.2155972151533225e-06, - "loss": 0.1635, - "step": 16640 - }, - { - "epoch": 0.85, - "grad_norm": 1.5560227204195407, - "learning_rate": 1.2148103267970135e-06, - "loss": 0.1589, - "step": 16641 - }, - { - "epoch": 0.85, - "grad_norm": 1.3221619669349556, - "learning_rate": 1.2140236767394708e-06, - "loss": 0.1865, - "step": 16642 - }, - { - "epoch": 0.85, - "grad_norm": 1.5757234214722537, - "learning_rate": 1.213237265002034e-06, - "loss": 0.1658, - "step": 16643 - }, - { - "epoch": 0.85, - "grad_norm": 1.029193912238693, - "learning_rate": 1.2124510916060307e-06, - "loss": 0.1572, - "step": 16644 - }, - { - "epoch": 0.85, - "grad_norm": 1.3440173887333735, - "learning_rate": 1.21166515657279e-06, - "loss": 0.1482, - "step": 16645 - }, - { - "epoch": 0.85, - "grad_norm": 4.427243578192571, - "learning_rate": 1.2108794599236262e-06, - "loss": 0.1679, - "step": 16646 - }, - { - "epoch": 0.85, - "grad_norm": 1.2071076339340914, - "learning_rate": 1.2100940016798558e-06, - "loss": 0.1848, - "step": 16647 - }, - { - "epoch": 0.85, - "grad_norm": 1.1556199995839302, - "learning_rate": 1.2093087818627801e-06, - "loss": 0.1781, - "step": 16648 - }, - { - "epoch": 0.85, - "grad_norm": 0.8756934847230738, - "learning_rate": 1.2085238004937017e-06, - "loss": 0.1563, - "step": 16649 - }, - { - "epoch": 0.85, - "grad_norm": 1.0378254794811703, - "learning_rate": 1.2077390575939097e-06, - "loss": 0.1684, - "step": 16650 - }, - { - "epoch": 0.85, - "grad_norm": 1.5403138077505762, - "learning_rate": 1.2069545531846926e-06, - "loss": 0.1441, - "step": 16651 - }, - { - "epoch": 0.85, - "grad_norm": 1.753894289360156, - "learning_rate": 1.2061702872873304e-06, - "loss": 0.1792, - "step": 16652 - }, - { - "epoch": 0.85, - "grad_norm": 0.9978037641503398, - "learning_rate": 1.205386259923097e-06, - "loss": 0.1488, - "step": 16653 - }, - { - "epoch": 0.85, - "grad_norm": 1.0916070622992498, - "learning_rate": 1.2046024711132564e-06, - "loss": 0.1893, - "step": 16654 - }, - { - "epoch": 0.85, - "grad_norm": 1.0336387552043056, - "learning_rate": 1.2038189208790718e-06, - "loss": 0.1595, - "step": 16655 - }, - { - "epoch": 0.85, - "grad_norm": 0.9740026133928388, - "learning_rate": 1.203035609241795e-06, - "loss": 0.1524, - "step": 16656 - }, - { - "epoch": 0.85, - "grad_norm": 1.2294148310206503, - "learning_rate": 1.2022525362226755e-06, - "loss": 0.1754, - "step": 16657 - }, - { - "epoch": 0.85, - "grad_norm": 0.9963912849910682, - "learning_rate": 1.201469701842951e-06, - "loss": 0.1686, - "step": 16658 - }, - { - "epoch": 0.85, - "grad_norm": 1.4602436474568625, - "learning_rate": 1.2006871061238578e-06, - "loss": 0.1516, - "step": 16659 - }, - { - "epoch": 0.85, - "grad_norm": 1.2083045646730413, - "learning_rate": 1.1999047490866255e-06, - "loss": 0.1754, - "step": 16660 - }, - { - "epoch": 0.85, - "grad_norm": 0.9319805806800955, - "learning_rate": 1.1991226307524727e-06, - "loss": 0.1651, - "step": 16661 - }, - { - "epoch": 0.85, - "grad_norm": 1.057164534609886, - "learning_rate": 1.198340751142617e-06, - "loss": 0.1414, - "step": 16662 - }, - { - "epoch": 0.85, - "grad_norm": 1.2254343247363502, - "learning_rate": 1.1975591102782635e-06, - "loss": 0.1509, - "step": 16663 - }, - { - "epoch": 0.85, - "grad_norm": 0.8658829631174367, - "learning_rate": 1.1967777081806187e-06, - "loss": 0.1448, - "step": 16664 - }, - { - "epoch": 0.85, - "grad_norm": 0.960613665093742, - "learning_rate": 1.1959965448708731e-06, - "loss": 0.1701, - "step": 16665 - }, - { - "epoch": 0.85, - "grad_norm": 1.748125493424765, - "learning_rate": 1.1952156203702215e-06, - "loss": 0.1563, - "step": 16666 - }, - { - "epoch": 0.85, - "grad_norm": 0.9075724124031925, - "learning_rate": 1.1944349346998407e-06, - "loss": 0.1496, - "step": 16667 - }, - { - "epoch": 0.85, - "grad_norm": 0.9928967707047157, - "learning_rate": 1.1936544878809097e-06, - "loss": 0.1748, - "step": 16668 - }, - { - "epoch": 0.85, - "grad_norm": 1.0413533784476476, - "learning_rate": 1.1928742799345982e-06, - "loss": 0.1767, - "step": 16669 - }, - { - "epoch": 0.85, - "grad_norm": 0.9206089006391661, - "learning_rate": 1.1920943108820714e-06, - "loss": 0.1706, - "step": 16670 - }, - { - "epoch": 0.85, - "grad_norm": 1.2401044893499937, - "learning_rate": 1.1913145807444815e-06, - "loss": 0.1882, - "step": 16671 - }, - { - "epoch": 0.85, - "grad_norm": 0.908453897796349, - "learning_rate": 1.1905350895429835e-06, - "loss": 0.1746, - "step": 16672 - }, - { - "epoch": 0.85, - "grad_norm": 0.8886683505554861, - "learning_rate": 1.1897558372987172e-06, - "loss": 0.1677, - "step": 16673 - }, - { - "epoch": 0.85, - "grad_norm": 1.4196567280330579, - "learning_rate": 1.1889768240328225e-06, - "loss": 0.1716, - "step": 16674 - }, - { - "epoch": 0.85, - "grad_norm": 1.0678208880582205, - "learning_rate": 1.1881980497664282e-06, - "loss": 0.1358, - "step": 16675 - }, - { - "epoch": 0.85, - "grad_norm": 1.0379333061722018, - "learning_rate": 1.1874195145206603e-06, - "loss": 0.1687, - "step": 16676 - }, - { - "epoch": 0.85, - "grad_norm": 1.126038895773892, - "learning_rate": 1.1866412183166343e-06, - "loss": 0.1762, - "step": 16677 - }, - { - "epoch": 0.85, - "grad_norm": 1.0333889541378998, - "learning_rate": 1.1858631611754623e-06, - "loss": 0.1668, - "step": 16678 - }, - { - "epoch": 0.85, - "grad_norm": 1.1298072797466057, - "learning_rate": 1.185085343118253e-06, - "loss": 0.1697, - "step": 16679 - }, - { - "epoch": 0.85, - "grad_norm": 0.7944560604697056, - "learning_rate": 1.1843077641660994e-06, - "loss": 0.1484, - "step": 16680 - }, - { - "epoch": 0.85, - "grad_norm": 1.622632460724044, - "learning_rate": 1.183530424340098e-06, - "loss": 0.1536, - "step": 16681 - }, - { - "epoch": 0.85, - "grad_norm": 1.2091299150586754, - "learning_rate": 1.1827533236613287e-06, - "loss": 0.1691, - "step": 16682 - }, - { - "epoch": 0.85, - "grad_norm": 1.0788931153139116, - "learning_rate": 1.1819764621508757e-06, - "loss": 0.1669, - "step": 16683 - }, - { - "epoch": 0.85, - "grad_norm": 1.4777706715450771, - "learning_rate": 1.1811998398298074e-06, - "loss": 0.1733, - "step": 16684 - }, - { - "epoch": 0.85, - "grad_norm": 0.9636743457847207, - "learning_rate": 1.1804234567191919e-06, - "loss": 0.1698, - "step": 16685 - }, - { - "epoch": 0.85, - "grad_norm": 0.93649863813618, - "learning_rate": 1.1796473128400888e-06, - "loss": 0.1561, - "step": 16686 - }, - { - "epoch": 0.85, - "grad_norm": 1.1509318525011094, - "learning_rate": 1.178871408213551e-06, - "loss": 0.1648, - "step": 16687 - }, - { - "epoch": 0.85, - "grad_norm": 1.1851623200092514, - "learning_rate": 1.1780957428606232e-06, - "loss": 0.16, - "step": 16688 - }, - { - "epoch": 0.85, - "grad_norm": 1.303285860586854, - "learning_rate": 1.1773203168023496e-06, - "loss": 0.1832, - "step": 16689 - }, - { - "epoch": 0.85, - "grad_norm": 1.5576203670591835, - "learning_rate": 1.1765451300597574e-06, - "loss": 0.1639, - "step": 16690 - }, - { - "epoch": 0.85, - "grad_norm": 1.3140735072944771, - "learning_rate": 1.1757701826538792e-06, - "loss": 0.1562, - "step": 16691 - }, - { - "epoch": 0.85, - "grad_norm": 1.0278507652035873, - "learning_rate": 1.1749954746057313e-06, - "loss": 0.166, - "step": 16692 - }, - { - "epoch": 0.85, - "grad_norm": 1.224398636284123, - "learning_rate": 1.1742210059363312e-06, - "loss": 0.1546, - "step": 16693 - }, - { - "epoch": 0.85, - "grad_norm": 1.26970783716945, - "learning_rate": 1.1734467766666835e-06, - "loss": 0.1856, - "step": 16694 - }, - { - "epoch": 0.85, - "grad_norm": 1.0206282492563024, - "learning_rate": 1.1726727868177902e-06, - "loss": 0.1519, - "step": 16695 - }, - { - "epoch": 0.85, - "grad_norm": 1.0267392565262938, - "learning_rate": 1.1718990364106476e-06, - "loss": 0.1599, - "step": 16696 - }, - { - "epoch": 0.85, - "grad_norm": 1.0081117309857632, - "learning_rate": 1.1711255254662413e-06, - "loss": 0.168, - "step": 16697 - }, - { - "epoch": 0.85, - "grad_norm": 1.3485046268234842, - "learning_rate": 1.1703522540055545e-06, - "loss": 0.1509, - "step": 16698 - }, - { - "epoch": 0.85, - "grad_norm": 1.1515961083542794, - "learning_rate": 1.1695792220495605e-06, - "loss": 0.1563, - "step": 16699 - }, - { - "epoch": 0.85, - "grad_norm": 1.1567499848884912, - "learning_rate": 1.1688064296192313e-06, - "loss": 0.1724, - "step": 16700 - }, - { - "epoch": 0.85, - "grad_norm": 1.7545024826691027, - "learning_rate": 1.1680338767355237e-06, - "loss": 0.1678, - "step": 16701 - }, - { - "epoch": 0.85, - "grad_norm": 0.988731933262323, - "learning_rate": 1.1672615634193961e-06, - "loss": 0.1494, - "step": 16702 - }, - { - "epoch": 0.85, - "grad_norm": 1.7795116724077813, - "learning_rate": 1.1664894896917966e-06, - "loss": 0.1538, - "step": 16703 - }, - { - "epoch": 0.85, - "grad_norm": 1.127248147568288, - "learning_rate": 1.1657176555736716e-06, - "loss": 0.182, - "step": 16704 - }, - { - "epoch": 0.85, - "grad_norm": 2.0378052019218376, - "learning_rate": 1.164946061085952e-06, - "loss": 0.1718, - "step": 16705 - }, - { - "epoch": 0.85, - "grad_norm": 1.1251788094051784, - "learning_rate": 1.1641747062495723e-06, - "loss": 0.1738, - "step": 16706 - }, - { - "epoch": 0.85, - "grad_norm": 1.0306360633053053, - "learning_rate": 1.163403591085449e-06, - "loss": 0.1581, - "step": 16707 - }, - { - "epoch": 0.85, - "grad_norm": 0.934544355171937, - "learning_rate": 1.1626327156145055e-06, - "loss": 0.1486, - "step": 16708 - }, - { - "epoch": 0.85, - "grad_norm": 1.7447446102909068, - "learning_rate": 1.1618620798576474e-06, - "loss": 0.1702, - "step": 16709 - }, - { - "epoch": 0.85, - "grad_norm": 1.399895022519527, - "learning_rate": 1.16109168383578e-06, - "loss": 0.1679, - "step": 16710 - }, - { - "epoch": 0.85, - "grad_norm": 1.2903232844612942, - "learning_rate": 1.1603215275697988e-06, - "loss": 0.1856, - "step": 16711 - }, - { - "epoch": 0.85, - "grad_norm": 1.024990843444502, - "learning_rate": 1.159551611080596e-06, - "loss": 0.1592, - "step": 16712 - }, - { - "epoch": 0.85, - "grad_norm": 0.9194915982988668, - "learning_rate": 1.1587819343890561e-06, - "loss": 0.1671, - "step": 16713 - }, - { - "epoch": 0.85, - "grad_norm": 1.1353129904878652, - "learning_rate": 1.1580124975160534e-06, - "loss": 0.1731, - "step": 16714 - }, - { - "epoch": 0.85, - "grad_norm": 1.0024954784880729, - "learning_rate": 1.1572433004824635e-06, - "loss": 0.1533, - "step": 16715 - }, - { - "epoch": 0.85, - "grad_norm": 1.0136144334892372, - "learning_rate": 1.1564743433091463e-06, - "loss": 0.1551, - "step": 16716 - }, - { - "epoch": 0.85, - "grad_norm": 1.0058020332612845, - "learning_rate": 1.1557056260169653e-06, - "loss": 0.177, - "step": 16717 - }, - { - "epoch": 0.85, - "grad_norm": 0.7741571728019853, - "learning_rate": 1.1549371486267646e-06, - "loss": 0.153, - "step": 16718 - }, - { - "epoch": 0.85, - "grad_norm": 1.121485078716398, - "learning_rate": 1.1541689111593969e-06, - "loss": 0.1623, - "step": 16719 - }, - { - "epoch": 0.85, - "grad_norm": 1.0291767208676794, - "learning_rate": 1.153400913635695e-06, - "loss": 0.1635, - "step": 16720 - }, - { - "epoch": 0.85, - "grad_norm": 1.1490953421687071, - "learning_rate": 1.1526331560764926e-06, - "loss": 0.1611, - "step": 16721 - }, - { - "epoch": 0.85, - "grad_norm": 1.15884448329287, - "learning_rate": 1.151865638502615e-06, - "loss": 0.1759, - "step": 16722 - }, - { - "epoch": 0.85, - "grad_norm": 1.0724547886666227, - "learning_rate": 1.1510983609348847e-06, - "loss": 0.1575, - "step": 16723 - }, - { - "epoch": 0.85, - "grad_norm": 0.9482165988645026, - "learning_rate": 1.1503313233941082e-06, - "loss": 0.1598, - "step": 16724 - }, - { - "epoch": 0.85, - "grad_norm": 0.9189930992394797, - "learning_rate": 1.1495645259010969e-06, - "loss": 0.16, - "step": 16725 - }, - { - "epoch": 0.85, - "grad_norm": 0.6922036695384618, - "learning_rate": 1.148797968476646e-06, - "loss": 0.1426, - "step": 16726 - }, - { - "epoch": 0.85, - "grad_norm": 1.3946996712571365, - "learning_rate": 1.1480316511415513e-06, - "loss": 0.1821, - "step": 16727 - }, - { - "epoch": 0.85, - "grad_norm": 0.9618638738746309, - "learning_rate": 1.1472655739165961e-06, - "loss": 0.1481, - "step": 16728 - }, - { - "epoch": 0.85, - "grad_norm": 1.9534958513144793, - "learning_rate": 1.1464997368225629e-06, - "loss": 0.1691, - "step": 16729 - }, - { - "epoch": 0.85, - "grad_norm": 1.0621077501576741, - "learning_rate": 1.1457341398802269e-06, - "loss": 0.167, - "step": 16730 - }, - { - "epoch": 0.85, - "grad_norm": 1.1239092189624629, - "learning_rate": 1.1449687831103495e-06, - "loss": 0.1778, - "step": 16731 - }, - { - "epoch": 0.85, - "grad_norm": 1.1674994384296928, - "learning_rate": 1.1442036665336953e-06, - "loss": 0.1806, - "step": 16732 - }, - { - "epoch": 0.85, - "grad_norm": 0.9455081777025037, - "learning_rate": 1.1434387901710164e-06, - "loss": 0.1679, - "step": 16733 - }, - { - "epoch": 0.85, - "grad_norm": 1.0255477263215511, - "learning_rate": 1.142674154043062e-06, - "loss": 0.1815, - "step": 16734 - }, - { - "epoch": 0.85, - "grad_norm": 0.9171922536179344, - "learning_rate": 1.1419097581705686e-06, - "loss": 0.1512, - "step": 16735 - }, - { - "epoch": 0.85, - "grad_norm": 1.0202324495736854, - "learning_rate": 1.1411456025742763e-06, - "loss": 0.1703, - "step": 16736 - }, - { - "epoch": 0.85, - "grad_norm": 0.8282685116494622, - "learning_rate": 1.1403816872749074e-06, - "loss": 0.1707, - "step": 16737 - }, - { - "epoch": 0.85, - "grad_norm": 1.3741495687109866, - "learning_rate": 1.1396180122931854e-06, - "loss": 0.1663, - "step": 16738 - }, - { - "epoch": 0.85, - "grad_norm": 0.944588868826035, - "learning_rate": 1.1388545776498262e-06, - "loss": 0.1568, - "step": 16739 - }, - { - "epoch": 0.85, - "grad_norm": 0.9649547498178261, - "learning_rate": 1.1380913833655383e-06, - "loss": 0.1646, - "step": 16740 - }, - { - "epoch": 0.85, - "grad_norm": 1.0557333070394006, - "learning_rate": 1.13732842946102e-06, - "loss": 0.1941, - "step": 16741 - }, - { - "epoch": 0.85, - "grad_norm": 1.2316283911922998, - "learning_rate": 1.136565715956971e-06, - "loss": 0.1602, - "step": 16742 - }, - { - "epoch": 0.85, - "grad_norm": 1.1441727807938522, - "learning_rate": 1.1358032428740763e-06, - "loss": 0.1568, - "step": 16743 - }, - { - "epoch": 0.85, - "grad_norm": 1.5479886581033788, - "learning_rate": 1.13504101023302e-06, - "loss": 0.1602, - "step": 16744 - }, - { - "epoch": 0.85, - "grad_norm": 0.934032607491036, - "learning_rate": 1.134279018054475e-06, - "loss": 0.1683, - "step": 16745 - }, - { - "epoch": 0.85, - "grad_norm": 1.0609235864393567, - "learning_rate": 1.1335172663591155e-06, - "loss": 0.161, - "step": 16746 - }, - { - "epoch": 0.85, - "grad_norm": 1.6371308461071334, - "learning_rate": 1.1327557551675983e-06, - "loss": 0.1723, - "step": 16747 - }, - { - "epoch": 0.85, - "grad_norm": 1.1378631228418552, - "learning_rate": 1.1319944845005815e-06, - "loss": 0.1641, - "step": 16748 - }, - { - "epoch": 0.85, - "grad_norm": 1.0222296313110266, - "learning_rate": 1.1312334543787185e-06, - "loss": 0.1712, - "step": 16749 - }, - { - "epoch": 0.85, - "grad_norm": 0.9525488302209721, - "learning_rate": 1.130472664822646e-06, - "loss": 0.164, - "step": 16750 - }, - { - "epoch": 0.85, - "grad_norm": 1.048597087332812, - "learning_rate": 1.1297121158530056e-06, - "loss": 0.1345, - "step": 16751 - }, - { - "epoch": 0.85, - "grad_norm": 0.9475471414124667, - "learning_rate": 1.1289518074904227e-06, - "loss": 0.1838, - "step": 16752 - }, - { - "epoch": 0.85, - "grad_norm": 0.9362631929645722, - "learning_rate": 1.1281917397555253e-06, - "loss": 0.1598, - "step": 16753 - }, - { - "epoch": 0.85, - "grad_norm": 0.9116553796982717, - "learning_rate": 1.127431912668926e-06, - "loss": 0.1609, - "step": 16754 - }, - { - "epoch": 0.85, - "grad_norm": 0.9891970094191186, - "learning_rate": 1.126672326251238e-06, - "loss": 0.1706, - "step": 16755 - }, - { - "epoch": 0.85, - "grad_norm": 1.4497468118338226, - "learning_rate": 1.125912980523064e-06, - "loss": 0.1674, - "step": 16756 - }, - { - "epoch": 0.85, - "grad_norm": 1.0089683525593092, - "learning_rate": 1.1251538755050029e-06, - "loss": 0.1587, - "step": 16757 - }, - { - "epoch": 0.85, - "grad_norm": 0.8990294325244518, - "learning_rate": 1.1243950112176428e-06, - "loss": 0.1645, - "step": 16758 - }, - { - "epoch": 0.85, - "grad_norm": 0.9831129016045187, - "learning_rate": 1.1236363876815705e-06, - "loss": 0.1588, - "step": 16759 - }, - { - "epoch": 0.85, - "grad_norm": 1.0042388056429106, - "learning_rate": 1.1228780049173616e-06, - "loss": 0.1534, - "step": 16760 - }, - { - "epoch": 0.85, - "grad_norm": 1.0436397492912155, - "learning_rate": 1.1221198629455898e-06, - "loss": 0.1647, - "step": 16761 - }, - { - "epoch": 0.85, - "grad_norm": 0.9136767477797114, - "learning_rate": 1.1213619617868154e-06, - "loss": 0.1388, - "step": 16762 - }, - { - "epoch": 0.85, - "grad_norm": 0.8794550315521854, - "learning_rate": 1.120604301461602e-06, - "loss": 0.1854, - "step": 16763 - }, - { - "epoch": 0.85, - "grad_norm": 1.1904734038906932, - "learning_rate": 1.1198468819904962e-06, - "loss": 0.1649, - "step": 16764 - }, - { - "epoch": 0.85, - "grad_norm": 1.0325953177890068, - "learning_rate": 1.1190897033940461e-06, - "loss": 0.166, - "step": 16765 - }, - { - "epoch": 0.85, - "grad_norm": 1.0705206526153064, - "learning_rate": 1.11833276569279e-06, - "loss": 0.1667, - "step": 16766 - }, - { - "epoch": 0.85, - "grad_norm": 0.949411680260893, - "learning_rate": 1.117576068907258e-06, - "loss": 0.166, - "step": 16767 - }, - { - "epoch": 0.85, - "grad_norm": 0.9224277427372204, - "learning_rate": 1.116819613057979e-06, - "loss": 0.1555, - "step": 16768 - }, - { - "epoch": 0.85, - "grad_norm": 1.0286982699631861, - "learning_rate": 1.1160633981654679e-06, - "loss": 0.163, - "step": 16769 - }, - { - "epoch": 0.85, - "grad_norm": 0.9503726540756631, - "learning_rate": 1.1153074242502404e-06, - "loss": 0.1821, - "step": 16770 - }, - { - "epoch": 0.85, - "grad_norm": 1.0626090541232485, - "learning_rate": 1.1145516913327991e-06, - "loss": 0.1817, - "step": 16771 - }, - { - "epoch": 0.85, - "grad_norm": 0.8112805845435304, - "learning_rate": 1.1137961994336467e-06, - "loss": 0.1458, - "step": 16772 - }, - { - "epoch": 0.85, - "grad_norm": 3.6616339502407302, - "learning_rate": 1.1130409485732718e-06, - "loss": 0.1719, - "step": 16773 - }, - { - "epoch": 0.85, - "grad_norm": 1.4096929572061465, - "learning_rate": 1.112285938772164e-06, - "loss": 0.1431, - "step": 16774 - }, - { - "epoch": 0.85, - "grad_norm": 1.1876330357447296, - "learning_rate": 1.1115311700508026e-06, - "loss": 0.1736, - "step": 16775 - }, - { - "epoch": 0.85, - "grad_norm": 0.8458628499040595, - "learning_rate": 1.1107766424296606e-06, - "loss": 0.1515, - "step": 16776 - }, - { - "epoch": 0.85, - "grad_norm": 0.9829250335898947, - "learning_rate": 1.1100223559292035e-06, - "loss": 0.1588, - "step": 16777 - }, - { - "epoch": 0.85, - "grad_norm": 0.9993965169500876, - "learning_rate": 1.1092683105698943e-06, - "loss": 0.1751, - "step": 16778 - }, - { - "epoch": 0.85, - "grad_norm": 1.0923849752085002, - "learning_rate": 1.1085145063721814e-06, - "loss": 0.1757, - "step": 16779 - }, - { - "epoch": 0.85, - "grad_norm": 1.5467147394843548, - "learning_rate": 1.1077609433565173e-06, - "loss": 0.1703, - "step": 16780 - }, - { - "epoch": 0.85, - "grad_norm": 1.8122282454607503, - "learning_rate": 1.1070076215433367e-06, - "loss": 0.1697, - "step": 16781 - }, - { - "epoch": 0.85, - "grad_norm": 1.49125635176288, - "learning_rate": 1.1062545409530778e-06, - "loss": 0.1824, - "step": 16782 - }, - { - "epoch": 0.85, - "grad_norm": 1.0610533686743882, - "learning_rate": 1.1055017016061687e-06, - "loss": 0.1772, - "step": 16783 - }, - { - "epoch": 0.85, - "grad_norm": 0.9301093497530798, - "learning_rate": 1.1047491035230262e-06, - "loss": 0.1504, - "step": 16784 - }, - { - "epoch": 0.85, - "grad_norm": 1.229501628580653, - "learning_rate": 1.1039967467240687e-06, - "loss": 0.1473, - "step": 16785 - }, - { - "epoch": 0.85, - "grad_norm": 1.2435714542781264, - "learning_rate": 1.1032446312296995e-06, - "loss": 0.154, - "step": 16786 - }, - { - "epoch": 0.85, - "grad_norm": 0.9557053053008547, - "learning_rate": 1.102492757060325e-06, - "loss": 0.167, - "step": 16787 - }, - { - "epoch": 0.85, - "grad_norm": 2.9880025827983654, - "learning_rate": 1.1017411242363341e-06, - "loss": 0.147, - "step": 16788 - }, - { - "epoch": 0.85, - "grad_norm": 0.9448609802832272, - "learning_rate": 1.1009897327781204e-06, - "loss": 0.1805, - "step": 16789 - }, - { - "epoch": 0.85, - "grad_norm": 1.1482386908142481, - "learning_rate": 1.1002385827060602e-06, - "loss": 0.1621, - "step": 16790 - }, - { - "epoch": 0.85, - "grad_norm": 1.6367255102843754, - "learning_rate": 1.0994876740405314e-06, - "loss": 0.1771, - "step": 16791 - }, - { - "epoch": 0.85, - "grad_norm": 0.8989530912050467, - "learning_rate": 1.0987370068019021e-06, - "loss": 0.1603, - "step": 16792 - }, - { - "epoch": 0.85, - "grad_norm": 1.1553896505928736, - "learning_rate": 1.0979865810105371e-06, - "loss": 0.1405, - "step": 16793 - }, - { - "epoch": 0.85, - "grad_norm": 1.0036497212360274, - "learning_rate": 1.0972363966867861e-06, - "loss": 0.1559, - "step": 16794 - }, - { - "epoch": 0.85, - "grad_norm": 1.6289270363632566, - "learning_rate": 1.0964864538510022e-06, - "loss": 0.1923, - "step": 16795 - }, - { - "epoch": 0.85, - "grad_norm": 0.9544763364485803, - "learning_rate": 1.095736752523525e-06, - "loss": 0.1522, - "step": 16796 - }, - { - "epoch": 0.85, - "grad_norm": 1.0078909004640204, - "learning_rate": 1.094987292724693e-06, - "loss": 0.165, - "step": 16797 - }, - { - "epoch": 0.85, - "grad_norm": 1.453663246336247, - "learning_rate": 1.0942380744748315e-06, - "loss": 0.1784, - "step": 16798 - }, - { - "epoch": 0.85, - "grad_norm": 1.6144793012492105, - "learning_rate": 1.0934890977942646e-06, - "loss": 0.1675, - "step": 16799 - }, - { - "epoch": 0.85, - "grad_norm": 0.9762171377498543, - "learning_rate": 1.0927403627033129e-06, - "loss": 0.1633, - "step": 16800 - }, - { - "epoch": 0.85, - "grad_norm": 1.2662144545477028, - "learning_rate": 1.0919918692222785e-06, - "loss": 0.1654, - "step": 16801 - }, - { - "epoch": 0.85, - "grad_norm": 1.0651887956028305, - "learning_rate": 1.091243617371469e-06, - "loss": 0.1502, - "step": 16802 - }, - { - "epoch": 0.85, - "grad_norm": 0.8561373873884675, - "learning_rate": 1.0904956071711792e-06, - "loss": 0.1681, - "step": 16803 - }, - { - "epoch": 0.85, - "grad_norm": 1.7538486873095505, - "learning_rate": 1.0897478386417003e-06, - "loss": 0.1783, - "step": 16804 - }, - { - "epoch": 0.85, - "grad_norm": 1.043431297504966, - "learning_rate": 1.0890003118033132e-06, - "loss": 0.1787, - "step": 16805 - }, - { - "epoch": 0.85, - "grad_norm": 0.8472181882913274, - "learning_rate": 1.088253026676297e-06, - "loss": 0.1628, - "step": 16806 - }, - { - "epoch": 0.85, - "grad_norm": 0.9808913031348376, - "learning_rate": 1.08750598328092e-06, - "loss": 0.1607, - "step": 16807 - }, - { - "epoch": 0.85, - "grad_norm": 1.0819924372854248, - "learning_rate": 1.0867591816374456e-06, - "loss": 0.1702, - "step": 16808 - }, - { - "epoch": 0.85, - "grad_norm": 0.9482495395134791, - "learning_rate": 1.0860126217661326e-06, - "loss": 0.1651, - "step": 16809 - }, - { - "epoch": 0.85, - "grad_norm": 1.2523002163974752, - "learning_rate": 1.0852663036872324e-06, - "loss": 0.1727, - "step": 16810 - }, - { - "epoch": 0.85, - "grad_norm": 0.8939924425722398, - "learning_rate": 1.0845202274209842e-06, - "loss": 0.1637, - "step": 16811 - }, - { - "epoch": 0.85, - "grad_norm": 0.9276742350222884, - "learning_rate": 1.0837743929876321e-06, - "loss": 0.162, - "step": 16812 - }, - { - "epoch": 0.85, - "grad_norm": 0.9346752169512902, - "learning_rate": 1.0830288004073997e-06, - "loss": 0.1653, - "step": 16813 - }, - { - "epoch": 0.86, - "grad_norm": 0.8831929316357998, - "learning_rate": 1.0822834497005174e-06, - "loss": 0.1514, - "step": 16814 - }, - { - "epoch": 0.86, - "grad_norm": 0.8418644534814382, - "learning_rate": 1.0815383408871983e-06, - "loss": 0.1696, - "step": 16815 - }, - { - "epoch": 0.86, - "grad_norm": 0.9462951060958208, - "learning_rate": 1.080793473987657e-06, - "loss": 0.1689, - "step": 16816 - }, - { - "epoch": 0.86, - "grad_norm": 1.1273647831520743, - "learning_rate": 1.080048849022095e-06, - "loss": 0.1578, - "step": 16817 - }, - { - "epoch": 0.86, - "grad_norm": 1.064506698976965, - "learning_rate": 1.079304466010712e-06, - "loss": 0.152, - "step": 16818 - }, - { - "epoch": 0.86, - "grad_norm": 0.9350482498821943, - "learning_rate": 1.0785603249737008e-06, - "loss": 0.1572, - "step": 16819 - }, - { - "epoch": 0.86, - "grad_norm": 0.8666707172937299, - "learning_rate": 1.0778164259312418e-06, - "loss": 0.1439, - "step": 16820 - }, - { - "epoch": 0.86, - "grad_norm": 1.2224856485223698, - "learning_rate": 1.0770727689035198e-06, - "loss": 0.163, - "step": 16821 - }, - { - "epoch": 0.86, - "grad_norm": 1.5428579120628447, - "learning_rate": 1.0763293539107e-06, - "loss": 0.1616, - "step": 16822 - }, - { - "epoch": 0.86, - "grad_norm": 1.1434133419606083, - "learning_rate": 1.0755861809729518e-06, - "loss": 0.1518, - "step": 16823 - }, - { - "epoch": 0.86, - "grad_norm": 0.8105189335585022, - "learning_rate": 1.0748432501104322e-06, - "loss": 0.1448, - "step": 16824 - }, - { - "epoch": 0.86, - "grad_norm": 1.9990081041082408, - "learning_rate": 1.074100561343292e-06, - "loss": 0.171, - "step": 16825 - }, - { - "epoch": 0.86, - "grad_norm": 1.9316776439039638, - "learning_rate": 1.0733581146916793e-06, - "loss": 0.1706, - "step": 16826 - }, - { - "epoch": 0.86, - "grad_norm": 0.9572566077695434, - "learning_rate": 1.0726159101757327e-06, - "loss": 0.1766, - "step": 16827 - }, - { - "epoch": 0.86, - "grad_norm": 1.2773673420161995, - "learning_rate": 1.0718739478155827e-06, - "loss": 0.1915, - "step": 16828 - }, - { - "epoch": 0.86, - "grad_norm": 0.9132114296361398, - "learning_rate": 1.0711322276313586e-06, - "loss": 0.1636, - "step": 16829 - }, - { - "epoch": 0.86, - "grad_norm": 0.8430574453562424, - "learning_rate": 1.0703907496431743e-06, - "loss": 0.1673, - "step": 16830 - }, - { - "epoch": 0.86, - "grad_norm": 1.9082397791162895, - "learning_rate": 1.0696495138711472e-06, - "loss": 0.1566, - "step": 16831 - }, - { - "epoch": 0.86, - "grad_norm": 1.021608843054946, - "learning_rate": 1.06890852033538e-06, - "loss": 0.1549, - "step": 16832 - }, - { - "epoch": 0.86, - "grad_norm": 1.528181675833744, - "learning_rate": 1.0681677690559743e-06, - "loss": 0.1617, - "step": 16833 - }, - { - "epoch": 0.86, - "grad_norm": 1.0214552074791603, - "learning_rate": 1.0674272600530223e-06, - "loss": 0.1514, - "step": 16834 - }, - { - "epoch": 0.86, - "grad_norm": 1.0212046421885956, - "learning_rate": 1.0666869933466085e-06, - "loss": 0.1613, - "step": 16835 - }, - { - "epoch": 0.86, - "grad_norm": 1.9574791998002623, - "learning_rate": 1.065946968956818e-06, - "loss": 0.1637, - "step": 16836 - }, - { - "epoch": 0.86, - "grad_norm": 1.0040720899152797, - "learning_rate": 1.0652071869037172e-06, - "loss": 0.1667, - "step": 16837 - }, - { - "epoch": 0.86, - "grad_norm": 2.9335152830552658, - "learning_rate": 1.0644676472073789e-06, - "loss": 0.1617, - "step": 16838 - }, - { - "epoch": 0.86, - "grad_norm": 1.1742512919227117, - "learning_rate": 1.0637283498878592e-06, - "loss": 0.1684, - "step": 16839 - }, - { - "epoch": 0.86, - "grad_norm": 1.1858526578379234, - "learning_rate": 1.0629892949652133e-06, - "loss": 0.1756, - "step": 16840 - }, - { - "epoch": 0.86, - "grad_norm": 1.1309226874477336, - "learning_rate": 1.0622504824594859e-06, - "loss": 0.1786, - "step": 16841 - }, - { - "epoch": 0.86, - "grad_norm": 0.9353874388661649, - "learning_rate": 1.0615119123907214e-06, - "loss": 0.1552, - "step": 16842 - }, - { - "epoch": 0.86, - "grad_norm": 1.8959094708241735, - "learning_rate": 1.060773584778949e-06, - "loss": 0.1619, - "step": 16843 - }, - { - "epoch": 0.86, - "grad_norm": 1.089295782757551, - "learning_rate": 1.0600354996441986e-06, - "loss": 0.168, - "step": 16844 - }, - { - "epoch": 0.86, - "grad_norm": 0.9320442391189215, - "learning_rate": 1.0592976570064894e-06, - "loss": 0.1626, - "step": 16845 - }, - { - "epoch": 0.86, - "grad_norm": 1.1291690300178296, - "learning_rate": 1.058560056885838e-06, - "loss": 0.1674, - "step": 16846 - }, - { - "epoch": 0.86, - "grad_norm": 1.0781302845432605, - "learning_rate": 1.0578226993022488e-06, - "loss": 0.1682, - "step": 16847 - }, - { - "epoch": 0.86, - "grad_norm": 0.8344528027416395, - "learning_rate": 1.0570855842757255e-06, - "loss": 0.1715, - "step": 16848 - }, - { - "epoch": 0.86, - "grad_norm": 0.9485026795896946, - "learning_rate": 1.0563487118262583e-06, - "loss": 0.1544, - "step": 16849 - }, - { - "epoch": 0.86, - "grad_norm": 1.2366380866579516, - "learning_rate": 1.0556120819738403e-06, - "loss": 0.1746, - "step": 16850 - }, - { - "epoch": 0.86, - "grad_norm": 0.8921220522014538, - "learning_rate": 1.0548756947384475e-06, - "loss": 0.1773, - "step": 16851 - }, - { - "epoch": 0.86, - "grad_norm": 1.2807101360981994, - "learning_rate": 1.0541395501400564e-06, - "loss": 0.1778, - "step": 16852 - }, - { - "epoch": 0.86, - "grad_norm": 0.9657410535613355, - "learning_rate": 1.0534036481986375e-06, - "loss": 0.1783, - "step": 16853 - }, - { - "epoch": 0.86, - "grad_norm": 1.1306778682370806, - "learning_rate": 1.0526679889341484e-06, - "loss": 0.176, - "step": 16854 - }, - { - "epoch": 0.86, - "grad_norm": 0.8940590385512903, - "learning_rate": 1.0519325723665463e-06, - "loss": 0.1595, - "step": 16855 - }, - { - "epoch": 0.86, - "grad_norm": 1.067914678261924, - "learning_rate": 1.0511973985157775e-06, - "loss": 0.1704, - "step": 16856 - }, - { - "epoch": 0.86, - "grad_norm": 1.20909021403613, - "learning_rate": 1.0504624674017872e-06, - "loss": 0.1612, - "step": 16857 - }, - { - "epoch": 0.86, - "grad_norm": 1.0323079356985376, - "learning_rate": 1.0497277790445048e-06, - "loss": 0.1611, - "step": 16858 - }, - { - "epoch": 0.86, - "grad_norm": 0.8631782867591103, - "learning_rate": 1.0489933334638648e-06, - "loss": 0.1794, - "step": 16859 - }, - { - "epoch": 0.86, - "grad_norm": 1.1566319344478255, - "learning_rate": 1.0482591306797829e-06, - "loss": 0.1809, - "step": 16860 - }, - { - "epoch": 0.86, - "grad_norm": 1.1401885609706566, - "learning_rate": 1.0475251707121791e-06, - "loss": 0.1381, - "step": 16861 - }, - { - "epoch": 0.86, - "grad_norm": 1.5511236998777376, - "learning_rate": 1.0467914535809599e-06, - "loss": 0.152, - "step": 16862 - }, - { - "epoch": 0.86, - "grad_norm": 1.5753143634265765, - "learning_rate": 1.04605797930603e-06, - "loss": 0.1685, - "step": 16863 - }, - { - "epoch": 0.86, - "grad_norm": 1.1623307582591522, - "learning_rate": 1.0453247479072814e-06, - "loss": 0.1507, - "step": 16864 - }, - { - "epoch": 0.86, - "grad_norm": 1.1270531847545042, - "learning_rate": 1.0445917594046073e-06, - "loss": 0.1511, - "step": 16865 - }, - { - "epoch": 0.86, - "grad_norm": 2.0339182269036757, - "learning_rate": 1.043859013817885e-06, - "loss": 0.165, - "step": 16866 - }, - { - "epoch": 0.86, - "grad_norm": 1.0908660060529582, - "learning_rate": 1.0431265111669952e-06, - "loss": 0.152, - "step": 16867 - }, - { - "epoch": 0.86, - "grad_norm": 1.0022956563358218, - "learning_rate": 1.0423942514718043e-06, - "loss": 0.1529, - "step": 16868 - }, - { - "epoch": 0.86, - "grad_norm": 0.8736893626239377, - "learning_rate": 1.0416622347521732e-06, - "loss": 0.1714, - "step": 16869 - }, - { - "epoch": 0.86, - "grad_norm": 2.6477719177196377, - "learning_rate": 1.0409304610279603e-06, - "loss": 0.174, - "step": 16870 - }, - { - "epoch": 0.86, - "grad_norm": 1.3499421597610715, - "learning_rate": 1.0401989303190141e-06, - "loss": 0.1519, - "step": 16871 - }, - { - "epoch": 0.86, - "grad_norm": 1.1109980178568921, - "learning_rate": 1.039467642645181e-06, - "loss": 0.1718, - "step": 16872 - }, - { - "epoch": 0.86, - "grad_norm": 1.226219785772608, - "learning_rate": 1.038736598026291e-06, - "loss": 0.1719, - "step": 16873 - }, - { - "epoch": 0.86, - "grad_norm": 1.439364984206557, - "learning_rate": 1.03800579648218e-06, - "loss": 0.1446, - "step": 16874 - }, - { - "epoch": 0.86, - "grad_norm": 1.1988578257912608, - "learning_rate": 1.0372752380326645e-06, - "loss": 0.1809, - "step": 16875 - }, - { - "epoch": 0.86, - "grad_norm": 1.5934155252133868, - "learning_rate": 1.0365449226975677e-06, - "loss": 0.1803, - "step": 16876 - }, - { - "epoch": 0.86, - "grad_norm": 1.1377548167853007, - "learning_rate": 1.0358148504966935e-06, - "loss": 0.1548, - "step": 16877 - }, - { - "epoch": 0.86, - "grad_norm": 1.436773878572512, - "learning_rate": 1.0350850214498486e-06, - "loss": 0.1562, - "step": 16878 - }, - { - "epoch": 0.86, - "grad_norm": 0.9801581415679828, - "learning_rate": 1.0343554355768282e-06, - "loss": 0.171, - "step": 16879 - }, - { - "epoch": 0.86, - "grad_norm": 0.7754791213662016, - "learning_rate": 1.0336260928974252e-06, - "loss": 0.1658, - "step": 16880 - }, - { - "epoch": 0.86, - "grad_norm": 1.0383938709850917, - "learning_rate": 1.0328969934314181e-06, - "loss": 0.1607, - "step": 16881 - }, - { - "epoch": 0.86, - "grad_norm": 0.8080092997766583, - "learning_rate": 1.0321681371985892e-06, - "loss": 0.1422, - "step": 16882 - }, - { - "epoch": 0.86, - "grad_norm": 1.0266233977427306, - "learning_rate": 1.0314395242187037e-06, - "loss": 0.1649, - "step": 16883 - }, - { - "epoch": 0.86, - "grad_norm": 1.4384666085729678, - "learning_rate": 1.0307111545115301e-06, - "loss": 0.1672, - "step": 16884 - }, - { - "epoch": 0.86, - "grad_norm": 1.1492817195259588, - "learning_rate": 1.0299830280968205e-06, - "loss": 0.1709, - "step": 16885 - }, - { - "epoch": 0.86, - "grad_norm": 1.0985167211243825, - "learning_rate": 1.02925514499433e-06, - "loss": 0.1668, - "step": 16886 - }, - { - "epoch": 0.86, - "grad_norm": 1.032134037777604, - "learning_rate": 1.0285275052237987e-06, - "loss": 0.1665, - "step": 16887 - }, - { - "epoch": 0.86, - "grad_norm": 1.078501377404676, - "learning_rate": 1.027800108804966e-06, - "loss": 0.1602, - "step": 16888 - }, - { - "epoch": 0.86, - "grad_norm": 0.9876329553773102, - "learning_rate": 1.027072955757563e-06, - "loss": 0.1541, - "step": 16889 - }, - { - "epoch": 0.86, - "grad_norm": 1.0754178281906779, - "learning_rate": 1.026346046101312e-06, - "loss": 0.1579, - "step": 16890 - }, - { - "epoch": 0.86, - "grad_norm": 2.2273783014245425, - "learning_rate": 1.0256193798559322e-06, - "loss": 0.169, - "step": 16891 - }, - { - "epoch": 0.86, - "grad_norm": 1.2721252163758574, - "learning_rate": 1.0248929570411327e-06, - "loss": 0.1998, - "step": 16892 - }, - { - "epoch": 0.86, - "grad_norm": 0.9013353020840622, - "learning_rate": 1.0241667776766196e-06, - "loss": 0.1588, - "step": 16893 - }, - { - "epoch": 0.86, - "grad_norm": 0.9338838445718988, - "learning_rate": 1.0234408417820884e-06, - "loss": 0.151, - "step": 16894 - }, - { - "epoch": 0.86, - "grad_norm": 1.1278581521392987, - "learning_rate": 1.0227151493772324e-06, - "loss": 0.1667, - "step": 16895 - }, - { - "epoch": 0.86, - "grad_norm": 1.0304483129985431, - "learning_rate": 1.0219897004817337e-06, - "loss": 0.1555, - "step": 16896 - }, - { - "epoch": 0.86, - "grad_norm": 1.1925611182107805, - "learning_rate": 1.0212644951152718e-06, - "loss": 0.1734, - "step": 16897 - }, - { - "epoch": 0.86, - "grad_norm": 1.005882606197199, - "learning_rate": 1.0205395332975165e-06, - "loss": 0.1622, - "step": 16898 - }, - { - "epoch": 0.86, - "grad_norm": 1.0572915878224836, - "learning_rate": 1.019814815048137e-06, - "loss": 0.1744, - "step": 16899 - }, - { - "epoch": 0.86, - "grad_norm": 0.9945705083558409, - "learning_rate": 1.0190903403867847e-06, - "loss": 0.1624, - "step": 16900 - }, - { - "epoch": 0.86, - "grad_norm": 1.3335322820199638, - "learning_rate": 1.0183661093331165e-06, - "loss": 0.1597, - "step": 16901 - }, - { - "epoch": 0.86, - "grad_norm": 0.8729913239957138, - "learning_rate": 1.0176421219067734e-06, - "loss": 0.1777, - "step": 16902 - }, - { - "epoch": 0.86, - "grad_norm": 1.4870799694049563, - "learning_rate": 1.0169183781273962e-06, - "loss": 0.1648, - "step": 16903 - }, - { - "epoch": 0.86, - "grad_norm": 1.0104638473649, - "learning_rate": 1.0161948780146136e-06, - "loss": 0.1772, - "step": 16904 - }, - { - "epoch": 0.86, - "grad_norm": 1.696486495966895, - "learning_rate": 1.0154716215880523e-06, - "loss": 0.1871, - "step": 16905 - }, - { - "epoch": 0.86, - "grad_norm": 0.9349237651600266, - "learning_rate": 1.014748608867333e-06, - "loss": 0.1545, - "step": 16906 - }, - { - "epoch": 0.86, - "grad_norm": 1.1359011223613704, - "learning_rate": 1.0140258398720625e-06, - "loss": 0.1712, - "step": 16907 - }, - { - "epoch": 0.86, - "grad_norm": 1.0986364477647335, - "learning_rate": 1.0133033146218518e-06, - "loss": 0.1805, - "step": 16908 - }, - { - "epoch": 0.86, - "grad_norm": 0.7640555565366878, - "learning_rate": 1.012581033136294e-06, - "loss": 0.1664, - "step": 16909 - }, - { - "epoch": 0.86, - "grad_norm": 1.2273472753204244, - "learning_rate": 1.0118589954349845e-06, - "loss": 0.1764, - "step": 16910 - }, - { - "epoch": 0.86, - "grad_norm": 1.0131939116632758, - "learning_rate": 1.0111372015375054e-06, - "loss": 0.1775, - "step": 16911 - }, - { - "epoch": 0.86, - "grad_norm": 0.7986491351427615, - "learning_rate": 1.01041565146344e-06, - "loss": 0.1536, - "step": 16912 - }, - { - "epoch": 0.86, - "grad_norm": 1.1023363192896058, - "learning_rate": 1.009694345232356e-06, - "loss": 0.1632, - "step": 16913 - }, - { - "epoch": 0.86, - "grad_norm": 1.0947374161202323, - "learning_rate": 1.00897328286382e-06, - "loss": 0.1689, - "step": 16914 - }, - { - "epoch": 0.86, - "grad_norm": 1.305555176945722, - "learning_rate": 1.0082524643773916e-06, - "loss": 0.1832, - "step": 16915 - }, - { - "epoch": 0.86, - "grad_norm": 0.9597581032536183, - "learning_rate": 1.0075318897926255e-06, - "loss": 0.1506, - "step": 16916 - }, - { - "epoch": 0.86, - "grad_norm": 0.8767785279661219, - "learning_rate": 1.0068115591290628e-06, - "loss": 0.1808, - "step": 16917 - }, - { - "epoch": 0.86, - "grad_norm": 0.9193785028161882, - "learning_rate": 1.0060914724062454e-06, - "loss": 0.1582, - "step": 16918 - }, - { - "epoch": 0.86, - "grad_norm": 0.8982585434629353, - "learning_rate": 1.0053716296437034e-06, - "loss": 0.1622, - "step": 16919 - }, - { - "epoch": 0.86, - "grad_norm": 0.9873719152167707, - "learning_rate": 1.0046520308609664e-06, - "loss": 0.1576, - "step": 16920 - }, - { - "epoch": 0.86, - "grad_norm": 0.9879782842011524, - "learning_rate": 1.0039326760775492e-06, - "loss": 0.1362, - "step": 16921 - }, - { - "epoch": 0.86, - "grad_norm": 0.9697752776839753, - "learning_rate": 1.003213565312966e-06, - "loss": 0.1545, - "step": 16922 - }, - { - "epoch": 0.86, - "grad_norm": 0.8439307478501283, - "learning_rate": 1.0024946985867244e-06, - "loss": 0.1654, - "step": 16923 - }, - { - "epoch": 0.86, - "grad_norm": 0.9783270450751373, - "learning_rate": 1.0017760759183203e-06, - "loss": 0.1753, - "step": 16924 - }, - { - "epoch": 0.86, - "grad_norm": 1.0366479590941498, - "learning_rate": 1.0010576973272512e-06, - "loss": 0.1519, - "step": 16925 - }, - { - "epoch": 0.86, - "grad_norm": 1.4252173197466278, - "learning_rate": 1.0003395628329982e-06, - "loss": 0.1745, - "step": 16926 - }, - { - "epoch": 0.86, - "grad_norm": 1.0046565519981416, - "learning_rate": 9.996216724550445e-07, - "loss": 0.1617, - "step": 16927 - }, - { - "epoch": 0.86, - "grad_norm": 1.1973466911484718, - "learning_rate": 9.98904026212859e-07, - "loss": 0.1726, - "step": 16928 - }, - { - "epoch": 0.86, - "grad_norm": 1.023743851749609, - "learning_rate": 9.981866241259131e-07, - "loss": 0.1789, - "step": 16929 - }, - { - "epoch": 0.86, - "grad_norm": 0.9877633466971224, - "learning_rate": 9.974694662136609e-07, - "loss": 0.1487, - "step": 16930 - }, - { - "epoch": 0.86, - "grad_norm": 1.3325695332137415, - "learning_rate": 9.967525524955579e-07, - "loss": 0.1973, - "step": 16931 - }, - { - "epoch": 0.86, - "grad_norm": 1.949333856302942, - "learning_rate": 9.9603588299105e-07, - "loss": 0.1755, - "step": 16932 - }, - { - "epoch": 0.86, - "grad_norm": 0.9485511719358928, - "learning_rate": 9.9531945771958e-07, - "loss": 0.165, - "step": 16933 - }, - { - "epoch": 0.86, - "grad_norm": 0.9261506185655848, - "learning_rate": 9.946032767005752e-07, - "loss": 0.1637, - "step": 16934 - }, - { - "epoch": 0.86, - "grad_norm": 0.9878937517566453, - "learning_rate": 9.938873399534688e-07, - "loss": 0.1424, - "step": 16935 - }, - { - "epoch": 0.86, - "grad_norm": 0.9556000137729997, - "learning_rate": 9.931716474976738e-07, - "loss": 0.1391, - "step": 16936 - }, - { - "epoch": 0.86, - "grad_norm": 0.9511096528875889, - "learning_rate": 9.924561993526082e-07, - "loss": 0.1454, - "step": 16937 - }, - { - "epoch": 0.86, - "grad_norm": 1.5058829954646482, - "learning_rate": 9.917409955376778e-07, - "loss": 0.1548, - "step": 16938 - }, - { - "epoch": 0.86, - "grad_norm": 0.9453923029256704, - "learning_rate": 9.910260360722802e-07, - "loss": 0.1553, - "step": 16939 - }, - { - "epoch": 0.86, - "grad_norm": 3.388034607806563, - "learning_rate": 9.903113209758098e-07, - "loss": 0.1567, - "step": 16940 - }, - { - "epoch": 0.86, - "grad_norm": 1.1517223486250001, - "learning_rate": 9.895968502676533e-07, - "loss": 0.1749, - "step": 16941 - }, - { - "epoch": 0.86, - "grad_norm": 0.8428053004420055, - "learning_rate": 9.888826239671934e-07, - "loss": 0.1547, - "step": 16942 - }, - { - "epoch": 0.86, - "grad_norm": 0.8433502857727055, - "learning_rate": 9.881686420937986e-07, - "loss": 0.1471, - "step": 16943 - }, - { - "epoch": 0.86, - "grad_norm": 3.2771510094579086, - "learning_rate": 9.874549046668413e-07, - "loss": 0.1807, - "step": 16944 - }, - { - "epoch": 0.86, - "grad_norm": 0.9337510319746428, - "learning_rate": 9.867414117056763e-07, - "loss": 0.1473, - "step": 16945 - }, - { - "epoch": 0.86, - "grad_norm": 1.2713121575294868, - "learning_rate": 9.860281632296609e-07, - "loss": 0.1457, - "step": 16946 - }, - { - "epoch": 0.86, - "grad_norm": 1.0183252588457072, - "learning_rate": 9.8531515925814e-07, - "loss": 0.1712, - "step": 16947 - }, - { - "epoch": 0.86, - "grad_norm": 1.4441436451206293, - "learning_rate": 9.846023998104536e-07, - "loss": 0.1787, - "step": 16948 - }, - { - "epoch": 0.86, - "grad_norm": 0.8774364051310735, - "learning_rate": 9.838898849059364e-07, - "loss": 0.148, - "step": 16949 - }, - { - "epoch": 0.86, - "grad_norm": 0.8797088309314717, - "learning_rate": 9.831776145639182e-07, - "loss": 0.1535, - "step": 16950 - }, - { - "epoch": 0.86, - "grad_norm": 0.9116377986628547, - "learning_rate": 9.824655888037138e-07, - "loss": 0.1485, - "step": 16951 - }, - { - "epoch": 0.86, - "grad_norm": 1.921263524194139, - "learning_rate": 9.817538076446409e-07, - "loss": 0.1454, - "step": 16952 - }, - { - "epoch": 0.86, - "grad_norm": 0.8002452664261146, - "learning_rate": 9.810422711060042e-07, - "loss": 0.1457, - "step": 16953 - }, - { - "epoch": 0.86, - "grad_norm": 1.0264341071482261, - "learning_rate": 9.80330979207108e-07, - "loss": 0.1914, - "step": 16954 - }, - { - "epoch": 0.86, - "grad_norm": 0.9512470084198945, - "learning_rate": 9.796199319672416e-07, - "loss": 0.1529, - "step": 16955 - }, - { - "epoch": 0.86, - "grad_norm": 0.9112980601957212, - "learning_rate": 9.78909129405694e-07, - "loss": 0.1685, - "step": 16956 - }, - { - "epoch": 0.86, - "grad_norm": 1.146254875669652, - "learning_rate": 9.78198571541744e-07, - "loss": 0.1802, - "step": 16957 - }, - { - "epoch": 0.86, - "grad_norm": 0.978711356522037, - "learning_rate": 9.774882583946688e-07, - "loss": 0.1465, - "step": 16958 - }, - { - "epoch": 0.86, - "grad_norm": 0.8759311125604019, - "learning_rate": 9.767781899837348e-07, - "loss": 0.1703, - "step": 16959 - }, - { - "epoch": 0.86, - "grad_norm": 1.8155043599843381, - "learning_rate": 9.760683663282012e-07, - "loss": 0.1608, - "step": 16960 - }, - { - "epoch": 0.86, - "grad_norm": 1.3248677378427802, - "learning_rate": 9.753587874473235e-07, - "loss": 0.176, - "step": 16961 - }, - { - "epoch": 0.86, - "grad_norm": 1.061681244714024, - "learning_rate": 9.746494533603478e-07, - "loss": 0.1809, - "step": 16962 - }, - { - "epoch": 0.86, - "grad_norm": 0.8151729487186564, - "learning_rate": 9.739403640865164e-07, - "loss": 0.1664, - "step": 16963 - }, - { - "epoch": 0.86, - "grad_norm": 1.1969609035416964, - "learning_rate": 9.732315196450615e-07, - "loss": 0.1886, - "step": 16964 - }, - { - "epoch": 0.86, - "grad_norm": 1.3341288883277864, - "learning_rate": 9.725229200552123e-07, - "loss": 0.1751, - "step": 16965 - }, - { - "epoch": 0.86, - "grad_norm": 2.9657370156268987, - "learning_rate": 9.718145653361878e-07, - "loss": 0.197, - "step": 16966 - }, - { - "epoch": 0.86, - "grad_norm": 1.0373624882601507, - "learning_rate": 9.711064555072026e-07, - "loss": 0.1779, - "step": 16967 - }, - { - "epoch": 0.86, - "grad_norm": 1.0226918276226975, - "learning_rate": 9.703985905874646e-07, - "loss": 0.1604, - "step": 16968 - }, - { - "epoch": 0.86, - "grad_norm": 1.6206548522077415, - "learning_rate": 9.696909705961776e-07, - "loss": 0.17, - "step": 16969 - }, - { - "epoch": 0.86, - "grad_norm": 0.9537709638528886, - "learning_rate": 9.689835955525307e-07, - "loss": 0.1632, - "step": 16970 - }, - { - "epoch": 0.86, - "grad_norm": 1.2901100670976893, - "learning_rate": 9.682764654757149e-07, - "loss": 0.1727, - "step": 16971 - }, - { - "epoch": 0.86, - "grad_norm": 0.843111449613659, - "learning_rate": 9.675695803849094e-07, - "loss": 0.1562, - "step": 16972 - }, - { - "epoch": 0.86, - "grad_norm": 1.0565847073406056, - "learning_rate": 9.668629402992902e-07, - "loss": 0.1769, - "step": 16973 - }, - { - "epoch": 0.86, - "grad_norm": 0.8774607355627476, - "learning_rate": 9.661565452380228e-07, - "loss": 0.1604, - "step": 16974 - }, - { - "epoch": 0.86, - "grad_norm": 0.8361969432025993, - "learning_rate": 9.654503952202687e-07, - "loss": 0.1505, - "step": 16975 - }, - { - "epoch": 0.86, - "grad_norm": 2.1873928931233717, - "learning_rate": 9.647444902651847e-07, - "loss": 0.1659, - "step": 16976 - }, - { - "epoch": 0.86, - "grad_norm": 1.6912378470493363, - "learning_rate": 9.640388303919156e-07, - "loss": 0.164, - "step": 16977 - }, - { - "epoch": 0.86, - "grad_norm": 0.9739879821560055, - "learning_rate": 9.63333415619605e-07, - "loss": 0.1601, - "step": 16978 - }, - { - "epoch": 0.86, - "grad_norm": 0.9162667771255657, - "learning_rate": 9.626282459673842e-07, - "loss": 0.169, - "step": 16979 - }, - { - "epoch": 0.86, - "grad_norm": 1.0533004190917632, - "learning_rate": 9.619233214543833e-07, - "loss": 0.1456, - "step": 16980 - }, - { - "epoch": 0.86, - "grad_norm": 1.1535147922097448, - "learning_rate": 9.612186420997227e-07, - "loss": 0.1706, - "step": 16981 - }, - { - "epoch": 0.86, - "grad_norm": 1.2602299328060675, - "learning_rate": 9.605142079225183e-07, - "loss": 0.1586, - "step": 16982 - }, - { - "epoch": 0.86, - "grad_norm": 1.674843203986836, - "learning_rate": 9.598100189418736e-07, - "loss": 0.1344, - "step": 16983 - }, - { - "epoch": 0.86, - "grad_norm": 1.0166396943321496, - "learning_rate": 9.591060751768943e-07, - "loss": 0.1781, - "step": 16984 - }, - { - "epoch": 0.86, - "grad_norm": 1.0828299161586112, - "learning_rate": 9.584023766466721e-07, - "loss": 0.1953, - "step": 16985 - }, - { - "epoch": 0.86, - "grad_norm": 1.089162076325917, - "learning_rate": 9.576989233702993e-07, - "loss": 0.1856, - "step": 16986 - }, - { - "epoch": 0.86, - "grad_norm": 1.1043827985285775, - "learning_rate": 9.569957153668507e-07, - "loss": 0.1617, - "step": 16987 - }, - { - "epoch": 0.86, - "grad_norm": 1.0517660021678024, - "learning_rate": 9.562927526554066e-07, - "loss": 0.1583, - "step": 16988 - }, - { - "epoch": 0.86, - "grad_norm": 1.5950084990409343, - "learning_rate": 9.555900352550308e-07, - "loss": 0.1689, - "step": 16989 - }, - { - "epoch": 0.86, - "grad_norm": 0.9618689152709855, - "learning_rate": 9.548875631847875e-07, - "loss": 0.1714, - "step": 16990 - }, - { - "epoch": 0.86, - "grad_norm": 0.9033812724850739, - "learning_rate": 9.541853364637299e-07, - "loss": 0.1571, - "step": 16991 - }, - { - "epoch": 0.86, - "grad_norm": 0.9739470181448753, - "learning_rate": 9.534833551109035e-07, - "loss": 0.1388, - "step": 16992 - }, - { - "epoch": 0.86, - "grad_norm": 0.9077329611081956, - "learning_rate": 9.527816191453531e-07, - "loss": 0.1683, - "step": 16993 - }, - { - "epoch": 0.86, - "grad_norm": 0.8965000939025348, - "learning_rate": 9.520801285861126e-07, - "loss": 0.1708, - "step": 16994 - }, - { - "epoch": 0.86, - "grad_norm": 0.8336839169695421, - "learning_rate": 9.513788834522108e-07, - "loss": 0.1565, - "step": 16995 - }, - { - "epoch": 0.86, - "grad_norm": 1.5352836241948775, - "learning_rate": 9.506778837626652e-07, - "loss": 0.1605, - "step": 16996 - }, - { - "epoch": 0.86, - "grad_norm": 0.9124484802811895, - "learning_rate": 9.499771295364957e-07, - "loss": 0.1536, - "step": 16997 - }, - { - "epoch": 0.86, - "grad_norm": 1.10837471926042, - "learning_rate": 9.492766207927062e-07, - "loss": 0.1658, - "step": 16998 - }, - { - "epoch": 0.86, - "grad_norm": 1.166103641007375, - "learning_rate": 9.485763575503015e-07, - "loss": 0.1366, - "step": 16999 - }, - { - "epoch": 0.86, - "grad_norm": 0.8544681293800753, - "learning_rate": 9.47876339828272e-07, - "loss": 0.1594, - "step": 17000 - }, - { - "epoch": 0.86, - "grad_norm": 1.1404024846192349, - "learning_rate": 9.471765676456079e-07, - "loss": 0.1733, - "step": 17001 - }, - { - "epoch": 0.86, - "grad_norm": 1.1349490458873757, - "learning_rate": 9.464770410212909e-07, - "loss": 0.1495, - "step": 17002 - }, - { - "epoch": 0.86, - "grad_norm": 1.2651117346806118, - "learning_rate": 9.457777599742979e-07, - "loss": 0.1682, - "step": 17003 - }, - { - "epoch": 0.86, - "grad_norm": 1.2507880576208228, - "learning_rate": 9.450787245235926e-07, - "loss": 0.1689, - "step": 17004 - }, - { - "epoch": 0.86, - "grad_norm": 0.9300768396542201, - "learning_rate": 9.443799346881388e-07, - "loss": 0.1604, - "step": 17005 - }, - { - "epoch": 0.86, - "grad_norm": 1.0231991740589423, - "learning_rate": 9.436813904868902e-07, - "loss": 0.1793, - "step": 17006 - }, - { - "epoch": 0.86, - "grad_norm": 1.0992909713211707, - "learning_rate": 9.429830919387972e-07, - "loss": 0.1623, - "step": 17007 - }, - { - "epoch": 0.86, - "grad_norm": 0.870414843937221, - "learning_rate": 9.422850390627991e-07, - "loss": 0.1626, - "step": 17008 - }, - { - "epoch": 0.86, - "grad_norm": 1.3107033059662958, - "learning_rate": 9.415872318778285e-07, - "loss": 0.1745, - "step": 17009 - }, - { - "epoch": 0.86, - "grad_norm": 1.7913604862726662, - "learning_rate": 9.40889670402817e-07, - "loss": 0.1709, - "step": 17010 - }, - { - "epoch": 0.87, - "grad_norm": 1.3926226337980574, - "learning_rate": 9.401923546566838e-07, - "loss": 0.1497, - "step": 17011 - }, - { - "epoch": 0.87, - "grad_norm": 1.255256928642862, - "learning_rate": 9.394952846583472e-07, - "loss": 0.1872, - "step": 17012 - }, - { - "epoch": 0.87, - "grad_norm": 0.8369092897709353, - "learning_rate": 9.387984604267109e-07, - "loss": 0.1717, - "step": 17013 - }, - { - "epoch": 0.87, - "grad_norm": 0.8268688499431834, - "learning_rate": 9.381018819806797e-07, - "loss": 0.152, - "step": 17014 - }, - { - "epoch": 0.87, - "grad_norm": 2.7473049573367447, - "learning_rate": 9.374055493391455e-07, - "loss": 0.1608, - "step": 17015 - }, - { - "epoch": 0.87, - "grad_norm": 0.917773236637632, - "learning_rate": 9.367094625209983e-07, - "loss": 0.1705, - "step": 17016 - }, - { - "epoch": 0.87, - "grad_norm": 0.9269986492790633, - "learning_rate": 9.360136215451177e-07, - "loss": 0.1783, - "step": 17017 - }, - { - "epoch": 0.87, - "grad_norm": 0.9102869074299589, - "learning_rate": 9.353180264303818e-07, - "loss": 0.1713, - "step": 17018 - }, - { - "epoch": 0.87, - "grad_norm": 0.847096642866557, - "learning_rate": 9.346226771956523e-07, - "loss": 0.1719, - "step": 17019 - }, - { - "epoch": 0.87, - "grad_norm": 1.115828035313165, - "learning_rate": 9.339275738597975e-07, - "loss": 0.1617, - "step": 17020 - }, - { - "epoch": 0.87, - "grad_norm": 0.9756168196493309, - "learning_rate": 9.332327164416688e-07, - "loss": 0.173, - "step": 17021 - }, - { - "epoch": 0.87, - "grad_norm": 1.120436059367365, - "learning_rate": 9.325381049601157e-07, - "loss": 0.1631, - "step": 17022 - }, - { - "epoch": 0.87, - "grad_norm": 0.9422116076804968, - "learning_rate": 9.318437394339774e-07, - "loss": 0.1624, - "step": 17023 - }, - { - "epoch": 0.87, - "grad_norm": 2.919121126930504, - "learning_rate": 9.311496198820913e-07, - "loss": 0.1802, - "step": 17024 - }, - { - "epoch": 0.87, - "grad_norm": 3.408213788699369, - "learning_rate": 9.304557463232844e-07, - "loss": 0.1517, - "step": 17025 - }, - { - "epoch": 0.87, - "grad_norm": 1.1067485928654712, - "learning_rate": 9.297621187763761e-07, - "loss": 0.1771, - "step": 17026 - }, - { - "epoch": 0.87, - "grad_norm": 1.1005835224655929, - "learning_rate": 9.290687372601814e-07, - "loss": 0.1662, - "step": 17027 - }, - { - "epoch": 0.87, - "grad_norm": 1.5768878819440173, - "learning_rate": 9.283756017935108e-07, - "loss": 0.1446, - "step": 17028 - }, - { - "epoch": 0.87, - "grad_norm": 0.8738222182636233, - "learning_rate": 9.276827123951648e-07, - "loss": 0.1458, - "step": 17029 - }, - { - "epoch": 0.87, - "grad_norm": 0.9600910172910317, - "learning_rate": 9.269900690839373e-07, - "loss": 0.1729, - "step": 17030 - }, - { - "epoch": 0.87, - "grad_norm": 0.9455883463851124, - "learning_rate": 9.262976718786176e-07, - "loss": 0.1481, - "step": 17031 - }, - { - "epoch": 0.87, - "grad_norm": 1.4184803589090373, - "learning_rate": 9.256055207979841e-07, - "loss": 0.1701, - "step": 17032 - }, - { - "epoch": 0.87, - "grad_norm": 1.0587723639969662, - "learning_rate": 9.249136158608163e-07, - "loss": 0.1896, - "step": 17033 - }, - { - "epoch": 0.87, - "grad_norm": 1.0309704538708153, - "learning_rate": 9.242219570858757e-07, - "loss": 0.1813, - "step": 17034 - }, - { - "epoch": 0.87, - "grad_norm": 1.0474496824022568, - "learning_rate": 9.235305444919307e-07, - "loss": 0.1718, - "step": 17035 - }, - { - "epoch": 0.87, - "grad_norm": 3.23008560442467, - "learning_rate": 9.228393780977296e-07, - "loss": 0.1671, - "step": 17036 - }, - { - "epoch": 0.87, - "grad_norm": 0.9214761008903439, - "learning_rate": 9.22148457922023e-07, - "loss": 0.1703, - "step": 17037 - }, - { - "epoch": 0.87, - "grad_norm": 1.447550516118549, - "learning_rate": 9.214577839835514e-07, - "loss": 0.144, - "step": 17038 - }, - { - "epoch": 0.87, - "grad_norm": 2.4592960258901386, - "learning_rate": 9.207673563010533e-07, - "loss": 0.187, - "step": 17039 - }, - { - "epoch": 0.87, - "grad_norm": 2.8769187476094875, - "learning_rate": 9.200771748932513e-07, - "loss": 0.1853, - "step": 17040 - }, - { - "epoch": 0.87, - "grad_norm": 0.9573474621913002, - "learning_rate": 9.193872397788705e-07, - "loss": 0.1656, - "step": 17041 - }, - { - "epoch": 0.87, - "grad_norm": 1.6841387334089557, - "learning_rate": 9.186975509766216e-07, - "loss": 0.1465, - "step": 17042 - }, - { - "epoch": 0.87, - "grad_norm": 1.230747654561539, - "learning_rate": 9.180081085052162e-07, - "loss": 0.1594, - "step": 17043 - }, - { - "epoch": 0.87, - "grad_norm": 1.9144311106279304, - "learning_rate": 9.173189123833526e-07, - "loss": 0.1746, - "step": 17044 - }, - { - "epoch": 0.87, - "grad_norm": 0.969731855311959, - "learning_rate": 9.166299626297271e-07, - "loss": 0.1699, - "step": 17045 - }, - { - "epoch": 0.87, - "grad_norm": 1.0739965557393911, - "learning_rate": 9.159412592630279e-07, - "loss": 0.1706, - "step": 17046 - }, - { - "epoch": 0.87, - "grad_norm": 1.5170650586505527, - "learning_rate": 9.152528023019325e-07, - "loss": 0.1634, - "step": 17047 - }, - { - "epoch": 0.87, - "grad_norm": 1.441823463208156, - "learning_rate": 9.145645917651214e-07, - "loss": 0.152, - "step": 17048 - }, - { - "epoch": 0.87, - "grad_norm": 1.0354265673263898, - "learning_rate": 9.138766276712552e-07, - "loss": 0.1649, - "step": 17049 - }, - { - "epoch": 0.87, - "grad_norm": 1.1997505349833961, - "learning_rate": 9.131889100390024e-07, - "loss": 0.1737, - "step": 17050 - }, - { - "epoch": 0.87, - "grad_norm": 6.196969721625916, - "learning_rate": 9.125014388870101e-07, - "loss": 0.1546, - "step": 17051 - }, - { - "epoch": 0.87, - "grad_norm": 0.9409125676079062, - "learning_rate": 9.118142142339326e-07, - "loss": 0.1916, - "step": 17052 - }, - { - "epoch": 0.87, - "grad_norm": 1.046336542925199, - "learning_rate": 9.111272360984058e-07, - "loss": 0.158, - "step": 17053 - }, - { - "epoch": 0.87, - "grad_norm": 0.9049607362766768, - "learning_rate": 9.104405044990661e-07, - "loss": 0.1799, - "step": 17054 - }, - { - "epoch": 0.87, - "grad_norm": 1.0200458401151316, - "learning_rate": 9.097540194545407e-07, - "loss": 0.1589, - "step": 17055 - }, - { - "epoch": 0.87, - "grad_norm": 0.8076764602199117, - "learning_rate": 9.090677809834525e-07, - "loss": 0.1961, - "step": 17056 - }, - { - "epoch": 0.87, - "grad_norm": 1.0527309977219788, - "learning_rate": 9.083817891044133e-07, - "loss": 0.1601, - "step": 17057 - }, - { - "epoch": 0.87, - "grad_norm": 1.3595022523979385, - "learning_rate": 9.076960438360327e-07, - "loss": 0.1576, - "step": 17058 - }, - { - "epoch": 0.87, - "grad_norm": 0.9395096482761963, - "learning_rate": 9.070105451969091e-07, - "loss": 0.1624, - "step": 17059 - }, - { - "epoch": 0.87, - "grad_norm": 1.3353654985682613, - "learning_rate": 9.063252932056399e-07, - "loss": 0.1646, - "step": 17060 - }, - { - "epoch": 0.87, - "grad_norm": 1.1705992125708042, - "learning_rate": 9.056402878808102e-07, - "loss": 0.1655, - "step": 17061 - }, - { - "epoch": 0.87, - "grad_norm": 1.1283675058654659, - "learning_rate": 9.049555292409995e-07, - "loss": 0.1509, - "step": 17062 - }, - { - "epoch": 0.87, - "grad_norm": 0.8843788307703369, - "learning_rate": 9.042710173047841e-07, - "loss": 0.1508, - "step": 17063 - }, - { - "epoch": 0.87, - "grad_norm": 0.9171842968857437, - "learning_rate": 9.035867520907304e-07, - "loss": 0.1591, - "step": 17064 - }, - { - "epoch": 0.87, - "grad_norm": 0.9310906259002731, - "learning_rate": 9.029027336174023e-07, - "loss": 0.1681, - "step": 17065 - }, - { - "epoch": 0.87, - "grad_norm": 3.831040289624738, - "learning_rate": 9.022189619033495e-07, - "loss": 0.1806, - "step": 17066 - }, - { - "epoch": 0.87, - "grad_norm": 1.3134835153634918, - "learning_rate": 9.015354369671237e-07, - "loss": 0.185, - "step": 17067 - }, - { - "epoch": 0.87, - "grad_norm": 1.1991897700260155, - "learning_rate": 9.0085215882726e-07, - "loss": 0.1491, - "step": 17068 - }, - { - "epoch": 0.87, - "grad_norm": 1.0090186658831273, - "learning_rate": 9.001691275022984e-07, - "loss": 0.1621, - "step": 17069 - }, - { - "epoch": 0.87, - "grad_norm": 1.8294614748653688, - "learning_rate": 8.994863430107603e-07, - "loss": 0.154, - "step": 17070 - }, - { - "epoch": 0.87, - "grad_norm": 0.917475210040729, - "learning_rate": 8.988038053711701e-07, - "loss": 0.1641, - "step": 17071 - }, - { - "epoch": 0.87, - "grad_norm": 0.9202157068800895, - "learning_rate": 8.981215146020394e-07, - "loss": 0.1689, - "step": 17072 - }, - { - "epoch": 0.87, - "grad_norm": 1.1480664342916895, - "learning_rate": 8.974394707218792e-07, - "loss": 0.1692, - "step": 17073 - }, - { - "epoch": 0.87, - "grad_norm": 1.1053708115759986, - "learning_rate": 8.967576737491856e-07, - "loss": 0.1842, - "step": 17074 - }, - { - "epoch": 0.87, - "grad_norm": 0.9201851719480871, - "learning_rate": 8.960761237024573e-07, - "loss": 0.1747, - "step": 17075 - }, - { - "epoch": 0.87, - "grad_norm": 1.0454642154287848, - "learning_rate": 8.95394820600175e-07, - "loss": 0.1831, - "step": 17076 - }, - { - "epoch": 0.87, - "grad_norm": 1.0702369137636214, - "learning_rate": 8.94713764460825e-07, - "loss": 0.1695, - "step": 17077 - }, - { - "epoch": 0.87, - "grad_norm": 1.0091538146196728, - "learning_rate": 8.940329553028782e-07, - "loss": 0.1739, - "step": 17078 - }, - { - "epoch": 0.87, - "grad_norm": 1.0326055197662956, - "learning_rate": 8.933523931447996e-07, - "loss": 0.1647, - "step": 17079 - }, - { - "epoch": 0.87, - "grad_norm": 1.0176709480126727, - "learning_rate": 8.926720780050513e-07, - "loss": 0.1689, - "step": 17080 - }, - { - "epoch": 0.87, - "grad_norm": 1.0575101352035323, - "learning_rate": 8.919920099020884e-07, - "loss": 0.1482, - "step": 17081 - }, - { - "epoch": 0.87, - "grad_norm": 0.8582034731028793, - "learning_rate": 8.913121888543575e-07, - "loss": 0.1623, - "step": 17082 - }, - { - "epoch": 0.87, - "grad_norm": 1.43270078497795, - "learning_rate": 8.906326148802968e-07, - "loss": 0.1651, - "step": 17083 - }, - { - "epoch": 0.87, - "grad_norm": 1.2905694891493917, - "learning_rate": 8.89953287998343e-07, - "loss": 0.1678, - "step": 17084 - }, - { - "epoch": 0.87, - "grad_norm": 2.019744957485963, - "learning_rate": 8.892742082269179e-07, - "loss": 0.1683, - "step": 17085 - }, - { - "epoch": 0.87, - "grad_norm": 1.270537102765076, - "learning_rate": 8.885953755844467e-07, - "loss": 0.1568, - "step": 17086 - }, - { - "epoch": 0.87, - "grad_norm": 1.0910139306492328, - "learning_rate": 8.879167900893392e-07, - "loss": 0.1664, - "step": 17087 - }, - { - "epoch": 0.87, - "grad_norm": 4.833649447834106, - "learning_rate": 8.872384517600053e-07, - "loss": 0.1686, - "step": 17088 - }, - { - "epoch": 0.87, - "grad_norm": 0.9117543020684723, - "learning_rate": 8.86560360614841e-07, - "loss": 0.1638, - "step": 17089 - }, - { - "epoch": 0.87, - "grad_norm": 1.3114404092976746, - "learning_rate": 8.858825166722418e-07, - "loss": 0.1694, - "step": 17090 - }, - { - "epoch": 0.87, - "grad_norm": 0.9301874381300534, - "learning_rate": 8.852049199505941e-07, - "loss": 0.1502, - "step": 17091 - }, - { - "epoch": 0.87, - "grad_norm": 1.6722753021063932, - "learning_rate": 8.845275704682788e-07, - "loss": 0.1689, - "step": 17092 - }, - { - "epoch": 0.87, - "grad_norm": 1.0828515003759154, - "learning_rate": 8.838504682436666e-07, - "loss": 0.1768, - "step": 17093 - }, - { - "epoch": 0.87, - "grad_norm": 0.9844296891643936, - "learning_rate": 8.831736132951274e-07, - "loss": 0.1713, - "step": 17094 - }, - { - "epoch": 0.87, - "grad_norm": 0.9202759734089795, - "learning_rate": 8.824970056410187e-07, - "loss": 0.1538, - "step": 17095 - }, - { - "epoch": 0.87, - "grad_norm": 1.4026351905757806, - "learning_rate": 8.818206452996924e-07, - "loss": 0.1789, - "step": 17096 - }, - { - "epoch": 0.87, - "grad_norm": 2.615643107469265, - "learning_rate": 8.811445322894951e-07, - "loss": 0.1541, - "step": 17097 - }, - { - "epoch": 0.87, - "grad_norm": 1.1891702976515879, - "learning_rate": 8.804686666287688e-07, - "loss": 0.1564, - "step": 17098 - }, - { - "epoch": 0.87, - "grad_norm": 1.0217262338262187, - "learning_rate": 8.797930483358452e-07, - "loss": 0.167, - "step": 17099 - }, - { - "epoch": 0.87, - "grad_norm": 1.0987630781058417, - "learning_rate": 8.7911767742905e-07, - "loss": 0.1531, - "step": 17100 - }, - { - "epoch": 0.87, - "grad_norm": 1.0437865886697673, - "learning_rate": 8.784425539267038e-07, - "loss": 0.1842, - "step": 17101 - }, - { - "epoch": 0.87, - "grad_norm": 0.7743462507231501, - "learning_rate": 8.777676778471167e-07, - "loss": 0.1587, - "step": 17102 - }, - { - "epoch": 0.87, - "grad_norm": 0.8877364528856684, - "learning_rate": 8.770930492085983e-07, - "loss": 0.1584, - "step": 17103 - }, - { - "epoch": 0.87, - "grad_norm": 1.7920300896197154, - "learning_rate": 8.764186680294451e-07, - "loss": 0.179, - "step": 17104 - }, - { - "epoch": 0.87, - "grad_norm": 1.3643034663684792, - "learning_rate": 8.757445343279514e-07, - "loss": 0.1764, - "step": 17105 - }, - { - "epoch": 0.87, - "grad_norm": 0.9771049397743856, - "learning_rate": 8.750706481224014e-07, - "loss": 0.1593, - "step": 17106 - }, - { - "epoch": 0.87, - "grad_norm": 1.1156830433209912, - "learning_rate": 8.743970094310761e-07, - "loss": 0.1466, - "step": 17107 - }, - { - "epoch": 0.87, - "grad_norm": 1.2084500367718818, - "learning_rate": 8.737236182722464e-07, - "loss": 0.1795, - "step": 17108 - }, - { - "epoch": 0.87, - "grad_norm": 1.0696994804947388, - "learning_rate": 8.730504746641811e-07, - "loss": 0.1802, - "step": 17109 - }, - { - "epoch": 0.87, - "grad_norm": 0.9620892356217626, - "learning_rate": 8.723775786251354e-07, - "loss": 0.1533, - "step": 17110 - }, - { - "epoch": 0.87, - "grad_norm": 0.9459466502861161, - "learning_rate": 8.71704930173366e-07, - "loss": 0.136, - "step": 17111 - }, - { - "epoch": 0.87, - "grad_norm": 1.4660240342842592, - "learning_rate": 8.710325293271126e-07, - "loss": 0.1786, - "step": 17112 - }, - { - "epoch": 0.87, - "grad_norm": 1.228860617049589, - "learning_rate": 8.703603761046209e-07, - "loss": 0.1744, - "step": 17113 - }, - { - "epoch": 0.87, - "grad_norm": 0.841572818613883, - "learning_rate": 8.696884705241182e-07, - "loss": 0.1544, - "step": 17114 - }, - { - "epoch": 0.87, - "grad_norm": 1.1666695309699784, - "learning_rate": 8.690168126038301e-07, - "loss": 0.1818, - "step": 17115 - }, - { - "epoch": 0.87, - "grad_norm": 1.3421305915982424, - "learning_rate": 8.683454023619775e-07, - "loss": 0.158, - "step": 17116 - }, - { - "epoch": 0.87, - "grad_norm": 0.894492935229557, - "learning_rate": 8.676742398167704e-07, - "loss": 0.1709, - "step": 17117 - }, - { - "epoch": 0.87, - "grad_norm": 1.0467244786440408, - "learning_rate": 8.670033249864174e-07, - "loss": 0.1636, - "step": 17118 - }, - { - "epoch": 0.87, - "grad_norm": 1.2673527693727311, - "learning_rate": 8.66332657889114e-07, - "loss": 0.1778, - "step": 17119 - }, - { - "epoch": 0.87, - "grad_norm": 1.0575893042859574, - "learning_rate": 8.656622385430547e-07, - "loss": 0.1773, - "step": 17120 - }, - { - "epoch": 0.87, - "grad_norm": 1.1402664070621547, - "learning_rate": 8.649920669664202e-07, - "loss": 0.1673, - "step": 17121 - }, - { - "epoch": 0.87, - "grad_norm": 1.012078317229577, - "learning_rate": 8.643221431773952e-07, - "loss": 0.1697, - "step": 17122 - }, - { - "epoch": 0.87, - "grad_norm": 0.9484744378294417, - "learning_rate": 8.636524671941449e-07, - "loss": 0.1586, - "step": 17123 - }, - { - "epoch": 0.87, - "grad_norm": 0.9925048517257558, - "learning_rate": 8.629830390348382e-07, - "loss": 0.1731, - "step": 17124 - }, - { - "epoch": 0.87, - "grad_norm": 0.9828673798522906, - "learning_rate": 8.623138587176327e-07, - "loss": 0.1712, - "step": 17125 - }, - { - "epoch": 0.87, - "grad_norm": 1.7792158808509042, - "learning_rate": 8.616449262606819e-07, - "loss": 0.1464, - "step": 17126 - }, - { - "epoch": 0.87, - "grad_norm": 1.0968797454498505, - "learning_rate": 8.609762416821255e-07, - "loss": 0.171, - "step": 17127 - }, - { - "epoch": 0.87, - "grad_norm": 0.9891852371438813, - "learning_rate": 8.603078050001079e-07, - "loss": 0.1573, - "step": 17128 - }, - { - "epoch": 0.87, - "grad_norm": 2.21659724600745, - "learning_rate": 8.596396162327547e-07, - "loss": 0.1641, - "step": 17129 - }, - { - "epoch": 0.87, - "grad_norm": 1.141819368951561, - "learning_rate": 8.589716753981958e-07, - "loss": 0.1607, - "step": 17130 - }, - { - "epoch": 0.87, - "grad_norm": 1.566854918335254, - "learning_rate": 8.583039825145456e-07, - "loss": 0.1803, - "step": 17131 - }, - { - "epoch": 0.87, - "grad_norm": 1.0486597977148742, - "learning_rate": 8.576365375999151e-07, - "loss": 0.174, - "step": 17132 - }, - { - "epoch": 0.87, - "grad_norm": 0.8382358364538444, - "learning_rate": 8.569693406724089e-07, - "loss": 0.1534, - "step": 17133 - }, - { - "epoch": 0.87, - "grad_norm": 0.9025809558840777, - "learning_rate": 8.563023917501267e-07, - "loss": 0.1739, - "step": 17134 - }, - { - "epoch": 0.87, - "grad_norm": 1.0805583000627847, - "learning_rate": 8.556356908511598e-07, - "loss": 0.173, - "step": 17135 - }, - { - "epoch": 0.87, - "grad_norm": 1.0793640614355466, - "learning_rate": 8.549692379935904e-07, - "loss": 0.1488, - "step": 17136 - }, - { - "epoch": 0.87, - "grad_norm": 0.873788082427515, - "learning_rate": 8.543030331954971e-07, - "loss": 0.1416, - "step": 17137 - }, - { - "epoch": 0.87, - "grad_norm": 1.5731957662613225, - "learning_rate": 8.536370764749502e-07, - "loss": 0.1654, - "step": 17138 - }, - { - "epoch": 0.87, - "grad_norm": 1.055450540196962, - "learning_rate": 8.529713678500151e-07, - "loss": 0.1739, - "step": 17139 - }, - { - "epoch": 0.87, - "grad_norm": 0.9008341102955426, - "learning_rate": 8.523059073387474e-07, - "loss": 0.1509, - "step": 17140 - }, - { - "epoch": 0.87, - "grad_norm": 0.8612485946862515, - "learning_rate": 8.516406949591982e-07, - "loss": 0.1641, - "step": 17141 - }, - { - "epoch": 0.87, - "grad_norm": 1.0342607155695587, - "learning_rate": 8.509757307294109e-07, - "loss": 0.173, - "step": 17142 - }, - { - "epoch": 0.87, - "grad_norm": 0.9384308186441249, - "learning_rate": 8.503110146674265e-07, - "loss": 0.1612, - "step": 17143 - }, - { - "epoch": 0.87, - "grad_norm": 1.1232769793082433, - "learning_rate": 8.496465467912707e-07, - "loss": 0.1474, - "step": 17144 - }, - { - "epoch": 0.87, - "grad_norm": 1.414174825300634, - "learning_rate": 8.489823271189712e-07, - "loss": 0.1608, - "step": 17145 - }, - { - "epoch": 0.87, - "grad_norm": 2.43889602224765, - "learning_rate": 8.483183556685404e-07, - "loss": 0.1896, - "step": 17146 - }, - { - "epoch": 0.87, - "grad_norm": 1.271727693081388, - "learning_rate": 8.476546324579937e-07, - "loss": 0.165, - "step": 17147 - }, - { - "epoch": 0.87, - "grad_norm": 1.3473452346504362, - "learning_rate": 8.469911575053314e-07, - "loss": 0.1632, - "step": 17148 - }, - { - "epoch": 0.87, - "grad_norm": 0.9080943240602966, - "learning_rate": 8.463279308285488e-07, - "loss": 0.1635, - "step": 17149 - }, - { - "epoch": 0.87, - "grad_norm": 1.002355761226064, - "learning_rate": 8.456649524456384e-07, - "loss": 0.1702, - "step": 17150 - }, - { - "epoch": 0.87, - "grad_norm": 1.1178232175726797, - "learning_rate": 8.450022223745836e-07, - "loss": 0.1625, - "step": 17151 - }, - { - "epoch": 0.87, - "grad_norm": 0.9855127077410061, - "learning_rate": 8.44339740633362e-07, - "loss": 0.1483, - "step": 17152 - }, - { - "epoch": 0.87, - "grad_norm": 1.2085619193638986, - "learning_rate": 8.436775072399406e-07, - "loss": 0.1676, - "step": 17153 - }, - { - "epoch": 0.87, - "grad_norm": 1.106924279440494, - "learning_rate": 8.43015522212286e-07, - "loss": 0.162, - "step": 17154 - }, - { - "epoch": 0.87, - "grad_norm": 0.9675200693427021, - "learning_rate": 8.423537855683494e-07, - "loss": 0.1629, - "step": 17155 - }, - { - "epoch": 0.87, - "grad_norm": 1.046538127580827, - "learning_rate": 8.416922973260865e-07, - "loss": 0.1471, - "step": 17156 - }, - { - "epoch": 0.87, - "grad_norm": 0.9846894068376528, - "learning_rate": 8.410310575034353e-07, - "loss": 0.1523, - "step": 17157 - }, - { - "epoch": 0.87, - "grad_norm": 2.4189671040115495, - "learning_rate": 8.403700661183356e-07, - "loss": 0.1633, - "step": 17158 - }, - { - "epoch": 0.87, - "grad_norm": 1.1215961139129562, - "learning_rate": 8.397093231887143e-07, - "loss": 0.1657, - "step": 17159 - }, - { - "epoch": 0.87, - "grad_norm": 0.9463614162161704, - "learning_rate": 8.390488287324938e-07, - "loss": 0.1509, - "step": 17160 - }, - { - "epoch": 0.87, - "grad_norm": 1.7667384852227885, - "learning_rate": 8.383885827675919e-07, - "loss": 0.1589, - "step": 17161 - }, - { - "epoch": 0.87, - "grad_norm": 1.957756528157922, - "learning_rate": 8.377285853119188e-07, - "loss": 0.1726, - "step": 17162 - }, - { - "epoch": 0.87, - "grad_norm": 0.8567222388031468, - "learning_rate": 8.370688363833734e-07, - "loss": 0.1536, - "step": 17163 - }, - { - "epoch": 0.87, - "grad_norm": 1.5066683002465677, - "learning_rate": 8.364093359998549e-07, - "loss": 0.1671, - "step": 17164 - }, - { - "epoch": 0.87, - "grad_norm": 1.4478565414467472, - "learning_rate": 8.35750084179251e-07, - "loss": 0.1595, - "step": 17165 - }, - { - "epoch": 0.87, - "grad_norm": 0.9872017321212999, - "learning_rate": 8.350910809394419e-07, - "loss": 0.1471, - "step": 17166 - }, - { - "epoch": 0.87, - "grad_norm": 0.831180223310793, - "learning_rate": 8.344323262983056e-07, - "loss": 0.1693, - "step": 17167 - }, - { - "epoch": 0.87, - "grad_norm": 1.1506089263597188, - "learning_rate": 8.337738202737089e-07, - "loss": 0.1629, - "step": 17168 - }, - { - "epoch": 0.87, - "grad_norm": 1.5383110955910784, - "learning_rate": 8.331155628835174e-07, - "loss": 0.1663, - "step": 17169 - }, - { - "epoch": 0.87, - "grad_norm": 1.0783555363988484, - "learning_rate": 8.324575541455815e-07, - "loss": 0.1681, - "step": 17170 - }, - { - "epoch": 0.87, - "grad_norm": 4.396062909846658, - "learning_rate": 8.317997940777555e-07, - "loss": 0.1819, - "step": 17171 - }, - { - "epoch": 0.87, - "grad_norm": 0.8100993706187744, - "learning_rate": 8.311422826978743e-07, - "loss": 0.167, - "step": 17172 - }, - { - "epoch": 0.87, - "grad_norm": 1.1519109872457536, - "learning_rate": 8.304850200237801e-07, - "loss": 0.1864, - "step": 17173 - }, - { - "epoch": 0.87, - "grad_norm": 1.1145067988423865, - "learning_rate": 8.298280060732944e-07, - "loss": 0.1596, - "step": 17174 - }, - { - "epoch": 0.87, - "grad_norm": 0.9373264532460865, - "learning_rate": 8.29171240864245e-07, - "loss": 0.1651, - "step": 17175 - }, - { - "epoch": 0.87, - "grad_norm": 3.015744660484614, - "learning_rate": 8.285147244144409e-07, - "loss": 0.1858, - "step": 17176 - }, - { - "epoch": 0.87, - "grad_norm": 1.2143262440929645, - "learning_rate": 8.278584567416936e-07, - "loss": 0.1677, - "step": 17177 - }, - { - "epoch": 0.87, - "grad_norm": 0.8214701062742716, - "learning_rate": 8.272024378638033e-07, - "loss": 0.1551, - "step": 17178 - }, - { - "epoch": 0.87, - "grad_norm": 1.089253746380905, - "learning_rate": 8.265466677985667e-07, - "loss": 0.1829, - "step": 17179 - }, - { - "epoch": 0.87, - "grad_norm": 1.4385101425570883, - "learning_rate": 8.258911465637675e-07, - "loss": 0.1705, - "step": 17180 - }, - { - "epoch": 0.87, - "grad_norm": 0.9000616820305697, - "learning_rate": 8.252358741771915e-07, - "loss": 0.1539, - "step": 17181 - }, - { - "epoch": 0.87, - "grad_norm": 1.435773434610318, - "learning_rate": 8.245808506566088e-07, - "loss": 0.1807, - "step": 17182 - }, - { - "epoch": 0.87, - "grad_norm": 1.7531987585070594, - "learning_rate": 8.239260760197909e-07, - "loss": 0.1493, - "step": 17183 - }, - { - "epoch": 0.87, - "grad_norm": 0.9821965062763874, - "learning_rate": 8.232715502844968e-07, - "loss": 0.1592, - "step": 17184 - }, - { - "epoch": 0.87, - "grad_norm": 0.918881429900906, - "learning_rate": 8.226172734684779e-07, - "loss": 0.1879, - "step": 17185 - }, - { - "epoch": 0.87, - "grad_norm": 1.2048594743894412, - "learning_rate": 8.219632455894833e-07, - "loss": 0.1753, - "step": 17186 - }, - { - "epoch": 0.87, - "grad_norm": 1.7541521591860647, - "learning_rate": 8.213094666652544e-07, - "loss": 0.1596, - "step": 17187 - }, - { - "epoch": 0.87, - "grad_norm": 1.348671888935361, - "learning_rate": 8.206559367135258e-07, - "loss": 0.1725, - "step": 17188 - }, - { - "epoch": 0.87, - "grad_norm": 1.2557319159842633, - "learning_rate": 8.200026557520224e-07, - "loss": 0.1568, - "step": 17189 - }, - { - "epoch": 0.87, - "grad_norm": 1.1154418262195018, - "learning_rate": 8.193496237984677e-07, - "loss": 0.1638, - "step": 17190 - }, - { - "epoch": 0.87, - "grad_norm": 0.9471020486573046, - "learning_rate": 8.186968408705697e-07, - "loss": 0.1498, - "step": 17191 - }, - { - "epoch": 0.87, - "grad_norm": 0.8734603646022715, - "learning_rate": 8.18044306986041e-07, - "loss": 0.1548, - "step": 17192 - }, - { - "epoch": 0.87, - "grad_norm": 1.0798723064063107, - "learning_rate": 8.173920221625776e-07, - "loss": 0.1455, - "step": 17193 - }, - { - "epoch": 0.87, - "grad_norm": 0.8520548621086595, - "learning_rate": 8.167399864178749e-07, - "loss": 0.1777, - "step": 17194 - }, - { - "epoch": 0.87, - "grad_norm": 0.8022428202826044, - "learning_rate": 8.160881997696169e-07, - "loss": 0.1615, - "step": 17195 - }, - { - "epoch": 0.87, - "grad_norm": 1.5703257428569193, - "learning_rate": 8.154366622354881e-07, - "loss": 0.1734, - "step": 17196 - }, - { - "epoch": 0.87, - "grad_norm": 0.8567723378618287, - "learning_rate": 8.147853738331569e-07, - "loss": 0.1329, - "step": 17197 - }, - { - "epoch": 0.87, - "grad_norm": 1.858800319683022, - "learning_rate": 8.141343345802933e-07, - "loss": 0.1812, - "step": 17198 - }, - { - "epoch": 0.87, - "grad_norm": 0.9505104143578171, - "learning_rate": 8.134835444945521e-07, - "loss": 0.1556, - "step": 17199 - }, - { - "epoch": 0.87, - "grad_norm": 0.8372452692250099, - "learning_rate": 8.128330035935906e-07, - "loss": 0.148, - "step": 17200 - }, - { - "epoch": 0.87, - "grad_norm": 0.9504940848887924, - "learning_rate": 8.121827118950521e-07, - "loss": 0.1598, - "step": 17201 - }, - { - "epoch": 0.87, - "grad_norm": 1.674017728736617, - "learning_rate": 8.115326694165759e-07, - "loss": 0.1491, - "step": 17202 - }, - { - "epoch": 0.87, - "grad_norm": 1.0173530063273255, - "learning_rate": 8.108828761757948e-07, - "loss": 0.1596, - "step": 17203 - }, - { - "epoch": 0.87, - "grad_norm": 1.7623952342628695, - "learning_rate": 8.102333321903344e-07, - "loss": 0.1666, - "step": 17204 - }, - { - "epoch": 0.87, - "grad_norm": 1.194863581749474, - "learning_rate": 8.095840374778153e-07, - "loss": 0.1646, - "step": 17205 - }, - { - "epoch": 0.87, - "grad_norm": 0.9392827691446882, - "learning_rate": 8.089349920558465e-07, - "loss": 0.1578, - "step": 17206 - }, - { - "epoch": 0.88, - "grad_norm": 1.0778890805293149, - "learning_rate": 8.082861959420374e-07, - "loss": 0.17, - "step": 17207 - }, - { - "epoch": 0.88, - "grad_norm": 1.6435653004467239, - "learning_rate": 8.076376491539827e-07, - "loss": 0.1663, - "step": 17208 - }, - { - "epoch": 0.88, - "grad_norm": 1.2995772904802547, - "learning_rate": 8.069893517092775e-07, - "loss": 0.1644, - "step": 17209 - }, - { - "epoch": 0.88, - "grad_norm": 1.3175793197088113, - "learning_rate": 8.063413036255041e-07, - "loss": 0.1531, - "step": 17210 - }, - { - "epoch": 0.88, - "grad_norm": 1.0686728260509748, - "learning_rate": 8.05693504920243e-07, - "loss": 0.1778, - "step": 17211 - }, - { - "epoch": 0.88, - "grad_norm": 1.7564258312190575, - "learning_rate": 8.050459556110635e-07, - "loss": 0.1698, - "step": 17212 - }, - { - "epoch": 0.88, - "grad_norm": 1.2441905694267967, - "learning_rate": 8.043986557155315e-07, - "loss": 0.1581, - "step": 17213 - }, - { - "epoch": 0.88, - "grad_norm": 1.0520955464046744, - "learning_rate": 8.037516052512062e-07, - "loss": 0.1643, - "step": 17214 - }, - { - "epoch": 0.88, - "grad_norm": 1.3677567086585491, - "learning_rate": 8.031048042356393e-07, - "loss": 0.1551, - "step": 17215 - }, - { - "epoch": 0.88, - "grad_norm": 1.2042220749762846, - "learning_rate": 8.024582526863722e-07, - "loss": 0.1711, - "step": 17216 - }, - { - "epoch": 0.88, - "grad_norm": 1.0138279779706278, - "learning_rate": 8.018119506209454e-07, - "loss": 0.1576, - "step": 17217 - }, - { - "epoch": 0.88, - "grad_norm": 1.2803233628460835, - "learning_rate": 8.011658980568903e-07, - "loss": 0.1636, - "step": 17218 - }, - { - "epoch": 0.88, - "grad_norm": 0.8006138395157962, - "learning_rate": 8.005200950117275e-07, - "loss": 0.1514, - "step": 17219 - }, - { - "epoch": 0.88, - "grad_norm": 1.0802690996728705, - "learning_rate": 7.998745415029762e-07, - "loss": 0.1676, - "step": 17220 - }, - { - "epoch": 0.88, - "grad_norm": 1.227853461420169, - "learning_rate": 7.99229237548148e-07, - "loss": 0.1644, - "step": 17221 - }, - { - "epoch": 0.88, - "grad_norm": 1.0425876802294116, - "learning_rate": 7.985841831647489e-07, - "loss": 0.168, - "step": 17222 - }, - { - "epoch": 0.88, - "grad_norm": 1.0061004327128427, - "learning_rate": 7.979393783702704e-07, - "loss": 0.1621, - "step": 17223 - }, - { - "epoch": 0.88, - "grad_norm": 1.6922528782775406, - "learning_rate": 7.972948231822087e-07, - "loss": 0.1624, - "step": 17224 - }, - { - "epoch": 0.88, - "grad_norm": 0.9172038906437257, - "learning_rate": 7.966505176180428e-07, - "loss": 0.1756, - "step": 17225 - }, - { - "epoch": 0.88, - "grad_norm": 1.3744976825971134, - "learning_rate": 7.960064616952523e-07, - "loss": 0.1641, - "step": 17226 - }, - { - "epoch": 0.88, - "grad_norm": 1.268173030375666, - "learning_rate": 7.953626554313055e-07, - "loss": 0.1457, - "step": 17227 - }, - { - "epoch": 0.88, - "grad_norm": 1.4362620959002659, - "learning_rate": 7.947190988436681e-07, - "loss": 0.1489, - "step": 17228 - }, - { - "epoch": 0.88, - "grad_norm": 1.1619879743795989, - "learning_rate": 7.940757919497944e-07, - "loss": 0.1814, - "step": 17229 - }, - { - "epoch": 0.88, - "grad_norm": 0.9927394194170999, - "learning_rate": 7.934327347671333e-07, - "loss": 0.1714, - "step": 17230 - }, - { - "epoch": 0.88, - "grad_norm": 0.9038842448831277, - "learning_rate": 7.927899273131301e-07, - "loss": 0.1454, - "step": 17231 - }, - { - "epoch": 0.88, - "grad_norm": 1.3595367981253321, - "learning_rate": 7.921473696052206e-07, - "loss": 0.1528, - "step": 17232 - }, - { - "epoch": 0.88, - "grad_norm": 1.0757902365280851, - "learning_rate": 7.915050616608333e-07, - "loss": 0.1663, - "step": 17233 - }, - { - "epoch": 0.88, - "grad_norm": 1.2885867737737462, - "learning_rate": 7.90863003497393e-07, - "loss": 0.1816, - "step": 17234 - }, - { - "epoch": 0.88, - "grad_norm": 1.374042545612596, - "learning_rate": 7.902211951323135e-07, - "loss": 0.1461, - "step": 17235 - }, - { - "epoch": 0.88, - "grad_norm": 1.2403023423821855, - "learning_rate": 7.895796365830021e-07, - "loss": 0.1737, - "step": 17236 - }, - { - "epoch": 0.88, - "grad_norm": 1.1516657453656927, - "learning_rate": 7.889383278668661e-07, - "loss": 0.1486, - "step": 17237 - }, - { - "epoch": 0.88, - "grad_norm": 1.3981759897934891, - "learning_rate": 7.882972690012957e-07, - "loss": 0.1755, - "step": 17238 - }, - { - "epoch": 0.88, - "grad_norm": 1.3367001388773048, - "learning_rate": 7.876564600036818e-07, - "loss": 0.1781, - "step": 17239 - }, - { - "epoch": 0.88, - "grad_norm": 0.9591334225113681, - "learning_rate": 7.870159008914069e-07, - "loss": 0.163, - "step": 17240 - }, - { - "epoch": 0.88, - "grad_norm": 1.1275579944257983, - "learning_rate": 7.863755916818483e-07, - "loss": 0.1838, - "step": 17241 - }, - { - "epoch": 0.88, - "grad_norm": 1.4577251812287217, - "learning_rate": 7.85735532392371e-07, - "loss": 0.164, - "step": 17242 - }, - { - "epoch": 0.88, - "grad_norm": 1.0104330875157992, - "learning_rate": 7.850957230403378e-07, - "loss": 0.1717, - "step": 17243 - }, - { - "epoch": 0.88, - "grad_norm": 1.104711385825246, - "learning_rate": 7.844561636431036e-07, - "loss": 0.1879, - "step": 17244 - }, - { - "epoch": 0.88, - "grad_norm": 1.5995755578306345, - "learning_rate": 7.838168542180169e-07, - "loss": 0.1398, - "step": 17245 - }, - { - "epoch": 0.88, - "grad_norm": 1.1064120870453258, - "learning_rate": 7.83177794782417e-07, - "loss": 0.1694, - "step": 17246 - }, - { - "epoch": 0.88, - "grad_norm": 0.9905646769100281, - "learning_rate": 7.825389853536403e-07, - "loss": 0.1724, - "step": 17247 - }, - { - "epoch": 0.88, - "grad_norm": 1.0883238839154536, - "learning_rate": 7.819004259490148e-07, - "loss": 0.1671, - "step": 17248 - }, - { - "epoch": 0.88, - "grad_norm": 1.589772887880888, - "learning_rate": 7.812621165858625e-07, - "loss": 0.1989, - "step": 17249 - }, - { - "epoch": 0.88, - "grad_norm": 0.9767483701679833, - "learning_rate": 7.806240572814927e-07, - "loss": 0.1762, - "step": 17250 - }, - { - "epoch": 0.88, - "grad_norm": 0.9107924632802049, - "learning_rate": 7.799862480532194e-07, - "loss": 0.1513, - "step": 17251 - }, - { - "epoch": 0.88, - "grad_norm": 0.9780873235784965, - "learning_rate": 7.793486889183377e-07, - "loss": 0.1605, - "step": 17252 - }, - { - "epoch": 0.88, - "grad_norm": 1.2925286403963623, - "learning_rate": 7.787113798941449e-07, - "loss": 0.1701, - "step": 17253 - }, - { - "epoch": 0.88, - "grad_norm": 1.2855320737032634, - "learning_rate": 7.780743209979269e-07, - "loss": 0.1574, - "step": 17254 - }, - { - "epoch": 0.88, - "grad_norm": 1.0986164566088186, - "learning_rate": 7.774375122469624e-07, - "loss": 0.1723, - "step": 17255 - }, - { - "epoch": 0.88, - "grad_norm": 1.3614995748491658, - "learning_rate": 7.768009536585264e-07, - "loss": 0.1501, - "step": 17256 - }, - { - "epoch": 0.88, - "grad_norm": 1.6770303975905876, - "learning_rate": 7.76164645249885e-07, - "loss": 0.1551, - "step": 17257 - }, - { - "epoch": 0.88, - "grad_norm": 1.076382257356652, - "learning_rate": 7.755285870383011e-07, - "loss": 0.165, - "step": 17258 - }, - { - "epoch": 0.88, - "grad_norm": 1.5438451565334113, - "learning_rate": 7.748927790410221e-07, - "loss": 0.1292, - "step": 17259 - }, - { - "epoch": 0.88, - "grad_norm": 1.2707148714608554, - "learning_rate": 7.742572212753008e-07, - "loss": 0.1611, - "step": 17260 - }, - { - "epoch": 0.88, - "grad_norm": 1.1250090687287368, - "learning_rate": 7.736219137583701e-07, - "loss": 0.1839, - "step": 17261 - }, - { - "epoch": 0.88, - "grad_norm": 0.9268465624245334, - "learning_rate": 7.729868565074694e-07, - "loss": 0.1683, - "step": 17262 - }, - { - "epoch": 0.88, - "grad_norm": 0.8984489920500216, - "learning_rate": 7.723520495398185e-07, - "loss": 0.1573, - "step": 17263 - }, - { - "epoch": 0.88, - "grad_norm": 1.2633143592884477, - "learning_rate": 7.717174928726401e-07, - "loss": 0.1511, - "step": 17264 - }, - { - "epoch": 0.88, - "grad_norm": 1.4414316188321858, - "learning_rate": 7.710831865231461e-07, - "loss": 0.1825, - "step": 17265 - }, - { - "epoch": 0.88, - "grad_norm": 1.0270181484599707, - "learning_rate": 7.704491305085427e-07, - "loss": 0.193, - "step": 17266 - }, - { - "epoch": 0.88, - "grad_norm": 0.7770444000137979, - "learning_rate": 7.698153248460271e-07, - "loss": 0.1773, - "step": 17267 - }, - { - "epoch": 0.88, - "grad_norm": 1.3377018839713426, - "learning_rate": 7.691817695527936e-07, - "loss": 0.1898, - "step": 17268 - }, - { - "epoch": 0.88, - "grad_norm": 1.2142810653464722, - "learning_rate": 7.68548464646024e-07, - "loss": 0.1691, - "step": 17269 - }, - { - "epoch": 0.88, - "grad_norm": 1.133137337818727, - "learning_rate": 7.679154101428998e-07, - "loss": 0.1435, - "step": 17270 - }, - { - "epoch": 0.88, - "grad_norm": 1.0938459587105016, - "learning_rate": 7.672826060605931e-07, - "loss": 0.1618, - "step": 17271 - }, - { - "epoch": 0.88, - "grad_norm": 1.30171994693312, - "learning_rate": 7.666500524162646e-07, - "loss": 0.1507, - "step": 17272 - }, - { - "epoch": 0.88, - "grad_norm": 1.2058208735977238, - "learning_rate": 7.660177492270749e-07, - "loss": 0.1453, - "step": 17273 - }, - { - "epoch": 0.88, - "grad_norm": 1.1198741455359467, - "learning_rate": 7.653856965101747e-07, - "loss": 0.1589, - "step": 17274 - }, - { - "epoch": 0.88, - "grad_norm": 0.8746745470939669, - "learning_rate": 7.647538942827115e-07, - "loss": 0.145, - "step": 17275 - }, - { - "epoch": 0.88, - "grad_norm": 1.2037543741940957, - "learning_rate": 7.641223425618193e-07, - "loss": 0.1648, - "step": 17276 - }, - { - "epoch": 0.88, - "grad_norm": 0.8927725992603462, - "learning_rate": 7.634910413646313e-07, - "loss": 0.1376, - "step": 17277 - }, - { - "epoch": 0.88, - "grad_norm": 1.3217628032750142, - "learning_rate": 7.62859990708269e-07, - "loss": 0.1622, - "step": 17278 - }, - { - "epoch": 0.88, - "grad_norm": 1.1443610497786343, - "learning_rate": 7.622291906098523e-07, - "loss": 0.156, - "step": 17279 - }, - { - "epoch": 0.88, - "grad_norm": 1.2311048614980642, - "learning_rate": 7.615986410864895e-07, - "loss": 0.1614, - "step": 17280 - }, - { - "epoch": 0.88, - "grad_norm": 1.2165201760489044, - "learning_rate": 7.609683421552861e-07, - "loss": 0.1638, - "step": 17281 - }, - { - "epoch": 0.88, - "grad_norm": 1.5208694414991424, - "learning_rate": 7.603382938333382e-07, - "loss": 0.1596, - "step": 17282 - }, - { - "epoch": 0.88, - "grad_norm": 1.2366002878916877, - "learning_rate": 7.597084961377343e-07, - "loss": 0.1453, - "step": 17283 - }, - { - "epoch": 0.88, - "grad_norm": 1.956702883190355, - "learning_rate": 7.590789490855599e-07, - "loss": 0.1609, - "step": 17284 - }, - { - "epoch": 0.88, - "grad_norm": 1.7430471552680176, - "learning_rate": 7.584496526938933e-07, - "loss": 0.1864, - "step": 17285 - }, - { - "epoch": 0.88, - "grad_norm": 1.1854800332218853, - "learning_rate": 7.578206069797989e-07, - "loss": 0.1511, - "step": 17286 - }, - { - "epoch": 0.88, - "grad_norm": 1.5933026546514464, - "learning_rate": 7.57191811960345e-07, - "loss": 0.1418, - "step": 17287 - }, - { - "epoch": 0.88, - "grad_norm": 1.140500896412719, - "learning_rate": 7.565632676525858e-07, - "loss": 0.1621, - "step": 17288 - }, - { - "epoch": 0.88, - "grad_norm": 1.2614036948724896, - "learning_rate": 7.559349740735677e-07, - "loss": 0.1618, - "step": 17289 - }, - { - "epoch": 0.88, - "grad_norm": 1.0229832009884556, - "learning_rate": 7.55306931240335e-07, - "loss": 0.1566, - "step": 17290 - }, - { - "epoch": 0.88, - "grad_norm": 1.1404776623724786, - "learning_rate": 7.546791391699248e-07, - "loss": 0.151, - "step": 17291 - }, - { - "epoch": 0.88, - "grad_norm": 9.027669539432624, - "learning_rate": 7.540515978793661e-07, - "loss": 0.154, - "step": 17292 - }, - { - "epoch": 0.88, - "grad_norm": 0.9730493748099621, - "learning_rate": 7.534243073856784e-07, - "loss": 0.1629, - "step": 17293 - }, - { - "epoch": 0.88, - "grad_norm": 0.9318541669733853, - "learning_rate": 7.527972677058814e-07, - "loss": 0.1712, - "step": 17294 - }, - { - "epoch": 0.88, - "grad_norm": 1.128684780041153, - "learning_rate": 7.521704788569783e-07, - "loss": 0.1829, - "step": 17295 - }, - { - "epoch": 0.88, - "grad_norm": 1.0252096353600924, - "learning_rate": 7.515439408559744e-07, - "loss": 0.1749, - "step": 17296 - }, - { - "epoch": 0.88, - "grad_norm": 0.995727140664652, - "learning_rate": 7.509176537198626e-07, - "loss": 0.1692, - "step": 17297 - }, - { - "epoch": 0.88, - "grad_norm": 0.969441578451583, - "learning_rate": 7.502916174656338e-07, - "loss": 0.1593, - "step": 17298 - }, - { - "epoch": 0.88, - "grad_norm": 1.2329684303060555, - "learning_rate": 7.496658321102646e-07, - "loss": 0.1613, - "step": 17299 - }, - { - "epoch": 0.88, - "grad_norm": 0.9423363240103179, - "learning_rate": 7.490402976707323e-07, - "loss": 0.1572, - "step": 17300 - }, - { - "epoch": 0.88, - "grad_norm": 1.2564010048538554, - "learning_rate": 7.484150141640056e-07, - "loss": 0.173, - "step": 17301 - }, - { - "epoch": 0.88, - "grad_norm": 1.070435552031945, - "learning_rate": 7.477899816070444e-07, - "loss": 0.1659, - "step": 17302 - }, - { - "epoch": 0.88, - "grad_norm": 1.5672121229787728, - "learning_rate": 7.471652000168017e-07, - "loss": 0.1677, - "step": 17303 - }, - { - "epoch": 0.88, - "grad_norm": 1.0885715864729035, - "learning_rate": 7.465406694102273e-07, - "loss": 0.1572, - "step": 17304 - }, - { - "epoch": 0.88, - "grad_norm": 1.436480958250531, - "learning_rate": 7.459163898042599e-07, - "loss": 0.1626, - "step": 17305 - }, - { - "epoch": 0.88, - "grad_norm": 1.405159042399723, - "learning_rate": 7.452923612158303e-07, - "loss": 0.1751, - "step": 17306 - }, - { - "epoch": 0.88, - "grad_norm": 0.9347108488017326, - "learning_rate": 7.446685836618706e-07, - "loss": 0.1812, - "step": 17307 - }, - { - "epoch": 0.88, - "grad_norm": 1.16810165610296, - "learning_rate": 7.440450571592972e-07, - "loss": 0.1463, - "step": 17308 - }, - { - "epoch": 0.88, - "grad_norm": 1.045784749400454, - "learning_rate": 7.434217817250233e-07, - "loss": 0.1556, - "step": 17309 - }, - { - "epoch": 0.88, - "grad_norm": 1.2025574622438935, - "learning_rate": 7.427987573759576e-07, - "loss": 0.1656, - "step": 17310 - }, - { - "epoch": 0.88, - "grad_norm": 1.0111063760692782, - "learning_rate": 7.421759841289989e-07, - "loss": 0.1608, - "step": 17311 - }, - { - "epoch": 0.88, - "grad_norm": 1.522813352090518, - "learning_rate": 7.41553462001039e-07, - "loss": 0.1501, - "step": 17312 - }, - { - "epoch": 0.88, - "grad_norm": 1.2803206073977835, - "learning_rate": 7.409311910089645e-07, - "loss": 0.1971, - "step": 17313 - }, - { - "epoch": 0.88, - "grad_norm": 0.8767646651286921, - "learning_rate": 7.403091711696542e-07, - "loss": 0.1629, - "step": 17314 - }, - { - "epoch": 0.88, - "grad_norm": 1.2290321284414285, - "learning_rate": 7.396874024999811e-07, - "loss": 0.1695, - "step": 17315 - }, - { - "epoch": 0.88, - "grad_norm": 1.4577615994194053, - "learning_rate": 7.390658850168098e-07, - "loss": 0.1624, - "step": 17316 - }, - { - "epoch": 0.88, - "grad_norm": 1.235987193669687, - "learning_rate": 7.384446187369987e-07, - "loss": 0.1489, - "step": 17317 - }, - { - "epoch": 0.88, - "grad_norm": 0.7957026665132977, - "learning_rate": 7.378236036774e-07, - "loss": 0.1522, - "step": 17318 - }, - { - "epoch": 0.88, - "grad_norm": 0.8319149713315096, - "learning_rate": 7.372028398548614e-07, - "loss": 0.137, - "step": 17319 - }, - { - "epoch": 0.88, - "grad_norm": 1.2876008102788261, - "learning_rate": 7.365823272862183e-07, - "loss": 0.1818, - "step": 17320 - }, - { - "epoch": 0.88, - "grad_norm": 1.1938219095258311, - "learning_rate": 7.359620659883026e-07, - "loss": 0.1785, - "step": 17321 - }, - { - "epoch": 0.88, - "grad_norm": 1.1088942410264782, - "learning_rate": 7.35342055977939e-07, - "loss": 0.1956, - "step": 17322 - }, - { - "epoch": 0.88, - "grad_norm": 1.463606545831211, - "learning_rate": 7.347222972719459e-07, - "loss": 0.1475, - "step": 17323 - }, - { - "epoch": 0.88, - "grad_norm": 0.9638203971133041, - "learning_rate": 7.341027898871345e-07, - "loss": 0.1626, - "step": 17324 - }, - { - "epoch": 0.88, - "grad_norm": 1.7616047380436473, - "learning_rate": 7.334835338403056e-07, - "loss": 0.1497, - "step": 17325 - }, - { - "epoch": 0.88, - "grad_norm": 1.1910719387044761, - "learning_rate": 7.328645291482606e-07, - "loss": 0.1651, - "step": 17326 - }, - { - "epoch": 0.88, - "grad_norm": 1.595718060050182, - "learning_rate": 7.322457758277879e-07, - "loss": 0.1504, - "step": 17327 - }, - { - "epoch": 0.88, - "grad_norm": 0.7702924587303707, - "learning_rate": 7.316272738956731e-07, - "loss": 0.1508, - "step": 17328 - }, - { - "epoch": 0.88, - "grad_norm": 1.0275799989309966, - "learning_rate": 7.310090233686917e-07, - "loss": 0.1594, - "step": 17329 - }, - { - "epoch": 0.88, - "grad_norm": 0.9451932165357124, - "learning_rate": 7.303910242636147e-07, - "loss": 0.1729, - "step": 17330 - }, - { - "epoch": 0.88, - "grad_norm": 0.8652856570187278, - "learning_rate": 7.297732765972033e-07, - "loss": 0.158, - "step": 17331 - }, - { - "epoch": 0.88, - "grad_norm": 0.7772120827819856, - "learning_rate": 7.29155780386217e-07, - "loss": 0.1576, - "step": 17332 - }, - { - "epoch": 0.88, - "grad_norm": 0.8908650335082182, - "learning_rate": 7.285385356474017e-07, - "loss": 0.1343, - "step": 17333 - }, - { - "epoch": 0.88, - "grad_norm": 1.237847844903063, - "learning_rate": 7.27921542397505e-07, - "loss": 0.1735, - "step": 17334 - }, - { - "epoch": 0.88, - "grad_norm": 1.1559617010008287, - "learning_rate": 7.273048006532569e-07, - "loss": 0.1639, - "step": 17335 - }, - { - "epoch": 0.88, - "grad_norm": 1.1807040984900672, - "learning_rate": 7.266883104313916e-07, - "loss": 0.1661, - "step": 17336 - }, - { - "epoch": 0.88, - "grad_norm": 0.9171491441040258, - "learning_rate": 7.260720717486281e-07, - "loss": 0.1743, - "step": 17337 - }, - { - "epoch": 0.88, - "grad_norm": 0.9859557166131068, - "learning_rate": 7.254560846216863e-07, - "loss": 0.1809, - "step": 17338 - }, - { - "epoch": 0.88, - "grad_norm": 1.1130369511140106, - "learning_rate": 7.248403490672695e-07, - "loss": 0.1541, - "step": 17339 - }, - { - "epoch": 0.88, - "grad_norm": 0.8683717597560087, - "learning_rate": 7.242248651020845e-07, - "loss": 0.1517, - "step": 17340 - }, - { - "epoch": 0.88, - "grad_norm": 0.8444625384194985, - "learning_rate": 7.236096327428233e-07, - "loss": 0.1519, - "step": 17341 - }, - { - "epoch": 0.88, - "grad_norm": 1.2794386141678484, - "learning_rate": 7.229946520061737e-07, - "loss": 0.1743, - "step": 17342 - }, - { - "epoch": 0.88, - "grad_norm": 1.0310179364533691, - "learning_rate": 7.223799229088179e-07, - "loss": 0.1449, - "step": 17343 - }, - { - "epoch": 0.88, - "grad_norm": 0.7725121361002629, - "learning_rate": 7.217654454674305e-07, - "loss": 0.1587, - "step": 17344 - }, - { - "epoch": 0.88, - "grad_norm": 1.5382257187815973, - "learning_rate": 7.211512196986803e-07, - "loss": 0.1663, - "step": 17345 - }, - { - "epoch": 0.88, - "grad_norm": 1.0917289681842852, - "learning_rate": 7.205372456192272e-07, - "loss": 0.164, - "step": 17346 - }, - { - "epoch": 0.88, - "grad_norm": 0.9496468908631138, - "learning_rate": 7.199235232457258e-07, - "loss": 0.1825, - "step": 17347 - }, - { - "epoch": 0.88, - "grad_norm": 0.8778324978603581, - "learning_rate": 7.193100525948227e-07, - "loss": 0.1506, - "step": 17348 - }, - { - "epoch": 0.88, - "grad_norm": 1.910535826795464, - "learning_rate": 7.18696833683159e-07, - "loss": 0.1748, - "step": 17349 - }, - { - "epoch": 0.88, - "grad_norm": 1.096729609227987, - "learning_rate": 7.18083866527367e-07, - "loss": 0.1566, - "step": 17350 - }, - { - "epoch": 0.88, - "grad_norm": 0.8855534457008581, - "learning_rate": 7.174711511440757e-07, - "loss": 0.1595, - "step": 17351 - }, - { - "epoch": 0.88, - "grad_norm": 1.1438526198987724, - "learning_rate": 7.168586875499018e-07, - "loss": 0.1628, - "step": 17352 - }, - { - "epoch": 0.88, - "grad_norm": 0.9813627770034073, - "learning_rate": 7.162464757614606e-07, - "loss": 0.1505, - "step": 17353 - }, - { - "epoch": 0.88, - "grad_norm": 0.8963296520021135, - "learning_rate": 7.156345157953581e-07, - "loss": 0.1593, - "step": 17354 - }, - { - "epoch": 0.88, - "grad_norm": 1.0822470726148679, - "learning_rate": 7.150228076681954e-07, - "loss": 0.128, - "step": 17355 - }, - { - "epoch": 0.88, - "grad_norm": 1.0598933426021497, - "learning_rate": 7.144113513965623e-07, - "loss": 0.1831, - "step": 17356 - }, - { - "epoch": 0.88, - "grad_norm": 1.100669695491159, - "learning_rate": 7.138001469970468e-07, - "loss": 0.1648, - "step": 17357 - }, - { - "epoch": 0.88, - "grad_norm": 2.753646196389935, - "learning_rate": 7.131891944862269e-07, - "loss": 0.1572, - "step": 17358 - }, - { - "epoch": 0.88, - "grad_norm": 1.060548591987011, - "learning_rate": 7.125784938806723e-07, - "loss": 0.1664, - "step": 17359 - }, - { - "epoch": 0.88, - "grad_norm": 1.2714760170007762, - "learning_rate": 7.119680451969524e-07, - "loss": 0.1563, - "step": 17360 - }, - { - "epoch": 0.88, - "grad_norm": 1.391024615858319, - "learning_rate": 7.113578484516226e-07, - "loss": 0.1485, - "step": 17361 - }, - { - "epoch": 0.88, - "grad_norm": 1.0065629449928741, - "learning_rate": 7.107479036612375e-07, - "loss": 0.1482, - "step": 17362 - }, - { - "epoch": 0.88, - "grad_norm": 0.9222886347377515, - "learning_rate": 7.101382108423383e-07, - "loss": 0.1642, - "step": 17363 - }, - { - "epoch": 0.88, - "grad_norm": 0.9073932362794302, - "learning_rate": 7.095287700114673e-07, - "loss": 0.1695, - "step": 17364 - }, - { - "epoch": 0.88, - "grad_norm": 0.9807012578354181, - "learning_rate": 7.089195811851502e-07, - "loss": 0.1545, - "step": 17365 - }, - { - "epoch": 0.88, - "grad_norm": 1.1494039005467738, - "learning_rate": 7.083106443799171e-07, - "loss": 0.1607, - "step": 17366 - }, - { - "epoch": 0.88, - "grad_norm": 0.9978285693974382, - "learning_rate": 7.077019596122802e-07, - "loss": 0.1654, - "step": 17367 - }, - { - "epoch": 0.88, - "grad_norm": 1.099167488039827, - "learning_rate": 7.070935268987545e-07, - "loss": 0.1693, - "step": 17368 - }, - { - "epoch": 0.88, - "grad_norm": 0.8713895734861333, - "learning_rate": 7.064853462558397e-07, - "loss": 0.1564, - "step": 17369 - }, - { - "epoch": 0.88, - "grad_norm": 1.1131621662933266, - "learning_rate": 7.05877417700035e-07, - "loss": 0.1686, - "step": 17370 - }, - { - "epoch": 0.88, - "grad_norm": 1.0392605432584487, - "learning_rate": 7.052697412478304e-07, - "loss": 0.161, - "step": 17371 - }, - { - "epoch": 0.88, - "grad_norm": 0.9436772944917501, - "learning_rate": 7.046623169157107e-07, - "loss": 0.1495, - "step": 17372 - }, - { - "epoch": 0.88, - "grad_norm": 0.9696917040806536, - "learning_rate": 7.040551447201494e-07, - "loss": 0.1708, - "step": 17373 - }, - { - "epoch": 0.88, - "grad_norm": 0.9909409102170923, - "learning_rate": 7.034482246776187e-07, - "loss": 0.1651, - "step": 17374 - }, - { - "epoch": 0.88, - "grad_norm": 0.8782964173375589, - "learning_rate": 7.028415568045799e-07, - "loss": 0.1585, - "step": 17375 - }, - { - "epoch": 0.88, - "grad_norm": 1.4118814762288865, - "learning_rate": 7.022351411174866e-07, - "loss": 0.1663, - "step": 17376 - }, - { - "epoch": 0.88, - "grad_norm": 1.0070090083368075, - "learning_rate": 7.016289776327922e-07, - "loss": 0.1451, - "step": 17377 - }, - { - "epoch": 0.88, - "grad_norm": 1.237220825335278, - "learning_rate": 7.010230663669359e-07, - "loss": 0.1594, - "step": 17378 - }, - { - "epoch": 0.88, - "grad_norm": 1.4767425759880979, - "learning_rate": 7.004174073363546e-07, - "loss": 0.1521, - "step": 17379 - }, - { - "epoch": 0.88, - "grad_norm": 1.376161358185498, - "learning_rate": 6.998120005574749e-07, - "loss": 0.1733, - "step": 17380 - }, - { - "epoch": 0.88, - "grad_norm": 1.2946303494952718, - "learning_rate": 6.992068460467227e-07, - "loss": 0.1671, - "step": 17381 - }, - { - "epoch": 0.88, - "grad_norm": 0.9239760508107825, - "learning_rate": 6.986019438205082e-07, - "loss": 0.1603, - "step": 17382 - }, - { - "epoch": 0.88, - "grad_norm": 1.7276860307771067, - "learning_rate": 6.979972938952428e-07, - "loss": 0.1722, - "step": 17383 - }, - { - "epoch": 0.88, - "grad_norm": 1.2215560557678822, - "learning_rate": 6.973928962873244e-07, - "loss": 0.1796, - "step": 17384 - }, - { - "epoch": 0.88, - "grad_norm": 1.2635692699347436, - "learning_rate": 6.96788751013151e-07, - "loss": 0.1584, - "step": 17385 - }, - { - "epoch": 0.88, - "grad_norm": 0.8953773561330749, - "learning_rate": 6.961848580891062e-07, - "loss": 0.1671, - "step": 17386 - }, - { - "epoch": 0.88, - "grad_norm": 0.9207265981103875, - "learning_rate": 6.955812175315735e-07, - "loss": 0.1659, - "step": 17387 - }, - { - "epoch": 0.88, - "grad_norm": 1.3557730967996362, - "learning_rate": 6.949778293569253e-07, - "loss": 0.1504, - "step": 17388 - }, - { - "epoch": 0.88, - "grad_norm": 1.4123195119500518, - "learning_rate": 6.943746935815299e-07, - "loss": 0.1495, - "step": 17389 - }, - { - "epoch": 0.88, - "grad_norm": 1.3687925436747972, - "learning_rate": 6.937718102217461e-07, - "loss": 0.1978, - "step": 17390 - }, - { - "epoch": 0.88, - "grad_norm": 1.2313131796932406, - "learning_rate": 6.931691792939288e-07, - "loss": 0.1657, - "step": 17391 - }, - { - "epoch": 0.88, - "grad_norm": 0.9197371602670851, - "learning_rate": 6.925668008144204e-07, - "loss": 0.1782, - "step": 17392 - }, - { - "epoch": 0.88, - "grad_norm": 0.8727111408694579, - "learning_rate": 6.919646747995668e-07, - "loss": 0.1529, - "step": 17393 - }, - { - "epoch": 0.88, - "grad_norm": 0.8399857072424712, - "learning_rate": 6.913628012656959e-07, - "loss": 0.1724, - "step": 17394 - }, - { - "epoch": 0.88, - "grad_norm": 1.2380908736083993, - "learning_rate": 6.907611802291325e-07, - "loss": 0.154, - "step": 17395 - }, - { - "epoch": 0.88, - "grad_norm": 1.0970936765726857, - "learning_rate": 6.901598117061992e-07, - "loss": 0.1483, - "step": 17396 - }, - { - "epoch": 0.88, - "grad_norm": 1.0378284593686915, - "learning_rate": 6.895586957132061e-07, - "loss": 0.1594, - "step": 17397 - }, - { - "epoch": 0.88, - "grad_norm": 1.0045317486233272, - "learning_rate": 6.889578322664614e-07, - "loss": 0.1703, - "step": 17398 - }, - { - "epoch": 0.88, - "grad_norm": 2.096540638874496, - "learning_rate": 6.883572213822598e-07, - "loss": 0.1555, - "step": 17399 - }, - { - "epoch": 0.88, - "grad_norm": 1.6452198223024794, - "learning_rate": 6.87756863076896e-07, - "loss": 0.1605, - "step": 17400 - }, - { - "epoch": 0.88, - "grad_norm": 1.1708595471622938, - "learning_rate": 6.871567573666516e-07, - "loss": 0.1745, - "step": 17401 - }, - { - "epoch": 0.88, - "grad_norm": 0.9364607771802455, - "learning_rate": 6.865569042678066e-07, - "loss": 0.1622, - "step": 17402 - }, - { - "epoch": 0.88, - "grad_norm": 1.069816199579359, - "learning_rate": 6.859573037966316e-07, - "loss": 0.153, - "step": 17403 - }, - { - "epoch": 0.89, - "grad_norm": 1.110928969063031, - "learning_rate": 6.853579559693913e-07, - "loss": 0.1694, - "step": 17404 - }, - { - "epoch": 0.89, - "grad_norm": 0.982718671441663, - "learning_rate": 6.847588608023414e-07, - "loss": 0.1646, - "step": 17405 - }, - { - "epoch": 0.89, - "grad_norm": 0.9125047628380294, - "learning_rate": 6.841600183117336e-07, - "loss": 0.1578, - "step": 17406 - }, - { - "epoch": 0.89, - "grad_norm": 4.941456937741606, - "learning_rate": 6.835614285138115e-07, - "loss": 0.1549, - "step": 17407 - }, - { - "epoch": 0.89, - "grad_norm": 1.5330577556007376, - "learning_rate": 6.829630914248131e-07, - "loss": 0.1576, - "step": 17408 - }, - { - "epoch": 0.89, - "grad_norm": 1.6233044430724675, - "learning_rate": 6.823650070609666e-07, - "loss": 0.1449, - "step": 17409 - }, - { - "epoch": 0.89, - "grad_norm": 1.2905297846245838, - "learning_rate": 6.817671754384958e-07, - "loss": 0.167, - "step": 17410 - }, - { - "epoch": 0.89, - "grad_norm": 0.994848823740401, - "learning_rate": 6.811695965736176e-07, - "loss": 0.1641, - "step": 17411 - }, - { - "epoch": 0.89, - "grad_norm": 1.2405990289193678, - "learning_rate": 6.805722704825379e-07, - "loss": 0.1834, - "step": 17412 - }, - { - "epoch": 0.89, - "grad_norm": 1.263778459068319, - "learning_rate": 6.799751971814628e-07, - "loss": 0.1523, - "step": 17413 - }, - { - "epoch": 0.89, - "grad_norm": 1.227892496199799, - "learning_rate": 6.793783766865858e-07, - "loss": 0.1575, - "step": 17414 - }, - { - "epoch": 0.89, - "grad_norm": 0.9766224628091549, - "learning_rate": 6.787818090140985e-07, - "loss": 0.1624, - "step": 17415 - }, - { - "epoch": 0.89, - "grad_norm": 1.0826552449351832, - "learning_rate": 6.781854941801802e-07, - "loss": 0.1557, - "step": 17416 - }, - { - "epoch": 0.89, - "grad_norm": 1.076768625593469, - "learning_rate": 6.775894322010079e-07, - "loss": 0.1573, - "step": 17417 - }, - { - "epoch": 0.89, - "grad_norm": 1.107756832754881, - "learning_rate": 6.769936230927477e-07, - "loss": 0.1561, - "step": 17418 - }, - { - "epoch": 0.89, - "grad_norm": 0.9123922774250832, - "learning_rate": 6.763980668715631e-07, - "loss": 0.1576, - "step": 17419 - }, - { - "epoch": 0.89, - "grad_norm": 1.4473130817984259, - "learning_rate": 6.758027635536057e-07, - "loss": 0.1613, - "step": 17420 - }, - { - "epoch": 0.89, - "grad_norm": 1.038560087888518, - "learning_rate": 6.752077131550272e-07, - "loss": 0.1369, - "step": 17421 - }, - { - "epoch": 0.89, - "grad_norm": 1.1176387403979628, - "learning_rate": 6.746129156919645e-07, - "loss": 0.164, - "step": 17422 - }, - { - "epoch": 0.89, - "grad_norm": 1.4993249242433018, - "learning_rate": 6.740183711805537e-07, - "loss": 0.1457, - "step": 17423 - }, - { - "epoch": 0.89, - "grad_norm": 1.496667523309315, - "learning_rate": 6.734240796369207e-07, - "loss": 0.159, - "step": 17424 - }, - { - "epoch": 0.89, - "grad_norm": 1.389756354612381, - "learning_rate": 6.728300410771871e-07, - "loss": 0.1447, - "step": 17425 - }, - { - "epoch": 0.89, - "grad_norm": 1.2795009098863095, - "learning_rate": 6.722362555174644e-07, - "loss": 0.1725, - "step": 17426 - }, - { - "epoch": 0.89, - "grad_norm": 0.9403435892659728, - "learning_rate": 6.71642722973862e-07, - "loss": 0.1661, - "step": 17427 - }, - { - "epoch": 0.89, - "grad_norm": 0.8253890419597139, - "learning_rate": 6.710494434624781e-07, - "loss": 0.1522, - "step": 17428 - }, - { - "epoch": 0.89, - "grad_norm": 0.9326051873116562, - "learning_rate": 6.704564169994022e-07, - "loss": 0.1802, - "step": 17429 - }, - { - "epoch": 0.89, - "grad_norm": 1.464770167732049, - "learning_rate": 6.698636436007256e-07, - "loss": 0.1509, - "step": 17430 - }, - { - "epoch": 0.89, - "grad_norm": 1.1107217098498596, - "learning_rate": 6.692711232825222e-07, - "loss": 0.1652, - "step": 17431 - }, - { - "epoch": 0.89, - "grad_norm": 1.305902964651348, - "learning_rate": 6.686788560608671e-07, - "loss": 0.1673, - "step": 17432 - }, - { - "epoch": 0.89, - "grad_norm": 0.8709144814823717, - "learning_rate": 6.680868419518249e-07, - "loss": 0.1666, - "step": 17433 - }, - { - "epoch": 0.89, - "grad_norm": 1.1971517608356697, - "learning_rate": 6.674950809714553e-07, - "loss": 0.1649, - "step": 17434 - }, - { - "epoch": 0.89, - "grad_norm": 0.9360655756463602, - "learning_rate": 6.669035731358075e-07, - "loss": 0.1852, - "step": 17435 - }, - { - "epoch": 0.89, - "grad_norm": 1.0354548469117784, - "learning_rate": 6.663123184609299e-07, - "loss": 0.1611, - "step": 17436 - }, - { - "epoch": 0.89, - "grad_norm": 1.266579707399008, - "learning_rate": 6.657213169628551e-07, - "loss": 0.1612, - "step": 17437 - }, - { - "epoch": 0.89, - "grad_norm": 1.0852045706792273, - "learning_rate": 6.651305686576182e-07, - "loss": 0.1614, - "step": 17438 - }, - { - "epoch": 0.89, - "grad_norm": 1.4032412323848755, - "learning_rate": 6.645400735612417e-07, - "loss": 0.1786, - "step": 17439 - }, - { - "epoch": 0.89, - "grad_norm": 1.065819611607627, - "learning_rate": 6.639498316897419e-07, - "loss": 0.165, - "step": 17440 - }, - { - "epoch": 0.89, - "grad_norm": 0.8775842417319027, - "learning_rate": 6.633598430591304e-07, - "loss": 0.1523, - "step": 17441 - }, - { - "epoch": 0.89, - "grad_norm": 0.9855080549366656, - "learning_rate": 6.627701076854121e-07, - "loss": 0.1678, - "step": 17442 - }, - { - "epoch": 0.89, - "grad_norm": 0.8520611272160736, - "learning_rate": 6.621806255845797e-07, - "loss": 0.177, - "step": 17443 - }, - { - "epoch": 0.89, - "grad_norm": 1.3422182280118413, - "learning_rate": 6.615913967726273e-07, - "loss": 0.162, - "step": 17444 - }, - { - "epoch": 0.89, - "grad_norm": 0.8702945134702448, - "learning_rate": 6.610024212655364e-07, - "loss": 0.1519, - "step": 17445 - }, - { - "epoch": 0.89, - "grad_norm": 0.8964156650542517, - "learning_rate": 6.604136990792797e-07, - "loss": 0.1481, - "step": 17446 - }, - { - "epoch": 0.89, - "grad_norm": 1.5792301251622436, - "learning_rate": 6.598252302298313e-07, - "loss": 0.1649, - "step": 17447 - }, - { - "epoch": 0.89, - "grad_norm": 1.0581326679188312, - "learning_rate": 6.592370147331495e-07, - "loss": 0.159, - "step": 17448 - }, - { - "epoch": 0.89, - "grad_norm": 1.5677395291460763, - "learning_rate": 6.586490526051903e-07, - "loss": 0.1634, - "step": 17449 - }, - { - "epoch": 0.89, - "grad_norm": 0.9677135043618276, - "learning_rate": 6.580613438619044e-07, - "loss": 0.1741, - "step": 17450 - }, - { - "epoch": 0.89, - "grad_norm": 1.1615411184151843, - "learning_rate": 6.574738885192322e-07, - "loss": 0.1679, - "step": 17451 - }, - { - "epoch": 0.89, - "grad_norm": 1.0354720596249762, - "learning_rate": 6.568866865931078e-07, - "loss": 0.1655, - "step": 17452 - }, - { - "epoch": 0.89, - "grad_norm": 0.9829772905302709, - "learning_rate": 6.562997380994618e-07, - "loss": 0.1731, - "step": 17453 - }, - { - "epoch": 0.89, - "grad_norm": 1.1348821137608869, - "learning_rate": 6.557130430542114e-07, - "loss": 0.1869, - "step": 17454 - }, - { - "epoch": 0.89, - "grad_norm": 0.9861542333307887, - "learning_rate": 6.551266014732738e-07, - "loss": 0.1699, - "step": 17455 - }, - { - "epoch": 0.89, - "grad_norm": 1.2911984007830952, - "learning_rate": 6.54540413372553e-07, - "loss": 0.1657, - "step": 17456 - }, - { - "epoch": 0.89, - "grad_norm": 1.093145298497331, - "learning_rate": 6.53954478767953e-07, - "loss": 0.1652, - "step": 17457 - }, - { - "epoch": 0.89, - "grad_norm": 0.9231078419410501, - "learning_rate": 6.533687976753644e-07, - "loss": 0.1535, - "step": 17458 - }, - { - "epoch": 0.89, - "grad_norm": 2.001758271155291, - "learning_rate": 6.527833701106745e-07, - "loss": 0.1653, - "step": 17459 - }, - { - "epoch": 0.89, - "grad_norm": 0.9807167557360661, - "learning_rate": 6.521981960897639e-07, - "loss": 0.1545, - "step": 17460 - }, - { - "epoch": 0.89, - "grad_norm": 1.0752620461582922, - "learning_rate": 6.516132756285065e-07, - "loss": 0.1577, - "step": 17461 - }, - { - "epoch": 0.89, - "grad_norm": 0.9511644088197974, - "learning_rate": 6.510286087427664e-07, - "loss": 0.1579, - "step": 17462 - }, - { - "epoch": 0.89, - "grad_norm": 1.01542497369087, - "learning_rate": 6.504441954484042e-07, - "loss": 0.1461, - "step": 17463 - }, - { - "epoch": 0.89, - "grad_norm": 1.0220895756406778, - "learning_rate": 6.498600357612717e-07, - "loss": 0.1428, - "step": 17464 - }, - { - "epoch": 0.89, - "grad_norm": 1.1266382645524304, - "learning_rate": 6.492761296972117e-07, - "loss": 0.1868, - "step": 17465 - }, - { - "epoch": 0.89, - "grad_norm": 1.2207458765720829, - "learning_rate": 6.486924772720648e-07, - "loss": 0.1826, - "step": 17466 - }, - { - "epoch": 0.89, - "grad_norm": 1.2781352593174329, - "learning_rate": 6.481090785016631e-07, - "loss": 0.1654, - "step": 17467 - }, - { - "epoch": 0.89, - "grad_norm": 1.1189948022298877, - "learning_rate": 6.475259334018314e-07, - "loss": 0.1608, - "step": 17468 - }, - { - "epoch": 0.89, - "grad_norm": 1.0855749092863607, - "learning_rate": 6.46943041988387e-07, - "loss": 0.153, - "step": 17469 - }, - { - "epoch": 0.89, - "grad_norm": 0.9274200063910341, - "learning_rate": 6.463604042771409e-07, - "loss": 0.1548, - "step": 17470 - }, - { - "epoch": 0.89, - "grad_norm": 0.9209645234721234, - "learning_rate": 6.457780202838959e-07, - "loss": 0.1528, - "step": 17471 - }, - { - "epoch": 0.89, - "grad_norm": 0.9047802876618245, - "learning_rate": 6.451958900244526e-07, - "loss": 0.1596, - "step": 17472 - }, - { - "epoch": 0.89, - "grad_norm": 1.0981685953170108, - "learning_rate": 6.446140135145973e-07, - "loss": 0.152, - "step": 17473 - }, - { - "epoch": 0.89, - "grad_norm": 0.8935378090645346, - "learning_rate": 6.440323907701173e-07, - "loss": 0.1533, - "step": 17474 - }, - { - "epoch": 0.89, - "grad_norm": 0.8424974250053628, - "learning_rate": 6.434510218067846e-07, - "loss": 0.1673, - "step": 17475 - }, - { - "epoch": 0.89, - "grad_norm": 0.8613847064248749, - "learning_rate": 6.428699066403721e-07, - "loss": 0.1462, - "step": 17476 - }, - { - "epoch": 0.89, - "grad_norm": 1.0212232620116857, - "learning_rate": 6.422890452866415e-07, - "loss": 0.1787, - "step": 17477 - }, - { - "epoch": 0.89, - "grad_norm": 3.7089778175383827, - "learning_rate": 6.417084377613514e-07, - "loss": 0.1809, - "step": 17478 - }, - { - "epoch": 0.89, - "grad_norm": 1.0972190976416634, - "learning_rate": 6.411280840802459e-07, - "loss": 0.145, - "step": 17479 - }, - { - "epoch": 0.89, - "grad_norm": 1.1042160964760777, - "learning_rate": 6.405479842590723e-07, - "loss": 0.1591, - "step": 17480 - }, - { - "epoch": 0.89, - "grad_norm": 0.9220425796674508, - "learning_rate": 6.399681383135625e-07, - "loss": 0.1582, - "step": 17481 - }, - { - "epoch": 0.89, - "grad_norm": 0.9683037480178739, - "learning_rate": 6.39388546259444e-07, - "loss": 0.1504, - "step": 17482 - }, - { - "epoch": 0.89, - "grad_norm": 1.115613663388435, - "learning_rate": 6.388092081124398e-07, - "loss": 0.1617, - "step": 17483 - }, - { - "epoch": 0.89, - "grad_norm": 1.7121992562713897, - "learning_rate": 6.382301238882649e-07, - "loss": 0.1673, - "step": 17484 - }, - { - "epoch": 0.89, - "grad_norm": 1.0209392183063348, - "learning_rate": 6.37651293602628e-07, - "loss": 0.1584, - "step": 17485 - }, - { - "epoch": 0.89, - "grad_norm": 1.334734571271829, - "learning_rate": 6.370727172712276e-07, - "loss": 0.1617, - "step": 17486 - }, - { - "epoch": 0.89, - "grad_norm": 0.9918643664287256, - "learning_rate": 6.364943949097591e-07, - "loss": 0.1551, - "step": 17487 - }, - { - "epoch": 0.89, - "grad_norm": 0.9000122737801736, - "learning_rate": 6.359163265339085e-07, - "loss": 0.1652, - "step": 17488 - }, - { - "epoch": 0.89, - "grad_norm": 1.0864971415623828, - "learning_rate": 6.353385121593569e-07, - "loss": 0.1427, - "step": 17489 - }, - { - "epoch": 0.89, - "grad_norm": 1.266860418067606, - "learning_rate": 6.347609518017761e-07, - "loss": 0.1563, - "step": 17490 - }, - { - "epoch": 0.89, - "grad_norm": 1.0140534448588183, - "learning_rate": 6.341836454768358e-07, - "loss": 0.1619, - "step": 17491 - }, - { - "epoch": 0.89, - "grad_norm": 1.3410852805223372, - "learning_rate": 6.3360659320019e-07, - "loss": 0.1645, - "step": 17492 - }, - { - "epoch": 0.89, - "grad_norm": 1.255999216791831, - "learning_rate": 6.330297949874952e-07, - "loss": 0.1625, - "step": 17493 - }, - { - "epoch": 0.89, - "grad_norm": 1.2359667320876915, - "learning_rate": 6.324532508543967e-07, - "loss": 0.1781, - "step": 17494 - }, - { - "epoch": 0.89, - "grad_norm": 1.0118891462587718, - "learning_rate": 6.318769608165332e-07, - "loss": 0.1705, - "step": 17495 - }, - { - "epoch": 0.89, - "grad_norm": 0.8681988186278751, - "learning_rate": 6.313009248895352e-07, - "loss": 0.1352, - "step": 17496 - }, - { - "epoch": 0.89, - "grad_norm": 0.8505491563844542, - "learning_rate": 6.307251430890315e-07, - "loss": 0.1637, - "step": 17497 - }, - { - "epoch": 0.89, - "grad_norm": 0.9572439560590739, - "learning_rate": 6.301496154306363e-07, - "loss": 0.1578, - "step": 17498 - }, - { - "epoch": 0.89, - "grad_norm": 1.1260369067949432, - "learning_rate": 6.295743419299605e-07, - "loss": 0.1516, - "step": 17499 - }, - { - "epoch": 0.89, - "grad_norm": 0.9257905386818395, - "learning_rate": 6.289993226026114e-07, - "loss": 0.1556, - "step": 17500 - }, - { - "epoch": 0.89, - "grad_norm": 1.5032840341708493, - "learning_rate": 6.284245574641834e-07, - "loss": 0.1533, - "step": 17501 - }, - { - "epoch": 0.89, - "grad_norm": 0.8893792018006633, - "learning_rate": 6.278500465302684e-07, - "loss": 0.1584, - "step": 17502 - }, - { - "epoch": 0.89, - "grad_norm": 1.2981337750490072, - "learning_rate": 6.272757898164506e-07, - "loss": 0.1617, - "step": 17503 - }, - { - "epoch": 0.89, - "grad_norm": 2.0296535628324643, - "learning_rate": 6.267017873383085e-07, - "loss": 0.1752, - "step": 17504 - }, - { - "epoch": 0.89, - "grad_norm": 0.9195443136945383, - "learning_rate": 6.261280391114077e-07, - "loss": 0.1646, - "step": 17505 - }, - { - "epoch": 0.89, - "grad_norm": 0.9677007307761413, - "learning_rate": 6.255545451513146e-07, - "loss": 0.1379, - "step": 17506 - }, - { - "epoch": 0.89, - "grad_norm": 1.4911280417441737, - "learning_rate": 6.24981305473582e-07, - "loss": 0.1789, - "step": 17507 - }, - { - "epoch": 0.89, - "grad_norm": 1.4267656167569154, - "learning_rate": 6.244083200937634e-07, - "loss": 0.1667, - "step": 17508 - }, - { - "epoch": 0.89, - "grad_norm": 0.8906435122285429, - "learning_rate": 6.238355890273973e-07, - "loss": 0.1511, - "step": 17509 - }, - { - "epoch": 0.89, - "grad_norm": 1.120302944538742, - "learning_rate": 6.232631122900201e-07, - "loss": 0.1746, - "step": 17510 - }, - { - "epoch": 0.89, - "grad_norm": 1.0361573831639836, - "learning_rate": 6.226908898971596e-07, - "loss": 0.1612, - "step": 17511 - }, - { - "epoch": 0.89, - "grad_norm": 0.9827383407813749, - "learning_rate": 6.221189218643409e-07, - "loss": 0.1701, - "step": 17512 - }, - { - "epoch": 0.89, - "grad_norm": 1.2483588098983063, - "learning_rate": 6.21547208207075e-07, - "loss": 0.1732, - "step": 17513 - }, - { - "epoch": 0.89, - "grad_norm": 1.4254846113606228, - "learning_rate": 6.209757489408719e-07, - "loss": 0.1491, - "step": 17514 - }, - { - "epoch": 0.89, - "grad_norm": 1.438762605815942, - "learning_rate": 6.2040454408123e-07, - "loss": 0.1507, - "step": 17515 - }, - { - "epoch": 0.89, - "grad_norm": 1.0460399471502997, - "learning_rate": 6.198335936436451e-07, - "loss": 0.1535, - "step": 17516 - }, - { - "epoch": 0.89, - "grad_norm": 1.253299340529797, - "learning_rate": 6.192628976436044e-07, - "loss": 0.1462, - "step": 17517 - }, - { - "epoch": 0.89, - "grad_norm": 15.88619169160222, - "learning_rate": 6.186924560965856e-07, - "loss": 0.1825, - "step": 17518 - }, - { - "epoch": 0.89, - "grad_norm": 2.3292036213000302, - "learning_rate": 6.181222690180644e-07, - "loss": 0.1443, - "step": 17519 - }, - { - "epoch": 0.89, - "grad_norm": 1.03504688979119, - "learning_rate": 6.175523364235059e-07, - "loss": 0.1683, - "step": 17520 - }, - { - "epoch": 0.89, - "grad_norm": 1.253321521401051, - "learning_rate": 6.169826583283722e-07, - "loss": 0.1688, - "step": 17521 - }, - { - "epoch": 0.89, - "grad_norm": 1.0547134303571801, - "learning_rate": 6.164132347481122e-07, - "loss": 0.1816, - "step": 17522 - }, - { - "epoch": 0.89, - "grad_norm": 0.9580859608411995, - "learning_rate": 6.158440656981746e-07, - "loss": 0.1651, - "step": 17523 - }, - { - "epoch": 0.89, - "grad_norm": 1.509856539412932, - "learning_rate": 6.152751511939947e-07, - "loss": 0.158, - "step": 17524 - }, - { - "epoch": 0.89, - "grad_norm": 1.3455717781618253, - "learning_rate": 6.147064912510093e-07, - "loss": 0.1746, - "step": 17525 - }, - { - "epoch": 0.89, - "grad_norm": 1.227922347930265, - "learning_rate": 6.14138085884638e-07, - "loss": 0.1398, - "step": 17526 - }, - { - "epoch": 0.89, - "grad_norm": 1.1176263793989942, - "learning_rate": 6.135699351103031e-07, - "loss": 0.1848, - "step": 17527 - }, - { - "epoch": 0.89, - "grad_norm": 1.8362091794328037, - "learning_rate": 6.130020389434121e-07, - "loss": 0.155, - "step": 17528 - }, - { - "epoch": 0.89, - "grad_norm": 0.9411800601075747, - "learning_rate": 6.124343973993707e-07, - "loss": 0.1598, - "step": 17529 - }, - { - "epoch": 0.89, - "grad_norm": 1.0472801531129503, - "learning_rate": 6.118670104935765e-07, - "loss": 0.1622, - "step": 17530 - }, - { - "epoch": 0.89, - "grad_norm": 1.2564447978249575, - "learning_rate": 6.112998782414215e-07, - "loss": 0.1668, - "step": 17531 - }, - { - "epoch": 0.89, - "grad_norm": 0.8298318531080735, - "learning_rate": 6.107330006582878e-07, - "loss": 0.1592, - "step": 17532 - }, - { - "epoch": 0.89, - "grad_norm": 1.296337598576234, - "learning_rate": 6.101663777595501e-07, - "loss": 0.1737, - "step": 17533 - }, - { - "epoch": 0.89, - "grad_norm": 1.9101423442493164, - "learning_rate": 6.096000095605814e-07, - "loss": 0.1602, - "step": 17534 - }, - { - "epoch": 0.89, - "grad_norm": 0.9812618886749092, - "learning_rate": 6.090338960767417e-07, - "loss": 0.168, - "step": 17535 - }, - { - "epoch": 0.89, - "grad_norm": 1.2866841462265377, - "learning_rate": 6.084680373233875e-07, - "loss": 0.1703, - "step": 17536 - }, - { - "epoch": 0.89, - "grad_norm": 0.9056990924642179, - "learning_rate": 6.079024333158679e-07, - "loss": 0.1768, - "step": 17537 - }, - { - "epoch": 0.89, - "grad_norm": 1.0204475080312303, - "learning_rate": 6.073370840695269e-07, - "loss": 0.176, - "step": 17538 - }, - { - "epoch": 0.89, - "grad_norm": 1.3025219249136724, - "learning_rate": 6.067719895996971e-07, - "loss": 0.1602, - "step": 17539 - }, - { - "epoch": 0.89, - "grad_norm": 1.2543799126764876, - "learning_rate": 6.062071499217081e-07, - "loss": 0.162, - "step": 17540 - }, - { - "epoch": 0.89, - "grad_norm": 1.2796647743558915, - "learning_rate": 6.056425650508801e-07, - "loss": 0.1593, - "step": 17541 - }, - { - "epoch": 0.89, - "grad_norm": 0.9443775080108795, - "learning_rate": 6.050782350025297e-07, - "loss": 0.172, - "step": 17542 - }, - { - "epoch": 0.89, - "grad_norm": 1.2029689130455121, - "learning_rate": 6.045141597919613e-07, - "loss": 0.1822, - "step": 17543 - }, - { - "epoch": 0.89, - "grad_norm": 0.939996658457354, - "learning_rate": 6.039503394344782e-07, - "loss": 0.1472, - "step": 17544 - }, - { - "epoch": 0.89, - "grad_norm": 1.1660803223243563, - "learning_rate": 6.033867739453703e-07, - "loss": 0.1518, - "step": 17545 - }, - { - "epoch": 0.89, - "grad_norm": 0.8682073832006625, - "learning_rate": 6.028234633399277e-07, - "loss": 0.1602, - "step": 17546 - }, - { - "epoch": 0.89, - "grad_norm": 1.5234733333259054, - "learning_rate": 6.022604076334304e-07, - "loss": 0.1513, - "step": 17547 - }, - { - "epoch": 0.89, - "grad_norm": 0.9627404893959367, - "learning_rate": 6.016976068411506e-07, - "loss": 0.1587, - "step": 17548 - }, - { - "epoch": 0.89, - "grad_norm": 1.0389853213896696, - "learning_rate": 6.011350609783529e-07, - "loss": 0.1841, - "step": 17549 - }, - { - "epoch": 0.89, - "grad_norm": 1.319045427716174, - "learning_rate": 6.005727700602992e-07, - "loss": 0.1784, - "step": 17550 - }, - { - "epoch": 0.89, - "grad_norm": 0.9033564901415023, - "learning_rate": 6.000107341022399e-07, - "loss": 0.1841, - "step": 17551 - }, - { - "epoch": 0.89, - "grad_norm": 1.0968342902108015, - "learning_rate": 5.994489531194192e-07, - "loss": 0.1501, - "step": 17552 - }, - { - "epoch": 0.89, - "grad_norm": 1.2034136324685407, - "learning_rate": 5.988874271270773e-07, - "loss": 0.1458, - "step": 17553 - }, - { - "epoch": 0.89, - "grad_norm": 1.7376260021799919, - "learning_rate": 5.983261561404441e-07, - "loss": 0.1727, - "step": 17554 - }, - { - "epoch": 0.89, - "grad_norm": 1.1798377881298199, - "learning_rate": 5.977651401747442e-07, - "loss": 0.1375, - "step": 17555 - }, - { - "epoch": 0.89, - "grad_norm": 1.7601953087649302, - "learning_rate": 5.972043792451964e-07, - "loss": 0.1665, - "step": 17556 - }, - { - "epoch": 0.89, - "grad_norm": 1.0292216285996567, - "learning_rate": 5.966438733670121e-07, - "loss": 0.1547, - "step": 17557 - }, - { - "epoch": 0.89, - "grad_norm": 1.0016902697828487, - "learning_rate": 5.960836225553923e-07, - "loss": 0.1586, - "step": 17558 - }, - { - "epoch": 0.89, - "grad_norm": 0.8752761730699214, - "learning_rate": 5.955236268255372e-07, - "loss": 0.1724, - "step": 17559 - }, - { - "epoch": 0.89, - "grad_norm": 1.607044011795207, - "learning_rate": 5.949638861926333e-07, - "loss": 0.1407, - "step": 17560 - }, - { - "epoch": 0.89, - "grad_norm": 1.1743967307776313, - "learning_rate": 5.944044006718674e-07, - "loss": 0.1352, - "step": 17561 - }, - { - "epoch": 0.89, - "grad_norm": 1.0984485539787459, - "learning_rate": 5.938451702784109e-07, - "loss": 0.1715, - "step": 17562 - }, - { - "epoch": 0.89, - "grad_norm": 0.7723001616527106, - "learning_rate": 5.93286195027436e-07, - "loss": 0.1623, - "step": 17563 - }, - { - "epoch": 0.89, - "grad_norm": 2.3667467559842605, - "learning_rate": 5.927274749341039e-07, - "loss": 0.1737, - "step": 17564 - }, - { - "epoch": 0.89, - "grad_norm": 1.078327802462095, - "learning_rate": 5.921690100135713e-07, - "loss": 0.1619, - "step": 17565 - }, - { - "epoch": 0.89, - "grad_norm": 1.1087345773722312, - "learning_rate": 5.916108002809851e-07, - "loss": 0.1758, - "step": 17566 - }, - { - "epoch": 0.89, - "grad_norm": 0.931356983992569, - "learning_rate": 5.910528457514886e-07, - "loss": 0.1589, - "step": 17567 - }, - { - "epoch": 0.89, - "grad_norm": 0.9761997564311885, - "learning_rate": 5.904951464402154e-07, - "loss": 0.1704, - "step": 17568 - }, - { - "epoch": 0.89, - "grad_norm": 0.8754256176667727, - "learning_rate": 5.89937702362291e-07, - "loss": 0.171, - "step": 17569 - }, - { - "epoch": 0.89, - "grad_norm": 0.9700307877911704, - "learning_rate": 5.89380513532839e-07, - "loss": 0.1542, - "step": 17570 - }, - { - "epoch": 0.89, - "grad_norm": 0.8100403713688008, - "learning_rate": 5.888235799669705e-07, - "loss": 0.1737, - "step": 17571 - }, - { - "epoch": 0.89, - "grad_norm": 0.8726291967736584, - "learning_rate": 5.882669016797948e-07, - "loss": 0.1742, - "step": 17572 - }, - { - "epoch": 0.89, - "grad_norm": 1.1240848985822962, - "learning_rate": 5.877104786864107e-07, - "loss": 0.1621, - "step": 17573 - }, - { - "epoch": 0.89, - "grad_norm": 1.0396680948721067, - "learning_rate": 5.871543110019128e-07, - "loss": 0.1665, - "step": 17574 - }, - { - "epoch": 0.89, - "grad_norm": 1.1313393019502156, - "learning_rate": 5.865983986413848e-07, - "loss": 0.1529, - "step": 17575 - }, - { - "epoch": 0.89, - "grad_norm": 1.0259427094500448, - "learning_rate": 5.860427416199077e-07, - "loss": 0.1855, - "step": 17576 - }, - { - "epoch": 0.89, - "grad_norm": 1.0316922210292752, - "learning_rate": 5.854873399525518e-07, - "loss": 0.1643, - "step": 17577 - }, - { - "epoch": 0.89, - "grad_norm": 0.8627959272048978, - "learning_rate": 5.84932193654385e-07, - "loss": 0.1958, - "step": 17578 - }, - { - "epoch": 0.89, - "grad_norm": 0.8468770809456929, - "learning_rate": 5.843773027404631e-07, - "loss": 0.1748, - "step": 17579 - }, - { - "epoch": 0.89, - "grad_norm": 0.9074617601974535, - "learning_rate": 5.838226672258374e-07, - "loss": 0.1806, - "step": 17580 - }, - { - "epoch": 0.89, - "grad_norm": 0.8759627401266152, - "learning_rate": 5.832682871255546e-07, - "loss": 0.1743, - "step": 17581 - }, - { - "epoch": 0.89, - "grad_norm": 1.1549145279627093, - "learning_rate": 5.827141624546528e-07, - "loss": 0.1754, - "step": 17582 - }, - { - "epoch": 0.89, - "grad_norm": 0.9652020221208394, - "learning_rate": 5.8216029322816e-07, - "loss": 0.1497, - "step": 17583 - }, - { - "epoch": 0.89, - "grad_norm": 1.2358436435062201, - "learning_rate": 5.816066794611019e-07, - "loss": 0.1467, - "step": 17584 - }, - { - "epoch": 0.89, - "grad_norm": 1.2438610313138372, - "learning_rate": 5.810533211684954e-07, - "loss": 0.1646, - "step": 17585 - }, - { - "epoch": 0.89, - "grad_norm": 0.8587535130383744, - "learning_rate": 5.805002183653474e-07, - "loss": 0.1407, - "step": 17586 - }, - { - "epoch": 0.89, - "grad_norm": 1.1335931999480722, - "learning_rate": 5.799473710666659e-07, - "loss": 0.1711, - "step": 17587 - }, - { - "epoch": 0.89, - "grad_norm": 1.4396105891990831, - "learning_rate": 5.793947792874411e-07, - "loss": 0.1638, - "step": 17588 - }, - { - "epoch": 0.89, - "grad_norm": 0.9790535477952904, - "learning_rate": 5.788424430426653e-07, - "loss": 0.1857, - "step": 17589 - }, - { - "epoch": 0.89, - "grad_norm": 0.8856226673905241, - "learning_rate": 5.782903623473202e-07, - "loss": 0.1668, - "step": 17590 - }, - { - "epoch": 0.89, - "grad_norm": 1.067043539477932, - "learning_rate": 5.777385372163824e-07, - "loss": 0.174, - "step": 17591 - }, - { - "epoch": 0.89, - "grad_norm": 0.8740321505398196, - "learning_rate": 5.771869676648178e-07, - "loss": 0.15, - "step": 17592 - }, - { - "epoch": 0.89, - "grad_norm": 0.8681917829340098, - "learning_rate": 5.766356537075913e-07, - "loss": 0.1538, - "step": 17593 - }, - { - "epoch": 0.89, - "grad_norm": 0.9506182517731803, - "learning_rate": 5.760845953596527e-07, - "loss": 0.1759, - "step": 17594 - }, - { - "epoch": 0.89, - "grad_norm": 1.3177971254646998, - "learning_rate": 5.755337926359528e-07, - "loss": 0.1631, - "step": 17595 - }, - { - "epoch": 0.89, - "grad_norm": 1.117532008903971, - "learning_rate": 5.749832455514292e-07, - "loss": 0.1544, - "step": 17596 - }, - { - "epoch": 0.89, - "grad_norm": 1.2500663676142996, - "learning_rate": 5.744329541210203e-07, - "loss": 0.1733, - "step": 17597 - }, - { - "epoch": 0.89, - "grad_norm": 1.1405137035890365, - "learning_rate": 5.738829183596472e-07, - "loss": 0.1763, - "step": 17598 - }, - { - "epoch": 0.89, - "grad_norm": 1.364590498600788, - "learning_rate": 5.733331382822327e-07, - "loss": 0.1627, - "step": 17599 - }, - { - "epoch": 0.89, - "grad_norm": 1.3966575165611175, - "learning_rate": 5.727836139036902e-07, - "loss": 0.1771, - "step": 17600 - }, - { - "epoch": 0.9, - "grad_norm": 1.4316548739683352, - "learning_rate": 5.722343452389246e-07, - "loss": 0.1829, - "step": 17601 - }, - { - "epoch": 0.9, - "grad_norm": 0.9225231916886446, - "learning_rate": 5.71685332302836e-07, - "loss": 0.1652, - "step": 17602 - }, - { - "epoch": 0.9, - "grad_norm": 0.9262842826877503, - "learning_rate": 5.711365751103126e-07, - "loss": 0.1615, - "step": 17603 - }, - { - "epoch": 0.9, - "grad_norm": 1.125458347496385, - "learning_rate": 5.705880736762448e-07, - "loss": 0.1555, - "step": 17604 - }, - { - "epoch": 0.9, - "grad_norm": 1.0342644278015851, - "learning_rate": 5.700398280155062e-07, - "loss": 0.1555, - "step": 17605 - }, - { - "epoch": 0.9, - "grad_norm": 1.657524375857265, - "learning_rate": 5.694918381429693e-07, - "loss": 0.1715, - "step": 17606 - }, - { - "epoch": 0.9, - "grad_norm": 1.7613783650392223, - "learning_rate": 5.689441040735e-07, - "loss": 0.1705, - "step": 17607 - }, - { - "epoch": 0.9, - "grad_norm": 1.1654114356377074, - "learning_rate": 5.683966258219553e-07, - "loss": 0.1794, - "step": 17608 - }, - { - "epoch": 0.9, - "grad_norm": 1.117362526105987, - "learning_rate": 5.678494034031834e-07, - "loss": 0.1673, - "step": 17609 - }, - { - "epoch": 0.9, - "grad_norm": 0.9317194347022243, - "learning_rate": 5.673024368320313e-07, - "loss": 0.1671, - "step": 17610 - }, - { - "epoch": 0.9, - "grad_norm": 4.772568989083743, - "learning_rate": 5.667557261233303e-07, - "loss": 0.1537, - "step": 17611 - }, - { - "epoch": 0.9, - "grad_norm": 0.8476355847340409, - "learning_rate": 5.662092712919165e-07, - "loss": 0.1822, - "step": 17612 - }, - { - "epoch": 0.9, - "grad_norm": 0.9599586056070369, - "learning_rate": 5.656630723526058e-07, - "loss": 0.1925, - "step": 17613 - }, - { - "epoch": 0.9, - "grad_norm": 1.081935044119475, - "learning_rate": 5.651171293202195e-07, - "loss": 0.1543, - "step": 17614 - }, - { - "epoch": 0.9, - "grad_norm": 1.1782749849930128, - "learning_rate": 5.645714422095627e-07, - "loss": 0.1662, - "step": 17615 - }, - { - "epoch": 0.9, - "grad_norm": 1.038120793501522, - "learning_rate": 5.640260110354379e-07, - "loss": 0.1791, - "step": 17616 - }, - { - "epoch": 0.9, - "grad_norm": 1.0742685175075855, - "learning_rate": 5.634808358126409e-07, - "loss": 0.169, - "step": 17617 - }, - { - "epoch": 0.9, - "grad_norm": 1.490681460837666, - "learning_rate": 5.6293591655596e-07, - "loss": 0.1691, - "step": 17618 - }, - { - "epoch": 0.9, - "grad_norm": 2.0405156137959275, - "learning_rate": 5.623912532801745e-07, - "loss": 0.2042, - "step": 17619 - }, - { - "epoch": 0.9, - "grad_norm": 1.4385736720037017, - "learning_rate": 5.618468460000603e-07, - "loss": 0.1543, - "step": 17620 - }, - { - "epoch": 0.9, - "grad_norm": 1.3147500473045866, - "learning_rate": 5.613026947303846e-07, - "loss": 0.1723, - "step": 17621 - }, - { - "epoch": 0.9, - "grad_norm": 1.6507261497711412, - "learning_rate": 5.607587994859043e-07, - "loss": 0.173, - "step": 17622 - }, - { - "epoch": 0.9, - "grad_norm": 1.0463045500938963, - "learning_rate": 5.602151602813754e-07, - "loss": 0.1565, - "step": 17623 - }, - { - "epoch": 0.9, - "grad_norm": 0.9064519461727782, - "learning_rate": 5.59671777131543e-07, - "loss": 0.1683, - "step": 17624 - }, - { - "epoch": 0.9, - "grad_norm": 0.9830407275206406, - "learning_rate": 5.591286500511461e-07, - "loss": 0.1525, - "step": 17625 - }, - { - "epoch": 0.9, - "grad_norm": 1.4047169508316093, - "learning_rate": 5.585857790549176e-07, - "loss": 0.1647, - "step": 17626 - }, - { - "epoch": 0.9, - "grad_norm": 0.993469300350806, - "learning_rate": 5.580431641575856e-07, - "loss": 0.1591, - "step": 17627 - }, - { - "epoch": 0.9, - "grad_norm": 1.166303889142174, - "learning_rate": 5.57500805373864e-07, - "loss": 0.1714, - "step": 17628 - }, - { - "epoch": 0.9, - "grad_norm": 0.8781195607529766, - "learning_rate": 5.569587027184676e-07, - "loss": 0.1613, - "step": 17629 - }, - { - "epoch": 0.9, - "grad_norm": 0.885899291646843, - "learning_rate": 5.564168562060989e-07, - "loss": 0.1503, - "step": 17630 - }, - { - "epoch": 0.9, - "grad_norm": 0.7808838110262488, - "learning_rate": 5.558752658514576e-07, - "loss": 0.1421, - "step": 17631 - }, - { - "epoch": 0.9, - "grad_norm": 1.129720678794883, - "learning_rate": 5.553339316692319e-07, - "loss": 0.1671, - "step": 17632 - }, - { - "epoch": 0.9, - "grad_norm": 0.9170876078394514, - "learning_rate": 5.547928536741054e-07, - "loss": 0.1515, - "step": 17633 - }, - { - "epoch": 0.9, - "grad_norm": 3.195046931226926, - "learning_rate": 5.542520318807575e-07, - "loss": 0.1425, - "step": 17634 - }, - { - "epoch": 0.9, - "grad_norm": 1.289422396897735, - "learning_rate": 5.537114663038579e-07, - "loss": 0.1497, - "step": 17635 - }, - { - "epoch": 0.9, - "grad_norm": 1.002269400661614, - "learning_rate": 5.531711569580667e-07, - "loss": 0.171, - "step": 17636 - }, - { - "epoch": 0.9, - "grad_norm": 1.1868860593512662, - "learning_rate": 5.526311038580434e-07, - "loss": 0.1779, - "step": 17637 - }, - { - "epoch": 0.9, - "grad_norm": 0.7414258773959721, - "learning_rate": 5.520913070184342e-07, - "loss": 0.1615, - "step": 17638 - }, - { - "epoch": 0.9, - "grad_norm": 1.4872577817974402, - "learning_rate": 5.515517664538816e-07, - "loss": 0.1672, - "step": 17639 - }, - { - "epoch": 0.9, - "grad_norm": 1.1392463829779043, - "learning_rate": 5.510124821790208e-07, - "loss": 0.1743, - "step": 17640 - }, - { - "epoch": 0.9, - "grad_norm": 0.9808252375289511, - "learning_rate": 5.5047345420848e-07, - "loss": 0.1713, - "step": 17641 - }, - { - "epoch": 0.9, - "grad_norm": 1.0052517052647605, - "learning_rate": 5.499346825568796e-07, - "loss": 0.1572, - "step": 17642 - }, - { - "epoch": 0.9, - "grad_norm": 0.9217200434394364, - "learning_rate": 5.493961672388359e-07, - "loss": 0.1684, - "step": 17643 - }, - { - "epoch": 0.9, - "grad_norm": 0.9372417416015802, - "learning_rate": 5.488579082689549e-07, - "loss": 0.186, - "step": 17644 - }, - { - "epoch": 0.9, - "grad_norm": 1.3092412178722737, - "learning_rate": 5.48319905661836e-07, - "loss": 0.1465, - "step": 17645 - }, - { - "epoch": 0.9, - "grad_norm": 2.002333418364241, - "learning_rate": 5.477821594320754e-07, - "loss": 0.146, - "step": 17646 - }, - { - "epoch": 0.9, - "grad_norm": 0.8780082868559859, - "learning_rate": 5.472446695942557e-07, - "loss": 0.1576, - "step": 17647 - }, - { - "epoch": 0.9, - "grad_norm": 1.1158135010797137, - "learning_rate": 5.467074361629599e-07, - "loss": 0.1707, - "step": 17648 - }, - { - "epoch": 0.9, - "grad_norm": 1.2440407124510282, - "learning_rate": 5.461704591527573e-07, - "loss": 0.1658, - "step": 17649 - }, - { - "epoch": 0.9, - "grad_norm": 1.0253371799074213, - "learning_rate": 5.456337385782173e-07, - "loss": 0.1883, - "step": 17650 - }, - { - "epoch": 0.9, - "grad_norm": 0.915436610916007, - "learning_rate": 5.450972744538929e-07, - "loss": 0.1639, - "step": 17651 - }, - { - "epoch": 0.9, - "grad_norm": 1.0029131128139648, - "learning_rate": 5.445610667943401e-07, - "loss": 0.164, - "step": 17652 - }, - { - "epoch": 0.9, - "grad_norm": 0.9462649352774675, - "learning_rate": 5.440251156141019e-07, - "loss": 0.1628, - "step": 17653 - }, - { - "epoch": 0.9, - "grad_norm": 1.3574479219373592, - "learning_rate": 5.434894209277186e-07, - "loss": 0.1578, - "step": 17654 - }, - { - "epoch": 0.9, - "grad_norm": 0.9918804611222274, - "learning_rate": 5.429539827497188e-07, - "loss": 0.1497, - "step": 17655 - }, - { - "epoch": 0.9, - "grad_norm": 1.0503724191185535, - "learning_rate": 5.424188010946241e-07, - "loss": 0.1566, - "step": 17656 - }, - { - "epoch": 0.9, - "grad_norm": 0.8451239880660211, - "learning_rate": 5.418838759769551e-07, - "loss": 0.1548, - "step": 17657 - }, - { - "epoch": 0.9, - "grad_norm": 0.893324799854365, - "learning_rate": 5.413492074112192e-07, - "loss": 0.1468, - "step": 17658 - }, - { - "epoch": 0.9, - "grad_norm": 1.0016104659910292, - "learning_rate": 5.408147954119202e-07, - "loss": 0.1582, - "step": 17659 - }, - { - "epoch": 0.9, - "grad_norm": 0.8984243797807004, - "learning_rate": 5.402806399935545e-07, - "loss": 0.176, - "step": 17660 - }, - { - "epoch": 0.9, - "grad_norm": 9.62575895769711, - "learning_rate": 5.397467411706114e-07, - "loss": 0.1525, - "step": 17661 - }, - { - "epoch": 0.9, - "grad_norm": 1.0367581809540989, - "learning_rate": 5.392130989575716e-07, - "loss": 0.1787, - "step": 17662 - }, - { - "epoch": 0.9, - "grad_norm": 1.0075030435562442, - "learning_rate": 5.386797133689125e-07, - "loss": 0.1581, - "step": 17663 - }, - { - "epoch": 0.9, - "grad_norm": 0.9308276120576201, - "learning_rate": 5.38146584419098e-07, - "loss": 0.1415, - "step": 17664 - }, - { - "epoch": 0.9, - "grad_norm": 0.9995172688457722, - "learning_rate": 5.376137121225933e-07, - "loss": 0.1664, - "step": 17665 - }, - { - "epoch": 0.9, - "grad_norm": 1.1181798407086685, - "learning_rate": 5.370810964938511e-07, - "loss": 0.1645, - "step": 17666 - }, - { - "epoch": 0.9, - "grad_norm": 0.8228683638441441, - "learning_rate": 5.365487375473189e-07, - "loss": 0.1484, - "step": 17667 - }, - { - "epoch": 0.9, - "grad_norm": 0.91897186212851, - "learning_rate": 5.36016635297435e-07, - "loss": 0.1423, - "step": 17668 - }, - { - "epoch": 0.9, - "grad_norm": 0.9404184974406693, - "learning_rate": 5.354847897586346e-07, - "loss": 0.1536, - "step": 17669 - }, - { - "epoch": 0.9, - "grad_norm": 0.9341398123651984, - "learning_rate": 5.34953200945344e-07, - "loss": 0.1656, - "step": 17670 - }, - { - "epoch": 0.9, - "grad_norm": 0.9941746446631397, - "learning_rate": 5.344218688719849e-07, - "loss": 0.1838, - "step": 17671 - }, - { - "epoch": 0.9, - "grad_norm": 1.2760959638853353, - "learning_rate": 5.338907935529658e-07, - "loss": 0.1673, - "step": 17672 - }, - { - "epoch": 0.9, - "grad_norm": 1.2152025882319166, - "learning_rate": 5.33359975002693e-07, - "loss": 0.1714, - "step": 17673 - }, - { - "epoch": 0.9, - "grad_norm": 1.3978400774163173, - "learning_rate": 5.32829413235566e-07, - "loss": 0.1566, - "step": 17674 - }, - { - "epoch": 0.9, - "grad_norm": 2.228025280490407, - "learning_rate": 5.322991082659745e-07, - "loss": 0.1587, - "step": 17675 - }, - { - "epoch": 0.9, - "grad_norm": 1.6220795682324534, - "learning_rate": 5.31769060108307e-07, - "loss": 0.1669, - "step": 17676 - }, - { - "epoch": 0.9, - "grad_norm": 0.9484679760869359, - "learning_rate": 5.312392687769363e-07, - "loss": 0.1646, - "step": 17677 - }, - { - "epoch": 0.9, - "grad_norm": 1.7112336753826645, - "learning_rate": 5.307097342862355e-07, - "loss": 0.1476, - "step": 17678 - }, - { - "epoch": 0.9, - "grad_norm": 0.9050168602368139, - "learning_rate": 5.301804566505675e-07, - "loss": 0.1739, - "step": 17679 - }, - { - "epoch": 0.9, - "grad_norm": 2.6948532263660625, - "learning_rate": 5.296514358842919e-07, - "loss": 0.1562, - "step": 17680 - }, - { - "epoch": 0.9, - "grad_norm": 0.9223356351999948, - "learning_rate": 5.291226720017539e-07, - "loss": 0.1646, - "step": 17681 - }, - { - "epoch": 0.9, - "grad_norm": 1.0710729372748458, - "learning_rate": 5.285941650172999e-07, - "loss": 0.1781, - "step": 17682 - }, - { - "epoch": 0.9, - "grad_norm": 1.0156542824190222, - "learning_rate": 5.280659149452627e-07, - "loss": 0.161, - "step": 17683 - }, - { - "epoch": 0.9, - "grad_norm": 1.0609702715785894, - "learning_rate": 5.275379217999732e-07, - "loss": 0.1584, - "step": 17684 - }, - { - "epoch": 0.9, - "grad_norm": 2.593540452877707, - "learning_rate": 5.270101855957521e-07, - "loss": 0.1498, - "step": 17685 - }, - { - "epoch": 0.9, - "grad_norm": 0.9987113372708541, - "learning_rate": 5.264827063469146e-07, - "loss": 0.1667, - "step": 17686 - }, - { - "epoch": 0.9, - "grad_norm": 0.9799744766968221, - "learning_rate": 5.259554840677683e-07, - "loss": 0.1543, - "step": 17687 - }, - { - "epoch": 0.9, - "grad_norm": 1.0793003127543954, - "learning_rate": 5.25428518772616e-07, - "loss": 0.1772, - "step": 17688 - }, - { - "epoch": 0.9, - "grad_norm": 1.1104727507892227, - "learning_rate": 5.249018104757498e-07, - "loss": 0.1648, - "step": 17689 - }, - { - "epoch": 0.9, - "grad_norm": 0.8866876360592496, - "learning_rate": 5.24375359191458e-07, - "loss": 0.1711, - "step": 17690 - }, - { - "epoch": 0.9, - "grad_norm": 0.9318746380239975, - "learning_rate": 5.238491649340194e-07, - "loss": 0.1576, - "step": 17691 - }, - { - "epoch": 0.9, - "grad_norm": 1.704580316984937, - "learning_rate": 5.233232277177058e-07, - "loss": 0.1481, - "step": 17692 - }, - { - "epoch": 0.9, - "grad_norm": 0.9707591751898252, - "learning_rate": 5.22797547556787e-07, - "loss": 0.198, - "step": 17693 - }, - { - "epoch": 0.9, - "grad_norm": 1.4708636570765141, - "learning_rate": 5.222721244655182e-07, - "loss": 0.1606, - "step": 17694 - }, - { - "epoch": 0.9, - "grad_norm": 1.0986553399787764, - "learning_rate": 5.217469584581536e-07, - "loss": 0.1719, - "step": 17695 - }, - { - "epoch": 0.9, - "grad_norm": 2.24716223435193, - "learning_rate": 5.212220495489384e-07, - "loss": 0.1562, - "step": 17696 - }, - { - "epoch": 0.9, - "grad_norm": 0.8958578576870243, - "learning_rate": 5.206973977521113e-07, - "loss": 0.1731, - "step": 17697 - }, - { - "epoch": 0.9, - "grad_norm": 1.1951818186829957, - "learning_rate": 5.201730030819019e-07, - "loss": 0.1544, - "step": 17698 - }, - { - "epoch": 0.9, - "grad_norm": 1.4365885572432668, - "learning_rate": 5.196488655525356e-07, - "loss": 0.1787, - "step": 17699 - }, - { - "epoch": 0.9, - "grad_norm": 1.2545363657750488, - "learning_rate": 5.191249851782287e-07, - "loss": 0.145, - "step": 17700 - }, - { - "epoch": 0.9, - "grad_norm": 2.2246894585487254, - "learning_rate": 5.186013619731934e-07, - "loss": 0.1638, - "step": 17701 - }, - { - "epoch": 0.9, - "grad_norm": 0.9044530743019654, - "learning_rate": 5.180779959516303e-07, - "loss": 0.171, - "step": 17702 - }, - { - "epoch": 0.9, - "grad_norm": 0.8542074221845332, - "learning_rate": 5.175548871277358e-07, - "loss": 0.1606, - "step": 17703 - }, - { - "epoch": 0.9, - "grad_norm": 0.9570768342942108, - "learning_rate": 5.170320355157022e-07, - "loss": 0.1412, - "step": 17704 - }, - { - "epoch": 0.9, - "grad_norm": 1.377473388699456, - "learning_rate": 5.165094411297111e-07, - "loss": 0.1878, - "step": 17705 - }, - { - "epoch": 0.9, - "grad_norm": 0.9740939249314572, - "learning_rate": 5.15987103983936e-07, - "loss": 0.1739, - "step": 17706 - }, - { - "epoch": 0.9, - "grad_norm": 1.1030183081306857, - "learning_rate": 5.154650240925474e-07, - "loss": 0.1718, - "step": 17707 - }, - { - "epoch": 0.9, - "grad_norm": 0.9345049545849029, - "learning_rate": 5.149432014697053e-07, - "loss": 0.1798, - "step": 17708 - }, - { - "epoch": 0.9, - "grad_norm": 1.1709174668507403, - "learning_rate": 5.14421636129564e-07, - "loss": 0.1574, - "step": 17709 - }, - { - "epoch": 0.9, - "grad_norm": 0.8308813827049973, - "learning_rate": 5.13900328086272e-07, - "loss": 0.1487, - "step": 17710 - }, - { - "epoch": 0.9, - "grad_norm": 1.869450981379436, - "learning_rate": 5.133792773539681e-07, - "loss": 0.1568, - "step": 17711 - }, - { - "epoch": 0.9, - "grad_norm": 0.9869137581204871, - "learning_rate": 5.128584839467877e-07, - "loss": 0.1441, - "step": 17712 - }, - { - "epoch": 0.9, - "grad_norm": 0.7785372146076209, - "learning_rate": 5.12337947878857e-07, - "loss": 0.1614, - "step": 17713 - }, - { - "epoch": 0.9, - "grad_norm": 0.9084779713098403, - "learning_rate": 5.118176691642962e-07, - "loss": 0.144, - "step": 17714 - }, - { - "epoch": 0.9, - "grad_norm": 0.7622362638835993, - "learning_rate": 5.11297647817216e-07, - "loss": 0.1559, - "step": 17715 - }, - { - "epoch": 0.9, - "grad_norm": 1.6181503739545215, - "learning_rate": 5.107778838517241e-07, - "loss": 0.1525, - "step": 17716 - }, - { - "epoch": 0.9, - "grad_norm": 0.9288820385651829, - "learning_rate": 5.102583772819159e-07, - "loss": 0.1488, - "step": 17717 - }, - { - "epoch": 0.9, - "grad_norm": 1.0344836702661204, - "learning_rate": 5.097391281218877e-07, - "loss": 0.1446, - "step": 17718 - }, - { - "epoch": 0.9, - "grad_norm": 1.756541256971086, - "learning_rate": 5.092201363857197e-07, - "loss": 0.1528, - "step": 17719 - }, - { - "epoch": 0.9, - "grad_norm": 1.3498640132390163, - "learning_rate": 5.087014020874936e-07, - "loss": 0.1704, - "step": 17720 - }, - { - "epoch": 0.9, - "grad_norm": 0.9049343493676423, - "learning_rate": 5.081829252412762e-07, - "loss": 0.1487, - "step": 17721 - }, - { - "epoch": 0.9, - "grad_norm": 1.017207296182938, - "learning_rate": 5.076647058611328e-07, - "loss": 0.1758, - "step": 17722 - }, - { - "epoch": 0.9, - "grad_norm": 1.0428643056624292, - "learning_rate": 5.071467439611211e-07, - "loss": 0.1455, - "step": 17723 - }, - { - "epoch": 0.9, - "grad_norm": 1.1132281043897432, - "learning_rate": 5.066290395552909e-07, - "loss": 0.1574, - "step": 17724 - }, - { - "epoch": 0.9, - "grad_norm": 1.6376443306870083, - "learning_rate": 5.061115926576843e-07, - "loss": 0.1612, - "step": 17725 - }, - { - "epoch": 0.9, - "grad_norm": 1.537279060722791, - "learning_rate": 5.055944032823357e-07, - "loss": 0.1628, - "step": 17726 - }, - { - "epoch": 0.9, - "grad_norm": 1.133942195763958, - "learning_rate": 5.050774714432772e-07, - "loss": 0.1587, - "step": 17727 - }, - { - "epoch": 0.9, - "grad_norm": 1.6315795559668866, - "learning_rate": 5.045607971545263e-07, - "loss": 0.1767, - "step": 17728 - }, - { - "epoch": 0.9, - "grad_norm": 1.406991068781469, - "learning_rate": 5.040443804300998e-07, - "loss": 0.14, - "step": 17729 - }, - { - "epoch": 0.9, - "grad_norm": 1.3011977502033985, - "learning_rate": 5.035282212840065e-07, - "loss": 0.182, - "step": 17730 - }, - { - "epoch": 0.9, - "grad_norm": 1.3039874258239579, - "learning_rate": 5.030123197302472e-07, - "loss": 0.1708, - "step": 17731 - }, - { - "epoch": 0.9, - "grad_norm": 6.74425755208782, - "learning_rate": 5.024966757828143e-07, - "loss": 0.1766, - "step": 17732 - }, - { - "epoch": 0.9, - "grad_norm": 0.9541717050863457, - "learning_rate": 5.019812894556975e-07, - "loss": 0.1522, - "step": 17733 - }, - { - "epoch": 0.9, - "grad_norm": 0.9449006111982539, - "learning_rate": 5.014661607628723e-07, - "loss": 0.1705, - "step": 17734 - }, - { - "epoch": 0.9, - "grad_norm": 2.291618929572805, - "learning_rate": 5.009512897183156e-07, - "loss": 0.1501, - "step": 17735 - }, - { - "epoch": 0.9, - "grad_norm": 2.6137589502704652, - "learning_rate": 5.004366763359903e-07, - "loss": 0.1692, - "step": 17736 - }, - { - "epoch": 0.9, - "grad_norm": 2.088489181727494, - "learning_rate": 4.999223206298587e-07, - "loss": 0.1463, - "step": 17737 - }, - { - "epoch": 0.9, - "grad_norm": 0.7985636651852127, - "learning_rate": 4.994082226138686e-07, - "loss": 0.116, - "step": 17738 - }, - { - "epoch": 0.9, - "grad_norm": 1.2956073162739354, - "learning_rate": 4.988943823019676e-07, - "loss": 0.1524, - "step": 17739 - }, - { - "epoch": 0.9, - "grad_norm": 1.4870177517974048, - "learning_rate": 4.983807997080925e-07, - "loss": 0.1485, - "step": 17740 - }, - { - "epoch": 0.9, - "grad_norm": 1.1141072045469964, - "learning_rate": 4.978674748461765e-07, - "loss": 0.1965, - "step": 17741 - }, - { - "epoch": 0.9, - "grad_norm": 1.7596014826608797, - "learning_rate": 4.973544077301418e-07, - "loss": 0.1702, - "step": 17742 - }, - { - "epoch": 0.9, - "grad_norm": 0.8503845471946775, - "learning_rate": 4.968415983739039e-07, - "loss": 0.1554, - "step": 17743 - }, - { - "epoch": 0.9, - "grad_norm": 1.225632954529606, - "learning_rate": 4.963290467913761e-07, - "loss": 0.1623, - "step": 17744 - }, - { - "epoch": 0.9, - "grad_norm": 1.1538746263261441, - "learning_rate": 4.958167529964586e-07, - "loss": 0.1948, - "step": 17745 - }, - { - "epoch": 0.9, - "grad_norm": 1.0146882938179274, - "learning_rate": 4.953047170030489e-07, - "loss": 0.155, - "step": 17746 - }, - { - "epoch": 0.9, - "grad_norm": 1.0771355542823016, - "learning_rate": 4.94792938825035e-07, - "loss": 0.1491, - "step": 17747 - }, - { - "epoch": 0.9, - "grad_norm": 1.018788055431089, - "learning_rate": 4.942814184763001e-07, - "loss": 0.1723, - "step": 17748 - }, - { - "epoch": 0.9, - "grad_norm": 1.1316146560079632, - "learning_rate": 4.937701559707175e-07, - "loss": 0.1602, - "step": 17749 - }, - { - "epoch": 0.9, - "grad_norm": 0.9280565298619311, - "learning_rate": 4.932591513221586e-07, - "loss": 0.148, - "step": 17750 - }, - { - "epoch": 0.9, - "grad_norm": 1.0565016656740143, - "learning_rate": 4.927484045444797e-07, - "loss": 0.1574, - "step": 17751 - }, - { - "epoch": 0.9, - "grad_norm": 1.1490442384990298, - "learning_rate": 4.922379156515389e-07, - "loss": 0.1484, - "step": 17752 - }, - { - "epoch": 0.9, - "grad_norm": 1.0055808135829174, - "learning_rate": 4.917276846571806e-07, - "loss": 0.1499, - "step": 17753 - }, - { - "epoch": 0.9, - "grad_norm": 1.137822230302273, - "learning_rate": 4.912177115752481e-07, - "loss": 0.1698, - "step": 17754 - }, - { - "epoch": 0.9, - "grad_norm": 1.4222448581168465, - "learning_rate": 4.907079964195693e-07, - "loss": 0.1674, - "step": 17755 - }, - { - "epoch": 0.9, - "grad_norm": 1.0891384710066645, - "learning_rate": 4.901985392039743e-07, - "loss": 0.1892, - "step": 17756 - }, - { - "epoch": 0.9, - "grad_norm": 1.239926969876004, - "learning_rate": 4.896893399422809e-07, - "loss": 0.1529, - "step": 17757 - }, - { - "epoch": 0.9, - "grad_norm": 1.1012776690468857, - "learning_rate": 4.891803986483023e-07, - "loss": 0.1937, - "step": 17758 - }, - { - "epoch": 0.9, - "grad_norm": 1.0518012215731845, - "learning_rate": 4.886717153358411e-07, - "loss": 0.199, - "step": 17759 - }, - { - "epoch": 0.9, - "grad_norm": 1.0574357379309232, - "learning_rate": 4.881632900186983e-07, - "loss": 0.154, - "step": 17760 - }, - { - "epoch": 0.9, - "grad_norm": 1.175176478735189, - "learning_rate": 4.876551227106629e-07, - "loss": 0.1801, - "step": 17761 - }, - { - "epoch": 0.9, - "grad_norm": 0.8789036612254449, - "learning_rate": 4.871472134255195e-07, - "loss": 0.1609, - "step": 17762 - }, - { - "epoch": 0.9, - "grad_norm": 1.680680067709628, - "learning_rate": 4.866395621770458e-07, - "loss": 0.1423, - "step": 17763 - }, - { - "epoch": 0.9, - "grad_norm": 1.1085585855081044, - "learning_rate": 4.861321689790099e-07, - "loss": 0.1458, - "step": 17764 - }, - { - "epoch": 0.9, - "grad_norm": 2.6969531275425647, - "learning_rate": 4.856250338451763e-07, - "loss": 0.1805, - "step": 17765 - }, - { - "epoch": 0.9, - "grad_norm": 0.9945335175170363, - "learning_rate": 4.851181567893015e-07, - "loss": 0.1476, - "step": 17766 - }, - { - "epoch": 0.9, - "grad_norm": 1.1633841659011586, - "learning_rate": 4.846115378251348e-07, - "loss": 0.1401, - "step": 17767 - }, - { - "epoch": 0.9, - "grad_norm": 1.1946895954446297, - "learning_rate": 4.841051769664174e-07, - "loss": 0.1782, - "step": 17768 - }, - { - "epoch": 0.9, - "grad_norm": 1.0272983655384607, - "learning_rate": 4.835990742268848e-07, - "loss": 0.1578, - "step": 17769 - }, - { - "epoch": 0.9, - "grad_norm": 0.881203217537691, - "learning_rate": 4.83093229620264e-07, - "loss": 0.1575, - "step": 17770 - }, - { - "epoch": 0.9, - "grad_norm": 1.35515392014797, - "learning_rate": 4.825876431602772e-07, - "loss": 0.1692, - "step": 17771 - }, - { - "epoch": 0.9, - "grad_norm": 1.2188750181917818, - "learning_rate": 4.820823148606379e-07, - "loss": 0.183, - "step": 17772 - }, - { - "epoch": 0.9, - "grad_norm": 1.0844096893909176, - "learning_rate": 4.815772447350541e-07, - "loss": 0.1687, - "step": 17773 - }, - { - "epoch": 0.9, - "grad_norm": 1.051618523028314, - "learning_rate": 4.810724327972238e-07, - "loss": 0.1578, - "step": 17774 - }, - { - "epoch": 0.9, - "grad_norm": 1.7931119674665117, - "learning_rate": 4.805678790608415e-07, - "loss": 0.1636, - "step": 17775 - }, - { - "epoch": 0.9, - "grad_norm": 0.8151408484958761, - "learning_rate": 4.80063583539594e-07, - "loss": 0.1484, - "step": 17776 - }, - { - "epoch": 0.9, - "grad_norm": 1.0937623325399302, - "learning_rate": 4.795595462471592e-07, - "loss": 0.1779, - "step": 17777 - }, - { - "epoch": 0.9, - "grad_norm": 1.0631272079242073, - "learning_rate": 4.790557671972107e-07, - "loss": 0.1397, - "step": 17778 - }, - { - "epoch": 0.9, - "grad_norm": 1.0143499979429285, - "learning_rate": 4.785522464034109e-07, - "loss": 0.1782, - "step": 17779 - }, - { - "epoch": 0.9, - "grad_norm": 1.063689312103594, - "learning_rate": 4.7804898387942e-07, - "loss": 0.1645, - "step": 17780 - }, - { - "epoch": 0.9, - "grad_norm": 1.0920688128771128, - "learning_rate": 4.77545979638887e-07, - "loss": 0.1579, - "step": 17781 - }, - { - "epoch": 0.9, - "grad_norm": 1.1352514848095434, - "learning_rate": 4.770432336954567e-07, - "loss": 0.1594, - "step": 17782 - }, - { - "epoch": 0.9, - "grad_norm": 0.8693421343476601, - "learning_rate": 4.765407460627669e-07, - "loss": 0.1666, - "step": 17783 - }, - { - "epoch": 0.9, - "grad_norm": 1.512091212186362, - "learning_rate": 4.76038516754449e-07, - "loss": 0.1636, - "step": 17784 - }, - { - "epoch": 0.9, - "grad_norm": 0.9559664234061493, - "learning_rate": 4.755365457841221e-07, - "loss": 0.1621, - "step": 17785 - }, - { - "epoch": 0.9, - "grad_norm": 2.40871843285452, - "learning_rate": 4.750348331654064e-07, - "loss": 0.1668, - "step": 17786 - }, - { - "epoch": 0.9, - "grad_norm": 1.2538414667969688, - "learning_rate": 4.7453337891190776e-07, - "loss": 0.1684, - "step": 17787 - }, - { - "epoch": 0.9, - "grad_norm": 1.2285387934323766, - "learning_rate": 4.7403218303722963e-07, - "loss": 0.1673, - "step": 17788 - }, - { - "epoch": 0.9, - "grad_norm": 1.2252516668439057, - "learning_rate": 4.7353124555496566e-07, - "loss": 0.1568, - "step": 17789 - }, - { - "epoch": 0.9, - "grad_norm": 1.7644849909791742, - "learning_rate": 4.7303056647870605e-07, - "loss": 0.1833, - "step": 17790 - }, - { - "epoch": 0.9, - "grad_norm": 0.9280686009723658, - "learning_rate": 4.725301458220288e-07, - "loss": 0.1575, - "step": 17791 - }, - { - "epoch": 0.9, - "grad_norm": 0.9485796496560795, - "learning_rate": 4.7202998359850984e-07, - "loss": 0.1594, - "step": 17792 - }, - { - "epoch": 0.9, - "grad_norm": 1.2267938129539502, - "learning_rate": 4.7153007982171594e-07, - "loss": 0.1909, - "step": 17793 - }, - { - "epoch": 0.9, - "grad_norm": 0.7754106303778403, - "learning_rate": 4.7103043450520744e-07, - "loss": 0.1457, - "step": 17794 - }, - { - "epoch": 0.9, - "grad_norm": 1.026104662099303, - "learning_rate": 4.705310476625369e-07, - "loss": 0.1678, - "step": 17795 - }, - { - "epoch": 0.9, - "grad_norm": 1.1562353558815135, - "learning_rate": 4.700319193072489e-07, - "loss": 0.1869, - "step": 17796 - }, - { - "epoch": 0.91, - "grad_norm": 1.6137894136029478, - "learning_rate": 4.695330494528838e-07, - "loss": 0.1491, - "step": 17797 - }, - { - "epoch": 0.91, - "grad_norm": 1.09089695525694, - "learning_rate": 4.69034438112973e-07, - "loss": 0.1839, - "step": 17798 - }, - { - "epoch": 0.91, - "grad_norm": 1.333152341251913, - "learning_rate": 4.685360853010401e-07, - "loss": 0.1637, - "step": 17799 - }, - { - "epoch": 0.91, - "grad_norm": 0.9602187524299798, - "learning_rate": 4.6803799103060544e-07, - "loss": 0.1653, - "step": 17800 - }, - { - "epoch": 0.91, - "grad_norm": 1.1799898341032788, - "learning_rate": 4.6754015531517926e-07, - "loss": 0.1626, - "step": 17801 - }, - { - "epoch": 0.91, - "grad_norm": 3.665753566346427, - "learning_rate": 4.6704257816826306e-07, - "loss": 0.1961, - "step": 17802 - }, - { - "epoch": 0.91, - "grad_norm": 1.640526741340988, - "learning_rate": 4.6654525960335704e-07, - "loss": 0.1682, - "step": 17803 - }, - { - "epoch": 0.91, - "grad_norm": 0.8810679261644867, - "learning_rate": 4.660481996339483e-07, - "loss": 0.1621, - "step": 17804 - }, - { - "epoch": 0.91, - "grad_norm": 0.9417092579675105, - "learning_rate": 4.655513982735216e-07, - "loss": 0.1822, - "step": 17805 - }, - { - "epoch": 0.91, - "grad_norm": 1.1450114665041524, - "learning_rate": 4.6505485553555054e-07, - "loss": 0.1495, - "step": 17806 - }, - { - "epoch": 0.91, - "grad_norm": 1.2342421265088601, - "learning_rate": 4.6455857143350657e-07, - "loss": 0.1718, - "step": 17807 - }, - { - "epoch": 0.91, - "grad_norm": 1.4242395673614698, - "learning_rate": 4.6406254598084786e-07, - "loss": 0.1682, - "step": 17808 - }, - { - "epoch": 0.91, - "grad_norm": 0.9017379280780949, - "learning_rate": 4.635667791910314e-07, - "loss": 0.1617, - "step": 17809 - }, - { - "epoch": 0.91, - "grad_norm": 2.0869143627570828, - "learning_rate": 4.630712710775054e-07, - "loss": 0.1639, - "step": 17810 - }, - { - "epoch": 0.91, - "grad_norm": 0.9145479259162557, - "learning_rate": 4.625760216537112e-07, - "loss": 0.1559, - "step": 17811 - }, - { - "epoch": 0.91, - "grad_norm": 1.0659886892373898, - "learning_rate": 4.620810309330803e-07, - "loss": 0.1474, - "step": 17812 - }, - { - "epoch": 0.91, - "grad_norm": 1.0288166878075367, - "learning_rate": 4.615862989290387e-07, - "loss": 0.1814, - "step": 17813 - }, - { - "epoch": 0.91, - "grad_norm": 1.1905630758669472, - "learning_rate": 4.61091825655009e-07, - "loss": 0.1932, - "step": 17814 - }, - { - "epoch": 0.91, - "grad_norm": 1.0430728818674375, - "learning_rate": 4.605976111244015e-07, - "loss": 0.1629, - "step": 17815 - }, - { - "epoch": 0.91, - "grad_norm": 1.0384609491251084, - "learning_rate": 4.601036553506233e-07, - "loss": 0.174, - "step": 17816 - }, - { - "epoch": 0.91, - "grad_norm": 1.4392457512023191, - "learning_rate": 4.5960995834707146e-07, - "loss": 0.1346, - "step": 17817 - }, - { - "epoch": 0.91, - "grad_norm": 0.9923559260449925, - "learning_rate": 4.591165201271386e-07, - "loss": 0.1774, - "step": 17818 - }, - { - "epoch": 0.91, - "grad_norm": 1.0656259011880576, - "learning_rate": 4.5862334070420843e-07, - "loss": 0.1652, - "step": 17819 - }, - { - "epoch": 0.91, - "grad_norm": 1.4443650791473384, - "learning_rate": 4.581304200916603e-07, - "loss": 0.1698, - "step": 17820 - }, - { - "epoch": 0.91, - "grad_norm": 1.9518129480656117, - "learning_rate": 4.576377583028624e-07, - "loss": 0.1504, - "step": 17821 - }, - { - "epoch": 0.91, - "grad_norm": 1.1957606269589063, - "learning_rate": 4.571453553511807e-07, - "loss": 0.176, - "step": 17822 - }, - { - "epoch": 0.91, - "grad_norm": 1.0892795482220845, - "learning_rate": 4.5665321124996774e-07, - "loss": 0.1595, - "step": 17823 - }, - { - "epoch": 0.91, - "grad_norm": 1.1566948468637954, - "learning_rate": 4.5616132601257857e-07, - "loss": 0.1695, - "step": 17824 - }, - { - "epoch": 0.91, - "grad_norm": 0.9480355518150048, - "learning_rate": 4.556696996523502e-07, - "loss": 0.1724, - "step": 17825 - }, - { - "epoch": 0.91, - "grad_norm": 1.0548757699756477, - "learning_rate": 4.5517833218261974e-07, - "loss": 0.1673, - "step": 17826 - }, - { - "epoch": 0.91, - "grad_norm": 0.8952574575856083, - "learning_rate": 4.546872236167166e-07, - "loss": 0.1576, - "step": 17827 - }, - { - "epoch": 0.91, - "grad_norm": 1.0190701158443207, - "learning_rate": 4.5419637396796337e-07, - "loss": 0.1807, - "step": 17828 - }, - { - "epoch": 0.91, - "grad_norm": 0.8335552710274327, - "learning_rate": 4.5370578324967054e-07, - "loss": 0.1783, - "step": 17829 - }, - { - "epoch": 0.91, - "grad_norm": 1.3764248852007366, - "learning_rate": 4.532154514751497e-07, - "loss": 0.1893, - "step": 17830 - }, - { - "epoch": 0.91, - "grad_norm": 1.3915203240070135, - "learning_rate": 4.527253786576991e-07, - "loss": 0.1592, - "step": 17831 - }, - { - "epoch": 0.91, - "grad_norm": 1.2861389689670186, - "learning_rate": 4.5223556481060913e-07, - "loss": 0.1528, - "step": 17832 - }, - { - "epoch": 0.91, - "grad_norm": 1.4373456377797067, - "learning_rate": 4.5174600994717154e-07, - "loss": 0.1645, - "step": 17833 - }, - { - "epoch": 0.91, - "grad_norm": 1.348075943625048, - "learning_rate": 4.5125671408066006e-07, - "loss": 0.1546, - "step": 17834 - }, - { - "epoch": 0.91, - "grad_norm": 1.1271648016172928, - "learning_rate": 4.5076767722435075e-07, - "loss": 0.1644, - "step": 17835 - }, - { - "epoch": 0.91, - "grad_norm": 0.8177597987428121, - "learning_rate": 4.502788993915075e-07, - "loss": 0.1643, - "step": 17836 - }, - { - "epoch": 0.91, - "grad_norm": 1.4899303323960316, - "learning_rate": 4.4979038059538847e-07, - "loss": 0.1568, - "step": 17837 - }, - { - "epoch": 0.91, - "grad_norm": 0.8795186133211397, - "learning_rate": 4.493021208492443e-07, - "loss": 0.1421, - "step": 17838 - }, - { - "epoch": 0.91, - "grad_norm": 1.1307292120596275, - "learning_rate": 4.48814120166321e-07, - "loss": 0.1725, - "step": 17839 - }, - { - "epoch": 0.91, - "grad_norm": 1.0942327975567745, - "learning_rate": 4.483263785598524e-07, - "loss": 0.1607, - "step": 17840 - }, - { - "epoch": 0.91, - "grad_norm": 1.310134964018146, - "learning_rate": 4.478388960430724e-07, - "loss": 0.1533, - "step": 17841 - }, - { - "epoch": 0.91, - "grad_norm": 1.0896854956976845, - "learning_rate": 4.4735167262919934e-07, - "loss": 0.1672, - "step": 17842 - }, - { - "epoch": 0.91, - "grad_norm": 1.4009160493602761, - "learning_rate": 4.468647083314537e-07, - "loss": 0.172, - "step": 17843 - }, - { - "epoch": 0.91, - "grad_norm": 1.0883627609353188, - "learning_rate": 4.4637800316304157e-07, - "loss": 0.1673, - "step": 17844 - }, - { - "epoch": 0.91, - "grad_norm": 0.9322656297713793, - "learning_rate": 4.458915571371647e-07, - "loss": 0.1345, - "step": 17845 - }, - { - "epoch": 0.91, - "grad_norm": 1.3953361415792829, - "learning_rate": 4.4540537026702026e-07, - "loss": 0.1563, - "step": 17846 - }, - { - "epoch": 0.91, - "grad_norm": 2.582791955807601, - "learning_rate": 4.449194425657943e-07, - "loss": 0.1769, - "step": 17847 - }, - { - "epoch": 0.91, - "grad_norm": 1.2389198531379395, - "learning_rate": 4.4443377404666976e-07, - "loss": 0.1726, - "step": 17848 - }, - { - "epoch": 0.91, - "grad_norm": 1.8344787438413215, - "learning_rate": 4.439483647228171e-07, - "loss": 0.1823, - "step": 17849 - }, - { - "epoch": 0.91, - "grad_norm": 0.9170059286638855, - "learning_rate": 4.4346321460740583e-07, - "loss": 0.1698, - "step": 17850 - }, - { - "epoch": 0.91, - "grad_norm": 1.0316912778937046, - "learning_rate": 4.429783237135932e-07, - "loss": 0.17, - "step": 17851 - }, - { - "epoch": 0.91, - "grad_norm": 1.1489235166833727, - "learning_rate": 4.424936920545331e-07, - "loss": 0.1702, - "step": 17852 - }, - { - "epoch": 0.91, - "grad_norm": 1.0784830220040096, - "learning_rate": 4.420093196433717e-07, - "loss": 0.1726, - "step": 17853 - }, - { - "epoch": 0.91, - "grad_norm": 1.780789153470712, - "learning_rate": 4.415252064932485e-07, - "loss": 0.1517, - "step": 17854 - }, - { - "epoch": 0.91, - "grad_norm": 1.112357375081568, - "learning_rate": 4.4104135261729296e-07, - "loss": 0.1584, - "step": 17855 - }, - { - "epoch": 0.91, - "grad_norm": 1.1552800042958693, - "learning_rate": 4.4055775802863246e-07, - "loss": 0.1663, - "step": 17856 - }, - { - "epoch": 0.91, - "grad_norm": 1.408008062223532, - "learning_rate": 4.400744227403797e-07, - "loss": 0.1646, - "step": 17857 - }, - { - "epoch": 0.91, - "grad_norm": 0.9212932843285123, - "learning_rate": 4.3959134676565097e-07, - "loss": 0.1524, - "step": 17858 - }, - { - "epoch": 0.91, - "grad_norm": 0.9661145345045649, - "learning_rate": 4.391085301175457e-07, - "loss": 0.1622, - "step": 17859 - }, - { - "epoch": 0.91, - "grad_norm": 1.189610441208609, - "learning_rate": 4.386259728091613e-07, - "loss": 0.166, - "step": 17860 - }, - { - "epoch": 0.91, - "grad_norm": 1.2524582875011316, - "learning_rate": 4.381436748535872e-07, - "loss": 0.1598, - "step": 17861 - }, - { - "epoch": 0.91, - "grad_norm": 1.1669624890146575, - "learning_rate": 4.376616362639063e-07, - "loss": 0.1482, - "step": 17862 - }, - { - "epoch": 0.91, - "grad_norm": 1.2651938387594743, - "learning_rate": 4.371798570531927e-07, - "loss": 0.1598, - "step": 17863 - }, - { - "epoch": 0.91, - "grad_norm": 1.159737621227063, - "learning_rate": 4.3669833723451795e-07, - "loss": 0.1696, - "step": 17864 - }, - { - "epoch": 0.91, - "grad_norm": 1.210751056835837, - "learning_rate": 4.3621707682094063e-07, - "loss": 0.1556, - "step": 17865 - }, - { - "epoch": 0.91, - "grad_norm": 1.0647749145288499, - "learning_rate": 4.3573607582551356e-07, - "loss": 0.1692, - "step": 17866 - }, - { - "epoch": 0.91, - "grad_norm": 0.9487333310478399, - "learning_rate": 4.3525533426128643e-07, - "loss": 0.1684, - "step": 17867 - }, - { - "epoch": 0.91, - "grad_norm": 1.0380106070047885, - "learning_rate": 4.3477485214129864e-07, - "loss": 0.1641, - "step": 17868 - }, - { - "epoch": 0.91, - "grad_norm": 1.052899447067935, - "learning_rate": 4.3429462947858327e-07, - "loss": 0.181, - "step": 17869 - }, - { - "epoch": 0.91, - "grad_norm": 1.4736590717566007, - "learning_rate": 4.338146662861664e-07, - "loss": 0.1609, - "step": 17870 - }, - { - "epoch": 0.91, - "grad_norm": 1.797747112413201, - "learning_rate": 4.333349625770655e-07, - "loss": 0.1661, - "step": 17871 - }, - { - "epoch": 0.91, - "grad_norm": 1.2137042272213838, - "learning_rate": 4.3285551836429465e-07, - "loss": 0.1608, - "step": 17872 - }, - { - "epoch": 0.91, - "grad_norm": 0.8488474336115467, - "learning_rate": 4.3237633366085997e-07, - "loss": 0.1448, - "step": 17873 - }, - { - "epoch": 0.91, - "grad_norm": 1.0186497884210581, - "learning_rate": 4.3189740847975556e-07, - "loss": 0.1562, - "step": 17874 - }, - { - "epoch": 0.91, - "grad_norm": 1.0794700833681627, - "learning_rate": 4.3141874283397665e-07, - "loss": 0.1541, - "step": 17875 - }, - { - "epoch": 0.91, - "grad_norm": 1.3353539984921128, - "learning_rate": 4.309403367365028e-07, - "loss": 0.1698, - "step": 17876 - }, - { - "epoch": 0.91, - "grad_norm": 0.9808455563771287, - "learning_rate": 4.3046219020031366e-07, - "loss": 0.1791, - "step": 17877 - }, - { - "epoch": 0.91, - "grad_norm": 1.460488370062648, - "learning_rate": 4.299843032383777e-07, - "loss": 0.1983, - "step": 17878 - }, - { - "epoch": 0.91, - "grad_norm": 1.190477051437423, - "learning_rate": 4.295066758636579e-07, - "loss": 0.1534, - "step": 17879 - }, - { - "epoch": 0.91, - "grad_norm": 1.117376348606542, - "learning_rate": 4.2902930808910946e-07, - "loss": 0.1607, - "step": 17880 - }, - { - "epoch": 0.91, - "grad_norm": 1.1286525714094169, - "learning_rate": 4.2855219992768313e-07, - "loss": 0.1752, - "step": 17881 - }, - { - "epoch": 0.91, - "grad_norm": 1.0822666747841385, - "learning_rate": 4.280753513923197e-07, - "loss": 0.1518, - "step": 17882 - }, - { - "epoch": 0.91, - "grad_norm": 1.1124604964562284, - "learning_rate": 4.27598762495951e-07, - "loss": 0.1501, - "step": 17883 - }, - { - "epoch": 0.91, - "grad_norm": 1.2331043057271551, - "learning_rate": 4.271224332515078e-07, - "loss": 0.1651, - "step": 17884 - }, - { - "epoch": 0.91, - "grad_norm": 1.1070061068054977, - "learning_rate": 4.266463636719087e-07, - "loss": 0.1477, - "step": 17885 - }, - { - "epoch": 0.91, - "grad_norm": 1.9162346294809607, - "learning_rate": 4.261705537700678e-07, - "loss": 0.1644, - "step": 17886 - }, - { - "epoch": 0.91, - "grad_norm": 1.7168818418985248, - "learning_rate": 4.2569500355889027e-07, - "loss": 0.1631, - "step": 17887 - }, - { - "epoch": 0.91, - "grad_norm": 0.8780179759569882, - "learning_rate": 4.2521971305127695e-07, - "loss": 0.1571, - "step": 17888 - }, - { - "epoch": 0.91, - "grad_norm": 1.0836510792985983, - "learning_rate": 4.2474468226011976e-07, - "loss": 0.1682, - "step": 17889 - }, - { - "epoch": 0.91, - "grad_norm": 1.2082434934187478, - "learning_rate": 4.242699111983051e-07, - "loss": 0.1571, - "step": 17890 - }, - { - "epoch": 0.91, - "grad_norm": 1.8432251155722694, - "learning_rate": 4.2379539987870924e-07, - "loss": 0.1522, - "step": 17891 - }, - { - "epoch": 0.91, - "grad_norm": 0.800714309069143, - "learning_rate": 4.233211483142041e-07, - "loss": 0.155, - "step": 17892 - }, - { - "epoch": 0.91, - "grad_norm": 0.9767747713654685, - "learning_rate": 4.2284715651765287e-07, - "loss": 0.1674, - "step": 17893 - }, - { - "epoch": 0.91, - "grad_norm": 1.6629670704546073, - "learning_rate": 4.223734245019151e-07, - "loss": 0.1639, - "step": 17894 - }, - { - "epoch": 0.91, - "grad_norm": 1.2069269896015864, - "learning_rate": 4.2189995227983726e-07, - "loss": 0.1765, - "step": 17895 - }, - { - "epoch": 0.91, - "grad_norm": 1.466101959194786, - "learning_rate": 4.2142673986426685e-07, - "loss": 0.1842, - "step": 17896 - }, - { - "epoch": 0.91, - "grad_norm": 1.0350455168520254, - "learning_rate": 4.2095378726803473e-07, - "loss": 0.1613, - "step": 17897 - }, - { - "epoch": 0.91, - "grad_norm": 0.7932209596104125, - "learning_rate": 4.204810945039717e-07, - "loss": 0.1574, - "step": 17898 - }, - { - "epoch": 0.91, - "grad_norm": 0.9474340681428581, - "learning_rate": 4.2000866158490084e-07, - "loss": 0.16, - "step": 17899 - }, - { - "epoch": 0.91, - "grad_norm": 1.1472259303799852, - "learning_rate": 4.195364885236375e-07, - "loss": 0.1704, - "step": 17900 - }, - { - "epoch": 0.91, - "grad_norm": 1.0099861991323156, - "learning_rate": 4.1906457533298694e-07, - "loss": 0.171, - "step": 17901 - }, - { - "epoch": 0.91, - "grad_norm": 1.1539727871270642, - "learning_rate": 4.1859292202575007e-07, - "loss": 0.1546, - "step": 17902 - }, - { - "epoch": 0.91, - "grad_norm": 0.8311713315990183, - "learning_rate": 4.1812152861472333e-07, - "loss": 0.1852, - "step": 17903 - }, - { - "epoch": 0.91, - "grad_norm": 1.0243626136619337, - "learning_rate": 4.176503951126898e-07, - "loss": 0.156, - "step": 17904 - }, - { - "epoch": 0.91, - "grad_norm": 0.9057591631418205, - "learning_rate": 4.1717952153243034e-07, - "loss": 0.1389, - "step": 17905 - }, - { - "epoch": 0.91, - "grad_norm": 1.426122726890483, - "learning_rate": 4.16708907886717e-07, - "loss": 0.1613, - "step": 17906 - }, - { - "epoch": 0.91, - "grad_norm": 1.0026262405109105, - "learning_rate": 4.162385541883185e-07, - "loss": 0.1697, - "step": 17907 - }, - { - "epoch": 0.91, - "grad_norm": 1.1279028816464756, - "learning_rate": 4.157684604499879e-07, - "loss": 0.1595, - "step": 17908 - }, - { - "epoch": 0.91, - "grad_norm": 1.137347084964881, - "learning_rate": 4.152986266844805e-07, - "loss": 0.1796, - "step": 17909 - }, - { - "epoch": 0.91, - "grad_norm": 1.3469612177043435, - "learning_rate": 4.1482905290453846e-07, - "loss": 0.1645, - "step": 17910 - }, - { - "epoch": 0.91, - "grad_norm": 1.1427791293368184, - "learning_rate": 4.143597391229015e-07, - "loss": 0.1642, - "step": 17911 - }, - { - "epoch": 0.91, - "grad_norm": 0.9551977976133195, - "learning_rate": 4.1389068535229615e-07, - "loss": 0.1625, - "step": 17912 - }, - { - "epoch": 0.91, - "grad_norm": 2.279745687276524, - "learning_rate": 4.1342189160544775e-07, - "loss": 0.165, - "step": 17913 - }, - { - "epoch": 0.91, - "grad_norm": 0.9968432466039817, - "learning_rate": 4.1295335789507174e-07, - "loss": 0.1557, - "step": 17914 - }, - { - "epoch": 0.91, - "grad_norm": 0.8494539809967789, - "learning_rate": 4.124850842338779e-07, - "loss": 0.1633, - "step": 17915 - }, - { - "epoch": 0.91, - "grad_norm": 0.8921772831807426, - "learning_rate": 4.120170706345661e-07, - "loss": 0.1419, - "step": 17916 - }, - { - "epoch": 0.91, - "grad_norm": 1.2943009106600474, - "learning_rate": 4.1154931710983504e-07, - "loss": 0.1617, - "step": 17917 - }, - { - "epoch": 0.91, - "grad_norm": 1.2395827561193538, - "learning_rate": 4.1108182367237014e-07, - "loss": 0.1823, - "step": 17918 - }, - { - "epoch": 0.91, - "grad_norm": 0.8016794634208525, - "learning_rate": 4.106145903348513e-07, - "loss": 0.1587, - "step": 17919 - }, - { - "epoch": 0.91, - "grad_norm": 1.1507975908971602, - "learning_rate": 4.10147617109955e-07, - "loss": 0.1681, - "step": 17920 - }, - { - "epoch": 0.91, - "grad_norm": 0.8995866701271464, - "learning_rate": 4.0968090401034444e-07, - "loss": 0.1742, - "step": 17921 - }, - { - "epoch": 0.91, - "grad_norm": 0.8769457890812751, - "learning_rate": 4.092144510486806e-07, - "loss": 0.1565, - "step": 17922 - }, - { - "epoch": 0.91, - "grad_norm": 1.1400777198132501, - "learning_rate": 4.0874825823761676e-07, - "loss": 0.1481, - "step": 17923 - }, - { - "epoch": 0.91, - "grad_norm": 0.7596705469198951, - "learning_rate": 4.0828232558979943e-07, - "loss": 0.1577, - "step": 17924 - }, - { - "epoch": 0.91, - "grad_norm": 1.412245385430175, - "learning_rate": 4.078166531178651e-07, - "loss": 0.1475, - "step": 17925 - }, - { - "epoch": 0.91, - "grad_norm": 0.9016443987867834, - "learning_rate": 4.07351240834446e-07, - "loss": 0.1589, - "step": 17926 - }, - { - "epoch": 0.91, - "grad_norm": 1.2437259871228254, - "learning_rate": 4.0688608875216527e-07, - "loss": 0.1618, - "step": 17927 - }, - { - "epoch": 0.91, - "grad_norm": 1.763305394131877, - "learning_rate": 4.064211968836429e-07, - "loss": 0.1706, - "step": 17928 - }, - { - "epoch": 0.91, - "grad_norm": 0.776341037606745, - "learning_rate": 4.059565652414865e-07, - "loss": 0.1604, - "step": 17929 - }, - { - "epoch": 0.91, - "grad_norm": 1.1781010449575935, - "learning_rate": 4.0549219383830054e-07, - "loss": 0.1407, - "step": 17930 - }, - { - "epoch": 0.91, - "grad_norm": 0.9927757811539168, - "learning_rate": 4.0502808268668034e-07, - "loss": 0.1776, - "step": 17931 - }, - { - "epoch": 0.91, - "grad_norm": 0.9940650804664498, - "learning_rate": 4.045642317992149e-07, - "loss": 0.1609, - "step": 17932 - }, - { - "epoch": 0.91, - "grad_norm": 0.9952558127873156, - "learning_rate": 4.0410064118848624e-07, - "loss": 0.1515, - "step": 17933 - }, - { - "epoch": 0.91, - "grad_norm": 1.3742803805870323, - "learning_rate": 4.03637310867071e-07, - "loss": 0.1851, - "step": 17934 - }, - { - "epoch": 0.91, - "grad_norm": 1.1261120911519835, - "learning_rate": 4.031742408475359e-07, - "loss": 0.1669, - "step": 17935 - }, - { - "epoch": 0.91, - "grad_norm": 1.581412240413239, - "learning_rate": 4.027114311424407e-07, - "loss": 0.1574, - "step": 17936 - }, - { - "epoch": 0.91, - "grad_norm": 1.590825805117261, - "learning_rate": 4.0224888176434105e-07, - "loss": 0.1638, - "step": 17937 - }, - { - "epoch": 0.91, - "grad_norm": 0.9072850744524407, - "learning_rate": 4.0178659272578026e-07, - "loss": 0.1396, - "step": 17938 - }, - { - "epoch": 0.91, - "grad_norm": 44.79995566266486, - "learning_rate": 4.0132456403930263e-07, - "loss": 0.1802, - "step": 17939 - }, - { - "epoch": 0.91, - "grad_norm": 1.7677360420573207, - "learning_rate": 4.0086279571743715e-07, - "loss": 0.1641, - "step": 17940 - }, - { - "epoch": 0.91, - "grad_norm": 0.8974806317168946, - "learning_rate": 4.004012877727104e-07, - "loss": 0.1546, - "step": 17941 - }, - { - "epoch": 0.91, - "grad_norm": 0.9247563788673555, - "learning_rate": 3.999400402176401e-07, - "loss": 0.1758, - "step": 17942 - }, - { - "epoch": 0.91, - "grad_norm": 1.2695249530965618, - "learning_rate": 3.9947905306474077e-07, - "loss": 0.1596, - "step": 17943 - }, - { - "epoch": 0.91, - "grad_norm": 0.874647864267738, - "learning_rate": 3.990183263265124e-07, - "loss": 0.15, - "step": 17944 - }, - { - "epoch": 0.91, - "grad_norm": 1.1813860921210668, - "learning_rate": 3.985578600154549e-07, - "loss": 0.1676, - "step": 17945 - }, - { - "epoch": 0.91, - "grad_norm": 0.9040988900659936, - "learning_rate": 3.9809765414405734e-07, - "loss": 0.1454, - "step": 17946 - }, - { - "epoch": 0.91, - "grad_norm": 1.0329931692731547, - "learning_rate": 3.976377087248051e-07, - "loss": 0.1761, - "step": 17947 - }, - { - "epoch": 0.91, - "grad_norm": 0.883762203784139, - "learning_rate": 3.9717802377017057e-07, - "loss": 0.1717, - "step": 17948 - }, - { - "epoch": 0.91, - "grad_norm": 1.64805341126127, - "learning_rate": 3.967185992926237e-07, - "loss": 0.1613, - "step": 17949 - }, - { - "epoch": 0.91, - "grad_norm": 2.778869015406839, - "learning_rate": 3.9625943530462787e-07, - "loss": 0.1616, - "step": 17950 - }, - { - "epoch": 0.91, - "grad_norm": 0.8044296441418508, - "learning_rate": 3.9580053181863866e-07, - "loss": 0.1718, - "step": 17951 - }, - { - "epoch": 0.91, - "grad_norm": 1.1637532712791299, - "learning_rate": 3.953418888471017e-07, - "loss": 0.1708, - "step": 17952 - }, - { - "epoch": 0.91, - "grad_norm": 2.2436944226653273, - "learning_rate": 3.948835064024581e-07, - "loss": 0.1576, - "step": 17953 - }, - { - "epoch": 0.91, - "grad_norm": 2.16526928391713, - "learning_rate": 3.944253844971435e-07, - "loss": 0.1609, - "step": 17954 - }, - { - "epoch": 0.91, - "grad_norm": 1.1371984003091298, - "learning_rate": 3.939675231435802e-07, - "loss": 0.1489, - "step": 17955 - }, - { - "epoch": 0.91, - "grad_norm": 1.0432445408536668, - "learning_rate": 3.935099223541927e-07, - "loss": 0.1471, - "step": 17956 - }, - { - "epoch": 0.91, - "grad_norm": 0.939698839652885, - "learning_rate": 3.9305258214138995e-07, - "loss": 0.1406, - "step": 17957 - }, - { - "epoch": 0.91, - "grad_norm": 1.0156235305520485, - "learning_rate": 3.9259550251757763e-07, - "loss": 0.1752, - "step": 17958 - }, - { - "epoch": 0.91, - "grad_norm": 1.56236736930776, - "learning_rate": 3.921386834951557e-07, - "loss": 0.1594, - "step": 17959 - }, - { - "epoch": 0.91, - "grad_norm": 0.9256258931900568, - "learning_rate": 3.9168212508651547e-07, - "loss": 0.158, - "step": 17960 - }, - { - "epoch": 0.91, - "grad_norm": 1.2545803271356366, - "learning_rate": 3.9122582730403924e-07, - "loss": 0.1604, - "step": 17961 - }, - { - "epoch": 0.91, - "grad_norm": 1.2502358925172403, - "learning_rate": 3.907697901601071e-07, - "loss": 0.1283, - "step": 17962 - }, - { - "epoch": 0.91, - "grad_norm": 1.0882395503948148, - "learning_rate": 3.9031401366708467e-07, - "loss": 0.1499, - "step": 17963 - }, - { - "epoch": 0.91, - "grad_norm": 1.4270571392860416, - "learning_rate": 3.8985849783733873e-07, - "loss": 0.1528, - "step": 17964 - }, - { - "epoch": 0.91, - "grad_norm": 0.9151584518670104, - "learning_rate": 3.8940324268322285e-07, - "loss": 0.1507, - "step": 17965 - }, - { - "epoch": 0.91, - "grad_norm": 0.9494614142440932, - "learning_rate": 3.889482482170881e-07, - "loss": 0.1779, - "step": 17966 - }, - { - "epoch": 0.91, - "grad_norm": 1.311422455794517, - "learning_rate": 3.884935144512747e-07, - "loss": 0.1668, - "step": 17967 - }, - { - "epoch": 0.91, - "grad_norm": 0.9151959255779871, - "learning_rate": 3.880390413981161e-07, - "loss": 0.1457, - "step": 17968 - }, - { - "epoch": 0.91, - "grad_norm": 1.1434615925984102, - "learning_rate": 3.8758482906994245e-07, - "loss": 0.1798, - "step": 17969 - }, - { - "epoch": 0.91, - "grad_norm": 0.8407924412287494, - "learning_rate": 3.8713087747907385e-07, - "loss": 0.1365, - "step": 17970 - }, - { - "epoch": 0.91, - "grad_norm": 1.0285526948328574, - "learning_rate": 3.866771866378227e-07, - "loss": 0.156, - "step": 17971 - }, - { - "epoch": 0.91, - "grad_norm": 1.141798461154904, - "learning_rate": 3.862237565584959e-07, - "loss": 0.182, - "step": 17972 - }, - { - "epoch": 0.91, - "grad_norm": 0.9341811901107416, - "learning_rate": 3.8577058725339235e-07, - "loss": 0.157, - "step": 17973 - }, - { - "epoch": 0.91, - "grad_norm": 1.227656033497009, - "learning_rate": 3.8531767873480453e-07, - "loss": 0.162, - "step": 17974 - }, - { - "epoch": 0.91, - "grad_norm": 7.395502251060028, - "learning_rate": 3.8486503101501705e-07, - "loss": 0.1852, - "step": 17975 - }, - { - "epoch": 0.91, - "grad_norm": 1.1583305556455232, - "learning_rate": 3.84412644106309e-07, - "loss": 0.1536, - "step": 17976 - }, - { - "epoch": 0.91, - "grad_norm": 1.3079551443868314, - "learning_rate": 3.839605180209527e-07, - "loss": 0.1573, - "step": 17977 - }, - { - "epoch": 0.91, - "grad_norm": 0.8749974615321174, - "learning_rate": 3.835086527712084e-07, - "loss": 0.1618, - "step": 17978 - }, - { - "epoch": 0.91, - "grad_norm": 1.412753912406829, - "learning_rate": 3.830570483693374e-07, - "loss": 0.1508, - "step": 17979 - }, - { - "epoch": 0.91, - "grad_norm": 0.9967886468592525, - "learning_rate": 3.8260570482758554e-07, - "loss": 0.1565, - "step": 17980 - }, - { - "epoch": 0.91, - "grad_norm": 1.3170670252562915, - "learning_rate": 3.8215462215819733e-07, - "loss": 0.1727, - "step": 17981 - }, - { - "epoch": 0.91, - "grad_norm": 0.8589125439803201, - "learning_rate": 3.817038003734075e-07, - "loss": 0.1716, - "step": 17982 - }, - { - "epoch": 0.91, - "grad_norm": 2.77991289215004, - "learning_rate": 3.8125323948544734e-07, - "loss": 0.1717, - "step": 17983 - }, - { - "epoch": 0.91, - "grad_norm": 1.4852015113231571, - "learning_rate": 3.808029395065349e-07, - "loss": 0.1668, - "step": 17984 - }, - { - "epoch": 0.91, - "grad_norm": 1.3957447592536913, - "learning_rate": 3.803529004488848e-07, - "loss": 0.1621, - "step": 17985 - }, - { - "epoch": 0.91, - "grad_norm": 1.0856403131783523, - "learning_rate": 3.7990312232470627e-07, - "loss": 0.1625, - "step": 17986 - }, - { - "epoch": 0.91, - "grad_norm": 0.8855645448579809, - "learning_rate": 3.7945360514620056e-07, - "loss": 0.1342, - "step": 17987 - }, - { - "epoch": 0.91, - "grad_norm": 1.0073013102385229, - "learning_rate": 3.7900434892555903e-07, - "loss": 0.1579, - "step": 17988 - }, - { - "epoch": 0.91, - "grad_norm": 2.0871019309171697, - "learning_rate": 3.785553536749664e-07, - "loss": 0.1583, - "step": 17989 - }, - { - "epoch": 0.91, - "grad_norm": 1.0891511728916528, - "learning_rate": 3.781066194066052e-07, - "loss": 0.1642, - "step": 17990 - }, - { - "epoch": 0.91, - "grad_norm": 3.8153204240961576, - "learning_rate": 3.776581461326434e-07, - "loss": 0.1855, - "step": 17991 - }, - { - "epoch": 0.91, - "grad_norm": 1.0753731363296326, - "learning_rate": 3.772099338652491e-07, - "loss": 0.1686, - "step": 17992 - }, - { - "epoch": 0.91, - "grad_norm": 1.1277177371834488, - "learning_rate": 3.7676198261657803e-07, - "loss": 0.1571, - "step": 17993 - }, - { - "epoch": 0.92, - "grad_norm": 0.8328991730043827, - "learning_rate": 3.763142923987817e-07, - "loss": 0.1635, - "step": 17994 - }, - { - "epoch": 0.92, - "grad_norm": 1.1623322953496837, - "learning_rate": 3.7586686322400257e-07, - "loss": 0.1753, - "step": 17995 - }, - { - "epoch": 0.92, - "grad_norm": 1.2256700386655874, - "learning_rate": 3.7541969510438094e-07, - "loss": 0.1708, - "step": 17996 - }, - { - "epoch": 0.92, - "grad_norm": 0.9689492752414466, - "learning_rate": 3.749727880520415e-07, - "loss": 0.1555, - "step": 17997 - }, - { - "epoch": 0.92, - "grad_norm": 1.2295248073265919, - "learning_rate": 3.7452614207911133e-07, - "loss": 0.1701, - "step": 17998 - }, - { - "epoch": 0.92, - "grad_norm": 1.5945159084207308, - "learning_rate": 3.740797571977006e-07, - "loss": 0.1777, - "step": 17999 - }, - { - "epoch": 0.92, - "grad_norm": 1.097649874716848, - "learning_rate": 3.7363363341992197e-07, - "loss": 0.1689, - "step": 18000 - }, - { - "epoch": 0.92, - "grad_norm": 1.8499978036235123, - "learning_rate": 3.731877707578735e-07, - "loss": 0.1546, - "step": 18001 - }, - { - "epoch": 0.92, - "grad_norm": 1.8260381407155, - "learning_rate": 3.72742169223651e-07, - "loss": 0.1513, - "step": 18002 - }, - { - "epoch": 0.92, - "grad_norm": 0.7594243057445846, - "learning_rate": 3.722968288293405e-07, - "loss": 0.137, - "step": 18003 - }, - { - "epoch": 0.92, - "grad_norm": 0.9524979700874181, - "learning_rate": 3.718517495870233e-07, - "loss": 0.173, - "step": 18004 - }, - { - "epoch": 0.92, - "grad_norm": 2.0215817900375197, - "learning_rate": 3.714069315087709e-07, - "loss": 0.1522, - "step": 18005 - }, - { - "epoch": 0.92, - "grad_norm": 1.2227474046025721, - "learning_rate": 3.709623746066482e-07, - "loss": 0.1764, - "step": 18006 - }, - { - "epoch": 0.92, - "grad_norm": 1.4786570951548887, - "learning_rate": 3.7051807889271653e-07, - "loss": 0.1805, - "step": 18007 - }, - { - "epoch": 0.92, - "grad_norm": 0.9627071613835644, - "learning_rate": 3.7007404437902515e-07, - "loss": 0.1833, - "step": 18008 - }, - { - "epoch": 0.92, - "grad_norm": 0.9180815857025116, - "learning_rate": 3.6963027107761896e-07, - "loss": 0.1715, - "step": 18009 - }, - { - "epoch": 0.92, - "grad_norm": 1.144345240438523, - "learning_rate": 3.6918675900053605e-07, - "loss": 0.1531, - "step": 18010 - }, - { - "epoch": 0.92, - "grad_norm": 2.4240528022562584, - "learning_rate": 3.6874350815980565e-07, - "loss": 0.1887, - "step": 18011 - }, - { - "epoch": 0.92, - "grad_norm": 1.9857446024845689, - "learning_rate": 3.683005185674504e-07, - "loss": 0.1835, - "step": 18012 - }, - { - "epoch": 0.92, - "grad_norm": 0.8481856015382779, - "learning_rate": 3.678577902354907e-07, - "loss": 0.1537, - "step": 18013 - }, - { - "epoch": 0.92, - "grad_norm": 0.9481275747482545, - "learning_rate": 3.674153231759303e-07, - "loss": 0.159, - "step": 18014 - }, - { - "epoch": 0.92, - "grad_norm": 1.0549232250539309, - "learning_rate": 3.66973117400774e-07, - "loss": 0.1911, - "step": 18015 - }, - { - "epoch": 0.92, - "grad_norm": 1.1714198995947855, - "learning_rate": 3.665311729220156e-07, - "loss": 0.1681, - "step": 18016 - }, - { - "epoch": 0.92, - "grad_norm": 1.204461169805127, - "learning_rate": 3.6608948975164424e-07, - "loss": 0.1846, - "step": 18017 - }, - { - "epoch": 0.92, - "grad_norm": 1.0571081817276509, - "learning_rate": 3.6564806790163833e-07, - "loss": 0.1925, - "step": 18018 - }, - { - "epoch": 0.92, - "grad_norm": 0.9719685244560988, - "learning_rate": 3.6520690738397256e-07, - "loss": 0.1646, - "step": 18019 - }, - { - "epoch": 0.92, - "grad_norm": 1.9468764412733461, - "learning_rate": 3.64766008210613e-07, - "loss": 0.1658, - "step": 18020 - }, - { - "epoch": 0.92, - "grad_norm": 0.7563243675377808, - "learning_rate": 3.643253703935223e-07, - "loss": 0.1605, - "step": 18021 - }, - { - "epoch": 0.92, - "grad_norm": 0.9060599741346881, - "learning_rate": 3.6388499394464874e-07, - "loss": 0.1464, - "step": 18022 - }, - { - "epoch": 0.92, - "grad_norm": 1.2158613459959573, - "learning_rate": 3.6344487887593926e-07, - "loss": 0.17, - "step": 18023 - }, - { - "epoch": 0.92, - "grad_norm": 0.971532594962186, - "learning_rate": 3.630050251993311e-07, - "loss": 0.16, - "step": 18024 - }, - { - "epoch": 0.92, - "grad_norm": 1.1481698919830072, - "learning_rate": 3.6256543292675584e-07, - "loss": 0.1777, - "step": 18025 - }, - { - "epoch": 0.92, - "grad_norm": 1.486437555313577, - "learning_rate": 3.6212610207013943e-07, - "loss": 0.1588, - "step": 18026 - }, - { - "epoch": 0.92, - "grad_norm": 1.1882199195943515, - "learning_rate": 3.616870326413946e-07, - "loss": 0.1843, - "step": 18027 - }, - { - "epoch": 0.92, - "grad_norm": 0.8975928779369106, - "learning_rate": 3.61248224652434e-07, - "loss": 0.1493, - "step": 18028 - }, - { - "epoch": 0.92, - "grad_norm": 1.0875976408022372, - "learning_rate": 3.6080967811515933e-07, - "loss": 0.16, - "step": 18029 - }, - { - "epoch": 0.92, - "grad_norm": 0.9381668102488647, - "learning_rate": 3.603713930414676e-07, - "loss": 0.1578, - "step": 18030 - }, - { - "epoch": 0.92, - "grad_norm": 0.8959578955261757, - "learning_rate": 3.59933369443245e-07, - "loss": 0.158, - "step": 18031 - }, - { - "epoch": 0.92, - "grad_norm": 2.7477054698311623, - "learning_rate": 3.594956073323763e-07, - "loss": 0.1612, - "step": 18032 - }, - { - "epoch": 0.92, - "grad_norm": 2.0899173714504182, - "learning_rate": 3.5905810672073107e-07, - "loss": 0.1803, - "step": 18033 - }, - { - "epoch": 0.92, - "grad_norm": 1.5364206457954415, - "learning_rate": 3.586208676201819e-07, - "loss": 0.1786, - "step": 18034 - }, - { - "epoch": 0.92, - "grad_norm": 1.070889298110779, - "learning_rate": 3.581838900425838e-07, - "loss": 0.1661, - "step": 18035 - }, - { - "epoch": 0.92, - "grad_norm": 0.8732342679329591, - "learning_rate": 3.5774717399979396e-07, - "loss": 0.1585, - "step": 18036 - }, - { - "epoch": 0.92, - "grad_norm": 0.9400795374448295, - "learning_rate": 3.5731071950365625e-07, - "loss": 0.1693, - "step": 18037 - }, - { - "epoch": 0.92, - "grad_norm": 1.1803725967238192, - "learning_rate": 3.5687452656600896e-07, - "loss": 0.1711, - "step": 18038 - }, - { - "epoch": 0.92, - "grad_norm": 1.16940189288093, - "learning_rate": 3.5643859519868594e-07, - "loss": 0.1701, - "step": 18039 - }, - { - "epoch": 0.92, - "grad_norm": 0.8894016048155894, - "learning_rate": 3.5600292541351e-07, - "loss": 0.1652, - "step": 18040 - }, - { - "epoch": 0.92, - "grad_norm": 1.994107188440856, - "learning_rate": 3.5556751722230056e-07, - "loss": 0.143, - "step": 18041 - }, - { - "epoch": 0.92, - "grad_norm": 0.8943863074222658, - "learning_rate": 3.551323706368659e-07, - "loss": 0.1604, - "step": 18042 - }, - { - "epoch": 0.92, - "grad_norm": 0.8372530214153084, - "learning_rate": 3.546974856690111e-07, - "loss": 0.1682, - "step": 18043 - }, - { - "epoch": 0.92, - "grad_norm": 1.4334941554546106, - "learning_rate": 3.542628623305311e-07, - "loss": 0.1692, - "step": 18044 - }, - { - "epoch": 0.92, - "grad_norm": 0.8602732086285463, - "learning_rate": 3.538285006332154e-07, - "loss": 0.1651, - "step": 18045 - }, - { - "epoch": 0.92, - "grad_norm": 3.27004473331578, - "learning_rate": 3.5339440058884565e-07, - "loss": 0.1518, - "step": 18046 - }, - { - "epoch": 0.92, - "grad_norm": 1.2627309938058453, - "learning_rate": 3.529605622092003e-07, - "loss": 0.1388, - "step": 18047 - }, - { - "epoch": 0.92, - "grad_norm": 1.0584636145518418, - "learning_rate": 3.52526985506042e-07, - "loss": 0.1625, - "step": 18048 - }, - { - "epoch": 0.92, - "grad_norm": 0.8819800807661746, - "learning_rate": 3.5209367049113596e-07, - "loss": 0.1535, - "step": 18049 - }, - { - "epoch": 0.92, - "grad_norm": 1.1435665267192858, - "learning_rate": 3.516606171762338e-07, - "loss": 0.1647, - "step": 18050 - }, - { - "epoch": 0.92, - "grad_norm": 1.00416429595634, - "learning_rate": 3.5122782557308163e-07, - "loss": 0.1704, - "step": 18051 - }, - { - "epoch": 0.92, - "grad_norm": 1.040981168819708, - "learning_rate": 3.507952956934202e-07, - "loss": 0.1615, - "step": 18052 - }, - { - "epoch": 0.92, - "grad_norm": 1.1564463804169962, - "learning_rate": 3.503630275489811e-07, - "loss": 0.1751, - "step": 18053 - }, - { - "epoch": 0.92, - "grad_norm": 1.0239001745339626, - "learning_rate": 3.4993102115148947e-07, - "loss": 0.1444, - "step": 18054 - }, - { - "epoch": 0.92, - "grad_norm": 1.6025551048585556, - "learning_rate": 3.494992765126637e-07, - "loss": 0.1668, - "step": 18055 - }, - { - "epoch": 0.92, - "grad_norm": 1.0701646834638574, - "learning_rate": 3.490677936442155e-07, - "loss": 0.1418, - "step": 18056 - }, - { - "epoch": 0.92, - "grad_norm": 1.3906487577365694, - "learning_rate": 3.4863657255785e-07, - "loss": 0.1819, - "step": 18057 - }, - { - "epoch": 0.92, - "grad_norm": 1.2564578605249104, - "learning_rate": 3.482056132652623e-07, - "loss": 0.1609, - "step": 18058 - }, - { - "epoch": 0.92, - "grad_norm": 1.2013413310258307, - "learning_rate": 3.477749157781407e-07, - "loss": 0.1592, - "step": 18059 - }, - { - "epoch": 0.92, - "grad_norm": 2.8924948411927334, - "learning_rate": 3.4734448010817043e-07, - "loss": 0.1743, - "step": 18060 - }, - { - "epoch": 0.92, - "grad_norm": 1.0088436986399607, - "learning_rate": 3.469143062670266e-07, - "loss": 0.1438, - "step": 18061 - }, - { - "epoch": 0.92, - "grad_norm": 1.1062930156517436, - "learning_rate": 3.464843942663776e-07, - "loss": 0.1498, - "step": 18062 - }, - { - "epoch": 0.92, - "grad_norm": 1.2243603416203355, - "learning_rate": 3.4605474411788407e-07, - "loss": 0.1835, - "step": 18063 - }, - { - "epoch": 0.92, - "grad_norm": 1.1303646982159368, - "learning_rate": 3.456253558332001e-07, - "loss": 0.1652, - "step": 18064 - }, - { - "epoch": 0.92, - "grad_norm": 1.644905305055982, - "learning_rate": 3.451962294239741e-07, - "loss": 0.1728, - "step": 18065 - }, - { - "epoch": 0.92, - "grad_norm": 1.4259200726169508, - "learning_rate": 3.4476736490184683e-07, - "loss": 0.1637, - "step": 18066 - }, - { - "epoch": 0.92, - "grad_norm": 0.7865406720386555, - "learning_rate": 3.443387622784489e-07, - "loss": 0.1584, - "step": 18067 - }, - { - "epoch": 0.92, - "grad_norm": 0.7506526478559534, - "learning_rate": 3.439104215654088e-07, - "loss": 0.1562, - "step": 18068 - }, - { - "epoch": 0.92, - "grad_norm": 0.9513526709479354, - "learning_rate": 3.434823427743428e-07, - "loss": 0.1656, - "step": 18069 - }, - { - "epoch": 0.92, - "grad_norm": 0.9825643516548362, - "learning_rate": 3.430545259168638e-07, - "loss": 0.1626, - "step": 18070 - }, - { - "epoch": 0.92, - "grad_norm": 1.8435459556303768, - "learning_rate": 3.42626971004576e-07, - "loss": 0.1509, - "step": 18071 - }, - { - "epoch": 0.92, - "grad_norm": 1.1602326205822766, - "learning_rate": 3.421996780490766e-07, - "loss": 0.1743, - "step": 18072 - }, - { - "epoch": 0.92, - "grad_norm": 1.240986025282152, - "learning_rate": 3.4177264706195754e-07, - "loss": 0.1743, - "step": 18073 - }, - { - "epoch": 0.92, - "grad_norm": 0.9798339208614196, - "learning_rate": 3.413458780548007e-07, - "loss": 0.1678, - "step": 18074 - }, - { - "epoch": 0.92, - "grad_norm": 2.1122500459593923, - "learning_rate": 3.409193710391834e-07, - "loss": 0.1846, - "step": 18075 - }, - { - "epoch": 0.92, - "grad_norm": 1.2219867673175016, - "learning_rate": 3.4049312602667197e-07, - "loss": 0.1601, - "step": 18076 - }, - { - "epoch": 0.92, - "grad_norm": 1.1584272896050063, - "learning_rate": 3.400671430288316e-07, - "loss": 0.1418, - "step": 18077 - }, - { - "epoch": 0.92, - "grad_norm": 0.934316331610779, - "learning_rate": 3.396414220572142e-07, - "loss": 0.1549, - "step": 18078 - }, - { - "epoch": 0.92, - "grad_norm": 1.3242111151036162, - "learning_rate": 3.3921596312336935e-07, - "loss": 0.1588, - "step": 18079 - }, - { - "epoch": 0.92, - "grad_norm": 0.9434150029025636, - "learning_rate": 3.3879076623883677e-07, - "loss": 0.1556, - "step": 18080 - }, - { - "epoch": 0.92, - "grad_norm": 0.9255895715291566, - "learning_rate": 3.3836583141515054e-07, - "loss": 0.1705, - "step": 18081 - }, - { - "epoch": 0.92, - "grad_norm": 2.3659204139409478, - "learning_rate": 3.379411586638359e-07, - "loss": 0.1644, - "step": 18082 - }, - { - "epoch": 0.92, - "grad_norm": 2.2360570568932796, - "learning_rate": 3.3751674799641475e-07, - "loss": 0.1591, - "step": 18083 - }, - { - "epoch": 0.92, - "grad_norm": 1.1750311349980966, - "learning_rate": 3.3709259942439677e-07, - "loss": 0.1692, - "step": 18084 - }, - { - "epoch": 0.92, - "grad_norm": 1.2854669834238228, - "learning_rate": 3.3666871295928826e-07, - "loss": 0.168, - "step": 18085 - }, - { - "epoch": 0.92, - "grad_norm": 1.2254877804588578, - "learning_rate": 3.3624508861258564e-07, - "loss": 0.1592, - "step": 18086 - }, - { - "epoch": 0.92, - "grad_norm": 1.4678012533558435, - "learning_rate": 3.3582172639578304e-07, - "loss": 0.1746, - "step": 18087 - }, - { - "epoch": 0.92, - "grad_norm": 0.9622118655252306, - "learning_rate": 3.35398626320359e-07, - "loss": 0.1668, - "step": 18088 - }, - { - "epoch": 0.92, - "grad_norm": 1.467378103630613, - "learning_rate": 3.3497578839779554e-07, - "loss": 0.166, - "step": 18089 - }, - { - "epoch": 0.92, - "grad_norm": 1.0194618001770677, - "learning_rate": 3.345532126395579e-07, - "loss": 0.1638, - "step": 18090 - }, - { - "epoch": 0.92, - "grad_norm": 1.170019513023722, - "learning_rate": 3.3413089905711127e-07, - "loss": 0.1381, - "step": 18091 - }, - { - "epoch": 0.92, - "grad_norm": 0.936399762071647, - "learning_rate": 3.337088476619099e-07, - "loss": 0.1745, - "step": 18092 - }, - { - "epoch": 0.92, - "grad_norm": 0.9416114993211048, - "learning_rate": 3.332870584654013e-07, - "loss": 0.1697, - "step": 18093 - }, - { - "epoch": 0.92, - "grad_norm": 2.979094340683368, - "learning_rate": 3.3286553147902855e-07, - "loss": 0.1802, - "step": 18094 - }, - { - "epoch": 0.92, - "grad_norm": 0.9993114947029553, - "learning_rate": 3.3244426671422246e-07, - "loss": 0.1716, - "step": 18095 - }, - { - "epoch": 0.92, - "grad_norm": 0.9762340459586974, - "learning_rate": 3.320232641824139e-07, - "loss": 0.1665, - "step": 18096 - }, - { - "epoch": 0.92, - "grad_norm": 1.0886486279912961, - "learning_rate": 3.3160252389501824e-07, - "loss": 0.1652, - "step": 18097 - }, - { - "epoch": 0.92, - "grad_norm": 3.2737911651638636, - "learning_rate": 3.311820458634507e-07, - "loss": 0.1583, - "step": 18098 - }, - { - "epoch": 0.92, - "grad_norm": 0.9989512775252093, - "learning_rate": 3.3076183009911667e-07, - "loss": 0.1443, - "step": 18099 - }, - { - "epoch": 0.92, - "grad_norm": 0.9043083457326804, - "learning_rate": 3.3034187661341476e-07, - "loss": 0.1616, - "step": 18100 - }, - { - "epoch": 0.92, - "grad_norm": 1.511213481907978, - "learning_rate": 3.299221854177337e-07, - "loss": 0.1606, - "step": 18101 - }, - { - "epoch": 0.92, - "grad_norm": 0.8975862189129877, - "learning_rate": 3.29502756523461e-07, - "loss": 0.1452, - "step": 18102 - }, - { - "epoch": 0.92, - "grad_norm": 0.9606207256115495, - "learning_rate": 3.29083589941972e-07, - "loss": 0.1704, - "step": 18103 - }, - { - "epoch": 0.92, - "grad_norm": 0.9537946488192327, - "learning_rate": 3.286646856846376e-07, - "loss": 0.1623, - "step": 18104 - }, - { - "epoch": 0.92, - "grad_norm": 1.274674491839072, - "learning_rate": 3.2824604376281874e-07, - "loss": 0.166, - "step": 18105 - }, - { - "epoch": 0.92, - "grad_norm": 1.082756311497904, - "learning_rate": 3.2782766418787306e-07, - "loss": 0.1535, - "step": 18106 - }, - { - "epoch": 0.92, - "grad_norm": 1.2348526149655745, - "learning_rate": 3.274095469711469e-07, - "loss": 0.1637, - "step": 18107 - }, - { - "epoch": 0.92, - "grad_norm": 1.1677907803256713, - "learning_rate": 3.2699169212398354e-07, - "loss": 0.1689, - "step": 18108 - }, - { - "epoch": 0.92, - "grad_norm": 1.032761047187609, - "learning_rate": 3.2657409965771715e-07, - "loss": 0.1581, - "step": 18109 - }, - { - "epoch": 0.92, - "grad_norm": 1.5161936216848935, - "learning_rate": 3.2615676958367424e-07, - "loss": 0.1738, - "step": 18110 - }, - { - "epoch": 0.92, - "grad_norm": 0.8366912376904033, - "learning_rate": 3.2573970191317693e-07, - "loss": 0.1794, - "step": 18111 - }, - { - "epoch": 0.92, - "grad_norm": 0.8885962028795105, - "learning_rate": 3.2532289665753503e-07, - "loss": 0.1598, - "step": 18112 - }, - { - "epoch": 0.92, - "grad_norm": 1.7663929172818378, - "learning_rate": 3.2490635382805726e-07, - "loss": 0.156, - "step": 18113 - }, - { - "epoch": 0.92, - "grad_norm": 0.9646336089864638, - "learning_rate": 3.244900734360401e-07, - "loss": 0.169, - "step": 18114 - }, - { - "epoch": 0.92, - "grad_norm": 1.0913680494127547, - "learning_rate": 3.2407405549277683e-07, - "loss": 0.1544, - "step": 18115 - }, - { - "epoch": 0.92, - "grad_norm": 4.339377476577004, - "learning_rate": 3.2365830000954945e-07, - "loss": 0.1701, - "step": 18116 - }, - { - "epoch": 0.92, - "grad_norm": 1.1835551634972878, - "learning_rate": 3.23242806997639e-07, - "loss": 0.1798, - "step": 18117 - }, - { - "epoch": 0.92, - "grad_norm": 0.7438641443938219, - "learning_rate": 3.2282757646831306e-07, - "loss": 0.1447, - "step": 18118 - }, - { - "epoch": 0.92, - "grad_norm": 1.2541257573027336, - "learning_rate": 3.224126084328361e-07, - "loss": 0.1617, - "step": 18119 - }, - { - "epoch": 0.92, - "grad_norm": 1.298017080325946, - "learning_rate": 3.219979029024634e-07, - "loss": 0.1719, - "step": 18120 - }, - { - "epoch": 0.92, - "grad_norm": 1.0833901419058496, - "learning_rate": 3.21583459888446e-07, - "loss": 0.1504, - "step": 18121 - }, - { - "epoch": 0.92, - "grad_norm": 1.1311696910451334, - "learning_rate": 3.2116927940202157e-07, - "loss": 0.1725, - "step": 18122 - }, - { - "epoch": 0.92, - "grad_norm": 1.4654054266531409, - "learning_rate": 3.2075536145442897e-07, - "loss": 0.1606, - "step": 18123 - }, - { - "epoch": 0.92, - "grad_norm": 1.540578843845565, - "learning_rate": 3.203417060568925e-07, - "loss": 0.1611, - "step": 18124 - }, - { - "epoch": 0.92, - "grad_norm": 1.5544623075810544, - "learning_rate": 3.1992831322063324e-07, - "loss": 0.1667, - "step": 18125 - }, - { - "epoch": 0.92, - "grad_norm": 0.9578152551564797, - "learning_rate": 3.195151829568666e-07, - "loss": 0.1612, - "step": 18126 - }, - { - "epoch": 0.92, - "grad_norm": 0.9665434629439523, - "learning_rate": 3.191023152767969e-07, - "loss": 0.1497, - "step": 18127 - }, - { - "epoch": 0.92, - "grad_norm": 1.009141454543222, - "learning_rate": 3.1868971019162533e-07, - "loss": 0.1609, - "step": 18128 - }, - { - "epoch": 0.92, - "grad_norm": 1.0150188637483344, - "learning_rate": 3.1827736771253946e-07, - "loss": 0.1751, - "step": 18129 - }, - { - "epoch": 0.92, - "grad_norm": 0.8852232068809844, - "learning_rate": 3.178652878507293e-07, - "loss": 0.1538, - "step": 18130 - }, - { - "epoch": 0.92, - "grad_norm": 0.8192750933961805, - "learning_rate": 3.174534706173682e-07, - "loss": 0.1602, - "step": 18131 - }, - { - "epoch": 0.92, - "grad_norm": 0.8364749466835223, - "learning_rate": 3.170419160236293e-07, - "loss": 0.1617, - "step": 18132 - }, - { - "epoch": 0.92, - "grad_norm": 1.0486731726322975, - "learning_rate": 3.166306240806749e-07, - "loss": 0.1673, - "step": 18133 - }, - { - "epoch": 0.92, - "grad_norm": 0.994510073688784, - "learning_rate": 3.162195947996616e-07, - "loss": 0.1523, - "step": 18134 - }, - { - "epoch": 0.92, - "grad_norm": 1.2142922058584487, - "learning_rate": 3.158088281917393e-07, - "loss": 0.1691, - "step": 18135 - }, - { - "epoch": 0.92, - "grad_norm": 1.0238164627534345, - "learning_rate": 3.153983242680503e-07, - "loss": 0.1507, - "step": 18136 - }, - { - "epoch": 0.92, - "grad_norm": 1.0896154357419239, - "learning_rate": 3.149880830397267e-07, - "loss": 0.1846, - "step": 18137 - }, - { - "epoch": 0.92, - "grad_norm": 1.0416929788195814, - "learning_rate": 3.1457810451790083e-07, - "loss": 0.1621, - "step": 18138 - }, - { - "epoch": 0.92, - "grad_norm": 3.2883004776773355, - "learning_rate": 3.1416838871368925e-07, - "loss": 0.19, - "step": 18139 - }, - { - "epoch": 0.92, - "grad_norm": 0.8616914377273562, - "learning_rate": 3.137589356382076e-07, - "loss": 0.1522, - "step": 18140 - }, - { - "epoch": 0.92, - "grad_norm": 0.8543744553081801, - "learning_rate": 3.1334974530256134e-07, - "loss": 0.1656, - "step": 18141 - }, - { - "epoch": 0.92, - "grad_norm": 1.1054493297414734, - "learning_rate": 3.1294081771785057e-07, - "loss": 0.1475, - "step": 18142 - }, - { - "epoch": 0.92, - "grad_norm": 0.8807211474880493, - "learning_rate": 3.125321528951675e-07, - "loss": 0.1262, - "step": 18143 - }, - { - "epoch": 0.92, - "grad_norm": 0.9575123181423777, - "learning_rate": 3.1212375084559767e-07, - "loss": 0.1529, - "step": 18144 - }, - { - "epoch": 0.92, - "grad_norm": 1.1843135991699991, - "learning_rate": 3.117156115802178e-07, - "loss": 0.1592, - "step": 18145 - }, - { - "epoch": 0.92, - "grad_norm": 1.5669667485440848, - "learning_rate": 3.113077351100979e-07, - "loss": 0.1494, - "step": 18146 - }, - { - "epoch": 0.92, - "grad_norm": 0.9169278577698446, - "learning_rate": 3.1090012144630476e-07, - "loss": 0.1438, - "step": 18147 - }, - { - "epoch": 0.92, - "grad_norm": 1.554679031235613, - "learning_rate": 3.1049277059989167e-07, - "loss": 0.1633, - "step": 18148 - }, - { - "epoch": 0.92, - "grad_norm": 1.2194988964301239, - "learning_rate": 3.1008568258191095e-07, - "loss": 0.1622, - "step": 18149 - }, - { - "epoch": 0.92, - "grad_norm": 1.0482947007736183, - "learning_rate": 3.0967885740340266e-07, - "loss": 0.1358, - "step": 18150 - }, - { - "epoch": 0.92, - "grad_norm": 0.8393879878929953, - "learning_rate": 3.0927229507540126e-07, - "loss": 0.1526, - "step": 18151 - }, - { - "epoch": 0.92, - "grad_norm": 1.1180630860302485, - "learning_rate": 3.088659956089368e-07, - "loss": 0.1662, - "step": 18152 - }, - { - "epoch": 0.92, - "grad_norm": 1.0517176769625036, - "learning_rate": 3.0845995901503167e-07, - "loss": 0.1766, - "step": 18153 - }, - { - "epoch": 0.92, - "grad_norm": 1.3653458310572448, - "learning_rate": 3.080541853046948e-07, - "loss": 0.1944, - "step": 18154 - }, - { - "epoch": 0.92, - "grad_norm": 1.0415239876959776, - "learning_rate": 3.076486744889373e-07, - "loss": 0.1497, - "step": 18155 - }, - { - "epoch": 0.92, - "grad_norm": 1.24217910381336, - "learning_rate": 3.0724342657875604e-07, - "loss": 0.1783, - "step": 18156 - }, - { - "epoch": 0.92, - "grad_norm": 1.07365429308084, - "learning_rate": 3.068384415851455e-07, - "loss": 0.1538, - "step": 18157 - }, - { - "epoch": 0.92, - "grad_norm": 2.9870483996894945, - "learning_rate": 3.0643371951908806e-07, - "loss": 0.1896, - "step": 18158 - }, - { - "epoch": 0.92, - "grad_norm": 0.9650764946172202, - "learning_rate": 3.0602926039156487e-07, - "loss": 0.1665, - "step": 18159 - }, - { - "epoch": 0.92, - "grad_norm": 0.9600897084297326, - "learning_rate": 3.05625064213545e-07, - "loss": 0.1598, - "step": 18160 - }, - { - "epoch": 0.92, - "grad_norm": 0.8412084312803206, - "learning_rate": 3.0522113099599184e-07, - "loss": 0.1736, - "step": 18161 - }, - { - "epoch": 0.92, - "grad_norm": 1.050746648506869, - "learning_rate": 3.048174607498644e-07, - "loss": 0.186, - "step": 18162 - }, - { - "epoch": 0.92, - "grad_norm": 1.1393671291834178, - "learning_rate": 3.044140534861106e-07, - "loss": 0.1768, - "step": 18163 - }, - { - "epoch": 0.92, - "grad_norm": 1.2581635137906724, - "learning_rate": 3.04010909215674e-07, - "loss": 0.1781, - "step": 18164 - }, - { - "epoch": 0.92, - "grad_norm": 1.069811077440688, - "learning_rate": 3.0360802794948687e-07, - "loss": 0.1713, - "step": 18165 - }, - { - "epoch": 0.92, - "grad_norm": 0.8545719708007923, - "learning_rate": 3.032054096984816e-07, - "loss": 0.1502, - "step": 18166 - }, - { - "epoch": 0.92, - "grad_norm": 1.5327808645021874, - "learning_rate": 3.0280305447357607e-07, - "loss": 0.1389, - "step": 18167 - }, - { - "epoch": 0.92, - "grad_norm": 0.9530728622555193, - "learning_rate": 3.0240096228568606e-07, - "loss": 0.1396, - "step": 18168 - }, - { - "epoch": 0.92, - "grad_norm": 1.2315279397059917, - "learning_rate": 3.0199913314571726e-07, - "loss": 0.1459, - "step": 18169 - }, - { - "epoch": 0.92, - "grad_norm": 0.8872656692354135, - "learning_rate": 3.0159756706456987e-07, - "loss": 0.1484, - "step": 18170 - }, - { - "epoch": 0.92, - "grad_norm": 0.8181420046073249, - "learning_rate": 3.011962640531363e-07, - "loss": 0.1527, - "step": 18171 - }, - { - "epoch": 0.92, - "grad_norm": 1.3518825408066595, - "learning_rate": 3.007952241223022e-07, - "loss": 0.1806, - "step": 18172 - }, - { - "epoch": 0.92, - "grad_norm": 1.4158408673441485, - "learning_rate": 3.0039444728294563e-07, - "loss": 0.148, - "step": 18173 - }, - { - "epoch": 0.92, - "grad_norm": 1.1370683227512903, - "learning_rate": 2.999939335459379e-07, - "loss": 0.1633, - "step": 18174 - }, - { - "epoch": 0.92, - "grad_norm": 1.1368399110544725, - "learning_rate": 2.995936829221413e-07, - "loss": 0.1402, - "step": 18175 - }, - { - "epoch": 0.92, - "grad_norm": 0.9983565029532296, - "learning_rate": 2.9919369542241504e-07, - "loss": 0.1586, - "step": 18176 - }, - { - "epoch": 0.92, - "grad_norm": 0.8587644416400668, - "learning_rate": 2.9879397105760597e-07, - "loss": 0.1584, - "step": 18177 - }, - { - "epoch": 0.92, - "grad_norm": 1.1468541444837406, - "learning_rate": 2.9839450983855876e-07, - "loss": 0.1645, - "step": 18178 - }, - { - "epoch": 0.92, - "grad_norm": 1.8486483622789254, - "learning_rate": 2.979953117761103e-07, - "loss": 0.2073, - "step": 18179 - }, - { - "epoch": 0.92, - "grad_norm": 0.8125393213889613, - "learning_rate": 2.975963768810852e-07, - "loss": 0.1432, - "step": 18180 - }, - { - "epoch": 0.92, - "grad_norm": 1.3930677782012002, - "learning_rate": 2.971977051643071e-07, - "loss": 0.1656, - "step": 18181 - }, - { - "epoch": 0.92, - "grad_norm": 1.7007796046851476, - "learning_rate": 2.9679929663658957e-07, - "loss": 0.1799, - "step": 18182 - }, - { - "epoch": 0.92, - "grad_norm": 0.9477805648312042, - "learning_rate": 2.9640115130873835e-07, - "loss": 0.1565, - "step": 18183 - }, - { - "epoch": 0.92, - "grad_norm": 1.1168235302246006, - "learning_rate": 2.9600326919155486e-07, - "loss": 0.168, - "step": 18184 - }, - { - "epoch": 0.92, - "grad_norm": 0.859315154971044, - "learning_rate": 2.956056502958304e-07, - "loss": 0.1585, - "step": 18185 - }, - { - "epoch": 0.92, - "grad_norm": 1.5752916829869035, - "learning_rate": 2.952082946323498e-07, - "loss": 0.1666, - "step": 18186 - }, - { - "epoch": 0.92, - "grad_norm": 1.1845490612936995, - "learning_rate": 2.948112022118932e-07, - "loss": 0.1917, - "step": 18187 - }, - { - "epoch": 0.92, - "grad_norm": 1.7967161879916749, - "learning_rate": 2.944143730452298e-07, - "loss": 0.1686, - "step": 18188 - }, - { - "epoch": 0.92, - "grad_norm": 1.0093817119350061, - "learning_rate": 2.9401780714312657e-07, - "loss": 0.168, - "step": 18189 - }, - { - "epoch": 0.92, - "grad_norm": 0.8868337153966859, - "learning_rate": 2.936215045163371e-07, - "loss": 0.1441, - "step": 18190 - }, - { - "epoch": 0.93, - "grad_norm": 1.0742994689211378, - "learning_rate": 2.932254651756139e-07, - "loss": 0.1519, - "step": 18191 - }, - { - "epoch": 0.93, - "grad_norm": 1.9396325141785344, - "learning_rate": 2.928296891316973e-07, - "loss": 0.1725, - "step": 18192 - }, - { - "epoch": 0.93, - "grad_norm": 1.1127639369893016, - "learning_rate": 2.9243417639532424e-07, - "loss": 0.1516, - "step": 18193 - }, - { - "epoch": 0.93, - "grad_norm": 1.0071558709655952, - "learning_rate": 2.920389269772217e-07, - "loss": 0.1427, - "step": 18194 - }, - { - "epoch": 0.93, - "grad_norm": 1.3870759237473882, - "learning_rate": 2.916439408881111e-07, - "loss": 0.1573, - "step": 18195 - }, - { - "epoch": 0.93, - "grad_norm": 1.2161075701965, - "learning_rate": 2.912492181387072e-07, - "loss": 0.1739, - "step": 18196 - }, - { - "epoch": 0.93, - "grad_norm": 1.1769855123428175, - "learning_rate": 2.9085475873971815e-07, - "loss": 0.1627, - "step": 18197 - }, - { - "epoch": 0.93, - "grad_norm": 1.077500567192318, - "learning_rate": 2.9046056270184197e-07, - "loss": 0.1619, - "step": 18198 - }, - { - "epoch": 0.93, - "grad_norm": 1.067921049897875, - "learning_rate": 2.9006663003576904e-07, - "loss": 0.1553, - "step": 18199 - }, - { - "epoch": 0.93, - "grad_norm": 1.0080994104165948, - "learning_rate": 2.896729607521898e-07, - "loss": 0.1739, - "step": 18200 - }, - { - "epoch": 0.93, - "grad_norm": 1.031766215965229, - "learning_rate": 2.892795548617788e-07, - "loss": 0.1758, - "step": 18201 - }, - { - "epoch": 0.93, - "grad_norm": 1.2709143832706076, - "learning_rate": 2.8888641237520886e-07, - "loss": 0.1829, - "step": 18202 - }, - { - "epoch": 0.93, - "grad_norm": 0.7373246872216361, - "learning_rate": 2.8849353330314247e-07, - "loss": 0.1482, - "step": 18203 - }, - { - "epoch": 0.93, - "grad_norm": 1.6141345512194505, - "learning_rate": 2.881009176562377e-07, - "loss": 0.1835, - "step": 18204 - }, - { - "epoch": 0.93, - "grad_norm": 0.8910948700271394, - "learning_rate": 2.8770856544514393e-07, - "loss": 0.1536, - "step": 18205 - }, - { - "epoch": 0.93, - "grad_norm": 1.0048942029409267, - "learning_rate": 2.8731647668050477e-07, - "loss": 0.1706, - "step": 18206 - }, - { - "epoch": 0.93, - "grad_norm": 1.2715211457408504, - "learning_rate": 2.86924651372954e-07, - "loss": 0.1536, - "step": 18207 - }, - { - "epoch": 0.93, - "grad_norm": 1.185487138272821, - "learning_rate": 2.865330895331209e-07, - "loss": 0.1677, - "step": 18208 - }, - { - "epoch": 0.93, - "grad_norm": 0.8032552185841167, - "learning_rate": 2.861417911716269e-07, - "loss": 0.1383, - "step": 18209 - }, - { - "epoch": 0.93, - "grad_norm": 2.989140575419518, - "learning_rate": 2.8575075629908465e-07, - "loss": 0.1528, - "step": 18210 - }, - { - "epoch": 0.93, - "grad_norm": 1.3954966524477264, - "learning_rate": 2.853599849261024e-07, - "loss": 0.1658, - "step": 18211 - }, - { - "epoch": 0.93, - "grad_norm": 0.9393271717300197, - "learning_rate": 2.849694770632794e-07, - "loss": 0.1639, - "step": 18212 - }, - { - "epoch": 0.93, - "grad_norm": 0.8570807725947537, - "learning_rate": 2.8457923272120715e-07, - "loss": 0.157, - "step": 18213 - }, - { - "epoch": 0.93, - "grad_norm": 0.9916469584513358, - "learning_rate": 2.8418925191047163e-07, - "loss": 0.1492, - "step": 18214 - }, - { - "epoch": 0.93, - "grad_norm": 0.875743607965698, - "learning_rate": 2.8379953464165334e-07, - "loss": 0.1633, - "step": 18215 - }, - { - "epoch": 0.93, - "grad_norm": 1.0349400008204857, - "learning_rate": 2.8341008092531927e-07, - "loss": 0.1557, - "step": 18216 - }, - { - "epoch": 0.93, - "grad_norm": 1.6943081379529428, - "learning_rate": 2.8302089077203776e-07, - "loss": 0.1753, - "step": 18217 - }, - { - "epoch": 0.93, - "grad_norm": 1.3400755030544176, - "learning_rate": 2.826319641923614e-07, - "loss": 0.1639, - "step": 18218 - }, - { - "epoch": 0.93, - "grad_norm": 2.458439540499437, - "learning_rate": 2.8224330119684286e-07, - "loss": 0.1778, - "step": 18219 - }, - { - "epoch": 0.93, - "grad_norm": 1.0458337430912972, - "learning_rate": 2.818549017960237e-07, - "loss": 0.1697, - "step": 18220 - }, - { - "epoch": 0.93, - "grad_norm": 2.1383391293406424, - "learning_rate": 2.8146676600043777e-07, - "loss": 0.1956, - "step": 18221 - }, - { - "epoch": 0.93, - "grad_norm": 1.1962578304457712, - "learning_rate": 2.810788938206155e-07, - "loss": 0.1767, - "step": 18222 - }, - { - "epoch": 0.93, - "grad_norm": 0.9702710290737139, - "learning_rate": 2.8069128526707845e-07, - "loss": 0.178, - "step": 18223 - }, - { - "epoch": 0.93, - "grad_norm": 0.9376196152192133, - "learning_rate": 2.8030394035033827e-07, - "loss": 0.1586, - "step": 18224 - }, - { - "epoch": 0.93, - "grad_norm": 1.05253853210062, - "learning_rate": 2.7991685908090316e-07, - "loss": 0.16, - "step": 18225 - }, - { - "epoch": 0.93, - "grad_norm": 0.939644911654303, - "learning_rate": 2.7953004146927145e-07, - "loss": 0.1365, - "step": 18226 - }, - { - "epoch": 0.93, - "grad_norm": 1.1506454940271946, - "learning_rate": 2.791434875259369e-07, - "loss": 0.1536, - "step": 18227 - }, - { - "epoch": 0.93, - "grad_norm": 0.8693278008577767, - "learning_rate": 2.787571972613845e-07, - "loss": 0.1691, - "step": 18228 - }, - { - "epoch": 0.93, - "grad_norm": 0.8956275554521052, - "learning_rate": 2.7837117068609254e-07, - "loss": 0.1611, - "step": 18229 - }, - { - "epoch": 0.93, - "grad_norm": 1.3026610082054975, - "learning_rate": 2.779854078105304e-07, - "loss": 0.1835, - "step": 18230 - }, - { - "epoch": 0.93, - "grad_norm": 0.8988603613018683, - "learning_rate": 2.7759990864516306e-07, - "loss": 0.1412, - "step": 18231 - }, - { - "epoch": 0.93, - "grad_norm": 1.4338602081572938, - "learning_rate": 2.772146732004488e-07, - "loss": 0.1496, - "step": 18232 - }, - { - "epoch": 0.93, - "grad_norm": 1.7382306793567448, - "learning_rate": 2.768297014868337e-07, - "loss": 0.1581, - "step": 18233 - }, - { - "epoch": 0.93, - "grad_norm": 0.9871482240366329, - "learning_rate": 2.7644499351476396e-07, - "loss": 0.151, - "step": 18234 - }, - { - "epoch": 0.93, - "grad_norm": 1.4205508635886646, - "learning_rate": 2.760605492946722e-07, - "loss": 0.1774, - "step": 18235 - }, - { - "epoch": 0.93, - "grad_norm": 0.9918044669951058, - "learning_rate": 2.756763688369879e-07, - "loss": 0.176, - "step": 18236 - }, - { - "epoch": 0.93, - "grad_norm": 1.07450755147161, - "learning_rate": 2.7529245215213053e-07, - "loss": 0.1628, - "step": 18237 - }, - { - "epoch": 0.93, - "grad_norm": 1.0434581153695943, - "learning_rate": 2.7490879925051397e-07, - "loss": 0.1525, - "step": 18238 - }, - { - "epoch": 0.93, - "grad_norm": 0.9332330854715319, - "learning_rate": 2.745254101425465e-07, - "loss": 0.1678, - "step": 18239 - }, - { - "epoch": 0.93, - "grad_norm": 1.3009425937901884, - "learning_rate": 2.741422848386266e-07, - "loss": 0.1606, - "step": 18240 - }, - { - "epoch": 0.93, - "grad_norm": 1.2168959844464464, - "learning_rate": 2.737594233491458e-07, - "loss": 0.1757, - "step": 18241 - }, - { - "epoch": 0.93, - "grad_norm": 1.052663313889229, - "learning_rate": 2.733768256844915e-07, - "loss": 0.1414, - "step": 18242 - }, - { - "epoch": 0.93, - "grad_norm": 0.9262570446313309, - "learning_rate": 2.729944918550387e-07, - "loss": 0.1429, - "step": 18243 - }, - { - "epoch": 0.93, - "grad_norm": 0.810952260793739, - "learning_rate": 2.726124218711612e-07, - "loss": 0.1595, - "step": 18244 - }, - { - "epoch": 0.93, - "grad_norm": 1.0981094901401551, - "learning_rate": 2.7223061574321975e-07, - "loss": 0.184, - "step": 18245 - }, - { - "epoch": 0.93, - "grad_norm": 0.9443083295769201, - "learning_rate": 2.7184907348157377e-07, - "loss": 0.1632, - "step": 18246 - }, - { - "epoch": 0.93, - "grad_norm": 1.075805429275343, - "learning_rate": 2.714677950965694e-07, - "loss": 0.151, - "step": 18247 - }, - { - "epoch": 0.93, - "grad_norm": 1.1320019826123289, - "learning_rate": 2.7108678059855064e-07, - "loss": 0.1539, - "step": 18248 - }, - { - "epoch": 0.93, - "grad_norm": 0.9815125581439055, - "learning_rate": 2.707060299978537e-07, - "loss": 0.1659, - "step": 18249 - }, - { - "epoch": 0.93, - "grad_norm": 1.0098818213384717, - "learning_rate": 2.7032554330480464e-07, - "loss": 0.1505, - "step": 18250 - }, - { - "epoch": 0.93, - "grad_norm": 1.1575032174458653, - "learning_rate": 2.699453205297253e-07, - "loss": 0.1633, - "step": 18251 - }, - { - "epoch": 0.93, - "grad_norm": 1.2478476951841007, - "learning_rate": 2.6956536168292747e-07, - "loss": 0.1638, - "step": 18252 - }, - { - "epoch": 0.93, - "grad_norm": 1.2882046648564152, - "learning_rate": 2.6918566677471946e-07, - "loss": 0.1565, - "step": 18253 - }, - { - "epoch": 0.93, - "grad_norm": 1.1423199526776788, - "learning_rate": 2.688062358153998e-07, - "loss": 0.1486, - "step": 18254 - }, - { - "epoch": 0.93, - "grad_norm": 0.8599154346841992, - "learning_rate": 2.6842706881526125e-07, - "loss": 0.1521, - "step": 18255 - }, - { - "epoch": 0.93, - "grad_norm": 1.0747232249223346, - "learning_rate": 2.680481657845868e-07, - "loss": 0.1405, - "step": 18256 - }, - { - "epoch": 0.93, - "grad_norm": 1.1667972927867134, - "learning_rate": 2.67669526733656e-07, - "loss": 0.1858, - "step": 18257 - }, - { - "epoch": 0.93, - "grad_norm": 1.3900194062396538, - "learning_rate": 2.6729115167273834e-07, - "loss": 0.1652, - "step": 18258 - }, - { - "epoch": 0.93, - "grad_norm": 1.0342281416929027, - "learning_rate": 2.66913040612099e-07, - "loss": 0.1841, - "step": 18259 - }, - { - "epoch": 0.93, - "grad_norm": 1.1819018631064098, - "learning_rate": 2.665351935619931e-07, - "loss": 0.1683, - "step": 18260 - }, - { - "epoch": 0.93, - "grad_norm": 1.394688171936252, - "learning_rate": 2.661576105326702e-07, - "loss": 0.1641, - "step": 18261 - }, - { - "epoch": 0.93, - "grad_norm": 0.8566435495872279, - "learning_rate": 2.65780291534371e-07, - "loss": 0.1631, - "step": 18262 - }, - { - "epoch": 0.93, - "grad_norm": 1.355142728772295, - "learning_rate": 2.654032365773318e-07, - "loss": 0.1421, - "step": 18263 - }, - { - "epoch": 0.93, - "grad_norm": 1.1998677089669416, - "learning_rate": 2.650264456717788e-07, - "loss": 0.1559, - "step": 18264 - }, - { - "epoch": 0.93, - "grad_norm": 1.2343661872770706, - "learning_rate": 2.646499188279328e-07, - "loss": 0.1569, - "step": 18265 - }, - { - "epoch": 0.93, - "grad_norm": 0.9675566866904394, - "learning_rate": 2.6427365605600883e-07, - "loss": 0.1459, - "step": 18266 - }, - { - "epoch": 0.93, - "grad_norm": 1.2944387885918796, - "learning_rate": 2.638976573662122e-07, - "loss": 0.1758, - "step": 18267 - }, - { - "epoch": 0.93, - "grad_norm": 1.0901993055937835, - "learning_rate": 2.635219227687413e-07, - "loss": 0.1604, - "step": 18268 - }, - { - "epoch": 0.93, - "grad_norm": 0.8275611356887129, - "learning_rate": 2.63146452273787e-07, - "loss": 0.1381, - "step": 18269 - }, - { - "epoch": 0.93, - "grad_norm": 1.1146506553802311, - "learning_rate": 2.6277124589153657e-07, - "loss": 0.1734, - "step": 18270 - }, - { - "epoch": 0.93, - "grad_norm": 1.5159778292664159, - "learning_rate": 2.623963036321642e-07, - "loss": 0.157, - "step": 18271 - }, - { - "epoch": 0.93, - "grad_norm": 1.1055152798151293, - "learning_rate": 2.6202162550584387e-07, - "loss": 0.1712, - "step": 18272 - }, - { - "epoch": 0.93, - "grad_norm": 1.671505281711295, - "learning_rate": 2.6164721152273644e-07, - "loss": 0.1883, - "step": 18273 - }, - { - "epoch": 0.93, - "grad_norm": 0.9763923828625075, - "learning_rate": 2.6127306169299815e-07, - "loss": 0.1563, - "step": 18274 - }, - { - "epoch": 0.93, - "grad_norm": 1.1784812501395956, - "learning_rate": 2.608991760267776e-07, - "loss": 0.1732, - "step": 18275 - }, - { - "epoch": 0.93, - "grad_norm": 0.9906157660421311, - "learning_rate": 2.605255545342178e-07, - "loss": 0.1457, - "step": 18276 - }, - { - "epoch": 0.93, - "grad_norm": 1.1435715096827683, - "learning_rate": 2.6015219722545173e-07, - "loss": 0.1449, - "step": 18277 - }, - { - "epoch": 0.93, - "grad_norm": 1.0535128935316218, - "learning_rate": 2.5977910411060905e-07, - "loss": 0.1861, - "step": 18278 - }, - { - "epoch": 0.93, - "grad_norm": 1.5980771885217533, - "learning_rate": 2.594062751998061e-07, - "loss": 0.1623, - "step": 18279 - }, - { - "epoch": 0.93, - "grad_norm": 1.4537834976700899, - "learning_rate": 2.590337105031604e-07, - "loss": 0.166, - "step": 18280 - }, - { - "epoch": 0.93, - "grad_norm": 1.231681614406639, - "learning_rate": 2.586614100307738e-07, - "loss": 0.1532, - "step": 18281 - }, - { - "epoch": 0.93, - "grad_norm": 1.0291305206451635, - "learning_rate": 2.582893737927472e-07, - "loss": 0.1823, - "step": 18282 - }, - { - "epoch": 0.93, - "grad_norm": 0.8890034877363463, - "learning_rate": 2.5791760179917135e-07, - "loss": 0.1658, - "step": 18283 - }, - { - "epoch": 0.93, - "grad_norm": 1.0296792104223675, - "learning_rate": 2.5754609406013044e-07, - "loss": 0.161, - "step": 18284 - }, - { - "epoch": 0.93, - "grad_norm": 0.8199592585300417, - "learning_rate": 2.5717485058570304e-07, - "loss": 0.1665, - "step": 18285 - }, - { - "epoch": 0.93, - "grad_norm": 2.0453994543406147, - "learning_rate": 2.5680387138595663e-07, - "loss": 0.1708, - "step": 18286 - }, - { - "epoch": 0.93, - "grad_norm": 1.324591897357187, - "learning_rate": 2.5643315647095655e-07, - "loss": 0.1461, - "step": 18287 - }, - { - "epoch": 0.93, - "grad_norm": 1.942078050240307, - "learning_rate": 2.56062705850757e-07, - "loss": 0.1643, - "step": 18288 - }, - { - "epoch": 0.93, - "grad_norm": 1.0278227471601282, - "learning_rate": 2.5569251953540763e-07, - "loss": 0.1624, - "step": 18289 - }, - { - "epoch": 0.93, - "grad_norm": 1.1004008321153336, - "learning_rate": 2.5532259753494825e-07, - "loss": 0.165, - "step": 18290 - }, - { - "epoch": 0.93, - "grad_norm": 1.2990139603467061, - "learning_rate": 2.5495293985941414e-07, - "loss": 0.1631, - "step": 18291 - }, - { - "epoch": 0.93, - "grad_norm": 3.930321254713959, - "learning_rate": 2.5458354651883065e-07, - "loss": 0.1714, - "step": 18292 - }, - { - "epoch": 0.93, - "grad_norm": 1.093198105156117, - "learning_rate": 2.5421441752322086e-07, - "loss": 0.1641, - "step": 18293 - }, - { - "epoch": 0.93, - "grad_norm": 0.9736377661780048, - "learning_rate": 2.538455528825945e-07, - "loss": 0.1577, - "step": 18294 - }, - { - "epoch": 0.93, - "grad_norm": 0.8873869753036656, - "learning_rate": 2.5347695260695805e-07, - "loss": 0.1459, - "step": 18295 - }, - { - "epoch": 0.93, - "grad_norm": 1.216394455946613, - "learning_rate": 2.5310861670631015e-07, - "loss": 0.1644, - "step": 18296 - }, - { - "epoch": 0.93, - "grad_norm": 1.2671143732954295, - "learning_rate": 2.5274054519064175e-07, - "loss": 0.1524, - "step": 18297 - }, - { - "epoch": 0.93, - "grad_norm": 1.0028280164130396, - "learning_rate": 2.523727380699348e-07, - "loss": 0.1736, - "step": 18298 - }, - { - "epoch": 0.93, - "grad_norm": 1.3123715393982744, - "learning_rate": 2.5200519535417024e-07, - "loss": 0.157, - "step": 18299 - }, - { - "epoch": 0.93, - "grad_norm": 0.9316140446756942, - "learning_rate": 2.5163791705331343e-07, - "loss": 0.1543, - "step": 18300 - }, - { - "epoch": 0.93, - "grad_norm": 2.046187498563863, - "learning_rate": 2.5127090317732973e-07, - "loss": 0.1673, - "step": 18301 - }, - { - "epoch": 0.93, - "grad_norm": 1.1597565818611806, - "learning_rate": 2.5090415373617336e-07, - "loss": 0.1635, - "step": 18302 - }, - { - "epoch": 0.93, - "grad_norm": 0.8820846486012719, - "learning_rate": 2.5053766873979204e-07, - "loss": 0.163, - "step": 18303 - }, - { - "epoch": 0.93, - "grad_norm": 0.9880960254385381, - "learning_rate": 2.5017144819812766e-07, - "loss": 0.1483, - "step": 18304 - }, - { - "epoch": 0.93, - "grad_norm": 1.2739535762192775, - "learning_rate": 2.4980549212111236e-07, - "loss": 0.1561, - "step": 18305 - }, - { - "epoch": 0.93, - "grad_norm": 2.084334086475733, - "learning_rate": 2.494398005186749e-07, - "loss": 0.1584, - "step": 18306 - }, - { - "epoch": 0.93, - "grad_norm": 0.933607113556294, - "learning_rate": 2.490743734007328e-07, - "loss": 0.147, - "step": 18307 - }, - { - "epoch": 0.93, - "grad_norm": 1.0565751035924777, - "learning_rate": 2.4870921077720043e-07, - "loss": 0.1712, - "step": 18308 - }, - { - "epoch": 0.93, - "grad_norm": 0.7900491077807519, - "learning_rate": 2.483443126579799e-07, - "loss": 0.135, - "step": 18309 - }, - { - "epoch": 0.93, - "grad_norm": 0.7874166677831361, - "learning_rate": 2.4797967905297093e-07, - "loss": 0.1366, - "step": 18310 - }, - { - "epoch": 0.93, - "grad_norm": 1.1423803695769863, - "learning_rate": 2.476153099720635e-07, - "loss": 0.1542, - "step": 18311 - }, - { - "epoch": 0.93, - "grad_norm": 0.8768095068058398, - "learning_rate": 2.47251205425143e-07, - "loss": 0.1553, - "step": 18312 - }, - { - "epoch": 0.93, - "grad_norm": 1.1056877390442028, - "learning_rate": 2.468873654220838e-07, - "loss": 0.1493, - "step": 18313 - }, - { - "epoch": 0.93, - "grad_norm": 0.7899447781250339, - "learning_rate": 2.465237899727557e-07, - "loss": 0.1755, - "step": 18314 - }, - { - "epoch": 0.93, - "grad_norm": 1.7863005549546918, - "learning_rate": 2.461604790870209e-07, - "loss": 0.1535, - "step": 18315 - }, - { - "epoch": 0.93, - "grad_norm": 1.4838306408263966, - "learning_rate": 2.4579743277473365e-07, - "loss": 0.1582, - "step": 18316 - }, - { - "epoch": 0.93, - "grad_norm": 0.9945185936066469, - "learning_rate": 2.454346510457417e-07, - "loss": 0.1703, - "step": 18317 - }, - { - "epoch": 0.93, - "grad_norm": 1.184309812436225, - "learning_rate": 2.4507213390988604e-07, - "loss": 0.1555, - "step": 18318 - }, - { - "epoch": 0.93, - "grad_norm": 0.7978023591704787, - "learning_rate": 2.4470988137699993e-07, - "loss": 0.1503, - "step": 18319 - }, - { - "epoch": 0.93, - "grad_norm": 0.9209433934114493, - "learning_rate": 2.443478934569088e-07, - "loss": 0.1604, - "step": 18320 - }, - { - "epoch": 0.93, - "grad_norm": 1.5905991257382877, - "learning_rate": 2.439861701594326e-07, - "loss": 0.1831, - "step": 18321 - }, - { - "epoch": 0.93, - "grad_norm": 0.9910997370507246, - "learning_rate": 2.4362471149438237e-07, - "loss": 0.1515, - "step": 18322 - }, - { - "epoch": 0.93, - "grad_norm": 1.50148417664633, - "learning_rate": 2.4326351747156365e-07, - "loss": 0.1458, - "step": 18323 - }, - { - "epoch": 0.93, - "grad_norm": 0.9682930228090741, - "learning_rate": 2.4290258810077183e-07, - "loss": 0.1564, - "step": 18324 - }, - { - "epoch": 0.93, - "grad_norm": 1.0085186474825831, - "learning_rate": 2.4254192339179915e-07, - "loss": 0.1577, - "step": 18325 - }, - { - "epoch": 0.93, - "grad_norm": 1.0797598581900214, - "learning_rate": 2.421815233544267e-07, - "loss": 0.1556, - "step": 18326 - }, - { - "epoch": 0.93, - "grad_norm": 0.8254359964739783, - "learning_rate": 2.418213879984321e-07, - "loss": 0.1522, - "step": 18327 - }, - { - "epoch": 0.93, - "grad_norm": 0.9154350038423441, - "learning_rate": 2.4146151733358327e-07, - "loss": 0.1597, - "step": 18328 - }, - { - "epoch": 0.93, - "grad_norm": 1.8372875062859295, - "learning_rate": 2.4110191136964333e-07, - "loss": 0.1628, - "step": 18329 - }, - { - "epoch": 0.93, - "grad_norm": 1.228360493795967, - "learning_rate": 2.407425701163635e-07, - "loss": 0.1649, - "step": 18330 - }, - { - "epoch": 0.93, - "grad_norm": 1.325196990957879, - "learning_rate": 2.403834935834948e-07, - "loss": 0.1629, - "step": 18331 - }, - { - "epoch": 0.93, - "grad_norm": 1.3452048806832089, - "learning_rate": 2.400246817807728e-07, - "loss": 0.1703, - "step": 18332 - }, - { - "epoch": 0.93, - "grad_norm": 0.9918467529509656, - "learning_rate": 2.396661347179341e-07, - "loss": 0.1876, - "step": 18333 - }, - { - "epoch": 0.93, - "grad_norm": 1.4787781081476281, - "learning_rate": 2.393078524047021e-07, - "loss": 0.1545, - "step": 18334 - }, - { - "epoch": 0.93, - "grad_norm": 1.2674619556440334, - "learning_rate": 2.3894983485079683e-07, - "loss": 0.1668, - "step": 18335 - }, - { - "epoch": 0.93, - "grad_norm": 1.0590605390474992, - "learning_rate": 2.385920820659271e-07, - "loss": 0.1529, - "step": 18336 - }, - { - "epoch": 0.93, - "grad_norm": 1.201334926543082, - "learning_rate": 2.3823459405979854e-07, - "loss": 0.1582, - "step": 18337 - }, - { - "epoch": 0.93, - "grad_norm": 1.1333430396610051, - "learning_rate": 2.3787737084210893e-07, - "loss": 0.1554, - "step": 18338 - }, - { - "epoch": 0.93, - "grad_norm": 0.7409420607842309, - "learning_rate": 2.375204124225472e-07, - "loss": 0.1495, - "step": 18339 - }, - { - "epoch": 0.93, - "grad_norm": 1.0680731399439665, - "learning_rate": 2.3716371881079558e-07, - "loss": 0.1726, - "step": 18340 - }, - { - "epoch": 0.93, - "grad_norm": 1.0948349108272253, - "learning_rate": 2.3680729001652858e-07, - "loss": 0.1696, - "step": 18341 - }, - { - "epoch": 0.93, - "grad_norm": 1.051007648963238, - "learning_rate": 2.3645112604941623e-07, - "loss": 0.1737, - "step": 18342 - }, - { - "epoch": 0.93, - "grad_norm": 1.0056811990684162, - "learning_rate": 2.3609522691911746e-07, - "loss": 0.1605, - "step": 18343 - }, - { - "epoch": 0.93, - "grad_norm": 2.025955618299223, - "learning_rate": 2.3573959263528677e-07, - "loss": 0.1697, - "step": 18344 - }, - { - "epoch": 0.93, - "grad_norm": 1.0134681991322119, - "learning_rate": 2.3538422320757204e-07, - "loss": 0.1532, - "step": 18345 - }, - { - "epoch": 0.93, - "grad_norm": 1.2651014914706251, - "learning_rate": 2.3502911864561218e-07, - "loss": 0.1739, - "step": 18346 - }, - { - "epoch": 0.93, - "grad_norm": 1.0810092009920989, - "learning_rate": 2.346742789590384e-07, - "loss": 0.1534, - "step": 18347 - }, - { - "epoch": 0.93, - "grad_norm": 1.7203593062803417, - "learning_rate": 2.343197041574774e-07, - "loss": 0.1703, - "step": 18348 - }, - { - "epoch": 0.93, - "grad_norm": 0.988197154022997, - "learning_rate": 2.3396539425054376e-07, - "loss": 0.1713, - "step": 18349 - }, - { - "epoch": 0.93, - "grad_norm": 1.1065800214268064, - "learning_rate": 2.3361134924785313e-07, - "loss": 0.1667, - "step": 18350 - }, - { - "epoch": 0.93, - "grad_norm": 0.8575847025459382, - "learning_rate": 2.3325756915900445e-07, - "loss": 0.133, - "step": 18351 - }, - { - "epoch": 0.93, - "grad_norm": 0.8488946851219393, - "learning_rate": 2.329040539935967e-07, - "loss": 0.1523, - "step": 18352 - }, - { - "epoch": 0.93, - "grad_norm": 0.8545552151939171, - "learning_rate": 2.3255080376121676e-07, - "loss": 0.1528, - "step": 18353 - }, - { - "epoch": 0.93, - "grad_norm": 1.7129768044688733, - "learning_rate": 2.3219781847144906e-07, - "loss": 0.1884, - "step": 18354 - }, - { - "epoch": 0.93, - "grad_norm": 1.2532969807361725, - "learning_rate": 2.3184509813386713e-07, - "loss": 0.1554, - "step": 18355 - }, - { - "epoch": 0.93, - "grad_norm": 0.865328382304051, - "learning_rate": 2.3149264275803884e-07, - "loss": 0.1481, - "step": 18356 - }, - { - "epoch": 0.93, - "grad_norm": 0.819398377750584, - "learning_rate": 2.3114045235352433e-07, - "loss": 0.1597, - "step": 18357 - }, - { - "epoch": 0.93, - "grad_norm": 0.9760997913944155, - "learning_rate": 2.3078852692987596e-07, - "loss": 0.1533, - "step": 18358 - }, - { - "epoch": 0.93, - "grad_norm": 0.8124370575612277, - "learning_rate": 2.3043686649664165e-07, - "loss": 0.149, - "step": 18359 - }, - { - "epoch": 0.93, - "grad_norm": 0.9997317244657435, - "learning_rate": 2.3008547106335822e-07, - "loss": 0.1624, - "step": 18360 - }, - { - "epoch": 0.93, - "grad_norm": 1.2057741927307049, - "learning_rate": 2.2973434063955802e-07, - "loss": 0.1463, - "step": 18361 - }, - { - "epoch": 0.93, - "grad_norm": 0.8137021469097535, - "learning_rate": 2.2938347523476568e-07, - "loss": 0.1533, - "step": 18362 - }, - { - "epoch": 0.93, - "grad_norm": 3.0808834168689883, - "learning_rate": 2.2903287485849913e-07, - "loss": 0.1568, - "step": 18363 - }, - { - "epoch": 0.93, - "grad_norm": 0.9410298125961627, - "learning_rate": 2.2868253952026741e-07, - "loss": 0.1491, - "step": 18364 - }, - { - "epoch": 0.93, - "grad_norm": 1.0671457063576877, - "learning_rate": 2.2833246922957408e-07, - "loss": 0.1649, - "step": 18365 - }, - { - "epoch": 0.93, - "grad_norm": 1.163398353724536, - "learning_rate": 2.2798266399591374e-07, - "loss": 0.1666, - "step": 18366 - }, - { - "epoch": 0.93, - "grad_norm": 0.893536336859922, - "learning_rate": 2.2763312382877656e-07, - "loss": 0.1493, - "step": 18367 - }, - { - "epoch": 0.93, - "grad_norm": 1.1466722066958055, - "learning_rate": 2.2728384873764276e-07, - "loss": 0.1667, - "step": 18368 - }, - { - "epoch": 0.93, - "grad_norm": 1.1088802038655343, - "learning_rate": 2.2693483873198584e-07, - "loss": 0.1415, - "step": 18369 - }, - { - "epoch": 0.93, - "grad_norm": 1.1395079425027594, - "learning_rate": 2.2658609382127384e-07, - "loss": 0.1837, - "step": 18370 - }, - { - "epoch": 0.93, - "grad_norm": 1.0489862597909092, - "learning_rate": 2.2623761401496468e-07, - "loss": 0.1582, - "step": 18371 - }, - { - "epoch": 0.93, - "grad_norm": 1.030787526510104, - "learning_rate": 2.2588939932251418e-07, - "loss": 0.1592, - "step": 18372 - }, - { - "epoch": 0.93, - "grad_norm": 0.9229346827535737, - "learning_rate": 2.2554144975336477e-07, - "loss": 0.1509, - "step": 18373 - }, - { - "epoch": 0.93, - "grad_norm": 1.2346282121275605, - "learning_rate": 2.251937653169567e-07, - "loss": 0.1606, - "step": 18374 - }, - { - "epoch": 0.93, - "grad_norm": 0.8715933060605532, - "learning_rate": 2.2484634602271794e-07, - "loss": 0.1683, - "step": 18375 - }, - { - "epoch": 0.93, - "grad_norm": 0.941253110893302, - "learning_rate": 2.244991918800754e-07, - "loss": 0.1521, - "step": 18376 - }, - { - "epoch": 0.93, - "grad_norm": 0.9266730919291768, - "learning_rate": 2.2415230289844382e-07, - "loss": 0.1589, - "step": 18377 - }, - { - "epoch": 0.93, - "grad_norm": 0.9771410611139677, - "learning_rate": 2.238056790872345e-07, - "loss": 0.1724, - "step": 18378 - }, - { - "epoch": 0.93, - "grad_norm": 1.5235936206743117, - "learning_rate": 2.2345932045584662e-07, - "loss": 0.196, - "step": 18379 - }, - { - "epoch": 0.93, - "grad_norm": 0.9841273240110063, - "learning_rate": 2.231132270136771e-07, - "loss": 0.1763, - "step": 18380 - }, - { - "epoch": 0.93, - "grad_norm": 1.2765539802867825, - "learning_rate": 2.22767398770114e-07, - "loss": 0.155, - "step": 18381 - }, - { - "epoch": 0.93, - "grad_norm": 1.1186553643443082, - "learning_rate": 2.2242183573453756e-07, - "loss": 0.1868, - "step": 18382 - }, - { - "epoch": 0.93, - "grad_norm": 1.0287318642723777, - "learning_rate": 2.2207653791632035e-07, - "loss": 0.1417, - "step": 18383 - }, - { - "epoch": 0.93, - "grad_norm": 1.3699336996747302, - "learning_rate": 2.2173150532483035e-07, - "loss": 0.1648, - "step": 18384 - }, - { - "epoch": 0.93, - "grad_norm": 0.9571519442266735, - "learning_rate": 2.2138673796942457e-07, - "loss": 0.1568, - "step": 18385 - }, - { - "epoch": 0.93, - "grad_norm": 2.2746480790935992, - "learning_rate": 2.210422358594566e-07, - "loss": 0.167, - "step": 18386 - }, - { - "epoch": 0.94, - "grad_norm": 1.106230206821423, - "learning_rate": 2.2069799900427014e-07, - "loss": 0.1719, - "step": 18387 - }, - { - "epoch": 0.94, - "grad_norm": 2.791480398398672, - "learning_rate": 2.203540274132021e-07, - "loss": 0.149, - "step": 18388 - }, - { - "epoch": 0.94, - "grad_norm": 1.0617422537257237, - "learning_rate": 2.2001032109558396e-07, - "loss": 0.1616, - "step": 18389 - }, - { - "epoch": 0.94, - "grad_norm": 1.8104952562718482, - "learning_rate": 2.196668800607382e-07, - "loss": 0.1635, - "step": 18390 - }, - { - "epoch": 0.94, - "grad_norm": 0.9657527974823339, - "learning_rate": 2.1932370431798077e-07, - "loss": 0.1476, - "step": 18391 - }, - { - "epoch": 0.94, - "grad_norm": 0.983063383851243, - "learning_rate": 2.1898079387662085e-07, - "loss": 0.1919, - "step": 18392 - }, - { - "epoch": 0.94, - "grad_norm": 2.6038285199676197, - "learning_rate": 2.186381487459588e-07, - "loss": 0.1672, - "step": 18393 - }, - { - "epoch": 0.94, - "grad_norm": 0.8242832192874088, - "learning_rate": 2.1829576893528938e-07, - "loss": 0.1552, - "step": 18394 - }, - { - "epoch": 0.94, - "grad_norm": 2.072684200243416, - "learning_rate": 2.179536544538996e-07, - "loss": 0.1466, - "step": 18395 - }, - { - "epoch": 0.94, - "grad_norm": 0.9253757492899954, - "learning_rate": 2.1761180531106873e-07, - "loss": 0.1728, - "step": 18396 - }, - { - "epoch": 0.94, - "grad_norm": 1.066376706831964, - "learning_rate": 2.1727022151607046e-07, - "loss": 0.1488, - "step": 18397 - }, - { - "epoch": 0.94, - "grad_norm": 2.314431093429177, - "learning_rate": 2.1692890307817073e-07, - "loss": 0.1631, - "step": 18398 - }, - { - "epoch": 0.94, - "grad_norm": 0.8498934849664621, - "learning_rate": 2.1658785000662763e-07, - "loss": 0.1486, - "step": 18399 - }, - { - "epoch": 0.94, - "grad_norm": 1.0378436730938367, - "learning_rate": 2.1624706231068936e-07, - "loss": 0.1573, - "step": 18400 - }, - { - "epoch": 0.94, - "grad_norm": 0.8970051524119859, - "learning_rate": 2.159065399996041e-07, - "loss": 0.1624, - "step": 18401 - }, - { - "epoch": 0.94, - "grad_norm": 0.9922525353722269, - "learning_rate": 2.1556628308260552e-07, - "loss": 0.1818, - "step": 18402 - }, - { - "epoch": 0.94, - "grad_norm": 0.8332483233289575, - "learning_rate": 2.1522629156892516e-07, - "loss": 0.1438, - "step": 18403 - }, - { - "epoch": 0.94, - "grad_norm": 0.8701610657938882, - "learning_rate": 2.1488656546778342e-07, - "loss": 0.1745, - "step": 18404 - }, - { - "epoch": 0.94, - "grad_norm": 0.9793402914905001, - "learning_rate": 2.1454710478839736e-07, - "loss": 0.174, - "step": 18405 - }, - { - "epoch": 0.94, - "grad_norm": 2.817119570072124, - "learning_rate": 2.1420790953997294e-07, - "loss": 0.1722, - "step": 18406 - }, - { - "epoch": 0.94, - "grad_norm": 1.782561768407933, - "learning_rate": 2.138689797317106e-07, - "loss": 0.1741, - "step": 18407 - }, - { - "epoch": 0.94, - "grad_norm": 1.4030966551078101, - "learning_rate": 2.1353031537280743e-07, - "loss": 0.1519, - "step": 18408 - }, - { - "epoch": 0.94, - "grad_norm": 1.4607777385501066, - "learning_rate": 2.1319191647244497e-07, - "loss": 0.1799, - "step": 18409 - }, - { - "epoch": 0.94, - "grad_norm": 0.9416992136910253, - "learning_rate": 2.1285378303980585e-07, - "loss": 0.1441, - "step": 18410 - }, - { - "epoch": 0.94, - "grad_norm": 0.8708030270127827, - "learning_rate": 2.1251591508405945e-07, - "loss": 0.1561, - "step": 18411 - }, - { - "epoch": 0.94, - "grad_norm": 1.04718883719092, - "learning_rate": 2.1217831261437283e-07, - "loss": 0.1471, - "step": 18412 - }, - { - "epoch": 0.94, - "grad_norm": 1.4080305099714423, - "learning_rate": 2.1184097563990204e-07, - "loss": 0.1692, - "step": 18413 - }, - { - "epoch": 0.94, - "grad_norm": 1.3780724995478995, - "learning_rate": 2.115039041697964e-07, - "loss": 0.1723, - "step": 18414 - }, - { - "epoch": 0.94, - "grad_norm": 1.0810492064587849, - "learning_rate": 2.111670982132008e-07, - "loss": 0.1642, - "step": 18415 - }, - { - "epoch": 0.94, - "grad_norm": 1.4676322049548796, - "learning_rate": 2.1083055777925244e-07, - "loss": 0.1595, - "step": 18416 - }, - { - "epoch": 0.94, - "grad_norm": 1.3846819242414135, - "learning_rate": 2.104942828770762e-07, - "loss": 0.172, - "step": 18417 - }, - { - "epoch": 0.94, - "grad_norm": 1.007773102930094, - "learning_rate": 2.1015827351579588e-07, - "loss": 0.1845, - "step": 18418 - }, - { - "epoch": 0.94, - "grad_norm": 0.9008750167154151, - "learning_rate": 2.0982252970452532e-07, - "loss": 0.1526, - "step": 18419 - }, - { - "epoch": 0.94, - "grad_norm": 1.0345250458075697, - "learning_rate": 2.0948705145237168e-07, - "loss": 0.1622, - "step": 18420 - }, - { - "epoch": 0.94, - "grad_norm": 0.982428132994956, - "learning_rate": 2.0915183876843436e-07, - "loss": 0.1604, - "step": 18421 - }, - { - "epoch": 0.94, - "grad_norm": 0.9070283705308484, - "learning_rate": 2.0881689166180718e-07, - "loss": 0.1657, - "step": 18422 - }, - { - "epoch": 0.94, - "grad_norm": 1.2758911966430224, - "learning_rate": 2.0848221014157398e-07, - "loss": 0.1613, - "step": 18423 - }, - { - "epoch": 0.94, - "grad_norm": 1.1603708286280212, - "learning_rate": 2.081477942168142e-07, - "loss": 0.1639, - "step": 18424 - }, - { - "epoch": 0.94, - "grad_norm": 0.9503295180273585, - "learning_rate": 2.078136438965983e-07, - "loss": 0.1695, - "step": 18425 - }, - { - "epoch": 0.94, - "grad_norm": 1.0634378735733225, - "learning_rate": 2.0747975918999018e-07, - "loss": 0.1596, - "step": 18426 - }, - { - "epoch": 0.94, - "grad_norm": 1.3307297893329564, - "learning_rate": 2.0714614010604815e-07, - "loss": 0.1695, - "step": 18427 - }, - { - "epoch": 0.94, - "grad_norm": 1.3323480771188736, - "learning_rate": 2.0681278665381833e-07, - "loss": 0.145, - "step": 18428 - }, - { - "epoch": 0.94, - "grad_norm": 1.1547516123571364, - "learning_rate": 2.0647969884234676e-07, - "loss": 0.162, - "step": 18429 - }, - { - "epoch": 0.94, - "grad_norm": 1.032996754590199, - "learning_rate": 2.0614687668066403e-07, - "loss": 0.161, - "step": 18430 - }, - { - "epoch": 0.94, - "grad_norm": 1.3829844629918333, - "learning_rate": 2.058143201778029e-07, - "loss": 0.1819, - "step": 18431 - }, - { - "epoch": 0.94, - "grad_norm": 1.6345825829161515, - "learning_rate": 2.054820293427795e-07, - "loss": 0.1622, - "step": 18432 - }, - { - "epoch": 0.94, - "grad_norm": 1.13058969646442, - "learning_rate": 2.0515000418460995e-07, - "loss": 0.1688, - "step": 18433 - }, - { - "epoch": 0.94, - "grad_norm": 1.1264091710524033, - "learning_rate": 2.0481824471229927e-07, - "loss": 0.126, - "step": 18434 - }, - { - "epoch": 0.94, - "grad_norm": 0.9524093713904969, - "learning_rate": 2.0448675093484805e-07, - "loss": 0.1369, - "step": 18435 - }, - { - "epoch": 0.94, - "grad_norm": 1.0332060689778997, - "learning_rate": 2.0415552286124685e-07, - "loss": 0.164, - "step": 18436 - }, - { - "epoch": 0.94, - "grad_norm": 1.154708584588988, - "learning_rate": 2.0382456050048073e-07, - "loss": 0.1745, - "step": 18437 - }, - { - "epoch": 0.94, - "grad_norm": 1.9288602652094318, - "learning_rate": 2.034938638615247e-07, - "loss": 0.1668, - "step": 18438 - }, - { - "epoch": 0.94, - "grad_norm": 1.0836570479763294, - "learning_rate": 2.0316343295335272e-07, - "loss": 0.1602, - "step": 18439 - }, - { - "epoch": 0.94, - "grad_norm": 1.0766862097296297, - "learning_rate": 2.028332677849254e-07, - "loss": 0.1405, - "step": 18440 - }, - { - "epoch": 0.94, - "grad_norm": 1.012468415541933, - "learning_rate": 2.025033683651989e-07, - "loss": 0.1652, - "step": 18441 - }, - { - "epoch": 0.94, - "grad_norm": 2.088899367723937, - "learning_rate": 2.0217373470312275e-07, - "loss": 0.163, - "step": 18442 - }, - { - "epoch": 0.94, - "grad_norm": 1.1267366751772347, - "learning_rate": 2.018443668076364e-07, - "loss": 0.158, - "step": 18443 - }, - { - "epoch": 0.94, - "grad_norm": 1.1178922069284865, - "learning_rate": 2.0151526468767502e-07, - "loss": 0.1898, - "step": 18444 - }, - { - "epoch": 0.94, - "grad_norm": 0.9563470216819234, - "learning_rate": 2.0118642835216584e-07, - "loss": 0.1625, - "step": 18445 - }, - { - "epoch": 0.94, - "grad_norm": 0.9093202957962132, - "learning_rate": 2.0085785781002843e-07, - "loss": 0.1569, - "step": 18446 - }, - { - "epoch": 0.94, - "grad_norm": 1.0411398327926424, - "learning_rate": 2.005295530701745e-07, - "loss": 0.1465, - "step": 18447 - }, - { - "epoch": 0.94, - "grad_norm": 1.2165271777702973, - "learning_rate": 2.0020151414151146e-07, - "loss": 0.1421, - "step": 18448 - }, - { - "epoch": 0.94, - "grad_norm": 1.2697362647353578, - "learning_rate": 1.9987374103293433e-07, - "loss": 0.1722, - "step": 18449 - }, - { - "epoch": 0.94, - "grad_norm": 0.9059765341830827, - "learning_rate": 1.9954623375333493e-07, - "loss": 0.1608, - "step": 18450 - }, - { - "epoch": 0.94, - "grad_norm": 0.9790695354920086, - "learning_rate": 1.9921899231159836e-07, - "loss": 0.1538, - "step": 18451 - }, - { - "epoch": 0.94, - "grad_norm": 0.7967358277210099, - "learning_rate": 1.9889201671660084e-07, - "loss": 0.1439, - "step": 18452 - }, - { - "epoch": 0.94, - "grad_norm": 1.1676863492777445, - "learning_rate": 1.9856530697720976e-07, - "loss": 0.1732, - "step": 18453 - }, - { - "epoch": 0.94, - "grad_norm": 0.9026498634322633, - "learning_rate": 1.9823886310228911e-07, - "loss": 0.1515, - "step": 18454 - }, - { - "epoch": 0.94, - "grad_norm": 1.5898085358785705, - "learning_rate": 1.9791268510069184e-07, - "loss": 0.1791, - "step": 18455 - }, - { - "epoch": 0.94, - "grad_norm": 0.8838055911204667, - "learning_rate": 1.975867729812686e-07, - "loss": 0.1318, - "step": 18456 - }, - { - "epoch": 0.94, - "grad_norm": 0.8962589559141396, - "learning_rate": 1.9726112675285568e-07, - "loss": 0.1487, - "step": 18457 - }, - { - "epoch": 0.94, - "grad_norm": 1.1455441239812183, - "learning_rate": 1.9693574642428935e-07, - "loss": 0.1594, - "step": 18458 - }, - { - "epoch": 0.94, - "grad_norm": 0.8697554124013273, - "learning_rate": 1.9661063200439478e-07, - "loss": 0.1747, - "step": 18459 - }, - { - "epoch": 0.94, - "grad_norm": 0.8473698779305922, - "learning_rate": 1.9628578350198933e-07, - "loss": 0.1416, - "step": 18460 - }, - { - "epoch": 0.94, - "grad_norm": 1.1622171600184905, - "learning_rate": 1.959612009258871e-07, - "loss": 0.1898, - "step": 18461 - }, - { - "epoch": 0.94, - "grad_norm": 1.9635651441512039, - "learning_rate": 1.9563688428489103e-07, - "loss": 0.1467, - "step": 18462 - }, - { - "epoch": 0.94, - "grad_norm": 0.9709659456803279, - "learning_rate": 1.953128335877974e-07, - "loss": 0.1551, - "step": 18463 - }, - { - "epoch": 0.94, - "grad_norm": 1.1570414393860904, - "learning_rate": 1.9498904884339697e-07, - "loss": 0.1885, - "step": 18464 - }, - { - "epoch": 0.94, - "grad_norm": 1.0220800299049926, - "learning_rate": 1.9466553006047383e-07, - "loss": 0.1556, - "step": 18465 - }, - { - "epoch": 0.94, - "grad_norm": 1.17271748461141, - "learning_rate": 1.9434227724779987e-07, - "loss": 0.1475, - "step": 18466 - }, - { - "epoch": 0.94, - "grad_norm": 0.9698973968273322, - "learning_rate": 1.9401929041414692e-07, - "loss": 0.2208, - "step": 18467 - }, - { - "epoch": 0.94, - "grad_norm": 0.9539471986986922, - "learning_rate": 1.9369656956827355e-07, - "loss": 0.1628, - "step": 18468 - }, - { - "epoch": 0.94, - "grad_norm": 1.0196693761635764, - "learning_rate": 1.9337411471893498e-07, - "loss": 0.1703, - "step": 18469 - }, - { - "epoch": 0.94, - "grad_norm": 1.1934751615375507, - "learning_rate": 1.9305192587487753e-07, - "loss": 0.1462, - "step": 18470 - }, - { - "epoch": 0.94, - "grad_norm": 1.199618048089345, - "learning_rate": 1.927300030448409e-07, - "loss": 0.1973, - "step": 18471 - }, - { - "epoch": 0.94, - "grad_norm": 1.1152576551484823, - "learning_rate": 1.924083462375559e-07, - "loss": 0.1673, - "step": 18472 - }, - { - "epoch": 0.94, - "grad_norm": 1.0406551584167096, - "learning_rate": 1.9208695546174994e-07, - "loss": 0.1679, - "step": 18473 - }, - { - "epoch": 0.94, - "grad_norm": 1.1662476783946836, - "learning_rate": 1.917658307261383e-07, - "loss": 0.1899, - "step": 18474 - }, - { - "epoch": 0.94, - "grad_norm": 1.0191715895198943, - "learning_rate": 1.914449720394329e-07, - "loss": 0.1636, - "step": 18475 - }, - { - "epoch": 0.94, - "grad_norm": 1.1530279147333242, - "learning_rate": 1.9112437941033567e-07, - "loss": 0.1574, - "step": 18476 - }, - { - "epoch": 0.94, - "grad_norm": 1.1700995784032147, - "learning_rate": 1.908040528475441e-07, - "loss": 0.1662, - "step": 18477 - }, - { - "epoch": 0.94, - "grad_norm": 1.1758161723996137, - "learning_rate": 1.904839923597468e-07, - "loss": 0.1666, - "step": 18478 - }, - { - "epoch": 0.94, - "grad_norm": 1.2056737356424008, - "learning_rate": 1.9016419795562568e-07, - "loss": 0.177, - "step": 18479 - }, - { - "epoch": 0.94, - "grad_norm": 0.8871261305872965, - "learning_rate": 1.8984466964385384e-07, - "loss": 0.1552, - "step": 18480 - }, - { - "epoch": 0.94, - "grad_norm": 1.878197883112823, - "learning_rate": 1.8952540743309988e-07, - "loss": 0.1556, - "step": 18481 - }, - { - "epoch": 0.94, - "grad_norm": 1.0543218480340648, - "learning_rate": 1.8920641133202356e-07, - "loss": 0.1494, - "step": 18482 - }, - { - "epoch": 0.94, - "grad_norm": 0.941396341109944, - "learning_rate": 1.888876813492768e-07, - "loss": 0.1685, - "step": 18483 - }, - { - "epoch": 0.94, - "grad_norm": 0.9301788089742421, - "learning_rate": 1.8856921749350608e-07, - "loss": 0.1506, - "step": 18484 - }, - { - "epoch": 0.94, - "grad_norm": 1.1855751996628046, - "learning_rate": 1.8825101977334891e-07, - "loss": 0.1714, - "step": 18485 - }, - { - "epoch": 0.94, - "grad_norm": 1.7965655623295367, - "learning_rate": 1.8793308819743837e-07, - "loss": 0.1793, - "step": 18486 - }, - { - "epoch": 0.94, - "grad_norm": 1.0720094037637893, - "learning_rate": 1.8761542277439648e-07, - "loss": 0.1738, - "step": 18487 - }, - { - "epoch": 0.94, - "grad_norm": 1.4204108111036178, - "learning_rate": 1.8729802351284077e-07, - "loss": 0.1427, - "step": 18488 - }, - { - "epoch": 0.94, - "grad_norm": 1.0684230576153166, - "learning_rate": 1.869808904213799e-07, - "loss": 0.1785, - "step": 18489 - }, - { - "epoch": 0.94, - "grad_norm": 0.8876069325807378, - "learning_rate": 1.8666402350861701e-07, - "loss": 0.1544, - "step": 18490 - }, - { - "epoch": 0.94, - "grad_norm": 1.6348599888108668, - "learning_rate": 1.8634742278314632e-07, - "loss": 0.1696, - "step": 18491 - }, - { - "epoch": 0.94, - "grad_norm": 1.1283463049697549, - "learning_rate": 1.8603108825355654e-07, - "loss": 0.1829, - "step": 18492 - }, - { - "epoch": 0.94, - "grad_norm": 1.0989175147355235, - "learning_rate": 1.8571501992842634e-07, - "loss": 0.1541, - "step": 18493 - }, - { - "epoch": 0.94, - "grad_norm": 1.5485827407936898, - "learning_rate": 1.8539921781633107e-07, - "loss": 0.1574, - "step": 18494 - }, - { - "epoch": 0.94, - "grad_norm": 1.2035629459141746, - "learning_rate": 1.8508368192583838e-07, - "loss": 0.1599, - "step": 18495 - }, - { - "epoch": 0.94, - "grad_norm": 1.175110774320181, - "learning_rate": 1.8476841226550247e-07, - "loss": 0.166, - "step": 18496 - }, - { - "epoch": 0.94, - "grad_norm": 0.8648008632983784, - "learning_rate": 1.8445340884387986e-07, - "loss": 0.1555, - "step": 18497 - }, - { - "epoch": 0.94, - "grad_norm": 1.186022566237658, - "learning_rate": 1.841386716695115e-07, - "loss": 0.1621, - "step": 18498 - }, - { - "epoch": 0.94, - "grad_norm": 0.966907537171716, - "learning_rate": 1.8382420075093722e-07, - "loss": 0.1663, - "step": 18499 - }, - { - "epoch": 0.94, - "grad_norm": 1.35987273141365, - "learning_rate": 1.8350999609668462e-07, - "loss": 0.1931, - "step": 18500 - }, - { - "epoch": 0.94, - "grad_norm": 1.7185887664553376, - "learning_rate": 1.8319605771527916e-07, - "loss": 0.1508, - "step": 18501 - }, - { - "epoch": 0.94, - "grad_norm": 0.9654463459049943, - "learning_rate": 1.8288238561523397e-07, - "loss": 0.1851, - "step": 18502 - }, - { - "epoch": 0.94, - "grad_norm": 1.5191769294067194, - "learning_rate": 1.8256897980505895e-07, - "loss": 0.1786, - "step": 18503 - }, - { - "epoch": 0.94, - "grad_norm": 1.0547956256766307, - "learning_rate": 1.8225584029325394e-07, - "loss": 0.1451, - "step": 18504 - }, - { - "epoch": 0.94, - "grad_norm": 0.9602419545290328, - "learning_rate": 1.8194296708831548e-07, - "loss": 0.1654, - "step": 18505 - }, - { - "epoch": 0.94, - "grad_norm": 0.9905278074256648, - "learning_rate": 1.8163036019872682e-07, - "loss": 0.1699, - "step": 18506 - }, - { - "epoch": 0.94, - "grad_norm": 0.9878465962130885, - "learning_rate": 1.8131801963297112e-07, - "loss": 0.1574, - "step": 18507 - }, - { - "epoch": 0.94, - "grad_norm": 2.202120778350107, - "learning_rate": 1.810059453995172e-07, - "loss": 0.1595, - "step": 18508 - }, - { - "epoch": 0.94, - "grad_norm": 1.5424916385881604, - "learning_rate": 1.8069413750683274e-07, - "loss": 0.1531, - "step": 18509 - }, - { - "epoch": 0.94, - "grad_norm": 1.2032151268116966, - "learning_rate": 1.8038259596337316e-07, - "loss": 0.1527, - "step": 18510 - }, - { - "epoch": 0.94, - "grad_norm": 0.9692901581171245, - "learning_rate": 1.8007132077759059e-07, - "loss": 0.1611, - "step": 18511 - }, - { - "epoch": 0.94, - "grad_norm": 1.2384022796992629, - "learning_rate": 1.7976031195792942e-07, - "loss": 0.1616, - "step": 18512 - }, - { - "epoch": 0.94, - "grad_norm": 0.9518845029745795, - "learning_rate": 1.794495695128229e-07, - "loss": 0.1513, - "step": 18513 - }, - { - "epoch": 0.94, - "grad_norm": 1.3445048561231276, - "learning_rate": 1.7913909345070202e-07, - "loss": 0.1584, - "step": 18514 - }, - { - "epoch": 0.94, - "grad_norm": 0.8910941735915785, - "learning_rate": 1.7882888377998787e-07, - "loss": 0.1346, - "step": 18515 - }, - { - "epoch": 0.94, - "grad_norm": 1.058905833198794, - "learning_rate": 1.7851894050909479e-07, - "loss": 0.1772, - "step": 18516 - }, - { - "epoch": 0.94, - "grad_norm": 0.889457926665445, - "learning_rate": 1.7820926364643054e-07, - "loss": 0.1433, - "step": 18517 - }, - { - "epoch": 0.94, - "grad_norm": 1.1393201867917613, - "learning_rate": 1.7789985320039505e-07, - "loss": 0.1922, - "step": 18518 - }, - { - "epoch": 0.94, - "grad_norm": 1.1461805626320478, - "learning_rate": 1.7759070917937937e-07, - "loss": 0.1691, - "step": 18519 - }, - { - "epoch": 0.94, - "grad_norm": 1.9552372498782165, - "learning_rate": 1.7728183159177126e-07, - "loss": 0.1574, - "step": 18520 - }, - { - "epoch": 0.94, - "grad_norm": 1.317634139784253, - "learning_rate": 1.7697322044594846e-07, - "loss": 0.1621, - "step": 18521 - }, - { - "epoch": 0.94, - "grad_norm": 1.5026527540251304, - "learning_rate": 1.76664875750282e-07, - "loss": 0.1535, - "step": 18522 - }, - { - "epoch": 0.94, - "grad_norm": 0.9529695966362643, - "learning_rate": 1.7635679751313529e-07, - "loss": 0.1604, - "step": 18523 - }, - { - "epoch": 0.94, - "grad_norm": 1.362781251116253, - "learning_rate": 1.7604898574286488e-07, - "loss": 0.1676, - "step": 18524 - }, - { - "epoch": 0.94, - "grad_norm": 1.794298494281626, - "learning_rate": 1.7574144044782083e-07, - "loss": 0.1632, - "step": 18525 - }, - { - "epoch": 0.94, - "grad_norm": 1.3543147984644823, - "learning_rate": 1.754341616363464e-07, - "loss": 0.1539, - "step": 18526 - }, - { - "epoch": 0.94, - "grad_norm": 1.8098682812727673, - "learning_rate": 1.7512714931677387e-07, - "loss": 0.1744, - "step": 18527 - }, - { - "epoch": 0.94, - "grad_norm": 1.0965203264417904, - "learning_rate": 1.7482040349743323e-07, - "loss": 0.1719, - "step": 18528 - }, - { - "epoch": 0.94, - "grad_norm": 1.7137149759900159, - "learning_rate": 1.7451392418664227e-07, - "loss": 0.1512, - "step": 18529 - }, - { - "epoch": 0.94, - "grad_norm": 0.8312516693775386, - "learning_rate": 1.7420771139271765e-07, - "loss": 0.1578, - "step": 18530 - }, - { - "epoch": 0.94, - "grad_norm": 1.941353238215755, - "learning_rate": 1.7390176512396384e-07, - "loss": 0.1575, - "step": 18531 - }, - { - "epoch": 0.94, - "grad_norm": 0.9570430798627169, - "learning_rate": 1.7359608538867868e-07, - "loss": 0.164, - "step": 18532 - }, - { - "epoch": 0.94, - "grad_norm": 1.2019136658490237, - "learning_rate": 1.732906721951555e-07, - "loss": 0.1681, - "step": 18533 - }, - { - "epoch": 0.94, - "grad_norm": 2.6202511232946657, - "learning_rate": 1.729855255516777e-07, - "loss": 0.1778, - "step": 18534 - }, - { - "epoch": 0.94, - "grad_norm": 1.134760261935929, - "learning_rate": 1.7268064546652308e-07, - "loss": 0.1998, - "step": 18535 - }, - { - "epoch": 0.94, - "grad_norm": 0.8102705538864348, - "learning_rate": 1.7237603194795948e-07, - "loss": 0.1581, - "step": 18536 - }, - { - "epoch": 0.94, - "grad_norm": 0.8801215439384064, - "learning_rate": 1.7207168500425142e-07, - "loss": 0.1523, - "step": 18537 - }, - { - "epoch": 0.94, - "grad_norm": 0.8586275430989962, - "learning_rate": 1.7176760464365449e-07, - "loss": 0.1519, - "step": 18538 - }, - { - "epoch": 0.94, - "grad_norm": 1.9018279699676834, - "learning_rate": 1.7146379087441655e-07, - "loss": 0.1914, - "step": 18539 - }, - { - "epoch": 0.94, - "grad_norm": 1.5663057154553512, - "learning_rate": 1.711602437047788e-07, - "loss": 0.1854, - "step": 18540 - }, - { - "epoch": 0.94, - "grad_norm": 1.0250812414102985, - "learning_rate": 1.708569631429746e-07, - "loss": 0.1579, - "step": 18541 - }, - { - "epoch": 0.94, - "grad_norm": 1.4051658931794355, - "learning_rate": 1.7055394919722856e-07, - "loss": 0.1577, - "step": 18542 - }, - { - "epoch": 0.94, - "grad_norm": 0.7920877521485331, - "learning_rate": 1.7025120187576406e-07, - "loss": 0.1664, - "step": 18543 - }, - { - "epoch": 0.94, - "grad_norm": 1.0138473039450129, - "learning_rate": 1.6994872118679006e-07, - "loss": 0.1551, - "step": 18544 - }, - { - "epoch": 0.94, - "grad_norm": 0.8477002854411233, - "learning_rate": 1.6964650713851228e-07, - "loss": 0.1292, - "step": 18545 - }, - { - "epoch": 0.94, - "grad_norm": 1.2484301198202659, - "learning_rate": 1.6934455973912744e-07, - "loss": 0.1529, - "step": 18546 - }, - { - "epoch": 0.94, - "grad_norm": 6.576777670975822, - "learning_rate": 1.690428789968268e-07, - "loss": 0.1777, - "step": 18547 - }, - { - "epoch": 0.94, - "grad_norm": 0.91852055430131, - "learning_rate": 1.6874146491979493e-07, - "loss": 0.1524, - "step": 18548 - }, - { - "epoch": 0.94, - "grad_norm": 0.9500129432796155, - "learning_rate": 1.6844031751620414e-07, - "loss": 0.1788, - "step": 18549 - }, - { - "epoch": 0.94, - "grad_norm": 1.7767599888761454, - "learning_rate": 1.6813943679422684e-07, - "loss": 0.1486, - "step": 18550 - }, - { - "epoch": 0.94, - "grad_norm": 0.9680141309651069, - "learning_rate": 1.678388227620209e-07, - "loss": 0.1601, - "step": 18551 - }, - { - "epoch": 0.94, - "grad_norm": 1.0630914404291878, - "learning_rate": 1.6753847542774315e-07, - "loss": 0.1498, - "step": 18552 - }, - { - "epoch": 0.94, - "grad_norm": 1.5211018887090226, - "learning_rate": 1.6723839479953929e-07, - "loss": 0.1689, - "step": 18553 - }, - { - "epoch": 0.94, - "grad_norm": 2.468104798877948, - "learning_rate": 1.669385808855495e-07, - "loss": 0.1652, - "step": 18554 - }, - { - "epoch": 0.94, - "grad_norm": 1.001087755929558, - "learning_rate": 1.666390336939061e-07, - "loss": 0.1736, - "step": 18555 - }, - { - "epoch": 0.94, - "grad_norm": 0.8453852231440885, - "learning_rate": 1.6633975323273376e-07, - "loss": 0.1438, - "step": 18556 - }, - { - "epoch": 0.94, - "grad_norm": 1.0352472415273843, - "learning_rate": 1.6604073951015154e-07, - "loss": 0.1709, - "step": 18557 - }, - { - "epoch": 0.94, - "grad_norm": 0.9459021394612915, - "learning_rate": 1.6574199253426958e-07, - "loss": 0.1691, - "step": 18558 - }, - { - "epoch": 0.94, - "grad_norm": 1.8641474095406372, - "learning_rate": 1.6544351231319145e-07, - "loss": 0.1436, - "step": 18559 - }, - { - "epoch": 0.94, - "grad_norm": 1.1152650363612875, - "learning_rate": 1.6514529885501397e-07, - "loss": 0.1423, - "step": 18560 - }, - { - "epoch": 0.94, - "grad_norm": 1.2606785611258506, - "learning_rate": 1.648473521678251e-07, - "loss": 0.1796, - "step": 18561 - }, - { - "epoch": 0.94, - "grad_norm": 1.298872067227049, - "learning_rate": 1.645496722597084e-07, - "loss": 0.1715, - "step": 18562 - }, - { - "epoch": 0.94, - "grad_norm": 0.8674647051865187, - "learning_rate": 1.642522591387352e-07, - "loss": 0.1452, - "step": 18563 - }, - { - "epoch": 0.94, - "grad_norm": 1.0411258487676804, - "learning_rate": 1.6395511281297682e-07, - "loss": 0.1454, - "step": 18564 - }, - { - "epoch": 0.94, - "grad_norm": 1.2294606917015065, - "learning_rate": 1.6365823329049124e-07, - "loss": 0.1551, - "step": 18565 - }, - { - "epoch": 0.94, - "grad_norm": 1.2626410672708395, - "learning_rate": 1.633616205793309e-07, - "loss": 0.1647, - "step": 18566 - }, - { - "epoch": 0.94, - "grad_norm": 0.8820935476550902, - "learning_rate": 1.6306527468754384e-07, - "loss": 0.1538, - "step": 18567 - }, - { - "epoch": 0.94, - "grad_norm": 1.076423859688967, - "learning_rate": 1.6276919562316475e-07, - "loss": 0.1607, - "step": 18568 - }, - { - "epoch": 0.94, - "grad_norm": 1.058599832374519, - "learning_rate": 1.6247338339422823e-07, - "loss": 0.1652, - "step": 18569 - }, - { - "epoch": 0.94, - "grad_norm": 0.9862983112602691, - "learning_rate": 1.6217783800875576e-07, - "loss": 0.1525, - "step": 18570 - }, - { - "epoch": 0.94, - "grad_norm": 1.1039987539964633, - "learning_rate": 1.618825594747664e-07, - "loss": 0.1691, - "step": 18571 - }, - { - "epoch": 0.94, - "grad_norm": 1.1355796680292811, - "learning_rate": 1.615875478002671e-07, - "loss": 0.1444, - "step": 18572 - }, - { - "epoch": 0.94, - "grad_norm": 1.1295020685609354, - "learning_rate": 1.6129280299326144e-07, - "loss": 0.1779, - "step": 18573 - }, - { - "epoch": 0.94, - "grad_norm": 1.1228313989287582, - "learning_rate": 1.6099832506174419e-07, - "loss": 0.1544, - "step": 18574 - }, - { - "epoch": 0.94, - "grad_norm": 1.3138722407016137, - "learning_rate": 1.6070411401370335e-07, - "loss": 0.1783, - "step": 18575 - }, - { - "epoch": 0.94, - "grad_norm": 1.1715432450412604, - "learning_rate": 1.6041016985711923e-07, - "loss": 0.161, - "step": 18576 - }, - { - "epoch": 0.94, - "grad_norm": 1.3627767559615154, - "learning_rate": 1.6011649259996541e-07, - "loss": 0.172, - "step": 18577 - }, - { - "epoch": 0.94, - "grad_norm": 1.1528273504272677, - "learning_rate": 1.598230822502067e-07, - "loss": 0.1569, - "step": 18578 - }, - { - "epoch": 0.94, - "grad_norm": 1.5197766356269795, - "learning_rate": 1.5952993881580336e-07, - "loss": 0.1562, - "step": 18579 - }, - { - "epoch": 0.94, - "grad_norm": 1.6041305313914618, - "learning_rate": 1.592370623047046e-07, - "loss": 0.1507, - "step": 18580 - }, - { - "epoch": 0.94, - "grad_norm": 0.8985954018484826, - "learning_rate": 1.5894445272485736e-07, - "loss": 0.1629, - "step": 18581 - }, - { - "epoch": 0.94, - "grad_norm": 1.0707360930908472, - "learning_rate": 1.586521100841987e-07, - "loss": 0.1543, - "step": 18582 - }, - { - "epoch": 0.94, - "grad_norm": 1.3859762485376421, - "learning_rate": 1.583600343906566e-07, - "loss": 0.1772, - "step": 18583 - }, - { - "epoch": 0.95, - "grad_norm": 0.9957403277629844, - "learning_rate": 1.5806822565215373e-07, - "loss": 0.1627, - "step": 18584 - }, - { - "epoch": 0.95, - "grad_norm": 1.4046400140650128, - "learning_rate": 1.5777668387660706e-07, - "loss": 0.1851, - "step": 18585 - }, - { - "epoch": 0.95, - "grad_norm": 0.891535151011113, - "learning_rate": 1.5748540907192356e-07, - "loss": 0.2006, - "step": 18586 - }, - { - "epoch": 0.95, - "grad_norm": 1.5090297703983357, - "learning_rate": 1.571944012460036e-07, - "loss": 0.1712, - "step": 18587 - }, - { - "epoch": 0.95, - "grad_norm": 0.9003120205448941, - "learning_rate": 1.569036604067431e-07, - "loss": 0.1649, - "step": 18588 - }, - { - "epoch": 0.95, - "grad_norm": 1.1480753994260278, - "learning_rate": 1.566131865620246e-07, - "loss": 0.158, - "step": 18589 - }, - { - "epoch": 0.95, - "grad_norm": 0.9565359080976353, - "learning_rate": 1.5632297971972966e-07, - "loss": 0.1721, - "step": 18590 - }, - { - "epoch": 0.95, - "grad_norm": 1.0272957499468065, - "learning_rate": 1.5603303988773078e-07, - "loss": 0.1378, - "step": 18591 - }, - { - "epoch": 0.95, - "grad_norm": 0.9377209184491478, - "learning_rate": 1.5574336707389171e-07, - "loss": 0.1706, - "step": 18592 - }, - { - "epoch": 0.95, - "grad_norm": 0.7620016285666292, - "learning_rate": 1.554539612860695e-07, - "loss": 0.14, - "step": 18593 - }, - { - "epoch": 0.95, - "grad_norm": 0.9909527907118318, - "learning_rate": 1.551648225321145e-07, - "loss": 0.1582, - "step": 18594 - }, - { - "epoch": 0.95, - "grad_norm": 0.9335941141651837, - "learning_rate": 1.548759508198694e-07, - "loss": 0.1469, - "step": 18595 - }, - { - "epoch": 0.95, - "grad_norm": 0.7752699845301906, - "learning_rate": 1.545873461571712e-07, - "loss": 0.1461, - "step": 18596 - }, - { - "epoch": 0.95, - "grad_norm": 1.9913274331233224, - "learning_rate": 1.54299008551847e-07, - "loss": 0.1474, - "step": 18597 - }, - { - "epoch": 0.95, - "grad_norm": 1.2023120555067652, - "learning_rate": 1.5401093801171828e-07, - "loss": 0.1873, - "step": 18598 - }, - { - "epoch": 0.95, - "grad_norm": 1.071690745494057, - "learning_rate": 1.5372313454459887e-07, - "loss": 0.1433, - "step": 18599 - }, - { - "epoch": 0.95, - "grad_norm": 9.053479587695811, - "learning_rate": 1.5343559815829468e-07, - "loss": 0.1503, - "step": 18600 - }, - { - "epoch": 0.95, - "grad_norm": 0.9531335525572882, - "learning_rate": 1.5314832886060727e-07, - "loss": 0.1471, - "step": 18601 - }, - { - "epoch": 0.95, - "grad_norm": 1.036438135675386, - "learning_rate": 1.5286132665932706e-07, - "loss": 0.1672, - "step": 18602 - }, - { - "epoch": 0.95, - "grad_norm": 1.9933283290691903, - "learning_rate": 1.525745915622401e-07, - "loss": 0.1593, - "step": 18603 - }, - { - "epoch": 0.95, - "grad_norm": 1.1621345115927901, - "learning_rate": 1.5228812357712231e-07, - "loss": 0.1512, - "step": 18604 - }, - { - "epoch": 0.95, - "grad_norm": 0.860499166036676, - "learning_rate": 1.520019227117464e-07, - "loss": 0.167, - "step": 18605 - }, - { - "epoch": 0.95, - "grad_norm": 1.1558556579174626, - "learning_rate": 1.5171598897387395e-07, - "loss": 0.154, - "step": 18606 - }, - { - "epoch": 0.95, - "grad_norm": 0.9880628780813716, - "learning_rate": 1.514303223712621e-07, - "loss": 0.1725, - "step": 18607 - }, - { - "epoch": 0.95, - "grad_norm": 1.1171775767513268, - "learning_rate": 1.5114492291165794e-07, - "loss": 0.1747, - "step": 18608 - }, - { - "epoch": 0.95, - "grad_norm": 1.1338849876747592, - "learning_rate": 1.508597906028053e-07, - "loss": 0.1733, - "step": 18609 - }, - { - "epoch": 0.95, - "grad_norm": 1.1072484917924614, - "learning_rate": 1.505749254524358e-07, - "loss": 0.1475, - "step": 18610 - }, - { - "epoch": 0.95, - "grad_norm": 0.9919583007519222, - "learning_rate": 1.5029032746827875e-07, - "loss": 0.1575, - "step": 18611 - }, - { - "epoch": 0.95, - "grad_norm": 1.8184994189922, - "learning_rate": 1.500059966580525e-07, - "loss": 0.1411, - "step": 18612 - }, - { - "epoch": 0.95, - "grad_norm": 1.398520475493659, - "learning_rate": 1.497219330294708e-07, - "loss": 0.1708, - "step": 18613 - }, - { - "epoch": 0.95, - "grad_norm": 1.0964778320968296, - "learning_rate": 1.4943813659023753e-07, - "loss": 0.1693, - "step": 18614 - }, - { - "epoch": 0.95, - "grad_norm": 1.6528757245841295, - "learning_rate": 1.49154607348051e-07, - "loss": 0.1722, - "step": 18615 - }, - { - "epoch": 0.95, - "grad_norm": 0.9692735415387215, - "learning_rate": 1.4887134531060165e-07, - "loss": 0.1523, - "step": 18616 - }, - { - "epoch": 0.95, - "grad_norm": 1.0528475743073271, - "learning_rate": 1.485883504855734e-07, - "loss": 0.1507, - "step": 18617 - }, - { - "epoch": 0.95, - "grad_norm": 1.9032628322727292, - "learning_rate": 1.4830562288064344e-07, - "loss": 0.1719, - "step": 18618 - }, - { - "epoch": 0.95, - "grad_norm": 1.0427542611832965, - "learning_rate": 1.4802316250347893e-07, - "loss": 0.1806, - "step": 18619 - }, - { - "epoch": 0.95, - "grad_norm": 0.9516894573352137, - "learning_rate": 1.4774096936174376e-07, - "loss": 0.1616, - "step": 18620 - }, - { - "epoch": 0.95, - "grad_norm": 1.189078582059111, - "learning_rate": 1.474590434630907e-07, - "loss": 0.1558, - "step": 18621 - }, - { - "epoch": 0.95, - "grad_norm": 1.0663919954221222, - "learning_rate": 1.4717738481516808e-07, - "loss": 0.1742, - "step": 18622 - }, - { - "epoch": 0.95, - "grad_norm": 1.0112513802414314, - "learning_rate": 1.4689599342561423e-07, - "loss": 0.1508, - "step": 18623 - }, - { - "epoch": 0.95, - "grad_norm": 0.9221034568216941, - "learning_rate": 1.4661486930206415e-07, - "loss": 0.1558, - "step": 18624 - }, - { - "epoch": 0.95, - "grad_norm": 2.141316296046559, - "learning_rate": 1.4633401245214064e-07, - "loss": 0.156, - "step": 18625 - }, - { - "epoch": 0.95, - "grad_norm": 1.0125885082259565, - "learning_rate": 1.4605342288346536e-07, - "loss": 0.1609, - "step": 18626 - }, - { - "epoch": 0.95, - "grad_norm": 2.2259660659391147, - "learning_rate": 1.4577310060364558e-07, - "loss": 0.1641, - "step": 18627 - }, - { - "epoch": 0.95, - "grad_norm": 1.0070231515282992, - "learning_rate": 1.4549304562028966e-07, - "loss": 0.1706, - "step": 18628 - }, - { - "epoch": 0.95, - "grad_norm": 0.8620628643100052, - "learning_rate": 1.4521325794098928e-07, - "loss": 0.1614, - "step": 18629 - }, - { - "epoch": 0.95, - "grad_norm": 0.9305244147767624, - "learning_rate": 1.449337375733373e-07, - "loss": 0.1551, - "step": 18630 - }, - { - "epoch": 0.95, - "grad_norm": 2.5742305713151112, - "learning_rate": 1.4465448452491315e-07, - "loss": 0.1686, - "step": 18631 - }, - { - "epoch": 0.95, - "grad_norm": 1.2104559628853415, - "learning_rate": 1.4437549880329415e-07, - "loss": 0.16, - "step": 18632 - }, - { - "epoch": 0.95, - "grad_norm": 1.239220991219772, - "learning_rate": 1.4409678041604426e-07, - "loss": 0.1666, - "step": 18633 - }, - { - "epoch": 0.95, - "grad_norm": 0.786747929668593, - "learning_rate": 1.4381832937072737e-07, - "loss": 0.1394, - "step": 18634 - }, - { - "epoch": 0.95, - "grad_norm": 1.2145595739356847, - "learning_rate": 1.4354014567489528e-07, - "loss": 0.1649, - "step": 18635 - }, - { - "epoch": 0.95, - "grad_norm": 1.9754466948839298, - "learning_rate": 1.432622293360919e-07, - "loss": 0.1846, - "step": 18636 - }, - { - "epoch": 0.95, - "grad_norm": 1.0548122289678061, - "learning_rate": 1.4298458036185903e-07, - "loss": 0.1495, - "step": 18637 - }, - { - "epoch": 0.95, - "grad_norm": 1.403480202485536, - "learning_rate": 1.4270719875972506e-07, - "loss": 0.1592, - "step": 18638 - }, - { - "epoch": 0.95, - "grad_norm": 4.063392804506902, - "learning_rate": 1.424300845372162e-07, - "loss": 0.1699, - "step": 18639 - }, - { - "epoch": 0.95, - "grad_norm": 1.2431406279033599, - "learning_rate": 1.4215323770184642e-07, - "loss": 0.1733, - "step": 18640 - }, - { - "epoch": 0.95, - "grad_norm": 0.9805121919052685, - "learning_rate": 1.418766582611286e-07, - "loss": 0.1477, - "step": 18641 - }, - { - "epoch": 0.95, - "grad_norm": 1.1772098397613278, - "learning_rate": 1.4160034622256125e-07, - "loss": 0.1678, - "step": 18642 - }, - { - "epoch": 0.95, - "grad_norm": 1.2741303245522206, - "learning_rate": 1.4132430159364273e-07, - "loss": 0.154, - "step": 18643 - }, - { - "epoch": 0.95, - "grad_norm": 1.0401178606701769, - "learning_rate": 1.4104852438185823e-07, - "loss": 0.1571, - "step": 18644 - }, - { - "epoch": 0.95, - "grad_norm": 1.3489124060031128, - "learning_rate": 1.4077301459469062e-07, - "loss": 0.1594, - "step": 18645 - }, - { - "epoch": 0.95, - "grad_norm": 1.1101616198916429, - "learning_rate": 1.404977722396106e-07, - "loss": 0.1495, - "step": 18646 - }, - { - "epoch": 0.95, - "grad_norm": 1.266653077152881, - "learning_rate": 1.4022279732408661e-07, - "loss": 0.1544, - "step": 18647 - }, - { - "epoch": 0.95, - "grad_norm": 1.3044435715874434, - "learning_rate": 1.3994808985557497e-07, - "loss": 0.1595, - "step": 18648 - }, - { - "epoch": 0.95, - "grad_norm": 0.865404252667061, - "learning_rate": 1.3967364984152965e-07, - "loss": 0.1569, - "step": 18649 - }, - { - "epoch": 0.95, - "grad_norm": 1.1375610633591677, - "learning_rate": 1.393994772893925e-07, - "loss": 0.1546, - "step": 18650 - }, - { - "epoch": 0.95, - "grad_norm": 1.3318447589511289, - "learning_rate": 1.3912557220660206e-07, - "loss": 0.1477, - "step": 18651 - }, - { - "epoch": 0.95, - "grad_norm": 1.155841518237319, - "learning_rate": 1.3885193460058676e-07, - "loss": 0.1355, - "step": 18652 - }, - { - "epoch": 0.95, - "grad_norm": 1.148309023639851, - "learning_rate": 1.3857856447876962e-07, - "loss": 0.1764, - "step": 18653 - }, - { - "epoch": 0.95, - "grad_norm": 1.087533529247148, - "learning_rate": 1.3830546184856687e-07, - "loss": 0.1581, - "step": 18654 - }, - { - "epoch": 0.95, - "grad_norm": 1.167221914578398, - "learning_rate": 1.380326267173848e-07, - "loss": 0.1784, - "step": 18655 - }, - { - "epoch": 0.95, - "grad_norm": 0.8385862560019581, - "learning_rate": 1.3776005909262423e-07, - "loss": 0.1415, - "step": 18656 - }, - { - "epoch": 0.95, - "grad_norm": 1.1988900522919759, - "learning_rate": 1.374877589816792e-07, - "loss": 0.1595, - "step": 18657 - }, - { - "epoch": 0.95, - "grad_norm": 2.5518572845652296, - "learning_rate": 1.3721572639193714e-07, - "loss": 0.1569, - "step": 18658 - }, - { - "epoch": 0.95, - "grad_norm": 1.3188198072119175, - "learning_rate": 1.3694396133077436e-07, - "loss": 0.1524, - "step": 18659 - }, - { - "epoch": 0.95, - "grad_norm": 1.3273904922071935, - "learning_rate": 1.3667246380556386e-07, - "loss": 0.1603, - "step": 18660 - }, - { - "epoch": 0.95, - "grad_norm": 0.9632445855587911, - "learning_rate": 1.3640123382366977e-07, - "loss": 0.1464, - "step": 18661 - }, - { - "epoch": 0.95, - "grad_norm": 1.1982344972555086, - "learning_rate": 1.361302713924495e-07, - "loss": 0.1712, - "step": 18662 - }, - { - "epoch": 0.95, - "grad_norm": 2.628306264095215, - "learning_rate": 1.3585957651925274e-07, - "loss": 0.1512, - "step": 18663 - }, - { - "epoch": 0.95, - "grad_norm": 2.7487833080827206, - "learning_rate": 1.355891492114214e-07, - "loss": 0.1611, - "step": 18664 - }, - { - "epoch": 0.95, - "grad_norm": 0.9688560463974245, - "learning_rate": 1.3531898947629296e-07, - "loss": 0.161, - "step": 18665 - }, - { - "epoch": 0.95, - "grad_norm": 1.263410858922274, - "learning_rate": 1.3504909732119266e-07, - "loss": 0.1701, - "step": 18666 - }, - { - "epoch": 0.95, - "grad_norm": 1.545720766035433, - "learning_rate": 1.347794727534435e-07, - "loss": 0.1691, - "step": 18667 - }, - { - "epoch": 0.95, - "grad_norm": 1.280358835845226, - "learning_rate": 1.3451011578035856e-07, - "loss": 0.1576, - "step": 18668 - }, - { - "epoch": 0.95, - "grad_norm": 1.018370216604535, - "learning_rate": 1.3424102640924307e-07, - "loss": 0.1566, - "step": 18669 - }, - { - "epoch": 0.95, - "grad_norm": 3.649240179706749, - "learning_rate": 1.339722046473979e-07, - "loss": 0.1624, - "step": 18670 - }, - { - "epoch": 0.95, - "grad_norm": 1.585373607840606, - "learning_rate": 1.3370365050211387e-07, - "loss": 0.1445, - "step": 18671 - }, - { - "epoch": 0.95, - "grad_norm": 0.9880163861646013, - "learning_rate": 1.3343536398067513e-07, - "loss": 0.1682, - "step": 18672 - }, - { - "epoch": 0.95, - "grad_norm": 1.1272581002377124, - "learning_rate": 1.3316734509035922e-07, - "loss": 0.1606, - "step": 18673 - }, - { - "epoch": 0.95, - "grad_norm": 0.9879019391840964, - "learning_rate": 1.3289959383843698e-07, - "loss": 0.1414, - "step": 18674 - }, - { - "epoch": 0.95, - "grad_norm": 0.9941590979821857, - "learning_rate": 1.3263211023217038e-07, - "loss": 0.1523, - "step": 18675 - }, - { - "epoch": 0.95, - "grad_norm": 1.0029671973975969, - "learning_rate": 1.323648942788147e-07, - "loss": 0.1646, - "step": 18676 - }, - { - "epoch": 0.95, - "grad_norm": 0.9179981656127324, - "learning_rate": 1.3209794598561864e-07, - "loss": 0.1772, - "step": 18677 - }, - { - "epoch": 0.95, - "grad_norm": 0.9915365118700741, - "learning_rate": 1.3183126535982306e-07, - "loss": 0.1552, - "step": 18678 - }, - { - "epoch": 0.95, - "grad_norm": 1.6295534645843557, - "learning_rate": 1.3156485240866213e-07, - "loss": 0.1526, - "step": 18679 - }, - { - "epoch": 0.95, - "grad_norm": 1.1206894933896525, - "learning_rate": 1.312987071393612e-07, - "loss": 0.165, - "step": 18680 - }, - { - "epoch": 0.95, - "grad_norm": 1.1012662595538143, - "learning_rate": 1.310328295591412e-07, - "loss": 0.171, - "step": 18681 - }, - { - "epoch": 0.95, - "grad_norm": 1.112825275192214, - "learning_rate": 1.30767219675213e-07, - "loss": 0.1534, - "step": 18682 - }, - { - "epoch": 0.95, - "grad_norm": 1.042100924816748, - "learning_rate": 1.3050187749478192e-07, - "loss": 0.1683, - "step": 18683 - }, - { - "epoch": 0.95, - "grad_norm": 3.4419620222320186, - "learning_rate": 1.3023680302504338e-07, - "loss": 0.1751, - "step": 18684 - }, - { - "epoch": 0.95, - "grad_norm": 0.9906677459433227, - "learning_rate": 1.2997199627319047e-07, - "loss": 0.1514, - "step": 18685 - }, - { - "epoch": 0.95, - "grad_norm": 0.9638069201467271, - "learning_rate": 1.29707457246403e-07, - "loss": 0.1733, - "step": 18686 - }, - { - "epoch": 0.95, - "grad_norm": 1.1689158639836776, - "learning_rate": 1.2944318595185855e-07, - "loss": 0.1571, - "step": 18687 - }, - { - "epoch": 0.95, - "grad_norm": 0.9465121862654976, - "learning_rate": 1.29179182396727e-07, - "loss": 0.1454, - "step": 18688 - }, - { - "epoch": 0.95, - "grad_norm": 1.490398094105073, - "learning_rate": 1.289154465881659e-07, - "loss": 0.1399, - "step": 18689 - }, - { - "epoch": 0.95, - "grad_norm": 1.0151831663290165, - "learning_rate": 1.2865197853333179e-07, - "loss": 0.1606, - "step": 18690 - }, - { - "epoch": 0.95, - "grad_norm": 1.0566143989903571, - "learning_rate": 1.283887782393689e-07, - "loss": 0.1635, - "step": 18691 - }, - { - "epoch": 0.95, - "grad_norm": 1.450115053552093, - "learning_rate": 1.2812584571341936e-07, - "loss": 0.1679, - "step": 18692 - }, - { - "epoch": 0.95, - "grad_norm": 1.32553033080943, - "learning_rate": 1.2786318096261298e-07, - "loss": 0.1729, - "step": 18693 - }, - { - "epoch": 0.95, - "grad_norm": 1.0099512930173309, - "learning_rate": 1.2760078399407626e-07, - "loss": 0.1483, - "step": 18694 - }, - { - "epoch": 0.95, - "grad_norm": 1.0971077485331466, - "learning_rate": 1.273386548149247e-07, - "loss": 0.1406, - "step": 18695 - }, - { - "epoch": 0.95, - "grad_norm": 0.9603529997571104, - "learning_rate": 1.270767934322703e-07, - "loss": 0.1662, - "step": 18696 - }, - { - "epoch": 0.95, - "grad_norm": 1.7585292023883128, - "learning_rate": 1.2681519985321522e-07, - "loss": 0.1954, - "step": 18697 - }, - { - "epoch": 0.95, - "grad_norm": 1.3029394904526037, - "learning_rate": 1.2655387408485597e-07, - "loss": 0.1469, - "step": 18698 - }, - { - "epoch": 0.95, - "grad_norm": 0.9517084868877973, - "learning_rate": 1.2629281613428024e-07, - "loss": 0.1617, - "step": 18699 - }, - { - "epoch": 0.95, - "grad_norm": 0.8858492726075213, - "learning_rate": 1.260320260085701e-07, - "loss": 0.1501, - "step": 18700 - }, - { - "epoch": 0.95, - "grad_norm": 1.061871176835859, - "learning_rate": 1.2577150371479884e-07, - "loss": 0.1422, - "step": 18701 - }, - { - "epoch": 0.95, - "grad_norm": 1.2908260521128192, - "learning_rate": 1.25511249260033e-07, - "loss": 0.165, - "step": 18702 - }, - { - "epoch": 0.95, - "grad_norm": 1.1293124330242905, - "learning_rate": 1.2525126265133137e-07, - "loss": 0.1873, - "step": 18703 - }, - { - "epoch": 0.95, - "grad_norm": 1.085955564601038, - "learning_rate": 1.249915438957483e-07, - "loss": 0.1599, - "step": 18704 - }, - { - "epoch": 0.95, - "grad_norm": 1.021664792923056, - "learning_rate": 1.2473209300032706e-07, - "loss": 0.1607, - "step": 18705 - }, - { - "epoch": 0.95, - "grad_norm": 1.3178651427571242, - "learning_rate": 1.2447290997210426e-07, - "loss": 0.1552, - "step": 18706 - }, - { - "epoch": 0.95, - "grad_norm": 3.5516786445283004, - "learning_rate": 1.2421399481811313e-07, - "loss": 0.1702, - "step": 18707 - }, - { - "epoch": 0.95, - "grad_norm": 1.329356585607747, - "learning_rate": 1.2395534754537475e-07, - "loss": 0.1616, - "step": 18708 - }, - { - "epoch": 0.95, - "grad_norm": 1.1807373706580682, - "learning_rate": 1.236969681609057e-07, - "loss": 0.1495, - "step": 18709 - }, - { - "epoch": 0.95, - "grad_norm": 1.5458064139687657, - "learning_rate": 1.2343885667171373e-07, - "loss": 0.1954, - "step": 18710 - }, - { - "epoch": 0.95, - "grad_norm": 0.94979791393677, - "learning_rate": 1.23181013084801e-07, - "loss": 0.1673, - "step": 18711 - }, - { - "epoch": 0.95, - "grad_norm": 1.3667713860642676, - "learning_rate": 1.2292343740715973e-07, - "loss": 0.1695, - "step": 18712 - }, - { - "epoch": 0.95, - "grad_norm": 0.8269227975604101, - "learning_rate": 1.2266612964577984e-07, - "loss": 0.1403, - "step": 18713 - }, - { - "epoch": 0.95, - "grad_norm": 1.1395132914278172, - "learning_rate": 1.22409089807638e-07, - "loss": 0.1547, - "step": 18714 - }, - { - "epoch": 0.95, - "grad_norm": 1.3599861623733471, - "learning_rate": 1.221523178997075e-07, - "loss": 0.1406, - "step": 18715 - }, - { - "epoch": 0.95, - "grad_norm": 1.1998746875068946, - "learning_rate": 1.2189581392895388e-07, - "loss": 0.1695, - "step": 18716 - }, - { - "epoch": 0.95, - "grad_norm": 1.0641164731288262, - "learning_rate": 1.2163957790233382e-07, - "loss": 0.1664, - "step": 18717 - }, - { - "epoch": 0.95, - "grad_norm": 1.188292593607651, - "learning_rate": 1.2138360982679842e-07, - "loss": 0.1845, - "step": 18718 - }, - { - "epoch": 0.95, - "grad_norm": 1.253121358483849, - "learning_rate": 1.21127909709291e-07, - "loss": 0.1712, - "step": 18719 - }, - { - "epoch": 0.95, - "grad_norm": 0.9717715577170544, - "learning_rate": 1.2087247755674603e-07, - "loss": 0.1675, - "step": 18720 - }, - { - "epoch": 0.95, - "grad_norm": 1.258572516156622, - "learning_rate": 1.206173133760935e-07, - "loss": 0.1513, - "step": 18721 - }, - { - "epoch": 0.95, - "grad_norm": 1.0221084000142802, - "learning_rate": 1.2036241717425456e-07, - "loss": 0.1663, - "step": 18722 - }, - { - "epoch": 0.95, - "grad_norm": 1.2012471054237224, - "learning_rate": 1.2010778895814258e-07, - "loss": 0.1562, - "step": 18723 - }, - { - "epoch": 0.95, - "grad_norm": 1.594584266408908, - "learning_rate": 1.1985342873466532e-07, - "loss": 0.178, - "step": 18724 - }, - { - "epoch": 0.95, - "grad_norm": 2.201800662850403, - "learning_rate": 1.1959933651072065e-07, - "loss": 0.1562, - "step": 18725 - }, - { - "epoch": 0.95, - "grad_norm": 0.910808606905359, - "learning_rate": 1.1934551229320413e-07, - "loss": 0.1586, - "step": 18726 - }, - { - "epoch": 0.95, - "grad_norm": 1.004299274296927, - "learning_rate": 1.1909195608899694e-07, - "loss": 0.1672, - "step": 18727 - }, - { - "epoch": 0.95, - "grad_norm": 1.0095444901687516, - "learning_rate": 1.1883866790497911e-07, - "loss": 0.1509, - "step": 18728 - }, - { - "epoch": 0.95, - "grad_norm": 0.8761406814468267, - "learning_rate": 1.1858564774802073e-07, - "loss": 0.1589, - "step": 18729 - }, - { - "epoch": 0.95, - "grad_norm": 1.034883019557606, - "learning_rate": 1.1833289562498406e-07, - "loss": 0.1614, - "step": 18730 - }, - { - "epoch": 0.95, - "grad_norm": 1.1760568222090761, - "learning_rate": 1.1808041154272587e-07, - "loss": 0.1413, - "step": 18731 - }, - { - "epoch": 0.95, - "grad_norm": 0.8505003865615471, - "learning_rate": 1.178281955080951e-07, - "loss": 0.1571, - "step": 18732 - }, - { - "epoch": 0.95, - "grad_norm": 0.9717764432364998, - "learning_rate": 1.1757624752793184e-07, - "loss": 0.1698, - "step": 18733 - }, - { - "epoch": 0.95, - "grad_norm": 1.1320680343929483, - "learning_rate": 1.1732456760907174e-07, - "loss": 0.1648, - "step": 18734 - }, - { - "epoch": 0.95, - "grad_norm": 0.9709646357714493, - "learning_rate": 1.1707315575834044e-07, - "loss": 0.1525, - "step": 18735 - }, - { - "epoch": 0.95, - "grad_norm": 0.8168264220250449, - "learning_rate": 1.1682201198255916e-07, - "loss": 0.1456, - "step": 18736 - }, - { - "epoch": 0.95, - "grad_norm": 0.953615837573349, - "learning_rate": 1.16571136288538e-07, - "loss": 0.1477, - "step": 18737 - }, - { - "epoch": 0.95, - "grad_norm": 1.1923327503063306, - "learning_rate": 1.1632052868308375e-07, - "loss": 0.1763, - "step": 18738 - }, - { - "epoch": 0.95, - "grad_norm": 1.0083921240352136, - "learning_rate": 1.1607018917299207e-07, - "loss": 0.1484, - "step": 18739 - }, - { - "epoch": 0.95, - "grad_norm": 1.3179243790087516, - "learning_rate": 1.158201177650553e-07, - "loss": 0.1573, - "step": 18740 - }, - { - "epoch": 0.95, - "grad_norm": 1.2500627713525843, - "learning_rate": 1.1557031446605693e-07, - "loss": 0.1505, - "step": 18741 - }, - { - "epoch": 0.95, - "grad_norm": 1.239246970452079, - "learning_rate": 1.1532077928277152e-07, - "loss": 0.1716, - "step": 18742 - }, - { - "epoch": 0.95, - "grad_norm": 0.8462484777876413, - "learning_rate": 1.1507151222196811e-07, - "loss": 0.1404, - "step": 18743 - }, - { - "epoch": 0.95, - "grad_norm": 1.059018702238596, - "learning_rate": 1.1482251329040795e-07, - "loss": 0.1819, - "step": 18744 - }, - { - "epoch": 0.95, - "grad_norm": 1.4355337392067744, - "learning_rate": 1.1457378249484674e-07, - "loss": 0.1705, - "step": 18745 - }, - { - "epoch": 0.95, - "grad_norm": 0.9243722354509532, - "learning_rate": 1.143253198420291e-07, - "loss": 0.1648, - "step": 18746 - }, - { - "epoch": 0.95, - "grad_norm": 1.7227231193621009, - "learning_rate": 1.1407712533869519e-07, - "loss": 0.152, - "step": 18747 - }, - { - "epoch": 0.95, - "grad_norm": 1.1571786371702186, - "learning_rate": 1.1382919899157852e-07, - "loss": 0.1618, - "step": 18748 - }, - { - "epoch": 0.95, - "grad_norm": 1.0542535821163654, - "learning_rate": 1.1358154080740147e-07, - "loss": 0.1438, - "step": 18749 - }, - { - "epoch": 0.95, - "grad_norm": 2.031459382470392, - "learning_rate": 1.1333415079288424e-07, - "loss": 0.1706, - "step": 18750 - }, - { - "epoch": 0.95, - "grad_norm": 1.1376771610307743, - "learning_rate": 1.130870289547381e-07, - "loss": 0.1569, - "step": 18751 - }, - { - "epoch": 0.95, - "grad_norm": 1.1361995973113577, - "learning_rate": 1.1284017529966329e-07, - "loss": 0.1689, - "step": 18752 - }, - { - "epoch": 0.95, - "grad_norm": 0.9430514064426113, - "learning_rate": 1.1259358983435775e-07, - "loss": 0.1825, - "step": 18753 - }, - { - "epoch": 0.95, - "grad_norm": 1.0092812343386752, - "learning_rate": 1.1234727256550837e-07, - "loss": 0.1763, - "step": 18754 - }, - { - "epoch": 0.95, - "grad_norm": 0.8164572156621775, - "learning_rate": 1.1210122349979979e-07, - "loss": 0.154, - "step": 18755 - }, - { - "epoch": 0.95, - "grad_norm": 1.327723266994876, - "learning_rate": 1.1185544264390225e-07, - "loss": 0.1733, - "step": 18756 - }, - { - "epoch": 0.95, - "grad_norm": 1.6508728885834751, - "learning_rate": 1.1160993000448372e-07, - "loss": 0.1735, - "step": 18757 - }, - { - "epoch": 0.95, - "grad_norm": 0.9180275397058472, - "learning_rate": 1.1136468558820668e-07, - "loss": 0.1582, - "step": 18758 - }, - { - "epoch": 0.95, - "grad_norm": 1.101850420181442, - "learning_rate": 1.1111970940171912e-07, - "loss": 0.1516, - "step": 18759 - }, - { - "epoch": 0.95, - "grad_norm": 1.1555854613546752, - "learning_rate": 1.1087500145166908e-07, - "loss": 0.1411, - "step": 18760 - }, - { - "epoch": 0.95, - "grad_norm": 1.2756787789402384, - "learning_rate": 1.1063056174469234e-07, - "loss": 0.1396, - "step": 18761 - }, - { - "epoch": 0.95, - "grad_norm": 1.4078874087782576, - "learning_rate": 1.1038639028742138e-07, - "loss": 0.194, - "step": 18762 - }, - { - "epoch": 0.95, - "grad_norm": 0.8496709042888108, - "learning_rate": 1.1014248708647645e-07, - "loss": 0.1247, - "step": 18763 - }, - { - "epoch": 0.95, - "grad_norm": 0.8844652923011713, - "learning_rate": 1.0989885214847673e-07, - "loss": 0.1586, - "step": 18764 - }, - { - "epoch": 0.95, - "grad_norm": 1.001028423834968, - "learning_rate": 1.0965548548002802e-07, - "loss": 0.1641, - "step": 18765 - }, - { - "epoch": 0.95, - "grad_norm": 0.865470853867464, - "learning_rate": 1.0941238708773283e-07, - "loss": 0.1806, - "step": 18766 - }, - { - "epoch": 0.95, - "grad_norm": 1.408923759241474, - "learning_rate": 1.0916955697818587e-07, - "loss": 0.1504, - "step": 18767 - }, - { - "epoch": 0.95, - "grad_norm": 1.2024386779570817, - "learning_rate": 1.0892699515797411e-07, - "loss": 0.1791, - "step": 18768 - }, - { - "epoch": 0.95, - "grad_norm": 1.0923015100986826, - "learning_rate": 1.086847016336745e-07, - "loss": 0.1392, - "step": 18769 - }, - { - "epoch": 0.95, - "grad_norm": 1.8647220030074458, - "learning_rate": 1.084426764118629e-07, - "loss": 0.1486, - "step": 18770 - }, - { - "epoch": 0.95, - "grad_norm": 1.7755338691389857, - "learning_rate": 1.0820091949910072e-07, - "loss": 0.1672, - "step": 18771 - }, - { - "epoch": 0.95, - "grad_norm": 0.8298954613901326, - "learning_rate": 1.0795943090194827e-07, - "loss": 0.1649, - "step": 18772 - }, - { - "epoch": 0.95, - "grad_norm": 0.9795338903069379, - "learning_rate": 1.0771821062695476e-07, - "loss": 0.1489, - "step": 18773 - }, - { - "epoch": 0.95, - "grad_norm": 1.0089380968964687, - "learning_rate": 1.0747725868066383e-07, - "loss": 0.151, - "step": 18774 - }, - { - "epoch": 0.95, - "grad_norm": 1.2822032644898074, - "learning_rate": 1.0723657506961027e-07, - "loss": 0.172, - "step": 18775 - }, - { - "epoch": 0.95, - "grad_norm": 1.7322298865698078, - "learning_rate": 1.069961598003233e-07, - "loss": 0.1599, - "step": 18776 - }, - { - "epoch": 0.95, - "grad_norm": 2.046246918063842, - "learning_rate": 1.0675601287932547e-07, - "loss": 0.1966, - "step": 18777 - }, - { - "epoch": 0.95, - "grad_norm": 2.328086242939519, - "learning_rate": 1.0651613431312824e-07, - "loss": 0.1464, - "step": 18778 - }, - { - "epoch": 0.95, - "grad_norm": 1.294347819359744, - "learning_rate": 1.0627652410823975e-07, - "loss": 0.1369, - "step": 18779 - }, - { - "epoch": 0.95, - "grad_norm": 0.7989102185537711, - "learning_rate": 1.0603718227116034e-07, - "loss": 0.1613, - "step": 18780 - }, - { - "epoch": 0.96, - "grad_norm": 1.036952896112565, - "learning_rate": 1.0579810880838037e-07, - "loss": 0.1639, - "step": 18781 - }, - { - "epoch": 0.96, - "grad_norm": 1.2476287988888424, - "learning_rate": 1.0555930372638578e-07, - "loss": 0.1744, - "step": 18782 - }, - { - "epoch": 0.96, - "grad_norm": 3.37060572453245, - "learning_rate": 1.0532076703165362e-07, - "loss": 0.1899, - "step": 18783 - }, - { - "epoch": 0.96, - "grad_norm": 0.9596618628758464, - "learning_rate": 1.0508249873065424e-07, - "loss": 0.15, - "step": 18784 - }, - { - "epoch": 0.96, - "grad_norm": 1.185689083060517, - "learning_rate": 1.0484449882985138e-07, - "loss": 0.1541, - "step": 18785 - }, - { - "epoch": 0.96, - "grad_norm": 1.8349670685171569, - "learning_rate": 1.0460676733570096e-07, - "loss": 0.166, - "step": 18786 - }, - { - "epoch": 0.96, - "grad_norm": 3.0295126853380023, - "learning_rate": 1.0436930425465008e-07, - "loss": 0.1874, - "step": 18787 - }, - { - "epoch": 0.96, - "grad_norm": 1.0593615747004992, - "learning_rate": 1.0413210959314135e-07, - "loss": 0.1595, - "step": 18788 - }, - { - "epoch": 0.96, - "grad_norm": 2.168075177134784, - "learning_rate": 1.038951833576074e-07, - "loss": 0.1723, - "step": 18789 - }, - { - "epoch": 0.96, - "grad_norm": 1.1218155857630336, - "learning_rate": 1.0365852555447642e-07, - "loss": 0.1753, - "step": 18790 - }, - { - "epoch": 0.96, - "grad_norm": 0.8050559077961082, - "learning_rate": 1.0342213619016661e-07, - "loss": 0.1587, - "step": 18791 - }, - { - "epoch": 0.96, - "grad_norm": 0.9181660602621501, - "learning_rate": 1.0318601527108952e-07, - "loss": 0.1496, - "step": 18792 - }, - { - "epoch": 0.96, - "grad_norm": 1.1033877829228513, - "learning_rate": 1.0295016280365111e-07, - "loss": 0.1379, - "step": 18793 - }, - { - "epoch": 0.96, - "grad_norm": 0.9590634235469165, - "learning_rate": 1.0271457879424851e-07, - "loss": 0.1546, - "step": 18794 - }, - { - "epoch": 0.96, - "grad_norm": 1.952709206529511, - "learning_rate": 1.0247926324927215e-07, - "loss": 0.1687, - "step": 18795 - }, - { - "epoch": 0.96, - "grad_norm": 1.1759733030062076, - "learning_rate": 1.0224421617510471e-07, - "loss": 0.1583, - "step": 18796 - }, - { - "epoch": 0.96, - "grad_norm": 1.2763520910574768, - "learning_rate": 1.020094375781222e-07, - "loss": 0.1571, - "step": 18797 - }, - { - "epoch": 0.96, - "grad_norm": 1.7511259754137039, - "learning_rate": 1.0177492746469286e-07, - "loss": 0.1676, - "step": 18798 - }, - { - "epoch": 0.96, - "grad_norm": 1.148516859847391, - "learning_rate": 1.0154068584117716e-07, - "loss": 0.1773, - "step": 18799 - }, - { - "epoch": 0.96, - "grad_norm": 0.9627330663214961, - "learning_rate": 1.0130671271392889e-07, - "loss": 0.141, - "step": 18800 - }, - { - "epoch": 0.96, - "grad_norm": 0.9745604777780793, - "learning_rate": 1.0107300808929522e-07, - "loss": 0.1718, - "step": 18801 - }, - { - "epoch": 0.96, - "grad_norm": 0.8737543564782753, - "learning_rate": 1.008395719736166e-07, - "loss": 0.1648, - "step": 18802 - }, - { - "epoch": 0.96, - "grad_norm": 1.0772967372661506, - "learning_rate": 1.0060640437322244e-07, - "loss": 0.1645, - "step": 18803 - }, - { - "epoch": 0.96, - "grad_norm": 1.4726543395679172, - "learning_rate": 1.003735052944399e-07, - "loss": 0.1848, - "step": 18804 - }, - { - "epoch": 0.96, - "grad_norm": 1.169918964022628, - "learning_rate": 1.0014087474358392e-07, - "loss": 0.1543, - "step": 18805 - }, - { - "epoch": 0.96, - "grad_norm": 2.216989418510401, - "learning_rate": 9.990851272696722e-08, - "loss": 0.1576, - "step": 18806 - }, - { - "epoch": 0.96, - "grad_norm": 1.1686919692769921, - "learning_rate": 9.967641925089033e-08, - "loss": 0.173, - "step": 18807 - }, - { - "epoch": 0.96, - "grad_norm": 1.4996681879717797, - "learning_rate": 9.944459432165044e-08, - "loss": 0.1664, - "step": 18808 - }, - { - "epoch": 0.96, - "grad_norm": 1.1045459523269272, - "learning_rate": 9.92130379455336e-08, - "loss": 0.1522, - "step": 18809 - }, - { - "epoch": 0.96, - "grad_norm": 0.9910006468676175, - "learning_rate": 9.89817501288226e-08, - "loss": 0.1373, - "step": 18810 - }, - { - "epoch": 0.96, - "grad_norm": 1.1419457094601917, - "learning_rate": 9.875073087779241e-08, - "loss": 0.1856, - "step": 18811 - }, - { - "epoch": 0.96, - "grad_norm": 1.0739981086468395, - "learning_rate": 9.85199801987069e-08, - "loss": 0.1716, - "step": 18812 - }, - { - "epoch": 0.96, - "grad_norm": 0.9680114664815531, - "learning_rate": 9.828949809782662e-08, - "loss": 0.1678, - "step": 18813 - }, - { - "epoch": 0.96, - "grad_norm": 1.0786498296590985, - "learning_rate": 9.805928458140212e-08, - "loss": 0.1591, - "step": 18814 - }, - { - "epoch": 0.96, - "grad_norm": 1.64021686468835, - "learning_rate": 9.782933965567953e-08, - "loss": 0.1626, - "step": 18815 - }, - { - "epoch": 0.96, - "grad_norm": 1.1278692654493154, - "learning_rate": 9.759966332689497e-08, - "loss": 0.1598, - "step": 18816 - }, - { - "epoch": 0.96, - "grad_norm": 1.1266880264484171, - "learning_rate": 9.737025560127899e-08, - "loss": 0.1598, - "step": 18817 - }, - { - "epoch": 0.96, - "grad_norm": 1.1920107831865574, - "learning_rate": 9.714111648505442e-08, - "loss": 0.1857, - "step": 18818 - }, - { - "epoch": 0.96, - "grad_norm": 1.1175544268483446, - "learning_rate": 9.691224598443515e-08, - "loss": 0.1506, - "step": 18819 - }, - { - "epoch": 0.96, - "grad_norm": 1.0988902168208523, - "learning_rate": 9.66836441056318e-08, - "loss": 0.1727, - "step": 18820 - }, - { - "epoch": 0.96, - "grad_norm": 0.826008174194182, - "learning_rate": 9.645531085484383e-08, - "loss": 0.1501, - "step": 18821 - }, - { - "epoch": 0.96, - "grad_norm": 4.292188236065506, - "learning_rate": 9.622724623826407e-08, - "loss": 0.1771, - "step": 18822 - }, - { - "epoch": 0.96, - "grad_norm": 1.4458397407326542, - "learning_rate": 9.59994502620809e-08, - "loss": 0.1767, - "step": 18823 - }, - { - "epoch": 0.96, - "grad_norm": 1.2335768493154349, - "learning_rate": 9.57719229324705e-08, - "loss": 0.17, - "step": 18824 - }, - { - "epoch": 0.96, - "grad_norm": 0.9917330277547299, - "learning_rate": 9.554466425560793e-08, - "loss": 0.1501, - "step": 18825 - }, - { - "epoch": 0.96, - "grad_norm": 0.9023946695552103, - "learning_rate": 9.531767423765381e-08, - "loss": 0.1746, - "step": 18826 - }, - { - "epoch": 0.96, - "grad_norm": 1.1679938172110933, - "learning_rate": 9.509095288476767e-08, - "loss": 0.1623, - "step": 18827 - }, - { - "epoch": 0.96, - "grad_norm": 1.3085712269367609, - "learning_rate": 9.486450020310011e-08, - "loss": 0.1536, - "step": 18828 - }, - { - "epoch": 0.96, - "grad_norm": 1.1529943097730873, - "learning_rate": 9.463831619879183e-08, - "loss": 0.1537, - "step": 18829 - }, - { - "epoch": 0.96, - "grad_norm": 1.0872766259105977, - "learning_rate": 9.441240087797787e-08, - "loss": 0.172, - "step": 18830 - }, - { - "epoch": 0.96, - "grad_norm": 1.0909881135030688, - "learning_rate": 9.41867542467878e-08, - "loss": 0.1737, - "step": 18831 - }, - { - "epoch": 0.96, - "grad_norm": 0.8379483903814336, - "learning_rate": 9.396137631134116e-08, - "loss": 0.1478, - "step": 18832 - }, - { - "epoch": 0.96, - "grad_norm": 1.3761318998390024, - "learning_rate": 9.373626707775196e-08, - "loss": 0.1652, - "step": 18833 - }, - { - "epoch": 0.96, - "grad_norm": 1.2279841676392231, - "learning_rate": 9.351142655212642e-08, - "loss": 0.1867, - "step": 18834 - }, - { - "epoch": 0.96, - "grad_norm": 1.0752650044873828, - "learning_rate": 9.328685474056187e-08, - "loss": 0.1638, - "step": 18835 - }, - { - "epoch": 0.96, - "grad_norm": 1.2379873953884408, - "learning_rate": 9.306255164915123e-08, - "loss": 0.1366, - "step": 18836 - }, - { - "epoch": 0.96, - "grad_norm": 1.2130097579213732, - "learning_rate": 9.283851728397853e-08, - "loss": 0.1628, - "step": 18837 - }, - { - "epoch": 0.96, - "grad_norm": 0.8947669982322405, - "learning_rate": 9.26147516511211e-08, - "loss": 0.1507, - "step": 18838 - }, - { - "epoch": 0.96, - "grad_norm": 1.0036150550246519, - "learning_rate": 9.239125475664746e-08, - "loss": 0.159, - "step": 18839 - }, - { - "epoch": 0.96, - "grad_norm": 1.0657812870787666, - "learning_rate": 9.216802660662161e-08, - "loss": 0.1743, - "step": 18840 - }, - { - "epoch": 0.96, - "grad_norm": 0.9154068268279001, - "learning_rate": 9.194506720709651e-08, - "loss": 0.1747, - "step": 18841 - }, - { - "epoch": 0.96, - "grad_norm": 0.9264247298220879, - "learning_rate": 9.172237656412175e-08, - "loss": 0.1458, - "step": 18842 - }, - { - "epoch": 0.96, - "grad_norm": 0.9801921323891072, - "learning_rate": 9.149995468373696e-08, - "loss": 0.1621, - "step": 18843 - }, - { - "epoch": 0.96, - "grad_norm": 1.1632621863958537, - "learning_rate": 9.127780157197619e-08, - "loss": 0.1676, - "step": 18844 - }, - { - "epoch": 0.96, - "grad_norm": 1.1317424244696488, - "learning_rate": 9.105591723486352e-08, - "loss": 0.1542, - "step": 18845 - }, - { - "epoch": 0.96, - "grad_norm": 1.0736880815431518, - "learning_rate": 9.083430167841856e-08, - "loss": 0.1511, - "step": 18846 - }, - { - "epoch": 0.96, - "grad_norm": 2.0161957878733108, - "learning_rate": 9.061295490865429e-08, - "loss": 0.1674, - "step": 18847 - }, - { - "epoch": 0.96, - "grad_norm": 0.7928674453212935, - "learning_rate": 9.039187693157147e-08, - "loss": 0.1398, - "step": 18848 - }, - { - "epoch": 0.96, - "grad_norm": 0.9045398606870544, - "learning_rate": 9.017106775317086e-08, - "loss": 0.1855, - "step": 18849 - }, - { - "epoch": 0.96, - "grad_norm": 1.0535097383038936, - "learning_rate": 8.995052737943766e-08, - "loss": 0.163, - "step": 18850 - }, - { - "epoch": 0.96, - "grad_norm": 1.4496573427182988, - "learning_rate": 8.973025581635819e-08, - "loss": 0.1817, - "step": 18851 - }, - { - "epoch": 0.96, - "grad_norm": 1.0777957220050927, - "learning_rate": 8.951025306990324e-08, - "loss": 0.1563, - "step": 18852 - }, - { - "epoch": 0.96, - "grad_norm": 1.0506377567143983, - "learning_rate": 8.929051914604359e-08, - "loss": 0.1916, - "step": 18853 - }, - { - "epoch": 0.96, - "grad_norm": 3.002996556765619, - "learning_rate": 8.907105405073779e-08, - "loss": 0.1759, - "step": 18854 - }, - { - "epoch": 0.96, - "grad_norm": 0.998730610245173, - "learning_rate": 8.885185778993999e-08, - "loss": 0.1495, - "step": 18855 - }, - { - "epoch": 0.96, - "grad_norm": 1.2495765988017125, - "learning_rate": 8.863293036959431e-08, - "loss": 0.165, - "step": 18856 - }, - { - "epoch": 0.96, - "grad_norm": 1.0130819408578617, - "learning_rate": 8.841427179564154e-08, - "loss": 0.146, - "step": 18857 - }, - { - "epoch": 0.96, - "grad_norm": 1.8272516627383144, - "learning_rate": 8.819588207401142e-08, - "loss": 0.1715, - "step": 18858 - }, - { - "epoch": 0.96, - "grad_norm": 0.9948517329878664, - "learning_rate": 8.797776121062696e-08, - "loss": 0.1534, - "step": 18859 - }, - { - "epoch": 0.96, - "grad_norm": 1.3309641860820505, - "learning_rate": 8.775990921140565e-08, - "loss": 0.1656, - "step": 18860 - }, - { - "epoch": 0.96, - "grad_norm": 1.444462781797579, - "learning_rate": 8.754232608225722e-08, - "loss": 0.1625, - "step": 18861 - }, - { - "epoch": 0.96, - "grad_norm": 0.8860214555939581, - "learning_rate": 8.732501182908249e-08, - "loss": 0.1835, - "step": 18862 - }, - { - "epoch": 0.96, - "grad_norm": 1.300447118415056, - "learning_rate": 8.710796645777674e-08, - "loss": 0.1569, - "step": 18863 - }, - { - "epoch": 0.96, - "grad_norm": 1.011564783440716, - "learning_rate": 8.689118997422752e-08, - "loss": 0.1413, - "step": 18864 - }, - { - "epoch": 0.96, - "grad_norm": 2.543969469262822, - "learning_rate": 8.667468238431453e-08, - "loss": 0.151, - "step": 18865 - }, - { - "epoch": 0.96, - "grad_norm": 1.6519119012480639, - "learning_rate": 8.645844369391088e-08, - "loss": 0.155, - "step": 18866 - }, - { - "epoch": 0.96, - "grad_norm": 0.790938070722103, - "learning_rate": 8.624247390888186e-08, - "loss": 0.1392, - "step": 18867 - }, - { - "epoch": 0.96, - "grad_norm": 2.509865696614438, - "learning_rate": 8.602677303508611e-08, - "loss": 0.1826, - "step": 18868 - }, - { - "epoch": 0.96, - "grad_norm": 1.4021515201833346, - "learning_rate": 8.581134107837341e-08, - "loss": 0.1487, - "step": 18869 - }, - { - "epoch": 0.96, - "grad_norm": 1.6080350955718345, - "learning_rate": 8.559617804458798e-08, - "loss": 0.1771, - "step": 18870 - }, - { - "epoch": 0.96, - "grad_norm": 0.9752358089227612, - "learning_rate": 8.538128393956624e-08, - "loss": 0.1662, - "step": 18871 - }, - { - "epoch": 0.96, - "grad_norm": 1.3198701123217553, - "learning_rate": 8.516665876913687e-08, - "loss": 0.1725, - "step": 18872 - }, - { - "epoch": 0.96, - "grad_norm": 0.9611312290629432, - "learning_rate": 8.49523025391219e-08, - "loss": 0.1626, - "step": 18873 - }, - { - "epoch": 0.96, - "grad_norm": 1.2329227273918022, - "learning_rate": 8.473821525533665e-08, - "loss": 0.1747, - "step": 18874 - }, - { - "epoch": 0.96, - "grad_norm": 0.9490149018446066, - "learning_rate": 8.452439692358649e-08, - "loss": 0.172, - "step": 18875 - }, - { - "epoch": 0.96, - "grad_norm": 1.0099470775860748, - "learning_rate": 8.431084754967345e-08, - "loss": 0.1378, - "step": 18876 - }, - { - "epoch": 0.96, - "grad_norm": 0.7531190523770079, - "learning_rate": 8.409756713938733e-08, - "loss": 0.1473, - "step": 18877 - }, - { - "epoch": 0.96, - "grad_norm": 1.0347859261232761, - "learning_rate": 8.388455569851461e-08, - "loss": 0.1719, - "step": 18878 - }, - { - "epoch": 0.96, - "grad_norm": 0.8585341785400499, - "learning_rate": 8.367181323283402e-08, - "loss": 0.1678, - "step": 18879 - }, - { - "epoch": 0.96, - "grad_norm": 0.9211259975521868, - "learning_rate": 8.345933974811537e-08, - "loss": 0.1711, - "step": 18880 - }, - { - "epoch": 0.96, - "grad_norm": 1.208502773218923, - "learning_rate": 8.324713525012185e-08, - "loss": 0.1451, - "step": 18881 - }, - { - "epoch": 0.96, - "grad_norm": 0.854440510579316, - "learning_rate": 8.303519974461106e-08, - "loss": 0.1565, - "step": 18882 - }, - { - "epoch": 0.96, - "grad_norm": 0.9862239698761738, - "learning_rate": 8.28235332373295e-08, - "loss": 0.1941, - "step": 18883 - }, - { - "epoch": 0.96, - "grad_norm": 1.5255599255822914, - "learning_rate": 8.261213573402038e-08, - "loss": 0.159, - "step": 18884 - }, - { - "epoch": 0.96, - "grad_norm": 2.6668881972462515, - "learning_rate": 8.240100724041689e-08, - "loss": 0.1566, - "step": 18885 - }, - { - "epoch": 0.96, - "grad_norm": 0.9173579531264667, - "learning_rate": 8.219014776224555e-08, - "loss": 0.1594, - "step": 18886 - }, - { - "epoch": 0.96, - "grad_norm": 1.4391060778764997, - "learning_rate": 8.197955730522733e-08, - "loss": 0.1432, - "step": 18887 - }, - { - "epoch": 0.96, - "grad_norm": 1.7719058427891368, - "learning_rate": 8.176923587507434e-08, - "loss": 0.1642, - "step": 18888 - }, - { - "epoch": 0.96, - "grad_norm": 0.9890541847862871, - "learning_rate": 8.155918347748981e-08, - "loss": 0.15, - "step": 18889 - }, - { - "epoch": 0.96, - "grad_norm": 1.2971062282347046, - "learning_rate": 8.13494001181725e-08, - "loss": 0.1396, - "step": 18890 - }, - { - "epoch": 0.96, - "grad_norm": 0.89297258092725, - "learning_rate": 8.113988580281451e-08, - "loss": 0.1593, - "step": 18891 - }, - { - "epoch": 0.96, - "grad_norm": 1.1064976604626795, - "learning_rate": 8.093064053709576e-08, - "loss": 0.1901, - "step": 18892 - }, - { - "epoch": 0.96, - "grad_norm": 0.9866375316992192, - "learning_rate": 8.072166432669503e-08, - "loss": 0.1433, - "step": 18893 - }, - { - "epoch": 0.96, - "grad_norm": 2.1940034352979834, - "learning_rate": 8.051295717727892e-08, - "loss": 0.1657, - "step": 18894 - }, - { - "epoch": 0.96, - "grad_norm": 1.1200969835311203, - "learning_rate": 8.030451909450842e-08, - "loss": 0.1698, - "step": 18895 - }, - { - "epoch": 0.96, - "grad_norm": 1.4641811009107635, - "learning_rate": 8.009635008403904e-08, - "loss": 0.1399, - "step": 18896 - }, - { - "epoch": 0.96, - "grad_norm": 0.9307287020748609, - "learning_rate": 7.988845015151513e-08, - "loss": 0.1617, - "step": 18897 - }, - { - "epoch": 0.96, - "grad_norm": 1.2935854651952166, - "learning_rate": 7.968081930257887e-08, - "loss": 0.1524, - "step": 18898 - }, - { - "epoch": 0.96, - "grad_norm": 1.1529460938502556, - "learning_rate": 7.947345754285906e-08, - "loss": 0.1661, - "step": 18899 - }, - { - "epoch": 0.96, - "grad_norm": 1.6332754029727536, - "learning_rate": 7.926636487798345e-08, - "loss": 0.1597, - "step": 18900 - }, - { - "epoch": 0.96, - "grad_norm": 0.9341773562612538, - "learning_rate": 7.905954131356752e-08, - "loss": 0.1714, - "step": 18901 - }, - { - "epoch": 0.96, - "grad_norm": 0.7598173351821158, - "learning_rate": 7.885298685522235e-08, - "loss": 0.1494, - "step": 18902 - }, - { - "epoch": 0.96, - "grad_norm": 0.9162616507880349, - "learning_rate": 7.864670150854903e-08, - "loss": 0.1413, - "step": 18903 - }, - { - "epoch": 0.96, - "grad_norm": 0.9287105319838184, - "learning_rate": 7.844068527914528e-08, - "loss": 0.1839, - "step": 18904 - }, - { - "epoch": 0.96, - "grad_norm": 1.144598902170974, - "learning_rate": 7.823493817259776e-08, - "loss": 0.1772, - "step": 18905 - }, - { - "epoch": 0.96, - "grad_norm": 1.147242897737404, - "learning_rate": 7.802946019448864e-08, - "loss": 0.1854, - "step": 18906 - }, - { - "epoch": 0.96, - "grad_norm": 1.2450550086501007, - "learning_rate": 7.782425135039018e-08, - "loss": 0.1666, - "step": 18907 - }, - { - "epoch": 0.96, - "grad_norm": 1.0442645415313312, - "learning_rate": 7.7619311645869e-08, - "loss": 0.1513, - "step": 18908 - }, - { - "epoch": 0.96, - "grad_norm": 0.9659641393581991, - "learning_rate": 7.741464108648511e-08, - "loss": 0.1479, - "step": 18909 - }, - { - "epoch": 0.96, - "grad_norm": 1.0802928177377895, - "learning_rate": 7.721023967778962e-08, - "loss": 0.1478, - "step": 18910 - }, - { - "epoch": 0.96, - "grad_norm": 1.2944293111076215, - "learning_rate": 7.700610742532588e-08, - "loss": 0.165, - "step": 18911 - }, - { - "epoch": 0.96, - "grad_norm": 1.1730839439653735, - "learning_rate": 7.68022443346328e-08, - "loss": 0.1776, - "step": 18912 - }, - { - "epoch": 0.96, - "grad_norm": 0.9910193731834973, - "learning_rate": 7.659865041123926e-08, - "loss": 0.1662, - "step": 18913 - }, - { - "epoch": 0.96, - "grad_norm": 1.056809813408625, - "learning_rate": 7.639532566066755e-08, - "loss": 0.1683, - "step": 18914 - }, - { - "epoch": 0.96, - "grad_norm": 1.0770889532907564, - "learning_rate": 7.619227008843322e-08, - "loss": 0.1468, - "step": 18915 - }, - { - "epoch": 0.96, - "grad_norm": 1.033875052227631, - "learning_rate": 7.598948370004412e-08, - "loss": 0.1914, - "step": 18916 - }, - { - "epoch": 0.96, - "grad_norm": 1.0503186323084912, - "learning_rate": 7.578696650100025e-08, - "loss": 0.1652, - "step": 18917 - }, - { - "epoch": 0.96, - "grad_norm": 1.061638624902994, - "learning_rate": 7.558471849679616e-08, - "loss": 0.1609, - "step": 18918 - }, - { - "epoch": 0.96, - "grad_norm": 0.874059333965608, - "learning_rate": 7.538273969291743e-08, - "loss": 0.1628, - "step": 18919 - }, - { - "epoch": 0.96, - "grad_norm": 0.9470801203586013, - "learning_rate": 7.518103009484079e-08, - "loss": 0.1403, - "step": 18920 - }, - { - "epoch": 0.96, - "grad_norm": 1.200812196227737, - "learning_rate": 7.497958970804076e-08, - "loss": 0.1667, - "step": 18921 - }, - { - "epoch": 0.96, - "grad_norm": 1.0078825382674224, - "learning_rate": 7.477841853797963e-08, - "loss": 0.1674, - "step": 18922 - }, - { - "epoch": 0.96, - "grad_norm": 1.1759053898946894, - "learning_rate": 7.457751659011414e-08, - "loss": 0.1676, - "step": 18923 - }, - { - "epoch": 0.96, - "grad_norm": 0.8222529337220591, - "learning_rate": 7.437688386989438e-08, - "loss": 0.1599, - "step": 18924 - }, - { - "epoch": 0.96, - "grad_norm": 0.8533396031334325, - "learning_rate": 7.417652038276157e-08, - "loss": 0.15, - "step": 18925 - }, - { - "epoch": 0.96, - "grad_norm": 1.1056761413519485, - "learning_rate": 7.397642613415245e-08, - "loss": 0.1585, - "step": 18926 - }, - { - "epoch": 0.96, - "grad_norm": 1.3920512039464155, - "learning_rate": 7.37766011294927e-08, - "loss": 0.1554, - "step": 18927 - }, - { - "epoch": 0.96, - "grad_norm": 1.5934123887083915, - "learning_rate": 7.357704537420351e-08, - "loss": 0.1708, - "step": 18928 - }, - { - "epoch": 0.96, - "grad_norm": 1.2091051382121403, - "learning_rate": 7.337775887369725e-08, - "loss": 0.1646, - "step": 18929 - }, - { - "epoch": 0.96, - "grad_norm": 1.148339743268307, - "learning_rate": 7.317874163338068e-08, - "loss": 0.1679, - "step": 18930 - }, - { - "epoch": 0.96, - "grad_norm": 1.0786303167088929, - "learning_rate": 7.29799936586506e-08, - "loss": 0.1727, - "step": 18931 - }, - { - "epoch": 0.96, - "grad_norm": 1.132990580590024, - "learning_rate": 7.278151495489938e-08, - "loss": 0.1621, - "step": 18932 - }, - { - "epoch": 0.96, - "grad_norm": 0.839523001421132, - "learning_rate": 7.258330552750936e-08, - "loss": 0.1708, - "step": 18933 - }, - { - "epoch": 0.96, - "grad_norm": 2.1078873139031478, - "learning_rate": 7.238536538185848e-08, - "loss": 0.1629, - "step": 18934 - }, - { - "epoch": 0.96, - "grad_norm": 1.149341920558963, - "learning_rate": 7.218769452331576e-08, - "loss": 0.1722, - "step": 18935 - }, - { - "epoch": 0.96, - "grad_norm": 1.08472091194159, - "learning_rate": 7.199029295724247e-08, - "loss": 0.1674, - "step": 18936 - }, - { - "epoch": 0.96, - "grad_norm": 1.229463761384355, - "learning_rate": 7.179316068899211e-08, - "loss": 0.1659, - "step": 18937 - }, - { - "epoch": 0.96, - "grad_norm": 1.4155063754897228, - "learning_rate": 7.159629772391485e-08, - "loss": 0.1902, - "step": 18938 - }, - { - "epoch": 0.96, - "grad_norm": 0.9655177285562941, - "learning_rate": 7.139970406734753e-08, - "loss": 0.1753, - "step": 18939 - }, - { - "epoch": 0.96, - "grad_norm": 1.118473850390554, - "learning_rate": 7.120337972462365e-08, - "loss": 0.1529, - "step": 18940 - }, - { - "epoch": 0.96, - "grad_norm": 1.241518882169247, - "learning_rate": 7.100732470107008e-08, - "loss": 0.156, - "step": 18941 - }, - { - "epoch": 0.96, - "grad_norm": 0.913325622858883, - "learning_rate": 7.081153900200255e-08, - "loss": 0.1557, - "step": 18942 - }, - { - "epoch": 0.96, - "grad_norm": 1.2350006356643224, - "learning_rate": 7.061602263273237e-08, - "loss": 0.172, - "step": 18943 - }, - { - "epoch": 0.96, - "grad_norm": 1.1409555515039296, - "learning_rate": 7.04207755985653e-08, - "loss": 0.159, - "step": 18944 - }, - { - "epoch": 0.96, - "grad_norm": 0.8621408823943831, - "learning_rate": 7.022579790479378e-08, - "loss": 0.1437, - "step": 18945 - }, - { - "epoch": 0.96, - "grad_norm": 0.8696956434949746, - "learning_rate": 7.003108955670911e-08, - "loss": 0.1808, - "step": 18946 - }, - { - "epoch": 0.96, - "grad_norm": 1.0370223435433659, - "learning_rate": 6.983665055959154e-08, - "loss": 0.1462, - "step": 18947 - }, - { - "epoch": 0.96, - "grad_norm": 1.0713365572057763, - "learning_rate": 6.964248091871683e-08, - "loss": 0.1703, - "step": 18948 - }, - { - "epoch": 0.96, - "grad_norm": 0.9865314911287251, - "learning_rate": 6.944858063934967e-08, - "loss": 0.1432, - "step": 18949 - }, - { - "epoch": 0.96, - "grad_norm": 1.0416169563655078, - "learning_rate": 6.925494972675029e-08, - "loss": 0.1425, - "step": 18950 - }, - { - "epoch": 0.96, - "grad_norm": 1.2588921376373747, - "learning_rate": 6.906158818617226e-08, - "loss": 0.1717, - "step": 18951 - }, - { - "epoch": 0.96, - "grad_norm": 1.0330787653455085, - "learning_rate": 6.886849602285916e-08, - "loss": 0.1566, - "step": 18952 - }, - { - "epoch": 0.96, - "grad_norm": 1.0131289400465249, - "learning_rate": 6.867567324204905e-08, - "loss": 0.1638, - "step": 18953 - }, - { - "epoch": 0.96, - "grad_norm": 0.8849718675951277, - "learning_rate": 6.848311984897216e-08, - "loss": 0.1354, - "step": 18954 - }, - { - "epoch": 0.96, - "grad_norm": 1.2590838070351569, - "learning_rate": 6.82908358488521e-08, - "loss": 0.1656, - "step": 18955 - }, - { - "epoch": 0.96, - "grad_norm": 2.542926066213727, - "learning_rate": 6.809882124690358e-08, - "loss": 0.1639, - "step": 18956 - }, - { - "epoch": 0.96, - "grad_norm": 1.3956886472564511, - "learning_rate": 6.79070760483358e-08, - "loss": 0.1853, - "step": 18957 - }, - { - "epoch": 0.96, - "grad_norm": 1.3101703160806484, - "learning_rate": 6.771560025834901e-08, - "loss": 0.1963, - "step": 18958 - }, - { - "epoch": 0.96, - "grad_norm": 1.5623705229321303, - "learning_rate": 6.752439388213682e-08, - "loss": 0.1965, - "step": 18959 - }, - { - "epoch": 0.96, - "grad_norm": 1.12056540940211, - "learning_rate": 6.733345692488736e-08, - "loss": 0.1622, - "step": 18960 - }, - { - "epoch": 0.96, - "grad_norm": 1.0543695819488192, - "learning_rate": 6.714278939177976e-08, - "loss": 0.1504, - "step": 18961 - }, - { - "epoch": 0.96, - "grad_norm": 1.2197686288091252, - "learning_rate": 6.695239128798325e-08, - "loss": 0.1545, - "step": 18962 - }, - { - "epoch": 0.96, - "grad_norm": 0.819372886682206, - "learning_rate": 6.676226261866591e-08, - "loss": 0.1587, - "step": 18963 - }, - { - "epoch": 0.96, - "grad_norm": 1.1616646556897137, - "learning_rate": 6.657240338898141e-08, - "loss": 0.1658, - "step": 18964 - }, - { - "epoch": 0.96, - "grad_norm": 1.4269534756249098, - "learning_rate": 6.638281360408339e-08, - "loss": 0.1673, - "step": 18965 - }, - { - "epoch": 0.96, - "grad_norm": 1.578288524551596, - "learning_rate": 6.619349326911218e-08, - "loss": 0.143, - "step": 18966 - }, - { - "epoch": 0.96, - "grad_norm": 1.2967738604121724, - "learning_rate": 6.600444238920256e-08, - "loss": 0.1589, - "step": 18967 - }, - { - "epoch": 0.96, - "grad_norm": 0.9378892547102705, - "learning_rate": 6.581566096948377e-08, - "loss": 0.1514, - "step": 18968 - }, - { - "epoch": 0.96, - "grad_norm": 1.0222674207787896, - "learning_rate": 6.562714901507616e-08, - "loss": 0.1551, - "step": 18969 - }, - { - "epoch": 0.96, - "grad_norm": 1.4522802005044373, - "learning_rate": 6.543890653109341e-08, - "loss": 0.1993, - "step": 18970 - }, - { - "epoch": 0.96, - "grad_norm": 1.9929201580900462, - "learning_rate": 6.525093352264145e-08, - "loss": 0.1565, - "step": 18971 - }, - { - "epoch": 0.96, - "grad_norm": 1.2075074305444102, - "learning_rate": 6.506322999481951e-08, - "loss": 0.1568, - "step": 18972 - }, - { - "epoch": 0.96, - "grad_norm": 0.9119396547683619, - "learning_rate": 6.487579595271798e-08, - "loss": 0.1385, - "step": 18973 - }, - { - "epoch": 0.96, - "grad_norm": 0.9468835404050955, - "learning_rate": 6.468863140142279e-08, - "loss": 0.1522, - "step": 18974 - }, - { - "epoch": 0.96, - "grad_norm": 1.0360263939289716, - "learning_rate": 6.450173634600876e-08, - "loss": 0.1544, - "step": 18975 - }, - { - "epoch": 0.96, - "grad_norm": 0.9384445234929053, - "learning_rate": 6.431511079154517e-08, - "loss": 0.1421, - "step": 18976 - }, - { - "epoch": 0.97, - "grad_norm": 0.9717830054475515, - "learning_rate": 6.412875474309688e-08, - "loss": 0.143, - "step": 18977 - }, - { - "epoch": 0.97, - "grad_norm": 1.242839667500306, - "learning_rate": 6.394266820571649e-08, - "loss": 0.1657, - "step": 18978 - }, - { - "epoch": 0.97, - "grad_norm": 0.8385638418607924, - "learning_rate": 6.375685118445329e-08, - "loss": 0.1526, - "step": 18979 - }, - { - "epoch": 0.97, - "grad_norm": 1.0274904096006774, - "learning_rate": 6.35713036843455e-08, - "loss": 0.1462, - "step": 18980 - }, - { - "epoch": 0.97, - "grad_norm": 0.955038906916783, - "learning_rate": 6.338602571042795e-08, - "loss": 0.1813, - "step": 18981 - }, - { - "epoch": 0.97, - "grad_norm": 1.4956850451648465, - "learning_rate": 6.320101726772665e-08, - "loss": 0.1594, - "step": 18982 - }, - { - "epoch": 0.97, - "grad_norm": 1.2377378344046592, - "learning_rate": 6.301627836125757e-08, - "loss": 0.1779, - "step": 18983 - }, - { - "epoch": 0.97, - "grad_norm": 0.9260561719978769, - "learning_rate": 6.283180899603447e-08, - "loss": 0.171, - "step": 18984 - }, - { - "epoch": 0.97, - "grad_norm": 0.9400856349983138, - "learning_rate": 6.264760917705782e-08, - "loss": 0.1605, - "step": 18985 - }, - { - "epoch": 0.97, - "grad_norm": 1.4391237828833978, - "learning_rate": 6.246367890932803e-08, - "loss": 0.1747, - "step": 18986 - }, - { - "epoch": 0.97, - "grad_norm": 1.0650353212909858, - "learning_rate": 6.228001819783113e-08, - "loss": 0.1503, - "step": 18987 - }, - { - "epoch": 0.97, - "grad_norm": 1.2264828665242962, - "learning_rate": 6.209662704754982e-08, - "loss": 0.1679, - "step": 18988 - }, - { - "epoch": 0.97, - "grad_norm": 1.2608897282732527, - "learning_rate": 6.191350546346008e-08, - "loss": 0.1607, - "step": 18989 - }, - { - "epoch": 0.97, - "grad_norm": 1.0195823417707264, - "learning_rate": 6.173065345052687e-08, - "loss": 0.1584, - "step": 18990 - }, - { - "epoch": 0.97, - "grad_norm": 3.3217468456080654, - "learning_rate": 6.154807101371063e-08, - "loss": 0.1415, - "step": 18991 - }, - { - "epoch": 0.97, - "grad_norm": 1.3982193219953636, - "learning_rate": 6.136575815796519e-08, - "loss": 0.1604, - "step": 18992 - }, - { - "epoch": 0.97, - "grad_norm": 1.039154864355294, - "learning_rate": 6.118371488823439e-08, - "loss": 0.171, - "step": 18993 - }, - { - "epoch": 0.97, - "grad_norm": 1.449132551538369, - "learning_rate": 6.100194120945645e-08, - "loss": 0.185, - "step": 18994 - }, - { - "epoch": 0.97, - "grad_norm": 1.7339355163445271, - "learning_rate": 6.082043712656305e-08, - "loss": 0.1715, - "step": 18995 - }, - { - "epoch": 0.97, - "grad_norm": 2.6051479995279934, - "learning_rate": 6.063920264447576e-08, - "loss": 0.1468, - "step": 18996 - }, - { - "epoch": 0.97, - "grad_norm": 1.0033063919052598, - "learning_rate": 6.045823776811177e-08, - "loss": 0.1525, - "step": 18997 - }, - { - "epoch": 0.97, - "grad_norm": 0.9764565540340898, - "learning_rate": 6.027754250237938e-08, - "loss": 0.1755, - "step": 18998 - }, - { - "epoch": 0.97, - "grad_norm": 1.3922419999429214, - "learning_rate": 6.009711685218134e-08, - "loss": 0.1729, - "step": 18999 - }, - { - "epoch": 0.97, - "grad_norm": 1.1355283592252898, - "learning_rate": 5.991696082240817e-08, - "loss": 0.1741, - "step": 19000 - }, - { - "epoch": 0.97, - "grad_norm": 1.0226456454781145, - "learning_rate": 5.97370744179504e-08, - "loss": 0.1596, - "step": 19001 - }, - { - "epoch": 0.97, - "grad_norm": 1.022674255084776, - "learning_rate": 5.9557457643685257e-08, - "loss": 0.1534, - "step": 19002 - }, - { - "epoch": 0.97, - "grad_norm": 1.3787117157844775, - "learning_rate": 5.93781105044855e-08, - "loss": 0.1637, - "step": 19003 - }, - { - "epoch": 0.97, - "grad_norm": 1.5550964475153704, - "learning_rate": 5.9199033005217233e-08, - "loss": 0.157, - "step": 19004 - }, - { - "epoch": 0.97, - "grad_norm": 1.2498322296452542, - "learning_rate": 5.9020225150735466e-08, - "loss": 0.1518, - "step": 19005 - }, - { - "epoch": 0.97, - "grad_norm": 2.5263437302645886, - "learning_rate": 5.8841686945891874e-08, - "loss": 0.1414, - "step": 19006 - }, - { - "epoch": 0.97, - "grad_norm": 1.0709694859747907, - "learning_rate": 5.866341839552814e-08, - "loss": 0.1782, - "step": 19007 - }, - { - "epoch": 0.97, - "grad_norm": 1.4903520322067012, - "learning_rate": 5.848541950448261e-08, - "loss": 0.1648, - "step": 19008 - }, - { - "epoch": 0.97, - "grad_norm": 1.9290190400309384, - "learning_rate": 5.830769027757921e-08, - "loss": 0.1842, - "step": 19009 - }, - { - "epoch": 0.97, - "grad_norm": 1.1113056250270656, - "learning_rate": 5.813023071964296e-08, - "loss": 0.1524, - "step": 19010 - }, - { - "epoch": 0.97, - "grad_norm": 0.8503388097141104, - "learning_rate": 5.795304083548559e-08, - "loss": 0.1451, - "step": 19011 - }, - { - "epoch": 0.97, - "grad_norm": 1.0071646211516418, - "learning_rate": 5.777612062991211e-08, - "loss": 0.1571, - "step": 19012 - }, - { - "epoch": 0.97, - "grad_norm": 0.9059092713987779, - "learning_rate": 5.759947010772426e-08, - "loss": 0.1576, - "step": 19013 - }, - { - "epoch": 0.97, - "grad_norm": 0.9766952791494546, - "learning_rate": 5.742308927371154e-08, - "loss": 0.1701, - "step": 19014 - }, - { - "epoch": 0.97, - "grad_norm": 0.9185439725383592, - "learning_rate": 5.7246978132659e-08, - "loss": 0.148, - "step": 19015 - }, - { - "epoch": 0.97, - "grad_norm": 1.419729476220642, - "learning_rate": 5.707113668934505e-08, - "loss": 0.1695, - "step": 19016 - }, - { - "epoch": 0.97, - "grad_norm": 1.1401721563452156, - "learning_rate": 5.6895564948536984e-08, - "loss": 0.1611, - "step": 19017 - }, - { - "epoch": 0.97, - "grad_norm": 1.053616248533265, - "learning_rate": 5.672026291499877e-08, - "loss": 0.1598, - "step": 19018 - }, - { - "epoch": 0.97, - "grad_norm": 1.463192697506729, - "learning_rate": 5.6545230593484376e-08, - "loss": 0.1678, - "step": 19019 - }, - { - "epoch": 0.97, - "grad_norm": 0.9548696767406545, - "learning_rate": 5.637046798874335e-08, - "loss": 0.1494, - "step": 19020 - }, - { - "epoch": 0.97, - "grad_norm": 1.0769999525229907, - "learning_rate": 5.619597510551411e-08, - "loss": 0.1516, - "step": 19021 - }, - { - "epoch": 0.97, - "grad_norm": 1.1269093531874508, - "learning_rate": 5.6021751948530656e-08, - "loss": 0.1603, - "step": 19022 - }, - { - "epoch": 0.97, - "grad_norm": 0.9860154062923643, - "learning_rate": 5.58477985225192e-08, - "loss": 0.1511, - "step": 19023 - }, - { - "epoch": 0.97, - "grad_norm": 1.3408468901522745, - "learning_rate": 5.567411483219709e-08, - "loss": 0.1514, - "step": 19024 - }, - { - "epoch": 0.97, - "grad_norm": 1.0965825124126323, - "learning_rate": 5.550070088227721e-08, - "loss": 0.1573, - "step": 19025 - }, - { - "epoch": 0.97, - "grad_norm": 1.023824284466048, - "learning_rate": 5.532755667746248e-08, - "loss": 0.1946, - "step": 19026 - }, - { - "epoch": 0.97, - "grad_norm": 1.0445023365005794, - "learning_rate": 5.515468222244913e-08, - "loss": 0.1519, - "step": 19027 - }, - { - "epoch": 0.97, - "grad_norm": 0.7889943973151463, - "learning_rate": 5.4982077521926744e-08, - "loss": 0.1603, - "step": 19028 - }, - { - "epoch": 0.97, - "grad_norm": 1.113921879902848, - "learning_rate": 5.4809742580577144e-08, - "loss": 0.1807, - "step": 19029 - }, - { - "epoch": 0.97, - "grad_norm": 1.2384246609536174, - "learning_rate": 5.4637677403074355e-08, - "loss": 0.1507, - "step": 19030 - }, - { - "epoch": 0.97, - "grad_norm": 1.1053062410513481, - "learning_rate": 5.4465881994087976e-08, - "loss": 0.159, - "step": 19031 - }, - { - "epoch": 0.97, - "grad_norm": 1.317934675993351, - "learning_rate": 5.4294356358274294e-08, - "loss": 0.1587, - "step": 19032 - }, - { - "epoch": 0.97, - "grad_norm": 1.0281026923641639, - "learning_rate": 5.4123100500289574e-08, - "loss": 0.1613, - "step": 19033 - }, - { - "epoch": 0.97, - "grad_norm": 1.385724210989773, - "learning_rate": 5.395211442477677e-08, - "loss": 0.1577, - "step": 19034 - }, - { - "epoch": 0.97, - "grad_norm": 1.002683221703717, - "learning_rate": 5.3781398136374394e-08, - "loss": 0.1447, - "step": 19035 - }, - { - "epoch": 0.97, - "grad_norm": 1.3531188851630518, - "learning_rate": 5.3610951639714305e-08, - "loss": 0.1619, - "step": 19036 - }, - { - "epoch": 0.97, - "grad_norm": 1.7890219249827146, - "learning_rate": 5.344077493941835e-08, - "loss": 0.1771, - "step": 19037 - }, - { - "epoch": 0.97, - "grad_norm": 1.1012008101422308, - "learning_rate": 5.327086804010284e-08, - "loss": 0.1614, - "step": 19038 - }, - { - "epoch": 0.97, - "grad_norm": 1.0428208001568597, - "learning_rate": 5.3101230946376314e-08, - "loss": 0.1754, - "step": 19039 - }, - { - "epoch": 0.97, - "grad_norm": 1.026144332646727, - "learning_rate": 5.2931863662841755e-08, - "loss": 0.1688, - "step": 19040 - }, - { - "epoch": 0.97, - "grad_norm": 1.603608003297128, - "learning_rate": 5.276276619409215e-08, - "loss": 0.1589, - "step": 19041 - }, - { - "epoch": 0.97, - "grad_norm": 1.1969975578810148, - "learning_rate": 5.259393854471384e-08, - "loss": 0.1777, - "step": 19042 - }, - { - "epoch": 0.97, - "grad_norm": 1.0041848312681114, - "learning_rate": 5.242538071928649e-08, - "loss": 0.174, - "step": 19043 - }, - { - "epoch": 0.97, - "grad_norm": 1.2724602233865854, - "learning_rate": 5.225709272238311e-08, - "loss": 0.1433, - "step": 19044 - }, - { - "epoch": 0.97, - "grad_norm": 1.0626391350708608, - "learning_rate": 5.208907455856782e-08, - "loss": 0.1853, - "step": 19045 - }, - { - "epoch": 0.97, - "grad_norm": 0.9280447894665182, - "learning_rate": 5.19213262323981e-08, - "loss": 0.1554, - "step": 19046 - }, - { - "epoch": 0.97, - "grad_norm": 1.4995114449645333, - "learning_rate": 5.1753847748424735e-08, - "loss": 0.1609, - "step": 19047 - }, - { - "epoch": 0.97, - "grad_norm": 0.8714223222188999, - "learning_rate": 5.158663911118966e-08, - "loss": 0.1459, - "step": 19048 - }, - { - "epoch": 0.97, - "grad_norm": 1.1422759045113517, - "learning_rate": 5.141970032522925e-08, - "loss": 0.1769, - "step": 19049 - }, - { - "epoch": 0.97, - "grad_norm": 0.9078013270129156, - "learning_rate": 5.1253031395070985e-08, - "loss": 0.1561, - "step": 19050 - }, - { - "epoch": 0.97, - "grad_norm": 1.4725725563008483, - "learning_rate": 5.1086632325236804e-08, - "loss": 0.177, - "step": 19051 - }, - { - "epoch": 0.97, - "grad_norm": 1.0447898313350958, - "learning_rate": 5.0920503120239775e-08, - "loss": 0.1582, - "step": 19052 - }, - { - "epoch": 0.97, - "grad_norm": 2.403622306022244, - "learning_rate": 5.075464378458517e-08, - "loss": 0.1592, - "step": 19053 - }, - { - "epoch": 0.97, - "grad_norm": 1.2749760646824433, - "learning_rate": 5.0589054322772725e-08, - "loss": 0.1795, - "step": 19054 - }, - { - "epoch": 0.97, - "grad_norm": 1.151845211047926, - "learning_rate": 5.04237347392944e-08, - "loss": 0.1592, - "step": 19055 - }, - { - "epoch": 0.97, - "grad_norm": 1.0014197338720585, - "learning_rate": 5.0258685038634406e-08, - "loss": 0.151, - "step": 19056 - }, - { - "epoch": 0.97, - "grad_norm": 1.381306982875491, - "learning_rate": 5.009390522526914e-08, - "loss": 0.1544, - "step": 19057 - }, - { - "epoch": 0.97, - "grad_norm": 1.6958918491548332, - "learning_rate": 4.992939530366947e-08, - "loss": 0.1547, - "step": 19058 - }, - { - "epoch": 0.97, - "grad_norm": 2.4727261828084512, - "learning_rate": 4.9765155278296284e-08, - "loss": 0.1544, - "step": 19059 - }, - { - "epoch": 0.97, - "grad_norm": 2.328137183771419, - "learning_rate": 4.96011851536049e-08, - "loss": 0.169, - "step": 19060 - }, - { - "epoch": 0.97, - "grad_norm": 1.162867331490707, - "learning_rate": 4.9437484934043987e-08, - "loss": 0.1507, - "step": 19061 - }, - { - "epoch": 0.97, - "grad_norm": 0.9319785655569531, - "learning_rate": 4.927405462405332e-08, - "loss": 0.1596, - "step": 19062 - }, - { - "epoch": 0.97, - "grad_norm": 1.1297107255840486, - "learning_rate": 4.911089422806603e-08, - "loss": 0.1448, - "step": 19063 - }, - { - "epoch": 0.97, - "grad_norm": 1.0959641369353819, - "learning_rate": 4.8948003750507455e-08, - "loss": 0.1556, - "step": 19064 - }, - { - "epoch": 0.97, - "grad_norm": 1.4195341111796325, - "learning_rate": 4.878538319579629e-08, - "loss": 0.1801, - "step": 19065 - }, - { - "epoch": 0.97, - "grad_norm": 1.3786838067360543, - "learning_rate": 4.862303256834344e-08, - "loss": 0.173, - "step": 19066 - }, - { - "epoch": 0.97, - "grad_norm": 2.551126828679602, - "learning_rate": 4.846095187255318e-08, - "loss": 0.1725, - "step": 19067 - }, - { - "epoch": 0.97, - "grad_norm": 1.0936716110028684, - "learning_rate": 4.8299141112821966e-08, - "loss": 0.1677, - "step": 19068 - }, - { - "epoch": 0.97, - "grad_norm": 1.1227017299371511, - "learning_rate": 4.8137600293538536e-08, - "loss": 0.1773, - "step": 19069 - }, - { - "epoch": 0.97, - "grad_norm": 1.021515630146002, - "learning_rate": 4.7976329419084924e-08, - "loss": 0.1722, - "step": 19070 - }, - { - "epoch": 0.97, - "grad_norm": 1.1159643409886524, - "learning_rate": 4.781532849383541e-08, - "loss": 0.1559, - "step": 19071 - }, - { - "epoch": 0.97, - "grad_norm": 2.508985334043882, - "learning_rate": 4.765459752215651e-08, - "loss": 0.1624, - "step": 19072 - }, - { - "epoch": 0.97, - "grad_norm": 1.4314591906020395, - "learning_rate": 4.749413650840917e-08, - "loss": 0.1639, - "step": 19073 - }, - { - "epoch": 0.97, - "grad_norm": 1.1081591206295462, - "learning_rate": 4.7333945456945474e-08, - "loss": 0.1629, - "step": 19074 - }, - { - "epoch": 0.97, - "grad_norm": 0.9484977524062326, - "learning_rate": 4.717402437211083e-08, - "loss": 0.1546, - "step": 19075 - }, - { - "epoch": 0.97, - "grad_norm": 1.032321978750882, - "learning_rate": 4.701437325824287e-08, - "loss": 0.1548, - "step": 19076 - }, - { - "epoch": 0.97, - "grad_norm": 1.0338695817692656, - "learning_rate": 4.6854992119671484e-08, - "loss": 0.1546, - "step": 19077 - }, - { - "epoch": 0.97, - "grad_norm": 0.8415263499415587, - "learning_rate": 4.6695880960722085e-08, - "loss": 0.1634, - "step": 19078 - }, - { - "epoch": 0.97, - "grad_norm": 2.1299997587136064, - "learning_rate": 4.653703978570789e-08, - "loss": 0.1507, - "step": 19079 - }, - { - "epoch": 0.97, - "grad_norm": 3.050521335679299, - "learning_rate": 4.6378468598938794e-08, - "loss": 0.1812, - "step": 19080 - }, - { - "epoch": 0.97, - "grad_norm": 1.361031673379913, - "learning_rate": 4.62201674047158e-08, - "loss": 0.1644, - "step": 19081 - }, - { - "epoch": 0.97, - "grad_norm": 0.9886945750398041, - "learning_rate": 4.606213620733324e-08, - "loss": 0.1619, - "step": 19082 - }, - { - "epoch": 0.97, - "grad_norm": 1.1377023965397417, - "learning_rate": 4.59043750110777e-08, - "loss": 0.1677, - "step": 19083 - }, - { - "epoch": 0.97, - "grad_norm": 1.3184405498120437, - "learning_rate": 4.574688382022796e-08, - "loss": 0.1714, - "step": 19084 - }, - { - "epoch": 0.97, - "grad_norm": 0.8280553700474405, - "learning_rate": 4.5589662639056175e-08, - "loss": 0.1735, - "step": 19085 - }, - { - "epoch": 0.97, - "grad_norm": 1.005031500161824, - "learning_rate": 4.5432711471826704e-08, - "loss": 0.159, - "step": 19086 - }, - { - "epoch": 0.97, - "grad_norm": 1.1721295729425816, - "learning_rate": 4.527603032279726e-08, - "loss": 0.1513, - "step": 19087 - }, - { - "epoch": 0.97, - "grad_norm": 1.7713609394550736, - "learning_rate": 4.511961919621888e-08, - "loss": 0.1603, - "step": 19088 - }, - { - "epoch": 0.97, - "grad_norm": 4.941106674306136, - "learning_rate": 4.496347809633261e-08, - "loss": 0.1454, - "step": 19089 - }, - { - "epoch": 0.97, - "grad_norm": 0.824939947863532, - "learning_rate": 4.480760702737286e-08, - "loss": 0.137, - "step": 19090 - }, - { - "epoch": 0.97, - "grad_norm": 1.275105006746803, - "learning_rate": 4.465200599356956e-08, - "loss": 0.1616, - "step": 19091 - }, - { - "epoch": 0.97, - "grad_norm": 1.1215004426931332, - "learning_rate": 4.4496674999143786e-08, - "loss": 0.1473, - "step": 19092 - }, - { - "epoch": 0.97, - "grad_norm": 1.4440259031152056, - "learning_rate": 4.434161404830772e-08, - "loss": 0.1563, - "step": 19093 - }, - { - "epoch": 0.97, - "grad_norm": 2.7954277937536793, - "learning_rate": 4.41868231452669e-08, - "loss": 0.1615, - "step": 19094 - }, - { - "epoch": 0.97, - "grad_norm": 1.1079319217667891, - "learning_rate": 4.40323022942224e-08, - "loss": 0.1961, - "step": 19095 - }, - { - "epoch": 0.97, - "grad_norm": 1.3025911019288117, - "learning_rate": 4.387805149936197e-08, - "loss": 0.1534, - "step": 19096 - }, - { - "epoch": 0.97, - "grad_norm": 0.9653298618682854, - "learning_rate": 4.3724070764873396e-08, - "loss": 0.1516, - "step": 19097 - }, - { - "epoch": 0.97, - "grad_norm": 1.1497810602577705, - "learning_rate": 4.35703600949311e-08, - "loss": 0.158, - "step": 19098 - }, - { - "epoch": 0.97, - "grad_norm": 0.9031827811840533, - "learning_rate": 4.341691949370508e-08, - "loss": 0.1581, - "step": 19099 - }, - { - "epoch": 0.97, - "grad_norm": 1.0561575396976002, - "learning_rate": 4.326374896535757e-08, - "loss": 0.1816, - "step": 19100 - }, - { - "epoch": 0.97, - "grad_norm": 1.2191283042704604, - "learning_rate": 4.311084851404301e-08, - "loss": 0.1577, - "step": 19101 - }, - { - "epoch": 0.97, - "grad_norm": 1.7094001015154057, - "learning_rate": 4.2958218143909214e-08, - "loss": 0.1614, - "step": 19102 - }, - { - "epoch": 0.97, - "grad_norm": 1.0578646131166962, - "learning_rate": 4.280585785909619e-08, - "loss": 0.1417, - "step": 19103 - }, - { - "epoch": 0.97, - "grad_norm": 0.9456307144413473, - "learning_rate": 4.265376766373619e-08, - "loss": 0.1486, - "step": 19104 - }, - { - "epoch": 0.97, - "grad_norm": 0.9368889064225784, - "learning_rate": 4.2501947561955914e-08, - "loss": 0.1611, - "step": 19105 - }, - { - "epoch": 0.97, - "grad_norm": 0.9330791875403646, - "learning_rate": 4.235039755787318e-08, - "loss": 0.1539, - "step": 19106 - }, - { - "epoch": 0.97, - "grad_norm": 0.9165604635550397, - "learning_rate": 4.2199117655596924e-08, - "loss": 0.1688, - "step": 19107 - }, - { - "epoch": 0.97, - "grad_norm": 0.9110528720041452, - "learning_rate": 4.204810785923275e-08, - "loss": 0.1742, - "step": 19108 - }, - { - "epoch": 0.97, - "grad_norm": 1.9730364455869631, - "learning_rate": 4.1897368172875156e-08, - "loss": 0.146, - "step": 19109 - }, - { - "epoch": 0.97, - "grad_norm": 1.5814289522865332, - "learning_rate": 4.174689860061532e-08, - "loss": 0.1815, - "step": 19110 - }, - { - "epoch": 0.97, - "grad_norm": 1.543582728055052, - "learning_rate": 4.159669914653219e-08, - "loss": 0.1706, - "step": 19111 - }, - { - "epoch": 0.97, - "grad_norm": 1.1285363465670573, - "learning_rate": 4.144676981470142e-08, - "loss": 0.1716, - "step": 19112 - }, - { - "epoch": 0.97, - "grad_norm": 1.3445171338848143, - "learning_rate": 4.1297110609189726e-08, - "loss": 0.1576, - "step": 19113 - }, - { - "epoch": 0.97, - "grad_norm": 0.8753483217197732, - "learning_rate": 4.1147721534056106e-08, - "loss": 0.1479, - "step": 19114 - }, - { - "epoch": 0.97, - "grad_norm": 0.9138793875980022, - "learning_rate": 4.099860259335287e-08, - "loss": 0.1736, - "step": 19115 - }, - { - "epoch": 0.97, - "grad_norm": 1.1239955889645314, - "learning_rate": 4.084975379112566e-08, - "loss": 0.1492, - "step": 19116 - }, - { - "epoch": 0.97, - "grad_norm": 0.875353207317943, - "learning_rate": 4.070117513141014e-08, - "loss": 0.1633, - "step": 19117 - }, - { - "epoch": 0.97, - "grad_norm": 0.9480898716991211, - "learning_rate": 4.055286661823976e-08, - "loss": 0.1524, - "step": 19118 - }, - { - "epoch": 0.97, - "grad_norm": 1.0478210911230053, - "learning_rate": 4.040482825563352e-08, - "loss": 0.1739, - "step": 19119 - }, - { - "epoch": 0.97, - "grad_norm": 0.9717161847311233, - "learning_rate": 4.025706004760932e-08, - "loss": 0.1709, - "step": 19120 - }, - { - "epoch": 0.97, - "grad_norm": 0.9507746435779297, - "learning_rate": 4.010956199817506e-08, - "loss": 0.1628, - "step": 19121 - }, - { - "epoch": 0.97, - "grad_norm": 0.8743938983011655, - "learning_rate": 3.996233411133199e-08, - "loss": 0.1664, - "step": 19122 - }, - { - "epoch": 0.97, - "grad_norm": 1.0571851837849218, - "learning_rate": 3.981537639107247e-08, - "loss": 0.166, - "step": 19123 - }, - { - "epoch": 0.97, - "grad_norm": 1.0122948012698287, - "learning_rate": 3.966868884138442e-08, - "loss": 0.1588, - "step": 19124 - }, - { - "epoch": 0.97, - "grad_norm": 0.8442422327482219, - "learning_rate": 3.952227146624465e-08, - "loss": 0.161, - "step": 19125 - }, - { - "epoch": 0.97, - "grad_norm": 1.0852162833362355, - "learning_rate": 3.937612426962556e-08, - "loss": 0.1804, - "step": 19126 - }, - { - "epoch": 0.97, - "grad_norm": 0.9181077560303157, - "learning_rate": 3.923024725549285e-08, - "loss": 0.164, - "step": 19127 - }, - { - "epoch": 0.97, - "grad_norm": 1.2820486158378837, - "learning_rate": 3.908464042780114e-08, - "loss": 0.173, - "step": 19128 - }, - { - "epoch": 0.97, - "grad_norm": 1.136925404121932, - "learning_rate": 3.8939303790501706e-08, - "loss": 0.1761, - "step": 19129 - }, - { - "epoch": 0.97, - "grad_norm": 1.1490818691885378, - "learning_rate": 3.879423734753585e-08, - "loss": 0.1519, - "step": 19130 - }, - { - "epoch": 0.97, - "grad_norm": 1.4095179194478367, - "learning_rate": 3.864944110284041e-08, - "loss": 0.1591, - "step": 19131 - }, - { - "epoch": 0.97, - "grad_norm": 1.0354443872710228, - "learning_rate": 3.850491506034004e-08, - "loss": 0.1597, - "step": 19132 - }, - { - "epoch": 0.97, - "grad_norm": 0.927952075094008, - "learning_rate": 3.8360659223957155e-08, - "loss": 0.1622, - "step": 19133 - }, - { - "epoch": 0.97, - "grad_norm": 1.0288648343438391, - "learning_rate": 3.821667359760306e-08, - "loss": 0.1674, - "step": 19134 - }, - { - "epoch": 0.97, - "grad_norm": 0.9923904976945497, - "learning_rate": 3.8072958185184635e-08, - "loss": 0.1688, - "step": 19135 - }, - { - "epoch": 0.97, - "grad_norm": 0.9784718164833021, - "learning_rate": 3.7929512990600995e-08, - "loss": 0.1649, - "step": 19136 - }, - { - "epoch": 0.97, - "grad_norm": 12.11685222340869, - "learning_rate": 3.778633801774123e-08, - "loss": 0.1644, - "step": 19137 - }, - { - "epoch": 0.97, - "grad_norm": 0.8243294680976933, - "learning_rate": 3.764343327048892e-08, - "loss": 0.1577, - "step": 19138 - }, - { - "epoch": 0.97, - "grad_norm": 0.9167141022086956, - "learning_rate": 3.750079875272206e-08, - "loss": 0.1739, - "step": 19139 - }, - { - "epoch": 0.97, - "grad_norm": 1.072275320165148, - "learning_rate": 3.735843446830867e-08, - "loss": 0.1399, - "step": 19140 - }, - { - "epoch": 0.97, - "grad_norm": 0.9768691762964762, - "learning_rate": 3.721634042111011e-08, - "loss": 0.165, - "step": 19141 - }, - { - "epoch": 0.97, - "grad_norm": 0.838464973730494, - "learning_rate": 3.707451661498107e-08, - "loss": 0.1578, - "step": 19142 - }, - { - "epoch": 0.97, - "grad_norm": 0.9608964549999129, - "learning_rate": 3.693296305376959e-08, - "loss": 0.1492, - "step": 19143 - }, - { - "epoch": 0.97, - "grad_norm": 1.228769947210927, - "learning_rate": 3.679167974131259e-08, - "loss": 0.1453, - "step": 19144 - }, - { - "epoch": 0.97, - "grad_norm": 1.1040416367837809, - "learning_rate": 3.665066668144479e-08, - "loss": 0.1522, - "step": 19145 - }, - { - "epoch": 0.97, - "grad_norm": 0.8744148989593945, - "learning_rate": 3.65099238779909e-08, - "loss": 0.1632, - "step": 19146 - }, - { - "epoch": 0.97, - "grad_norm": 0.922122532580692, - "learning_rate": 3.636945133476677e-08, - "loss": 0.1846, - "step": 19147 - }, - { - "epoch": 0.97, - "grad_norm": 1.0865718018611439, - "learning_rate": 3.622924905558489e-08, - "loss": 0.1593, - "step": 19148 - }, - { - "epoch": 0.97, - "grad_norm": 0.973217513769555, - "learning_rate": 3.608931704424778e-08, - "loss": 0.1602, - "step": 19149 - }, - { - "epoch": 0.97, - "grad_norm": 1.2178422284953592, - "learning_rate": 3.5949655304550193e-08, - "loss": 0.1646, - "step": 19150 - }, - { - "epoch": 0.97, - "grad_norm": 0.8812935813974213, - "learning_rate": 3.581026384028019e-08, - "loss": 0.1523, - "step": 19151 - }, - { - "epoch": 0.97, - "grad_norm": 3.8532607015421814, - "learning_rate": 3.567114265522031e-08, - "loss": 0.1392, - "step": 19152 - }, - { - "epoch": 0.97, - "grad_norm": 1.116891255212077, - "learning_rate": 3.55322917531431e-08, - "loss": 0.161, - "step": 19153 - }, - { - "epoch": 0.97, - "grad_norm": 1.0735371441706794, - "learning_rate": 3.539371113781553e-08, - "loss": 0.1443, - "step": 19154 - }, - { - "epoch": 0.97, - "grad_norm": 0.995049097027222, - "learning_rate": 3.525540081299683e-08, - "loss": 0.1607, - "step": 19155 - }, - { - "epoch": 0.97, - "grad_norm": 1.372855437481362, - "learning_rate": 3.511736078243844e-08, - "loss": 0.1941, - "step": 19156 - }, - { - "epoch": 0.97, - "grad_norm": 0.9548197354253422, - "learning_rate": 3.497959104988291e-08, - "loss": 0.1552, - "step": 19157 - }, - { - "epoch": 0.97, - "grad_norm": 1.171880449888493, - "learning_rate": 3.4842091619070594e-08, - "loss": 0.1554, - "step": 19158 - }, - { - "epoch": 0.97, - "grad_norm": 1.0613535166121013, - "learning_rate": 3.470486249372851e-08, - "loss": 0.1488, - "step": 19159 - }, - { - "epoch": 0.97, - "grad_norm": 1.0495709235018573, - "learning_rate": 3.456790367757923e-08, - "loss": 0.1404, - "step": 19160 - }, - { - "epoch": 0.97, - "grad_norm": 0.9657074899743266, - "learning_rate": 3.4431215174338675e-08, - "loss": 0.1589, - "step": 19161 - }, - { - "epoch": 0.97, - "grad_norm": 1.615432278184323, - "learning_rate": 3.429479698771499e-08, - "loss": 0.1848, - "step": 19162 - }, - { - "epoch": 0.97, - "grad_norm": 0.8700346511067772, - "learning_rate": 3.415864912140743e-08, - "loss": 0.143, - "step": 19163 - }, - { - "epoch": 0.97, - "grad_norm": 0.998348237566735, - "learning_rate": 3.402277157910971e-08, - "loss": 0.1488, - "step": 19164 - }, - { - "epoch": 0.97, - "grad_norm": 1.1098996119917584, - "learning_rate": 3.3887164364506676e-08, - "loss": 0.156, - "step": 19165 - }, - { - "epoch": 0.97, - "grad_norm": 0.9244051009838743, - "learning_rate": 3.375182748127759e-08, - "loss": 0.1975, - "step": 19166 - }, - { - "epoch": 0.97, - "grad_norm": 1.0719850242465663, - "learning_rate": 3.361676093309285e-08, - "loss": 0.1649, - "step": 19167 - }, - { - "epoch": 0.97, - "grad_norm": 1.4384884218914096, - "learning_rate": 3.34819647236162e-08, - "loss": 0.1911, - "step": 19168 - }, - { - "epoch": 0.97, - "grad_norm": 1.6804235632026177, - "learning_rate": 3.334743885650471e-08, - "loss": 0.1742, - "step": 19169 - }, - { - "epoch": 0.97, - "grad_norm": 1.5391305538864972, - "learning_rate": 3.321318333540546e-08, - "loss": 0.1745, - "step": 19170 - }, - { - "epoch": 0.97, - "grad_norm": 0.821442405909109, - "learning_rate": 3.307919816396332e-08, - "loss": 0.1707, - "step": 19171 - }, - { - "epoch": 0.97, - "grad_norm": 0.9376385559888619, - "learning_rate": 3.294548334580982e-08, - "loss": 0.1571, - "step": 19172 - }, - { - "epoch": 0.97, - "grad_norm": 1.0596456111225416, - "learning_rate": 3.2812038884573185e-08, - "loss": 0.1722, - "step": 19173 - }, - { - "epoch": 0.98, - "grad_norm": 1.512084876890349, - "learning_rate": 3.267886478387383e-08, - "loss": 0.1509, - "step": 19174 - }, - { - "epoch": 0.98, - "grad_norm": 0.9142596774458099, - "learning_rate": 3.254596104732222e-08, - "loss": 0.1706, - "step": 19175 - }, - { - "epoch": 0.98, - "grad_norm": 1.227178005224331, - "learning_rate": 3.241332767852545e-08, - "loss": 0.1699, - "step": 19176 - }, - { - "epoch": 0.98, - "grad_norm": 0.988269507234306, - "learning_rate": 3.228096468107844e-08, - "loss": 0.1521, - "step": 19177 - }, - { - "epoch": 0.98, - "grad_norm": 1.0376160910233094, - "learning_rate": 3.214887205857387e-08, - "loss": 0.1781, - "step": 19178 - }, - { - "epoch": 0.98, - "grad_norm": 1.0444876319228076, - "learning_rate": 3.201704981459441e-08, - "loss": 0.1569, - "step": 19179 - }, - { - "epoch": 0.98, - "grad_norm": 1.151779282207466, - "learning_rate": 3.188549795271612e-08, - "loss": 0.1567, - "step": 19180 - }, - { - "epoch": 0.98, - "grad_norm": 1.1160958146018711, - "learning_rate": 3.175421647650612e-08, - "loss": 0.1439, - "step": 19181 - }, - { - "epoch": 0.98, - "grad_norm": 1.1713617665246183, - "learning_rate": 3.1623205389526015e-08, - "loss": 0.1822, - "step": 19182 - }, - { - "epoch": 0.98, - "grad_norm": 1.1129384740967305, - "learning_rate": 3.1492464695328517e-08, - "loss": 0.1473, - "step": 19183 - }, - { - "epoch": 0.98, - "grad_norm": 1.1710919498361196, - "learning_rate": 3.1361994397463015e-08, - "loss": 0.1641, - "step": 19184 - }, - { - "epoch": 0.98, - "grad_norm": 2.47322006099083, - "learning_rate": 3.123179449946445e-08, - "loss": 0.1597, - "step": 19185 - }, - { - "epoch": 0.98, - "grad_norm": 1.0377026560653144, - "learning_rate": 3.110186500486778e-08, - "loss": 0.1523, - "step": 19186 - }, - { - "epoch": 0.98, - "grad_norm": 0.8996411914981404, - "learning_rate": 3.097220591719574e-08, - "loss": 0.1602, - "step": 19187 - }, - { - "epoch": 0.98, - "grad_norm": 1.1501427842312555, - "learning_rate": 3.084281723996552e-08, - "loss": 0.1571, - "step": 19188 - }, - { - "epoch": 0.98, - "grad_norm": 1.0734279089738592, - "learning_rate": 3.071369897668652e-08, - "loss": 0.1622, - "step": 19189 - }, - { - "epoch": 0.98, - "grad_norm": 0.9380478409377784, - "learning_rate": 3.0584851130861516e-08, - "loss": 0.1438, - "step": 19190 - }, - { - "epoch": 0.98, - "grad_norm": 1.1629182446898398, - "learning_rate": 3.0456273705986585e-08, - "loss": 0.1703, - "step": 19191 - }, - { - "epoch": 0.98, - "grad_norm": 1.1467098491253804, - "learning_rate": 3.032796670554783e-08, - "loss": 0.166, - "step": 19192 - }, - { - "epoch": 0.98, - "grad_norm": 0.8112423150515589, - "learning_rate": 3.01999301330258e-08, - "loss": 0.1689, - "step": 19193 - }, - { - "epoch": 0.98, - "grad_norm": 0.9835760332721184, - "learning_rate": 3.007216399189328e-08, - "loss": 0.1427, - "step": 19194 - }, - { - "epoch": 0.98, - "grad_norm": 1.0386973083640156, - "learning_rate": 2.9944668285617486e-08, - "loss": 0.16, - "step": 19195 - }, - { - "epoch": 0.98, - "grad_norm": 1.2121770744729188, - "learning_rate": 2.981744301765454e-08, - "loss": 0.1702, - "step": 19196 - }, - { - "epoch": 0.98, - "grad_norm": 1.1948688625434023, - "learning_rate": 2.9690488191457256e-08, - "loss": 0.1715, - "step": 19197 - }, - { - "epoch": 0.98, - "grad_norm": 1.0920289287961265, - "learning_rate": 2.9563803810468417e-08, - "loss": 0.1407, - "step": 19198 - }, - { - "epoch": 0.98, - "grad_norm": 1.6118283072831598, - "learning_rate": 2.943738987812528e-08, - "loss": 0.1629, - "step": 19199 - }, - { - "epoch": 0.98, - "grad_norm": 1.0650245468422912, - "learning_rate": 2.9311246397855097e-08, - "loss": 0.1664, - "step": 19200 - }, - { - "epoch": 0.98, - "grad_norm": 0.9317474614007701, - "learning_rate": 2.9185373373080694e-08, - "loss": 0.1704, - "step": 19201 - }, - { - "epoch": 0.98, - "grad_norm": 1.0744841689521663, - "learning_rate": 2.9059770807217114e-08, - "loss": 0.1483, - "step": 19202 - }, - { - "epoch": 0.98, - "grad_norm": 1.0447025231148186, - "learning_rate": 2.893443870366941e-08, - "loss": 0.1695, - "step": 19203 - }, - { - "epoch": 0.98, - "grad_norm": 1.0831017244705572, - "learning_rate": 2.8809377065838195e-08, - "loss": 0.1731, - "step": 19204 - }, - { - "epoch": 0.98, - "grad_norm": 0.957580011722551, - "learning_rate": 2.8684585897116313e-08, - "loss": 0.1442, - "step": 19205 - }, - { - "epoch": 0.98, - "grad_norm": 1.1950503252036402, - "learning_rate": 2.856006520088772e-08, - "loss": 0.1447, - "step": 19206 - }, - { - "epoch": 0.98, - "grad_norm": 1.0911236067523062, - "learning_rate": 2.843581498053083e-08, - "loss": 0.154, - "step": 19207 - }, - { - "epoch": 0.98, - "grad_norm": 1.3110033762325044, - "learning_rate": 2.8311835239415166e-08, - "loss": 0.166, - "step": 19208 - }, - { - "epoch": 0.98, - "grad_norm": 0.8672953809905334, - "learning_rate": 2.8188125980904702e-08, - "loss": 0.144, - "step": 19209 - }, - { - "epoch": 0.98, - "grad_norm": 1.1167029052994197, - "learning_rate": 2.8064687208354534e-08, - "loss": 0.1766, - "step": 19210 - }, - { - "epoch": 0.98, - "grad_norm": 1.0134270323241488, - "learning_rate": 2.7941518925113098e-08, - "loss": 0.1565, - "step": 19211 - }, - { - "epoch": 0.98, - "grad_norm": 1.3869272247853248, - "learning_rate": 2.7818621134521052e-08, - "loss": 0.1532, - "step": 19212 - }, - { - "epoch": 0.98, - "grad_norm": 3.1589186608695665, - "learning_rate": 2.7695993839912394e-08, - "loss": 0.1417, - "step": 19213 - }, - { - "epoch": 0.98, - "grad_norm": 0.8894785662670377, - "learning_rate": 2.7573637044612245e-08, - "loss": 0.1661, - "step": 19214 - }, - { - "epoch": 0.98, - "grad_norm": 1.039413572033424, - "learning_rate": 2.7451550751941282e-08, - "loss": 0.1762, - "step": 19215 - }, - { - "epoch": 0.98, - "grad_norm": 1.1886792996172786, - "learning_rate": 2.7329734965210185e-08, - "loss": 0.1754, - "step": 19216 - }, - { - "epoch": 0.98, - "grad_norm": 0.9696007012494356, - "learning_rate": 2.7208189687722986e-08, - "loss": 0.1551, - "step": 19217 - }, - { - "epoch": 0.98, - "grad_norm": 1.037129191027604, - "learning_rate": 2.708691492277704e-08, - "loss": 0.1538, - "step": 19218 - }, - { - "epoch": 0.98, - "grad_norm": 1.0038345821338919, - "learning_rate": 2.6965910673661945e-08, - "loss": 0.1361, - "step": 19219 - }, - { - "epoch": 0.98, - "grad_norm": 1.0297748191335272, - "learning_rate": 2.684517694365951e-08, - "loss": 0.1653, - "step": 19220 - }, - { - "epoch": 0.98, - "grad_norm": 0.9427532899833283, - "learning_rate": 2.6724713736044904e-08, - "loss": 0.1593, - "step": 19221 - }, - { - "epoch": 0.98, - "grad_norm": 1.2120297659638368, - "learning_rate": 2.6604521054085507e-08, - "loss": 0.1655, - "step": 19222 - }, - { - "epoch": 0.98, - "grad_norm": 1.0842678478940873, - "learning_rate": 2.6484598901042047e-08, - "loss": 0.1572, - "step": 19223 - }, - { - "epoch": 0.98, - "grad_norm": 1.0850936239860367, - "learning_rate": 2.6364947280167474e-08, - "loss": 0.1567, - "step": 19224 - }, - { - "epoch": 0.98, - "grad_norm": 1.1589672563300961, - "learning_rate": 2.6245566194706973e-08, - "loss": 0.1565, - "step": 19225 - }, - { - "epoch": 0.98, - "grad_norm": 0.9041978598526125, - "learning_rate": 2.6126455647899063e-08, - "loss": 0.1401, - "step": 19226 - }, - { - "epoch": 0.98, - "grad_norm": 1.1471922837033912, - "learning_rate": 2.6007615642973383e-08, - "loss": 0.18, - "step": 19227 - }, - { - "epoch": 0.98, - "grad_norm": 0.9977763246122786, - "learning_rate": 2.588904618315513e-08, - "loss": 0.1533, - "step": 19228 - }, - { - "epoch": 0.98, - "grad_norm": 0.9580627412826506, - "learning_rate": 2.5770747271659512e-08, - "loss": 0.1619, - "step": 19229 - }, - { - "epoch": 0.98, - "grad_norm": 0.992200161492214, - "learning_rate": 2.5652718911696185e-08, - "loss": 0.1521, - "step": 19230 - }, - { - "epoch": 0.98, - "grad_norm": 1.0007268238753533, - "learning_rate": 2.5534961106465918e-08, - "loss": 0.1644, - "step": 19231 - }, - { - "epoch": 0.98, - "grad_norm": 0.8550931452612291, - "learning_rate": 2.5417473859162823e-08, - "loss": 0.147, - "step": 19232 - }, - { - "epoch": 0.98, - "grad_norm": 0.8227788485716492, - "learning_rate": 2.530025717297546e-08, - "loss": 0.1548, - "step": 19233 - }, - { - "epoch": 0.98, - "grad_norm": 1.1593496255775817, - "learning_rate": 2.5183311051080184e-08, - "loss": 0.1691, - "step": 19234 - }, - { - "epoch": 0.98, - "grad_norm": 2.1956712286111624, - "learning_rate": 2.5066635496652225e-08, - "loss": 0.1716, - "step": 19235 - }, - { - "epoch": 0.98, - "grad_norm": 0.9729699845844008, - "learning_rate": 2.4950230512854612e-08, - "loss": 0.1862, - "step": 19236 - }, - { - "epoch": 0.98, - "grad_norm": 1.0720151399962266, - "learning_rate": 2.4834096102845927e-08, - "loss": 0.167, - "step": 19237 - }, - { - "epoch": 0.98, - "grad_norm": 1.9591798117726331, - "learning_rate": 2.4718232269774767e-08, - "loss": 0.1603, - "step": 19238 - }, - { - "epoch": 0.98, - "grad_norm": 1.343576188483149, - "learning_rate": 2.460263901678639e-08, - "loss": 0.1562, - "step": 19239 - }, - { - "epoch": 0.98, - "grad_norm": 1.252390946762163, - "learning_rate": 2.4487316347013845e-08, - "loss": 0.1719, - "step": 19240 - }, - { - "epoch": 0.98, - "grad_norm": 1.156646101992709, - "learning_rate": 2.4372264263586852e-08, - "loss": 0.1698, - "step": 19241 - }, - { - "epoch": 0.98, - "grad_norm": 1.081421366726149, - "learning_rate": 2.425748276962514e-08, - "loss": 0.1574, - "step": 19242 - }, - { - "epoch": 0.98, - "grad_norm": 0.9253255105133054, - "learning_rate": 2.414297186824288e-08, - "loss": 0.1552, - "step": 19243 - }, - { - "epoch": 0.98, - "grad_norm": 0.8335899901045356, - "learning_rate": 2.402873156254537e-08, - "loss": 0.1423, - "step": 19244 - }, - { - "epoch": 0.98, - "grad_norm": 0.9997764295713396, - "learning_rate": 2.3914761855632353e-08, - "loss": 0.1778, - "step": 19245 - }, - { - "epoch": 0.98, - "grad_norm": 1.0051440205824753, - "learning_rate": 2.3801062750595794e-08, - "loss": 0.1605, - "step": 19246 - }, - { - "epoch": 0.98, - "grad_norm": 1.016491155010764, - "learning_rate": 2.3687634250517676e-08, - "loss": 0.1457, - "step": 19247 - }, - { - "epoch": 0.98, - "grad_norm": 0.9058016869766914, - "learning_rate": 2.357447635847554e-08, - "loss": 0.1631, - "step": 19248 - }, - { - "epoch": 0.98, - "grad_norm": 1.9075130738463206, - "learning_rate": 2.3461589077540258e-08, - "loss": 0.1589, - "step": 19249 - }, - { - "epoch": 0.98, - "grad_norm": 0.9980310571120593, - "learning_rate": 2.3348972410772718e-08, - "loss": 0.1454, - "step": 19250 - }, - { - "epoch": 0.98, - "grad_norm": 1.0595001302371383, - "learning_rate": 2.3236626361227145e-08, - "loss": 0.1514, - "step": 19251 - }, - { - "epoch": 0.98, - "grad_norm": 1.5416509882505045, - "learning_rate": 2.3124550931952205e-08, - "loss": 0.1594, - "step": 19252 - }, - { - "epoch": 0.98, - "grad_norm": 0.8453900521274997, - "learning_rate": 2.3012746125987695e-08, - "loss": 0.1525, - "step": 19253 - }, - { - "epoch": 0.98, - "grad_norm": 0.7985186282417956, - "learning_rate": 2.290121194636452e-08, - "loss": 0.1423, - "step": 19254 - }, - { - "epoch": 0.98, - "grad_norm": 0.8561020250996914, - "learning_rate": 2.278994839611026e-08, - "loss": 0.151, - "step": 19255 - }, - { - "epoch": 0.98, - "grad_norm": 1.27649659032301, - "learning_rate": 2.2678955478242504e-08, - "loss": 0.1467, - "step": 19256 - }, - { - "epoch": 0.98, - "grad_norm": 0.8932878466540868, - "learning_rate": 2.256823319577217e-08, - "loss": 0.1622, - "step": 19257 - }, - { - "epoch": 0.98, - "grad_norm": 0.9914841386610725, - "learning_rate": 2.2457781551700198e-08, - "loss": 0.1424, - "step": 19258 - }, - { - "epoch": 0.98, - "grad_norm": 1.0687596475817032, - "learning_rate": 2.2347600549025294e-08, - "loss": 0.1477, - "step": 19259 - }, - { - "epoch": 0.98, - "grad_norm": 1.8674035862177825, - "learning_rate": 2.2237690190736183e-08, - "loss": 0.1522, - "step": 19260 - }, - { - "epoch": 0.98, - "grad_norm": 1.1072519797129452, - "learning_rate": 2.2128050479812703e-08, - "loss": 0.1686, - "step": 19261 - }, - { - "epoch": 0.98, - "grad_norm": 0.8107126930749041, - "learning_rate": 2.2018681419229138e-08, - "loss": 0.1415, - "step": 19262 - }, - { - "epoch": 0.98, - "grad_norm": 1.0972507429958072, - "learning_rate": 2.1909583011952007e-08, - "loss": 0.1542, - "step": 19263 - }, - { - "epoch": 0.98, - "grad_norm": 0.9810825168290226, - "learning_rate": 2.1800755260942276e-08, - "loss": 0.1549, - "step": 19264 - }, - { - "epoch": 0.98, - "grad_norm": 1.7842847357181404, - "learning_rate": 2.169219816914869e-08, - "loss": 0.1415, - "step": 19265 - }, - { - "epoch": 0.98, - "grad_norm": 0.8654898219260009, - "learning_rate": 2.1583911739518904e-08, - "loss": 0.1685, - "step": 19266 - }, - { - "epoch": 0.98, - "grad_norm": 1.3440330628325314, - "learning_rate": 2.1475895974989446e-08, - "loss": 0.1689, - "step": 19267 - }, - { - "epoch": 0.98, - "grad_norm": 1.3828300319608515, - "learning_rate": 2.13681508784902e-08, - "loss": 0.1501, - "step": 19268 - }, - { - "epoch": 0.98, - "grad_norm": 1.462503265865077, - "learning_rate": 2.1260676452942164e-08, - "loss": 0.1763, - "step": 19269 - }, - { - "epoch": 0.98, - "grad_norm": 0.9461929891605176, - "learning_rate": 2.1153472701263e-08, - "loss": 0.1723, - "step": 19270 - }, - { - "epoch": 0.98, - "grad_norm": 1.2117507915141943, - "learning_rate": 2.1046539626359274e-08, - "loss": 0.1671, - "step": 19271 - }, - { - "epoch": 0.98, - "grad_norm": 1.0426408318344735, - "learning_rate": 2.093987723113089e-08, - "loss": 0.1633, - "step": 19272 - }, - { - "epoch": 0.98, - "grad_norm": 1.0395904044309374, - "learning_rate": 2.0833485518473305e-08, - "loss": 0.1538, - "step": 19273 - }, - { - "epoch": 0.98, - "grad_norm": 1.0043395663080972, - "learning_rate": 2.0727364491269773e-08, - "loss": 0.1555, - "step": 19274 - }, - { - "epoch": 0.98, - "grad_norm": 1.3419287721746127, - "learning_rate": 2.0621514152401323e-08, - "loss": 0.1582, - "step": 19275 - }, - { - "epoch": 0.98, - "grad_norm": 1.1290578600458498, - "learning_rate": 2.0515934504736768e-08, - "loss": 0.1365, - "step": 19276 - }, - { - "epoch": 0.98, - "grad_norm": 2.6278224170282116, - "learning_rate": 2.0410625551141594e-08, - "loss": 0.1416, - "step": 19277 - }, - { - "epoch": 0.98, - "grad_norm": 1.5792626413568063, - "learning_rate": 2.0305587294472408e-08, - "loss": 0.1629, - "step": 19278 - }, - { - "epoch": 0.98, - "grad_norm": 1.1919448565898834, - "learning_rate": 2.0200819737576926e-08, - "loss": 0.1526, - "step": 19279 - }, - { - "epoch": 0.98, - "grad_norm": 1.179775286640012, - "learning_rate": 2.0096322883298435e-08, - "loss": 0.1533, - "step": 19280 - }, - { - "epoch": 0.98, - "grad_norm": 1.0520319965190825, - "learning_rate": 1.9992096734471333e-08, - "loss": 0.1556, - "step": 19281 - }, - { - "epoch": 0.98, - "grad_norm": 1.2946183283037571, - "learning_rate": 1.988814129392114e-08, - "loss": 0.1473, - "step": 19282 - }, - { - "epoch": 0.98, - "grad_norm": 1.0521064047313005, - "learning_rate": 1.9784456564470035e-08, - "loss": 0.1545, - "step": 19283 - }, - { - "epoch": 0.98, - "grad_norm": 1.2723479920478658, - "learning_rate": 1.9681042548928e-08, - "loss": 0.1575, - "step": 19284 - }, - { - "epoch": 0.98, - "grad_norm": 0.9780784388943833, - "learning_rate": 1.9577899250101674e-08, - "loss": 0.1556, - "step": 19285 - }, - { - "epoch": 0.98, - "grad_norm": 0.9001383282508998, - "learning_rate": 1.9475026670788822e-08, - "loss": 0.1594, - "step": 19286 - }, - { - "epoch": 0.98, - "grad_norm": 1.193731060326972, - "learning_rate": 1.9372424813779432e-08, - "loss": 0.1526, - "step": 19287 - }, - { - "epoch": 0.98, - "grad_norm": 1.1469736780532818, - "learning_rate": 1.9270093681856837e-08, - "loss": 0.1938, - "step": 19288 - }, - { - "epoch": 0.98, - "grad_norm": 2.133842881745061, - "learning_rate": 1.9168033277796595e-08, - "loss": 0.1514, - "step": 19289 - }, - { - "epoch": 0.98, - "grad_norm": 2.4315479417567807, - "learning_rate": 1.9066243604367595e-08, - "loss": 0.1574, - "step": 19290 - }, - { - "epoch": 0.98, - "grad_norm": 1.1500103445417296, - "learning_rate": 1.896472466432986e-08, - "loss": 0.1771, - "step": 19291 - }, - { - "epoch": 0.98, - "grad_norm": 1.1258307209006846, - "learning_rate": 1.8863476460437847e-08, - "loss": 0.1519, - "step": 19292 - }, - { - "epoch": 0.98, - "grad_norm": 1.2529336735398418, - "learning_rate": 1.876249899543825e-08, - "loss": 0.1799, - "step": 19293 - }, - { - "epoch": 0.98, - "grad_norm": 0.8555299298372947, - "learning_rate": 1.866179227206888e-08, - "loss": 0.1459, - "step": 19294 - }, - { - "epoch": 0.98, - "grad_norm": 1.0055165033399343, - "learning_rate": 1.8561356293061995e-08, - "loss": 0.1629, - "step": 19295 - }, - { - "epoch": 0.98, - "grad_norm": 0.8929016490331868, - "learning_rate": 1.846119106114319e-08, - "loss": 0.1629, - "step": 19296 - }, - { - "epoch": 0.98, - "grad_norm": 1.4282189392830789, - "learning_rate": 1.8361296579026965e-08, - "loss": 0.173, - "step": 19297 - }, - { - "epoch": 0.98, - "grad_norm": 1.3290545789755563, - "learning_rate": 1.8261672849425593e-08, - "loss": 0.1651, - "step": 19298 - }, - { - "epoch": 0.98, - "grad_norm": 1.506479397433706, - "learning_rate": 1.8162319875040247e-08, - "loss": 0.1668, - "step": 19299 - }, - { - "epoch": 0.98, - "grad_norm": 1.2558990219488457, - "learning_rate": 1.8063237658564325e-08, - "loss": 0.1554, - "step": 19300 - }, - { - "epoch": 0.98, - "grad_norm": 1.2926613820482529, - "learning_rate": 1.7964426202687902e-08, - "loss": 0.1858, - "step": 19301 - }, - { - "epoch": 0.98, - "grad_norm": 0.9604834521488833, - "learning_rate": 1.7865885510089943e-08, - "loss": 0.1639, - "step": 19302 - }, - { - "epoch": 0.98, - "grad_norm": 0.8900020514217135, - "learning_rate": 1.7767615583443865e-08, - "loss": 0.1464, - "step": 19303 - }, - { - "epoch": 0.98, - "grad_norm": 3.1007333087388065, - "learning_rate": 1.7669616425414203e-08, - "loss": 0.1428, - "step": 19304 - }, - { - "epoch": 0.98, - "grad_norm": 1.255905003951268, - "learning_rate": 1.7571888038661056e-08, - "loss": 0.1575, - "step": 19305 - }, - { - "epoch": 0.98, - "grad_norm": 1.2103777182423623, - "learning_rate": 1.747443042583341e-08, - "loss": 0.1729, - "step": 19306 - }, - { - "epoch": 0.98, - "grad_norm": 0.8329630456146007, - "learning_rate": 1.737724358957582e-08, - "loss": 0.1635, - "step": 19307 - }, - { - "epoch": 0.98, - "grad_norm": 1.1297998214124445, - "learning_rate": 1.7280327532525066e-08, - "loss": 0.1525, - "step": 19308 - }, - { - "epoch": 0.98, - "grad_norm": 1.6314144516197124, - "learning_rate": 1.7183682257309043e-08, - "loss": 0.1684, - "step": 19309 - }, - { - "epoch": 0.98, - "grad_norm": 1.3026685814979018, - "learning_rate": 1.708730776654899e-08, - "loss": 0.1768, - "step": 19310 - }, - { - "epoch": 0.98, - "grad_norm": 0.8763899372471403, - "learning_rate": 1.6991204062859478e-08, - "loss": 0.145, - "step": 19311 - }, - { - "epoch": 0.98, - "grad_norm": 1.0921780067084008, - "learning_rate": 1.6895371148847316e-08, - "loss": 0.1505, - "step": 19312 - }, - { - "epoch": 0.98, - "grad_norm": 1.0594109223251242, - "learning_rate": 1.6799809027112645e-08, - "loss": 0.1549, - "step": 19313 - }, - { - "epoch": 0.98, - "grad_norm": 0.9525505933657265, - "learning_rate": 1.6704517700246725e-08, - "loss": 0.1458, - "step": 19314 - }, - { - "epoch": 0.98, - "grad_norm": 1.0551914198110557, - "learning_rate": 1.6609497170834154e-08, - "loss": 0.1677, - "step": 19315 - }, - { - "epoch": 0.98, - "grad_norm": 1.7643407849401922, - "learning_rate": 1.6514747441453983e-08, - "loss": 0.1662, - "step": 19316 - }, - { - "epoch": 0.98, - "grad_norm": 1.6991980825364204, - "learning_rate": 1.6420268514674153e-08, - "loss": 0.1714, - "step": 19317 - }, - { - "epoch": 0.98, - "grad_norm": 0.8688835282260733, - "learning_rate": 1.6326060393058174e-08, - "loss": 0.1686, - "step": 19318 - }, - { - "epoch": 0.98, - "grad_norm": 1.7232939795009312, - "learning_rate": 1.6232123079162887e-08, - "loss": 0.1594, - "step": 19319 - }, - { - "epoch": 0.98, - "grad_norm": 1.1199605143149651, - "learning_rate": 1.6138456575534034e-08, - "loss": 0.1828, - "step": 19320 - }, - { - "epoch": 0.98, - "grad_norm": 0.9937593945033635, - "learning_rate": 1.6045060884714027e-08, - "loss": 0.1543, - "step": 19321 - }, - { - "epoch": 0.98, - "grad_norm": 1.0113342428247933, - "learning_rate": 1.5951936009235282e-08, - "loss": 0.1813, - "step": 19322 - }, - { - "epoch": 0.98, - "grad_norm": 0.9151153716972111, - "learning_rate": 1.5859081951624668e-08, - "loss": 0.1466, - "step": 19323 - }, - { - "epoch": 0.98, - "grad_norm": 1.10118412807842, - "learning_rate": 1.5766498714400168e-08, - "loss": 0.1761, - "step": 19324 - }, - { - "epoch": 0.98, - "grad_norm": 1.1592831567954685, - "learning_rate": 1.5674186300073113e-08, - "loss": 0.1723, - "step": 19325 - }, - { - "epoch": 0.98, - "grad_norm": 1.0636078897730215, - "learning_rate": 1.5582144711148163e-08, - "loss": 0.1616, - "step": 19326 - }, - { - "epoch": 0.98, - "grad_norm": 1.157339395167924, - "learning_rate": 1.5490373950121097e-08, - "loss": 0.1554, - "step": 19327 - }, - { - "epoch": 0.98, - "grad_norm": 0.8743647019705796, - "learning_rate": 1.5398874019481037e-08, - "loss": 0.1766, - "step": 19328 - }, - { - "epoch": 0.98, - "grad_norm": 0.8649892968907565, - "learning_rate": 1.5307644921710442e-08, - "loss": 0.1613, - "step": 19329 - }, - { - "epoch": 0.98, - "grad_norm": 1.0898910343208772, - "learning_rate": 1.5216686659285106e-08, - "loss": 0.1548, - "step": 19330 - }, - { - "epoch": 0.98, - "grad_norm": 1.1210862777680415, - "learning_rate": 1.512599923467084e-08, - "loss": 0.1539, - "step": 19331 - }, - { - "epoch": 0.98, - "grad_norm": 0.8436561419359566, - "learning_rate": 1.5035582650326786e-08, - "loss": 0.1484, - "step": 19332 - }, - { - "epoch": 0.98, - "grad_norm": 1.0534986774048938, - "learning_rate": 1.4945436908707645e-08, - "loss": 0.1465, - "step": 19333 - }, - { - "epoch": 0.98, - "grad_norm": 1.1097802246883643, - "learning_rate": 1.4855562012257019e-08, - "loss": 0.1513, - "step": 19334 - }, - { - "epoch": 0.98, - "grad_norm": 0.9406791484763374, - "learning_rate": 1.4765957963412957e-08, - "loss": 0.1457, - "step": 19335 - }, - { - "epoch": 0.98, - "grad_norm": 0.9979331385771827, - "learning_rate": 1.467662476460574e-08, - "loss": 0.1778, - "step": 19336 - }, - { - "epoch": 0.98, - "grad_norm": 0.9978593170264436, - "learning_rate": 1.4587562418260092e-08, - "loss": 0.1857, - "step": 19337 - }, - { - "epoch": 0.98, - "grad_norm": 1.0801844337278366, - "learning_rate": 1.449877092679075e-08, - "loss": 0.1514, - "step": 19338 - }, - { - "epoch": 0.98, - "grad_norm": 0.9235210259060828, - "learning_rate": 1.4410250292605788e-08, - "loss": 0.1797, - "step": 19339 - }, - { - "epoch": 0.98, - "grad_norm": 1.5939303682114307, - "learning_rate": 1.4322000518106616e-08, - "loss": 0.168, - "step": 19340 - }, - { - "epoch": 0.98, - "grad_norm": 1.0240846558467789, - "learning_rate": 1.4234021605687987e-08, - "loss": 0.1645, - "step": 19341 - }, - { - "epoch": 0.98, - "grad_norm": 1.1303492834383382, - "learning_rate": 1.414631355773466e-08, - "loss": 0.1592, - "step": 19342 - }, - { - "epoch": 0.98, - "grad_norm": 1.0958579086559461, - "learning_rate": 1.405887637662695e-08, - "loss": 0.1577, - "step": 19343 - }, - { - "epoch": 0.98, - "grad_norm": 1.3423002977177827, - "learning_rate": 1.3971710064736299e-08, - "loss": 0.1589, - "step": 19344 - }, - { - "epoch": 0.98, - "grad_norm": 0.9730615483497294, - "learning_rate": 1.3884814624427478e-08, - "loss": 0.1797, - "step": 19345 - }, - { - "epoch": 0.98, - "grad_norm": 0.8705667810277697, - "learning_rate": 1.3798190058056383e-08, - "loss": 0.143, - "step": 19346 - }, - { - "epoch": 0.98, - "grad_norm": 1.0582401666488022, - "learning_rate": 1.3711836367973353e-08, - "loss": 0.1499, - "step": 19347 - }, - { - "epoch": 0.98, - "grad_norm": 1.2066413755415508, - "learning_rate": 1.362575355652096e-08, - "loss": 0.155, - "step": 19348 - }, - { - "epoch": 0.98, - "grad_norm": 1.6819423962631999, - "learning_rate": 1.3539941626034003e-08, - "loss": 0.1695, - "step": 19349 - }, - { - "epoch": 0.98, - "grad_norm": 1.8172723617428077, - "learning_rate": 1.345440057884062e-08, - "loss": 0.1632, - "step": 19350 - }, - { - "epoch": 0.98, - "grad_norm": 0.8142788410916596, - "learning_rate": 1.3369130417260067e-08, - "loss": 0.1624, - "step": 19351 - }, - { - "epoch": 0.98, - "grad_norm": 0.973732870831668, - "learning_rate": 1.3284131143606049e-08, - "loss": 0.1711, - "step": 19352 - }, - { - "epoch": 0.98, - "grad_norm": 0.9400105566861664, - "learning_rate": 1.3199402760184499e-08, - "loss": 0.1644, - "step": 19353 - }, - { - "epoch": 0.98, - "grad_norm": 1.0797258749734406, - "learning_rate": 1.3114945269292468e-08, - "loss": 0.1795, - "step": 19354 - }, - { - "epoch": 0.98, - "grad_norm": 1.1797224027283473, - "learning_rate": 1.3030758673221456e-08, - "loss": 0.1561, - "step": 19355 - }, - { - "epoch": 0.98, - "grad_norm": 1.1102196689897572, - "learning_rate": 1.2946842974256301e-08, - "loss": 0.1649, - "step": 19356 - }, - { - "epoch": 0.98, - "grad_norm": 1.6320482620567274, - "learning_rate": 1.2863198174671853e-08, - "loss": 0.1408, - "step": 19357 - }, - { - "epoch": 0.98, - "grad_norm": 1.3116515018242307, - "learning_rate": 1.2779824276736298e-08, - "loss": 0.159, - "step": 19358 - }, - { - "epoch": 0.98, - "grad_norm": 0.76738250679809, - "learning_rate": 1.2696721282712266e-08, - "loss": 0.1355, - "step": 19359 - }, - { - "epoch": 0.98, - "grad_norm": 1.1489894563325833, - "learning_rate": 1.2613889194854623e-08, - "loss": 0.1742, - "step": 19360 - }, - { - "epoch": 0.98, - "grad_norm": 0.876093932556513, - "learning_rate": 1.253132801540935e-08, - "loss": 0.1541, - "step": 19361 - }, - { - "epoch": 0.98, - "grad_norm": 1.1655631323365128, - "learning_rate": 1.2449037746614657e-08, - "loss": 0.1569, - "step": 19362 - }, - { - "epoch": 0.98, - "grad_norm": 1.3719793760387626, - "learning_rate": 1.2367018390704311e-08, - "loss": 0.1394, - "step": 19363 - }, - { - "epoch": 0.98, - "grad_norm": 1.1327152068419841, - "learning_rate": 1.22852699499032e-08, - "loss": 0.1777, - "step": 19364 - }, - { - "epoch": 0.98, - "grad_norm": 1.3962861506837203, - "learning_rate": 1.220379242642844e-08, - "loss": 0.1614, - "step": 19365 - }, - { - "epoch": 0.98, - "grad_norm": 0.9929737564903329, - "learning_rate": 1.2122585822489374e-08, - "loss": 0.1599, - "step": 19366 - }, - { - "epoch": 0.98, - "grad_norm": 0.9344319731990219, - "learning_rate": 1.2041650140289796e-08, - "loss": 0.1611, - "step": 19367 - }, - { - "epoch": 0.98, - "grad_norm": 0.9210789177024352, - "learning_rate": 1.1960985382024615e-08, - "loss": 0.1423, - "step": 19368 - }, - { - "epoch": 0.98, - "grad_norm": 1.2308739590673927, - "learning_rate": 1.188059154988097e-08, - "loss": 0.1977, - "step": 19369 - }, - { - "epoch": 0.98, - "grad_norm": 0.9493833067382035, - "learning_rate": 1.1800468646041563e-08, - "loss": 0.1815, - "step": 19370 - }, - { - "epoch": 0.99, - "grad_norm": 0.8787135242048683, - "learning_rate": 1.1720616672676876e-08, - "loss": 0.1478, - "step": 19371 - }, - { - "epoch": 0.99, - "grad_norm": 1.0307096213171172, - "learning_rate": 1.1641035631956288e-08, - "loss": 0.1711, - "step": 19372 - }, - { - "epoch": 0.99, - "grad_norm": 1.2366301080695932, - "learning_rate": 1.156172552603585e-08, - "loss": 0.1562, - "step": 19373 - }, - { - "epoch": 0.99, - "grad_norm": 0.9931567520163074, - "learning_rate": 1.1482686357068284e-08, - "loss": 0.1371, - "step": 19374 - }, - { - "epoch": 0.99, - "grad_norm": 0.9024408195387488, - "learning_rate": 1.1403918127196323e-08, - "loss": 0.173, - "step": 19375 - }, - { - "epoch": 0.99, - "grad_norm": 1.0171629994494382, - "learning_rate": 1.1325420838558254e-08, - "loss": 0.1705, - "step": 19376 - }, - { - "epoch": 0.99, - "grad_norm": 1.7020732645492647, - "learning_rate": 1.1247194493281266e-08, - "loss": 0.1393, - "step": 19377 - }, - { - "epoch": 0.99, - "grad_norm": 0.9275481098725942, - "learning_rate": 1.1169239093489214e-08, - "loss": 0.1614, - "step": 19378 - }, - { - "epoch": 0.99, - "grad_norm": 1.0968512398274697, - "learning_rate": 1.1091554641294854e-08, - "loss": 0.146, - "step": 19379 - }, - { - "epoch": 0.99, - "grad_norm": 1.1071662113271206, - "learning_rate": 1.10141411388065e-08, - "loss": 0.1928, - "step": 19380 - }, - { - "epoch": 0.99, - "grad_norm": 1.0772512461637493, - "learning_rate": 1.0936998588124693e-08, - "loss": 0.145, - "step": 19381 - }, - { - "epoch": 0.99, - "grad_norm": 1.1481733489822117, - "learning_rate": 1.0860126991339982e-08, - "loss": 0.1415, - "step": 19382 - }, - { - "epoch": 0.99, - "grad_norm": 0.8551816592286479, - "learning_rate": 1.0783526350538476e-08, - "loss": 0.1608, - "step": 19383 - }, - { - "epoch": 0.99, - "grad_norm": 0.9707542368946773, - "learning_rate": 1.0707196667798513e-08, - "loss": 0.1539, - "step": 19384 - }, - { - "epoch": 0.99, - "grad_norm": 1.5645121903262291, - "learning_rate": 1.063113794518955e-08, - "loss": 0.1612, - "step": 19385 - }, - { - "epoch": 0.99, - "grad_norm": 1.3182108529350907, - "learning_rate": 1.0555350184775493e-08, - "loss": 0.1936, - "step": 19386 - }, - { - "epoch": 0.99, - "grad_norm": 0.9540155428082053, - "learning_rate": 1.047983338861136e-08, - "loss": 0.1589, - "step": 19387 - }, - { - "epoch": 0.99, - "grad_norm": 0.9832984324694535, - "learning_rate": 1.0404587558746626e-08, - "loss": 0.1438, - "step": 19388 - }, - { - "epoch": 0.99, - "grad_norm": 0.9006067158466624, - "learning_rate": 1.032961269722077e-08, - "loss": 0.1776, - "step": 19389 - }, - { - "epoch": 0.99, - "grad_norm": 4.244743279426564, - "learning_rate": 1.0254908806068831e-08, - "loss": 0.1625, - "step": 19390 - }, - { - "epoch": 0.99, - "grad_norm": 0.9579273824568465, - "learning_rate": 1.0180475887316966e-08, - "loss": 0.1696, - "step": 19391 - }, - { - "epoch": 0.99, - "grad_norm": 1.0104067076033303, - "learning_rate": 1.010631394298467e-08, - "loss": 0.1642, - "step": 19392 - }, - { - "epoch": 0.99, - "grad_norm": 0.9938051532179932, - "learning_rate": 1.0032422975081446e-08, - "loss": 0.1532, - "step": 19393 - }, - { - "epoch": 0.99, - "grad_norm": 0.8113243547723034, - "learning_rate": 9.95880298561347e-09, - "loss": 0.1686, - "step": 19394 - }, - { - "epoch": 0.99, - "grad_norm": 1.6819415896919474, - "learning_rate": 9.88545397657692e-09, - "loss": 0.1578, - "step": 19395 - }, - { - "epoch": 0.99, - "grad_norm": 1.6053397774824008, - "learning_rate": 9.812375949962426e-09, - "loss": 0.1852, - "step": 19396 - }, - { - "epoch": 0.99, - "grad_norm": 1.088283665461383, - "learning_rate": 9.739568907750629e-09, - "loss": 0.136, - "step": 19397 - }, - { - "epoch": 0.99, - "grad_norm": 0.9290486862927769, - "learning_rate": 9.667032851917723e-09, - "loss": 0.167, - "step": 19398 - }, - { - "epoch": 0.99, - "grad_norm": 1.1691011391086281, - "learning_rate": 9.594767784431025e-09, - "loss": 0.139, - "step": 19399 - }, - { - "epoch": 0.99, - "grad_norm": 0.8467822619073552, - "learning_rate": 9.522773707250077e-09, - "loss": 0.1709, - "step": 19400 - }, - { - "epoch": 0.99, - "grad_norm": 1.471340651344358, - "learning_rate": 9.451050622328873e-09, - "loss": 0.16, - "step": 19401 - }, - { - "epoch": 0.99, - "grad_norm": 1.273843813651816, - "learning_rate": 9.379598531611412e-09, - "loss": 0.157, - "step": 19402 - }, - { - "epoch": 0.99, - "grad_norm": 1.5288014134271248, - "learning_rate": 9.308417437037254e-09, - "loss": 0.1755, - "step": 19403 - }, - { - "epoch": 0.99, - "grad_norm": 0.9807843288626784, - "learning_rate": 9.237507340535968e-09, - "loss": 0.1631, - "step": 19404 - }, - { - "epoch": 0.99, - "grad_norm": 0.9258704245017536, - "learning_rate": 9.166868244031568e-09, - "loss": 0.1685, - "step": 19405 - }, - { - "epoch": 0.99, - "grad_norm": 1.1879016453660538, - "learning_rate": 9.096500149440302e-09, - "loss": 0.1534, - "step": 19406 - }, - { - "epoch": 0.99, - "grad_norm": 0.866655679800848, - "learning_rate": 9.02640305867064e-09, - "loss": 0.1266, - "step": 19407 - }, - { - "epoch": 0.99, - "grad_norm": 1.0303505197830716, - "learning_rate": 8.956576973624398e-09, - "loss": 0.1772, - "step": 19408 - }, - { - "epoch": 0.99, - "grad_norm": 1.0919726382863375, - "learning_rate": 8.887021896195614e-09, - "loss": 0.1592, - "step": 19409 - }, - { - "epoch": 0.99, - "grad_norm": 0.986364397224031, - "learning_rate": 8.817737828269446e-09, - "loss": 0.1547, - "step": 19410 - }, - { - "epoch": 0.99, - "grad_norm": 1.5733663737118595, - "learning_rate": 8.748724771727724e-09, - "loss": 0.1783, - "step": 19411 - }, - { - "epoch": 0.99, - "grad_norm": 1.1926579268620583, - "learning_rate": 8.679982728440061e-09, - "loss": 0.1575, - "step": 19412 - }, - { - "epoch": 0.99, - "grad_norm": 1.0728604790120746, - "learning_rate": 8.611511700272746e-09, - "loss": 0.1656, - "step": 19413 - }, - { - "epoch": 0.99, - "grad_norm": 0.9749045658625051, - "learning_rate": 8.543311689083177e-09, - "loss": 0.142, - "step": 19414 - }, - { - "epoch": 0.99, - "grad_norm": 0.9908315305488623, - "learning_rate": 8.47538269671988e-09, - "loss": 0.1469, - "step": 19415 - }, - { - "epoch": 0.99, - "grad_norm": 1.0193752846011748, - "learning_rate": 8.407724725025823e-09, - "loss": 0.1627, - "step": 19416 - }, - { - "epoch": 0.99, - "grad_norm": 1.770904345379466, - "learning_rate": 8.340337775837316e-09, - "loss": 0.1468, - "step": 19417 - }, - { - "epoch": 0.99, - "grad_norm": 0.9309726993901151, - "learning_rate": 8.273221850980673e-09, - "loss": 0.1453, - "step": 19418 - }, - { - "epoch": 0.99, - "grad_norm": 1.1105475116378467, - "learning_rate": 8.206376952277772e-09, - "loss": 0.1756, - "step": 19419 - }, - { - "epoch": 0.99, - "grad_norm": 1.4908709404855125, - "learning_rate": 8.139803081540499e-09, - "loss": 0.1683, - "step": 19420 - }, - { - "epoch": 0.99, - "grad_norm": 0.939681217803894, - "learning_rate": 8.073500240576292e-09, - "loss": 0.1752, - "step": 19421 - }, - { - "epoch": 0.99, - "grad_norm": 0.9335727034124158, - "learning_rate": 8.007468431182609e-09, - "loss": 0.1528, - "step": 19422 - }, - { - "epoch": 0.99, - "grad_norm": 0.8478138172436791, - "learning_rate": 7.941707655150233e-09, - "loss": 0.1453, - "step": 19423 - }, - { - "epoch": 0.99, - "grad_norm": 0.9989968451138482, - "learning_rate": 7.876217914264406e-09, - "loss": 0.1616, - "step": 19424 - }, - { - "epoch": 0.99, - "grad_norm": 1.419711096650346, - "learning_rate": 7.810999210299263e-09, - "loss": 0.1573, - "step": 19425 - }, - { - "epoch": 0.99, - "grad_norm": 0.8033453385822357, - "learning_rate": 7.746051545025613e-09, - "loss": 0.1636, - "step": 19426 - }, - { - "epoch": 0.99, - "grad_norm": 1.2856780985716851, - "learning_rate": 7.681374920205375e-09, - "loss": 0.1671, - "step": 19427 - }, - { - "epoch": 0.99, - "grad_norm": 1.021890655817455, - "learning_rate": 7.616969337591595e-09, - "loss": 0.1625, - "step": 19428 - }, - { - "epoch": 0.99, - "grad_norm": 1.200794353149773, - "learning_rate": 7.552834798931763e-09, - "loss": 0.162, - "step": 19429 - }, - { - "epoch": 0.99, - "grad_norm": 0.9148040032386322, - "learning_rate": 7.488971305965598e-09, - "loss": 0.1706, - "step": 19430 - }, - { - "epoch": 0.99, - "grad_norm": 0.9170760257674808, - "learning_rate": 7.425378860425048e-09, - "loss": 0.1772, - "step": 19431 - }, - { - "epoch": 0.99, - "grad_norm": 1.1806206086463553, - "learning_rate": 7.36205746403651e-09, - "loss": 0.1531, - "step": 19432 - }, - { - "epoch": 0.99, - "grad_norm": 0.8990746509040047, - "learning_rate": 7.299007118516388e-09, - "loss": 0.1441, - "step": 19433 - }, - { - "epoch": 0.99, - "grad_norm": 1.1953447975827665, - "learning_rate": 7.236227825574427e-09, - "loss": 0.1672, - "step": 19434 - }, - { - "epoch": 0.99, - "grad_norm": 1.1967523379896308, - "learning_rate": 7.173719586914818e-09, - "loss": 0.1612, - "step": 19435 - }, - { - "epoch": 0.99, - "grad_norm": 1.091873566076658, - "learning_rate": 7.111482404231762e-09, - "loss": 0.1483, - "step": 19436 - }, - { - "epoch": 0.99, - "grad_norm": 1.3397730582631586, - "learning_rate": 7.049516279215018e-09, - "loss": 0.1588, - "step": 19437 - }, - { - "epoch": 0.99, - "grad_norm": 1.2216177742464538, - "learning_rate": 6.987821213544355e-09, - "loss": 0.1521, - "step": 19438 - }, - { - "epoch": 0.99, - "grad_norm": 1.2590376447335887, - "learning_rate": 6.926397208892877e-09, - "loss": 0.1614, - "step": 19439 - }, - { - "epoch": 0.99, - "grad_norm": 1.5650243589807098, - "learning_rate": 6.8652442669281394e-09, - "loss": 0.1776, - "step": 19440 - }, - { - "epoch": 0.99, - "grad_norm": 0.9726278730987528, - "learning_rate": 6.804362389306596e-09, - "loss": 0.185, - "step": 19441 - }, - { - "epoch": 0.99, - "grad_norm": 0.9403291135586567, - "learning_rate": 6.743751577682478e-09, - "loss": 0.1619, - "step": 19442 - }, - { - "epoch": 0.99, - "grad_norm": 0.9548342447709992, - "learning_rate": 6.683411833697806e-09, - "loss": 0.1671, - "step": 19443 - }, - { - "epoch": 0.99, - "grad_norm": 2.2582047847405757, - "learning_rate": 6.623343158990159e-09, - "loss": 0.1707, - "step": 19444 - }, - { - "epoch": 0.99, - "grad_norm": 0.8723806454457383, - "learning_rate": 6.563545555189343e-09, - "loss": 0.1594, - "step": 19445 - }, - { - "epoch": 0.99, - "grad_norm": 1.4617744785679774, - "learning_rate": 6.504019023916286e-09, - "loss": 0.1528, - "step": 19446 - }, - { - "epoch": 0.99, - "grad_norm": 0.967536921726817, - "learning_rate": 6.444763566786361e-09, - "loss": 0.1491, - "step": 19447 - }, - { - "epoch": 0.99, - "grad_norm": 1.3426213804774425, - "learning_rate": 6.385779185407171e-09, - "loss": 0.1664, - "step": 19448 - }, - { - "epoch": 0.99, - "grad_norm": 1.0871880246818988, - "learning_rate": 6.327065881377437e-09, - "loss": 0.1806, - "step": 19449 - }, - { - "epoch": 0.99, - "grad_norm": 1.8191155304043163, - "learning_rate": 6.2686236562903294e-09, - "loss": 0.1644, - "step": 19450 - }, - { - "epoch": 0.99, - "grad_norm": 0.8209864529546275, - "learning_rate": 6.210452511731246e-09, - "loss": 0.1499, - "step": 19451 - }, - { - "epoch": 0.99, - "grad_norm": 0.9778755032833552, - "learning_rate": 6.152552449278925e-09, - "loss": 0.1752, - "step": 19452 - }, - { - "epoch": 0.99, - "grad_norm": 0.9975335549067313, - "learning_rate": 6.094923470502112e-09, - "loss": 0.185, - "step": 19453 - }, - { - "epoch": 0.99, - "grad_norm": 0.8685575465714487, - "learning_rate": 6.03756557696511e-09, - "loss": 0.1568, - "step": 19454 - }, - { - "epoch": 0.99, - "grad_norm": 0.9752549270513626, - "learning_rate": 5.980478770224452e-09, - "loss": 0.1645, - "step": 19455 - }, - { - "epoch": 0.99, - "grad_norm": 1.002505500250835, - "learning_rate": 5.923663051826678e-09, - "loss": 0.154, - "step": 19456 - }, - { - "epoch": 0.99, - "grad_norm": 1.6090929823221471, - "learning_rate": 5.867118423314999e-09, - "loss": 0.1711, - "step": 19457 - }, - { - "epoch": 0.99, - "grad_norm": 1.1652972258786725, - "learning_rate": 5.810844886221523e-09, - "loss": 0.1514, - "step": 19458 - }, - { - "epoch": 0.99, - "grad_norm": 0.9219302327292703, - "learning_rate": 5.754842442073916e-09, - "loss": 0.1825, - "step": 19459 - }, - { - "epoch": 0.99, - "grad_norm": 1.075261151804333, - "learning_rate": 5.699111092389853e-09, - "loss": 0.146, - "step": 19460 - }, - { - "epoch": 0.99, - "grad_norm": 0.8768162885658606, - "learning_rate": 5.643650838682568e-09, - "loss": 0.1537, - "step": 19461 - }, - { - "epoch": 0.99, - "grad_norm": 1.0508940154630682, - "learning_rate": 5.588461682455304e-09, - "loss": 0.1637, - "step": 19462 - }, - { - "epoch": 0.99, - "grad_norm": 1.346320532701835, - "learning_rate": 5.53354362520575e-09, - "loss": 0.1735, - "step": 19463 - }, - { - "epoch": 0.99, - "grad_norm": 1.0066814892312403, - "learning_rate": 5.478896668423828e-09, - "loss": 0.1506, - "step": 19464 - }, - { - "epoch": 0.99, - "grad_norm": 1.0829562803149226, - "learning_rate": 5.4245208135905725e-09, - "loss": 0.1518, - "step": 19465 - }, - { - "epoch": 0.99, - "grad_norm": 1.2651296228774314, - "learning_rate": 5.370416062181472e-09, - "loss": 0.1628, - "step": 19466 - }, - { - "epoch": 0.99, - "grad_norm": 0.9646946338958992, - "learning_rate": 5.31658241566535e-09, - "loss": 0.1564, - "step": 19467 - }, - { - "epoch": 0.99, - "grad_norm": 3.692309308973504, - "learning_rate": 5.2630198754999304e-09, - "loss": 0.1626, - "step": 19468 - }, - { - "epoch": 0.99, - "grad_norm": 1.0675900182191573, - "learning_rate": 5.209728443140716e-09, - "loss": 0.1559, - "step": 19469 - }, - { - "epoch": 0.99, - "grad_norm": 1.0059903839169024, - "learning_rate": 5.156708120032106e-09, - "loss": 0.1625, - "step": 19470 - }, - { - "epoch": 0.99, - "grad_norm": 1.4617637087129334, - "learning_rate": 5.103958907611839e-09, - "loss": 0.1519, - "step": 19471 - }, - { - "epoch": 0.99, - "grad_norm": 1.1075830315746373, - "learning_rate": 5.051480807312103e-09, - "loss": 0.1766, - "step": 19472 - }, - { - "epoch": 0.99, - "grad_norm": 1.1481469399042683, - "learning_rate": 4.999273820553985e-09, - "loss": 0.1509, - "step": 19473 - }, - { - "epoch": 0.99, - "grad_norm": 1.1386676238961613, - "learning_rate": 4.947337948756348e-09, - "loss": 0.1763, - "step": 19474 - }, - { - "epoch": 0.99, - "grad_norm": 1.072465063966951, - "learning_rate": 4.895673193325845e-09, - "loss": 0.1605, - "step": 19475 - }, - { - "epoch": 0.99, - "grad_norm": 0.8936326244245394, - "learning_rate": 4.8442795556657984e-09, - "loss": 0.1624, - "step": 19476 - }, - { - "epoch": 0.99, - "grad_norm": 0.9424084453406708, - "learning_rate": 4.793157037168428e-09, - "loss": 0.1414, - "step": 19477 - }, - { - "epoch": 0.99, - "grad_norm": 1.0914969007378266, - "learning_rate": 4.742305639221512e-09, - "loss": 0.1686, - "step": 19478 - }, - { - "epoch": 0.99, - "grad_norm": 1.2472389434505498, - "learning_rate": 4.6917253632039475e-09, - "loss": 0.1683, - "step": 19479 - }, - { - "epoch": 0.99, - "grad_norm": 1.0606679946176014, - "learning_rate": 4.64141621048797e-09, - "loss": 0.1839, - "step": 19480 - }, - { - "epoch": 0.99, - "grad_norm": 1.1715536705735645, - "learning_rate": 4.591378182438044e-09, - "loss": 0.1557, - "step": 19481 - }, - { - "epoch": 0.99, - "grad_norm": 1.2390844663128597, - "learning_rate": 4.541611280410862e-09, - "loss": 0.1746, - "step": 19482 - }, - { - "epoch": 0.99, - "grad_norm": 0.8912082389176219, - "learning_rate": 4.492115505757566e-09, - "loss": 0.1536, - "step": 19483 - }, - { - "epoch": 0.99, - "grad_norm": 0.905352948158156, - "learning_rate": 4.442890859820414e-09, - "loss": 0.1621, - "step": 19484 - }, - { - "epoch": 0.99, - "grad_norm": 1.0956200005472319, - "learning_rate": 4.393937343933896e-09, - "loss": 0.1582, - "step": 19485 - }, - { - "epoch": 0.99, - "grad_norm": 0.8536426185940816, - "learning_rate": 4.345254959426948e-09, - "loss": 0.1571, - "step": 19486 - }, - { - "epoch": 0.99, - "grad_norm": 1.229837492542071, - "learning_rate": 4.296843707619625e-09, - "loss": 0.1574, - "step": 19487 - }, - { - "epoch": 0.99, - "grad_norm": 0.95821743748653, - "learning_rate": 4.2487035898242106e-09, - "loss": 0.161, - "step": 19488 - }, - { - "epoch": 0.99, - "grad_norm": 1.3127235685010321, - "learning_rate": 4.200834607348547e-09, - "loss": 0.1621, - "step": 19489 - }, - { - "epoch": 0.99, - "grad_norm": 1.2313703140225252, - "learning_rate": 4.153236761488266e-09, - "loss": 0.173, - "step": 19490 - }, - { - "epoch": 0.99, - "grad_norm": 1.4217470558916299, - "learning_rate": 4.105910053536777e-09, - "loss": 0.1551, - "step": 19491 - }, - { - "epoch": 0.99, - "grad_norm": 1.0070122397761707, - "learning_rate": 4.058854484777497e-09, - "loss": 0.1475, - "step": 19492 - }, - { - "epoch": 0.99, - "grad_norm": 2.1355502059955587, - "learning_rate": 4.012070056484963e-09, - "loss": 0.1322, - "step": 19493 - }, - { - "epoch": 0.99, - "grad_norm": 1.0643535971160303, - "learning_rate": 3.965556769930379e-09, - "loss": 0.1466, - "step": 19494 - }, - { - "epoch": 0.99, - "grad_norm": 0.9244812085559133, - "learning_rate": 3.9193146263749595e-09, - "loss": 0.1604, - "step": 19495 - }, - { - "epoch": 0.99, - "grad_norm": 1.3720180064592766, - "learning_rate": 3.873343627073256e-09, - "loss": 0.1453, - "step": 19496 - }, - { - "epoch": 0.99, - "grad_norm": 1.1020529971092021, - "learning_rate": 3.827643773270939e-09, - "loss": 0.1655, - "step": 19497 - }, - { - "epoch": 0.99, - "grad_norm": 0.8882727079169191, - "learning_rate": 3.782215066208128e-09, - "loss": 0.1532, - "step": 19498 - }, - { - "epoch": 0.99, - "grad_norm": 0.8608176999058977, - "learning_rate": 3.73705750711717e-09, - "loss": 0.138, - "step": 19499 - }, - { - "epoch": 0.99, - "grad_norm": 1.6619832340491163, - "learning_rate": 3.692171097223751e-09, - "loss": 0.1516, - "step": 19500 - }, - { - "epoch": 0.99, - "grad_norm": 0.9864753391422965, - "learning_rate": 3.647555837744676e-09, - "loss": 0.1703, - "step": 19501 - }, - { - "epoch": 0.99, - "grad_norm": 0.8821308274352536, - "learning_rate": 3.603211729890088e-09, - "loss": 0.1549, - "step": 19502 - }, - { - "epoch": 0.99, - "grad_norm": 1.0563855911607007, - "learning_rate": 3.5591387748634687e-09, - "loss": 0.1603, - "step": 19503 - }, - { - "epoch": 0.99, - "grad_norm": 1.144958580490145, - "learning_rate": 3.5153369738583078e-09, - "loss": 0.156, - "step": 19504 - }, - { - "epoch": 0.99, - "grad_norm": 1.4642207318698972, - "learning_rate": 3.471806328065874e-09, - "loss": 0.1537, - "step": 19505 - }, - { - "epoch": 0.99, - "grad_norm": 2.2838464816610706, - "learning_rate": 3.428546838664115e-09, - "loss": 0.1542, - "step": 19506 - }, - { - "epoch": 0.99, - "grad_norm": 1.1749911154214572, - "learning_rate": 3.3855585068287564e-09, - "loss": 0.1595, - "step": 19507 - }, - { - "epoch": 0.99, - "grad_norm": 0.9491328852325978, - "learning_rate": 3.3428413337244224e-09, - "loss": 0.145, - "step": 19508 - }, - { - "epoch": 0.99, - "grad_norm": 1.0850935098083156, - "learning_rate": 3.3003953205101857e-09, - "loss": 0.1725, - "step": 19509 - }, - { - "epoch": 0.99, - "grad_norm": 1.13078152777645, - "learning_rate": 3.2582204683362372e-09, - "loss": 0.1457, - "step": 19510 - }, - { - "epoch": 0.99, - "grad_norm": 1.3915491422700175, - "learning_rate": 3.216316778348327e-09, - "loss": 0.1465, - "step": 19511 - }, - { - "epoch": 0.99, - "grad_norm": 1.3624287232659398, - "learning_rate": 3.1746842516833243e-09, - "loss": 0.1536, - "step": 19512 - }, - { - "epoch": 0.99, - "grad_norm": 0.9363607116738734, - "learning_rate": 3.1333228894692147e-09, - "loss": 0.1725, - "step": 19513 - }, - { - "epoch": 0.99, - "grad_norm": 1.1789056989836593, - "learning_rate": 3.092232692827324e-09, - "loss": 0.1567, - "step": 19514 - }, - { - "epoch": 0.99, - "grad_norm": 1.1716631678255858, - "learning_rate": 3.0514136628745363e-09, - "loss": 0.1741, - "step": 19515 - }, - { - "epoch": 0.99, - "grad_norm": 0.9295582498624071, - "learning_rate": 3.0108658007155235e-09, - "loss": 0.1647, - "step": 19516 - }, - { - "epoch": 0.99, - "grad_norm": 1.4339393414590325, - "learning_rate": 2.970589107452737e-09, - "loss": 0.1936, - "step": 19517 - }, - { - "epoch": 0.99, - "grad_norm": 1.183292046360521, - "learning_rate": 2.930583584176416e-09, - "loss": 0.1558, - "step": 19518 - }, - { - "epoch": 0.99, - "grad_norm": 0.7651834428055864, - "learning_rate": 2.890849231973469e-09, - "loss": 0.1538, - "step": 19519 - }, - { - "epoch": 0.99, - "grad_norm": 1.5131433513827075, - "learning_rate": 2.851386051919702e-09, - "loss": 0.1729, - "step": 19520 - }, - { - "epoch": 0.99, - "grad_norm": 1.6187138358686646, - "learning_rate": 2.8121940450875907e-09, - "loss": 0.1875, - "step": 19521 - }, - { - "epoch": 0.99, - "grad_norm": 1.12632087757575, - "learning_rate": 2.7732732125396177e-09, - "loss": 0.168, - "step": 19522 - }, - { - "epoch": 0.99, - "grad_norm": 1.022487523217451, - "learning_rate": 2.7346235553304955e-09, - "loss": 0.1642, - "step": 19523 - }, - { - "epoch": 0.99, - "grad_norm": 1.059501541651189, - "learning_rate": 2.696245074509385e-09, - "loss": 0.1501, - "step": 19524 - }, - { - "epoch": 0.99, - "grad_norm": 0.9766296923988161, - "learning_rate": 2.6581377711176747e-09, - "loss": 0.158, - "step": 19525 - }, - { - "epoch": 0.99, - "grad_norm": 1.0403942539814275, - "learning_rate": 2.620301646188983e-09, - "loss": 0.194, - "step": 19526 - }, - { - "epoch": 0.99, - "grad_norm": 1.1544990838019817, - "learning_rate": 2.5827367007491555e-09, - "loss": 0.1649, - "step": 19527 - }, - { - "epoch": 0.99, - "grad_norm": 1.7138085897003958, - "learning_rate": 2.545442935816267e-09, - "loss": 0.1536, - "step": 19528 - }, - { - "epoch": 0.99, - "grad_norm": 0.8899480974681093, - "learning_rate": 2.5084203524039507e-09, - "loss": 0.157, - "step": 19529 - }, - { - "epoch": 0.99, - "grad_norm": 0.9575638072030332, - "learning_rate": 2.4716689515147386e-09, - "loss": 0.1568, - "step": 19530 - }, - { - "epoch": 0.99, - "grad_norm": 2.5434583311996226, - "learning_rate": 2.4351887341467206e-09, - "loss": 0.148, - "step": 19531 - }, - { - "epoch": 0.99, - "grad_norm": 2.157022762265355, - "learning_rate": 2.3989797012879957e-09, - "loss": 0.1785, - "step": 19532 - }, - { - "epoch": 0.99, - "grad_norm": 1.2032012954869988, - "learning_rate": 2.363041853922221e-09, - "loss": 0.1566, - "step": 19533 - }, - { - "epoch": 0.99, - "grad_norm": 1.016008852523729, - "learning_rate": 2.327375193024173e-09, - "loss": 0.1645, - "step": 19534 - }, - { - "epoch": 0.99, - "grad_norm": 0.8736000709689397, - "learning_rate": 2.291979719559745e-09, - "loss": 0.1517, - "step": 19535 - }, - { - "epoch": 0.99, - "grad_norm": 1.8238877899880166, - "learning_rate": 2.25685543449039e-09, - "loss": 0.1519, - "step": 19536 - }, - { - "epoch": 0.99, - "grad_norm": 1.0281866407460118, - "learning_rate": 2.2220023387686805e-09, - "loss": 0.1596, - "step": 19537 - }, - { - "epoch": 0.99, - "grad_norm": 0.8196043357201952, - "learning_rate": 2.1874204333394157e-09, - "loss": 0.1506, - "step": 19538 - }, - { - "epoch": 0.99, - "grad_norm": 1.5190087216009494, - "learning_rate": 2.1531097191418438e-09, - "loss": 0.1812, - "step": 19539 - }, - { - "epoch": 0.99, - "grad_norm": 2.078921323523072, - "learning_rate": 2.1190701971052218e-09, - "loss": 0.1842, - "step": 19540 - }, - { - "epoch": 0.99, - "grad_norm": 1.16461521444874, - "learning_rate": 2.0853018681532557e-09, - "loss": 0.1488, - "step": 19541 - }, - { - "epoch": 0.99, - "grad_norm": 1.0488166753749382, - "learning_rate": 2.051804733202989e-09, - "loss": 0.1616, - "step": 19542 - }, - { - "epoch": 0.99, - "grad_norm": 0.9667513288660236, - "learning_rate": 2.018578793161474e-09, - "loss": 0.1601, - "step": 19543 - }, - { - "epoch": 0.99, - "grad_norm": 0.9142594167680658, - "learning_rate": 1.985624048931323e-09, - "loss": 0.1453, - "step": 19544 - }, - { - "epoch": 0.99, - "grad_norm": 0.9533579569326212, - "learning_rate": 1.952940501405154e-09, - "loss": 0.176, - "step": 19545 - }, - { - "epoch": 0.99, - "grad_norm": 0.8812100501930893, - "learning_rate": 1.9205281514700356e-09, - "loss": 0.1419, - "step": 19546 - }, - { - "epoch": 0.99, - "grad_norm": 2.502962961960015, - "learning_rate": 1.8883870000063753e-09, - "loss": 0.154, - "step": 19547 - }, - { - "epoch": 0.99, - "grad_norm": 1.0899422449191924, - "learning_rate": 1.856517047883477e-09, - "loss": 0.1636, - "step": 19548 - }, - { - "epoch": 0.99, - "grad_norm": 0.9187958817022984, - "learning_rate": 1.8249182959684253e-09, - "loss": 0.1584, - "step": 19549 - }, - { - "epoch": 0.99, - "grad_norm": 0.8635351997259035, - "learning_rate": 1.7935907451172019e-09, - "loss": 0.1413, - "step": 19550 - }, - { - "epoch": 0.99, - "grad_norm": 1.0210498380084243, - "learning_rate": 1.7625343961791275e-09, - "loss": 0.1537, - "step": 19551 - }, - { - "epoch": 0.99, - "grad_norm": 1.689591606566535, - "learning_rate": 1.7317492499968614e-09, - "loss": 0.165, - "step": 19552 - }, - { - "epoch": 0.99, - "grad_norm": 1.3730827056860522, - "learning_rate": 1.7012353074052912e-09, - "loss": 0.1652, - "step": 19553 - }, - { - "epoch": 0.99, - "grad_norm": 0.9159153423477189, - "learning_rate": 1.6709925692326435e-09, - "loss": 0.1768, - "step": 19554 - }, - { - "epoch": 0.99, - "grad_norm": 1.328847538504899, - "learning_rate": 1.6410210362993729e-09, - "loss": 0.1754, - "step": 19555 - }, - { - "epoch": 0.99, - "grad_norm": 0.9906761771767876, - "learning_rate": 1.6113207094181626e-09, - "loss": 0.1257, - "step": 19556 - }, - { - "epoch": 0.99, - "grad_norm": 0.8607195647789683, - "learning_rate": 1.5818915893939246e-09, - "loss": 0.152, - "step": 19557 - }, - { - "epoch": 0.99, - "grad_norm": 0.877998645264861, - "learning_rate": 1.5527336770260193e-09, - "loss": 0.1637, - "step": 19558 - }, - { - "epoch": 0.99, - "grad_norm": 1.148237424551445, - "learning_rate": 1.5238469731049254e-09, - "loss": 0.1579, - "step": 19559 - }, - { - "epoch": 0.99, - "grad_norm": 1.3199269131975926, - "learning_rate": 1.4952314784144606e-09, - "loss": 0.1764, - "step": 19560 - }, - { - "epoch": 0.99, - "grad_norm": 1.5449672703207997, - "learning_rate": 1.4668871937306706e-09, - "loss": 0.1767, - "step": 19561 - }, - { - "epoch": 0.99, - "grad_norm": 0.9551654351745247, - "learning_rate": 1.4388141198218297e-09, - "loss": 0.1352, - "step": 19562 - }, - { - "epoch": 0.99, - "grad_norm": 0.9266927361479481, - "learning_rate": 1.4110122574506612e-09, - "loss": 0.1586, - "step": 19563 - }, - { - "epoch": 0.99, - "grad_norm": 0.8555127548715358, - "learning_rate": 1.3834816073687862e-09, - "loss": 0.1689, - "step": 19564 - }, - { - "epoch": 0.99, - "grad_norm": 0.947450765781288, - "learning_rate": 1.3562221703267153e-09, - "loss": 0.1448, - "step": 19565 - }, - { - "epoch": 0.99, - "grad_norm": 1.5330077115508238, - "learning_rate": 1.3292339470605264e-09, - "loss": 0.168, - "step": 19566 - }, - { - "epoch": 1.0, - "grad_norm": 1.185404398834737, - "learning_rate": 1.302516938304077e-09, - "loss": 0.1527, - "step": 19567 - }, - { - "epoch": 1.0, - "grad_norm": 1.3458661533631802, - "learning_rate": 1.2760711447812324e-09, - "loss": 0.1749, - "step": 19568 - }, - { - "epoch": 1.0, - "grad_norm": 0.8806027444964389, - "learning_rate": 1.249896567210307e-09, - "loss": 0.1538, - "step": 19569 - }, - { - "epoch": 1.0, - "grad_norm": 1.3762344261508295, - "learning_rate": 1.2239932062996229e-09, - "loss": 0.1651, - "step": 19570 - }, - { - "epoch": 1.0, - "grad_norm": 1.3219725268163043, - "learning_rate": 1.198361062754172e-09, - "loss": 0.1637, - "step": 19571 - }, - { - "epoch": 1.0, - "grad_norm": 1.108314030942384, - "learning_rate": 1.1730001372667332e-09, - "loss": 0.1611, - "step": 19572 - }, - { - "epoch": 1.0, - "grad_norm": 1.1122036596028475, - "learning_rate": 1.1479104305267553e-09, - "loss": 0.1613, - "step": 19573 - }, - { - "epoch": 1.0, - "grad_norm": 0.960709597396094, - "learning_rate": 1.1230919432148046e-09, - "loss": 0.1858, - "step": 19574 - }, - { - "epoch": 1.0, - "grad_norm": 1.322649053633847, - "learning_rate": 1.0985446760036766e-09, - "loss": 0.1546, - "step": 19575 - }, - { - "epoch": 1.0, - "grad_norm": 1.0212454361300904, - "learning_rate": 1.074268629559505e-09, - "loss": 0.1615, - "step": 19576 - }, - { - "epoch": 1.0, - "grad_norm": 0.9147610824352448, - "learning_rate": 1.050263804539542e-09, - "loss": 0.1607, - "step": 19577 - }, - { - "epoch": 1.0, - "grad_norm": 1.155984862637736, - "learning_rate": 1.0265302015965984e-09, - "loss": 0.1655, - "step": 19578 - }, - { - "epoch": 1.0, - "grad_norm": 4.158209229203261, - "learning_rate": 1.0030678213746037e-09, - "loss": 0.1675, - "step": 19579 - }, - { - "epoch": 1.0, - "grad_norm": 1.0781213576452073, - "learning_rate": 9.798766645074953e-10, - "loss": 0.1765, - "step": 19580 - }, - { - "epoch": 1.0, - "grad_norm": 0.9225119895905177, - "learning_rate": 9.569567316269901e-10, - "loss": 0.162, - "step": 19581 - }, - { - "epoch": 1.0, - "grad_norm": 1.3798220622535513, - "learning_rate": 9.343080233537028e-10, - "loss": 0.1559, - "step": 19582 - }, - { - "epoch": 1.0, - "grad_norm": 1.5883719794381368, - "learning_rate": 9.119305403015865e-10, - "loss": 0.1681, - "step": 19583 - }, - { - "epoch": 1.0, - "grad_norm": 0.9920192181846645, - "learning_rate": 8.898242830779336e-10, - "loss": 0.1702, - "step": 19584 - }, - { - "epoch": 1.0, - "grad_norm": 1.011582731494466, - "learning_rate": 8.679892522833744e-10, - "loss": 0.152, - "step": 19585 - }, - { - "epoch": 1.0, - "grad_norm": 0.8377799966259223, - "learning_rate": 8.464254485096579e-10, - "loss": 0.1716, - "step": 19586 - }, - { - "epoch": 1.0, - "grad_norm": 1.0654245746696522, - "learning_rate": 8.251328723407615e-10, - "loss": 0.2018, - "step": 19587 - }, - { - "epoch": 1.0, - "grad_norm": 1.0281990884378034, - "learning_rate": 8.04111524354001e-10, - "loss": 0.1515, - "step": 19588 - }, - { - "epoch": 1.0, - "grad_norm": 1.0625756720875446, - "learning_rate": 7.833614051222515e-10, - "loss": 0.1688, - "step": 19589 - }, - { - "epoch": 1.0, - "grad_norm": 1.0201727001752259, - "learning_rate": 7.628825152050656e-10, - "loss": 0.1583, - "step": 19590 - }, - { - "epoch": 1.0, - "grad_norm": 0.9110095625867899, - "learning_rate": 7.426748551597751e-10, - "loss": 0.1504, - "step": 19591 - }, - { - "epoch": 1.0, - "grad_norm": 1.1998072018181751, - "learning_rate": 7.227384255348302e-10, - "loss": 0.1589, - "step": 19592 - }, - { - "epoch": 1.0, - "grad_norm": 1.8852849233106013, - "learning_rate": 7.030732268697993e-10, - "loss": 0.2041, - "step": 19593 - }, - { - "epoch": 1.0, - "grad_norm": 1.0241911357987625, - "learning_rate": 6.836792596986996e-10, - "loss": 0.1654, - "step": 19594 - }, - { - "epoch": 1.0, - "grad_norm": 1.08609554904953, - "learning_rate": 6.64556524547777e-10, - "loss": 0.1553, - "step": 19595 - }, - { - "epoch": 1.0, - "grad_norm": 1.56946727399011, - "learning_rate": 6.457050219355054e-10, - "loss": 0.1606, - "step": 19596 - }, - { - "epoch": 1.0, - "grad_norm": 1.5602719586550857, - "learning_rate": 6.271247523736978e-10, - "loss": 0.1621, - "step": 19597 - }, - { - "epoch": 1.0, - "grad_norm": 1.318659581015296, - "learning_rate": 6.088157163652853e-10, - "loss": 0.1669, - "step": 19598 - }, - { - "epoch": 1.0, - "grad_norm": 0.9424409032077522, - "learning_rate": 5.907779144076475e-10, - "loss": 0.1562, - "step": 19599 - }, - { - "epoch": 1.0, - "grad_norm": 0.9982593233791778, - "learning_rate": 5.730113469903931e-10, - "loss": 0.1598, - "step": 19600 - }, - { - "epoch": 1.0, - "grad_norm": 1.5222263838126218, - "learning_rate": 5.555160145942485e-10, - "loss": 0.1492, - "step": 19601 - }, - { - "epoch": 1.0, - "grad_norm": 1.0665216789776741, - "learning_rate": 5.38291917694389e-10, - "loss": 0.1641, - "step": 19602 - }, - { - "epoch": 1.0, - "grad_norm": 1.2836376729407852, - "learning_rate": 5.21339056759329e-10, - "loss": 0.1614, - "step": 19603 - }, - { - "epoch": 1.0, - "grad_norm": 0.8861752562480091, - "learning_rate": 5.046574322464803e-10, - "loss": 0.1605, - "step": 19604 - }, - { - "epoch": 1.0, - "grad_norm": 1.573271589681279, - "learning_rate": 4.882470446099241e-10, - "loss": 0.1373, - "step": 19605 - }, - { - "epoch": 1.0, - "grad_norm": 1.3650632783304832, - "learning_rate": 4.721078942948598e-10, - "loss": 0.1643, - "step": 19606 - }, - { - "epoch": 1.0, - "grad_norm": 0.9852357360213777, - "learning_rate": 4.562399817376051e-10, - "loss": 0.1739, - "step": 19607 - }, - { - "epoch": 1.0, - "grad_norm": 0.9335982121563716, - "learning_rate": 4.406433073711469e-10, - "loss": 0.1748, - "step": 19608 - }, - { - "epoch": 1.0, - "grad_norm": 1.2664477625014325, - "learning_rate": 4.253178716162598e-10, - "loss": 0.1724, - "step": 19609 - }, - { - "epoch": 1.0, - "grad_norm": 0.9898241612198758, - "learning_rate": 4.1026367488927745e-10, - "loss": 0.1871, - "step": 19610 - }, - { - "epoch": 1.0, - "grad_norm": 0.8668942232628565, - "learning_rate": 3.9548071759876185e-10, - "loss": 0.1578, - "step": 19611 - }, - { - "epoch": 1.0, - "grad_norm": 1.016572741939258, - "learning_rate": 3.809690001455035e-10, - "loss": 0.152, - "step": 19612 - }, - { - "epoch": 1.0, - "grad_norm": 1.0372104824857318, - "learning_rate": 3.667285229236317e-10, - "loss": 0.1711, - "step": 19613 - }, - { - "epoch": 1.0, - "grad_norm": 1.2856759711969599, - "learning_rate": 3.5275928631839375e-10, - "loss": 0.1505, - "step": 19614 - }, - { - "epoch": 1.0, - "grad_norm": 1.2258181466335094, - "learning_rate": 3.390612907094859e-10, - "loss": 0.1553, - "step": 19615 - }, - { - "epoch": 1.0, - "grad_norm": 0.9124728749818874, - "learning_rate": 3.256345364688329e-10, - "loss": 0.1467, - "step": 19616 - }, - { - "epoch": 1.0, - "grad_norm": 0.9079859042074966, - "learning_rate": 3.124790239594777e-10, - "loss": 0.1459, - "step": 19617 - }, - { - "epoch": 1.0, - "grad_norm": 0.8911178077678168, - "learning_rate": 2.995947535389121e-10, - "loss": 0.1631, - "step": 19618 - }, - { - "epoch": 1.0, - "grad_norm": 2.200090105682176, - "learning_rate": 2.8698172555685634e-10, - "loss": 0.1464, - "step": 19619 - }, - { - "epoch": 1.0, - "grad_norm": 0.945105130525938, - "learning_rate": 2.746399403552591e-10, - "loss": 0.1725, - "step": 19620 - }, - { - "epoch": 1.0, - "grad_norm": 1.1593818063618362, - "learning_rate": 2.6256939826940774e-10, - "loss": 0.1595, - "step": 19621 - }, - { - "epoch": 1.0, - "grad_norm": 1.080925000211112, - "learning_rate": 2.5077009962570784e-10, - "loss": 0.1645, - "step": 19622 - }, - { - "epoch": 1.0, - "grad_norm": 0.9285369238641832, - "learning_rate": 2.392420447450139e-10, - "loss": 0.1516, - "step": 19623 - }, - { - "epoch": 1.0, - "grad_norm": 1.3598322147140447, - "learning_rate": 2.279852339392985e-10, - "loss": 0.1464, - "step": 19624 - }, - { - "epoch": 1.0, - "grad_norm": 1.0434638990818221, - "learning_rate": 2.1699966751387303e-10, - "loss": 0.1612, - "step": 19625 - }, - { - "epoch": 1.0, - "grad_norm": 0.9800285397183612, - "learning_rate": 2.0628534576738746e-10, - "loss": 0.1652, - "step": 19626 - }, - { - "epoch": 1.0, - "grad_norm": 1.5006060025917467, - "learning_rate": 1.9584226898961e-10, - "loss": 0.1796, - "step": 19627 - }, - { - "epoch": 1.0, - "grad_norm": 1.118456926792813, - "learning_rate": 1.85670437465868e-10, - "loss": 0.1532, - "step": 19628 - }, - { - "epoch": 1.0, - "grad_norm": 1.3154930824584454, - "learning_rate": 1.757698514692763e-10, - "loss": 0.1737, - "step": 19629 - }, - { - "epoch": 1.0, - "grad_norm": 1.2594644655238652, - "learning_rate": 1.6614051127072929e-10, - "loss": 0.1776, - "step": 19630 - }, - { - "epoch": 1.0, - "grad_norm": 1.2309343752309614, - "learning_rate": 1.567824171300192e-10, - "loss": 0.1664, - "step": 19631 - }, - { - "epoch": 1.0, - "grad_norm": 1.4860321980415303, - "learning_rate": 1.4769556930138707e-10, - "loss": 0.1441, - "step": 19632 - }, - { - "epoch": 1.0, - "grad_norm": 0.9653230918868438, - "learning_rate": 1.3887996803130242e-10, - "loss": 0.1638, - "step": 19633 - }, - { - "epoch": 1.0, - "grad_norm": 2.5851984809016977, - "learning_rate": 1.3033561355846324e-10, - "loss": 0.1601, - "step": 19634 - }, - { - "epoch": 1.0, - "grad_norm": 1.1925293052215677, - "learning_rate": 1.2206250611490612e-10, - "loss": 0.1721, - "step": 19635 - }, - { - "epoch": 1.0, - "grad_norm": 1.0617452077781544, - "learning_rate": 1.1406064592600629e-10, - "loss": 0.1635, - "step": 19636 - }, - { - "epoch": 1.0, - "grad_norm": 1.0323618468439235, - "learning_rate": 1.063300332082573e-10, - "loss": 0.1347, - "step": 19637 - }, - { - "epoch": 1.0, - "grad_norm": 1.2798116231535444, - "learning_rate": 9.887066817038105e-11, - "loss": 0.1562, - "step": 19638 - }, - { - "epoch": 1.0, - "grad_norm": 0.9078167713684868, - "learning_rate": 9.168255101554835e-11, - "loss": 0.148, - "step": 19639 - }, - { - "epoch": 1.0, - "grad_norm": 1.9109382670862405, - "learning_rate": 8.476568193804824e-11, - "loss": 0.1563, - "step": 19640 - }, - { - "epoch": 1.0, - "grad_norm": 0.9894577234866273, - "learning_rate": 7.812006112661863e-11, - "loss": 0.1612, - "step": 19641 - }, - { - "epoch": 1.0, - "grad_norm": 1.0206860112046388, - "learning_rate": 7.174568876111565e-11, - "loss": 0.1626, - "step": 19642 - }, - { - "epoch": 1.0, - "grad_norm": 1.3743462768995467, - "learning_rate": 6.56425650147341e-11, - "loss": 0.1498, - "step": 19643 - }, - { - "epoch": 1.0, - "grad_norm": 0.8318224179362256, - "learning_rate": 5.981069005178697e-11, - "loss": 0.1659, - "step": 19644 - }, - { - "epoch": 1.0, - "grad_norm": 1.247774085231404, - "learning_rate": 5.425006403214639e-11, - "loss": 0.1671, - "step": 19645 - }, - { - "epoch": 1.0, - "grad_norm": 1.2683171725518405, - "learning_rate": 4.8960687104582235e-11, - "loss": 0.17, - "step": 19646 - }, - { - "epoch": 1.0, - "grad_norm": 2.001840363433565, - "learning_rate": 4.3942559414533734e-11, - "loss": 0.1918, - "step": 19647 - }, - { - "epoch": 1.0, - "grad_norm": 1.2938793167991183, - "learning_rate": 3.919568109744809e-11, - "loss": 0.181, - "step": 19648 - }, - { - "epoch": 1.0, - "grad_norm": 1.0800015855464375, - "learning_rate": 3.472005228211117e-11, - "loss": 0.1809, - "step": 19649 - }, - { - "epoch": 1.0, - "grad_norm": 1.0588591780403915, - "learning_rate": 3.051567308953729e-11, - "loss": 0.1805, - "step": 19650 - }, - { - "epoch": 1.0, - "grad_norm": 1.2033718217226848, - "learning_rate": 2.6582543634079416e-11, - "loss": 0.1423, - "step": 19651 - }, - { - "epoch": 1.0, - "grad_norm": 1.4721066909443041, - "learning_rate": 2.292066402120874e-11, - "loss": 0.1634, - "step": 19652 - }, - { - "epoch": 1.0, - "grad_norm": 6.723042537451307, - "learning_rate": 1.9530034353065775e-11, - "loss": 0.1664, - "step": 19653 - }, - { - "epoch": 1.0, - "grad_norm": 0.9442132049513063, - "learning_rate": 1.6410654719578588e-11, - "loss": 0.1605, - "step": 19654 - }, - { - "epoch": 1.0, - "grad_norm": 0.9540795193476649, - "learning_rate": 1.3562525205124134e-11, - "loss": 0.1505, - "step": 19655 - }, - { - "epoch": 1.0, - "grad_norm": 1.1669133271587695, - "learning_rate": 1.0985645887418017e-11, - "loss": 0.1659, - "step": 19656 - }, - { - "epoch": 1.0, - "grad_norm": 1.0352443925181587, - "learning_rate": 8.680016837514516e-12, - "loss": 0.1578, - "step": 19657 - }, - { - "epoch": 1.0, - "grad_norm": 1.2268663110505775, - "learning_rate": 6.645638116475894e-12, - "loss": 0.1621, - "step": 19658 - }, - { - "epoch": 1.0, - "grad_norm": 1.1273871387987642, - "learning_rate": 4.882509779813305e-12, - "loss": 0.1569, - "step": 19659 - }, - { - "epoch": 1.0, - "grad_norm": 1.3269521982201904, - "learning_rate": 3.39063187637656e-12, - "loss": 0.171, - "step": 19660 - }, - { - "epoch": 1.0, - "grad_norm": 1.2765834298934498, - "learning_rate": 2.1700044450234657e-12, - "loss": 0.1679, - "step": 19661 - }, - { - "epoch": 1.0, - "grad_norm": 0.9155996421638892, - "learning_rate": 1.2206275190607132e-12, - "loss": 0.1621, - "step": 19662 - }, - { - "epoch": 1.0, - "grad_norm": 0.8992063234029176, - "learning_rate": 5.425011262438773e-13, - "loss": 0.137, - "step": 19663 - }, - { - "epoch": 1.0, - "grad_norm": 1.401467580475726, - "learning_rate": 1.3562528211608085e-13, - "loss": 0.1682, - "step": 19664 - }, - { - "epoch": 1.0, - "grad_norm": 1.0157853651703213, - "learning_rate": 0.0, - "loss": 0.1681, - "step": 19665 - }, - { - "epoch": 1.0, - "step": 19665, - "total_flos": 4376372611039232.0, - "train_loss": 0.19586870987702348, - "train_runtime": 255341.6294, - "train_samples_per_second": 19.716, - "train_steps_per_second": 0.077 - } - ], - "logging_steps": 1.0, - "max_steps": 19665, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 10000, - "total_flos": 4376372611039232.0, - "train_batch_size": 2, - "trial_name": null, - "trial_params": null -}