{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 200.0, "eval_steps": 500, "global_step": 7400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02702702702702703, "grad_norm": 6.875, "learning_rate": 0.0, "loss": 2.0453, "step": 1 }, { "epoch": 0.05405405405405406, "grad_norm": 6.90625, "learning_rate": 1.3513513513513515e-07, "loss": 2.0353, "step": 2 }, { "epoch": 0.08108108108108109, "grad_norm": 6.6875, "learning_rate": 2.702702702702703e-07, "loss": 1.9996, "step": 3 }, { "epoch": 0.10810810810810811, "grad_norm": 7.15625, "learning_rate": 4.0540540540540546e-07, "loss": 2.065, "step": 4 }, { "epoch": 0.13513513513513514, "grad_norm": 7.0625, "learning_rate": 5.405405405405406e-07, "loss": 2.1069, "step": 5 }, { "epoch": 0.16216216216216217, "grad_norm": 6.875, "learning_rate": 6.756756756756758e-07, "loss": 2.0036, "step": 6 }, { "epoch": 0.1891891891891892, "grad_norm": 7.09375, "learning_rate": 8.108108108108109e-07, "loss": 2.0665, "step": 7 }, { "epoch": 0.21621621621621623, "grad_norm": 7.21875, "learning_rate": 9.45945945945946e-07, "loss": 2.0651, "step": 8 }, { "epoch": 0.24324324324324326, "grad_norm": 6.96875, "learning_rate": 1.0810810810810812e-06, "loss": 2.0047, "step": 9 }, { "epoch": 0.2702702702702703, "grad_norm": 7.0, "learning_rate": 1.2162162162162164e-06, "loss": 2.0633, "step": 10 }, { "epoch": 0.2972972972972973, "grad_norm": 6.8125, "learning_rate": 1.3513513513513515e-06, "loss": 1.9804, "step": 11 }, { "epoch": 0.32432432432432434, "grad_norm": 7.0625, "learning_rate": 1.4864864864864866e-06, "loss": 2.0364, "step": 12 }, { "epoch": 0.35135135135135137, "grad_norm": 7.1875, "learning_rate": 1.6216216216216219e-06, "loss": 2.0729, "step": 13 }, { "epoch": 0.3783783783783784, "grad_norm": 7.09375, "learning_rate": 1.756756756756757e-06, "loss": 2.0485, "step": 14 }, { "epoch": 0.40540540540540543, "grad_norm": 7.21875, "learning_rate": 1.891891891891892e-06, "loss": 2.0739, "step": 15 }, { "epoch": 0.43243243243243246, "grad_norm": 6.875, "learning_rate": 2.0270270270270273e-06, "loss": 2.0768, "step": 16 }, { "epoch": 0.4594594594594595, "grad_norm": 7.21875, "learning_rate": 2.1621621621621623e-06, "loss": 2.091, "step": 17 }, { "epoch": 0.4864864864864865, "grad_norm": 6.875, "learning_rate": 2.2972972972972974e-06, "loss": 2.0193, "step": 18 }, { "epoch": 0.5135135135135135, "grad_norm": 6.46875, "learning_rate": 2.432432432432433e-06, "loss": 2.0188, "step": 19 }, { "epoch": 0.5405405405405406, "grad_norm": 6.90625, "learning_rate": 2.567567567567568e-06, "loss": 2.0085, "step": 20 }, { "epoch": 0.5675675675675675, "grad_norm": 7.1875, "learning_rate": 2.702702702702703e-06, "loss": 2.0599, "step": 21 }, { "epoch": 0.5945945945945946, "grad_norm": 6.78125, "learning_rate": 2.837837837837838e-06, "loss": 2.0477, "step": 22 }, { "epoch": 0.6216216216216216, "grad_norm": 6.96875, "learning_rate": 2.972972972972973e-06, "loss": 2.0224, "step": 23 }, { "epoch": 0.6486486486486487, "grad_norm": 6.875, "learning_rate": 3.1081081081081086e-06, "loss": 2.0959, "step": 24 }, { "epoch": 0.6756756756756757, "grad_norm": 6.9375, "learning_rate": 3.2432432432432437e-06, "loss": 2.0103, "step": 25 }, { "epoch": 0.7027027027027027, "grad_norm": 6.8125, "learning_rate": 3.3783783783783788e-06, "loss": 2.0299, "step": 26 }, { "epoch": 0.7297297297297297, "grad_norm": 6.375, "learning_rate": 3.513513513513514e-06, "loss": 2.0116, "step": 27 }, { "epoch": 0.7567567567567568, "grad_norm": 6.4375, "learning_rate": 3.648648648648649e-06, "loss": 2.005, "step": 28 }, { "epoch": 0.7837837837837838, "grad_norm": 6.9375, "learning_rate": 3.783783783783784e-06, "loss": 2.0049, "step": 29 }, { "epoch": 0.8108108108108109, "grad_norm": 6.78125, "learning_rate": 3.918918918918919e-06, "loss": 1.9917, "step": 30 }, { "epoch": 0.8378378378378378, "grad_norm": 6.75, "learning_rate": 4.0540540540540545e-06, "loss": 2.0227, "step": 31 }, { "epoch": 0.8648648648648649, "grad_norm": 6.46875, "learning_rate": 4.18918918918919e-06, "loss": 1.9622, "step": 32 }, { "epoch": 0.8918918918918919, "grad_norm": 6.375, "learning_rate": 4.324324324324325e-06, "loss": 1.9902, "step": 33 }, { "epoch": 0.918918918918919, "grad_norm": 6.5625, "learning_rate": 4.45945945945946e-06, "loss": 1.9692, "step": 34 }, { "epoch": 0.9459459459459459, "grad_norm": 6.28125, "learning_rate": 4.594594594594595e-06, "loss": 1.9275, "step": 35 }, { "epoch": 0.972972972972973, "grad_norm": 6.21875, "learning_rate": 4.72972972972973e-06, "loss": 1.9781, "step": 36 }, { "epoch": 1.0, "grad_norm": 6.21875, "learning_rate": 4.864864864864866e-06, "loss": 1.9352, "step": 37 }, { "epoch": 1.027027027027027, "grad_norm": 6.15625, "learning_rate": 5e-06, "loss": 1.9275, "step": 38 }, { "epoch": 1.054054054054054, "grad_norm": 5.75, "learning_rate": 5.135135135135136e-06, "loss": 1.8881, "step": 39 }, { "epoch": 1.0810810810810811, "grad_norm": 6.40625, "learning_rate": 5.2702702702702705e-06, "loss": 1.8939, "step": 40 }, { "epoch": 1.1081081081081081, "grad_norm": 5.84375, "learning_rate": 5.405405405405406e-06, "loss": 1.8475, "step": 41 }, { "epoch": 1.135135135135135, "grad_norm": 5.96875, "learning_rate": 5.5405405405405415e-06, "loss": 1.8966, "step": 42 }, { "epoch": 1.1621621621621623, "grad_norm": 5.75, "learning_rate": 5.675675675675676e-06, "loss": 1.8758, "step": 43 }, { "epoch": 1.1891891891891893, "grad_norm": 5.6875, "learning_rate": 5.810810810810812e-06, "loss": 1.8366, "step": 44 }, { "epoch": 1.2162162162162162, "grad_norm": 5.59375, "learning_rate": 5.945945945945946e-06, "loss": 1.8425, "step": 45 }, { "epoch": 1.2432432432432432, "grad_norm": 5.46875, "learning_rate": 6.081081081081082e-06, "loss": 1.8729, "step": 46 }, { "epoch": 1.2702702702702702, "grad_norm": 5.28125, "learning_rate": 6.216216216216217e-06, "loss": 1.7916, "step": 47 }, { "epoch": 1.2972972972972974, "grad_norm": 5.5, "learning_rate": 6.351351351351352e-06, "loss": 1.837, "step": 48 }, { "epoch": 1.3243243243243243, "grad_norm": 5.3125, "learning_rate": 6.486486486486487e-06, "loss": 1.8301, "step": 49 }, { "epoch": 1.3513513513513513, "grad_norm": 4.875, "learning_rate": 6.621621621621622e-06, "loss": 1.787, "step": 50 }, { "epoch": 1.3783783783783785, "grad_norm": 4.96875, "learning_rate": 6.7567567567567575e-06, "loss": 1.7989, "step": 51 }, { "epoch": 1.4054054054054055, "grad_norm": 4.8125, "learning_rate": 6.891891891891893e-06, "loss": 1.7882, "step": 52 }, { "epoch": 1.4324324324324325, "grad_norm": 4.84375, "learning_rate": 7.027027027027028e-06, "loss": 1.747, "step": 53 }, { "epoch": 1.4594594594594594, "grad_norm": 5.125, "learning_rate": 7.162162162162163e-06, "loss": 1.7576, "step": 54 }, { "epoch": 1.4864864864864864, "grad_norm": 4.625, "learning_rate": 7.297297297297298e-06, "loss": 1.727, "step": 55 }, { "epoch": 1.5135135135135136, "grad_norm": 4.5625, "learning_rate": 7.432432432432433e-06, "loss": 1.717, "step": 56 }, { "epoch": 1.5405405405405406, "grad_norm": 4.375, "learning_rate": 7.567567567567568e-06, "loss": 1.7196, "step": 57 }, { "epoch": 1.5675675675675675, "grad_norm": 4.28125, "learning_rate": 7.702702702702704e-06, "loss": 1.705, "step": 58 }, { "epoch": 1.5945945945945947, "grad_norm": 4.09375, "learning_rate": 7.837837837837838e-06, "loss": 1.6681, "step": 59 }, { "epoch": 1.6216216216216215, "grad_norm": 4.15625, "learning_rate": 7.972972972972974e-06, "loss": 1.7032, "step": 60 }, { "epoch": 1.6486486486486487, "grad_norm": 4.09375, "learning_rate": 8.108108108108109e-06, "loss": 1.6633, "step": 61 }, { "epoch": 1.6756756756756757, "grad_norm": 4.53125, "learning_rate": 8.243243243243245e-06, "loss": 1.6477, "step": 62 }, { "epoch": 1.7027027027027026, "grad_norm": 4.15625, "learning_rate": 8.37837837837838e-06, "loss": 1.6168, "step": 63 }, { "epoch": 1.7297297297297298, "grad_norm": 3.78125, "learning_rate": 8.513513513513514e-06, "loss": 1.5634, "step": 64 }, { "epoch": 1.7567567567567568, "grad_norm": 3.421875, "learning_rate": 8.64864864864865e-06, "loss": 1.5868, "step": 65 }, { "epoch": 1.7837837837837838, "grad_norm": 3.34375, "learning_rate": 8.783783783783785e-06, "loss": 1.6162, "step": 66 }, { "epoch": 1.810810810810811, "grad_norm": 2.9375, "learning_rate": 8.91891891891892e-06, "loss": 1.5436, "step": 67 }, { "epoch": 1.8378378378378377, "grad_norm": 2.828125, "learning_rate": 9.054054054054056e-06, "loss": 1.5672, "step": 68 }, { "epoch": 1.864864864864865, "grad_norm": 2.75, "learning_rate": 9.18918918918919e-06, "loss": 1.5513, "step": 69 }, { "epoch": 1.8918918918918919, "grad_norm": 2.53125, "learning_rate": 9.324324324324325e-06, "loss": 1.5542, "step": 70 }, { "epoch": 1.9189189189189189, "grad_norm": 2.734375, "learning_rate": 9.45945945945946e-06, "loss": 1.47, "step": 71 }, { "epoch": 1.945945945945946, "grad_norm": 2.234375, "learning_rate": 9.594594594594596e-06, "loss": 1.4972, "step": 72 }, { "epoch": 1.972972972972973, "grad_norm": 2.1875, "learning_rate": 9.729729729729732e-06, "loss": 1.4702, "step": 73 }, { "epoch": 2.0, "grad_norm": 2.171875, "learning_rate": 9.864864864864865e-06, "loss": 1.4774, "step": 74 }, { "epoch": 2.027027027027027, "grad_norm": 2.03125, "learning_rate": 1e-05, "loss": 1.4892, "step": 75 }, { "epoch": 2.054054054054054, "grad_norm": 2.15625, "learning_rate": 1.0135135135135136e-05, "loss": 1.4845, "step": 76 }, { "epoch": 2.081081081081081, "grad_norm": 2.53125, "learning_rate": 1.0270270270270272e-05, "loss": 1.5016, "step": 77 }, { "epoch": 2.108108108108108, "grad_norm": 2.125, "learning_rate": 1.0405405405405407e-05, "loss": 1.4514, "step": 78 }, { "epoch": 2.135135135135135, "grad_norm": 1.859375, "learning_rate": 1.0540540540540541e-05, "loss": 1.4397, "step": 79 }, { "epoch": 2.1621621621621623, "grad_norm": 1.8984375, "learning_rate": 1.0675675675675677e-05, "loss": 1.4369, "step": 80 }, { "epoch": 2.189189189189189, "grad_norm": 1.765625, "learning_rate": 1.0810810810810812e-05, "loss": 1.4077, "step": 81 }, { "epoch": 2.2162162162162162, "grad_norm": 2.078125, "learning_rate": 1.0945945945945948e-05, "loss": 1.4292, "step": 82 }, { "epoch": 2.2432432432432434, "grad_norm": 2.15625, "learning_rate": 1.1081081081081083e-05, "loss": 1.4093, "step": 83 }, { "epoch": 2.27027027027027, "grad_norm": 1.8046875, "learning_rate": 1.1216216216216217e-05, "loss": 1.4112, "step": 84 }, { "epoch": 2.2972972972972974, "grad_norm": 1.9453125, "learning_rate": 1.1351351351351352e-05, "loss": 1.3784, "step": 85 }, { "epoch": 2.3243243243243246, "grad_norm": 1.5625, "learning_rate": 1.1486486486486488e-05, "loss": 1.3944, "step": 86 }, { "epoch": 2.3513513513513513, "grad_norm": 1.796875, "learning_rate": 1.1621621621621623e-05, "loss": 1.4348, "step": 87 }, { "epoch": 2.3783783783783785, "grad_norm": 1.9453125, "learning_rate": 1.1756756756756759e-05, "loss": 1.3964, "step": 88 }, { "epoch": 2.4054054054054053, "grad_norm": 1.7265625, "learning_rate": 1.1891891891891893e-05, "loss": 1.3894, "step": 89 }, { "epoch": 2.4324324324324325, "grad_norm": 1.6875, "learning_rate": 1.2027027027027028e-05, "loss": 1.4322, "step": 90 }, { "epoch": 2.4594594594594597, "grad_norm": 1.8671875, "learning_rate": 1.2162162162162164e-05, "loss": 1.3988, "step": 91 }, { "epoch": 2.4864864864864864, "grad_norm": 1.625, "learning_rate": 1.2297297297297299e-05, "loss": 1.3513, "step": 92 }, { "epoch": 2.5135135135135136, "grad_norm": 1.6796875, "learning_rate": 1.2432432432432435e-05, "loss": 1.4075, "step": 93 }, { "epoch": 2.5405405405405403, "grad_norm": 1.5859375, "learning_rate": 1.2567567567567567e-05, "loss": 1.3621, "step": 94 }, { "epoch": 2.5675675675675675, "grad_norm": 1.6484375, "learning_rate": 1.2702702702702704e-05, "loss": 1.3877, "step": 95 }, { "epoch": 2.5945945945945947, "grad_norm": 1.5234375, "learning_rate": 1.2837837837837838e-05, "loss": 1.3663, "step": 96 }, { "epoch": 2.6216216216216215, "grad_norm": 1.7578125, "learning_rate": 1.2972972972972975e-05, "loss": 1.3849, "step": 97 }, { "epoch": 2.6486486486486487, "grad_norm": 1.5, "learning_rate": 1.3108108108108109e-05, "loss": 1.3893, "step": 98 }, { "epoch": 2.6756756756756754, "grad_norm": 1.65625, "learning_rate": 1.3243243243243244e-05, "loss": 1.3715, "step": 99 }, { "epoch": 2.7027027027027026, "grad_norm": 1.5859375, "learning_rate": 1.3378378378378378e-05, "loss": 1.3792, "step": 100 }, { "epoch": 2.72972972972973, "grad_norm": 1.4921875, "learning_rate": 1.3513513513513515e-05, "loss": 1.4089, "step": 101 }, { "epoch": 2.756756756756757, "grad_norm": 1.640625, "learning_rate": 1.3648648648648649e-05, "loss": 1.3168, "step": 102 }, { "epoch": 2.7837837837837838, "grad_norm": 1.6328125, "learning_rate": 1.3783783783783786e-05, "loss": 1.3796, "step": 103 }, { "epoch": 2.810810810810811, "grad_norm": 1.7578125, "learning_rate": 1.3918918918918918e-05, "loss": 1.3647, "step": 104 }, { "epoch": 2.8378378378378377, "grad_norm": 1.8828125, "learning_rate": 1.4054054054054055e-05, "loss": 1.3681, "step": 105 }, { "epoch": 2.864864864864865, "grad_norm": 1.625, "learning_rate": 1.4189189189189189e-05, "loss": 1.3616, "step": 106 }, { "epoch": 2.891891891891892, "grad_norm": 1.59375, "learning_rate": 1.4324324324324326e-05, "loss": 1.3394, "step": 107 }, { "epoch": 2.918918918918919, "grad_norm": 1.5390625, "learning_rate": 1.445945945945946e-05, "loss": 1.3578, "step": 108 }, { "epoch": 2.945945945945946, "grad_norm": 1.7265625, "learning_rate": 1.4594594594594596e-05, "loss": 1.2969, "step": 109 }, { "epoch": 2.972972972972973, "grad_norm": 1.8359375, "learning_rate": 1.472972972972973e-05, "loss": 1.3492, "step": 110 }, { "epoch": 3.0, "grad_norm": 1.9921875, "learning_rate": 1.4864864864864867e-05, "loss": 1.3677, "step": 111 }, { "epoch": 3.027027027027027, "grad_norm": 2.03125, "learning_rate": 1.5e-05, "loss": 1.3425, "step": 112 }, { "epoch": 3.054054054054054, "grad_norm": 1.703125, "learning_rate": 1.5135135135135136e-05, "loss": 1.3242, "step": 113 }, { "epoch": 3.081081081081081, "grad_norm": 1.3046875, "learning_rate": 1.527027027027027e-05, "loss": 1.3445, "step": 114 }, { "epoch": 3.108108108108108, "grad_norm": 2.046875, "learning_rate": 1.540540540540541e-05, "loss": 1.3343, "step": 115 }, { "epoch": 3.135135135135135, "grad_norm": 1.671875, "learning_rate": 1.554054054054054e-05, "loss": 1.2885, "step": 116 }, { "epoch": 3.1621621621621623, "grad_norm": 1.8828125, "learning_rate": 1.5675675675675676e-05, "loss": 1.3266, "step": 117 }, { "epoch": 3.189189189189189, "grad_norm": 1.8515625, "learning_rate": 1.581081081081081e-05, "loss": 1.3078, "step": 118 }, { "epoch": 3.2162162162162162, "grad_norm": 1.7421875, "learning_rate": 1.5945945945945947e-05, "loss": 1.2879, "step": 119 }, { "epoch": 3.2432432432432434, "grad_norm": 1.8828125, "learning_rate": 1.6081081081081083e-05, "loss": 1.3198, "step": 120 }, { "epoch": 3.27027027027027, "grad_norm": 1.6015625, "learning_rate": 1.6216216216216218e-05, "loss": 1.2537, "step": 121 }, { "epoch": 3.2972972972972974, "grad_norm": 1.953125, "learning_rate": 1.635135135135135e-05, "loss": 1.3177, "step": 122 }, { "epoch": 3.3243243243243246, "grad_norm": 1.8046875, "learning_rate": 1.648648648648649e-05, "loss": 1.3124, "step": 123 }, { "epoch": 3.3513513513513513, "grad_norm": 1.734375, "learning_rate": 1.662162162162162e-05, "loss": 1.2957, "step": 124 }, { "epoch": 3.3783783783783785, "grad_norm": 1.625, "learning_rate": 1.675675675675676e-05, "loss": 1.2855, "step": 125 }, { "epoch": 3.4054054054054053, "grad_norm": 1.84375, "learning_rate": 1.6891891891891892e-05, "loss": 1.31, "step": 126 }, { "epoch": 3.4324324324324325, "grad_norm": 1.7265625, "learning_rate": 1.7027027027027028e-05, "loss": 1.313, "step": 127 }, { "epoch": 3.4594594594594597, "grad_norm": 1.875, "learning_rate": 1.7162162162162163e-05, "loss": 1.285, "step": 128 }, { "epoch": 3.4864864864864864, "grad_norm": 1.71875, "learning_rate": 1.72972972972973e-05, "loss": 1.2774, "step": 129 }, { "epoch": 3.5135135135135136, "grad_norm": 1.7734375, "learning_rate": 1.7432432432432434e-05, "loss": 1.2469, "step": 130 }, { "epoch": 3.5405405405405403, "grad_norm": 1.8359375, "learning_rate": 1.756756756756757e-05, "loss": 1.2735, "step": 131 }, { "epoch": 3.5675675675675675, "grad_norm": 1.78125, "learning_rate": 1.7702702702702702e-05, "loss": 1.2575, "step": 132 }, { "epoch": 3.5945945945945947, "grad_norm": 1.6015625, "learning_rate": 1.783783783783784e-05, "loss": 1.2764, "step": 133 }, { "epoch": 3.6216216216216215, "grad_norm": 1.796875, "learning_rate": 1.7972972972972973e-05, "loss": 1.2257, "step": 134 }, { "epoch": 3.6486486486486487, "grad_norm": 1.96875, "learning_rate": 1.810810810810811e-05, "loss": 1.2378, "step": 135 }, { "epoch": 3.6756756756756754, "grad_norm": 1.8671875, "learning_rate": 1.8243243243243244e-05, "loss": 1.2623, "step": 136 }, { "epoch": 3.7027027027027026, "grad_norm": 1.9921875, "learning_rate": 1.837837837837838e-05, "loss": 1.2509, "step": 137 }, { "epoch": 3.72972972972973, "grad_norm": 1.796875, "learning_rate": 1.8513513513513515e-05, "loss": 1.2318, "step": 138 }, { "epoch": 3.756756756756757, "grad_norm": 1.78125, "learning_rate": 1.864864864864865e-05, "loss": 1.2631, "step": 139 }, { "epoch": 3.7837837837837838, "grad_norm": 1.78125, "learning_rate": 1.8783783783783786e-05, "loss": 1.1737, "step": 140 }, { "epoch": 3.810810810810811, "grad_norm": 1.7265625, "learning_rate": 1.891891891891892e-05, "loss": 1.1931, "step": 141 }, { "epoch": 3.8378378378378377, "grad_norm": 1.875, "learning_rate": 1.9054054054054053e-05, "loss": 1.2366, "step": 142 }, { "epoch": 3.864864864864865, "grad_norm": 1.90625, "learning_rate": 1.9189189189189192e-05, "loss": 1.2096, "step": 143 }, { "epoch": 3.891891891891892, "grad_norm": 1.7578125, "learning_rate": 1.9324324324324324e-05, "loss": 1.2085, "step": 144 }, { "epoch": 3.918918918918919, "grad_norm": 1.859375, "learning_rate": 1.9459459459459463e-05, "loss": 1.1855, "step": 145 }, { "epoch": 3.945945945945946, "grad_norm": 1.8046875, "learning_rate": 1.9594594594594595e-05, "loss": 1.1795, "step": 146 }, { "epoch": 3.972972972972973, "grad_norm": 1.6484375, "learning_rate": 1.972972972972973e-05, "loss": 1.1722, "step": 147 }, { "epoch": 4.0, "grad_norm": 1.703125, "learning_rate": 1.9864864864864866e-05, "loss": 1.1764, "step": 148 }, { "epoch": 4.027027027027027, "grad_norm": 1.8203125, "learning_rate": 2e-05, "loss": 1.1796, "step": 149 }, { "epoch": 4.054054054054054, "grad_norm": 1.765625, "learning_rate": 2.0135135135135137e-05, "loss": 1.1642, "step": 150 }, { "epoch": 4.081081081081081, "grad_norm": 1.703125, "learning_rate": 2.0270270270270273e-05, "loss": 1.1575, "step": 151 }, { "epoch": 4.108108108108108, "grad_norm": 1.71875, "learning_rate": 2.0405405405405405e-05, "loss": 1.1535, "step": 152 }, { "epoch": 4.135135135135135, "grad_norm": 1.6640625, "learning_rate": 2.0540540540540544e-05, "loss": 1.1435, "step": 153 }, { "epoch": 4.162162162162162, "grad_norm": 1.640625, "learning_rate": 2.0675675675675676e-05, "loss": 1.1473, "step": 154 }, { "epoch": 4.1891891891891895, "grad_norm": 1.578125, "learning_rate": 2.0810810810810815e-05, "loss": 1.1604, "step": 155 }, { "epoch": 4.216216216216216, "grad_norm": 1.6015625, "learning_rate": 2.0945945945945947e-05, "loss": 1.1186, "step": 156 }, { "epoch": 4.243243243243243, "grad_norm": 1.6484375, "learning_rate": 2.1081081081081082e-05, "loss": 1.1358, "step": 157 }, { "epoch": 4.27027027027027, "grad_norm": 1.6640625, "learning_rate": 2.1216216216216218e-05, "loss": 1.1236, "step": 158 }, { "epoch": 4.297297297297297, "grad_norm": 1.8828125, "learning_rate": 2.1351351351351353e-05, "loss": 1.1779, "step": 159 }, { "epoch": 4.324324324324325, "grad_norm": 1.859375, "learning_rate": 2.148648648648649e-05, "loss": 1.1745, "step": 160 }, { "epoch": 4.351351351351352, "grad_norm": 1.7734375, "learning_rate": 2.1621621621621624e-05, "loss": 1.1518, "step": 161 }, { "epoch": 4.378378378378378, "grad_norm": 1.765625, "learning_rate": 2.1756756756756756e-05, "loss": 1.1562, "step": 162 }, { "epoch": 4.405405405405405, "grad_norm": 1.96875, "learning_rate": 2.1891891891891895e-05, "loss": 1.1702, "step": 163 }, { "epoch": 4.4324324324324325, "grad_norm": 1.7109375, "learning_rate": 2.2027027027027027e-05, "loss": 1.1253, "step": 164 }, { "epoch": 4.45945945945946, "grad_norm": 1.75, "learning_rate": 2.2162162162162166e-05, "loss": 1.1199, "step": 165 }, { "epoch": 4.486486486486487, "grad_norm": 1.703125, "learning_rate": 2.2297297297297298e-05, "loss": 1.1036, "step": 166 }, { "epoch": 4.513513513513513, "grad_norm": 1.59375, "learning_rate": 2.2432432432432434e-05, "loss": 1.1292, "step": 167 }, { "epoch": 4.54054054054054, "grad_norm": 1.8515625, "learning_rate": 2.256756756756757e-05, "loss": 1.1077, "step": 168 }, { "epoch": 4.5675675675675675, "grad_norm": 1.6796875, "learning_rate": 2.2702702702702705e-05, "loss": 1.1155, "step": 169 }, { "epoch": 4.594594594594595, "grad_norm": 1.640625, "learning_rate": 2.283783783783784e-05, "loss": 1.0983, "step": 170 }, { "epoch": 4.621621621621622, "grad_norm": 1.5546875, "learning_rate": 2.2972972972972976e-05, "loss": 1.1254, "step": 171 }, { "epoch": 4.648648648648649, "grad_norm": 1.625, "learning_rate": 2.3108108108108108e-05, "loss": 1.1066, "step": 172 }, { "epoch": 4.675675675675675, "grad_norm": 1.859375, "learning_rate": 2.3243243243243247e-05, "loss": 1.1744, "step": 173 }, { "epoch": 4.702702702702703, "grad_norm": 1.6640625, "learning_rate": 2.337837837837838e-05, "loss": 1.1333, "step": 174 }, { "epoch": 4.72972972972973, "grad_norm": 1.71875, "learning_rate": 2.3513513513513518e-05, "loss": 1.1221, "step": 175 }, { "epoch": 4.756756756756757, "grad_norm": 1.7109375, "learning_rate": 2.364864864864865e-05, "loss": 1.0987, "step": 176 }, { "epoch": 4.783783783783784, "grad_norm": 1.6796875, "learning_rate": 2.3783783783783785e-05, "loss": 1.1125, "step": 177 }, { "epoch": 4.8108108108108105, "grad_norm": 1.6953125, "learning_rate": 2.391891891891892e-05, "loss": 1.1366, "step": 178 }, { "epoch": 4.837837837837838, "grad_norm": 1.734375, "learning_rate": 2.4054054054054056e-05, "loss": 1.1167, "step": 179 }, { "epoch": 4.864864864864865, "grad_norm": 1.5703125, "learning_rate": 2.418918918918919e-05, "loss": 1.0867, "step": 180 }, { "epoch": 4.891891891891892, "grad_norm": 1.59375, "learning_rate": 2.4324324324324327e-05, "loss": 1.1044, "step": 181 }, { "epoch": 4.918918918918919, "grad_norm": 1.640625, "learning_rate": 2.445945945945946e-05, "loss": 1.1076, "step": 182 }, { "epoch": 4.945945945945946, "grad_norm": 1.671875, "learning_rate": 2.4594594594594598e-05, "loss": 1.1689, "step": 183 }, { "epoch": 4.972972972972973, "grad_norm": 1.5390625, "learning_rate": 2.472972972972973e-05, "loss": 1.099, "step": 184 }, { "epoch": 5.0, "grad_norm": 1.65625, "learning_rate": 2.486486486486487e-05, "loss": 1.1149, "step": 185 }, { "epoch": 5.027027027027027, "grad_norm": 1.640625, "learning_rate": 2.5e-05, "loss": 1.1008, "step": 186 }, { "epoch": 5.054054054054054, "grad_norm": 1.6875, "learning_rate": 2.5135135135135133e-05, "loss": 1.1325, "step": 187 }, { "epoch": 5.081081081081081, "grad_norm": 1.5078125, "learning_rate": 2.527027027027027e-05, "loss": 1.0725, "step": 188 }, { "epoch": 5.108108108108108, "grad_norm": 1.6328125, "learning_rate": 2.5405405405405408e-05, "loss": 1.0922, "step": 189 }, { "epoch": 5.135135135135135, "grad_norm": 1.796875, "learning_rate": 2.5540540540540543e-05, "loss": 1.1311, "step": 190 }, { "epoch": 5.162162162162162, "grad_norm": 1.484375, "learning_rate": 2.5675675675675675e-05, "loss": 1.099, "step": 191 }, { "epoch": 5.1891891891891895, "grad_norm": 1.625, "learning_rate": 2.581081081081081e-05, "loss": 1.0902, "step": 192 }, { "epoch": 5.216216216216216, "grad_norm": 1.515625, "learning_rate": 2.594594594594595e-05, "loss": 1.1238, "step": 193 }, { "epoch": 5.243243243243243, "grad_norm": 1.46875, "learning_rate": 2.6081081081081082e-05, "loss": 1.0939, "step": 194 }, { "epoch": 5.27027027027027, "grad_norm": 1.6171875, "learning_rate": 2.6216216216216217e-05, "loss": 1.1197, "step": 195 }, { "epoch": 5.297297297297297, "grad_norm": 1.609375, "learning_rate": 2.635135135135135e-05, "loss": 1.098, "step": 196 }, { "epoch": 5.324324324324325, "grad_norm": 1.609375, "learning_rate": 2.6486486486486488e-05, "loss": 1.0892, "step": 197 }, { "epoch": 5.351351351351352, "grad_norm": 2.0625, "learning_rate": 2.6621621621621624e-05, "loss": 1.1118, "step": 198 }, { "epoch": 5.378378378378378, "grad_norm": 1.4609375, "learning_rate": 2.6756756756756756e-05, "loss": 1.0731, "step": 199 }, { "epoch": 5.405405405405405, "grad_norm": 1.5859375, "learning_rate": 2.689189189189189e-05, "loss": 1.1617, "step": 200 }, { "epoch": 5.4324324324324325, "grad_norm": 1.875, "learning_rate": 2.702702702702703e-05, "loss": 1.1064, "step": 201 }, { "epoch": 5.45945945945946, "grad_norm": 1.65625, "learning_rate": 2.7162162162162162e-05, "loss": 1.059, "step": 202 }, { "epoch": 5.486486486486487, "grad_norm": 1.859375, "learning_rate": 2.7297297297297298e-05, "loss": 1.0688, "step": 203 }, { "epoch": 5.513513513513513, "grad_norm": 1.4921875, "learning_rate": 2.743243243243243e-05, "loss": 1.098, "step": 204 }, { "epoch": 5.54054054054054, "grad_norm": 2.03125, "learning_rate": 2.7567567567567572e-05, "loss": 1.1024, "step": 205 }, { "epoch": 5.5675675675675675, "grad_norm": 2.21875, "learning_rate": 2.7702702702702704e-05, "loss": 1.1163, "step": 206 }, { "epoch": 5.594594594594595, "grad_norm": 1.5546875, "learning_rate": 2.7837837837837836e-05, "loss": 1.1315, "step": 207 }, { "epoch": 5.621621621621622, "grad_norm": 1.640625, "learning_rate": 2.7972972972972972e-05, "loss": 1.108, "step": 208 }, { "epoch": 5.648648648648649, "grad_norm": 1.6328125, "learning_rate": 2.810810810810811e-05, "loss": 1.0894, "step": 209 }, { "epoch": 5.675675675675675, "grad_norm": 1.703125, "learning_rate": 2.8243243243243246e-05, "loss": 1.1144, "step": 210 }, { "epoch": 5.702702702702703, "grad_norm": 1.9296875, "learning_rate": 2.8378378378378378e-05, "loss": 1.104, "step": 211 }, { "epoch": 5.72972972972973, "grad_norm": 1.5234375, "learning_rate": 2.8513513513513514e-05, "loss": 1.0857, "step": 212 }, { "epoch": 5.756756756756757, "grad_norm": 1.609375, "learning_rate": 2.8648648648648653e-05, "loss": 1.1336, "step": 213 }, { "epoch": 5.783783783783784, "grad_norm": 1.640625, "learning_rate": 2.8783783783783785e-05, "loss": 1.1338, "step": 214 }, { "epoch": 5.8108108108108105, "grad_norm": 1.6796875, "learning_rate": 2.891891891891892e-05, "loss": 1.0556, "step": 215 }, { "epoch": 5.837837837837838, "grad_norm": 1.8046875, "learning_rate": 2.9054054054054052e-05, "loss": 1.0829, "step": 216 }, { "epoch": 5.864864864864865, "grad_norm": 1.6484375, "learning_rate": 2.918918918918919e-05, "loss": 1.0795, "step": 217 }, { "epoch": 5.891891891891892, "grad_norm": 1.984375, "learning_rate": 2.9324324324324327e-05, "loss": 1.064, "step": 218 }, { "epoch": 5.918918918918919, "grad_norm": 1.6953125, "learning_rate": 2.945945945945946e-05, "loss": 1.0801, "step": 219 }, { "epoch": 5.945945945945946, "grad_norm": 1.7578125, "learning_rate": 2.9594594594594594e-05, "loss": 1.0684, "step": 220 }, { "epoch": 5.972972972972973, "grad_norm": 1.515625, "learning_rate": 2.9729729729729733e-05, "loss": 1.0927, "step": 221 }, { "epoch": 6.0, "grad_norm": 1.5859375, "learning_rate": 2.9864864864864865e-05, "loss": 1.0969, "step": 222 }, { "epoch": 6.027027027027027, "grad_norm": 1.7890625, "learning_rate": 3e-05, "loss": 1.0706, "step": 223 }, { "epoch": 6.054054054054054, "grad_norm": 2.078125, "learning_rate": 3.0135135135135133e-05, "loss": 1.1105, "step": 224 }, { "epoch": 6.081081081081081, "grad_norm": 1.8046875, "learning_rate": 3.0270270270270272e-05, "loss": 1.0545, "step": 225 }, { "epoch": 6.108108108108108, "grad_norm": 1.5625, "learning_rate": 3.0405405405405407e-05, "loss": 1.064, "step": 226 }, { "epoch": 6.135135135135135, "grad_norm": 1.5625, "learning_rate": 3.054054054054054e-05, "loss": 1.0661, "step": 227 }, { "epoch": 6.162162162162162, "grad_norm": 1.59375, "learning_rate": 3.0675675675675675e-05, "loss": 1.0642, "step": 228 }, { "epoch": 6.1891891891891895, "grad_norm": 1.734375, "learning_rate": 3.081081081081082e-05, "loss": 1.0542, "step": 229 }, { "epoch": 6.216216216216216, "grad_norm": 1.984375, "learning_rate": 3.0945945945945946e-05, "loss": 1.0861, "step": 230 }, { "epoch": 6.243243243243243, "grad_norm": 1.6484375, "learning_rate": 3.108108108108108e-05, "loss": 1.0644, "step": 231 }, { "epoch": 6.27027027027027, "grad_norm": 1.5078125, "learning_rate": 3.121621621621622e-05, "loss": 1.0406, "step": 232 }, { "epoch": 6.297297297297297, "grad_norm": 1.546875, "learning_rate": 3.135135135135135e-05, "loss": 1.0918, "step": 233 }, { "epoch": 6.324324324324325, "grad_norm": 1.84375, "learning_rate": 3.148648648648649e-05, "loss": 1.0579, "step": 234 }, { "epoch": 6.351351351351352, "grad_norm": 1.7109375, "learning_rate": 3.162162162162162e-05, "loss": 1.053, "step": 235 }, { "epoch": 6.378378378378378, "grad_norm": 2.09375, "learning_rate": 3.175675675675676e-05, "loss": 1.0839, "step": 236 }, { "epoch": 6.405405405405405, "grad_norm": 1.6171875, "learning_rate": 3.1891891891891894e-05, "loss": 1.0906, "step": 237 }, { "epoch": 6.4324324324324325, "grad_norm": 1.5859375, "learning_rate": 3.202702702702703e-05, "loss": 1.0721, "step": 238 }, { "epoch": 6.45945945945946, "grad_norm": 1.4921875, "learning_rate": 3.2162162162162165e-05, "loss": 1.0787, "step": 239 }, { "epoch": 6.486486486486487, "grad_norm": 1.859375, "learning_rate": 3.2297297297297294e-05, "loss": 1.0868, "step": 240 }, { "epoch": 6.513513513513513, "grad_norm": 1.6953125, "learning_rate": 3.2432432432432436e-05, "loss": 1.0917, "step": 241 }, { "epoch": 6.54054054054054, "grad_norm": 1.828125, "learning_rate": 3.256756756756757e-05, "loss": 1.0651, "step": 242 }, { "epoch": 6.5675675675675675, "grad_norm": 1.6640625, "learning_rate": 3.27027027027027e-05, "loss": 1.1036, "step": 243 }, { "epoch": 6.594594594594595, "grad_norm": 1.8359375, "learning_rate": 3.2837837837837836e-05, "loss": 1.0611, "step": 244 }, { "epoch": 6.621621621621622, "grad_norm": 2.21875, "learning_rate": 3.297297297297298e-05, "loss": 1.0791, "step": 245 }, { "epoch": 6.648648648648649, "grad_norm": 1.7734375, "learning_rate": 3.310810810810811e-05, "loss": 1.0806, "step": 246 }, { "epoch": 6.675675675675675, "grad_norm": 1.8671875, "learning_rate": 3.324324324324324e-05, "loss": 1.0765, "step": 247 }, { "epoch": 6.702702702702703, "grad_norm": 1.453125, "learning_rate": 3.337837837837838e-05, "loss": 1.0588, "step": 248 }, { "epoch": 6.72972972972973, "grad_norm": 1.53125, "learning_rate": 3.351351351351352e-05, "loss": 1.0947, "step": 249 }, { "epoch": 6.756756756756757, "grad_norm": 1.734375, "learning_rate": 3.364864864864865e-05, "loss": 1.0946, "step": 250 }, { "epoch": 6.783783783783784, "grad_norm": 1.9296875, "learning_rate": 3.3783783783783784e-05, "loss": 1.0903, "step": 251 }, { "epoch": 6.8108108108108105, "grad_norm": 1.59375, "learning_rate": 3.391891891891892e-05, "loss": 1.0832, "step": 252 }, { "epoch": 6.837837837837838, "grad_norm": 1.515625, "learning_rate": 3.4054054054054055e-05, "loss": 1.0787, "step": 253 }, { "epoch": 6.864864864864865, "grad_norm": 1.5703125, "learning_rate": 3.418918918918919e-05, "loss": 1.0797, "step": 254 }, { "epoch": 6.891891891891892, "grad_norm": 1.9453125, "learning_rate": 3.4324324324324326e-05, "loss": 1.0719, "step": 255 }, { "epoch": 6.918918918918919, "grad_norm": 1.515625, "learning_rate": 3.445945945945946e-05, "loss": 1.0709, "step": 256 }, { "epoch": 6.945945945945946, "grad_norm": 1.5546875, "learning_rate": 3.45945945945946e-05, "loss": 1.0636, "step": 257 }, { "epoch": 6.972972972972973, "grad_norm": 1.5234375, "learning_rate": 3.472972972972973e-05, "loss": 1.0683, "step": 258 }, { "epoch": 7.0, "grad_norm": 1.6015625, "learning_rate": 3.486486486486487e-05, "loss": 1.0963, "step": 259 }, { "epoch": 7.027027027027027, "grad_norm": 1.4296875, "learning_rate": 3.5e-05, "loss": 1.0288, "step": 260 }, { "epoch": 7.054054054054054, "grad_norm": 1.6484375, "learning_rate": 3.513513513513514e-05, "loss": 1.0805, "step": 261 }, { "epoch": 7.081081081081081, "grad_norm": 1.671875, "learning_rate": 3.5270270270270275e-05, "loss": 1.0721, "step": 262 }, { "epoch": 7.108108108108108, "grad_norm": 1.609375, "learning_rate": 3.5405405405405403e-05, "loss": 1.0353, "step": 263 }, { "epoch": 7.135135135135135, "grad_norm": 2.015625, "learning_rate": 3.554054054054054e-05, "loss": 1.0688, "step": 264 }, { "epoch": 7.162162162162162, "grad_norm": 1.6640625, "learning_rate": 3.567567567567568e-05, "loss": 1.0378, "step": 265 }, { "epoch": 7.1891891891891895, "grad_norm": 1.6015625, "learning_rate": 3.581081081081081e-05, "loss": 1.0644, "step": 266 }, { "epoch": 7.216216216216216, "grad_norm": 1.546875, "learning_rate": 3.5945945945945945e-05, "loss": 1.052, "step": 267 }, { "epoch": 7.243243243243243, "grad_norm": 1.5, "learning_rate": 3.608108108108108e-05, "loss": 1.0808, "step": 268 }, { "epoch": 7.27027027027027, "grad_norm": 1.71875, "learning_rate": 3.621621621621622e-05, "loss": 1.056, "step": 269 }, { "epoch": 7.297297297297297, "grad_norm": 1.765625, "learning_rate": 3.635135135135135e-05, "loss": 1.0582, "step": 270 }, { "epoch": 7.324324324324325, "grad_norm": 1.75, "learning_rate": 3.648648648648649e-05, "loss": 1.0743, "step": 271 }, { "epoch": 7.351351351351352, "grad_norm": 1.6015625, "learning_rate": 3.662162162162162e-05, "loss": 1.0243, "step": 272 }, { "epoch": 7.378378378378378, "grad_norm": 1.5078125, "learning_rate": 3.675675675675676e-05, "loss": 1.0639, "step": 273 }, { "epoch": 7.405405405405405, "grad_norm": 1.5078125, "learning_rate": 3.6891891891891894e-05, "loss": 1.0593, "step": 274 }, { "epoch": 7.4324324324324325, "grad_norm": 1.5859375, "learning_rate": 3.702702702702703e-05, "loss": 1.053, "step": 275 }, { "epoch": 7.45945945945946, "grad_norm": 1.546875, "learning_rate": 3.7162162162162165e-05, "loss": 1.0377, "step": 276 }, { "epoch": 7.486486486486487, "grad_norm": 1.5078125, "learning_rate": 3.72972972972973e-05, "loss": 1.0559, "step": 277 }, { "epoch": 7.513513513513513, "grad_norm": 1.6640625, "learning_rate": 3.7432432432432436e-05, "loss": 1.0604, "step": 278 }, { "epoch": 7.54054054054054, "grad_norm": 2.0, "learning_rate": 3.756756756756757e-05, "loss": 1.0689, "step": 279 }, { "epoch": 7.5675675675675675, "grad_norm": 1.796875, "learning_rate": 3.77027027027027e-05, "loss": 1.0563, "step": 280 }, { "epoch": 7.594594594594595, "grad_norm": 1.6484375, "learning_rate": 3.783783783783784e-05, "loss": 1.0783, "step": 281 }, { "epoch": 7.621621621621622, "grad_norm": 1.5859375, "learning_rate": 3.797297297297298e-05, "loss": 1.0534, "step": 282 }, { "epoch": 7.648648648648649, "grad_norm": 1.625, "learning_rate": 3.8108108108108106e-05, "loss": 1.0243, "step": 283 }, { "epoch": 7.675675675675675, "grad_norm": 1.8671875, "learning_rate": 3.824324324324324e-05, "loss": 1.0503, "step": 284 }, { "epoch": 7.702702702702703, "grad_norm": 1.625, "learning_rate": 3.8378378378378384e-05, "loss": 1.0538, "step": 285 }, { "epoch": 7.72972972972973, "grad_norm": 1.53125, "learning_rate": 3.851351351351351e-05, "loss": 1.0246, "step": 286 }, { "epoch": 7.756756756756757, "grad_norm": 1.546875, "learning_rate": 3.864864864864865e-05, "loss": 1.0446, "step": 287 }, { "epoch": 7.783783783783784, "grad_norm": 2.734375, "learning_rate": 3.8783783783783784e-05, "loss": 1.0332, "step": 288 }, { "epoch": 7.8108108108108105, "grad_norm": 1.7578125, "learning_rate": 3.8918918918918926e-05, "loss": 1.0331, "step": 289 }, { "epoch": 7.837837837837838, "grad_norm": 1.71875, "learning_rate": 3.9054054054054055e-05, "loss": 1.0613, "step": 290 }, { "epoch": 7.864864864864865, "grad_norm": 1.6328125, "learning_rate": 3.918918918918919e-05, "loss": 1.053, "step": 291 }, { "epoch": 7.891891891891892, "grad_norm": 1.53125, "learning_rate": 3.9324324324324326e-05, "loss": 1.058, "step": 292 }, { "epoch": 7.918918918918919, "grad_norm": 1.546875, "learning_rate": 3.945945945945946e-05, "loss": 1.0576, "step": 293 }, { "epoch": 7.945945945945946, "grad_norm": 1.8671875, "learning_rate": 3.95945945945946e-05, "loss": 1.0612, "step": 294 }, { "epoch": 7.972972972972973, "grad_norm": 1.609375, "learning_rate": 3.972972972972973e-05, "loss": 1.0453, "step": 295 }, { "epoch": 8.0, "grad_norm": 1.59375, "learning_rate": 3.986486486486487e-05, "loss": 1.0585, "step": 296 }, { "epoch": 8.027027027027026, "grad_norm": 1.59375, "learning_rate": 4e-05, "loss": 1.0297, "step": 297 }, { "epoch": 8.054054054054054, "grad_norm": 1.6484375, "learning_rate": 4.013513513513514e-05, "loss": 1.0335, "step": 298 }, { "epoch": 8.08108108108108, "grad_norm": 1.71875, "learning_rate": 4.0270270270270274e-05, "loss": 1.0453, "step": 299 }, { "epoch": 8.108108108108109, "grad_norm": 1.5390625, "learning_rate": 4.04054054054054e-05, "loss": 1.0281, "step": 300 }, { "epoch": 8.135135135135135, "grad_norm": 1.4765625, "learning_rate": 4.0540540540540545e-05, "loss": 1.0049, "step": 301 }, { "epoch": 8.162162162162161, "grad_norm": 2.0625, "learning_rate": 4.067567567567568e-05, "loss": 1.0643, "step": 302 }, { "epoch": 8.18918918918919, "grad_norm": 1.7578125, "learning_rate": 4.081081081081081e-05, "loss": 1.0394, "step": 303 }, { "epoch": 8.216216216216216, "grad_norm": 1.4765625, "learning_rate": 4.0945945945945945e-05, "loss": 1.0255, "step": 304 }, { "epoch": 8.243243243243244, "grad_norm": 1.8828125, "learning_rate": 4.108108108108109e-05, "loss": 1.0213, "step": 305 }, { "epoch": 8.27027027027027, "grad_norm": 1.7734375, "learning_rate": 4.1216216216216216e-05, "loss": 1.0921, "step": 306 }, { "epoch": 8.297297297297296, "grad_norm": 1.9453125, "learning_rate": 4.135135135135135e-05, "loss": 1.0197, "step": 307 }, { "epoch": 8.324324324324325, "grad_norm": 1.46875, "learning_rate": 4.148648648648649e-05, "loss": 1.0164, "step": 308 }, { "epoch": 8.35135135135135, "grad_norm": 1.59375, "learning_rate": 4.162162162162163e-05, "loss": 1.0249, "step": 309 }, { "epoch": 8.378378378378379, "grad_norm": 1.359375, "learning_rate": 4.175675675675676e-05, "loss": 1.0081, "step": 310 }, { "epoch": 8.405405405405405, "grad_norm": 1.6015625, "learning_rate": 4.189189189189189e-05, "loss": 1.013, "step": 311 }, { "epoch": 8.432432432432432, "grad_norm": 1.4453125, "learning_rate": 4.202702702702703e-05, "loss": 0.968, "step": 312 }, { "epoch": 8.45945945945946, "grad_norm": 1.5625, "learning_rate": 4.2162162162162164e-05, "loss": 1.0131, "step": 313 }, { "epoch": 8.486486486486486, "grad_norm": 1.6640625, "learning_rate": 4.22972972972973e-05, "loss": 0.9922, "step": 314 }, { "epoch": 8.513513513513514, "grad_norm": 1.6328125, "learning_rate": 4.2432432432432435e-05, "loss": 1.0275, "step": 315 }, { "epoch": 8.54054054054054, "grad_norm": 1.578125, "learning_rate": 4.256756756756757e-05, "loss": 0.9913, "step": 316 }, { "epoch": 8.567567567567568, "grad_norm": 1.7109375, "learning_rate": 4.2702702702702706e-05, "loss": 1.0152, "step": 317 }, { "epoch": 8.594594594594595, "grad_norm": 1.8046875, "learning_rate": 4.283783783783784e-05, "loss": 0.9974, "step": 318 }, { "epoch": 8.621621621621621, "grad_norm": 1.8671875, "learning_rate": 4.297297297297298e-05, "loss": 1.0099, "step": 319 }, { "epoch": 8.64864864864865, "grad_norm": 1.703125, "learning_rate": 4.3108108108108106e-05, "loss": 1.0132, "step": 320 }, { "epoch": 8.675675675675675, "grad_norm": 1.5859375, "learning_rate": 4.324324324324325e-05, "loss": 0.9794, "step": 321 }, { "epoch": 8.702702702702704, "grad_norm": 1.765625, "learning_rate": 4.3378378378378384e-05, "loss": 1.0059, "step": 322 }, { "epoch": 8.72972972972973, "grad_norm": 1.6328125, "learning_rate": 4.351351351351351e-05, "loss": 1.0268, "step": 323 }, { "epoch": 8.756756756756756, "grad_norm": 1.8671875, "learning_rate": 4.364864864864865e-05, "loss": 1.0062, "step": 324 }, { "epoch": 8.783783783783784, "grad_norm": 1.6875, "learning_rate": 4.378378378378379e-05, "loss": 0.9797, "step": 325 }, { "epoch": 8.81081081081081, "grad_norm": 1.5859375, "learning_rate": 4.391891891891892e-05, "loss": 0.9871, "step": 326 }, { "epoch": 8.837837837837839, "grad_norm": 1.6484375, "learning_rate": 4.4054054054054054e-05, "loss": 0.9794, "step": 327 }, { "epoch": 8.864864864864865, "grad_norm": 1.5546875, "learning_rate": 4.418918918918919e-05, "loss": 1.0002, "step": 328 }, { "epoch": 8.891891891891891, "grad_norm": 1.890625, "learning_rate": 4.432432432432433e-05, "loss": 0.9773, "step": 329 }, { "epoch": 8.91891891891892, "grad_norm": 1.6640625, "learning_rate": 4.445945945945946e-05, "loss": 0.9978, "step": 330 }, { "epoch": 8.945945945945946, "grad_norm": 1.8515625, "learning_rate": 4.4594594594594596e-05, "loss": 0.9799, "step": 331 }, { "epoch": 8.972972972972974, "grad_norm": 1.890625, "learning_rate": 4.472972972972973e-05, "loss": 0.9663, "step": 332 }, { "epoch": 9.0, "grad_norm": 1.828125, "learning_rate": 4.486486486486487e-05, "loss": 0.9814, "step": 333 }, { "epoch": 9.027027027027026, "grad_norm": 1.765625, "learning_rate": 4.5e-05, "loss": 0.998, "step": 334 }, { "epoch": 9.054054054054054, "grad_norm": 1.8125, "learning_rate": 4.513513513513514e-05, "loss": 0.9861, "step": 335 }, { "epoch": 9.08108108108108, "grad_norm": 1.6015625, "learning_rate": 4.5270270270270274e-05, "loss": 0.9739, "step": 336 }, { "epoch": 9.108108108108109, "grad_norm": 1.7578125, "learning_rate": 4.540540540540541e-05, "loss": 1.0046, "step": 337 }, { "epoch": 9.135135135135135, "grad_norm": 1.84375, "learning_rate": 4.5540540540540545e-05, "loss": 0.9903, "step": 338 }, { "epoch": 9.162162162162161, "grad_norm": 1.8671875, "learning_rate": 4.567567567567568e-05, "loss": 0.9996, "step": 339 }, { "epoch": 9.18918918918919, "grad_norm": 1.8515625, "learning_rate": 4.581081081081081e-05, "loss": 0.9803, "step": 340 }, { "epoch": 9.216216216216216, "grad_norm": 1.7578125, "learning_rate": 4.594594594594595e-05, "loss": 0.9874, "step": 341 }, { "epoch": 9.243243243243244, "grad_norm": 1.859375, "learning_rate": 4.608108108108109e-05, "loss": 0.9997, "step": 342 }, { "epoch": 9.27027027027027, "grad_norm": 1.8515625, "learning_rate": 4.6216216216216215e-05, "loss": 0.9775, "step": 343 }, { "epoch": 9.297297297297296, "grad_norm": 1.9140625, "learning_rate": 4.635135135135135e-05, "loss": 0.9674, "step": 344 }, { "epoch": 9.324324324324325, "grad_norm": 1.921875, "learning_rate": 4.648648648648649e-05, "loss": 0.9741, "step": 345 }, { "epoch": 9.35135135135135, "grad_norm": 1.90625, "learning_rate": 4.662162162162162e-05, "loss": 0.9911, "step": 346 }, { "epoch": 9.378378378378379, "grad_norm": 1.6640625, "learning_rate": 4.675675675675676e-05, "loss": 0.9868, "step": 347 }, { "epoch": 9.405405405405405, "grad_norm": 2.15625, "learning_rate": 4.689189189189189e-05, "loss": 0.9843, "step": 348 }, { "epoch": 9.432432432432432, "grad_norm": 2.78125, "learning_rate": 4.7027027027027035e-05, "loss": 1.0033, "step": 349 }, { "epoch": 9.45945945945946, "grad_norm": 1.6796875, "learning_rate": 4.7162162162162164e-05, "loss": 0.9513, "step": 350 }, { "epoch": 9.486486486486486, "grad_norm": 1.9375, "learning_rate": 4.72972972972973e-05, "loss": 0.9867, "step": 351 }, { "epoch": 9.513513513513514, "grad_norm": 2.65625, "learning_rate": 4.7432432432432435e-05, "loss": 0.9623, "step": 352 }, { "epoch": 9.54054054054054, "grad_norm": 2.1875, "learning_rate": 4.756756756756757e-05, "loss": 0.9275, "step": 353 }, { "epoch": 9.567567567567568, "grad_norm": 1.796875, "learning_rate": 4.7702702702702706e-05, "loss": 0.9618, "step": 354 }, { "epoch": 9.594594594594595, "grad_norm": 1.875, "learning_rate": 4.783783783783784e-05, "loss": 0.939, "step": 355 }, { "epoch": 9.621621621621621, "grad_norm": 2.046875, "learning_rate": 4.797297297297298e-05, "loss": 0.989, "step": 356 }, { "epoch": 9.64864864864865, "grad_norm": 1.703125, "learning_rate": 4.810810810810811e-05, "loss": 0.9604, "step": 357 }, { "epoch": 9.675675675675675, "grad_norm": 1.859375, "learning_rate": 4.824324324324325e-05, "loss": 0.975, "step": 358 }, { "epoch": 9.702702702702704, "grad_norm": 1.8125, "learning_rate": 4.837837837837838e-05, "loss": 0.9669, "step": 359 }, { "epoch": 9.72972972972973, "grad_norm": 1.796875, "learning_rate": 4.851351351351351e-05, "loss": 0.952, "step": 360 }, { "epoch": 9.756756756756756, "grad_norm": 1.6796875, "learning_rate": 4.8648648648648654e-05, "loss": 0.9442, "step": 361 }, { "epoch": 9.783783783783784, "grad_norm": 1.7421875, "learning_rate": 4.878378378378379e-05, "loss": 0.9358, "step": 362 }, { "epoch": 9.81081081081081, "grad_norm": 1.8671875, "learning_rate": 4.891891891891892e-05, "loss": 0.9573, "step": 363 }, { "epoch": 9.837837837837839, "grad_norm": 1.671875, "learning_rate": 4.9054054054054054e-05, "loss": 0.8979, "step": 364 }, { "epoch": 9.864864864864865, "grad_norm": 1.609375, "learning_rate": 4.9189189189189196e-05, "loss": 0.9053, "step": 365 }, { "epoch": 9.891891891891891, "grad_norm": 1.6640625, "learning_rate": 4.9324324324324325e-05, "loss": 0.8943, "step": 366 }, { "epoch": 9.91891891891892, "grad_norm": 1.796875, "learning_rate": 4.945945945945946e-05, "loss": 0.9212, "step": 367 }, { "epoch": 9.945945945945946, "grad_norm": 1.8203125, "learning_rate": 4.9594594594594596e-05, "loss": 0.9242, "step": 368 }, { "epoch": 9.972972972972974, "grad_norm": 1.734375, "learning_rate": 4.972972972972974e-05, "loss": 0.8988, "step": 369 }, { "epoch": 10.0, "grad_norm": 1.828125, "learning_rate": 4.986486486486487e-05, "loss": 0.8959, "step": 370 }, { "epoch": 10.027027027027026, "grad_norm": 1.96875, "learning_rate": 5e-05, "loss": 0.9448, "step": 371 }, { "epoch": 10.054054054054054, "grad_norm": 1.8515625, "learning_rate": 4.999999750368664e-05, "loss": 0.9283, "step": 372 }, { "epoch": 10.08108108108108, "grad_norm": 1.8515625, "learning_rate": 4.9999990014747055e-05, "loss": 0.9066, "step": 373 }, { "epoch": 10.108108108108109, "grad_norm": 1.9296875, "learning_rate": 4.999997753318275e-05, "loss": 0.8761, "step": 374 }, { "epoch": 10.135135135135135, "grad_norm": 1.8984375, "learning_rate": 4.9999960058996204e-05, "loss": 0.8906, "step": 375 }, { "epoch": 10.162162162162161, "grad_norm": 1.9140625, "learning_rate": 4.999993759219092e-05, "loss": 0.8897, "step": 376 }, { "epoch": 10.18918918918919, "grad_norm": 2.0, "learning_rate": 4.999991013277137e-05, "loss": 0.9598, "step": 377 }, { "epoch": 10.216216216216216, "grad_norm": 2.078125, "learning_rate": 4.9999877680743045e-05, "loss": 0.9043, "step": 378 }, { "epoch": 10.243243243243244, "grad_norm": 1.8828125, "learning_rate": 4.999984023611243e-05, "loss": 0.9087, "step": 379 }, { "epoch": 10.27027027027027, "grad_norm": 2.09375, "learning_rate": 4.9999797798887005e-05, "loss": 0.8768, "step": 380 }, { "epoch": 10.297297297297296, "grad_norm": 2.234375, "learning_rate": 4.999975036907523e-05, "loss": 0.8501, "step": 381 }, { "epoch": 10.324324324324325, "grad_norm": 2.140625, "learning_rate": 4.999969794668659e-05, "loss": 0.8691, "step": 382 }, { "epoch": 10.35135135135135, "grad_norm": 1.8984375, "learning_rate": 4.999964053173155e-05, "loss": 0.8658, "step": 383 }, { "epoch": 10.378378378378379, "grad_norm": 2.203125, "learning_rate": 4.999957812422158e-05, "loss": 0.864, "step": 384 }, { "epoch": 10.405405405405405, "grad_norm": 2.28125, "learning_rate": 4.999951072416914e-05, "loss": 0.9429, "step": 385 }, { "epoch": 10.432432432432432, "grad_norm": 2.390625, "learning_rate": 4.999943833158769e-05, "loss": 0.8678, "step": 386 }, { "epoch": 10.45945945945946, "grad_norm": 2.140625, "learning_rate": 4.999936094649168e-05, "loss": 0.9104, "step": 387 }, { "epoch": 10.486486486486486, "grad_norm": 2.03125, "learning_rate": 4.999927856889658e-05, "loss": 0.8512, "step": 388 }, { "epoch": 10.513513513513514, "grad_norm": 2.359375, "learning_rate": 4.999919119881882e-05, "loss": 0.918, "step": 389 }, { "epoch": 10.54054054054054, "grad_norm": 2.296875, "learning_rate": 4.999909883627587e-05, "loss": 0.8569, "step": 390 }, { "epoch": 10.567567567567568, "grad_norm": 2.328125, "learning_rate": 4.999900148128617e-05, "loss": 0.808, "step": 391 }, { "epoch": 10.594594594594595, "grad_norm": 2.359375, "learning_rate": 4.999889913386915e-05, "loss": 0.8529, "step": 392 }, { "epoch": 10.621621621621621, "grad_norm": 2.203125, "learning_rate": 4.999879179404526e-05, "loss": 0.8332, "step": 393 }, { "epoch": 10.64864864864865, "grad_norm": 2.203125, "learning_rate": 4.999867946183593e-05, "loss": 0.8341, "step": 394 }, { "epoch": 10.675675675675675, "grad_norm": 2.359375, "learning_rate": 4.99985621372636e-05, "loss": 0.814, "step": 395 }, { "epoch": 10.702702702702704, "grad_norm": 2.125, "learning_rate": 4.99984398203517e-05, "loss": 0.8502, "step": 396 }, { "epoch": 10.72972972972973, "grad_norm": 2.0625, "learning_rate": 4.9998312511124656e-05, "loss": 0.8545, "step": 397 }, { "epoch": 10.756756756756756, "grad_norm": 2.0, "learning_rate": 4.999818020960788e-05, "loss": 0.8129, "step": 398 }, { "epoch": 10.783783783783784, "grad_norm": 2.5, "learning_rate": 4.999804291582781e-05, "loss": 0.8158, "step": 399 }, { "epoch": 10.81081081081081, "grad_norm": 2.28125, "learning_rate": 4.999790062981187e-05, "loss": 0.7923, "step": 400 }, { "epoch": 10.837837837837839, "grad_norm": 2.203125, "learning_rate": 4.999775335158845e-05, "loss": 0.7918, "step": 401 }, { "epoch": 10.864864864864865, "grad_norm": 2.03125, "learning_rate": 4.999760108118698e-05, "loss": 0.8045, "step": 402 }, { "epoch": 10.891891891891891, "grad_norm": 2.375, "learning_rate": 4.999744381863787e-05, "loss": 0.764, "step": 403 }, { "epoch": 10.91891891891892, "grad_norm": 2.328125, "learning_rate": 4.999728156397251e-05, "loss": 0.7852, "step": 404 }, { "epoch": 10.945945945945946, "grad_norm": 2.296875, "learning_rate": 4.9997114317223327e-05, "loss": 0.9009, "step": 405 }, { "epoch": 10.972972972972974, "grad_norm": 2.734375, "learning_rate": 4.9996942078423695e-05, "loss": 0.8263, "step": 406 }, { "epoch": 11.0, "grad_norm": 2.5, "learning_rate": 4.999676484760803e-05, "loss": 0.7598, "step": 407 }, { "epoch": 11.027027027027026, "grad_norm": 2.296875, "learning_rate": 4.9996582624811725e-05, "loss": 0.8072, "step": 408 }, { "epoch": 11.054054054054054, "grad_norm": 2.28125, "learning_rate": 4.999639541007116e-05, "loss": 0.7621, "step": 409 }, { "epoch": 11.08108108108108, "grad_norm": 2.25, "learning_rate": 4.999620320342373e-05, "loss": 0.791, "step": 410 }, { "epoch": 11.108108108108109, "grad_norm": 2.171875, "learning_rate": 4.999600600490783e-05, "loss": 0.8106, "step": 411 }, { "epoch": 11.135135135135135, "grad_norm": 2.515625, "learning_rate": 4.9995803814562815e-05, "loss": 0.799, "step": 412 }, { "epoch": 11.162162162162161, "grad_norm": 2.25, "learning_rate": 4.999559663242909e-05, "loss": 0.7588, "step": 413 }, { "epoch": 11.18918918918919, "grad_norm": 2.140625, "learning_rate": 4.9995384458548e-05, "loss": 0.7503, "step": 414 }, { "epoch": 11.216216216216216, "grad_norm": 2.359375, "learning_rate": 4.999516729296195e-05, "loss": 0.7628, "step": 415 }, { "epoch": 11.243243243243244, "grad_norm": 2.28125, "learning_rate": 4.9994945135714297e-05, "loss": 0.791, "step": 416 }, { "epoch": 11.27027027027027, "grad_norm": 2.34375, "learning_rate": 4.999471798684941e-05, "loss": 0.695, "step": 417 }, { "epoch": 11.297297297297296, "grad_norm": 2.4375, "learning_rate": 4.999448584641263e-05, "loss": 0.7859, "step": 418 }, { "epoch": 11.324324324324325, "grad_norm": 2.21875, "learning_rate": 4.9994248714450344e-05, "loss": 0.7216, "step": 419 }, { "epoch": 11.35135135135135, "grad_norm": 2.953125, "learning_rate": 4.999400659100989e-05, "loss": 0.7626, "step": 420 }, { "epoch": 11.378378378378379, "grad_norm": 2.46875, "learning_rate": 4.999375947613963e-05, "loss": 0.7794, "step": 421 }, { "epoch": 11.405405405405405, "grad_norm": 2.640625, "learning_rate": 4.999350736988893e-05, "loss": 0.788, "step": 422 }, { "epoch": 11.432432432432432, "grad_norm": 2.609375, "learning_rate": 4.99932502723081e-05, "loss": 0.6896, "step": 423 }, { "epoch": 11.45945945945946, "grad_norm": 2.515625, "learning_rate": 4.9992988183448506e-05, "loss": 0.7338, "step": 424 }, { "epoch": 11.486486486486486, "grad_norm": 3.0, "learning_rate": 4.999272110336249e-05, "loss": 0.7873, "step": 425 }, { "epoch": 11.513513513513514, "grad_norm": 2.96875, "learning_rate": 4.9992449032103396e-05, "loss": 0.7575, "step": 426 }, { "epoch": 11.54054054054054, "grad_norm": 2.3125, "learning_rate": 4.9992171969725534e-05, "loss": 0.6989, "step": 427 }, { "epoch": 11.567567567567568, "grad_norm": 2.796875, "learning_rate": 4.999188991628425e-05, "loss": 0.6872, "step": 428 }, { "epoch": 11.594594594594595, "grad_norm": 3.296875, "learning_rate": 4.999160287183588e-05, "loss": 0.8174, "step": 429 }, { "epoch": 11.621621621621621, "grad_norm": 2.65625, "learning_rate": 4.999131083643772e-05, "loss": 0.6843, "step": 430 }, { "epoch": 11.64864864864865, "grad_norm": 2.71875, "learning_rate": 4.9991013810148124e-05, "loss": 0.7414, "step": 431 }, { "epoch": 11.675675675675675, "grad_norm": 2.40625, "learning_rate": 4.999071179302638e-05, "loss": 0.6467, "step": 432 }, { "epoch": 11.702702702702704, "grad_norm": 2.328125, "learning_rate": 4.9990404785132836e-05, "loss": 0.7164, "step": 433 }, { "epoch": 11.72972972972973, "grad_norm": 2.640625, "learning_rate": 4.9990092786528766e-05, "loss": 0.6946, "step": 434 }, { "epoch": 11.756756756756756, "grad_norm": 2.515625, "learning_rate": 4.99897757972765e-05, "loss": 0.7213, "step": 435 }, { "epoch": 11.783783783783784, "grad_norm": 2.28125, "learning_rate": 4.998945381743935e-05, "loss": 0.7095, "step": 436 }, { "epoch": 11.81081081081081, "grad_norm": 2.390625, "learning_rate": 4.99891268470816e-05, "loss": 0.6756, "step": 437 }, { "epoch": 11.837837837837839, "grad_norm": 2.40625, "learning_rate": 4.9988794886268545e-05, "loss": 0.6862, "step": 438 }, { "epoch": 11.864864864864865, "grad_norm": 2.640625, "learning_rate": 4.998845793506649e-05, "loss": 0.7239, "step": 439 }, { "epoch": 11.891891891891891, "grad_norm": 2.453125, "learning_rate": 4.998811599354273e-05, "loss": 0.7146, "step": 440 }, { "epoch": 11.91891891891892, "grad_norm": 2.65625, "learning_rate": 4.998776906176553e-05, "loss": 0.7414, "step": 441 }, { "epoch": 11.945945945945946, "grad_norm": 2.328125, "learning_rate": 4.9987417139804206e-05, "loss": 0.7183, "step": 442 }, { "epoch": 11.972972972972974, "grad_norm": 2.546875, "learning_rate": 4.998706022772901e-05, "loss": 0.6279, "step": 443 }, { "epoch": 12.0, "grad_norm": 2.375, "learning_rate": 4.998669832561124e-05, "loss": 0.6314, "step": 444 }, { "epoch": 12.027027027027026, "grad_norm": 2.75, "learning_rate": 4.9986331433523156e-05, "loss": 0.6794, "step": 445 }, { "epoch": 12.054054054054054, "grad_norm": 2.765625, "learning_rate": 4.998595955153803e-05, "loss": 0.6707, "step": 446 }, { "epoch": 12.08108108108108, "grad_norm": 2.84375, "learning_rate": 4.998558267973014e-05, "loss": 0.7326, "step": 447 }, { "epoch": 12.108108108108109, "grad_norm": 2.40625, "learning_rate": 4.998520081817473e-05, "loss": 0.6263, "step": 448 }, { "epoch": 12.135135135135135, "grad_norm": 2.3125, "learning_rate": 4.998481396694807e-05, "loss": 0.6887, "step": 449 }, { "epoch": 12.162162162162161, "grad_norm": 2.96875, "learning_rate": 4.998442212612743e-05, "loss": 0.7349, "step": 450 }, { "epoch": 12.18918918918919, "grad_norm": 2.40625, "learning_rate": 4.998402529579104e-05, "loss": 0.596, "step": 451 }, { "epoch": 12.216216216216216, "grad_norm": 2.375, "learning_rate": 4.998362347601816e-05, "loss": 0.6144, "step": 452 }, { "epoch": 12.243243243243244, "grad_norm": 2.9375, "learning_rate": 4.998321666688903e-05, "loss": 0.7175, "step": 453 }, { "epoch": 12.27027027027027, "grad_norm": 2.546875, "learning_rate": 4.99828048684849e-05, "loss": 0.6567, "step": 454 }, { "epoch": 12.297297297297296, "grad_norm": 2.75, "learning_rate": 4.998238808088801e-05, "loss": 0.6445, "step": 455 }, { "epoch": 12.324324324324325, "grad_norm": 2.28125, "learning_rate": 4.9981966304181583e-05, "loss": 0.6005, "step": 456 }, { "epoch": 12.35135135135135, "grad_norm": 2.34375, "learning_rate": 4.998153953844985e-05, "loss": 0.579, "step": 457 }, { "epoch": 12.378378378378379, "grad_norm": 2.71875, "learning_rate": 4.9981107783778045e-05, "loss": 0.6929, "step": 458 }, { "epoch": 12.405405405405405, "grad_norm": 2.703125, "learning_rate": 4.9980671040252396e-05, "loss": 0.6815, "step": 459 }, { "epoch": 12.432432432432432, "grad_norm": 2.5625, "learning_rate": 4.998022930796011e-05, "loss": 0.6158, "step": 460 }, { "epoch": 12.45945945945946, "grad_norm": 2.5, "learning_rate": 4.9979782586989424e-05, "loss": 0.6247, "step": 461 }, { "epoch": 12.486486486486486, "grad_norm": 2.390625, "learning_rate": 4.997933087742952e-05, "loss": 0.5931, "step": 462 }, { "epoch": 12.513513513513514, "grad_norm": 2.375, "learning_rate": 4.997887417937063e-05, "loss": 0.6198, "step": 463 }, { "epoch": 12.54054054054054, "grad_norm": 2.8125, "learning_rate": 4.997841249290396e-05, "loss": 0.694, "step": 464 }, { "epoch": 12.567567567567568, "grad_norm": 2.703125, "learning_rate": 4.997794581812169e-05, "loss": 0.6291, "step": 465 }, { "epoch": 12.594594594594595, "grad_norm": 2.21875, "learning_rate": 4.9977474155117045e-05, "loss": 0.5118, "step": 466 }, { "epoch": 12.621621621621621, "grad_norm": 2.640625, "learning_rate": 4.997699750398419e-05, "loss": 0.6551, "step": 467 }, { "epoch": 12.64864864864865, "grad_norm": 3.015625, "learning_rate": 4.9976515864818335e-05, "loss": 0.5999, "step": 468 }, { "epoch": 12.675675675675675, "grad_norm": 2.640625, "learning_rate": 4.997602923771566e-05, "loss": 0.5785, "step": 469 }, { "epoch": 12.702702702702704, "grad_norm": 2.6875, "learning_rate": 4.997553762277335e-05, "loss": 0.6246, "step": 470 }, { "epoch": 12.72972972972973, "grad_norm": 2.390625, "learning_rate": 4.9975041020089576e-05, "loss": 0.5175, "step": 471 }, { "epoch": 12.756756756756756, "grad_norm": 2.421875, "learning_rate": 4.997453942976351e-05, "loss": 0.599, "step": 472 }, { "epoch": 12.783783783783784, "grad_norm": 2.515625, "learning_rate": 4.997403285189534e-05, "loss": 0.5586, "step": 473 }, { "epoch": 12.81081081081081, "grad_norm": 2.546875, "learning_rate": 4.9973521286586214e-05, "loss": 0.5932, "step": 474 }, { "epoch": 12.837837837837839, "grad_norm": 2.609375, "learning_rate": 4.99730047339383e-05, "loss": 0.5801, "step": 475 }, { "epoch": 12.864864864864865, "grad_norm": 2.734375, "learning_rate": 4.997248319405475e-05, "loss": 0.6521, "step": 476 }, { "epoch": 12.891891891891891, "grad_norm": 2.921875, "learning_rate": 4.9971956667039735e-05, "loss": 0.6063, "step": 477 }, { "epoch": 12.91891891891892, "grad_norm": 2.859375, "learning_rate": 4.99714251529984e-05, "loss": 0.5588, "step": 478 }, { "epoch": 12.945945945945946, "grad_norm": 2.953125, "learning_rate": 4.9970888652036876e-05, "loss": 0.5682, "step": 479 }, { "epoch": 12.972972972972974, "grad_norm": 4.125, "learning_rate": 4.997034716426231e-05, "loss": 0.6206, "step": 480 }, { "epoch": 13.0, "grad_norm": 3.03125, "learning_rate": 4.996980068978285e-05, "loss": 0.5878, "step": 481 }, { "epoch": 13.027027027027026, "grad_norm": 3.234375, "learning_rate": 4.996924922870762e-05, "loss": 0.6787, "step": 482 }, { "epoch": 13.054054054054054, "grad_norm": 3.671875, "learning_rate": 4.996869278114676e-05, "loss": 0.7008, "step": 483 }, { "epoch": 13.08108108108108, "grad_norm": 2.984375, "learning_rate": 4.996813134721139e-05, "loss": 0.6262, "step": 484 }, { "epoch": 13.108108108108109, "grad_norm": 2.59375, "learning_rate": 4.996756492701362e-05, "loss": 0.5999, "step": 485 }, { "epoch": 13.135135135135135, "grad_norm": 3.0625, "learning_rate": 4.996699352066659e-05, "loss": 0.5662, "step": 486 }, { "epoch": 13.162162162162161, "grad_norm": 3.0625, "learning_rate": 4.996641712828439e-05, "loss": 0.4889, "step": 487 }, { "epoch": 13.18918918918919, "grad_norm": 2.5, "learning_rate": 4.9965835749982146e-05, "loss": 0.5813, "step": 488 }, { "epoch": 13.216216216216216, "grad_norm": 2.890625, "learning_rate": 4.996524938587596e-05, "loss": 0.5447, "step": 489 }, { "epoch": 13.243243243243244, "grad_norm": 2.703125, "learning_rate": 4.996465803608291e-05, "loss": 0.5396, "step": 490 }, { "epoch": 13.27027027027027, "grad_norm": 2.34375, "learning_rate": 4.996406170072112e-05, "loss": 0.563, "step": 491 }, { "epoch": 13.297297297297296, "grad_norm": 2.578125, "learning_rate": 4.996346037990967e-05, "loss": 0.532, "step": 492 }, { "epoch": 13.324324324324325, "grad_norm": 2.34375, "learning_rate": 4.996285407376864e-05, "loss": 0.4641, "step": 493 }, { "epoch": 13.35135135135135, "grad_norm": 2.53125, "learning_rate": 4.996224278241912e-05, "loss": 0.4638, "step": 494 }, { "epoch": 13.378378378378379, "grad_norm": 2.484375, "learning_rate": 4.9961626505983185e-05, "loss": 0.5486, "step": 495 }, { "epoch": 13.405405405405405, "grad_norm": 2.609375, "learning_rate": 4.996100524458391e-05, "loss": 0.5837, "step": 496 }, { "epoch": 13.432432432432432, "grad_norm": 2.71875, "learning_rate": 4.996037899834537e-05, "loss": 0.4814, "step": 497 }, { "epoch": 13.45945945945946, "grad_norm": 2.484375, "learning_rate": 4.995974776739262e-05, "loss": 0.5283, "step": 498 }, { "epoch": 13.486486486486486, "grad_norm": 2.4375, "learning_rate": 4.995911155185173e-05, "loss": 0.4971, "step": 499 }, { "epoch": 13.513513513513514, "grad_norm": 3.1875, "learning_rate": 4.995847035184974e-05, "loss": 0.5576, "step": 500 }, { "epoch": 13.54054054054054, "grad_norm": 2.65625, "learning_rate": 4.9957824167514715e-05, "loss": 0.5104, "step": 501 }, { "epoch": 13.567567567567568, "grad_norm": 3.015625, "learning_rate": 4.9957172998975696e-05, "loss": 0.5232, "step": 502 }, { "epoch": 13.594594594594595, "grad_norm": 3.84375, "learning_rate": 4.9956516846362726e-05, "loss": 0.5093, "step": 503 }, { "epoch": 13.621621621621621, "grad_norm": 2.71875, "learning_rate": 4.9955855709806845e-05, "loss": 0.541, "step": 504 }, { "epoch": 13.64864864864865, "grad_norm": 3.171875, "learning_rate": 4.995518958944007e-05, "loss": 0.4893, "step": 505 }, { "epoch": 13.675675675675675, "grad_norm": 3.4375, "learning_rate": 4.995451848539545e-05, "loss": 0.4869, "step": 506 }, { "epoch": 13.702702702702704, "grad_norm": 2.5625, "learning_rate": 4.9953842397806995e-05, "loss": 0.6615, "step": 507 }, { "epoch": 13.72972972972973, "grad_norm": 2.34375, "learning_rate": 4.995316132680972e-05, "loss": 0.5109, "step": 508 }, { "epoch": 13.756756756756756, "grad_norm": 2.984375, "learning_rate": 4.995247527253966e-05, "loss": 0.5369, "step": 509 }, { "epoch": 13.783783783783784, "grad_norm": 3.09375, "learning_rate": 4.995178423513379e-05, "loss": 0.6808, "step": 510 }, { "epoch": 13.81081081081081, "grad_norm": 2.453125, "learning_rate": 4.9951088214730134e-05, "loss": 0.4509, "step": 511 }, { "epoch": 13.837837837837839, "grad_norm": 2.875, "learning_rate": 4.99503872114677e-05, "loss": 0.484, "step": 512 }, { "epoch": 13.864864864864865, "grad_norm": 2.578125, "learning_rate": 4.994968122548646e-05, "loss": 0.4374, "step": 513 }, { "epoch": 13.891891891891891, "grad_norm": 3.09375, "learning_rate": 4.994897025692742e-05, "loss": 0.5476, "step": 514 }, { "epoch": 13.91891891891892, "grad_norm": 2.828125, "learning_rate": 4.994825430593255e-05, "loss": 0.4805, "step": 515 }, { "epoch": 13.945945945945946, "grad_norm": 2.53125, "learning_rate": 4.994753337264484e-05, "loss": 0.5423, "step": 516 }, { "epoch": 13.972972972972974, "grad_norm": 2.421875, "learning_rate": 4.9946807457208266e-05, "loss": 0.427, "step": 517 }, { "epoch": 14.0, "grad_norm": 3.015625, "learning_rate": 4.994607655976778e-05, "loss": 0.5792, "step": 518 }, { "epoch": 14.027027027027026, "grad_norm": 2.734375, "learning_rate": 4.994534068046937e-05, "loss": 0.4971, "step": 519 }, { "epoch": 14.054054054054054, "grad_norm": 2.34375, "learning_rate": 4.994459981945997e-05, "loss": 0.4884, "step": 520 }, { "epoch": 14.08108108108108, "grad_norm": 2.671875, "learning_rate": 4.9943853976887556e-05, "loss": 0.5379, "step": 521 }, { "epoch": 14.108108108108109, "grad_norm": 2.203125, "learning_rate": 4.994310315290106e-05, "loss": 0.4307, "step": 522 }, { "epoch": 14.135135135135135, "grad_norm": 2.4375, "learning_rate": 4.994234734765043e-05, "loss": 0.4419, "step": 523 }, { "epoch": 14.162162162162161, "grad_norm": 2.53125, "learning_rate": 4.994158656128661e-05, "loss": 0.534, "step": 524 }, { "epoch": 14.18918918918919, "grad_norm": 2.375, "learning_rate": 4.994082079396153e-05, "loss": 0.4291, "step": 525 }, { "epoch": 14.216216216216216, "grad_norm": 2.765625, "learning_rate": 4.994005004582811e-05, "loss": 0.5364, "step": 526 }, { "epoch": 14.243243243243244, "grad_norm": 2.578125, "learning_rate": 4.9939274317040285e-05, "loss": 0.5226, "step": 527 }, { "epoch": 14.27027027027027, "grad_norm": 2.71875, "learning_rate": 4.993849360775296e-05, "loss": 0.5834, "step": 528 }, { "epoch": 14.297297297297296, "grad_norm": 2.359375, "learning_rate": 4.993770791812206e-05, "loss": 0.4946, "step": 529 }, { "epoch": 14.324324324324325, "grad_norm": 2.578125, "learning_rate": 4.9936917248304476e-05, "loss": 0.5177, "step": 530 }, { "epoch": 14.35135135135135, "grad_norm": 2.546875, "learning_rate": 4.993612159845812e-05, "loss": 0.5425, "step": 531 }, { "epoch": 14.378378378378379, "grad_norm": 2.6875, "learning_rate": 4.993532096874188e-05, "loss": 0.6099, "step": 532 }, { "epoch": 14.405405405405405, "grad_norm": 2.75, "learning_rate": 4.9934515359315654e-05, "loss": 0.5142, "step": 533 }, { "epoch": 14.432432432432432, "grad_norm": 2.84375, "learning_rate": 4.993370477034032e-05, "loss": 0.494, "step": 534 }, { "epoch": 14.45945945945946, "grad_norm": 2.359375, "learning_rate": 4.993288920197776e-05, "loss": 0.5054, "step": 535 }, { "epoch": 14.486486486486486, "grad_norm": 2.703125, "learning_rate": 4.993206865439084e-05, "loss": 0.5392, "step": 536 }, { "epoch": 14.513513513513514, "grad_norm": 2.640625, "learning_rate": 4.993124312774343e-05, "loss": 0.4952, "step": 537 }, { "epoch": 14.54054054054054, "grad_norm": 2.40625, "learning_rate": 4.993041262220041e-05, "loss": 0.495, "step": 538 }, { "epoch": 14.567567567567568, "grad_norm": 2.765625, "learning_rate": 4.992957713792761e-05, "loss": 0.5812, "step": 539 }, { "epoch": 14.594594594594595, "grad_norm": 2.625, "learning_rate": 4.992873667509189e-05, "loss": 0.4997, "step": 540 }, { "epoch": 14.621621621621621, "grad_norm": 3.203125, "learning_rate": 4.99278912338611e-05, "loss": 0.5438, "step": 541 }, { "epoch": 14.64864864864865, "grad_norm": 2.921875, "learning_rate": 4.992704081440407e-05, "loss": 0.433, "step": 542 }, { "epoch": 14.675675675675675, "grad_norm": 2.625, "learning_rate": 4.9926185416890645e-05, "loss": 0.5272, "step": 543 }, { "epoch": 14.702702702702704, "grad_norm": 2.84375, "learning_rate": 4.992532504149164e-05, "loss": 0.5372, "step": 544 }, { "epoch": 14.72972972972973, "grad_norm": 2.921875, "learning_rate": 4.9924459688378886e-05, "loss": 0.4379, "step": 545 }, { "epoch": 14.756756756756756, "grad_norm": 2.59375, "learning_rate": 4.992358935772519e-05, "loss": 0.4094, "step": 546 }, { "epoch": 14.783783783783784, "grad_norm": 2.546875, "learning_rate": 4.992271404970437e-05, "loss": 0.4726, "step": 547 }, { "epoch": 14.81081081081081, "grad_norm": 3.0625, "learning_rate": 4.9921833764491224e-05, "loss": 0.47, "step": 548 }, { "epoch": 14.837837837837839, "grad_norm": 2.34375, "learning_rate": 4.9920948502261545e-05, "loss": 0.4386, "step": 549 }, { "epoch": 14.864864864864865, "grad_norm": 2.46875, "learning_rate": 4.992005826319214e-05, "loss": 0.4763, "step": 550 }, { "epoch": 14.891891891891891, "grad_norm": 2.46875, "learning_rate": 4.9919163047460774e-05, "loss": 0.5274, "step": 551 }, { "epoch": 14.91891891891892, "grad_norm": 2.109375, "learning_rate": 4.991826285524624e-05, "loss": 0.4313, "step": 552 }, { "epoch": 14.945945945945946, "grad_norm": 2.671875, "learning_rate": 4.99173576867283e-05, "loss": 0.544, "step": 553 }, { "epoch": 14.972972972972974, "grad_norm": 2.8125, "learning_rate": 4.991644754208774e-05, "loss": 0.5743, "step": 554 }, { "epoch": 15.0, "grad_norm": 2.609375, "learning_rate": 4.99155324215063e-05, "loss": 0.5516, "step": 555 }, { "epoch": 15.027027027027026, "grad_norm": 2.328125, "learning_rate": 4.991461232516675e-05, "loss": 0.476, "step": 556 }, { "epoch": 15.054054054054054, "grad_norm": 2.1875, "learning_rate": 4.991368725325283e-05, "loss": 0.4423, "step": 557 }, { "epoch": 15.08108108108108, "grad_norm": 2.640625, "learning_rate": 4.991275720594927e-05, "loss": 0.3985, "step": 558 }, { "epoch": 15.108108108108109, "grad_norm": 2.9375, "learning_rate": 4.991182218344182e-05, "loss": 0.4412, "step": 559 }, { "epoch": 15.135135135135135, "grad_norm": 2.21875, "learning_rate": 4.991088218591722e-05, "loss": 0.3666, "step": 560 }, { "epoch": 15.162162162162161, "grad_norm": 2.5625, "learning_rate": 4.9909937213563165e-05, "loss": 0.4159, "step": 561 }, { "epoch": 15.18918918918919, "grad_norm": 2.6875, "learning_rate": 4.990898726656839e-05, "loss": 0.4878, "step": 562 }, { "epoch": 15.216216216216216, "grad_norm": 3.015625, "learning_rate": 4.990803234512259e-05, "loss": 0.4672, "step": 563 }, { "epoch": 15.243243243243244, "grad_norm": 2.6875, "learning_rate": 4.990707244941648e-05, "loss": 0.4995, "step": 564 }, { "epoch": 15.27027027027027, "grad_norm": 2.375, "learning_rate": 4.990610757964176e-05, "loss": 0.4609, "step": 565 }, { "epoch": 15.297297297297296, "grad_norm": 2.578125, "learning_rate": 4.99051377359911e-05, "loss": 0.5299, "step": 566 }, { "epoch": 15.324324324324325, "grad_norm": 2.421875, "learning_rate": 4.99041629186582e-05, "loss": 0.4116, "step": 567 }, { "epoch": 15.35135135135135, "grad_norm": 2.15625, "learning_rate": 4.990318312783773e-05, "loss": 0.4453, "step": 568 }, { "epoch": 15.378378378378379, "grad_norm": 2.125, "learning_rate": 4.990219836372535e-05, "loss": 0.4707, "step": 569 }, { "epoch": 15.405405405405405, "grad_norm": 2.8125, "learning_rate": 4.990120862651774e-05, "loss": 0.5379, "step": 570 }, { "epoch": 15.432432432432432, "grad_norm": 2.546875, "learning_rate": 4.9900213916412544e-05, "loss": 0.4709, "step": 571 }, { "epoch": 15.45945945945946, "grad_norm": 2.390625, "learning_rate": 4.989921423360842e-05, "loss": 0.4623, "step": 572 }, { "epoch": 15.486486486486486, "grad_norm": 2.1875, "learning_rate": 4.9898209578304996e-05, "loss": 0.4343, "step": 573 }, { "epoch": 15.513513513513514, "grad_norm": 2.609375, "learning_rate": 4.9897199950702913e-05, "loss": 0.539, "step": 574 }, { "epoch": 15.54054054054054, "grad_norm": 2.265625, "learning_rate": 4.98961853510038e-05, "loss": 0.5219, "step": 575 }, { "epoch": 15.567567567567568, "grad_norm": 2.046875, "learning_rate": 4.989516577941028e-05, "loss": 0.4086, "step": 576 }, { "epoch": 15.594594594594595, "grad_norm": 2.515625, "learning_rate": 4.989414123612597e-05, "loss": 0.3584, "step": 577 }, { "epoch": 15.621621621621621, "grad_norm": 2.515625, "learning_rate": 4.9893111721355466e-05, "loss": 0.4936, "step": 578 }, { "epoch": 15.64864864864865, "grad_norm": 2.125, "learning_rate": 4.989207723530437e-05, "loss": 0.467, "step": 579 }, { "epoch": 15.675675675675675, "grad_norm": 2.515625, "learning_rate": 4.989103777817928e-05, "loss": 0.3794, "step": 580 }, { "epoch": 15.702702702702704, "grad_norm": 2.34375, "learning_rate": 4.988999335018777e-05, "loss": 0.4334, "step": 581 }, { "epoch": 15.72972972972973, "grad_norm": 2.078125, "learning_rate": 4.988894395153843e-05, "loss": 0.376, "step": 582 }, { "epoch": 15.756756756756756, "grad_norm": 3.015625, "learning_rate": 4.988788958244083e-05, "loss": 0.4672, "step": 583 }, { "epoch": 15.783783783783784, "grad_norm": 2.546875, "learning_rate": 4.988683024310552e-05, "loss": 0.4843, "step": 584 }, { "epoch": 15.81081081081081, "grad_norm": 2.328125, "learning_rate": 4.9885765933744056e-05, "loss": 0.4517, "step": 585 }, { "epoch": 15.837837837837839, "grad_norm": 2.296875, "learning_rate": 4.9884696654569006e-05, "loss": 0.4391, "step": 586 }, { "epoch": 15.864864864864865, "grad_norm": 2.1875, "learning_rate": 4.9883622405793886e-05, "loss": 0.3442, "step": 587 }, { "epoch": 15.891891891891891, "grad_norm": 2.28125, "learning_rate": 4.9882543187633254e-05, "loss": 0.3356, "step": 588 }, { "epoch": 15.91891891891892, "grad_norm": 2.0625, "learning_rate": 4.988145900030261e-05, "loss": 0.3815, "step": 589 }, { "epoch": 15.945945945945946, "grad_norm": 2.578125, "learning_rate": 4.9880369844018485e-05, "loss": 0.417, "step": 590 }, { "epoch": 15.972972972972974, "grad_norm": 2.546875, "learning_rate": 4.9879275718998395e-05, "loss": 0.4451, "step": 591 }, { "epoch": 16.0, "grad_norm": 2.3125, "learning_rate": 4.9878176625460825e-05, "loss": 0.4389, "step": 592 }, { "epoch": 16.027027027027028, "grad_norm": 2.46875, "learning_rate": 4.9877072563625285e-05, "loss": 0.4576, "step": 593 }, { "epoch": 16.054054054054053, "grad_norm": 2.515625, "learning_rate": 4.9875963533712255e-05, "loss": 0.3795, "step": 594 }, { "epoch": 16.08108108108108, "grad_norm": 2.40625, "learning_rate": 4.987484953594322e-05, "loss": 0.3966, "step": 595 }, { "epoch": 16.10810810810811, "grad_norm": 3.09375, "learning_rate": 4.9873730570540636e-05, "loss": 0.467, "step": 596 }, { "epoch": 16.135135135135137, "grad_norm": 2.109375, "learning_rate": 4.987260663772798e-05, "loss": 0.4093, "step": 597 }, { "epoch": 16.16216216216216, "grad_norm": 2.453125, "learning_rate": 4.987147773772971e-05, "loss": 0.3498, "step": 598 }, { "epoch": 16.18918918918919, "grad_norm": 2.828125, "learning_rate": 4.9870343870771256e-05, "loss": 0.5189, "step": 599 }, { "epoch": 16.216216216216218, "grad_norm": 2.25, "learning_rate": 4.986920503707908e-05, "loss": 0.5227, "step": 600 }, { "epoch": 16.243243243243242, "grad_norm": 2.65625, "learning_rate": 4.986806123688059e-05, "loss": 0.5061, "step": 601 }, { "epoch": 16.27027027027027, "grad_norm": 2.3125, "learning_rate": 4.9866912470404214e-05, "loss": 0.5086, "step": 602 }, { "epoch": 16.2972972972973, "grad_norm": 2.234375, "learning_rate": 4.9865758737879374e-05, "loss": 0.4687, "step": 603 }, { "epoch": 16.324324324324323, "grad_norm": 2.421875, "learning_rate": 4.986460003953648e-05, "loss": 0.3851, "step": 604 }, { "epoch": 16.35135135135135, "grad_norm": 2.109375, "learning_rate": 4.986343637560692e-05, "loss": 0.4418, "step": 605 }, { "epoch": 16.37837837837838, "grad_norm": 1.7890625, "learning_rate": 4.986226774632309e-05, "loss": 0.2922, "step": 606 }, { "epoch": 16.405405405405407, "grad_norm": 2.25, "learning_rate": 4.986109415191836e-05, "loss": 0.4462, "step": 607 }, { "epoch": 16.43243243243243, "grad_norm": 2.53125, "learning_rate": 4.985991559262712e-05, "loss": 0.4541, "step": 608 }, { "epoch": 16.45945945945946, "grad_norm": 2.375, "learning_rate": 4.985873206868472e-05, "loss": 0.4369, "step": 609 }, { "epoch": 16.486486486486488, "grad_norm": 2.5, "learning_rate": 4.985754358032752e-05, "loss": 0.3973, "step": 610 }, { "epoch": 16.513513513513512, "grad_norm": 2.796875, "learning_rate": 4.985635012779287e-05, "loss": 0.5209, "step": 611 }, { "epoch": 16.54054054054054, "grad_norm": 2.71875, "learning_rate": 4.98551517113191e-05, "loss": 0.4111, "step": 612 }, { "epoch": 16.56756756756757, "grad_norm": 2.15625, "learning_rate": 4.9853948331145564e-05, "loss": 0.3757, "step": 613 }, { "epoch": 16.594594594594593, "grad_norm": 2.4375, "learning_rate": 4.985273998751255e-05, "loss": 0.3417, "step": 614 }, { "epoch": 16.62162162162162, "grad_norm": 2.390625, "learning_rate": 4.9851526680661385e-05, "loss": 0.396, "step": 615 }, { "epoch": 16.64864864864865, "grad_norm": 2.4375, "learning_rate": 4.9850308410834386e-05, "loss": 0.5002, "step": 616 }, { "epoch": 16.675675675675677, "grad_norm": 2.578125, "learning_rate": 4.9849085178274824e-05, "loss": 0.3958, "step": 617 }, { "epoch": 16.7027027027027, "grad_norm": 2.265625, "learning_rate": 4.9847856983226996e-05, "loss": 0.3985, "step": 618 }, { "epoch": 16.72972972972973, "grad_norm": 2.53125, "learning_rate": 4.984662382593618e-05, "loss": 0.3239, "step": 619 }, { "epoch": 16.756756756756758, "grad_norm": 2.765625, "learning_rate": 4.984538570664865e-05, "loss": 0.4882, "step": 620 }, { "epoch": 16.783783783783782, "grad_norm": 3.328125, "learning_rate": 4.984414262561165e-05, "loss": 0.4421, "step": 621 }, { "epoch": 16.81081081081081, "grad_norm": 1.921875, "learning_rate": 4.984289458307344e-05, "loss": 0.3188, "step": 622 }, { "epoch": 16.83783783783784, "grad_norm": 2.328125, "learning_rate": 4.984164157928325e-05, "loss": 0.3762, "step": 623 }, { "epoch": 16.864864864864863, "grad_norm": 2.25, "learning_rate": 4.9840383614491325e-05, "loss": 0.3207, "step": 624 }, { "epoch": 16.89189189189189, "grad_norm": 2.5625, "learning_rate": 4.9839120688948874e-05, "loss": 0.4701, "step": 625 }, { "epoch": 16.91891891891892, "grad_norm": 2.546875, "learning_rate": 4.983785280290812e-05, "loss": 0.4204, "step": 626 }, { "epoch": 16.945945945945947, "grad_norm": 2.484375, "learning_rate": 4.9836579956622266e-05, "loss": 0.4252, "step": 627 }, { "epoch": 16.972972972972972, "grad_norm": 2.921875, "learning_rate": 4.9835302150345495e-05, "loss": 0.423, "step": 628 }, { "epoch": 17.0, "grad_norm": 2.34375, "learning_rate": 4.9834019384333003e-05, "loss": 0.3625, "step": 629 }, { "epoch": 17.027027027027028, "grad_norm": 2.5625, "learning_rate": 4.9832731658840956e-05, "loss": 0.4059, "step": 630 }, { "epoch": 17.054054054054053, "grad_norm": 2.171875, "learning_rate": 4.9831438974126523e-05, "loss": 0.3717, "step": 631 }, { "epoch": 17.08108108108108, "grad_norm": 2.5625, "learning_rate": 4.983014133044786e-05, "loss": 0.4029, "step": 632 }, { "epoch": 17.10810810810811, "grad_norm": 2.703125, "learning_rate": 4.982883872806412e-05, "loss": 0.3254, "step": 633 }, { "epoch": 17.135135135135137, "grad_norm": 1.953125, "learning_rate": 4.982753116723543e-05, "loss": 0.283, "step": 634 }, { "epoch": 17.16216216216216, "grad_norm": 2.4375, "learning_rate": 4.9826218648222913e-05, "loss": 0.3615, "step": 635 }, { "epoch": 17.18918918918919, "grad_norm": 2.5, "learning_rate": 4.9824901171288704e-05, "loss": 0.4098, "step": 636 }, { "epoch": 17.216216216216218, "grad_norm": 2.546875, "learning_rate": 4.982357873669589e-05, "loss": 0.4567, "step": 637 }, { "epoch": 17.243243243243242, "grad_norm": 2.25, "learning_rate": 4.982225134470857e-05, "loss": 0.3728, "step": 638 }, { "epoch": 17.27027027027027, "grad_norm": 2.203125, "learning_rate": 4.9820918995591845e-05, "loss": 0.4441, "step": 639 }, { "epoch": 17.2972972972973, "grad_norm": 2.03125, "learning_rate": 4.981958168961178e-05, "loss": 0.3282, "step": 640 }, { "epoch": 17.324324324324323, "grad_norm": 2.1875, "learning_rate": 4.981823942703544e-05, "loss": 0.4241, "step": 641 }, { "epoch": 17.35135135135135, "grad_norm": 2.265625, "learning_rate": 4.9816892208130895e-05, "loss": 0.4096, "step": 642 }, { "epoch": 17.37837837837838, "grad_norm": 2.625, "learning_rate": 4.9815540033167175e-05, "loss": 0.4586, "step": 643 }, { "epoch": 17.405405405405407, "grad_norm": 2.0625, "learning_rate": 4.981418290241433e-05, "loss": 0.3375, "step": 644 }, { "epoch": 17.43243243243243, "grad_norm": 2.15625, "learning_rate": 4.981282081614338e-05, "loss": 0.3051, "step": 645 }, { "epoch": 17.45945945945946, "grad_norm": 2.296875, "learning_rate": 4.9811453774626335e-05, "loss": 0.4253, "step": 646 }, { "epoch": 17.486486486486488, "grad_norm": 2.0625, "learning_rate": 4.9810081778136216e-05, "loss": 0.3864, "step": 647 }, { "epoch": 17.513513513513512, "grad_norm": 2.078125, "learning_rate": 4.9808704826947e-05, "loss": 0.3483, "step": 648 }, { "epoch": 17.54054054054054, "grad_norm": 2.359375, "learning_rate": 4.980732292133368e-05, "loss": 0.4227, "step": 649 }, { "epoch": 17.56756756756757, "grad_norm": 1.9375, "learning_rate": 4.9805936061572237e-05, "loss": 0.3204, "step": 650 }, { "epoch": 17.594594594594593, "grad_norm": 1.7734375, "learning_rate": 4.980454424793961e-05, "loss": 0.2635, "step": 651 }, { "epoch": 17.62162162162162, "grad_norm": 2.140625, "learning_rate": 4.9803147480713776e-05, "loss": 0.3642, "step": 652 }, { "epoch": 17.64864864864865, "grad_norm": 2.578125, "learning_rate": 4.980174576017366e-05, "loss": 0.4992, "step": 653 }, { "epoch": 17.675675675675677, "grad_norm": 2.3125, "learning_rate": 4.9800339086599203e-05, "loss": 0.3856, "step": 654 }, { "epoch": 17.7027027027027, "grad_norm": 2.09375, "learning_rate": 4.979892746027132e-05, "loss": 0.3277, "step": 655 }, { "epoch": 17.72972972972973, "grad_norm": 2.765625, "learning_rate": 4.979751088147192e-05, "loss": 0.3829, "step": 656 }, { "epoch": 17.756756756756758, "grad_norm": 2.53125, "learning_rate": 4.979608935048391e-05, "loss": 0.3661, "step": 657 }, { "epoch": 17.783783783783782, "grad_norm": 2.3125, "learning_rate": 4.9794662867591164e-05, "loss": 0.3506, "step": 658 }, { "epoch": 17.81081081081081, "grad_norm": 3.6875, "learning_rate": 4.979323143307856e-05, "loss": 0.4308, "step": 659 }, { "epoch": 17.83783783783784, "grad_norm": 2.46875, "learning_rate": 4.979179504723197e-05, "loss": 0.415, "step": 660 }, { "epoch": 17.864864864864863, "grad_norm": 2.515625, "learning_rate": 4.979035371033824e-05, "loss": 0.3835, "step": 661 }, { "epoch": 17.89189189189189, "grad_norm": 2.1875, "learning_rate": 4.978890742268523e-05, "loss": 0.276, "step": 662 }, { "epoch": 17.91891891891892, "grad_norm": 2.03125, "learning_rate": 4.978745618456174e-05, "loss": 0.2715, "step": 663 }, { "epoch": 17.945945945945947, "grad_norm": 1.875, "learning_rate": 4.978599999625761e-05, "loss": 0.3396, "step": 664 }, { "epoch": 17.972972972972972, "grad_norm": 2.046875, "learning_rate": 4.978453885806364e-05, "loss": 0.3074, "step": 665 }, { "epoch": 18.0, "grad_norm": 1.828125, "learning_rate": 4.978307277027164e-05, "loss": 0.2662, "step": 666 }, { "epoch": 18.027027027027028, "grad_norm": 2.125, "learning_rate": 4.978160173317438e-05, "loss": 0.2698, "step": 667 }, { "epoch": 18.054054054054053, "grad_norm": 2.3125, "learning_rate": 4.978012574706564e-05, "loss": 0.3512, "step": 668 }, { "epoch": 18.08108108108108, "grad_norm": 2.25, "learning_rate": 4.9778644812240186e-05, "loss": 0.3401, "step": 669 }, { "epoch": 18.10810810810811, "grad_norm": 2.25, "learning_rate": 4.9777158928993766e-05, "loss": 0.3939, "step": 670 }, { "epoch": 18.135135135135137, "grad_norm": 2.671875, "learning_rate": 4.977566809762311e-05, "loss": 0.4814, "step": 671 }, { "epoch": 18.16216216216216, "grad_norm": 2.453125, "learning_rate": 4.9774172318425955e-05, "loss": 0.3555, "step": 672 }, { "epoch": 18.18918918918919, "grad_norm": 1.96875, "learning_rate": 4.977267159170101e-05, "loss": 0.2894, "step": 673 }, { "epoch": 18.216216216216218, "grad_norm": 2.46875, "learning_rate": 4.9771165917747974e-05, "loss": 0.3816, "step": 674 }, { "epoch": 18.243243243243242, "grad_norm": 2.046875, "learning_rate": 4.9769655296867554e-05, "loss": 0.3652, "step": 675 }, { "epoch": 18.27027027027027, "grad_norm": 2.25, "learning_rate": 4.9768139729361404e-05, "loss": 0.3557, "step": 676 }, { "epoch": 18.2972972972973, "grad_norm": 2.375, "learning_rate": 4.976661921553222e-05, "loss": 0.3763, "step": 677 }, { "epoch": 18.324324324324323, "grad_norm": 2.28125, "learning_rate": 4.976509375568363e-05, "loss": 0.4034, "step": 678 }, { "epoch": 18.35135135135135, "grad_norm": 2.40625, "learning_rate": 4.976356335012029e-05, "loss": 0.4206, "step": 679 }, { "epoch": 18.37837837837838, "grad_norm": 2.265625, "learning_rate": 4.976202799914782e-05, "loss": 0.3706, "step": 680 }, { "epoch": 18.405405405405407, "grad_norm": 2.359375, "learning_rate": 4.976048770307286e-05, "loss": 0.3636, "step": 681 }, { "epoch": 18.43243243243243, "grad_norm": 2.3125, "learning_rate": 4.9758942462202986e-05, "loss": 0.3146, "step": 682 }, { "epoch": 18.45945945945946, "grad_norm": 1.9140625, "learning_rate": 4.975739227684681e-05, "loss": 0.3023, "step": 683 }, { "epoch": 18.486486486486488, "grad_norm": 2.140625, "learning_rate": 4.975583714731389e-05, "loss": 0.3278, "step": 684 }, { "epoch": 18.513513513513512, "grad_norm": 2.359375, "learning_rate": 4.975427707391482e-05, "loss": 0.3716, "step": 685 }, { "epoch": 18.54054054054054, "grad_norm": 2.015625, "learning_rate": 4.9752712056961145e-05, "loss": 0.3176, "step": 686 }, { "epoch": 18.56756756756757, "grad_norm": 2.25, "learning_rate": 4.9751142096765397e-05, "loss": 0.3207, "step": 687 }, { "epoch": 18.594594594594593, "grad_norm": 2.140625, "learning_rate": 4.9749567193641115e-05, "loss": 0.3183, "step": 688 }, { "epoch": 18.62162162162162, "grad_norm": 2.34375, "learning_rate": 4.9747987347902815e-05, "loss": 0.31, "step": 689 }, { "epoch": 18.64864864864865, "grad_norm": 2.0625, "learning_rate": 4.9746402559866004e-05, "loss": 0.3393, "step": 690 }, { "epoch": 18.675675675675677, "grad_norm": 1.8359375, "learning_rate": 4.9744812829847155e-05, "loss": 0.2883, "step": 691 }, { "epoch": 18.7027027027027, "grad_norm": 2.453125, "learning_rate": 4.9743218158163756e-05, "loss": 0.4108, "step": 692 }, { "epoch": 18.72972972972973, "grad_norm": 2.359375, "learning_rate": 4.974161854513428e-05, "loss": 0.3772, "step": 693 }, { "epoch": 18.756756756756758, "grad_norm": 2.515625, "learning_rate": 4.974001399107816e-05, "loss": 0.3527, "step": 694 }, { "epoch": 18.783783783783782, "grad_norm": 2.859375, "learning_rate": 4.973840449631585e-05, "loss": 0.4024, "step": 695 }, { "epoch": 18.81081081081081, "grad_norm": 2.53125, "learning_rate": 4.9736790061168756e-05, "loss": 0.3462, "step": 696 }, { "epoch": 18.83783783783784, "grad_norm": 1.9921875, "learning_rate": 4.9735170685959315e-05, "loss": 0.268, "step": 697 }, { "epoch": 18.864864864864863, "grad_norm": 2.203125, "learning_rate": 4.97335463710109e-05, "loss": 0.2776, "step": 698 }, { "epoch": 18.89189189189189, "grad_norm": 2.796875, "learning_rate": 4.9731917116647906e-05, "loss": 0.3207, "step": 699 }, { "epoch": 18.91891891891892, "grad_norm": 2.0625, "learning_rate": 4.97302829231957e-05, "loss": 0.3675, "step": 700 }, { "epoch": 18.945945945945947, "grad_norm": 1.890625, "learning_rate": 4.9728643790980645e-05, "loss": 0.2618, "step": 701 }, { "epoch": 18.972972972972972, "grad_norm": 2.546875, "learning_rate": 4.9726999720330084e-05, "loss": 0.3667, "step": 702 }, { "epoch": 19.0, "grad_norm": 2.390625, "learning_rate": 4.972535071157233e-05, "loss": 0.4651, "step": 703 }, { "epoch": 19.027027027027028, "grad_norm": 2.5625, "learning_rate": 4.972369676503672e-05, "loss": 0.3411, "step": 704 }, { "epoch": 19.054054054054053, "grad_norm": 2.046875, "learning_rate": 4.9722037881053535e-05, "loss": 0.2793, "step": 705 }, { "epoch": 19.08108108108108, "grad_norm": 2.296875, "learning_rate": 4.972037405995408e-05, "loss": 0.3565, "step": 706 }, { "epoch": 19.10810810810811, "grad_norm": 2.125, "learning_rate": 4.9718705302070613e-05, "loss": 0.321, "step": 707 }, { "epoch": 19.135135135135137, "grad_norm": 2.953125, "learning_rate": 4.9717031607736406e-05, "loss": 0.3714, "step": 708 }, { "epoch": 19.16216216216216, "grad_norm": 2.953125, "learning_rate": 4.971535297728571e-05, "loss": 0.3236, "step": 709 }, { "epoch": 19.18918918918919, "grad_norm": 2.3125, "learning_rate": 4.971366941105374e-05, "loss": 0.3742, "step": 710 }, { "epoch": 19.216216216216218, "grad_norm": 1.6640625, "learning_rate": 4.9711980909376706e-05, "loss": 0.2641, "step": 711 }, { "epoch": 19.243243243243242, "grad_norm": 1.5546875, "learning_rate": 4.971028747259184e-05, "loss": 0.2157, "step": 712 }, { "epoch": 19.27027027027027, "grad_norm": 2.0625, "learning_rate": 4.9708589101037306e-05, "loss": 0.3365, "step": 713 }, { "epoch": 19.2972972972973, "grad_norm": 2.15625, "learning_rate": 4.970688579505229e-05, "loss": 0.2474, "step": 714 }, { "epoch": 19.324324324324323, "grad_norm": 2.234375, "learning_rate": 4.9705177554976946e-05, "loss": 0.311, "step": 715 }, { "epoch": 19.35135135135135, "grad_norm": 2.515625, "learning_rate": 4.970346438115241e-05, "loss": 0.3606, "step": 716 }, { "epoch": 19.37837837837838, "grad_norm": 2.21875, "learning_rate": 4.9701746273920824e-05, "loss": 0.291, "step": 717 }, { "epoch": 19.405405405405407, "grad_norm": 2.125, "learning_rate": 4.970002323362529e-05, "loss": 0.3398, "step": 718 }, { "epoch": 19.43243243243243, "grad_norm": 2.5625, "learning_rate": 4.969829526060993e-05, "loss": 0.3551, "step": 719 }, { "epoch": 19.45945945945946, "grad_norm": 2.0625, "learning_rate": 4.96965623552198e-05, "loss": 0.339, "step": 720 }, { "epoch": 19.486486486486488, "grad_norm": 2.765625, "learning_rate": 4.9694824517800995e-05, "loss": 0.4501, "step": 721 }, { "epoch": 19.513513513513512, "grad_norm": 2.78125, "learning_rate": 4.9693081748700555e-05, "loss": 0.4338, "step": 722 }, { "epoch": 19.54054054054054, "grad_norm": 2.3125, "learning_rate": 4.969133404826652e-05, "loss": 0.3221, "step": 723 }, { "epoch": 19.56756756756757, "grad_norm": 2.046875, "learning_rate": 4.968958141684793e-05, "loss": 0.2868, "step": 724 }, { "epoch": 19.594594594594593, "grad_norm": 2.28125, "learning_rate": 4.9687823854794776e-05, "loss": 0.3541, "step": 725 }, { "epoch": 19.62162162162162, "grad_norm": 2.53125, "learning_rate": 4.968606136245806e-05, "loss": 0.434, "step": 726 }, { "epoch": 19.64864864864865, "grad_norm": 2.515625, "learning_rate": 4.968429394018976e-05, "loss": 0.4366, "step": 727 }, { "epoch": 19.675675675675677, "grad_norm": 2.640625, "learning_rate": 4.968252158834284e-05, "loss": 0.2765, "step": 728 }, { "epoch": 19.7027027027027, "grad_norm": 1.9609375, "learning_rate": 4.968074430727125e-05, "loss": 0.2304, "step": 729 }, { "epoch": 19.72972972972973, "grad_norm": 2.859375, "learning_rate": 4.967896209732992e-05, "loss": 0.3013, "step": 730 }, { "epoch": 19.756756756756758, "grad_norm": 2.03125, "learning_rate": 4.967717495887476e-05, "loss": 0.3435, "step": 731 }, { "epoch": 19.783783783783782, "grad_norm": 1.9765625, "learning_rate": 4.967538289226267e-05, "loss": 0.3156, "step": 732 }, { "epoch": 19.81081081081081, "grad_norm": 1.96875, "learning_rate": 4.967358589785155e-05, "loss": 0.2525, "step": 733 }, { "epoch": 19.83783783783784, "grad_norm": 1.7890625, "learning_rate": 4.967178397600026e-05, "loss": 0.2899, "step": 734 }, { "epoch": 19.864864864864863, "grad_norm": 1.984375, "learning_rate": 4.966997712706865e-05, "loss": 0.3292, "step": 735 }, { "epoch": 19.89189189189189, "grad_norm": 2.3125, "learning_rate": 4.966816535141756e-05, "loss": 0.3715, "step": 736 }, { "epoch": 19.91891891891892, "grad_norm": 2.203125, "learning_rate": 4.966634864940881e-05, "loss": 0.3187, "step": 737 }, { "epoch": 19.945945945945947, "grad_norm": 1.6875, "learning_rate": 4.9664527021405206e-05, "loss": 0.2503, "step": 738 }, { "epoch": 19.972972972972972, "grad_norm": 1.5625, "learning_rate": 4.966270046777053e-05, "loss": 0.2466, "step": 739 }, { "epoch": 20.0, "grad_norm": 1.9921875, "learning_rate": 4.9660868988869566e-05, "loss": 0.3213, "step": 740 }, { "epoch": 20.027027027027028, "grad_norm": 1.7578125, "learning_rate": 4.965903258506806e-05, "loss": 0.2736, "step": 741 }, { "epoch": 20.054054054054053, "grad_norm": 1.9296875, "learning_rate": 4.965719125673276e-05, "loss": 0.264, "step": 742 }, { "epoch": 20.08108108108108, "grad_norm": 2.09375, "learning_rate": 4.965534500423137e-05, "loss": 0.2894, "step": 743 }, { "epoch": 20.10810810810811, "grad_norm": 2.171875, "learning_rate": 4.965349382793262e-05, "loss": 0.338, "step": 744 }, { "epoch": 20.135135135135137, "grad_norm": 2.34375, "learning_rate": 4.965163772820618e-05, "loss": 0.399, "step": 745 }, { "epoch": 20.16216216216216, "grad_norm": 2.140625, "learning_rate": 4.9649776705422735e-05, "loss": 0.2863, "step": 746 }, { "epoch": 20.18918918918919, "grad_norm": 2.296875, "learning_rate": 4.964791075995394e-05, "loss": 0.255, "step": 747 }, { "epoch": 20.216216216216218, "grad_norm": 1.921875, "learning_rate": 4.9646039892172416e-05, "loss": 0.2832, "step": 748 }, { "epoch": 20.243243243243242, "grad_norm": 2.40625, "learning_rate": 4.964416410245181e-05, "loss": 0.3955, "step": 749 }, { "epoch": 20.27027027027027, "grad_norm": 1.6015625, "learning_rate": 4.964228339116671e-05, "loss": 0.2469, "step": 750 }, { "epoch": 20.2972972972973, "grad_norm": 1.703125, "learning_rate": 4.9640397758692715e-05, "loss": 0.2462, "step": 751 }, { "epoch": 20.324324324324323, "grad_norm": 2.140625, "learning_rate": 4.963850720540638e-05, "loss": 0.3601, "step": 752 }, { "epoch": 20.35135135135135, "grad_norm": 1.8125, "learning_rate": 4.9636611731685276e-05, "loss": 0.2784, "step": 753 }, { "epoch": 20.37837837837838, "grad_norm": 1.8984375, "learning_rate": 4.963471133790792e-05, "loss": 0.2711, "step": 754 }, { "epoch": 20.405405405405407, "grad_norm": 1.8125, "learning_rate": 4.963280602445386e-05, "loss": 0.2827, "step": 755 }, { "epoch": 20.43243243243243, "grad_norm": 2.046875, "learning_rate": 4.9630895791703555e-05, "loss": 0.264, "step": 756 }, { "epoch": 20.45945945945946, "grad_norm": 1.9375, "learning_rate": 4.9628980640038526e-05, "loss": 0.2977, "step": 757 }, { "epoch": 20.486486486486488, "grad_norm": 2.265625, "learning_rate": 4.9627060569841214e-05, "loss": 0.3876, "step": 758 }, { "epoch": 20.513513513513512, "grad_norm": 1.8671875, "learning_rate": 4.9625135581495073e-05, "loss": 0.3125, "step": 759 }, { "epoch": 20.54054054054054, "grad_norm": 1.8046875, "learning_rate": 4.962320567538454e-05, "loss": 0.3081, "step": 760 }, { "epoch": 20.56756756756757, "grad_norm": 1.9296875, "learning_rate": 4.962127085189503e-05, "loss": 0.2815, "step": 761 }, { "epoch": 20.594594594594593, "grad_norm": 1.7109375, "learning_rate": 4.961933111141292e-05, "loss": 0.2279, "step": 762 }, { "epoch": 20.62162162162162, "grad_norm": 2.15625, "learning_rate": 4.96173864543256e-05, "loss": 0.3713, "step": 763 }, { "epoch": 20.64864864864865, "grad_norm": 2.1875, "learning_rate": 4.9615436881021416e-05, "loss": 0.3235, "step": 764 }, { "epoch": 20.675675675675677, "grad_norm": 1.546875, "learning_rate": 4.961348239188972e-05, "loss": 0.2306, "step": 765 }, { "epoch": 20.7027027027027, "grad_norm": 2.890625, "learning_rate": 4.961152298732083e-05, "loss": 0.3601, "step": 766 }, { "epoch": 20.72972972972973, "grad_norm": 2.0, "learning_rate": 4.960955866770605e-05, "loss": 0.3063, "step": 767 }, { "epoch": 20.756756756756758, "grad_norm": 1.671875, "learning_rate": 4.9607589433437654e-05, "loss": 0.243, "step": 768 }, { "epoch": 20.783783783783782, "grad_norm": 1.390625, "learning_rate": 4.960561528490892e-05, "loss": 0.2273, "step": 769 }, { "epoch": 20.81081081081081, "grad_norm": 1.8203125, "learning_rate": 4.960363622251409e-05, "loss": 0.3074, "step": 770 }, { "epoch": 20.83783783783784, "grad_norm": 2.03125, "learning_rate": 4.96016522466484e-05, "loss": 0.2486, "step": 771 }, { "epoch": 20.864864864864863, "grad_norm": 1.90625, "learning_rate": 4.959966335770805e-05, "loss": 0.307, "step": 772 }, { "epoch": 20.89189189189189, "grad_norm": 1.6484375, "learning_rate": 4.959766955609023e-05, "loss": 0.2371, "step": 773 }, { "epoch": 20.91891891891892, "grad_norm": 2.015625, "learning_rate": 4.959567084219313e-05, "loss": 0.3236, "step": 774 }, { "epoch": 20.945945945945947, "grad_norm": 2.078125, "learning_rate": 4.9593667216415884e-05, "loss": 0.309, "step": 775 }, { "epoch": 20.972972972972972, "grad_norm": 2.328125, "learning_rate": 4.9591658679158634e-05, "loss": 0.3573, "step": 776 }, { "epoch": 21.0, "grad_norm": 1.8125, "learning_rate": 4.958964523082249e-05, "loss": 0.2776, "step": 777 }, { "epoch": 21.027027027027028, "grad_norm": 2.078125, "learning_rate": 4.958762687180956e-05, "loss": 0.3385, "step": 778 }, { "epoch": 21.054054054054053, "grad_norm": 1.7421875, "learning_rate": 4.958560360252291e-05, "loss": 0.2784, "step": 779 }, { "epoch": 21.08108108108108, "grad_norm": 2.578125, "learning_rate": 4.95835754233666e-05, "loss": 0.2563, "step": 780 }, { "epoch": 21.10810810810811, "grad_norm": 2.1875, "learning_rate": 4.958154233474567e-05, "loss": 0.1943, "step": 781 }, { "epoch": 21.135135135135137, "grad_norm": 1.859375, "learning_rate": 4.957950433706613e-05, "loss": 0.2523, "step": 782 }, { "epoch": 21.16216216216216, "grad_norm": 2.625, "learning_rate": 4.957746143073499e-05, "loss": 0.2528, "step": 783 }, { "epoch": 21.18918918918919, "grad_norm": 2.15625, "learning_rate": 4.957541361616022e-05, "loss": 0.2746, "step": 784 }, { "epoch": 21.216216216216218, "grad_norm": 1.6953125, "learning_rate": 4.9573360893750784e-05, "loss": 0.2618, "step": 785 }, { "epoch": 21.243243243243242, "grad_norm": 1.859375, "learning_rate": 4.9571303263916614e-05, "loss": 0.2108, "step": 786 }, { "epoch": 21.27027027027027, "grad_norm": 2.984375, "learning_rate": 4.956924072706864e-05, "loss": 0.2868, "step": 787 }, { "epoch": 21.2972972972973, "grad_norm": 1.8984375, "learning_rate": 4.956717328361875e-05, "loss": 0.2689, "step": 788 }, { "epoch": 21.324324324324323, "grad_norm": 2.203125, "learning_rate": 4.956510093397983e-05, "loss": 0.2897, "step": 789 }, { "epoch": 21.35135135135135, "grad_norm": 2.34375, "learning_rate": 4.956302367856575e-05, "loss": 0.3302, "step": 790 }, { "epoch": 21.37837837837838, "grad_norm": 2.140625, "learning_rate": 4.9560941517791316e-05, "loss": 0.3211, "step": 791 }, { "epoch": 21.405405405405407, "grad_norm": 3.859375, "learning_rate": 4.9558854452072365e-05, "loss": 0.3346, "step": 792 }, { "epoch": 21.43243243243243, "grad_norm": 3.125, "learning_rate": 4.955676248182571e-05, "loss": 0.2803, "step": 793 }, { "epoch": 21.45945945945946, "grad_norm": 2.0, "learning_rate": 4.9554665607469105e-05, "loss": 0.3263, "step": 794 }, { "epoch": 21.486486486486488, "grad_norm": 2.171875, "learning_rate": 4.955256382942132e-05, "loss": 0.3614, "step": 795 }, { "epoch": 21.513513513513512, "grad_norm": 2.9375, "learning_rate": 4.955045714810207e-05, "loss": 0.3045, "step": 796 }, { "epoch": 21.54054054054054, "grad_norm": 2.3125, "learning_rate": 4.95483455639321e-05, "loss": 0.2867, "step": 797 }, { "epoch": 21.56756756756757, "grad_norm": 2.515625, "learning_rate": 4.954622907733308e-05, "loss": 0.4077, "step": 798 }, { "epoch": 21.594594594594593, "grad_norm": 3.03125, "learning_rate": 4.95441076887277e-05, "loss": 0.2903, "step": 799 }, { "epoch": 21.62162162162162, "grad_norm": 1.921875, "learning_rate": 4.95419813985396e-05, "loss": 0.22, "step": 800 }, { "epoch": 21.64864864864865, "grad_norm": 1.609375, "learning_rate": 4.953985020719342e-05, "loss": 0.2188, "step": 801 }, { "epoch": 21.675675675675677, "grad_norm": 2.6875, "learning_rate": 4.9537714115114756e-05, "loss": 0.2189, "step": 802 }, { "epoch": 21.7027027027027, "grad_norm": 3.3125, "learning_rate": 4.953557312273021e-05, "loss": 0.3146, "step": 803 }, { "epoch": 21.72972972972973, "grad_norm": 2.171875, "learning_rate": 4.953342723046734e-05, "loss": 0.2805, "step": 804 }, { "epoch": 21.756756756756758, "grad_norm": 1.6328125, "learning_rate": 4.9531276438754704e-05, "loss": 0.2218, "step": 805 }, { "epoch": 21.783783783783782, "grad_norm": 3.21875, "learning_rate": 4.952912074802182e-05, "loss": 0.2694, "step": 806 }, { "epoch": 21.81081081081081, "grad_norm": 3.0625, "learning_rate": 4.952696015869918e-05, "loss": 0.1981, "step": 807 }, { "epoch": 21.83783783783784, "grad_norm": 2.875, "learning_rate": 4.952479467121827e-05, "loss": 0.4157, "step": 808 }, { "epoch": 21.864864864864863, "grad_norm": 2.421875, "learning_rate": 4.952262428601156e-05, "loss": 0.332, "step": 809 }, { "epoch": 21.89189189189189, "grad_norm": 2.34375, "learning_rate": 4.952044900351248e-05, "loss": 0.2223, "step": 810 }, { "epoch": 21.91891891891892, "grad_norm": 2.40625, "learning_rate": 4.951826882415544e-05, "loss": 0.3544, "step": 811 }, { "epoch": 21.945945945945947, "grad_norm": 2.328125, "learning_rate": 4.951608374837583e-05, "loss": 0.3049, "step": 812 }, { "epoch": 21.972972972972972, "grad_norm": 3.265625, "learning_rate": 4.951389377661003e-05, "loss": 0.3896, "step": 813 }, { "epoch": 22.0, "grad_norm": 2.171875, "learning_rate": 4.9511698909295395e-05, "loss": 0.2789, "step": 814 }, { "epoch": 22.027027027027028, "grad_norm": 2.390625, "learning_rate": 4.9509499146870236e-05, "loss": 0.3313, "step": 815 }, { "epoch": 22.054054054054053, "grad_norm": 2.3125, "learning_rate": 4.950729448977386e-05, "loss": 0.2734, "step": 816 }, { "epoch": 22.08108108108108, "grad_norm": 2.265625, "learning_rate": 4.9505084938446557e-05, "loss": 0.3144, "step": 817 }, { "epoch": 22.10810810810811, "grad_norm": 1.890625, "learning_rate": 4.950287049332958e-05, "loss": 0.2937, "step": 818 }, { "epoch": 22.135135135135137, "grad_norm": 2.078125, "learning_rate": 4.950065115486515e-05, "loss": 0.265, "step": 819 }, { "epoch": 22.16216216216216, "grad_norm": 1.859375, "learning_rate": 4.9498426923496514e-05, "loss": 0.3265, "step": 820 }, { "epoch": 22.18918918918919, "grad_norm": 2.109375, "learning_rate": 4.9496197799667835e-05, "loss": 0.1791, "step": 821 }, { "epoch": 22.216216216216218, "grad_norm": 2.359375, "learning_rate": 4.949396378382428e-05, "loss": 0.3667, "step": 822 }, { "epoch": 22.243243243243242, "grad_norm": 2.046875, "learning_rate": 4.9491724876412005e-05, "loss": 0.3225, "step": 823 }, { "epoch": 22.27027027027027, "grad_norm": 2.078125, "learning_rate": 4.948948107787813e-05, "loss": 0.2336, "step": 824 }, { "epoch": 22.2972972972973, "grad_norm": 2.375, "learning_rate": 4.948723238867075e-05, "loss": 0.3411, "step": 825 }, { "epoch": 22.324324324324323, "grad_norm": 1.5546875, "learning_rate": 4.948497880923894e-05, "loss": 0.2308, "step": 826 }, { "epoch": 22.35135135135135, "grad_norm": 1.984375, "learning_rate": 4.948272034003275e-05, "loss": 0.2478, "step": 827 }, { "epoch": 22.37837837837838, "grad_norm": 2.8125, "learning_rate": 4.948045698150322e-05, "loss": 0.3468, "step": 828 }, { "epoch": 22.405405405405407, "grad_norm": 2.375, "learning_rate": 4.9478188734102324e-05, "loss": 0.3842, "step": 829 }, { "epoch": 22.43243243243243, "grad_norm": 2.3125, "learning_rate": 4.9475915598283073e-05, "loss": 0.3076, "step": 830 }, { "epoch": 22.45945945945946, "grad_norm": 1.828125, "learning_rate": 4.947363757449941e-05, "loss": 0.213, "step": 831 }, { "epoch": 22.486486486486488, "grad_norm": 1.9609375, "learning_rate": 4.947135466320627e-05, "loss": 0.2268, "step": 832 }, { "epoch": 22.513513513513512, "grad_norm": 2.0, "learning_rate": 4.946906686485957e-05, "loss": 0.3199, "step": 833 }, { "epoch": 22.54054054054054, "grad_norm": 2.5, "learning_rate": 4.9466774179916175e-05, "loss": 0.2821, "step": 834 }, { "epoch": 22.56756756756757, "grad_norm": 1.6875, "learning_rate": 4.946447660883396e-05, "loss": 0.2284, "step": 835 }, { "epoch": 22.594594594594593, "grad_norm": 2.9375, "learning_rate": 4.946217415207177e-05, "loss": 0.3055, "step": 836 }, { "epoch": 22.62162162162162, "grad_norm": 2.96875, "learning_rate": 4.94598668100894e-05, "loss": 0.2983, "step": 837 }, { "epoch": 22.64864864864865, "grad_norm": 2.015625, "learning_rate": 4.945755458334764e-05, "loss": 0.2222, "step": 838 }, { "epoch": 22.675675675675677, "grad_norm": 1.9453125, "learning_rate": 4.945523747230827e-05, "loss": 0.2849, "step": 839 }, { "epoch": 22.7027027027027, "grad_norm": 2.390625, "learning_rate": 4.9452915477434e-05, "loss": 0.3037, "step": 840 }, { "epoch": 22.72972972972973, "grad_norm": 2.171875, "learning_rate": 4.945058859918858e-05, "loss": 0.2766, "step": 841 }, { "epoch": 22.756756756756758, "grad_norm": 2.265625, "learning_rate": 4.944825683803666e-05, "loss": 0.2741, "step": 842 }, { "epoch": 22.783783783783782, "grad_norm": 2.53125, "learning_rate": 4.944592019444394e-05, "loss": 0.2743, "step": 843 }, { "epoch": 22.81081081081081, "grad_norm": 1.96875, "learning_rate": 4.9443578668877045e-05, "loss": 0.3288, "step": 844 }, { "epoch": 22.83783783783784, "grad_norm": 1.5234375, "learning_rate": 4.944123226180359e-05, "loss": 0.2225, "step": 845 }, { "epoch": 22.864864864864863, "grad_norm": 2.3125, "learning_rate": 4.943888097369216e-05, "loss": 0.343, "step": 846 }, { "epoch": 22.89189189189189, "grad_norm": 1.9140625, "learning_rate": 4.943652480501232e-05, "loss": 0.2435, "step": 847 }, { "epoch": 22.91891891891892, "grad_norm": 1.8359375, "learning_rate": 4.943416375623461e-05, "loss": 0.2894, "step": 848 }, { "epoch": 22.945945945945947, "grad_norm": 1.4296875, "learning_rate": 4.943179782783055e-05, "loss": 0.206, "step": 849 }, { "epoch": 22.972972972972972, "grad_norm": 2.296875, "learning_rate": 4.942942702027262e-05, "loss": 0.261, "step": 850 }, { "epoch": 23.0, "grad_norm": 1.71875, "learning_rate": 4.9427051334034286e-05, "loss": 0.2352, "step": 851 }, { "epoch": 23.027027027027028, "grad_norm": 2.140625, "learning_rate": 4.9424670769589984e-05, "loss": 0.2403, "step": 852 }, { "epoch": 23.054054054054053, "grad_norm": 1.7890625, "learning_rate": 4.9422285327415126e-05, "loss": 0.2485, "step": 853 }, { "epoch": 23.08108108108108, "grad_norm": 2.125, "learning_rate": 4.9419895007986095e-05, "loss": 0.4317, "step": 854 }, { "epoch": 23.10810810810811, "grad_norm": 2.25, "learning_rate": 4.9417499811780245e-05, "loss": 0.3156, "step": 855 }, { "epoch": 23.135135135135137, "grad_norm": 2.421875, "learning_rate": 4.9415099739275914e-05, "loss": 0.4078, "step": 856 }, { "epoch": 23.16216216216216, "grad_norm": 1.90625, "learning_rate": 4.941269479095242e-05, "loss": 0.2295, "step": 857 }, { "epoch": 23.18918918918919, "grad_norm": 2.046875, "learning_rate": 4.941028496729002e-05, "loss": 0.2427, "step": 858 }, { "epoch": 23.216216216216218, "grad_norm": 1.71875, "learning_rate": 4.940787026876999e-05, "loss": 0.3054, "step": 859 }, { "epoch": 23.243243243243242, "grad_norm": 2.046875, "learning_rate": 4.940545069587454e-05, "loss": 0.3147, "step": 860 }, { "epoch": 23.27027027027027, "grad_norm": 2.0625, "learning_rate": 4.940302624908688e-05, "loss": 0.3692, "step": 861 }, { "epoch": 23.2972972972973, "grad_norm": 1.8046875, "learning_rate": 4.9400596928891185e-05, "loss": 0.2652, "step": 862 }, { "epoch": 23.324324324324323, "grad_norm": 1.8828125, "learning_rate": 4.93981627357726e-05, "loss": 0.269, "step": 863 }, { "epoch": 23.35135135135135, "grad_norm": 2.28125, "learning_rate": 4.939572367021724e-05, "loss": 0.2706, "step": 864 }, { "epoch": 23.37837837837838, "grad_norm": 2.03125, "learning_rate": 4.939327973271221e-05, "loss": 0.243, "step": 865 }, { "epoch": 23.405405405405407, "grad_norm": 2.5625, "learning_rate": 4.939083092374558e-05, "loss": 0.2681, "step": 866 }, { "epoch": 23.43243243243243, "grad_norm": 2.390625, "learning_rate": 4.938837724380637e-05, "loss": 0.364, "step": 867 }, { "epoch": 23.45945945945946, "grad_norm": 2.109375, "learning_rate": 4.9385918693384606e-05, "loss": 0.2215, "step": 868 }, { "epoch": 23.486486486486488, "grad_norm": 2.3125, "learning_rate": 4.938345527297127e-05, "loss": 0.199, "step": 869 }, { "epoch": 23.513513513513512, "grad_norm": 1.8828125, "learning_rate": 4.938098698305832e-05, "loss": 0.2539, "step": 870 }, { "epoch": 23.54054054054054, "grad_norm": 1.4921875, "learning_rate": 4.937851382413869e-05, "loss": 0.2418, "step": 871 }, { "epoch": 23.56756756756757, "grad_norm": 3.046875, "learning_rate": 4.937603579670628e-05, "loss": 0.2501, "step": 872 }, { "epoch": 23.594594594594593, "grad_norm": 2.59375, "learning_rate": 4.937355290125595e-05, "loss": 0.2889, "step": 873 }, { "epoch": 23.62162162162162, "grad_norm": 2.03125, "learning_rate": 4.937106513828357e-05, "loss": 0.2852, "step": 874 }, { "epoch": 23.64864864864865, "grad_norm": 2.046875, "learning_rate": 4.936857250828595e-05, "loss": 0.2915, "step": 875 }, { "epoch": 23.675675675675677, "grad_norm": 2.5625, "learning_rate": 4.936607501176088e-05, "loss": 0.2683, "step": 876 }, { "epoch": 23.7027027027027, "grad_norm": 1.7421875, "learning_rate": 4.936357264920712e-05, "loss": 0.2041, "step": 877 }, { "epoch": 23.72972972972973, "grad_norm": 1.796875, "learning_rate": 4.936106542112441e-05, "loss": 0.2678, "step": 878 }, { "epoch": 23.756756756756758, "grad_norm": 1.84375, "learning_rate": 4.9358553328013445e-05, "loss": 0.2466, "step": 879 }, { "epoch": 23.783783783783782, "grad_norm": 1.921875, "learning_rate": 4.9356036370375915e-05, "loss": 0.3213, "step": 880 }, { "epoch": 23.81081081081081, "grad_norm": 1.796875, "learning_rate": 4.935351454871446e-05, "loss": 0.2446, "step": 881 }, { "epoch": 23.83783783783784, "grad_norm": 1.3515625, "learning_rate": 4.9350987863532715e-05, "loss": 0.1923, "step": 882 }, { "epoch": 23.864864864864863, "grad_norm": 1.9453125, "learning_rate": 4.934845631533526e-05, "loss": 0.2602, "step": 883 }, { "epoch": 23.89189189189189, "grad_norm": 1.2734375, "learning_rate": 4.934591990462766e-05, "loss": 0.1838, "step": 884 }, { "epoch": 23.91891891891892, "grad_norm": 1.8203125, "learning_rate": 4.934337863191644e-05, "loss": 0.275, "step": 885 }, { "epoch": 23.945945945945947, "grad_norm": 1.46875, "learning_rate": 4.934083249770912e-05, "loss": 0.2576, "step": 886 }, { "epoch": 23.972972972972972, "grad_norm": 2.0, "learning_rate": 4.933828150251417e-05, "loss": 0.3097, "step": 887 }, { "epoch": 24.0, "grad_norm": 2.359375, "learning_rate": 4.9335725646841045e-05, "loss": 0.3769, "step": 888 }, { "epoch": 24.027027027027028, "grad_norm": 2.0, "learning_rate": 4.933316493120015e-05, "loss": 0.2539, "step": 889 }, { "epoch": 24.054054054054053, "grad_norm": 1.65625, "learning_rate": 4.933059935610287e-05, "loss": 0.2769, "step": 890 }, { "epoch": 24.08108108108108, "grad_norm": 1.453125, "learning_rate": 4.932802892206158e-05, "loss": 0.1817, "step": 891 }, { "epoch": 24.10810810810811, "grad_norm": 1.9296875, "learning_rate": 4.9325453629589593e-05, "loss": 0.2917, "step": 892 }, { "epoch": 24.135135135135137, "grad_norm": 1.6953125, "learning_rate": 4.9322873479201214e-05, "loss": 0.2119, "step": 893 }, { "epoch": 24.16216216216216, "grad_norm": 1.625, "learning_rate": 4.932028847141172e-05, "loss": 0.208, "step": 894 }, { "epoch": 24.18918918918919, "grad_norm": 2.0, "learning_rate": 4.931769860673734e-05, "loss": 0.3243, "step": 895 }, { "epoch": 24.216216216216218, "grad_norm": 2.40625, "learning_rate": 4.931510388569528e-05, "loss": 0.3425, "step": 896 }, { "epoch": 24.243243243243242, "grad_norm": 1.65625, "learning_rate": 4.9312504308803734e-05, "loss": 0.2248, "step": 897 }, { "epoch": 24.27027027027027, "grad_norm": 1.921875, "learning_rate": 4.9309899876581825e-05, "loss": 0.3136, "step": 898 }, { "epoch": 24.2972972972973, "grad_norm": 2.390625, "learning_rate": 4.9307290589549704e-05, "loss": 0.3072, "step": 899 }, { "epoch": 24.324324324324323, "grad_norm": 1.5859375, "learning_rate": 4.930467644822844e-05, "loss": 0.2252, "step": 900 }, { "epoch": 24.35135135135135, "grad_norm": 2.46875, "learning_rate": 4.930205745314008e-05, "loss": 0.3537, "step": 901 }, { "epoch": 24.37837837837838, "grad_norm": 1.703125, "learning_rate": 4.9299433604807675e-05, "loss": 0.2222, "step": 902 }, { "epoch": 24.405405405405407, "grad_norm": 1.8125, "learning_rate": 4.92968049037552e-05, "loss": 0.252, "step": 903 }, { "epoch": 24.43243243243243, "grad_norm": 2.328125, "learning_rate": 4.929417135050764e-05, "loss": 0.2636, "step": 904 }, { "epoch": 24.45945945945946, "grad_norm": 1.78125, "learning_rate": 4.929153294559091e-05, "loss": 0.2464, "step": 905 }, { "epoch": 24.486486486486488, "grad_norm": 1.578125, "learning_rate": 4.9288889689531926e-05, "loss": 0.1954, "step": 906 }, { "epoch": 24.513513513513512, "grad_norm": 2.1875, "learning_rate": 4.928624158285855e-05, "loss": 0.2607, "step": 907 }, { "epoch": 24.54054054054054, "grad_norm": 1.828125, "learning_rate": 4.928358862609963e-05, "loss": 0.3044, "step": 908 }, { "epoch": 24.56756756756757, "grad_norm": 1.859375, "learning_rate": 4.928093081978497e-05, "loss": 0.2462, "step": 909 }, { "epoch": 24.594594594594593, "grad_norm": 2.03125, "learning_rate": 4.927826816444535e-05, "loss": 0.3355, "step": 910 }, { "epoch": 24.62162162162162, "grad_norm": 2.15625, "learning_rate": 4.9275600660612515e-05, "loss": 0.2302, "step": 911 }, { "epoch": 24.64864864864865, "grad_norm": 2.078125, "learning_rate": 4.927292830881918e-05, "loss": 0.2636, "step": 912 }, { "epoch": 24.675675675675677, "grad_norm": 1.7421875, "learning_rate": 4.927025110959902e-05, "loss": 0.2016, "step": 913 }, { "epoch": 24.7027027027027, "grad_norm": 1.6875, "learning_rate": 4.9267569063486696e-05, "loss": 0.2004, "step": 914 }, { "epoch": 24.72972972972973, "grad_norm": 1.9609375, "learning_rate": 4.926488217101782e-05, "loss": 0.3859, "step": 915 }, { "epoch": 24.756756756756758, "grad_norm": 2.734375, "learning_rate": 4.9262190432728984e-05, "loss": 0.2806, "step": 916 }, { "epoch": 24.783783783783782, "grad_norm": 2.390625, "learning_rate": 4.925949384915773e-05, "loss": 0.3464, "step": 917 }, { "epoch": 24.81081081081081, "grad_norm": 2.390625, "learning_rate": 4.925679242084259e-05, "loss": 0.2439, "step": 918 }, { "epoch": 24.83783783783784, "grad_norm": 2.53125, "learning_rate": 4.925408614832305e-05, "loss": 0.2897, "step": 919 }, { "epoch": 24.864864864864863, "grad_norm": 2.09375, "learning_rate": 4.9251375032139566e-05, "loss": 0.2572, "step": 920 }, { "epoch": 24.89189189189189, "grad_norm": 2.875, "learning_rate": 4.924865907283356e-05, "loss": 0.2689, "step": 921 }, { "epoch": 24.91891891891892, "grad_norm": 2.609375, "learning_rate": 4.9245938270947435e-05, "loss": 0.4071, "step": 922 }, { "epoch": 24.945945945945947, "grad_norm": 1.9921875, "learning_rate": 4.9243212627024524e-05, "loss": 0.309, "step": 923 }, { "epoch": 24.972972972972972, "grad_norm": 1.5625, "learning_rate": 4.924048214160917e-05, "loss": 0.1982, "step": 924 }, { "epoch": 25.0, "grad_norm": 2.203125, "learning_rate": 4.9237746815246664e-05, "loss": 0.358, "step": 925 }, { "epoch": 25.027027027027028, "grad_norm": 2.421875, "learning_rate": 4.923500664848326e-05, "loss": 0.3506, "step": 926 }, { "epoch": 25.054054054054053, "grad_norm": 1.5078125, "learning_rate": 4.923226164186619e-05, "loss": 0.1811, "step": 927 }, { "epoch": 25.08108108108108, "grad_norm": 2.0625, "learning_rate": 4.922951179594364e-05, "loss": 0.3098, "step": 928 }, { "epoch": 25.10810810810811, "grad_norm": 2.171875, "learning_rate": 4.922675711126476e-05, "loss": 0.3303, "step": 929 }, { "epoch": 25.135135135135137, "grad_norm": 2.140625, "learning_rate": 4.922399758837969e-05, "loss": 0.3358, "step": 930 }, { "epoch": 25.16216216216216, "grad_norm": 1.71875, "learning_rate": 4.922123322783951e-05, "loss": 0.1883, "step": 931 }, { "epoch": 25.18918918918919, "grad_norm": 2.015625, "learning_rate": 4.921846403019628e-05, "loss": 0.2482, "step": 932 }, { "epoch": 25.216216216216218, "grad_norm": 2.109375, "learning_rate": 4.921568999600303e-05, "loss": 0.2533, "step": 933 }, { "epoch": 25.243243243243242, "grad_norm": 2.625, "learning_rate": 4.9212911125813735e-05, "loss": 0.2765, "step": 934 }, { "epoch": 25.27027027027027, "grad_norm": 1.9921875, "learning_rate": 4.9210127420183364e-05, "loss": 0.2539, "step": 935 }, { "epoch": 25.2972972972973, "grad_norm": 1.703125, "learning_rate": 4.920733887966782e-05, "loss": 0.2321, "step": 936 }, { "epoch": 25.324324324324323, "grad_norm": 2.5625, "learning_rate": 4.920454550482401e-05, "loss": 0.2097, "step": 937 }, { "epoch": 25.35135135135135, "grad_norm": 1.546875, "learning_rate": 4.920174729620977e-05, "loss": 0.1637, "step": 938 }, { "epoch": 25.37837837837838, "grad_norm": 1.8203125, "learning_rate": 4.919894425438392e-05, "loss": 0.2153, "step": 939 }, { "epoch": 25.405405405405407, "grad_norm": 2.34375, "learning_rate": 4.919613637990623e-05, "loss": 0.319, "step": 940 }, { "epoch": 25.43243243243243, "grad_norm": 2.046875, "learning_rate": 4.9193323673337476e-05, "loss": 0.2854, "step": 941 }, { "epoch": 25.45945945945946, "grad_norm": 1.96875, "learning_rate": 4.9190506135239346e-05, "loss": 0.302, "step": 942 }, { "epoch": 25.486486486486488, "grad_norm": 1.609375, "learning_rate": 4.918768376617452e-05, "loss": 0.2101, "step": 943 }, { "epoch": 25.513513513513512, "grad_norm": 1.484375, "learning_rate": 4.918485656670664e-05, "loss": 0.1911, "step": 944 }, { "epoch": 25.54054054054054, "grad_norm": 1.7734375, "learning_rate": 4.9182024537400314e-05, "loss": 0.2065, "step": 945 }, { "epoch": 25.56756756756757, "grad_norm": 2.109375, "learning_rate": 4.9179187678821124e-05, "loss": 0.2945, "step": 946 }, { "epoch": 25.594594594594593, "grad_norm": 1.8515625, "learning_rate": 4.917634599153559e-05, "loss": 0.2765, "step": 947 }, { "epoch": 25.62162162162162, "grad_norm": 1.9921875, "learning_rate": 4.9173499476111206e-05, "loss": 0.2364, "step": 948 }, { "epoch": 25.64864864864865, "grad_norm": 1.7421875, "learning_rate": 4.917064813311646e-05, "loss": 0.2726, "step": 949 }, { "epoch": 25.675675675675677, "grad_norm": 1.7890625, "learning_rate": 4.9167791963120746e-05, "loss": 0.1909, "step": 950 }, { "epoch": 25.7027027027027, "grad_norm": 2.078125, "learning_rate": 4.916493096669449e-05, "loss": 0.3466, "step": 951 }, { "epoch": 25.72972972972973, "grad_norm": 1.5859375, "learning_rate": 4.916206514440903e-05, "loss": 0.2137, "step": 952 }, { "epoch": 25.756756756756758, "grad_norm": 1.859375, "learning_rate": 4.915919449683668e-05, "loss": 0.2123, "step": 953 }, { "epoch": 25.783783783783782, "grad_norm": 2.125, "learning_rate": 4.915631902455073e-05, "loss": 0.31, "step": 954 }, { "epoch": 25.81081081081081, "grad_norm": 2.15625, "learning_rate": 4.9153438728125425e-05, "loss": 0.3215, "step": 955 }, { "epoch": 25.83783783783784, "grad_norm": 1.8046875, "learning_rate": 4.9150553608135986e-05, "loss": 0.2992, "step": 956 }, { "epoch": 25.864864864864863, "grad_norm": 2.03125, "learning_rate": 4.9147663665158564e-05, "loss": 0.2747, "step": 957 }, { "epoch": 25.89189189189189, "grad_norm": 2.0, "learning_rate": 4.914476889977031e-05, "loss": 0.3135, "step": 958 }, { "epoch": 25.91891891891892, "grad_norm": 1.8515625, "learning_rate": 4.914186931254933e-05, "loss": 0.1947, "step": 959 }, { "epoch": 25.945945945945947, "grad_norm": 2.140625, "learning_rate": 4.9138964904074667e-05, "loss": 0.2423, "step": 960 }, { "epoch": 25.972972972972972, "grad_norm": 1.640625, "learning_rate": 4.9136055674926365e-05, "loss": 0.2639, "step": 961 }, { "epoch": 26.0, "grad_norm": 2.546875, "learning_rate": 4.9133141625685394e-05, "loss": 0.2814, "step": 962 }, { "epoch": 26.027027027027028, "grad_norm": 2.625, "learning_rate": 4.913022275693372e-05, "loss": 0.2304, "step": 963 }, { "epoch": 26.054054054054053, "grad_norm": 1.4609375, "learning_rate": 4.9127299069254244e-05, "loss": 0.1435, "step": 964 }, { "epoch": 26.08108108108108, "grad_norm": 2.234375, "learning_rate": 4.912437056323085e-05, "loss": 0.2503, "step": 965 }, { "epoch": 26.10810810810811, "grad_norm": 1.765625, "learning_rate": 4.912143723944837e-05, "loss": 0.2119, "step": 966 }, { "epoch": 26.135135135135137, "grad_norm": 1.984375, "learning_rate": 4.91184990984926e-05, "loss": 0.2723, "step": 967 }, { "epoch": 26.16216216216216, "grad_norm": 2.390625, "learning_rate": 4.9115556140950315e-05, "loss": 0.219, "step": 968 }, { "epoch": 26.18918918918919, "grad_norm": 2.53125, "learning_rate": 4.911260836740923e-05, "loss": 0.2587, "step": 969 }, { "epoch": 26.216216216216218, "grad_norm": 2.640625, "learning_rate": 4.910965577845803e-05, "loss": 0.3237, "step": 970 }, { "epoch": 26.243243243243242, "grad_norm": 2.375, "learning_rate": 4.910669837468637e-05, "loss": 0.1833, "step": 971 }, { "epoch": 26.27027027027027, "grad_norm": 2.703125, "learning_rate": 4.910373615668484e-05, "loss": 0.3438, "step": 972 }, { "epoch": 26.2972972972973, "grad_norm": 1.4609375, "learning_rate": 4.910076912504502e-05, "loss": 0.1662, "step": 973 }, { "epoch": 26.324324324324323, "grad_norm": 1.2578125, "learning_rate": 4.909779728035945e-05, "loss": 0.167, "step": 974 }, { "epoch": 26.35135135135135, "grad_norm": 2.453125, "learning_rate": 4.909482062322162e-05, "loss": 0.237, "step": 975 }, { "epoch": 26.37837837837838, "grad_norm": 2.453125, "learning_rate": 4.909183915422596e-05, "loss": 0.4019, "step": 976 }, { "epoch": 26.405405405405407, "grad_norm": 1.7421875, "learning_rate": 4.9088852873967916e-05, "loss": 0.2427, "step": 977 }, { "epoch": 26.43243243243243, "grad_norm": 2.421875, "learning_rate": 4.9085861783043854e-05, "loss": 0.2353, "step": 978 }, { "epoch": 26.45945945945946, "grad_norm": 2.296875, "learning_rate": 4.90828658820511e-05, "loss": 0.3145, "step": 979 }, { "epoch": 26.486486486486488, "grad_norm": 1.5546875, "learning_rate": 4.907986517158796e-05, "loss": 0.2705, "step": 980 }, { "epoch": 26.513513513513512, "grad_norm": 2.171875, "learning_rate": 4.907685965225368e-05, "loss": 0.1844, "step": 981 }, { "epoch": 26.54054054054054, "grad_norm": 2.109375, "learning_rate": 4.90738493246485e-05, "loss": 0.2072, "step": 982 }, { "epoch": 26.56756756756757, "grad_norm": 1.625, "learning_rate": 4.907083418937357e-05, "loss": 0.2216, "step": 983 }, { "epoch": 26.594594594594593, "grad_norm": 1.53125, "learning_rate": 4.906781424703105e-05, "loss": 0.2065, "step": 984 }, { "epoch": 26.62162162162162, "grad_norm": 1.671875, "learning_rate": 4.906478949822403e-05, "loss": 0.255, "step": 985 }, { "epoch": 26.64864864864865, "grad_norm": 1.96875, "learning_rate": 4.906175994355656e-05, "loss": 0.3674, "step": 986 }, { "epoch": 26.675675675675677, "grad_norm": 1.09375, "learning_rate": 4.9058725583633666e-05, "loss": 0.1556, "step": 987 }, { "epoch": 26.7027027027027, "grad_norm": 1.125, "learning_rate": 4.905568641906132e-05, "loss": 0.1488, "step": 988 }, { "epoch": 26.72972972972973, "grad_norm": 2.203125, "learning_rate": 4.905264245044647e-05, "loss": 0.3139, "step": 989 }, { "epoch": 26.756756756756758, "grad_norm": 1.875, "learning_rate": 4.9049593678397e-05, "loss": 0.2548, "step": 990 }, { "epoch": 26.783783783783782, "grad_norm": 2.234375, "learning_rate": 4.9046540103521766e-05, "loss": 0.183, "step": 991 }, { "epoch": 26.81081081081081, "grad_norm": 1.7734375, "learning_rate": 4.904348172643059e-05, "loss": 0.2388, "step": 992 }, { "epoch": 26.83783783783784, "grad_norm": 1.5, "learning_rate": 4.904041854773423e-05, "loss": 0.245, "step": 993 }, { "epoch": 26.864864864864863, "grad_norm": 2.125, "learning_rate": 4.903735056804444e-05, "loss": 0.192, "step": 994 }, { "epoch": 26.89189189189189, "grad_norm": 1.625, "learning_rate": 4.903427778797389e-05, "loss": 0.2001, "step": 995 }, { "epoch": 26.91891891891892, "grad_norm": 1.859375, "learning_rate": 4.903120020813625e-05, "loss": 0.2621, "step": 996 }, { "epoch": 26.945945945945947, "grad_norm": 2.421875, "learning_rate": 4.902811782914611e-05, "loss": 0.3631, "step": 997 }, { "epoch": 26.972972972972972, "grad_norm": 1.9140625, "learning_rate": 4.9025030651619046e-05, "loss": 0.2633, "step": 998 }, { "epoch": 27.0, "grad_norm": 1.328125, "learning_rate": 4.9021938676171585e-05, "loss": 0.1752, "step": 999 }, { "epoch": 27.027027027027028, "grad_norm": 2.125, "learning_rate": 4.901884190342121e-05, "loss": 0.2398, "step": 1000 }, { "epoch": 27.054054054054053, "grad_norm": 1.9296875, "learning_rate": 4.9015740333986346e-05, "loss": 0.281, "step": 1001 }, { "epoch": 27.08108108108108, "grad_norm": 2.03125, "learning_rate": 4.901263396848642e-05, "loss": 0.3299, "step": 1002 }, { "epoch": 27.10810810810811, "grad_norm": 3.9375, "learning_rate": 4.900952280754177e-05, "loss": 0.2624, "step": 1003 }, { "epoch": 27.135135135135137, "grad_norm": 2.546875, "learning_rate": 4.900640685177371e-05, "loss": 0.3114, "step": 1004 }, { "epoch": 27.16216216216216, "grad_norm": 3.109375, "learning_rate": 4.9003286101804525e-05, "loss": 0.3564, "step": 1005 }, { "epoch": 27.18918918918919, "grad_norm": 1.9765625, "learning_rate": 4.900016055825743e-05, "loss": 0.2775, "step": 1006 }, { "epoch": 27.216216216216218, "grad_norm": 2.671875, "learning_rate": 4.899703022175662e-05, "loss": 0.2334, "step": 1007 }, { "epoch": 27.243243243243242, "grad_norm": 1.84375, "learning_rate": 4.899389509292723e-05, "loss": 0.173, "step": 1008 }, { "epoch": 27.27027027027027, "grad_norm": 1.734375, "learning_rate": 4.8990755172395385e-05, "loss": 0.3171, "step": 1009 }, { "epoch": 27.2972972972973, "grad_norm": 1.6953125, "learning_rate": 4.898761046078811e-05, "loss": 0.2146, "step": 1010 }, { "epoch": 27.324324324324323, "grad_norm": 1.6015625, "learning_rate": 4.898446095873345e-05, "loss": 0.1409, "step": 1011 }, { "epoch": 27.35135135135135, "grad_norm": 1.0859375, "learning_rate": 4.898130666686036e-05, "loss": 0.168, "step": 1012 }, { "epoch": 27.37837837837838, "grad_norm": 1.5234375, "learning_rate": 4.897814758579877e-05, "loss": 0.1968, "step": 1013 }, { "epoch": 27.405405405405407, "grad_norm": 1.890625, "learning_rate": 4.8974983716179555e-05, "loss": 0.263, "step": 1014 }, { "epoch": 27.43243243243243, "grad_norm": 1.3828125, "learning_rate": 4.897181505863459e-05, "loss": 0.2001, "step": 1015 }, { "epoch": 27.45945945945946, "grad_norm": 1.6171875, "learning_rate": 4.896864161379663e-05, "loss": 0.1981, "step": 1016 }, { "epoch": 27.486486486486488, "grad_norm": 1.3515625, "learning_rate": 4.896546338229945e-05, "loss": 0.2011, "step": 1017 }, { "epoch": 27.513513513513512, "grad_norm": 1.359375, "learning_rate": 4.896228036477775e-05, "loss": 0.1969, "step": 1018 }, { "epoch": 27.54054054054054, "grad_norm": 1.8046875, "learning_rate": 4.895909256186721e-05, "loss": 0.3645, "step": 1019 }, { "epoch": 27.56756756756757, "grad_norm": 1.0390625, "learning_rate": 4.8955899974204435e-05, "loss": 0.1512, "step": 1020 }, { "epoch": 27.594594594594593, "grad_norm": 1.7421875, "learning_rate": 4.895270260242701e-05, "loss": 0.2653, "step": 1021 }, { "epoch": 27.62162162162162, "grad_norm": 1.4453125, "learning_rate": 4.8949500447173456e-05, "loss": 0.194, "step": 1022 }, { "epoch": 27.64864864864865, "grad_norm": 1.796875, "learning_rate": 4.894629350908327e-05, "loss": 0.31, "step": 1023 }, { "epoch": 27.675675675675677, "grad_norm": 1.59375, "learning_rate": 4.8943081788796895e-05, "loss": 0.3203, "step": 1024 }, { "epoch": 27.7027027027027, "grad_norm": 1.6328125, "learning_rate": 4.893986528695572e-05, "loss": 0.227, "step": 1025 }, { "epoch": 27.72972972972973, "grad_norm": 1.859375, "learning_rate": 4.89366440042021e-05, "loss": 0.2194, "step": 1026 }, { "epoch": 27.756756756756758, "grad_norm": 1.953125, "learning_rate": 4.893341794117934e-05, "loss": 0.2323, "step": 1027 }, { "epoch": 27.783783783783782, "grad_norm": 2.0, "learning_rate": 4.8930187098531696e-05, "loss": 0.2074, "step": 1028 }, { "epoch": 27.81081081081081, "grad_norm": 1.1640625, "learning_rate": 4.8926951476904405e-05, "loss": 0.15, "step": 1029 }, { "epoch": 27.83783783783784, "grad_norm": 1.75, "learning_rate": 4.892371107694361e-05, "loss": 0.2652, "step": 1030 }, { "epoch": 27.864864864864863, "grad_norm": 2.171875, "learning_rate": 4.892046589929645e-05, "loss": 0.2161, "step": 1031 }, { "epoch": 27.89189189189189, "grad_norm": 1.7890625, "learning_rate": 4.891721594461101e-05, "loss": 0.2023, "step": 1032 }, { "epoch": 27.91891891891892, "grad_norm": 1.9375, "learning_rate": 4.8913961213536294e-05, "loss": 0.284, "step": 1033 }, { "epoch": 27.945945945945947, "grad_norm": 1.8984375, "learning_rate": 4.891070170672232e-05, "loss": 0.2435, "step": 1034 }, { "epoch": 27.972972972972972, "grad_norm": 1.375, "learning_rate": 4.8907437424820017e-05, "loss": 0.1776, "step": 1035 }, { "epoch": 28.0, "grad_norm": 1.421875, "learning_rate": 4.890416836848127e-05, "loss": 0.1971, "step": 1036 }, { "epoch": 28.027027027027028, "grad_norm": 1.7109375, "learning_rate": 4.8900894538358944e-05, "loss": 0.2666, "step": 1037 }, { "epoch": 28.054054054054053, "grad_norm": 1.3671875, "learning_rate": 4.889761593510682e-05, "loss": 0.2221, "step": 1038 }, { "epoch": 28.08108108108108, "grad_norm": 1.6796875, "learning_rate": 4.889433255937966e-05, "loss": 0.1464, "step": 1039 }, { "epoch": 28.10810810810811, "grad_norm": 2.109375, "learning_rate": 4.889104441183318e-05, "loss": 0.3043, "step": 1040 }, { "epoch": 28.135135135135137, "grad_norm": 1.8515625, "learning_rate": 4.8887751493124024e-05, "loss": 0.2329, "step": 1041 }, { "epoch": 28.16216216216216, "grad_norm": 1.1015625, "learning_rate": 4.8884453803909816e-05, "loss": 0.1395, "step": 1042 }, { "epoch": 28.18918918918919, "grad_norm": 1.7890625, "learning_rate": 4.888115134484911e-05, "loss": 0.2585, "step": 1043 }, { "epoch": 28.216216216216218, "grad_norm": 1.9296875, "learning_rate": 4.887784411660143e-05, "loss": 0.3405, "step": 1044 }, { "epoch": 28.243243243243242, "grad_norm": 1.484375, "learning_rate": 4.887453211982725e-05, "loss": 0.2106, "step": 1045 }, { "epoch": 28.27027027027027, "grad_norm": 2.03125, "learning_rate": 4.8871215355187994e-05, "loss": 0.1947, "step": 1046 }, { "epoch": 28.2972972972973, "grad_norm": 1.6796875, "learning_rate": 4.886789382334603e-05, "loss": 0.1869, "step": 1047 }, { "epoch": 28.324324324324323, "grad_norm": 2.0, "learning_rate": 4.886456752496469e-05, "loss": 0.306, "step": 1048 }, { "epoch": 28.35135135135135, "grad_norm": 2.9375, "learning_rate": 4.886123646070824e-05, "loss": 0.2793, "step": 1049 }, { "epoch": 28.37837837837838, "grad_norm": 2.484375, "learning_rate": 4.8857900631241924e-05, "loss": 0.2197, "step": 1050 }, { "epoch": 28.405405405405407, "grad_norm": 1.5859375, "learning_rate": 4.8854560037231925e-05, "loss": 0.2173, "step": 1051 }, { "epoch": 28.43243243243243, "grad_norm": 1.9375, "learning_rate": 4.885121467934536e-05, "loss": 0.2681, "step": 1052 }, { "epoch": 28.45945945945946, "grad_norm": 1.8828125, "learning_rate": 4.884786455825034e-05, "loss": 0.2808, "step": 1053 }, { "epoch": 28.486486486486488, "grad_norm": 1.8984375, "learning_rate": 4.884450967461588e-05, "loss": 0.2673, "step": 1054 }, { "epoch": 28.513513513513512, "grad_norm": 1.7578125, "learning_rate": 4.884115002911197e-05, "loss": 0.2385, "step": 1055 }, { "epoch": 28.54054054054054, "grad_norm": 2.234375, "learning_rate": 4.883778562240956e-05, "loss": 0.2386, "step": 1056 }, { "epoch": 28.56756756756757, "grad_norm": 1.609375, "learning_rate": 4.883441645518052e-05, "loss": 0.1728, "step": 1057 }, { "epoch": 28.594594594594593, "grad_norm": 1.5625, "learning_rate": 4.883104252809771e-05, "loss": 0.2522, "step": 1058 }, { "epoch": 28.62162162162162, "grad_norm": 1.921875, "learning_rate": 4.882766384183491e-05, "loss": 0.3261, "step": 1059 }, { "epoch": 28.64864864864865, "grad_norm": 1.625, "learning_rate": 4.882428039706686e-05, "loss": 0.2057, "step": 1060 }, { "epoch": 28.675675675675677, "grad_norm": 1.390625, "learning_rate": 4.8820892194469254e-05, "loss": 0.1635, "step": 1061 }, { "epoch": 28.7027027027027, "grad_norm": 2.078125, "learning_rate": 4.8817499234718734e-05, "loss": 0.2851, "step": 1062 }, { "epoch": 28.72972972972973, "grad_norm": 1.734375, "learning_rate": 4.881410151849288e-05, "loss": 0.2217, "step": 1063 }, { "epoch": 28.756756756756758, "grad_norm": 2.078125, "learning_rate": 4.8810699046470254e-05, "loss": 0.2516, "step": 1064 }, { "epoch": 28.783783783783782, "grad_norm": 1.734375, "learning_rate": 4.880729181933033e-05, "loss": 0.1829, "step": 1065 }, { "epoch": 28.81081081081081, "grad_norm": 1.8046875, "learning_rate": 4.8803879837753564e-05, "loss": 0.1947, "step": 1066 }, { "epoch": 28.83783783783784, "grad_norm": 1.4765625, "learning_rate": 4.880046310242132e-05, "loss": 0.2205, "step": 1067 }, { "epoch": 28.864864864864863, "grad_norm": 1.5859375, "learning_rate": 4.879704161401597e-05, "loss": 0.1752, "step": 1068 }, { "epoch": 28.89189189189189, "grad_norm": 2.171875, "learning_rate": 4.879361537322078e-05, "loss": 0.1816, "step": 1069 }, { "epoch": 28.91891891891892, "grad_norm": 1.328125, "learning_rate": 4.879018438071999e-05, "loss": 0.1608, "step": 1070 }, { "epoch": 28.945945945945947, "grad_norm": 2.15625, "learning_rate": 4.878674863719879e-05, "loss": 0.3193, "step": 1071 }, { "epoch": 28.972972972972972, "grad_norm": 2.28125, "learning_rate": 4.878330814334332e-05, "loss": 0.2784, "step": 1072 }, { "epoch": 29.0, "grad_norm": 1.9140625, "learning_rate": 4.877986289984066e-05, "loss": 0.2876, "step": 1073 }, { "epoch": 29.027027027027028, "grad_norm": 1.6171875, "learning_rate": 4.877641290737884e-05, "loss": 0.264, "step": 1074 }, { "epoch": 29.054054054054053, "grad_norm": 2.234375, "learning_rate": 4.877295816664684e-05, "loss": 0.2552, "step": 1075 }, { "epoch": 29.08108108108108, "grad_norm": 1.5703125, "learning_rate": 4.87694986783346e-05, "loss": 0.2357, "step": 1076 }, { "epoch": 29.10810810810811, "grad_norm": 1.21875, "learning_rate": 4.876603444313299e-05, "loss": 0.1592, "step": 1077 }, { "epoch": 29.135135135135137, "grad_norm": 1.7578125, "learning_rate": 4.876256546173383e-05, "loss": 0.3175, "step": 1078 }, { "epoch": 29.16216216216216, "grad_norm": 1.2734375, "learning_rate": 4.875909173482991e-05, "loss": 0.1934, "step": 1079 }, { "epoch": 29.18918918918919, "grad_norm": 1.3828125, "learning_rate": 4.875561326311493e-05, "loss": 0.1862, "step": 1080 }, { "epoch": 29.216216216216218, "grad_norm": 1.8671875, "learning_rate": 4.875213004728356e-05, "loss": 0.2084, "step": 1081 }, { "epoch": 29.243243243243242, "grad_norm": 1.78125, "learning_rate": 4.874864208803144e-05, "loss": 0.2531, "step": 1082 }, { "epoch": 29.27027027027027, "grad_norm": 2.421875, "learning_rate": 4.87451493860551e-05, "loss": 0.222, "step": 1083 }, { "epoch": 29.2972972972973, "grad_norm": 1.6953125, "learning_rate": 4.874165194205208e-05, "loss": 0.2271, "step": 1084 }, { "epoch": 29.324324324324323, "grad_norm": 1.1171875, "learning_rate": 4.8738149756720825e-05, "loss": 0.1441, "step": 1085 }, { "epoch": 29.35135135135135, "grad_norm": 1.671875, "learning_rate": 4.8734642830760734e-05, "loss": 0.2254, "step": 1086 }, { "epoch": 29.37837837837838, "grad_norm": 1.6640625, "learning_rate": 4.873113116487216e-05, "loss": 0.225, "step": 1087 }, { "epoch": 29.405405405405407, "grad_norm": 2.359375, "learning_rate": 4.872761475975641e-05, "loss": 0.3073, "step": 1088 }, { "epoch": 29.43243243243243, "grad_norm": 2.328125, "learning_rate": 4.872409361611572e-05, "loss": 0.2995, "step": 1089 }, { "epoch": 29.45945945945946, "grad_norm": 1.75, "learning_rate": 4.872056773465327e-05, "loss": 0.2597, "step": 1090 }, { "epoch": 29.486486486486488, "grad_norm": 1.71875, "learning_rate": 4.871703711607322e-05, "loss": 0.2848, "step": 1091 }, { "epoch": 29.513513513513512, "grad_norm": 1.3515625, "learning_rate": 4.8713501761080634e-05, "loss": 0.1718, "step": 1092 }, { "epoch": 29.54054054054054, "grad_norm": 1.40625, "learning_rate": 4.870996167038154e-05, "loss": 0.1573, "step": 1093 }, { "epoch": 29.56756756756757, "grad_norm": 2.953125, "learning_rate": 4.8706416844682934e-05, "loss": 0.2954, "step": 1094 }, { "epoch": 29.594594594594593, "grad_norm": 2.09375, "learning_rate": 4.870286728469271e-05, "loss": 0.243, "step": 1095 }, { "epoch": 29.62162162162162, "grad_norm": 2.34375, "learning_rate": 4.8699312991119746e-05, "loss": 0.2599, "step": 1096 }, { "epoch": 29.64864864864865, "grad_norm": 1.9375, "learning_rate": 4.869575396467385e-05, "loss": 0.2532, "step": 1097 }, { "epoch": 29.675675675675677, "grad_norm": 1.6875, "learning_rate": 4.8692190206065776e-05, "loss": 0.1489, "step": 1098 }, { "epoch": 29.7027027027027, "grad_norm": 1.6328125, "learning_rate": 4.868862171600722e-05, "loss": 0.2139, "step": 1099 }, { "epoch": 29.72972972972973, "grad_norm": 1.046875, "learning_rate": 4.868504849521084e-05, "loss": 0.1244, "step": 1100 }, { "epoch": 29.756756756756758, "grad_norm": 2.296875, "learning_rate": 4.868147054439022e-05, "loss": 0.3298, "step": 1101 }, { "epoch": 29.783783783783782, "grad_norm": 1.578125, "learning_rate": 4.86778878642599e-05, "loss": 0.1848, "step": 1102 }, { "epoch": 29.81081081081081, "grad_norm": 1.421875, "learning_rate": 4.867430045553534e-05, "loss": 0.1717, "step": 1103 }, { "epoch": 29.83783783783784, "grad_norm": 1.609375, "learning_rate": 4.867070831893299e-05, "loss": 0.2316, "step": 1104 }, { "epoch": 29.864864864864863, "grad_norm": 1.2421875, "learning_rate": 4.86671114551702e-05, "loss": 0.1508, "step": 1105 }, { "epoch": 29.89189189189189, "grad_norm": 1.671875, "learning_rate": 4.866350986496528e-05, "loss": 0.2175, "step": 1106 }, { "epoch": 29.91891891891892, "grad_norm": 1.703125, "learning_rate": 4.8659903549037506e-05, "loss": 0.2324, "step": 1107 }, { "epoch": 29.945945945945947, "grad_norm": 1.9296875, "learning_rate": 4.865629250810706e-05, "loss": 0.2453, "step": 1108 }, { "epoch": 29.972972972972972, "grad_norm": 1.890625, "learning_rate": 4.865267674289509e-05, "loss": 0.1762, "step": 1109 }, { "epoch": 30.0, "grad_norm": 1.375, "learning_rate": 4.864905625412368e-05, "loss": 0.2103, "step": 1110 }, { "epoch": 30.027027027027028, "grad_norm": 1.3984375, "learning_rate": 4.864543104251587e-05, "loss": 0.1776, "step": 1111 }, { "epoch": 30.054054054054053, "grad_norm": 1.2265625, "learning_rate": 4.864180110879562e-05, "loss": 0.1405, "step": 1112 }, { "epoch": 30.08108108108108, "grad_norm": 2.359375, "learning_rate": 4.863816645368785e-05, "loss": 0.2851, "step": 1113 }, { "epoch": 30.10810810810811, "grad_norm": 1.734375, "learning_rate": 4.863452707791843e-05, "loss": 0.2004, "step": 1114 }, { "epoch": 30.135135135135137, "grad_norm": 1.828125, "learning_rate": 4.863088298221414e-05, "loss": 0.2277, "step": 1115 }, { "epoch": 30.16216216216216, "grad_norm": 1.5546875, "learning_rate": 4.862723416730275e-05, "loss": 0.192, "step": 1116 }, { "epoch": 30.18918918918919, "grad_norm": 1.2578125, "learning_rate": 4.862358063391293e-05, "loss": 0.1965, "step": 1117 }, { "epoch": 30.216216216216218, "grad_norm": 1.53125, "learning_rate": 4.861992238277431e-05, "loss": 0.2029, "step": 1118 }, { "epoch": 30.243243243243242, "grad_norm": 1.734375, "learning_rate": 4.861625941461746e-05, "loss": 0.2958, "step": 1119 }, { "epoch": 30.27027027027027, "grad_norm": 1.6328125, "learning_rate": 4.8612591730173915e-05, "loss": 0.1693, "step": 1120 }, { "epoch": 30.2972972972973, "grad_norm": 2.296875, "learning_rate": 4.860891933017609e-05, "loss": 0.2754, "step": 1121 }, { "epoch": 30.324324324324323, "grad_norm": 1.6328125, "learning_rate": 4.8605242215357424e-05, "loss": 0.2175, "step": 1122 }, { "epoch": 30.35135135135135, "grad_norm": 1.8046875, "learning_rate": 4.8601560386452226e-05, "loss": 0.1683, "step": 1123 }, { "epoch": 30.37837837837838, "grad_norm": 2.9375, "learning_rate": 4.85978738441958e-05, "loss": 0.3544, "step": 1124 }, { "epoch": 30.405405405405407, "grad_norm": 1.9609375, "learning_rate": 4.859418258932433e-05, "loss": 0.2158, "step": 1125 }, { "epoch": 30.43243243243243, "grad_norm": 1.8359375, "learning_rate": 4.8590486622575013e-05, "loss": 0.2446, "step": 1126 }, { "epoch": 30.45945945945946, "grad_norm": 2.28125, "learning_rate": 4.8586785944685945e-05, "loss": 0.1899, "step": 1127 }, { "epoch": 30.486486486486488, "grad_norm": 2.265625, "learning_rate": 4.858308055639617e-05, "loss": 0.2354, "step": 1128 }, { "epoch": 30.513513513513512, "grad_norm": 1.28125, "learning_rate": 4.857937045844566e-05, "loss": 0.1684, "step": 1129 }, { "epoch": 30.54054054054054, "grad_norm": 1.7421875, "learning_rate": 4.8575655651575345e-05, "loss": 0.1775, "step": 1130 }, { "epoch": 30.56756756756757, "grad_norm": 1.4140625, "learning_rate": 4.857193613652711e-05, "loss": 0.1523, "step": 1131 }, { "epoch": 30.594594594594593, "grad_norm": 1.890625, "learning_rate": 4.856821191404374e-05, "loss": 0.2219, "step": 1132 }, { "epoch": 30.62162162162162, "grad_norm": 1.734375, "learning_rate": 4.8564482984868975e-05, "loss": 0.251, "step": 1133 }, { "epoch": 30.64864864864865, "grad_norm": 1.7734375, "learning_rate": 4.8560749349747525e-05, "loss": 0.1999, "step": 1134 }, { "epoch": 30.675675675675677, "grad_norm": 1.609375, "learning_rate": 4.8557011009425e-05, "loss": 0.1702, "step": 1135 }, { "epoch": 30.7027027027027, "grad_norm": 1.6015625, "learning_rate": 4.8553267964647976e-05, "loss": 0.2291, "step": 1136 }, { "epoch": 30.72972972972973, "grad_norm": 1.9296875, "learning_rate": 4.854952021616395e-05, "loss": 0.2644, "step": 1137 }, { "epoch": 30.756756756756758, "grad_norm": 1.0625, "learning_rate": 4.8545767764721364e-05, "loss": 0.1229, "step": 1138 }, { "epoch": 30.783783783783782, "grad_norm": 2.171875, "learning_rate": 4.85420106110696e-05, "loss": 0.194, "step": 1139 }, { "epoch": 30.81081081081081, "grad_norm": 1.7890625, "learning_rate": 4.8538248755958996e-05, "loss": 0.2372, "step": 1140 }, { "epoch": 30.83783783783784, "grad_norm": 2.1875, "learning_rate": 4.85344822001408e-05, "loss": 0.3218, "step": 1141 }, { "epoch": 30.864864864864863, "grad_norm": 1.796875, "learning_rate": 4.853071094436722e-05, "loss": 0.237, "step": 1142 }, { "epoch": 30.89189189189189, "grad_norm": 1.609375, "learning_rate": 4.852693498939139e-05, "loss": 0.2424, "step": 1143 }, { "epoch": 30.91891891891892, "grad_norm": 1.359375, "learning_rate": 4.852315433596738e-05, "loss": 0.2186, "step": 1144 }, { "epoch": 30.945945945945947, "grad_norm": 2.125, "learning_rate": 4.851936898485022e-05, "loss": 0.3185, "step": 1145 }, { "epoch": 30.972972972972972, "grad_norm": 1.390625, "learning_rate": 4.851557893679586e-05, "loss": 0.1765, "step": 1146 }, { "epoch": 31.0, "grad_norm": 1.3046875, "learning_rate": 4.8511784192561185e-05, "loss": 0.1621, "step": 1147 }, { "epoch": 31.027027027027028, "grad_norm": 1.296875, "learning_rate": 4.850798475290403e-05, "loss": 0.1699, "step": 1148 }, { "epoch": 31.054054054054053, "grad_norm": 1.4296875, "learning_rate": 4.850418061858317e-05, "loss": 0.2125, "step": 1149 }, { "epoch": 31.08108108108108, "grad_norm": 1.671875, "learning_rate": 4.850037179035829e-05, "loss": 0.2132, "step": 1150 }, { "epoch": 31.10810810810811, "grad_norm": 1.765625, "learning_rate": 4.849655826899005e-05, "loss": 0.2098, "step": 1151 }, { "epoch": 31.135135135135137, "grad_norm": 1.859375, "learning_rate": 4.849274005524002e-05, "loss": 0.2637, "step": 1152 }, { "epoch": 31.16216216216216, "grad_norm": 1.34375, "learning_rate": 4.848891714987072e-05, "loss": 0.1787, "step": 1153 }, { "epoch": 31.18918918918919, "grad_norm": 2.03125, "learning_rate": 4.848508955364561e-05, "loss": 0.2912, "step": 1154 }, { "epoch": 31.216216216216218, "grad_norm": 1.4765625, "learning_rate": 4.848125726732906e-05, "loss": 0.2143, "step": 1155 }, { "epoch": 31.243243243243242, "grad_norm": 1.3671875, "learning_rate": 4.8477420291686426e-05, "loss": 0.195, "step": 1156 }, { "epoch": 31.27027027027027, "grad_norm": 1.1640625, "learning_rate": 4.847357862748395e-05, "loss": 0.1483, "step": 1157 }, { "epoch": 31.2972972972973, "grad_norm": 1.921875, "learning_rate": 4.8469732275488835e-05, "loss": 0.1935, "step": 1158 }, { "epoch": 31.324324324324323, "grad_norm": 1.9765625, "learning_rate": 4.8465881236469225e-05, "loss": 0.176, "step": 1159 }, { "epoch": 31.35135135135135, "grad_norm": 1.515625, "learning_rate": 4.846202551119419e-05, "loss": 0.1452, "step": 1160 }, { "epoch": 31.37837837837838, "grad_norm": 2.375, "learning_rate": 4.845816510043373e-05, "loss": 0.2218, "step": 1161 }, { "epoch": 31.405405405405407, "grad_norm": 2.546875, "learning_rate": 4.845430000495878e-05, "loss": 0.2911, "step": 1162 }, { "epoch": 31.43243243243243, "grad_norm": 2.046875, "learning_rate": 4.845043022554125e-05, "loss": 0.3117, "step": 1163 }, { "epoch": 31.45945945945946, "grad_norm": 3.515625, "learning_rate": 4.844655576295394e-05, "loss": 0.2298, "step": 1164 }, { "epoch": 31.486486486486488, "grad_norm": 3.6875, "learning_rate": 4.84426766179706e-05, "loss": 0.2695, "step": 1165 }, { "epoch": 31.513513513513512, "grad_norm": 2.453125, "learning_rate": 4.8438792791365894e-05, "loss": 0.2252, "step": 1166 }, { "epoch": 31.54054054054054, "grad_norm": 1.6640625, "learning_rate": 4.843490428391547e-05, "loss": 0.2359, "step": 1167 }, { "epoch": 31.56756756756757, "grad_norm": 2.640625, "learning_rate": 4.8431011096395874e-05, "loss": 0.2126, "step": 1168 }, { "epoch": 31.594594594594593, "grad_norm": 2.734375, "learning_rate": 4.842711322958459e-05, "loss": 0.1958, "step": 1169 }, { "epoch": 31.62162162162162, "grad_norm": 1.5546875, "learning_rate": 4.842321068426005e-05, "loss": 0.1764, "step": 1170 }, { "epoch": 31.64864864864865, "grad_norm": 1.875, "learning_rate": 4.841930346120161e-05, "loss": 0.2512, "step": 1171 }, { "epoch": 31.675675675675677, "grad_norm": 1.6875, "learning_rate": 4.841539156118955e-05, "loss": 0.1768, "step": 1172 }, { "epoch": 31.7027027027027, "grad_norm": 2.421875, "learning_rate": 4.841147498500511e-05, "loss": 0.2905, "step": 1173 }, { "epoch": 31.72972972972973, "grad_norm": 1.734375, "learning_rate": 4.840755373343045e-05, "loss": 0.2397, "step": 1174 }, { "epoch": 31.756756756756758, "grad_norm": 1.6328125, "learning_rate": 4.8403627807248656e-05, "loss": 0.1548, "step": 1175 }, { "epoch": 31.783783783783782, "grad_norm": 1.7109375, "learning_rate": 4.839969720724376e-05, "loss": 0.2535, "step": 1176 }, { "epoch": 31.81081081081081, "grad_norm": 1.40625, "learning_rate": 4.839576193420073e-05, "loss": 0.2158, "step": 1177 }, { "epoch": 31.83783783783784, "grad_norm": 1.3671875, "learning_rate": 4.839182198890545e-05, "loss": 0.1788, "step": 1178 }, { "epoch": 31.864864864864863, "grad_norm": 1.8515625, "learning_rate": 4.838787737214474e-05, "loss": 0.3252, "step": 1179 }, { "epoch": 31.89189189189189, "grad_norm": 1.78125, "learning_rate": 4.838392808470638e-05, "loss": 0.2382, "step": 1180 }, { "epoch": 31.91891891891892, "grad_norm": 2.0625, "learning_rate": 4.837997412737905e-05, "loss": 0.2433, "step": 1181 }, { "epoch": 31.945945945945947, "grad_norm": 1.6953125, "learning_rate": 4.837601550095237e-05, "loss": 0.2352, "step": 1182 }, { "epoch": 31.972972972972972, "grad_norm": 1.7265625, "learning_rate": 4.837205220621691e-05, "loss": 0.2696, "step": 1183 }, { "epoch": 32.0, "grad_norm": 1.5390625, "learning_rate": 4.836808424396415e-05, "loss": 0.1573, "step": 1184 }, { "epoch": 32.027027027027025, "grad_norm": 1.7890625, "learning_rate": 4.8364111614986527e-05, "loss": 0.1965, "step": 1185 }, { "epoch": 32.054054054054056, "grad_norm": 1.859375, "learning_rate": 4.836013432007738e-05, "loss": 0.2391, "step": 1186 }, { "epoch": 32.08108108108108, "grad_norm": 1.4921875, "learning_rate": 4.8356152360031e-05, "loss": 0.1664, "step": 1187 }, { "epoch": 32.108108108108105, "grad_norm": 1.7578125, "learning_rate": 4.8352165735642604e-05, "loss": 0.2038, "step": 1188 }, { "epoch": 32.13513513513514, "grad_norm": 2.015625, "learning_rate": 4.8348174447708346e-05, "loss": 0.2898, "step": 1189 }, { "epoch": 32.16216216216216, "grad_norm": 1.21875, "learning_rate": 4.8344178497025296e-05, "loss": 0.1826, "step": 1190 }, { "epoch": 32.189189189189186, "grad_norm": 2.59375, "learning_rate": 4.8340177884391477e-05, "loss": 0.2812, "step": 1191 }, { "epoch": 32.21621621621622, "grad_norm": 1.78125, "learning_rate": 4.833617261060583e-05, "loss": 0.1806, "step": 1192 }, { "epoch": 32.24324324324324, "grad_norm": 1.640625, "learning_rate": 4.833216267646822e-05, "loss": 0.2288, "step": 1193 }, { "epoch": 32.270270270270274, "grad_norm": 1.3203125, "learning_rate": 4.832814808277946e-05, "loss": 0.2005, "step": 1194 }, { "epoch": 32.2972972972973, "grad_norm": 1.7265625, "learning_rate": 4.832412883034127e-05, "loss": 0.2897, "step": 1195 }, { "epoch": 32.32432432432432, "grad_norm": 1.6640625, "learning_rate": 4.832010491995634e-05, "loss": 0.2431, "step": 1196 }, { "epoch": 32.351351351351354, "grad_norm": 1.625, "learning_rate": 4.8316076352428255e-05, "loss": 0.1674, "step": 1197 }, { "epoch": 32.37837837837838, "grad_norm": 2.015625, "learning_rate": 4.831204312856152e-05, "loss": 0.2788, "step": 1198 }, { "epoch": 32.4054054054054, "grad_norm": 1.5546875, "learning_rate": 4.830800524916161e-05, "loss": 0.2276, "step": 1199 }, { "epoch": 32.432432432432435, "grad_norm": 1.1171875, "learning_rate": 4.8303962715034924e-05, "loss": 0.1464, "step": 1200 }, { "epoch": 32.45945945945946, "grad_norm": 1.3046875, "learning_rate": 4.829991552698875e-05, "loss": 0.1421, "step": 1201 }, { "epoch": 32.486486486486484, "grad_norm": 1.8671875, "learning_rate": 4.8295863685831334e-05, "loss": 0.2993, "step": 1202 }, { "epoch": 32.513513513513516, "grad_norm": 1.7421875, "learning_rate": 4.8291807192371865e-05, "loss": 0.2463, "step": 1203 }, { "epoch": 32.54054054054054, "grad_norm": 1.96875, "learning_rate": 4.8287746047420436e-05, "loss": 0.2597, "step": 1204 }, { "epoch": 32.567567567567565, "grad_norm": 1.609375, "learning_rate": 4.828368025178808e-05, "loss": 0.1899, "step": 1205 }, { "epoch": 32.5945945945946, "grad_norm": 1.8828125, "learning_rate": 4.8279609806286755e-05, "loss": 0.238, "step": 1206 }, { "epoch": 32.62162162162162, "grad_norm": 2.1875, "learning_rate": 4.827553471172935e-05, "loss": 0.3748, "step": 1207 }, { "epoch": 32.648648648648646, "grad_norm": 2.03125, "learning_rate": 4.827145496892968e-05, "loss": 0.21, "step": 1208 }, { "epoch": 32.67567567567568, "grad_norm": 1.8671875, "learning_rate": 4.82673705787025e-05, "loss": 0.3767, "step": 1209 }, { "epoch": 32.7027027027027, "grad_norm": 1.5625, "learning_rate": 4.826328154186347e-05, "loss": 0.1599, "step": 1210 }, { "epoch": 32.729729729729726, "grad_norm": 2.28125, "learning_rate": 4.82591878592292e-05, "loss": 0.2228, "step": 1211 }, { "epoch": 32.75675675675676, "grad_norm": 1.4375, "learning_rate": 4.8255089531617225e-05, "loss": 0.1867, "step": 1212 }, { "epoch": 32.78378378378378, "grad_norm": 2.15625, "learning_rate": 4.825098655984599e-05, "loss": 0.3201, "step": 1213 }, { "epoch": 32.810810810810814, "grad_norm": 1.4375, "learning_rate": 4.824687894473488e-05, "loss": 0.1674, "step": 1214 }, { "epoch": 32.83783783783784, "grad_norm": 1.921875, "learning_rate": 4.8242766687104215e-05, "loss": 0.2725, "step": 1215 }, { "epoch": 32.86486486486486, "grad_norm": 1.6328125, "learning_rate": 4.8238649787775224e-05, "loss": 0.2274, "step": 1216 }, { "epoch": 32.891891891891895, "grad_norm": 1.71875, "learning_rate": 4.8234528247570075e-05, "loss": 0.2323, "step": 1217 }, { "epoch": 32.91891891891892, "grad_norm": 1.875, "learning_rate": 4.823040206731187e-05, "loss": 0.2614, "step": 1218 }, { "epoch": 32.945945945945944, "grad_norm": 2.25, "learning_rate": 4.822627124782462e-05, "loss": 0.235, "step": 1219 }, { "epoch": 32.972972972972975, "grad_norm": 2.28125, "learning_rate": 4.8222135789933266e-05, "loss": 0.295, "step": 1220 }, { "epoch": 33.0, "grad_norm": 2.5625, "learning_rate": 4.821799569446368e-05, "loss": 0.2745, "step": 1221 }, { "epoch": 33.027027027027025, "grad_norm": 1.9453125, "learning_rate": 4.821385096224268e-05, "loss": 0.2211, "step": 1222 }, { "epoch": 33.054054054054056, "grad_norm": 0.96484375, "learning_rate": 4.820970159409797e-05, "loss": 0.1248, "step": 1223 }, { "epoch": 33.08108108108108, "grad_norm": 2.8125, "learning_rate": 4.82055475908582e-05, "loss": 0.2038, "step": 1224 }, { "epoch": 33.108108108108105, "grad_norm": 2.46875, "learning_rate": 4.820138895335295e-05, "loss": 0.2673, "step": 1225 }, { "epoch": 33.13513513513514, "grad_norm": 2.390625, "learning_rate": 4.819722568241274e-05, "loss": 0.2722, "step": 1226 }, { "epoch": 33.16216216216216, "grad_norm": 1.5234375, "learning_rate": 4.819305777886896e-05, "loss": 0.2004, "step": 1227 }, { "epoch": 33.189189189189186, "grad_norm": 2.671875, "learning_rate": 4.8188885243553984e-05, "loss": 0.1657, "step": 1228 }, { "epoch": 33.21621621621622, "grad_norm": 1.328125, "learning_rate": 4.818470807730109e-05, "loss": 0.1641, "step": 1229 }, { "epoch": 33.24324324324324, "grad_norm": 1.3515625, "learning_rate": 4.818052628094446e-05, "loss": 0.1429, "step": 1230 }, { "epoch": 33.270270270270274, "grad_norm": 1.8125, "learning_rate": 4.817633985531924e-05, "loss": 0.1842, "step": 1231 }, { "epoch": 33.2972972972973, "grad_norm": 1.6953125, "learning_rate": 4.817214880126148e-05, "loss": 0.2682, "step": 1232 }, { "epoch": 33.32432432432432, "grad_norm": 1.8828125, "learning_rate": 4.816795311960814e-05, "loss": 0.1898, "step": 1233 }, { "epoch": 33.351351351351354, "grad_norm": 0.91796875, "learning_rate": 4.8163752811197135e-05, "loss": 0.1269, "step": 1234 }, { "epoch": 33.37837837837838, "grad_norm": 1.96875, "learning_rate": 4.815954787686727e-05, "loss": 0.1876, "step": 1235 }, { "epoch": 33.4054054054054, "grad_norm": 2.40625, "learning_rate": 4.8155338317458306e-05, "loss": 0.2471, "step": 1236 }, { "epoch": 33.432432432432435, "grad_norm": 1.6640625, "learning_rate": 4.815112413381092e-05, "loss": 0.1695, "step": 1237 }, { "epoch": 33.45945945945946, "grad_norm": 1.375, "learning_rate": 4.8146905326766675e-05, "loss": 0.1695, "step": 1238 }, { "epoch": 33.486486486486484, "grad_norm": 1.734375, "learning_rate": 4.8142681897168126e-05, "loss": 0.2614, "step": 1239 }, { "epoch": 33.513513513513516, "grad_norm": 1.7109375, "learning_rate": 4.8138453845858687e-05, "loss": 0.2722, "step": 1240 }, { "epoch": 33.54054054054054, "grad_norm": 1.515625, "learning_rate": 4.813422117368273e-05, "loss": 0.2454, "step": 1241 }, { "epoch": 33.567567567567565, "grad_norm": 1.8515625, "learning_rate": 4.812998388148554e-05, "loss": 0.1664, "step": 1242 }, { "epoch": 33.5945945945946, "grad_norm": 1.578125, "learning_rate": 4.812574197011334e-05, "loss": 0.1717, "step": 1243 }, { "epoch": 33.62162162162162, "grad_norm": 1.734375, "learning_rate": 4.8121495440413225e-05, "loss": 0.2172, "step": 1244 }, { "epoch": 33.648648648648646, "grad_norm": 1.546875, "learning_rate": 4.811724429323329e-05, "loss": 0.1806, "step": 1245 }, { "epoch": 33.67567567567568, "grad_norm": 1.328125, "learning_rate": 4.811298852942248e-05, "loss": 0.1717, "step": 1246 }, { "epoch": 33.7027027027027, "grad_norm": 2.0, "learning_rate": 4.810872814983071e-05, "loss": 0.2674, "step": 1247 }, { "epoch": 33.729729729729726, "grad_norm": 1.078125, "learning_rate": 4.8104463155308795e-05, "loss": 0.14, "step": 1248 }, { "epoch": 33.75675675675676, "grad_norm": 1.6484375, "learning_rate": 4.810019354670847e-05, "loss": 0.1992, "step": 1249 }, { "epoch": 33.78378378378378, "grad_norm": 1.140625, "learning_rate": 4.809591932488241e-05, "loss": 0.1331, "step": 1250 }, { "epoch": 33.810810810810814, "grad_norm": 1.4453125, "learning_rate": 4.809164049068419e-05, "loss": 0.1925, "step": 1251 }, { "epoch": 33.83783783783784, "grad_norm": 1.28125, "learning_rate": 4.808735704496831e-05, "loss": 0.1347, "step": 1252 }, { "epoch": 33.86486486486486, "grad_norm": 2.140625, "learning_rate": 4.8083068988590204e-05, "loss": 0.2095, "step": 1253 }, { "epoch": 33.891891891891895, "grad_norm": 0.96875, "learning_rate": 4.807877632240622e-05, "loss": 0.1122, "step": 1254 }, { "epoch": 33.91891891891892, "grad_norm": 2.203125, "learning_rate": 4.807447904727361e-05, "loss": 0.2272, "step": 1255 }, { "epoch": 33.945945945945944, "grad_norm": 1.1328125, "learning_rate": 4.8070177164050586e-05, "loss": 0.1297, "step": 1256 }, { "epoch": 33.972972972972975, "grad_norm": 1.65625, "learning_rate": 4.806587067359624e-05, "loss": 0.2713, "step": 1257 }, { "epoch": 34.0, "grad_norm": 1.3671875, "learning_rate": 4.80615595767706e-05, "loss": 0.161, "step": 1258 }, { "epoch": 34.027027027027025, "grad_norm": 1.5625, "learning_rate": 4.805724387443462e-05, "loss": 0.2155, "step": 1259 }, { "epoch": 34.054054054054056, "grad_norm": 1.3046875, "learning_rate": 4.8052923567450165e-05, "loss": 0.153, "step": 1260 }, { "epoch": 34.08108108108108, "grad_norm": 1.5859375, "learning_rate": 4.804859865668002e-05, "loss": 0.1955, "step": 1261 }, { "epoch": 34.108108108108105, "grad_norm": 1.5078125, "learning_rate": 4.8044269142987895e-05, "loss": 0.1446, "step": 1262 }, { "epoch": 34.13513513513514, "grad_norm": 1.4609375, "learning_rate": 4.8039935027238414e-05, "loss": 0.1662, "step": 1263 }, { "epoch": 34.16216216216216, "grad_norm": 1.5703125, "learning_rate": 4.8035596310297124e-05, "loss": 0.2034, "step": 1264 }, { "epoch": 34.189189189189186, "grad_norm": 1.2109375, "learning_rate": 4.8031252993030484e-05, "loss": 0.1426, "step": 1265 }, { "epoch": 34.21621621621622, "grad_norm": 1.8671875, "learning_rate": 4.8026905076305884e-05, "loss": 0.1531, "step": 1266 }, { "epoch": 34.24324324324324, "grad_norm": 1.34375, "learning_rate": 4.802255256099162e-05, "loss": 0.2181, "step": 1267 }, { "epoch": 34.270270270270274, "grad_norm": 1.46875, "learning_rate": 4.8018195447956905e-05, "loss": 0.2443, "step": 1268 }, { "epoch": 34.2972972972973, "grad_norm": 1.75, "learning_rate": 4.8013833738071884e-05, "loss": 0.1461, "step": 1269 }, { "epoch": 34.32432432432432, "grad_norm": 1.5078125, "learning_rate": 4.800946743220762e-05, "loss": 0.1662, "step": 1270 }, { "epoch": 34.351351351351354, "grad_norm": 1.6484375, "learning_rate": 4.8005096531236074e-05, "loss": 0.2846, "step": 1271 }, { "epoch": 34.37837837837838, "grad_norm": 2.09375, "learning_rate": 4.800072103603014e-05, "loss": 0.2576, "step": 1272 }, { "epoch": 34.4054054054054, "grad_norm": 2.84375, "learning_rate": 4.799634094746363e-05, "loss": 0.2119, "step": 1273 }, { "epoch": 34.432432432432435, "grad_norm": 1.2578125, "learning_rate": 4.7991956266411266e-05, "loss": 0.112, "step": 1274 }, { "epoch": 34.45945945945946, "grad_norm": 1.21875, "learning_rate": 4.798756699374869e-05, "loss": 0.184, "step": 1275 }, { "epoch": 34.486486486486484, "grad_norm": 3.703125, "learning_rate": 4.798317313035247e-05, "loss": 0.2521, "step": 1276 }, { "epoch": 34.513513513513516, "grad_norm": 2.0625, "learning_rate": 4.797877467710008e-05, "loss": 0.2557, "step": 1277 }, { "epoch": 34.54054054054054, "grad_norm": 2.25, "learning_rate": 4.797437163486991e-05, "loss": 0.2874, "step": 1278 }, { "epoch": 34.567567567567565, "grad_norm": 2.109375, "learning_rate": 4.796996400454127e-05, "loss": 0.1927, "step": 1279 }, { "epoch": 34.5945945945946, "grad_norm": 2.875, "learning_rate": 4.796555178699438e-05, "loss": 0.1987, "step": 1280 }, { "epoch": 34.62162162162162, "grad_norm": 2.375, "learning_rate": 4.79611349831104e-05, "loss": 0.1541, "step": 1281 }, { "epoch": 34.648648648648646, "grad_norm": 1.9375, "learning_rate": 4.795671359377137e-05, "loss": 0.2036, "step": 1282 }, { "epoch": 34.67567567567568, "grad_norm": 2.703125, "learning_rate": 4.795228761986028e-05, "loss": 0.1873, "step": 1283 }, { "epoch": 34.7027027027027, "grad_norm": 4.65625, "learning_rate": 4.7947857062261006e-05, "loss": 0.3151, "step": 1284 }, { "epoch": 34.729729729729726, "grad_norm": 4.40625, "learning_rate": 4.794342192185835e-05, "loss": 0.2044, "step": 1285 }, { "epoch": 34.75675675675676, "grad_norm": 2.28125, "learning_rate": 4.7938982199538045e-05, "loss": 0.2202, "step": 1286 }, { "epoch": 34.78378378378378, "grad_norm": 3.109375, "learning_rate": 4.7934537896186724e-05, "loss": 0.2661, "step": 1287 }, { "epoch": 34.810810810810814, "grad_norm": 3.75, "learning_rate": 4.793008901269193e-05, "loss": 0.2303, "step": 1288 }, { "epoch": 34.83783783783784, "grad_norm": 3.625, "learning_rate": 4.7925635549942134e-05, "loss": 0.2733, "step": 1289 }, { "epoch": 34.86486486486486, "grad_norm": 1.765625, "learning_rate": 4.7921177508826706e-05, "loss": 0.1509, "step": 1290 }, { "epoch": 34.891891891891895, "grad_norm": 2.34375, "learning_rate": 4.791671489023595e-05, "loss": 0.2575, "step": 1291 }, { "epoch": 34.91891891891892, "grad_norm": 2.25, "learning_rate": 4.7912247695061074e-05, "loss": 0.2508, "step": 1292 }, { "epoch": 34.945945945945944, "grad_norm": 2.4375, "learning_rate": 4.790777592419419e-05, "loss": 0.2063, "step": 1293 }, { "epoch": 34.972972972972975, "grad_norm": 1.4296875, "learning_rate": 4.790329957852834e-05, "loss": 0.1509, "step": 1294 }, { "epoch": 35.0, "grad_norm": 1.2578125, "learning_rate": 4.7898818658957466e-05, "loss": 0.1418, "step": 1295 }, { "epoch": 35.027027027027025, "grad_norm": 2.171875, "learning_rate": 4.789433316637644e-05, "loss": 0.21, "step": 1296 }, { "epoch": 35.054054054054056, "grad_norm": 2.3125, "learning_rate": 4.7889843101681034e-05, "loss": 0.2626, "step": 1297 }, { "epoch": 35.08108108108108, "grad_norm": 1.4296875, "learning_rate": 4.788534846576793e-05, "loss": 0.2372, "step": 1298 }, { "epoch": 35.108108108108105, "grad_norm": 2.109375, "learning_rate": 4.7880849259534735e-05, "loss": 0.2182, "step": 1299 }, { "epoch": 35.13513513513514, "grad_norm": 1.6953125, "learning_rate": 4.787634548387997e-05, "loss": 0.162, "step": 1300 }, { "epoch": 35.16216216216216, "grad_norm": 1.2265625, "learning_rate": 4.787183713970305e-05, "loss": 0.1181, "step": 1301 }, { "epoch": 35.189189189189186, "grad_norm": 1.640625, "learning_rate": 4.786732422790432e-05, "loss": 0.2354, "step": 1302 }, { "epoch": 35.21621621621622, "grad_norm": 1.8984375, "learning_rate": 4.786280674938503e-05, "loss": 0.1897, "step": 1303 }, { "epoch": 35.24324324324324, "grad_norm": 2.015625, "learning_rate": 4.785828470504735e-05, "loss": 0.2462, "step": 1304 }, { "epoch": 35.270270270270274, "grad_norm": 1.7421875, "learning_rate": 4.785375809579434e-05, "loss": 0.2546, "step": 1305 }, { "epoch": 35.2972972972973, "grad_norm": 2.28125, "learning_rate": 4.7849226922530004e-05, "loss": 0.2322, "step": 1306 }, { "epoch": 35.32432432432432, "grad_norm": 1.40625, "learning_rate": 4.784469118615923e-05, "loss": 0.1285, "step": 1307 }, { "epoch": 35.351351351351354, "grad_norm": 1.5234375, "learning_rate": 4.7840150887587834e-05, "loss": 0.2194, "step": 1308 }, { "epoch": 35.37837837837838, "grad_norm": 1.421875, "learning_rate": 4.7835606027722524e-05, "loss": 0.1424, "step": 1309 }, { "epoch": 35.4054054054054, "grad_norm": 1.3984375, "learning_rate": 4.7831056607470945e-05, "loss": 0.1309, "step": 1310 }, { "epoch": 35.432432432432435, "grad_norm": 1.4765625, "learning_rate": 4.782650262774163e-05, "loss": 0.1869, "step": 1311 }, { "epoch": 35.45945945945946, "grad_norm": 1.2109375, "learning_rate": 4.782194408944404e-05, "loss": 0.1083, "step": 1312 }, { "epoch": 35.486486486486484, "grad_norm": 1.359375, "learning_rate": 4.7817380993488534e-05, "loss": 0.1525, "step": 1313 }, { "epoch": 35.513513513513516, "grad_norm": 1.4453125, "learning_rate": 4.7812813340786386e-05, "loss": 0.1989, "step": 1314 }, { "epoch": 35.54054054054054, "grad_norm": 1.4921875, "learning_rate": 4.780824113224978e-05, "loss": 0.2773, "step": 1315 }, { "epoch": 35.567567567567565, "grad_norm": 1.84375, "learning_rate": 4.780366436879181e-05, "loss": 0.2549, "step": 1316 }, { "epoch": 35.5945945945946, "grad_norm": 1.609375, "learning_rate": 4.779908305132648e-05, "loss": 0.2058, "step": 1317 }, { "epoch": 35.62162162162162, "grad_norm": 1.4375, "learning_rate": 4.779449718076869e-05, "loss": 0.1981, "step": 1318 }, { "epoch": 35.648648648648646, "grad_norm": 1.984375, "learning_rate": 4.778990675803428e-05, "loss": 0.3125, "step": 1319 }, { "epoch": 35.67567567567568, "grad_norm": 1.7734375, "learning_rate": 4.778531178403996e-05, "loss": 0.2009, "step": 1320 }, { "epoch": 35.7027027027027, "grad_norm": 1.8828125, "learning_rate": 4.77807122597034e-05, "loss": 0.2453, "step": 1321 }, { "epoch": 35.729729729729726, "grad_norm": 2.078125, "learning_rate": 4.777610818594311e-05, "loss": 0.1998, "step": 1322 }, { "epoch": 35.75675675675676, "grad_norm": 2.515625, "learning_rate": 4.777149956367858e-05, "loss": 0.3222, "step": 1323 }, { "epoch": 35.78378378378378, "grad_norm": 2.109375, "learning_rate": 4.776688639383016e-05, "loss": 0.2034, "step": 1324 }, { "epoch": 35.810810810810814, "grad_norm": 1.4140625, "learning_rate": 4.776226867731912e-05, "loss": 0.1428, "step": 1325 }, { "epoch": 35.83783783783784, "grad_norm": 3.59375, "learning_rate": 4.775764641506765e-05, "loss": 0.2485, "step": 1326 }, { "epoch": 35.86486486486486, "grad_norm": 2.359375, "learning_rate": 4.775301960799884e-05, "loss": 0.1879, "step": 1327 }, { "epoch": 35.891891891891895, "grad_norm": 2.71875, "learning_rate": 4.774838825703668e-05, "loss": 0.1916, "step": 1328 }, { "epoch": 35.91891891891892, "grad_norm": 1.390625, "learning_rate": 4.7743752363106074e-05, "loss": 0.1624, "step": 1329 }, { "epoch": 35.945945945945944, "grad_norm": 3.046875, "learning_rate": 4.773911192713284e-05, "loss": 0.2899, "step": 1330 }, { "epoch": 35.972972972972975, "grad_norm": 2.46875, "learning_rate": 4.773446695004369e-05, "loss": 0.1422, "step": 1331 }, { "epoch": 36.0, "grad_norm": 2.828125, "learning_rate": 4.7729817432766254e-05, "loss": 0.2635, "step": 1332 }, { "epoch": 36.027027027027025, "grad_norm": 1.8828125, "learning_rate": 4.7725163376229064e-05, "loss": 0.2757, "step": 1333 }, { "epoch": 36.054054054054056, "grad_norm": 1.8828125, "learning_rate": 4.772050478136156e-05, "loss": 0.2656, "step": 1334 }, { "epoch": 36.08108108108108, "grad_norm": 3.015625, "learning_rate": 4.771584164909408e-05, "loss": 0.2504, "step": 1335 }, { "epoch": 36.108108108108105, "grad_norm": 2.40625, "learning_rate": 4.771117398035788e-05, "loss": 0.2097, "step": 1336 }, { "epoch": 36.13513513513514, "grad_norm": 1.6796875, "learning_rate": 4.770650177608512e-05, "loss": 0.2487, "step": 1337 }, { "epoch": 36.16216216216216, "grad_norm": 2.3125, "learning_rate": 4.7701825037208856e-05, "loss": 0.2429, "step": 1338 }, { "epoch": 36.189189189189186, "grad_norm": 2.65625, "learning_rate": 4.769714376466306e-05, "loss": 0.2555, "step": 1339 }, { "epoch": 36.21621621621622, "grad_norm": 2.21875, "learning_rate": 4.769245795938261e-05, "loss": 0.297, "step": 1340 }, { "epoch": 36.24324324324324, "grad_norm": 1.8203125, "learning_rate": 4.768776762230327e-05, "loss": 0.2879, "step": 1341 }, { "epoch": 36.270270270270274, "grad_norm": 1.9375, "learning_rate": 4.768307275436174e-05, "loss": 0.1984, "step": 1342 }, { "epoch": 36.2972972972973, "grad_norm": 3.09375, "learning_rate": 4.7678373356495615e-05, "loss": 0.1927, "step": 1343 }, { "epoch": 36.32432432432432, "grad_norm": 2.546875, "learning_rate": 4.767366942964337e-05, "loss": 0.2285, "step": 1344 }, { "epoch": 36.351351351351354, "grad_norm": 1.2734375, "learning_rate": 4.766896097474441e-05, "loss": 0.1606, "step": 1345 }, { "epoch": 36.37837837837838, "grad_norm": 2.296875, "learning_rate": 4.766424799273904e-05, "loss": 0.2065, "step": 1346 }, { "epoch": 36.4054054054054, "grad_norm": 1.9921875, "learning_rate": 4.765953048456846e-05, "loss": 0.1472, "step": 1347 }, { "epoch": 36.432432432432435, "grad_norm": 1.859375, "learning_rate": 4.765480845117479e-05, "loss": 0.2079, "step": 1348 }, { "epoch": 36.45945945945946, "grad_norm": 1.1953125, "learning_rate": 4.7650081893501034e-05, "loss": 0.14, "step": 1349 }, { "epoch": 36.486486486486484, "grad_norm": 1.3828125, "learning_rate": 4.764535081249111e-05, "loss": 0.2144, "step": 1350 }, { "epoch": 36.513513513513516, "grad_norm": 2.25, "learning_rate": 4.764061520908985e-05, "loss": 0.168, "step": 1351 }, { "epoch": 36.54054054054054, "grad_norm": 1.8828125, "learning_rate": 4.763587508424298e-05, "loss": 0.1973, "step": 1352 }, { "epoch": 36.567567567567565, "grad_norm": 1.96875, "learning_rate": 4.7631130438897095e-05, "loss": 0.3051, "step": 1353 }, { "epoch": 36.5945945945946, "grad_norm": 1.5703125, "learning_rate": 4.7626381273999766e-05, "loss": 0.1813, "step": 1354 }, { "epoch": 36.62162162162162, "grad_norm": 1.3125, "learning_rate": 4.76216275904994e-05, "loss": 0.1506, "step": 1355 }, { "epoch": 36.648648648648646, "grad_norm": 1.40625, "learning_rate": 4.7616869389345345e-05, "loss": 0.2108, "step": 1356 }, { "epoch": 36.67567567567568, "grad_norm": 1.7109375, "learning_rate": 4.761210667148782e-05, "loss": 0.2239, "step": 1357 }, { "epoch": 36.7027027027027, "grad_norm": 2.234375, "learning_rate": 4.7607339437877986e-05, "loss": 0.2251, "step": 1358 }, { "epoch": 36.729729729729726, "grad_norm": 2.0, "learning_rate": 4.760256768946787e-05, "loss": 0.3036, "step": 1359 }, { "epoch": 36.75675675675676, "grad_norm": 2.125, "learning_rate": 4.7597791427210414e-05, "loss": 0.2786, "step": 1360 }, { "epoch": 36.78378378378378, "grad_norm": 1.3203125, "learning_rate": 4.7593010652059465e-05, "loss": 0.1774, "step": 1361 }, { "epoch": 36.810810810810814, "grad_norm": 2.296875, "learning_rate": 4.7588225364969774e-05, "loss": 0.2769, "step": 1362 }, { "epoch": 36.83783783783784, "grad_norm": 1.6015625, "learning_rate": 4.7583435566896985e-05, "loss": 0.1478, "step": 1363 }, { "epoch": 36.86486486486486, "grad_norm": 2.21875, "learning_rate": 4.7578641258797626e-05, "loss": 0.2089, "step": 1364 }, { "epoch": 36.891891891891895, "grad_norm": 1.7578125, "learning_rate": 4.757384244162917e-05, "loss": 0.2557, "step": 1365 }, { "epoch": 36.91891891891892, "grad_norm": 1.4453125, "learning_rate": 4.7569039116349955e-05, "loss": 0.1106, "step": 1366 }, { "epoch": 36.945945945945944, "grad_norm": 1.734375, "learning_rate": 4.7564231283919236e-05, "loss": 0.1603, "step": 1367 }, { "epoch": 36.972972972972975, "grad_norm": 1.640625, "learning_rate": 4.7559418945297145e-05, "loss": 0.2768, "step": 1368 }, { "epoch": 37.0, "grad_norm": 1.5, "learning_rate": 4.7554602101444744e-05, "loss": 0.1563, "step": 1369 }, { "epoch": 37.027027027027025, "grad_norm": 1.6796875, "learning_rate": 4.754978075332398e-05, "loss": 0.2166, "step": 1370 }, { "epoch": 37.054054054054056, "grad_norm": 1.21875, "learning_rate": 4.7544954901897684e-05, "loss": 0.1413, "step": 1371 }, { "epoch": 37.08108108108108, "grad_norm": 1.5, "learning_rate": 4.754012454812964e-05, "loss": 0.2141, "step": 1372 }, { "epoch": 37.108108108108105, "grad_norm": 1.609375, "learning_rate": 4.753528969298445e-05, "loss": 0.2057, "step": 1373 }, { "epoch": 37.13513513513514, "grad_norm": 1.796875, "learning_rate": 4.7530450337427696e-05, "loss": 0.2158, "step": 1374 }, { "epoch": 37.16216216216216, "grad_norm": 1.375, "learning_rate": 4.75256064824258e-05, "loss": 0.204, "step": 1375 }, { "epoch": 37.189189189189186, "grad_norm": 1.5703125, "learning_rate": 4.752075812894611e-05, "loss": 0.2016, "step": 1376 }, { "epoch": 37.21621621621622, "grad_norm": 1.3046875, "learning_rate": 4.7515905277956874e-05, "loss": 0.1496, "step": 1377 }, { "epoch": 37.24324324324324, "grad_norm": 1.15625, "learning_rate": 4.751104793042722e-05, "loss": 0.1379, "step": 1378 }, { "epoch": 37.270270270270274, "grad_norm": 1.359375, "learning_rate": 4.7506186087327184e-05, "loss": 0.1765, "step": 1379 }, { "epoch": 37.2972972972973, "grad_norm": 2.109375, "learning_rate": 4.7501319749627706e-05, "loss": 0.3124, "step": 1380 }, { "epoch": 37.32432432432432, "grad_norm": 1.734375, "learning_rate": 4.749644891830063e-05, "loss": 0.1717, "step": 1381 }, { "epoch": 37.351351351351354, "grad_norm": 1.515625, "learning_rate": 4.749157359431866e-05, "loss": 0.1386, "step": 1382 }, { "epoch": 37.37837837837838, "grad_norm": 1.2265625, "learning_rate": 4.748669377865544e-05, "loss": 0.1597, "step": 1383 }, { "epoch": 37.4054054054054, "grad_norm": 2.28125, "learning_rate": 4.74818094722855e-05, "loss": 0.2628, "step": 1384 }, { "epoch": 37.432432432432435, "grad_norm": 1.5390625, "learning_rate": 4.747692067618425e-05, "loss": 0.2009, "step": 1385 }, { "epoch": 37.45945945945946, "grad_norm": 1.65625, "learning_rate": 4.7472027391328e-05, "loss": 0.2118, "step": 1386 }, { "epoch": 37.486486486486484, "grad_norm": 0.76171875, "learning_rate": 4.746712961869398e-05, "loss": 0.1, "step": 1387 }, { "epoch": 37.513513513513516, "grad_norm": 2.40625, "learning_rate": 4.746222735926029e-05, "loss": 0.2996, "step": 1388 }, { "epoch": 37.54054054054054, "grad_norm": 1.546875, "learning_rate": 4.745732061400594e-05, "loss": 0.231, "step": 1389 }, { "epoch": 37.567567567567565, "grad_norm": 1.4375, "learning_rate": 4.745240938391083e-05, "loss": 0.2308, "step": 1390 }, { "epoch": 37.5945945945946, "grad_norm": 1.3515625, "learning_rate": 4.744749366995575e-05, "loss": 0.1689, "step": 1391 }, { "epoch": 37.62162162162162, "grad_norm": 1.71875, "learning_rate": 4.744257347312242e-05, "loss": 0.2925, "step": 1392 }, { "epoch": 37.648648648648646, "grad_norm": 1.1953125, "learning_rate": 4.74376487943934e-05, "loss": 0.1895, "step": 1393 }, { "epoch": 37.67567567567568, "grad_norm": 1.5625, "learning_rate": 4.7432719634752175e-05, "loss": 0.1784, "step": 1394 }, { "epoch": 37.7027027027027, "grad_norm": 1.3203125, "learning_rate": 4.7427785995183136e-05, "loss": 0.222, "step": 1395 }, { "epoch": 37.729729729729726, "grad_norm": 1.828125, "learning_rate": 4.742284787667156e-05, "loss": 0.2014, "step": 1396 }, { "epoch": 37.75675675675676, "grad_norm": 1.3359375, "learning_rate": 4.7417905280203594e-05, "loss": 0.1844, "step": 1397 }, { "epoch": 37.78378378378378, "grad_norm": 1.3671875, "learning_rate": 4.741295820676632e-05, "loss": 0.1886, "step": 1398 }, { "epoch": 37.810810810810814, "grad_norm": 1.3984375, "learning_rate": 4.740800665734768e-05, "loss": 0.188, "step": 1399 }, { "epoch": 37.83783783783784, "grad_norm": 1.8125, "learning_rate": 4.740305063293652e-05, "loss": 0.1947, "step": 1400 }, { "epoch": 37.86486486486486, "grad_norm": 1.40625, "learning_rate": 4.73980901345226e-05, "loss": 0.1752, "step": 1401 }, { "epoch": 37.891891891891895, "grad_norm": 1.2109375, "learning_rate": 4.739312516309654e-05, "loss": 0.1889, "step": 1402 }, { "epoch": 37.91891891891892, "grad_norm": 2.140625, "learning_rate": 4.738815571964988e-05, "loss": 0.3198, "step": 1403 }, { "epoch": 37.945945945945944, "grad_norm": 2.203125, "learning_rate": 4.738318180517505e-05, "loss": 0.2385, "step": 1404 }, { "epoch": 37.972972972972975, "grad_norm": 1.796875, "learning_rate": 4.737820342066535e-05, "loss": 0.2818, "step": 1405 }, { "epoch": 38.0, "grad_norm": 1.8125, "learning_rate": 4.7373220567114996e-05, "loss": 0.2088, "step": 1406 }, { "epoch": 38.027027027027025, "grad_norm": 1.828125, "learning_rate": 4.736823324551909e-05, "loss": 0.2342, "step": 1407 }, { "epoch": 38.054054054054056, "grad_norm": 1.5546875, "learning_rate": 4.736324145687362e-05, "loss": 0.2243, "step": 1408 }, { "epoch": 38.08108108108108, "grad_norm": 1.2421875, "learning_rate": 4.7358245202175475e-05, "loss": 0.1665, "step": 1409 }, { "epoch": 38.108108108108105, "grad_norm": 1.7890625, "learning_rate": 4.735324448242244e-05, "loss": 0.2371, "step": 1410 }, { "epoch": 38.13513513513514, "grad_norm": 1.921875, "learning_rate": 4.734823929861317e-05, "loss": 0.2189, "step": 1411 }, { "epoch": 38.16216216216216, "grad_norm": 1.3671875, "learning_rate": 4.7343229651747235e-05, "loss": 0.1406, "step": 1412 }, { "epoch": 38.189189189189186, "grad_norm": 1.59375, "learning_rate": 4.7338215542825084e-05, "loss": 0.2487, "step": 1413 }, { "epoch": 38.21621621621622, "grad_norm": 1.1875, "learning_rate": 4.733319697284806e-05, "loss": 0.1626, "step": 1414 }, { "epoch": 38.24324324324324, "grad_norm": 1.2734375, "learning_rate": 4.732817394281839e-05, "loss": 0.1871, "step": 1415 }, { "epoch": 38.270270270270274, "grad_norm": 1.2421875, "learning_rate": 4.732314645373921e-05, "loss": 0.1679, "step": 1416 }, { "epoch": 38.2972972972973, "grad_norm": 1.4609375, "learning_rate": 4.731811450661454e-05, "loss": 0.2219, "step": 1417 }, { "epoch": 38.32432432432432, "grad_norm": 1.5390625, "learning_rate": 4.7313078102449264e-05, "loss": 0.2326, "step": 1418 }, { "epoch": 38.351351351351354, "grad_norm": 1.7890625, "learning_rate": 4.730803724224919e-05, "loss": 0.2469, "step": 1419 }, { "epoch": 38.37837837837838, "grad_norm": 1.578125, "learning_rate": 4.730299192702101e-05, "loss": 0.2361, "step": 1420 }, { "epoch": 38.4054054054054, "grad_norm": 2.375, "learning_rate": 4.729794215777229e-05, "loss": 0.3695, "step": 1421 }, { "epoch": 38.432432432432435, "grad_norm": 1.46875, "learning_rate": 4.7292887935511495e-05, "loss": 0.1502, "step": 1422 }, { "epoch": 38.45945945945946, "grad_norm": 1.859375, "learning_rate": 4.728782926124798e-05, "loss": 0.1795, "step": 1423 }, { "epoch": 38.486486486486484, "grad_norm": 1.2265625, "learning_rate": 4.728276613599199e-05, "loss": 0.191, "step": 1424 }, { "epoch": 38.513513513513516, "grad_norm": 2.125, "learning_rate": 4.7277698560754647e-05, "loss": 0.188, "step": 1425 }, { "epoch": 38.54054054054054, "grad_norm": 1.0546875, "learning_rate": 4.7272626536547996e-05, "loss": 0.119, "step": 1426 }, { "epoch": 38.567567567567565, "grad_norm": 1.9921875, "learning_rate": 4.726755006438491e-05, "loss": 0.2528, "step": 1427 }, { "epoch": 38.5945945945946, "grad_norm": 2.046875, "learning_rate": 4.7262469145279214e-05, "loss": 0.253, "step": 1428 }, { "epoch": 38.62162162162162, "grad_norm": 2.53125, "learning_rate": 4.7257383780245585e-05, "loss": 0.238, "step": 1429 }, { "epoch": 38.648648648648646, "grad_norm": 2.21875, "learning_rate": 4.7252293970299595e-05, "loss": 0.2222, "step": 1430 }, { "epoch": 38.67567567567568, "grad_norm": 1.6875, "learning_rate": 4.7247199716457704e-05, "loss": 0.2233, "step": 1431 }, { "epoch": 38.7027027027027, "grad_norm": 2.6875, "learning_rate": 4.724210101973726e-05, "loss": 0.2349, "step": 1432 }, { "epoch": 38.729729729729726, "grad_norm": 2.5, "learning_rate": 4.723699788115651e-05, "loss": 0.1672, "step": 1433 }, { "epoch": 38.75675675675676, "grad_norm": 2.453125, "learning_rate": 4.723189030173456e-05, "loss": 0.1617, "step": 1434 }, { "epoch": 38.78378378378378, "grad_norm": 1.1875, "learning_rate": 4.7226778282491424e-05, "loss": 0.1532, "step": 1435 }, { "epoch": 38.810810810810814, "grad_norm": 2.234375, "learning_rate": 4.722166182444801e-05, "loss": 0.1319, "step": 1436 }, { "epoch": 38.83783783783784, "grad_norm": 2.3125, "learning_rate": 4.7216540928626086e-05, "loss": 0.2182, "step": 1437 }, { "epoch": 38.86486486486486, "grad_norm": 2.25, "learning_rate": 4.7211415596048324e-05, "loss": 0.2209, "step": 1438 }, { "epoch": 38.891891891891895, "grad_norm": 0.92578125, "learning_rate": 4.720628582773829e-05, "loss": 0.108, "step": 1439 }, { "epoch": 38.91891891891892, "grad_norm": 1.453125, "learning_rate": 4.7201151624720406e-05, "loss": 0.2074, "step": 1440 }, { "epoch": 38.945945945945944, "grad_norm": 2.53125, "learning_rate": 4.7196012988020016e-05, "loss": 0.2608, "step": 1441 }, { "epoch": 38.972972972972975, "grad_norm": 1.59375, "learning_rate": 4.7190869918663316e-05, "loss": 0.1794, "step": 1442 }, { "epoch": 39.0, "grad_norm": 1.3828125, "learning_rate": 4.7185722417677426e-05, "loss": 0.156, "step": 1443 }, { "epoch": 39.027027027027025, "grad_norm": 1.84375, "learning_rate": 4.71805704860903e-05, "loss": 0.2387, "step": 1444 }, { "epoch": 39.054054054054056, "grad_norm": 1.796875, "learning_rate": 4.7175414124930814e-05, "loss": 0.1828, "step": 1445 }, { "epoch": 39.08108108108108, "grad_norm": 1.6640625, "learning_rate": 4.7170253335228735e-05, "loss": 0.1907, "step": 1446 }, { "epoch": 39.108108108108105, "grad_norm": 1.46875, "learning_rate": 4.716508811801469e-05, "loss": 0.2162, "step": 1447 }, { "epoch": 39.13513513513514, "grad_norm": 0.97265625, "learning_rate": 4.7159918474320187e-05, "loss": 0.1121, "step": 1448 }, { "epoch": 39.16216216216216, "grad_norm": 1.671875, "learning_rate": 4.715474440517764e-05, "loss": 0.2768, "step": 1449 }, { "epoch": 39.189189189189186, "grad_norm": 1.15625, "learning_rate": 4.714956591162034e-05, "loss": 0.1504, "step": 1450 }, { "epoch": 39.21621621621622, "grad_norm": 1.3984375, "learning_rate": 4.714438299468245e-05, "loss": 0.1867, "step": 1451 }, { "epoch": 39.24324324324324, "grad_norm": 1.4609375, "learning_rate": 4.713919565539904e-05, "loss": 0.158, "step": 1452 }, { "epoch": 39.270270270270274, "grad_norm": 1.6484375, "learning_rate": 4.713400389480602e-05, "loss": 0.2023, "step": 1453 }, { "epoch": 39.2972972972973, "grad_norm": 1.4765625, "learning_rate": 4.712880771394024e-05, "loss": 0.147, "step": 1454 }, { "epoch": 39.32432432432432, "grad_norm": 1.4140625, "learning_rate": 4.7123607113839395e-05, "loss": 0.2046, "step": 1455 }, { "epoch": 39.351351351351354, "grad_norm": 1.7109375, "learning_rate": 4.711840209554206e-05, "loss": 0.2732, "step": 1456 }, { "epoch": 39.37837837837838, "grad_norm": 1.609375, "learning_rate": 4.711319266008773e-05, "loss": 0.2278, "step": 1457 }, { "epoch": 39.4054054054054, "grad_norm": 1.2421875, "learning_rate": 4.7107978808516716e-05, "loss": 0.1523, "step": 1458 }, { "epoch": 39.432432432432435, "grad_norm": 1.6796875, "learning_rate": 4.7102760541870276e-05, "loss": 0.293, "step": 1459 }, { "epoch": 39.45945945945946, "grad_norm": 1.0703125, "learning_rate": 4.709753786119052e-05, "loss": 0.1265, "step": 1460 }, { "epoch": 39.486486486486484, "grad_norm": 1.0859375, "learning_rate": 4.709231076752045e-05, "loss": 0.1186, "step": 1461 }, { "epoch": 39.513513513513516, "grad_norm": 2.015625, "learning_rate": 4.708707926190394e-05, "loss": 0.1596, "step": 1462 }, { "epoch": 39.54054054054054, "grad_norm": 1.3828125, "learning_rate": 4.708184334538573e-05, "loss": 0.1449, "step": 1463 }, { "epoch": 39.567567567567565, "grad_norm": 1.078125, "learning_rate": 4.7076603019011496e-05, "loss": 0.1108, "step": 1464 }, { "epoch": 39.5945945945946, "grad_norm": 1.84375, "learning_rate": 4.7071358283827727e-05, "loss": 0.2231, "step": 1465 }, { "epoch": 39.62162162162162, "grad_norm": 1.609375, "learning_rate": 4.7066109140881824e-05, "loss": 0.2498, "step": 1466 }, { "epoch": 39.648648648648646, "grad_norm": 1.25, "learning_rate": 4.7060855591222084e-05, "loss": 0.1463, "step": 1467 }, { "epoch": 39.67567567567568, "grad_norm": 1.0859375, "learning_rate": 4.7055597635897655e-05, "loss": 0.1271, "step": 1468 }, { "epoch": 39.7027027027027, "grad_norm": 1.3359375, "learning_rate": 4.705033527595859e-05, "loss": 0.1328, "step": 1469 }, { "epoch": 39.729729729729726, "grad_norm": 1.4921875, "learning_rate": 4.7045068512455794e-05, "loss": 0.1888, "step": 1470 }, { "epoch": 39.75675675675676, "grad_norm": 1.5859375, "learning_rate": 4.703979734644107e-05, "loss": 0.1623, "step": 1471 }, { "epoch": 39.78378378378378, "grad_norm": 1.703125, "learning_rate": 4.703452177896711e-05, "loss": 0.1518, "step": 1472 }, { "epoch": 39.810810810810814, "grad_norm": 1.4375, "learning_rate": 4.7029241811087457e-05, "loss": 0.1551, "step": 1473 }, { "epoch": 39.83783783783784, "grad_norm": 1.0703125, "learning_rate": 4.7023957443856545e-05, "loss": 0.1608, "step": 1474 }, { "epoch": 39.86486486486486, "grad_norm": 1.5859375, "learning_rate": 4.7018668678329704e-05, "loss": 0.1764, "step": 1475 }, { "epoch": 39.891891891891895, "grad_norm": 1.8828125, "learning_rate": 4.701337551556312e-05, "loss": 0.2039, "step": 1476 }, { "epoch": 39.91891891891892, "grad_norm": 1.3984375, "learning_rate": 4.700807795661385e-05, "loss": 0.1875, "step": 1477 }, { "epoch": 39.945945945945944, "grad_norm": 1.4921875, "learning_rate": 4.700277600253987e-05, "loss": 0.1716, "step": 1478 }, { "epoch": 39.972972972972975, "grad_norm": 1.3828125, "learning_rate": 4.699746965439998e-05, "loss": 0.1279, "step": 1479 }, { "epoch": 40.0, "grad_norm": 1.15625, "learning_rate": 4.6992158913253914e-05, "loss": 0.1361, "step": 1480 }, { "epoch": 40.027027027027025, "grad_norm": 0.91015625, "learning_rate": 4.698684378016222e-05, "loss": 0.1292, "step": 1481 }, { "epoch": 40.054054054054056, "grad_norm": 1.6484375, "learning_rate": 4.698152425618639e-05, "loss": 0.1369, "step": 1482 }, { "epoch": 40.08108108108108, "grad_norm": 1.890625, "learning_rate": 4.697620034238874e-05, "loss": 0.19, "step": 1483 }, { "epoch": 40.108108108108105, "grad_norm": 1.671875, "learning_rate": 4.6970872039832495e-05, "loss": 0.1869, "step": 1484 }, { "epoch": 40.13513513513514, "grad_norm": 2.171875, "learning_rate": 4.6965539349581735e-05, "loss": 0.2175, "step": 1485 }, { "epoch": 40.16216216216216, "grad_norm": 1.7578125, "learning_rate": 4.696020227270142e-05, "loss": 0.1824, "step": 1486 }, { "epoch": 40.189189189189186, "grad_norm": 1.296875, "learning_rate": 4.69548608102574e-05, "loss": 0.1861, "step": 1487 }, { "epoch": 40.21621621621622, "grad_norm": 1.03125, "learning_rate": 4.694951496331639e-05, "loss": 0.1324, "step": 1488 }, { "epoch": 40.24324324324324, "grad_norm": 1.53125, "learning_rate": 4.694416473294598e-05, "loss": 0.1994, "step": 1489 }, { "epoch": 40.270270270270274, "grad_norm": 1.7734375, "learning_rate": 4.693881012021465e-05, "loss": 0.2293, "step": 1490 }, { "epoch": 40.2972972972973, "grad_norm": 2.1875, "learning_rate": 4.693345112619173e-05, "loss": 0.203, "step": 1491 }, { "epoch": 40.32432432432432, "grad_norm": 0.98828125, "learning_rate": 4.692808775194745e-05, "loss": 0.1147, "step": 1492 }, { "epoch": 40.351351351351354, "grad_norm": 1.3046875, "learning_rate": 4.692271999855288e-05, "loss": 0.1948, "step": 1493 }, { "epoch": 40.37837837837838, "grad_norm": 1.5546875, "learning_rate": 4.691734786708002e-05, "loss": 0.215, "step": 1494 }, { "epoch": 40.4054054054054, "grad_norm": 2.234375, "learning_rate": 4.691197135860168e-05, "loss": 0.3169, "step": 1495 }, { "epoch": 40.432432432432435, "grad_norm": 1.6015625, "learning_rate": 4.6906590474191604e-05, "loss": 0.2782, "step": 1496 }, { "epoch": 40.45945945945946, "grad_norm": 1.4140625, "learning_rate": 4.690120521492436e-05, "loss": 0.1356, "step": 1497 }, { "epoch": 40.486486486486484, "grad_norm": 1.4921875, "learning_rate": 4.689581558187543e-05, "loss": 0.2377, "step": 1498 }, { "epoch": 40.513513513513516, "grad_norm": 1.265625, "learning_rate": 4.689042157612113e-05, "loss": 0.1922, "step": 1499 }, { "epoch": 40.54054054054054, "grad_norm": 1.15625, "learning_rate": 4.6885023198738695e-05, "loss": 0.1299, "step": 1500 }, { "epoch": 40.567567567567565, "grad_norm": 1.40625, "learning_rate": 4.687962045080619e-05, "loss": 0.1831, "step": 1501 }, { "epoch": 40.5945945945946, "grad_norm": 1.34375, "learning_rate": 4.687421333340258e-05, "loss": 0.1646, "step": 1502 }, { "epoch": 40.62162162162162, "grad_norm": 0.890625, "learning_rate": 4.686880184760768e-05, "loss": 0.1035, "step": 1503 }, { "epoch": 40.648648648648646, "grad_norm": 1.125, "learning_rate": 4.6863385994502205e-05, "loss": 0.1348, "step": 1504 }, { "epoch": 40.67567567567568, "grad_norm": 1.265625, "learning_rate": 4.685796577516773e-05, "loss": 0.1383, "step": 1505 }, { "epoch": 40.7027027027027, "grad_norm": 1.3828125, "learning_rate": 4.685254119068669e-05, "loss": 0.1829, "step": 1506 }, { "epoch": 40.729729729729726, "grad_norm": 1.375, "learning_rate": 4.684711224214242e-05, "loss": 0.1969, "step": 1507 }, { "epoch": 40.75675675675676, "grad_norm": 1.125, "learning_rate": 4.684167893061908e-05, "loss": 0.1382, "step": 1508 }, { "epoch": 40.78378378378378, "grad_norm": 1.8125, "learning_rate": 4.6836241257201754e-05, "loss": 0.1711, "step": 1509 }, { "epoch": 40.810810810810814, "grad_norm": 1.046875, "learning_rate": 4.6830799222976364e-05, "loss": 0.1046, "step": 1510 }, { "epoch": 40.83783783783784, "grad_norm": 1.7890625, "learning_rate": 4.6825352829029705e-05, "loss": 0.256, "step": 1511 }, { "epoch": 40.86486486486486, "grad_norm": 1.671875, "learning_rate": 4.681990207644946e-05, "loss": 0.1642, "step": 1512 }, { "epoch": 40.891891891891895, "grad_norm": 1.4765625, "learning_rate": 4.681444696632417e-05, "loss": 0.168, "step": 1513 }, { "epoch": 40.91891891891892, "grad_norm": 1.4609375, "learning_rate": 4.680898749974325e-05, "loss": 0.1904, "step": 1514 }, { "epoch": 40.945945945945944, "grad_norm": 1.3359375, "learning_rate": 4.680352367779698e-05, "loss": 0.1549, "step": 1515 }, { "epoch": 40.972972972972975, "grad_norm": 1.6953125, "learning_rate": 4.679805550157651e-05, "loss": 0.2131, "step": 1516 }, { "epoch": 41.0, "grad_norm": 1.7890625, "learning_rate": 4.679258297217386e-05, "loss": 0.198, "step": 1517 }, { "epoch": 41.027027027027025, "grad_norm": 1.2421875, "learning_rate": 4.678710609068193e-05, "loss": 0.1918, "step": 1518 }, { "epoch": 41.054054054054056, "grad_norm": 1.4140625, "learning_rate": 4.678162485819448e-05, "loss": 0.1996, "step": 1519 }, { "epoch": 41.08108108108108, "grad_norm": 2.203125, "learning_rate": 4.677613927580614e-05, "loss": 0.1681, "step": 1520 }, { "epoch": 41.108108108108105, "grad_norm": 1.5625, "learning_rate": 4.677064934461241e-05, "loss": 0.151, "step": 1521 }, { "epoch": 41.13513513513514, "grad_norm": 2.328125, "learning_rate": 4.6765155065709646e-05, "loss": 0.2149, "step": 1522 }, { "epoch": 41.16216216216216, "grad_norm": 1.59375, "learning_rate": 4.67596564401951e-05, "loss": 0.1825, "step": 1523 }, { "epoch": 41.189189189189186, "grad_norm": 1.59375, "learning_rate": 4.675415346916686e-05, "loss": 0.2365, "step": 1524 }, { "epoch": 41.21621621621622, "grad_norm": 1.3046875, "learning_rate": 4.6748646153723904e-05, "loss": 0.1784, "step": 1525 }, { "epoch": 41.24324324324324, "grad_norm": 2.09375, "learning_rate": 4.674313449496607e-05, "loss": 0.2333, "step": 1526 }, { "epoch": 41.270270270270274, "grad_norm": 1.6171875, "learning_rate": 4.673761849399407e-05, "loss": 0.2442, "step": 1527 }, { "epoch": 41.2972972972973, "grad_norm": 1.328125, "learning_rate": 4.6732098151909464e-05, "loss": 0.1331, "step": 1528 }, { "epoch": 41.32432432432432, "grad_norm": 1.46875, "learning_rate": 4.6726573469814705e-05, "loss": 0.1986, "step": 1529 }, { "epoch": 41.351351351351354, "grad_norm": 1.8828125, "learning_rate": 4.67210444488131e-05, "loss": 0.1686, "step": 1530 }, { "epoch": 41.37837837837838, "grad_norm": 1.84375, "learning_rate": 4.671551109000881e-05, "loss": 0.1461, "step": 1531 }, { "epoch": 41.4054054054054, "grad_norm": 1.125, "learning_rate": 4.670997339450689e-05, "loss": 0.1158, "step": 1532 }, { "epoch": 41.432432432432435, "grad_norm": 1.6953125, "learning_rate": 4.670443136341323e-05, "loss": 0.1278, "step": 1533 }, { "epoch": 41.45945945945946, "grad_norm": 2.09375, "learning_rate": 4.669888499783461e-05, "loss": 0.1904, "step": 1534 }, { "epoch": 41.486486486486484, "grad_norm": 1.375, "learning_rate": 4.6693334298878674e-05, "loss": 0.1591, "step": 1535 }, { "epoch": 41.513513513513516, "grad_norm": 1.4609375, "learning_rate": 4.668777926765393e-05, "loss": 0.1459, "step": 1536 }, { "epoch": 41.54054054054054, "grad_norm": 1.6171875, "learning_rate": 4.668221990526972e-05, "loss": 0.1783, "step": 1537 }, { "epoch": 41.567567567567565, "grad_norm": 2.59375, "learning_rate": 4.667665621283629e-05, "loss": 0.2062, "step": 1538 }, { "epoch": 41.5945945945946, "grad_norm": 2.296875, "learning_rate": 4.667108819146474e-05, "loss": 0.1979, "step": 1539 }, { "epoch": 41.62162162162162, "grad_norm": 1.671875, "learning_rate": 4.6665515842267035e-05, "loss": 0.2139, "step": 1540 }, { "epoch": 41.648648648648646, "grad_norm": 1.953125, "learning_rate": 4.6659939166355994e-05, "loss": 0.1438, "step": 1541 }, { "epoch": 41.67567567567568, "grad_norm": 2.421875, "learning_rate": 4.665435816484531e-05, "loss": 0.2325, "step": 1542 }, { "epoch": 41.7027027027027, "grad_norm": 2.578125, "learning_rate": 4.6648772838849535e-05, "loss": 0.2408, "step": 1543 }, { "epoch": 41.729729729729726, "grad_norm": 1.265625, "learning_rate": 4.66431831894841e-05, "loss": 0.1778, "step": 1544 }, { "epoch": 41.75675675675676, "grad_norm": 2.3125, "learning_rate": 4.663758921786526e-05, "loss": 0.319, "step": 1545 }, { "epoch": 41.78378378378378, "grad_norm": 2.15625, "learning_rate": 4.663199092511019e-05, "loss": 0.1638, "step": 1546 }, { "epoch": 41.810810810810814, "grad_norm": 2.234375, "learning_rate": 4.662638831233688e-05, "loss": 0.2274, "step": 1547 }, { "epoch": 41.83783783783784, "grad_norm": 1.59375, "learning_rate": 4.66207813806642e-05, "loss": 0.1684, "step": 1548 }, { "epoch": 41.86486486486486, "grad_norm": 1.4609375, "learning_rate": 4.661517013121189e-05, "loss": 0.1621, "step": 1549 }, { "epoch": 41.891891891891895, "grad_norm": 2.203125, "learning_rate": 4.660955456510054e-05, "loss": 0.2039, "step": 1550 }, { "epoch": 41.91891891891892, "grad_norm": 2.078125, "learning_rate": 4.66039346834516e-05, "loss": 0.1771, "step": 1551 }, { "epoch": 41.945945945945944, "grad_norm": 1.3828125, "learning_rate": 4.659831048738741e-05, "loss": 0.1665, "step": 1552 }, { "epoch": 41.972972972972975, "grad_norm": 1.3046875, "learning_rate": 4.6592681978031126e-05, "loss": 0.1521, "step": 1553 }, { "epoch": 42.0, "grad_norm": 1.6484375, "learning_rate": 4.6587049156506807e-05, "loss": 0.1618, "step": 1554 }, { "epoch": 42.027027027027025, "grad_norm": 1.046875, "learning_rate": 4.6581412023939354e-05, "loss": 0.1183, "step": 1555 }, { "epoch": 42.054054054054056, "grad_norm": 1.40625, "learning_rate": 4.6575770581454515e-05, "loss": 0.167, "step": 1556 }, { "epoch": 42.08108108108108, "grad_norm": 1.53125, "learning_rate": 4.657012483017893e-05, "loss": 0.1913, "step": 1557 }, { "epoch": 42.108108108108105, "grad_norm": 1.234375, "learning_rate": 4.656447477124009e-05, "loss": 0.1333, "step": 1558 }, { "epoch": 42.13513513513514, "grad_norm": 1.25, "learning_rate": 4.655882040576632e-05, "loss": 0.1552, "step": 1559 }, { "epoch": 42.16216216216216, "grad_norm": 0.97265625, "learning_rate": 4.655316173488685e-05, "loss": 0.1012, "step": 1560 }, { "epoch": 42.189189189189186, "grad_norm": 1.0546875, "learning_rate": 4.6547498759731724e-05, "loss": 0.1394, "step": 1561 }, { "epoch": 42.21621621621622, "grad_norm": 1.21875, "learning_rate": 4.654183148143187e-05, "loss": 0.1197, "step": 1562 }, { "epoch": 42.24324324324324, "grad_norm": 1.6015625, "learning_rate": 4.653615990111908e-05, "loss": 0.1834, "step": 1563 }, { "epoch": 42.270270270270274, "grad_norm": 1.75, "learning_rate": 4.6530484019926e-05, "loss": 0.2272, "step": 1564 }, { "epoch": 42.2972972972973, "grad_norm": 1.046875, "learning_rate": 4.652480383898612e-05, "loss": 0.1229, "step": 1565 }, { "epoch": 42.32432432432432, "grad_norm": 1.8359375, "learning_rate": 4.651911935943381e-05, "loss": 0.1856, "step": 1566 }, { "epoch": 42.351351351351354, "grad_norm": 1.65625, "learning_rate": 4.6513430582404286e-05, "loss": 0.1835, "step": 1567 }, { "epoch": 42.37837837837838, "grad_norm": 1.265625, "learning_rate": 4.650773750903363e-05, "loss": 0.1274, "step": 1568 }, { "epoch": 42.4054054054054, "grad_norm": 1.4140625, "learning_rate": 4.650204014045877e-05, "loss": 0.1816, "step": 1569 }, { "epoch": 42.432432432432435, "grad_norm": 1.84375, "learning_rate": 4.64963384778175e-05, "loss": 0.1966, "step": 1570 }, { "epoch": 42.45945945945946, "grad_norm": 1.5546875, "learning_rate": 4.649063252224848e-05, "loss": 0.1356, "step": 1571 }, { "epoch": 42.486486486486484, "grad_norm": 1.875, "learning_rate": 4.6484922274891214e-05, "loss": 0.191, "step": 1572 }, { "epoch": 42.513513513513516, "grad_norm": 1.875, "learning_rate": 4.647920773688606e-05, "loss": 0.1793, "step": 1573 }, { "epoch": 42.54054054054054, "grad_norm": 2.1875, "learning_rate": 4.647348890937425e-05, "loss": 0.17, "step": 1574 }, { "epoch": 42.567567567567565, "grad_norm": 1.359375, "learning_rate": 4.646776579349786e-05, "loss": 0.1546, "step": 1575 }, { "epoch": 42.5945945945946, "grad_norm": 1.8203125, "learning_rate": 4.646203839039983e-05, "loss": 0.1898, "step": 1576 }, { "epoch": 42.62162162162162, "grad_norm": 1.3125, "learning_rate": 4.645630670122394e-05, "loss": 0.1476, "step": 1577 }, { "epoch": 42.648648648648646, "grad_norm": 1.8046875, "learning_rate": 4.645057072711484e-05, "loss": 0.1569, "step": 1578 }, { "epoch": 42.67567567567568, "grad_norm": 1.7734375, "learning_rate": 4.6444830469218036e-05, "loss": 0.2587, "step": 1579 }, { "epoch": 42.7027027027027, "grad_norm": 1.296875, "learning_rate": 4.643908592867989e-05, "loss": 0.1562, "step": 1580 }, { "epoch": 42.729729729729726, "grad_norm": 1.1015625, "learning_rate": 4.643333710664761e-05, "loss": 0.1373, "step": 1581 }, { "epoch": 42.75675675675676, "grad_norm": 1.3125, "learning_rate": 4.642758400426927e-05, "loss": 0.1709, "step": 1582 }, { "epoch": 42.78378378378378, "grad_norm": 1.328125, "learning_rate": 4.6421826622693785e-05, "loss": 0.1432, "step": 1583 }, { "epoch": 42.810810810810814, "grad_norm": 1.5625, "learning_rate": 4.641606496307095e-05, "loss": 0.1802, "step": 1584 }, { "epoch": 42.83783783783784, "grad_norm": 1.4921875, "learning_rate": 4.641029902655138e-05, "loss": 0.1922, "step": 1585 }, { "epoch": 42.86486486486486, "grad_norm": 1.1328125, "learning_rate": 4.6404528814286575e-05, "loss": 0.1206, "step": 1586 }, { "epoch": 42.891891891891895, "grad_norm": 1.7109375, "learning_rate": 4.639875432742886e-05, "loss": 0.3065, "step": 1587 }, { "epoch": 42.91891891891892, "grad_norm": 1.8515625, "learning_rate": 4.6392975567131445e-05, "loss": 0.241, "step": 1588 }, { "epoch": 42.945945945945944, "grad_norm": 1.515625, "learning_rate": 4.638719253454837e-05, "loss": 0.1801, "step": 1589 }, { "epoch": 42.972972972972975, "grad_norm": 1.4375, "learning_rate": 4.638140523083455e-05, "loss": 0.1722, "step": 1590 }, { "epoch": 43.0, "grad_norm": 1.71875, "learning_rate": 4.637561365714571e-05, "loss": 0.2009, "step": 1591 }, { "epoch": 43.027027027027025, "grad_norm": 1.40625, "learning_rate": 4.6369817814638475e-05, "loss": 0.2256, "step": 1592 }, { "epoch": 43.054054054054056, "grad_norm": 1.3203125, "learning_rate": 4.636401770447031e-05, "loss": 0.1069, "step": 1593 }, { "epoch": 43.08108108108108, "grad_norm": 2.109375, "learning_rate": 4.6358213327799516e-05, "loss": 0.2047, "step": 1594 }, { "epoch": 43.108108108108105, "grad_norm": 1.140625, "learning_rate": 4.6352404685785255e-05, "loss": 0.1307, "step": 1595 }, { "epoch": 43.13513513513514, "grad_norm": 1.4453125, "learning_rate": 4.6346591779587545e-05, "loss": 0.1629, "step": 1596 }, { "epoch": 43.16216216216216, "grad_norm": 1.0078125, "learning_rate": 4.634077461036726e-05, "loss": 0.1145, "step": 1597 }, { "epoch": 43.189189189189186, "grad_norm": 1.2578125, "learning_rate": 4.6334953179286105e-05, "loss": 0.1596, "step": 1598 }, { "epoch": 43.21621621621622, "grad_norm": 1.390625, "learning_rate": 4.6329127487506666e-05, "loss": 0.2022, "step": 1599 }, { "epoch": 43.24324324324324, "grad_norm": 1.234375, "learning_rate": 4.632329753619234e-05, "loss": 0.2031, "step": 1600 }, { "epoch": 43.270270270270274, "grad_norm": 1.453125, "learning_rate": 4.631746332650743e-05, "loss": 0.2073, "step": 1601 }, { "epoch": 43.2972972972973, "grad_norm": 1.5234375, "learning_rate": 4.631162485961702e-05, "loss": 0.1597, "step": 1602 }, { "epoch": 43.32432432432432, "grad_norm": 1.3984375, "learning_rate": 4.630578213668711e-05, "loss": 0.1475, "step": 1603 }, { "epoch": 43.351351351351354, "grad_norm": 1.453125, "learning_rate": 4.629993515888452e-05, "loss": 0.1954, "step": 1604 }, { "epoch": 43.37837837837838, "grad_norm": 1.1953125, "learning_rate": 4.62940839273769e-05, "loss": 0.136, "step": 1605 }, { "epoch": 43.4054054054054, "grad_norm": 1.3515625, "learning_rate": 4.628822844333278e-05, "loss": 0.2361, "step": 1606 }, { "epoch": 43.432432432432435, "grad_norm": 1.125, "learning_rate": 4.628236870792154e-05, "loss": 0.1408, "step": 1607 }, { "epoch": 43.45945945945946, "grad_norm": 1.3125, "learning_rate": 4.62765047223134e-05, "loss": 0.2052, "step": 1608 }, { "epoch": 43.486486486486484, "grad_norm": 2.0, "learning_rate": 4.627063648767941e-05, "loss": 0.313, "step": 1609 }, { "epoch": 43.513513513513516, "grad_norm": 1.4921875, "learning_rate": 4.62647640051915e-05, "loss": 0.2342, "step": 1610 }, { "epoch": 43.54054054054054, "grad_norm": 1.6015625, "learning_rate": 4.625888727602242e-05, "loss": 0.2271, "step": 1611 }, { "epoch": 43.567567567567565, "grad_norm": 1.3671875, "learning_rate": 4.6253006301345804e-05, "loss": 0.2002, "step": 1612 }, { "epoch": 43.5945945945946, "grad_norm": 1.8984375, "learning_rate": 4.62471210823361e-05, "loss": 0.2675, "step": 1613 }, { "epoch": 43.62162162162162, "grad_norm": 1.4609375, "learning_rate": 4.6241231620168615e-05, "loss": 0.1635, "step": 1614 }, { "epoch": 43.648648648648646, "grad_norm": 1.390625, "learning_rate": 4.623533791601951e-05, "loss": 0.1484, "step": 1615 }, { "epoch": 43.67567567567568, "grad_norm": 1.7109375, "learning_rate": 4.622943997106578e-05, "loss": 0.2023, "step": 1616 }, { "epoch": 43.7027027027027, "grad_norm": 1.5078125, "learning_rate": 4.622353778648528e-05, "loss": 0.2404, "step": 1617 }, { "epoch": 43.729729729729726, "grad_norm": 1.65625, "learning_rate": 4.621763136345671e-05, "loss": 0.1725, "step": 1618 }, { "epoch": 43.75675675675676, "grad_norm": 0.87890625, "learning_rate": 4.62117207031596e-05, "loss": 0.104, "step": 1619 }, { "epoch": 43.78378378378378, "grad_norm": 0.89453125, "learning_rate": 4.6205805806774357e-05, "loss": 0.1133, "step": 1620 }, { "epoch": 43.810810810810814, "grad_norm": 1.2734375, "learning_rate": 4.61998866754822e-05, "loss": 0.1618, "step": 1621 }, { "epoch": 43.83783783783784, "grad_norm": 0.890625, "learning_rate": 4.619396331046521e-05, "loss": 0.1209, "step": 1622 }, { "epoch": 43.86486486486486, "grad_norm": 1.5625, "learning_rate": 4.6188035712906325e-05, "loss": 0.2228, "step": 1623 }, { "epoch": 43.891891891891895, "grad_norm": 1.3203125, "learning_rate": 4.618210388398931e-05, "loss": 0.1853, "step": 1624 }, { "epoch": 43.91891891891892, "grad_norm": 1.625, "learning_rate": 4.6176167824898773e-05, "loss": 0.2292, "step": 1625 }, { "epoch": 43.945945945945944, "grad_norm": 1.953125, "learning_rate": 4.617022753682019e-05, "loss": 0.2334, "step": 1626 }, { "epoch": 43.972972972972975, "grad_norm": 1.28125, "learning_rate": 4.6164283020939855e-05, "loss": 0.1758, "step": 1627 }, { "epoch": 44.0, "grad_norm": 1.5234375, "learning_rate": 4.6158334278444925e-05, "loss": 0.1393, "step": 1628 }, { "epoch": 44.027027027027025, "grad_norm": 1.4609375, "learning_rate": 4.6152381310523387e-05, "loss": 0.1804, "step": 1629 }, { "epoch": 44.054054054054056, "grad_norm": 2.09375, "learning_rate": 4.614642411836409e-05, "loss": 0.2647, "step": 1630 }, { "epoch": 44.08108108108108, "grad_norm": 0.8359375, "learning_rate": 4.614046270315671e-05, "loss": 0.1066, "step": 1631 }, { "epoch": 44.108108108108105, "grad_norm": 1.671875, "learning_rate": 4.613449706609176e-05, "loss": 0.2225, "step": 1632 }, { "epoch": 44.13513513513514, "grad_norm": 1.7421875, "learning_rate": 4.612852720836062e-05, "loss": 0.1953, "step": 1633 }, { "epoch": 44.16216216216216, "grad_norm": 1.8671875, "learning_rate": 4.6122553131155506e-05, "loss": 0.2548, "step": 1634 }, { "epoch": 44.189189189189186, "grad_norm": 1.4140625, "learning_rate": 4.6116574835669465e-05, "loss": 0.1787, "step": 1635 }, { "epoch": 44.21621621621622, "grad_norm": 1.46875, "learning_rate": 4.6110592323096384e-05, "loss": 0.131, "step": 1636 }, { "epoch": 44.24324324324324, "grad_norm": 1.2890625, "learning_rate": 4.610460559463101e-05, "loss": 0.171, "step": 1637 }, { "epoch": 44.270270270270274, "grad_norm": 1.3046875, "learning_rate": 4.6098614651468935e-05, "loss": 0.1631, "step": 1638 }, { "epoch": 44.2972972972973, "grad_norm": 1.6875, "learning_rate": 4.609261949480656e-05, "loss": 0.118, "step": 1639 }, { "epoch": 44.32432432432432, "grad_norm": 1.4609375, "learning_rate": 4.608662012584116e-05, "loss": 0.1861, "step": 1640 }, { "epoch": 44.351351351351354, "grad_norm": 1.453125, "learning_rate": 4.608061654577084e-05, "loss": 0.1838, "step": 1641 }, { "epoch": 44.37837837837838, "grad_norm": 1.25, "learning_rate": 4.607460875579453e-05, "loss": 0.1342, "step": 1642 }, { "epoch": 44.4054054054054, "grad_norm": 1.5859375, "learning_rate": 4.6068596757112035e-05, "loss": 0.1672, "step": 1643 }, { "epoch": 44.432432432432435, "grad_norm": 1.9140625, "learning_rate": 4.606258055092397e-05, "loss": 0.1866, "step": 1644 }, { "epoch": 44.45945945945946, "grad_norm": 1.28125, "learning_rate": 4.605656013843181e-05, "loss": 0.1518, "step": 1645 }, { "epoch": 44.486486486486484, "grad_norm": 1.6328125, "learning_rate": 4.6050535520837854e-05, "loss": 0.2093, "step": 1646 }, { "epoch": 44.513513513513516, "grad_norm": 1.8359375, "learning_rate": 4.604450669934526e-05, "loss": 0.1794, "step": 1647 }, { "epoch": 44.54054054054054, "grad_norm": 1.6015625, "learning_rate": 4.6038473675158e-05, "loss": 0.15, "step": 1648 }, { "epoch": 44.567567567567565, "grad_norm": 1.1640625, "learning_rate": 4.603243644948091e-05, "loss": 0.1264, "step": 1649 }, { "epoch": 44.5945945945946, "grad_norm": 1.1015625, "learning_rate": 4.602639502351965e-05, "loss": 0.158, "step": 1650 }, { "epoch": 44.62162162162162, "grad_norm": 1.5078125, "learning_rate": 4.6020349398480725e-05, "loss": 0.1422, "step": 1651 }, { "epoch": 44.648648648648646, "grad_norm": 1.328125, "learning_rate": 4.601429957557147e-05, "loss": 0.2385, "step": 1652 }, { "epoch": 44.67567567567568, "grad_norm": 1.203125, "learning_rate": 4.600824555600007e-05, "loss": 0.1331, "step": 1653 }, { "epoch": 44.7027027027027, "grad_norm": 1.6484375, "learning_rate": 4.6002187340975555e-05, "loss": 0.2178, "step": 1654 }, { "epoch": 44.729729729729726, "grad_norm": 1.4140625, "learning_rate": 4.599612493170777e-05, "loss": 0.1746, "step": 1655 }, { "epoch": 44.75675675675676, "grad_norm": 1.5078125, "learning_rate": 4.5990058329407404e-05, "loss": 0.2478, "step": 1656 }, { "epoch": 44.78378378378378, "grad_norm": 1.703125, "learning_rate": 4.5983987535286e-05, "loss": 0.2808, "step": 1657 }, { "epoch": 44.810810810810814, "grad_norm": 1.875, "learning_rate": 4.597791255055592e-05, "loss": 0.2525, "step": 1658 }, { "epoch": 44.83783783783784, "grad_norm": 1.0703125, "learning_rate": 4.597183337643037e-05, "loss": 0.1614, "step": 1659 }, { "epoch": 44.86486486486486, "grad_norm": 1.2578125, "learning_rate": 4.596575001412339e-05, "loss": 0.1352, "step": 1660 }, { "epoch": 44.891891891891895, "grad_norm": 1.15625, "learning_rate": 4.5959662464849854e-05, "loss": 0.1531, "step": 1661 }, { "epoch": 44.91891891891892, "grad_norm": 1.1953125, "learning_rate": 4.595357072982549e-05, "loss": 0.1334, "step": 1662 }, { "epoch": 44.945945945945944, "grad_norm": 1.453125, "learning_rate": 4.594747481026684e-05, "loss": 0.1727, "step": 1663 }, { "epoch": 44.972972972972975, "grad_norm": 1.421875, "learning_rate": 4.5941374707391295e-05, "loss": 0.1616, "step": 1664 }, { "epoch": 45.0, "grad_norm": 1.3984375, "learning_rate": 4.5935270422417064e-05, "loss": 0.1868, "step": 1665 }, { "epoch": 45.027027027027025, "grad_norm": 1.328125, "learning_rate": 4.592916195656322e-05, "loss": 0.1734, "step": 1666 }, { "epoch": 45.054054054054056, "grad_norm": 1.625, "learning_rate": 4.5923049311049636e-05, "loss": 0.2224, "step": 1667 }, { "epoch": 45.08108108108108, "grad_norm": 1.234375, "learning_rate": 4.591693248709706e-05, "loss": 0.1472, "step": 1668 }, { "epoch": 45.108108108108105, "grad_norm": 1.125, "learning_rate": 4.591081148592704e-05, "loss": 0.1346, "step": 1669 }, { "epoch": 45.13513513513514, "grad_norm": 1.5234375, "learning_rate": 4.590468630876197e-05, "loss": 0.1876, "step": 1670 }, { "epoch": 45.16216216216216, "grad_norm": 1.515625, "learning_rate": 4.589855695682508e-05, "loss": 0.2143, "step": 1671 }, { "epoch": 45.189189189189186, "grad_norm": 2.28125, "learning_rate": 4.589242343134043e-05, "loss": 0.2206, "step": 1672 }, { "epoch": 45.21621621621622, "grad_norm": 1.390625, "learning_rate": 4.588628573353293e-05, "loss": 0.1137, "step": 1673 }, { "epoch": 45.24324324324324, "grad_norm": 1.6484375, "learning_rate": 4.588014386462829e-05, "loss": 0.1321, "step": 1674 }, { "epoch": 45.270270270270274, "grad_norm": 1.7890625, "learning_rate": 4.5873997825853085e-05, "loss": 0.2287, "step": 1675 }, { "epoch": 45.2972972972973, "grad_norm": 1.296875, "learning_rate": 4.586784761843471e-05, "loss": 0.161, "step": 1676 }, { "epoch": 45.32432432432432, "grad_norm": 1.6328125, "learning_rate": 4.5861693243601386e-05, "loss": 0.1776, "step": 1677 }, { "epoch": 45.351351351351354, "grad_norm": 1.1875, "learning_rate": 4.5855534702582186e-05, "loss": 0.1227, "step": 1678 }, { "epoch": 45.37837837837838, "grad_norm": 0.99609375, "learning_rate": 4.584937199660699e-05, "loss": 0.0981, "step": 1679 }, { "epoch": 45.4054054054054, "grad_norm": 1.3359375, "learning_rate": 4.584320512690652e-05, "loss": 0.1889, "step": 1680 }, { "epoch": 45.432432432432435, "grad_norm": 1.265625, "learning_rate": 4.583703409471233e-05, "loss": 0.1471, "step": 1681 }, { "epoch": 45.45945945945946, "grad_norm": 1.28125, "learning_rate": 4.5830858901256826e-05, "loss": 0.1823, "step": 1682 }, { "epoch": 45.486486486486484, "grad_norm": 1.1875, "learning_rate": 4.5824679547773196e-05, "loss": 0.1904, "step": 1683 }, { "epoch": 45.513513513513516, "grad_norm": 1.5390625, "learning_rate": 4.581849603549551e-05, "loss": 0.1862, "step": 1684 }, { "epoch": 45.54054054054054, "grad_norm": 0.95703125, "learning_rate": 4.581230836565864e-05, "loss": 0.0964, "step": 1685 }, { "epoch": 45.567567567567565, "grad_norm": 1.375, "learning_rate": 4.5806116539498295e-05, "loss": 0.2141, "step": 1686 }, { "epoch": 45.5945945945946, "grad_norm": 1.5859375, "learning_rate": 4.5799920558251006e-05, "loss": 0.2333, "step": 1687 }, { "epoch": 45.62162162162162, "grad_norm": 1.515625, "learning_rate": 4.5793720423154163e-05, "loss": 0.2683, "step": 1688 }, { "epoch": 45.648648648648646, "grad_norm": 1.625, "learning_rate": 4.5787516135445954e-05, "loss": 0.2805, "step": 1689 }, { "epoch": 45.67567567567568, "grad_norm": 1.984375, "learning_rate": 4.57813076963654e-05, "loss": 0.2057, "step": 1690 }, { "epoch": 45.7027027027027, "grad_norm": 1.203125, "learning_rate": 4.5775095107152346e-05, "loss": 0.1323, "step": 1691 }, { "epoch": 45.729729729729726, "grad_norm": 1.5703125, "learning_rate": 4.5768878369047506e-05, "loss": 0.1892, "step": 1692 }, { "epoch": 45.75675675675676, "grad_norm": 1.359375, "learning_rate": 4.576265748329238e-05, "loss": 0.1866, "step": 1693 }, { "epoch": 45.78378378378378, "grad_norm": 1.5546875, "learning_rate": 4.5756432451129316e-05, "loss": 0.1994, "step": 1694 }, { "epoch": 45.810810810810814, "grad_norm": 1.296875, "learning_rate": 4.575020327380147e-05, "loss": 0.131, "step": 1695 }, { "epoch": 45.83783783783784, "grad_norm": 2.1875, "learning_rate": 4.574396995255286e-05, "loss": 0.3235, "step": 1696 }, { "epoch": 45.86486486486486, "grad_norm": 1.140625, "learning_rate": 4.5737732488628305e-05, "loss": 0.1706, "step": 1697 }, { "epoch": 45.891891891891895, "grad_norm": 0.9765625, "learning_rate": 4.573149088327345e-05, "loss": 0.1169, "step": 1698 }, { "epoch": 45.91891891891892, "grad_norm": 1.703125, "learning_rate": 4.572524513773478e-05, "loss": 0.2541, "step": 1699 }, { "epoch": 45.945945945945944, "grad_norm": 1.1640625, "learning_rate": 4.5718995253259606e-05, "loss": 0.123, "step": 1700 }, { "epoch": 45.972972972972975, "grad_norm": 1.5703125, "learning_rate": 4.571274123109606e-05, "loss": 0.2007, "step": 1701 }, { "epoch": 46.0, "grad_norm": 1.6640625, "learning_rate": 4.570648307249309e-05, "loss": 0.2017, "step": 1702 }, { "epoch": 46.027027027027025, "grad_norm": 1.4921875, "learning_rate": 4.5700220778700504e-05, "loss": 0.1848, "step": 1703 }, { "epoch": 46.054054054054056, "grad_norm": 1.234375, "learning_rate": 4.5693954350968895e-05, "loss": 0.1347, "step": 1704 }, { "epoch": 46.08108108108108, "grad_norm": 2.109375, "learning_rate": 4.568768379054972e-05, "loss": 0.1965, "step": 1705 }, { "epoch": 46.108108108108105, "grad_norm": 1.3359375, "learning_rate": 4.568140909869522e-05, "loss": 0.1815, "step": 1706 }, { "epoch": 46.13513513513514, "grad_norm": 0.98828125, "learning_rate": 4.5675130276658485e-05, "loss": 0.1295, "step": 1707 }, { "epoch": 46.16216216216216, "grad_norm": 1.234375, "learning_rate": 4.5668847325693446e-05, "loss": 0.1275, "step": 1708 }, { "epoch": 46.189189189189186, "grad_norm": 1.2734375, "learning_rate": 4.566256024705483e-05, "loss": 0.1308, "step": 1709 }, { "epoch": 46.21621621621622, "grad_norm": 1.3828125, "learning_rate": 4.565626904199819e-05, "loss": 0.1935, "step": 1710 }, { "epoch": 46.24324324324324, "grad_norm": 1.3203125, "learning_rate": 4.564997371177992e-05, "loss": 0.1548, "step": 1711 }, { "epoch": 46.270270270270274, "grad_norm": 1.3984375, "learning_rate": 4.564367425765723e-05, "loss": 0.1599, "step": 1712 }, { "epoch": 46.2972972972973, "grad_norm": 1.9921875, "learning_rate": 4.563737068088815e-05, "loss": 0.2431, "step": 1713 }, { "epoch": 46.32432432432432, "grad_norm": 1.3203125, "learning_rate": 4.563106298273154e-05, "loss": 0.1752, "step": 1714 }, { "epoch": 46.351351351351354, "grad_norm": 1.625, "learning_rate": 4.562475116444708e-05, "loss": 0.207, "step": 1715 }, { "epoch": 46.37837837837838, "grad_norm": 1.515625, "learning_rate": 4.561843522729526e-05, "loss": 0.1462, "step": 1716 }, { "epoch": 46.4054054054054, "grad_norm": 1.7265625, "learning_rate": 4.561211517253742e-05, "loss": 0.169, "step": 1717 }, { "epoch": 46.432432432432435, "grad_norm": 1.546875, "learning_rate": 4.560579100143569e-05, "loss": 0.1735, "step": 1718 }, { "epoch": 46.45945945945946, "grad_norm": 1.7890625, "learning_rate": 4.559946271525306e-05, "loss": 0.2249, "step": 1719 }, { "epoch": 46.486486486486484, "grad_norm": 1.40625, "learning_rate": 4.559313031525331e-05, "loss": 0.2249, "step": 1720 }, { "epoch": 46.513513513513516, "grad_norm": 0.91796875, "learning_rate": 4.558679380270105e-05, "loss": 0.1421, "step": 1721 }, { "epoch": 46.54054054054054, "grad_norm": 1.265625, "learning_rate": 4.558045317886171e-05, "loss": 0.1285, "step": 1722 }, { "epoch": 46.567567567567565, "grad_norm": 2.3125, "learning_rate": 4.557410844500156e-05, "loss": 0.2771, "step": 1723 }, { "epoch": 46.5945945945946, "grad_norm": 1.1015625, "learning_rate": 4.556775960238767e-05, "loss": 0.1096, "step": 1724 }, { "epoch": 46.62162162162162, "grad_norm": 1.609375, "learning_rate": 4.5561406652287927e-05, "loss": 0.1926, "step": 1725 }, { "epoch": 46.648648648648646, "grad_norm": 1.2890625, "learning_rate": 4.555504959597105e-05, "loss": 0.1467, "step": 1726 }, { "epoch": 46.67567567567568, "grad_norm": 1.75, "learning_rate": 4.5548688434706586e-05, "loss": 0.1733, "step": 1727 }, { "epoch": 46.7027027027027, "grad_norm": 1.4765625, "learning_rate": 4.554232316976488e-05, "loss": 0.1295, "step": 1728 }, { "epoch": 46.729729729729726, "grad_norm": 1.2265625, "learning_rate": 4.553595380241711e-05, "loss": 0.1364, "step": 1729 }, { "epoch": 46.75675675675676, "grad_norm": 1.9921875, "learning_rate": 4.552958033393527e-05, "loss": 0.1348, "step": 1730 }, { "epoch": 46.78378378378378, "grad_norm": 2.359375, "learning_rate": 4.5523202765592176e-05, "loss": 0.3274, "step": 1731 }, { "epoch": 46.810810810810814, "grad_norm": 1.2578125, "learning_rate": 4.5516821098661464e-05, "loss": 0.163, "step": 1732 }, { "epoch": 46.83783783783784, "grad_norm": 1.6640625, "learning_rate": 4.551043533441758e-05, "loss": 0.2022, "step": 1733 }, { "epoch": 46.86486486486486, "grad_norm": 1.5390625, "learning_rate": 4.550404547413579e-05, "loss": 0.1872, "step": 1734 }, { "epoch": 46.891891891891895, "grad_norm": 1.21875, "learning_rate": 4.54976515190922e-05, "loss": 0.1222, "step": 1735 }, { "epoch": 46.91891891891892, "grad_norm": 1.125, "learning_rate": 4.54912534705637e-05, "loss": 0.1104, "step": 1736 }, { "epoch": 46.945945945945944, "grad_norm": 0.828125, "learning_rate": 4.5484851329828004e-05, "loss": 0.1005, "step": 1737 }, { "epoch": 46.972972972972975, "grad_norm": 1.671875, "learning_rate": 4.5478445098163676e-05, "loss": 0.3048, "step": 1738 }, { "epoch": 47.0, "grad_norm": 1.296875, "learning_rate": 4.547203477685005e-05, "loss": 0.1841, "step": 1739 }, { "epoch": 47.027027027027025, "grad_norm": 1.546875, "learning_rate": 4.546562036716732e-05, "loss": 0.1926, "step": 1740 }, { "epoch": 47.054054054054056, "grad_norm": 1.28125, "learning_rate": 4.545920187039645e-05, "loss": 0.1494, "step": 1741 }, { "epoch": 47.08108108108108, "grad_norm": 1.4765625, "learning_rate": 4.545277928781927e-05, "loss": 0.1961, "step": 1742 }, { "epoch": 47.108108108108105, "grad_norm": 1.4765625, "learning_rate": 4.5446352620718396e-05, "loss": 0.215, "step": 1743 }, { "epoch": 47.13513513513514, "grad_norm": 1.359375, "learning_rate": 4.5439921870377266e-05, "loss": 0.1401, "step": 1744 }, { "epoch": 47.16216216216216, "grad_norm": 1.7421875, "learning_rate": 4.5433487038080124e-05, "loss": 0.162, "step": 1745 }, { "epoch": 47.189189189189186, "grad_norm": 1.1015625, "learning_rate": 4.5427048125112054e-05, "loss": 0.1288, "step": 1746 }, { "epoch": 47.21621621621622, "grad_norm": 1.5703125, "learning_rate": 4.5420605132758934e-05, "loss": 0.1823, "step": 1747 }, { "epoch": 47.24324324324324, "grad_norm": 0.84375, "learning_rate": 4.5414158062307454e-05, "loss": 0.0877, "step": 1748 }, { "epoch": 47.270270270270274, "grad_norm": 1.1328125, "learning_rate": 4.540770691504513e-05, "loss": 0.1177, "step": 1749 }, { "epoch": 47.2972972972973, "grad_norm": 1.171875, "learning_rate": 4.5401251692260296e-05, "loss": 0.1284, "step": 1750 }, { "epoch": 47.32432432432432, "grad_norm": 1.9921875, "learning_rate": 4.539479239524209e-05, "loss": 0.2191, "step": 1751 }, { "epoch": 47.351351351351354, "grad_norm": 1.4140625, "learning_rate": 4.5388329025280464e-05, "loss": 0.1266, "step": 1752 }, { "epoch": 47.37837837837838, "grad_norm": 1.109375, "learning_rate": 4.5381861583666186e-05, "loss": 0.1333, "step": 1753 }, { "epoch": 47.4054054054054, "grad_norm": 1.5390625, "learning_rate": 4.5375390071690834e-05, "loss": 0.1866, "step": 1754 }, { "epoch": 47.432432432432435, "grad_norm": 1.5546875, "learning_rate": 4.5368914490646806e-05, "loss": 0.1908, "step": 1755 }, { "epoch": 47.45945945945946, "grad_norm": 1.234375, "learning_rate": 4.5362434841827304e-05, "loss": 0.1519, "step": 1756 }, { "epoch": 47.486486486486484, "grad_norm": 1.234375, "learning_rate": 4.5355951126526356e-05, "loss": 0.1388, "step": 1757 }, { "epoch": 47.513513513513516, "grad_norm": 1.515625, "learning_rate": 4.534946334603879e-05, "loss": 0.1562, "step": 1758 }, { "epoch": 47.54054054054054, "grad_norm": 1.5703125, "learning_rate": 4.5342971501660233e-05, "loss": 0.2556, "step": 1759 }, { "epoch": 47.567567567567565, "grad_norm": 1.7890625, "learning_rate": 4.533647559468717e-05, "loss": 0.1786, "step": 1760 }, { "epoch": 47.5945945945946, "grad_norm": 1.15625, "learning_rate": 4.532997562641683e-05, "loss": 0.1688, "step": 1761 }, { "epoch": 47.62162162162162, "grad_norm": 1.515625, "learning_rate": 4.532347159814732e-05, "loss": 0.1356, "step": 1762 }, { "epoch": 47.648648648648646, "grad_norm": 1.34375, "learning_rate": 4.531696351117751e-05, "loss": 0.1466, "step": 1763 }, { "epoch": 47.67567567567568, "grad_norm": 1.7265625, "learning_rate": 4.531045136680711e-05, "loss": 0.2106, "step": 1764 }, { "epoch": 47.7027027027027, "grad_norm": 1.3359375, "learning_rate": 4.530393516633661e-05, "loss": 0.167, "step": 1765 }, { "epoch": 47.729729729729726, "grad_norm": 1.4453125, "learning_rate": 4.5297414911067346e-05, "loss": 0.1178, "step": 1766 }, { "epoch": 47.75675675675676, "grad_norm": 1.4375, "learning_rate": 4.529089060230144e-05, "loss": 0.2043, "step": 1767 }, { "epoch": 47.78378378378378, "grad_norm": 1.6640625, "learning_rate": 4.528436224134183e-05, "loss": 0.1672, "step": 1768 }, { "epoch": 47.810810810810814, "grad_norm": 2.09375, "learning_rate": 4.5277829829492265e-05, "loss": 0.1649, "step": 1769 }, { "epoch": 47.83783783783784, "grad_norm": 1.2734375, "learning_rate": 4.5271293368057285e-05, "loss": 0.1935, "step": 1770 }, { "epoch": 47.86486486486486, "grad_norm": 1.1796875, "learning_rate": 4.526475285834228e-05, "loss": 0.1495, "step": 1771 }, { "epoch": 47.891891891891895, "grad_norm": 1.5703125, "learning_rate": 4.525820830165341e-05, "loss": 0.1505, "step": 1772 }, { "epoch": 47.91891891891892, "grad_norm": 1.40625, "learning_rate": 4.5251659699297655e-05, "loss": 0.1664, "step": 1773 }, { "epoch": 47.945945945945944, "grad_norm": 1.4296875, "learning_rate": 4.52451070525828e-05, "loss": 0.178, "step": 1774 }, { "epoch": 47.972972972972975, "grad_norm": 2.265625, "learning_rate": 4.5238550362817456e-05, "loss": 0.2101, "step": 1775 }, { "epoch": 48.0, "grad_norm": 0.953125, "learning_rate": 4.523198963131101e-05, "loss": 0.0874, "step": 1776 }, { "epoch": 48.027027027027025, "grad_norm": 1.3203125, "learning_rate": 4.522542485937369e-05, "loss": 0.1387, "step": 1777 }, { "epoch": 48.054054054054056, "grad_norm": 0.9921875, "learning_rate": 4.52188560483165e-05, "loss": 0.1238, "step": 1778 }, { "epoch": 48.08108108108108, "grad_norm": 1.1796875, "learning_rate": 4.521228319945127e-05, "loss": 0.1257, "step": 1779 }, { "epoch": 48.108108108108105, "grad_norm": 0.89453125, "learning_rate": 4.5205706314090635e-05, "loss": 0.1064, "step": 1780 }, { "epoch": 48.13513513513514, "grad_norm": 1.203125, "learning_rate": 4.519912539354803e-05, "loss": 0.1318, "step": 1781 }, { "epoch": 48.16216216216216, "grad_norm": 1.78125, "learning_rate": 4.51925404391377e-05, "loss": 0.2009, "step": 1782 }, { "epoch": 48.189189189189186, "grad_norm": 1.3046875, "learning_rate": 4.5185951452174685e-05, "loss": 0.2045, "step": 1783 }, { "epoch": 48.21621621621622, "grad_norm": 1.8828125, "learning_rate": 4.517935843397485e-05, "loss": 0.2597, "step": 1784 }, { "epoch": 48.24324324324324, "grad_norm": 0.91796875, "learning_rate": 4.517276138585484e-05, "loss": 0.1069, "step": 1785 }, { "epoch": 48.270270270270274, "grad_norm": 1.671875, "learning_rate": 4.516616030913215e-05, "loss": 0.2396, "step": 1786 }, { "epoch": 48.2972972972973, "grad_norm": 1.578125, "learning_rate": 4.515955520512502e-05, "loss": 0.2585, "step": 1787 }, { "epoch": 48.32432432432432, "grad_norm": 1.078125, "learning_rate": 4.5152946075152516e-05, "loss": 0.1428, "step": 1788 }, { "epoch": 48.351351351351354, "grad_norm": 1.2265625, "learning_rate": 4.514633292053454e-05, "loss": 0.1639, "step": 1789 }, { "epoch": 48.37837837837838, "grad_norm": 1.59375, "learning_rate": 4.513971574259175e-05, "loss": 0.2223, "step": 1790 }, { "epoch": 48.4054054054054, "grad_norm": 1.3359375, "learning_rate": 4.5133094542645654e-05, "loss": 0.1199, "step": 1791 }, { "epoch": 48.432432432432435, "grad_norm": 1.5625, "learning_rate": 4.512646932201852e-05, "loss": 0.2657, "step": 1792 }, { "epoch": 48.45945945945946, "grad_norm": 1.2890625, "learning_rate": 4.511984008203345e-05, "loss": 0.1569, "step": 1793 }, { "epoch": 48.486486486486484, "grad_norm": 1.203125, "learning_rate": 4.511320682401433e-05, "loss": 0.1342, "step": 1794 }, { "epoch": 48.513513513513516, "grad_norm": 1.90625, "learning_rate": 4.510656954928585e-05, "loss": 0.2702, "step": 1795 }, { "epoch": 48.54054054054054, "grad_norm": 1.125, "learning_rate": 4.5099928259173516e-05, "loss": 0.1664, "step": 1796 }, { "epoch": 48.567567567567565, "grad_norm": 1.2109375, "learning_rate": 4.509328295500362e-05, "loss": 0.1067, "step": 1797 }, { "epoch": 48.5945945945946, "grad_norm": 1.09375, "learning_rate": 4.508663363810328e-05, "loss": 0.1551, "step": 1798 }, { "epoch": 48.62162162162162, "grad_norm": 1.390625, "learning_rate": 4.5079980309800383e-05, "loss": 0.1894, "step": 1799 }, { "epoch": 48.648648648648646, "grad_norm": 1.390625, "learning_rate": 4.507332297142364e-05, "loss": 0.1771, "step": 1800 }, { "epoch": 48.67567567567568, "grad_norm": 1.1640625, "learning_rate": 4.5066661624302544e-05, "loss": 0.1378, "step": 1801 }, { "epoch": 48.7027027027027, "grad_norm": 1.421875, "learning_rate": 4.5059996269767405e-05, "loss": 0.2024, "step": 1802 }, { "epoch": 48.729729729729726, "grad_norm": 1.3359375, "learning_rate": 4.505332690914933e-05, "loss": 0.1142, "step": 1803 }, { "epoch": 48.75675675675676, "grad_norm": 1.8671875, "learning_rate": 4.504665354378023e-05, "loss": 0.2131, "step": 1804 }, { "epoch": 48.78378378378378, "grad_norm": 1.265625, "learning_rate": 4.50399761749928e-05, "loss": 0.1302, "step": 1805 }, { "epoch": 48.810810810810814, "grad_norm": 1.0546875, "learning_rate": 4.503329480412055e-05, "loss": 0.1331, "step": 1806 }, { "epoch": 48.83783783783784, "grad_norm": 1.1640625, "learning_rate": 4.5026609432497786e-05, "loss": 0.1464, "step": 1807 }, { "epoch": 48.86486486486486, "grad_norm": 1.34375, "learning_rate": 4.50199200614596e-05, "loss": 0.1703, "step": 1808 }, { "epoch": 48.891891891891895, "grad_norm": 1.296875, "learning_rate": 4.5013226692341904e-05, "loss": 0.1533, "step": 1809 }, { "epoch": 48.91891891891892, "grad_norm": 1.453125, "learning_rate": 4.500652932648139e-05, "loss": 0.1814, "step": 1810 }, { "epoch": 48.945945945945944, "grad_norm": 1.09375, "learning_rate": 4.4999827965215554e-05, "loss": 0.1275, "step": 1811 }, { "epoch": 48.972972972972975, "grad_norm": 1.0546875, "learning_rate": 4.499312260988271e-05, "loss": 0.1374, "step": 1812 }, { "epoch": 49.0, "grad_norm": 1.3359375, "learning_rate": 4.498641326182194e-05, "loss": 0.1201, "step": 1813 }, { "epoch": 49.027027027027025, "grad_norm": 1.0234375, "learning_rate": 4.497969992237312e-05, "loss": 0.105, "step": 1814 }, { "epoch": 49.054054054054056, "grad_norm": 1.28125, "learning_rate": 4.497298259287696e-05, "loss": 0.1124, "step": 1815 }, { "epoch": 49.08108108108108, "grad_norm": 1.1328125, "learning_rate": 4.496626127467494e-05, "loss": 0.0936, "step": 1816 }, { "epoch": 49.108108108108105, "grad_norm": 1.625, "learning_rate": 4.4959535969109326e-05, "loss": 0.2449, "step": 1817 }, { "epoch": 49.13513513513514, "grad_norm": 0.98046875, "learning_rate": 4.495280667752322e-05, "loss": 0.1156, "step": 1818 }, { "epoch": 49.16216216216216, "grad_norm": 1.4296875, "learning_rate": 4.494607340126047e-05, "loss": 0.1548, "step": 1819 }, { "epoch": 49.189189189189186, "grad_norm": 1.15625, "learning_rate": 4.493933614166577e-05, "loss": 0.1315, "step": 1820 }, { "epoch": 49.21621621621622, "grad_norm": 1.0078125, "learning_rate": 4.493259490008456e-05, "loss": 0.1117, "step": 1821 }, { "epoch": 49.24324324324324, "grad_norm": 1.140625, "learning_rate": 4.4925849677863124e-05, "loss": 0.1747, "step": 1822 }, { "epoch": 49.270270270270274, "grad_norm": 1.1953125, "learning_rate": 4.49191004763485e-05, "loss": 0.139, "step": 1823 }, { "epoch": 49.2972972972973, "grad_norm": 1.3046875, "learning_rate": 4.4912347296888554e-05, "loss": 0.2119, "step": 1824 }, { "epoch": 49.32432432432432, "grad_norm": 1.6484375, "learning_rate": 4.490559014083191e-05, "loss": 0.1931, "step": 1825 }, { "epoch": 49.351351351351354, "grad_norm": 1.578125, "learning_rate": 4.489882900952802e-05, "loss": 0.1685, "step": 1826 }, { "epoch": 49.37837837837838, "grad_norm": 2.109375, "learning_rate": 4.489206390432711e-05, "loss": 0.2443, "step": 1827 }, { "epoch": 49.4054054054054, "grad_norm": 1.7734375, "learning_rate": 4.488529482658022e-05, "loss": 0.1759, "step": 1828 }, { "epoch": 49.432432432432435, "grad_norm": 1.2109375, "learning_rate": 4.487852177763914e-05, "loss": 0.1249, "step": 1829 }, { "epoch": 49.45945945945946, "grad_norm": 1.8515625, "learning_rate": 4.487174475885651e-05, "loss": 0.1786, "step": 1830 }, { "epoch": 49.486486486486484, "grad_norm": 1.4296875, "learning_rate": 4.486496377158572e-05, "loss": 0.1799, "step": 1831 }, { "epoch": 49.513513513513516, "grad_norm": 1.34375, "learning_rate": 4.485817881718098e-05, "loss": 0.1696, "step": 1832 }, { "epoch": 49.54054054054054, "grad_norm": 1.6015625, "learning_rate": 4.485138989699727e-05, "loss": 0.2394, "step": 1833 }, { "epoch": 49.567567567567565, "grad_norm": 1.71875, "learning_rate": 4.484459701239038e-05, "loss": 0.1744, "step": 1834 }, { "epoch": 49.5945945945946, "grad_norm": 2.046875, "learning_rate": 4.4837800164716866e-05, "loss": 0.1861, "step": 1835 }, { "epoch": 49.62162162162162, "grad_norm": 1.8671875, "learning_rate": 4.4830999355334105e-05, "loss": 0.2081, "step": 1836 }, { "epoch": 49.648648648648646, "grad_norm": 1.46875, "learning_rate": 4.4824194585600254e-05, "loss": 0.1827, "step": 1837 }, { "epoch": 49.67567567567568, "grad_norm": 1.9296875, "learning_rate": 4.481738585687426e-05, "loss": 0.1477, "step": 1838 }, { "epoch": 49.7027027027027, "grad_norm": 2.515625, "learning_rate": 4.4810573170515864e-05, "loss": 0.1903, "step": 1839 }, { "epoch": 49.729729729729726, "grad_norm": 2.203125, "learning_rate": 4.480375652788558e-05, "loss": 0.2111, "step": 1840 }, { "epoch": 49.75675675675676, "grad_norm": 1.234375, "learning_rate": 4.4796935930344743e-05, "loss": 0.1712, "step": 1841 }, { "epoch": 49.78378378378378, "grad_norm": 1.9765625, "learning_rate": 4.479011137925545e-05, "loss": 0.1737, "step": 1842 }, { "epoch": 49.810810810810814, "grad_norm": 1.65625, "learning_rate": 4.4783282875980596e-05, "loss": 0.155, "step": 1843 }, { "epoch": 49.83783783783784, "grad_norm": 1.8359375, "learning_rate": 4.477645042188388e-05, "loss": 0.1398, "step": 1844 }, { "epoch": 49.86486486486486, "grad_norm": 1.234375, "learning_rate": 4.476961401832978e-05, "loss": 0.15, "step": 1845 }, { "epoch": 49.891891891891895, "grad_norm": 2.0625, "learning_rate": 4.476277366668353e-05, "loss": 0.1472, "step": 1846 }, { "epoch": 49.91891891891892, "grad_norm": 1.9375, "learning_rate": 4.4755929368311225e-05, "loss": 0.1869, "step": 1847 }, { "epoch": 49.945945945945944, "grad_norm": 1.453125, "learning_rate": 4.474908112457967e-05, "loss": 0.1407, "step": 1848 }, { "epoch": 49.972972972972975, "grad_norm": 1.1171875, "learning_rate": 4.474222893685651e-05, "loss": 0.1308, "step": 1849 }, { "epoch": 50.0, "grad_norm": 1.71875, "learning_rate": 4.473537280651017e-05, "loss": 0.1243, "step": 1850 }, { "epoch": 50.027027027027025, "grad_norm": 1.59375, "learning_rate": 4.4728512734909844e-05, "loss": 0.1696, "step": 1851 }, { "epoch": 50.054054054054056, "grad_norm": 1.7890625, "learning_rate": 4.472164872342552e-05, "loss": 0.2118, "step": 1852 }, { "epoch": 50.08108108108108, "grad_norm": 1.40625, "learning_rate": 4.471478077342798e-05, "loss": 0.1241, "step": 1853 }, { "epoch": 50.108108108108105, "grad_norm": 1.8515625, "learning_rate": 4.47079088862888e-05, "loss": 0.1826, "step": 1854 }, { "epoch": 50.13513513513514, "grad_norm": 1.3828125, "learning_rate": 4.47010330633803e-05, "loss": 0.153, "step": 1855 }, { "epoch": 50.16216216216216, "grad_norm": 1.2890625, "learning_rate": 4.469415330607565e-05, "loss": 0.1665, "step": 1856 }, { "epoch": 50.189189189189186, "grad_norm": 1.7109375, "learning_rate": 4.468726961574875e-05, "loss": 0.2165, "step": 1857 }, { "epoch": 50.21621621621622, "grad_norm": 1.5859375, "learning_rate": 4.468038199377432e-05, "loss": 0.2567, "step": 1858 }, { "epoch": 50.24324324324324, "grad_norm": 1.109375, "learning_rate": 4.467349044152784e-05, "loss": 0.1103, "step": 1859 }, { "epoch": 50.270270270270274, "grad_norm": 1.1640625, "learning_rate": 4.466659496038561e-05, "loss": 0.1095, "step": 1860 }, { "epoch": 50.2972972972973, "grad_norm": 1.265625, "learning_rate": 4.465969555172467e-05, "loss": 0.1536, "step": 1861 }, { "epoch": 50.32432432432432, "grad_norm": 1.0859375, "learning_rate": 4.465279221692289e-05, "loss": 0.1042, "step": 1862 }, { "epoch": 50.351351351351354, "grad_norm": 1.4140625, "learning_rate": 4.4645884957358875e-05, "loss": 0.1956, "step": 1863 }, { "epoch": 50.37837837837838, "grad_norm": 1.3359375, "learning_rate": 4.463897377441205e-05, "loss": 0.1291, "step": 1864 }, { "epoch": 50.4054054054054, "grad_norm": 1.0390625, "learning_rate": 4.463205866946262e-05, "loss": 0.093, "step": 1865 }, { "epoch": 50.432432432432435, "grad_norm": 1.5625, "learning_rate": 4.4625139643891564e-05, "loss": 0.1961, "step": 1866 }, { "epoch": 50.45945945945946, "grad_norm": 0.9921875, "learning_rate": 4.461821669908065e-05, "loss": 0.1182, "step": 1867 }, { "epoch": 50.486486486486484, "grad_norm": 1.265625, "learning_rate": 4.461128983641241e-05, "loss": 0.1275, "step": 1868 }, { "epoch": 50.513513513513516, "grad_norm": 1.4296875, "learning_rate": 4.4604359057270185e-05, "loss": 0.1482, "step": 1869 }, { "epoch": 50.54054054054054, "grad_norm": 1.2890625, "learning_rate": 4.4597424363038084e-05, "loss": 0.1408, "step": 1870 }, { "epoch": 50.567567567567565, "grad_norm": 2.515625, "learning_rate": 4.4590485755101e-05, "loss": 0.2062, "step": 1871 }, { "epoch": 50.5945945945946, "grad_norm": 1.6953125, "learning_rate": 4.4583543234844616e-05, "loss": 0.1429, "step": 1872 }, { "epoch": 50.62162162162162, "grad_norm": 1.6484375, "learning_rate": 4.457659680365538e-05, "loss": 0.1995, "step": 1873 }, { "epoch": 50.648648648648646, "grad_norm": 1.2109375, "learning_rate": 4.456964646292052e-05, "loss": 0.1358, "step": 1874 }, { "epoch": 50.67567567567568, "grad_norm": 1.7421875, "learning_rate": 4.456269221402808e-05, "loss": 0.1663, "step": 1875 }, { "epoch": 50.7027027027027, "grad_norm": 1.40625, "learning_rate": 4.4555734058366835e-05, "loss": 0.1496, "step": 1876 }, { "epoch": 50.729729729729726, "grad_norm": 1.546875, "learning_rate": 4.454877199732639e-05, "loss": 0.163, "step": 1877 }, { "epoch": 50.75675675675676, "grad_norm": 0.97265625, "learning_rate": 4.454180603229707e-05, "loss": 0.113, "step": 1878 }, { "epoch": 50.78378378378378, "grad_norm": 1.421875, "learning_rate": 4.453483616467003e-05, "loss": 0.1513, "step": 1879 }, { "epoch": 50.810810810810814, "grad_norm": 1.1171875, "learning_rate": 4.4527862395837206e-05, "loss": 0.1024, "step": 1880 }, { "epoch": 50.83783783783784, "grad_norm": 1.1953125, "learning_rate": 4.452088472719127e-05, "loss": 0.0995, "step": 1881 }, { "epoch": 50.86486486486486, "grad_norm": 1.71875, "learning_rate": 4.4513903160125706e-05, "loss": 0.2253, "step": 1882 }, { "epoch": 50.891891891891895, "grad_norm": 1.6953125, "learning_rate": 4.450691769603476e-05, "loss": 0.1437, "step": 1883 }, { "epoch": 50.91891891891892, "grad_norm": 2.0, "learning_rate": 4.449992833631348e-05, "loss": 0.282, "step": 1884 }, { "epoch": 50.945945945945944, "grad_norm": 1.8125, "learning_rate": 4.449293508235767e-05, "loss": 0.2398, "step": 1885 }, { "epoch": 50.972972972972975, "grad_norm": 1.6328125, "learning_rate": 4.448593793556392e-05, "loss": 0.2157, "step": 1886 }, { "epoch": 51.0, "grad_norm": 1.703125, "learning_rate": 4.4478936897329585e-05, "loss": 0.1764, "step": 1887 }, { "epoch": 51.027027027027025, "grad_norm": 1.3203125, "learning_rate": 4.4471931969052816e-05, "loss": 0.1448, "step": 1888 }, { "epoch": 51.054054054054056, "grad_norm": 1.78125, "learning_rate": 4.446492315213253e-05, "loss": 0.1945, "step": 1889 }, { "epoch": 51.08108108108108, "grad_norm": 1.1328125, "learning_rate": 4.445791044796843e-05, "loss": 0.1463, "step": 1890 }, { "epoch": 51.108108108108105, "grad_norm": 1.421875, "learning_rate": 4.445089385796099e-05, "loss": 0.2324, "step": 1891 }, { "epoch": 51.13513513513514, "grad_norm": 1.4609375, "learning_rate": 4.444387338351145e-05, "loss": 0.1223, "step": 1892 }, { "epoch": 51.16216216216216, "grad_norm": 1.953125, "learning_rate": 4.443684902602183e-05, "loss": 0.1936, "step": 1893 }, { "epoch": 51.189189189189186, "grad_norm": 1.5, "learning_rate": 4.442982078689495e-05, "loss": 0.1062, "step": 1894 }, { "epoch": 51.21621621621622, "grad_norm": 1.1796875, "learning_rate": 4.4422788667534365e-05, "loss": 0.1027, "step": 1895 }, { "epoch": 51.24324324324324, "grad_norm": 1.4140625, "learning_rate": 4.4415752669344433e-05, "loss": 0.1772, "step": 1896 }, { "epoch": 51.270270270270274, "grad_norm": 1.3125, "learning_rate": 4.4408712793730275e-05, "loss": 0.1964, "step": 1897 }, { "epoch": 51.2972972972973, "grad_norm": 1.453125, "learning_rate": 4.4401669042097796e-05, "loss": 0.1465, "step": 1898 }, { "epoch": 51.32432432432432, "grad_norm": 1.6875, "learning_rate": 4.4394621415853666e-05, "loss": 0.1882, "step": 1899 }, { "epoch": 51.351351351351354, "grad_norm": 1.359375, "learning_rate": 4.438756991640532e-05, "loss": 0.1573, "step": 1900 }, { "epoch": 51.37837837837838, "grad_norm": 1.8515625, "learning_rate": 4.4380514545161e-05, "loss": 0.1653, "step": 1901 }, { "epoch": 51.4054054054054, "grad_norm": 1.296875, "learning_rate": 4.4373455303529684e-05, "loss": 0.1427, "step": 1902 }, { "epoch": 51.432432432432435, "grad_norm": 1.3359375, "learning_rate": 4.4366392192921146e-05, "loss": 0.1456, "step": 1903 }, { "epoch": 51.45945945945946, "grad_norm": 1.140625, "learning_rate": 4.435932521474593e-05, "loss": 0.1275, "step": 1904 }, { "epoch": 51.486486486486484, "grad_norm": 1.03125, "learning_rate": 4.435225437041533e-05, "loss": 0.1048, "step": 1905 }, { "epoch": 51.513513513513516, "grad_norm": 1.4453125, "learning_rate": 4.434517966134143e-05, "loss": 0.2117, "step": 1906 }, { "epoch": 51.54054054054054, "grad_norm": 1.203125, "learning_rate": 4.43381010889371e-05, "loss": 0.1411, "step": 1907 }, { "epoch": 51.567567567567565, "grad_norm": 1.25, "learning_rate": 4.433101865461597e-05, "loss": 0.1238, "step": 1908 }, { "epoch": 51.5945945945946, "grad_norm": 1.484375, "learning_rate": 4.432393235979242e-05, "loss": 0.1851, "step": 1909 }, { "epoch": 51.62162162162162, "grad_norm": 1.9609375, "learning_rate": 4.431684220588163e-05, "loss": 0.2589, "step": 1910 }, { "epoch": 51.648648648648646, "grad_norm": 1.359375, "learning_rate": 4.430974819429954e-05, "loss": 0.1201, "step": 1911 }, { "epoch": 51.67567567567568, "grad_norm": 1.9609375, "learning_rate": 4.430265032646285e-05, "loss": 0.2133, "step": 1912 }, { "epoch": 51.7027027027027, "grad_norm": 1.1484375, "learning_rate": 4.4295548603789053e-05, "loss": 0.0915, "step": 1913 }, { "epoch": 51.729729729729726, "grad_norm": 1.4453125, "learning_rate": 4.428844302769639e-05, "loss": 0.1374, "step": 1914 }, { "epoch": 51.75675675675676, "grad_norm": 1.2421875, "learning_rate": 4.428133359960389e-05, "loss": 0.1245, "step": 1915 }, { "epoch": 51.78378378378378, "grad_norm": 1.1015625, "learning_rate": 4.4274220320931336e-05, "loss": 0.0931, "step": 1916 }, { "epoch": 51.810810810810814, "grad_norm": 0.81640625, "learning_rate": 4.426710319309928e-05, "loss": 0.1023, "step": 1917 }, { "epoch": 51.83783783783784, "grad_norm": 1.3828125, "learning_rate": 4.425998221752906e-05, "loss": 0.177, "step": 1918 }, { "epoch": 51.86486486486486, "grad_norm": 1.7421875, "learning_rate": 4.425285739564277e-05, "loss": 0.1346, "step": 1919 }, { "epoch": 51.891891891891895, "grad_norm": 1.5703125, "learning_rate": 4.424572872886326e-05, "loss": 0.1119, "step": 1920 }, { "epoch": 51.91891891891892, "grad_norm": 2.640625, "learning_rate": 4.423859621861417e-05, "loss": 0.2357, "step": 1921 }, { "epoch": 51.945945945945944, "grad_norm": 2.09375, "learning_rate": 4.4231459866319894e-05, "loss": 0.1846, "step": 1922 }, { "epoch": 51.972972972972975, "grad_norm": 1.296875, "learning_rate": 4.422431967340561e-05, "loss": 0.1461, "step": 1923 }, { "epoch": 52.0, "grad_norm": 1.8203125, "learning_rate": 4.4217175641297236e-05, "loss": 0.2018, "step": 1924 }, { "epoch": 52.027027027027025, "grad_norm": 1.8203125, "learning_rate": 4.421002777142148e-05, "loss": 0.1731, "step": 1925 }, { "epoch": 52.054054054054056, "grad_norm": 1.3203125, "learning_rate": 4.42028760652058e-05, "loss": 0.11, "step": 1926 }, { "epoch": 52.08108108108108, "grad_norm": 1.125, "learning_rate": 4.4195720524078443e-05, "loss": 0.1206, "step": 1927 }, { "epoch": 52.108108108108105, "grad_norm": 1.328125, "learning_rate": 4.418856114946839e-05, "loss": 0.155, "step": 1928 }, { "epoch": 52.13513513513514, "grad_norm": 1.4609375, "learning_rate": 4.418139794280541e-05, "loss": 0.1115, "step": 1929 }, { "epoch": 52.16216216216216, "grad_norm": 2.109375, "learning_rate": 4.417423090552004e-05, "loss": 0.2181, "step": 1930 }, { "epoch": 52.189189189189186, "grad_norm": 1.71875, "learning_rate": 4.416706003904357e-05, "loss": 0.2141, "step": 1931 }, { "epoch": 52.21621621621622, "grad_norm": 1.3203125, "learning_rate": 4.415988534480805e-05, "loss": 0.1441, "step": 1932 }, { "epoch": 52.24324324324324, "grad_norm": 0.921875, "learning_rate": 4.41527068242463e-05, "loss": 0.0784, "step": 1933 }, { "epoch": 52.270270270270274, "grad_norm": 2.09375, "learning_rate": 4.4145524478791924e-05, "loss": 0.1428, "step": 1934 }, { "epoch": 52.2972972972973, "grad_norm": 1.4375, "learning_rate": 4.4138338309879266e-05, "loss": 0.1582, "step": 1935 }, { "epoch": 52.32432432432432, "grad_norm": 1.25, "learning_rate": 4.413114831894344e-05, "loss": 0.1542, "step": 1936 }, { "epoch": 52.351351351351354, "grad_norm": 1.3671875, "learning_rate": 4.412395450742031e-05, "loss": 0.1419, "step": 1937 }, { "epoch": 52.37837837837838, "grad_norm": 1.046875, "learning_rate": 4.4116756876746535e-05, "loss": 0.0983, "step": 1938 }, { "epoch": 52.4054054054054, "grad_norm": 1.25, "learning_rate": 4.4109555428359516e-05, "loss": 0.1234, "step": 1939 }, { "epoch": 52.432432432432435, "grad_norm": 1.2265625, "learning_rate": 4.410235016369741e-05, "loss": 0.1062, "step": 1940 }, { "epoch": 52.45945945945946, "grad_norm": 2.015625, "learning_rate": 4.409514108419916e-05, "loss": 0.2157, "step": 1941 }, { "epoch": 52.486486486486484, "grad_norm": 2.0625, "learning_rate": 4.408792819130443e-05, "loss": 0.1826, "step": 1942 }, { "epoch": 52.513513513513516, "grad_norm": 1.9375, "learning_rate": 4.408071148645369e-05, "loss": 0.2005, "step": 1943 }, { "epoch": 52.54054054054054, "grad_norm": 1.2421875, "learning_rate": 4.407349097108815e-05, "loss": 0.136, "step": 1944 }, { "epoch": 52.567567567567565, "grad_norm": 1.3125, "learning_rate": 4.406626664664978e-05, "loss": 0.1592, "step": 1945 }, { "epoch": 52.5945945945946, "grad_norm": 2.03125, "learning_rate": 4.405903851458133e-05, "loss": 0.2067, "step": 1946 }, { "epoch": 52.62162162162162, "grad_norm": 1.296875, "learning_rate": 4.405180657632627e-05, "loss": 0.1227, "step": 1947 }, { "epoch": 52.648648648648646, "grad_norm": 1.5234375, "learning_rate": 4.404457083332886e-05, "loss": 0.1738, "step": 1948 }, { "epoch": 52.67567567567568, "grad_norm": 1.7578125, "learning_rate": 4.4037331287034134e-05, "loss": 0.2076, "step": 1949 }, { "epoch": 52.7027027027027, "grad_norm": 2.25, "learning_rate": 4.403008793888785e-05, "loss": 0.1264, "step": 1950 }, { "epoch": 52.729729729729726, "grad_norm": 1.8984375, "learning_rate": 4.402284079033654e-05, "loss": 0.2103, "step": 1951 }, { "epoch": 52.75675675675676, "grad_norm": 1.78125, "learning_rate": 4.40155898428275e-05, "loss": 0.1499, "step": 1952 }, { "epoch": 52.78378378378378, "grad_norm": 1.328125, "learning_rate": 4.4008335097808784e-05, "loss": 0.151, "step": 1953 }, { "epoch": 52.810810810810814, "grad_norm": 2.234375, "learning_rate": 4.40010765567292e-05, "loss": 0.1766, "step": 1954 }, { "epoch": 52.83783783783784, "grad_norm": 2.328125, "learning_rate": 4.3993814221038307e-05, "loss": 0.1137, "step": 1955 }, { "epoch": 52.86486486486486, "grad_norm": 0.96875, "learning_rate": 4.398654809218644e-05, "loss": 0.1022, "step": 1956 }, { "epoch": 52.891891891891895, "grad_norm": 1.4609375, "learning_rate": 4.3979278171624685e-05, "loss": 0.1897, "step": 1957 }, { "epoch": 52.91891891891892, "grad_norm": 1.6796875, "learning_rate": 4.397200446080487e-05, "loss": 0.2008, "step": 1958 }, { "epoch": 52.945945945945944, "grad_norm": 1.0859375, "learning_rate": 4.39647269611796e-05, "loss": 0.1364, "step": 1959 }, { "epoch": 52.972972972972975, "grad_norm": 1.5859375, "learning_rate": 4.395744567420223e-05, "loss": 0.241, "step": 1960 }, { "epoch": 53.0, "grad_norm": 0.97265625, "learning_rate": 4.395016060132686e-05, "loss": 0.1142, "step": 1961 }, { "epoch": 53.027027027027025, "grad_norm": 1.3671875, "learning_rate": 4.3942871744008374e-05, "loss": 0.1595, "step": 1962 }, { "epoch": 53.054054054054056, "grad_norm": 1.5546875, "learning_rate": 4.3935579103702375e-05, "loss": 0.1968, "step": 1963 }, { "epoch": 53.08108108108108, "grad_norm": 1.4609375, "learning_rate": 4.392828268186524e-05, "loss": 0.173, "step": 1964 }, { "epoch": 53.108108108108105, "grad_norm": 0.9609375, "learning_rate": 4.392098247995412e-05, "loss": 0.1141, "step": 1965 }, { "epoch": 53.13513513513514, "grad_norm": 1.71875, "learning_rate": 4.391367849942689e-05, "loss": 0.2011, "step": 1966 }, { "epoch": 53.16216216216216, "grad_norm": 1.2734375, "learning_rate": 4.3906370741742186e-05, "loss": 0.099, "step": 1967 }, { "epoch": 53.189189189189186, "grad_norm": 1.4921875, "learning_rate": 4.3899059208359415e-05, "loss": 0.2673, "step": 1968 }, { "epoch": 53.21621621621622, "grad_norm": 1.2109375, "learning_rate": 4.389174390073872e-05, "loss": 0.1755, "step": 1969 }, { "epoch": 53.24324324324324, "grad_norm": 1.2578125, "learning_rate": 4.3884424820341016e-05, "loss": 0.1359, "step": 1970 }, { "epoch": 53.270270270270274, "grad_norm": 1.765625, "learning_rate": 4.387710196862795e-05, "loss": 0.2201, "step": 1971 }, { "epoch": 53.2972972972973, "grad_norm": 2.625, "learning_rate": 4.3869775347061934e-05, "loss": 0.167, "step": 1972 }, { "epoch": 53.32432432432432, "grad_norm": 1.203125, "learning_rate": 4.386244495710614e-05, "loss": 0.1026, "step": 1973 }, { "epoch": 53.351351351351354, "grad_norm": 1.125, "learning_rate": 4.385511080022448e-05, "loss": 0.1343, "step": 1974 }, { "epoch": 53.37837837837838, "grad_norm": 1.328125, "learning_rate": 4.38477728778816e-05, "loss": 0.1374, "step": 1975 }, { "epoch": 53.4054054054054, "grad_norm": 2.03125, "learning_rate": 4.3840431191542955e-05, "loss": 0.2414, "step": 1976 }, { "epoch": 53.432432432432435, "grad_norm": 1.5, "learning_rate": 4.38330857426747e-05, "loss": 0.2252, "step": 1977 }, { "epoch": 53.45945945945946, "grad_norm": 1.1171875, "learning_rate": 4.382573653274375e-05, "loss": 0.138, "step": 1978 }, { "epoch": 53.486486486486484, "grad_norm": 0.7421875, "learning_rate": 4.381838356321781e-05, "loss": 0.0838, "step": 1979 }, { "epoch": 53.513513513513516, "grad_norm": 1.3984375, "learning_rate": 4.381102683556527e-05, "loss": 0.1566, "step": 1980 }, { "epoch": 53.54054054054054, "grad_norm": 0.8203125, "learning_rate": 4.380366635125532e-05, "loss": 0.1018, "step": 1981 }, { "epoch": 53.567567567567565, "grad_norm": 1.3359375, "learning_rate": 4.37963021117579e-05, "loss": 0.1534, "step": 1982 }, { "epoch": 53.5945945945946, "grad_norm": 1.0234375, "learning_rate": 4.378893411854365e-05, "loss": 0.1152, "step": 1983 }, { "epoch": 53.62162162162162, "grad_norm": 1.65625, "learning_rate": 4.378156237308404e-05, "loss": 0.1643, "step": 1984 }, { "epoch": 53.648648648648646, "grad_norm": 1.3984375, "learning_rate": 4.377418687685122e-05, "loss": 0.168, "step": 1985 }, { "epoch": 53.67567567567568, "grad_norm": 1.1875, "learning_rate": 4.3766807631318106e-05, "loss": 0.1868, "step": 1986 }, { "epoch": 53.7027027027027, "grad_norm": 1.453125, "learning_rate": 4.375942463795839e-05, "loss": 0.1844, "step": 1987 }, { "epoch": 53.729729729729726, "grad_norm": 0.96875, "learning_rate": 4.375203789824649e-05, "loss": 0.0955, "step": 1988 }, { "epoch": 53.75675675675676, "grad_norm": 1.3828125, "learning_rate": 4.374464741365756e-05, "loss": 0.1815, "step": 1989 }, { "epoch": 53.78378378378378, "grad_norm": 1.0546875, "learning_rate": 4.373725318566754e-05, "loss": 0.1184, "step": 1990 }, { "epoch": 53.810810810810814, "grad_norm": 0.73828125, "learning_rate": 4.372985521575307e-05, "loss": 0.0925, "step": 1991 }, { "epoch": 53.83783783783784, "grad_norm": 1.546875, "learning_rate": 4.372245350539158e-05, "loss": 0.22, "step": 1992 }, { "epoch": 53.86486486486486, "grad_norm": 1.4609375, "learning_rate": 4.371504805606122e-05, "loss": 0.1578, "step": 1993 }, { "epoch": 53.891891891891895, "grad_norm": 0.953125, "learning_rate": 4.370763886924091e-05, "loss": 0.1342, "step": 1994 }, { "epoch": 53.91891891891892, "grad_norm": 1.28125, "learning_rate": 4.3700225946410275e-05, "loss": 0.1892, "step": 1995 }, { "epoch": 53.945945945945944, "grad_norm": 1.2578125, "learning_rate": 4.369280928904974e-05, "loss": 0.146, "step": 1996 }, { "epoch": 53.972972972972975, "grad_norm": 1.359375, "learning_rate": 4.368538889864044e-05, "loss": 0.213, "step": 1997 }, { "epoch": 54.0, "grad_norm": 1.3203125, "learning_rate": 4.367796477666427e-05, "loss": 0.1371, "step": 1998 }, { "epoch": 54.027027027027025, "grad_norm": 1.2578125, "learning_rate": 4.367053692460385e-05, "loss": 0.0998, "step": 1999 }, { "epoch": 54.054054054054056, "grad_norm": 1.25, "learning_rate": 4.3663105343942566e-05, "loss": 0.173, "step": 2000 }, { "epoch": 54.08108108108108, "grad_norm": 1.15625, "learning_rate": 4.3655670036164554e-05, "loss": 0.1109, "step": 2001 }, { "epoch": 54.108108108108105, "grad_norm": 0.78515625, "learning_rate": 4.364823100275466e-05, "loss": 0.0939, "step": 2002 }, { "epoch": 54.13513513513514, "grad_norm": 1.109375, "learning_rate": 4.364078824519852e-05, "loss": 0.1107, "step": 2003 }, { "epoch": 54.16216216216216, "grad_norm": 1.359375, "learning_rate": 4.363334176498247e-05, "loss": 0.2018, "step": 2004 }, { "epoch": 54.189189189189186, "grad_norm": 1.125, "learning_rate": 4.3625891563593626e-05, "loss": 0.133, "step": 2005 }, { "epoch": 54.21621621621622, "grad_norm": 1.1484375, "learning_rate": 4.361843764251983e-05, "loss": 0.1254, "step": 2006 }, { "epoch": 54.24324324324324, "grad_norm": 0.8359375, "learning_rate": 4.361098000324966e-05, "loss": 0.11, "step": 2007 }, { "epoch": 54.270270270270274, "grad_norm": 1.359375, "learning_rate": 4.3603518647272444e-05, "loss": 0.2426, "step": 2008 }, { "epoch": 54.2972972972973, "grad_norm": 1.4921875, "learning_rate": 4.359605357607825e-05, "loss": 0.1462, "step": 2009 }, { "epoch": 54.32432432432432, "grad_norm": 1.140625, "learning_rate": 4.358858479115792e-05, "loss": 0.1167, "step": 2010 }, { "epoch": 54.351351351351354, "grad_norm": 1.3671875, "learning_rate": 4.3581112294002955e-05, "loss": 0.1656, "step": 2011 }, { "epoch": 54.37837837837838, "grad_norm": 1.4296875, "learning_rate": 4.3573636086105694e-05, "loss": 0.121, "step": 2012 }, { "epoch": 54.4054054054054, "grad_norm": 1.3515625, "learning_rate": 4.356615616895916e-05, "loss": 0.152, "step": 2013 }, { "epoch": 54.432432432432435, "grad_norm": 1.6640625, "learning_rate": 4.355867254405713e-05, "loss": 0.2248, "step": 2014 }, { "epoch": 54.45945945945946, "grad_norm": 1.1953125, "learning_rate": 4.3551185212894125e-05, "loss": 0.1488, "step": 2015 }, { "epoch": 54.486486486486484, "grad_norm": 0.74609375, "learning_rate": 4.3543694176965394e-05, "loss": 0.0932, "step": 2016 }, { "epoch": 54.513513513513516, "grad_norm": 0.71484375, "learning_rate": 4.3536199437766945e-05, "loss": 0.0852, "step": 2017 }, { "epoch": 54.54054054054054, "grad_norm": 0.95703125, "learning_rate": 4.352870099679551e-05, "loss": 0.1158, "step": 2018 }, { "epoch": 54.567567567567565, "grad_norm": 1.2578125, "learning_rate": 4.352119885554856e-05, "loss": 0.1422, "step": 2019 }, { "epoch": 54.5945945945946, "grad_norm": 0.9140625, "learning_rate": 4.351369301552433e-05, "loss": 0.1168, "step": 2020 }, { "epoch": 54.62162162162162, "grad_norm": 1.1484375, "learning_rate": 4.350618347822175e-05, "loss": 0.1251, "step": 2021 }, { "epoch": 54.648648648648646, "grad_norm": 1.8515625, "learning_rate": 4.349867024514053e-05, "loss": 0.2235, "step": 2022 }, { "epoch": 54.67567567567568, "grad_norm": 1.5, "learning_rate": 4.34911533177811e-05, "loss": 0.1513, "step": 2023 }, { "epoch": 54.7027027027027, "grad_norm": 1.2109375, "learning_rate": 4.348363269764462e-05, "loss": 0.1434, "step": 2024 }, { "epoch": 54.729729729729726, "grad_norm": 1.8671875, "learning_rate": 4.3476108386232994e-05, "loss": 0.1885, "step": 2025 }, { "epoch": 54.75675675675676, "grad_norm": 1.53125, "learning_rate": 4.346858038504888e-05, "loss": 0.2034, "step": 2026 }, { "epoch": 54.78378378378378, "grad_norm": 1.078125, "learning_rate": 4.3461048695595634e-05, "loss": 0.1312, "step": 2027 }, { "epoch": 54.810810810810814, "grad_norm": 1.21875, "learning_rate": 4.34535133193774e-05, "loss": 0.1297, "step": 2028 }, { "epoch": 54.83783783783784, "grad_norm": 1.9453125, "learning_rate": 4.3445974257899014e-05, "loss": 0.1811, "step": 2029 }, { "epoch": 54.86486486486486, "grad_norm": 0.96875, "learning_rate": 4.343843151266607e-05, "loss": 0.1342, "step": 2030 }, { "epoch": 54.891891891891895, "grad_norm": 1.234375, "learning_rate": 4.3430885085184894e-05, "loss": 0.1263, "step": 2031 }, { "epoch": 54.91891891891892, "grad_norm": 1.140625, "learning_rate": 4.342333497696254e-05, "loss": 0.1199, "step": 2032 }, { "epoch": 54.945945945945944, "grad_norm": 1.421875, "learning_rate": 4.34157811895068e-05, "loss": 0.1341, "step": 2033 }, { "epoch": 54.972972972972975, "grad_norm": 1.03125, "learning_rate": 4.3408223724326224e-05, "loss": 0.1192, "step": 2034 }, { "epoch": 55.0, "grad_norm": 1.4453125, "learning_rate": 4.3400662582930055e-05, "loss": 0.1436, "step": 2035 }, { "epoch": 55.027027027027025, "grad_norm": 1.7890625, "learning_rate": 4.3393097766828293e-05, "loss": 0.2169, "step": 2036 }, { "epoch": 55.054054054054056, "grad_norm": 1.2578125, "learning_rate": 4.338552927753168e-05, "loss": 0.1381, "step": 2037 }, { "epoch": 55.08108108108108, "grad_norm": 1.3515625, "learning_rate": 4.337795711655168e-05, "loss": 0.1767, "step": 2038 }, { "epoch": 55.108108108108105, "grad_norm": 1.46875, "learning_rate": 4.337038128540048e-05, "loss": 0.1602, "step": 2039 }, { "epoch": 55.13513513513514, "grad_norm": 1.59375, "learning_rate": 4.336280178559102e-05, "loss": 0.1097, "step": 2040 }, { "epoch": 55.16216216216216, "grad_norm": 1.3203125, "learning_rate": 4.3355218618636975e-05, "loss": 0.1508, "step": 2041 }, { "epoch": 55.189189189189186, "grad_norm": 1.671875, "learning_rate": 4.334763178605273e-05, "loss": 0.1826, "step": 2042 }, { "epoch": 55.21621621621622, "grad_norm": 1.4765625, "learning_rate": 4.3340041289353415e-05, "loss": 0.1547, "step": 2043 }, { "epoch": 55.24324324324324, "grad_norm": 1.8046875, "learning_rate": 4.333244713005489e-05, "loss": 0.1631, "step": 2044 }, { "epoch": 55.270270270270274, "grad_norm": 1.9765625, "learning_rate": 4.332484930967375e-05, "loss": 0.1619, "step": 2045 }, { "epoch": 55.2972972972973, "grad_norm": 1.046875, "learning_rate": 4.331724782972731e-05, "loss": 0.1267, "step": 2046 }, { "epoch": 55.32432432432432, "grad_norm": 1.1015625, "learning_rate": 4.330964269173364e-05, "loss": 0.1359, "step": 2047 }, { "epoch": 55.351351351351354, "grad_norm": 2.09375, "learning_rate": 4.330203389721153e-05, "loss": 0.1491, "step": 2048 }, { "epoch": 55.37837837837838, "grad_norm": 1.453125, "learning_rate": 4.3294421447680454e-05, "loss": 0.1108, "step": 2049 }, { "epoch": 55.4054054054054, "grad_norm": 1.1484375, "learning_rate": 4.3286805344660695e-05, "loss": 0.15, "step": 2050 }, { "epoch": 55.432432432432435, "grad_norm": 0.81640625, "learning_rate": 4.327918558967321e-05, "loss": 0.0817, "step": 2051 }, { "epoch": 55.45945945945946, "grad_norm": 2.09375, "learning_rate": 4.327156218423971e-05, "loss": 0.1485, "step": 2052 }, { "epoch": 55.486486486486484, "grad_norm": 1.453125, "learning_rate": 4.3263935129882636e-05, "loss": 0.1574, "step": 2053 }, { "epoch": 55.513513513513516, "grad_norm": 1.28125, "learning_rate": 4.3256304428125124e-05, "loss": 0.2062, "step": 2054 }, { "epoch": 55.54054054054054, "grad_norm": 1.140625, "learning_rate": 4.324867008049109e-05, "loss": 0.1053, "step": 2055 }, { "epoch": 55.567567567567565, "grad_norm": 1.5703125, "learning_rate": 4.324103208850514e-05, "loss": 0.1795, "step": 2056 }, { "epoch": 55.5945945945946, "grad_norm": 1.7890625, "learning_rate": 4.323339045369262e-05, "loss": 0.2097, "step": 2057 }, { "epoch": 55.62162162162162, "grad_norm": 1.4375, "learning_rate": 4.322574517757961e-05, "loss": 0.1755, "step": 2058 }, { "epoch": 55.648648648648646, "grad_norm": 1.6640625, "learning_rate": 4.32180962616929e-05, "loss": 0.0974, "step": 2059 }, { "epoch": 55.67567567567568, "grad_norm": 1.3359375, "learning_rate": 4.3210443707560014e-05, "loss": 0.1799, "step": 2060 }, { "epoch": 55.7027027027027, "grad_norm": 1.1875, "learning_rate": 4.320278751670922e-05, "loss": 0.1253, "step": 2061 }, { "epoch": 55.729729729729726, "grad_norm": 1.828125, "learning_rate": 4.319512769066949e-05, "loss": 0.195, "step": 2062 }, { "epoch": 55.75675675675676, "grad_norm": 1.125, "learning_rate": 4.3187464230970544e-05, "loss": 0.1183, "step": 2063 }, { "epoch": 55.78378378378378, "grad_norm": 1.890625, "learning_rate": 4.317979713914279e-05, "loss": 0.2211, "step": 2064 }, { "epoch": 55.810810810810814, "grad_norm": 1.2578125, "learning_rate": 4.31721264167174e-05, "loss": 0.1456, "step": 2065 }, { "epoch": 55.83783783783784, "grad_norm": 1.3203125, "learning_rate": 4.3164452065226245e-05, "loss": 0.158, "step": 2066 }, { "epoch": 55.86486486486486, "grad_norm": 1.3125, "learning_rate": 4.315677408620194e-05, "loss": 0.1693, "step": 2067 }, { "epoch": 55.891891891891895, "grad_norm": 2.4375, "learning_rate": 4.3149092481177836e-05, "loss": 0.2464, "step": 2068 }, { "epoch": 55.91891891891892, "grad_norm": 1.3125, "learning_rate": 4.314140725168795e-05, "loss": 0.1405, "step": 2069 }, { "epoch": 55.945945945945944, "grad_norm": 1.65625, "learning_rate": 4.3133718399267085e-05, "loss": 0.1164, "step": 2070 }, { "epoch": 55.972972972972975, "grad_norm": 1.4375, "learning_rate": 4.312602592545074e-05, "loss": 0.1214, "step": 2071 }, { "epoch": 56.0, "grad_norm": 2.015625, "learning_rate": 4.3118329831775136e-05, "loss": 0.1705, "step": 2072 }, { "epoch": 56.027027027027025, "grad_norm": 1.6171875, "learning_rate": 4.311063011977723e-05, "loss": 0.2039, "step": 2073 }, { "epoch": 56.054054054054056, "grad_norm": 1.296875, "learning_rate": 4.310292679099468e-05, "loss": 0.1805, "step": 2074 }, { "epoch": 56.08108108108108, "grad_norm": 1.5703125, "learning_rate": 4.3095219846965895e-05, "loss": 0.2125, "step": 2075 }, { "epoch": 56.108108108108105, "grad_norm": 1.1484375, "learning_rate": 4.3087509289229986e-05, "loss": 0.1563, "step": 2076 }, { "epoch": 56.13513513513514, "grad_norm": 1.6171875, "learning_rate": 4.3079795119326797e-05, "loss": 0.1274, "step": 2077 }, { "epoch": 56.16216216216216, "grad_norm": 1.703125, "learning_rate": 4.307207733879687e-05, "loss": 0.1614, "step": 2078 }, { "epoch": 56.189189189189186, "grad_norm": 1.90625, "learning_rate": 4.306435594918149e-05, "loss": 0.1127, "step": 2079 }, { "epoch": 56.21621621621622, "grad_norm": 1.265625, "learning_rate": 4.305663095202267e-05, "loss": 0.1571, "step": 2080 }, { "epoch": 56.24324324324324, "grad_norm": 1.390625, "learning_rate": 4.3048902348863116e-05, "loss": 0.1597, "step": 2081 }, { "epoch": 56.270270270270274, "grad_norm": 2.375, "learning_rate": 4.3041170141246276e-05, "loss": 0.145, "step": 2082 }, { "epoch": 56.2972972972973, "grad_norm": 1.1640625, "learning_rate": 4.3033434330716316e-05, "loss": 0.0822, "step": 2083 }, { "epoch": 56.32432432432432, "grad_norm": 1.3984375, "learning_rate": 4.30256949188181e-05, "loss": 0.1954, "step": 2084 }, { "epoch": 56.351351351351354, "grad_norm": 1.109375, "learning_rate": 4.301795190709725e-05, "loss": 0.1072, "step": 2085 }, { "epoch": 56.37837837837838, "grad_norm": 1.015625, "learning_rate": 4.301020529710008e-05, "loss": 0.1036, "step": 2086 }, { "epoch": 56.4054054054054, "grad_norm": 1.4453125, "learning_rate": 4.300245509037361e-05, "loss": 0.1782, "step": 2087 }, { "epoch": 56.432432432432435, "grad_norm": 1.109375, "learning_rate": 4.299470128846561e-05, "loss": 0.1402, "step": 2088 }, { "epoch": 56.45945945945946, "grad_norm": 1.0390625, "learning_rate": 4.298694389292455e-05, "loss": 0.1237, "step": 2089 }, { "epoch": 56.486486486486484, "grad_norm": 1.2734375, "learning_rate": 4.297918290529962e-05, "loss": 0.1463, "step": 2090 }, { "epoch": 56.513513513513516, "grad_norm": 1.1875, "learning_rate": 4.2971418327140735e-05, "loss": 0.1503, "step": 2091 }, { "epoch": 56.54054054054054, "grad_norm": 1.6875, "learning_rate": 4.296365015999851e-05, "loss": 0.2254, "step": 2092 }, { "epoch": 56.567567567567565, "grad_norm": 0.9921875, "learning_rate": 4.29558784054243e-05, "loss": 0.0782, "step": 2093 }, { "epoch": 56.5945945945946, "grad_norm": 1.6171875, "learning_rate": 4.294810306497015e-05, "loss": 0.1732, "step": 2094 }, { "epoch": 56.62162162162162, "grad_norm": 0.8359375, "learning_rate": 4.294032414018885e-05, "loss": 0.0961, "step": 2095 }, { "epoch": 56.648648648648646, "grad_norm": 1.078125, "learning_rate": 4.293254163263388e-05, "loss": 0.1098, "step": 2096 }, { "epoch": 56.67567567567568, "grad_norm": 1.484375, "learning_rate": 4.292475554385944e-05, "loss": 0.1723, "step": 2097 }, { "epoch": 56.7027027027027, "grad_norm": 1.421875, "learning_rate": 4.291696587542047e-05, "loss": 0.1608, "step": 2098 }, { "epoch": 56.729729729729726, "grad_norm": 1.359375, "learning_rate": 4.290917262887259e-05, "loss": 0.1576, "step": 2099 }, { "epoch": 56.75675675675676, "grad_norm": 1.4140625, "learning_rate": 4.290137580577216e-05, "loss": 0.1206, "step": 2100 }, { "epoch": 56.78378378378378, "grad_norm": 0.85546875, "learning_rate": 4.289357540767624e-05, "loss": 0.0857, "step": 2101 }, { "epoch": 56.810810810810814, "grad_norm": 1.84375, "learning_rate": 4.2885771436142614e-05, "loss": 0.1822, "step": 2102 }, { "epoch": 56.83783783783784, "grad_norm": 1.3515625, "learning_rate": 4.287796389272977e-05, "loss": 0.1766, "step": 2103 }, { "epoch": 56.86486486486486, "grad_norm": 1.734375, "learning_rate": 4.287015277899692e-05, "loss": 0.1653, "step": 2104 }, { "epoch": 56.891891891891895, "grad_norm": 1.1640625, "learning_rate": 4.286233809650398e-05, "loss": 0.1192, "step": 2105 }, { "epoch": 56.91891891891892, "grad_norm": 1.46875, "learning_rate": 4.2854519846811566e-05, "loss": 0.1492, "step": 2106 }, { "epoch": 56.945945945945944, "grad_norm": 1.6875, "learning_rate": 4.2846698031481056e-05, "loss": 0.1541, "step": 2107 }, { "epoch": 56.972972972972975, "grad_norm": 1.6171875, "learning_rate": 4.283887265207448e-05, "loss": 0.1983, "step": 2108 }, { "epoch": 57.0, "grad_norm": 1.8515625, "learning_rate": 4.2831043710154606e-05, "loss": 0.2469, "step": 2109 }, { "epoch": 57.027027027027025, "grad_norm": 1.0859375, "learning_rate": 4.282321120728493e-05, "loss": 0.1142, "step": 2110 }, { "epoch": 57.054054054054056, "grad_norm": 1.4140625, "learning_rate": 4.281537514502963e-05, "loss": 0.1071, "step": 2111 }, { "epoch": 57.08108108108108, "grad_norm": 1.5859375, "learning_rate": 4.2807535524953604e-05, "loss": 0.2421, "step": 2112 }, { "epoch": 57.108108108108105, "grad_norm": 1.0078125, "learning_rate": 4.279969234862248e-05, "loss": 0.1196, "step": 2113 }, { "epoch": 57.13513513513514, "grad_norm": 1.34375, "learning_rate": 4.279184561760256e-05, "loss": 0.1998, "step": 2114 }, { "epoch": 57.16216216216216, "grad_norm": 1.8125, "learning_rate": 4.2783995333460895e-05, "loss": 0.2173, "step": 2115 }, { "epoch": 57.189189189189186, "grad_norm": 1.625, "learning_rate": 4.277614149776522e-05, "loss": 0.231, "step": 2116 }, { "epoch": 57.21621621621622, "grad_norm": 1.09375, "learning_rate": 4.276828411208397e-05, "loss": 0.1597, "step": 2117 }, { "epoch": 57.24324324324324, "grad_norm": 1.4375, "learning_rate": 4.276042317798633e-05, "loss": 0.1592, "step": 2118 }, { "epoch": 57.270270270270274, "grad_norm": 1.859375, "learning_rate": 4.275255869704214e-05, "loss": 0.2291, "step": 2119 }, { "epoch": 57.2972972972973, "grad_norm": 1.34375, "learning_rate": 4.274469067082201e-05, "loss": 0.1515, "step": 2120 }, { "epoch": 57.32432432432432, "grad_norm": 1.1171875, "learning_rate": 4.273681910089721e-05, "loss": 0.1071, "step": 2121 }, { "epoch": 57.351351351351354, "grad_norm": 1.2109375, "learning_rate": 4.272894398883972e-05, "loss": 0.1533, "step": 2122 }, { "epoch": 57.37837837837838, "grad_norm": 1.6328125, "learning_rate": 4.272106533622225e-05, "loss": 0.2155, "step": 2123 }, { "epoch": 57.4054054054054, "grad_norm": 2.0, "learning_rate": 4.271318314461822e-05, "loss": 0.2057, "step": 2124 }, { "epoch": 57.432432432432435, "grad_norm": 1.078125, "learning_rate": 4.270529741560172e-05, "loss": 0.0897, "step": 2125 }, { "epoch": 57.45945945945946, "grad_norm": 1.3671875, "learning_rate": 4.2697408150747576e-05, "loss": 0.1727, "step": 2126 }, { "epoch": 57.486486486486484, "grad_norm": 1.6796875, "learning_rate": 4.268951535163133e-05, "loss": 0.1892, "step": 2127 }, { "epoch": 57.513513513513516, "grad_norm": 2.578125, "learning_rate": 4.26816190198292e-05, "loss": 0.255, "step": 2128 }, { "epoch": 57.54054054054054, "grad_norm": 1.234375, "learning_rate": 4.2673719156918115e-05, "loss": 0.1655, "step": 2129 }, { "epoch": 57.567567567567565, "grad_norm": 1.703125, "learning_rate": 4.2665815764475744e-05, "loss": 0.1467, "step": 2130 }, { "epoch": 57.5945945945946, "grad_norm": 1.5859375, "learning_rate": 4.265790884408042e-05, "loss": 0.1778, "step": 2131 }, { "epoch": 57.62162162162162, "grad_norm": 2.140625, "learning_rate": 4.2649998397311186e-05, "loss": 0.1887, "step": 2132 }, { "epoch": 57.648648648648646, "grad_norm": 1.953125, "learning_rate": 4.264208442574781e-05, "loss": 0.2066, "step": 2133 }, { "epoch": 57.67567567567568, "grad_norm": 1.4140625, "learning_rate": 4.263416693097074e-05, "loss": 0.1799, "step": 2134 }, { "epoch": 57.7027027027027, "grad_norm": 1.09375, "learning_rate": 4.262624591456117e-05, "loss": 0.1043, "step": 2135 }, { "epoch": 57.729729729729726, "grad_norm": 1.375, "learning_rate": 4.261832137810092e-05, "loss": 0.1633, "step": 2136 }, { "epoch": 57.75675675675676, "grad_norm": 0.73828125, "learning_rate": 4.26103933231726e-05, "loss": 0.0933, "step": 2137 }, { "epoch": 57.78378378378378, "grad_norm": 1.5, "learning_rate": 4.260246175135948e-05, "loss": 0.1379, "step": 2138 }, { "epoch": 57.810810810810814, "grad_norm": 1.2734375, "learning_rate": 4.2594526664245505e-05, "loss": 0.1285, "step": 2139 }, { "epoch": 57.83783783783784, "grad_norm": 1.3046875, "learning_rate": 4.258658806341538e-05, "loss": 0.1575, "step": 2140 }, { "epoch": 57.86486486486486, "grad_norm": 1.3359375, "learning_rate": 4.257864595045448e-05, "loss": 0.1408, "step": 2141 }, { "epoch": 57.891891891891895, "grad_norm": 1.65625, "learning_rate": 4.257070032694888e-05, "loss": 0.2026, "step": 2142 }, { "epoch": 57.91891891891892, "grad_norm": 1.8359375, "learning_rate": 4.256275119448535e-05, "loss": 0.1953, "step": 2143 }, { "epoch": 57.945945945945944, "grad_norm": 0.953125, "learning_rate": 4.255479855465139e-05, "loss": 0.0991, "step": 2144 }, { "epoch": 57.972972972972975, "grad_norm": 1.4140625, "learning_rate": 4.2546842409035184e-05, "loss": 0.1404, "step": 2145 }, { "epoch": 58.0, "grad_norm": 1.1328125, "learning_rate": 4.253888275922559e-05, "loss": 0.1162, "step": 2146 }, { "epoch": 58.027027027027025, "grad_norm": 1.03125, "learning_rate": 4.2530919606812216e-05, "loss": 0.1146, "step": 2147 }, { "epoch": 58.054054054054056, "grad_norm": 1.359375, "learning_rate": 4.252295295338534e-05, "loss": 0.1561, "step": 2148 }, { "epoch": 58.08108108108108, "grad_norm": 1.0703125, "learning_rate": 4.251498280053593e-05, "loss": 0.1083, "step": 2149 }, { "epoch": 58.108108108108105, "grad_norm": 0.80078125, "learning_rate": 4.2507009149855676e-05, "loss": 0.0915, "step": 2150 }, { "epoch": 58.13513513513514, "grad_norm": 0.89453125, "learning_rate": 4.2499032002936956e-05, "loss": 0.1003, "step": 2151 }, { "epoch": 58.16216216216216, "grad_norm": 1.3984375, "learning_rate": 4.249105136137285e-05, "loss": 0.202, "step": 2152 }, { "epoch": 58.189189189189186, "grad_norm": 0.93359375, "learning_rate": 4.248306722675712e-05, "loss": 0.1059, "step": 2153 }, { "epoch": 58.21621621621622, "grad_norm": 0.86328125, "learning_rate": 4.247507960068425e-05, "loss": 0.0942, "step": 2154 }, { "epoch": 58.24324324324324, "grad_norm": 1.359375, "learning_rate": 4.2467088484749406e-05, "loss": 0.2103, "step": 2155 }, { "epoch": 58.270270270270274, "grad_norm": 1.3671875, "learning_rate": 4.245909388054845e-05, "loss": 0.2258, "step": 2156 }, { "epoch": 58.2972972972973, "grad_norm": 1.4765625, "learning_rate": 4.2451095789677945e-05, "loss": 0.1987, "step": 2157 }, { "epoch": 58.32432432432432, "grad_norm": 1.078125, "learning_rate": 4.2443094213735155e-05, "loss": 0.1682, "step": 2158 }, { "epoch": 58.351351351351354, "grad_norm": 1.296875, "learning_rate": 4.243508915431804e-05, "loss": 0.1112, "step": 2159 }, { "epoch": 58.37837837837838, "grad_norm": 1.390625, "learning_rate": 4.2427080613025246e-05, "loss": 0.1523, "step": 2160 }, { "epoch": 58.4054054054054, "grad_norm": 1.1015625, "learning_rate": 4.241906859145611e-05, "loss": 0.1291, "step": 2161 }, { "epoch": 58.432432432432435, "grad_norm": 1.1171875, "learning_rate": 4.241105309121069e-05, "loss": 0.0993, "step": 2162 }, { "epoch": 58.45945945945946, "grad_norm": 1.8515625, "learning_rate": 4.240303411388972e-05, "loss": 0.1884, "step": 2163 }, { "epoch": 58.486486486486484, "grad_norm": 1.8203125, "learning_rate": 4.2395011661094606e-05, "loss": 0.1621, "step": 2164 }, { "epoch": 58.513513513513516, "grad_norm": 0.875, "learning_rate": 4.238698573442751e-05, "loss": 0.1008, "step": 2165 }, { "epoch": 58.54054054054054, "grad_norm": 1.046875, "learning_rate": 4.237895633549121e-05, "loss": 0.0924, "step": 2166 }, { "epoch": 58.567567567567565, "grad_norm": 1.28125, "learning_rate": 4.237092346588926e-05, "loss": 0.1309, "step": 2167 }, { "epoch": 58.5945945945946, "grad_norm": 1.3359375, "learning_rate": 4.2362887127225824e-05, "loss": 0.1396, "step": 2168 }, { "epoch": 58.62162162162162, "grad_norm": 0.91796875, "learning_rate": 4.235484732110583e-05, "loss": 0.1056, "step": 2169 }, { "epoch": 58.648648648648646, "grad_norm": 1.0859375, "learning_rate": 4.234680404913486e-05, "loss": 0.0969, "step": 2170 }, { "epoch": 58.67567567567568, "grad_norm": 1.3046875, "learning_rate": 4.233875731291918e-05, "loss": 0.1425, "step": 2171 }, { "epoch": 58.7027027027027, "grad_norm": 1.296875, "learning_rate": 4.233070711406579e-05, "loss": 0.1131, "step": 2172 }, { "epoch": 58.729729729729726, "grad_norm": 0.59375, "learning_rate": 4.232265345418234e-05, "loss": 0.0714, "step": 2173 }, { "epoch": 58.75675675675676, "grad_norm": 0.88671875, "learning_rate": 4.2314596334877185e-05, "loss": 0.0896, "step": 2174 }, { "epoch": 58.78378378378378, "grad_norm": 1.2578125, "learning_rate": 4.230653575775938e-05, "loss": 0.101, "step": 2175 }, { "epoch": 58.810810810810814, "grad_norm": 1.2265625, "learning_rate": 4.229847172443866e-05, "loss": 0.1607, "step": 2176 }, { "epoch": 58.83783783783784, "grad_norm": 1.078125, "learning_rate": 4.229040423652545e-05, "loss": 0.1222, "step": 2177 }, { "epoch": 58.86486486486486, "grad_norm": 1.1640625, "learning_rate": 4.2282333295630874e-05, "loss": 0.1135, "step": 2178 }, { "epoch": 58.891891891891895, "grad_norm": 1.3046875, "learning_rate": 4.227425890336674e-05, "loss": 0.1182, "step": 2179 }, { "epoch": 58.91891891891892, "grad_norm": 1.2734375, "learning_rate": 4.226618106134554e-05, "loss": 0.0951, "step": 2180 }, { "epoch": 58.945945945945944, "grad_norm": 1.5234375, "learning_rate": 4.225809977118046e-05, "loss": 0.1955, "step": 2181 }, { "epoch": 58.972972972972975, "grad_norm": 1.203125, "learning_rate": 4.225001503448538e-05, "loss": 0.116, "step": 2182 }, { "epoch": 59.0, "grad_norm": 1.1484375, "learning_rate": 4.2241926852874856e-05, "loss": 0.1045, "step": 2183 }, { "epoch": 59.027027027027025, "grad_norm": 1.7890625, "learning_rate": 4.223383522796415e-05, "loss": 0.1891, "step": 2184 }, { "epoch": 59.054054054054056, "grad_norm": 1.34375, "learning_rate": 4.222574016136919e-05, "loss": 0.14, "step": 2185 }, { "epoch": 59.08108108108108, "grad_norm": 1.203125, "learning_rate": 4.221764165470661e-05, "loss": 0.1502, "step": 2186 }, { "epoch": 59.108108108108105, "grad_norm": 1.9765625, "learning_rate": 4.220953970959371e-05, "loss": 0.1505, "step": 2187 }, { "epoch": 59.13513513513514, "grad_norm": 1.28125, "learning_rate": 4.220143432764849e-05, "loss": 0.1056, "step": 2188 }, { "epoch": 59.16216216216216, "grad_norm": 1.296875, "learning_rate": 4.219332551048966e-05, "loss": 0.1786, "step": 2189 }, { "epoch": 59.189189189189186, "grad_norm": 1.6953125, "learning_rate": 4.218521325973657e-05, "loss": 0.1746, "step": 2190 }, { "epoch": 59.21621621621622, "grad_norm": 1.671875, "learning_rate": 4.2177097577009284e-05, "loss": 0.1564, "step": 2191 }, { "epoch": 59.24324324324324, "grad_norm": 1.46875, "learning_rate": 4.2168978463928546e-05, "loss": 0.1169, "step": 2192 }, { "epoch": 59.270270270270274, "grad_norm": 0.69921875, "learning_rate": 4.216085592211577e-05, "loss": 0.0848, "step": 2193 }, { "epoch": 59.2972972972973, "grad_norm": 0.94921875, "learning_rate": 4.215272995319309e-05, "loss": 0.102, "step": 2194 }, { "epoch": 59.32432432432432, "grad_norm": 1.1484375, "learning_rate": 4.214460055878329e-05, "loss": 0.112, "step": 2195 }, { "epoch": 59.351351351351354, "grad_norm": 1.3515625, "learning_rate": 4.2136467740509855e-05, "loss": 0.1993, "step": 2196 }, { "epoch": 59.37837837837838, "grad_norm": 1.4921875, "learning_rate": 4.2128331499996955e-05, "loss": 0.2068, "step": 2197 }, { "epoch": 59.4054054054054, "grad_norm": 1.53125, "learning_rate": 4.2120191838869426e-05, "loss": 0.1927, "step": 2198 }, { "epoch": 59.432432432432435, "grad_norm": 0.9453125, "learning_rate": 4.211204875875282e-05, "loss": 0.1019, "step": 2199 }, { "epoch": 59.45945945945946, "grad_norm": 1.3515625, "learning_rate": 4.210390226127333e-05, "loss": 0.1019, "step": 2200 }, { "epoch": 59.486486486486484, "grad_norm": 1.671875, "learning_rate": 4.209575234805786e-05, "loss": 0.2249, "step": 2201 }, { "epoch": 59.513513513513516, "grad_norm": 1.140625, "learning_rate": 4.2087599020733995e-05, "loss": 0.1117, "step": 2202 }, { "epoch": 59.54054054054054, "grad_norm": 1.015625, "learning_rate": 4.207944228092999e-05, "loss": 0.142, "step": 2203 }, { "epoch": 59.567567567567565, "grad_norm": 2.171875, "learning_rate": 4.207128213027479e-05, "loss": 0.145, "step": 2204 }, { "epoch": 59.5945945945946, "grad_norm": 2.0625, "learning_rate": 4.206311857039801e-05, "loss": 0.1184, "step": 2205 }, { "epoch": 59.62162162162162, "grad_norm": 1.6953125, "learning_rate": 4.205495160292996e-05, "loss": 0.1183, "step": 2206 }, { "epoch": 59.648648648648646, "grad_norm": 1.3046875, "learning_rate": 4.204678122950164e-05, "loss": 0.147, "step": 2207 }, { "epoch": 59.67567567567568, "grad_norm": 2.265625, "learning_rate": 4.203860745174468e-05, "loss": 0.2946, "step": 2208 }, { "epoch": 59.7027027027027, "grad_norm": 1.7578125, "learning_rate": 4.2030430271291465e-05, "loss": 0.1503, "step": 2209 }, { "epoch": 59.729729729729726, "grad_norm": 1.5078125, "learning_rate": 4.202224968977499e-05, "loss": 0.1388, "step": 2210 }, { "epoch": 59.75675675675676, "grad_norm": 1.390625, "learning_rate": 4.201406570882898e-05, "loss": 0.1722, "step": 2211 }, { "epoch": 59.78378378378378, "grad_norm": 1.328125, "learning_rate": 4.2005878330087795e-05, "loss": 0.1781, "step": 2212 }, { "epoch": 59.810810810810814, "grad_norm": 2.671875, "learning_rate": 4.199768755518651e-05, "loss": 0.148, "step": 2213 }, { "epoch": 59.83783783783784, "grad_norm": 1.875, "learning_rate": 4.1989493385760864e-05, "loss": 0.1044, "step": 2214 }, { "epoch": 59.86486486486486, "grad_norm": 1.46875, "learning_rate": 4.1981295823447264e-05, "loss": 0.1187, "step": 2215 }, { "epoch": 59.891891891891895, "grad_norm": 1.203125, "learning_rate": 4.197309486988282e-05, "loss": 0.1182, "step": 2216 }, { "epoch": 59.91891891891892, "grad_norm": 1.6640625, "learning_rate": 4.1964890526705295e-05, "loss": 0.1671, "step": 2217 }, { "epoch": 59.945945945945944, "grad_norm": 1.5078125, "learning_rate": 4.1956682795553146e-05, "loss": 0.2333, "step": 2218 }, { "epoch": 59.972972972972975, "grad_norm": 1.1484375, "learning_rate": 4.194847167806548e-05, "loss": 0.1243, "step": 2219 }, { "epoch": 60.0, "grad_norm": 1.1953125, "learning_rate": 4.1940257175882114e-05, "loss": 0.1409, "step": 2220 }, { "epoch": 60.027027027027025, "grad_norm": 1.1953125, "learning_rate": 4.193203929064353e-05, "loss": 0.1241, "step": 2221 }, { "epoch": 60.054054054054056, "grad_norm": 1.3515625, "learning_rate": 4.192381802399087e-05, "loss": 0.1578, "step": 2222 }, { "epoch": 60.08108108108108, "grad_norm": 1.4296875, "learning_rate": 4.191559337756596e-05, "loss": 0.2159, "step": 2223 }, { "epoch": 60.108108108108105, "grad_norm": 1.0390625, "learning_rate": 4.190736535301132e-05, "loss": 0.1464, "step": 2224 }, { "epoch": 60.13513513513514, "grad_norm": 1.40625, "learning_rate": 4.1899133951970115e-05, "loss": 0.1457, "step": 2225 }, { "epoch": 60.16216216216216, "grad_norm": 1.125, "learning_rate": 4.1890899176086196e-05, "loss": 0.1642, "step": 2226 }, { "epoch": 60.189189189189186, "grad_norm": 1.5390625, "learning_rate": 4.188266102700409e-05, "loss": 0.1657, "step": 2227 }, { "epoch": 60.21621621621622, "grad_norm": 1.4375, "learning_rate": 4.1874419506369014e-05, "loss": 0.1408, "step": 2228 }, { "epoch": 60.24324324324324, "grad_norm": 1.125, "learning_rate": 4.1866174615826825e-05, "loss": 0.1371, "step": 2229 }, { "epoch": 60.270270270270274, "grad_norm": 0.87890625, "learning_rate": 4.185792635702407e-05, "loss": 0.0992, "step": 2230 }, { "epoch": 60.2972972972973, "grad_norm": 1.6015625, "learning_rate": 4.184967473160797e-05, "loss": 0.2065, "step": 2231 }, { "epoch": 60.32432432432432, "grad_norm": 1.9296875, "learning_rate": 4.184141974122643e-05, "loss": 0.1572, "step": 2232 }, { "epoch": 60.351351351351354, "grad_norm": 0.76953125, "learning_rate": 4.1833161387527986e-05, "loss": 0.0988, "step": 2233 }, { "epoch": 60.37837837837838, "grad_norm": 1.0859375, "learning_rate": 4.1824899672161895e-05, "loss": 0.1204, "step": 2234 }, { "epoch": 60.4054054054054, "grad_norm": 0.90625, "learning_rate": 4.1816634596778054e-05, "loss": 0.088, "step": 2235 }, { "epoch": 60.432432432432435, "grad_norm": 1.4609375, "learning_rate": 4.180836616302704e-05, "loss": 0.1494, "step": 2236 }, { "epoch": 60.45945945945946, "grad_norm": 0.96875, "learning_rate": 4.1800094372560113e-05, "loss": 0.0843, "step": 2237 }, { "epoch": 60.486486486486484, "grad_norm": 1.4609375, "learning_rate": 4.179181922702918e-05, "loss": 0.208, "step": 2238 }, { "epoch": 60.513513513513516, "grad_norm": 1.5625, "learning_rate": 4.178354072808683e-05, "loss": 0.2196, "step": 2239 }, { "epoch": 60.54054054054054, "grad_norm": 0.90625, "learning_rate": 4.1775258877386324e-05, "loss": 0.0943, "step": 2240 }, { "epoch": 60.567567567567565, "grad_norm": 1.4453125, "learning_rate": 4.176697367658159e-05, "loss": 0.1908, "step": 2241 }, { "epoch": 60.5945945945946, "grad_norm": 1.640625, "learning_rate": 4.175868512732722e-05, "loss": 0.1838, "step": 2242 }, { "epoch": 60.62162162162162, "grad_norm": 1.28125, "learning_rate": 4.1750393231278484e-05, "loss": 0.1656, "step": 2243 }, { "epoch": 60.648648648648646, "grad_norm": 1.1484375, "learning_rate": 4.174209799009132e-05, "loss": 0.1032, "step": 2244 }, { "epoch": 60.67567567567568, "grad_norm": 1.578125, "learning_rate": 4.1733799405422326e-05, "loss": 0.1887, "step": 2245 }, { "epoch": 60.7027027027027, "grad_norm": 1.328125, "learning_rate": 4.172549747892876e-05, "loss": 0.176, "step": 2246 }, { "epoch": 60.729729729729726, "grad_norm": 1.046875, "learning_rate": 4.171719221226857e-05, "loss": 0.1198, "step": 2247 }, { "epoch": 60.75675675675676, "grad_norm": 1.6484375, "learning_rate": 4.1708883607100365e-05, "loss": 0.1437, "step": 2248 }, { "epoch": 60.78378378378378, "grad_norm": 1.3984375, "learning_rate": 4.1700571665083405e-05, "loss": 0.2109, "step": 2249 }, { "epoch": 60.810810810810814, "grad_norm": 1.28125, "learning_rate": 4.1692256387877624e-05, "loss": 0.1223, "step": 2250 }, { "epoch": 60.83783783783784, "grad_norm": 1.3359375, "learning_rate": 4.1683937777143646e-05, "loss": 0.0926, "step": 2251 }, { "epoch": 60.86486486486486, "grad_norm": 1.515625, "learning_rate": 4.167561583454272e-05, "loss": 0.2691, "step": 2252 }, { "epoch": 60.891891891891895, "grad_norm": 2.15625, "learning_rate": 4.166729056173678e-05, "loss": 0.2975, "step": 2253 }, { "epoch": 60.91891891891892, "grad_norm": 1.5, "learning_rate": 4.1658961960388435e-05, "loss": 0.1409, "step": 2254 }, { "epoch": 60.945945945945944, "grad_norm": 1.6328125, "learning_rate": 4.1650630032160946e-05, "loss": 0.1575, "step": 2255 }, { "epoch": 60.972972972972975, "grad_norm": 1.671875, "learning_rate": 4.164229477871824e-05, "loss": 0.2092, "step": 2256 }, { "epoch": 61.0, "grad_norm": 0.78125, "learning_rate": 4.163395620172491e-05, "loss": 0.0977, "step": 2257 }, { "epoch": 61.027027027027025, "grad_norm": 1.75, "learning_rate": 4.16256143028462e-05, "loss": 0.1599, "step": 2258 }, { "epoch": 61.054054054054056, "grad_norm": 1.4140625, "learning_rate": 4.161726908374806e-05, "loss": 0.1857, "step": 2259 }, { "epoch": 61.08108108108108, "grad_norm": 0.97265625, "learning_rate": 4.1608920546097044e-05, "loss": 0.1286, "step": 2260 }, { "epoch": 61.108108108108105, "grad_norm": 1.0078125, "learning_rate": 4.160056869156041e-05, "loss": 0.104, "step": 2261 }, { "epoch": 61.13513513513514, "grad_norm": 1.609375, "learning_rate": 4.159221352180606e-05, "loss": 0.1379, "step": 2262 }, { "epoch": 61.16216216216216, "grad_norm": 1.8125, "learning_rate": 4.158385503850257e-05, "loss": 0.1926, "step": 2263 }, { "epoch": 61.189189189189186, "grad_norm": 1.15625, "learning_rate": 4.157549324331916e-05, "loss": 0.1503, "step": 2264 }, { "epoch": 61.21621621621622, "grad_norm": 0.84375, "learning_rate": 4.1567128137925736e-05, "loss": 0.0883, "step": 2265 }, { "epoch": 61.24324324324324, "grad_norm": 1.109375, "learning_rate": 4.155875972399285e-05, "loss": 0.1, "step": 2266 }, { "epoch": 61.270270270270274, "grad_norm": 2.015625, "learning_rate": 4.155038800319171e-05, "loss": 0.1791, "step": 2267 }, { "epoch": 61.2972972972973, "grad_norm": 1.6953125, "learning_rate": 4.154201297719419e-05, "loss": 0.2332, "step": 2268 }, { "epoch": 61.32432432432432, "grad_norm": 1.6796875, "learning_rate": 4.153363464767284e-05, "loss": 0.1614, "step": 2269 }, { "epoch": 61.351351351351354, "grad_norm": 1.328125, "learning_rate": 4.1525253016300836e-05, "loss": 0.1781, "step": 2270 }, { "epoch": 61.37837837837838, "grad_norm": 1.4453125, "learning_rate": 4.151686808475204e-05, "loss": 0.1858, "step": 2271 }, { "epoch": 61.4054054054054, "grad_norm": 1.8984375, "learning_rate": 4.150847985470097e-05, "loss": 0.277, "step": 2272 }, { "epoch": 61.432432432432435, "grad_norm": 1.2421875, "learning_rate": 4.15000883278228e-05, "loss": 0.107, "step": 2273 }, { "epoch": 61.45945945945946, "grad_norm": 1.2265625, "learning_rate": 4.149169350579334e-05, "loss": 0.144, "step": 2274 }, { "epoch": 61.486486486486484, "grad_norm": 1.2265625, "learning_rate": 4.14832953902891e-05, "loss": 0.1231, "step": 2275 }, { "epoch": 61.513513513513516, "grad_norm": 1.125, "learning_rate": 4.1474893982987216e-05, "loss": 0.0967, "step": 2276 }, { "epoch": 61.54054054054054, "grad_norm": 1.1640625, "learning_rate": 4.14664892855655e-05, "loss": 0.108, "step": 2277 }, { "epoch": 61.567567567567565, "grad_norm": 1.2734375, "learning_rate": 4.1458081299702405e-05, "loss": 0.1379, "step": 2278 }, { "epoch": 61.5945945945946, "grad_norm": 1.9140625, "learning_rate": 4.144967002707706e-05, "loss": 0.2079, "step": 2279 }, { "epoch": 61.62162162162162, "grad_norm": 1.96875, "learning_rate": 4.1441255469369216e-05, "loss": 0.1476, "step": 2280 }, { "epoch": 61.648648648648646, "grad_norm": 2.125, "learning_rate": 4.143283762825932e-05, "loss": 0.2044, "step": 2281 }, { "epoch": 61.67567567567568, "grad_norm": 1.1484375, "learning_rate": 4.142441650542846e-05, "loss": 0.1508, "step": 2282 }, { "epoch": 61.7027027027027, "grad_norm": 1.03125, "learning_rate": 4.1415992102558366e-05, "loss": 0.1188, "step": 2283 }, { "epoch": 61.729729729729726, "grad_norm": 1.640625, "learning_rate": 4.140756442133145e-05, "loss": 0.157, "step": 2284 }, { "epoch": 61.75675675675676, "grad_norm": 1.765625, "learning_rate": 4.139913346343073e-05, "loss": 0.2414, "step": 2285 }, { "epoch": 61.78378378378378, "grad_norm": 1.046875, "learning_rate": 4.139069923053995e-05, "loss": 0.1059, "step": 2286 }, { "epoch": 61.810810810810814, "grad_norm": 1.375, "learning_rate": 4.138226172434345e-05, "loss": 0.1715, "step": 2287 }, { "epoch": 61.83783783783784, "grad_norm": 1.3828125, "learning_rate": 4.137382094652624e-05, "loss": 0.136, "step": 2288 }, { "epoch": 61.86486486486486, "grad_norm": 1.4375, "learning_rate": 4.136537689877399e-05, "loss": 0.1105, "step": 2289 }, { "epoch": 61.891891891891895, "grad_norm": 1.359375, "learning_rate": 4.135692958277302e-05, "loss": 0.1988, "step": 2290 }, { "epoch": 61.91891891891892, "grad_norm": 0.9296875, "learning_rate": 4.13484790002103e-05, "loss": 0.1121, "step": 2291 }, { "epoch": 61.945945945945944, "grad_norm": 1.2421875, "learning_rate": 4.134002515277347e-05, "loss": 0.1451, "step": 2292 }, { "epoch": 61.972972972972975, "grad_norm": 1.4765625, "learning_rate": 4.1331568042150773e-05, "loss": 0.1621, "step": 2293 }, { "epoch": 62.0, "grad_norm": 1.3671875, "learning_rate": 4.132310767003117e-05, "loss": 0.1811, "step": 2294 }, { "epoch": 62.027027027027025, "grad_norm": 1.2734375, "learning_rate": 4.131464403810422e-05, "loss": 0.1554, "step": 2295 }, { "epoch": 62.054054054054056, "grad_norm": 1.234375, "learning_rate": 4.130617714806016e-05, "loss": 0.1575, "step": 2296 }, { "epoch": 62.08108108108108, "grad_norm": 1.75, "learning_rate": 4.129770700158987e-05, "loss": 0.2305, "step": 2297 }, { "epoch": 62.108108108108105, "grad_norm": 1.5078125, "learning_rate": 4.1289233600384876e-05, "loss": 0.131, "step": 2298 }, { "epoch": 62.13513513513514, "grad_norm": 1.0390625, "learning_rate": 4.1280756946137367e-05, "loss": 0.1122, "step": 2299 }, { "epoch": 62.16216216216216, "grad_norm": 1.171875, "learning_rate": 4.1272277040540174e-05, "loss": 0.1198, "step": 2300 }, { "epoch": 62.189189189189186, "grad_norm": 1.2109375, "learning_rate": 4.1263793885286775e-05, "loss": 0.1024, "step": 2301 }, { "epoch": 62.21621621621622, "grad_norm": 1.171875, "learning_rate": 4.1255307482071296e-05, "loss": 0.1392, "step": 2302 }, { "epoch": 62.24324324324324, "grad_norm": 1.3671875, "learning_rate": 4.124681783258852e-05, "loss": 0.1853, "step": 2303 }, { "epoch": 62.270270270270274, "grad_norm": 1.2890625, "learning_rate": 4.1238324938533865e-05, "loss": 0.1483, "step": 2304 }, { "epoch": 62.2972972972973, "grad_norm": 1.859375, "learning_rate": 4.122982880160341e-05, "loss": 0.1935, "step": 2305 }, { "epoch": 62.32432432432432, "grad_norm": 0.74609375, "learning_rate": 4.122132942349388e-05, "loss": 0.0882, "step": 2306 }, { "epoch": 62.351351351351354, "grad_norm": 0.9765625, "learning_rate": 4.121282680590265e-05, "loss": 0.1023, "step": 2307 }, { "epoch": 62.37837837837838, "grad_norm": 0.66796875, "learning_rate": 4.120432095052772e-05, "loss": 0.078, "step": 2308 }, { "epoch": 62.4054054054054, "grad_norm": 1.3046875, "learning_rate": 4.1195811859067754e-05, "loss": 0.1429, "step": 2309 }, { "epoch": 62.432432432432435, "grad_norm": 1.390625, "learning_rate": 4.118729953322207e-05, "loss": 0.1427, "step": 2310 }, { "epoch": 62.45945945945946, "grad_norm": 1.015625, "learning_rate": 4.117878397469062e-05, "loss": 0.108, "step": 2311 }, { "epoch": 62.486486486486484, "grad_norm": 0.83203125, "learning_rate": 4.1170265185174e-05, "loss": 0.0949, "step": 2312 }, { "epoch": 62.513513513513516, "grad_norm": 1.171875, "learning_rate": 4.116174316637345e-05, "loss": 0.1555, "step": 2313 }, { "epoch": 62.54054054054054, "grad_norm": 0.85546875, "learning_rate": 4.115321791999088e-05, "loss": 0.1039, "step": 2314 }, { "epoch": 62.567567567567565, "grad_norm": 1.6875, "learning_rate": 4.1144689447728805e-05, "loss": 0.2978, "step": 2315 }, { "epoch": 62.5945945945946, "grad_norm": 1.515625, "learning_rate": 4.113615775129042e-05, "loss": 0.1936, "step": 2316 }, { "epoch": 62.62162162162162, "grad_norm": 1.46875, "learning_rate": 4.112762283237954e-05, "loss": 0.1412, "step": 2317 }, { "epoch": 62.648648648648646, "grad_norm": 1.1875, "learning_rate": 4.1119084692700624e-05, "loss": 0.1732, "step": 2318 }, { "epoch": 62.67567567567568, "grad_norm": 1.3125, "learning_rate": 4.1110543333958796e-05, "loss": 0.1525, "step": 2319 }, { "epoch": 62.7027027027027, "grad_norm": 1.625, "learning_rate": 4.1101998757859794e-05, "loss": 0.1482, "step": 2320 }, { "epoch": 62.729729729729726, "grad_norm": 1.140625, "learning_rate": 4.109345096611004e-05, "loss": 0.1264, "step": 2321 }, { "epoch": 62.75675675675676, "grad_norm": 1.125, "learning_rate": 4.1084899960416535e-05, "loss": 0.1257, "step": 2322 }, { "epoch": 62.78378378378378, "grad_norm": 1.7578125, "learning_rate": 4.107634574248698e-05, "loss": 0.1824, "step": 2323 }, { "epoch": 62.810810810810814, "grad_norm": 1.0859375, "learning_rate": 4.10677883140297e-05, "loss": 0.1047, "step": 2324 }, { "epoch": 62.83783783783784, "grad_norm": 1.5546875, "learning_rate": 4.1059227676753645e-05, "loss": 0.1318, "step": 2325 }, { "epoch": 62.86486486486486, "grad_norm": 1.4140625, "learning_rate": 4.105066383236841e-05, "loss": 0.2044, "step": 2326 }, { "epoch": 62.891891891891895, "grad_norm": 1.5078125, "learning_rate": 4.1042096782584264e-05, "loss": 0.1645, "step": 2327 }, { "epoch": 62.91891891891892, "grad_norm": 1.6796875, "learning_rate": 4.1033526529112064e-05, "loss": 0.1926, "step": 2328 }, { "epoch": 62.945945945945944, "grad_norm": 1.15625, "learning_rate": 4.102495307366335e-05, "loss": 0.1301, "step": 2329 }, { "epoch": 62.972972972972975, "grad_norm": 1.3671875, "learning_rate": 4.101637641795028e-05, "loss": 0.141, "step": 2330 }, { "epoch": 63.0, "grad_norm": 1.1640625, "learning_rate": 4.100779656368566e-05, "loss": 0.1294, "step": 2331 }, { "epoch": 63.027027027027025, "grad_norm": 0.78125, "learning_rate": 4.099921351258292e-05, "loss": 0.1185, "step": 2332 }, { "epoch": 63.054054054054056, "grad_norm": 1.09375, "learning_rate": 4.099062726635614e-05, "loss": 0.1314, "step": 2333 }, { "epoch": 63.08108108108108, "grad_norm": 0.6484375, "learning_rate": 4.098203782672004e-05, "loss": 0.0742, "step": 2334 }, { "epoch": 63.108108108108105, "grad_norm": 1.328125, "learning_rate": 4.097344519538998e-05, "loss": 0.1973, "step": 2335 }, { "epoch": 63.13513513513514, "grad_norm": 1.0546875, "learning_rate": 4.096484937408195e-05, "loss": 0.1114, "step": 2336 }, { "epoch": 63.16216216216216, "grad_norm": 1.171875, "learning_rate": 4.0956250364512575e-05, "loss": 0.1686, "step": 2337 }, { "epoch": 63.189189189189186, "grad_norm": 1.2265625, "learning_rate": 4.094764816839912e-05, "loss": 0.1534, "step": 2338 }, { "epoch": 63.21621621621622, "grad_norm": 1.3828125, "learning_rate": 4.09390427874595e-05, "loss": 0.1732, "step": 2339 }, { "epoch": 63.24324324324324, "grad_norm": 1.2109375, "learning_rate": 4.093043422341223e-05, "loss": 0.1287, "step": 2340 }, { "epoch": 63.270270270270274, "grad_norm": 1.1796875, "learning_rate": 4.09218224779765e-05, "loss": 0.1089, "step": 2341 }, { "epoch": 63.2972972972973, "grad_norm": 1.2265625, "learning_rate": 4.091320755287212e-05, "loss": 0.159, "step": 2342 }, { "epoch": 63.32432432432432, "grad_norm": 1.3828125, "learning_rate": 4.090458944981952e-05, "loss": 0.2053, "step": 2343 }, { "epoch": 63.351351351351354, "grad_norm": 0.87890625, "learning_rate": 4.08959681705398e-05, "loss": 0.0831, "step": 2344 }, { "epoch": 63.37837837837838, "grad_norm": 1.1640625, "learning_rate": 4.0887343716754653e-05, "loss": 0.1546, "step": 2345 }, { "epoch": 63.4054054054054, "grad_norm": 0.94921875, "learning_rate": 4.0878716090186447e-05, "loss": 0.0838, "step": 2346 }, { "epoch": 63.432432432432435, "grad_norm": 1.1015625, "learning_rate": 4.0870085292558145e-05, "loss": 0.1945, "step": 2347 }, { "epoch": 63.45945945945946, "grad_norm": 0.9921875, "learning_rate": 4.086145132559337e-05, "loss": 0.0933, "step": 2348 }, { "epoch": 63.486486486486484, "grad_norm": 1.1328125, "learning_rate": 4.085281419101636e-05, "loss": 0.1263, "step": 2349 }, { "epoch": 63.513513513513516, "grad_norm": 1.78125, "learning_rate": 4.084417389055199e-05, "loss": 0.1884, "step": 2350 }, { "epoch": 63.54054054054054, "grad_norm": 0.96875, "learning_rate": 4.08355304259258e-05, "loss": 0.1153, "step": 2351 }, { "epoch": 63.567567567567565, "grad_norm": 1.3046875, "learning_rate": 4.082688379886392e-05, "loss": 0.1373, "step": 2352 }, { "epoch": 63.5945945945946, "grad_norm": 1.203125, "learning_rate": 4.081823401109312e-05, "loss": 0.1444, "step": 2353 }, { "epoch": 63.62162162162162, "grad_norm": 2.0625, "learning_rate": 4.080958106434079e-05, "loss": 0.1282, "step": 2354 }, { "epoch": 63.648648648648646, "grad_norm": 1.6328125, "learning_rate": 4.0800924960335e-05, "loss": 0.2118, "step": 2355 }, { "epoch": 63.67567567567568, "grad_norm": 0.87109375, "learning_rate": 4.07922657008044e-05, "loss": 0.097, "step": 2356 }, { "epoch": 63.7027027027027, "grad_norm": 1.421875, "learning_rate": 4.078360328747829e-05, "loss": 0.1331, "step": 2357 }, { "epoch": 63.729729729729726, "grad_norm": 1.84375, "learning_rate": 4.07749377220866e-05, "loss": 0.1502, "step": 2358 }, { "epoch": 63.75675675675676, "grad_norm": 1.234375, "learning_rate": 4.076626900635988e-05, "loss": 0.1398, "step": 2359 }, { "epoch": 63.78378378378378, "grad_norm": 1.109375, "learning_rate": 4.0757597142029335e-05, "loss": 0.1344, "step": 2360 }, { "epoch": 63.810810810810814, "grad_norm": 1.2421875, "learning_rate": 4.074892213082676e-05, "loss": 0.1513, "step": 2361 }, { "epoch": 63.83783783783784, "grad_norm": 1.296875, "learning_rate": 4.07402439744846e-05, "loss": 0.0902, "step": 2362 }, { "epoch": 63.86486486486486, "grad_norm": 1.8984375, "learning_rate": 4.073156267473595e-05, "loss": 0.3685, "step": 2363 }, { "epoch": 63.891891891891895, "grad_norm": 1.2890625, "learning_rate": 4.0722878233314476e-05, "loss": 0.1609, "step": 2364 }, { "epoch": 63.91891891891892, "grad_norm": 1.078125, "learning_rate": 4.0714190651954534e-05, "loss": 0.0904, "step": 2365 }, { "epoch": 63.945945945945944, "grad_norm": 1.390625, "learning_rate": 4.070549993239106e-05, "loss": 0.1708, "step": 2366 }, { "epoch": 63.972972972972975, "grad_norm": 1.4140625, "learning_rate": 4.069680607635964e-05, "loss": 0.2065, "step": 2367 }, { "epoch": 64.0, "grad_norm": 1.1015625, "learning_rate": 4.0688109085596486e-05, "loss": 0.1062, "step": 2368 }, { "epoch": 64.02702702702703, "grad_norm": 1.0078125, "learning_rate": 4.067940896183843e-05, "loss": 0.1285, "step": 2369 }, { "epoch": 64.05405405405405, "grad_norm": 1.015625, "learning_rate": 4.067070570682292e-05, "loss": 0.1045, "step": 2370 }, { "epoch": 64.08108108108108, "grad_norm": 1.0859375, "learning_rate": 4.066199932228805e-05, "loss": 0.1008, "step": 2371 }, { "epoch": 64.10810810810811, "grad_norm": 1.4375, "learning_rate": 4.065328980997253e-05, "loss": 0.1548, "step": 2372 }, { "epoch": 64.13513513513513, "grad_norm": 1.3828125, "learning_rate": 4.0644577171615696e-05, "loss": 0.1598, "step": 2373 }, { "epoch": 64.16216216216216, "grad_norm": 1.2578125, "learning_rate": 4.063586140895749e-05, "loss": 0.1259, "step": 2374 }, { "epoch": 64.1891891891892, "grad_norm": 0.6875, "learning_rate": 4.062714252373851e-05, "loss": 0.0894, "step": 2375 }, { "epoch": 64.21621621621621, "grad_norm": 1.59375, "learning_rate": 4.061842051769995e-05, "loss": 0.1566, "step": 2376 }, { "epoch": 64.24324324324324, "grad_norm": 1.109375, "learning_rate": 4.0609695392583655e-05, "loss": 0.154, "step": 2377 }, { "epoch": 64.27027027027027, "grad_norm": 1.265625, "learning_rate": 4.0600967150132066e-05, "loss": 0.177, "step": 2378 }, { "epoch": 64.29729729729729, "grad_norm": 1.4453125, "learning_rate": 4.0592235792088254e-05, "loss": 0.1678, "step": 2379 }, { "epoch": 64.32432432432432, "grad_norm": 1.390625, "learning_rate": 4.0583501320195915e-05, "loss": 0.1486, "step": 2380 }, { "epoch": 64.35135135135135, "grad_norm": 1.2265625, "learning_rate": 4.057476373619938e-05, "loss": 0.1722, "step": 2381 }, { "epoch": 64.37837837837837, "grad_norm": 1.0234375, "learning_rate": 4.0566023041843584e-05, "loss": 0.1355, "step": 2382 }, { "epoch": 64.4054054054054, "grad_norm": 1.453125, "learning_rate": 4.055727923887408e-05, "loss": 0.139, "step": 2383 }, { "epoch": 64.43243243243244, "grad_norm": 1.765625, "learning_rate": 4.054853232903705e-05, "loss": 0.1247, "step": 2384 }, { "epoch": 64.45945945945945, "grad_norm": 1.921875, "learning_rate": 4.053978231407931e-05, "loss": 0.2032, "step": 2385 }, { "epoch": 64.48648648648648, "grad_norm": 1.6875, "learning_rate": 4.053102919574826e-05, "loss": 0.2999, "step": 2386 }, { "epoch": 64.51351351351352, "grad_norm": 1.5078125, "learning_rate": 4.052227297579197e-05, "loss": 0.1306, "step": 2387 }, { "epoch": 64.54054054054055, "grad_norm": 1.6171875, "learning_rate": 4.051351365595908e-05, "loss": 0.121, "step": 2388 }, { "epoch": 64.56756756756756, "grad_norm": 1.046875, "learning_rate": 4.0504751237998875e-05, "loss": 0.142, "step": 2389 }, { "epoch": 64.5945945945946, "grad_norm": 1.203125, "learning_rate": 4.0495985723661256e-05, "loss": 0.1872, "step": 2390 }, { "epoch": 64.62162162162163, "grad_norm": 1.1015625, "learning_rate": 4.048721711469675e-05, "loss": 0.1144, "step": 2391 }, { "epoch": 64.64864864864865, "grad_norm": 1.3125, "learning_rate": 4.047844541285647e-05, "loss": 0.123, "step": 2392 }, { "epoch": 64.67567567567568, "grad_norm": 1.21875, "learning_rate": 4.04696706198922e-05, "loss": 0.1188, "step": 2393 }, { "epoch": 64.70270270270271, "grad_norm": 1.1640625, "learning_rate": 4.046089273755628e-05, "loss": 0.1298, "step": 2394 }, { "epoch": 64.72972972972973, "grad_norm": 1.328125, "learning_rate": 4.0452111767601716e-05, "loss": 0.1527, "step": 2395 }, { "epoch": 64.75675675675676, "grad_norm": 1.40625, "learning_rate": 4.044332771178211e-05, "loss": 0.1877, "step": 2396 }, { "epoch": 64.78378378378379, "grad_norm": 1.8046875, "learning_rate": 4.043454057185168e-05, "loss": 0.1547, "step": 2397 }, { "epoch": 64.8108108108108, "grad_norm": 1.046875, "learning_rate": 4.0425750349565264e-05, "loss": 0.151, "step": 2398 }, { "epoch": 64.83783783783784, "grad_norm": 1.3984375, "learning_rate": 4.0416957046678304e-05, "loss": 0.0949, "step": 2399 }, { "epoch": 64.86486486486487, "grad_norm": 1.2890625, "learning_rate": 4.040816066494688e-05, "loss": 0.0968, "step": 2400 }, { "epoch": 64.89189189189189, "grad_norm": 0.90234375, "learning_rate": 4.0399361206127675e-05, "loss": 0.0904, "step": 2401 }, { "epoch": 64.91891891891892, "grad_norm": 1.3828125, "learning_rate": 4.0390558671977974e-05, "loss": 0.1944, "step": 2402 }, { "epoch": 64.94594594594595, "grad_norm": 1.5078125, "learning_rate": 4.0381753064255694e-05, "loss": 0.1637, "step": 2403 }, { "epoch": 64.97297297297297, "grad_norm": 1.390625, "learning_rate": 4.0372944384719364e-05, "loss": 0.1107, "step": 2404 }, { "epoch": 65.0, "grad_norm": 1.9765625, "learning_rate": 4.0364132635128116e-05, "loss": 0.1876, "step": 2405 }, { "epoch": 65.02702702702703, "grad_norm": 1.2421875, "learning_rate": 4.03553178172417e-05, "loss": 0.135, "step": 2406 }, { "epoch": 65.05405405405405, "grad_norm": 1.140625, "learning_rate": 4.034649993282048e-05, "loss": 0.1347, "step": 2407 }, { "epoch": 65.08108108108108, "grad_norm": 1.75, "learning_rate": 4.033767898362544e-05, "loss": 0.2125, "step": 2408 }, { "epoch": 65.10810810810811, "grad_norm": 1.421875, "learning_rate": 4.032885497141816e-05, "loss": 0.1687, "step": 2409 }, { "epoch": 65.13513513513513, "grad_norm": 1.421875, "learning_rate": 4.0320027897960845e-05, "loss": 0.1749, "step": 2410 }, { "epoch": 65.16216216216216, "grad_norm": 1.1953125, "learning_rate": 4.03111977650163e-05, "loss": 0.1507, "step": 2411 }, { "epoch": 65.1891891891892, "grad_norm": 1.484375, "learning_rate": 4.030236457434795e-05, "loss": 0.1479, "step": 2412 }, { "epoch": 65.21621621621621, "grad_norm": 1.515625, "learning_rate": 4.029352832771983e-05, "loss": 0.1982, "step": 2413 }, { "epoch": 65.24324324324324, "grad_norm": 1.2578125, "learning_rate": 4.028468902689659e-05, "loss": 0.1192, "step": 2414 }, { "epoch": 65.27027027027027, "grad_norm": 1.5, "learning_rate": 4.027584667364347e-05, "loss": 0.1769, "step": 2415 }, { "epoch": 65.29729729729729, "grad_norm": 0.953125, "learning_rate": 4.0267001269726334e-05, "loss": 0.1157, "step": 2416 }, { "epoch": 65.32432432432432, "grad_norm": 1.1015625, "learning_rate": 4.0258152816911665e-05, "loss": 0.1359, "step": 2417 }, { "epoch": 65.35135135135135, "grad_norm": 1.6328125, "learning_rate": 4.0249301316966536e-05, "loss": 0.2015, "step": 2418 }, { "epoch": 65.37837837837837, "grad_norm": 1.015625, "learning_rate": 4.024044677165864e-05, "loss": 0.1024, "step": 2419 }, { "epoch": 65.4054054054054, "grad_norm": 1.203125, "learning_rate": 4.023158918275627e-05, "loss": 0.1382, "step": 2420 }, { "epoch": 65.43243243243244, "grad_norm": 1.3125, "learning_rate": 4.0222728552028326e-05, "loss": 0.1902, "step": 2421 }, { "epoch": 65.45945945945945, "grad_norm": 1.0078125, "learning_rate": 4.0213864881244336e-05, "loss": 0.0899, "step": 2422 }, { "epoch": 65.48648648648648, "grad_norm": 1.5625, "learning_rate": 4.0204998172174415e-05, "loss": 0.1095, "step": 2423 }, { "epoch": 65.51351351351352, "grad_norm": 1.109375, "learning_rate": 4.019612842658929e-05, "loss": 0.1156, "step": 2424 }, { "epoch": 65.54054054054055, "grad_norm": 1.3671875, "learning_rate": 4.018725564626028e-05, "loss": 0.1779, "step": 2425 }, { "epoch": 65.56756756756756, "grad_norm": 1.1953125, "learning_rate": 4.0178379832959336e-05, "loss": 0.1213, "step": 2426 }, { "epoch": 65.5945945945946, "grad_norm": 0.953125, "learning_rate": 4.0169500988459005e-05, "loss": 0.1358, "step": 2427 }, { "epoch": 65.62162162162163, "grad_norm": 1.0234375, "learning_rate": 4.016061911453244e-05, "loss": 0.1023, "step": 2428 }, { "epoch": 65.64864864864865, "grad_norm": 1.046875, "learning_rate": 4.01517342129534e-05, "loss": 0.0951, "step": 2429 }, { "epoch": 65.67567567567568, "grad_norm": 0.90625, "learning_rate": 4.014284628549622e-05, "loss": 0.0982, "step": 2430 }, { "epoch": 65.70270270270271, "grad_norm": 1.53125, "learning_rate": 4.013395533393588e-05, "loss": 0.1672, "step": 2431 }, { "epoch": 65.72972972972973, "grad_norm": 1.078125, "learning_rate": 4.0125061360047956e-05, "loss": 0.1027, "step": 2432 }, { "epoch": 65.75675675675676, "grad_norm": 1.1875, "learning_rate": 4.011616436560861e-05, "loss": 0.1803, "step": 2433 }, { "epoch": 65.78378378378379, "grad_norm": 1.0703125, "learning_rate": 4.0107264352394617e-05, "loss": 0.1318, "step": 2434 }, { "epoch": 65.8108108108108, "grad_norm": 0.94140625, "learning_rate": 4.0098361322183356e-05, "loss": 0.1085, "step": 2435 }, { "epoch": 65.83783783783784, "grad_norm": 1.171875, "learning_rate": 4.008945527675281e-05, "loss": 0.1194, "step": 2436 }, { "epoch": 65.86486486486487, "grad_norm": 1.390625, "learning_rate": 4.0080546217881556e-05, "loss": 0.1549, "step": 2437 }, { "epoch": 65.89189189189189, "grad_norm": 1.09375, "learning_rate": 4.007163414734878e-05, "loss": 0.1254, "step": 2438 }, { "epoch": 65.91891891891892, "grad_norm": 1.0546875, "learning_rate": 4.0062719066934275e-05, "loss": 0.1065, "step": 2439 }, { "epoch": 65.94594594594595, "grad_norm": 1.6796875, "learning_rate": 4.005380097841841e-05, "loss": 0.2141, "step": 2440 }, { "epoch": 65.97297297297297, "grad_norm": 1.171875, "learning_rate": 4.00448798835822e-05, "loss": 0.1269, "step": 2441 }, { "epoch": 66.0, "grad_norm": 1.4375, "learning_rate": 4.0035955784207203e-05, "loss": 0.1895, "step": 2442 }, { "epoch": 66.02702702702703, "grad_norm": 0.96875, "learning_rate": 4.002702868207563e-05, "loss": 0.1051, "step": 2443 }, { "epoch": 66.05405405405405, "grad_norm": 1.7734375, "learning_rate": 4.001809857897025e-05, "loss": 0.2045, "step": 2444 }, { "epoch": 66.08108108108108, "grad_norm": 1.75, "learning_rate": 4.000916547667447e-05, "loss": 0.1898, "step": 2445 }, { "epoch": 66.10810810810811, "grad_norm": 1.5, "learning_rate": 4.000022937697227e-05, "loss": 0.2027, "step": 2446 }, { "epoch": 66.13513513513513, "grad_norm": 0.70703125, "learning_rate": 3.9991290281648217e-05, "loss": 0.0765, "step": 2447 }, { "epoch": 66.16216216216216, "grad_norm": 1.3515625, "learning_rate": 3.998234819248751e-05, "loss": 0.1383, "step": 2448 }, { "epoch": 66.1891891891892, "grad_norm": 0.96875, "learning_rate": 3.997340311127593e-05, "loss": 0.0869, "step": 2449 }, { "epoch": 66.21621621621621, "grad_norm": 1.3984375, "learning_rate": 3.996445503979984e-05, "loss": 0.1281, "step": 2450 }, { "epoch": 66.24324324324324, "grad_norm": 1.1171875, "learning_rate": 3.995550397984624e-05, "loss": 0.1041, "step": 2451 }, { "epoch": 66.27027027027027, "grad_norm": 1.1875, "learning_rate": 3.994654993320268e-05, "loss": 0.1214, "step": 2452 }, { "epoch": 66.29729729729729, "grad_norm": 1.546875, "learning_rate": 3.9937592901657336e-05, "loss": 0.1957, "step": 2453 }, { "epoch": 66.32432432432432, "grad_norm": 1.4375, "learning_rate": 3.9928632886998974e-05, "loss": 0.1403, "step": 2454 }, { "epoch": 66.35135135135135, "grad_norm": 2.03125, "learning_rate": 3.991966989101696e-05, "loss": 0.1218, "step": 2455 }, { "epoch": 66.37837837837837, "grad_norm": 1.5859375, "learning_rate": 3.9910703915501235e-05, "loss": 0.1913, "step": 2456 }, { "epoch": 66.4054054054054, "grad_norm": 1.5078125, "learning_rate": 3.990173496224236e-05, "loss": 0.1455, "step": 2457 }, { "epoch": 66.43243243243244, "grad_norm": 1.96875, "learning_rate": 3.989276303303148e-05, "loss": 0.2653, "step": 2458 }, { "epoch": 66.45945945945945, "grad_norm": 1.4296875, "learning_rate": 3.988378812966034e-05, "loss": 0.1101, "step": 2459 }, { "epoch": 66.48648648648648, "grad_norm": 1.296875, "learning_rate": 3.987481025392126e-05, "loss": 0.1155, "step": 2460 }, { "epoch": 66.51351351351352, "grad_norm": 1.2734375, "learning_rate": 3.986582940760717e-05, "loss": 0.156, "step": 2461 }, { "epoch": 66.54054054054055, "grad_norm": 2.078125, "learning_rate": 3.985684559251159e-05, "loss": 0.1469, "step": 2462 }, { "epoch": 66.56756756756756, "grad_norm": 1.9765625, "learning_rate": 3.9847858810428656e-05, "loss": 0.1086, "step": 2463 }, { "epoch": 66.5945945945946, "grad_norm": 0.76171875, "learning_rate": 3.983886906315304e-05, "loss": 0.0882, "step": 2464 }, { "epoch": 66.62162162162163, "grad_norm": 1.375, "learning_rate": 3.9829876352480066e-05, "loss": 0.1772, "step": 2465 }, { "epoch": 66.64864864864865, "grad_norm": 0.7421875, "learning_rate": 3.9820880680205616e-05, "loss": 0.0819, "step": 2466 }, { "epoch": 66.67567567567568, "grad_norm": 1.1953125, "learning_rate": 3.981188204812616e-05, "loss": 0.1057, "step": 2467 }, { "epoch": 66.70270270270271, "grad_norm": 1.0703125, "learning_rate": 3.980288045803878e-05, "loss": 0.1415, "step": 2468 }, { "epoch": 66.72972972972973, "grad_norm": 1.1171875, "learning_rate": 3.979387591174115e-05, "loss": 0.1116, "step": 2469 }, { "epoch": 66.75675675675676, "grad_norm": 1.171875, "learning_rate": 3.9784868411031504e-05, "loss": 0.1501, "step": 2470 }, { "epoch": 66.78378378378379, "grad_norm": 0.83984375, "learning_rate": 3.9775857957708693e-05, "loss": 0.0979, "step": 2471 }, { "epoch": 66.8108108108108, "grad_norm": 1.921875, "learning_rate": 3.9766844553572156e-05, "loss": 0.2711, "step": 2472 }, { "epoch": 66.83783783783784, "grad_norm": 1.4453125, "learning_rate": 3.975782820042192e-05, "loss": 0.1684, "step": 2473 }, { "epoch": 66.86486486486487, "grad_norm": 1.3125, "learning_rate": 3.974880890005857e-05, "loss": 0.1252, "step": 2474 }, { "epoch": 66.89189189189189, "grad_norm": 1.0078125, "learning_rate": 3.9739786654283326e-05, "loss": 0.1047, "step": 2475 }, { "epoch": 66.91891891891892, "grad_norm": 1.5234375, "learning_rate": 3.973076146489798e-05, "loss": 0.1687, "step": 2476 }, { "epoch": 66.94594594594595, "grad_norm": 1.28125, "learning_rate": 3.97217333337049e-05, "loss": 0.1234, "step": 2477 }, { "epoch": 66.97297297297297, "grad_norm": 1.2734375, "learning_rate": 3.9712702262507046e-05, "loss": 0.1636, "step": 2478 }, { "epoch": 67.0, "grad_norm": 1.5234375, "learning_rate": 3.9703668253107976e-05, "loss": 0.1452, "step": 2479 }, { "epoch": 67.02702702702703, "grad_norm": 1.0546875, "learning_rate": 3.969463130731183e-05, "loss": 0.1168, "step": 2480 }, { "epoch": 67.05405405405405, "grad_norm": 1.125, "learning_rate": 3.968559142692333e-05, "loss": 0.1682, "step": 2481 }, { "epoch": 67.08108108108108, "grad_norm": 0.98828125, "learning_rate": 3.967654861374777e-05, "loss": 0.1216, "step": 2482 }, { "epoch": 67.10810810810811, "grad_norm": 1.734375, "learning_rate": 3.9667502869591066e-05, "loss": 0.2258, "step": 2483 }, { "epoch": 67.13513513513513, "grad_norm": 1.3359375, "learning_rate": 3.9658454196259696e-05, "loss": 0.1603, "step": 2484 }, { "epoch": 67.16216216216216, "grad_norm": 1.265625, "learning_rate": 3.9649402595560716e-05, "loss": 0.1198, "step": 2485 }, { "epoch": 67.1891891891892, "grad_norm": 1.4140625, "learning_rate": 3.964034806930178e-05, "loss": 0.164, "step": 2486 }, { "epoch": 67.21621621621621, "grad_norm": 1.4609375, "learning_rate": 3.963129061929113e-05, "loss": 0.1975, "step": 2487 }, { "epoch": 67.24324324324324, "grad_norm": 0.8515625, "learning_rate": 3.9622230247337575e-05, "loss": 0.0915, "step": 2488 }, { "epoch": 67.27027027027027, "grad_norm": 1.53125, "learning_rate": 3.961316695525052e-05, "loss": 0.1524, "step": 2489 }, { "epoch": 67.29729729729729, "grad_norm": 1.40625, "learning_rate": 3.960410074483997e-05, "loss": 0.1263, "step": 2490 }, { "epoch": 67.32432432432432, "grad_norm": 1.296875, "learning_rate": 3.959503161791646e-05, "loss": 0.1264, "step": 2491 }, { "epoch": 67.35135135135135, "grad_norm": 1.34375, "learning_rate": 3.9585959576291156e-05, "loss": 0.1713, "step": 2492 }, { "epoch": 67.37837837837837, "grad_norm": 1.2109375, "learning_rate": 3.957688462177579e-05, "loss": 0.092, "step": 2493 }, { "epoch": 67.4054054054054, "grad_norm": 1.9296875, "learning_rate": 3.956780675618268e-05, "loss": 0.2175, "step": 2494 }, { "epoch": 67.43243243243244, "grad_norm": 1.546875, "learning_rate": 3.9558725981324715e-05, "loss": 0.1268, "step": 2495 }, { "epoch": 67.45945945945945, "grad_norm": 1.234375, "learning_rate": 3.954964229901538e-05, "loss": 0.123, "step": 2496 }, { "epoch": 67.48648648648648, "grad_norm": 1.3046875, "learning_rate": 3.9540555711068725e-05, "loss": 0.1692, "step": 2497 }, { "epoch": 67.51351351351352, "grad_norm": 1.21875, "learning_rate": 3.95314662192994e-05, "loss": 0.1864, "step": 2498 }, { "epoch": 67.54054054054055, "grad_norm": 1.953125, "learning_rate": 3.95223738255226e-05, "loss": 0.153, "step": 2499 }, { "epoch": 67.56756756756756, "grad_norm": 1.75, "learning_rate": 3.9513278531554143e-05, "loss": 0.1457, "step": 2500 }, { "epoch": 67.5945945945946, "grad_norm": 1.7734375, "learning_rate": 3.9504180339210396e-05, "loss": 0.2265, "step": 2501 }, { "epoch": 67.62162162162163, "grad_norm": 1.734375, "learning_rate": 3.949507925030832e-05, "loss": 0.1776, "step": 2502 }, { "epoch": 67.64864864864865, "grad_norm": 0.78125, "learning_rate": 3.948597526666544e-05, "loss": 0.0776, "step": 2503 }, { "epoch": 67.67567567567568, "grad_norm": 1.1953125, "learning_rate": 3.947686839009987e-05, "loss": 0.1598, "step": 2504 }, { "epoch": 67.70270270270271, "grad_norm": 1.2890625, "learning_rate": 3.94677586224303e-05, "loss": 0.1483, "step": 2505 }, { "epoch": 67.72972972972973, "grad_norm": 1.078125, "learning_rate": 3.9458645965476e-05, "loss": 0.1174, "step": 2506 }, { "epoch": 67.75675675675676, "grad_norm": 1.6484375, "learning_rate": 3.944953042105682e-05, "loss": 0.1778, "step": 2507 }, { "epoch": 67.78378378378379, "grad_norm": 1.484375, "learning_rate": 3.944041199099317e-05, "loss": 0.1375, "step": 2508 }, { "epoch": 67.8108108108108, "grad_norm": 1.3828125, "learning_rate": 3.9431290677106045e-05, "loss": 0.1497, "step": 2509 }, { "epoch": 67.83783783783784, "grad_norm": 2.015625, "learning_rate": 3.9422166481217026e-05, "loss": 0.1946, "step": 2510 }, { "epoch": 67.86486486486487, "grad_norm": 1.734375, "learning_rate": 3.9413039405148256e-05, "loss": 0.1502, "step": 2511 }, { "epoch": 67.89189189189189, "grad_norm": 1.15625, "learning_rate": 3.9403909450722456e-05, "loss": 0.1265, "step": 2512 }, { "epoch": 67.91891891891892, "grad_norm": 1.6171875, "learning_rate": 3.939477661976293e-05, "loss": 0.1508, "step": 2513 }, { "epoch": 67.94594594594595, "grad_norm": 0.74609375, "learning_rate": 3.938564091409355e-05, "loss": 0.0819, "step": 2514 }, { "epoch": 67.97297297297297, "grad_norm": 1.2265625, "learning_rate": 3.937650233553875e-05, "loss": 0.1, "step": 2515 }, { "epoch": 68.0, "grad_norm": 1.5390625, "learning_rate": 3.9367360885923566e-05, "loss": 0.1415, "step": 2516 }, { "epoch": 68.02702702702703, "grad_norm": 1.046875, "learning_rate": 3.935821656707359e-05, "loss": 0.0808, "step": 2517 }, { "epoch": 68.05405405405405, "grad_norm": 1.0234375, "learning_rate": 3.934906938081499e-05, "loss": 0.0873, "step": 2518 }, { "epoch": 68.08108108108108, "grad_norm": 0.9375, "learning_rate": 3.93399193289745e-05, "loss": 0.0849, "step": 2519 }, { "epoch": 68.10810810810811, "grad_norm": 1.3515625, "learning_rate": 3.933076641337943e-05, "loss": 0.1503, "step": 2520 }, { "epoch": 68.13513513513513, "grad_norm": 1.8046875, "learning_rate": 3.932161063585767e-05, "loss": 0.21, "step": 2521 }, { "epoch": 68.16216216216216, "grad_norm": 1.46875, "learning_rate": 3.931245199823767e-05, "loss": 0.177, "step": 2522 }, { "epoch": 68.1891891891892, "grad_norm": 1.2734375, "learning_rate": 3.9303290502348454e-05, "loss": 0.1411, "step": 2523 }, { "epoch": 68.21621621621621, "grad_norm": 1.390625, "learning_rate": 3.929412615001963e-05, "loss": 0.1001, "step": 2524 }, { "epoch": 68.24324324324324, "grad_norm": 1.25, "learning_rate": 3.928495894308136e-05, "loss": 0.1694, "step": 2525 }, { "epoch": 68.27027027027027, "grad_norm": 0.82421875, "learning_rate": 3.9275788883364374e-05, "loss": 0.0783, "step": 2526 }, { "epoch": 68.29729729729729, "grad_norm": 1.421875, "learning_rate": 3.926661597269999e-05, "loss": 0.1141, "step": 2527 }, { "epoch": 68.32432432432432, "grad_norm": 1.1796875, "learning_rate": 3.9257440212920085e-05, "loss": 0.0909, "step": 2528 }, { "epoch": 68.35135135135135, "grad_norm": 0.93359375, "learning_rate": 3.9248261605857095e-05, "loss": 0.0835, "step": 2529 }, { "epoch": 68.37837837837837, "grad_norm": 0.828125, "learning_rate": 3.923908015334404e-05, "loss": 0.0961, "step": 2530 }, { "epoch": 68.4054054054054, "grad_norm": 1.9765625, "learning_rate": 3.92298958572145e-05, "loss": 0.2145, "step": 2531 }, { "epoch": 68.43243243243244, "grad_norm": 1.5859375, "learning_rate": 3.922070871930263e-05, "loss": 0.156, "step": 2532 }, { "epoch": 68.45945945945945, "grad_norm": 1.703125, "learning_rate": 3.921151874144315e-05, "loss": 0.2472, "step": 2533 }, { "epoch": 68.48648648648648, "grad_norm": 1.1875, "learning_rate": 3.9202325925471347e-05, "loss": 0.1478, "step": 2534 }, { "epoch": 68.51351351351352, "grad_norm": 1.5078125, "learning_rate": 3.919313027322306e-05, "loss": 0.1766, "step": 2535 }, { "epoch": 68.54054054054055, "grad_norm": 1.3046875, "learning_rate": 3.918393178653472e-05, "loss": 0.2001, "step": 2536 }, { "epoch": 68.56756756756756, "grad_norm": 1.890625, "learning_rate": 3.9174730467243294e-05, "loss": 0.2355, "step": 2537 }, { "epoch": 68.5945945945946, "grad_norm": 1.328125, "learning_rate": 3.9165526317186355e-05, "loss": 0.1082, "step": 2538 }, { "epoch": 68.62162162162163, "grad_norm": 1.53125, "learning_rate": 3.9156319338202e-05, "loss": 0.189, "step": 2539 }, { "epoch": 68.64864864864865, "grad_norm": 1.0625, "learning_rate": 3.914710953212892e-05, "loss": 0.1235, "step": 2540 }, { "epoch": 68.67567567567568, "grad_norm": 1.3125, "learning_rate": 3.913789690080636e-05, "loss": 0.1108, "step": 2541 }, { "epoch": 68.70270270270271, "grad_norm": 1.6171875, "learning_rate": 3.9128681446074125e-05, "loss": 0.1077, "step": 2542 }, { "epoch": 68.72972972972973, "grad_norm": 0.87109375, "learning_rate": 3.911946316977259e-05, "loss": 0.0884, "step": 2543 }, { "epoch": 68.75675675675676, "grad_norm": 1.2265625, "learning_rate": 3.911024207374269e-05, "loss": 0.152, "step": 2544 }, { "epoch": 68.78378378378379, "grad_norm": 1.125, "learning_rate": 3.910101815982592e-05, "loss": 0.1073, "step": 2545 }, { "epoch": 68.8108108108108, "grad_norm": 1.4375, "learning_rate": 3.9091791429864355e-05, "loss": 0.1128, "step": 2546 }, { "epoch": 68.83783783783784, "grad_norm": 1.296875, "learning_rate": 3.908256188570061e-05, "loss": 0.1172, "step": 2547 }, { "epoch": 68.86486486486487, "grad_norm": 1.1953125, "learning_rate": 3.9073329529177874e-05, "loss": 0.1161, "step": 2548 }, { "epoch": 68.89189189189189, "grad_norm": 1.34375, "learning_rate": 3.906409436213989e-05, "loss": 0.1675, "step": 2549 }, { "epoch": 68.91891891891892, "grad_norm": 1.2109375, "learning_rate": 3.905485638643098e-05, "loss": 0.1453, "step": 2550 }, { "epoch": 68.94594594594595, "grad_norm": 0.859375, "learning_rate": 3.9045615603896e-05, "loss": 0.0865, "step": 2551 }, { "epoch": 68.97297297297297, "grad_norm": 1.5625, "learning_rate": 3.90363720163804e-05, "loss": 0.1427, "step": 2552 }, { "epoch": 69.0, "grad_norm": 1.71875, "learning_rate": 3.902712562573015e-05, "loss": 0.24, "step": 2553 }, { "epoch": 69.02702702702703, "grad_norm": 1.6953125, "learning_rate": 3.901787643379182e-05, "loss": 0.1696, "step": 2554 }, { "epoch": 69.05405405405405, "grad_norm": 1.3984375, "learning_rate": 3.900862444241251e-05, "loss": 0.1123, "step": 2555 }, { "epoch": 69.08108108108108, "grad_norm": 1.5859375, "learning_rate": 3.899936965343989e-05, "loss": 0.1962, "step": 2556 }, { "epoch": 69.10810810810811, "grad_norm": 0.97265625, "learning_rate": 3.8990112068722186e-05, "loss": 0.144, "step": 2557 }, { "epoch": 69.13513513513513, "grad_norm": 1.8125, "learning_rate": 3.898085169010819e-05, "loss": 0.1337, "step": 2558 }, { "epoch": 69.16216216216216, "grad_norm": 1.8359375, "learning_rate": 3.897158851944724e-05, "loss": 0.256, "step": 2559 }, { "epoch": 69.1891891891892, "grad_norm": 1.234375, "learning_rate": 3.896232255858926e-05, "loss": 0.1456, "step": 2560 }, { "epoch": 69.21621621621621, "grad_norm": 1.28125, "learning_rate": 3.8953053809384674e-05, "loss": 0.1639, "step": 2561 }, { "epoch": 69.24324324324324, "grad_norm": 1.25, "learning_rate": 3.8943782273684526e-05, "loss": 0.1281, "step": 2562 }, { "epoch": 69.27027027027027, "grad_norm": 1.484375, "learning_rate": 3.893450795334038e-05, "loss": 0.1439, "step": 2563 }, { "epoch": 69.29729729729729, "grad_norm": 1.3203125, "learning_rate": 3.8925230850204354e-05, "loss": 0.1902, "step": 2564 }, { "epoch": 69.32432432432432, "grad_norm": 1.4609375, "learning_rate": 3.891595096612914e-05, "loss": 0.1486, "step": 2565 }, { "epoch": 69.35135135135135, "grad_norm": 1.1015625, "learning_rate": 3.8906668302967995e-05, "loss": 0.1189, "step": 2566 }, { "epoch": 69.37837837837837, "grad_norm": 1.109375, "learning_rate": 3.8897382862574696e-05, "loss": 0.134, "step": 2567 }, { "epoch": 69.4054054054054, "grad_norm": 1.3828125, "learning_rate": 3.888809464680359e-05, "loss": 0.1677, "step": 2568 }, { "epoch": 69.43243243243244, "grad_norm": 1.171875, "learning_rate": 3.887880365750959e-05, "loss": 0.1213, "step": 2569 }, { "epoch": 69.45945945945945, "grad_norm": 1.546875, "learning_rate": 3.886950989654815e-05, "loss": 0.1673, "step": 2570 }, { "epoch": 69.48648648648648, "grad_norm": 0.9609375, "learning_rate": 3.8860213365775274e-05, "loss": 0.1087, "step": 2571 }, { "epoch": 69.51351351351352, "grad_norm": 1.6484375, "learning_rate": 3.8850914067047546e-05, "loss": 0.2021, "step": 2572 }, { "epoch": 69.54054054054055, "grad_norm": 1.1953125, "learning_rate": 3.884161200222206e-05, "loss": 0.1068, "step": 2573 }, { "epoch": 69.56756756756756, "grad_norm": 0.9765625, "learning_rate": 3.883230717315651e-05, "loss": 0.1129, "step": 2574 }, { "epoch": 69.5945945945946, "grad_norm": 0.765625, "learning_rate": 3.882299958170909e-05, "loss": 0.0853, "step": 2575 }, { "epoch": 69.62162162162163, "grad_norm": 1.5, "learning_rate": 3.881368922973859e-05, "loss": 0.1807, "step": 2576 }, { "epoch": 69.64864864864865, "grad_norm": 1.6171875, "learning_rate": 3.880437611910434e-05, "loss": 0.2783, "step": 2577 }, { "epoch": 69.67567567567568, "grad_norm": 1.5234375, "learning_rate": 3.8795060251666195e-05, "loss": 0.1965, "step": 2578 }, { "epoch": 69.70270270270271, "grad_norm": 1.53125, "learning_rate": 3.87857416292846e-05, "loss": 0.158, "step": 2579 }, { "epoch": 69.72972972972973, "grad_norm": 0.890625, "learning_rate": 3.8776420253820516e-05, "loss": 0.0952, "step": 2580 }, { "epoch": 69.75675675675676, "grad_norm": 1.171875, "learning_rate": 3.8767096127135484e-05, "loss": 0.1794, "step": 2581 }, { "epoch": 69.78378378378379, "grad_norm": 0.98046875, "learning_rate": 3.875776925109157e-05, "loss": 0.1142, "step": 2582 }, { "epoch": 69.8108108108108, "grad_norm": 1.2734375, "learning_rate": 3.87484396275514e-05, "loss": 0.122, "step": 2583 }, { "epoch": 69.83783783783784, "grad_norm": 1.328125, "learning_rate": 3.8739107258378147e-05, "loss": 0.14, "step": 2584 }, { "epoch": 69.86486486486487, "grad_norm": 1.203125, "learning_rate": 3.8729772145435537e-05, "loss": 0.1346, "step": 2585 }, { "epoch": 69.89189189189189, "grad_norm": 0.80859375, "learning_rate": 3.872043429058783e-05, "loss": 0.087, "step": 2586 }, { "epoch": 69.91891891891892, "grad_norm": 1.4140625, "learning_rate": 3.871109369569985e-05, "loss": 0.1294, "step": 2587 }, { "epoch": 69.94594594594595, "grad_norm": 1.203125, "learning_rate": 3.870175036263696e-05, "loss": 0.107, "step": 2588 }, { "epoch": 69.97297297297297, "grad_norm": 1.140625, "learning_rate": 3.869240429326507e-05, "loss": 0.1454, "step": 2589 }, { "epoch": 70.0, "grad_norm": 0.80078125, "learning_rate": 3.868305548945065e-05, "loss": 0.0938, "step": 2590 }, { "epoch": 70.02702702702703, "grad_norm": 1.046875, "learning_rate": 3.867370395306068e-05, "loss": 0.1062, "step": 2591 }, { "epoch": 70.05405405405405, "grad_norm": 1.234375, "learning_rate": 3.8664349685962716e-05, "loss": 0.1612, "step": 2592 }, { "epoch": 70.08108108108108, "grad_norm": 1.375, "learning_rate": 3.865499269002486e-05, "loss": 0.1699, "step": 2593 }, { "epoch": 70.10810810810811, "grad_norm": 1.5546875, "learning_rate": 3.8645632967115753e-05, "loss": 0.2208, "step": 2594 }, { "epoch": 70.13513513513513, "grad_norm": 1.6328125, "learning_rate": 3.863627051910458e-05, "loss": 0.2155, "step": 2595 }, { "epoch": 70.16216216216216, "grad_norm": 1.1875, "learning_rate": 3.862690534786105e-05, "loss": 0.1217, "step": 2596 }, { "epoch": 70.1891891891892, "grad_norm": 1.5234375, "learning_rate": 3.861753745525546e-05, "loss": 0.1424, "step": 2597 }, { "epoch": 70.21621621621621, "grad_norm": 0.9921875, "learning_rate": 3.860816684315861e-05, "loss": 0.1067, "step": 2598 }, { "epoch": 70.24324324324324, "grad_norm": 1.046875, "learning_rate": 3.859879351344186e-05, "loss": 0.1254, "step": 2599 }, { "epoch": 70.27027027027027, "grad_norm": 1.328125, "learning_rate": 3.858941746797713e-05, "loss": 0.0955, "step": 2600 }, { "epoch": 70.29729729729729, "grad_norm": 1.109375, "learning_rate": 3.858003870863684e-05, "loss": 0.1224, "step": 2601 }, { "epoch": 70.32432432432432, "grad_norm": 1.3515625, "learning_rate": 3.8570657237293985e-05, "loss": 0.1498, "step": 2602 }, { "epoch": 70.35135135135135, "grad_norm": 1.2265625, "learning_rate": 3.8561273055822086e-05, "loss": 0.1387, "step": 2603 }, { "epoch": 70.37837837837837, "grad_norm": 1.625, "learning_rate": 3.855188616609523e-05, "loss": 0.2192, "step": 2604 }, { "epoch": 70.4054054054054, "grad_norm": 1.1640625, "learning_rate": 3.854249656998801e-05, "loss": 0.1178, "step": 2605 }, { "epoch": 70.43243243243244, "grad_norm": 1.2734375, "learning_rate": 3.8533104269375576e-05, "loss": 0.1442, "step": 2606 }, { "epoch": 70.45945945945945, "grad_norm": 1.28125, "learning_rate": 3.852370926613362e-05, "loss": 0.1203, "step": 2607 }, { "epoch": 70.48648648648648, "grad_norm": 1.984375, "learning_rate": 3.8514311562138384e-05, "loss": 0.1342, "step": 2608 }, { "epoch": 70.51351351351352, "grad_norm": 1.4453125, "learning_rate": 3.8504911159266624e-05, "loss": 0.1672, "step": 2609 }, { "epoch": 70.54054054054055, "grad_norm": 1.3828125, "learning_rate": 3.8495508059395644e-05, "loss": 0.1479, "step": 2610 }, { "epoch": 70.56756756756756, "grad_norm": 1.375, "learning_rate": 3.84861022644033e-05, "loss": 0.1191, "step": 2611 }, { "epoch": 70.5945945945946, "grad_norm": 1.296875, "learning_rate": 3.847669377616798e-05, "loss": 0.1493, "step": 2612 }, { "epoch": 70.62162162162163, "grad_norm": 1.390625, "learning_rate": 3.84672825965686e-05, "loss": 0.1352, "step": 2613 }, { "epoch": 70.64864864864865, "grad_norm": 1.09375, "learning_rate": 3.845786872748462e-05, "loss": 0.0987, "step": 2614 }, { "epoch": 70.67567567567568, "grad_norm": 1.296875, "learning_rate": 3.844845217079604e-05, "loss": 0.1427, "step": 2615 }, { "epoch": 70.70270270270271, "grad_norm": 1.6640625, "learning_rate": 3.843903292838339e-05, "loss": 0.1603, "step": 2616 }, { "epoch": 70.72972972972973, "grad_norm": 1.78125, "learning_rate": 3.842961100212775e-05, "loss": 0.1482, "step": 2617 }, { "epoch": 70.75675675675676, "grad_norm": 1.28125, "learning_rate": 3.842018639391072e-05, "loss": 0.1641, "step": 2618 }, { "epoch": 70.78378378378379, "grad_norm": 1.8125, "learning_rate": 3.8410759105614434e-05, "loss": 0.1454, "step": 2619 }, { "epoch": 70.8108108108108, "grad_norm": 1.0625, "learning_rate": 3.840132913912159e-05, "loss": 0.0819, "step": 2620 }, { "epoch": 70.83783783783784, "grad_norm": 2.109375, "learning_rate": 3.839189649631538e-05, "loss": 0.2642, "step": 2621 }, { "epoch": 70.86486486486487, "grad_norm": 1.953125, "learning_rate": 3.838246117907956e-05, "loss": 0.2644, "step": 2622 }, { "epoch": 70.89189189189189, "grad_norm": 0.91796875, "learning_rate": 3.837302318929841e-05, "loss": 0.0818, "step": 2623 }, { "epoch": 70.91891891891892, "grad_norm": 1.4921875, "learning_rate": 3.836358252885674e-05, "loss": 0.2208, "step": 2624 }, { "epoch": 70.94594594594595, "grad_norm": 1.4296875, "learning_rate": 3.8354139199639905e-05, "loss": 0.0916, "step": 2625 }, { "epoch": 70.97297297297297, "grad_norm": 1.25, "learning_rate": 3.834469320353378e-05, "loss": 0.1226, "step": 2626 }, { "epoch": 71.0, "grad_norm": 0.84375, "learning_rate": 3.8335244542424774e-05, "loss": 0.0767, "step": 2627 }, { "epoch": 71.02702702702703, "grad_norm": 0.9140625, "learning_rate": 3.832579321819985e-05, "loss": 0.0906, "step": 2628 }, { "epoch": 71.05405405405405, "grad_norm": 1.015625, "learning_rate": 3.831633923274647e-05, "loss": 0.1284, "step": 2629 }, { "epoch": 71.08108108108108, "grad_norm": 1.59375, "learning_rate": 3.830688258795264e-05, "loss": 0.2098, "step": 2630 }, { "epoch": 71.10810810810811, "grad_norm": 1.9140625, "learning_rate": 3.829742328570691e-05, "loss": 0.1817, "step": 2631 }, { "epoch": 71.13513513513513, "grad_norm": 1.515625, "learning_rate": 3.828796132789835e-05, "loss": 0.1548, "step": 2632 }, { "epoch": 71.16216216216216, "grad_norm": 1.6015625, "learning_rate": 3.8278496716416555e-05, "loss": 0.1349, "step": 2633 }, { "epoch": 71.1891891891892, "grad_norm": 0.8515625, "learning_rate": 3.8269029453151665e-05, "loss": 0.0931, "step": 2634 }, { "epoch": 71.21621621621621, "grad_norm": 1.6171875, "learning_rate": 3.825955953999433e-05, "loss": 0.1437, "step": 2635 }, { "epoch": 71.24324324324324, "grad_norm": 2.421875, "learning_rate": 3.825008697883574e-05, "loss": 0.2235, "step": 2636 }, { "epoch": 71.27027027027027, "grad_norm": 1.3515625, "learning_rate": 3.824061177156762e-05, "loss": 0.1163, "step": 2637 }, { "epoch": 71.29729729729729, "grad_norm": 1.328125, "learning_rate": 3.8231133920082225e-05, "loss": 0.1707, "step": 2638 }, { "epoch": 71.32432432432432, "grad_norm": 1.1171875, "learning_rate": 3.822165342627231e-05, "loss": 0.1443, "step": 2639 }, { "epoch": 71.35135135135135, "grad_norm": 1.0234375, "learning_rate": 3.821217029203119e-05, "loss": 0.1152, "step": 2640 }, { "epoch": 71.37837837837837, "grad_norm": 1.3984375, "learning_rate": 3.820268451925269e-05, "loss": 0.1729, "step": 2641 }, { "epoch": 71.4054054054054, "grad_norm": 1.625, "learning_rate": 3.819319610983117e-05, "loss": 0.1358, "step": 2642 }, { "epoch": 71.43243243243244, "grad_norm": 1.7578125, "learning_rate": 3.8183705065661525e-05, "loss": 0.1733, "step": 2643 }, { "epoch": 71.45945945945945, "grad_norm": 1.0234375, "learning_rate": 3.8174211388639134e-05, "loss": 0.0962, "step": 2644 }, { "epoch": 71.48648648648648, "grad_norm": 1.0546875, "learning_rate": 3.816471508065996e-05, "loss": 0.0805, "step": 2645 }, { "epoch": 71.51351351351352, "grad_norm": 1.046875, "learning_rate": 3.8155216143620454e-05, "loss": 0.0994, "step": 2646 }, { "epoch": 71.54054054054055, "grad_norm": 1.8203125, "learning_rate": 3.814571457941759e-05, "loss": 0.1785, "step": 2647 }, { "epoch": 71.56756756756756, "grad_norm": 1.03125, "learning_rate": 3.81362103899489e-05, "loss": 0.095, "step": 2648 }, { "epoch": 71.5945945945946, "grad_norm": 1.046875, "learning_rate": 3.812670357711242e-05, "loss": 0.1126, "step": 2649 }, { "epoch": 71.62162162162163, "grad_norm": 1.0625, "learning_rate": 3.811719414280669e-05, "loss": 0.1016, "step": 2650 }, { "epoch": 71.64864864864865, "grad_norm": 0.9296875, "learning_rate": 3.8107682088930794e-05, "loss": 0.1139, "step": 2651 }, { "epoch": 71.67567567567568, "grad_norm": 1.03125, "learning_rate": 3.809816741738435e-05, "loss": 0.1011, "step": 2652 }, { "epoch": 71.70270270270271, "grad_norm": 1.0546875, "learning_rate": 3.808865013006748e-05, "loss": 0.1331, "step": 2653 }, { "epoch": 71.72972972972973, "grad_norm": 1.296875, "learning_rate": 3.807913022888083e-05, "loss": 0.1466, "step": 2654 }, { "epoch": 71.75675675675676, "grad_norm": 0.75, "learning_rate": 3.8069607715725586e-05, "loss": 0.0894, "step": 2655 }, { "epoch": 71.78378378378379, "grad_norm": 1.5390625, "learning_rate": 3.806008259250343e-05, "loss": 0.1925, "step": 2656 }, { "epoch": 71.8108108108108, "grad_norm": 0.98046875, "learning_rate": 3.805055486111658e-05, "loss": 0.1278, "step": 2657 }, { "epoch": 71.83783783783784, "grad_norm": 1.3203125, "learning_rate": 3.8041024523467774e-05, "loss": 0.1094, "step": 2658 }, { "epoch": 71.86486486486487, "grad_norm": 1.3125, "learning_rate": 3.8031491581460267e-05, "loss": 0.1465, "step": 2659 }, { "epoch": 71.89189189189189, "grad_norm": 0.94921875, "learning_rate": 3.802195603699783e-05, "loss": 0.1125, "step": 2660 }, { "epoch": 71.91891891891892, "grad_norm": 1.234375, "learning_rate": 3.801241789198478e-05, "loss": 0.1659, "step": 2661 }, { "epoch": 71.94594594594595, "grad_norm": 1.578125, "learning_rate": 3.800287714832591e-05, "loss": 0.1564, "step": 2662 }, { "epoch": 71.97297297297297, "grad_norm": 1.59375, "learning_rate": 3.7993333807926566e-05, "loss": 0.1601, "step": 2663 }, { "epoch": 72.0, "grad_norm": 1.1171875, "learning_rate": 3.79837878726926e-05, "loss": 0.1174, "step": 2664 }, { "epoch": 72.02702702702703, "grad_norm": 1.5, "learning_rate": 3.797423934453038e-05, "loss": 0.1622, "step": 2665 }, { "epoch": 72.05405405405405, "grad_norm": 1.453125, "learning_rate": 3.79646882253468e-05, "loss": 0.1135, "step": 2666 }, { "epoch": 72.08108108108108, "grad_norm": 1.2265625, "learning_rate": 3.795513451704927e-05, "loss": 0.1628, "step": 2667 }, { "epoch": 72.10810810810811, "grad_norm": 1.015625, "learning_rate": 3.79455782215457e-05, "loss": 0.106, "step": 2668 }, { "epoch": 72.13513513513513, "grad_norm": 1.453125, "learning_rate": 3.7936019340744545e-05, "loss": 0.1694, "step": 2669 }, { "epoch": 72.16216216216216, "grad_norm": 1.09375, "learning_rate": 3.792645787655476e-05, "loss": 0.1605, "step": 2670 }, { "epoch": 72.1891891891892, "grad_norm": 1.2578125, "learning_rate": 3.791689383088581e-05, "loss": 0.0931, "step": 2671 }, { "epoch": 72.21621621621621, "grad_norm": 0.921875, "learning_rate": 3.790732720564769e-05, "loss": 0.0856, "step": 2672 }, { "epoch": 72.24324324324324, "grad_norm": 1.8125, "learning_rate": 3.7897758002750906e-05, "loss": 0.206, "step": 2673 }, { "epoch": 72.27027027027027, "grad_norm": 1.1875, "learning_rate": 3.788818622410647e-05, "loss": 0.0932, "step": 2674 }, { "epoch": 72.29729729729729, "grad_norm": 1.2421875, "learning_rate": 3.7878611871625916e-05, "loss": 0.1236, "step": 2675 }, { "epoch": 72.32432432432432, "grad_norm": 0.78125, "learning_rate": 3.7869034947221284e-05, "loss": 0.0783, "step": 2676 }, { "epoch": 72.35135135135135, "grad_norm": 1.28125, "learning_rate": 3.785945545280515e-05, "loss": 0.1825, "step": 2677 }, { "epoch": 72.37837837837837, "grad_norm": 1.265625, "learning_rate": 3.784987339029058e-05, "loss": 0.1426, "step": 2678 }, { "epoch": 72.4054054054054, "grad_norm": 1.4140625, "learning_rate": 3.784028876159116e-05, "loss": 0.1713, "step": 2679 }, { "epoch": 72.43243243243244, "grad_norm": 1.28125, "learning_rate": 3.783070156862098e-05, "loss": 0.1325, "step": 2680 }, { "epoch": 72.45945945945945, "grad_norm": 0.875, "learning_rate": 3.7821111813294676e-05, "loss": 0.105, "step": 2681 }, { "epoch": 72.48648648648648, "grad_norm": 0.99609375, "learning_rate": 3.781151949752734e-05, "loss": 0.0905, "step": 2682 }, { "epoch": 72.51351351351352, "grad_norm": 1.25, "learning_rate": 3.780192462323463e-05, "loss": 0.1101, "step": 2683 }, { "epoch": 72.54054054054055, "grad_norm": 1.2578125, "learning_rate": 3.7792327192332676e-05, "loss": 0.1409, "step": 2684 }, { "epoch": 72.56756756756756, "grad_norm": 1.1640625, "learning_rate": 3.778272720673814e-05, "loss": 0.1309, "step": 2685 }, { "epoch": 72.5945945945946, "grad_norm": 1.171875, "learning_rate": 3.7773124668368184e-05, "loss": 0.1476, "step": 2686 }, { "epoch": 72.62162162162163, "grad_norm": 1.1796875, "learning_rate": 3.77635195791405e-05, "loss": 0.118, "step": 2687 }, { "epoch": 72.64864864864865, "grad_norm": 1.53125, "learning_rate": 3.775391194097325e-05, "loss": 0.1418, "step": 2688 }, { "epoch": 72.67567567567568, "grad_norm": 1.171875, "learning_rate": 3.774430175578514e-05, "loss": 0.1174, "step": 2689 }, { "epoch": 72.70270270270271, "grad_norm": 1.078125, "learning_rate": 3.7734689025495366e-05, "loss": 0.0987, "step": 2690 }, { "epoch": 72.72972972972973, "grad_norm": 1.78125, "learning_rate": 3.772507375202365e-05, "loss": 0.1914, "step": 2691 }, { "epoch": 72.75675675675676, "grad_norm": 0.81640625, "learning_rate": 3.7715455937290195e-05, "loss": 0.0847, "step": 2692 }, { "epoch": 72.78378378378379, "grad_norm": 1.484375, "learning_rate": 3.7705835583215743e-05, "loss": 0.0944, "step": 2693 }, { "epoch": 72.8108108108108, "grad_norm": 1.125, "learning_rate": 3.769621269172152e-05, "loss": 0.1246, "step": 2694 }, { "epoch": 72.83783783783784, "grad_norm": 1.4375, "learning_rate": 3.7686587264729276e-05, "loss": 0.1515, "step": 2695 }, { "epoch": 72.86486486486487, "grad_norm": 1.8828125, "learning_rate": 3.767695930416124e-05, "loss": 0.1641, "step": 2696 }, { "epoch": 72.89189189189189, "grad_norm": 1.5546875, "learning_rate": 3.766732881194017e-05, "loss": 0.1697, "step": 2697 }, { "epoch": 72.91891891891892, "grad_norm": 1.7734375, "learning_rate": 3.7657695789989334e-05, "loss": 0.2066, "step": 2698 }, { "epoch": 72.94594594594595, "grad_norm": 1.09375, "learning_rate": 3.764806024023248e-05, "loss": 0.1083, "step": 2699 }, { "epoch": 72.97297297297297, "grad_norm": 1.1015625, "learning_rate": 3.76384221645939e-05, "loss": 0.0967, "step": 2700 }, { "epoch": 73.0, "grad_norm": 1.4609375, "learning_rate": 3.762878156499835e-05, "loss": 0.0926, "step": 2701 }, { "epoch": 73.02702702702703, "grad_norm": 0.9765625, "learning_rate": 3.76191384433711e-05, "loss": 0.095, "step": 2702 }, { "epoch": 73.05405405405405, "grad_norm": 1.3046875, "learning_rate": 3.7609492801637935e-05, "loss": 0.1447, "step": 2703 }, { "epoch": 73.08108108108108, "grad_norm": 1.3125, "learning_rate": 3.759984464172514e-05, "loss": 0.141, "step": 2704 }, { "epoch": 73.10810810810811, "grad_norm": 1.53125, "learning_rate": 3.759019396555952e-05, "loss": 0.0967, "step": 2705 }, { "epoch": 73.13513513513513, "grad_norm": 1.640625, "learning_rate": 3.758054077506834e-05, "loss": 0.0984, "step": 2706 }, { "epoch": 73.16216216216216, "grad_norm": 1.3984375, "learning_rate": 3.757088507217939e-05, "loss": 0.1448, "step": 2707 }, { "epoch": 73.1891891891892, "grad_norm": 1.59375, "learning_rate": 3.756122685882098e-05, "loss": 0.181, "step": 2708 }, { "epoch": 73.21621621621621, "grad_norm": 1.1640625, "learning_rate": 3.7551566136921895e-05, "loss": 0.0922, "step": 2709 }, { "epoch": 73.24324324324324, "grad_norm": 0.99609375, "learning_rate": 3.754190290841143e-05, "loss": 0.1071, "step": 2710 }, { "epoch": 73.27027027027027, "grad_norm": 1.3125, "learning_rate": 3.753223717521938e-05, "loss": 0.1209, "step": 2711 }, { "epoch": 73.29729729729729, "grad_norm": 1.03125, "learning_rate": 3.752256893927604e-05, "loss": 0.1249, "step": 2712 }, { "epoch": 73.32432432432432, "grad_norm": 1.0859375, "learning_rate": 3.75128982025122e-05, "loss": 0.0978, "step": 2713 }, { "epoch": 73.35135135135135, "grad_norm": 1.21875, "learning_rate": 3.750322496685918e-05, "loss": 0.1183, "step": 2714 }, { "epoch": 73.37837837837837, "grad_norm": 1.109375, "learning_rate": 3.749354923424875e-05, "loss": 0.1459, "step": 2715 }, { "epoch": 73.4054054054054, "grad_norm": 1.65625, "learning_rate": 3.748387100661321e-05, "loss": 0.2176, "step": 2716 }, { "epoch": 73.43243243243244, "grad_norm": 1.7890625, "learning_rate": 3.747419028588535e-05, "loss": 0.2599, "step": 2717 }, { "epoch": 73.45945945945945, "grad_norm": 0.7734375, "learning_rate": 3.7464507073998465e-05, "loss": 0.0905, "step": 2718 }, { "epoch": 73.48648648648648, "grad_norm": 1.4765625, "learning_rate": 3.745482137288633e-05, "loss": 0.2066, "step": 2719 }, { "epoch": 73.51351351351352, "grad_norm": 1.1640625, "learning_rate": 3.7445133184483236e-05, "loss": 0.119, "step": 2720 }, { "epoch": 73.54054054054055, "grad_norm": 0.71875, "learning_rate": 3.743544251072397e-05, "loss": 0.0649, "step": 2721 }, { "epoch": 73.56756756756756, "grad_norm": 0.6328125, "learning_rate": 3.742574935354379e-05, "loss": 0.0727, "step": 2722 }, { "epoch": 73.5945945945946, "grad_norm": 1.1796875, "learning_rate": 3.741605371487849e-05, "loss": 0.0814, "step": 2723 }, { "epoch": 73.62162162162163, "grad_norm": 0.890625, "learning_rate": 3.7406355596664325e-05, "loss": 0.0932, "step": 2724 }, { "epoch": 73.64864864864865, "grad_norm": 1.3671875, "learning_rate": 3.739665500083806e-05, "loss": 0.1583, "step": 2725 }, { "epoch": 73.67567567567568, "grad_norm": 0.83984375, "learning_rate": 3.7386951929336956e-05, "loss": 0.084, "step": 2726 }, { "epoch": 73.70270270270271, "grad_norm": 1.296875, "learning_rate": 3.737724638409876e-05, "loss": 0.1569, "step": 2727 }, { "epoch": 73.72972972972973, "grad_norm": 1.3671875, "learning_rate": 3.736753836706173e-05, "loss": 0.1195, "step": 2728 }, { "epoch": 73.75675675675676, "grad_norm": 1.6484375, "learning_rate": 3.73578278801646e-05, "loss": 0.1103, "step": 2729 }, { "epoch": 73.78378378378379, "grad_norm": 1.2890625, "learning_rate": 3.734811492534659e-05, "loss": 0.1213, "step": 2730 }, { "epoch": 73.8108108108108, "grad_norm": 0.89453125, "learning_rate": 3.7338399504547446e-05, "loss": 0.103, "step": 2731 }, { "epoch": 73.83783783783784, "grad_norm": 1.046875, "learning_rate": 3.7328681619707385e-05, "loss": 0.1108, "step": 2732 }, { "epoch": 73.86486486486487, "grad_norm": 1.3671875, "learning_rate": 3.7318961272767106e-05, "loss": 0.1583, "step": 2733 }, { "epoch": 73.89189189189189, "grad_norm": 1.1796875, "learning_rate": 3.730923846566781e-05, "loss": 0.097, "step": 2734 }, { "epoch": 73.91891891891892, "grad_norm": 0.94140625, "learning_rate": 3.729951320035121e-05, "loss": 0.1035, "step": 2735 }, { "epoch": 73.94594594594595, "grad_norm": 1.046875, "learning_rate": 3.7289785478759484e-05, "loss": 0.1405, "step": 2736 }, { "epoch": 73.97297297297297, "grad_norm": 0.796875, "learning_rate": 3.728005530283529e-05, "loss": 0.0917, "step": 2737 }, { "epoch": 74.0, "grad_norm": 0.6484375, "learning_rate": 3.727032267452182e-05, "loss": 0.0759, "step": 2738 }, { "epoch": 74.02702702702703, "grad_norm": 1.0859375, "learning_rate": 3.726058759576271e-05, "loss": 0.1534, "step": 2739 }, { "epoch": 74.05405405405405, "grad_norm": 1.484375, "learning_rate": 3.7250850068502105e-05, "loss": 0.176, "step": 2740 }, { "epoch": 74.08108108108108, "grad_norm": 1.2421875, "learning_rate": 3.724111009468464e-05, "loss": 0.1394, "step": 2741 }, { "epoch": 74.10810810810811, "grad_norm": 1.1640625, "learning_rate": 3.723136767625546e-05, "loss": 0.1539, "step": 2742 }, { "epoch": 74.13513513513513, "grad_norm": 1.2734375, "learning_rate": 3.722162281516014e-05, "loss": 0.0955, "step": 2743 }, { "epoch": 74.16216216216216, "grad_norm": 1.1484375, "learning_rate": 3.72118755133448e-05, "loss": 0.1668, "step": 2744 }, { "epoch": 74.1891891891892, "grad_norm": 0.91015625, "learning_rate": 3.7202125772756014e-05, "loss": 0.0858, "step": 2745 }, { "epoch": 74.21621621621621, "grad_norm": 1.015625, "learning_rate": 3.719237359534087e-05, "loss": 0.1223, "step": 2746 }, { "epoch": 74.24324324324324, "grad_norm": 1.828125, "learning_rate": 3.718261898304691e-05, "loss": 0.1628, "step": 2747 }, { "epoch": 74.27027027027027, "grad_norm": 1.5234375, "learning_rate": 3.717286193782219e-05, "loss": 0.1552, "step": 2748 }, { "epoch": 74.29729729729729, "grad_norm": 1.53125, "learning_rate": 3.716310246161524e-05, "loss": 0.1382, "step": 2749 }, { "epoch": 74.32432432432432, "grad_norm": 1.5, "learning_rate": 3.7153340556375074e-05, "loss": 0.1979, "step": 2750 }, { "epoch": 74.35135135135135, "grad_norm": 1.4765625, "learning_rate": 3.714357622405119e-05, "loss": 0.2178, "step": 2751 }, { "epoch": 74.37837837837837, "grad_norm": 1.1796875, "learning_rate": 3.7133809466593586e-05, "loss": 0.0769, "step": 2752 }, { "epoch": 74.4054054054054, "grad_norm": 1.515625, "learning_rate": 3.712404028595273e-05, "loss": 0.1793, "step": 2753 }, { "epoch": 74.43243243243244, "grad_norm": 1.2578125, "learning_rate": 3.7114268684079574e-05, "loss": 0.1401, "step": 2754 }, { "epoch": 74.45945945945945, "grad_norm": 1.1328125, "learning_rate": 3.7104494662925546e-05, "loss": 0.1201, "step": 2755 }, { "epoch": 74.48648648648648, "grad_norm": 1.0859375, "learning_rate": 3.709471822444259e-05, "loss": 0.1258, "step": 2756 }, { "epoch": 74.51351351351352, "grad_norm": 1.78125, "learning_rate": 3.7084939370583096e-05, "loss": 0.155, "step": 2757 }, { "epoch": 74.54054054054055, "grad_norm": 1.03125, "learning_rate": 3.707515810329994e-05, "loss": 0.1055, "step": 2758 }, { "epoch": 74.56756756756756, "grad_norm": 0.84765625, "learning_rate": 3.706537442454652e-05, "loss": 0.0926, "step": 2759 }, { "epoch": 74.5945945945946, "grad_norm": 1.203125, "learning_rate": 3.705558833627667e-05, "loss": 0.0962, "step": 2760 }, { "epoch": 74.62162162162163, "grad_norm": 1.6015625, "learning_rate": 3.704579984044471e-05, "loss": 0.1696, "step": 2761 }, { "epoch": 74.64864864864865, "grad_norm": 1.6171875, "learning_rate": 3.7036008939005476e-05, "loss": 0.1635, "step": 2762 }, { "epoch": 74.67567567567568, "grad_norm": 1.015625, "learning_rate": 3.702621563391424e-05, "loss": 0.1039, "step": 2763 }, { "epoch": 74.70270270270271, "grad_norm": 1.234375, "learning_rate": 3.701641992712678e-05, "loss": 0.1047, "step": 2764 }, { "epoch": 74.72972972972973, "grad_norm": 1.3359375, "learning_rate": 3.7006621820599356e-05, "loss": 0.1304, "step": 2765 }, { "epoch": 74.75675675675676, "grad_norm": 0.8984375, "learning_rate": 3.699682131628869e-05, "loss": 0.0834, "step": 2766 }, { "epoch": 74.78378378378379, "grad_norm": 0.97265625, "learning_rate": 3.698701841615201e-05, "loss": 0.0976, "step": 2767 }, { "epoch": 74.8108108108108, "grad_norm": 1.6953125, "learning_rate": 3.6977213122146974e-05, "loss": 0.1185, "step": 2768 }, { "epoch": 74.83783783783784, "grad_norm": 0.8125, "learning_rate": 3.696740543623178e-05, "loss": 0.0833, "step": 2769 }, { "epoch": 74.86486486486487, "grad_norm": 1.609375, "learning_rate": 3.695759536036506e-05, "loss": 0.1367, "step": 2770 }, { "epoch": 74.89189189189189, "grad_norm": 1.4609375, "learning_rate": 3.694778289650593e-05, "loss": 0.1635, "step": 2771 }, { "epoch": 74.91891891891892, "grad_norm": 1.0625, "learning_rate": 3.693796804661399e-05, "loss": 0.1116, "step": 2772 }, { "epoch": 74.94594594594595, "grad_norm": 1.109375, "learning_rate": 3.6928150812649325e-05, "loss": 0.1301, "step": 2773 }, { "epoch": 74.97297297297297, "grad_norm": 1.0390625, "learning_rate": 3.691833119657249e-05, "loss": 0.1378, "step": 2774 }, { "epoch": 75.0, "grad_norm": 0.7734375, "learning_rate": 3.690850920034449e-05, "loss": 0.0757, "step": 2775 }, { "epoch": 75.02702702702703, "grad_norm": 0.97265625, "learning_rate": 3.689868482592684e-05, "loss": 0.0869, "step": 2776 }, { "epoch": 75.05405405405405, "grad_norm": 1.5859375, "learning_rate": 3.688885807528152e-05, "loss": 0.1788, "step": 2777 }, { "epoch": 75.08108108108108, "grad_norm": 1.0390625, "learning_rate": 3.687902895037099e-05, "loss": 0.139, "step": 2778 }, { "epoch": 75.10810810810811, "grad_norm": 0.75390625, "learning_rate": 3.686919745315815e-05, "loss": 0.0833, "step": 2779 }, { "epoch": 75.13513513513513, "grad_norm": 1.1796875, "learning_rate": 3.685936358560642e-05, "loss": 0.1462, "step": 2780 }, { "epoch": 75.16216216216216, "grad_norm": 1.25, "learning_rate": 3.684952734967968e-05, "loss": 0.1445, "step": 2781 }, { "epoch": 75.1891891891892, "grad_norm": 1.6171875, "learning_rate": 3.683968874734225e-05, "loss": 0.1715, "step": 2782 }, { "epoch": 75.21621621621621, "grad_norm": 1.25, "learning_rate": 3.682984778055897e-05, "loss": 0.1626, "step": 2783 }, { "epoch": 75.24324324324324, "grad_norm": 1.015625, "learning_rate": 3.682000445129512e-05, "loss": 0.1055, "step": 2784 }, { "epoch": 75.27027027027027, "grad_norm": 1.21875, "learning_rate": 3.681015876151648e-05, "loss": 0.1282, "step": 2785 }, { "epoch": 75.29729729729729, "grad_norm": 1.1875, "learning_rate": 3.6800310713189254e-05, "loss": 0.1492, "step": 2786 }, { "epoch": 75.32432432432432, "grad_norm": 1.171875, "learning_rate": 3.679046030828017e-05, "loss": 0.161, "step": 2787 }, { "epoch": 75.35135135135135, "grad_norm": 1.078125, "learning_rate": 3.6780607548756404e-05, "loss": 0.1231, "step": 2788 }, { "epoch": 75.37837837837837, "grad_norm": 1.1328125, "learning_rate": 3.6770752436585595e-05, "loss": 0.1047, "step": 2789 }, { "epoch": 75.4054054054054, "grad_norm": 1.7421875, "learning_rate": 3.676089497373586e-05, "loss": 0.2959, "step": 2790 }, { "epoch": 75.43243243243244, "grad_norm": 0.97265625, "learning_rate": 3.6751035162175784e-05, "loss": 0.0973, "step": 2791 }, { "epoch": 75.45945945945945, "grad_norm": 1.4453125, "learning_rate": 3.674117300387442e-05, "loss": 0.1477, "step": 2792 }, { "epoch": 75.48648648648648, "grad_norm": 1.1015625, "learning_rate": 3.673130850080129e-05, "loss": 0.1197, "step": 2793 }, { "epoch": 75.51351351351352, "grad_norm": 1.3828125, "learning_rate": 3.67214416549264e-05, "loss": 0.1514, "step": 2794 }, { "epoch": 75.54054054054055, "grad_norm": 1.109375, "learning_rate": 3.671157246822019e-05, "loss": 0.1096, "step": 2795 }, { "epoch": 75.56756756756756, "grad_norm": 0.609375, "learning_rate": 3.67017009426536e-05, "loss": 0.0609, "step": 2796 }, { "epoch": 75.5945945945946, "grad_norm": 0.9765625, "learning_rate": 3.669182708019801e-05, "loss": 0.1242, "step": 2797 }, { "epoch": 75.62162162162163, "grad_norm": 1.5625, "learning_rate": 3.668195088282529e-05, "loss": 0.1714, "step": 2798 }, { "epoch": 75.64864864864865, "grad_norm": 1.5625, "learning_rate": 3.6672072352507765e-05, "loss": 0.1778, "step": 2799 }, { "epoch": 75.67567567567568, "grad_norm": 1.3125, "learning_rate": 3.666219149121822e-05, "loss": 0.1465, "step": 2800 }, { "epoch": 75.70270270270271, "grad_norm": 1.3046875, "learning_rate": 3.665230830092993e-05, "loss": 0.1406, "step": 2801 }, { "epoch": 75.72972972972973, "grad_norm": 1.3828125, "learning_rate": 3.664242278361661e-05, "loss": 0.1411, "step": 2802 }, { "epoch": 75.75675675675676, "grad_norm": 0.9921875, "learning_rate": 3.663253494125244e-05, "loss": 0.1022, "step": 2803 }, { "epoch": 75.78378378378379, "grad_norm": 0.9921875, "learning_rate": 3.6622644775812084e-05, "loss": 0.1083, "step": 2804 }, { "epoch": 75.8108108108108, "grad_norm": 1.4765625, "learning_rate": 3.661275228927065e-05, "loss": 0.1292, "step": 2805 }, { "epoch": 75.83783783783784, "grad_norm": 1.640625, "learning_rate": 3.6602857483603714e-05, "loss": 0.155, "step": 2806 }, { "epoch": 75.86486486486487, "grad_norm": 1.1875, "learning_rate": 3.659296036078733e-05, "loss": 0.0926, "step": 2807 }, { "epoch": 75.89189189189189, "grad_norm": 1.0625, "learning_rate": 3.658306092279801e-05, "loss": 0.0856, "step": 2808 }, { "epoch": 75.91891891891892, "grad_norm": 1.921875, "learning_rate": 3.65731591716127e-05, "loss": 0.1395, "step": 2809 }, { "epoch": 75.94594594594595, "grad_norm": 1.6796875, "learning_rate": 3.656325510920884e-05, "loss": 0.1463, "step": 2810 }, { "epoch": 75.97297297297297, "grad_norm": 1.203125, "learning_rate": 3.6553348737564326e-05, "loss": 0.1188, "step": 2811 }, { "epoch": 76.0, "grad_norm": 0.7734375, "learning_rate": 3.654344005865751e-05, "loss": 0.076, "step": 2812 }, { "epoch": 76.02702702702703, "grad_norm": 1.890625, "learning_rate": 3.65335290744672e-05, "loss": 0.1503, "step": 2813 }, { "epoch": 76.05405405405405, "grad_norm": 1.75, "learning_rate": 3.652361578697267e-05, "loss": 0.1229, "step": 2814 }, { "epoch": 76.08108108108108, "grad_norm": 0.87890625, "learning_rate": 3.651370019815366e-05, "loss": 0.0778, "step": 2815 }, { "epoch": 76.10810810810811, "grad_norm": 1.5, "learning_rate": 3.6503782309990354e-05, "loss": 0.153, "step": 2816 }, { "epoch": 76.13513513513513, "grad_norm": 1.421875, "learning_rate": 3.649386212446341e-05, "loss": 0.1555, "step": 2817 }, { "epoch": 76.16216216216216, "grad_norm": 1.5546875, "learning_rate": 3.648393964355394e-05, "loss": 0.1202, "step": 2818 }, { "epoch": 76.1891891891892, "grad_norm": 1.671875, "learning_rate": 3.6474014869243515e-05, "loss": 0.095, "step": 2819 }, { "epoch": 76.21621621621621, "grad_norm": 1.6875, "learning_rate": 3.6464087803514156e-05, "loss": 0.1372, "step": 2820 }, { "epoch": 76.24324324324324, "grad_norm": 1.078125, "learning_rate": 3.645415844834836e-05, "loss": 0.0947, "step": 2821 }, { "epoch": 76.27027027027027, "grad_norm": 1.28125, "learning_rate": 3.6444226805729056e-05, "loss": 0.1128, "step": 2822 }, { "epoch": 76.29729729729729, "grad_norm": 1.4765625, "learning_rate": 3.643429287763966e-05, "loss": 0.1825, "step": 2823 }, { "epoch": 76.32432432432432, "grad_norm": 2.515625, "learning_rate": 3.642435666606401e-05, "loss": 0.2519, "step": 2824 }, { "epoch": 76.35135135135135, "grad_norm": 1.6875, "learning_rate": 3.641441817298642e-05, "loss": 0.1368, "step": 2825 }, { "epoch": 76.37837837837837, "grad_norm": 1.1796875, "learning_rate": 3.640447740039167e-05, "loss": 0.1309, "step": 2826 }, { "epoch": 76.4054054054054, "grad_norm": 1.1875, "learning_rate": 3.6394534350264975e-05, "loss": 0.1445, "step": 2827 }, { "epoch": 76.43243243243244, "grad_norm": 1.0078125, "learning_rate": 3.6384589024592016e-05, "loss": 0.0716, "step": 2828 }, { "epoch": 76.45945945945945, "grad_norm": 1.78125, "learning_rate": 3.637464142535892e-05, "loss": 0.1636, "step": 2829 }, { "epoch": 76.48648648648648, "grad_norm": 1.6875, "learning_rate": 3.636469155455227e-05, "loss": 0.1659, "step": 2830 }, { "epoch": 76.51351351351352, "grad_norm": 1.171875, "learning_rate": 3.635473941415911e-05, "loss": 0.1313, "step": 2831 }, { "epoch": 76.54054054054055, "grad_norm": 1.7890625, "learning_rate": 3.6344785006166946e-05, "loss": 0.184, "step": 2832 }, { "epoch": 76.56756756756756, "grad_norm": 1.0546875, "learning_rate": 3.63348283325637e-05, "loss": 0.0889, "step": 2833 }, { "epoch": 76.5945945945946, "grad_norm": 1.046875, "learning_rate": 3.632486939533779e-05, "loss": 0.113, "step": 2834 }, { "epoch": 76.62162162162163, "grad_norm": 1.140625, "learning_rate": 3.6314908196478056e-05, "loss": 0.1522, "step": 2835 }, { "epoch": 76.64864864864865, "grad_norm": 1.0234375, "learning_rate": 3.6304944737973795e-05, "loss": 0.1113, "step": 2836 }, { "epoch": 76.67567567567568, "grad_norm": 1.0234375, "learning_rate": 3.629497902181477e-05, "loss": 0.1082, "step": 2837 }, { "epoch": 76.70270270270271, "grad_norm": 1.5, "learning_rate": 3.628501104999118e-05, "loss": 0.1739, "step": 2838 }, { "epoch": 76.72972972972973, "grad_norm": 1.0, "learning_rate": 3.627504082449369e-05, "loss": 0.1558, "step": 2839 }, { "epoch": 76.75675675675676, "grad_norm": 1.421875, "learning_rate": 3.626506834731339e-05, "loss": 0.1677, "step": 2840 }, { "epoch": 76.78378378378379, "grad_norm": 0.83984375, "learning_rate": 3.6255093620441834e-05, "loss": 0.1061, "step": 2841 }, { "epoch": 76.8108108108108, "grad_norm": 0.65625, "learning_rate": 3.624511664587104e-05, "loss": 0.074, "step": 2842 }, { "epoch": 76.83783783783784, "grad_norm": 1.4140625, "learning_rate": 3.623513742559345e-05, "loss": 0.1784, "step": 2843 }, { "epoch": 76.86486486486487, "grad_norm": 1.578125, "learning_rate": 3.622515596160196e-05, "loss": 0.1973, "step": 2844 }, { "epoch": 76.89189189189189, "grad_norm": 0.890625, "learning_rate": 3.6215172255889936e-05, "loss": 0.1365, "step": 2845 }, { "epoch": 76.91891891891892, "grad_norm": 0.97265625, "learning_rate": 3.620518631045116e-05, "loss": 0.0919, "step": 2846 }, { "epoch": 76.94594594594595, "grad_norm": 1.1328125, "learning_rate": 3.6195198127279883e-05, "loss": 0.1203, "step": 2847 }, { "epoch": 76.97297297297297, "grad_norm": 1.0859375, "learning_rate": 3.618520770837079e-05, "loss": 0.1252, "step": 2848 }, { "epoch": 77.0, "grad_norm": 1.109375, "learning_rate": 3.6175215055719027e-05, "loss": 0.1532, "step": 2849 }, { "epoch": 77.02702702702703, "grad_norm": 0.5859375, "learning_rate": 3.616522017132017e-05, "loss": 0.074, "step": 2850 }, { "epoch": 77.05405405405405, "grad_norm": 0.8203125, "learning_rate": 3.6155223057170243e-05, "loss": 0.0957, "step": 2851 }, { "epoch": 77.08108108108108, "grad_norm": 1.1328125, "learning_rate": 3.614522371526573e-05, "loss": 0.1409, "step": 2852 }, { "epoch": 77.10810810810811, "grad_norm": 1.0078125, "learning_rate": 3.6135222147603554e-05, "loss": 0.123, "step": 2853 }, { "epoch": 77.13513513513513, "grad_norm": 1.375, "learning_rate": 3.612521835618107e-05, "loss": 0.1833, "step": 2854 }, { "epoch": 77.16216216216216, "grad_norm": 1.1796875, "learning_rate": 3.611521234299609e-05, "loss": 0.1645, "step": 2855 }, { "epoch": 77.1891891891892, "grad_norm": 1.4375, "learning_rate": 3.610520411004686e-05, "loss": 0.1668, "step": 2856 }, { "epoch": 77.21621621621621, "grad_norm": 1.1328125, "learning_rate": 3.609519365933208e-05, "loss": 0.1003, "step": 2857 }, { "epoch": 77.24324324324324, "grad_norm": 1.109375, "learning_rate": 3.608518099285088e-05, "loss": 0.0895, "step": 2858 }, { "epoch": 77.27027027027027, "grad_norm": 1.4296875, "learning_rate": 3.6075166112602856e-05, "loss": 0.143, "step": 2859 }, { "epoch": 77.29729729729729, "grad_norm": 1.3359375, "learning_rate": 3.606514902058802e-05, "loss": 0.1379, "step": 2860 }, { "epoch": 77.32432432432432, "grad_norm": 1.0546875, "learning_rate": 3.6055129718806836e-05, "loss": 0.102, "step": 2861 }, { "epoch": 77.35135135135135, "grad_norm": 1.453125, "learning_rate": 3.60451082092602e-05, "loss": 0.1575, "step": 2862 }, { "epoch": 77.37837837837837, "grad_norm": 1.515625, "learning_rate": 3.603508449394948e-05, "loss": 0.1617, "step": 2863 }, { "epoch": 77.4054054054054, "grad_norm": 1.3046875, "learning_rate": 3.602505857487645e-05, "loss": 0.1341, "step": 2864 }, { "epoch": 77.43243243243244, "grad_norm": 1.234375, "learning_rate": 3.601503045404333e-05, "loss": 0.1433, "step": 2865 }, { "epoch": 77.45945945945945, "grad_norm": 1.0703125, "learning_rate": 3.600500013345281e-05, "loss": 0.1134, "step": 2866 }, { "epoch": 77.48648648648648, "grad_norm": 1.4921875, "learning_rate": 3.5994967615107974e-05, "loss": 0.2085, "step": 2867 }, { "epoch": 77.51351351351352, "grad_norm": 1.5, "learning_rate": 3.598493290101237e-05, "loss": 0.1614, "step": 2868 }, { "epoch": 77.54054054054055, "grad_norm": 1.4296875, "learning_rate": 3.597489599316999e-05, "loss": 0.1819, "step": 2869 }, { "epoch": 77.56756756756756, "grad_norm": 1.6171875, "learning_rate": 3.596485689358524e-05, "loss": 0.1507, "step": 2870 }, { "epoch": 77.5945945945946, "grad_norm": 1.3046875, "learning_rate": 3.5954815604263e-05, "loss": 0.1452, "step": 2871 }, { "epoch": 77.62162162162163, "grad_norm": 1.4140625, "learning_rate": 3.594477212720855e-05, "loss": 0.1762, "step": 2872 }, { "epoch": 77.64864864864865, "grad_norm": 0.56640625, "learning_rate": 3.5934726464427634e-05, "loss": 0.0671, "step": 2873 }, { "epoch": 77.67567567567568, "grad_norm": 1.1875, "learning_rate": 3.5924678617926424e-05, "loss": 0.1073, "step": 2874 }, { "epoch": 77.70270270270271, "grad_norm": 0.77734375, "learning_rate": 3.59146285897115e-05, "loss": 0.0679, "step": 2875 }, { "epoch": 77.72972972972973, "grad_norm": 1.6484375, "learning_rate": 3.590457638178993e-05, "loss": 0.1909, "step": 2876 }, { "epoch": 77.75675675675676, "grad_norm": 0.55078125, "learning_rate": 3.589452199616918e-05, "loss": 0.0644, "step": 2877 }, { "epoch": 77.78378378378379, "grad_norm": 1.1640625, "learning_rate": 3.588446543485717e-05, "loss": 0.1089, "step": 2878 }, { "epoch": 77.8108108108108, "grad_norm": 1.3828125, "learning_rate": 3.587440669986224e-05, "loss": 0.1435, "step": 2879 }, { "epoch": 77.83783783783784, "grad_norm": 0.94140625, "learning_rate": 3.586434579319317e-05, "loss": 0.089, "step": 2880 }, { "epoch": 77.86486486486487, "grad_norm": 1.1796875, "learning_rate": 3.585428271685918e-05, "loss": 0.1396, "step": 2881 }, { "epoch": 77.89189189189189, "grad_norm": 0.69921875, "learning_rate": 3.58442174728699e-05, "loss": 0.0736, "step": 2882 }, { "epoch": 77.91891891891892, "grad_norm": 0.80859375, "learning_rate": 3.5834150063235434e-05, "loss": 0.0922, "step": 2883 }, { "epoch": 77.94594594594595, "grad_norm": 1.765625, "learning_rate": 3.582408048996628e-05, "loss": 0.2518, "step": 2884 }, { "epoch": 77.97297297297297, "grad_norm": 1.015625, "learning_rate": 3.5814008755073394e-05, "loss": 0.1249, "step": 2885 }, { "epoch": 78.0, "grad_norm": 1.0625, "learning_rate": 3.580393486056813e-05, "loss": 0.1383, "step": 2886 }, { "epoch": 78.02702702702703, "grad_norm": 1.1796875, "learning_rate": 3.579385880846232e-05, "loss": 0.1976, "step": 2887 }, { "epoch": 78.05405405405405, "grad_norm": 0.9609375, "learning_rate": 3.57837806007682e-05, "loss": 0.1211, "step": 2888 }, { "epoch": 78.08108108108108, "grad_norm": 1.21875, "learning_rate": 3.577370023949842e-05, "loss": 0.1391, "step": 2889 }, { "epoch": 78.10810810810811, "grad_norm": 1.3671875, "learning_rate": 3.57636177266661e-05, "loss": 0.1513, "step": 2890 }, { "epoch": 78.13513513513513, "grad_norm": 1.0625, "learning_rate": 3.575353306428476e-05, "loss": 0.1377, "step": 2891 }, { "epoch": 78.16216216216216, "grad_norm": 0.796875, "learning_rate": 3.5743446254368365e-05, "loss": 0.0946, "step": 2892 }, { "epoch": 78.1891891891892, "grad_norm": 0.6875, "learning_rate": 3.5733357298931285e-05, "loss": 0.0825, "step": 2893 }, { "epoch": 78.21621621621621, "grad_norm": 1.1484375, "learning_rate": 3.572326619998835e-05, "loss": 0.1059, "step": 2894 }, { "epoch": 78.24324324324324, "grad_norm": 1.3359375, "learning_rate": 3.571317295955481e-05, "loss": 0.1689, "step": 2895 }, { "epoch": 78.27027027027027, "grad_norm": 1.296875, "learning_rate": 3.570307757964631e-05, "loss": 0.1465, "step": 2896 }, { "epoch": 78.29729729729729, "grad_norm": 1.2890625, "learning_rate": 3.5692980062278976e-05, "loss": 0.1622, "step": 2897 }, { "epoch": 78.32432432432432, "grad_norm": 1.34375, "learning_rate": 3.5682880409469315e-05, "loss": 0.1883, "step": 2898 }, { "epoch": 78.35135135135135, "grad_norm": 0.90625, "learning_rate": 3.5672778623234295e-05, "loss": 0.1097, "step": 2899 }, { "epoch": 78.37837837837837, "grad_norm": 1.0546875, "learning_rate": 3.5662674705591275e-05, "loss": 0.134, "step": 2900 }, { "epoch": 78.4054054054054, "grad_norm": 0.80078125, "learning_rate": 3.565256865855808e-05, "loss": 0.08, "step": 2901 }, { "epoch": 78.43243243243244, "grad_norm": 1.296875, "learning_rate": 3.564246048415292e-05, "loss": 0.1517, "step": 2902 }, { "epoch": 78.45945945945945, "grad_norm": 1.0703125, "learning_rate": 3.5632350184394456e-05, "loss": 0.0909, "step": 2903 }, { "epoch": 78.48648648648648, "grad_norm": 1.4375, "learning_rate": 3.562223776130177e-05, "loss": 0.1457, "step": 2904 }, { "epoch": 78.51351351351352, "grad_norm": 1.28125, "learning_rate": 3.561212321689436e-05, "loss": 0.1003, "step": 2905 }, { "epoch": 78.54054054054055, "grad_norm": 1.328125, "learning_rate": 3.5602006553192155e-05, "loss": 0.1089, "step": 2906 }, { "epoch": 78.56756756756756, "grad_norm": 1.2890625, "learning_rate": 3.5591887772215494e-05, "loss": 0.104, "step": 2907 }, { "epoch": 78.5945945945946, "grad_norm": 1.0390625, "learning_rate": 3.558176687598516e-05, "loss": 0.1224, "step": 2908 }, { "epoch": 78.62162162162163, "grad_norm": 1.21875, "learning_rate": 3.557164386652235e-05, "loss": 0.1324, "step": 2909 }, { "epoch": 78.64864864864865, "grad_norm": 1.5703125, "learning_rate": 3.556151874584867e-05, "loss": 0.1613, "step": 2910 }, { "epoch": 78.67567567567568, "grad_norm": 1.25, "learning_rate": 3.555139151598616e-05, "loss": 0.185, "step": 2911 }, { "epoch": 78.70270270270271, "grad_norm": 0.9140625, "learning_rate": 3.554126217895728e-05, "loss": 0.1196, "step": 2912 }, { "epoch": 78.72972972972973, "grad_norm": 1.3984375, "learning_rate": 3.553113073678492e-05, "loss": 0.1765, "step": 2913 }, { "epoch": 78.75675675675676, "grad_norm": 1.203125, "learning_rate": 3.5520997191492356e-05, "loss": 0.1397, "step": 2914 }, { "epoch": 78.78378378378379, "grad_norm": 1.3984375, "learning_rate": 3.551086154510333e-05, "loss": 0.1667, "step": 2915 }, { "epoch": 78.8108108108108, "grad_norm": 1.21875, "learning_rate": 3.5500723799641976e-05, "loss": 0.1384, "step": 2916 }, { "epoch": 78.83783783783784, "grad_norm": 1.25, "learning_rate": 3.549058395713285e-05, "loss": 0.1284, "step": 2917 }, { "epoch": 78.86486486486487, "grad_norm": 0.91796875, "learning_rate": 3.548044201960094e-05, "loss": 0.1298, "step": 2918 }, { "epoch": 78.89189189189189, "grad_norm": 1.2578125, "learning_rate": 3.5470297989071635e-05, "loss": 0.0989, "step": 2919 }, { "epoch": 78.91891891891892, "grad_norm": 0.87890625, "learning_rate": 3.5460151867570734e-05, "loss": 0.0976, "step": 2920 }, { "epoch": 78.94594594594595, "grad_norm": 1.046875, "learning_rate": 3.54500036571245e-05, "loss": 0.0984, "step": 2921 }, { "epoch": 78.97297297297297, "grad_norm": 1.015625, "learning_rate": 3.543985335975956e-05, "loss": 0.1016, "step": 2922 }, { "epoch": 79.0, "grad_norm": 1.2109375, "learning_rate": 3.542970097750299e-05, "loss": 0.1335, "step": 2923 }, { "epoch": 79.02702702702703, "grad_norm": 1.5859375, "learning_rate": 3.5419546512382266e-05, "loss": 0.1005, "step": 2924 }, { "epoch": 79.05405405405405, "grad_norm": 0.90234375, "learning_rate": 3.540938996642528e-05, "loss": 0.0897, "step": 2925 }, { "epoch": 79.08108108108108, "grad_norm": 1.5859375, "learning_rate": 3.5399231341660365e-05, "loss": 0.2075, "step": 2926 }, { "epoch": 79.10810810810811, "grad_norm": 1.203125, "learning_rate": 3.5389070640116237e-05, "loss": 0.1442, "step": 2927 }, { "epoch": 79.13513513513513, "grad_norm": 1.15625, "learning_rate": 3.537890786382204e-05, "loss": 0.1031, "step": 2928 }, { "epoch": 79.16216216216216, "grad_norm": 2.1875, "learning_rate": 3.536874301480733e-05, "loss": 0.1744, "step": 2929 }, { "epoch": 79.1891891891892, "grad_norm": 1.2578125, "learning_rate": 3.535857609510209e-05, "loss": 0.0896, "step": 2930 }, { "epoch": 79.21621621621621, "grad_norm": 1.5390625, "learning_rate": 3.5348407106736696e-05, "loss": 0.1882, "step": 2931 }, { "epoch": 79.24324324324324, "grad_norm": 1.0625, "learning_rate": 3.533823605174195e-05, "loss": 0.108, "step": 2932 }, { "epoch": 79.27027027027027, "grad_norm": 1.2734375, "learning_rate": 3.532806293214906e-05, "loss": 0.1372, "step": 2933 }, { "epoch": 79.29729729729729, "grad_norm": 1.734375, "learning_rate": 3.5317887749989643e-05, "loss": 0.1551, "step": 2934 }, { "epoch": 79.32432432432432, "grad_norm": 1.375, "learning_rate": 3.530771050729575e-05, "loss": 0.1482, "step": 2935 }, { "epoch": 79.35135135135135, "grad_norm": 0.8046875, "learning_rate": 3.529753120609982e-05, "loss": 0.0938, "step": 2936 }, { "epoch": 79.37837837837837, "grad_norm": 1.4296875, "learning_rate": 3.5287349848434716e-05, "loss": 0.2266, "step": 2937 }, { "epoch": 79.4054054054054, "grad_norm": 1.28125, "learning_rate": 3.527716643633369e-05, "loss": 0.1097, "step": 2938 }, { "epoch": 79.43243243243244, "grad_norm": 1.53125, "learning_rate": 3.526698097183044e-05, "loss": 0.1928, "step": 2939 }, { "epoch": 79.45945945945945, "grad_norm": 0.98046875, "learning_rate": 3.5256793456959054e-05, "loss": 0.1066, "step": 2940 }, { "epoch": 79.48648648648648, "grad_norm": 1.828125, "learning_rate": 3.5246603893754015e-05, "loss": 0.2001, "step": 2941 }, { "epoch": 79.51351351351352, "grad_norm": 1.359375, "learning_rate": 3.5236412284250244e-05, "loss": 0.1187, "step": 2942 }, { "epoch": 79.54054054054055, "grad_norm": 0.8828125, "learning_rate": 3.522621863048305e-05, "loss": 0.0833, "step": 2943 }, { "epoch": 79.56756756756756, "grad_norm": 1.046875, "learning_rate": 3.521602293448817e-05, "loss": 0.1097, "step": 2944 }, { "epoch": 79.5945945945946, "grad_norm": 1.078125, "learning_rate": 3.520582519830172e-05, "loss": 0.1196, "step": 2945 }, { "epoch": 79.62162162162163, "grad_norm": 1.515625, "learning_rate": 3.5195625423960245e-05, "loss": 0.209, "step": 2946 }, { "epoch": 79.64864864864865, "grad_norm": 0.921875, "learning_rate": 3.51854236135007e-05, "loss": 0.0735, "step": 2947 }, { "epoch": 79.67567567567568, "grad_norm": 1.8515625, "learning_rate": 3.517521976896042e-05, "loss": 0.2231, "step": 2948 }, { "epoch": 79.70270270270271, "grad_norm": 1.5625, "learning_rate": 3.5165013892377184e-05, "loss": 0.1736, "step": 2949 }, { "epoch": 79.72972972972973, "grad_norm": 0.97265625, "learning_rate": 3.515480598578914e-05, "loss": 0.0973, "step": 2950 }, { "epoch": 79.75675675675676, "grad_norm": 1.875, "learning_rate": 3.514459605123487e-05, "loss": 0.175, "step": 2951 }, { "epoch": 79.78378378378379, "grad_norm": 1.4921875, "learning_rate": 3.5134384090753356e-05, "loss": 0.1584, "step": 2952 }, { "epoch": 79.8108108108108, "grad_norm": 1.5625, "learning_rate": 3.5124170106383966e-05, "loss": 0.2045, "step": 2953 }, { "epoch": 79.83783783783784, "grad_norm": 1.171875, "learning_rate": 3.511395410016649e-05, "loss": 0.1411, "step": 2954 }, { "epoch": 79.86486486486487, "grad_norm": 0.67578125, "learning_rate": 3.5103736074141105e-05, "loss": 0.0886, "step": 2955 }, { "epoch": 79.89189189189189, "grad_norm": 1.1640625, "learning_rate": 3.5093516030348414e-05, "loss": 0.1153, "step": 2956 }, { "epoch": 79.91891891891892, "grad_norm": 1.0078125, "learning_rate": 3.508329397082941e-05, "loss": 0.105, "step": 2957 }, { "epoch": 79.94594594594595, "grad_norm": 0.921875, "learning_rate": 3.50730698976255e-05, "loss": 0.0901, "step": 2958 }, { "epoch": 79.97297297297297, "grad_norm": 0.9609375, "learning_rate": 3.506284381277846e-05, "loss": 0.1182, "step": 2959 }, { "epoch": 80.0, "grad_norm": 1.21875, "learning_rate": 3.5052615718330504e-05, "loss": 0.1625, "step": 2960 }, { "epoch": 80.02702702702703, "grad_norm": 0.796875, "learning_rate": 3.504238561632424e-05, "loss": 0.0812, "step": 2961 }, { "epoch": 80.05405405405405, "grad_norm": 1.0859375, "learning_rate": 3.5032153508802654e-05, "loss": 0.1361, "step": 2962 }, { "epoch": 80.08108108108108, "grad_norm": 1.0, "learning_rate": 3.5021919397809176e-05, "loss": 0.0878, "step": 2963 }, { "epoch": 80.10810810810811, "grad_norm": 1.2421875, "learning_rate": 3.501168328538759e-05, "loss": 0.1212, "step": 2964 }, { "epoch": 80.13513513513513, "grad_norm": 1.078125, "learning_rate": 3.50014451735821e-05, "loss": 0.1254, "step": 2965 }, { "epoch": 80.16216216216216, "grad_norm": 1.203125, "learning_rate": 3.499120506443732e-05, "loss": 0.1079, "step": 2966 }, { "epoch": 80.1891891891892, "grad_norm": 1.1796875, "learning_rate": 3.4980962959998234e-05, "loss": 0.1367, "step": 2967 }, { "epoch": 80.21621621621621, "grad_norm": 1.5625, "learning_rate": 3.497071886231026e-05, "loss": 0.1557, "step": 2968 }, { "epoch": 80.24324324324324, "grad_norm": 1.421875, "learning_rate": 3.496047277341919e-05, "loss": 0.1726, "step": 2969 }, { "epoch": 80.27027027027027, "grad_norm": 1.234375, "learning_rate": 3.495022469537122e-05, "loss": 0.1445, "step": 2970 }, { "epoch": 80.29729729729729, "grad_norm": 1.0234375, "learning_rate": 3.493997463021294e-05, "loss": 0.1471, "step": 2971 }, { "epoch": 80.32432432432432, "grad_norm": 1.0546875, "learning_rate": 3.492972257999134e-05, "loss": 0.113, "step": 2972 }, { "epoch": 80.35135135135135, "grad_norm": 1.046875, "learning_rate": 3.491946854675381e-05, "loss": 0.1154, "step": 2973 }, { "epoch": 80.37837837837837, "grad_norm": 0.92578125, "learning_rate": 3.490921253254813e-05, "loss": 0.0872, "step": 2974 }, { "epoch": 80.4054054054054, "grad_norm": 1.3125, "learning_rate": 3.4898954539422483e-05, "loss": 0.1494, "step": 2975 }, { "epoch": 80.43243243243244, "grad_norm": 0.71875, "learning_rate": 3.4888694569425434e-05, "loss": 0.0749, "step": 2976 }, { "epoch": 80.45945945945945, "grad_norm": 1.3203125, "learning_rate": 3.487843262460595e-05, "loss": 0.1246, "step": 2977 }, { "epoch": 80.48648648648648, "grad_norm": 1.1640625, "learning_rate": 3.486816870701341e-05, "loss": 0.1239, "step": 2978 }, { "epoch": 80.51351351351352, "grad_norm": 1.0234375, "learning_rate": 3.485790281869755e-05, "loss": 0.1234, "step": 2979 }, { "epoch": 80.54054054054055, "grad_norm": 0.9765625, "learning_rate": 3.4847634961708545e-05, "loss": 0.1123, "step": 2980 }, { "epoch": 80.56756756756756, "grad_norm": 1.0078125, "learning_rate": 3.483736513809691e-05, "loss": 0.0981, "step": 2981 }, { "epoch": 80.5945945945946, "grad_norm": 1.1875, "learning_rate": 3.4827093349913584e-05, "loss": 0.1434, "step": 2982 }, { "epoch": 80.62162162162163, "grad_norm": 0.73828125, "learning_rate": 3.481681959920992e-05, "loss": 0.0771, "step": 2983 }, { "epoch": 80.64864864864865, "grad_norm": 1.0, "learning_rate": 3.480654388803761e-05, "loss": 0.1037, "step": 2984 }, { "epoch": 80.67567567567568, "grad_norm": 1.6484375, "learning_rate": 3.4796266218448785e-05, "loss": 0.2466, "step": 2985 }, { "epoch": 80.70270270270271, "grad_norm": 1.375, "learning_rate": 3.478598659249593e-05, "loss": 0.1189, "step": 2986 }, { "epoch": 80.72972972972973, "grad_norm": 1.5546875, "learning_rate": 3.477570501223196e-05, "loss": 0.1842, "step": 2987 }, { "epoch": 80.75675675675676, "grad_norm": 1.375, "learning_rate": 3.4765421479710145e-05, "loss": 0.1889, "step": 2988 }, { "epoch": 80.78378378378379, "grad_norm": 1.1953125, "learning_rate": 3.475513599698417e-05, "loss": 0.1111, "step": 2989 }, { "epoch": 80.8108108108108, "grad_norm": 1.3671875, "learning_rate": 3.474484856610808e-05, "loss": 0.1336, "step": 2990 }, { "epoch": 80.83783783783784, "grad_norm": 1.6640625, "learning_rate": 3.473455918913634e-05, "loss": 0.2171, "step": 2991 }, { "epoch": 80.86486486486487, "grad_norm": 0.74609375, "learning_rate": 3.472426786812378e-05, "loss": 0.0803, "step": 2992 }, { "epoch": 80.89189189189189, "grad_norm": 1.0625, "learning_rate": 3.471397460512563e-05, "loss": 0.139, "step": 2993 }, { "epoch": 80.91891891891892, "grad_norm": 1.1015625, "learning_rate": 3.470367940219753e-05, "loss": 0.1317, "step": 2994 }, { "epoch": 80.94594594594595, "grad_norm": 1.65625, "learning_rate": 3.469338226139546e-05, "loss": 0.269, "step": 2995 }, { "epoch": 80.97297297297297, "grad_norm": 1.15625, "learning_rate": 3.468308318477582e-05, "loss": 0.1209, "step": 2996 }, { "epoch": 81.0, "grad_norm": 1.2890625, "learning_rate": 3.467278217439537e-05, "loss": 0.1413, "step": 2997 }, { "epoch": 81.02702702702703, "grad_norm": 1.328125, "learning_rate": 3.4662479232311306e-05, "loss": 0.1473, "step": 2998 }, { "epoch": 81.05405405405405, "grad_norm": 1.90625, "learning_rate": 3.465217436058116e-05, "loss": 0.2887, "step": 2999 }, { "epoch": 81.08108108108108, "grad_norm": 0.90625, "learning_rate": 3.464186756126286e-05, "loss": 0.1051, "step": 3000 }, { "epoch": 81.10810810810811, "grad_norm": 1.203125, "learning_rate": 3.463155883641475e-05, "loss": 0.1188, "step": 3001 }, { "epoch": 81.13513513513513, "grad_norm": 0.984375, "learning_rate": 3.462124818809551e-05, "loss": 0.1411, "step": 3002 }, { "epoch": 81.16216216216216, "grad_norm": 0.875, "learning_rate": 3.461093561836424e-05, "loss": 0.0859, "step": 3003 }, { "epoch": 81.1891891891892, "grad_norm": 1.3125, "learning_rate": 3.4600621129280416e-05, "loss": 0.1479, "step": 3004 }, { "epoch": 81.21621621621621, "grad_norm": 1.09375, "learning_rate": 3.4590304722903885e-05, "loss": 0.1333, "step": 3005 }, { "epoch": 81.24324324324324, "grad_norm": 0.875, "learning_rate": 3.457998640129488e-05, "loss": 0.0742, "step": 3006 }, { "epoch": 81.27027027027027, "grad_norm": 1.78125, "learning_rate": 3.456966616651404e-05, "loss": 0.2359, "step": 3007 }, { "epoch": 81.29729729729729, "grad_norm": 1.28125, "learning_rate": 3.4559344020622366e-05, "loss": 0.1606, "step": 3008 }, { "epoch": 81.32432432432432, "grad_norm": 0.85546875, "learning_rate": 3.4549019965681226e-05, "loss": 0.0888, "step": 3009 }, { "epoch": 81.35135135135135, "grad_norm": 1.046875, "learning_rate": 3.4538694003752395e-05, "loss": 0.1046, "step": 3010 }, { "epoch": 81.37837837837837, "grad_norm": 1.078125, "learning_rate": 3.452836613689803e-05, "loss": 0.1382, "step": 3011 }, { "epoch": 81.4054054054054, "grad_norm": 1.1015625, "learning_rate": 3.451803636718064e-05, "loss": 0.1672, "step": 3012 }, { "epoch": 81.43243243243244, "grad_norm": 1.1015625, "learning_rate": 3.4507704696663145e-05, "loss": 0.1339, "step": 3013 }, { "epoch": 81.45945945945945, "grad_norm": 1.1640625, "learning_rate": 3.4497371127408826e-05, "loss": 0.1222, "step": 3014 }, { "epoch": 81.48648648648648, "grad_norm": 1.359375, "learning_rate": 3.448703566148136e-05, "loss": 0.164, "step": 3015 }, { "epoch": 81.51351351351352, "grad_norm": 1.1875, "learning_rate": 3.4476698300944785e-05, "loss": 0.1368, "step": 3016 }, { "epoch": 81.54054054054055, "grad_norm": 1.1484375, "learning_rate": 3.4466359047863515e-05, "loss": 0.1384, "step": 3017 }, { "epoch": 81.56756756756756, "grad_norm": 1.078125, "learning_rate": 3.445601790430236e-05, "loss": 0.0865, "step": 3018 }, { "epoch": 81.5945945945946, "grad_norm": 1.6875, "learning_rate": 3.4445674872326507e-05, "loss": 0.2263, "step": 3019 }, { "epoch": 81.62162162162163, "grad_norm": 0.90234375, "learning_rate": 3.44353299540015e-05, "loss": 0.0941, "step": 3020 }, { "epoch": 81.64864864864865, "grad_norm": 1.4765625, "learning_rate": 3.442498315139327e-05, "loss": 0.1686, "step": 3021 }, { "epoch": 81.67567567567568, "grad_norm": 1.4453125, "learning_rate": 3.4414634466568137e-05, "loss": 0.1592, "step": 3022 }, { "epoch": 81.70270270270271, "grad_norm": 1.2421875, "learning_rate": 3.440428390159278e-05, "loss": 0.0819, "step": 3023 }, { "epoch": 81.72972972972973, "grad_norm": 1.953125, "learning_rate": 3.439393145853425e-05, "loss": 0.1793, "step": 3024 }, { "epoch": 81.75675675675676, "grad_norm": 1.515625, "learning_rate": 3.438357713946e-05, "loss": 0.1847, "step": 3025 }, { "epoch": 81.78378378378379, "grad_norm": 1.3359375, "learning_rate": 3.4373220946437826e-05, "loss": 0.1189, "step": 3026 }, { "epoch": 81.8108108108108, "grad_norm": 0.69921875, "learning_rate": 3.4362862881535925e-05, "loss": 0.0643, "step": 3027 }, { "epoch": 81.83783783783784, "grad_norm": 1.3515625, "learning_rate": 3.435250294682284e-05, "loss": 0.163, "step": 3028 }, { "epoch": 81.86486486486487, "grad_norm": 0.6953125, "learning_rate": 3.434214114436752e-05, "loss": 0.0768, "step": 3029 }, { "epoch": 81.89189189189189, "grad_norm": 1.09375, "learning_rate": 3.433177747623925e-05, "loss": 0.1022, "step": 3030 }, { "epoch": 81.91891891891892, "grad_norm": 1.546875, "learning_rate": 3.432141194450772e-05, "loss": 0.1568, "step": 3031 }, { "epoch": 81.94594594594595, "grad_norm": 1.3203125, "learning_rate": 3.4311044551242975e-05, "loss": 0.1415, "step": 3032 }, { "epoch": 81.97297297297297, "grad_norm": 1.390625, "learning_rate": 3.430067529851544e-05, "loss": 0.1083, "step": 3033 }, { "epoch": 82.0, "grad_norm": 1.109375, "learning_rate": 3.42903041883959e-05, "loss": 0.1153, "step": 3034 }, { "epoch": 82.02702702702703, "grad_norm": 1.1953125, "learning_rate": 3.427993122295552e-05, "loss": 0.1591, "step": 3035 }, { "epoch": 82.05405405405405, "grad_norm": 0.75390625, "learning_rate": 3.426955640426584e-05, "loss": 0.071, "step": 3036 }, { "epoch": 82.08108108108108, "grad_norm": 1.234375, "learning_rate": 3.425917973439876e-05, "loss": 0.1592, "step": 3037 }, { "epoch": 82.10810810810811, "grad_norm": 1.8046875, "learning_rate": 3.424880121542655e-05, "loss": 0.2622, "step": 3038 }, { "epoch": 82.13513513513513, "grad_norm": 1.0703125, "learning_rate": 3.423842084942187e-05, "loss": 0.1162, "step": 3039 }, { "epoch": 82.16216216216216, "grad_norm": 1.6796875, "learning_rate": 3.422803863845771e-05, "loss": 0.1603, "step": 3040 }, { "epoch": 82.1891891891892, "grad_norm": 0.953125, "learning_rate": 3.421765458460745e-05, "loss": 0.0947, "step": 3041 }, { "epoch": 82.21621621621621, "grad_norm": 1.453125, "learning_rate": 3.420726868994485e-05, "loss": 0.2556, "step": 3042 }, { "epoch": 82.24324324324324, "grad_norm": 1.46875, "learning_rate": 3.419688095654403e-05, "loss": 0.0985, "step": 3043 }, { "epoch": 82.27027027027027, "grad_norm": 1.3828125, "learning_rate": 3.418649138647946e-05, "loss": 0.148, "step": 3044 }, { "epoch": 82.29729729729729, "grad_norm": 1.1328125, "learning_rate": 3.4176099981826006e-05, "loss": 0.1153, "step": 3045 }, { "epoch": 82.32432432432432, "grad_norm": 1.6875, "learning_rate": 3.4165706744658865e-05, "loss": 0.1822, "step": 3046 }, { "epoch": 82.35135135135135, "grad_norm": 1.4921875, "learning_rate": 3.4155311677053626e-05, "loss": 0.2187, "step": 3047 }, { "epoch": 82.37837837837837, "grad_norm": 1.1640625, "learning_rate": 3.414491478108623e-05, "loss": 0.1372, "step": 3048 }, { "epoch": 82.4054054054054, "grad_norm": 1.09375, "learning_rate": 3.413451605883301e-05, "loss": 0.1104, "step": 3049 }, { "epoch": 82.43243243243244, "grad_norm": 1.8125, "learning_rate": 3.412411551237064e-05, "loss": 0.2174, "step": 3050 }, { "epoch": 82.45945945945945, "grad_norm": 0.8046875, "learning_rate": 3.411371314377614e-05, "loss": 0.0855, "step": 3051 }, { "epoch": 82.48648648648648, "grad_norm": 1.21875, "learning_rate": 3.4103308955126926e-05, "loss": 0.1171, "step": 3052 }, { "epoch": 82.51351351351352, "grad_norm": 1.1484375, "learning_rate": 3.409290294850078e-05, "loss": 0.0946, "step": 3053 }, { "epoch": 82.54054054054055, "grad_norm": 1.46875, "learning_rate": 3.408249512597582e-05, "loss": 0.1372, "step": 3054 }, { "epoch": 82.56756756756756, "grad_norm": 0.8359375, "learning_rate": 3.407208548963054e-05, "loss": 0.0806, "step": 3055 }, { "epoch": 82.5945945945946, "grad_norm": 1.0625, "learning_rate": 3.406167404154381e-05, "loss": 0.1176, "step": 3056 }, { "epoch": 82.62162162162163, "grad_norm": 1.0390625, "learning_rate": 3.4051260783794844e-05, "loss": 0.0844, "step": 3057 }, { "epoch": 82.64864864864865, "grad_norm": 0.94140625, "learning_rate": 3.4040845718463216e-05, "loss": 0.1351, "step": 3058 }, { "epoch": 82.67567567567568, "grad_norm": 1.296875, "learning_rate": 3.403042884762887e-05, "loss": 0.1476, "step": 3059 }, { "epoch": 82.70270270270271, "grad_norm": 1.140625, "learning_rate": 3.402001017337211e-05, "loss": 0.1187, "step": 3060 }, { "epoch": 82.72972972972973, "grad_norm": 0.87890625, "learning_rate": 3.40095896977736e-05, "loss": 0.0877, "step": 3061 }, { "epoch": 82.75675675675676, "grad_norm": 1.4765625, "learning_rate": 3.399916742291435e-05, "loss": 0.1269, "step": 3062 }, { "epoch": 82.78378378378379, "grad_norm": 0.74609375, "learning_rate": 3.398874335087576e-05, "loss": 0.0948, "step": 3063 }, { "epoch": 82.8108108108108, "grad_norm": 0.87109375, "learning_rate": 3.397831748373956e-05, "loss": 0.1045, "step": 3064 }, { "epoch": 82.83783783783784, "grad_norm": 0.91796875, "learning_rate": 3.396788982358785e-05, "loss": 0.0811, "step": 3065 }, { "epoch": 82.86486486486487, "grad_norm": 1.28125, "learning_rate": 3.3957460372503086e-05, "loss": 0.1192, "step": 3066 }, { "epoch": 82.89189189189189, "grad_norm": 0.9921875, "learning_rate": 3.3947029132568074e-05, "loss": 0.0822, "step": 3067 }, { "epoch": 82.91891891891892, "grad_norm": 1.140625, "learning_rate": 3.3936596105866005e-05, "loss": 0.1178, "step": 3068 }, { "epoch": 82.94594594594595, "grad_norm": 1.625, "learning_rate": 3.392616129448038e-05, "loss": 0.2432, "step": 3069 }, { "epoch": 82.97297297297297, "grad_norm": 1.21875, "learning_rate": 3.391572470049511e-05, "loss": 0.1181, "step": 3070 }, { "epoch": 83.0, "grad_norm": 1.09375, "learning_rate": 3.390528632599442e-05, "loss": 0.1304, "step": 3071 }, { "epoch": 83.02702702702703, "grad_norm": 2.28125, "learning_rate": 3.389484617306292e-05, "loss": 0.1676, "step": 3072 }, { "epoch": 83.05405405405405, "grad_norm": 1.3203125, "learning_rate": 3.3884404243785536e-05, "loss": 0.1663, "step": 3073 }, { "epoch": 83.08108108108108, "grad_norm": 0.73046875, "learning_rate": 3.387396054024761e-05, "loss": 0.0746, "step": 3074 }, { "epoch": 83.10810810810811, "grad_norm": 2.09375, "learning_rate": 3.386351506453477e-05, "loss": 0.2432, "step": 3075 }, { "epoch": 83.13513513513513, "grad_norm": 1.328125, "learning_rate": 3.385306781873304e-05, "loss": 0.1659, "step": 3076 }, { "epoch": 83.16216216216216, "grad_norm": 1.421875, "learning_rate": 3.3842618804928806e-05, "loss": 0.1062, "step": 3077 }, { "epoch": 83.1891891891892, "grad_norm": 1.0859375, "learning_rate": 3.383216802520877e-05, "loss": 0.1359, "step": 3078 }, { "epoch": 83.21621621621621, "grad_norm": 1.015625, "learning_rate": 3.382171548166e-05, "loss": 0.1497, "step": 3079 }, { "epoch": 83.24324324324324, "grad_norm": 1.59375, "learning_rate": 3.3811261176369945e-05, "loss": 0.2542, "step": 3080 }, { "epoch": 83.27027027027027, "grad_norm": 1.421875, "learning_rate": 3.380080511142637e-05, "loss": 0.1935, "step": 3081 }, { "epoch": 83.29729729729729, "grad_norm": 1.3828125, "learning_rate": 3.37903472889174e-05, "loss": 0.1645, "step": 3082 }, { "epoch": 83.32432432432432, "grad_norm": 1.2421875, "learning_rate": 3.377988771093152e-05, "loss": 0.1169, "step": 3083 }, { "epoch": 83.35135135135135, "grad_norm": 0.765625, "learning_rate": 3.376942637955756e-05, "loss": 0.1073, "step": 3084 }, { "epoch": 83.37837837837837, "grad_norm": 1.8125, "learning_rate": 3.375896329688471e-05, "loss": 0.1817, "step": 3085 }, { "epoch": 83.4054054054054, "grad_norm": 2.03125, "learning_rate": 3.374849846500248e-05, "loss": 0.1671, "step": 3086 }, { "epoch": 83.43243243243244, "grad_norm": 0.890625, "learning_rate": 3.3738031886000765e-05, "loss": 0.0936, "step": 3087 }, { "epoch": 83.45945945945945, "grad_norm": 0.83203125, "learning_rate": 3.3727563561969787e-05, "loss": 0.0891, "step": 3088 }, { "epoch": 83.48648648648648, "grad_norm": 0.9375, "learning_rate": 3.3717093495000136e-05, "loss": 0.1104, "step": 3089 }, { "epoch": 83.51351351351352, "grad_norm": 1.203125, "learning_rate": 3.3706621687182717e-05, "loss": 0.1299, "step": 3090 }, { "epoch": 83.54054054054055, "grad_norm": 1.21875, "learning_rate": 3.369614814060883e-05, "loss": 0.1375, "step": 3091 }, { "epoch": 83.56756756756756, "grad_norm": 0.9609375, "learning_rate": 3.368567285737007e-05, "loss": 0.1375, "step": 3092 }, { "epoch": 83.5945945945946, "grad_norm": 1.2421875, "learning_rate": 3.36751958395584e-05, "loss": 0.1377, "step": 3093 }, { "epoch": 83.62162162162163, "grad_norm": 1.4921875, "learning_rate": 3.366471708926617e-05, "loss": 0.1752, "step": 3094 }, { "epoch": 83.64864864864865, "grad_norm": 1.53125, "learning_rate": 3.365423660858601e-05, "loss": 0.138, "step": 3095 }, { "epoch": 83.67567567567568, "grad_norm": 1.2734375, "learning_rate": 3.364375439961093e-05, "loss": 0.1443, "step": 3096 }, { "epoch": 83.70270270270271, "grad_norm": 1.0, "learning_rate": 3.3633270464434284e-05, "loss": 0.1373, "step": 3097 }, { "epoch": 83.72972972972973, "grad_norm": 0.921875, "learning_rate": 3.3622784805149766e-05, "loss": 0.0961, "step": 3098 }, { "epoch": 83.75675675675676, "grad_norm": 0.640625, "learning_rate": 3.361229742385142e-05, "loss": 0.0791, "step": 3099 }, { "epoch": 83.78378378378379, "grad_norm": 0.61328125, "learning_rate": 3.360180832263361e-05, "loss": 0.0674, "step": 3100 }, { "epoch": 83.8108108108108, "grad_norm": 0.83984375, "learning_rate": 3.359131750359109e-05, "loss": 0.0896, "step": 3101 }, { "epoch": 83.83783783783784, "grad_norm": 1.2578125, "learning_rate": 3.358082496881891e-05, "loss": 0.1737, "step": 3102 }, { "epoch": 83.86486486486487, "grad_norm": 1.2421875, "learning_rate": 3.3570330720412494e-05, "loss": 0.1141, "step": 3103 }, { "epoch": 83.89189189189189, "grad_norm": 1.25, "learning_rate": 3.355983476046759e-05, "loss": 0.1503, "step": 3104 }, { "epoch": 83.91891891891892, "grad_norm": 1.2578125, "learning_rate": 3.3549337091080296e-05, "loss": 0.1939, "step": 3105 }, { "epoch": 83.94594594594595, "grad_norm": 1.046875, "learning_rate": 3.353883771434705e-05, "loss": 0.1215, "step": 3106 }, { "epoch": 83.97297297297297, "grad_norm": 0.87109375, "learning_rate": 3.3528336632364624e-05, "loss": 0.1058, "step": 3107 }, { "epoch": 84.0, "grad_norm": 1.046875, "learning_rate": 3.351783384723015e-05, "loss": 0.1113, "step": 3108 }, { "epoch": 84.02702702702703, "grad_norm": 1.0078125, "learning_rate": 3.350732936104108e-05, "loss": 0.0897, "step": 3109 }, { "epoch": 84.05405405405405, "grad_norm": 1.03125, "learning_rate": 3.349682317589521e-05, "loss": 0.1178, "step": 3110 }, { "epoch": 84.08108108108108, "grad_norm": 1.03125, "learning_rate": 3.348631529389069e-05, "loss": 0.1321, "step": 3111 }, { "epoch": 84.10810810810811, "grad_norm": 1.6484375, "learning_rate": 3.347580571712599e-05, "loss": 0.2284, "step": 3112 }, { "epoch": 84.13513513513513, "grad_norm": 1.234375, "learning_rate": 3.346529444769992e-05, "loss": 0.1184, "step": 3113 }, { "epoch": 84.16216216216216, "grad_norm": 0.96484375, "learning_rate": 3.345478148771164e-05, "loss": 0.0875, "step": 3114 }, { "epoch": 84.1891891891892, "grad_norm": 0.859375, "learning_rate": 3.344426683926064e-05, "loss": 0.0805, "step": 3115 }, { "epoch": 84.21621621621621, "grad_norm": 0.6640625, "learning_rate": 3.343375050444675e-05, "loss": 0.0684, "step": 3116 }, { "epoch": 84.24324324324324, "grad_norm": 1.3359375, "learning_rate": 3.3423232485370136e-05, "loss": 0.1656, "step": 3117 }, { "epoch": 84.27027027027027, "grad_norm": 1.1171875, "learning_rate": 3.3412712784131304e-05, "loss": 0.1461, "step": 3118 }, { "epoch": 84.29729729729729, "grad_norm": 0.73046875, "learning_rate": 3.340219140283108e-05, "loss": 0.0832, "step": 3119 }, { "epoch": 84.32432432432432, "grad_norm": 1.296875, "learning_rate": 3.339166834357065e-05, "loss": 0.1725, "step": 3120 }, { "epoch": 84.35135135135135, "grad_norm": 1.734375, "learning_rate": 3.3381143608451504e-05, "loss": 0.1776, "step": 3121 }, { "epoch": 84.37837837837837, "grad_norm": 0.81640625, "learning_rate": 3.3370617199575506e-05, "loss": 0.0954, "step": 3122 }, { "epoch": 84.4054054054054, "grad_norm": 1.2578125, "learning_rate": 3.3360089119044825e-05, "loss": 0.1773, "step": 3123 }, { "epoch": 84.43243243243244, "grad_norm": 0.97265625, "learning_rate": 3.3349559368961966e-05, "loss": 0.1015, "step": 3124 }, { "epoch": 84.45945945945945, "grad_norm": 1.0625, "learning_rate": 3.333902795142978e-05, "loss": 0.1198, "step": 3125 }, { "epoch": 84.48648648648648, "grad_norm": 1.0, "learning_rate": 3.332849486855144e-05, "loss": 0.1032, "step": 3126 }, { "epoch": 84.51351351351352, "grad_norm": 0.75390625, "learning_rate": 3.3317960122430467e-05, "loss": 0.0715, "step": 3127 }, { "epoch": 84.54054054054055, "grad_norm": 1.1328125, "learning_rate": 3.330742371517069e-05, "loss": 0.1174, "step": 3128 }, { "epoch": 84.56756756756756, "grad_norm": 0.88671875, "learning_rate": 3.3296885648876286e-05, "loss": 0.0925, "step": 3129 }, { "epoch": 84.5945945945946, "grad_norm": 1.234375, "learning_rate": 3.3286345925651764e-05, "loss": 0.133, "step": 3130 }, { "epoch": 84.62162162162163, "grad_norm": 1.453125, "learning_rate": 3.327580454760195e-05, "loss": 0.1179, "step": 3131 }, { "epoch": 84.64864864864865, "grad_norm": 1.078125, "learning_rate": 3.3265261516832035e-05, "loss": 0.0847, "step": 3132 }, { "epoch": 84.67567567567568, "grad_norm": 0.90625, "learning_rate": 3.325471683544749e-05, "loss": 0.1021, "step": 3133 }, { "epoch": 84.70270270270271, "grad_norm": 1.4140625, "learning_rate": 3.3244170505554156e-05, "loss": 0.1698, "step": 3134 }, { "epoch": 84.72972972972973, "grad_norm": 1.140625, "learning_rate": 3.3233622529258176e-05, "loss": 0.1037, "step": 3135 }, { "epoch": 84.75675675675676, "grad_norm": 1.640625, "learning_rate": 3.322307290866605e-05, "loss": 0.1368, "step": 3136 }, { "epoch": 84.78378378378379, "grad_norm": 1.3203125, "learning_rate": 3.321252164588458e-05, "loss": 0.1158, "step": 3137 }, { "epoch": 84.8108108108108, "grad_norm": 0.859375, "learning_rate": 3.3201968743020904e-05, "loss": 0.0868, "step": 3138 }, { "epoch": 84.83783783783784, "grad_norm": 1.515625, "learning_rate": 3.31914142021825e-05, "loss": 0.1928, "step": 3139 }, { "epoch": 84.86486486486487, "grad_norm": 1.2265625, "learning_rate": 3.318085802547717e-05, "loss": 0.1251, "step": 3140 }, { "epoch": 84.89189189189189, "grad_norm": 1.125, "learning_rate": 3.317030021501301e-05, "loss": 0.1208, "step": 3141 }, { "epoch": 84.91891891891892, "grad_norm": 1.265625, "learning_rate": 3.315974077289849e-05, "loss": 0.1288, "step": 3142 }, { "epoch": 84.94594594594595, "grad_norm": 0.95703125, "learning_rate": 3.3149179701242375e-05, "loss": 0.118, "step": 3143 }, { "epoch": 84.97297297297297, "grad_norm": 1.078125, "learning_rate": 3.313861700215377e-05, "loss": 0.1448, "step": 3144 }, { "epoch": 85.0, "grad_norm": 1.109375, "learning_rate": 3.312805267774209e-05, "loss": 0.1286, "step": 3145 }, { "epoch": 85.02702702702703, "grad_norm": 0.66796875, "learning_rate": 3.311748673011709e-05, "loss": 0.0732, "step": 3146 }, { "epoch": 85.05405405405405, "grad_norm": 0.984375, "learning_rate": 3.310691916138885e-05, "loss": 0.1409, "step": 3147 }, { "epoch": 85.08108108108108, "grad_norm": 0.83984375, "learning_rate": 3.309634997366776e-05, "loss": 0.0833, "step": 3148 }, { "epoch": 85.10810810810811, "grad_norm": 0.95703125, "learning_rate": 3.3085779169064534e-05, "loss": 0.119, "step": 3149 }, { "epoch": 85.13513513513513, "grad_norm": 1.0078125, "learning_rate": 3.307520674969022e-05, "loss": 0.0958, "step": 3150 }, { "epoch": 85.16216216216216, "grad_norm": 0.87109375, "learning_rate": 3.30646327176562e-05, "loss": 0.0984, "step": 3151 }, { "epoch": 85.1891891891892, "grad_norm": 1.21875, "learning_rate": 3.305405707507414e-05, "loss": 0.1397, "step": 3152 }, { "epoch": 85.21621621621621, "grad_norm": 1.0234375, "learning_rate": 3.304347982405606e-05, "loss": 0.1101, "step": 3153 }, { "epoch": 85.24324324324324, "grad_norm": 1.2109375, "learning_rate": 3.3032900966714285e-05, "loss": 0.1095, "step": 3154 }, { "epoch": 85.27027027027027, "grad_norm": 1.3359375, "learning_rate": 3.3022320505161477e-05, "loss": 0.127, "step": 3155 }, { "epoch": 85.29729729729729, "grad_norm": 1.109375, "learning_rate": 3.301173844151059e-05, "loss": 0.1212, "step": 3156 }, { "epoch": 85.32432432432432, "grad_norm": 1.2265625, "learning_rate": 3.300115477787493e-05, "loss": 0.1446, "step": 3157 }, { "epoch": 85.35135135135135, "grad_norm": 1.2421875, "learning_rate": 3.29905695163681e-05, "loss": 0.1186, "step": 3158 }, { "epoch": 85.37837837837837, "grad_norm": 0.86328125, "learning_rate": 3.2979982659104034e-05, "loss": 0.0978, "step": 3159 }, { "epoch": 85.4054054054054, "grad_norm": 1.09375, "learning_rate": 3.296939420819699e-05, "loss": 0.1232, "step": 3160 }, { "epoch": 85.43243243243244, "grad_norm": 1.7265625, "learning_rate": 3.295880416576153e-05, "loss": 0.2298, "step": 3161 }, { "epoch": 85.45945945945945, "grad_norm": 0.9921875, "learning_rate": 3.2948212533912524e-05, "loss": 0.1095, "step": 3162 }, { "epoch": 85.48648648648648, "grad_norm": 0.796875, "learning_rate": 3.2937619314765195e-05, "loss": 0.0829, "step": 3163 }, { "epoch": 85.51351351351352, "grad_norm": 0.9140625, "learning_rate": 3.2927024510435055e-05, "loss": 0.0919, "step": 3164 }, { "epoch": 85.54054054054055, "grad_norm": 1.296875, "learning_rate": 3.291642812303794e-05, "loss": 0.1119, "step": 3165 }, { "epoch": 85.56756756756756, "grad_norm": 1.8828125, "learning_rate": 3.290583015468999e-05, "loss": 0.1981, "step": 3166 }, { "epoch": 85.5945945945946, "grad_norm": 1.4140625, "learning_rate": 3.2895230607507695e-05, "loss": 0.1393, "step": 3167 }, { "epoch": 85.62162162162163, "grad_norm": 0.58203125, "learning_rate": 3.288462948360782e-05, "loss": 0.0679, "step": 3168 }, { "epoch": 85.64864864864865, "grad_norm": 0.84375, "learning_rate": 3.287402678510748e-05, "loss": 0.09, "step": 3169 }, { "epoch": 85.67567567567568, "grad_norm": 1.109375, "learning_rate": 3.286342251412408e-05, "loss": 0.1351, "step": 3170 }, { "epoch": 85.70270270270271, "grad_norm": 1.3359375, "learning_rate": 3.285281667277534e-05, "loss": 0.0974, "step": 3171 }, { "epoch": 85.72972972972973, "grad_norm": 1.390625, "learning_rate": 3.28422092631793e-05, "loss": 0.1886, "step": 3172 }, { "epoch": 85.75675675675676, "grad_norm": 1.15625, "learning_rate": 3.283160028745433e-05, "loss": 0.1077, "step": 3173 }, { "epoch": 85.78378378378379, "grad_norm": 1.0234375, "learning_rate": 3.282098974771908e-05, "loss": 0.1026, "step": 3174 }, { "epoch": 85.8108108108108, "grad_norm": 1.1484375, "learning_rate": 3.281037764609254e-05, "loss": 0.1114, "step": 3175 }, { "epoch": 85.83783783783784, "grad_norm": 0.8984375, "learning_rate": 3.2799763984693985e-05, "loss": 0.0948, "step": 3176 }, { "epoch": 85.86486486486487, "grad_norm": 1.8671875, "learning_rate": 3.278914876564303e-05, "loss": 0.2625, "step": 3177 }, { "epoch": 85.89189189189189, "grad_norm": 0.87109375, "learning_rate": 3.2778531991059586e-05, "loss": 0.0968, "step": 3178 }, { "epoch": 85.91891891891892, "grad_norm": 0.83203125, "learning_rate": 3.276791366306387e-05, "loss": 0.1105, "step": 3179 }, { "epoch": 85.94594594594595, "grad_norm": 1.0078125, "learning_rate": 3.275729378377643e-05, "loss": 0.108, "step": 3180 }, { "epoch": 85.97297297297297, "grad_norm": 0.859375, "learning_rate": 3.2746672355318096e-05, "loss": 0.0955, "step": 3181 }, { "epoch": 86.0, "grad_norm": 0.796875, "learning_rate": 3.273604937981003e-05, "loss": 0.0896, "step": 3182 }, { "epoch": 86.02702702702703, "grad_norm": 0.67578125, "learning_rate": 3.272542485937369e-05, "loss": 0.0783, "step": 3183 }, { "epoch": 86.05405405405405, "grad_norm": 1.03125, "learning_rate": 3.271479879613084e-05, "loss": 0.1344, "step": 3184 }, { "epoch": 86.08108108108108, "grad_norm": 1.3671875, "learning_rate": 3.270417119220358e-05, "loss": 0.1576, "step": 3185 }, { "epoch": 86.10810810810811, "grad_norm": 0.8515625, "learning_rate": 3.2693542049714264e-05, "loss": 0.102, "step": 3186 }, { "epoch": 86.13513513513513, "grad_norm": 1.1796875, "learning_rate": 3.2682911370785605e-05, "loss": 0.123, "step": 3187 }, { "epoch": 86.16216216216216, "grad_norm": 0.9609375, "learning_rate": 3.267227915754061e-05, "loss": 0.1152, "step": 3188 }, { "epoch": 86.1891891891892, "grad_norm": 0.64453125, "learning_rate": 3.266164541210258e-05, "loss": 0.0757, "step": 3189 }, { "epoch": 86.21621621621621, "grad_norm": 1.421875, "learning_rate": 3.265101013659511e-05, "loss": 0.1797, "step": 3190 }, { "epoch": 86.24324324324324, "grad_norm": 1.484375, "learning_rate": 3.264037333314215e-05, "loss": 0.1932, "step": 3191 }, { "epoch": 86.27027027027027, "grad_norm": 0.65234375, "learning_rate": 3.26297350038679e-05, "loss": 0.082, "step": 3192 }, { "epoch": 86.29729729729729, "grad_norm": 1.1640625, "learning_rate": 3.2619095150896894e-05, "loss": 0.1761, "step": 3193 }, { "epoch": 86.32432432432432, "grad_norm": 1.078125, "learning_rate": 3.260845377635397e-05, "loss": 0.107, "step": 3194 }, { "epoch": 86.35135135135135, "grad_norm": 1.25, "learning_rate": 3.259781088236426e-05, "loss": 0.139, "step": 3195 }, { "epoch": 86.37837837837837, "grad_norm": 0.8671875, "learning_rate": 3.258716647105321e-05, "loss": 0.0922, "step": 3196 }, { "epoch": 86.4054054054054, "grad_norm": 1.1171875, "learning_rate": 3.257652054454655e-05, "loss": 0.1239, "step": 3197 }, { "epoch": 86.43243243243244, "grad_norm": 0.96875, "learning_rate": 3.256587310497033e-05, "loss": 0.1324, "step": 3198 }, { "epoch": 86.45945945945945, "grad_norm": 1.421875, "learning_rate": 3.255522415445091e-05, "loss": 0.209, "step": 3199 }, { "epoch": 86.48648648648648, "grad_norm": 1.3828125, "learning_rate": 3.2544573695114925e-05, "loss": 0.1656, "step": 3200 }, { "epoch": 86.51351351351352, "grad_norm": 0.53515625, "learning_rate": 3.2533921729089336e-05, "loss": 0.0646, "step": 3201 }, { "epoch": 86.54054054054055, "grad_norm": 0.58984375, "learning_rate": 3.252326825850139e-05, "loss": 0.0719, "step": 3202 }, { "epoch": 86.56756756756756, "grad_norm": 0.69921875, "learning_rate": 3.251261328547863e-05, "loss": 0.0819, "step": 3203 }, { "epoch": 86.5945945945946, "grad_norm": 1.203125, "learning_rate": 3.250195681214892e-05, "loss": 0.1205, "step": 3204 }, { "epoch": 86.62162162162163, "grad_norm": 0.9921875, "learning_rate": 3.249129884064041e-05, "loss": 0.1159, "step": 3205 }, { "epoch": 86.64864864864865, "grad_norm": 1.40625, "learning_rate": 3.248063937308155e-05, "loss": 0.2463, "step": 3206 }, { "epoch": 86.67567567567568, "grad_norm": 1.0, "learning_rate": 3.2469978411601084e-05, "loss": 0.1157, "step": 3207 }, { "epoch": 86.70270270270271, "grad_norm": 0.73046875, "learning_rate": 3.2459315958328066e-05, "loss": 0.0931, "step": 3208 }, { "epoch": 86.72972972972973, "grad_norm": 0.8671875, "learning_rate": 3.2448652015391844e-05, "loss": 0.0849, "step": 3209 }, { "epoch": 86.75675675675676, "grad_norm": 0.9453125, "learning_rate": 3.243798658492206e-05, "loss": 0.0968, "step": 3210 }, { "epoch": 86.78378378378379, "grad_norm": 0.94921875, "learning_rate": 3.242731966904865e-05, "loss": 0.113, "step": 3211 }, { "epoch": 86.8108108108108, "grad_norm": 0.94921875, "learning_rate": 3.2416651269901855e-05, "loss": 0.0983, "step": 3212 }, { "epoch": 86.83783783783784, "grad_norm": 1.421875, "learning_rate": 3.240598138961221e-05, "loss": 0.1514, "step": 3213 }, { "epoch": 86.86486486486487, "grad_norm": 0.8828125, "learning_rate": 3.239531003031053e-05, "loss": 0.1248, "step": 3214 }, { "epoch": 86.89189189189189, "grad_norm": 0.69140625, "learning_rate": 3.238463719412797e-05, "loss": 0.0724, "step": 3215 }, { "epoch": 86.91891891891892, "grad_norm": 1.4140625, "learning_rate": 3.2373962883195916e-05, "loss": 0.1488, "step": 3216 }, { "epoch": 86.94594594594595, "grad_norm": 0.89453125, "learning_rate": 3.236328709964611e-05, "loss": 0.0873, "step": 3217 }, { "epoch": 86.97297297297297, "grad_norm": 1.03125, "learning_rate": 3.235260984561053e-05, "loss": 0.1518, "step": 3218 }, { "epoch": 87.0, "grad_norm": 1.1875, "learning_rate": 3.23419311232215e-05, "loss": 0.1332, "step": 3219 }, { "epoch": 87.02702702702703, "grad_norm": 1.1171875, "learning_rate": 3.2331250934611624e-05, "loss": 0.0858, "step": 3220 }, { "epoch": 87.05405405405405, "grad_norm": 0.671875, "learning_rate": 3.232056928191376e-05, "loss": 0.0722, "step": 3221 }, { "epoch": 87.08108108108108, "grad_norm": 1.1015625, "learning_rate": 3.230988616726111e-05, "loss": 0.1369, "step": 3222 }, { "epoch": 87.10810810810811, "grad_norm": 0.83203125, "learning_rate": 3.2299201592787145e-05, "loss": 0.0924, "step": 3223 }, { "epoch": 87.13513513513513, "grad_norm": 1.21875, "learning_rate": 3.228851556062562e-05, "loss": 0.142, "step": 3224 }, { "epoch": 87.16216216216216, "grad_norm": 1.3515625, "learning_rate": 3.227782807291058e-05, "loss": 0.1496, "step": 3225 }, { "epoch": 87.1891891891892, "grad_norm": 1.21875, "learning_rate": 3.226713913177639e-05, "loss": 0.1553, "step": 3226 }, { "epoch": 87.21621621621621, "grad_norm": 0.921875, "learning_rate": 3.225644873935769e-05, "loss": 0.1104, "step": 3227 }, { "epoch": 87.24324324324324, "grad_norm": 0.87109375, "learning_rate": 3.224575689778938e-05, "loss": 0.1062, "step": 3228 }, { "epoch": 87.27027027027027, "grad_norm": 0.95703125, "learning_rate": 3.22350636092067e-05, "loss": 0.1297, "step": 3229 }, { "epoch": 87.29729729729729, "grad_norm": 0.71484375, "learning_rate": 3.222436887574514e-05, "loss": 0.0723, "step": 3230 }, { "epoch": 87.32432432432432, "grad_norm": 1.1640625, "learning_rate": 3.22136726995405e-05, "loss": 0.11, "step": 3231 }, { "epoch": 87.35135135135135, "grad_norm": 1.1015625, "learning_rate": 3.220297508272885e-05, "loss": 0.1251, "step": 3232 }, { "epoch": 87.37837837837837, "grad_norm": 1.140625, "learning_rate": 3.219227602744657e-05, "loss": 0.1756, "step": 3233 }, { "epoch": 87.4054054054054, "grad_norm": 0.85546875, "learning_rate": 3.2181575535830314e-05, "loss": 0.1071, "step": 3234 }, { "epoch": 87.43243243243244, "grad_norm": 1.0625, "learning_rate": 3.2170873610017015e-05, "loss": 0.0833, "step": 3235 }, { "epoch": 87.45945945945945, "grad_norm": 1.0390625, "learning_rate": 3.216017025214391e-05, "loss": 0.1017, "step": 3236 }, { "epoch": 87.48648648648648, "grad_norm": 1.1015625, "learning_rate": 3.2149465464348515e-05, "loss": 0.0982, "step": 3237 }, { "epoch": 87.51351351351352, "grad_norm": 1.2734375, "learning_rate": 3.213875924876862e-05, "loss": 0.1231, "step": 3238 }, { "epoch": 87.54054054054055, "grad_norm": 1.2109375, "learning_rate": 3.212805160754233e-05, "loss": 0.1011, "step": 3239 }, { "epoch": 87.56756756756756, "grad_norm": 1.2109375, "learning_rate": 3.211734254280799e-05, "loss": 0.0992, "step": 3240 }, { "epoch": 87.5945945945946, "grad_norm": 0.98828125, "learning_rate": 3.210663205670428e-05, "loss": 0.0898, "step": 3241 }, { "epoch": 87.62162162162163, "grad_norm": 1.4140625, "learning_rate": 3.209592015137012e-05, "loss": 0.1275, "step": 3242 }, { "epoch": 87.64864864864865, "grad_norm": 1.6953125, "learning_rate": 3.2085206828944734e-05, "loss": 0.241, "step": 3243 }, { "epoch": 87.67567567567568, "grad_norm": 1.859375, "learning_rate": 3.2074492091567634e-05, "loss": 0.1758, "step": 3244 }, { "epoch": 87.70270270270271, "grad_norm": 1.0703125, "learning_rate": 3.20637759413786e-05, "loss": 0.1364, "step": 3245 }, { "epoch": 87.72972972972973, "grad_norm": 1.546875, "learning_rate": 3.205305838051771e-05, "loss": 0.0937, "step": 3246 }, { "epoch": 87.75675675675676, "grad_norm": 1.71875, "learning_rate": 3.204233941112531e-05, "loss": 0.1204, "step": 3247 }, { "epoch": 87.78378378378379, "grad_norm": 1.2734375, "learning_rate": 3.203161903534203e-05, "loss": 0.0957, "step": 3248 }, { "epoch": 87.8108108108108, "grad_norm": 1.28125, "learning_rate": 3.2020897255308784e-05, "loss": 0.1116, "step": 3249 }, { "epoch": 87.83783783783784, "grad_norm": 0.7265625, "learning_rate": 3.2010174073166777e-05, "loss": 0.0769, "step": 3250 }, { "epoch": 87.86486486486487, "grad_norm": 1.6875, "learning_rate": 3.199944949105747e-05, "loss": 0.1076, "step": 3251 }, { "epoch": 87.89189189189189, "grad_norm": 0.96875, "learning_rate": 3.1988723511122615e-05, "loss": 0.0709, "step": 3252 }, { "epoch": 87.91891891891892, "grad_norm": 1.6953125, "learning_rate": 3.197799613550426e-05, "loss": 0.1397, "step": 3253 }, { "epoch": 87.94594594594595, "grad_norm": 0.90625, "learning_rate": 3.19672673663447e-05, "loss": 0.0778, "step": 3254 }, { "epoch": 87.97297297297297, "grad_norm": 0.9765625, "learning_rate": 3.195653720578653e-05, "loss": 0.091, "step": 3255 }, { "epoch": 88.0, "grad_norm": 1.625, "learning_rate": 3.194580565597262e-05, "loss": 0.2064, "step": 3256 }, { "epoch": 88.02702702702703, "grad_norm": 1.0546875, "learning_rate": 3.1935072719046115e-05, "loss": 0.0813, "step": 3257 }, { "epoch": 88.05405405405405, "grad_norm": 1.3046875, "learning_rate": 3.192433839715043e-05, "loss": 0.1198, "step": 3258 }, { "epoch": 88.08108108108108, "grad_norm": 0.76953125, "learning_rate": 3.191360269242928e-05, "loss": 0.0849, "step": 3259 }, { "epoch": 88.10810810810811, "grad_norm": 1.2421875, "learning_rate": 3.190286560702662e-05, "loss": 0.1264, "step": 3260 }, { "epoch": 88.13513513513513, "grad_norm": 1.203125, "learning_rate": 3.1892127143086714e-05, "loss": 0.1559, "step": 3261 }, { "epoch": 88.16216216216216, "grad_norm": 0.859375, "learning_rate": 3.188138730275408e-05, "loss": 0.0987, "step": 3262 }, { "epoch": 88.1891891891892, "grad_norm": 1.0859375, "learning_rate": 3.187064608817352e-05, "loss": 0.1448, "step": 3263 }, { "epoch": 88.21621621621621, "grad_norm": 1.5546875, "learning_rate": 3.185990350149011e-05, "loss": 0.1815, "step": 3264 }, { "epoch": 88.24324324324324, "grad_norm": 0.953125, "learning_rate": 3.1849159544849194e-05, "loss": 0.0858, "step": 3265 }, { "epoch": 88.27027027027027, "grad_norm": 1.2265625, "learning_rate": 3.183841422039641e-05, "loss": 0.1902, "step": 3266 }, { "epoch": 88.29729729729729, "grad_norm": 1.15625, "learning_rate": 3.1827667530277634e-05, "loss": 0.1227, "step": 3267 }, { "epoch": 88.32432432432432, "grad_norm": 1.0, "learning_rate": 3.181691947663905e-05, "loss": 0.0946, "step": 3268 }, { "epoch": 88.35135135135135, "grad_norm": 1.890625, "learning_rate": 3.180617006162709e-05, "loss": 0.1915, "step": 3269 }, { "epoch": 88.37837837837837, "grad_norm": 1.0546875, "learning_rate": 3.179541928738846e-05, "loss": 0.0986, "step": 3270 }, { "epoch": 88.4054054054054, "grad_norm": 0.9609375, "learning_rate": 3.178466715607017e-05, "loss": 0.0978, "step": 3271 }, { "epoch": 88.43243243243244, "grad_norm": 0.9375, "learning_rate": 3.177391366981945e-05, "loss": 0.0988, "step": 3272 }, { "epoch": 88.45945945945945, "grad_norm": 0.84765625, "learning_rate": 3.1763158830783826e-05, "loss": 0.0812, "step": 3273 }, { "epoch": 88.48648648648648, "grad_norm": 0.73046875, "learning_rate": 3.1752402641111116e-05, "loss": 0.0788, "step": 3274 }, { "epoch": 88.51351351351352, "grad_norm": 1.4609375, "learning_rate": 3.174164510294937e-05, "loss": 0.1673, "step": 3275 }, { "epoch": 88.54054054054055, "grad_norm": 1.390625, "learning_rate": 3.173088621844692e-05, "loss": 0.0981, "step": 3276 }, { "epoch": 88.56756756756756, "grad_norm": 1.1015625, "learning_rate": 3.172012598975238e-05, "loss": 0.112, "step": 3277 }, { "epoch": 88.5945945945946, "grad_norm": 1.46875, "learning_rate": 3.1709364419014615e-05, "loss": 0.1749, "step": 3278 }, { "epoch": 88.62162162162163, "grad_norm": 1.5703125, "learning_rate": 3.1698601508382756e-05, "loss": 0.1601, "step": 3279 }, { "epoch": 88.64864864864865, "grad_norm": 1.6640625, "learning_rate": 3.1687837260006236e-05, "loss": 0.1669, "step": 3280 }, { "epoch": 88.67567567567568, "grad_norm": 1.1796875, "learning_rate": 3.167707167603471e-05, "loss": 0.1236, "step": 3281 }, { "epoch": 88.70270270270271, "grad_norm": 0.87890625, "learning_rate": 3.1666304758618127e-05, "loss": 0.0825, "step": 3282 }, { "epoch": 88.72972972972973, "grad_norm": 1.1875, "learning_rate": 3.1655536509906686e-05, "loss": 0.1403, "step": 3283 }, { "epoch": 88.75675675675676, "grad_norm": 1.1796875, "learning_rate": 3.164476693205087e-05, "loss": 0.1119, "step": 3284 }, { "epoch": 88.78378378378379, "grad_norm": 1.4609375, "learning_rate": 3.1633996027201424e-05, "loss": 0.217, "step": 3285 }, { "epoch": 88.8108108108108, "grad_norm": 0.7734375, "learning_rate": 3.1623223797509345e-05, "loss": 0.0747, "step": 3286 }, { "epoch": 88.83783783783784, "grad_norm": 1.0234375, "learning_rate": 3.161245024512589e-05, "loss": 0.1279, "step": 3287 }, { "epoch": 88.86486486486487, "grad_norm": 0.66796875, "learning_rate": 3.160167537220261e-05, "loss": 0.0755, "step": 3288 }, { "epoch": 88.89189189189189, "grad_norm": 0.80078125, "learning_rate": 3.15908991808913e-05, "loss": 0.0969, "step": 3289 }, { "epoch": 88.91891891891892, "grad_norm": 1.125, "learning_rate": 3.158012167334401e-05, "loss": 0.1387, "step": 3290 }, { "epoch": 88.94594594594595, "grad_norm": 1.3046875, "learning_rate": 3.1569342851713066e-05, "loss": 0.146, "step": 3291 }, { "epoch": 88.97297297297297, "grad_norm": 1.03125, "learning_rate": 3.155856271815106e-05, "loss": 0.1404, "step": 3292 }, { "epoch": 89.0, "grad_norm": 1.34375, "learning_rate": 3.154778127481083e-05, "loss": 0.1722, "step": 3293 }, { "epoch": 89.02702702702703, "grad_norm": 1.1484375, "learning_rate": 3.1536998523845494e-05, "loss": 0.1834, "step": 3294 }, { "epoch": 89.05405405405405, "grad_norm": 1.3125, "learning_rate": 3.152621446740842e-05, "loss": 0.1722, "step": 3295 }, { "epoch": 89.08108108108108, "grad_norm": 1.2421875, "learning_rate": 3.151542910765323e-05, "loss": 0.116, "step": 3296 }, { "epoch": 89.10810810810811, "grad_norm": 1.3046875, "learning_rate": 3.150464244673383e-05, "loss": 0.1575, "step": 3297 }, { "epoch": 89.13513513513513, "grad_norm": 2.046875, "learning_rate": 3.149385448680436e-05, "loss": 0.2312, "step": 3298 }, { "epoch": 89.16216216216216, "grad_norm": 1.5859375, "learning_rate": 3.1483065230019225e-05, "loss": 0.1989, "step": 3299 }, { "epoch": 89.1891891891892, "grad_norm": 0.71875, "learning_rate": 3.147227467853311e-05, "loss": 0.0812, "step": 3300 }, { "epoch": 89.21621621621621, "grad_norm": 1.0703125, "learning_rate": 3.146148283450093e-05, "loss": 0.0999, "step": 3301 }, { "epoch": 89.24324324324324, "grad_norm": 0.8828125, "learning_rate": 3.145068970007788e-05, "loss": 0.0997, "step": 3302 }, { "epoch": 89.27027027027027, "grad_norm": 0.6953125, "learning_rate": 3.1439895277419394e-05, "loss": 0.0719, "step": 3303 }, { "epoch": 89.29729729729729, "grad_norm": 0.890625, "learning_rate": 3.142909956868118e-05, "loss": 0.1026, "step": 3304 }, { "epoch": 89.32432432432432, "grad_norm": 1.3125, "learning_rate": 3.14183025760192e-05, "loss": 0.1579, "step": 3305 }, { "epoch": 89.35135135135135, "grad_norm": 1.3046875, "learning_rate": 3.140750430158966e-05, "loss": 0.1685, "step": 3306 }, { "epoch": 89.37837837837837, "grad_norm": 0.9140625, "learning_rate": 3.1396704747549024e-05, "loss": 0.1073, "step": 3307 }, { "epoch": 89.4054054054054, "grad_norm": 0.61328125, "learning_rate": 3.1385903916054025e-05, "loss": 0.0672, "step": 3308 }, { "epoch": 89.43243243243244, "grad_norm": 1.046875, "learning_rate": 3.137510180926166e-05, "loss": 0.1036, "step": 3309 }, { "epoch": 89.45945945945945, "grad_norm": 0.67578125, "learning_rate": 3.136429842932914e-05, "loss": 0.08, "step": 3310 }, { "epoch": 89.48648648648648, "grad_norm": 1.421875, "learning_rate": 3.135349377841396e-05, "loss": 0.1675, "step": 3311 }, { "epoch": 89.51351351351352, "grad_norm": 1.3359375, "learning_rate": 3.1342687858673875e-05, "loss": 0.1091, "step": 3312 }, { "epoch": 89.54054054054055, "grad_norm": 0.89453125, "learning_rate": 3.133188067226687e-05, "loss": 0.0919, "step": 3313 }, { "epoch": 89.56756756756756, "grad_norm": 1.0234375, "learning_rate": 3.13210722213512e-05, "loss": 0.1119, "step": 3314 }, { "epoch": 89.5945945945946, "grad_norm": 1.21875, "learning_rate": 3.131026250808537e-05, "loss": 0.1207, "step": 3315 }, { "epoch": 89.62162162162163, "grad_norm": 1.2265625, "learning_rate": 3.1299451534628135e-05, "loss": 0.1144, "step": 3316 }, { "epoch": 89.64864864864865, "grad_norm": 0.9453125, "learning_rate": 3.1288639303138486e-05, "loss": 0.0942, "step": 3317 }, { "epoch": 89.67567567567568, "grad_norm": 1.15625, "learning_rate": 3.12778258157757e-05, "loss": 0.1121, "step": 3318 }, { "epoch": 89.70270270270271, "grad_norm": 0.66015625, "learning_rate": 3.126701107469928e-05, "loss": 0.0715, "step": 3319 }, { "epoch": 89.72972972972973, "grad_norm": 1.2421875, "learning_rate": 3.125619508206898e-05, "loss": 0.0917, "step": 3320 }, { "epoch": 89.75675675675676, "grad_norm": 1.0390625, "learning_rate": 3.1245377840044805e-05, "loss": 0.0992, "step": 3321 }, { "epoch": 89.78378378378379, "grad_norm": 1.4375, "learning_rate": 3.123455935078702e-05, "loss": 0.1574, "step": 3322 }, { "epoch": 89.8108108108108, "grad_norm": 1.109375, "learning_rate": 3.122373961645613e-05, "loss": 0.1182, "step": 3323 }, { "epoch": 89.83783783783784, "grad_norm": 1.6171875, "learning_rate": 3.1212918639212895e-05, "loss": 0.2714, "step": 3324 }, { "epoch": 89.86486486486487, "grad_norm": 1.265625, "learning_rate": 3.120209642121831e-05, "loss": 0.1378, "step": 3325 }, { "epoch": 89.89189189189189, "grad_norm": 1.78125, "learning_rate": 3.119127296463364e-05, "loss": 0.1715, "step": 3326 }, { "epoch": 89.91891891891892, "grad_norm": 0.9140625, "learning_rate": 3.1180448271620375e-05, "loss": 0.0909, "step": 3327 }, { "epoch": 89.94594594594595, "grad_norm": 0.81640625, "learning_rate": 3.116962234434025e-05, "loss": 0.0785, "step": 3328 }, { "epoch": 89.97297297297297, "grad_norm": 0.71875, "learning_rate": 3.1158795184955285e-05, "loss": 0.0797, "step": 3329 }, { "epoch": 90.0, "grad_norm": 1.2734375, "learning_rate": 3.11479667956277e-05, "loss": 0.0967, "step": 3330 }, { "epoch": 90.02702702702703, "grad_norm": 1.015625, "learning_rate": 3.1137137178519985e-05, "loss": 0.1358, "step": 3331 }, { "epoch": 90.05405405405405, "grad_norm": 0.7578125, "learning_rate": 3.112630633579486e-05, "loss": 0.0911, "step": 3332 }, { "epoch": 90.08108108108108, "grad_norm": 0.90625, "learning_rate": 3.111547426961531e-05, "loss": 0.1033, "step": 3333 }, { "epoch": 90.10810810810811, "grad_norm": 0.73046875, "learning_rate": 3.110464098214455e-05, "loss": 0.0818, "step": 3334 }, { "epoch": 90.13513513513513, "grad_norm": 0.515625, "learning_rate": 3.109380647554604e-05, "loss": 0.0606, "step": 3335 }, { "epoch": 90.16216216216216, "grad_norm": 1.09375, "learning_rate": 3.1082970751983496e-05, "loss": 0.1536, "step": 3336 }, { "epoch": 90.1891891891892, "grad_norm": 1.3828125, "learning_rate": 3.107213381362085e-05, "loss": 0.1987, "step": 3337 }, { "epoch": 90.21621621621621, "grad_norm": 0.92578125, "learning_rate": 3.1061295662622314e-05, "loss": 0.0937, "step": 3338 }, { "epoch": 90.24324324324324, "grad_norm": 1.234375, "learning_rate": 3.1050456301152295e-05, "loss": 0.1801, "step": 3339 }, { "epoch": 90.27027027027027, "grad_norm": 0.88671875, "learning_rate": 3.103961573137549e-05, "loss": 0.0948, "step": 3340 }, { "epoch": 90.29729729729729, "grad_norm": 1.109375, "learning_rate": 3.102877395545682e-05, "loss": 0.1545, "step": 3341 }, { "epoch": 90.32432432432432, "grad_norm": 1.1640625, "learning_rate": 3.101793097556142e-05, "loss": 0.1423, "step": 3342 }, { "epoch": 90.35135135135135, "grad_norm": 1.0234375, "learning_rate": 3.10070867938547e-05, "loss": 0.0952, "step": 3343 }, { "epoch": 90.37837837837837, "grad_norm": 1.2421875, "learning_rate": 3.09962414125023e-05, "loss": 0.0925, "step": 3344 }, { "epoch": 90.4054054054054, "grad_norm": 1.6328125, "learning_rate": 3.098539483367011e-05, "loss": 0.1665, "step": 3345 }, { "epoch": 90.43243243243244, "grad_norm": 1.046875, "learning_rate": 3.097454705952421e-05, "loss": 0.0784, "step": 3346 }, { "epoch": 90.45945945945945, "grad_norm": 1.3203125, "learning_rate": 3.096369809223099e-05, "loss": 0.1696, "step": 3347 }, { "epoch": 90.48648648648648, "grad_norm": 1.140625, "learning_rate": 3.095284793395703e-05, "loss": 0.1453, "step": 3348 }, { "epoch": 90.51351351351352, "grad_norm": 1.1015625, "learning_rate": 3.094199658686917e-05, "loss": 0.126, "step": 3349 }, { "epoch": 90.54054054054055, "grad_norm": 1.1640625, "learning_rate": 3.093114405313446e-05, "loss": 0.1027, "step": 3350 }, { "epoch": 90.56756756756756, "grad_norm": 1.328125, "learning_rate": 3.0920290334920224e-05, "loss": 0.094, "step": 3351 }, { "epoch": 90.5945945945946, "grad_norm": 0.94140625, "learning_rate": 3.0909435434394005e-05, "loss": 0.1, "step": 3352 }, { "epoch": 90.62162162162163, "grad_norm": 1.5234375, "learning_rate": 3.0898579353723566e-05, "loss": 0.1941, "step": 3353 }, { "epoch": 90.64864864864865, "grad_norm": 1.421875, "learning_rate": 3.088772209507694e-05, "loss": 0.134, "step": 3354 }, { "epoch": 90.67567567567568, "grad_norm": 2.328125, "learning_rate": 3.087686366062236e-05, "loss": 0.1348, "step": 3355 }, { "epoch": 90.70270270270271, "grad_norm": 2.375, "learning_rate": 3.086600405252833e-05, "loss": 0.1846, "step": 3356 }, { "epoch": 90.72972972972973, "grad_norm": 1.515625, "learning_rate": 3.085514327296355e-05, "loss": 0.1814, "step": 3357 }, { "epoch": 90.75675675675676, "grad_norm": 1.234375, "learning_rate": 3.084428132409698e-05, "loss": 0.123, "step": 3358 }, { "epoch": 90.78378378378379, "grad_norm": 1.75, "learning_rate": 3.08334182080978e-05, "loss": 0.1791, "step": 3359 }, { "epoch": 90.8108108108108, "grad_norm": 0.94921875, "learning_rate": 3.082255392713544e-05, "loss": 0.0696, "step": 3360 }, { "epoch": 90.83783783783784, "grad_norm": 1.7578125, "learning_rate": 3.081168848337955e-05, "loss": 0.1112, "step": 3361 }, { "epoch": 90.86486486486487, "grad_norm": 1.2421875, "learning_rate": 3.080082187900001e-05, "loss": 0.1226, "step": 3362 }, { "epoch": 90.89189189189189, "grad_norm": 1.2890625, "learning_rate": 3.0789954116166926e-05, "loss": 0.1596, "step": 3363 }, { "epoch": 90.91891891891892, "grad_norm": 1.5546875, "learning_rate": 3.0779085197050665e-05, "loss": 0.148, "step": 3364 }, { "epoch": 90.94594594594595, "grad_norm": 0.8359375, "learning_rate": 3.0768215123821806e-05, "loss": 0.0868, "step": 3365 }, { "epoch": 90.97297297297297, "grad_norm": 0.8984375, "learning_rate": 3.075734389865113e-05, "loss": 0.0938, "step": 3366 }, { "epoch": 91.0, "grad_norm": 0.91015625, "learning_rate": 3.0746471523709706e-05, "loss": 0.1309, "step": 3367 }, { "epoch": 91.02702702702703, "grad_norm": 1.625, "learning_rate": 3.073559800116879e-05, "loss": 0.2762, "step": 3368 }, { "epoch": 91.05405405405405, "grad_norm": 0.90625, "learning_rate": 3.072472333319988e-05, "loss": 0.1013, "step": 3369 }, { "epoch": 91.08108108108108, "grad_norm": 1.375, "learning_rate": 3.07138475219747e-05, "loss": 0.1907, "step": 3370 }, { "epoch": 91.10810810810811, "grad_norm": 1.125, "learning_rate": 3.070297056966521e-05, "loss": 0.1202, "step": 3371 }, { "epoch": 91.13513513513513, "grad_norm": 1.0703125, "learning_rate": 3.069209247844359e-05, "loss": 0.1226, "step": 3372 }, { "epoch": 91.16216216216216, "grad_norm": 0.78125, "learning_rate": 3.0681213250482255e-05, "loss": 0.0792, "step": 3373 }, { "epoch": 91.1891891891892, "grad_norm": 1.1640625, "learning_rate": 3.067033288795383e-05, "loss": 0.1131, "step": 3374 }, { "epoch": 91.21621621621621, "grad_norm": 0.98046875, "learning_rate": 3.065945139303119e-05, "loss": 0.1152, "step": 3375 }, { "epoch": 91.24324324324324, "grad_norm": 1.2265625, "learning_rate": 3.064856876788741e-05, "loss": 0.1462, "step": 3376 }, { "epoch": 91.27027027027027, "grad_norm": 0.62109375, "learning_rate": 3.0637685014695824e-05, "loss": 0.0713, "step": 3377 }, { "epoch": 91.29729729729729, "grad_norm": 1.171875, "learning_rate": 3.062680013562996e-05, "loss": 0.1176, "step": 3378 }, { "epoch": 91.32432432432432, "grad_norm": 1.0859375, "learning_rate": 3.061591413286359e-05, "loss": 0.1354, "step": 3379 }, { "epoch": 91.35135135135135, "grad_norm": 0.84765625, "learning_rate": 3.0605027008570696e-05, "loss": 0.0921, "step": 3380 }, { "epoch": 91.37837837837837, "grad_norm": 1.015625, "learning_rate": 3.059413876492551e-05, "loss": 0.1109, "step": 3381 }, { "epoch": 91.4054054054054, "grad_norm": 1.09375, "learning_rate": 3.058324940410244e-05, "loss": 0.112, "step": 3382 }, { "epoch": 91.43243243243244, "grad_norm": 0.54296875, "learning_rate": 3.057235892827618e-05, "loss": 0.0686, "step": 3383 }, { "epoch": 91.45945945945945, "grad_norm": 0.9921875, "learning_rate": 3.056146733962158e-05, "loss": 0.0899, "step": 3384 }, { "epoch": 91.48648648648648, "grad_norm": 1.296875, "learning_rate": 3.055057464031377e-05, "loss": 0.1372, "step": 3385 }, { "epoch": 91.51351351351352, "grad_norm": 1.3203125, "learning_rate": 3.053968083252807e-05, "loss": 0.1387, "step": 3386 }, { "epoch": 91.54054054054055, "grad_norm": 1.0703125, "learning_rate": 3.052878591844003e-05, "loss": 0.1206, "step": 3387 }, { "epoch": 91.56756756756756, "grad_norm": 0.5546875, "learning_rate": 3.0517889900225415e-05, "loss": 0.067, "step": 3388 }, { "epoch": 91.5945945945946, "grad_norm": 1.234375, "learning_rate": 3.0506992780060216e-05, "loss": 0.1823, "step": 3389 }, { "epoch": 91.62162162162163, "grad_norm": 1.796875, "learning_rate": 3.0496094560120653e-05, "loss": 0.2044, "step": 3390 }, { "epoch": 91.64864864864865, "grad_norm": 0.76171875, "learning_rate": 3.0485195242583135e-05, "loss": 0.0854, "step": 3391 }, { "epoch": 91.67567567567568, "grad_norm": 1.5234375, "learning_rate": 3.0474294829624327e-05, "loss": 0.1455, "step": 3392 }, { "epoch": 91.70270270270271, "grad_norm": 1.109375, "learning_rate": 3.04633933234211e-05, "loss": 0.1047, "step": 3393 }, { "epoch": 91.72972972972973, "grad_norm": 1.125, "learning_rate": 3.045249072615053e-05, "loss": 0.1165, "step": 3394 }, { "epoch": 91.75675675675676, "grad_norm": 0.6171875, "learning_rate": 3.0441587039989928e-05, "loss": 0.0754, "step": 3395 }, { "epoch": 91.78378378378379, "grad_norm": 1.1015625, "learning_rate": 3.0430682267116804e-05, "loss": 0.1217, "step": 3396 }, { "epoch": 91.8108108108108, "grad_norm": 0.546875, "learning_rate": 3.0419776409708906e-05, "loss": 0.0588, "step": 3397 }, { "epoch": 91.83783783783784, "grad_norm": 0.8125, "learning_rate": 3.040886946994419e-05, "loss": 0.0868, "step": 3398 }, { "epoch": 91.86486486486487, "grad_norm": 1.7890625, "learning_rate": 3.039796145000082e-05, "loss": 0.298, "step": 3399 }, { "epoch": 91.89189189189189, "grad_norm": 1.09375, "learning_rate": 3.0387052352057187e-05, "loss": 0.1276, "step": 3400 }, { "epoch": 91.91891891891892, "grad_norm": 0.859375, "learning_rate": 3.0376142178291894e-05, "loss": 0.0716, "step": 3401 }, { "epoch": 91.94594594594595, "grad_norm": 0.984375, "learning_rate": 3.0365230930883758e-05, "loss": 0.1034, "step": 3402 }, { "epoch": 91.97297297297297, "grad_norm": 0.8125, "learning_rate": 3.0354318612011812e-05, "loss": 0.0676, "step": 3403 }, { "epoch": 92.0, "grad_norm": 0.9765625, "learning_rate": 3.0343405223855292e-05, "loss": 0.0976, "step": 3404 }, { "epoch": 92.02702702702703, "grad_norm": 1.09375, "learning_rate": 3.0332490768593675e-05, "loss": 0.0853, "step": 3405 }, { "epoch": 92.05405405405405, "grad_norm": 1.1328125, "learning_rate": 3.0321575248406614e-05, "loss": 0.1205, "step": 3406 }, { "epoch": 92.08108108108108, "grad_norm": 1.3203125, "learning_rate": 3.0310658665474006e-05, "loss": 0.1135, "step": 3407 }, { "epoch": 92.10810810810811, "grad_norm": 1.1171875, "learning_rate": 3.0299741021975937e-05, "loss": 0.1478, "step": 3408 }, { "epoch": 92.13513513513513, "grad_norm": 0.83984375, "learning_rate": 3.0288822320092725e-05, "loss": 0.0934, "step": 3409 }, { "epoch": 92.16216216216216, "grad_norm": 1.71875, "learning_rate": 3.027790256200489e-05, "loss": 0.1748, "step": 3410 }, { "epoch": 92.1891891891892, "grad_norm": 1.0390625, "learning_rate": 3.0266981749893157e-05, "loss": 0.1222, "step": 3411 }, { "epoch": 92.21621621621621, "grad_norm": 1.0703125, "learning_rate": 3.0256059885938476e-05, "loss": 0.1302, "step": 3412 }, { "epoch": 92.24324324324324, "grad_norm": 1.0546875, "learning_rate": 3.024513697232199e-05, "loss": 0.0979, "step": 3413 }, { "epoch": 92.27027027027027, "grad_norm": 1.265625, "learning_rate": 3.0234213011225065e-05, "loss": 0.1098, "step": 3414 }, { "epoch": 92.29729729729729, "grad_norm": 1.7421875, "learning_rate": 3.022328800482927e-05, "loss": 0.2301, "step": 3415 }, { "epoch": 92.32432432432432, "grad_norm": 1.3046875, "learning_rate": 3.0212361955316388e-05, "loss": 0.1913, "step": 3416 }, { "epoch": 92.35135135135135, "grad_norm": 1.09375, "learning_rate": 3.02014348648684e-05, "loss": 0.1104, "step": 3417 }, { "epoch": 92.37837837837837, "grad_norm": 1.6171875, "learning_rate": 3.01905067356675e-05, "loss": 0.1643, "step": 3418 }, { "epoch": 92.4054054054054, "grad_norm": 1.8828125, "learning_rate": 3.01795775698961e-05, "loss": 0.125, "step": 3419 }, { "epoch": 92.43243243243244, "grad_norm": 1.8203125, "learning_rate": 3.0168647369736803e-05, "loss": 0.1459, "step": 3420 }, { "epoch": 92.45945945945945, "grad_norm": 0.81640625, "learning_rate": 3.0157716137372428e-05, "loss": 0.0689, "step": 3421 }, { "epoch": 92.48648648648648, "grad_norm": 0.921875, "learning_rate": 3.0146783874985994e-05, "loss": 0.0806, "step": 3422 }, { "epoch": 92.51351351351352, "grad_norm": 1.4765625, "learning_rate": 3.013585058476074e-05, "loss": 0.2036, "step": 3423 }, { "epoch": 92.54054054054055, "grad_norm": 1.5078125, "learning_rate": 3.0124916268880083e-05, "loss": 0.1219, "step": 3424 }, { "epoch": 92.56756756756756, "grad_norm": 1.5859375, "learning_rate": 3.011398092952767e-05, "loss": 0.167, "step": 3425 }, { "epoch": 92.5945945945946, "grad_norm": 1.015625, "learning_rate": 3.0103044568887344e-05, "loss": 0.1158, "step": 3426 }, { "epoch": 92.62162162162163, "grad_norm": 0.9296875, "learning_rate": 3.0092107189143154e-05, "loss": 0.0978, "step": 3427 }, { "epoch": 92.64864864864865, "grad_norm": 1.390625, "learning_rate": 3.0081168792479348e-05, "loss": 0.1295, "step": 3428 }, { "epoch": 92.67567567567568, "grad_norm": 1.34375, "learning_rate": 3.0070229381080372e-05, "loss": 0.0739, "step": 3429 }, { "epoch": 92.70270270270271, "grad_norm": 1.515625, "learning_rate": 3.0059288957130892e-05, "loss": 0.0995, "step": 3430 }, { "epoch": 92.72972972972973, "grad_norm": 1.703125, "learning_rate": 3.0048347522815766e-05, "loss": 0.1676, "step": 3431 }, { "epoch": 92.75675675675676, "grad_norm": 1.2109375, "learning_rate": 3.0037405080320047e-05, "loss": 0.1817, "step": 3432 }, { "epoch": 92.78378378378379, "grad_norm": 1.4609375, "learning_rate": 3.0026461631829005e-05, "loss": 0.1687, "step": 3433 }, { "epoch": 92.8108108108108, "grad_norm": 0.75390625, "learning_rate": 3.0015517179528092e-05, "loss": 0.0781, "step": 3434 }, { "epoch": 92.83783783783784, "grad_norm": 1.5078125, "learning_rate": 3.000457172560297e-05, "loss": 0.1946, "step": 3435 }, { "epoch": 92.86486486486487, "grad_norm": 1.390625, "learning_rate": 2.999362527223952e-05, "loss": 0.1051, "step": 3436 }, { "epoch": 92.89189189189189, "grad_norm": 1.140625, "learning_rate": 2.9982677821623782e-05, "loss": 0.1247, "step": 3437 }, { "epoch": 92.91891891891892, "grad_norm": 1.0703125, "learning_rate": 2.9971729375942027e-05, "loss": 0.1622, "step": 3438 }, { "epoch": 92.94594594594595, "grad_norm": 1.109375, "learning_rate": 2.9960779937380712e-05, "loss": 0.1459, "step": 3439 }, { "epoch": 92.97297297297297, "grad_norm": 1.046875, "learning_rate": 2.9949829508126505e-05, "loss": 0.1223, "step": 3440 }, { "epoch": 93.0, "grad_norm": 1.3828125, "learning_rate": 2.993887809036625e-05, "loss": 0.1223, "step": 3441 }, { "epoch": 93.02702702702703, "grad_norm": 1.5390625, "learning_rate": 2.9927925686287006e-05, "loss": 0.1184, "step": 3442 }, { "epoch": 93.05405405405405, "grad_norm": 1.5390625, "learning_rate": 2.991697229807603e-05, "loss": 0.1665, "step": 3443 }, { "epoch": 93.08108108108108, "grad_norm": 1.3359375, "learning_rate": 2.990601792792076e-05, "loss": 0.1463, "step": 3444 }, { "epoch": 93.10810810810811, "grad_norm": 0.80078125, "learning_rate": 2.9895062578008843e-05, "loss": 0.0867, "step": 3445 }, { "epoch": 93.13513513513513, "grad_norm": 1.171875, "learning_rate": 2.988410625052811e-05, "loss": 0.1314, "step": 3446 }, { "epoch": 93.16216216216216, "grad_norm": 1.3359375, "learning_rate": 2.9873148947666606e-05, "loss": 0.1189, "step": 3447 }, { "epoch": 93.1891891891892, "grad_norm": 1.09375, "learning_rate": 2.9862190671612555e-05, "loss": 0.1398, "step": 3448 }, { "epoch": 93.21621621621621, "grad_norm": 1.203125, "learning_rate": 2.985123142455438e-05, "loss": 0.1523, "step": 3449 }, { "epoch": 93.24324324324324, "grad_norm": 0.93359375, "learning_rate": 2.9840271208680702e-05, "loss": 0.1391, "step": 3450 }, { "epoch": 93.27027027027027, "grad_norm": 1.4140625, "learning_rate": 2.9829310026180335e-05, "loss": 0.1153, "step": 3451 }, { "epoch": 93.29729729729729, "grad_norm": 1.171875, "learning_rate": 2.981834787924227e-05, "loss": 0.1217, "step": 3452 }, { "epoch": 93.32432432432432, "grad_norm": 0.90625, "learning_rate": 2.9807384770055713e-05, "loss": 0.0901, "step": 3453 }, { "epoch": 93.35135135135135, "grad_norm": 1.296875, "learning_rate": 2.9796420700810046e-05, "loss": 0.1359, "step": 3454 }, { "epoch": 93.37837837837837, "grad_norm": 1.1484375, "learning_rate": 2.9785455673694856e-05, "loss": 0.0887, "step": 3455 }, { "epoch": 93.4054054054054, "grad_norm": 1.421875, "learning_rate": 2.977448969089991e-05, "loss": 0.1149, "step": 3456 }, { "epoch": 93.43243243243244, "grad_norm": 1.1875, "learning_rate": 2.976352275461517e-05, "loss": 0.1241, "step": 3457 }, { "epoch": 93.45945945945945, "grad_norm": 1.3671875, "learning_rate": 2.975255486703079e-05, "loss": 0.1285, "step": 3458 }, { "epoch": 93.48648648648648, "grad_norm": 1.5390625, "learning_rate": 2.9741586030337114e-05, "loss": 0.2354, "step": 3459 }, { "epoch": 93.51351351351352, "grad_norm": 0.93359375, "learning_rate": 2.9730616246724675e-05, "loss": 0.1129, "step": 3460 }, { "epoch": 93.54054054054055, "grad_norm": 1.6484375, "learning_rate": 2.9719645518384193e-05, "loss": 0.1605, "step": 3461 }, { "epoch": 93.56756756756756, "grad_norm": 1.296875, "learning_rate": 2.9708673847506573e-05, "loss": 0.1048, "step": 3462 }, { "epoch": 93.5945945945946, "grad_norm": 1.671875, "learning_rate": 2.9697701236282922e-05, "loss": 0.1667, "step": 3463 }, { "epoch": 93.62162162162163, "grad_norm": 1.296875, "learning_rate": 2.9686727686904518e-05, "loss": 0.1447, "step": 3464 }, { "epoch": 93.64864864864865, "grad_norm": 1.6796875, "learning_rate": 2.9675753201562846e-05, "loss": 0.1967, "step": 3465 }, { "epoch": 93.67567567567568, "grad_norm": 0.8828125, "learning_rate": 2.966477778244956e-05, "loss": 0.0857, "step": 3466 }, { "epoch": 93.70270270270271, "grad_norm": 1.484375, "learning_rate": 2.9653801431756495e-05, "loss": 0.1449, "step": 3467 }, { "epoch": 93.72972972972973, "grad_norm": 0.67578125, "learning_rate": 2.9642824151675702e-05, "loss": 0.0802, "step": 3468 }, { "epoch": 93.75675675675676, "grad_norm": 0.81640625, "learning_rate": 2.9631845944399387e-05, "loss": 0.0695, "step": 3469 }, { "epoch": 93.78378378378379, "grad_norm": 1.1171875, "learning_rate": 2.9620866812119967e-05, "loss": 0.1242, "step": 3470 }, { "epoch": 93.8108108108108, "grad_norm": 0.921875, "learning_rate": 2.960988675703002e-05, "loss": 0.1093, "step": 3471 }, { "epoch": 93.83783783783784, "grad_norm": 1.078125, "learning_rate": 2.9598905781322322e-05, "loss": 0.1064, "step": 3472 }, { "epoch": 93.86486486486487, "grad_norm": 1.171875, "learning_rate": 2.958792388718982e-05, "loss": 0.1261, "step": 3473 }, { "epoch": 93.89189189189189, "grad_norm": 0.75390625, "learning_rate": 2.957694107682567e-05, "loss": 0.0895, "step": 3474 }, { "epoch": 93.91891891891892, "grad_norm": 0.890625, "learning_rate": 2.9565957352423186e-05, "loss": 0.0946, "step": 3475 }, { "epoch": 93.94594594594595, "grad_norm": 1.0703125, "learning_rate": 2.955497271617587e-05, "loss": 0.1218, "step": 3476 }, { "epoch": 93.97297297297297, "grad_norm": 1.203125, "learning_rate": 2.9543987170277415e-05, "loss": 0.1449, "step": 3477 }, { "epoch": 94.0, "grad_norm": 0.91796875, "learning_rate": 2.9533000716921693e-05, "loss": 0.0927, "step": 3478 }, { "epoch": 94.02702702702703, "grad_norm": 1.6328125, "learning_rate": 2.952201335830275e-05, "loss": 0.1481, "step": 3479 }, { "epoch": 94.05405405405405, "grad_norm": 0.92578125, "learning_rate": 2.9511025096614815e-05, "loss": 0.1108, "step": 3480 }, { "epoch": 94.08108108108108, "grad_norm": 1.5, "learning_rate": 2.9500035934052306e-05, "loss": 0.2096, "step": 3481 }, { "epoch": 94.10810810810811, "grad_norm": 1.234375, "learning_rate": 2.948904587280981e-05, "loss": 0.0955, "step": 3482 }, { "epoch": 94.13513513513513, "grad_norm": 1.4921875, "learning_rate": 2.94780549150821e-05, "loss": 0.1345, "step": 3483 }, { "epoch": 94.16216216216216, "grad_norm": 1.5859375, "learning_rate": 2.946706306306412e-05, "loss": 0.189, "step": 3484 }, { "epoch": 94.1891891891892, "grad_norm": 0.99609375, "learning_rate": 2.9456070318951007e-05, "loss": 0.0892, "step": 3485 }, { "epoch": 94.21621621621621, "grad_norm": 0.5390625, "learning_rate": 2.9445076684938066e-05, "loss": 0.0609, "step": 3486 }, { "epoch": 94.24324324324324, "grad_norm": 0.9921875, "learning_rate": 2.9434082163220773e-05, "loss": 0.104, "step": 3487 }, { "epoch": 94.27027027027027, "grad_norm": 1.3359375, "learning_rate": 2.9423086755994806e-05, "loss": 0.0961, "step": 3488 }, { "epoch": 94.29729729729729, "grad_norm": 1.6328125, "learning_rate": 2.9412090465455987e-05, "loss": 0.2021, "step": 3489 }, { "epoch": 94.32432432432432, "grad_norm": 1.4296875, "learning_rate": 2.940109329380034e-05, "loss": 0.1384, "step": 3490 }, { "epoch": 94.35135135135135, "grad_norm": 1.3515625, "learning_rate": 2.939009524322406e-05, "loss": 0.1364, "step": 3491 }, { "epoch": 94.37837837837837, "grad_norm": 0.81640625, "learning_rate": 2.93790963159235e-05, "loss": 0.0703, "step": 3492 }, { "epoch": 94.4054054054054, "grad_norm": 1.328125, "learning_rate": 2.9368096514095212e-05, "loss": 0.1219, "step": 3493 }, { "epoch": 94.43243243243244, "grad_norm": 1.296875, "learning_rate": 2.93570958399359e-05, "loss": 0.0804, "step": 3494 }, { "epoch": 94.45945945945945, "grad_norm": 1.2109375, "learning_rate": 2.9346094295642475e-05, "loss": 0.1095, "step": 3495 }, { "epoch": 94.48648648648648, "grad_norm": 1.8046875, "learning_rate": 2.933509188341198e-05, "loss": 0.17, "step": 3496 }, { "epoch": 94.51351351351352, "grad_norm": 1.2421875, "learning_rate": 2.9324088605441668e-05, "loss": 0.1323, "step": 3497 }, { "epoch": 94.54054054054055, "grad_norm": 0.76953125, "learning_rate": 2.9313084463928943e-05, "loss": 0.1015, "step": 3498 }, { "epoch": 94.56756756756756, "grad_norm": 1.34375, "learning_rate": 2.9302079461071385e-05, "loss": 0.1551, "step": 3499 }, { "epoch": 94.5945945945946, "grad_norm": 1.0, "learning_rate": 2.9291073599066753e-05, "loss": 0.1228, "step": 3500 }, { "epoch": 94.62162162162163, "grad_norm": 1.5234375, "learning_rate": 2.9280066880112973e-05, "loss": 0.1622, "step": 3501 }, { "epoch": 94.64864864864865, "grad_norm": 0.94921875, "learning_rate": 2.9269059306408143e-05, "loss": 0.1166, "step": 3502 }, { "epoch": 94.67567567567568, "grad_norm": 1.5546875, "learning_rate": 2.9258050880150533e-05, "loss": 0.177, "step": 3503 }, { "epoch": 94.70270270270271, "grad_norm": 0.61328125, "learning_rate": 2.9247041603538576e-05, "loss": 0.0764, "step": 3504 }, { "epoch": 94.72972972972973, "grad_norm": 0.91796875, "learning_rate": 2.9236031478770876e-05, "loss": 0.0909, "step": 3505 }, { "epoch": 94.75675675675676, "grad_norm": 0.796875, "learning_rate": 2.9225020508046232e-05, "loss": 0.0884, "step": 3506 }, { "epoch": 94.78378378378379, "grad_norm": 1.0625, "learning_rate": 2.9214008693563564e-05, "loss": 0.129, "step": 3507 }, { "epoch": 94.8108108108108, "grad_norm": 1.1796875, "learning_rate": 2.9202996037522008e-05, "loss": 0.1326, "step": 3508 }, { "epoch": 94.83783783783784, "grad_norm": 1.046875, "learning_rate": 2.9191982542120834e-05, "loss": 0.1334, "step": 3509 }, { "epoch": 94.86486486486487, "grad_norm": 1.2109375, "learning_rate": 2.91809682095595e-05, "loss": 0.1568, "step": 3510 }, { "epoch": 94.89189189189189, "grad_norm": 0.984375, "learning_rate": 2.9169953042037623e-05, "loss": 0.1006, "step": 3511 }, { "epoch": 94.91891891891892, "grad_norm": 1.4453125, "learning_rate": 2.9158937041754985e-05, "loss": 0.1881, "step": 3512 }, { "epoch": 94.94594594594595, "grad_norm": 1.28125, "learning_rate": 2.9147920210911543e-05, "loss": 0.1688, "step": 3513 }, { "epoch": 94.97297297297297, "grad_norm": 1.2421875, "learning_rate": 2.9136902551707406e-05, "loss": 0.1505, "step": 3514 }, { "epoch": 95.0, "grad_norm": 1.4765625, "learning_rate": 2.9125884066342857e-05, "loss": 0.1749, "step": 3515 }, { "epoch": 95.02702702702703, "grad_norm": 1.078125, "learning_rate": 2.9114864757018352e-05, "loss": 0.084, "step": 3516 }, { "epoch": 95.05405405405405, "grad_norm": 0.8828125, "learning_rate": 2.9103844625934496e-05, "loss": 0.1033, "step": 3517 }, { "epoch": 95.08108108108108, "grad_norm": 0.984375, "learning_rate": 2.9092823675292057e-05, "loss": 0.0965, "step": 3518 }, { "epoch": 95.10810810810811, "grad_norm": 1.3828125, "learning_rate": 2.9081801907291995e-05, "loss": 0.1561, "step": 3519 }, { "epoch": 95.13513513513513, "grad_norm": 1.015625, "learning_rate": 2.9070779324135405e-05, "loss": 0.1272, "step": 3520 }, { "epoch": 95.16216216216216, "grad_norm": 1.1640625, "learning_rate": 2.9059755928023547e-05, "loss": 0.1207, "step": 3521 }, { "epoch": 95.1891891891892, "grad_norm": 0.78515625, "learning_rate": 2.904873172115784e-05, "loss": 0.0867, "step": 3522 }, { "epoch": 95.21621621621621, "grad_norm": 1.0390625, "learning_rate": 2.9037706705739898e-05, "loss": 0.1127, "step": 3523 }, { "epoch": 95.24324324324324, "grad_norm": 0.8125, "learning_rate": 2.9026680883971457e-05, "loss": 0.0798, "step": 3524 }, { "epoch": 95.27027027027027, "grad_norm": 1.34375, "learning_rate": 2.901565425805443e-05, "loss": 0.1486, "step": 3525 }, { "epoch": 95.29729729729729, "grad_norm": 0.9375, "learning_rate": 2.90046268301909e-05, "loss": 0.0861, "step": 3526 }, { "epoch": 95.32432432432432, "grad_norm": 0.94921875, "learning_rate": 2.899359860258309e-05, "loss": 0.083, "step": 3527 }, { "epoch": 95.35135135135135, "grad_norm": 1.796875, "learning_rate": 2.89825695774334e-05, "loss": 0.1836, "step": 3528 }, { "epoch": 95.37837837837837, "grad_norm": 0.98828125, "learning_rate": 2.897153975694437e-05, "loss": 0.0947, "step": 3529 }, { "epoch": 95.4054054054054, "grad_norm": 1.0546875, "learning_rate": 2.8960509143318726e-05, "loss": 0.1319, "step": 3530 }, { "epoch": 95.43243243243244, "grad_norm": 1.21875, "learning_rate": 2.8949477738759333e-05, "loss": 0.1613, "step": 3531 }, { "epoch": 95.45945945945945, "grad_norm": 1.390625, "learning_rate": 2.8938445545469205e-05, "loss": 0.155, "step": 3532 }, { "epoch": 95.48648648648648, "grad_norm": 0.91796875, "learning_rate": 2.8927412565651546e-05, "loss": 0.0735, "step": 3533 }, { "epoch": 95.51351351351352, "grad_norm": 1.5, "learning_rate": 2.8916378801509693e-05, "loss": 0.1707, "step": 3534 }, { "epoch": 95.54054054054055, "grad_norm": 1.1640625, "learning_rate": 2.8905344255247134e-05, "loss": 0.1076, "step": 3535 }, { "epoch": 95.56756756756756, "grad_norm": 1.40625, "learning_rate": 2.8894308929067537e-05, "loss": 0.1612, "step": 3536 }, { "epoch": 95.5945945945946, "grad_norm": 1.3828125, "learning_rate": 2.8883272825174705e-05, "loss": 0.1327, "step": 3537 }, { "epoch": 95.62162162162163, "grad_norm": 1.515625, "learning_rate": 2.8872235945772608e-05, "loss": 0.1348, "step": 3538 }, { "epoch": 95.64864864864865, "grad_norm": 1.7421875, "learning_rate": 2.886119829306536e-05, "loss": 0.1436, "step": 3539 }, { "epoch": 95.67567567567568, "grad_norm": 1.2109375, "learning_rate": 2.8850159869257247e-05, "loss": 0.1799, "step": 3540 }, { "epoch": 95.70270270270271, "grad_norm": 0.9296875, "learning_rate": 2.883912067655269e-05, "loss": 0.0724, "step": 3541 }, { "epoch": 95.72972972972973, "grad_norm": 1.1015625, "learning_rate": 2.8828080717156275e-05, "loss": 0.1259, "step": 3542 }, { "epoch": 95.75675675675676, "grad_norm": 1.1640625, "learning_rate": 2.881703999327273e-05, "loss": 0.144, "step": 3543 }, { "epoch": 95.78378378378379, "grad_norm": 1.390625, "learning_rate": 2.8805998507106956e-05, "loss": 0.1598, "step": 3544 }, { "epoch": 95.8108108108108, "grad_norm": 1.4921875, "learning_rate": 2.8794956260863994e-05, "loss": 0.1844, "step": 3545 }, { "epoch": 95.83783783783784, "grad_norm": 0.6796875, "learning_rate": 2.8783913256749018e-05, "loss": 0.0649, "step": 3546 }, { "epoch": 95.86486486486487, "grad_norm": 0.75390625, "learning_rate": 2.87728694969674e-05, "loss": 0.0634, "step": 3547 }, { "epoch": 95.89189189189189, "grad_norm": 1.4453125, "learning_rate": 2.8761824983724606e-05, "loss": 0.1362, "step": 3548 }, { "epoch": 95.91891891891892, "grad_norm": 1.0390625, "learning_rate": 2.87507797192263e-05, "loss": 0.0949, "step": 3549 }, { "epoch": 95.94594594594595, "grad_norm": 1.359375, "learning_rate": 2.8739733705678272e-05, "loss": 0.1714, "step": 3550 }, { "epoch": 95.97297297297297, "grad_norm": 1.34375, "learning_rate": 2.8728686945286472e-05, "loss": 0.1596, "step": 3551 }, { "epoch": 96.0, "grad_norm": 1.8671875, "learning_rate": 2.8717639440256983e-05, "loss": 0.1751, "step": 3552 }, { "epoch": 96.02702702702703, "grad_norm": 1.6015625, "learning_rate": 2.870659119279605e-05, "loss": 0.1544, "step": 3553 }, { "epoch": 96.05405405405405, "grad_norm": 1.2734375, "learning_rate": 2.869554220511007e-05, "loss": 0.1683, "step": 3554 }, { "epoch": 96.08108108108108, "grad_norm": 0.65234375, "learning_rate": 2.868449247940559e-05, "loss": 0.074, "step": 3555 }, { "epoch": 96.10810810810811, "grad_norm": 1.0546875, "learning_rate": 2.867344201788927e-05, "loss": 0.1016, "step": 3556 }, { "epoch": 96.13513513513513, "grad_norm": 1.5234375, "learning_rate": 2.8662390822767965e-05, "loss": 0.1159, "step": 3557 }, { "epoch": 96.16216216216216, "grad_norm": 1.015625, "learning_rate": 2.8651338896248653e-05, "loss": 0.1111, "step": 3558 }, { "epoch": 96.1891891891892, "grad_norm": 1.0625, "learning_rate": 2.864028624053845e-05, "loss": 0.1131, "step": 3559 }, { "epoch": 96.21621621621621, "grad_norm": 0.58984375, "learning_rate": 2.8629232857844636e-05, "loss": 0.0707, "step": 3560 }, { "epoch": 96.24324324324324, "grad_norm": 1.0859375, "learning_rate": 2.861817875037462e-05, "loss": 0.109, "step": 3561 }, { "epoch": 96.27027027027027, "grad_norm": 0.77734375, "learning_rate": 2.860712392033597e-05, "loss": 0.078, "step": 3562 }, { "epoch": 96.29729729729729, "grad_norm": 1.21875, "learning_rate": 2.8596068369936386e-05, "loss": 0.1036, "step": 3563 }, { "epoch": 96.32432432432432, "grad_norm": 1.4375, "learning_rate": 2.8585012101383723e-05, "loss": 0.1805, "step": 3564 }, { "epoch": 96.35135135135135, "grad_norm": 0.640625, "learning_rate": 2.8573955116885974e-05, "loss": 0.0776, "step": 3565 }, { "epoch": 96.37837837837837, "grad_norm": 1.1484375, "learning_rate": 2.8562897418651275e-05, "loss": 0.1537, "step": 3566 }, { "epoch": 96.4054054054054, "grad_norm": 1.2578125, "learning_rate": 2.85518390088879e-05, "loss": 0.1131, "step": 3567 }, { "epoch": 96.43243243243244, "grad_norm": 1.359375, "learning_rate": 2.854077988980427e-05, "loss": 0.2039, "step": 3568 }, { "epoch": 96.45945945945945, "grad_norm": 0.80859375, "learning_rate": 2.852972006360895e-05, "loss": 0.0743, "step": 3569 }, { "epoch": 96.48648648648648, "grad_norm": 0.8984375, "learning_rate": 2.8518659532510633e-05, "loss": 0.0967, "step": 3570 }, { "epoch": 96.51351351351352, "grad_norm": 1.6953125, "learning_rate": 2.850759829871818e-05, "loss": 0.193, "step": 3571 }, { "epoch": 96.54054054054055, "grad_norm": 1.4765625, "learning_rate": 2.8496536364440563e-05, "loss": 0.1976, "step": 3572 }, { "epoch": 96.56756756756756, "grad_norm": 1.0546875, "learning_rate": 2.8485473731886912e-05, "loss": 0.1444, "step": 3573 }, { "epoch": 96.5945945945946, "grad_norm": 1.171875, "learning_rate": 2.8474410403266488e-05, "loss": 0.1235, "step": 3574 }, { "epoch": 96.62162162162163, "grad_norm": 0.82421875, "learning_rate": 2.84633463807887e-05, "loss": 0.0894, "step": 3575 }, { "epoch": 96.64864864864865, "grad_norm": 1.1484375, "learning_rate": 2.845228166666308e-05, "loss": 0.1275, "step": 3576 }, { "epoch": 96.67567567567568, "grad_norm": 1.3125, "learning_rate": 2.8441216263099315e-05, "loss": 0.1086, "step": 3577 }, { "epoch": 96.70270270270271, "grad_norm": 1.375, "learning_rate": 2.8430150172307217e-05, "loss": 0.1578, "step": 3578 }, { "epoch": 96.72972972972973, "grad_norm": 0.76953125, "learning_rate": 2.841908339649675e-05, "loss": 0.0777, "step": 3579 }, { "epoch": 96.75675675675676, "grad_norm": 1.1328125, "learning_rate": 2.840801593787799e-05, "loss": 0.2016, "step": 3580 }, { "epoch": 96.78378378378379, "grad_norm": 0.79296875, "learning_rate": 2.8396947798661167e-05, "loss": 0.0708, "step": 3581 }, { "epoch": 96.8108108108108, "grad_norm": 1.21875, "learning_rate": 2.8385878981056662e-05, "loss": 0.1416, "step": 3582 }, { "epoch": 96.83783783783784, "grad_norm": 1.140625, "learning_rate": 2.8374809487274957e-05, "loss": 0.1331, "step": 3583 }, { "epoch": 96.86486486486487, "grad_norm": 1.1953125, "learning_rate": 2.836373931952669e-05, "loss": 0.1242, "step": 3584 }, { "epoch": 96.89189189189189, "grad_norm": 1.125, "learning_rate": 2.835266848002263e-05, "loss": 0.1116, "step": 3585 }, { "epoch": 96.91891891891892, "grad_norm": 0.89453125, "learning_rate": 2.8341596970973683e-05, "loss": 0.0868, "step": 3586 }, { "epoch": 96.94594594594595, "grad_norm": 0.98828125, "learning_rate": 2.8330524794590886e-05, "loss": 0.1151, "step": 3587 }, { "epoch": 96.97297297297297, "grad_norm": 1.40625, "learning_rate": 2.83194519530854e-05, "loss": 0.1397, "step": 3588 }, { "epoch": 97.0, "grad_norm": 0.8984375, "learning_rate": 2.830837844866854e-05, "loss": 0.0797, "step": 3589 }, { "epoch": 97.02702702702703, "grad_norm": 1.203125, "learning_rate": 2.8297304283551728e-05, "loss": 0.1522, "step": 3590 }, { "epoch": 97.05405405405405, "grad_norm": 1.6953125, "learning_rate": 2.8286229459946534e-05, "loss": 0.2278, "step": 3591 }, { "epoch": 97.08108108108108, "grad_norm": 1.3125, "learning_rate": 2.8275153980064667e-05, "loss": 0.1134, "step": 3592 }, { "epoch": 97.10810810810811, "grad_norm": 1.0390625, "learning_rate": 2.8264077846117948e-05, "loss": 0.1513, "step": 3593 }, { "epoch": 97.13513513513513, "grad_norm": 1.0234375, "learning_rate": 2.825300106031833e-05, "loss": 0.1141, "step": 3594 }, { "epoch": 97.16216216216216, "grad_norm": 1.1484375, "learning_rate": 2.824192362487792e-05, "loss": 0.1004, "step": 3595 }, { "epoch": 97.1891891891892, "grad_norm": 1.84375, "learning_rate": 2.8230845542008926e-05, "loss": 0.2101, "step": 3596 }, { "epoch": 97.21621621621621, "grad_norm": 0.5859375, "learning_rate": 2.8219766813923704e-05, "loss": 0.0633, "step": 3597 }, { "epoch": 97.24324324324324, "grad_norm": 1.3046875, "learning_rate": 2.8208687442834718e-05, "loss": 0.1033, "step": 3598 }, { "epoch": 97.27027027027027, "grad_norm": 0.5625, "learning_rate": 2.8197607430954597e-05, "loss": 0.0708, "step": 3599 }, { "epoch": 97.29729729729729, "grad_norm": 1.078125, "learning_rate": 2.818652678049606e-05, "loss": 0.1292, "step": 3600 }, { "epoch": 97.32432432432432, "grad_norm": 0.81640625, "learning_rate": 2.8175445493671972e-05, "loss": 0.0769, "step": 3601 }, { "epoch": 97.35135135135135, "grad_norm": 0.8984375, "learning_rate": 2.816436357269532e-05, "loss": 0.07, "step": 3602 }, { "epoch": 97.37837837837837, "grad_norm": 0.89453125, "learning_rate": 2.8153281019779227e-05, "loss": 0.0958, "step": 3603 }, { "epoch": 97.4054054054054, "grad_norm": 1.1484375, "learning_rate": 2.8142197837136934e-05, "loss": 0.1834, "step": 3604 }, { "epoch": 97.43243243243244, "grad_norm": 1.1171875, "learning_rate": 2.81311140269818e-05, "loss": 0.1028, "step": 3605 }, { "epoch": 97.45945945945945, "grad_norm": 1.0390625, "learning_rate": 2.812002959152733e-05, "loss": 0.1134, "step": 3606 }, { "epoch": 97.48648648648648, "grad_norm": 1.2109375, "learning_rate": 2.8108944532987125e-05, "loss": 0.1256, "step": 3607 }, { "epoch": 97.51351351351352, "grad_norm": 1.3828125, "learning_rate": 2.8097858853574948e-05, "loss": 0.1562, "step": 3608 }, { "epoch": 97.54054054054055, "grad_norm": 1.1875, "learning_rate": 2.8086772555504654e-05, "loss": 0.1007, "step": 3609 }, { "epoch": 97.56756756756756, "grad_norm": 1.4375, "learning_rate": 2.8075685640990233e-05, "loss": 0.1906, "step": 3610 }, { "epoch": 97.5945945945946, "grad_norm": 0.9765625, "learning_rate": 2.8064598112245798e-05, "loss": 0.1088, "step": 3611 }, { "epoch": 97.62162162162163, "grad_norm": 0.8125, "learning_rate": 2.805350997148558e-05, "loss": 0.0753, "step": 3612 }, { "epoch": 97.64864864864865, "grad_norm": 1.6015625, "learning_rate": 2.8042421220923955e-05, "loss": 0.1278, "step": 3613 }, { "epoch": 97.67567567567568, "grad_norm": 1.5, "learning_rate": 2.8031331862775385e-05, "loss": 0.143, "step": 3614 }, { "epoch": 97.70270270270271, "grad_norm": 0.86328125, "learning_rate": 2.8020241899254472e-05, "loss": 0.1105, "step": 3615 }, { "epoch": 97.72972972972973, "grad_norm": 1.125, "learning_rate": 2.800915133257595e-05, "loss": 0.1435, "step": 3616 }, { "epoch": 97.75675675675676, "grad_norm": 1.2890625, "learning_rate": 2.7998060164954655e-05, "loss": 0.1216, "step": 3617 }, { "epoch": 97.78378378378379, "grad_norm": 1.453125, "learning_rate": 2.7986968398605544e-05, "loss": 0.1305, "step": 3618 }, { "epoch": 97.8108108108108, "grad_norm": 1.2109375, "learning_rate": 2.7975876035743698e-05, "loss": 0.1456, "step": 3619 }, { "epoch": 97.83783783783784, "grad_norm": 1.1484375, "learning_rate": 2.7964783078584333e-05, "loss": 0.1493, "step": 3620 }, { "epoch": 97.86486486486487, "grad_norm": 1.2734375, "learning_rate": 2.7953689529342754e-05, "loss": 0.1453, "step": 3621 }, { "epoch": 97.89189189189189, "grad_norm": 1.375, "learning_rate": 2.7942595390234407e-05, "loss": 0.1959, "step": 3622 }, { "epoch": 97.91891891891892, "grad_norm": 1.625, "learning_rate": 2.793150066347484e-05, "loss": 0.1755, "step": 3623 }, { "epoch": 97.94594594594595, "grad_norm": 0.93359375, "learning_rate": 2.7920405351279737e-05, "loss": 0.1009, "step": 3624 }, { "epoch": 97.97297297297297, "grad_norm": 0.8828125, "learning_rate": 2.790930945586488e-05, "loss": 0.0981, "step": 3625 }, { "epoch": 98.0, "grad_norm": 1.1328125, "learning_rate": 2.7898212979446182e-05, "loss": 0.129, "step": 3626 }, { "epoch": 98.02702702702703, "grad_norm": 0.7578125, "learning_rate": 2.788711592423966e-05, "loss": 0.0861, "step": 3627 }, { "epoch": 98.05405405405405, "grad_norm": 1.125, "learning_rate": 2.7876018292461447e-05, "loss": 0.1511, "step": 3628 }, { "epoch": 98.08108108108108, "grad_norm": 0.84765625, "learning_rate": 2.7864920086327805e-05, "loss": 0.0876, "step": 3629 }, { "epoch": 98.10810810810811, "grad_norm": 0.98828125, "learning_rate": 2.7853821308055107e-05, "loss": 0.0871, "step": 3630 }, { "epoch": 98.13513513513513, "grad_norm": 0.953125, "learning_rate": 2.7842721959859817e-05, "loss": 0.1368, "step": 3631 }, { "epoch": 98.16216216216216, "grad_norm": 0.59375, "learning_rate": 2.783162204395855e-05, "loss": 0.0662, "step": 3632 }, { "epoch": 98.1891891891892, "grad_norm": 0.6953125, "learning_rate": 2.7820521562568004e-05, "loss": 0.0712, "step": 3633 }, { "epoch": 98.21621621621621, "grad_norm": 0.80078125, "learning_rate": 2.780942051790501e-05, "loss": 0.0918, "step": 3634 }, { "epoch": 98.24324324324324, "grad_norm": 0.734375, "learning_rate": 2.7798318912186495e-05, "loss": 0.0819, "step": 3635 }, { "epoch": 98.27027027027027, "grad_norm": 1.046875, "learning_rate": 2.778721674762951e-05, "loss": 0.1, "step": 3636 }, { "epoch": 98.29729729729729, "grad_norm": 1.0546875, "learning_rate": 2.777611402645121e-05, "loss": 0.1214, "step": 3637 }, { "epoch": 98.32432432432432, "grad_norm": 0.875, "learning_rate": 2.776501075086887e-05, "loss": 0.0753, "step": 3638 }, { "epoch": 98.35135135135135, "grad_norm": 1.4375, "learning_rate": 2.7753906923099865e-05, "loss": 0.0894, "step": 3639 }, { "epoch": 98.37837837837837, "grad_norm": 1.5703125, "learning_rate": 2.774280254536169e-05, "loss": 0.172, "step": 3640 }, { "epoch": 98.4054054054054, "grad_norm": 1.125, "learning_rate": 2.7731697619871944e-05, "loss": 0.1334, "step": 3641 }, { "epoch": 98.43243243243244, "grad_norm": 1.5078125, "learning_rate": 2.772059214884834e-05, "loss": 0.1292, "step": 3642 }, { "epoch": 98.45945945945945, "grad_norm": 1.28125, "learning_rate": 2.7709486134508684e-05, "loss": 0.1525, "step": 3643 }, { "epoch": 98.48648648648648, "grad_norm": 1.0546875, "learning_rate": 2.769837957907092e-05, "loss": 0.128, "step": 3644 }, { "epoch": 98.51351351351352, "grad_norm": 1.3125, "learning_rate": 2.7687272484753078e-05, "loss": 0.1534, "step": 3645 }, { "epoch": 98.54054054054055, "grad_norm": 1.3828125, "learning_rate": 2.76761648537733e-05, "loss": 0.1154, "step": 3646 }, { "epoch": 98.56756756756756, "grad_norm": 1.46875, "learning_rate": 2.7665056688349832e-05, "loss": 0.1314, "step": 3647 }, { "epoch": 98.5945945945946, "grad_norm": 1.5703125, "learning_rate": 2.7653947990701036e-05, "loss": 0.1693, "step": 3648 }, { "epoch": 98.62162162162163, "grad_norm": 1.5234375, "learning_rate": 2.7642838763045375e-05, "loss": 0.1394, "step": 3649 }, { "epoch": 98.64864864864865, "grad_norm": 0.890625, "learning_rate": 2.763172900760141e-05, "loss": 0.0956, "step": 3650 }, { "epoch": 98.67567567567568, "grad_norm": 1.3203125, "learning_rate": 2.7620618726587834e-05, "loss": 0.131, "step": 3651 }, { "epoch": 98.70270270270271, "grad_norm": 1.4453125, "learning_rate": 2.7609507922223404e-05, "loss": 0.144, "step": 3652 }, { "epoch": 98.72972972972973, "grad_norm": 2.09375, "learning_rate": 2.7598396596727016e-05, "loss": 0.1777, "step": 3653 }, { "epoch": 98.75675675675676, "grad_norm": 1.3515625, "learning_rate": 2.758728475231766e-05, "loss": 0.0869, "step": 3654 }, { "epoch": 98.78378378378379, "grad_norm": 1.015625, "learning_rate": 2.7576172391214427e-05, "loss": 0.1142, "step": 3655 }, { "epoch": 98.8108108108108, "grad_norm": 0.8828125, "learning_rate": 2.7565059515636503e-05, "loss": 0.1172, "step": 3656 }, { "epoch": 98.83783783783784, "grad_norm": 0.98828125, "learning_rate": 2.7553946127803194e-05, "loss": 0.0862, "step": 3657 }, { "epoch": 98.86486486486487, "grad_norm": 0.86328125, "learning_rate": 2.75428322299339e-05, "loss": 0.0686, "step": 3658 }, { "epoch": 98.89189189189189, "grad_norm": 1.3125, "learning_rate": 2.7531717824248123e-05, "loss": 0.1134, "step": 3659 }, { "epoch": 98.91891891891892, "grad_norm": 1.15625, "learning_rate": 2.7520602912965454e-05, "loss": 0.093, "step": 3660 }, { "epoch": 98.94594594594595, "grad_norm": 1.15625, "learning_rate": 2.7509487498305613e-05, "loss": 0.1528, "step": 3661 }, { "epoch": 98.97297297297297, "grad_norm": 0.8515625, "learning_rate": 2.74983715824884e-05, "loss": 0.0968, "step": 3662 }, { "epoch": 99.0, "grad_norm": 1.1875, "learning_rate": 2.7487255167733717e-05, "loss": 0.1328, "step": 3663 }, { "epoch": 99.02702702702703, "grad_norm": 1.4140625, "learning_rate": 2.7476138256261575e-05, "loss": 0.1607, "step": 3664 }, { "epoch": 99.05405405405405, "grad_norm": 1.1484375, "learning_rate": 2.7465020850292063e-05, "loss": 0.0923, "step": 3665 }, { "epoch": 99.08108108108108, "grad_norm": 1.1953125, "learning_rate": 2.7453902952045395e-05, "loss": 0.1185, "step": 3666 }, { "epoch": 99.10810810810811, "grad_norm": 1.1875, "learning_rate": 2.744278456374187e-05, "loss": 0.1204, "step": 3667 }, { "epoch": 99.13513513513513, "grad_norm": 1.0703125, "learning_rate": 2.7431665687601883e-05, "loss": 0.1137, "step": 3668 }, { "epoch": 99.16216216216216, "grad_norm": 1.0546875, "learning_rate": 2.7420546325845937e-05, "loss": 0.0793, "step": 3669 }, { "epoch": 99.1891891891892, "grad_norm": 1.234375, "learning_rate": 2.740942648069461e-05, "loss": 0.1546, "step": 3670 }, { "epoch": 99.21621621621621, "grad_norm": 1.15625, "learning_rate": 2.7398306154368615e-05, "loss": 0.0958, "step": 3671 }, { "epoch": 99.24324324324324, "grad_norm": 0.97265625, "learning_rate": 2.738718534908872e-05, "loss": 0.1039, "step": 3672 }, { "epoch": 99.27027027027027, "grad_norm": 0.490234375, "learning_rate": 2.7376064067075806e-05, "loss": 0.0657, "step": 3673 }, { "epoch": 99.29729729729729, "grad_norm": 0.8984375, "learning_rate": 2.7364942310550855e-05, "loss": 0.1035, "step": 3674 }, { "epoch": 99.32432432432432, "grad_norm": 1.21875, "learning_rate": 2.7353820081734943e-05, "loss": 0.1414, "step": 3675 }, { "epoch": 99.35135135135135, "grad_norm": 1.7109375, "learning_rate": 2.7342697382849226e-05, "loss": 0.1466, "step": 3676 }, { "epoch": 99.37837837837837, "grad_norm": 1.578125, "learning_rate": 2.7331574216114964e-05, "loss": 0.1343, "step": 3677 }, { "epoch": 99.4054054054054, "grad_norm": 1.0859375, "learning_rate": 2.7320450583753515e-05, "loss": 0.089, "step": 3678 }, { "epoch": 99.43243243243244, "grad_norm": 0.95703125, "learning_rate": 2.730932648798632e-05, "loss": 0.1021, "step": 3679 }, { "epoch": 99.45945945945945, "grad_norm": 1.390625, "learning_rate": 2.7298201931034928e-05, "loss": 0.1317, "step": 3680 }, { "epoch": 99.48648648648648, "grad_norm": 1.140625, "learning_rate": 2.728707691512095e-05, "loss": 0.1333, "step": 3681 }, { "epoch": 99.51351351351352, "grad_norm": 2.390625, "learning_rate": 2.7275951442466126e-05, "loss": 0.1513, "step": 3682 }, { "epoch": 99.54054054054055, "grad_norm": 1.7265625, "learning_rate": 2.7264825515292265e-05, "loss": 0.1263, "step": 3683 }, { "epoch": 99.56756756756756, "grad_norm": 2.234375, "learning_rate": 2.7253699135821266e-05, "loss": 0.1992, "step": 3684 }, { "epoch": 99.5945945945946, "grad_norm": 1.171875, "learning_rate": 2.7242572306275122e-05, "loss": 0.1099, "step": 3685 }, { "epoch": 99.62162162162163, "grad_norm": 1.3046875, "learning_rate": 2.7231445028875924e-05, "loss": 0.1363, "step": 3686 }, { "epoch": 99.64864864864865, "grad_norm": 1.3125, "learning_rate": 2.7220317305845844e-05, "loss": 0.1122, "step": 3687 }, { "epoch": 99.67567567567568, "grad_norm": 0.96484375, "learning_rate": 2.7209189139407142e-05, "loss": 0.0809, "step": 3688 }, { "epoch": 99.70270270270271, "grad_norm": 1.5625, "learning_rate": 2.7198060531782172e-05, "loss": 0.1369, "step": 3689 }, { "epoch": 99.72972972972973, "grad_norm": 1.28125, "learning_rate": 2.7186931485193375e-05, "loss": 0.1061, "step": 3690 }, { "epoch": 99.75675675675676, "grad_norm": 1.390625, "learning_rate": 2.7175802001863266e-05, "loss": 0.1272, "step": 3691 }, { "epoch": 99.78378378378379, "grad_norm": 1.34375, "learning_rate": 2.7164672084014475e-05, "loss": 0.1564, "step": 3692 }, { "epoch": 99.8108108108108, "grad_norm": 0.9140625, "learning_rate": 2.7153541733869697e-05, "loss": 0.0949, "step": 3693 }, { "epoch": 99.83783783783784, "grad_norm": 1.6484375, "learning_rate": 2.7142410953651715e-05, "loss": 0.1157, "step": 3694 }, { "epoch": 99.86486486486487, "grad_norm": 1.640625, "learning_rate": 2.7131279745583406e-05, "loss": 0.1475, "step": 3695 }, { "epoch": 99.89189189189189, "grad_norm": 1.4609375, "learning_rate": 2.7120148111887732e-05, "loss": 0.1252, "step": 3696 }, { "epoch": 99.91891891891892, "grad_norm": 1.078125, "learning_rate": 2.7109016054787732e-05, "loss": 0.1011, "step": 3697 }, { "epoch": 99.94594594594595, "grad_norm": 1.09375, "learning_rate": 2.709788357650653e-05, "loss": 0.1186, "step": 3698 }, { "epoch": 99.97297297297297, "grad_norm": 1.03125, "learning_rate": 2.708675067926735e-05, "loss": 0.0794, "step": 3699 }, { "epoch": 100.0, "grad_norm": 1.3359375, "learning_rate": 2.7075617365293487e-05, "loss": 0.1447, "step": 3700 }, { "epoch": 100.02702702702703, "grad_norm": 1.328125, "learning_rate": 2.7064483636808313e-05, "loss": 0.1833, "step": 3701 }, { "epoch": 100.05405405405405, "grad_norm": 0.75390625, "learning_rate": 2.7053349496035292e-05, "loss": 0.0751, "step": 3702 }, { "epoch": 100.08108108108108, "grad_norm": 0.859375, "learning_rate": 2.7042214945197962e-05, "loss": 0.0916, "step": 3703 }, { "epoch": 100.10810810810811, "grad_norm": 0.83984375, "learning_rate": 2.7031079986519963e-05, "loss": 0.086, "step": 3704 }, { "epoch": 100.13513513513513, "grad_norm": 1.1640625, "learning_rate": 2.7019944622224984e-05, "loss": 0.1455, "step": 3705 }, { "epoch": 100.16216216216216, "grad_norm": 1.3125, "learning_rate": 2.700880885453684e-05, "loss": 0.1324, "step": 3706 }, { "epoch": 100.1891891891892, "grad_norm": 1.359375, "learning_rate": 2.6997672685679375e-05, "loss": 0.1387, "step": 3707 }, { "epoch": 100.21621621621621, "grad_norm": 1.1328125, "learning_rate": 2.6986536117876556e-05, "loss": 0.1067, "step": 3708 }, { "epoch": 100.24324324324324, "grad_norm": 1.5, "learning_rate": 2.6975399153352398e-05, "loss": 0.2301, "step": 3709 }, { "epoch": 100.27027027027027, "grad_norm": 1.1015625, "learning_rate": 2.696426179433102e-05, "loss": 0.1432, "step": 3710 }, { "epoch": 100.29729729729729, "grad_norm": 1.7109375, "learning_rate": 2.6953124043036602e-05, "loss": 0.2549, "step": 3711 }, { "epoch": 100.32432432432432, "grad_norm": 1.03125, "learning_rate": 2.6941985901693413e-05, "loss": 0.0904, "step": 3712 }, { "epoch": 100.35135135135135, "grad_norm": 1.546875, "learning_rate": 2.6930847372525797e-05, "loss": 0.2344, "step": 3713 }, { "epoch": 100.37837837837837, "grad_norm": 1.0625, "learning_rate": 2.6919708457758175e-05, "loss": 0.0834, "step": 3714 }, { "epoch": 100.4054054054054, "grad_norm": 1.2578125, "learning_rate": 2.6908569159615038e-05, "loss": 0.1454, "step": 3715 }, { "epoch": 100.43243243243244, "grad_norm": 1.140625, "learning_rate": 2.6897429480320973e-05, "loss": 0.1516, "step": 3716 }, { "epoch": 100.45945945945945, "grad_norm": 0.875, "learning_rate": 2.6886289422100618e-05, "loss": 0.0896, "step": 3717 }, { "epoch": 100.48648648648648, "grad_norm": 1.484375, "learning_rate": 2.687514898717871e-05, "loss": 0.1437, "step": 3718 }, { "epoch": 100.51351351351352, "grad_norm": 0.93359375, "learning_rate": 2.6864008177780036e-05, "loss": 0.0911, "step": 3719 }, { "epoch": 100.54054054054055, "grad_norm": 1.4609375, "learning_rate": 2.6852866996129488e-05, "loss": 0.1443, "step": 3720 }, { "epoch": 100.56756756756756, "grad_norm": 1.5078125, "learning_rate": 2.6841725444452003e-05, "loss": 0.1679, "step": 3721 }, { "epoch": 100.5945945945946, "grad_norm": 1.0546875, "learning_rate": 2.683058352497262e-05, "loss": 0.1382, "step": 3722 }, { "epoch": 100.62162162162163, "grad_norm": 1.0390625, "learning_rate": 2.6819441239916422e-05, "loss": 0.1057, "step": 3723 }, { "epoch": 100.64864864864865, "grad_norm": 0.88671875, "learning_rate": 2.680829859150859e-05, "loss": 0.081, "step": 3724 }, { "epoch": 100.67567567567568, "grad_norm": 1.0859375, "learning_rate": 2.6797155581974358e-05, "loss": 0.1139, "step": 3725 }, { "epoch": 100.70270270270271, "grad_norm": 1.53125, "learning_rate": 2.6786012213539046e-05, "loss": 0.1582, "step": 3726 }, { "epoch": 100.72972972972973, "grad_norm": 0.71484375, "learning_rate": 2.6774868488428047e-05, "loss": 0.0797, "step": 3727 }, { "epoch": 100.75675675675676, "grad_norm": 1.65625, "learning_rate": 2.6763724408866807e-05, "loss": 0.2234, "step": 3728 }, { "epoch": 100.78378378378379, "grad_norm": 1.0234375, "learning_rate": 2.6752579977080865e-05, "loss": 0.1046, "step": 3729 }, { "epoch": 100.8108108108108, "grad_norm": 1.0625, "learning_rate": 2.6741435195295823e-05, "loss": 0.075, "step": 3730 }, { "epoch": 100.83783783783784, "grad_norm": 1.453125, "learning_rate": 2.6730290065737346e-05, "loss": 0.1584, "step": 3731 }, { "epoch": 100.86486486486487, "grad_norm": 0.546875, "learning_rate": 2.6719144590631168e-05, "loss": 0.0598, "step": 3732 }, { "epoch": 100.89189189189189, "grad_norm": 0.984375, "learning_rate": 2.6707998772203096e-05, "loss": 0.1053, "step": 3733 }, { "epoch": 100.91891891891892, "grad_norm": 1.3984375, "learning_rate": 2.6696852612679023e-05, "loss": 0.1326, "step": 3734 }, { "epoch": 100.94594594594595, "grad_norm": 1.7421875, "learning_rate": 2.6685706114284874e-05, "loss": 0.1912, "step": 3735 }, { "epoch": 100.97297297297297, "grad_norm": 1.9375, "learning_rate": 2.667455927924667e-05, "loss": 0.227, "step": 3736 }, { "epoch": 101.0, "grad_norm": 1.125, "learning_rate": 2.6663412109790492e-05, "loss": 0.1223, "step": 3737 }, { "epoch": 101.02702702702703, "grad_norm": 0.98046875, "learning_rate": 2.6652264608142484e-05, "loss": 0.1302, "step": 3738 }, { "epoch": 101.05405405405405, "grad_norm": 0.67578125, "learning_rate": 2.6641116776528864e-05, "loss": 0.0679, "step": 3739 }, { "epoch": 101.08108108108108, "grad_norm": 1.515625, "learning_rate": 2.6629968617175893e-05, "loss": 0.1866, "step": 3740 }, { "epoch": 101.10810810810811, "grad_norm": 1.5546875, "learning_rate": 2.661882013230994e-05, "loss": 0.1941, "step": 3741 }, { "epoch": 101.13513513513513, "grad_norm": 1.1484375, "learning_rate": 2.6607671324157395e-05, "loss": 0.1072, "step": 3742 }, { "epoch": 101.16216216216216, "grad_norm": 1.3046875, "learning_rate": 2.659652219494474e-05, "loss": 0.1455, "step": 3743 }, { "epoch": 101.1891891891892, "grad_norm": 0.97265625, "learning_rate": 2.6585372746898506e-05, "loss": 0.1407, "step": 3744 }, { "epoch": 101.21621621621621, "grad_norm": 1.625, "learning_rate": 2.6574222982245305e-05, "loss": 0.1421, "step": 3745 }, { "epoch": 101.24324324324324, "grad_norm": 1.234375, "learning_rate": 2.6563072903211787e-05, "loss": 0.0843, "step": 3746 }, { "epoch": 101.27027027027027, "grad_norm": 0.87109375, "learning_rate": 2.655192251202469e-05, "loss": 0.0672, "step": 3747 }, { "epoch": 101.29729729729729, "grad_norm": 1.125, "learning_rate": 2.6540771810910803e-05, "loss": 0.1166, "step": 3748 }, { "epoch": 101.32432432432432, "grad_norm": 1.109375, "learning_rate": 2.6529620802096978e-05, "loss": 0.1425, "step": 3749 }, { "epoch": 101.35135135135135, "grad_norm": 1.1328125, "learning_rate": 2.6518469487810115e-05, "loss": 0.1226, "step": 3750 }, { "epoch": 101.37837837837837, "grad_norm": 1.453125, "learning_rate": 2.6507317870277204e-05, "loss": 0.0953, "step": 3751 }, { "epoch": 101.4054054054054, "grad_norm": 1.0625, "learning_rate": 2.6496165951725277e-05, "loss": 0.0938, "step": 3752 }, { "epoch": 101.43243243243244, "grad_norm": 1.1796875, "learning_rate": 2.648501373438142e-05, "loss": 0.0956, "step": 3753 }, { "epoch": 101.45945945945945, "grad_norm": 1.2890625, "learning_rate": 2.6473861220472794e-05, "loss": 0.1328, "step": 3754 }, { "epoch": 101.48648648648648, "grad_norm": 0.765625, "learning_rate": 2.6462708412226617e-05, "loss": 0.0915, "step": 3755 }, { "epoch": 101.51351351351352, "grad_norm": 1.0078125, "learning_rate": 2.645155531187015e-05, "loss": 0.0956, "step": 3756 }, { "epoch": 101.54054054054055, "grad_norm": 1.4921875, "learning_rate": 2.644040192163073e-05, "loss": 0.136, "step": 3757 }, { "epoch": 101.56756756756756, "grad_norm": 1.15625, "learning_rate": 2.642924824373575e-05, "loss": 0.1187, "step": 3758 }, { "epoch": 101.5945945945946, "grad_norm": 1.3203125, "learning_rate": 2.6418094280412646e-05, "loss": 0.0943, "step": 3759 }, { "epoch": 101.62162162162163, "grad_norm": 0.494140625, "learning_rate": 2.6406940033888933e-05, "loss": 0.0655, "step": 3760 }, { "epoch": 101.64864864864865, "grad_norm": 0.984375, "learning_rate": 2.639578550639216e-05, "loss": 0.1012, "step": 3761 }, { "epoch": 101.67567567567568, "grad_norm": 0.6328125, "learning_rate": 2.6384630700149947e-05, "loss": 0.0607, "step": 3762 }, { "epoch": 101.70270270270271, "grad_norm": 1.3515625, "learning_rate": 2.6373475617389965e-05, "loss": 0.1099, "step": 3763 }, { "epoch": 101.72972972972973, "grad_norm": 0.6328125, "learning_rate": 2.6362320260339935e-05, "loss": 0.0695, "step": 3764 }, { "epoch": 101.75675675675676, "grad_norm": 1.078125, "learning_rate": 2.635116463122765e-05, "loss": 0.1361, "step": 3765 }, { "epoch": 101.78378378378379, "grad_norm": 0.5546875, "learning_rate": 2.6340008732280936e-05, "loss": 0.073, "step": 3766 }, { "epoch": 101.8108108108108, "grad_norm": 0.890625, "learning_rate": 2.632885256572768e-05, "loss": 0.0899, "step": 3767 }, { "epoch": 101.83783783783784, "grad_norm": 1.1484375, "learning_rate": 2.631769613379584e-05, "loss": 0.1609, "step": 3768 }, { "epoch": 101.86486486486487, "grad_norm": 1.203125, "learning_rate": 2.63065394387134e-05, "loss": 0.1339, "step": 3769 }, { "epoch": 101.89189189189189, "grad_norm": 0.62890625, "learning_rate": 2.6295382482708413e-05, "loss": 0.0711, "step": 3770 }, { "epoch": 101.91891891891892, "grad_norm": 0.63671875, "learning_rate": 2.6284225268008973e-05, "loss": 0.0827, "step": 3771 }, { "epoch": 101.94594594594595, "grad_norm": 0.66015625, "learning_rate": 2.6273067796843244e-05, "loss": 0.0777, "step": 3772 }, { "epoch": 101.97297297297297, "grad_norm": 1.28125, "learning_rate": 2.6261910071439417e-05, "loss": 0.1735, "step": 3773 }, { "epoch": 102.0, "grad_norm": 0.97265625, "learning_rate": 2.6250752094025754e-05, "loss": 0.0999, "step": 3774 }, { "epoch": 102.02702702702703, "grad_norm": 1.1171875, "learning_rate": 2.623959386683056e-05, "loss": 0.1036, "step": 3775 }, { "epoch": 102.05405405405405, "grad_norm": 1.0625, "learning_rate": 2.6228435392082183e-05, "loss": 0.1188, "step": 3776 }, { "epoch": 102.08108108108108, "grad_norm": 1.1875, "learning_rate": 2.621727667200904e-05, "loss": 0.1258, "step": 3777 }, { "epoch": 102.10810810810811, "grad_norm": 0.58984375, "learning_rate": 2.6206117708839565e-05, "loss": 0.0655, "step": 3778 }, { "epoch": 102.13513513513513, "grad_norm": 0.875, "learning_rate": 2.6194958504802276e-05, "loss": 0.1284, "step": 3779 }, { "epoch": 102.16216216216216, "grad_norm": 0.490234375, "learning_rate": 2.618379906212572e-05, "loss": 0.0601, "step": 3780 }, { "epoch": 102.1891891891892, "grad_norm": 1.3515625, "learning_rate": 2.6172639383038488e-05, "loss": 0.1938, "step": 3781 }, { "epoch": 102.21621621621621, "grad_norm": 1.2734375, "learning_rate": 2.6161479469769222e-05, "loss": 0.1284, "step": 3782 }, { "epoch": 102.24324324324324, "grad_norm": 0.6796875, "learning_rate": 2.6150319324546628e-05, "loss": 0.072, "step": 3783 }, { "epoch": 102.27027027027027, "grad_norm": 1.1484375, "learning_rate": 2.613915894959943e-05, "loss": 0.1458, "step": 3784 }, { "epoch": 102.29729729729729, "grad_norm": 0.796875, "learning_rate": 2.612799834715641e-05, "loss": 0.0832, "step": 3785 }, { "epoch": 102.32432432432432, "grad_norm": 0.6640625, "learning_rate": 2.611683751944641e-05, "loss": 0.0743, "step": 3786 }, { "epoch": 102.35135135135135, "grad_norm": 1.1953125, "learning_rate": 2.6105676468698293e-05, "loss": 0.1455, "step": 3787 }, { "epoch": 102.37837837837837, "grad_norm": 1.0859375, "learning_rate": 2.6094515197140972e-05, "loss": 0.1176, "step": 3788 }, { "epoch": 102.4054054054054, "grad_norm": 1.3046875, "learning_rate": 2.6083353707003432e-05, "loss": 0.2193, "step": 3789 }, { "epoch": 102.43243243243244, "grad_norm": 1.2265625, "learning_rate": 2.6072192000514663e-05, "loss": 0.1611, "step": 3790 }, { "epoch": 102.45945945945945, "grad_norm": 0.828125, "learning_rate": 2.606103007990371e-05, "loss": 0.0961, "step": 3791 }, { "epoch": 102.48648648648648, "grad_norm": 1.5703125, "learning_rate": 2.6049867947399676e-05, "loss": 0.2098, "step": 3792 }, { "epoch": 102.51351351351352, "grad_norm": 0.92578125, "learning_rate": 2.603870560523169e-05, "loss": 0.1149, "step": 3793 }, { "epoch": 102.54054054054055, "grad_norm": 0.90234375, "learning_rate": 2.6027543055628935e-05, "loss": 0.0928, "step": 3794 }, { "epoch": 102.56756756756756, "grad_norm": 0.91796875, "learning_rate": 2.6016380300820613e-05, "loss": 0.0876, "step": 3795 }, { "epoch": 102.5945945945946, "grad_norm": 1.328125, "learning_rate": 2.6005217343035997e-05, "loss": 0.2211, "step": 3796 }, { "epoch": 102.62162162162163, "grad_norm": 0.77734375, "learning_rate": 2.599405418450439e-05, "loss": 0.0816, "step": 3797 }, { "epoch": 102.64864864864865, "grad_norm": 1.015625, "learning_rate": 2.598289082745512e-05, "loss": 0.1375, "step": 3798 }, { "epoch": 102.67567567567568, "grad_norm": 1.4140625, "learning_rate": 2.597172727411757e-05, "loss": 0.1858, "step": 3799 }, { "epoch": 102.70270270270271, "grad_norm": 1.515625, "learning_rate": 2.5960563526721156e-05, "loss": 0.1114, "step": 3800 }, { "epoch": 102.72972972972973, "grad_norm": 1.1640625, "learning_rate": 2.5949399587495333e-05, "loss": 0.1427, "step": 3801 }, { "epoch": 102.75675675675676, "grad_norm": 1.4453125, "learning_rate": 2.5938235458669602e-05, "loss": 0.1831, "step": 3802 }, { "epoch": 102.78378378378379, "grad_norm": 1.0, "learning_rate": 2.5927071142473498e-05, "loss": 0.1335, "step": 3803 }, { "epoch": 102.8108108108108, "grad_norm": 0.9140625, "learning_rate": 2.5915906641136585e-05, "loss": 0.0977, "step": 3804 }, { "epoch": 102.83783783783784, "grad_norm": 0.8359375, "learning_rate": 2.5904741956888467e-05, "loss": 0.0835, "step": 3805 }, { "epoch": 102.86486486486487, "grad_norm": 1.3828125, "learning_rate": 2.5893577091958805e-05, "loss": 0.1352, "step": 3806 }, { "epoch": 102.89189189189189, "grad_norm": 0.86328125, "learning_rate": 2.5882412048577266e-05, "loss": 0.1291, "step": 3807 }, { "epoch": 102.91891891891892, "grad_norm": 0.8515625, "learning_rate": 2.587124682897356e-05, "loss": 0.0969, "step": 3808 }, { "epoch": 102.94594594594595, "grad_norm": 0.984375, "learning_rate": 2.5860081435377447e-05, "loss": 0.0997, "step": 3809 }, { "epoch": 102.97297297297297, "grad_norm": 0.82421875, "learning_rate": 2.5848915870018718e-05, "loss": 0.0778, "step": 3810 }, { "epoch": 103.0, "grad_norm": 1.046875, "learning_rate": 2.583775013512719e-05, "loss": 0.1376, "step": 3811 }, { "epoch": 103.02702702702703, "grad_norm": 1.7265625, "learning_rate": 2.5826584232932706e-05, "loss": 0.264, "step": 3812 }, { "epoch": 103.05405405405405, "grad_norm": 1.0546875, "learning_rate": 2.5815418165665163e-05, "loss": 0.0881, "step": 3813 }, { "epoch": 103.08108108108108, "grad_norm": 0.80078125, "learning_rate": 2.580425193555448e-05, "loss": 0.0893, "step": 3814 }, { "epoch": 103.10810810810811, "grad_norm": 1.1796875, "learning_rate": 2.5793085544830614e-05, "loss": 0.1557, "step": 3815 }, { "epoch": 103.13513513513513, "grad_norm": 1.09375, "learning_rate": 2.578191899572353e-05, "loss": 0.1256, "step": 3816 }, { "epoch": 103.16216216216216, "grad_norm": 0.86328125, "learning_rate": 2.5770752290463274e-05, "loss": 0.0804, "step": 3817 }, { "epoch": 103.1891891891892, "grad_norm": 1.0625, "learning_rate": 2.5759585431279875e-05, "loss": 0.1013, "step": 3818 }, { "epoch": 103.21621621621621, "grad_norm": 1.3046875, "learning_rate": 2.5748418420403416e-05, "loss": 0.1753, "step": 3819 }, { "epoch": 103.24324324324324, "grad_norm": 1.578125, "learning_rate": 2.5737251260063998e-05, "loss": 0.1911, "step": 3820 }, { "epoch": 103.27027027027027, "grad_norm": 0.90234375, "learning_rate": 2.5726083952491776e-05, "loss": 0.0893, "step": 3821 }, { "epoch": 103.29729729729729, "grad_norm": 1.1640625, "learning_rate": 2.57149164999169e-05, "loss": 0.1258, "step": 3822 }, { "epoch": 103.32432432432432, "grad_norm": 0.98046875, "learning_rate": 2.5703748904569575e-05, "loss": 0.101, "step": 3823 }, { "epoch": 103.35135135135135, "grad_norm": 1.2265625, "learning_rate": 2.5692581168680028e-05, "loss": 0.1336, "step": 3824 }, { "epoch": 103.37837837837837, "grad_norm": 0.84375, "learning_rate": 2.568141329447852e-05, "loss": 0.0933, "step": 3825 }, { "epoch": 103.4054054054054, "grad_norm": 0.7109375, "learning_rate": 2.567024528419531e-05, "loss": 0.0679, "step": 3826 }, { "epoch": 103.43243243243244, "grad_norm": 1.1328125, "learning_rate": 2.5659077140060727e-05, "loss": 0.1404, "step": 3827 }, { "epoch": 103.45945945945945, "grad_norm": 1.3515625, "learning_rate": 2.56479088643051e-05, "loss": 0.1635, "step": 3828 }, { "epoch": 103.48648648648648, "grad_norm": 1.2890625, "learning_rate": 2.5636740459158777e-05, "loss": 0.1431, "step": 3829 }, { "epoch": 103.51351351351352, "grad_norm": 0.65234375, "learning_rate": 2.5625571926852167e-05, "loss": 0.0705, "step": 3830 }, { "epoch": 103.54054054054055, "grad_norm": 1.546875, "learning_rate": 2.561440326961567e-05, "loss": 0.1843, "step": 3831 }, { "epoch": 103.56756756756756, "grad_norm": 1.2734375, "learning_rate": 2.5603234489679727e-05, "loss": 0.1272, "step": 3832 }, { "epoch": 103.5945945945946, "grad_norm": 0.8828125, "learning_rate": 2.5592065589274793e-05, "loss": 0.0928, "step": 3833 }, { "epoch": 103.62162162162163, "grad_norm": 0.703125, "learning_rate": 2.558089657063137e-05, "loss": 0.0735, "step": 3834 }, { "epoch": 103.64864864864865, "grad_norm": 1.2890625, "learning_rate": 2.556972743597995e-05, "loss": 0.1905, "step": 3835 }, { "epoch": 103.67567567567568, "grad_norm": 0.97265625, "learning_rate": 2.555855818755108e-05, "loss": 0.146, "step": 3836 }, { "epoch": 103.70270270270271, "grad_norm": 0.95703125, "learning_rate": 2.5547388827575302e-05, "loss": 0.1157, "step": 3837 }, { "epoch": 103.72972972972973, "grad_norm": 1.375, "learning_rate": 2.5536219358283197e-05, "loss": 0.1653, "step": 3838 }, { "epoch": 103.75675675675676, "grad_norm": 1.1328125, "learning_rate": 2.5525049781905374e-05, "loss": 0.1395, "step": 3839 }, { "epoch": 103.78378378378379, "grad_norm": 1.28125, "learning_rate": 2.551388010067245e-05, "loss": 0.1196, "step": 3840 }, { "epoch": 103.8108108108108, "grad_norm": 1.5390625, "learning_rate": 2.5502710316815065e-05, "loss": 0.1965, "step": 3841 }, { "epoch": 103.83783783783784, "grad_norm": 1.1484375, "learning_rate": 2.549154043256388e-05, "loss": 0.1332, "step": 3842 }, { "epoch": 103.86486486486487, "grad_norm": 0.91796875, "learning_rate": 2.5480370450149577e-05, "loss": 0.0894, "step": 3843 }, { "epoch": 103.89189189189189, "grad_norm": 1.0625, "learning_rate": 2.546920037180286e-05, "loss": 0.1021, "step": 3844 }, { "epoch": 103.91891891891892, "grad_norm": 1.296875, "learning_rate": 2.545803019975445e-05, "loss": 0.1123, "step": 3845 }, { "epoch": 103.94594594594595, "grad_norm": 1.078125, "learning_rate": 2.5446859936235095e-05, "loss": 0.1257, "step": 3846 }, { "epoch": 103.97297297297297, "grad_norm": 0.79296875, "learning_rate": 2.5435689583475536e-05, "loss": 0.0947, "step": 3847 }, { "epoch": 104.0, "grad_norm": 0.8671875, "learning_rate": 2.542451914370656e-05, "loss": 0.0834, "step": 3848 }, { "epoch": 104.02702702702703, "grad_norm": 1.1328125, "learning_rate": 2.5413348619158967e-05, "loss": 0.1013, "step": 3849 }, { "epoch": 104.05405405405405, "grad_norm": 0.8984375, "learning_rate": 2.540217801206355e-05, "loss": 0.0879, "step": 3850 }, { "epoch": 104.08108108108108, "grad_norm": 1.0078125, "learning_rate": 2.539100732465115e-05, "loss": 0.1403, "step": 3851 }, { "epoch": 104.10810810810811, "grad_norm": 1.0390625, "learning_rate": 2.5379836559152607e-05, "loss": 0.1003, "step": 3852 }, { "epoch": 104.13513513513513, "grad_norm": 0.97265625, "learning_rate": 2.536866571779878e-05, "loss": 0.0921, "step": 3853 }, { "epoch": 104.16216216216216, "grad_norm": 0.96484375, "learning_rate": 2.535749480282053e-05, "loss": 0.1369, "step": 3854 }, { "epoch": 104.1891891891892, "grad_norm": 1.8046875, "learning_rate": 2.5346323816448764e-05, "loss": 0.1941, "step": 3855 }, { "epoch": 104.21621621621621, "grad_norm": 1.1796875, "learning_rate": 2.5335152760914372e-05, "loss": 0.1564, "step": 3856 }, { "epoch": 104.24324324324324, "grad_norm": 1.1640625, "learning_rate": 2.532398163844828e-05, "loss": 0.1589, "step": 3857 }, { "epoch": 104.27027027027027, "grad_norm": 0.68359375, "learning_rate": 2.531281045128141e-05, "loss": 0.0713, "step": 3858 }, { "epoch": 104.29729729729729, "grad_norm": 1.1953125, "learning_rate": 2.5301639201644713e-05, "loss": 0.1676, "step": 3859 }, { "epoch": 104.32432432432432, "grad_norm": 1.1328125, "learning_rate": 2.5290467891769132e-05, "loss": 0.1198, "step": 3860 }, { "epoch": 104.35135135135135, "grad_norm": 1.1875, "learning_rate": 2.527929652388564e-05, "loss": 0.1206, "step": 3861 }, { "epoch": 104.37837837837837, "grad_norm": 1.390625, "learning_rate": 2.5268125100225215e-05, "loss": 0.1821, "step": 3862 }, { "epoch": 104.4054054054054, "grad_norm": 1.0546875, "learning_rate": 2.5256953623018852e-05, "loss": 0.1189, "step": 3863 }, { "epoch": 104.43243243243244, "grad_norm": 0.921875, "learning_rate": 2.5245782094497546e-05, "loss": 0.0922, "step": 3864 }, { "epoch": 104.45945945945945, "grad_norm": 1.171875, "learning_rate": 2.523461051689231e-05, "loss": 0.1529, "step": 3865 }, { "epoch": 104.48648648648648, "grad_norm": 1.078125, "learning_rate": 2.522343889243417e-05, "loss": 0.09, "step": 3866 }, { "epoch": 104.51351351351352, "grad_norm": 0.81640625, "learning_rate": 2.521226722335414e-05, "loss": 0.095, "step": 3867 }, { "epoch": 104.54054054054055, "grad_norm": 0.89453125, "learning_rate": 2.520109551188328e-05, "loss": 0.1231, "step": 3868 }, { "epoch": 104.56756756756756, "grad_norm": 1.7734375, "learning_rate": 2.518992376025262e-05, "loss": 0.2417, "step": 3869 }, { "epoch": 104.5945945945946, "grad_norm": 1.4765625, "learning_rate": 2.5178751970693225e-05, "loss": 0.1171, "step": 3870 }, { "epoch": 104.62162162162163, "grad_norm": 1.0390625, "learning_rate": 2.5167580145436155e-05, "loss": 0.077, "step": 3871 }, { "epoch": 104.64864864864865, "grad_norm": 0.9921875, "learning_rate": 2.515640828671248e-05, "loss": 0.085, "step": 3872 }, { "epoch": 104.67567567567568, "grad_norm": 1.203125, "learning_rate": 2.514523639675328e-05, "loss": 0.1619, "step": 3873 }, { "epoch": 104.70270270270271, "grad_norm": 1.5703125, "learning_rate": 2.513406447778963e-05, "loss": 0.1018, "step": 3874 }, { "epoch": 104.72972972972973, "grad_norm": 1.0625, "learning_rate": 2.5122892532052634e-05, "loss": 0.0858, "step": 3875 }, { "epoch": 104.75675675675676, "grad_norm": 1.6953125, "learning_rate": 2.5111720561773366e-05, "loss": 0.0932, "step": 3876 }, { "epoch": 104.78378378378379, "grad_norm": 1.4921875, "learning_rate": 2.5100548569182948e-05, "loss": 0.1638, "step": 3877 }, { "epoch": 104.8108108108108, "grad_norm": 1.3515625, "learning_rate": 2.5089376556512463e-05, "loss": 0.1614, "step": 3878 }, { "epoch": 104.83783783783784, "grad_norm": 1.421875, "learning_rate": 2.5078204525993028e-05, "loss": 0.1709, "step": 3879 }, { "epoch": 104.86486486486487, "grad_norm": 1.1171875, "learning_rate": 2.5067032479855752e-05, "loss": 0.0859, "step": 3880 }, { "epoch": 104.89189189189189, "grad_norm": 1.2734375, "learning_rate": 2.5055860420331744e-05, "loss": 0.112, "step": 3881 }, { "epoch": 104.91891891891892, "grad_norm": 1.1640625, "learning_rate": 2.5044688349652136e-05, "loss": 0.1406, "step": 3882 }, { "epoch": 104.94594594594595, "grad_norm": 1.328125, "learning_rate": 2.5033516270048036e-05, "loss": 0.0839, "step": 3883 }, { "epoch": 104.97297297297297, "grad_norm": 0.68359375, "learning_rate": 2.5022344183750564e-05, "loss": 0.0769, "step": 3884 }, { "epoch": 105.0, "grad_norm": 1.3515625, "learning_rate": 2.5011172092990843e-05, "loss": 0.182, "step": 3885 }, { "epoch": 105.02702702702703, "grad_norm": 1.4765625, "learning_rate": 2.5e-05, "loss": 0.1481, "step": 3886 }, { "epoch": 105.05405405405405, "grad_norm": 0.95703125, "learning_rate": 2.498882790700916e-05, "loss": 0.0921, "step": 3887 }, { "epoch": 105.08108108108108, "grad_norm": 1.890625, "learning_rate": 2.497765581624945e-05, "loss": 0.1907, "step": 3888 }, { "epoch": 105.10810810810811, "grad_norm": 1.4765625, "learning_rate": 2.4966483729951973e-05, "loss": 0.1413, "step": 3889 }, { "epoch": 105.13513513513513, "grad_norm": 1.1875, "learning_rate": 2.4955311650347867e-05, "loss": 0.1484, "step": 3890 }, { "epoch": 105.16216216216216, "grad_norm": 1.3046875, "learning_rate": 2.4944139579668262e-05, "loss": 0.1697, "step": 3891 }, { "epoch": 105.1891891891892, "grad_norm": 0.74609375, "learning_rate": 2.4932967520144254e-05, "loss": 0.0746, "step": 3892 }, { "epoch": 105.21621621621621, "grad_norm": 1.4609375, "learning_rate": 2.4921795474006978e-05, "loss": 0.1255, "step": 3893 }, { "epoch": 105.24324324324324, "grad_norm": 1.1171875, "learning_rate": 2.4910623443487547e-05, "loss": 0.0995, "step": 3894 }, { "epoch": 105.27027027027027, "grad_norm": 1.28125, "learning_rate": 2.489945143081706e-05, "loss": 0.1778, "step": 3895 }, { "epoch": 105.29729729729729, "grad_norm": 0.9453125, "learning_rate": 2.4888279438226637e-05, "loss": 0.1156, "step": 3896 }, { "epoch": 105.32432432432432, "grad_norm": 0.52734375, "learning_rate": 2.4877107467947375e-05, "loss": 0.0611, "step": 3897 }, { "epoch": 105.35135135135135, "grad_norm": 1.234375, "learning_rate": 2.486593552221037e-05, "loss": 0.1351, "step": 3898 }, { "epoch": 105.37837837837837, "grad_norm": 1.109375, "learning_rate": 2.4854763603246728e-05, "loss": 0.1262, "step": 3899 }, { "epoch": 105.4054054054054, "grad_norm": 1.4453125, "learning_rate": 2.484359171328752e-05, "loss": 0.1162, "step": 3900 }, { "epoch": 105.43243243243244, "grad_norm": 1.0703125, "learning_rate": 2.4832419854563844e-05, "loss": 0.1122, "step": 3901 }, { "epoch": 105.45945945945945, "grad_norm": 1.3984375, "learning_rate": 2.482124802930678e-05, "loss": 0.1545, "step": 3902 }, { "epoch": 105.48648648648648, "grad_norm": 1.0625, "learning_rate": 2.4810076239747386e-05, "loss": 0.144, "step": 3903 }, { "epoch": 105.51351351351352, "grad_norm": 0.89453125, "learning_rate": 2.4798904488116724e-05, "loss": 0.1087, "step": 3904 }, { "epoch": 105.54054054054055, "grad_norm": 0.94140625, "learning_rate": 2.4787732776645865e-05, "loss": 0.1088, "step": 3905 }, { "epoch": 105.56756756756756, "grad_norm": 0.81640625, "learning_rate": 2.4776561107565838e-05, "loss": 0.0741, "step": 3906 }, { "epoch": 105.5945945945946, "grad_norm": 1.078125, "learning_rate": 2.476538948310769e-05, "loss": 0.1049, "step": 3907 }, { "epoch": 105.62162162162163, "grad_norm": 1.0625, "learning_rate": 2.475421790550246e-05, "loss": 0.092, "step": 3908 }, { "epoch": 105.64864864864865, "grad_norm": 0.609375, "learning_rate": 2.4743046376981154e-05, "loss": 0.0676, "step": 3909 }, { "epoch": 105.67567567567568, "grad_norm": 1.0859375, "learning_rate": 2.4731874899774797e-05, "loss": 0.0857, "step": 3910 }, { "epoch": 105.70270270270271, "grad_norm": 0.63671875, "learning_rate": 2.4720703476114367e-05, "loss": 0.0669, "step": 3911 }, { "epoch": 105.72972972972973, "grad_norm": 1.5546875, "learning_rate": 2.4709532108230874e-05, "loss": 0.154, "step": 3912 }, { "epoch": 105.75675675675676, "grad_norm": 1.484375, "learning_rate": 2.46983607983553e-05, "loss": 0.1547, "step": 3913 }, { "epoch": 105.78378378378379, "grad_norm": 0.91015625, "learning_rate": 2.4687189548718592e-05, "loss": 0.0665, "step": 3914 }, { "epoch": 105.8108108108108, "grad_norm": 1.59375, "learning_rate": 2.4676018361551726e-05, "loss": 0.1704, "step": 3915 }, { "epoch": 105.83783783783784, "grad_norm": 0.78515625, "learning_rate": 2.466484723908563e-05, "loss": 0.0694, "step": 3916 }, { "epoch": 105.86486486486487, "grad_norm": 1.203125, "learning_rate": 2.4653676183551245e-05, "loss": 0.1624, "step": 3917 }, { "epoch": 105.89189189189189, "grad_norm": 0.5859375, "learning_rate": 2.4642505197179472e-05, "loss": 0.0688, "step": 3918 }, { "epoch": 105.91891891891892, "grad_norm": 0.5234375, "learning_rate": 2.4631334282201236e-05, "loss": 0.0661, "step": 3919 }, { "epoch": 105.94594594594595, "grad_norm": 1.09375, "learning_rate": 2.46201634408474e-05, "loss": 0.1384, "step": 3920 }, { "epoch": 105.97297297297297, "grad_norm": 1.46875, "learning_rate": 2.4608992675348848e-05, "loss": 0.1416, "step": 3921 }, { "epoch": 106.0, "grad_norm": 1.7421875, "learning_rate": 2.4597821987936456e-05, "loss": 0.1752, "step": 3922 }, { "epoch": 106.02702702702703, "grad_norm": 1.2421875, "learning_rate": 2.458665138084104e-05, "loss": 0.1194, "step": 3923 }, { "epoch": 106.05405405405405, "grad_norm": 0.8515625, "learning_rate": 2.4575480856293447e-05, "loss": 0.1049, "step": 3924 }, { "epoch": 106.08108108108108, "grad_norm": 0.8671875, "learning_rate": 2.4564310416524473e-05, "loss": 0.1033, "step": 3925 }, { "epoch": 106.10810810810811, "grad_norm": 1.1640625, "learning_rate": 2.455314006376491e-05, "loss": 0.1207, "step": 3926 }, { "epoch": 106.13513513513513, "grad_norm": 1.1484375, "learning_rate": 2.4541969800245556e-05, "loss": 0.1625, "step": 3927 }, { "epoch": 106.16216216216216, "grad_norm": 1.53125, "learning_rate": 2.4530799628197147e-05, "loss": 0.191, "step": 3928 }, { "epoch": 106.1891891891892, "grad_norm": 1.3203125, "learning_rate": 2.4519629549850426e-05, "loss": 0.1631, "step": 3929 }, { "epoch": 106.21621621621621, "grad_norm": 1.15625, "learning_rate": 2.4508459567436132e-05, "loss": 0.1346, "step": 3930 }, { "epoch": 106.24324324324324, "grad_norm": 1.21875, "learning_rate": 2.449728968318494e-05, "loss": 0.1601, "step": 3931 }, { "epoch": 106.27027027027027, "grad_norm": 1.0234375, "learning_rate": 2.4486119899327553e-05, "loss": 0.113, "step": 3932 }, { "epoch": 106.29729729729729, "grad_norm": 1.2734375, "learning_rate": 2.447495021809463e-05, "loss": 0.1995, "step": 3933 }, { "epoch": 106.32432432432432, "grad_norm": 1.015625, "learning_rate": 2.446378064171681e-05, "loss": 0.1025, "step": 3934 }, { "epoch": 106.35135135135135, "grad_norm": 1.34375, "learning_rate": 2.4452611172424704e-05, "loss": 0.1664, "step": 3935 }, { "epoch": 106.37837837837837, "grad_norm": 1.28125, "learning_rate": 2.444144181244893e-05, "loss": 0.168, "step": 3936 }, { "epoch": 106.4054054054054, "grad_norm": 1.421875, "learning_rate": 2.4430272564020055e-05, "loss": 0.1963, "step": 3937 }, { "epoch": 106.43243243243244, "grad_norm": 0.8671875, "learning_rate": 2.441910342936863e-05, "loss": 0.079, "step": 3938 }, { "epoch": 106.45945945945945, "grad_norm": 0.8984375, "learning_rate": 2.440793441072521e-05, "loss": 0.0848, "step": 3939 }, { "epoch": 106.48648648648648, "grad_norm": 1.09375, "learning_rate": 2.4396765510320275e-05, "loss": 0.1128, "step": 3940 }, { "epoch": 106.51351351351352, "grad_norm": 1.3515625, "learning_rate": 2.438559673038434e-05, "loss": 0.1492, "step": 3941 }, { "epoch": 106.54054054054055, "grad_norm": 0.5234375, "learning_rate": 2.4374428073147836e-05, "loss": 0.0554, "step": 3942 }, { "epoch": 106.56756756756756, "grad_norm": 1.2421875, "learning_rate": 2.4363259540841222e-05, "loss": 0.1406, "step": 3943 }, { "epoch": 106.5945945945946, "grad_norm": 1.078125, "learning_rate": 2.4352091135694914e-05, "loss": 0.119, "step": 3944 }, { "epoch": 106.62162162162163, "grad_norm": 0.9921875, "learning_rate": 2.434092285993928e-05, "loss": 0.1019, "step": 3945 }, { "epoch": 106.64864864864865, "grad_norm": 0.79296875, "learning_rate": 2.432975471580469e-05, "loss": 0.0948, "step": 3946 }, { "epoch": 106.67567567567568, "grad_norm": 1.3515625, "learning_rate": 2.4318586705521494e-05, "loss": 0.1921, "step": 3947 }, { "epoch": 106.70270270270271, "grad_norm": 0.8828125, "learning_rate": 2.4307418831319974e-05, "loss": 0.0835, "step": 3948 }, { "epoch": 106.72972972972973, "grad_norm": 1.1484375, "learning_rate": 2.4296251095430424e-05, "loss": 0.1624, "step": 3949 }, { "epoch": 106.75675675675676, "grad_norm": 0.98828125, "learning_rate": 2.4285083500083108e-05, "loss": 0.1093, "step": 3950 }, { "epoch": 106.78378378378379, "grad_norm": 1.484375, "learning_rate": 2.427391604750823e-05, "loss": 0.2224, "step": 3951 }, { "epoch": 106.8108108108108, "grad_norm": 1.6640625, "learning_rate": 2.4262748739936004e-05, "loss": 0.1097, "step": 3952 }, { "epoch": 106.83783783783784, "grad_norm": 1.1171875, "learning_rate": 2.425158157959659e-05, "loss": 0.0906, "step": 3953 }, { "epoch": 106.86486486486487, "grad_norm": 1.5078125, "learning_rate": 2.424041456872013e-05, "loss": 0.1928, "step": 3954 }, { "epoch": 106.89189189189189, "grad_norm": 0.99609375, "learning_rate": 2.4229247709536732e-05, "loss": 0.0903, "step": 3955 }, { "epoch": 106.91891891891892, "grad_norm": 1.5546875, "learning_rate": 2.4218081004276472e-05, "loss": 0.2304, "step": 3956 }, { "epoch": 106.94594594594595, "grad_norm": 1.203125, "learning_rate": 2.4206914455169395e-05, "loss": 0.1297, "step": 3957 }, { "epoch": 106.97297297297297, "grad_norm": 0.98046875, "learning_rate": 2.4195748064445527e-05, "loss": 0.1119, "step": 3958 }, { "epoch": 107.0, "grad_norm": 1.4921875, "learning_rate": 2.418458183433484e-05, "loss": 0.2646, "step": 3959 }, { "epoch": 107.02702702702703, "grad_norm": 1.0859375, "learning_rate": 2.4173415767067297e-05, "loss": 0.1129, "step": 3960 }, { "epoch": 107.05405405405405, "grad_norm": 0.86328125, "learning_rate": 2.416224986487282e-05, "loss": 0.0793, "step": 3961 }, { "epoch": 107.08108108108108, "grad_norm": 1.6796875, "learning_rate": 2.4151084129981285e-05, "loss": 0.1896, "step": 3962 }, { "epoch": 107.10810810810811, "grad_norm": 0.6953125, "learning_rate": 2.413991856462255e-05, "loss": 0.0838, "step": 3963 }, { "epoch": 107.13513513513513, "grad_norm": 1.28125, "learning_rate": 2.412875317102645e-05, "loss": 0.1161, "step": 3964 }, { "epoch": 107.16216216216216, "grad_norm": 1.40625, "learning_rate": 2.4117587951422743e-05, "loss": 0.1568, "step": 3965 }, { "epoch": 107.1891891891892, "grad_norm": 1.234375, "learning_rate": 2.4106422908041197e-05, "loss": 0.1341, "step": 3966 }, { "epoch": 107.21621621621621, "grad_norm": 0.90234375, "learning_rate": 2.409525804311154e-05, "loss": 0.0996, "step": 3967 }, { "epoch": 107.24324324324324, "grad_norm": 1.2734375, "learning_rate": 2.408409335886342e-05, "loss": 0.1535, "step": 3968 }, { "epoch": 107.27027027027027, "grad_norm": 0.9765625, "learning_rate": 2.407292885752651e-05, "loss": 0.0838, "step": 3969 }, { "epoch": 107.29729729729729, "grad_norm": 1.296875, "learning_rate": 2.4061764541330407e-05, "loss": 0.1698, "step": 3970 }, { "epoch": 107.32432432432432, "grad_norm": 1.078125, "learning_rate": 2.405060041250467e-05, "loss": 0.1347, "step": 3971 }, { "epoch": 107.35135135135135, "grad_norm": 1.046875, "learning_rate": 2.403943647327886e-05, "loss": 0.1326, "step": 3972 }, { "epoch": 107.37837837837837, "grad_norm": 0.69140625, "learning_rate": 2.402827272588244e-05, "loss": 0.0748, "step": 3973 }, { "epoch": 107.4054054054054, "grad_norm": 0.6875, "learning_rate": 2.401710917254489e-05, "loss": 0.0748, "step": 3974 }, { "epoch": 107.43243243243244, "grad_norm": 1.125, "learning_rate": 2.4005945815495617e-05, "loss": 0.1311, "step": 3975 }, { "epoch": 107.45945945945945, "grad_norm": 0.81640625, "learning_rate": 2.3994782656964006e-05, "loss": 0.0974, "step": 3976 }, { "epoch": 107.48648648648648, "grad_norm": 0.7890625, "learning_rate": 2.398361969917939e-05, "loss": 0.1028, "step": 3977 }, { "epoch": 107.51351351351352, "grad_norm": 0.58984375, "learning_rate": 2.3972456944371078e-05, "loss": 0.0725, "step": 3978 }, { "epoch": 107.54054054054055, "grad_norm": 1.40625, "learning_rate": 2.3961294394768315e-05, "loss": 0.1748, "step": 3979 }, { "epoch": 107.56756756756756, "grad_norm": 0.9140625, "learning_rate": 2.3950132052600326e-05, "loss": 0.1034, "step": 3980 }, { "epoch": 107.5945945945946, "grad_norm": 0.921875, "learning_rate": 2.39389699200963e-05, "loss": 0.109, "step": 3981 }, { "epoch": 107.62162162162163, "grad_norm": 0.57421875, "learning_rate": 2.3927807999485343e-05, "loss": 0.068, "step": 3982 }, { "epoch": 107.64864864864865, "grad_norm": 1.203125, "learning_rate": 2.391664629299657e-05, "loss": 0.1627, "step": 3983 }, { "epoch": 107.67567567567568, "grad_norm": 1.03125, "learning_rate": 2.390548480285903e-05, "loss": 0.1215, "step": 3984 }, { "epoch": 107.70270270270271, "grad_norm": 1.0625, "learning_rate": 2.3894323531301713e-05, "loss": 0.1248, "step": 3985 }, { "epoch": 107.72972972972973, "grad_norm": 1.3125, "learning_rate": 2.3883162480553604e-05, "loss": 0.1769, "step": 3986 }, { "epoch": 107.75675675675676, "grad_norm": 0.6171875, "learning_rate": 2.3872001652843594e-05, "loss": 0.0754, "step": 3987 }, { "epoch": 107.78378378378379, "grad_norm": 0.796875, "learning_rate": 2.3860841050400577e-05, "loss": 0.0822, "step": 3988 }, { "epoch": 107.8108108108108, "grad_norm": 0.91015625, "learning_rate": 2.3849680675453385e-05, "loss": 0.1021, "step": 3989 }, { "epoch": 107.83783783783784, "grad_norm": 1.15625, "learning_rate": 2.383852053023078e-05, "loss": 0.1305, "step": 3990 }, { "epoch": 107.86486486486487, "grad_norm": 0.78515625, "learning_rate": 2.3827360616961518e-05, "loss": 0.0824, "step": 3991 }, { "epoch": 107.89189189189189, "grad_norm": 0.84765625, "learning_rate": 2.3816200937874287e-05, "loss": 0.0849, "step": 3992 }, { "epoch": 107.91891891891892, "grad_norm": 0.8515625, "learning_rate": 2.3805041495197727e-05, "loss": 0.1006, "step": 3993 }, { "epoch": 107.94594594594595, "grad_norm": 1.3671875, "learning_rate": 2.3793882291160437e-05, "loss": 0.1948, "step": 3994 }, { "epoch": 107.97297297297297, "grad_norm": 0.62890625, "learning_rate": 2.3782723327990972e-05, "loss": 0.0715, "step": 3995 }, { "epoch": 108.0, "grad_norm": 1.09375, "learning_rate": 2.377156460791782e-05, "loss": 0.1363, "step": 3996 }, { "epoch": 108.02702702702703, "grad_norm": 0.83203125, "learning_rate": 2.3760406133169443e-05, "loss": 0.0947, "step": 3997 }, { "epoch": 108.05405405405405, "grad_norm": 1.0078125, "learning_rate": 2.3749247905974255e-05, "loss": 0.1122, "step": 3998 }, { "epoch": 108.08108108108108, "grad_norm": 0.55859375, "learning_rate": 2.3738089928560586e-05, "loss": 0.0707, "step": 3999 }, { "epoch": 108.10810810810811, "grad_norm": 0.93359375, "learning_rate": 2.3726932203156772e-05, "loss": 0.1069, "step": 4000 }, { "epoch": 108.13513513513513, "grad_norm": 1.0703125, "learning_rate": 2.3715774731991033e-05, "loss": 0.1307, "step": 4001 }, { "epoch": 108.16216216216216, "grad_norm": 0.9296875, "learning_rate": 2.3704617517291593e-05, "loss": 0.0985, "step": 4002 }, { "epoch": 108.1891891891892, "grad_norm": 0.86328125, "learning_rate": 2.3693460561286613e-05, "loss": 0.0982, "step": 4003 }, { "epoch": 108.21621621621621, "grad_norm": 0.9140625, "learning_rate": 2.3682303866204166e-05, "loss": 0.1047, "step": 4004 }, { "epoch": 108.24324324324324, "grad_norm": 0.73828125, "learning_rate": 2.367114743427232e-05, "loss": 0.0897, "step": 4005 }, { "epoch": 108.27027027027027, "grad_norm": 1.15625, "learning_rate": 2.3659991267719077e-05, "loss": 0.1328, "step": 4006 }, { "epoch": 108.29729729729729, "grad_norm": 1.3046875, "learning_rate": 2.364883536877236e-05, "loss": 0.1615, "step": 4007 }, { "epoch": 108.32432432432432, "grad_norm": 1.3359375, "learning_rate": 2.3637679739660064e-05, "loss": 0.1264, "step": 4008 }, { "epoch": 108.35135135135135, "grad_norm": 0.7578125, "learning_rate": 2.3626524382610048e-05, "loss": 0.0845, "step": 4009 }, { "epoch": 108.37837837837837, "grad_norm": 1.015625, "learning_rate": 2.3615369299850056e-05, "loss": 0.1032, "step": 4010 }, { "epoch": 108.4054054054054, "grad_norm": 0.94921875, "learning_rate": 2.3604214493607845e-05, "loss": 0.1028, "step": 4011 }, { "epoch": 108.43243243243244, "grad_norm": 0.66796875, "learning_rate": 2.3593059966111072e-05, "loss": 0.0819, "step": 4012 }, { "epoch": 108.45945945945945, "grad_norm": 0.9453125, "learning_rate": 2.3581905719587356e-05, "loss": 0.1062, "step": 4013 }, { "epoch": 108.48648648648648, "grad_norm": 1.3046875, "learning_rate": 2.357075175626425e-05, "loss": 0.1515, "step": 4014 }, { "epoch": 108.51351351351352, "grad_norm": 0.9921875, "learning_rate": 2.3559598078369276e-05, "loss": 0.1092, "step": 4015 }, { "epoch": 108.54054054054055, "grad_norm": 1.2578125, "learning_rate": 2.354844468812985e-05, "loss": 0.1324, "step": 4016 }, { "epoch": 108.56756756756756, "grad_norm": 1.0546875, "learning_rate": 2.3537291587773395e-05, "loss": 0.0894, "step": 4017 }, { "epoch": 108.5945945945946, "grad_norm": 1.15625, "learning_rate": 2.352613877952721e-05, "loss": 0.1045, "step": 4018 }, { "epoch": 108.62162162162163, "grad_norm": 0.85546875, "learning_rate": 2.351498626561858e-05, "loss": 0.0933, "step": 4019 }, { "epoch": 108.64864864864865, "grad_norm": 1.15625, "learning_rate": 2.3503834048274735e-05, "loss": 0.1301, "step": 4020 }, { "epoch": 108.67567567567568, "grad_norm": 1.4140625, "learning_rate": 2.34926821297228e-05, "loss": 0.1827, "step": 4021 }, { "epoch": 108.70270270270271, "grad_norm": 1.046875, "learning_rate": 2.3481530512189887e-05, "loss": 0.1134, "step": 4022 }, { "epoch": 108.72972972972973, "grad_norm": 0.7265625, "learning_rate": 2.3470379197903035e-05, "loss": 0.0749, "step": 4023 }, { "epoch": 108.75675675675676, "grad_norm": 2.3125, "learning_rate": 2.3459228189089203e-05, "loss": 0.2028, "step": 4024 }, { "epoch": 108.78378378378379, "grad_norm": 1.4375, "learning_rate": 2.344807748797531e-05, "loss": 0.1274, "step": 4025 }, { "epoch": 108.8108108108108, "grad_norm": 1.28125, "learning_rate": 2.343692709678822e-05, "loss": 0.0811, "step": 4026 }, { "epoch": 108.83783783783784, "grad_norm": 1.390625, "learning_rate": 2.34257770177547e-05, "loss": 0.1248, "step": 4027 }, { "epoch": 108.86486486486487, "grad_norm": 1.046875, "learning_rate": 2.34146272531015e-05, "loss": 0.1204, "step": 4028 }, { "epoch": 108.89189189189189, "grad_norm": 1.109375, "learning_rate": 2.3403477805055266e-05, "loss": 0.0862, "step": 4029 }, { "epoch": 108.91891891891892, "grad_norm": 0.82421875, "learning_rate": 2.3392328675842608e-05, "loss": 0.0774, "step": 4030 }, { "epoch": 108.94594594594595, "grad_norm": 1.3984375, "learning_rate": 2.3381179867690067e-05, "loss": 0.1189, "step": 4031 }, { "epoch": 108.97297297297297, "grad_norm": 1.546875, "learning_rate": 2.337003138282411e-05, "loss": 0.1674, "step": 4032 }, { "epoch": 109.0, "grad_norm": 1.0703125, "learning_rate": 2.3358883223471138e-05, "loss": 0.1153, "step": 4033 }, { "epoch": 109.02702702702703, "grad_norm": 0.671875, "learning_rate": 2.334773539185752e-05, "loss": 0.0806, "step": 4034 }, { "epoch": 109.05405405405405, "grad_norm": 0.5703125, "learning_rate": 2.333658789020951e-05, "loss": 0.0757, "step": 4035 }, { "epoch": 109.08108108108108, "grad_norm": 1.421875, "learning_rate": 2.3325440720753326e-05, "loss": 0.1797, "step": 4036 }, { "epoch": 109.10810810810811, "grad_norm": 1.171875, "learning_rate": 2.331429388571513e-05, "loss": 0.1492, "step": 4037 }, { "epoch": 109.13513513513513, "grad_norm": 1.8203125, "learning_rate": 2.3303147387320983e-05, "loss": 0.1444, "step": 4038 }, { "epoch": 109.16216216216216, "grad_norm": 1.09375, "learning_rate": 2.32920012277969e-05, "loss": 0.1027, "step": 4039 }, { "epoch": 109.1891891891892, "grad_norm": 1.0078125, "learning_rate": 2.3280855409368844e-05, "loss": 0.1406, "step": 4040 }, { "epoch": 109.21621621621621, "grad_norm": 0.9609375, "learning_rate": 2.3269709934262663e-05, "loss": 0.096, "step": 4041 }, { "epoch": 109.24324324324324, "grad_norm": 1.203125, "learning_rate": 2.3258564804704176e-05, "loss": 0.1906, "step": 4042 }, { "epoch": 109.27027027027027, "grad_norm": 1.09375, "learning_rate": 2.3247420022919138e-05, "loss": 0.1122, "step": 4043 }, { "epoch": 109.29729729729729, "grad_norm": 0.82421875, "learning_rate": 2.3236275591133196e-05, "loss": 0.0893, "step": 4044 }, { "epoch": 109.32432432432432, "grad_norm": 1.109375, "learning_rate": 2.3225131511571956e-05, "loss": 0.1171, "step": 4045 }, { "epoch": 109.35135135135135, "grad_norm": 0.734375, "learning_rate": 2.321398778646096e-05, "loss": 0.073, "step": 4046 }, { "epoch": 109.37837837837837, "grad_norm": 0.9765625, "learning_rate": 2.3202844418025645e-05, "loss": 0.1452, "step": 4047 }, { "epoch": 109.4054054054054, "grad_norm": 1.0078125, "learning_rate": 2.3191701408491423e-05, "loss": 0.1599, "step": 4048 }, { "epoch": 109.43243243243244, "grad_norm": 1.1328125, "learning_rate": 2.318055876008358e-05, "loss": 0.1183, "step": 4049 }, { "epoch": 109.45945945945945, "grad_norm": 1.0625, "learning_rate": 2.3169416475027386e-05, "loss": 0.1116, "step": 4050 }, { "epoch": 109.48648648648648, "grad_norm": 1.5625, "learning_rate": 2.3158274555548e-05, "loss": 0.2215, "step": 4051 }, { "epoch": 109.51351351351352, "grad_norm": 0.59765625, "learning_rate": 2.3147133003870518e-05, "loss": 0.0711, "step": 4052 }, { "epoch": 109.54054054054055, "grad_norm": 0.64453125, "learning_rate": 2.3135991822219963e-05, "loss": 0.083, "step": 4053 }, { "epoch": 109.56756756756756, "grad_norm": 0.86328125, "learning_rate": 2.31248510128213e-05, "loss": 0.0965, "step": 4054 }, { "epoch": 109.5945945945946, "grad_norm": 1.21875, "learning_rate": 2.3113710577899385e-05, "loss": 0.1129, "step": 4055 }, { "epoch": 109.62162162162163, "grad_norm": 0.9921875, "learning_rate": 2.3102570519679026e-05, "loss": 0.1192, "step": 4056 }, { "epoch": 109.64864864864865, "grad_norm": 1.5546875, "learning_rate": 2.3091430840384964e-05, "loss": 0.196, "step": 4057 }, { "epoch": 109.67567567567568, "grad_norm": 0.859375, "learning_rate": 2.3080291542241828e-05, "loss": 0.0907, "step": 4058 }, { "epoch": 109.70270270270271, "grad_norm": 1.0390625, "learning_rate": 2.3069152627474202e-05, "loss": 0.1304, "step": 4059 }, { "epoch": 109.72972972972973, "grad_norm": 0.82421875, "learning_rate": 2.3058014098306593e-05, "loss": 0.087, "step": 4060 }, { "epoch": 109.75675675675676, "grad_norm": 1.1171875, "learning_rate": 2.30468759569634e-05, "loss": 0.1252, "step": 4061 }, { "epoch": 109.78378378378379, "grad_norm": 1.0859375, "learning_rate": 2.3035738205668992e-05, "loss": 0.129, "step": 4062 }, { "epoch": 109.8108108108108, "grad_norm": 0.7734375, "learning_rate": 2.3024600846647608e-05, "loss": 0.0817, "step": 4063 }, { "epoch": 109.83783783783784, "grad_norm": 1.3671875, "learning_rate": 2.301346388212345e-05, "loss": 0.2297, "step": 4064 }, { "epoch": 109.86486486486487, "grad_norm": 0.87109375, "learning_rate": 2.3002327314320634e-05, "loss": 0.1364, "step": 4065 }, { "epoch": 109.89189189189189, "grad_norm": 0.98046875, "learning_rate": 2.2991191145463167e-05, "loss": 0.1239, "step": 4066 }, { "epoch": 109.91891891891892, "grad_norm": 1.40625, "learning_rate": 2.298005537777502e-05, "loss": 0.2086, "step": 4067 }, { "epoch": 109.94594594594595, "grad_norm": 1.296875, "learning_rate": 2.296892001348005e-05, "loss": 0.1608, "step": 4068 }, { "epoch": 109.97297297297297, "grad_norm": 0.82421875, "learning_rate": 2.295778505480204e-05, "loss": 0.0935, "step": 4069 }, { "epoch": 110.0, "grad_norm": 1.046875, "learning_rate": 2.2946650503964717e-05, "loss": 0.1055, "step": 4070 }, { "epoch": 110.02702702702703, "grad_norm": 0.52734375, "learning_rate": 2.2935516363191693e-05, "loss": 0.0675, "step": 4071 }, { "epoch": 110.05405405405405, "grad_norm": 1.0703125, "learning_rate": 2.292438263470652e-05, "loss": 0.1238, "step": 4072 }, { "epoch": 110.08108108108108, "grad_norm": 1.1171875, "learning_rate": 2.2913249320732648e-05, "loss": 0.1476, "step": 4073 }, { "epoch": 110.10810810810811, "grad_norm": 1.109375, "learning_rate": 2.290211642349347e-05, "loss": 0.1232, "step": 4074 }, { "epoch": 110.13513513513513, "grad_norm": 1.34375, "learning_rate": 2.289098394521227e-05, "loss": 0.1513, "step": 4075 }, { "epoch": 110.16216216216216, "grad_norm": 1.4609375, "learning_rate": 2.287985188811228e-05, "loss": 0.1449, "step": 4076 }, { "epoch": 110.1891891891892, "grad_norm": 1.265625, "learning_rate": 2.28687202544166e-05, "loss": 0.1722, "step": 4077 }, { "epoch": 110.21621621621621, "grad_norm": 1.1875, "learning_rate": 2.2857589046348287e-05, "loss": 0.0944, "step": 4078 }, { "epoch": 110.24324324324324, "grad_norm": 1.09375, "learning_rate": 2.2846458266130316e-05, "loss": 0.1032, "step": 4079 }, { "epoch": 110.27027027027027, "grad_norm": 1.2578125, "learning_rate": 2.2835327915985534e-05, "loss": 0.1401, "step": 4080 }, { "epoch": 110.29729729729729, "grad_norm": 1.46875, "learning_rate": 2.2824197998136736e-05, "loss": 0.203, "step": 4081 }, { "epoch": 110.32432432432432, "grad_norm": 1.171875, "learning_rate": 2.281306851480664e-05, "loss": 0.1388, "step": 4082 }, { "epoch": 110.35135135135135, "grad_norm": 1.03125, "learning_rate": 2.2801939468217834e-05, "loss": 0.1218, "step": 4083 }, { "epoch": 110.37837837837837, "grad_norm": 1.0078125, "learning_rate": 2.279081086059286e-05, "loss": 0.104, "step": 4084 }, { "epoch": 110.4054054054054, "grad_norm": 1.1796875, "learning_rate": 2.2779682694154165e-05, "loss": 0.1167, "step": 4085 }, { "epoch": 110.43243243243244, "grad_norm": 1.2421875, "learning_rate": 2.276855497112408e-05, "loss": 0.1269, "step": 4086 }, { "epoch": 110.45945945945945, "grad_norm": 0.9765625, "learning_rate": 2.275742769372488e-05, "loss": 0.1054, "step": 4087 }, { "epoch": 110.48648648648648, "grad_norm": 1.1484375, "learning_rate": 2.2746300864178743e-05, "loss": 0.1349, "step": 4088 }, { "epoch": 110.51351351351352, "grad_norm": 1.1484375, "learning_rate": 2.2735174484707744e-05, "loss": 0.1334, "step": 4089 }, { "epoch": 110.54054054054055, "grad_norm": 0.99609375, "learning_rate": 2.2724048557533877e-05, "loss": 0.1046, "step": 4090 }, { "epoch": 110.56756756756756, "grad_norm": 0.953125, "learning_rate": 2.2712923084879058e-05, "loss": 0.1026, "step": 4091 }, { "epoch": 110.5945945945946, "grad_norm": 0.66015625, "learning_rate": 2.270179806896508e-05, "loss": 0.0683, "step": 4092 }, { "epoch": 110.62162162162163, "grad_norm": 0.953125, "learning_rate": 2.269067351201369e-05, "loss": 0.1001, "step": 4093 }, { "epoch": 110.64864864864865, "grad_norm": 1.5625, "learning_rate": 2.2679549416246494e-05, "loss": 0.2004, "step": 4094 }, { "epoch": 110.67567567567568, "grad_norm": 1.328125, "learning_rate": 2.2668425783885038e-05, "loss": 0.1907, "step": 4095 }, { "epoch": 110.70270270270271, "grad_norm": 1.0078125, "learning_rate": 2.2657302617150787e-05, "loss": 0.0959, "step": 4096 }, { "epoch": 110.72972972972973, "grad_norm": 1.09375, "learning_rate": 2.2646179918265063e-05, "loss": 0.1032, "step": 4097 }, { "epoch": 110.75675675675676, "grad_norm": 1.1484375, "learning_rate": 2.2635057689449147e-05, "loss": 0.1171, "step": 4098 }, { "epoch": 110.78378378378379, "grad_norm": 1.1328125, "learning_rate": 2.2623935932924203e-05, "loss": 0.1488, "step": 4099 }, { "epoch": 110.8108108108108, "grad_norm": 0.7890625, "learning_rate": 2.2612814650911287e-05, "loss": 0.0889, "step": 4100 }, { "epoch": 110.83783783783784, "grad_norm": 0.7890625, "learning_rate": 2.2601693845631387e-05, "loss": 0.0752, "step": 4101 }, { "epoch": 110.86486486486487, "grad_norm": 0.65625, "learning_rate": 2.2590573519305393e-05, "loss": 0.0667, "step": 4102 }, { "epoch": 110.89189189189189, "grad_norm": 0.83984375, "learning_rate": 2.257945367415407e-05, "loss": 0.0917, "step": 4103 }, { "epoch": 110.91891891891892, "grad_norm": 1.0546875, "learning_rate": 2.2568334312398116e-05, "loss": 0.1182, "step": 4104 }, { "epoch": 110.94594594594595, "grad_norm": 1.0703125, "learning_rate": 2.255721543625814e-05, "loss": 0.1107, "step": 4105 }, { "epoch": 110.97297297297297, "grad_norm": 1.046875, "learning_rate": 2.254609704795461e-05, "loss": 0.1087, "step": 4106 }, { "epoch": 111.0, "grad_norm": 1.2109375, "learning_rate": 2.2534979149707947e-05, "loss": 0.1369, "step": 4107 }, { "epoch": 111.02702702702703, "grad_norm": 1.328125, "learning_rate": 2.2523861743738434e-05, "loss": 0.1899, "step": 4108 }, { "epoch": 111.05405405405405, "grad_norm": 0.546875, "learning_rate": 2.2512744832266286e-05, "loss": 0.0578, "step": 4109 }, { "epoch": 111.08108108108108, "grad_norm": 0.7578125, "learning_rate": 2.2501628417511604e-05, "loss": 0.0879, "step": 4110 }, { "epoch": 111.10810810810811, "grad_norm": 1.3125, "learning_rate": 2.2490512501694393e-05, "loss": 0.1556, "step": 4111 }, { "epoch": 111.13513513513513, "grad_norm": 0.80078125, "learning_rate": 2.2479397087034545e-05, "loss": 0.0811, "step": 4112 }, { "epoch": 111.16216216216216, "grad_norm": 0.76171875, "learning_rate": 2.246828217575189e-05, "loss": 0.0889, "step": 4113 }, { "epoch": 111.1891891891892, "grad_norm": 0.77734375, "learning_rate": 2.2457167770066105e-05, "loss": 0.0837, "step": 4114 }, { "epoch": 111.21621621621621, "grad_norm": 1.078125, "learning_rate": 2.2446053872196805e-05, "loss": 0.148, "step": 4115 }, { "epoch": 111.24324324324324, "grad_norm": 1.15625, "learning_rate": 2.2434940484363503e-05, "loss": 0.1614, "step": 4116 }, { "epoch": 111.27027027027027, "grad_norm": 0.97265625, "learning_rate": 2.242382760878558e-05, "loss": 0.1053, "step": 4117 }, { "epoch": 111.29729729729729, "grad_norm": 1.1171875, "learning_rate": 2.2412715247682338e-05, "loss": 0.1359, "step": 4118 }, { "epoch": 111.32432432432432, "grad_norm": 0.72265625, "learning_rate": 2.240160340327299e-05, "loss": 0.089, "step": 4119 }, { "epoch": 111.35135135135135, "grad_norm": 0.94140625, "learning_rate": 2.23904920777766e-05, "loss": 0.0995, "step": 4120 }, { "epoch": 111.37837837837837, "grad_norm": 1.2578125, "learning_rate": 2.2379381273412182e-05, "loss": 0.1327, "step": 4121 }, { "epoch": 111.4054054054054, "grad_norm": 1.0859375, "learning_rate": 2.2368270992398592e-05, "loss": 0.1345, "step": 4122 }, { "epoch": 111.43243243243244, "grad_norm": 0.87109375, "learning_rate": 2.235716123695463e-05, "loss": 0.1098, "step": 4123 }, { "epoch": 111.45945945945945, "grad_norm": 1.296875, "learning_rate": 2.2346052009298973e-05, "loss": 0.1179, "step": 4124 }, { "epoch": 111.48648648648648, "grad_norm": 0.8828125, "learning_rate": 2.2334943311650174e-05, "loss": 0.0934, "step": 4125 }, { "epoch": 111.51351351351352, "grad_norm": 0.92578125, "learning_rate": 2.2323835146226707e-05, "loss": 0.0863, "step": 4126 }, { "epoch": 111.54054054054055, "grad_norm": 0.6171875, "learning_rate": 2.2312727515246928e-05, "loss": 0.0625, "step": 4127 }, { "epoch": 111.56756756756756, "grad_norm": 0.53125, "learning_rate": 2.2301620420929084e-05, "loss": 0.0639, "step": 4128 }, { "epoch": 111.5945945945946, "grad_norm": 1.3984375, "learning_rate": 2.2290513865491318e-05, "loss": 0.1425, "step": 4129 }, { "epoch": 111.62162162162163, "grad_norm": 1.0625, "learning_rate": 2.2279407851151673e-05, "loss": 0.0965, "step": 4130 }, { "epoch": 111.64864864864865, "grad_norm": 1.0625, "learning_rate": 2.2268302380128062e-05, "loss": 0.1126, "step": 4131 }, { "epoch": 111.67567567567568, "grad_norm": 0.9375, "learning_rate": 2.2257197454638313e-05, "loss": 0.0891, "step": 4132 }, { "epoch": 111.70270270270271, "grad_norm": 1.125, "learning_rate": 2.2246093076900144e-05, "loss": 0.1032, "step": 4133 }, { "epoch": 111.72972972972973, "grad_norm": 1.1015625, "learning_rate": 2.2234989249131136e-05, "loss": 0.1142, "step": 4134 }, { "epoch": 111.75675675675676, "grad_norm": 0.87890625, "learning_rate": 2.222388597354879e-05, "loss": 0.0909, "step": 4135 }, { "epoch": 111.78378378378379, "grad_norm": 1.1953125, "learning_rate": 2.2212783252370497e-05, "loss": 0.1511, "step": 4136 }, { "epoch": 111.8108108108108, "grad_norm": 0.875, "learning_rate": 2.2201681087813507e-05, "loss": 0.0974, "step": 4137 }, { "epoch": 111.83783783783784, "grad_norm": 0.82421875, "learning_rate": 2.2190579482095e-05, "loss": 0.0778, "step": 4138 }, { "epoch": 111.86486486486487, "grad_norm": 1.1796875, "learning_rate": 2.2179478437431998e-05, "loss": 0.1218, "step": 4139 }, { "epoch": 111.89189189189189, "grad_norm": 0.921875, "learning_rate": 2.2168377956041452e-05, "loss": 0.1342, "step": 4140 }, { "epoch": 111.91891891891892, "grad_norm": 1.078125, "learning_rate": 2.215727804014019e-05, "loss": 0.116, "step": 4141 }, { "epoch": 111.94594594594595, "grad_norm": 0.94921875, "learning_rate": 2.2146178691944902e-05, "loss": 0.1122, "step": 4142 }, { "epoch": 111.97297297297297, "grad_norm": 0.53515625, "learning_rate": 2.2135079913672194e-05, "loss": 0.0657, "step": 4143 }, { "epoch": 112.0, "grad_norm": 1.0390625, "learning_rate": 2.212398170753856e-05, "loss": 0.1117, "step": 4144 }, { "epoch": 112.02702702702703, "grad_norm": 0.8515625, "learning_rate": 2.2112884075760347e-05, "loss": 0.099, "step": 4145 }, { "epoch": 112.05405405405405, "grad_norm": 1.1875, "learning_rate": 2.2101787020553824e-05, "loss": 0.1341, "step": 4146 }, { "epoch": 112.08108108108108, "grad_norm": 1.0, "learning_rate": 2.2090690544135122e-05, "loss": 0.1357, "step": 4147 }, { "epoch": 112.10810810810811, "grad_norm": 1.203125, "learning_rate": 2.207959464872027e-05, "loss": 0.1417, "step": 4148 }, { "epoch": 112.13513513513513, "grad_norm": 1.2890625, "learning_rate": 2.2068499336525158e-05, "loss": 0.1628, "step": 4149 }, { "epoch": 112.16216216216216, "grad_norm": 1.21875, "learning_rate": 2.2057404609765603e-05, "loss": 0.1181, "step": 4150 }, { "epoch": 112.1891891891892, "grad_norm": 0.97265625, "learning_rate": 2.204631047065725e-05, "loss": 0.0914, "step": 4151 }, { "epoch": 112.21621621621621, "grad_norm": 0.86328125, "learning_rate": 2.203521692141568e-05, "loss": 0.0898, "step": 4152 }, { "epoch": 112.24324324324324, "grad_norm": 1.0546875, "learning_rate": 2.2024123964256308e-05, "loss": 0.0864, "step": 4153 }, { "epoch": 112.27027027027027, "grad_norm": 1.2578125, "learning_rate": 2.2013031601394462e-05, "loss": 0.1662, "step": 4154 }, { "epoch": 112.29729729729729, "grad_norm": 0.84375, "learning_rate": 2.200193983504536e-05, "loss": 0.0998, "step": 4155 }, { "epoch": 112.32432432432432, "grad_norm": 0.89453125, "learning_rate": 2.1990848667424054e-05, "loss": 0.0841, "step": 4156 }, { "epoch": 112.35135135135135, "grad_norm": 0.6875, "learning_rate": 2.1979758100745527e-05, "loss": 0.0791, "step": 4157 }, { "epoch": 112.37837837837837, "grad_norm": 1.1796875, "learning_rate": 2.1968668137224624e-05, "loss": 0.1403, "step": 4158 }, { "epoch": 112.4054054054054, "grad_norm": 0.72265625, "learning_rate": 2.195757877907605e-05, "loss": 0.0644, "step": 4159 }, { "epoch": 112.43243243243244, "grad_norm": 0.76171875, "learning_rate": 2.1946490028514415e-05, "loss": 0.0706, "step": 4160 }, { "epoch": 112.45945945945945, "grad_norm": 1.046875, "learning_rate": 2.193540188775421e-05, "loss": 0.135, "step": 4161 }, { "epoch": 112.48648648648648, "grad_norm": 0.4453125, "learning_rate": 2.1924314359009773e-05, "loss": 0.0599, "step": 4162 }, { "epoch": 112.51351351351352, "grad_norm": 1.28125, "learning_rate": 2.191322744449535e-05, "loss": 0.1494, "step": 4163 }, { "epoch": 112.54054054054055, "grad_norm": 1.5234375, "learning_rate": 2.1902141146425058e-05, "loss": 0.1685, "step": 4164 }, { "epoch": 112.56756756756756, "grad_norm": 0.8046875, "learning_rate": 2.1891055467012877e-05, "loss": 0.0866, "step": 4165 }, { "epoch": 112.5945945945946, "grad_norm": 1.0, "learning_rate": 2.1879970408472676e-05, "loss": 0.1137, "step": 4166 }, { "epoch": 112.62162162162163, "grad_norm": 0.77734375, "learning_rate": 2.18688859730182e-05, "loss": 0.0712, "step": 4167 }, { "epoch": 112.64864864864865, "grad_norm": 0.7578125, "learning_rate": 2.1857802162863072e-05, "loss": 0.0839, "step": 4168 }, { "epoch": 112.67567567567568, "grad_norm": 1.1875, "learning_rate": 2.1846718980220775e-05, "loss": 0.1197, "step": 4169 }, { "epoch": 112.70270270270271, "grad_norm": 0.58984375, "learning_rate": 2.1835636427304683e-05, "loss": 0.0743, "step": 4170 }, { "epoch": 112.72972972972973, "grad_norm": 1.6171875, "learning_rate": 2.182455450632803e-05, "loss": 0.224, "step": 4171 }, { "epoch": 112.75675675675676, "grad_norm": 1.28125, "learning_rate": 2.1813473219503953e-05, "loss": 0.1188, "step": 4172 }, { "epoch": 112.78378378378379, "grad_norm": 1.125, "learning_rate": 2.180239256904541e-05, "loss": 0.1257, "step": 4173 }, { "epoch": 112.8108108108108, "grad_norm": 1.1328125, "learning_rate": 2.179131255716528e-05, "loss": 0.1054, "step": 4174 }, { "epoch": 112.83783783783784, "grad_norm": 0.8984375, "learning_rate": 2.178023318607631e-05, "loss": 0.0799, "step": 4175 }, { "epoch": 112.86486486486487, "grad_norm": 1.03125, "learning_rate": 2.176915445799108e-05, "loss": 0.114, "step": 4176 }, { "epoch": 112.89189189189189, "grad_norm": 0.9453125, "learning_rate": 2.175807637512208e-05, "loss": 0.138, "step": 4177 }, { "epoch": 112.91891891891892, "grad_norm": 0.640625, "learning_rate": 2.1746998939681677e-05, "loss": 0.0698, "step": 4178 }, { "epoch": 112.94594594594595, "grad_norm": 0.76171875, "learning_rate": 2.173592215388206e-05, "loss": 0.0793, "step": 4179 }, { "epoch": 112.97297297297297, "grad_norm": 1.140625, "learning_rate": 2.1724846019935335e-05, "loss": 0.114, "step": 4180 }, { "epoch": 113.0, "grad_norm": 0.88671875, "learning_rate": 2.1713770540053472e-05, "loss": 0.0947, "step": 4181 }, { "epoch": 113.02702702702703, "grad_norm": 0.65234375, "learning_rate": 2.1702695716448278e-05, "loss": 0.0658, "step": 4182 }, { "epoch": 113.05405405405405, "grad_norm": 1.0234375, "learning_rate": 2.1691621551331474e-05, "loss": 0.0643, "step": 4183 }, { "epoch": 113.08108108108108, "grad_norm": 1.78125, "learning_rate": 2.1680548046914602e-05, "loss": 0.1778, "step": 4184 }, { "epoch": 113.10810810810811, "grad_norm": 0.95703125, "learning_rate": 2.166947520540912e-05, "loss": 0.1023, "step": 4185 }, { "epoch": 113.13513513513513, "grad_norm": 1.0078125, "learning_rate": 2.165840302902632e-05, "loss": 0.1124, "step": 4186 }, { "epoch": 113.16216216216216, "grad_norm": 1.6015625, "learning_rate": 2.1647331519977375e-05, "loss": 0.2113, "step": 4187 }, { "epoch": 113.1891891891892, "grad_norm": 0.85546875, "learning_rate": 2.1636260680473312e-05, "loss": 0.0888, "step": 4188 }, { "epoch": 113.21621621621621, "grad_norm": 0.890625, "learning_rate": 2.1625190512725052e-05, "loss": 0.1387, "step": 4189 }, { "epoch": 113.24324324324324, "grad_norm": 1.125, "learning_rate": 2.1614121018943344e-05, "loss": 0.0919, "step": 4190 }, { "epoch": 113.27027027027027, "grad_norm": 0.58203125, "learning_rate": 2.160305220133883e-05, "loss": 0.0645, "step": 4191 }, { "epoch": 113.29729729729729, "grad_norm": 1.1484375, "learning_rate": 2.1591984062122022e-05, "loss": 0.1096, "step": 4192 }, { "epoch": 113.32432432432432, "grad_norm": 1.2265625, "learning_rate": 2.158091660350326e-05, "loss": 0.1584, "step": 4193 }, { "epoch": 113.35135135135135, "grad_norm": 0.89453125, "learning_rate": 2.156984982769278e-05, "loss": 0.1061, "step": 4194 }, { "epoch": 113.37837837837837, "grad_norm": 0.99609375, "learning_rate": 2.155878373690069e-05, "loss": 0.0785, "step": 4195 }, { "epoch": 113.4054054054054, "grad_norm": 0.7109375, "learning_rate": 2.154771833333692e-05, "loss": 0.0742, "step": 4196 }, { "epoch": 113.43243243243244, "grad_norm": 1.78125, "learning_rate": 2.153665361921131e-05, "loss": 0.144, "step": 4197 }, { "epoch": 113.45945945945945, "grad_norm": 1.1328125, "learning_rate": 2.1525589596733515e-05, "loss": 0.134, "step": 4198 }, { "epoch": 113.48648648648648, "grad_norm": 0.83984375, "learning_rate": 2.151452626811309e-05, "loss": 0.0798, "step": 4199 }, { "epoch": 113.51351351351352, "grad_norm": 0.9140625, "learning_rate": 2.1503463635559446e-05, "loss": 0.1042, "step": 4200 }, { "epoch": 113.54054054054055, "grad_norm": 1.296875, "learning_rate": 2.1492401701281827e-05, "loss": 0.1131, "step": 4201 }, { "epoch": 113.56756756756756, "grad_norm": 1.0390625, "learning_rate": 2.1481340467489366e-05, "loss": 0.1528, "step": 4202 }, { "epoch": 113.5945945945946, "grad_norm": 1.046875, "learning_rate": 2.1470279936391063e-05, "loss": 0.0945, "step": 4203 }, { "epoch": 113.62162162162163, "grad_norm": 0.85546875, "learning_rate": 2.1459220110195735e-05, "loss": 0.0816, "step": 4204 }, { "epoch": 113.64864864864865, "grad_norm": 1.03125, "learning_rate": 2.1448160991112106e-05, "loss": 0.1255, "step": 4205 }, { "epoch": 113.67567567567568, "grad_norm": 0.84765625, "learning_rate": 2.1437102581348728e-05, "loss": 0.0848, "step": 4206 }, { "epoch": 113.70270270270271, "grad_norm": 1.3125, "learning_rate": 2.1426044883114028e-05, "loss": 0.1188, "step": 4207 }, { "epoch": 113.72972972972973, "grad_norm": 0.91796875, "learning_rate": 2.1414987898616273e-05, "loss": 0.0929, "step": 4208 }, { "epoch": 113.75675675675676, "grad_norm": 0.8515625, "learning_rate": 2.1403931630063616e-05, "loss": 0.1089, "step": 4209 }, { "epoch": 113.78378378378379, "grad_norm": 0.72265625, "learning_rate": 2.1392876079664036e-05, "loss": 0.0716, "step": 4210 }, { "epoch": 113.8108108108108, "grad_norm": 0.8984375, "learning_rate": 2.138182124962538e-05, "loss": 0.0923, "step": 4211 }, { "epoch": 113.83783783783784, "grad_norm": 0.89453125, "learning_rate": 2.1370767142155373e-05, "loss": 0.0866, "step": 4212 }, { "epoch": 113.86486486486487, "grad_norm": 1.2734375, "learning_rate": 2.1359713759461552e-05, "loss": 0.1123, "step": 4213 }, { "epoch": 113.89189189189189, "grad_norm": 1.421875, "learning_rate": 2.134866110375136e-05, "loss": 0.1894, "step": 4214 }, { "epoch": 113.91891891891892, "grad_norm": 0.91015625, "learning_rate": 2.133760917723204e-05, "loss": 0.0976, "step": 4215 }, { "epoch": 113.94594594594595, "grad_norm": 0.70703125, "learning_rate": 2.132655798211073e-05, "loss": 0.0754, "step": 4216 }, { "epoch": 113.97297297297297, "grad_norm": 1.28125, "learning_rate": 2.1315507520594424e-05, "loss": 0.1656, "step": 4217 }, { "epoch": 114.0, "grad_norm": 0.90625, "learning_rate": 2.1304457794889933e-05, "loss": 0.1046, "step": 4218 }, { "epoch": 114.02702702702703, "grad_norm": 0.8671875, "learning_rate": 2.1293408807203947e-05, "loss": 0.0822, "step": 4219 }, { "epoch": 114.05405405405405, "grad_norm": 0.7890625, "learning_rate": 2.1282360559743026e-05, "loss": 0.0863, "step": 4220 }, { "epoch": 114.08108108108108, "grad_norm": 0.93359375, "learning_rate": 2.1271313054713534e-05, "loss": 0.0921, "step": 4221 }, { "epoch": 114.10810810810811, "grad_norm": 1.2578125, "learning_rate": 2.126026629432173e-05, "loss": 0.1711, "step": 4222 }, { "epoch": 114.13513513513513, "grad_norm": 0.984375, "learning_rate": 2.1249220280773703e-05, "loss": 0.1148, "step": 4223 }, { "epoch": 114.16216216216216, "grad_norm": 0.6328125, "learning_rate": 2.1238175016275397e-05, "loss": 0.0667, "step": 4224 }, { "epoch": 114.1891891891892, "grad_norm": 1.2734375, "learning_rate": 2.1227130503032604e-05, "loss": 0.1496, "step": 4225 }, { "epoch": 114.21621621621621, "grad_norm": 0.87109375, "learning_rate": 2.1216086743250984e-05, "loss": 0.0894, "step": 4226 }, { "epoch": 114.24324324324324, "grad_norm": 1.125, "learning_rate": 2.120504373913601e-05, "loss": 0.1312, "step": 4227 }, { "epoch": 114.27027027027027, "grad_norm": 1.2421875, "learning_rate": 2.119400149289305e-05, "loss": 0.1344, "step": 4228 }, { "epoch": 114.29729729729729, "grad_norm": 1.484375, "learning_rate": 2.1182960006727272e-05, "loss": 0.1414, "step": 4229 }, { "epoch": 114.32432432432432, "grad_norm": 0.80078125, "learning_rate": 2.117191928284373e-05, "loss": 0.0829, "step": 4230 }, { "epoch": 114.35135135135135, "grad_norm": 0.59765625, "learning_rate": 2.116087932344732e-05, "loss": 0.0686, "step": 4231 }, { "epoch": 114.37837837837837, "grad_norm": 1.234375, "learning_rate": 2.114984013074276e-05, "loss": 0.1341, "step": 4232 }, { "epoch": 114.4054054054054, "grad_norm": 0.99609375, "learning_rate": 2.113880170693464e-05, "loss": 0.1369, "step": 4233 }, { "epoch": 114.43243243243244, "grad_norm": 0.90625, "learning_rate": 2.1127764054227404e-05, "loss": 0.0962, "step": 4234 }, { "epoch": 114.45945945945945, "grad_norm": 1.0390625, "learning_rate": 2.11167271748253e-05, "loss": 0.0877, "step": 4235 }, { "epoch": 114.48648648648648, "grad_norm": 0.470703125, "learning_rate": 2.1105691070932466e-05, "loss": 0.0607, "step": 4236 }, { "epoch": 114.51351351351352, "grad_norm": 0.9921875, "learning_rate": 2.1094655744752872e-05, "loss": 0.1026, "step": 4237 }, { "epoch": 114.54054054054055, "grad_norm": 1.0546875, "learning_rate": 2.1083621198490313e-05, "loss": 0.1005, "step": 4238 }, { "epoch": 114.56756756756756, "grad_norm": 0.703125, "learning_rate": 2.1072587434348453e-05, "loss": 0.068, "step": 4239 }, { "epoch": 114.5945945945946, "grad_norm": 1.703125, "learning_rate": 2.1061554454530798e-05, "loss": 0.1709, "step": 4240 }, { "epoch": 114.62162162162163, "grad_norm": 1.125, "learning_rate": 2.1050522261240673e-05, "loss": 0.1092, "step": 4241 }, { "epoch": 114.64864864864865, "grad_norm": 1.1328125, "learning_rate": 2.1039490856681276e-05, "loss": 0.1452, "step": 4242 }, { "epoch": 114.67567567567568, "grad_norm": 0.96484375, "learning_rate": 2.102846024305563e-05, "loss": 0.0912, "step": 4243 }, { "epoch": 114.70270270270271, "grad_norm": 0.9453125, "learning_rate": 2.101743042256661e-05, "loss": 0.0718, "step": 4244 }, { "epoch": 114.72972972972973, "grad_norm": 1.375, "learning_rate": 2.1006401397416914e-05, "loss": 0.2059, "step": 4245 }, { "epoch": 114.75675675675676, "grad_norm": 1.6171875, "learning_rate": 2.0995373169809108e-05, "loss": 0.173, "step": 4246 }, { "epoch": 114.78378378378379, "grad_norm": 1.234375, "learning_rate": 2.098434574194557e-05, "loss": 0.1418, "step": 4247 }, { "epoch": 114.8108108108108, "grad_norm": 0.78515625, "learning_rate": 2.097331911602855e-05, "loss": 0.0642, "step": 4248 }, { "epoch": 114.83783783783784, "grad_norm": 1.625, "learning_rate": 2.0962293294260108e-05, "loss": 0.26, "step": 4249 }, { "epoch": 114.86486486486487, "grad_norm": 1.0546875, "learning_rate": 2.0951268278842158e-05, "loss": 0.151, "step": 4250 }, { "epoch": 114.89189189189189, "grad_norm": 0.86328125, "learning_rate": 2.0940244071976466e-05, "loss": 0.1039, "step": 4251 }, { "epoch": 114.91891891891892, "grad_norm": 1.0859375, "learning_rate": 2.09292206758646e-05, "loss": 0.1406, "step": 4252 }, { "epoch": 114.94594594594595, "grad_norm": 1.3046875, "learning_rate": 2.0918198092708004e-05, "loss": 0.1551, "step": 4253 }, { "epoch": 114.97297297297297, "grad_norm": 0.6328125, "learning_rate": 2.0907176324707945e-05, "loss": 0.0802, "step": 4254 }, { "epoch": 115.0, "grad_norm": 0.7421875, "learning_rate": 2.089615537406551e-05, "loss": 0.0808, "step": 4255 }, { "epoch": 115.02702702702703, "grad_norm": 1.140625, "learning_rate": 2.088513524298165e-05, "loss": 0.1169, "step": 4256 }, { "epoch": 115.05405405405405, "grad_norm": 0.890625, "learning_rate": 2.087411593365715e-05, "loss": 0.0841, "step": 4257 }, { "epoch": 115.08108108108108, "grad_norm": 0.66796875, "learning_rate": 2.08630974482926e-05, "loss": 0.0673, "step": 4258 }, { "epoch": 115.10810810810811, "grad_norm": 1.0859375, "learning_rate": 2.085207978908847e-05, "loss": 0.1138, "step": 4259 }, { "epoch": 115.13513513513513, "grad_norm": 0.58984375, "learning_rate": 2.0841062958245018e-05, "loss": 0.0721, "step": 4260 }, { "epoch": 115.16216216216216, "grad_norm": 1.0625, "learning_rate": 2.0830046957962382e-05, "loss": 0.1247, "step": 4261 }, { "epoch": 115.1891891891892, "grad_norm": 0.76171875, "learning_rate": 2.0819031790440504e-05, "loss": 0.0846, "step": 4262 }, { "epoch": 115.21621621621621, "grad_norm": 1.0234375, "learning_rate": 2.080801745787917e-05, "loss": 0.0978, "step": 4263 }, { "epoch": 115.24324324324324, "grad_norm": 1.4140625, "learning_rate": 2.0797003962477994e-05, "loss": 0.1783, "step": 4264 }, { "epoch": 115.27027027027027, "grad_norm": 1.546875, "learning_rate": 2.0785991306436438e-05, "loss": 0.1675, "step": 4265 }, { "epoch": 115.29729729729729, "grad_norm": 0.9375, "learning_rate": 2.0774979491953777e-05, "loss": 0.0851, "step": 4266 }, { "epoch": 115.32432432432432, "grad_norm": 1.03125, "learning_rate": 2.0763968521229123e-05, "loss": 0.1317, "step": 4267 }, { "epoch": 115.35135135135135, "grad_norm": 1.3046875, "learning_rate": 2.0752958396461436e-05, "loss": 0.1296, "step": 4268 }, { "epoch": 115.37837837837837, "grad_norm": 1.375, "learning_rate": 2.0741949119849473e-05, "loss": 0.1501, "step": 4269 }, { "epoch": 115.4054054054054, "grad_norm": 0.9765625, "learning_rate": 2.073094069359186e-05, "loss": 0.1185, "step": 4270 }, { "epoch": 115.43243243243244, "grad_norm": 0.79296875, "learning_rate": 2.0719933119887032e-05, "loss": 0.0817, "step": 4271 }, { "epoch": 115.45945945945945, "grad_norm": 0.65234375, "learning_rate": 2.070892640093325e-05, "loss": 0.0788, "step": 4272 }, { "epoch": 115.48648648648648, "grad_norm": 1.2109375, "learning_rate": 2.0697920538928624e-05, "loss": 0.1659, "step": 4273 }, { "epoch": 115.51351351351352, "grad_norm": 0.73828125, "learning_rate": 2.0686915536071067e-05, "loss": 0.0737, "step": 4274 }, { "epoch": 115.54054054054055, "grad_norm": 1.1875, "learning_rate": 2.0675911394558335e-05, "loss": 0.1381, "step": 4275 }, { "epoch": 115.56756756756756, "grad_norm": 1.0234375, "learning_rate": 2.066490811658803e-05, "loss": 0.1164, "step": 4276 }, { "epoch": 115.5945945945946, "grad_norm": 0.92578125, "learning_rate": 2.0653905704357534e-05, "loss": 0.1122, "step": 4277 }, { "epoch": 115.62162162162163, "grad_norm": 1.2578125, "learning_rate": 2.06429041600641e-05, "loss": 0.1486, "step": 4278 }, { "epoch": 115.64864864864865, "grad_norm": 0.921875, "learning_rate": 2.06319034859048e-05, "loss": 0.0996, "step": 4279 }, { "epoch": 115.67567567567568, "grad_norm": 1.3203125, "learning_rate": 2.0620903684076506e-05, "loss": 0.1531, "step": 4280 }, { "epoch": 115.70270270270271, "grad_norm": 1.203125, "learning_rate": 2.060990475677595e-05, "loss": 0.2126, "step": 4281 }, { "epoch": 115.72972972972973, "grad_norm": 1.4609375, "learning_rate": 2.0598906706199662e-05, "loss": 0.1946, "step": 4282 }, { "epoch": 115.75675675675676, "grad_norm": 1.640625, "learning_rate": 2.058790953454402e-05, "loss": 0.1962, "step": 4283 }, { "epoch": 115.78378378378379, "grad_norm": 0.9375, "learning_rate": 2.0576913244005196e-05, "loss": 0.0967, "step": 4284 }, { "epoch": 115.8108108108108, "grad_norm": 1.484375, "learning_rate": 2.056591783677923e-05, "loss": 0.1849, "step": 4285 }, { "epoch": 115.83783783783784, "grad_norm": 1.0859375, "learning_rate": 2.055492331506194e-05, "loss": 0.1647, "step": 4286 }, { "epoch": 115.86486486486487, "grad_norm": 1.2265625, "learning_rate": 2.0543929681048992e-05, "loss": 0.1183, "step": 4287 }, { "epoch": 115.89189189189189, "grad_norm": 0.5703125, "learning_rate": 2.0532936936935886e-05, "loss": 0.0684, "step": 4288 }, { "epoch": 115.91891891891892, "grad_norm": 1.3828125, "learning_rate": 2.0521945084917903e-05, "loss": 0.1975, "step": 4289 }, { "epoch": 115.94594594594595, "grad_norm": 1.53125, "learning_rate": 2.05109541271902e-05, "loss": 0.204, "step": 4290 }, { "epoch": 115.97297297297297, "grad_norm": 0.6328125, "learning_rate": 2.04999640659477e-05, "loss": 0.0735, "step": 4291 }, { "epoch": 116.0, "grad_norm": 0.953125, "learning_rate": 2.0488974903385187e-05, "loss": 0.1011, "step": 4292 }, { "epoch": 116.02702702702703, "grad_norm": 1.2109375, "learning_rate": 2.047798664169726e-05, "loss": 0.1789, "step": 4293 }, { "epoch": 116.05405405405405, "grad_norm": 1.125, "learning_rate": 2.0466999283078312e-05, "loss": 0.1448, "step": 4294 }, { "epoch": 116.08108108108108, "grad_norm": 0.69921875, "learning_rate": 2.0456012829722584e-05, "loss": 0.0797, "step": 4295 }, { "epoch": 116.10810810810811, "grad_norm": 0.6953125, "learning_rate": 2.0445027283824138e-05, "loss": 0.0765, "step": 4296 }, { "epoch": 116.13513513513513, "grad_norm": 0.58984375, "learning_rate": 2.0434042647576823e-05, "loss": 0.0684, "step": 4297 }, { "epoch": 116.16216216216216, "grad_norm": 0.91796875, "learning_rate": 2.0423058923174338e-05, "loss": 0.084, "step": 4298 }, { "epoch": 116.1891891891892, "grad_norm": 0.86328125, "learning_rate": 2.0412076112810184e-05, "loss": 0.0992, "step": 4299 }, { "epoch": 116.21621621621621, "grad_norm": 0.8828125, "learning_rate": 2.0401094218677687e-05, "loss": 0.0975, "step": 4300 }, { "epoch": 116.24324324324324, "grad_norm": 1.1953125, "learning_rate": 2.0390113242969987e-05, "loss": 0.1235, "step": 4301 }, { "epoch": 116.27027027027027, "grad_norm": 0.6640625, "learning_rate": 2.0379133187880036e-05, "loss": 0.0822, "step": 4302 }, { "epoch": 116.29729729729729, "grad_norm": 0.458984375, "learning_rate": 2.0368154055600615e-05, "loss": 0.0631, "step": 4303 }, { "epoch": 116.32432432432432, "grad_norm": 0.546875, "learning_rate": 2.0357175848324307e-05, "loss": 0.0627, "step": 4304 }, { "epoch": 116.35135135135135, "grad_norm": 1.65625, "learning_rate": 2.0346198568243514e-05, "loss": 0.2076, "step": 4305 }, { "epoch": 116.37837837837837, "grad_norm": 0.8671875, "learning_rate": 2.033522221755045e-05, "loss": 0.0801, "step": 4306 }, { "epoch": 116.4054054054054, "grad_norm": 0.890625, "learning_rate": 2.0324246798437163e-05, "loss": 0.0977, "step": 4307 }, { "epoch": 116.43243243243244, "grad_norm": 1.171875, "learning_rate": 2.0313272313095484e-05, "loss": 0.1517, "step": 4308 }, { "epoch": 116.45945945945945, "grad_norm": 1.25, "learning_rate": 2.030229876371708e-05, "loss": 0.1085, "step": 4309 }, { "epoch": 116.48648648648648, "grad_norm": 1.4296875, "learning_rate": 2.0291326152493433e-05, "loss": 0.2176, "step": 4310 }, { "epoch": 116.51351351351352, "grad_norm": 1.125, "learning_rate": 2.0280354481615816e-05, "loss": 0.1135, "step": 4311 }, { "epoch": 116.54054054054055, "grad_norm": 0.91796875, "learning_rate": 2.0269383753275327e-05, "loss": 0.093, "step": 4312 }, { "epoch": 116.56756756756756, "grad_norm": 1.0, "learning_rate": 2.0258413969662892e-05, "loss": 0.1165, "step": 4313 }, { "epoch": 116.5945945945946, "grad_norm": 0.9296875, "learning_rate": 2.0247445132969216e-05, "loss": 0.1061, "step": 4314 }, { "epoch": 116.62162162162163, "grad_norm": 0.94140625, "learning_rate": 2.023647724538483e-05, "loss": 0.1052, "step": 4315 }, { "epoch": 116.64864864864865, "grad_norm": 1.0390625, "learning_rate": 2.02255103091001e-05, "loss": 0.1162, "step": 4316 }, { "epoch": 116.67567567567568, "grad_norm": 1.15625, "learning_rate": 2.021454432630515e-05, "loss": 0.132, "step": 4317 }, { "epoch": 116.70270270270271, "grad_norm": 0.7890625, "learning_rate": 2.0203579299189964e-05, "loss": 0.0852, "step": 4318 }, { "epoch": 116.72972972972973, "grad_norm": 0.81640625, "learning_rate": 2.0192615229944293e-05, "loss": 0.0869, "step": 4319 }, { "epoch": 116.75675675675676, "grad_norm": 1.53125, "learning_rate": 2.0181652120757736e-05, "loss": 0.2125, "step": 4320 }, { "epoch": 116.78378378378379, "grad_norm": 1.4453125, "learning_rate": 2.0170689973819674e-05, "loss": 0.1866, "step": 4321 }, { "epoch": 116.8108108108108, "grad_norm": 0.76171875, "learning_rate": 2.01597287913193e-05, "loss": 0.0843, "step": 4322 }, { "epoch": 116.83783783783784, "grad_norm": 0.71484375, "learning_rate": 2.0148768575445617e-05, "loss": 0.0835, "step": 4323 }, { "epoch": 116.86486486486487, "grad_norm": 0.76953125, "learning_rate": 2.0137809328387454e-05, "loss": 0.0757, "step": 4324 }, { "epoch": 116.89189189189189, "grad_norm": 1.1875, "learning_rate": 2.0126851052333397e-05, "loss": 0.1692, "step": 4325 }, { "epoch": 116.91891891891892, "grad_norm": 0.474609375, "learning_rate": 2.011589374947189e-05, "loss": 0.0546, "step": 4326 }, { "epoch": 116.94594594594595, "grad_norm": 1.2421875, "learning_rate": 2.010493742199117e-05, "loss": 0.1363, "step": 4327 }, { "epoch": 116.97297297297297, "grad_norm": 1.234375, "learning_rate": 2.0093982072079246e-05, "loss": 0.0891, "step": 4328 }, { "epoch": 117.0, "grad_norm": 0.73046875, "learning_rate": 2.008302770192397e-05, "loss": 0.0895, "step": 4329 }, { "epoch": 117.02702702702703, "grad_norm": 1.203125, "learning_rate": 2.0072074313712997e-05, "loss": 0.1597, "step": 4330 }, { "epoch": 117.05405405405405, "grad_norm": 0.74609375, "learning_rate": 2.006112190963375e-05, "loss": 0.0801, "step": 4331 }, { "epoch": 117.08108108108108, "grad_norm": 0.9921875, "learning_rate": 2.0050170491873494e-05, "loss": 0.1194, "step": 4332 }, { "epoch": 117.10810810810811, "grad_norm": 1.1875, "learning_rate": 2.003922006261929e-05, "loss": 0.0821, "step": 4333 }, { "epoch": 117.13513513513513, "grad_norm": 1.0234375, "learning_rate": 2.0028270624057975e-05, "loss": 0.1146, "step": 4334 }, { "epoch": 117.16216216216216, "grad_norm": 0.94921875, "learning_rate": 2.001732217837623e-05, "loss": 0.1072, "step": 4335 }, { "epoch": 117.1891891891892, "grad_norm": 0.6015625, "learning_rate": 2.000637472776049e-05, "loss": 0.0656, "step": 4336 }, { "epoch": 117.21621621621621, "grad_norm": 0.9296875, "learning_rate": 1.9995428274397026e-05, "loss": 0.0939, "step": 4337 }, { "epoch": 117.24324324324324, "grad_norm": 0.66796875, "learning_rate": 1.9984482820471917e-05, "loss": 0.0767, "step": 4338 }, { "epoch": 117.27027027027027, "grad_norm": 0.98828125, "learning_rate": 1.9973538368171e-05, "loss": 0.0872, "step": 4339 }, { "epoch": 117.29729729729729, "grad_norm": 0.703125, "learning_rate": 1.9962594919679956e-05, "loss": 0.0864, "step": 4340 }, { "epoch": 117.32432432432432, "grad_norm": 1.2265625, "learning_rate": 1.9951652477184237e-05, "loss": 0.1326, "step": 4341 }, { "epoch": 117.35135135135135, "grad_norm": 1.140625, "learning_rate": 1.994071104286911e-05, "loss": 0.0865, "step": 4342 }, { "epoch": 117.37837837837837, "grad_norm": 0.95703125, "learning_rate": 1.9929770618919627e-05, "loss": 0.1027, "step": 4343 }, { "epoch": 117.4054054054054, "grad_norm": 1.1484375, "learning_rate": 1.9918831207520664e-05, "loss": 0.1283, "step": 4344 }, { "epoch": 117.43243243243244, "grad_norm": 0.62890625, "learning_rate": 1.9907892810856852e-05, "loss": 0.0748, "step": 4345 }, { "epoch": 117.45945945945945, "grad_norm": 1.0546875, "learning_rate": 1.9896955431112658e-05, "loss": 0.1404, "step": 4346 }, { "epoch": 117.48648648648648, "grad_norm": 0.73046875, "learning_rate": 1.988601907047234e-05, "loss": 0.0794, "step": 4347 }, { "epoch": 117.51351351351352, "grad_norm": 0.9765625, "learning_rate": 1.9875083731119923e-05, "loss": 0.1141, "step": 4348 }, { "epoch": 117.54054054054055, "grad_norm": 0.859375, "learning_rate": 1.9864149415239274e-05, "loss": 0.0845, "step": 4349 }, { "epoch": 117.56756756756756, "grad_norm": 0.9921875, "learning_rate": 1.985321612501401e-05, "loss": 0.1063, "step": 4350 }, { "epoch": 117.5945945945946, "grad_norm": 1.28125, "learning_rate": 1.9842283862627574e-05, "loss": 0.1398, "step": 4351 }, { "epoch": 117.62162162162163, "grad_norm": 1.3203125, "learning_rate": 1.9831352630263206e-05, "loss": 0.1497, "step": 4352 }, { "epoch": 117.64864864864865, "grad_norm": 1.140625, "learning_rate": 1.9820422430103904e-05, "loss": 0.1333, "step": 4353 }, { "epoch": 117.67567567567568, "grad_norm": 0.83984375, "learning_rate": 1.9809493264332503e-05, "loss": 0.0928, "step": 4354 }, { "epoch": 117.70270270270271, "grad_norm": 1.171875, "learning_rate": 1.979856513513161e-05, "loss": 0.1764, "step": 4355 }, { "epoch": 117.72972972972973, "grad_norm": 1.328125, "learning_rate": 1.9787638044683618e-05, "loss": 0.1568, "step": 4356 }, { "epoch": 117.75675675675676, "grad_norm": 0.65234375, "learning_rate": 1.9776711995170733e-05, "loss": 0.071, "step": 4357 }, { "epoch": 117.78378378378379, "grad_norm": 1.0234375, "learning_rate": 1.976578698877494e-05, "loss": 0.1465, "step": 4358 }, { "epoch": 117.8108108108108, "grad_norm": 2.015625, "learning_rate": 1.9754863027678016e-05, "loss": 0.2279, "step": 4359 }, { "epoch": 117.83783783783784, "grad_norm": 1.765625, "learning_rate": 1.9743940114061526e-05, "loss": 0.2369, "step": 4360 }, { "epoch": 117.86486486486487, "grad_norm": 0.64453125, "learning_rate": 1.973301825010685e-05, "loss": 0.0665, "step": 4361 }, { "epoch": 117.89189189189189, "grad_norm": 0.68359375, "learning_rate": 1.9722097437995113e-05, "loss": 0.0832, "step": 4362 }, { "epoch": 117.91891891891892, "grad_norm": 1.0625, "learning_rate": 1.971117767990727e-05, "loss": 0.1431, "step": 4363 }, { "epoch": 117.94594594594595, "grad_norm": 0.875, "learning_rate": 1.970025897802407e-05, "loss": 0.1181, "step": 4364 }, { "epoch": 117.97297297297297, "grad_norm": 0.50390625, "learning_rate": 1.9689341334526e-05, "loss": 0.0612, "step": 4365 }, { "epoch": 118.0, "grad_norm": 1.09375, "learning_rate": 1.9678424751593398e-05, "loss": 0.0858, "step": 4366 }, { "epoch": 118.02702702702703, "grad_norm": 1.03125, "learning_rate": 1.9667509231406334e-05, "loss": 0.0943, "step": 4367 }, { "epoch": 118.05405405405405, "grad_norm": 1.1796875, "learning_rate": 1.9656594776144703e-05, "loss": 0.1142, "step": 4368 }, { "epoch": 118.08108108108108, "grad_norm": 1.2265625, "learning_rate": 1.9645681387988197e-05, "loss": 0.1329, "step": 4369 }, { "epoch": 118.10810810810811, "grad_norm": 0.46875, "learning_rate": 1.9634769069116244e-05, "loss": 0.0598, "step": 4370 }, { "epoch": 118.13513513513513, "grad_norm": 1.1015625, "learning_rate": 1.9623857821708105e-05, "loss": 0.1214, "step": 4371 }, { "epoch": 118.16216216216216, "grad_norm": 0.71875, "learning_rate": 1.961294764794282e-05, "loss": 0.0822, "step": 4372 }, { "epoch": 118.1891891891892, "grad_norm": 1.28125, "learning_rate": 1.9602038549999184e-05, "loss": 0.168, "step": 4373 }, { "epoch": 118.21621621621621, "grad_norm": 1.0390625, "learning_rate": 1.9591130530055812e-05, "loss": 0.1538, "step": 4374 }, { "epoch": 118.24324324324324, "grad_norm": 0.91015625, "learning_rate": 1.9580223590291104e-05, "loss": 0.0927, "step": 4375 }, { "epoch": 118.27027027027027, "grad_norm": 0.7265625, "learning_rate": 1.9569317732883198e-05, "loss": 0.0724, "step": 4376 }, { "epoch": 118.29729729729729, "grad_norm": 1.109375, "learning_rate": 1.955841296001008e-05, "loss": 0.113, "step": 4377 }, { "epoch": 118.32432432432432, "grad_norm": 1.125, "learning_rate": 1.9547509273849472e-05, "loss": 0.1232, "step": 4378 }, { "epoch": 118.35135135135135, "grad_norm": 1.0546875, "learning_rate": 1.9536606676578905e-05, "loss": 0.1057, "step": 4379 }, { "epoch": 118.37837837837837, "grad_norm": 0.80078125, "learning_rate": 1.9525705170375675e-05, "loss": 0.0952, "step": 4380 }, { "epoch": 118.4054054054054, "grad_norm": 1.4375, "learning_rate": 1.951480475741687e-05, "loss": 0.1538, "step": 4381 }, { "epoch": 118.43243243243244, "grad_norm": 0.81640625, "learning_rate": 1.9503905439879356e-05, "loss": 0.0867, "step": 4382 }, { "epoch": 118.45945945945945, "grad_norm": 1.0390625, "learning_rate": 1.949300721993979e-05, "loss": 0.1058, "step": 4383 }, { "epoch": 118.48648648648648, "grad_norm": 1.1484375, "learning_rate": 1.948211009977459e-05, "loss": 0.1579, "step": 4384 }, { "epoch": 118.51351351351352, "grad_norm": 0.95703125, "learning_rate": 1.947121408155997e-05, "loss": 0.1029, "step": 4385 }, { "epoch": 118.54054054054055, "grad_norm": 1.015625, "learning_rate": 1.9460319167471935e-05, "loss": 0.0961, "step": 4386 }, { "epoch": 118.56756756756756, "grad_norm": 0.82421875, "learning_rate": 1.944942535968623e-05, "loss": 0.0967, "step": 4387 }, { "epoch": 118.5945945945946, "grad_norm": 1.2890625, "learning_rate": 1.943853266037842e-05, "loss": 0.1169, "step": 4388 }, { "epoch": 118.62162162162163, "grad_norm": 1.3984375, "learning_rate": 1.9427641071723835e-05, "loss": 0.1792, "step": 4389 }, { "epoch": 118.64864864864865, "grad_norm": 0.890625, "learning_rate": 1.941675059589756e-05, "loss": 0.0925, "step": 4390 }, { "epoch": 118.67567567567568, "grad_norm": 0.734375, "learning_rate": 1.9405861235074496e-05, "loss": 0.0766, "step": 4391 }, { "epoch": 118.70270270270271, "grad_norm": 1.0390625, "learning_rate": 1.9394972991429307e-05, "loss": 0.1184, "step": 4392 }, { "epoch": 118.72972972972973, "grad_norm": 0.9765625, "learning_rate": 1.9384085867136416e-05, "loss": 0.1223, "step": 4393 }, { "epoch": 118.75675675675676, "grad_norm": 1.0234375, "learning_rate": 1.937319986437005e-05, "loss": 0.1075, "step": 4394 }, { "epoch": 118.78378378378379, "grad_norm": 1.0859375, "learning_rate": 1.9362314985304182e-05, "loss": 0.1035, "step": 4395 }, { "epoch": 118.8108108108108, "grad_norm": 0.66015625, "learning_rate": 1.9351431232112595e-05, "loss": 0.0716, "step": 4396 }, { "epoch": 118.83783783783784, "grad_norm": 0.83203125, "learning_rate": 1.934054860696882e-05, "loss": 0.0932, "step": 4397 }, { "epoch": 118.86486486486487, "grad_norm": 0.7734375, "learning_rate": 1.9329667112046174e-05, "loss": 0.0873, "step": 4398 }, { "epoch": 118.89189189189189, "grad_norm": 0.74609375, "learning_rate": 1.9318786749517755e-05, "loss": 0.0763, "step": 4399 }, { "epoch": 118.91891891891892, "grad_norm": 0.61328125, "learning_rate": 1.930790752155641e-05, "loss": 0.0648, "step": 4400 }, { "epoch": 118.94594594594595, "grad_norm": 0.67578125, "learning_rate": 1.9297029430334792e-05, "loss": 0.0754, "step": 4401 }, { "epoch": 118.97297297297297, "grad_norm": 1.421875, "learning_rate": 1.9286152478025297e-05, "loss": 0.1413, "step": 4402 }, { "epoch": 119.0, "grad_norm": 0.9296875, "learning_rate": 1.927527666680013e-05, "loss": 0.09, "step": 4403 }, { "epoch": 119.02702702702703, "grad_norm": 1.34375, "learning_rate": 1.9264401998831213e-05, "loss": 0.1703, "step": 4404 }, { "epoch": 119.05405405405405, "grad_norm": 0.5859375, "learning_rate": 1.9253528476290293e-05, "loss": 0.0662, "step": 4405 }, { "epoch": 119.08108108108108, "grad_norm": 1.4296875, "learning_rate": 1.9242656101348875e-05, "loss": 0.1164, "step": 4406 }, { "epoch": 119.10810810810811, "grad_norm": 0.5390625, "learning_rate": 1.9231784876178206e-05, "loss": 0.0697, "step": 4407 }, { "epoch": 119.13513513513513, "grad_norm": 0.98046875, "learning_rate": 1.9220914802949334e-05, "loss": 0.1018, "step": 4408 }, { "epoch": 119.16216216216216, "grad_norm": 1.09375, "learning_rate": 1.9210045883833077e-05, "loss": 0.1207, "step": 4409 }, { "epoch": 119.1891891891892, "grad_norm": 1.46875, "learning_rate": 1.9199178121e-05, "loss": 0.1475, "step": 4410 }, { "epoch": 119.21621621621621, "grad_norm": 0.98046875, "learning_rate": 1.9188311516620466e-05, "loss": 0.131, "step": 4411 }, { "epoch": 119.24324324324324, "grad_norm": 0.91796875, "learning_rate": 1.9177446072864565e-05, "loss": 0.1136, "step": 4412 }, { "epoch": 119.27027027027027, "grad_norm": 1.1484375, "learning_rate": 1.9166581791902204e-05, "loss": 0.1359, "step": 4413 }, { "epoch": 119.29729729729729, "grad_norm": 0.9921875, "learning_rate": 1.9155718675903035e-05, "loss": 0.115, "step": 4414 }, { "epoch": 119.32432432432432, "grad_norm": 0.7109375, "learning_rate": 1.914485672703646e-05, "loss": 0.0851, "step": 4415 }, { "epoch": 119.35135135135135, "grad_norm": 0.89453125, "learning_rate": 1.9133995947471678e-05, "loss": 0.0916, "step": 4416 }, { "epoch": 119.37837837837837, "grad_norm": 0.6015625, "learning_rate": 1.912313633937764e-05, "loss": 0.0763, "step": 4417 }, { "epoch": 119.4054054054054, "grad_norm": 1.3203125, "learning_rate": 1.9112277904923065e-05, "loss": 0.145, "step": 4418 }, { "epoch": 119.43243243243244, "grad_norm": 0.54296875, "learning_rate": 1.9101420646276433e-05, "loss": 0.0683, "step": 4419 }, { "epoch": 119.45945945945945, "grad_norm": 0.98046875, "learning_rate": 1.9090564565606005e-05, "loss": 0.1317, "step": 4420 }, { "epoch": 119.48648648648648, "grad_norm": 0.625, "learning_rate": 1.9079709665079778e-05, "loss": 0.0748, "step": 4421 }, { "epoch": 119.51351351351352, "grad_norm": 0.56640625, "learning_rate": 1.9068855946865538e-05, "loss": 0.0697, "step": 4422 }, { "epoch": 119.54054054054055, "grad_norm": 1.015625, "learning_rate": 1.905800341313084e-05, "loss": 0.0789, "step": 4423 }, { "epoch": 119.56756756756756, "grad_norm": 0.92578125, "learning_rate": 1.9047152066042974e-05, "loss": 0.1232, "step": 4424 }, { "epoch": 119.5945945945946, "grad_norm": 1.328125, "learning_rate": 1.903630190776902e-05, "loss": 0.1088, "step": 4425 }, { "epoch": 119.62162162162163, "grad_norm": 0.53125, "learning_rate": 1.9025452940475794e-05, "loss": 0.0699, "step": 4426 }, { "epoch": 119.64864864864865, "grad_norm": 1.2421875, "learning_rate": 1.90146051663299e-05, "loss": 0.1672, "step": 4427 }, { "epoch": 119.67567567567568, "grad_norm": 0.490234375, "learning_rate": 1.9003758587497706e-05, "loss": 0.0621, "step": 4428 }, { "epoch": 119.70270270270271, "grad_norm": 1.046875, "learning_rate": 1.8992913206145303e-05, "loss": 0.1103, "step": 4429 }, { "epoch": 119.72972972972973, "grad_norm": 0.984375, "learning_rate": 1.8982069024438585e-05, "loss": 0.1072, "step": 4430 }, { "epoch": 119.75675675675676, "grad_norm": 0.765625, "learning_rate": 1.8971226044543196e-05, "loss": 0.0885, "step": 4431 }, { "epoch": 119.78378378378379, "grad_norm": 0.9921875, "learning_rate": 1.896038426862451e-05, "loss": 0.1025, "step": 4432 }, { "epoch": 119.8108108108108, "grad_norm": 0.90625, "learning_rate": 1.8949543698847704e-05, "loss": 0.0923, "step": 4433 }, { "epoch": 119.83783783783784, "grad_norm": 0.75, "learning_rate": 1.89387043373777e-05, "loss": 0.0842, "step": 4434 }, { "epoch": 119.86486486486487, "grad_norm": 1.234375, "learning_rate": 1.892786618637915e-05, "loss": 0.1965, "step": 4435 }, { "epoch": 119.89189189189189, "grad_norm": 1.0390625, "learning_rate": 1.891702924801651e-05, "loss": 0.1453, "step": 4436 }, { "epoch": 119.91891891891892, "grad_norm": 1.515625, "learning_rate": 1.8906193524453964e-05, "loss": 0.2204, "step": 4437 }, { "epoch": 119.94594594594595, "grad_norm": 0.78125, "learning_rate": 1.8895359017855458e-05, "loss": 0.0946, "step": 4438 }, { "epoch": 119.97297297297297, "grad_norm": 0.9296875, "learning_rate": 1.8884525730384693e-05, "loss": 0.0964, "step": 4439 }, { "epoch": 120.0, "grad_norm": 0.8125, "learning_rate": 1.887369366420515e-05, "loss": 0.0815, "step": 4440 }, { "epoch": 120.02702702702703, "grad_norm": 1.765625, "learning_rate": 1.8862862821480025e-05, "loss": 0.214, "step": 4441 }, { "epoch": 120.05405405405405, "grad_norm": 1.078125, "learning_rate": 1.885203320437231e-05, "loss": 0.1432, "step": 4442 }, { "epoch": 120.08108108108108, "grad_norm": 1.0859375, "learning_rate": 1.884120481504472e-05, "loss": 0.1264, "step": 4443 }, { "epoch": 120.10810810810811, "grad_norm": 0.482421875, "learning_rate": 1.8830377655659746e-05, "loss": 0.0614, "step": 4444 }, { "epoch": 120.13513513513513, "grad_norm": 0.4921875, "learning_rate": 1.8819551728379635e-05, "loss": 0.0662, "step": 4445 }, { "epoch": 120.16216216216216, "grad_norm": 0.9765625, "learning_rate": 1.8808727035366365e-05, "loss": 0.1037, "step": 4446 }, { "epoch": 120.1891891891892, "grad_norm": 1.5234375, "learning_rate": 1.879790357878169e-05, "loss": 0.1203, "step": 4447 }, { "epoch": 120.21621621621621, "grad_norm": 1.1171875, "learning_rate": 1.878708136078711e-05, "loss": 0.1349, "step": 4448 }, { "epoch": 120.24324324324324, "grad_norm": 1.0390625, "learning_rate": 1.8776260383543873e-05, "loss": 0.1223, "step": 4449 }, { "epoch": 120.27027027027027, "grad_norm": 0.6484375, "learning_rate": 1.876544064921298e-05, "loss": 0.0789, "step": 4450 }, { "epoch": 120.29729729729729, "grad_norm": 0.64453125, "learning_rate": 1.8754622159955205e-05, "loss": 0.0755, "step": 4451 }, { "epoch": 120.32432432432432, "grad_norm": 1.3359375, "learning_rate": 1.874380491793103e-05, "loss": 0.1459, "step": 4452 }, { "epoch": 120.35135135135135, "grad_norm": 1.296875, "learning_rate": 1.8732988925300725e-05, "loss": 0.1118, "step": 4453 }, { "epoch": 120.37837837837837, "grad_norm": 1.1328125, "learning_rate": 1.8722174184224303e-05, "loss": 0.1221, "step": 4454 }, { "epoch": 120.4054054054054, "grad_norm": 0.98828125, "learning_rate": 1.871136069686152e-05, "loss": 0.091, "step": 4455 }, { "epoch": 120.43243243243244, "grad_norm": 1.3125, "learning_rate": 1.8700548465371874e-05, "loss": 0.168, "step": 4456 }, { "epoch": 120.45945945945945, "grad_norm": 0.82421875, "learning_rate": 1.8689737491914634e-05, "loss": 0.0753, "step": 4457 }, { "epoch": 120.48648648648648, "grad_norm": 1.2578125, "learning_rate": 1.8678927778648798e-05, "loss": 0.1712, "step": 4458 }, { "epoch": 120.51351351351352, "grad_norm": 0.9765625, "learning_rate": 1.8668119327733134e-05, "loss": 0.0909, "step": 4459 }, { "epoch": 120.54054054054055, "grad_norm": 1.3515625, "learning_rate": 1.865731214132613e-05, "loss": 0.1975, "step": 4460 }, { "epoch": 120.56756756756756, "grad_norm": 0.6171875, "learning_rate": 1.864650622158604e-05, "loss": 0.0748, "step": 4461 }, { "epoch": 120.5945945945946, "grad_norm": 0.9140625, "learning_rate": 1.8635701570670872e-05, "loss": 0.1007, "step": 4462 }, { "epoch": 120.62162162162163, "grad_norm": 1.0390625, "learning_rate": 1.8624898190738348e-05, "loss": 0.0956, "step": 4463 }, { "epoch": 120.64864864864865, "grad_norm": 0.97265625, "learning_rate": 1.861409608394597e-05, "loss": 0.0902, "step": 4464 }, { "epoch": 120.67567567567568, "grad_norm": 1.3984375, "learning_rate": 1.8603295252450985e-05, "loss": 0.1846, "step": 4465 }, { "epoch": 120.70270270270271, "grad_norm": 1.1484375, "learning_rate": 1.859249569841035e-05, "loss": 0.1333, "step": 4466 }, { "epoch": 120.72972972972973, "grad_norm": 0.921875, "learning_rate": 1.8581697423980804e-05, "loss": 0.0958, "step": 4467 }, { "epoch": 120.75675675675676, "grad_norm": 1.375, "learning_rate": 1.8570900431318825e-05, "loss": 0.1471, "step": 4468 }, { "epoch": 120.78378378378379, "grad_norm": 1.375, "learning_rate": 1.856010472258061e-05, "loss": 0.1965, "step": 4469 }, { "epoch": 120.8108108108108, "grad_norm": 0.5703125, "learning_rate": 1.854931029992213e-05, "loss": 0.0664, "step": 4470 }, { "epoch": 120.83783783783784, "grad_norm": 0.8203125, "learning_rate": 1.8538517165499076e-05, "loss": 0.096, "step": 4471 }, { "epoch": 120.86486486486487, "grad_norm": 1.3125, "learning_rate": 1.8527725321466893e-05, "loss": 0.1449, "step": 4472 }, { "epoch": 120.89189189189189, "grad_norm": 1.0, "learning_rate": 1.851693476998078e-05, "loss": 0.1074, "step": 4473 }, { "epoch": 120.91891891891892, "grad_norm": 0.5703125, "learning_rate": 1.850614551319565e-05, "loss": 0.0737, "step": 4474 }, { "epoch": 120.94594594594595, "grad_norm": 0.8671875, "learning_rate": 1.8495357553266178e-05, "loss": 0.0965, "step": 4475 }, { "epoch": 120.97297297297297, "grad_norm": 0.4296875, "learning_rate": 1.848457089234677e-05, "loss": 0.0599, "step": 4476 }, { "epoch": 121.0, "grad_norm": 0.97265625, "learning_rate": 1.8473785532591588e-05, "loss": 0.0952, "step": 4477 }, { "epoch": 121.02702702702703, "grad_norm": 0.80078125, "learning_rate": 1.8463001476154508e-05, "loss": 0.0889, "step": 4478 }, { "epoch": 121.05405405405405, "grad_norm": 1.1640625, "learning_rate": 1.8452218725189177e-05, "loss": 0.136, "step": 4479 }, { "epoch": 121.08108108108108, "grad_norm": 0.796875, "learning_rate": 1.8441437281848943e-05, "loss": 0.0802, "step": 4480 }, { "epoch": 121.10810810810811, "grad_norm": 1.203125, "learning_rate": 1.8430657148286933e-05, "loss": 0.1336, "step": 4481 }, { "epoch": 121.13513513513513, "grad_norm": 0.95703125, "learning_rate": 1.8419878326656002e-05, "loss": 0.0997, "step": 4482 }, { "epoch": 121.16216216216216, "grad_norm": 0.7265625, "learning_rate": 1.8409100819108707e-05, "loss": 0.0662, "step": 4483 }, { "epoch": 121.1891891891892, "grad_norm": 0.75, "learning_rate": 1.839832462779739e-05, "loss": 0.0742, "step": 4484 }, { "epoch": 121.21621621621621, "grad_norm": 0.57421875, "learning_rate": 1.8387549754874116e-05, "loss": 0.0645, "step": 4485 }, { "epoch": 121.24324324324324, "grad_norm": 0.61328125, "learning_rate": 1.8376776202490664e-05, "loss": 0.0814, "step": 4486 }, { "epoch": 121.27027027027027, "grad_norm": 1.15625, "learning_rate": 1.8366003972798588e-05, "loss": 0.184, "step": 4487 }, { "epoch": 121.29729729729729, "grad_norm": 1.046875, "learning_rate": 1.8355233067949132e-05, "loss": 0.1207, "step": 4488 }, { "epoch": 121.32432432432432, "grad_norm": 0.9765625, "learning_rate": 1.8344463490093317e-05, "loss": 0.093, "step": 4489 }, { "epoch": 121.35135135135135, "grad_norm": 1.171875, "learning_rate": 1.8333695241381886e-05, "loss": 0.1146, "step": 4490 }, { "epoch": 121.37837837837837, "grad_norm": 1.1640625, "learning_rate": 1.8322928323965298e-05, "loss": 0.1656, "step": 4491 }, { "epoch": 121.4054054054054, "grad_norm": 1.078125, "learning_rate": 1.831216273999377e-05, "loss": 0.1318, "step": 4492 }, { "epoch": 121.43243243243244, "grad_norm": 0.92578125, "learning_rate": 1.8301398491617247e-05, "loss": 0.1359, "step": 4493 }, { "epoch": 121.45945945945945, "grad_norm": 1.03125, "learning_rate": 1.8290635580985394e-05, "loss": 0.0965, "step": 4494 }, { "epoch": 121.48648648648648, "grad_norm": 1.5546875, "learning_rate": 1.8279874010247628e-05, "loss": 0.2551, "step": 4495 }, { "epoch": 121.51351351351352, "grad_norm": 1.515625, "learning_rate": 1.8269113781553082e-05, "loss": 0.1641, "step": 4496 }, { "epoch": 121.54054054054055, "grad_norm": 1.3359375, "learning_rate": 1.8258354897050638e-05, "loss": 0.1739, "step": 4497 }, { "epoch": 121.56756756756756, "grad_norm": 1.078125, "learning_rate": 1.8247597358888886e-05, "loss": 0.1008, "step": 4498 }, { "epoch": 121.5945945945946, "grad_norm": 0.7265625, "learning_rate": 1.8236841169216173e-05, "loss": 0.0882, "step": 4499 }, { "epoch": 121.62162162162163, "grad_norm": 0.7578125, "learning_rate": 1.8226086330180558e-05, "loss": 0.0845, "step": 4500 }, { "epoch": 121.64864864864865, "grad_norm": 0.4296875, "learning_rate": 1.8215332843929845e-05, "loss": 0.0567, "step": 4501 }, { "epoch": 121.67567567567568, "grad_norm": 1.4375, "learning_rate": 1.820458071261154e-05, "loss": 0.1162, "step": 4502 }, { "epoch": 121.70270270270271, "grad_norm": 1.0625, "learning_rate": 1.819382993837292e-05, "loss": 0.1322, "step": 4503 }, { "epoch": 121.72972972972973, "grad_norm": 1.34375, "learning_rate": 1.8183080523360964e-05, "loss": 0.1159, "step": 4504 }, { "epoch": 121.75675675675676, "grad_norm": 0.408203125, "learning_rate": 1.8172332469722375e-05, "loss": 0.0591, "step": 4505 }, { "epoch": 121.78378378378379, "grad_norm": 0.7734375, "learning_rate": 1.8161585779603597e-05, "loss": 0.086, "step": 4506 }, { "epoch": 121.8108108108108, "grad_norm": 1.1328125, "learning_rate": 1.8150840455150815e-05, "loss": 0.1144, "step": 4507 }, { "epoch": 121.83783783783784, "grad_norm": 0.98828125, "learning_rate": 1.81400964985099e-05, "loss": 0.1286, "step": 4508 }, { "epoch": 121.86486486486487, "grad_norm": 0.73828125, "learning_rate": 1.8129353911826486e-05, "loss": 0.0875, "step": 4509 }, { "epoch": 121.89189189189189, "grad_norm": 1.3046875, "learning_rate": 1.811861269724593e-05, "loss": 0.1867, "step": 4510 }, { "epoch": 121.91891891891892, "grad_norm": 1.25, "learning_rate": 1.8107872856913292e-05, "loss": 0.1839, "step": 4511 }, { "epoch": 121.94594594594595, "grad_norm": 1.109375, "learning_rate": 1.809713439297338e-05, "loss": 0.1353, "step": 4512 }, { "epoch": 121.97297297297297, "grad_norm": 0.921875, "learning_rate": 1.8086397307570723e-05, "loss": 0.1078, "step": 4513 }, { "epoch": 122.0, "grad_norm": 1.265625, "learning_rate": 1.8075661602849568e-05, "loss": 0.1973, "step": 4514 }, { "epoch": 122.02702702702703, "grad_norm": 0.52734375, "learning_rate": 1.806492728095389e-05, "loss": 0.0632, "step": 4515 }, { "epoch": 122.05405405405405, "grad_norm": 1.0546875, "learning_rate": 1.8054194344027386e-05, "loss": 0.1291, "step": 4516 }, { "epoch": 122.08108108108108, "grad_norm": 1.2265625, "learning_rate": 1.8043462794213474e-05, "loss": 0.1451, "step": 4517 }, { "epoch": 122.10810810810811, "grad_norm": 1.2109375, "learning_rate": 1.803273263365531e-05, "loss": 0.1384, "step": 4518 }, { "epoch": 122.13513513513513, "grad_norm": 1.3203125, "learning_rate": 1.802200386449575e-05, "loss": 0.1458, "step": 4519 }, { "epoch": 122.16216216216216, "grad_norm": 1.0625, "learning_rate": 1.8011276488877384e-05, "loss": 0.1241, "step": 4520 }, { "epoch": 122.1891891891892, "grad_norm": 1.1953125, "learning_rate": 1.8000550508942543e-05, "loss": 0.1576, "step": 4521 }, { "epoch": 122.21621621621621, "grad_norm": 0.7265625, "learning_rate": 1.7989825926833233e-05, "loss": 0.0751, "step": 4522 }, { "epoch": 122.24324324324324, "grad_norm": 0.78125, "learning_rate": 1.7979102744691215e-05, "loss": 0.0779, "step": 4523 }, { "epoch": 122.27027027027027, "grad_norm": 1.5703125, "learning_rate": 1.796838096465798e-05, "loss": 0.1317, "step": 4524 }, { "epoch": 122.29729729729729, "grad_norm": 0.71484375, "learning_rate": 1.7957660588874696e-05, "loss": 0.0825, "step": 4525 }, { "epoch": 122.32432432432432, "grad_norm": 1.2421875, "learning_rate": 1.7946941619482294e-05, "loss": 0.1657, "step": 4526 }, { "epoch": 122.35135135135135, "grad_norm": 0.9921875, "learning_rate": 1.793622405862141e-05, "loss": 0.1109, "step": 4527 }, { "epoch": 122.37837837837837, "grad_norm": 1.03125, "learning_rate": 1.7925507908432372e-05, "loss": 0.1145, "step": 4528 }, { "epoch": 122.4054054054054, "grad_norm": 0.9765625, "learning_rate": 1.7914793171055265e-05, "loss": 0.1087, "step": 4529 }, { "epoch": 122.43243243243244, "grad_norm": 1.1015625, "learning_rate": 1.790407984862989e-05, "loss": 0.1256, "step": 4530 }, { "epoch": 122.45945945945945, "grad_norm": 0.8828125, "learning_rate": 1.7893367943295725e-05, "loss": 0.082, "step": 4531 }, { "epoch": 122.48648648648648, "grad_norm": 1.171875, "learning_rate": 1.7882657457192013e-05, "loss": 0.0992, "step": 4532 }, { "epoch": 122.51351351351352, "grad_norm": 0.6796875, "learning_rate": 1.7871948392457678e-05, "loss": 0.0678, "step": 4533 }, { "epoch": 122.54054054054055, "grad_norm": 0.7109375, "learning_rate": 1.786124075123138e-05, "loss": 0.0767, "step": 4534 }, { "epoch": 122.56756756756756, "grad_norm": 1.265625, "learning_rate": 1.785053453565149e-05, "loss": 0.106, "step": 4535 }, { "epoch": 122.5945945945946, "grad_norm": 0.97265625, "learning_rate": 1.7839829747856096e-05, "loss": 0.1079, "step": 4536 }, { "epoch": 122.62162162162163, "grad_norm": 0.86328125, "learning_rate": 1.7829126389982988e-05, "loss": 0.0984, "step": 4537 }, { "epoch": 122.64864864864865, "grad_norm": 0.86328125, "learning_rate": 1.78184244641697e-05, "loss": 0.0807, "step": 4538 }, { "epoch": 122.67567567567568, "grad_norm": 1.2109375, "learning_rate": 1.7807723972553433e-05, "loss": 0.1423, "step": 4539 }, { "epoch": 122.70270270270271, "grad_norm": 0.89453125, "learning_rate": 1.779702491727115e-05, "loss": 0.0982, "step": 4540 }, { "epoch": 122.72972972972973, "grad_norm": 1.34375, "learning_rate": 1.778632730045951e-05, "loss": 0.2237, "step": 4541 }, { "epoch": 122.75675675675676, "grad_norm": 0.43359375, "learning_rate": 1.7775631124254866e-05, "loss": 0.0629, "step": 4542 }, { "epoch": 122.78378378378379, "grad_norm": 0.9375, "learning_rate": 1.7764936390793302e-05, "loss": 0.1049, "step": 4543 }, { "epoch": 122.8108108108108, "grad_norm": 0.76171875, "learning_rate": 1.7754243102210623e-05, "loss": 0.074, "step": 4544 }, { "epoch": 122.83783783783784, "grad_norm": 1.5625, "learning_rate": 1.7743551260642317e-05, "loss": 0.152, "step": 4545 }, { "epoch": 122.86486486486487, "grad_norm": 0.9140625, "learning_rate": 1.7732860868223616e-05, "loss": 0.1149, "step": 4546 }, { "epoch": 122.89189189189189, "grad_norm": 1.125, "learning_rate": 1.7722171927089424e-05, "loss": 0.1514, "step": 4547 }, { "epoch": 122.91891891891892, "grad_norm": 1.7265625, "learning_rate": 1.771148443937439e-05, "loss": 0.2111, "step": 4548 }, { "epoch": 122.94594594594595, "grad_norm": 1.3203125, "learning_rate": 1.770079840721287e-05, "loss": 0.0951, "step": 4549 }, { "epoch": 122.97297297297297, "grad_norm": 1.1328125, "learning_rate": 1.769011383273889e-05, "loss": 0.1264, "step": 4550 }, { "epoch": 123.0, "grad_norm": 1.2890625, "learning_rate": 1.7679430718086243e-05, "loss": 0.1568, "step": 4551 }, { "epoch": 123.02702702702703, "grad_norm": 0.78515625, "learning_rate": 1.7668749065388385e-05, "loss": 0.0973, "step": 4552 }, { "epoch": 123.05405405405405, "grad_norm": 0.9765625, "learning_rate": 1.76580688767785e-05, "loss": 0.0833, "step": 4553 }, { "epoch": 123.08108108108108, "grad_norm": 1.1640625, "learning_rate": 1.764739015438947e-05, "loss": 0.1262, "step": 4554 }, { "epoch": 123.10810810810811, "grad_norm": 1.0859375, "learning_rate": 1.7636712900353905e-05, "loss": 0.1316, "step": 4555 }, { "epoch": 123.13513513513513, "grad_norm": 0.66796875, "learning_rate": 1.7626037116804087e-05, "loss": 0.0802, "step": 4556 }, { "epoch": 123.16216216216216, "grad_norm": 1.234375, "learning_rate": 1.7615362805872032e-05, "loss": 0.1717, "step": 4557 }, { "epoch": 123.1891891891892, "grad_norm": 0.51953125, "learning_rate": 1.760468996968947e-05, "loss": 0.0728, "step": 4558 }, { "epoch": 123.21621621621621, "grad_norm": 1.25, "learning_rate": 1.7594018610387795e-05, "loss": 0.134, "step": 4559 }, { "epoch": 123.24324324324324, "grad_norm": 1.359375, "learning_rate": 1.7583348730098144e-05, "loss": 0.1461, "step": 4560 }, { "epoch": 123.27027027027027, "grad_norm": 1.296875, "learning_rate": 1.757268033095136e-05, "loss": 0.1895, "step": 4561 }, { "epoch": 123.29729729729729, "grad_norm": 1.140625, "learning_rate": 1.7562013415077943e-05, "loss": 0.1124, "step": 4562 }, { "epoch": 123.32432432432432, "grad_norm": 0.65234375, "learning_rate": 1.755134798460816e-05, "loss": 0.0831, "step": 4563 }, { "epoch": 123.35135135135135, "grad_norm": 0.51953125, "learning_rate": 1.7540684041671936e-05, "loss": 0.0691, "step": 4564 }, { "epoch": 123.37837837837837, "grad_norm": 1.0, "learning_rate": 1.753002158839892e-05, "loss": 0.1176, "step": 4565 }, { "epoch": 123.4054054054054, "grad_norm": 0.9140625, "learning_rate": 1.7519360626918464e-05, "loss": 0.1081, "step": 4566 }, { "epoch": 123.43243243243244, "grad_norm": 0.74609375, "learning_rate": 1.7508701159359596e-05, "loss": 0.0854, "step": 4567 }, { "epoch": 123.45945945945945, "grad_norm": 0.92578125, "learning_rate": 1.7498043187851083e-05, "loss": 0.1079, "step": 4568 }, { "epoch": 123.48648648648648, "grad_norm": 0.8828125, "learning_rate": 1.7487386714521378e-05, "loss": 0.1064, "step": 4569 }, { "epoch": 123.51351351351352, "grad_norm": 1.0, "learning_rate": 1.747673174149862e-05, "loss": 0.084, "step": 4570 }, { "epoch": 123.54054054054055, "grad_norm": 1.1328125, "learning_rate": 1.746607827091067e-05, "loss": 0.1425, "step": 4571 }, { "epoch": 123.56756756756756, "grad_norm": 1.2578125, "learning_rate": 1.7455426304885078e-05, "loss": 0.1722, "step": 4572 }, { "epoch": 123.5945945945946, "grad_norm": 1.390625, "learning_rate": 1.7444775845549092e-05, "loss": 0.1828, "step": 4573 }, { "epoch": 123.62162162162163, "grad_norm": 1.21875, "learning_rate": 1.7434126895029667e-05, "loss": 0.1495, "step": 4574 }, { "epoch": 123.64864864864865, "grad_norm": 1.1015625, "learning_rate": 1.742347945545346e-05, "loss": 0.1056, "step": 4575 }, { "epoch": 123.67567567567568, "grad_norm": 0.640625, "learning_rate": 1.7412833528946797e-05, "loss": 0.0686, "step": 4576 }, { "epoch": 123.70270270270271, "grad_norm": 0.80078125, "learning_rate": 1.740218911763575e-05, "loss": 0.0875, "step": 4577 }, { "epoch": 123.72972972972973, "grad_norm": 1.15625, "learning_rate": 1.739154622364604e-05, "loss": 0.1475, "step": 4578 }, { "epoch": 123.75675675675676, "grad_norm": 0.859375, "learning_rate": 1.738090484910311e-05, "loss": 0.092, "step": 4579 }, { "epoch": 123.78378378378379, "grad_norm": 0.94921875, "learning_rate": 1.7370264996132112e-05, "loss": 0.0708, "step": 4580 }, { "epoch": 123.8108108108108, "grad_norm": 0.875, "learning_rate": 1.735962666685786e-05, "loss": 0.1063, "step": 4581 }, { "epoch": 123.83783783783784, "grad_norm": 0.91796875, "learning_rate": 1.7348989863404887e-05, "loss": 0.1182, "step": 4582 }, { "epoch": 123.86486486486487, "grad_norm": 1.0546875, "learning_rate": 1.7338354587897434e-05, "loss": 0.1167, "step": 4583 }, { "epoch": 123.89189189189189, "grad_norm": 0.62890625, "learning_rate": 1.7327720842459394e-05, "loss": 0.0712, "step": 4584 }, { "epoch": 123.91891891891892, "grad_norm": 1.0859375, "learning_rate": 1.7317088629214394e-05, "loss": 0.1165, "step": 4585 }, { "epoch": 123.94594594594595, "grad_norm": 1.1640625, "learning_rate": 1.7306457950285745e-05, "loss": 0.1327, "step": 4586 }, { "epoch": 123.97297297297297, "grad_norm": 0.875, "learning_rate": 1.7295828807796434e-05, "loss": 0.0997, "step": 4587 }, { "epoch": 124.0, "grad_norm": 0.8515625, "learning_rate": 1.7285201203869163e-05, "loss": 0.0923, "step": 4588 }, { "epoch": 124.02702702702703, "grad_norm": 1.1484375, "learning_rate": 1.7274575140626318e-05, "loss": 0.1323, "step": 4589 }, { "epoch": 124.05405405405405, "grad_norm": 0.46484375, "learning_rate": 1.7263950620189974e-05, "loss": 0.0638, "step": 4590 }, { "epoch": 124.08108108108108, "grad_norm": 1.0078125, "learning_rate": 1.7253327644681906e-05, "loss": 0.1177, "step": 4591 }, { "epoch": 124.10810810810811, "grad_norm": 0.90234375, "learning_rate": 1.724270621622357e-05, "loss": 0.078, "step": 4592 }, { "epoch": 124.13513513513513, "grad_norm": 0.62890625, "learning_rate": 1.7232086336936126e-05, "loss": 0.0713, "step": 4593 }, { "epoch": 124.16216216216216, "grad_norm": 0.62109375, "learning_rate": 1.7221468008940416e-05, "loss": 0.0689, "step": 4594 }, { "epoch": 124.1891891891892, "grad_norm": 0.76171875, "learning_rate": 1.7210851234356975e-05, "loss": 0.0847, "step": 4595 }, { "epoch": 124.21621621621621, "grad_norm": 0.97265625, "learning_rate": 1.7200236015306018e-05, "loss": 0.1084, "step": 4596 }, { "epoch": 124.24324324324324, "grad_norm": 0.9140625, "learning_rate": 1.7189622353907474e-05, "loss": 0.134, "step": 4597 }, { "epoch": 124.27027027027027, "grad_norm": 1.171875, "learning_rate": 1.7179010252280923e-05, "loss": 0.1348, "step": 4598 }, { "epoch": 124.29729729729729, "grad_norm": 1.28125, "learning_rate": 1.7168399712545674e-05, "loss": 0.1352, "step": 4599 }, { "epoch": 124.32432432432432, "grad_norm": 0.95703125, "learning_rate": 1.7157790736820706e-05, "loss": 0.0886, "step": 4600 }, { "epoch": 124.35135135135135, "grad_norm": 1.03125, "learning_rate": 1.7147183327224668e-05, "loss": 0.1037, "step": 4601 }, { "epoch": 124.37837837837837, "grad_norm": 0.96875, "learning_rate": 1.7136577485875925e-05, "loss": 0.1078, "step": 4602 }, { "epoch": 124.4054054054054, "grad_norm": 1.109375, "learning_rate": 1.7125973214892528e-05, "loss": 0.1762, "step": 4603 }, { "epoch": 124.43243243243244, "grad_norm": 0.875, "learning_rate": 1.711537051639218e-05, "loss": 0.1075, "step": 4604 }, { "epoch": 124.45945945945945, "grad_norm": 1.1640625, "learning_rate": 1.7104769392492307e-05, "loss": 0.1684, "step": 4605 }, { "epoch": 124.48648648648648, "grad_norm": 1.5546875, "learning_rate": 1.7094169845310016e-05, "loss": 0.2189, "step": 4606 }, { "epoch": 124.51351351351352, "grad_norm": 0.99609375, "learning_rate": 1.7083571876962068e-05, "loss": 0.0865, "step": 4607 }, { "epoch": 124.54054054054055, "grad_norm": 0.9140625, "learning_rate": 1.7072975489564958e-05, "loss": 0.1026, "step": 4608 }, { "epoch": 124.56756756756756, "grad_norm": 1.3125, "learning_rate": 1.7062380685234807e-05, "loss": 0.1225, "step": 4609 }, { "epoch": 124.5945945945946, "grad_norm": 1.0859375, "learning_rate": 1.7051787466087478e-05, "loss": 0.1336, "step": 4610 }, { "epoch": 124.62162162162163, "grad_norm": 1.140625, "learning_rate": 1.704119583423848e-05, "loss": 0.1414, "step": 4611 }, { "epoch": 124.64864864864865, "grad_norm": 1.0078125, "learning_rate": 1.7030605791803014e-05, "loss": 0.1143, "step": 4612 }, { "epoch": 124.67567567567568, "grad_norm": 1.1796875, "learning_rate": 1.7020017340895965e-05, "loss": 0.1047, "step": 4613 }, { "epoch": 124.70270270270271, "grad_norm": 0.9453125, "learning_rate": 1.700943048363191e-05, "loss": 0.0969, "step": 4614 }, { "epoch": 124.72972972972973, "grad_norm": 1.484375, "learning_rate": 1.699884522212508e-05, "loss": 0.1541, "step": 4615 }, { "epoch": 124.75675675675676, "grad_norm": 0.984375, "learning_rate": 1.6988261558489417e-05, "loss": 0.1133, "step": 4616 }, { "epoch": 124.78378378378379, "grad_norm": 0.6171875, "learning_rate": 1.697767949483854e-05, "loss": 0.071, "step": 4617 }, { "epoch": 124.8108108108108, "grad_norm": 0.77734375, "learning_rate": 1.696709903328572e-05, "loss": 0.0951, "step": 4618 }, { "epoch": 124.83783783783784, "grad_norm": 1.2734375, "learning_rate": 1.695652017594394e-05, "loss": 0.2156, "step": 4619 }, { "epoch": 124.86486486486487, "grad_norm": 0.6328125, "learning_rate": 1.6945942924925868e-05, "loss": 0.0648, "step": 4620 }, { "epoch": 124.89189189189189, "grad_norm": 0.8984375, "learning_rate": 1.6935367282343807e-05, "loss": 0.082, "step": 4621 }, { "epoch": 124.91891891891892, "grad_norm": 0.73828125, "learning_rate": 1.6924793250309785e-05, "loss": 0.0875, "step": 4622 }, { "epoch": 124.94594594594595, "grad_norm": 0.6953125, "learning_rate": 1.6914220830935472e-05, "loss": 0.0857, "step": 4623 }, { "epoch": 124.97297297297297, "grad_norm": 0.765625, "learning_rate": 1.6903650026332247e-05, "loss": 0.1154, "step": 4624 }, { "epoch": 125.0, "grad_norm": 0.890625, "learning_rate": 1.689308083861116e-05, "loss": 0.0883, "step": 4625 }, { "epoch": 125.02702702702703, "grad_norm": 1.0234375, "learning_rate": 1.6882513269882917e-05, "loss": 0.144, "step": 4626 }, { "epoch": 125.05405405405405, "grad_norm": 1.03125, "learning_rate": 1.6871947322257915e-05, "loss": 0.1132, "step": 4627 }, { "epoch": 125.08108108108108, "grad_norm": 0.7109375, "learning_rate": 1.6861382997846244e-05, "loss": 0.0658, "step": 4628 }, { "epoch": 125.10810810810811, "grad_norm": 0.95703125, "learning_rate": 1.685082029875763e-05, "loss": 0.0822, "step": 4629 }, { "epoch": 125.13513513513513, "grad_norm": 0.703125, "learning_rate": 1.6840259227101517e-05, "loss": 0.0802, "step": 4630 }, { "epoch": 125.16216216216216, "grad_norm": 0.77734375, "learning_rate": 1.6829699784986994e-05, "loss": 0.094, "step": 4631 }, { "epoch": 125.1891891891892, "grad_norm": 0.75390625, "learning_rate": 1.681914197452284e-05, "loss": 0.0785, "step": 4632 }, { "epoch": 125.21621621621621, "grad_norm": 0.98046875, "learning_rate": 1.6808585797817493e-05, "loss": 0.1296, "step": 4633 }, { "epoch": 125.24324324324324, "grad_norm": 1.125, "learning_rate": 1.6798031256979095e-05, "loss": 0.1456, "step": 4634 }, { "epoch": 125.27027027027027, "grad_norm": 0.890625, "learning_rate": 1.678747835411542e-05, "loss": 0.0968, "step": 4635 }, { "epoch": 125.29729729729729, "grad_norm": 0.8125, "learning_rate": 1.677692709133396e-05, "loss": 0.0846, "step": 4636 }, { "epoch": 125.32432432432432, "grad_norm": 1.2265625, "learning_rate": 1.676637747074183e-05, "loss": 0.1103, "step": 4637 }, { "epoch": 125.35135135135135, "grad_norm": 1.046875, "learning_rate": 1.6755829494445846e-05, "loss": 0.104, "step": 4638 }, { "epoch": 125.37837837837837, "grad_norm": 1.1796875, "learning_rate": 1.6745283164552515e-05, "loss": 0.1922, "step": 4639 }, { "epoch": 125.4054054054054, "grad_norm": 0.8828125, "learning_rate": 1.673473848316797e-05, "loss": 0.0984, "step": 4640 }, { "epoch": 125.43243243243244, "grad_norm": 0.953125, "learning_rate": 1.6724195452398045e-05, "loss": 0.1444, "step": 4641 }, { "epoch": 125.45945945945945, "grad_norm": 0.734375, "learning_rate": 1.6713654074348246e-05, "loss": 0.0682, "step": 4642 }, { "epoch": 125.48648648648648, "grad_norm": 0.62109375, "learning_rate": 1.6703114351123723e-05, "loss": 0.0735, "step": 4643 }, { "epoch": 125.51351351351352, "grad_norm": 0.76171875, "learning_rate": 1.6692576284829314e-05, "loss": 0.089, "step": 4644 }, { "epoch": 125.54054054054055, "grad_norm": 1.609375, "learning_rate": 1.6682039877569543e-05, "loss": 0.2161, "step": 4645 }, { "epoch": 125.56756756756756, "grad_norm": 0.875, "learning_rate": 1.667150513144856e-05, "loss": 0.1074, "step": 4646 }, { "epoch": 125.5945945945946, "grad_norm": 1.1015625, "learning_rate": 1.6660972048570226e-05, "loss": 0.1171, "step": 4647 }, { "epoch": 125.62162162162163, "grad_norm": 1.0625, "learning_rate": 1.665044063103804e-05, "loss": 0.1139, "step": 4648 }, { "epoch": 125.64864864864865, "grad_norm": 1.171875, "learning_rate": 1.6639910880955184e-05, "loss": 0.1017, "step": 4649 }, { "epoch": 125.67567567567568, "grad_norm": 0.97265625, "learning_rate": 1.6629382800424496e-05, "loss": 0.1438, "step": 4650 }, { "epoch": 125.70270270270271, "grad_norm": 0.96875, "learning_rate": 1.6618856391548502e-05, "loss": 0.1069, "step": 4651 }, { "epoch": 125.72972972972973, "grad_norm": 1.0625, "learning_rate": 1.660833165642936e-05, "loss": 0.1206, "step": 4652 }, { "epoch": 125.75675675675676, "grad_norm": 0.640625, "learning_rate": 1.659780859716893e-05, "loss": 0.0711, "step": 4653 }, { "epoch": 125.78378378378379, "grad_norm": 1.0703125, "learning_rate": 1.6587287215868705e-05, "loss": 0.1183, "step": 4654 }, { "epoch": 125.8108108108108, "grad_norm": 1.1015625, "learning_rate": 1.6576767514629866e-05, "loss": 0.1465, "step": 4655 }, { "epoch": 125.83783783783784, "grad_norm": 0.88671875, "learning_rate": 1.6566249495553256e-05, "loss": 0.0831, "step": 4656 }, { "epoch": 125.86486486486487, "grad_norm": 1.5703125, "learning_rate": 1.6555733160739367e-05, "loss": 0.2733, "step": 4657 }, { "epoch": 125.89189189189189, "grad_norm": 0.76953125, "learning_rate": 1.6545218512288364e-05, "loss": 0.0947, "step": 4658 }, { "epoch": 125.91891891891892, "grad_norm": 0.498046875, "learning_rate": 1.653470555230009e-05, "loss": 0.0607, "step": 4659 }, { "epoch": 125.94594594594595, "grad_norm": 1.7265625, "learning_rate": 1.652419428287402e-05, "loss": 0.2035, "step": 4660 }, { "epoch": 125.97297297297297, "grad_norm": 1.2421875, "learning_rate": 1.651368470610931e-05, "loss": 0.1711, "step": 4661 }, { "epoch": 126.0, "grad_norm": 1.0234375, "learning_rate": 1.6503176824104795e-05, "loss": 0.138, "step": 4662 }, { "epoch": 126.02702702702703, "grad_norm": 0.7109375, "learning_rate": 1.6492670638958924e-05, "loss": 0.0713, "step": 4663 }, { "epoch": 126.05405405405405, "grad_norm": 0.66796875, "learning_rate": 1.6482166152769853e-05, "loss": 0.0862, "step": 4664 }, { "epoch": 126.08108108108108, "grad_norm": 1.15625, "learning_rate": 1.647166336763538e-05, "loss": 0.1368, "step": 4665 }, { "epoch": 126.10810810810811, "grad_norm": 1.015625, "learning_rate": 1.6461162285652954e-05, "loss": 0.1049, "step": 4666 }, { "epoch": 126.13513513513513, "grad_norm": 1.203125, "learning_rate": 1.6450662908919713e-05, "loss": 0.1483, "step": 4667 }, { "epoch": 126.16216216216216, "grad_norm": 1.4453125, "learning_rate": 1.6440165239532413e-05, "loss": 0.2216, "step": 4668 }, { "epoch": 126.1891891891892, "grad_norm": 1.1171875, "learning_rate": 1.642966927958751e-05, "loss": 0.1426, "step": 4669 }, { "epoch": 126.21621621621621, "grad_norm": 0.72265625, "learning_rate": 1.641917503118109e-05, "loss": 0.0633, "step": 4670 }, { "epoch": 126.24324324324324, "grad_norm": 1.1796875, "learning_rate": 1.6408682496408915e-05, "loss": 0.1374, "step": 4671 }, { "epoch": 126.27027027027027, "grad_norm": 0.75, "learning_rate": 1.6398191677366387e-05, "loss": 0.0913, "step": 4672 }, { "epoch": 126.29729729729729, "grad_norm": 1.0546875, "learning_rate": 1.6387702576148593e-05, "loss": 0.1073, "step": 4673 }, { "epoch": 126.32432432432432, "grad_norm": 1.109375, "learning_rate": 1.6377215194850237e-05, "loss": 0.1226, "step": 4674 }, { "epoch": 126.35135135135135, "grad_norm": 0.6484375, "learning_rate": 1.636672953556572e-05, "loss": 0.0835, "step": 4675 }, { "epoch": 126.37837837837837, "grad_norm": 1.03125, "learning_rate": 1.6356245600389077e-05, "loss": 0.0893, "step": 4676 }, { "epoch": 126.4054054054054, "grad_norm": 0.71484375, "learning_rate": 1.6345763391413993e-05, "loss": 0.0847, "step": 4677 }, { "epoch": 126.43243243243244, "grad_norm": 1.09375, "learning_rate": 1.633528291073383e-05, "loss": 0.1201, "step": 4678 }, { "epoch": 126.45945945945945, "grad_norm": 0.80859375, "learning_rate": 1.6324804160441597e-05, "loss": 0.0926, "step": 4679 }, { "epoch": 126.48648648648648, "grad_norm": 0.9375, "learning_rate": 1.631432714262994e-05, "loss": 0.1144, "step": 4680 }, { "epoch": 126.51351351351352, "grad_norm": 1.125, "learning_rate": 1.630385185939118e-05, "loss": 0.091, "step": 4681 }, { "epoch": 126.54054054054055, "grad_norm": 1.0625, "learning_rate": 1.6293378312817286e-05, "loss": 0.1035, "step": 4682 }, { "epoch": 126.56756756756756, "grad_norm": 1.65625, "learning_rate": 1.628290650499987e-05, "loss": 0.2456, "step": 4683 }, { "epoch": 126.5945945945946, "grad_norm": 0.9140625, "learning_rate": 1.627243643803022e-05, "loss": 0.0875, "step": 4684 }, { "epoch": 126.62162162162163, "grad_norm": 0.6953125, "learning_rate": 1.6261968113999244e-05, "loss": 0.0832, "step": 4685 }, { "epoch": 126.64864864864865, "grad_norm": 1.1328125, "learning_rate": 1.625150153499753e-05, "loss": 0.1459, "step": 4686 }, { "epoch": 126.67567567567568, "grad_norm": 0.87890625, "learning_rate": 1.6241036703115305e-05, "loss": 0.0835, "step": 4687 }, { "epoch": 126.70270270270271, "grad_norm": 1.4140625, "learning_rate": 1.6230573620442445e-05, "loss": 0.1743, "step": 4688 }, { "epoch": 126.72972972972973, "grad_norm": 0.6171875, "learning_rate": 1.6220112289068483e-05, "loss": 0.0736, "step": 4689 }, { "epoch": 126.75675675675676, "grad_norm": 0.76171875, "learning_rate": 1.6209652711082608e-05, "loss": 0.0745, "step": 4690 }, { "epoch": 126.78378378378379, "grad_norm": 0.85546875, "learning_rate": 1.6199194888573638e-05, "loss": 0.1011, "step": 4691 }, { "epoch": 126.8108108108108, "grad_norm": 1.2890625, "learning_rate": 1.6188738823630057e-05, "loss": 0.1616, "step": 4692 }, { "epoch": 126.83783783783784, "grad_norm": 0.71875, "learning_rate": 1.6178284518340003e-05, "loss": 0.089, "step": 4693 }, { "epoch": 126.86486486486487, "grad_norm": 0.95703125, "learning_rate": 1.6167831974791238e-05, "loss": 0.1119, "step": 4694 }, { "epoch": 126.89189189189189, "grad_norm": 0.9765625, "learning_rate": 1.6157381195071197e-05, "loss": 0.1197, "step": 4695 }, { "epoch": 126.91891891891892, "grad_norm": 1.359375, "learning_rate": 1.6146932181266962e-05, "loss": 0.1699, "step": 4696 }, { "epoch": 126.94594594594595, "grad_norm": 1.109375, "learning_rate": 1.6136484935465233e-05, "loss": 0.1229, "step": 4697 }, { "epoch": 126.97297297297297, "grad_norm": 0.6953125, "learning_rate": 1.6126039459752405e-05, "loss": 0.0748, "step": 4698 }, { "epoch": 127.0, "grad_norm": 1.0859375, "learning_rate": 1.6115595756214466e-05, "loss": 0.1395, "step": 4699 }, { "epoch": 127.02702702702703, "grad_norm": 1.25, "learning_rate": 1.6105153826937085e-05, "loss": 0.1455, "step": 4700 }, { "epoch": 127.05405405405405, "grad_norm": 1.3828125, "learning_rate": 1.609471367400559e-05, "loss": 0.1916, "step": 4701 }, { "epoch": 127.08108108108108, "grad_norm": 0.828125, "learning_rate": 1.6084275299504893e-05, "loss": 0.1025, "step": 4702 }, { "epoch": 127.10810810810811, "grad_norm": 0.8515625, "learning_rate": 1.6073838705519616e-05, "loss": 0.0989, "step": 4703 }, { "epoch": 127.13513513513513, "grad_norm": 0.63671875, "learning_rate": 1.6063403894134008e-05, "loss": 0.0728, "step": 4704 }, { "epoch": 127.16216216216216, "grad_norm": 0.8671875, "learning_rate": 1.6052970867431928e-05, "loss": 0.1001, "step": 4705 }, { "epoch": 127.1891891891892, "grad_norm": 0.49609375, "learning_rate": 1.6042539627496923e-05, "loss": 0.07, "step": 4706 }, { "epoch": 127.21621621621621, "grad_norm": 0.859375, "learning_rate": 1.6032110176412154e-05, "loss": 0.1041, "step": 4707 }, { "epoch": 127.24324324324324, "grad_norm": 0.69921875, "learning_rate": 1.6021682516260446e-05, "loss": 0.087, "step": 4708 }, { "epoch": 127.27027027027027, "grad_norm": 1.1796875, "learning_rate": 1.6011256649124238e-05, "loss": 0.1222, "step": 4709 }, { "epoch": 127.29729729729729, "grad_norm": 1.0078125, "learning_rate": 1.600083257708565e-05, "loss": 0.1408, "step": 4710 }, { "epoch": 127.32432432432432, "grad_norm": 0.67578125, "learning_rate": 1.5990410302226407e-05, "loss": 0.0842, "step": 4711 }, { "epoch": 127.35135135135135, "grad_norm": 0.9921875, "learning_rate": 1.59799898266279e-05, "loss": 0.0961, "step": 4712 }, { "epoch": 127.37837837837837, "grad_norm": 0.984375, "learning_rate": 1.596957115237114e-05, "loss": 0.1172, "step": 4713 }, { "epoch": 127.4054054054054, "grad_norm": 0.76953125, "learning_rate": 1.595915428153679e-05, "loss": 0.0765, "step": 4714 }, { "epoch": 127.43243243243244, "grad_norm": 0.8515625, "learning_rate": 1.594873921620517e-05, "loss": 0.1054, "step": 4715 }, { "epoch": 127.45945945945945, "grad_norm": 1.1171875, "learning_rate": 1.5938325958456196e-05, "loss": 0.1207, "step": 4716 }, { "epoch": 127.48648648648648, "grad_norm": 1.03125, "learning_rate": 1.592791451036946e-05, "loss": 0.1024, "step": 4717 }, { "epoch": 127.51351351351352, "grad_norm": 0.99609375, "learning_rate": 1.5917504874024193e-05, "loss": 0.1192, "step": 4718 }, { "epoch": 127.54054054054055, "grad_norm": 0.68359375, "learning_rate": 1.590709705149923e-05, "loss": 0.0815, "step": 4719 }, { "epoch": 127.56756756756756, "grad_norm": 1.3515625, "learning_rate": 1.5896691044873073e-05, "loss": 0.1509, "step": 4720 }, { "epoch": 127.5945945945946, "grad_norm": 0.70703125, "learning_rate": 1.5886286856223874e-05, "loss": 0.0907, "step": 4721 }, { "epoch": 127.62162162162163, "grad_norm": 0.78125, "learning_rate": 1.587588448762937e-05, "loss": 0.0859, "step": 4722 }, { "epoch": 127.64864864864865, "grad_norm": 1.0703125, "learning_rate": 1.586548394116699e-05, "loss": 0.1436, "step": 4723 }, { "epoch": 127.67567567567568, "grad_norm": 0.455078125, "learning_rate": 1.585508521891377e-05, "loss": 0.0616, "step": 4724 }, { "epoch": 127.70270270270271, "grad_norm": 0.6796875, "learning_rate": 1.584468832294638e-05, "loss": 0.075, "step": 4725 }, { "epoch": 127.72972972972973, "grad_norm": 0.7265625, "learning_rate": 1.5834293255341144e-05, "loss": 0.0683, "step": 4726 }, { "epoch": 127.75675675675676, "grad_norm": 1.5625, "learning_rate": 1.5823900018174003e-05, "loss": 0.1443, "step": 4727 }, { "epoch": 127.78378378378379, "grad_norm": 0.8671875, "learning_rate": 1.5813508613520538e-05, "loss": 0.1095, "step": 4728 }, { "epoch": 127.8108108108108, "grad_norm": 0.78125, "learning_rate": 1.580311904345597e-05, "loss": 0.0889, "step": 4729 }, { "epoch": 127.83783783783784, "grad_norm": 0.60546875, "learning_rate": 1.579273131005515e-05, "loss": 0.0681, "step": 4730 }, { "epoch": 127.86486486486487, "grad_norm": 1.2109375, "learning_rate": 1.5782345415392553e-05, "loss": 0.1826, "step": 4731 }, { "epoch": 127.89189189189189, "grad_norm": 0.765625, "learning_rate": 1.5771961361542303e-05, "loss": 0.0779, "step": 4732 }, { "epoch": 127.91891891891892, "grad_norm": 1.59375, "learning_rate": 1.576157915057814e-05, "loss": 0.2639, "step": 4733 }, { "epoch": 127.94594594594595, "grad_norm": 0.88671875, "learning_rate": 1.5751198784573446e-05, "loss": 0.0834, "step": 4734 }, { "epoch": 127.97297297297297, "grad_norm": 1.015625, "learning_rate": 1.5740820265601246e-05, "loss": 0.1078, "step": 4735 }, { "epoch": 128.0, "grad_norm": 0.578125, "learning_rate": 1.573044359573416e-05, "loss": 0.0725, "step": 4736 }, { "epoch": 128.02702702702703, "grad_norm": 1.09375, "learning_rate": 1.5720068777044476e-05, "loss": 0.1211, "step": 4737 }, { "epoch": 128.05405405405406, "grad_norm": 1.1328125, "learning_rate": 1.570969581160411e-05, "loss": 0.1397, "step": 4738 }, { "epoch": 128.0810810810811, "grad_norm": 0.92578125, "learning_rate": 1.569932470148457e-05, "loss": 0.0958, "step": 4739 }, { "epoch": 128.1081081081081, "grad_norm": 0.90234375, "learning_rate": 1.5688955448757024e-05, "loss": 0.0878, "step": 4740 }, { "epoch": 128.13513513513513, "grad_norm": 1.046875, "learning_rate": 1.567858805549229e-05, "loss": 0.1002, "step": 4741 }, { "epoch": 128.16216216216216, "grad_norm": 0.5234375, "learning_rate": 1.5668222523760753e-05, "loss": 0.0685, "step": 4742 }, { "epoch": 128.1891891891892, "grad_norm": 0.78515625, "learning_rate": 1.5657858855632493e-05, "loss": 0.0746, "step": 4743 }, { "epoch": 128.21621621621622, "grad_norm": 1.3125, "learning_rate": 1.5647497053177163e-05, "loss": 0.1906, "step": 4744 }, { "epoch": 128.24324324324326, "grad_norm": 0.9765625, "learning_rate": 1.563713711846408e-05, "loss": 0.1083, "step": 4745 }, { "epoch": 128.27027027027026, "grad_norm": 1.03125, "learning_rate": 1.5626779053562176e-05, "loss": 0.1255, "step": 4746 }, { "epoch": 128.2972972972973, "grad_norm": 0.7734375, "learning_rate": 1.5616422860540008e-05, "loss": 0.0873, "step": 4747 }, { "epoch": 128.32432432432432, "grad_norm": 0.58984375, "learning_rate": 1.5606068541465753e-05, "loss": 0.0629, "step": 4748 }, { "epoch": 128.35135135135135, "grad_norm": 0.8359375, "learning_rate": 1.559571609840723e-05, "loss": 0.1062, "step": 4749 }, { "epoch": 128.3783783783784, "grad_norm": 0.98828125, "learning_rate": 1.558536553343187e-05, "loss": 0.1032, "step": 4750 }, { "epoch": 128.40540540540542, "grad_norm": 1.2421875, "learning_rate": 1.557501684860673e-05, "loss": 0.1023, "step": 4751 }, { "epoch": 128.43243243243242, "grad_norm": 0.89453125, "learning_rate": 1.556467004599851e-05, "loss": 0.09, "step": 4752 }, { "epoch": 128.45945945945945, "grad_norm": 0.99609375, "learning_rate": 1.5554325127673496e-05, "loss": 0.1452, "step": 4753 }, { "epoch": 128.48648648648648, "grad_norm": 0.90234375, "learning_rate": 1.5543982095697633e-05, "loss": 0.0879, "step": 4754 }, { "epoch": 128.51351351351352, "grad_norm": 0.455078125, "learning_rate": 1.553364095213649e-05, "loss": 0.059, "step": 4755 }, { "epoch": 128.54054054054055, "grad_norm": 1.1953125, "learning_rate": 1.552330169905522e-05, "loss": 0.1255, "step": 4756 }, { "epoch": 128.56756756756758, "grad_norm": 0.84765625, "learning_rate": 1.551296433851864e-05, "loss": 0.0834, "step": 4757 }, { "epoch": 128.59459459459458, "grad_norm": 0.55078125, "learning_rate": 1.5502628872591173e-05, "loss": 0.0671, "step": 4758 }, { "epoch": 128.6216216216216, "grad_norm": 1.1953125, "learning_rate": 1.5492295303336858e-05, "loss": 0.164, "step": 4759 }, { "epoch": 128.64864864864865, "grad_norm": 1.34375, "learning_rate": 1.548196363281937e-05, "loss": 0.1368, "step": 4760 }, { "epoch": 128.67567567567568, "grad_norm": 0.97265625, "learning_rate": 1.5471633863101982e-05, "loss": 0.1179, "step": 4761 }, { "epoch": 128.7027027027027, "grad_norm": 1.3359375, "learning_rate": 1.5461305996247604e-05, "loss": 0.1845, "step": 4762 }, { "epoch": 128.72972972972974, "grad_norm": 0.92578125, "learning_rate": 1.5450980034318784e-05, "loss": 0.1193, "step": 4763 }, { "epoch": 128.75675675675674, "grad_norm": 1.03125, "learning_rate": 1.5440655979377644e-05, "loss": 0.0998, "step": 4764 }, { "epoch": 128.78378378378378, "grad_norm": 0.62890625, "learning_rate": 1.543033383348596e-05, "loss": 0.0775, "step": 4765 }, { "epoch": 128.8108108108108, "grad_norm": 0.9375, "learning_rate": 1.542001359870512e-05, "loss": 0.1112, "step": 4766 }, { "epoch": 128.83783783783784, "grad_norm": 0.8125, "learning_rate": 1.5409695277096124e-05, "loss": 0.0804, "step": 4767 }, { "epoch": 128.86486486486487, "grad_norm": 1.1484375, "learning_rate": 1.539937887071959e-05, "loss": 0.1434, "step": 4768 }, { "epoch": 128.8918918918919, "grad_norm": 1.1171875, "learning_rate": 1.5389064381635766e-05, "loss": 0.1229, "step": 4769 }, { "epoch": 128.9189189189189, "grad_norm": 1.578125, "learning_rate": 1.537875181190449e-05, "loss": 0.2098, "step": 4770 }, { "epoch": 128.94594594594594, "grad_norm": 0.60546875, "learning_rate": 1.536844116358525e-05, "loss": 0.077, "step": 4771 }, { "epoch": 128.97297297297297, "grad_norm": 1.6484375, "learning_rate": 1.535813243873714e-05, "loss": 0.226, "step": 4772 }, { "epoch": 129.0, "grad_norm": 0.81640625, "learning_rate": 1.534782563941884e-05, "loss": 0.0885, "step": 4773 }, { "epoch": 129.02702702702703, "grad_norm": 0.88671875, "learning_rate": 1.5337520767688703e-05, "loss": 0.0883, "step": 4774 }, { "epoch": 129.05405405405406, "grad_norm": 0.78125, "learning_rate": 1.5327217825604628e-05, "loss": 0.0702, "step": 4775 }, { "epoch": 129.0810810810811, "grad_norm": 1.1796875, "learning_rate": 1.531691681522419e-05, "loss": 0.1364, "step": 4776 }, { "epoch": 129.1081081081081, "grad_norm": 1.203125, "learning_rate": 1.530661773860455e-05, "loss": 0.1869, "step": 4777 }, { "epoch": 129.13513513513513, "grad_norm": 1.296875, "learning_rate": 1.5296320597802477e-05, "loss": 0.153, "step": 4778 }, { "epoch": 129.16216216216216, "grad_norm": 0.52734375, "learning_rate": 1.5286025394874364e-05, "loss": 0.0697, "step": 4779 }, { "epoch": 129.1891891891892, "grad_norm": 1.1484375, "learning_rate": 1.527573213187623e-05, "loss": 0.1372, "step": 4780 }, { "epoch": 129.21621621621622, "grad_norm": 1.1171875, "learning_rate": 1.526544081086367e-05, "loss": 0.1243, "step": 4781 }, { "epoch": 129.24324324324326, "grad_norm": 0.69921875, "learning_rate": 1.525515143389193e-05, "loss": 0.0904, "step": 4782 }, { "epoch": 129.27027027027026, "grad_norm": 1.3046875, "learning_rate": 1.524486400301584e-05, "loss": 0.1705, "step": 4783 }, { "epoch": 129.2972972972973, "grad_norm": 0.85546875, "learning_rate": 1.5234578520289855e-05, "loss": 0.0886, "step": 4784 }, { "epoch": 129.32432432432432, "grad_norm": 1.2578125, "learning_rate": 1.5224294987768037e-05, "loss": 0.2228, "step": 4785 }, { "epoch": 129.35135135135135, "grad_norm": 0.75, "learning_rate": 1.5214013407504069e-05, "loss": 0.1169, "step": 4786 }, { "epoch": 129.3783783783784, "grad_norm": 1.0859375, "learning_rate": 1.5203733781551222e-05, "loss": 0.1243, "step": 4787 }, { "epoch": 129.40540540540542, "grad_norm": 1.234375, "learning_rate": 1.5193456111962395e-05, "loss": 0.1444, "step": 4788 }, { "epoch": 129.43243243243242, "grad_norm": 0.9375, "learning_rate": 1.5183180400790093e-05, "loss": 0.0882, "step": 4789 }, { "epoch": 129.45945945945945, "grad_norm": 1.703125, "learning_rate": 1.5172906650086416e-05, "loss": 0.1624, "step": 4790 }, { "epoch": 129.48648648648648, "grad_norm": 1.0625, "learning_rate": 1.5162634861903104e-05, "loss": 0.0937, "step": 4791 }, { "epoch": 129.51351351351352, "grad_norm": 0.65234375, "learning_rate": 1.5152365038291466e-05, "loss": 0.0708, "step": 4792 }, { "epoch": 129.54054054054055, "grad_norm": 1.1796875, "learning_rate": 1.5142097181302444e-05, "loss": 0.0861, "step": 4793 }, { "epoch": 129.56756756756758, "grad_norm": 0.7109375, "learning_rate": 1.5131831292986599e-05, "loss": 0.0854, "step": 4794 }, { "epoch": 129.59459459459458, "grad_norm": 2.015625, "learning_rate": 1.5121567375394047e-05, "loss": 0.2615, "step": 4795 }, { "epoch": 129.6216216216216, "grad_norm": 1.2421875, "learning_rate": 1.5111305430574568e-05, "loss": 0.1149, "step": 4796 }, { "epoch": 129.64864864864865, "grad_norm": 0.61328125, "learning_rate": 1.5101045460577526e-05, "loss": 0.0753, "step": 4797 }, { "epoch": 129.67567567567568, "grad_norm": 0.66796875, "learning_rate": 1.5090787467451872e-05, "loss": 0.0634, "step": 4798 }, { "epoch": 129.7027027027027, "grad_norm": 1.1875, "learning_rate": 1.5080531453246189e-05, "loss": 0.1338, "step": 4799 }, { "epoch": 129.72972972972974, "grad_norm": 1.0078125, "learning_rate": 1.5070277420008665e-05, "loss": 0.115, "step": 4800 }, { "epoch": 129.75675675675674, "grad_norm": 0.72265625, "learning_rate": 1.5060025369787065e-05, "loss": 0.0695, "step": 4801 }, { "epoch": 129.78378378378378, "grad_norm": 1.15625, "learning_rate": 1.5049775304628785e-05, "loss": 0.1358, "step": 4802 }, { "epoch": 129.8108108108108, "grad_norm": 0.88671875, "learning_rate": 1.5039527226580813e-05, "loss": 0.0874, "step": 4803 }, { "epoch": 129.83783783783784, "grad_norm": 1.03125, "learning_rate": 1.5029281137689744e-05, "loss": 0.1266, "step": 4804 }, { "epoch": 129.86486486486487, "grad_norm": 1.0703125, "learning_rate": 1.501903704000177e-05, "loss": 0.1365, "step": 4805 }, { "epoch": 129.8918918918919, "grad_norm": 1.03125, "learning_rate": 1.5008794935562693e-05, "loss": 0.1313, "step": 4806 }, { "epoch": 129.9189189189189, "grad_norm": 0.9296875, "learning_rate": 1.4998554826417905e-05, "loss": 0.0919, "step": 4807 }, { "epoch": 129.94594594594594, "grad_norm": 1.1328125, "learning_rate": 1.4988316714612421e-05, "loss": 0.1208, "step": 4808 }, { "epoch": 129.97297297297297, "grad_norm": 1.234375, "learning_rate": 1.497808060219083e-05, "loss": 0.165, "step": 4809 }, { "epoch": 130.0, "grad_norm": 0.484375, "learning_rate": 1.496784649119734e-05, "loss": 0.0662, "step": 4810 }, { "epoch": 130.02702702702703, "grad_norm": 1.1953125, "learning_rate": 1.495761438367577e-05, "loss": 0.1503, "step": 4811 }, { "epoch": 130.05405405405406, "grad_norm": 1.0078125, "learning_rate": 1.4947384281669499e-05, "loss": 0.1051, "step": 4812 }, { "epoch": 130.0810810810811, "grad_norm": 1.1171875, "learning_rate": 1.4937156187221543e-05, "loss": 0.1209, "step": 4813 }, { "epoch": 130.1081081081081, "grad_norm": 0.66796875, "learning_rate": 1.4926930102374514e-05, "loss": 0.0819, "step": 4814 }, { "epoch": 130.13513513513513, "grad_norm": 0.84375, "learning_rate": 1.491670602917059e-05, "loss": 0.0762, "step": 4815 }, { "epoch": 130.16216216216216, "grad_norm": 0.5078125, "learning_rate": 1.4906483969651585e-05, "loss": 0.0595, "step": 4816 }, { "epoch": 130.1891891891892, "grad_norm": 0.5859375, "learning_rate": 1.4896263925858903e-05, "loss": 0.0538, "step": 4817 }, { "epoch": 130.21621621621622, "grad_norm": 0.96484375, "learning_rate": 1.4886045899833518e-05, "loss": 0.0876, "step": 4818 }, { "epoch": 130.24324324324326, "grad_norm": 1.015625, "learning_rate": 1.4875829893616044e-05, "loss": 0.1409, "step": 4819 }, { "epoch": 130.27027027027026, "grad_norm": 0.98828125, "learning_rate": 1.486561590924665e-05, "loss": 0.1226, "step": 4820 }, { "epoch": 130.2972972972973, "grad_norm": 1.1015625, "learning_rate": 1.4855403948765128e-05, "loss": 0.1179, "step": 4821 }, { "epoch": 130.32432432432432, "grad_norm": 1.3046875, "learning_rate": 1.4845194014210865e-05, "loss": 0.1798, "step": 4822 }, { "epoch": 130.35135135135135, "grad_norm": 0.59375, "learning_rate": 1.4834986107622823e-05, "loss": 0.0697, "step": 4823 }, { "epoch": 130.3783783783784, "grad_norm": 0.59765625, "learning_rate": 1.4824780231039584e-05, "loss": 0.0666, "step": 4824 }, { "epoch": 130.40540540540542, "grad_norm": 0.79296875, "learning_rate": 1.4814576386499312e-05, "loss": 0.0916, "step": 4825 }, { "epoch": 130.43243243243242, "grad_norm": 0.62109375, "learning_rate": 1.4804374576039762e-05, "loss": 0.08, "step": 4826 }, { "epoch": 130.45945945945945, "grad_norm": 1.2890625, "learning_rate": 1.4794174801698286e-05, "loss": 0.1769, "step": 4827 }, { "epoch": 130.48648648648648, "grad_norm": 0.58984375, "learning_rate": 1.4783977065511839e-05, "loss": 0.0752, "step": 4828 }, { "epoch": 130.51351351351352, "grad_norm": 1.1015625, "learning_rate": 1.4773781369516951e-05, "loss": 0.103, "step": 4829 }, { "epoch": 130.54054054054055, "grad_norm": 1.15625, "learning_rate": 1.4763587715749755e-05, "loss": 0.1477, "step": 4830 }, { "epoch": 130.56756756756758, "grad_norm": 0.98046875, "learning_rate": 1.4753396106245993e-05, "loss": 0.1162, "step": 4831 }, { "epoch": 130.59459459459458, "grad_norm": 1.0234375, "learning_rate": 1.4743206543040952e-05, "loss": 0.1243, "step": 4832 }, { "epoch": 130.6216216216216, "grad_norm": 0.9609375, "learning_rate": 1.4733019028169568e-05, "loss": 0.1047, "step": 4833 }, { "epoch": 130.64864864864865, "grad_norm": 1.109375, "learning_rate": 1.4722833563666316e-05, "loss": 0.1752, "step": 4834 }, { "epoch": 130.67567567567568, "grad_norm": 0.91015625, "learning_rate": 1.4712650151565294e-05, "loss": 0.0996, "step": 4835 }, { "epoch": 130.7027027027027, "grad_norm": 1.171875, "learning_rate": 1.4702468793900188e-05, "loss": 0.1066, "step": 4836 }, { "epoch": 130.72972972972974, "grad_norm": 1.1796875, "learning_rate": 1.4692289492704252e-05, "loss": 0.1148, "step": 4837 }, { "epoch": 130.75675675675674, "grad_norm": 1.015625, "learning_rate": 1.4682112250010354e-05, "loss": 0.088, "step": 4838 }, { "epoch": 130.78378378378378, "grad_norm": 0.69140625, "learning_rate": 1.4671937067850951e-05, "loss": 0.0897, "step": 4839 }, { "epoch": 130.8108108108108, "grad_norm": 0.64453125, "learning_rate": 1.4661763948258056e-05, "loss": 0.0618, "step": 4840 }, { "epoch": 130.83783783783784, "grad_norm": 1.0234375, "learning_rate": 1.4651592893263308e-05, "loss": 0.1127, "step": 4841 }, { "epoch": 130.86486486486487, "grad_norm": 1.046875, "learning_rate": 1.4641423904897913e-05, "loss": 0.1076, "step": 4842 }, { "epoch": 130.8918918918919, "grad_norm": 1.1953125, "learning_rate": 1.463125698519267e-05, "loss": 0.1478, "step": 4843 }, { "epoch": 130.9189189189189, "grad_norm": 0.9609375, "learning_rate": 1.462109213617796e-05, "loss": 0.1114, "step": 4844 }, { "epoch": 130.94594594594594, "grad_norm": 0.7578125, "learning_rate": 1.4610929359883771e-05, "loss": 0.0957, "step": 4845 }, { "epoch": 130.97297297297297, "grad_norm": 0.65625, "learning_rate": 1.460076865833964e-05, "loss": 0.0739, "step": 4846 }, { "epoch": 131.0, "grad_norm": 1.0390625, "learning_rate": 1.4590610033574717e-05, "loss": 0.1137, "step": 4847 }, { "epoch": 131.02702702702703, "grad_norm": 0.48046875, "learning_rate": 1.4580453487617745e-05, "loss": 0.0639, "step": 4848 }, { "epoch": 131.05405405405406, "grad_norm": 1.2578125, "learning_rate": 1.4570299022497016e-05, "loss": 0.1374, "step": 4849 }, { "epoch": 131.0810810810811, "grad_norm": 0.8515625, "learning_rate": 1.4560146640240452e-05, "loss": 0.0959, "step": 4850 }, { "epoch": 131.1081081081081, "grad_norm": 1.15625, "learning_rate": 1.4549996342875507e-05, "loss": 0.1504, "step": 4851 }, { "epoch": 131.13513513513513, "grad_norm": 0.8671875, "learning_rate": 1.4539848132429265e-05, "loss": 0.1081, "step": 4852 }, { "epoch": 131.16216216216216, "grad_norm": 0.6640625, "learning_rate": 1.4529702010928381e-05, "loss": 0.0787, "step": 4853 }, { "epoch": 131.1891891891892, "grad_norm": 0.6953125, "learning_rate": 1.4519557980399067e-05, "loss": 0.0723, "step": 4854 }, { "epoch": 131.21621621621622, "grad_norm": 0.95703125, "learning_rate": 1.4509416042867149e-05, "loss": 0.109, "step": 4855 }, { "epoch": 131.24324324324326, "grad_norm": 1.4453125, "learning_rate": 1.449927620035803e-05, "loss": 0.1431, "step": 4856 }, { "epoch": 131.27027027027026, "grad_norm": 1.046875, "learning_rate": 1.4489138454896673e-05, "loss": 0.0879, "step": 4857 }, { "epoch": 131.2972972972973, "grad_norm": 1.3671875, "learning_rate": 1.4479002808507646e-05, "loss": 0.13, "step": 4858 }, { "epoch": 131.32432432432432, "grad_norm": 0.91796875, "learning_rate": 1.4468869263215095e-05, "loss": 0.124, "step": 4859 }, { "epoch": 131.35135135135135, "grad_norm": 1.125, "learning_rate": 1.4458737821042723e-05, "loss": 0.158, "step": 4860 }, { "epoch": 131.3783783783784, "grad_norm": 1.4375, "learning_rate": 1.444860848401384e-05, "loss": 0.2198, "step": 4861 }, { "epoch": 131.40540540540542, "grad_norm": 1.1875, "learning_rate": 1.4438481254151337e-05, "loss": 0.1507, "step": 4862 }, { "epoch": 131.43243243243242, "grad_norm": 0.67578125, "learning_rate": 1.442835613347765e-05, "loss": 0.0748, "step": 4863 }, { "epoch": 131.45945945945945, "grad_norm": 1.0625, "learning_rate": 1.441823312401484e-05, "loss": 0.1245, "step": 4864 }, { "epoch": 131.48648648648648, "grad_norm": 1.40625, "learning_rate": 1.4408112227784503e-05, "loss": 0.218, "step": 4865 }, { "epoch": 131.51351351351352, "grad_norm": 0.53515625, "learning_rate": 1.4397993446807844e-05, "loss": 0.0597, "step": 4866 }, { "epoch": 131.54054054054055, "grad_norm": 1.109375, "learning_rate": 1.4387876783105642e-05, "loss": 0.1479, "step": 4867 }, { "epoch": 131.56756756756758, "grad_norm": 0.6171875, "learning_rate": 1.4377762238698227e-05, "loss": 0.0739, "step": 4868 }, { "epoch": 131.59459459459458, "grad_norm": 1.078125, "learning_rate": 1.436764981560555e-05, "loss": 0.1305, "step": 4869 }, { "epoch": 131.6216216216216, "grad_norm": 1.1875, "learning_rate": 1.4357539515847084e-05, "loss": 0.1438, "step": 4870 }, { "epoch": 131.64864864864865, "grad_norm": 0.609375, "learning_rate": 1.4347431341441933e-05, "loss": 0.0633, "step": 4871 }, { "epoch": 131.67567567567568, "grad_norm": 1.140625, "learning_rate": 1.4337325294408726e-05, "loss": 0.1496, "step": 4872 }, { "epoch": 131.7027027027027, "grad_norm": 1.015625, "learning_rate": 1.432722137676571e-05, "loss": 0.1259, "step": 4873 }, { "epoch": 131.72972972972974, "grad_norm": 0.6796875, "learning_rate": 1.4317119590530692e-05, "loss": 0.0743, "step": 4874 }, { "epoch": 131.75675675675674, "grad_norm": 0.765625, "learning_rate": 1.4307019937721034e-05, "loss": 0.088, "step": 4875 }, { "epoch": 131.78378378378378, "grad_norm": 1.3359375, "learning_rate": 1.4296922420353704e-05, "loss": 0.1659, "step": 4876 }, { "epoch": 131.8108108108108, "grad_norm": 0.9296875, "learning_rate": 1.4286827040445206e-05, "loss": 0.1164, "step": 4877 }, { "epoch": 131.83783783783784, "grad_norm": 1.046875, "learning_rate": 1.4276733800011655e-05, "loss": 0.1205, "step": 4878 }, { "epoch": 131.86486486486487, "grad_norm": 1.265625, "learning_rate": 1.4266642701068725e-05, "loss": 0.1581, "step": 4879 }, { "epoch": 131.8918918918919, "grad_norm": 0.62890625, "learning_rate": 1.4256553745631646e-05, "loss": 0.07, "step": 4880 }, { "epoch": 131.9189189189189, "grad_norm": 1.2265625, "learning_rate": 1.4246466935715249e-05, "loss": 0.1307, "step": 4881 }, { "epoch": 131.94594594594594, "grad_norm": 1.0234375, "learning_rate": 1.4236382273333904e-05, "loss": 0.1101, "step": 4882 }, { "epoch": 131.97297297297297, "grad_norm": 1.140625, "learning_rate": 1.4226299760501576e-05, "loss": 0.1141, "step": 4883 }, { "epoch": 132.0, "grad_norm": 0.890625, "learning_rate": 1.4216219399231812e-05, "loss": 0.1011, "step": 4884 }, { "epoch": 132.02702702702703, "grad_norm": 1.21875, "learning_rate": 1.4206141191537682e-05, "loss": 0.1447, "step": 4885 }, { "epoch": 132.05405405405406, "grad_norm": 1.546875, "learning_rate": 1.4196065139431866e-05, "loss": 0.1729, "step": 4886 }, { "epoch": 132.0810810810811, "grad_norm": 1.203125, "learning_rate": 1.4185991244926617e-05, "loss": 0.1214, "step": 4887 }, { "epoch": 132.1081081081081, "grad_norm": 1.0703125, "learning_rate": 1.4175919510033723e-05, "loss": 0.1239, "step": 4888 }, { "epoch": 132.13513513513513, "grad_norm": 1.0078125, "learning_rate": 1.4165849936764563e-05, "loss": 0.1072, "step": 4889 }, { "epoch": 132.16216216216216, "grad_norm": 0.9609375, "learning_rate": 1.41557825271301e-05, "loss": 0.1064, "step": 4890 }, { "epoch": 132.1891891891892, "grad_norm": 0.60546875, "learning_rate": 1.4145717283140828e-05, "loss": 0.0676, "step": 4891 }, { "epoch": 132.21621621621622, "grad_norm": 1.046875, "learning_rate": 1.4135654206806831e-05, "loss": 0.125, "step": 4892 }, { "epoch": 132.24324324324326, "grad_norm": 1.3515625, "learning_rate": 1.4125593300137766e-05, "loss": 0.1419, "step": 4893 }, { "epoch": 132.27027027027026, "grad_norm": 1.0390625, "learning_rate": 1.4115534565142829e-05, "loss": 0.1028, "step": 4894 }, { "epoch": 132.2972972972973, "grad_norm": 1.5859375, "learning_rate": 1.4105478003830824e-05, "loss": 0.1465, "step": 4895 }, { "epoch": 132.32432432432432, "grad_norm": 1.125, "learning_rate": 1.4095423618210075e-05, "loss": 0.1057, "step": 4896 }, { "epoch": 132.35135135135135, "grad_norm": 1.21875, "learning_rate": 1.40853714102885e-05, "loss": 0.1219, "step": 4897 }, { "epoch": 132.3783783783784, "grad_norm": 1.046875, "learning_rate": 1.4075321382073592e-05, "loss": 0.1161, "step": 4898 }, { "epoch": 132.40540540540542, "grad_norm": 0.8125, "learning_rate": 1.4065273535572365e-05, "loss": 0.0699, "step": 4899 }, { "epoch": 132.43243243243242, "grad_norm": 0.80078125, "learning_rate": 1.4055227872791444e-05, "loss": 0.0988, "step": 4900 }, { "epoch": 132.45945945945945, "grad_norm": 0.85546875, "learning_rate": 1.4045184395737005e-05, "loss": 0.1277, "step": 4901 }, { "epoch": 132.48648648648648, "grad_norm": 0.9765625, "learning_rate": 1.4035143106414755e-05, "loss": 0.1171, "step": 4902 }, { "epoch": 132.51351351351352, "grad_norm": 1.0234375, "learning_rate": 1.402510400683001e-05, "loss": 0.0916, "step": 4903 }, { "epoch": 132.54054054054055, "grad_norm": 0.73046875, "learning_rate": 1.4015067098987633e-05, "loss": 0.0873, "step": 4904 }, { "epoch": 132.56756756756758, "grad_norm": 1.328125, "learning_rate": 1.4005032384892029e-05, "loss": 0.1894, "step": 4905 }, { "epoch": 132.59459459459458, "grad_norm": 1.171875, "learning_rate": 1.3994999866547199e-05, "loss": 0.1579, "step": 4906 }, { "epoch": 132.6216216216216, "grad_norm": 1.3515625, "learning_rate": 1.3984969545956666e-05, "loss": 0.1807, "step": 4907 }, { "epoch": 132.64864864864865, "grad_norm": 0.91796875, "learning_rate": 1.397494142512355e-05, "loss": 0.1019, "step": 4908 }, { "epoch": 132.67567567567568, "grad_norm": 0.63671875, "learning_rate": 1.3964915506050524e-05, "loss": 0.07, "step": 4909 }, { "epoch": 132.7027027027027, "grad_norm": 0.99609375, "learning_rate": 1.39548917907398e-05, "loss": 0.1277, "step": 4910 }, { "epoch": 132.72972972972974, "grad_norm": 1.484375, "learning_rate": 1.3944870281193179e-05, "loss": 0.1849, "step": 4911 }, { "epoch": 132.75675675675674, "grad_norm": 0.94921875, "learning_rate": 1.393485097941199e-05, "loss": 0.1289, "step": 4912 }, { "epoch": 132.78378378378378, "grad_norm": 1.0234375, "learning_rate": 1.3924833887397153e-05, "loss": 0.1023, "step": 4913 }, { "epoch": 132.8108108108108, "grad_norm": 1.0078125, "learning_rate": 1.3914819007149124e-05, "loss": 0.1233, "step": 4914 }, { "epoch": 132.83783783783784, "grad_norm": 0.73046875, "learning_rate": 1.3904806340667936e-05, "loss": 0.094, "step": 4915 }, { "epoch": 132.86486486486487, "grad_norm": 0.85546875, "learning_rate": 1.3894795889953153e-05, "loss": 0.1106, "step": 4916 }, { "epoch": 132.8918918918919, "grad_norm": 1.046875, "learning_rate": 1.3884787657003917e-05, "loss": 0.134, "step": 4917 }, { "epoch": 132.9189189189189, "grad_norm": 1.3515625, "learning_rate": 1.3874781643818941e-05, "loss": 0.1805, "step": 4918 }, { "epoch": 132.94594594594594, "grad_norm": 1.1484375, "learning_rate": 1.3864777852396455e-05, "loss": 0.1728, "step": 4919 }, { "epoch": 132.97297297297297, "grad_norm": 0.8671875, "learning_rate": 1.385477628473427e-05, "loss": 0.0835, "step": 4920 }, { "epoch": 133.0, "grad_norm": 1.078125, "learning_rate": 1.3844776942829766e-05, "loss": 0.1041, "step": 4921 }, { "epoch": 133.02702702702703, "grad_norm": 0.72265625, "learning_rate": 1.383477982867984e-05, "loss": 0.0785, "step": 4922 }, { "epoch": 133.05405405405406, "grad_norm": 1.234375, "learning_rate": 1.3824784944280978e-05, "loss": 0.1714, "step": 4923 }, { "epoch": 133.0810810810811, "grad_norm": 1.0390625, "learning_rate": 1.3814792291629217e-05, "loss": 0.1406, "step": 4924 }, { "epoch": 133.1081081081081, "grad_norm": 0.765625, "learning_rate": 1.3804801872720122e-05, "loss": 0.092, "step": 4925 }, { "epoch": 133.13513513513513, "grad_norm": 0.79296875, "learning_rate": 1.3794813689548847e-05, "loss": 0.0809, "step": 4926 }, { "epoch": 133.16216216216216, "grad_norm": 0.671875, "learning_rate": 1.3784827744110068e-05, "loss": 0.0746, "step": 4927 }, { "epoch": 133.1891891891892, "grad_norm": 1.046875, "learning_rate": 1.3774844038398037e-05, "loss": 0.1149, "step": 4928 }, { "epoch": 133.21621621621622, "grad_norm": 0.671875, "learning_rate": 1.3764862574406562e-05, "loss": 0.0732, "step": 4929 }, { "epoch": 133.24324324324326, "grad_norm": 1.125, "learning_rate": 1.3754883354128967e-05, "loss": 0.1258, "step": 4930 }, { "epoch": 133.27027027027026, "grad_norm": 0.953125, "learning_rate": 1.3744906379558165e-05, "loss": 0.1211, "step": 4931 }, { "epoch": 133.2972972972973, "grad_norm": 1.0, "learning_rate": 1.3734931652686622e-05, "loss": 0.1118, "step": 4932 }, { "epoch": 133.32432432432432, "grad_norm": 0.55859375, "learning_rate": 1.3724959175506319e-05, "loss": 0.0704, "step": 4933 }, { "epoch": 133.35135135135135, "grad_norm": 0.67578125, "learning_rate": 1.3714988950008817e-05, "loss": 0.0692, "step": 4934 }, { "epoch": 133.3783783783784, "grad_norm": 0.97265625, "learning_rate": 1.3705020978185235e-05, "loss": 0.1141, "step": 4935 }, { "epoch": 133.40540540540542, "grad_norm": 1.25, "learning_rate": 1.3695055262026207e-05, "loss": 0.1581, "step": 4936 }, { "epoch": 133.43243243243242, "grad_norm": 1.109375, "learning_rate": 1.3685091803521948e-05, "loss": 0.1085, "step": 4937 }, { "epoch": 133.45945945945945, "grad_norm": 1.2734375, "learning_rate": 1.3675130604662217e-05, "loss": 0.1611, "step": 4938 }, { "epoch": 133.48648648648648, "grad_norm": 1.171875, "learning_rate": 1.3665171667436299e-05, "loss": 0.1441, "step": 4939 }, { "epoch": 133.51351351351352, "grad_norm": 1.109375, "learning_rate": 1.365521499383306e-05, "loss": 0.1285, "step": 4940 }, { "epoch": 133.54054054054055, "grad_norm": 0.61328125, "learning_rate": 1.3645260585840885e-05, "loss": 0.0758, "step": 4941 }, { "epoch": 133.56756756756758, "grad_norm": 0.59375, "learning_rate": 1.3635308445447725e-05, "loss": 0.0753, "step": 4942 }, { "epoch": 133.59459459459458, "grad_norm": 0.68359375, "learning_rate": 1.3625358574641087e-05, "loss": 0.0876, "step": 4943 }, { "epoch": 133.6216216216216, "grad_norm": 1.1328125, "learning_rate": 1.3615410975407988e-05, "loss": 0.1386, "step": 4944 }, { "epoch": 133.64864864864865, "grad_norm": 0.7734375, "learning_rate": 1.3605465649735021e-05, "loss": 0.0642, "step": 4945 }, { "epoch": 133.67567567567568, "grad_norm": 0.9140625, "learning_rate": 1.3595522599608332e-05, "loss": 0.1185, "step": 4946 }, { "epoch": 133.7027027027027, "grad_norm": 1.3828125, "learning_rate": 1.3585581827013576e-05, "loss": 0.1728, "step": 4947 }, { "epoch": 133.72972972972974, "grad_norm": 0.90625, "learning_rate": 1.3575643333936e-05, "loss": 0.105, "step": 4948 }, { "epoch": 133.75675675675674, "grad_norm": 1.078125, "learning_rate": 1.3565707122360347e-05, "loss": 0.1317, "step": 4949 }, { "epoch": 133.78378378378378, "grad_norm": 0.8671875, "learning_rate": 1.3555773194270948e-05, "loss": 0.1057, "step": 4950 }, { "epoch": 133.8108108108108, "grad_norm": 1.296875, "learning_rate": 1.3545841551651644e-05, "loss": 0.1754, "step": 4951 }, { "epoch": 133.83783783783784, "grad_norm": 0.91796875, "learning_rate": 1.353591219648585e-05, "loss": 0.0903, "step": 4952 }, { "epoch": 133.86486486486487, "grad_norm": 1.1796875, "learning_rate": 1.3525985130756491e-05, "loss": 0.1283, "step": 4953 }, { "epoch": 133.8918918918919, "grad_norm": 0.671875, "learning_rate": 1.3516060356446063e-05, "loss": 0.0688, "step": 4954 }, { "epoch": 133.9189189189189, "grad_norm": 1.4375, "learning_rate": 1.3506137875536601e-05, "loss": 0.1725, "step": 4955 }, { "epoch": 133.94594594594594, "grad_norm": 1.0078125, "learning_rate": 1.3496217690009655e-05, "loss": 0.1338, "step": 4956 }, { "epoch": 133.97297297297297, "grad_norm": 1.3125, "learning_rate": 1.3486299801846357e-05, "loss": 0.1467, "step": 4957 }, { "epoch": 134.0, "grad_norm": 1.21875, "learning_rate": 1.3476384213027341e-05, "loss": 0.1214, "step": 4958 }, { "epoch": 134.02702702702703, "grad_norm": 1.125, "learning_rate": 1.346647092553281e-05, "loss": 0.1295, "step": 4959 }, { "epoch": 134.05405405405406, "grad_norm": 1.234375, "learning_rate": 1.34565599413425e-05, "loss": 0.1688, "step": 4960 }, { "epoch": 134.0810810810811, "grad_norm": 1.2265625, "learning_rate": 1.3446651262435678e-05, "loss": 0.1608, "step": 4961 }, { "epoch": 134.1081081081081, "grad_norm": 0.703125, "learning_rate": 1.3436744890791159e-05, "loss": 0.0742, "step": 4962 }, { "epoch": 134.13513513513513, "grad_norm": 0.5625, "learning_rate": 1.342684082838731e-05, "loss": 0.0559, "step": 4963 }, { "epoch": 134.16216216216216, "grad_norm": 0.765625, "learning_rate": 1.3416939077201998e-05, "loss": 0.0849, "step": 4964 }, { "epoch": 134.1891891891892, "grad_norm": 0.8125, "learning_rate": 1.3407039639212665e-05, "loss": 0.093, "step": 4965 }, { "epoch": 134.21621621621622, "grad_norm": 0.9921875, "learning_rate": 1.3397142516396288e-05, "loss": 0.1123, "step": 4966 }, { "epoch": 134.24324324324326, "grad_norm": 0.98046875, "learning_rate": 1.3387247710729355e-05, "loss": 0.1127, "step": 4967 }, { "epoch": 134.27027027027026, "grad_norm": 0.890625, "learning_rate": 1.337735522418792e-05, "loss": 0.098, "step": 4968 }, { "epoch": 134.2972972972973, "grad_norm": 1.3359375, "learning_rate": 1.3367465058747567e-05, "loss": 0.1773, "step": 4969 }, { "epoch": 134.32432432432432, "grad_norm": 0.84375, "learning_rate": 1.3357577216383394e-05, "loss": 0.0868, "step": 4970 }, { "epoch": 134.35135135135135, "grad_norm": 0.65625, "learning_rate": 1.3347691699070077e-05, "loss": 0.0745, "step": 4971 }, { "epoch": 134.3783783783784, "grad_norm": 0.6171875, "learning_rate": 1.333780850878178e-05, "loss": 0.0664, "step": 4972 }, { "epoch": 134.40540540540542, "grad_norm": 0.91015625, "learning_rate": 1.332792764749224e-05, "loss": 0.0966, "step": 4973 }, { "epoch": 134.43243243243242, "grad_norm": 0.53515625, "learning_rate": 1.3318049117174719e-05, "loss": 0.0657, "step": 4974 }, { "epoch": 134.45945945945945, "grad_norm": 1.2734375, "learning_rate": 1.3308172919801998e-05, "loss": 0.1409, "step": 4975 }, { "epoch": 134.48648648648648, "grad_norm": 0.96484375, "learning_rate": 1.3298299057346406e-05, "loss": 0.1128, "step": 4976 }, { "epoch": 134.51351351351352, "grad_norm": 0.74609375, "learning_rate": 1.3288427531779816e-05, "loss": 0.0968, "step": 4977 }, { "epoch": 134.54054054054055, "grad_norm": 0.75390625, "learning_rate": 1.3278558345073603e-05, "loss": 0.0818, "step": 4978 }, { "epoch": 134.56756756756758, "grad_norm": 0.80078125, "learning_rate": 1.3268691499198702e-05, "loss": 0.0922, "step": 4979 }, { "epoch": 134.59459459459458, "grad_norm": 0.94140625, "learning_rate": 1.3258826996125585e-05, "loss": 0.106, "step": 4980 }, { "epoch": 134.6216216216216, "grad_norm": 0.73828125, "learning_rate": 1.3248964837824218e-05, "loss": 0.0777, "step": 4981 }, { "epoch": 134.64864864864865, "grad_norm": 0.90625, "learning_rate": 1.3239105026264137e-05, "loss": 0.1012, "step": 4982 }, { "epoch": 134.67567567567568, "grad_norm": 0.890625, "learning_rate": 1.3229247563414408e-05, "loss": 0.1066, "step": 4983 }, { "epoch": 134.7027027027027, "grad_norm": 1.109375, "learning_rate": 1.3219392451243595e-05, "loss": 0.1735, "step": 4984 }, { "epoch": 134.72972972972974, "grad_norm": 0.484375, "learning_rate": 1.320953969171983e-05, "loss": 0.0628, "step": 4985 }, { "epoch": 134.75675675675674, "grad_norm": 0.72265625, "learning_rate": 1.3199689286810746e-05, "loss": 0.077, "step": 4986 }, { "epoch": 134.78378378378378, "grad_norm": 1.15625, "learning_rate": 1.3189841238483535e-05, "loss": 0.1354, "step": 4987 }, { "epoch": 134.8108108108108, "grad_norm": 1.4375, "learning_rate": 1.3179995548704883e-05, "loss": 0.2103, "step": 4988 }, { "epoch": 134.83783783783784, "grad_norm": 0.56640625, "learning_rate": 1.3170152219441043e-05, "loss": 0.0673, "step": 4989 }, { "epoch": 134.86486486486487, "grad_norm": 0.77734375, "learning_rate": 1.3160311252657758e-05, "loss": 0.0909, "step": 4990 }, { "epoch": 134.8918918918919, "grad_norm": 1.2890625, "learning_rate": 1.3150472650320339e-05, "loss": 0.1657, "step": 4991 }, { "epoch": 134.9189189189189, "grad_norm": 1.203125, "learning_rate": 1.3140636414393586e-05, "loss": 0.1168, "step": 4992 }, { "epoch": 134.94594594594594, "grad_norm": 0.76953125, "learning_rate": 1.3130802546841855e-05, "loss": 0.1003, "step": 4993 }, { "epoch": 134.97297297297297, "grad_norm": 0.89453125, "learning_rate": 1.3120971049629027e-05, "loss": 0.1103, "step": 4994 }, { "epoch": 135.0, "grad_norm": 0.8203125, "learning_rate": 1.3111141924718484e-05, "loss": 0.0831, "step": 4995 }, { "epoch": 135.02702702702703, "grad_norm": 1.328125, "learning_rate": 1.3101315174073162e-05, "loss": 0.1552, "step": 4996 }, { "epoch": 135.05405405405406, "grad_norm": 0.73046875, "learning_rate": 1.309149079965552e-05, "loss": 0.0925, "step": 4997 }, { "epoch": 135.0810810810811, "grad_norm": 1.25, "learning_rate": 1.3081668803427521e-05, "loss": 0.1252, "step": 4998 }, { "epoch": 135.1081081081081, "grad_norm": 1.25, "learning_rate": 1.3071849187350674e-05, "loss": 0.1786, "step": 4999 }, { "epoch": 135.13513513513513, "grad_norm": 0.85546875, "learning_rate": 1.3062031953386014e-05, "loss": 0.1266, "step": 5000 }, { "epoch": 135.16216216216216, "grad_norm": 1.15625, "learning_rate": 1.3052217103494074e-05, "loss": 0.1935, "step": 5001 }, { "epoch": 135.1891891891892, "grad_norm": 1.046875, "learning_rate": 1.3042404639634953e-05, "loss": 0.1272, "step": 5002 }, { "epoch": 135.21621621621622, "grad_norm": 0.63671875, "learning_rate": 1.3032594563768224e-05, "loss": 0.0695, "step": 5003 }, { "epoch": 135.24324324324326, "grad_norm": 0.9453125, "learning_rate": 1.3022786877853022e-05, "loss": 0.0836, "step": 5004 }, { "epoch": 135.27027027027026, "grad_norm": 0.9921875, "learning_rate": 1.3012981583848003e-05, "loss": 0.1057, "step": 5005 }, { "epoch": 135.2972972972973, "grad_norm": 1.3671875, "learning_rate": 1.300317868371131e-05, "loss": 0.1569, "step": 5006 }, { "epoch": 135.32432432432432, "grad_norm": 0.8203125, "learning_rate": 1.2993378179400645e-05, "loss": 0.0927, "step": 5007 }, { "epoch": 135.35135135135135, "grad_norm": 0.66015625, "learning_rate": 1.2983580072873225e-05, "loss": 0.0687, "step": 5008 }, { "epoch": 135.3783783783784, "grad_norm": 1.3828125, "learning_rate": 1.2973784366085767e-05, "loss": 0.1632, "step": 5009 }, { "epoch": 135.40540540540542, "grad_norm": 1.0078125, "learning_rate": 1.296399106099453e-05, "loss": 0.1489, "step": 5010 }, { "epoch": 135.43243243243242, "grad_norm": 0.83203125, "learning_rate": 1.2954200159555293e-05, "loss": 0.0949, "step": 5011 }, { "epoch": 135.45945945945945, "grad_norm": 0.578125, "learning_rate": 1.2944411663723335e-05, "loss": 0.0653, "step": 5012 }, { "epoch": 135.48648648648648, "grad_norm": 0.84375, "learning_rate": 1.2934625575453476e-05, "loss": 0.104, "step": 5013 }, { "epoch": 135.51351351351352, "grad_norm": 0.85546875, "learning_rate": 1.2924841896700056e-05, "loss": 0.0839, "step": 5014 }, { "epoch": 135.54054054054055, "grad_norm": 0.8828125, "learning_rate": 1.2915060629416908e-05, "loss": 0.1037, "step": 5015 }, { "epoch": 135.56756756756758, "grad_norm": 0.79296875, "learning_rate": 1.2905281775557418e-05, "loss": 0.0858, "step": 5016 }, { "epoch": 135.59459459459458, "grad_norm": 1.5078125, "learning_rate": 1.2895505337074453e-05, "loss": 0.2077, "step": 5017 }, { "epoch": 135.6216216216216, "grad_norm": 1.2578125, "learning_rate": 1.288573131592043e-05, "loss": 0.1804, "step": 5018 }, { "epoch": 135.64864864864865, "grad_norm": 0.953125, "learning_rate": 1.2875959714047275e-05, "loss": 0.0996, "step": 5019 }, { "epoch": 135.67567567567568, "grad_norm": 1.359375, "learning_rate": 1.2866190533406413e-05, "loss": 0.137, "step": 5020 }, { "epoch": 135.7027027027027, "grad_norm": 1.28125, "learning_rate": 1.2856423775948806e-05, "loss": 0.1785, "step": 5021 }, { "epoch": 135.72972972972974, "grad_norm": 0.84765625, "learning_rate": 1.284665944362493e-05, "loss": 0.1078, "step": 5022 }, { "epoch": 135.75675675675674, "grad_norm": 1.140625, "learning_rate": 1.2836897538384762e-05, "loss": 0.1476, "step": 5023 }, { "epoch": 135.78378378378378, "grad_norm": 0.474609375, "learning_rate": 1.2827138062177817e-05, "loss": 0.0601, "step": 5024 }, { "epoch": 135.8108108108108, "grad_norm": 1.0390625, "learning_rate": 1.2817381016953095e-05, "loss": 0.0816, "step": 5025 }, { "epoch": 135.83783783783784, "grad_norm": 0.83203125, "learning_rate": 1.2807626404659142e-05, "loss": 0.1188, "step": 5026 }, { "epoch": 135.86486486486487, "grad_norm": 0.86328125, "learning_rate": 1.2797874227243988e-05, "loss": 0.1428, "step": 5027 }, { "epoch": 135.8918918918919, "grad_norm": 0.60546875, "learning_rate": 1.2788124486655215e-05, "loss": 0.0673, "step": 5028 }, { "epoch": 135.9189189189189, "grad_norm": 0.89453125, "learning_rate": 1.2778377184839868e-05, "loss": 0.1013, "step": 5029 }, { "epoch": 135.94594594594594, "grad_norm": 1.28125, "learning_rate": 1.2768632323744559e-05, "loss": 0.1628, "step": 5030 }, { "epoch": 135.97297297297297, "grad_norm": 0.51953125, "learning_rate": 1.2758889905315364e-05, "loss": 0.0697, "step": 5031 }, { "epoch": 136.0, "grad_norm": 0.79296875, "learning_rate": 1.27491499314979e-05, "loss": 0.0942, "step": 5032 }, { "epoch": 136.02702702702703, "grad_norm": 0.94921875, "learning_rate": 1.2739412404237306e-05, "loss": 0.1074, "step": 5033 }, { "epoch": 136.05405405405406, "grad_norm": 0.6796875, "learning_rate": 1.272967732547819e-05, "loss": 0.0735, "step": 5034 }, { "epoch": 136.0810810810811, "grad_norm": 0.5546875, "learning_rate": 1.271994469716471e-05, "loss": 0.0606, "step": 5035 }, { "epoch": 136.1081081081081, "grad_norm": 0.83203125, "learning_rate": 1.2710214521240527e-05, "loss": 0.1042, "step": 5036 }, { "epoch": 136.13513513513513, "grad_norm": 0.9921875, "learning_rate": 1.2700486799648792e-05, "loss": 0.1382, "step": 5037 }, { "epoch": 136.16216216216216, "grad_norm": 1.078125, "learning_rate": 1.2690761534332185e-05, "loss": 0.1192, "step": 5038 }, { "epoch": 136.1891891891892, "grad_norm": 1.0390625, "learning_rate": 1.2681038727232905e-05, "loss": 0.1132, "step": 5039 }, { "epoch": 136.21621621621622, "grad_norm": 0.94140625, "learning_rate": 1.2671318380292624e-05, "loss": 0.0983, "step": 5040 }, { "epoch": 136.24324324324326, "grad_norm": 1.1796875, "learning_rate": 1.2661600495452552e-05, "loss": 0.126, "step": 5041 }, { "epoch": 136.27027027027026, "grad_norm": 1.125, "learning_rate": 1.2651885074653415e-05, "loss": 0.101, "step": 5042 }, { "epoch": 136.2972972972973, "grad_norm": 0.69921875, "learning_rate": 1.264217211983541e-05, "loss": 0.0762, "step": 5043 }, { "epoch": 136.32432432432432, "grad_norm": 1.21875, "learning_rate": 1.2632461632938271e-05, "loss": 0.1166, "step": 5044 }, { "epoch": 136.35135135135135, "grad_norm": 1.5546875, "learning_rate": 1.2622753615901244e-05, "loss": 0.1496, "step": 5045 }, { "epoch": 136.3783783783784, "grad_norm": 0.8125, "learning_rate": 1.2613048070663047e-05, "loss": 0.087, "step": 5046 }, { "epoch": 136.40540540540542, "grad_norm": 0.66015625, "learning_rate": 1.2603344999161947e-05, "loss": 0.0748, "step": 5047 }, { "epoch": 136.43243243243242, "grad_norm": 0.96875, "learning_rate": 1.2593644403335681e-05, "loss": 0.1333, "step": 5048 }, { "epoch": 136.45945945945945, "grad_norm": 0.87890625, "learning_rate": 1.2583946285121511e-05, "loss": 0.1136, "step": 5049 }, { "epoch": 136.48648648648648, "grad_norm": 1.1328125, "learning_rate": 1.2574250646456215e-05, "loss": 0.1432, "step": 5050 }, { "epoch": 136.51351351351352, "grad_norm": 0.5234375, "learning_rate": 1.2564557489276035e-05, "loss": 0.0624, "step": 5051 }, { "epoch": 136.54054054054055, "grad_norm": 0.61328125, "learning_rate": 1.2554866815516763e-05, "loss": 0.073, "step": 5052 }, { "epoch": 136.56756756756758, "grad_norm": 0.6015625, "learning_rate": 1.2545178627113676e-05, "loss": 0.0709, "step": 5053 }, { "epoch": 136.59459459459458, "grad_norm": 0.82421875, "learning_rate": 1.2535492926001541e-05, "loss": 0.089, "step": 5054 }, { "epoch": 136.6216216216216, "grad_norm": 0.8671875, "learning_rate": 1.2525809714114651e-05, "loss": 0.103, "step": 5055 }, { "epoch": 136.64864864864865, "grad_norm": 0.57421875, "learning_rate": 1.2516128993386795e-05, "loss": 0.0643, "step": 5056 }, { "epoch": 136.67567567567568, "grad_norm": 0.51953125, "learning_rate": 1.2506450765751252e-05, "loss": 0.0638, "step": 5057 }, { "epoch": 136.7027027027027, "grad_norm": 1.109375, "learning_rate": 1.2496775033140817e-05, "loss": 0.1208, "step": 5058 }, { "epoch": 136.72972972972974, "grad_norm": 1.359375, "learning_rate": 1.2487101797487797e-05, "loss": 0.1547, "step": 5059 }, { "epoch": 136.75675675675674, "grad_norm": 1.0234375, "learning_rate": 1.2477431060723963e-05, "loss": 0.1038, "step": 5060 }, { "epoch": 136.78378378378378, "grad_norm": 0.58984375, "learning_rate": 1.246776282478063e-05, "loss": 0.0655, "step": 5061 }, { "epoch": 136.8108108108108, "grad_norm": 0.875, "learning_rate": 1.2458097091588575e-05, "loss": 0.089, "step": 5062 }, { "epoch": 136.83783783783784, "grad_norm": 1.1171875, "learning_rate": 1.2448433863078116e-05, "loss": 0.1326, "step": 5063 }, { "epoch": 136.86486486486487, "grad_norm": 1.0390625, "learning_rate": 1.2438773141179025e-05, "loss": 0.1302, "step": 5064 }, { "epoch": 136.8918918918919, "grad_norm": 2.15625, "learning_rate": 1.2429114927820618e-05, "loss": 0.2845, "step": 5065 }, { "epoch": 136.9189189189189, "grad_norm": 1.0546875, "learning_rate": 1.241945922493167e-05, "loss": 0.1287, "step": 5066 }, { "epoch": 136.94594594594594, "grad_norm": 0.59765625, "learning_rate": 1.2409806034440486e-05, "loss": 0.0664, "step": 5067 }, { "epoch": 136.97297297297297, "grad_norm": 1.109375, "learning_rate": 1.2400155358274863e-05, "loss": 0.1303, "step": 5068 }, { "epoch": 137.0, "grad_norm": 0.84765625, "learning_rate": 1.239050719836207e-05, "loss": 0.093, "step": 5069 }, { "epoch": 137.02702702702703, "grad_norm": 0.90234375, "learning_rate": 1.2380861556628915e-05, "loss": 0.0941, "step": 5070 }, { "epoch": 137.05405405405406, "grad_norm": 1.0859375, "learning_rate": 1.2371218435001663e-05, "loss": 0.1132, "step": 5071 }, { "epoch": 137.0810810810811, "grad_norm": 1.4296875, "learning_rate": 1.2361577835406105e-05, "loss": 0.1973, "step": 5072 }, { "epoch": 137.1081081081081, "grad_norm": 1.2578125, "learning_rate": 1.2351939759767523e-05, "loss": 0.1719, "step": 5073 }, { "epoch": 137.13513513513513, "grad_norm": 1.3671875, "learning_rate": 1.2342304210010672e-05, "loss": 0.146, "step": 5074 }, { "epoch": 137.16216216216216, "grad_norm": 0.4609375, "learning_rate": 1.233267118805983e-05, "loss": 0.0639, "step": 5075 }, { "epoch": 137.1891891891892, "grad_norm": 0.796875, "learning_rate": 1.2323040695838769e-05, "loss": 0.089, "step": 5076 }, { "epoch": 137.21621621621622, "grad_norm": 0.828125, "learning_rate": 1.2313412735270732e-05, "loss": 0.0994, "step": 5077 }, { "epoch": 137.24324324324326, "grad_norm": 0.9375, "learning_rate": 1.2303787308278487e-05, "loss": 0.0917, "step": 5078 }, { "epoch": 137.27027027027026, "grad_norm": 1.1328125, "learning_rate": 1.2294164416784259e-05, "loss": 0.1574, "step": 5079 }, { "epoch": 137.2972972972973, "grad_norm": 0.99609375, "learning_rate": 1.2284544062709804e-05, "loss": 0.1126, "step": 5080 }, { "epoch": 137.32432432432432, "grad_norm": 1.21875, "learning_rate": 1.2274926247976362e-05, "loss": 0.1322, "step": 5081 }, { "epoch": 137.35135135135135, "grad_norm": 1.0390625, "learning_rate": 1.2265310974504638e-05, "loss": 0.1024, "step": 5082 }, { "epoch": 137.3783783783784, "grad_norm": 0.73828125, "learning_rate": 1.2255698244214864e-05, "loss": 0.0812, "step": 5083 }, { "epoch": 137.40540540540542, "grad_norm": 0.83984375, "learning_rate": 1.2246088059026758e-05, "loss": 0.1249, "step": 5084 }, { "epoch": 137.43243243243242, "grad_norm": 0.609375, "learning_rate": 1.2236480420859507e-05, "loss": 0.078, "step": 5085 }, { "epoch": 137.45945945945945, "grad_norm": 0.8984375, "learning_rate": 1.222687533163181e-05, "loss": 0.1129, "step": 5086 }, { "epoch": 137.48648648648648, "grad_norm": 1.1484375, "learning_rate": 1.2217272793261867e-05, "loss": 0.1566, "step": 5087 }, { "epoch": 137.51351351351352, "grad_norm": 1.484375, "learning_rate": 1.2207672807667328e-05, "loss": 0.193, "step": 5088 }, { "epoch": 137.54054054054055, "grad_norm": 1.453125, "learning_rate": 1.2198075376765372e-05, "loss": 0.1884, "step": 5089 }, { "epoch": 137.56756756756758, "grad_norm": 0.8046875, "learning_rate": 1.2188480502472662e-05, "loss": 0.1001, "step": 5090 }, { "epoch": 137.59459459459458, "grad_norm": 1.09375, "learning_rate": 1.2178888186705328e-05, "loss": 0.1384, "step": 5091 }, { "epoch": 137.6216216216216, "grad_norm": 1.0859375, "learning_rate": 1.2169298431379023e-05, "loss": 0.1267, "step": 5092 }, { "epoch": 137.64864864864865, "grad_norm": 0.80859375, "learning_rate": 1.2159711238408848e-05, "loss": 0.0865, "step": 5093 }, { "epoch": 137.67567567567568, "grad_norm": 1.234375, "learning_rate": 1.215012660970942e-05, "loss": 0.1896, "step": 5094 }, { "epoch": 137.7027027027027, "grad_norm": 1.3359375, "learning_rate": 1.2140544547194855e-05, "loss": 0.1829, "step": 5095 }, { "epoch": 137.72972972972974, "grad_norm": 0.640625, "learning_rate": 1.2130965052778715e-05, "loss": 0.0746, "step": 5096 }, { "epoch": 137.75675675675674, "grad_norm": 1.4453125, "learning_rate": 1.2121388128374087e-05, "loss": 0.2074, "step": 5097 }, { "epoch": 137.78378378378378, "grad_norm": 0.98046875, "learning_rate": 1.2111813775893539e-05, "loss": 0.1211, "step": 5098 }, { "epoch": 137.8108108108108, "grad_norm": 0.640625, "learning_rate": 1.2102241997249095e-05, "loss": 0.0818, "step": 5099 }, { "epoch": 137.83783783783784, "grad_norm": 1.1953125, "learning_rate": 1.2092672794352315e-05, "loss": 0.1579, "step": 5100 }, { "epoch": 137.86486486486487, "grad_norm": 0.65234375, "learning_rate": 1.2083106169114191e-05, "loss": 0.0871, "step": 5101 }, { "epoch": 137.8918918918919, "grad_norm": 1.46875, "learning_rate": 1.2073542123445239e-05, "loss": 0.1807, "step": 5102 }, { "epoch": 137.9189189189189, "grad_norm": 1.3515625, "learning_rate": 1.2063980659255459e-05, "loss": 0.1752, "step": 5103 }, { "epoch": 137.94594594594594, "grad_norm": 1.3125, "learning_rate": 1.20544217784543e-05, "loss": 0.1471, "step": 5104 }, { "epoch": 137.97297297297297, "grad_norm": 0.83984375, "learning_rate": 1.204486548295074e-05, "loss": 0.0976, "step": 5105 }, { "epoch": 138.0, "grad_norm": 1.1328125, "learning_rate": 1.2035311774653203e-05, "loss": 0.1449, "step": 5106 }, { "epoch": 138.02702702702703, "grad_norm": 0.71875, "learning_rate": 1.202576065546963e-05, "loss": 0.0744, "step": 5107 }, { "epoch": 138.05405405405406, "grad_norm": 0.8828125, "learning_rate": 1.2016212127307408e-05, "loss": 0.1159, "step": 5108 }, { "epoch": 138.0810810810811, "grad_norm": 0.65625, "learning_rate": 1.2006666192073445e-05, "loss": 0.0705, "step": 5109 }, { "epoch": 138.1081081081081, "grad_norm": 1.171875, "learning_rate": 1.1997122851674097e-05, "loss": 0.1281, "step": 5110 }, { "epoch": 138.13513513513513, "grad_norm": 1.1796875, "learning_rate": 1.1987582108015228e-05, "loss": 0.1295, "step": 5111 }, { "epoch": 138.16216216216216, "grad_norm": 0.90625, "learning_rate": 1.1978043963002175e-05, "loss": 0.1033, "step": 5112 }, { "epoch": 138.1891891891892, "grad_norm": 0.625, "learning_rate": 1.196850841853974e-05, "loss": 0.0715, "step": 5113 }, { "epoch": 138.21621621621622, "grad_norm": 0.78515625, "learning_rate": 1.195897547653223e-05, "loss": 0.1229, "step": 5114 }, { "epoch": 138.24324324324326, "grad_norm": 0.94140625, "learning_rate": 1.1949445138883428e-05, "loss": 0.1104, "step": 5115 }, { "epoch": 138.27027027027026, "grad_norm": 1.109375, "learning_rate": 1.1939917407496576e-05, "loss": 0.1302, "step": 5116 }, { "epoch": 138.2972972972973, "grad_norm": 0.984375, "learning_rate": 1.1930392284274413e-05, "loss": 0.1172, "step": 5117 }, { "epoch": 138.32432432432432, "grad_norm": 0.84765625, "learning_rate": 1.1920869771119173e-05, "loss": 0.0944, "step": 5118 }, { "epoch": 138.35135135135135, "grad_norm": 1.203125, "learning_rate": 1.1911349869932523e-05, "loss": 0.1501, "step": 5119 }, { "epoch": 138.3783783783784, "grad_norm": 0.58984375, "learning_rate": 1.1901832582615649e-05, "loss": 0.0667, "step": 5120 }, { "epoch": 138.40540540540542, "grad_norm": 1.15625, "learning_rate": 1.1892317911069212e-05, "loss": 0.1652, "step": 5121 }, { "epoch": 138.43243243243242, "grad_norm": 0.83203125, "learning_rate": 1.1882805857193319e-05, "loss": 0.0894, "step": 5122 }, { "epoch": 138.45945945945945, "grad_norm": 1.296875, "learning_rate": 1.1873296422887592e-05, "loss": 0.1329, "step": 5123 }, { "epoch": 138.48648648648648, "grad_norm": 0.86328125, "learning_rate": 1.18637896100511e-05, "loss": 0.08, "step": 5124 }, { "epoch": 138.51351351351352, "grad_norm": 0.8125, "learning_rate": 1.1854285420582409e-05, "loss": 0.0933, "step": 5125 }, { "epoch": 138.54054054054055, "grad_norm": 0.859375, "learning_rate": 1.184478385637956e-05, "loss": 0.0839, "step": 5126 }, { "epoch": 138.56756756756758, "grad_norm": 0.84375, "learning_rate": 1.1835284919340048e-05, "loss": 0.0875, "step": 5127 }, { "epoch": 138.59459459459458, "grad_norm": 0.75, "learning_rate": 1.1825788611360866e-05, "loss": 0.0918, "step": 5128 }, { "epoch": 138.6216216216216, "grad_norm": 0.76953125, "learning_rate": 1.1816294934338489e-05, "loss": 0.0891, "step": 5129 }, { "epoch": 138.64864864864865, "grad_norm": 1.109375, "learning_rate": 1.180680389016883e-05, "loss": 0.1205, "step": 5130 }, { "epoch": 138.67567567567568, "grad_norm": 0.87109375, "learning_rate": 1.1797315480747306e-05, "loss": 0.0995, "step": 5131 }, { "epoch": 138.7027027027027, "grad_norm": 1.015625, "learning_rate": 1.1787829707968815e-05, "loss": 0.1297, "step": 5132 }, { "epoch": 138.72972972972974, "grad_norm": 0.91015625, "learning_rate": 1.1778346573727688e-05, "loss": 0.0983, "step": 5133 }, { "epoch": 138.75675675675674, "grad_norm": 1.296875, "learning_rate": 1.1768866079917772e-05, "loss": 0.1822, "step": 5134 }, { "epoch": 138.78378378378378, "grad_norm": 0.69140625, "learning_rate": 1.1759388228432375e-05, "loss": 0.0772, "step": 5135 }, { "epoch": 138.8108108108108, "grad_norm": 1.3125, "learning_rate": 1.1749913021164255e-05, "loss": 0.1876, "step": 5136 }, { "epoch": 138.83783783783784, "grad_norm": 1.3046875, "learning_rate": 1.1740440460005675e-05, "loss": 0.1122, "step": 5137 }, { "epoch": 138.86486486486487, "grad_norm": 0.6171875, "learning_rate": 1.1730970546848338e-05, "loss": 0.074, "step": 5138 }, { "epoch": 138.8918918918919, "grad_norm": 1.2890625, "learning_rate": 1.172150328358344e-05, "loss": 0.187, "step": 5139 }, { "epoch": 138.9189189189189, "grad_norm": 0.765625, "learning_rate": 1.1712038672101654e-05, "loss": 0.0943, "step": 5140 }, { "epoch": 138.94594594594594, "grad_norm": 1.25, "learning_rate": 1.1702576714293089e-05, "loss": 0.1899, "step": 5141 }, { "epoch": 138.97297297297297, "grad_norm": 1.359375, "learning_rate": 1.1693117412047366e-05, "loss": 0.156, "step": 5142 }, { "epoch": 139.0, "grad_norm": 0.78125, "learning_rate": 1.1683660767253538e-05, "loss": 0.0872, "step": 5143 }, { "epoch": 139.02702702702703, "grad_norm": 1.21875, "learning_rate": 1.1674206781800162e-05, "loss": 0.1395, "step": 5144 }, { "epoch": 139.05405405405406, "grad_norm": 0.7734375, "learning_rate": 1.166475545757523e-05, "loss": 0.0776, "step": 5145 }, { "epoch": 139.0810810810811, "grad_norm": 0.90234375, "learning_rate": 1.1655306796466234e-05, "loss": 0.0862, "step": 5146 }, { "epoch": 139.1081081081081, "grad_norm": 0.88671875, "learning_rate": 1.1645860800360106e-05, "loss": 0.1105, "step": 5147 }, { "epoch": 139.13513513513513, "grad_norm": 0.80859375, "learning_rate": 1.1636417471143265e-05, "loss": 0.0808, "step": 5148 }, { "epoch": 139.16216216216216, "grad_norm": 1.1328125, "learning_rate": 1.1626976810701603e-05, "loss": 0.1161, "step": 5149 }, { "epoch": 139.1891891891892, "grad_norm": 0.97265625, "learning_rate": 1.1617538820920449e-05, "loss": 0.1379, "step": 5150 }, { "epoch": 139.21621621621622, "grad_norm": 0.57421875, "learning_rate": 1.1608103503684623e-05, "loss": 0.069, "step": 5151 }, { "epoch": 139.24324324324326, "grad_norm": 0.859375, "learning_rate": 1.1598670860878418e-05, "loss": 0.1011, "step": 5152 }, { "epoch": 139.27027027027026, "grad_norm": 0.92578125, "learning_rate": 1.1589240894385567e-05, "loss": 0.0881, "step": 5153 }, { "epoch": 139.2972972972973, "grad_norm": 0.6171875, "learning_rate": 1.1579813606089293e-05, "loss": 0.0714, "step": 5154 }, { "epoch": 139.32432432432432, "grad_norm": 0.8671875, "learning_rate": 1.1570388997872255e-05, "loss": 0.1375, "step": 5155 }, { "epoch": 139.35135135135135, "grad_norm": 1.21875, "learning_rate": 1.156096707161661e-05, "loss": 0.1385, "step": 5156 }, { "epoch": 139.3783783783784, "grad_norm": 0.984375, "learning_rate": 1.155154782920397e-05, "loss": 0.1342, "step": 5157 }, { "epoch": 139.40540540540542, "grad_norm": 0.8671875, "learning_rate": 1.1542131272515386e-05, "loss": 0.0978, "step": 5158 }, { "epoch": 139.43243243243242, "grad_norm": 1.046875, "learning_rate": 1.1532717403431403e-05, "loss": 0.1118, "step": 5159 }, { "epoch": 139.45945945945945, "grad_norm": 1.3984375, "learning_rate": 1.1523306223832029e-05, "loss": 0.1207, "step": 5160 }, { "epoch": 139.48648648648648, "grad_norm": 1.109375, "learning_rate": 1.1513897735596702e-05, "loss": 0.1153, "step": 5161 }, { "epoch": 139.51351351351352, "grad_norm": 0.84375, "learning_rate": 1.1504491940604357e-05, "loss": 0.0972, "step": 5162 }, { "epoch": 139.54054054054055, "grad_norm": 0.86328125, "learning_rate": 1.1495088840733387e-05, "loss": 0.108, "step": 5163 }, { "epoch": 139.56756756756758, "grad_norm": 1.0625, "learning_rate": 1.1485688437861622e-05, "loss": 0.1215, "step": 5164 }, { "epoch": 139.59459459459458, "grad_norm": 1.34375, "learning_rate": 1.1476290733866377e-05, "loss": 0.1332, "step": 5165 }, { "epoch": 139.6216216216216, "grad_norm": 0.6484375, "learning_rate": 1.146689573062443e-05, "loss": 0.0842, "step": 5166 }, { "epoch": 139.64864864864865, "grad_norm": 0.875, "learning_rate": 1.1457503430011995e-05, "loss": 0.1015, "step": 5167 }, { "epoch": 139.67567567567568, "grad_norm": 0.80078125, "learning_rate": 1.1448113833904777e-05, "loss": 0.1426, "step": 5168 }, { "epoch": 139.7027027027027, "grad_norm": 0.494140625, "learning_rate": 1.1438726944177913e-05, "loss": 0.0635, "step": 5169 }, { "epoch": 139.72972972972974, "grad_norm": 1.2265625, "learning_rate": 1.1429342762706015e-05, "loss": 0.1654, "step": 5170 }, { "epoch": 139.75675675675674, "grad_norm": 0.84765625, "learning_rate": 1.1419961291363168e-05, "loss": 0.0842, "step": 5171 }, { "epoch": 139.78378378378378, "grad_norm": 1.4765625, "learning_rate": 1.1410582532022873e-05, "loss": 0.2315, "step": 5172 }, { "epoch": 139.8108108108108, "grad_norm": 1.1484375, "learning_rate": 1.140120648655813e-05, "loss": 0.1549, "step": 5173 }, { "epoch": 139.83783783783784, "grad_norm": 1.1328125, "learning_rate": 1.1391833156841391e-05, "loss": 0.1531, "step": 5174 }, { "epoch": 139.86486486486487, "grad_norm": 1.53125, "learning_rate": 1.138246254474454e-05, "loss": 0.2056, "step": 5175 }, { "epoch": 139.8918918918919, "grad_norm": 1.0859375, "learning_rate": 1.1373094652138943e-05, "loss": 0.1426, "step": 5176 }, { "epoch": 139.9189189189189, "grad_norm": 1.2265625, "learning_rate": 1.136372948089543e-05, "loss": 0.1389, "step": 5177 }, { "epoch": 139.94594594594594, "grad_norm": 0.7421875, "learning_rate": 1.1354367032884244e-05, "loss": 0.0698, "step": 5178 }, { "epoch": 139.97297297297297, "grad_norm": 0.640625, "learning_rate": 1.1345007309975142e-05, "loss": 0.0707, "step": 5179 }, { "epoch": 140.0, "grad_norm": 0.5, "learning_rate": 1.1335650314037286e-05, "loss": 0.0647, "step": 5180 }, { "epoch": 140.02702702702703, "grad_norm": 1.109375, "learning_rate": 1.1326296046939333e-05, "loss": 0.1441, "step": 5181 }, { "epoch": 140.05405405405406, "grad_norm": 0.84765625, "learning_rate": 1.131694451054936e-05, "loss": 0.1003, "step": 5182 }, { "epoch": 140.0810810810811, "grad_norm": 0.9609375, "learning_rate": 1.1307595706734936e-05, "loss": 0.112, "step": 5183 }, { "epoch": 140.1081081081081, "grad_norm": 0.7265625, "learning_rate": 1.1298249637363042e-05, "loss": 0.0717, "step": 5184 }, { "epoch": 140.13513513513513, "grad_norm": 0.64453125, "learning_rate": 1.1288906304300158e-05, "loss": 0.0824, "step": 5185 }, { "epoch": 140.16216216216216, "grad_norm": 1.21875, "learning_rate": 1.127956570941218e-05, "loss": 0.1287, "step": 5186 }, { "epoch": 140.1891891891892, "grad_norm": 0.68359375, "learning_rate": 1.1270227854564473e-05, "loss": 0.0783, "step": 5187 }, { "epoch": 140.21621621621622, "grad_norm": 1.3203125, "learning_rate": 1.1260892741621864e-05, "loss": 0.1512, "step": 5188 }, { "epoch": 140.24324324324326, "grad_norm": 0.92578125, "learning_rate": 1.1251560372448608e-05, "loss": 0.1224, "step": 5189 }, { "epoch": 140.27027027027026, "grad_norm": 0.98828125, "learning_rate": 1.1242230748908437e-05, "loss": 0.1101, "step": 5190 }, { "epoch": 140.2972972972973, "grad_norm": 1.0625, "learning_rate": 1.1232903872864529e-05, "loss": 0.1253, "step": 5191 }, { "epoch": 140.32432432432432, "grad_norm": 0.99609375, "learning_rate": 1.1223579746179488e-05, "loss": 0.1172, "step": 5192 }, { "epoch": 140.35135135135135, "grad_norm": 0.95703125, "learning_rate": 1.1214258370715408e-05, "loss": 0.1063, "step": 5193 }, { "epoch": 140.3783783783784, "grad_norm": 0.73828125, "learning_rate": 1.1204939748333816e-05, "loss": 0.0696, "step": 5194 }, { "epoch": 140.40540540540542, "grad_norm": 1.0625, "learning_rate": 1.1195623880895672e-05, "loss": 0.1327, "step": 5195 }, { "epoch": 140.43243243243242, "grad_norm": 1.3515625, "learning_rate": 1.118631077026141e-05, "loss": 0.1742, "step": 5196 }, { "epoch": 140.45945945945945, "grad_norm": 0.734375, "learning_rate": 1.1177000418290917e-05, "loss": 0.0746, "step": 5197 }, { "epoch": 140.48648648648648, "grad_norm": 1.265625, "learning_rate": 1.1167692826843498e-05, "loss": 0.154, "step": 5198 }, { "epoch": 140.51351351351352, "grad_norm": 1.34375, "learning_rate": 1.1158387997777945e-05, "loss": 0.1421, "step": 5199 }, { "epoch": 140.54054054054055, "grad_norm": 0.64453125, "learning_rate": 1.1149085932952461e-05, "loss": 0.0804, "step": 5200 }, { "epoch": 140.56756756756758, "grad_norm": 1.3359375, "learning_rate": 1.1139786634224721e-05, "loss": 0.1515, "step": 5201 }, { "epoch": 140.59459459459458, "grad_norm": 0.8203125, "learning_rate": 1.1130490103451858e-05, "loss": 0.0954, "step": 5202 }, { "epoch": 140.6216216216216, "grad_norm": 1.1640625, "learning_rate": 1.1121196342490413e-05, "loss": 0.1266, "step": 5203 }, { "epoch": 140.64864864864865, "grad_norm": 1.0390625, "learning_rate": 1.1111905353196408e-05, "loss": 0.1394, "step": 5204 }, { "epoch": 140.67567567567568, "grad_norm": 0.62109375, "learning_rate": 1.1102617137425311e-05, "loss": 0.0709, "step": 5205 }, { "epoch": 140.7027027027027, "grad_norm": 0.578125, "learning_rate": 1.1093331697032005e-05, "loss": 0.0699, "step": 5206 }, { "epoch": 140.72972972972974, "grad_norm": 1.015625, "learning_rate": 1.108404903387085e-05, "loss": 0.1092, "step": 5207 }, { "epoch": 140.75675675675674, "grad_norm": 1.2734375, "learning_rate": 1.107476914979565e-05, "loss": 0.1975, "step": 5208 }, { "epoch": 140.78378378378378, "grad_norm": 0.8828125, "learning_rate": 1.1065492046659629e-05, "loss": 0.0895, "step": 5209 }, { "epoch": 140.8108108108108, "grad_norm": 0.9609375, "learning_rate": 1.1056217726315474e-05, "loss": 0.0968, "step": 5210 }, { "epoch": 140.83783783783784, "grad_norm": 1.1796875, "learning_rate": 1.1046946190615329e-05, "loss": 0.0975, "step": 5211 }, { "epoch": 140.86486486486487, "grad_norm": 0.9765625, "learning_rate": 1.1037677441410746e-05, "loss": 0.0896, "step": 5212 }, { "epoch": 140.8918918918919, "grad_norm": 0.86328125, "learning_rate": 1.1028411480552758e-05, "loss": 0.093, "step": 5213 }, { "epoch": 140.9189189189189, "grad_norm": 0.46875, "learning_rate": 1.101914830989181e-05, "loss": 0.0574, "step": 5214 }, { "epoch": 140.94594594594594, "grad_norm": 1.0703125, "learning_rate": 1.1009887931277813e-05, "loss": 0.1304, "step": 5215 }, { "epoch": 140.97297297297297, "grad_norm": 0.77734375, "learning_rate": 1.1000630346560117e-05, "loss": 0.0731, "step": 5216 }, { "epoch": 141.0, "grad_norm": 0.6328125, "learning_rate": 1.0991375557587494e-05, "loss": 0.0744, "step": 5217 }, { "epoch": 141.02702702702703, "grad_norm": 1.0859375, "learning_rate": 1.0982123566208185e-05, "loss": 0.1288, "step": 5218 }, { "epoch": 141.05405405405406, "grad_norm": 0.65625, "learning_rate": 1.0972874374269848e-05, "loss": 0.0763, "step": 5219 }, { "epoch": 141.0810810810811, "grad_norm": 1.328125, "learning_rate": 1.0963627983619609e-05, "loss": 0.1598, "step": 5220 }, { "epoch": 141.1081081081081, "grad_norm": 0.5546875, "learning_rate": 1.0954384396104e-05, "loss": 0.0741, "step": 5221 }, { "epoch": 141.13513513513513, "grad_norm": 1.1171875, "learning_rate": 1.0945143613569031e-05, "loss": 0.0998, "step": 5222 }, { "epoch": 141.16216216216216, "grad_norm": 0.9140625, "learning_rate": 1.0935905637860116e-05, "loss": 0.1347, "step": 5223 }, { "epoch": 141.1891891891892, "grad_norm": 1.296875, "learning_rate": 1.0926670470822133e-05, "loss": 0.1751, "step": 5224 }, { "epoch": 141.21621621621622, "grad_norm": 1.234375, "learning_rate": 1.0917438114299402e-05, "loss": 0.1436, "step": 5225 }, { "epoch": 141.24324324324326, "grad_norm": 1.4765625, "learning_rate": 1.0908208570135653e-05, "loss": 0.1591, "step": 5226 }, { "epoch": 141.27027027027026, "grad_norm": 0.56640625, "learning_rate": 1.0898981840174091e-05, "loss": 0.0737, "step": 5227 }, { "epoch": 141.2972972972973, "grad_norm": 0.76953125, "learning_rate": 1.088975792625732e-05, "loss": 0.0884, "step": 5228 }, { "epoch": 141.32432432432432, "grad_norm": 0.8046875, "learning_rate": 1.0880536830227418e-05, "loss": 0.0795, "step": 5229 }, { "epoch": 141.35135135135135, "grad_norm": 1.609375, "learning_rate": 1.0871318553925886e-05, "loss": 0.1758, "step": 5230 }, { "epoch": 141.3783783783784, "grad_norm": 1.2265625, "learning_rate": 1.0862103099193647e-05, "loss": 0.1233, "step": 5231 }, { "epoch": 141.40540540540542, "grad_norm": 1.09375, "learning_rate": 1.085289046787108e-05, "loss": 0.1328, "step": 5232 }, { "epoch": 141.43243243243242, "grad_norm": 0.55078125, "learning_rate": 1.0843680661798008e-05, "loss": 0.0712, "step": 5233 }, { "epoch": 141.45945945945945, "grad_norm": 1.234375, "learning_rate": 1.0834473682813654e-05, "loss": 0.1149, "step": 5234 }, { "epoch": 141.48648648648648, "grad_norm": 0.5078125, "learning_rate": 1.0825269532756707e-05, "loss": 0.0703, "step": 5235 }, { "epoch": 141.51351351351352, "grad_norm": 1.21875, "learning_rate": 1.0816068213465293e-05, "loss": 0.1356, "step": 5236 }, { "epoch": 141.54054054054055, "grad_norm": 0.9921875, "learning_rate": 1.0806869726776947e-05, "loss": 0.1269, "step": 5237 }, { "epoch": 141.56756756756758, "grad_norm": 0.58203125, "learning_rate": 1.0797674074528654e-05, "loss": 0.0732, "step": 5238 }, { "epoch": 141.59459459459458, "grad_norm": 0.984375, "learning_rate": 1.0788481258556851e-05, "loss": 0.1013, "step": 5239 }, { "epoch": 141.6216216216216, "grad_norm": 0.5703125, "learning_rate": 1.0779291280697368e-05, "loss": 0.0706, "step": 5240 }, { "epoch": 141.64864864864865, "grad_norm": 0.85546875, "learning_rate": 1.0770104142785497e-05, "loss": 0.0975, "step": 5241 }, { "epoch": 141.67567567567568, "grad_norm": 0.76171875, "learning_rate": 1.0760919846655968e-05, "loss": 0.0949, "step": 5242 }, { "epoch": 141.7027027027027, "grad_norm": 0.6640625, "learning_rate": 1.075173839414291e-05, "loss": 0.0743, "step": 5243 }, { "epoch": 141.72972972972974, "grad_norm": 1.0703125, "learning_rate": 1.0742559787079929e-05, "loss": 0.1025, "step": 5244 }, { "epoch": 141.75675675675674, "grad_norm": 1.03125, "learning_rate": 1.0733384027300013e-05, "loss": 0.1181, "step": 5245 }, { "epoch": 141.78378378378378, "grad_norm": 0.7109375, "learning_rate": 1.0724211116635627e-05, "loss": 0.0857, "step": 5246 }, { "epoch": 141.8108108108108, "grad_norm": 0.765625, "learning_rate": 1.0715041056918651e-05, "loss": 0.0798, "step": 5247 }, { "epoch": 141.83783783783784, "grad_norm": 0.87109375, "learning_rate": 1.0705873849980374e-05, "loss": 0.0942, "step": 5248 }, { "epoch": 141.86486486486487, "grad_norm": 1.0234375, "learning_rate": 1.0696709497651545e-05, "loss": 0.1238, "step": 5249 }, { "epoch": 141.8918918918919, "grad_norm": 1.0546875, "learning_rate": 1.0687548001762337e-05, "loss": 0.1064, "step": 5250 }, { "epoch": 141.9189189189189, "grad_norm": 0.66796875, "learning_rate": 1.0678389364142335e-05, "loss": 0.0655, "step": 5251 }, { "epoch": 141.94594594594594, "grad_norm": 1.2109375, "learning_rate": 1.066923358662057e-05, "loss": 0.1615, "step": 5252 }, { "epoch": 141.97297297297297, "grad_norm": 0.82421875, "learning_rate": 1.0660080671025505e-05, "loss": 0.1217, "step": 5253 }, { "epoch": 142.0, "grad_norm": 1.1875, "learning_rate": 1.065093061918501e-05, "loss": 0.127, "step": 5254 }, { "epoch": 142.02702702702703, "grad_norm": 1.34375, "learning_rate": 1.064178343292641e-05, "loss": 0.1908, "step": 5255 }, { "epoch": 142.05405405405406, "grad_norm": 0.703125, "learning_rate": 1.0632639114076431e-05, "loss": 0.073, "step": 5256 }, { "epoch": 142.0810810810811, "grad_norm": 1.015625, "learning_rate": 1.0623497664461258e-05, "loss": 0.115, "step": 5257 }, { "epoch": 142.1081081081081, "grad_norm": 0.6796875, "learning_rate": 1.0614359085906462e-05, "loss": 0.0767, "step": 5258 }, { "epoch": 142.13513513513513, "grad_norm": 0.453125, "learning_rate": 1.0605223380237084e-05, "loss": 0.0631, "step": 5259 }, { "epoch": 142.16216216216216, "grad_norm": 0.75, "learning_rate": 1.0596090549277554e-05, "loss": 0.0846, "step": 5260 }, { "epoch": 142.1891891891892, "grad_norm": 1.140625, "learning_rate": 1.0586960594851762e-05, "loss": 0.1508, "step": 5261 }, { "epoch": 142.21621621621622, "grad_norm": 0.91015625, "learning_rate": 1.0577833518782983e-05, "loss": 0.1469, "step": 5262 }, { "epoch": 142.24324324324326, "grad_norm": 0.984375, "learning_rate": 1.0568709322893961e-05, "loss": 0.1176, "step": 5263 }, { "epoch": 142.27027027027026, "grad_norm": 0.80078125, "learning_rate": 1.055958800900684e-05, "loss": 0.0844, "step": 5264 }, { "epoch": 142.2972972972973, "grad_norm": 0.578125, "learning_rate": 1.0550469578943184e-05, "loss": 0.0772, "step": 5265 }, { "epoch": 142.32432432432432, "grad_norm": 0.8515625, "learning_rate": 1.0541354034523995e-05, "loss": 0.0823, "step": 5266 }, { "epoch": 142.35135135135135, "grad_norm": 1.1875, "learning_rate": 1.0532241377569704e-05, "loss": 0.1526, "step": 5267 }, { "epoch": 142.3783783783784, "grad_norm": 0.62890625, "learning_rate": 1.0523131609900137e-05, "loss": 0.0697, "step": 5268 }, { "epoch": 142.40540540540542, "grad_norm": 0.859375, "learning_rate": 1.0514024733334563e-05, "loss": 0.1058, "step": 5269 }, { "epoch": 142.43243243243242, "grad_norm": 0.9296875, "learning_rate": 1.050492074969169e-05, "loss": 0.0982, "step": 5270 }, { "epoch": 142.45945945945945, "grad_norm": 0.60546875, "learning_rate": 1.0495819660789607e-05, "loss": 0.072, "step": 5271 }, { "epoch": 142.48648648648648, "grad_norm": 0.66796875, "learning_rate": 1.0486721468445857e-05, "loss": 0.0803, "step": 5272 }, { "epoch": 142.51351351351352, "grad_norm": 0.9296875, "learning_rate": 1.0477626174477404e-05, "loss": 0.1014, "step": 5273 }, { "epoch": 142.54054054054055, "grad_norm": 0.6328125, "learning_rate": 1.0468533780700606e-05, "loss": 0.074, "step": 5274 }, { "epoch": 142.56756756756758, "grad_norm": 0.66796875, "learning_rate": 1.045944428893128e-05, "loss": 0.0831, "step": 5275 }, { "epoch": 142.59459459459458, "grad_norm": 0.875, "learning_rate": 1.0450357700984623e-05, "loss": 0.1014, "step": 5276 }, { "epoch": 142.6216216216216, "grad_norm": 0.59765625, "learning_rate": 1.0441274018675284e-05, "loss": 0.0702, "step": 5277 }, { "epoch": 142.64864864864865, "grad_norm": 0.62890625, "learning_rate": 1.0432193243817328e-05, "loss": 0.0741, "step": 5278 }, { "epoch": 142.67567567567568, "grad_norm": 0.890625, "learning_rate": 1.0423115378224214e-05, "loss": 0.0933, "step": 5279 }, { "epoch": 142.7027027027027, "grad_norm": 0.890625, "learning_rate": 1.0414040423708848e-05, "loss": 0.0981, "step": 5280 }, { "epoch": 142.72972972972974, "grad_norm": 0.9609375, "learning_rate": 1.0404968382083552e-05, "loss": 0.1081, "step": 5281 }, { "epoch": 142.75675675675674, "grad_norm": 1.03125, "learning_rate": 1.0395899255160041e-05, "loss": 0.1233, "step": 5282 }, { "epoch": 142.78378378378378, "grad_norm": 0.6875, "learning_rate": 1.0386833044749473e-05, "loss": 0.077, "step": 5283 }, { "epoch": 142.8108108108108, "grad_norm": 0.55859375, "learning_rate": 1.0377769752662428e-05, "loss": 0.068, "step": 5284 }, { "epoch": 142.83783783783784, "grad_norm": 0.9609375, "learning_rate": 1.0368709380708871e-05, "loss": 0.0927, "step": 5285 }, { "epoch": 142.86486486486487, "grad_norm": 1.015625, "learning_rate": 1.0359651930698216e-05, "loss": 0.1346, "step": 5286 }, { "epoch": 142.8918918918919, "grad_norm": 1.1953125, "learning_rate": 1.0350597404439288e-05, "loss": 0.1257, "step": 5287 }, { "epoch": 142.9189189189189, "grad_norm": 1.171875, "learning_rate": 1.0341545803740308e-05, "loss": 0.1107, "step": 5288 }, { "epoch": 142.94594594594594, "grad_norm": 1.4375, "learning_rate": 1.0332497130408938e-05, "loss": 0.1636, "step": 5289 }, { "epoch": 142.97297297297297, "grad_norm": 0.98046875, "learning_rate": 1.0323451386252231e-05, "loss": 0.1201, "step": 5290 }, { "epoch": 143.0, "grad_norm": 1.359375, "learning_rate": 1.0314408573076675e-05, "loss": 0.1995, "step": 5291 }, { "epoch": 143.02702702702703, "grad_norm": 0.94140625, "learning_rate": 1.0305368692688174e-05, "loss": 0.114, "step": 5292 }, { "epoch": 143.05405405405406, "grad_norm": 1.1328125, "learning_rate": 1.0296331746892021e-05, "loss": 0.1013, "step": 5293 }, { "epoch": 143.0810810810811, "grad_norm": 0.71875, "learning_rate": 1.028729773749296e-05, "loss": 0.0915, "step": 5294 }, { "epoch": 143.1081081081081, "grad_norm": 1.0703125, "learning_rate": 1.0278266666295105e-05, "loss": 0.1286, "step": 5295 }, { "epoch": 143.13513513513513, "grad_norm": 1.1328125, "learning_rate": 1.026923853510203e-05, "loss": 0.0834, "step": 5296 }, { "epoch": 143.16216216216216, "grad_norm": 1.140625, "learning_rate": 1.0260213345716676e-05, "loss": 0.1044, "step": 5297 }, { "epoch": 143.1891891891892, "grad_norm": 1.484375, "learning_rate": 1.0251191099941434e-05, "loss": 0.1934, "step": 5298 }, { "epoch": 143.21621621621622, "grad_norm": 1.1015625, "learning_rate": 1.0242171799578096e-05, "loss": 0.1629, "step": 5299 }, { "epoch": 143.24324324324326, "grad_norm": 0.546875, "learning_rate": 1.0233155446427845e-05, "loss": 0.0622, "step": 5300 }, { "epoch": 143.27027027027026, "grad_norm": 1.3125, "learning_rate": 1.0224142042291312e-05, "loss": 0.232, "step": 5301 }, { "epoch": 143.2972972972973, "grad_norm": 1.2421875, "learning_rate": 1.0215131588968501e-05, "loss": 0.1654, "step": 5302 }, { "epoch": 143.32432432432432, "grad_norm": 0.890625, "learning_rate": 1.0206124088258864e-05, "loss": 0.1096, "step": 5303 }, { "epoch": 143.35135135135135, "grad_norm": 1.3046875, "learning_rate": 1.0197119541961222e-05, "loss": 0.1704, "step": 5304 }, { "epoch": 143.3783783783784, "grad_norm": 0.4765625, "learning_rate": 1.0188117951873844e-05, "loss": 0.0625, "step": 5305 }, { "epoch": 143.40540540540542, "grad_norm": 1.328125, "learning_rate": 1.0179119319794397e-05, "loss": 0.199, "step": 5306 }, { "epoch": 143.43243243243242, "grad_norm": 1.140625, "learning_rate": 1.0170123647519941e-05, "loss": 0.1817, "step": 5307 }, { "epoch": 143.45945945945945, "grad_norm": 0.58203125, "learning_rate": 1.016113093684696e-05, "loss": 0.0718, "step": 5308 }, { "epoch": 143.48648648648648, "grad_norm": 0.77734375, "learning_rate": 1.0152141189571357e-05, "loss": 0.0798, "step": 5309 }, { "epoch": 143.51351351351352, "grad_norm": 1.1484375, "learning_rate": 1.0143154407488409e-05, "loss": 0.16, "step": 5310 }, { "epoch": 143.54054054054055, "grad_norm": 0.85546875, "learning_rate": 1.0134170592392836e-05, "loss": 0.0668, "step": 5311 }, { "epoch": 143.56756756756758, "grad_norm": 1.0546875, "learning_rate": 1.0125189746078754e-05, "loss": 0.1353, "step": 5312 }, { "epoch": 143.59459459459458, "grad_norm": 0.86328125, "learning_rate": 1.011621187033967e-05, "loss": 0.1071, "step": 5313 }, { "epoch": 143.6216216216216, "grad_norm": 0.95703125, "learning_rate": 1.010723696696852e-05, "loss": 0.1084, "step": 5314 }, { "epoch": 143.64864864864865, "grad_norm": 0.69140625, "learning_rate": 1.0098265037757646e-05, "loss": 0.0834, "step": 5315 }, { "epoch": 143.67567567567568, "grad_norm": 1.21875, "learning_rate": 1.0089296084498768e-05, "loss": 0.1757, "step": 5316 }, { "epoch": 143.7027027027027, "grad_norm": 0.859375, "learning_rate": 1.0080330108983043e-05, "loss": 0.0979, "step": 5317 }, { "epoch": 143.72972972972974, "grad_norm": 1.1953125, "learning_rate": 1.0071367113001029e-05, "loss": 0.1842, "step": 5318 }, { "epoch": 143.75675675675674, "grad_norm": 0.8984375, "learning_rate": 1.0062407098342666e-05, "loss": 0.1212, "step": 5319 }, { "epoch": 143.78378378378378, "grad_norm": 1.1953125, "learning_rate": 1.005345006679733e-05, "loss": 0.1107, "step": 5320 }, { "epoch": 143.8108108108108, "grad_norm": 1.40625, "learning_rate": 1.0044496020153766e-05, "loss": 0.1514, "step": 5321 }, { "epoch": 143.83783783783784, "grad_norm": 1.2734375, "learning_rate": 1.0035544960200155e-05, "loss": 0.1811, "step": 5322 }, { "epoch": 143.86486486486487, "grad_norm": 1.0703125, "learning_rate": 1.0026596888724079e-05, "loss": 0.128, "step": 5323 }, { "epoch": 143.8918918918919, "grad_norm": 0.5390625, "learning_rate": 1.0017651807512494e-05, "loss": 0.0603, "step": 5324 }, { "epoch": 143.9189189189189, "grad_norm": 0.81640625, "learning_rate": 1.0008709718351782e-05, "loss": 0.1092, "step": 5325 }, { "epoch": 143.94594594594594, "grad_norm": 1.3515625, "learning_rate": 9.99977062302774e-06, "loss": 0.1913, "step": 5326 }, { "epoch": 143.97297297297297, "grad_norm": 0.9140625, "learning_rate": 9.99083452332553e-06, "loss": 0.1033, "step": 5327 }, { "epoch": 144.0, "grad_norm": 1.1796875, "learning_rate": 9.98190142102974e-06, "loss": 0.153, "step": 5328 }, { "epoch": 144.02702702702703, "grad_norm": 0.64453125, "learning_rate": 9.972971317924374e-06, "loss": 0.0717, "step": 5329 }, { "epoch": 144.05405405405406, "grad_norm": 0.93359375, "learning_rate": 9.964044215792796e-06, "loss": 0.1074, "step": 5330 }, { "epoch": 144.0810810810811, "grad_norm": 1.0234375, "learning_rate": 9.95512011641781e-06, "loss": 0.12, "step": 5331 }, { "epoch": 144.1081081081081, "grad_norm": 1.015625, "learning_rate": 9.94619902158159e-06, "loss": 0.0888, "step": 5332 }, { "epoch": 144.13513513513513, "grad_norm": 1.25, "learning_rate": 9.937280933065729e-06, "loss": 0.143, "step": 5333 }, { "epoch": 144.16216216216216, "grad_norm": 1.1015625, "learning_rate": 9.928365852651228e-06, "loss": 0.128, "step": 5334 }, { "epoch": 144.1891891891892, "grad_norm": 0.98046875, "learning_rate": 9.919453782118452e-06, "loss": 0.136, "step": 5335 }, { "epoch": 144.21621621621622, "grad_norm": 1.1328125, "learning_rate": 9.910544723247203e-06, "loss": 0.1136, "step": 5336 }, { "epoch": 144.24324324324326, "grad_norm": 0.58984375, "learning_rate": 9.901638677816651e-06, "loss": 0.0744, "step": 5337 }, { "epoch": 144.27027027027026, "grad_norm": 1.0546875, "learning_rate": 9.892735647605398e-06, "loss": 0.1105, "step": 5338 }, { "epoch": 144.2972972972973, "grad_norm": 0.67578125, "learning_rate": 9.8838356343914e-06, "loss": 0.0712, "step": 5339 }, { "epoch": 144.32432432432432, "grad_norm": 0.84765625, "learning_rate": 9.874938639952058e-06, "loss": 0.0883, "step": 5340 }, { "epoch": 144.35135135135135, "grad_norm": 0.75, "learning_rate": 9.866044666064128e-06, "loss": 0.0976, "step": 5341 }, { "epoch": 144.3783783783784, "grad_norm": 1.28125, "learning_rate": 9.85715371450379e-06, "loss": 0.1414, "step": 5342 }, { "epoch": 144.40540540540542, "grad_norm": 0.95703125, "learning_rate": 9.848265787046617e-06, "loss": 0.1074, "step": 5343 }, { "epoch": 144.43243243243242, "grad_norm": 0.57421875, "learning_rate": 9.839380885467563e-06, "loss": 0.0739, "step": 5344 }, { "epoch": 144.45945945945945, "grad_norm": 0.6875, "learning_rate": 9.830499011540992e-06, "loss": 0.0757, "step": 5345 }, { "epoch": 144.48648648648648, "grad_norm": 0.69921875, "learning_rate": 9.821620167040668e-06, "loss": 0.0827, "step": 5346 }, { "epoch": 144.51351351351352, "grad_norm": 0.90625, "learning_rate": 9.812744353739728e-06, "loss": 0.1047, "step": 5347 }, { "epoch": 144.54054054054055, "grad_norm": 0.4765625, "learning_rate": 9.803871573410719e-06, "loss": 0.0582, "step": 5348 }, { "epoch": 144.56756756756758, "grad_norm": 1.6796875, "learning_rate": 9.795001827825596e-06, "loss": 0.3118, "step": 5349 }, { "epoch": 144.59459459459458, "grad_norm": 0.83984375, "learning_rate": 9.786135118755666e-06, "loss": 0.0897, "step": 5350 }, { "epoch": 144.6216216216216, "grad_norm": 0.91796875, "learning_rate": 9.777271447971681e-06, "loss": 0.1351, "step": 5351 }, { "epoch": 144.64864864864865, "grad_norm": 1.015625, "learning_rate": 9.768410817243739e-06, "loss": 0.1299, "step": 5352 }, { "epoch": 144.67567567567568, "grad_norm": 0.71875, "learning_rate": 9.759553228341367e-06, "loss": 0.079, "step": 5353 }, { "epoch": 144.7027027027027, "grad_norm": 0.6171875, "learning_rate": 9.750698683033471e-06, "loss": 0.0746, "step": 5354 }, { "epoch": 144.72972972972974, "grad_norm": 1.0703125, "learning_rate": 9.74184718308834e-06, "loss": 0.1387, "step": 5355 }, { "epoch": 144.75675675675674, "grad_norm": 0.953125, "learning_rate": 9.732998730273665e-06, "loss": 0.1002, "step": 5356 }, { "epoch": 144.78378378378378, "grad_norm": 0.828125, "learning_rate": 9.724153326356539e-06, "loss": 0.1005, "step": 5357 }, { "epoch": 144.8108108108108, "grad_norm": 0.94921875, "learning_rate": 9.715310973103417e-06, "loss": 0.0949, "step": 5358 }, { "epoch": 144.83783783783784, "grad_norm": 1.4140625, "learning_rate": 9.706471672280169e-06, "loss": 0.1868, "step": 5359 }, { "epoch": 144.86486486486487, "grad_norm": 0.8984375, "learning_rate": 9.697635425652055e-06, "loss": 0.0942, "step": 5360 }, { "epoch": 144.8918918918919, "grad_norm": 1.1484375, "learning_rate": 9.688802234983704e-06, "loss": 0.1494, "step": 5361 }, { "epoch": 144.9189189189189, "grad_norm": 0.97265625, "learning_rate": 9.67997210203916e-06, "loss": 0.1054, "step": 5362 }, { "epoch": 144.94594594594594, "grad_norm": 0.494140625, "learning_rate": 9.671145028581845e-06, "loss": 0.0643, "step": 5363 }, { "epoch": 144.97297297297297, "grad_norm": 0.7734375, "learning_rate": 9.662321016374561e-06, "loss": 0.0852, "step": 5364 }, { "epoch": 145.0, "grad_norm": 0.72265625, "learning_rate": 9.653500067179525e-06, "loss": 0.0779, "step": 5365 }, { "epoch": 145.02702702702703, "grad_norm": 0.56640625, "learning_rate": 9.644682182758306e-06, "loss": 0.0713, "step": 5366 }, { "epoch": 145.05405405405406, "grad_norm": 0.6796875, "learning_rate": 9.635867364871887e-06, "loss": 0.083, "step": 5367 }, { "epoch": 145.0810810810811, "grad_norm": 1.2734375, "learning_rate": 9.627055615280642e-06, "loss": 0.1391, "step": 5368 }, { "epoch": 145.1081081081081, "grad_norm": 0.87890625, "learning_rate": 9.618246935744307e-06, "loss": 0.0863, "step": 5369 }, { "epoch": 145.13513513513513, "grad_norm": 0.83203125, "learning_rate": 9.609441328022025e-06, "loss": 0.0876, "step": 5370 }, { "epoch": 145.16216216216216, "grad_norm": 1.0234375, "learning_rate": 9.60063879387233e-06, "loss": 0.1205, "step": 5371 }, { "epoch": 145.1891891891892, "grad_norm": 1.1953125, "learning_rate": 9.591839335053119e-06, "loss": 0.1433, "step": 5372 }, { "epoch": 145.21621621621622, "grad_norm": 1.1328125, "learning_rate": 9.5830429533217e-06, "loss": 0.1586, "step": 5373 }, { "epoch": 145.24324324324326, "grad_norm": 1.4453125, "learning_rate": 9.574249650434745e-06, "loss": 0.136, "step": 5374 }, { "epoch": 145.27027027027026, "grad_norm": 0.890625, "learning_rate": 9.565459428148329e-06, "loss": 0.1014, "step": 5375 }, { "epoch": 145.2972972972973, "grad_norm": 1.1328125, "learning_rate": 9.556672288217893e-06, "loss": 0.1771, "step": 5376 }, { "epoch": 145.32432432432432, "grad_norm": 1.015625, "learning_rate": 9.547888232398291e-06, "loss": 0.1458, "step": 5377 }, { "epoch": 145.35135135135135, "grad_norm": 0.75, "learning_rate": 9.539107262443725e-06, "loss": 0.0951, "step": 5378 }, { "epoch": 145.3783783783784, "grad_norm": 1.0234375, "learning_rate": 9.530329380107816e-06, "loss": 0.1003, "step": 5379 }, { "epoch": 145.40540540540542, "grad_norm": 1.0390625, "learning_rate": 9.521554587143533e-06, "loss": 0.0868, "step": 5380 }, { "epoch": 145.43243243243242, "grad_norm": 0.92578125, "learning_rate": 9.512782885303258e-06, "loss": 0.1003, "step": 5381 }, { "epoch": 145.45945945945945, "grad_norm": 1.1953125, "learning_rate": 9.504014276338752e-06, "loss": 0.1478, "step": 5382 }, { "epoch": 145.48648648648648, "grad_norm": 1.0625, "learning_rate": 9.495248762001132e-06, "loss": 0.1033, "step": 5383 }, { "epoch": 145.51351351351352, "grad_norm": 1.1171875, "learning_rate": 9.486486344040927e-06, "loss": 0.1099, "step": 5384 }, { "epoch": 145.54054054054055, "grad_norm": 1.0703125, "learning_rate": 9.477727024208044e-06, "loss": 0.1484, "step": 5385 }, { "epoch": 145.56756756756758, "grad_norm": 1.125, "learning_rate": 9.468970804251742e-06, "loss": 0.1378, "step": 5386 }, { "epoch": 145.59459459459458, "grad_norm": 1.15625, "learning_rate": 9.460217685920697e-06, "loss": 0.1257, "step": 5387 }, { "epoch": 145.6216216216216, "grad_norm": 1.140625, "learning_rate": 9.451467670962957e-06, "loss": 0.1128, "step": 5388 }, { "epoch": 145.64864864864865, "grad_norm": 1.25, "learning_rate": 9.442720761125929e-06, "loss": 0.1695, "step": 5389 }, { "epoch": 145.67567567567568, "grad_norm": 1.4140625, "learning_rate": 9.43397695815642e-06, "loss": 0.1666, "step": 5390 }, { "epoch": 145.7027027027027, "grad_norm": 0.58984375, "learning_rate": 9.425236263800625e-06, "loss": 0.0724, "step": 5391 }, { "epoch": 145.72972972972974, "grad_norm": 0.80859375, "learning_rate": 9.416498679804085e-06, "loss": 0.0841, "step": 5392 }, { "epoch": 145.75675675675674, "grad_norm": 0.74609375, "learning_rate": 9.407764207911748e-06, "loss": 0.0907, "step": 5393 }, { "epoch": 145.78378378378378, "grad_norm": 1.3359375, "learning_rate": 9.399032849867941e-06, "loss": 0.1644, "step": 5394 }, { "epoch": 145.8108108108108, "grad_norm": 0.953125, "learning_rate": 9.390304607416347e-06, "loss": 0.1009, "step": 5395 }, { "epoch": 145.83783783783784, "grad_norm": 1.015625, "learning_rate": 9.381579482300055e-06, "loss": 0.1373, "step": 5396 }, { "epoch": 145.86486486486487, "grad_norm": 0.9609375, "learning_rate": 9.372857476261495e-06, "loss": 0.1291, "step": 5397 }, { "epoch": 145.8918918918919, "grad_norm": 1.1953125, "learning_rate": 9.364138591042512e-06, "loss": 0.1549, "step": 5398 }, { "epoch": 145.9189189189189, "grad_norm": 1.3671875, "learning_rate": 9.355422828384316e-06, "loss": 0.1453, "step": 5399 }, { "epoch": 145.94594594594594, "grad_norm": 0.71875, "learning_rate": 9.346710190027472e-06, "loss": 0.0845, "step": 5400 }, { "epoch": 145.97297297297297, "grad_norm": 0.6796875, "learning_rate": 9.338000677711946e-06, "loss": 0.0798, "step": 5401 }, { "epoch": 146.0, "grad_norm": 0.984375, "learning_rate": 9.329294293177084e-06, "loss": 0.1216, "step": 5402 }, { "epoch": 146.02702702702703, "grad_norm": 0.609375, "learning_rate": 9.320591038161574e-06, "loss": 0.0637, "step": 5403 }, { "epoch": 146.05405405405406, "grad_norm": 1.0234375, "learning_rate": 9.311890914403513e-06, "loss": 0.1121, "step": 5404 }, { "epoch": 146.0810810810811, "grad_norm": 1.1171875, "learning_rate": 9.30319392364036e-06, "loss": 0.1341, "step": 5405 }, { "epoch": 146.1081081081081, "grad_norm": 1.34375, "learning_rate": 9.29450006760894e-06, "loss": 0.1718, "step": 5406 }, { "epoch": 146.13513513513513, "grad_norm": 0.9140625, "learning_rate": 9.285809348045465e-06, "loss": 0.1191, "step": 5407 }, { "epoch": 146.16216216216216, "grad_norm": 1.1796875, "learning_rate": 9.277121766685523e-06, "loss": 0.1216, "step": 5408 }, { "epoch": 146.1891891891892, "grad_norm": 1.1328125, "learning_rate": 9.268437325264054e-06, "loss": 0.1133, "step": 5409 }, { "epoch": 146.21621621621622, "grad_norm": 0.66796875, "learning_rate": 9.2597560255154e-06, "loss": 0.077, "step": 5410 }, { "epoch": 146.24324324324326, "grad_norm": 0.8515625, "learning_rate": 9.251077869173244e-06, "loss": 0.1034, "step": 5411 }, { "epoch": 146.27027027027026, "grad_norm": 0.77734375, "learning_rate": 9.242402857970677e-06, "loss": 0.083, "step": 5412 }, { "epoch": 146.2972972972973, "grad_norm": 1.1015625, "learning_rate": 9.233730993640122e-06, "loss": 0.1386, "step": 5413 }, { "epoch": 146.32432432432432, "grad_norm": 0.91015625, "learning_rate": 9.225062277913413e-06, "loss": 0.0953, "step": 5414 }, { "epoch": 146.35135135135135, "grad_norm": 0.984375, "learning_rate": 9.21639671252172e-06, "loss": 0.1247, "step": 5415 }, { "epoch": 146.3783783783784, "grad_norm": 1.1484375, "learning_rate": 9.207734299195615e-06, "loss": 0.1603, "step": 5416 }, { "epoch": 146.40540540540542, "grad_norm": 1.0546875, "learning_rate": 9.19907503966501e-06, "loss": 0.1546, "step": 5417 }, { "epoch": 146.43243243243242, "grad_norm": 0.9296875, "learning_rate": 9.190418935659213e-06, "loss": 0.1409, "step": 5418 }, { "epoch": 146.45945945945945, "grad_norm": 0.48046875, "learning_rate": 9.181765988906898e-06, "loss": 0.0636, "step": 5419 }, { "epoch": 146.48648648648648, "grad_norm": 0.6015625, "learning_rate": 9.173116201136087e-06, "loss": 0.0757, "step": 5420 }, { "epoch": 146.51351351351352, "grad_norm": 0.98046875, "learning_rate": 9.164469574074197e-06, "loss": 0.124, "step": 5421 }, { "epoch": 146.54054054054055, "grad_norm": 1.046875, "learning_rate": 9.155826109448006e-06, "loss": 0.1282, "step": 5422 }, { "epoch": 146.56756756756758, "grad_norm": 0.76953125, "learning_rate": 9.147185808983646e-06, "loss": 0.088, "step": 5423 }, { "epoch": 146.59459459459458, "grad_norm": 0.94140625, "learning_rate": 9.138548674406647e-06, "loss": 0.1116, "step": 5424 }, { "epoch": 146.6216216216216, "grad_norm": 1.1015625, "learning_rate": 9.129914707441864e-06, "loss": 0.1499, "step": 5425 }, { "epoch": 146.64864864864865, "grad_norm": 0.7578125, "learning_rate": 9.121283909813558e-06, "loss": 0.0907, "step": 5426 }, { "epoch": 146.67567567567568, "grad_norm": 0.8046875, "learning_rate": 9.112656283245352e-06, "loss": 0.0813, "step": 5427 }, { "epoch": 146.7027027027027, "grad_norm": 0.7421875, "learning_rate": 9.104031829460205e-06, "loss": 0.084, "step": 5428 }, { "epoch": 146.72972972972974, "grad_norm": 1.0078125, "learning_rate": 9.095410550180478e-06, "loss": 0.1116, "step": 5429 }, { "epoch": 146.75675675675674, "grad_norm": 0.734375, "learning_rate": 9.08679244712789e-06, "loss": 0.0883, "step": 5430 }, { "epoch": 146.78378378378378, "grad_norm": 0.94921875, "learning_rate": 9.078177522023504e-06, "loss": 0.1117, "step": 5431 }, { "epoch": 146.8108108108108, "grad_norm": 0.9375, "learning_rate": 9.069565776587775e-06, "loss": 0.1211, "step": 5432 }, { "epoch": 146.83783783783784, "grad_norm": 0.99609375, "learning_rate": 9.060957212540513e-06, "loss": 0.1124, "step": 5433 }, { "epoch": 146.86486486486487, "grad_norm": 0.98046875, "learning_rate": 9.05235183160088e-06, "loss": 0.1469, "step": 5434 }, { "epoch": 146.8918918918919, "grad_norm": 1.0234375, "learning_rate": 9.043749635487424e-06, "loss": 0.1168, "step": 5435 }, { "epoch": 146.9189189189189, "grad_norm": 1.375, "learning_rate": 9.035150625918054e-06, "loss": 0.196, "step": 5436 }, { "epoch": 146.94594594594594, "grad_norm": 1.0859375, "learning_rate": 9.026554804610019e-06, "loss": 0.1366, "step": 5437 }, { "epoch": 146.97297297297297, "grad_norm": 0.63671875, "learning_rate": 9.017962173279956e-06, "loss": 0.0723, "step": 5438 }, { "epoch": 147.0, "grad_norm": 0.8515625, "learning_rate": 9.009372733643864e-06, "loss": 0.0984, "step": 5439 }, { "epoch": 147.02702702702703, "grad_norm": 1.0234375, "learning_rate": 9.000786487417085e-06, "loss": 0.1071, "step": 5440 }, { "epoch": 147.05405405405406, "grad_norm": 0.72265625, "learning_rate": 8.99220343631435e-06, "loss": 0.0848, "step": 5441 }, { "epoch": 147.0810810810811, "grad_norm": 0.9140625, "learning_rate": 8.983623582049721e-06, "loss": 0.1273, "step": 5442 }, { "epoch": 147.1081081081081, "grad_norm": 1.4375, "learning_rate": 8.975046926336648e-06, "loss": 0.1793, "step": 5443 }, { "epoch": 147.13513513513513, "grad_norm": 0.91015625, "learning_rate": 8.96647347088794e-06, "loss": 0.1117, "step": 5444 }, { "epoch": 147.16216216216216, "grad_norm": 0.68359375, "learning_rate": 8.957903217415742e-06, "loss": 0.0831, "step": 5445 }, { "epoch": 147.1891891891892, "grad_norm": 1.2109375, "learning_rate": 8.949336167631585e-06, "loss": 0.1833, "step": 5446 }, { "epoch": 147.21621621621622, "grad_norm": 1.3671875, "learning_rate": 8.940772323246366e-06, "loss": 0.1452, "step": 5447 }, { "epoch": 147.24324324324326, "grad_norm": 1.265625, "learning_rate": 8.932211685970307e-06, "loss": 0.1891, "step": 5448 }, { "epoch": 147.27027027027026, "grad_norm": 0.75390625, "learning_rate": 8.923654257513025e-06, "loss": 0.074, "step": 5449 }, { "epoch": 147.2972972972973, "grad_norm": 0.75390625, "learning_rate": 8.915100039583468e-06, "loss": 0.0901, "step": 5450 }, { "epoch": 147.32432432432432, "grad_norm": 0.828125, "learning_rate": 8.906549033889974e-06, "loss": 0.0811, "step": 5451 }, { "epoch": 147.35135135135135, "grad_norm": 0.875, "learning_rate": 8.898001242140205e-06, "loss": 0.1261, "step": 5452 }, { "epoch": 147.3783783783784, "grad_norm": 1.03125, "learning_rate": 8.889456666041219e-06, "loss": 0.1205, "step": 5453 }, { "epoch": 147.40540540540542, "grad_norm": 0.7890625, "learning_rate": 8.880915307299383e-06, "loss": 0.095, "step": 5454 }, { "epoch": 147.43243243243242, "grad_norm": 1.296875, "learning_rate": 8.872377167620477e-06, "loss": 0.1094, "step": 5455 }, { "epoch": 147.45945945945945, "grad_norm": 0.97265625, "learning_rate": 8.863842248709591e-06, "loss": 0.0802, "step": 5456 }, { "epoch": 147.48648648648648, "grad_norm": 0.78125, "learning_rate": 8.855310552271198e-06, "loss": 0.0915, "step": 5457 }, { "epoch": 147.51351351351352, "grad_norm": 0.97265625, "learning_rate": 8.846782080009134e-06, "loss": 0.0998, "step": 5458 }, { "epoch": 147.54054054054055, "grad_norm": 1.1796875, "learning_rate": 8.838256833626556e-06, "loss": 0.1408, "step": 5459 }, { "epoch": 147.56756756756758, "grad_norm": 1.171875, "learning_rate": 8.82973481482601e-06, "loss": 0.1477, "step": 5460 }, { "epoch": 147.59459459459458, "grad_norm": 0.71484375, "learning_rate": 8.821216025309395e-06, "loss": 0.0785, "step": 5461 }, { "epoch": 147.6216216216216, "grad_norm": 1.15625, "learning_rate": 8.812700466777937e-06, "loss": 0.1957, "step": 5462 }, { "epoch": 147.64864864864865, "grad_norm": 1.3125, "learning_rate": 8.80418814093225e-06, "loss": 0.1517, "step": 5463 }, { "epoch": 147.67567567567568, "grad_norm": 1.2421875, "learning_rate": 8.795679049472291e-06, "loss": 0.1362, "step": 5464 }, { "epoch": 147.7027027027027, "grad_norm": 0.91796875, "learning_rate": 8.787173194097357e-06, "loss": 0.0814, "step": 5465 }, { "epoch": 147.72972972972974, "grad_norm": 0.85546875, "learning_rate": 8.778670576506115e-06, "loss": 0.0843, "step": 5466 }, { "epoch": 147.75675675675674, "grad_norm": 1.0078125, "learning_rate": 8.77017119839659e-06, "loss": 0.1088, "step": 5467 }, { "epoch": 147.78378378378378, "grad_norm": 1.125, "learning_rate": 8.761675061466138e-06, "loss": 0.14, "step": 5468 }, { "epoch": 147.8108108108108, "grad_norm": 0.7734375, "learning_rate": 8.75318216741148e-06, "loss": 0.0809, "step": 5469 }, { "epoch": 147.83783783783784, "grad_norm": 1.5546875, "learning_rate": 8.74469251792871e-06, "loss": 0.2514, "step": 5470 }, { "epoch": 147.86486486486487, "grad_norm": 0.85546875, "learning_rate": 8.736206114713227e-06, "loss": 0.1083, "step": 5471 }, { "epoch": 147.8918918918919, "grad_norm": 1.015625, "learning_rate": 8.727722959459833e-06, "loss": 0.0918, "step": 5472 }, { "epoch": 147.9189189189189, "grad_norm": 0.6640625, "learning_rate": 8.719243053862636e-06, "loss": 0.0752, "step": 5473 }, { "epoch": 147.94594594594594, "grad_norm": 0.6484375, "learning_rate": 8.710766399615125e-06, "loss": 0.0768, "step": 5474 }, { "epoch": 147.97297297297297, "grad_norm": 1.5390625, "learning_rate": 8.702292998410142e-06, "loss": 0.1765, "step": 5475 }, { "epoch": 148.0, "grad_norm": 0.50390625, "learning_rate": 8.693822851939848e-06, "loss": 0.0614, "step": 5476 }, { "epoch": 148.02702702702703, "grad_norm": 0.7890625, "learning_rate": 8.685355961895784e-06, "loss": 0.0782, "step": 5477 }, { "epoch": 148.05405405405406, "grad_norm": 1.1484375, "learning_rate": 8.676892329968838e-06, "loss": 0.1137, "step": 5478 }, { "epoch": 148.0810810810811, "grad_norm": 0.5390625, "learning_rate": 8.668431957849227e-06, "loss": 0.0716, "step": 5479 }, { "epoch": 148.1081081081081, "grad_norm": 1.0234375, "learning_rate": 8.659974847226534e-06, "loss": 0.1118, "step": 5480 }, { "epoch": 148.13513513513513, "grad_norm": 0.8984375, "learning_rate": 8.651520999789698e-06, "loss": 0.0942, "step": 5481 }, { "epoch": 148.16216216216216, "grad_norm": 0.79296875, "learning_rate": 8.643070417226978e-06, "loss": 0.0917, "step": 5482 }, { "epoch": 148.1891891891892, "grad_norm": 0.486328125, "learning_rate": 8.634623101226009e-06, "loss": 0.067, "step": 5483 }, { "epoch": 148.21621621621622, "grad_norm": 0.796875, "learning_rate": 8.626179053473766e-06, "loss": 0.1176, "step": 5484 }, { "epoch": 148.24324324324326, "grad_norm": 0.953125, "learning_rate": 8.617738275656554e-06, "loss": 0.0909, "step": 5485 }, { "epoch": 148.27027027027026, "grad_norm": 1.2421875, "learning_rate": 8.609300769460055e-06, "loss": 0.1375, "step": 5486 }, { "epoch": 148.2972972972973, "grad_norm": 0.75390625, "learning_rate": 8.60086653656927e-06, "loss": 0.0894, "step": 5487 }, { "epoch": 148.32432432432432, "grad_norm": 1.0234375, "learning_rate": 8.592435578668564e-06, "loss": 0.1222, "step": 5488 }, { "epoch": 148.35135135135135, "grad_norm": 0.99609375, "learning_rate": 8.584007897441637e-06, "loss": 0.104, "step": 5489 }, { "epoch": 148.3783783783784, "grad_norm": 0.6953125, "learning_rate": 8.575583494571548e-06, "loss": 0.0785, "step": 5490 }, { "epoch": 148.40540540540542, "grad_norm": 0.66796875, "learning_rate": 8.567162371740683e-06, "loss": 0.0805, "step": 5491 }, { "epoch": 148.43243243243242, "grad_norm": 0.70703125, "learning_rate": 8.558744530630793e-06, "loss": 0.0825, "step": 5492 }, { "epoch": 148.45945945945945, "grad_norm": 0.8828125, "learning_rate": 8.550329972922954e-06, "loss": 0.1193, "step": 5493 }, { "epoch": 148.48648648648648, "grad_norm": 0.458984375, "learning_rate": 8.541918700297599e-06, "loss": 0.057, "step": 5494 }, { "epoch": 148.51351351351352, "grad_norm": 0.94140625, "learning_rate": 8.53351071443451e-06, "loss": 0.1296, "step": 5495 }, { "epoch": 148.54054054054055, "grad_norm": 0.83203125, "learning_rate": 8.525106017012788e-06, "loss": 0.0914, "step": 5496 }, { "epoch": 148.56756756756758, "grad_norm": 1.1640625, "learning_rate": 8.516704609710904e-06, "loss": 0.1375, "step": 5497 }, { "epoch": 148.59459459459458, "grad_norm": 1.0078125, "learning_rate": 8.508306494206671e-06, "loss": 0.1126, "step": 5498 }, { "epoch": 148.6216216216216, "grad_norm": 0.7734375, "learning_rate": 8.499911672177217e-06, "loss": 0.0736, "step": 5499 }, { "epoch": 148.64864864864865, "grad_norm": 1.1875, "learning_rate": 8.491520145299042e-06, "loss": 0.1092, "step": 5500 }, { "epoch": 148.67567567567568, "grad_norm": 1.0625, "learning_rate": 8.483131915247968e-06, "loss": 0.1284, "step": 5501 }, { "epoch": 148.7027027027027, "grad_norm": 1.046875, "learning_rate": 8.474746983699172e-06, "loss": 0.1361, "step": 5502 }, { "epoch": 148.72972972972974, "grad_norm": 0.92578125, "learning_rate": 8.466365352327175e-06, "loss": 0.1132, "step": 5503 }, { "epoch": 148.75675675675674, "grad_norm": 0.640625, "learning_rate": 8.457987022805815e-06, "loss": 0.0762, "step": 5504 }, { "epoch": 148.78378378378378, "grad_norm": 0.7734375, "learning_rate": 8.449611996808296e-06, "loss": 0.0914, "step": 5505 }, { "epoch": 148.8108108108108, "grad_norm": 1.1015625, "learning_rate": 8.441240276007162e-06, "loss": 0.141, "step": 5506 }, { "epoch": 148.83783783783784, "grad_norm": 0.65234375, "learning_rate": 8.432871862074268e-06, "loss": 0.08, "step": 5507 }, { "epoch": 148.86486486486487, "grad_norm": 0.8125, "learning_rate": 8.424506756680839e-06, "loss": 0.1011, "step": 5508 }, { "epoch": 148.8918918918919, "grad_norm": 0.578125, "learning_rate": 8.41614496149744e-06, "loss": 0.0554, "step": 5509 }, { "epoch": 148.9189189189189, "grad_norm": 1.28125, "learning_rate": 8.407786478193944e-06, "loss": 0.1666, "step": 5510 }, { "epoch": 148.94594594594594, "grad_norm": 0.859375, "learning_rate": 8.399431308439593e-06, "loss": 0.0983, "step": 5511 }, { "epoch": 148.97297297297297, "grad_norm": 0.91015625, "learning_rate": 8.391079453902962e-06, "loss": 0.1138, "step": 5512 }, { "epoch": 149.0, "grad_norm": 1.2890625, "learning_rate": 8.382730916251943e-06, "loss": 0.1574, "step": 5513 }, { "epoch": 149.02702702702703, "grad_norm": 0.69921875, "learning_rate": 8.374385697153792e-06, "loss": 0.0879, "step": 5514 }, { "epoch": 149.05405405405406, "grad_norm": 1.296875, "learning_rate": 8.366043798275097e-06, "loss": 0.1617, "step": 5515 }, { "epoch": 149.0810810810811, "grad_norm": 0.8515625, "learning_rate": 8.357705221281762e-06, "loss": 0.1046, "step": 5516 }, { "epoch": 149.1081081081081, "grad_norm": 0.478515625, "learning_rate": 8.34936996783906e-06, "loss": 0.0583, "step": 5517 }, { "epoch": 149.13513513513513, "grad_norm": 0.921875, "learning_rate": 8.341038039611568e-06, "loss": 0.1059, "step": 5518 }, { "epoch": 149.16216216216216, "grad_norm": 1.0078125, "learning_rate": 8.33270943826322e-06, "loss": 0.1276, "step": 5519 }, { "epoch": 149.1891891891892, "grad_norm": 1.2109375, "learning_rate": 8.324384165457289e-06, "loss": 0.1285, "step": 5520 }, { "epoch": 149.21621621621622, "grad_norm": 1.0703125, "learning_rate": 8.316062222856358e-06, "loss": 0.1013, "step": 5521 }, { "epoch": 149.24324324324326, "grad_norm": 0.87890625, "learning_rate": 8.30774361212237e-06, "loss": 0.1025, "step": 5522 }, { "epoch": 149.27027027027026, "grad_norm": 0.73046875, "learning_rate": 8.2994283349166e-06, "loss": 0.0865, "step": 5523 }, { "epoch": 149.2972972972973, "grad_norm": 0.8046875, "learning_rate": 8.291116392899639e-06, "loss": 0.1, "step": 5524 }, { "epoch": 149.32432432432432, "grad_norm": 0.7578125, "learning_rate": 8.282807787731434e-06, "loss": 0.0866, "step": 5525 }, { "epoch": 149.35135135135135, "grad_norm": 0.55859375, "learning_rate": 8.274502521071246e-06, "loss": 0.0692, "step": 5526 }, { "epoch": 149.3783783783784, "grad_norm": 1.109375, "learning_rate": 8.266200594577688e-06, "loss": 0.1327, "step": 5527 }, { "epoch": 149.40540540540542, "grad_norm": 1.1640625, "learning_rate": 8.257902009908686e-06, "loss": 0.172, "step": 5528 }, { "epoch": 149.43243243243242, "grad_norm": 1.140625, "learning_rate": 8.249606768721515e-06, "loss": 0.1616, "step": 5529 }, { "epoch": 149.45945945945945, "grad_norm": 0.953125, "learning_rate": 8.241314872672787e-06, "loss": 0.1183, "step": 5530 }, { "epoch": 149.48648648648648, "grad_norm": 0.63671875, "learning_rate": 8.233026323418416e-06, "loss": 0.0715, "step": 5531 }, { "epoch": 149.51351351351352, "grad_norm": 0.9765625, "learning_rate": 8.224741122613686e-06, "loss": 0.1051, "step": 5532 }, { "epoch": 149.54054054054055, "grad_norm": 1.09375, "learning_rate": 8.216459271913176e-06, "loss": 0.1268, "step": 5533 }, { "epoch": 149.56756756756758, "grad_norm": 0.5625, "learning_rate": 8.208180772970833e-06, "loss": 0.0662, "step": 5534 }, { "epoch": 149.59459459459458, "grad_norm": 0.8046875, "learning_rate": 8.199905627439897e-06, "loss": 0.1044, "step": 5535 }, { "epoch": 149.6216216216216, "grad_norm": 0.70703125, "learning_rate": 8.191633836972962e-06, "loss": 0.073, "step": 5536 }, { "epoch": 149.64864864864865, "grad_norm": 0.97265625, "learning_rate": 8.183365403221957e-06, "loss": 0.1161, "step": 5537 }, { "epoch": 149.67567567567568, "grad_norm": 0.57421875, "learning_rate": 8.175100327838114e-06, "loss": 0.0665, "step": 5538 }, { "epoch": 149.7027027027027, "grad_norm": 1.0625, "learning_rate": 8.16683861247202e-06, "loss": 0.1755, "step": 5539 }, { "epoch": 149.72972972972974, "grad_norm": 0.83203125, "learning_rate": 8.158580258773585e-06, "loss": 0.1066, "step": 5540 }, { "epoch": 149.75675675675674, "grad_norm": 0.82421875, "learning_rate": 8.150325268392034e-06, "loss": 0.0836, "step": 5541 }, { "epoch": 149.78378378378378, "grad_norm": 0.5078125, "learning_rate": 8.142073642975931e-06, "loss": 0.0656, "step": 5542 }, { "epoch": 149.8108108108108, "grad_norm": 0.9140625, "learning_rate": 8.133825384173182e-06, "loss": 0.1006, "step": 5543 }, { "epoch": 149.83783783783784, "grad_norm": 1.40625, "learning_rate": 8.12558049363099e-06, "loss": 0.161, "step": 5544 }, { "epoch": 149.86486486486487, "grad_norm": 0.84375, "learning_rate": 8.117338972995905e-06, "loss": 0.0957, "step": 5545 }, { "epoch": 149.8918918918919, "grad_norm": 0.625, "learning_rate": 8.109100823913811e-06, "loss": 0.0775, "step": 5546 }, { "epoch": 149.9189189189189, "grad_norm": 0.8046875, "learning_rate": 8.100866048029893e-06, "loss": 0.0926, "step": 5547 }, { "epoch": 149.94594594594594, "grad_norm": 1.0859375, "learning_rate": 8.092634646988689e-06, "loss": 0.1163, "step": 5548 }, { "epoch": 149.97297297297297, "grad_norm": 0.8359375, "learning_rate": 8.084406622434043e-06, "loss": 0.0932, "step": 5549 }, { "epoch": 150.0, "grad_norm": 1.1953125, "learning_rate": 8.076181976009133e-06, "loss": 0.1981, "step": 5550 }, { "epoch": 150.02702702702703, "grad_norm": 0.87890625, "learning_rate": 8.067960709356478e-06, "loss": 0.1017, "step": 5551 }, { "epoch": 150.05405405405406, "grad_norm": 0.53125, "learning_rate": 8.059742824117885e-06, "loss": 0.0696, "step": 5552 }, { "epoch": 150.0810810810811, "grad_norm": 1.1328125, "learning_rate": 8.051528321934518e-06, "loss": 0.1283, "step": 5553 }, { "epoch": 150.1081081081081, "grad_norm": 1.8125, "learning_rate": 8.043317204446863e-06, "loss": 0.2033, "step": 5554 }, { "epoch": 150.13513513513513, "grad_norm": 1.421875, "learning_rate": 8.035109473294706e-06, "loss": 0.183, "step": 5555 }, { "epoch": 150.16216216216216, "grad_norm": 0.56640625, "learning_rate": 8.026905130117176e-06, "loss": 0.0733, "step": 5556 }, { "epoch": 150.1891891891892, "grad_norm": 0.76171875, "learning_rate": 8.018704176552735e-06, "loss": 0.0873, "step": 5557 }, { "epoch": 150.21621621621622, "grad_norm": 1.0546875, "learning_rate": 8.010506614239139e-06, "loss": 0.1261, "step": 5558 }, { "epoch": 150.24324324324326, "grad_norm": 1.203125, "learning_rate": 8.00231244481349e-06, "loss": 0.1399, "step": 5559 }, { "epoch": 150.27027027027026, "grad_norm": 0.99609375, "learning_rate": 7.994121669912208e-06, "loss": 0.1123, "step": 5560 }, { "epoch": 150.2972972972973, "grad_norm": 1.2109375, "learning_rate": 7.985934291171024e-06, "loss": 0.1298, "step": 5561 }, { "epoch": 150.32432432432432, "grad_norm": 1.1328125, "learning_rate": 7.977750310225012e-06, "loss": 0.1394, "step": 5562 }, { "epoch": 150.35135135135135, "grad_norm": 1.3828125, "learning_rate": 7.969569728708537e-06, "loss": 0.177, "step": 5563 }, { "epoch": 150.3783783783784, "grad_norm": 0.7578125, "learning_rate": 7.961392548255312e-06, "loss": 0.0821, "step": 5564 }, { "epoch": 150.40540540540542, "grad_norm": 0.8828125, "learning_rate": 7.953218770498368e-06, "loss": 0.0984, "step": 5565 }, { "epoch": 150.43243243243242, "grad_norm": 1.046875, "learning_rate": 7.945048397070037e-06, "loss": 0.1227, "step": 5566 }, { "epoch": 150.45945945945945, "grad_norm": 0.9140625, "learning_rate": 7.936881429601997e-06, "loss": 0.1018, "step": 5567 }, { "epoch": 150.48648648648648, "grad_norm": 1.0859375, "learning_rate": 7.92871786972522e-06, "loss": 0.1124, "step": 5568 }, { "epoch": 150.51351351351352, "grad_norm": 0.94921875, "learning_rate": 7.92055771907002e-06, "loss": 0.0977, "step": 5569 }, { "epoch": 150.54054054054055, "grad_norm": 0.78515625, "learning_rate": 7.91240097926601e-06, "loss": 0.0934, "step": 5570 }, { "epoch": 150.56756756756758, "grad_norm": 0.490234375, "learning_rate": 7.904247651942149e-06, "loss": 0.0661, "step": 5571 }, { "epoch": 150.59459459459458, "grad_norm": 0.37890625, "learning_rate": 7.896097738726679e-06, "loss": 0.0532, "step": 5572 }, { "epoch": 150.6216216216216, "grad_norm": 0.451171875, "learning_rate": 7.887951241247185e-06, "loss": 0.0596, "step": 5573 }, { "epoch": 150.64864864864865, "grad_norm": 0.953125, "learning_rate": 7.879808161130578e-06, "loss": 0.104, "step": 5574 }, { "epoch": 150.67567567567568, "grad_norm": 0.76953125, "learning_rate": 7.87166850000305e-06, "loss": 0.0796, "step": 5575 }, { "epoch": 150.7027027027027, "grad_norm": 0.94921875, "learning_rate": 7.863532259490152e-06, "loss": 0.1127, "step": 5576 }, { "epoch": 150.72972972972974, "grad_norm": 0.91796875, "learning_rate": 7.855399441216716e-06, "loss": 0.1161, "step": 5577 }, { "epoch": 150.75675675675674, "grad_norm": 0.484375, "learning_rate": 7.847270046806915e-06, "loss": 0.0642, "step": 5578 }, { "epoch": 150.78378378378378, "grad_norm": 1.0625, "learning_rate": 7.839144077884239e-06, "loss": 0.11, "step": 5579 }, { "epoch": 150.8108108108108, "grad_norm": 1.1484375, "learning_rate": 7.831021536071468e-06, "loss": 0.1497, "step": 5580 }, { "epoch": 150.83783783783784, "grad_norm": 0.65234375, "learning_rate": 7.822902422990722e-06, "loss": 0.0798, "step": 5581 }, { "epoch": 150.86486486486487, "grad_norm": 0.94140625, "learning_rate": 7.814786740263438e-06, "loss": 0.1079, "step": 5582 }, { "epoch": 150.8918918918919, "grad_norm": 1.015625, "learning_rate": 7.806674489510344e-06, "loss": 0.1102, "step": 5583 }, { "epoch": 150.9189189189189, "grad_norm": 0.90625, "learning_rate": 7.798565672351504e-06, "loss": 0.0874, "step": 5584 }, { "epoch": 150.94594594594594, "grad_norm": 0.97265625, "learning_rate": 7.790460290406301e-06, "loss": 0.1138, "step": 5585 }, { "epoch": 150.97297297297297, "grad_norm": 0.80078125, "learning_rate": 7.7823583452934e-06, "loss": 0.1061, "step": 5586 }, { "epoch": 151.0, "grad_norm": 0.88671875, "learning_rate": 7.774259838630813e-06, "loss": 0.1138, "step": 5587 }, { "epoch": 151.02702702702703, "grad_norm": 0.9296875, "learning_rate": 7.766164772035856e-06, "loss": 0.1017, "step": 5588 }, { "epoch": 151.05405405405406, "grad_norm": 0.78125, "learning_rate": 7.758073147125144e-06, "loss": 0.0915, "step": 5589 }, { "epoch": 151.0810810810811, "grad_norm": 1.375, "learning_rate": 7.74998496551462e-06, "loss": 0.1264, "step": 5590 }, { "epoch": 151.1081081081081, "grad_norm": 1.359375, "learning_rate": 7.741900228819545e-06, "loss": 0.2269, "step": 5591 }, { "epoch": 151.13513513513513, "grad_norm": 0.984375, "learning_rate": 7.733818938654463e-06, "loss": 0.1039, "step": 5592 }, { "epoch": 151.16216216216216, "grad_norm": 1.15625, "learning_rate": 7.725741096633268e-06, "loss": 0.1557, "step": 5593 }, { "epoch": 151.1891891891892, "grad_norm": 0.79296875, "learning_rate": 7.717666704369129e-06, "loss": 0.0914, "step": 5594 }, { "epoch": 151.21621621621622, "grad_norm": 0.6328125, "learning_rate": 7.709595763474551e-06, "loss": 0.0762, "step": 5595 }, { "epoch": 151.24324324324326, "grad_norm": 1.140625, "learning_rate": 7.701528275561348e-06, "loss": 0.1002, "step": 5596 }, { "epoch": 151.27027027027026, "grad_norm": 1.234375, "learning_rate": 7.693464242240625e-06, "loss": 0.1946, "step": 5597 }, { "epoch": 151.2972972972973, "grad_norm": 0.671875, "learning_rate": 7.685403665122814e-06, "loss": 0.085, "step": 5598 }, { "epoch": 151.32432432432432, "grad_norm": 1.0078125, "learning_rate": 7.677346545817668e-06, "loss": 0.1354, "step": 5599 }, { "epoch": 151.35135135135135, "grad_norm": 0.984375, "learning_rate": 7.669292885934212e-06, "loss": 0.1071, "step": 5600 }, { "epoch": 151.3783783783784, "grad_norm": 1.1015625, "learning_rate": 7.661242687080813e-06, "loss": 0.118, "step": 5601 }, { "epoch": 151.40540540540542, "grad_norm": 0.953125, "learning_rate": 7.653195950865147e-06, "loss": 0.1031, "step": 5602 }, { "epoch": 151.43243243243242, "grad_norm": 0.7421875, "learning_rate": 7.64515267889417e-06, "loss": 0.0686, "step": 5603 }, { "epoch": 151.45945945945945, "grad_norm": 1.0234375, "learning_rate": 7.637112872774176e-06, "loss": 0.1129, "step": 5604 }, { "epoch": 151.48648648648648, "grad_norm": 1.2265625, "learning_rate": 7.629076534110746e-06, "loss": 0.1784, "step": 5605 }, { "epoch": 151.51351351351352, "grad_norm": 0.58203125, "learning_rate": 7.621043664508793e-06, "loss": 0.06, "step": 5606 }, { "epoch": 151.54054054054055, "grad_norm": 0.984375, "learning_rate": 7.613014265572499e-06, "loss": 0.1111, "step": 5607 }, { "epoch": 151.56756756756758, "grad_norm": 1.03125, "learning_rate": 7.604988338905397e-06, "loss": 0.0992, "step": 5608 }, { "epoch": 151.59459459459458, "grad_norm": 1.265625, "learning_rate": 7.596965886110291e-06, "loss": 0.1587, "step": 5609 }, { "epoch": 151.6216216216216, "grad_norm": 0.95703125, "learning_rate": 7.588946908789316e-06, "loss": 0.1037, "step": 5610 }, { "epoch": 151.64864864864865, "grad_norm": 0.91796875, "learning_rate": 7.58093140854389e-06, "loss": 0.1404, "step": 5611 }, { "epoch": 151.67567567567568, "grad_norm": 0.6328125, "learning_rate": 7.5729193869747585e-06, "loss": 0.0765, "step": 5612 }, { "epoch": 151.7027027027027, "grad_norm": 1.1328125, "learning_rate": 7.564910845681966e-06, "loss": 0.1611, "step": 5613 }, { "epoch": 151.72972972972974, "grad_norm": 0.470703125, "learning_rate": 7.556905786264845e-06, "loss": 0.067, "step": 5614 }, { "epoch": 151.75675675675674, "grad_norm": 1.03125, "learning_rate": 7.548904210322058e-06, "loss": 0.1219, "step": 5615 }, { "epoch": 151.78378378378378, "grad_norm": 0.8125, "learning_rate": 7.5409061194515625e-06, "loss": 0.098, "step": 5616 }, { "epoch": 151.8108108108108, "grad_norm": 1.2421875, "learning_rate": 7.532911515250604e-06, "loss": 0.1675, "step": 5617 }, { "epoch": 151.83783783783784, "grad_norm": 0.9453125, "learning_rate": 7.524920399315755e-06, "loss": 0.111, "step": 5618 }, { "epoch": 151.86486486486487, "grad_norm": 0.8671875, "learning_rate": 7.516932773242888e-06, "loss": 0.1081, "step": 5619 }, { "epoch": 151.8918918918919, "grad_norm": 0.8125, "learning_rate": 7.508948638627158e-06, "loss": 0.0969, "step": 5620 }, { "epoch": 151.9189189189189, "grad_norm": 0.470703125, "learning_rate": 7.500967997063052e-06, "loss": 0.0577, "step": 5621 }, { "epoch": 151.94594594594594, "grad_norm": 0.90625, "learning_rate": 7.4929908501443265e-06, "loss": 0.0964, "step": 5622 }, { "epoch": 151.97297297297297, "grad_norm": 0.71484375, "learning_rate": 7.485017199464073e-06, "loss": 0.0803, "step": 5623 }, { "epoch": 152.0, "grad_norm": 0.8125, "learning_rate": 7.477047046614671e-06, "loss": 0.0907, "step": 5624 }, { "epoch": 152.02702702702703, "grad_norm": 0.96484375, "learning_rate": 7.469080393187786e-06, "loss": 0.1237, "step": 5625 }, { "epoch": 152.05405405405406, "grad_norm": 0.89453125, "learning_rate": 7.4611172407744095e-06, "loss": 0.084, "step": 5626 }, { "epoch": 152.0810810810811, "grad_norm": 0.462890625, "learning_rate": 7.4531575909648296e-06, "loss": 0.0624, "step": 5627 }, { "epoch": 152.1081081081081, "grad_norm": 0.59765625, "learning_rate": 7.445201445348612e-06, "loss": 0.0691, "step": 5628 }, { "epoch": 152.13513513513513, "grad_norm": 1.046875, "learning_rate": 7.437248805514649e-06, "loss": 0.1174, "step": 5629 }, { "epoch": 152.16216216216216, "grad_norm": 0.65234375, "learning_rate": 7.429299673051132e-06, "loss": 0.0782, "step": 5630 }, { "epoch": 152.1891891891892, "grad_norm": 1.046875, "learning_rate": 7.421354049545523e-06, "loss": 0.1382, "step": 5631 }, { "epoch": 152.21621621621622, "grad_norm": 0.94140625, "learning_rate": 7.413411936584616e-06, "loss": 0.1117, "step": 5632 }, { "epoch": 152.24324324324326, "grad_norm": 0.69921875, "learning_rate": 7.4054733357544955e-06, "loss": 0.0771, "step": 5633 }, { "epoch": 152.27027027027026, "grad_norm": 0.90625, "learning_rate": 7.397538248640526e-06, "loss": 0.0919, "step": 5634 }, { "epoch": 152.2972972972973, "grad_norm": 0.73046875, "learning_rate": 7.389606676827391e-06, "loss": 0.0719, "step": 5635 }, { "epoch": 152.32432432432432, "grad_norm": 1.203125, "learning_rate": 7.381678621899077e-06, "loss": 0.1186, "step": 5636 }, { "epoch": 152.35135135135135, "grad_norm": 0.72265625, "learning_rate": 7.373754085438836e-06, "loss": 0.0978, "step": 5637 }, { "epoch": 152.3783783783784, "grad_norm": 0.921875, "learning_rate": 7.36583306902926e-06, "loss": 0.1389, "step": 5638 }, { "epoch": 152.40540540540542, "grad_norm": 1.09375, "learning_rate": 7.357915574252194e-06, "loss": 0.103, "step": 5639 }, { "epoch": 152.43243243243242, "grad_norm": 1.3203125, "learning_rate": 7.350001602688816e-06, "loss": 0.1874, "step": 5640 }, { "epoch": 152.45945945945945, "grad_norm": 1.1171875, "learning_rate": 7.3420911559195895e-06, "loss": 0.1138, "step": 5641 }, { "epoch": 152.48648648648648, "grad_norm": 0.84375, "learning_rate": 7.334184235524258e-06, "loss": 0.0981, "step": 5642 }, { "epoch": 152.51351351351352, "grad_norm": 1.078125, "learning_rate": 7.326280843081884e-06, "loss": 0.1434, "step": 5643 }, { "epoch": 152.54054054054055, "grad_norm": 0.79296875, "learning_rate": 7.318380980170805e-06, "loss": 0.0997, "step": 5644 }, { "epoch": 152.56756756756758, "grad_norm": 1.3828125, "learning_rate": 7.310484648368679e-06, "loss": 0.1568, "step": 5645 }, { "epoch": 152.59459459459458, "grad_norm": 0.6328125, "learning_rate": 7.302591849252424e-06, "loss": 0.0731, "step": 5646 }, { "epoch": 152.6216216216216, "grad_norm": 0.94921875, "learning_rate": 7.294702584398292e-06, "loss": 0.1282, "step": 5647 }, { "epoch": 152.64864864864865, "grad_norm": 0.703125, "learning_rate": 7.286816855381793e-06, "loss": 0.075, "step": 5648 }, { "epoch": 152.67567567567568, "grad_norm": 1.34375, "learning_rate": 7.278934663777748e-06, "loss": 0.1763, "step": 5649 }, { "epoch": 152.7027027027027, "grad_norm": 0.87109375, "learning_rate": 7.271056011160288e-06, "loss": 0.0936, "step": 5650 }, { "epoch": 152.72972972972974, "grad_norm": 1.03125, "learning_rate": 7.263180899102798e-06, "loss": 0.1232, "step": 5651 }, { "epoch": 152.75675675675674, "grad_norm": 0.68359375, "learning_rate": 7.255309329177995e-06, "loss": 0.0833, "step": 5652 }, { "epoch": 152.78378378378378, "grad_norm": 0.9453125, "learning_rate": 7.247441302957858e-06, "loss": 0.0817, "step": 5653 }, { "epoch": 152.8108108108108, "grad_norm": 0.8984375, "learning_rate": 7.239576822013677e-06, "loss": 0.119, "step": 5654 }, { "epoch": 152.83783783783784, "grad_norm": 1.4375, "learning_rate": 7.231715887916038e-06, "loss": 0.1693, "step": 5655 }, { "epoch": 152.86486486486487, "grad_norm": 1.1796875, "learning_rate": 7.2238585022347925e-06, "loss": 0.115, "step": 5656 }, { "epoch": 152.8918918918919, "grad_norm": 0.7734375, "learning_rate": 7.216004666539108e-06, "loss": 0.0866, "step": 5657 }, { "epoch": 152.9189189189189, "grad_norm": 1.4765625, "learning_rate": 7.2081543823974446e-06, "loss": 0.2, "step": 5658 }, { "epoch": 152.94594594594594, "grad_norm": 0.77734375, "learning_rate": 7.200307651377528e-06, "loss": 0.0811, "step": 5659 }, { "epoch": 152.97297297297297, "grad_norm": 0.69921875, "learning_rate": 7.1924644750463955e-06, "loss": 0.0804, "step": 5660 }, { "epoch": 153.0, "grad_norm": 0.734375, "learning_rate": 7.184624854970379e-06, "loss": 0.0901, "step": 5661 }, { "epoch": 153.02702702702703, "grad_norm": 0.91796875, "learning_rate": 7.176788792715075e-06, "loss": 0.0946, "step": 5662 }, { "epoch": 153.05405405405406, "grad_norm": 0.65625, "learning_rate": 7.168956289845391e-06, "loss": 0.0709, "step": 5663 }, { "epoch": 153.0810810810811, "grad_norm": 1.2578125, "learning_rate": 7.1611273479255285e-06, "loss": 0.1648, "step": 5664 }, { "epoch": 153.1081081081081, "grad_norm": 1.1484375, "learning_rate": 7.153301968518949e-06, "loss": 0.1042, "step": 5665 }, { "epoch": 153.13513513513513, "grad_norm": 0.73046875, "learning_rate": 7.1454801531884275e-06, "loss": 0.0801, "step": 5666 }, { "epoch": 153.16216216216216, "grad_norm": 0.5, "learning_rate": 7.137661903496029e-06, "loss": 0.0625, "step": 5667 }, { "epoch": 153.1891891891892, "grad_norm": 0.80859375, "learning_rate": 7.129847221003083e-06, "loss": 0.0951, "step": 5668 }, { "epoch": 153.21621621621622, "grad_norm": 0.73046875, "learning_rate": 7.122036107270236e-06, "loss": 0.0811, "step": 5669 }, { "epoch": 153.24324324324326, "grad_norm": 0.76953125, "learning_rate": 7.114228563857389e-06, "loss": 0.1105, "step": 5670 }, { "epoch": 153.27027027027026, "grad_norm": 1.2734375, "learning_rate": 7.10642459232376e-06, "loss": 0.1638, "step": 5671 }, { "epoch": 153.2972972972973, "grad_norm": 0.99609375, "learning_rate": 7.0986241942278455e-06, "loss": 0.1209, "step": 5672 }, { "epoch": 153.32432432432432, "grad_norm": 1.1015625, "learning_rate": 7.090827371127412e-06, "loss": 0.1392, "step": 5673 }, { "epoch": 153.35135135135135, "grad_norm": 1.078125, "learning_rate": 7.083034124579533e-06, "loss": 0.1737, "step": 5674 }, { "epoch": 153.3783783783784, "grad_norm": 1.0703125, "learning_rate": 7.075244456140564e-06, "loss": 0.1135, "step": 5675 }, { "epoch": 153.40540540540542, "grad_norm": 0.7109375, "learning_rate": 7.067458367366126e-06, "loss": 0.0947, "step": 5676 }, { "epoch": 153.43243243243242, "grad_norm": 0.5390625, "learning_rate": 7.059675859811152e-06, "loss": 0.0757, "step": 5677 }, { "epoch": 153.45945945945945, "grad_norm": 1.15625, "learning_rate": 7.05189693502985e-06, "loss": 0.1623, "step": 5678 }, { "epoch": 153.48648648648648, "grad_norm": 0.57421875, "learning_rate": 7.0441215945757006e-06, "loss": 0.0764, "step": 5679 }, { "epoch": 153.51351351351352, "grad_norm": 0.875, "learning_rate": 7.036349840001491e-06, "loss": 0.091, "step": 5680 }, { "epoch": 153.54054054054055, "grad_norm": 0.57421875, "learning_rate": 7.028581672859266e-06, "loss": 0.0731, "step": 5681 }, { "epoch": 153.56756756756758, "grad_norm": 0.5703125, "learning_rate": 7.020817094700383e-06, "loss": 0.0655, "step": 5682 }, { "epoch": 153.59459459459458, "grad_norm": 0.59375, "learning_rate": 7.013056107075453e-06, "loss": 0.0732, "step": 5683 }, { "epoch": 153.6216216216216, "grad_norm": 1.2265625, "learning_rate": 7.005298711534397e-06, "loss": 0.11, "step": 5684 }, { "epoch": 153.64864864864865, "grad_norm": 0.70703125, "learning_rate": 6.997544909626394e-06, "loss": 0.0678, "step": 5685 }, { "epoch": 153.67567567567568, "grad_norm": 0.6953125, "learning_rate": 6.989794702899932e-06, "loss": 0.0738, "step": 5686 }, { "epoch": 153.7027027027027, "grad_norm": 0.76171875, "learning_rate": 6.982048092902751e-06, "loss": 0.0946, "step": 5687 }, { "epoch": 153.72972972972974, "grad_norm": 1.03125, "learning_rate": 6.974305081181897e-06, "loss": 0.1304, "step": 5688 }, { "epoch": 153.75675675675674, "grad_norm": 0.80078125, "learning_rate": 6.966565669283695e-06, "loss": 0.0879, "step": 5689 }, { "epoch": 153.78378378378378, "grad_norm": 1.4453125, "learning_rate": 6.95882985875373e-06, "loss": 0.1797, "step": 5690 }, { "epoch": 153.8108108108108, "grad_norm": 1.140625, "learning_rate": 6.951097651136889e-06, "loss": 0.159, "step": 5691 }, { "epoch": 153.83783783783784, "grad_norm": 1.34375, "learning_rate": 6.943369047977344e-06, "loss": 0.1524, "step": 5692 }, { "epoch": 153.86486486486487, "grad_norm": 0.52734375, "learning_rate": 6.9356440508185155e-06, "loss": 0.0663, "step": 5693 }, { "epoch": 153.8918918918919, "grad_norm": 0.515625, "learning_rate": 6.927922661203137e-06, "loss": 0.0684, "step": 5694 }, { "epoch": 153.9189189189189, "grad_norm": 1.3046875, "learning_rate": 6.920204880673214e-06, "loss": 0.1572, "step": 5695 }, { "epoch": 153.94594594594594, "grad_norm": 0.85546875, "learning_rate": 6.912490710770015e-06, "loss": 0.0826, "step": 5696 }, { "epoch": 153.97297297297297, "grad_norm": 0.95703125, "learning_rate": 6.90478015303411e-06, "loss": 0.0966, "step": 5697 }, { "epoch": 154.0, "grad_norm": 1.0703125, "learning_rate": 6.897073209005323e-06, "loss": 0.1402, "step": 5698 }, { "epoch": 154.02702702702703, "grad_norm": 1.0, "learning_rate": 6.889369880222776e-06, "loss": 0.1084, "step": 5699 }, { "epoch": 154.05405405405406, "grad_norm": 1.28125, "learning_rate": 6.8816701682248744e-06, "loss": 0.2064, "step": 5700 }, { "epoch": 154.0810810810811, "grad_norm": 0.546875, "learning_rate": 6.873974074549269e-06, "loss": 0.0678, "step": 5701 }, { "epoch": 154.1081081081081, "grad_norm": 0.66015625, "learning_rate": 6.866281600732918e-06, "loss": 0.0785, "step": 5702 }, { "epoch": 154.13513513513513, "grad_norm": 0.455078125, "learning_rate": 6.858592748312059e-06, "loss": 0.0602, "step": 5703 }, { "epoch": 154.16216216216216, "grad_norm": 0.4375, "learning_rate": 6.850907518822175e-06, "loss": 0.0544, "step": 5704 }, { "epoch": 154.1891891891892, "grad_norm": 1.8828125, "learning_rate": 6.843225913798054e-06, "loss": 0.2559, "step": 5705 }, { "epoch": 154.21621621621622, "grad_norm": 0.83984375, "learning_rate": 6.835547934773759e-06, "loss": 0.1119, "step": 5706 }, { "epoch": 154.24324324324326, "grad_norm": 1.421875, "learning_rate": 6.8278735832826065e-06, "loss": 0.2921, "step": 5707 }, { "epoch": 154.27027027027026, "grad_norm": 0.68359375, "learning_rate": 6.820202860857214e-06, "loss": 0.0797, "step": 5708 }, { "epoch": 154.2972972972973, "grad_norm": 0.9375, "learning_rate": 6.812535769029466e-06, "loss": 0.104, "step": 5709 }, { "epoch": 154.32432432432432, "grad_norm": 0.73828125, "learning_rate": 6.804872309330506e-06, "loss": 0.0651, "step": 5710 }, { "epoch": 154.35135135135135, "grad_norm": 0.53125, "learning_rate": 6.797212483290777e-06, "loss": 0.0702, "step": 5711 }, { "epoch": 154.3783783783784, "grad_norm": 0.65234375, "learning_rate": 6.789556292439986e-06, "loss": 0.0806, "step": 5712 }, { "epoch": 154.40540540540542, "grad_norm": 0.8125, "learning_rate": 6.7819037383071045e-06, "loss": 0.0873, "step": 5713 }, { "epoch": 154.43243243243242, "grad_norm": 0.78515625, "learning_rate": 6.774254822420398e-06, "loss": 0.0848, "step": 5714 }, { "epoch": 154.45945945945945, "grad_norm": 0.94921875, "learning_rate": 6.766609546307378e-06, "loss": 0.1374, "step": 5715 }, { "epoch": 154.48648648648648, "grad_norm": 1.1484375, "learning_rate": 6.758967911494857e-06, "loss": 0.1448, "step": 5716 }, { "epoch": 154.51351351351352, "grad_norm": 0.6875, "learning_rate": 6.751329919508908e-06, "loss": 0.0829, "step": 5717 }, { "epoch": 154.54054054054055, "grad_norm": 0.5546875, "learning_rate": 6.7436955718748695e-06, "loss": 0.0665, "step": 5718 }, { "epoch": 154.56756756756758, "grad_norm": 0.54296875, "learning_rate": 6.73606487011737e-06, "loss": 0.0647, "step": 5719 }, { "epoch": 154.59459459459458, "grad_norm": 1.1328125, "learning_rate": 6.728437815760289e-06, "loss": 0.137, "step": 5720 }, { "epoch": 154.6216216216216, "grad_norm": 0.4609375, "learning_rate": 6.720814410326795e-06, "loss": 0.0587, "step": 5721 }, { "epoch": 154.64864864864865, "grad_norm": 0.875, "learning_rate": 6.713194655339311e-06, "loss": 0.0938, "step": 5722 }, { "epoch": 154.67567567567568, "grad_norm": 1.0390625, "learning_rate": 6.705578552319555e-06, "loss": 0.1171, "step": 5723 }, { "epoch": 154.7027027027027, "grad_norm": 1.203125, "learning_rate": 6.697966102788486e-06, "loss": 0.1382, "step": 5724 }, { "epoch": 154.72972972972974, "grad_norm": 0.98828125, "learning_rate": 6.690357308266356e-06, "loss": 0.1385, "step": 5725 }, { "epoch": 154.75675675675674, "grad_norm": 1.5703125, "learning_rate": 6.682752170272691e-06, "loss": 0.2327, "step": 5726 }, { "epoch": 154.78378378378378, "grad_norm": 0.71875, "learning_rate": 6.675150690326254e-06, "loss": 0.0865, "step": 5727 }, { "epoch": 154.8108108108108, "grad_norm": 1.1640625, "learning_rate": 6.667552869945118e-06, "loss": 0.1195, "step": 5728 }, { "epoch": 154.83783783783784, "grad_norm": 0.6328125, "learning_rate": 6.659958710646591e-06, "loss": 0.0711, "step": 5729 }, { "epoch": 154.86486486486487, "grad_norm": 0.84765625, "learning_rate": 6.652368213947274e-06, "loss": 0.117, "step": 5730 }, { "epoch": 154.8918918918919, "grad_norm": 0.72265625, "learning_rate": 6.64478138136303e-06, "loss": 0.0846, "step": 5731 }, { "epoch": 154.9189189189189, "grad_norm": 0.37890625, "learning_rate": 6.637198214408979e-06, "loss": 0.0542, "step": 5732 }, { "epoch": 154.94594594594594, "grad_norm": 0.6171875, "learning_rate": 6.629618714599522e-06, "loss": 0.0736, "step": 5733 }, { "epoch": 154.97297297297297, "grad_norm": 1.28125, "learning_rate": 6.622042883448332e-06, "loss": 0.1464, "step": 5734 }, { "epoch": 155.0, "grad_norm": 0.984375, "learning_rate": 6.614470722468325e-06, "loss": 0.1045, "step": 5735 }, { "epoch": 155.02702702702703, "grad_norm": 0.74609375, "learning_rate": 6.606902233171711e-06, "loss": 0.0824, "step": 5736 }, { "epoch": 155.05405405405406, "grad_norm": 0.98046875, "learning_rate": 6.599337417069957e-06, "loss": 0.1251, "step": 5737 }, { "epoch": 155.0810810810811, "grad_norm": 0.765625, "learning_rate": 6.591776275673783e-06, "loss": 0.0864, "step": 5738 }, { "epoch": 155.1081081081081, "grad_norm": 1.015625, "learning_rate": 6.5842188104931975e-06, "loss": 0.1186, "step": 5739 }, { "epoch": 155.13513513513513, "grad_norm": 0.828125, "learning_rate": 6.576665023037468e-06, "loss": 0.0914, "step": 5740 }, { "epoch": 155.16216216216216, "grad_norm": 1.21875, "learning_rate": 6.569114914815111e-06, "loss": 0.0865, "step": 5741 }, { "epoch": 155.1891891891892, "grad_norm": 1.0390625, "learning_rate": 6.561568487333938e-06, "loss": 0.138, "step": 5742 }, { "epoch": 155.21621621621622, "grad_norm": 1.453125, "learning_rate": 6.554025742100992e-06, "loss": 0.147, "step": 5743 }, { "epoch": 155.24324324324326, "grad_norm": 0.6015625, "learning_rate": 6.546486680622601e-06, "loss": 0.0781, "step": 5744 }, { "epoch": 155.27027027027026, "grad_norm": 1.3515625, "learning_rate": 6.53895130440437e-06, "loss": 0.1909, "step": 5745 }, { "epoch": 155.2972972972973, "grad_norm": 0.625, "learning_rate": 6.531419614951132e-06, "loss": 0.0681, "step": 5746 }, { "epoch": 155.32432432432432, "grad_norm": 0.90234375, "learning_rate": 6.523891613767008e-06, "loss": 0.1019, "step": 5747 }, { "epoch": 155.35135135135135, "grad_norm": 1.0859375, "learning_rate": 6.5163673023553915e-06, "loss": 0.0938, "step": 5748 }, { "epoch": 155.3783783783784, "grad_norm": 1.203125, "learning_rate": 6.508846682218908e-06, "loss": 0.164, "step": 5749 }, { "epoch": 155.40540540540542, "grad_norm": 1.0234375, "learning_rate": 6.50132975485947e-06, "loss": 0.1069, "step": 5750 }, { "epoch": 155.43243243243242, "grad_norm": 0.65234375, "learning_rate": 6.4938165217782535e-06, "loss": 0.0784, "step": 5751 }, { "epoch": 155.45945945945945, "grad_norm": 0.59375, "learning_rate": 6.486306984475676e-06, "loss": 0.0726, "step": 5752 }, { "epoch": 155.48648648648648, "grad_norm": 0.87109375, "learning_rate": 6.478801144451435e-06, "loss": 0.0786, "step": 5753 }, { "epoch": 155.51351351351352, "grad_norm": 0.6015625, "learning_rate": 6.471299003204495e-06, "loss": 0.0677, "step": 5754 }, { "epoch": 155.54054054054055, "grad_norm": 1.1328125, "learning_rate": 6.463800562233058e-06, "loss": 0.1377, "step": 5755 }, { "epoch": 155.56756756756758, "grad_norm": 0.83984375, "learning_rate": 6.4563058230346114e-06, "loss": 0.1035, "step": 5756 }, { "epoch": 155.59459459459458, "grad_norm": 1.5625, "learning_rate": 6.448814787105878e-06, "loss": 0.204, "step": 5757 }, { "epoch": 155.6216216216216, "grad_norm": 0.71484375, "learning_rate": 6.4413274559428745e-06, "loss": 0.0855, "step": 5758 }, { "epoch": 155.64864864864865, "grad_norm": 0.63671875, "learning_rate": 6.433843831040842e-06, "loss": 0.0831, "step": 5759 }, { "epoch": 155.67567567567568, "grad_norm": 0.53125, "learning_rate": 6.426363913894304e-06, "loss": 0.0641, "step": 5760 }, { "epoch": 155.7027027027027, "grad_norm": 0.87109375, "learning_rate": 6.418887705997045e-06, "loss": 0.1027, "step": 5761 }, { "epoch": 155.72972972972974, "grad_norm": 0.7890625, "learning_rate": 6.411415208842092e-06, "loss": 0.0798, "step": 5762 }, { "epoch": 155.75675675675674, "grad_norm": 0.4921875, "learning_rate": 6.40394642392175e-06, "loss": 0.0675, "step": 5763 }, { "epoch": 155.78378378378378, "grad_norm": 0.80078125, "learning_rate": 6.3964813527275596e-06, "loss": 0.0801, "step": 5764 }, { "epoch": 155.8108108108108, "grad_norm": 0.91796875, "learning_rate": 6.38901999675035e-06, "loss": 0.114, "step": 5765 }, { "epoch": 155.83783783783784, "grad_norm": 0.84765625, "learning_rate": 6.381562357480175e-06, "loss": 0.0889, "step": 5766 }, { "epoch": 155.86486486486487, "grad_norm": 1.1484375, "learning_rate": 6.374108436406373e-06, "loss": 0.1608, "step": 5767 }, { "epoch": 155.8918918918919, "grad_norm": 1.109375, "learning_rate": 6.366658235017533e-06, "loss": 0.1365, "step": 5768 }, { "epoch": 155.9189189189189, "grad_norm": 0.68359375, "learning_rate": 6.359211754801486e-06, "loss": 0.074, "step": 5769 }, { "epoch": 155.94594594594594, "grad_norm": 1.125, "learning_rate": 6.351768997245341e-06, "loss": 0.1262, "step": 5770 }, { "epoch": 155.97297297297297, "grad_norm": 1.203125, "learning_rate": 6.344329963835457e-06, "loss": 0.1723, "step": 5771 }, { "epoch": 156.0, "grad_norm": 0.65234375, "learning_rate": 6.336894656057438e-06, "loss": 0.0754, "step": 5772 }, { "epoch": 156.02702702702703, "grad_norm": 0.9453125, "learning_rate": 6.329463075396161e-06, "loss": 0.1279, "step": 5773 }, { "epoch": 156.05405405405406, "grad_norm": 0.828125, "learning_rate": 6.322035223335737e-06, "loss": 0.0855, "step": 5774 }, { "epoch": 156.0810810810811, "grad_norm": 1.5703125, "learning_rate": 6.314611101359558e-06, "loss": 0.1621, "step": 5775 }, { "epoch": 156.1081081081081, "grad_norm": 0.46484375, "learning_rate": 6.307190710950262e-06, "loss": 0.0625, "step": 5776 }, { "epoch": 156.13513513513513, "grad_norm": 1.390625, "learning_rate": 6.299774053589725e-06, "loss": 0.1895, "step": 5777 }, { "epoch": 156.16216216216216, "grad_norm": 0.98828125, "learning_rate": 6.292361130759094e-06, "loss": 0.1051, "step": 5778 }, { "epoch": 156.1891891891892, "grad_norm": 0.8125, "learning_rate": 6.2849519439387825e-06, "loss": 0.0872, "step": 5779 }, { "epoch": 156.21621621621622, "grad_norm": 0.953125, "learning_rate": 6.2775464946084245e-06, "loss": 0.1141, "step": 5780 }, { "epoch": 156.24324324324326, "grad_norm": 0.62109375, "learning_rate": 6.2701447842469314e-06, "loss": 0.0786, "step": 5781 }, { "epoch": 156.27027027027026, "grad_norm": 0.67578125, "learning_rate": 6.2627468143324706e-06, "loss": 0.0846, "step": 5782 }, { "epoch": 156.2972972972973, "grad_norm": 0.7890625, "learning_rate": 6.2553525863424385e-06, "loss": 0.1104, "step": 5783 }, { "epoch": 156.32432432432432, "grad_norm": 0.80078125, "learning_rate": 6.247962101753513e-06, "loss": 0.0969, "step": 5784 }, { "epoch": 156.35135135135135, "grad_norm": 1.171875, "learning_rate": 6.2405753620416116e-06, "loss": 0.0986, "step": 5785 }, { "epoch": 156.3783783783784, "grad_norm": 1.0625, "learning_rate": 6.23319236868189e-06, "loss": 0.1284, "step": 5786 }, { "epoch": 156.40540540540542, "grad_norm": 1.1328125, "learning_rate": 6.22581312314878e-06, "loss": 0.1524, "step": 5787 }, { "epoch": 156.43243243243242, "grad_norm": 1.171875, "learning_rate": 6.21843762691596e-06, "loss": 0.1546, "step": 5788 }, { "epoch": 156.45945945945945, "grad_norm": 1.3125, "learning_rate": 6.211065881456341e-06, "loss": 0.1846, "step": 5789 }, { "epoch": 156.48648648648648, "grad_norm": 0.88671875, "learning_rate": 6.20369788824211e-06, "loss": 0.0963, "step": 5790 }, { "epoch": 156.51351351351352, "grad_norm": 1.2109375, "learning_rate": 6.196333648744679e-06, "loss": 0.159, "step": 5791 }, { "epoch": 156.54054054054055, "grad_norm": 1.1015625, "learning_rate": 6.18897316443473e-06, "loss": 0.1584, "step": 5792 }, { "epoch": 156.56756756756758, "grad_norm": 1.390625, "learning_rate": 6.181616436782198e-06, "loss": 0.1694, "step": 5793 }, { "epoch": 156.59459459459458, "grad_norm": 0.7578125, "learning_rate": 6.1742634672562445e-06, "loss": 0.0945, "step": 5794 }, { "epoch": 156.6216216216216, "grad_norm": 0.65234375, "learning_rate": 6.1669142573252985e-06, "loss": 0.0832, "step": 5795 }, { "epoch": 156.64864864864865, "grad_norm": 1.4296875, "learning_rate": 6.159568808457048e-06, "loss": 0.1469, "step": 5796 }, { "epoch": 156.67567567567568, "grad_norm": 0.9453125, "learning_rate": 6.152227122118398e-06, "loss": 0.0894, "step": 5797 }, { "epoch": 156.7027027027027, "grad_norm": 0.57421875, "learning_rate": 6.144889199775533e-06, "loss": 0.0654, "step": 5798 }, { "epoch": 156.72972972972974, "grad_norm": 0.81640625, "learning_rate": 6.137555042893864e-06, "loss": 0.0906, "step": 5799 }, { "epoch": 156.75675675675674, "grad_norm": 0.97265625, "learning_rate": 6.130224652938069e-06, "loss": 0.1073, "step": 5800 }, { "epoch": 156.78378378378378, "grad_norm": 1.0234375, "learning_rate": 6.122898031372051e-06, "loss": 0.1233, "step": 5801 }, { "epoch": 156.8108108108108, "grad_norm": 1.828125, "learning_rate": 6.115575179658992e-06, "loss": 0.3026, "step": 5802 }, { "epoch": 156.83783783783784, "grad_norm": 1.203125, "learning_rate": 6.1082560992612806e-06, "loss": 0.1341, "step": 5803 }, { "epoch": 156.86486486486487, "grad_norm": 1.140625, "learning_rate": 6.100940791640594e-06, "loss": 0.1664, "step": 5804 }, { "epoch": 156.8918918918919, "grad_norm": 0.875, "learning_rate": 6.093629258257821e-06, "loss": 0.1058, "step": 5805 }, { "epoch": 156.9189189189189, "grad_norm": 1.421875, "learning_rate": 6.0863215005731196e-06, "loss": 0.2032, "step": 5806 }, { "epoch": 156.94594594594594, "grad_norm": 1.1328125, "learning_rate": 6.07901752004589e-06, "loss": 0.151, "step": 5807 }, { "epoch": 156.97297297297297, "grad_norm": 1.0, "learning_rate": 6.071717318134765e-06, "loss": 0.1225, "step": 5808 }, { "epoch": 157.0, "grad_norm": 1.328125, "learning_rate": 6.064420896297634e-06, "loss": 0.1658, "step": 5809 }, { "epoch": 157.02702702702703, "grad_norm": 1.1484375, "learning_rate": 6.057128255991637e-06, "loss": 0.1756, "step": 5810 }, { "epoch": 157.05405405405406, "grad_norm": 0.86328125, "learning_rate": 6.0498393986731405e-06, "loss": 0.1108, "step": 5811 }, { "epoch": 157.0810810810811, "grad_norm": 1.2734375, "learning_rate": 6.042554325797773e-06, "loss": 0.2053, "step": 5812 }, { "epoch": 157.1081081081081, "grad_norm": 0.71875, "learning_rate": 6.035273038820405e-06, "loss": 0.088, "step": 5813 }, { "epoch": 157.13513513513513, "grad_norm": 1.5, "learning_rate": 6.027995539195131e-06, "loss": 0.2004, "step": 5814 }, { "epoch": 157.16216216216216, "grad_norm": 0.99609375, "learning_rate": 6.020721828375317e-06, "loss": 0.1125, "step": 5815 }, { "epoch": 157.1891891891892, "grad_norm": 0.98046875, "learning_rate": 6.013451907813561e-06, "loss": 0.1375, "step": 5816 }, { "epoch": 157.21621621621622, "grad_norm": 0.404296875, "learning_rate": 6.006185778961695e-06, "loss": 0.0544, "step": 5817 }, { "epoch": 157.24324324324326, "grad_norm": 0.76953125, "learning_rate": 5.99892344327081e-06, "loss": 0.091, "step": 5818 }, { "epoch": 157.27027027027026, "grad_norm": 1.40625, "learning_rate": 5.991664902191222e-06, "loss": 0.1782, "step": 5819 }, { "epoch": 157.2972972972973, "grad_norm": 0.55078125, "learning_rate": 5.984410157172502e-06, "loss": 0.065, "step": 5820 }, { "epoch": 157.32432432432432, "grad_norm": 0.58203125, "learning_rate": 5.977159209663469e-06, "loss": 0.0757, "step": 5821 }, { "epoch": 157.35135135135135, "grad_norm": 1.171875, "learning_rate": 5.969912061112157e-06, "loss": 0.1573, "step": 5822 }, { "epoch": 157.3783783783784, "grad_norm": 1.078125, "learning_rate": 5.962668712965869e-06, "loss": 0.1353, "step": 5823 }, { "epoch": 157.40540540540542, "grad_norm": 0.99609375, "learning_rate": 5.95542916667114e-06, "loss": 0.1068, "step": 5824 }, { "epoch": 157.43243243243242, "grad_norm": 1.03125, "learning_rate": 5.948193423673737e-06, "loss": 0.123, "step": 5825 }, { "epoch": 157.45945945945945, "grad_norm": 1.3671875, "learning_rate": 5.940961485418675e-06, "loss": 0.1492, "step": 5826 }, { "epoch": 157.48648648648648, "grad_norm": 0.9921875, "learning_rate": 5.933733353350221e-06, "loss": 0.1028, "step": 5827 }, { "epoch": 157.51351351351352, "grad_norm": 1.1796875, "learning_rate": 5.9265090289118505e-06, "loss": 0.167, "step": 5828 }, { "epoch": 157.54054054054055, "grad_norm": 0.7265625, "learning_rate": 5.919288513546309e-06, "loss": 0.0879, "step": 5829 }, { "epoch": 157.56756756756758, "grad_norm": 0.87109375, "learning_rate": 5.912071808695577e-06, "loss": 0.0899, "step": 5830 }, { "epoch": 157.59459459459458, "grad_norm": 0.7109375, "learning_rate": 5.904858915800849e-06, "loss": 0.0814, "step": 5831 }, { "epoch": 157.6216216216216, "grad_norm": 1.1484375, "learning_rate": 5.897649836302588e-06, "loss": 0.133, "step": 5832 }, { "epoch": 157.64864864864865, "grad_norm": 0.64453125, "learning_rate": 5.890444571640485e-06, "loss": 0.0792, "step": 5833 }, { "epoch": 157.67567567567568, "grad_norm": 0.52734375, "learning_rate": 5.8832431232534624e-06, "loss": 0.0725, "step": 5834 }, { "epoch": 157.7027027027027, "grad_norm": 0.412109375, "learning_rate": 5.87604549257969e-06, "loss": 0.0536, "step": 5835 }, { "epoch": 157.72972972972974, "grad_norm": 1.1484375, "learning_rate": 5.868851681056567e-06, "loss": 0.163, "step": 5836 }, { "epoch": 157.75675675675674, "grad_norm": 1.171875, "learning_rate": 5.86166169012074e-06, "loss": 0.147, "step": 5837 }, { "epoch": 157.78378378378378, "grad_norm": 1.1171875, "learning_rate": 5.8544755212080745e-06, "loss": 0.1283, "step": 5838 }, { "epoch": 157.8108108108108, "grad_norm": 0.578125, "learning_rate": 5.8472931757537046e-06, "loss": 0.0759, "step": 5839 }, { "epoch": 157.83783783783784, "grad_norm": 0.96484375, "learning_rate": 5.8401146551919585e-06, "loss": 0.1177, "step": 5840 }, { "epoch": 157.86486486486487, "grad_norm": 1.09375, "learning_rate": 5.832939960956443e-06, "loss": 0.1201, "step": 5841 }, { "epoch": 157.8918918918919, "grad_norm": 0.8671875, "learning_rate": 5.825769094479966e-06, "loss": 0.103, "step": 5842 }, { "epoch": 157.9189189189189, "grad_norm": 0.58203125, "learning_rate": 5.818602057194589e-06, "loss": 0.07, "step": 5843 }, { "epoch": 157.94594594594594, "grad_norm": 1.0546875, "learning_rate": 5.811438850531617e-06, "loss": 0.1193, "step": 5844 }, { "epoch": 157.97297297297297, "grad_norm": 0.8671875, "learning_rate": 5.804279475921565e-06, "loss": 0.1237, "step": 5845 }, { "epoch": 158.0, "grad_norm": 0.5625, "learning_rate": 5.797123934794199e-06, "loss": 0.0779, "step": 5846 }, { "epoch": 158.02702702702703, "grad_norm": 1.4765625, "learning_rate": 5.78997222857853e-06, "loss": 0.1913, "step": 5847 }, { "epoch": 158.05405405405406, "grad_norm": 1.1640625, "learning_rate": 5.782824358702771e-06, "loss": 0.1323, "step": 5848 }, { "epoch": 158.0810810810811, "grad_norm": 1.28125, "learning_rate": 5.775680326594402e-06, "loss": 0.1096, "step": 5849 }, { "epoch": 158.1081081081081, "grad_norm": 0.51171875, "learning_rate": 5.768540133680111e-06, "loss": 0.0611, "step": 5850 }, { "epoch": 158.13513513513513, "grad_norm": 0.796875, "learning_rate": 5.7614037813858376e-06, "loss": 0.0827, "step": 5851 }, { "epoch": 158.16216216216216, "grad_norm": 0.9609375, "learning_rate": 5.7542712711367515e-06, "loss": 0.1283, "step": 5852 }, { "epoch": 158.1891891891892, "grad_norm": 0.6796875, "learning_rate": 5.747142604357242e-06, "loss": 0.0787, "step": 5853 }, { "epoch": 158.21621621621622, "grad_norm": 0.94921875, "learning_rate": 5.7400177824709414e-06, "loss": 0.1045, "step": 5854 }, { "epoch": 158.24324324324326, "grad_norm": 0.8359375, "learning_rate": 5.732896806900722e-06, "loss": 0.1026, "step": 5855 }, { "epoch": 158.27027027027026, "grad_norm": 1.0234375, "learning_rate": 5.725779679068669e-06, "loss": 0.135, "step": 5856 }, { "epoch": 158.2972972972973, "grad_norm": 1.1328125, "learning_rate": 5.718666400396108e-06, "loss": 0.1384, "step": 5857 }, { "epoch": 158.32432432432432, "grad_norm": 0.49609375, "learning_rate": 5.711556972303611e-06, "loss": 0.0657, "step": 5858 }, { "epoch": 158.35135135135135, "grad_norm": 0.70703125, "learning_rate": 5.70445139621095e-06, "loss": 0.0771, "step": 5859 }, { "epoch": 158.3783783783784, "grad_norm": 1.140625, "learning_rate": 5.697349673537147e-06, "loss": 0.151, "step": 5860 }, { "epoch": 158.40540540540542, "grad_norm": 0.8203125, "learning_rate": 5.6902518057004665e-06, "loss": 0.0924, "step": 5861 }, { "epoch": 158.43243243243242, "grad_norm": 0.6640625, "learning_rate": 5.683157794118371e-06, "loss": 0.0858, "step": 5862 }, { "epoch": 158.45945945945945, "grad_norm": 0.5859375, "learning_rate": 5.67606764020758e-06, "loss": 0.0715, "step": 5863 }, { "epoch": 158.48648648648648, "grad_norm": 0.96484375, "learning_rate": 5.668981345384036e-06, "loss": 0.1173, "step": 5864 }, { "epoch": 158.51351351351352, "grad_norm": 0.76953125, "learning_rate": 5.661898911062896e-06, "loss": 0.0838, "step": 5865 }, { "epoch": 158.54054054054055, "grad_norm": 1.0234375, "learning_rate": 5.654820338658573e-06, "loss": 0.115, "step": 5866 }, { "epoch": 158.56756756756758, "grad_norm": 0.59375, "learning_rate": 5.64774562958468e-06, "loss": 0.0651, "step": 5867 }, { "epoch": 158.59459459459458, "grad_norm": 1.0625, "learning_rate": 5.640674785254077e-06, "loss": 0.1313, "step": 5868 }, { "epoch": 158.6216216216216, "grad_norm": 0.92578125, "learning_rate": 5.633607807078856e-06, "loss": 0.11, "step": 5869 }, { "epoch": 158.64864864864865, "grad_norm": 1.125, "learning_rate": 5.626544696470315e-06, "loss": 0.1524, "step": 5870 }, { "epoch": 158.67567567567568, "grad_norm": 0.51953125, "learning_rate": 5.619485454838996e-06, "loss": 0.0708, "step": 5871 }, { "epoch": 158.7027027027027, "grad_norm": 0.95703125, "learning_rate": 5.6124300835946805e-06, "loss": 0.0961, "step": 5872 }, { "epoch": 158.72972972972974, "grad_norm": 0.7265625, "learning_rate": 5.605378584146339e-06, "loss": 0.0728, "step": 5873 }, { "epoch": 158.75675675675674, "grad_norm": 1.46875, "learning_rate": 5.598330957902212e-06, "loss": 0.2419, "step": 5874 }, { "epoch": 158.78378378378378, "grad_norm": 0.9453125, "learning_rate": 5.591287206269727e-06, "loss": 0.1119, "step": 5875 }, { "epoch": 158.8108108108108, "grad_norm": 0.8359375, "learning_rate": 5.584247330655576e-06, "loss": 0.1161, "step": 5876 }, { "epoch": 158.83783783783784, "grad_norm": 1.46875, "learning_rate": 5.57721133246564e-06, "loss": 0.1821, "step": 5877 }, { "epoch": 158.86486486486487, "grad_norm": 0.6171875, "learning_rate": 5.570179213105059e-06, "loss": 0.0626, "step": 5878 }, { "epoch": 158.8918918918919, "grad_norm": 0.94921875, "learning_rate": 5.563150973978171e-06, "loss": 0.0873, "step": 5879 }, { "epoch": 158.9189189189189, "grad_norm": 0.63671875, "learning_rate": 5.55612661648856e-06, "loss": 0.0822, "step": 5880 }, { "epoch": 158.94594594594594, "grad_norm": 1.1484375, "learning_rate": 5.549106142039018e-06, "loss": 0.1278, "step": 5881 }, { "epoch": 158.97297297297297, "grad_norm": 1.2421875, "learning_rate": 5.542089552031571e-06, "loss": 0.1722, "step": 5882 }, { "epoch": 159.0, "grad_norm": 0.61328125, "learning_rate": 5.535076847867476e-06, "loss": 0.0791, "step": 5883 }, { "epoch": 159.02702702702703, "grad_norm": 0.7734375, "learning_rate": 5.528068030947192e-06, "loss": 0.0802, "step": 5884 }, { "epoch": 159.05405405405406, "grad_norm": 1.4140625, "learning_rate": 5.521063102670424e-06, "loss": 0.1127, "step": 5885 }, { "epoch": 159.0810810810811, "grad_norm": 0.56640625, "learning_rate": 5.514062064436096e-06, "loss": 0.0702, "step": 5886 }, { "epoch": 159.1081081081081, "grad_norm": 0.8828125, "learning_rate": 5.507064917642338e-06, "loss": 0.0884, "step": 5887 }, { "epoch": 159.13513513513513, "grad_norm": 0.5078125, "learning_rate": 5.500071663686524e-06, "loss": 0.0664, "step": 5888 }, { "epoch": 159.16216216216216, "grad_norm": 1.1640625, "learning_rate": 5.493082303965246e-06, "loss": 0.1191, "step": 5889 }, { "epoch": 159.1891891891892, "grad_norm": 1.1796875, "learning_rate": 5.486096839874302e-06, "loss": 0.1598, "step": 5890 }, { "epoch": 159.21621621621622, "grad_norm": 0.9296875, "learning_rate": 5.479115272808735e-06, "loss": 0.1217, "step": 5891 }, { "epoch": 159.24324324324326, "grad_norm": 0.96484375, "learning_rate": 5.4721376041628e-06, "loss": 0.0977, "step": 5892 }, { "epoch": 159.27027027027026, "grad_norm": 0.62109375, "learning_rate": 5.465163835329964e-06, "loss": 0.0689, "step": 5893 }, { "epoch": 159.2972972972973, "grad_norm": 1.1796875, "learning_rate": 5.458193967702935e-06, "loss": 0.1298, "step": 5894 }, { "epoch": 159.32432432432432, "grad_norm": 0.8359375, "learning_rate": 5.451228002673619e-06, "loss": 0.0882, "step": 5895 }, { "epoch": 159.35135135135135, "grad_norm": 0.98046875, "learning_rate": 5.444265941633162e-06, "loss": 0.1312, "step": 5896 }, { "epoch": 159.3783783783784, "grad_norm": 0.84765625, "learning_rate": 5.4373077859719265e-06, "loss": 0.1104, "step": 5897 }, { "epoch": 159.40540540540542, "grad_norm": 0.90234375, "learning_rate": 5.43035353707948e-06, "loss": 0.0888, "step": 5898 }, { "epoch": 159.43243243243242, "grad_norm": 0.671875, "learning_rate": 5.423403196344626e-06, "loss": 0.0741, "step": 5899 }, { "epoch": 159.45945945945945, "grad_norm": 0.80078125, "learning_rate": 5.416456765155392e-06, "loss": 0.1354, "step": 5900 }, { "epoch": 159.48648648648648, "grad_norm": 1.4921875, "learning_rate": 5.409514244899003e-06, "loss": 0.2377, "step": 5901 }, { "epoch": 159.51351351351352, "grad_norm": 1.296875, "learning_rate": 5.402575636961918e-06, "loss": 0.1514, "step": 5902 }, { "epoch": 159.54054054054055, "grad_norm": 1.1484375, "learning_rate": 5.395640942729824e-06, "loss": 0.1709, "step": 5903 }, { "epoch": 159.56756756756758, "grad_norm": 1.0625, "learning_rate": 5.3887101635875955e-06, "loss": 0.13, "step": 5904 }, { "epoch": 159.59459459459458, "grad_norm": 0.75, "learning_rate": 5.381783300919355e-06, "loss": 0.0893, "step": 5905 }, { "epoch": 159.6216216216216, "grad_norm": 0.73046875, "learning_rate": 5.374860356108439e-06, "loss": 0.0901, "step": 5906 }, { "epoch": 159.64864864864865, "grad_norm": 1.5390625, "learning_rate": 5.367941330537377e-06, "loss": 0.1865, "step": 5907 }, { "epoch": 159.67567567567568, "grad_norm": 0.72265625, "learning_rate": 5.361026225587945e-06, "loss": 0.0912, "step": 5908 }, { "epoch": 159.7027027027027, "grad_norm": 1.1796875, "learning_rate": 5.3541150426411305e-06, "loss": 0.115, "step": 5909 }, { "epoch": 159.72972972972974, "grad_norm": 1.1640625, "learning_rate": 5.347207783077116e-06, "loss": 0.1214, "step": 5910 }, { "epoch": 159.75675675675674, "grad_norm": 0.86328125, "learning_rate": 5.340304448275329e-06, "loss": 0.1105, "step": 5911 }, { "epoch": 159.78378378378378, "grad_norm": 1.109375, "learning_rate": 5.333405039614392e-06, "loss": 0.1049, "step": 5912 }, { "epoch": 159.8108108108108, "grad_norm": 1.1328125, "learning_rate": 5.326509558472162e-06, "loss": 0.163, "step": 5913 }, { "epoch": 159.83783783783784, "grad_norm": 1.4375, "learning_rate": 5.319618006225688e-06, "loss": 0.1749, "step": 5914 }, { "epoch": 159.86486486486487, "grad_norm": 1.234375, "learning_rate": 5.3127303842512605e-06, "loss": 0.1869, "step": 5915 }, { "epoch": 159.8918918918919, "grad_norm": 0.8046875, "learning_rate": 5.30584669392436e-06, "loss": 0.0854, "step": 5916 }, { "epoch": 159.9189189189189, "grad_norm": 0.8828125, "learning_rate": 5.2989669366197096e-06, "loss": 0.1117, "step": 5917 }, { "epoch": 159.94594594594594, "grad_norm": 0.828125, "learning_rate": 5.2920911137112146e-06, "loss": 0.0975, "step": 5918 }, { "epoch": 159.97297297297297, "grad_norm": 0.466796875, "learning_rate": 5.2852192265720216e-06, "loss": 0.0667, "step": 5919 }, { "epoch": 160.0, "grad_norm": 1.1484375, "learning_rate": 5.278351276574489e-06, "loss": 0.1488, "step": 5920 }, { "epoch": 160.02702702702703, "grad_norm": 0.859375, "learning_rate": 5.271487265090163e-06, "loss": 0.0876, "step": 5921 }, { "epoch": 160.05405405405406, "grad_norm": 0.6796875, "learning_rate": 5.264627193489833e-06, "loss": 0.0896, "step": 5922 }, { "epoch": 160.0810810810811, "grad_norm": 0.72265625, "learning_rate": 5.257771063143494e-06, "loss": 0.0933, "step": 5923 }, { "epoch": 160.1081081081081, "grad_norm": 0.76953125, "learning_rate": 5.250918875420335e-06, "loss": 0.1205, "step": 5924 }, { "epoch": 160.13513513513513, "grad_norm": 0.83203125, "learning_rate": 5.244070631688791e-06, "loss": 0.0977, "step": 5925 }, { "epoch": 160.16216216216216, "grad_norm": 0.76171875, "learning_rate": 5.237226333316472e-06, "loss": 0.0844, "step": 5926 }, { "epoch": 160.1891891891892, "grad_norm": 1.65625, "learning_rate": 5.230385981670232e-06, "loss": 0.2816, "step": 5927 }, { "epoch": 160.21621621621622, "grad_norm": 1.125, "learning_rate": 5.223549578116124e-06, "loss": 0.1452, "step": 5928 }, { "epoch": 160.24324324324326, "grad_norm": 0.84765625, "learning_rate": 5.216717124019405e-06, "loss": 0.0775, "step": 5929 }, { "epoch": 160.27027027027026, "grad_norm": 1.03125, "learning_rate": 5.209888620744554e-06, "loss": 0.1112, "step": 5930 }, { "epoch": 160.2972972972973, "grad_norm": 1.0859375, "learning_rate": 5.203064069655267e-06, "loss": 0.1274, "step": 5931 }, { "epoch": 160.32432432432432, "grad_norm": 1.1328125, "learning_rate": 5.196243472114424e-06, "loss": 0.1368, "step": 5932 }, { "epoch": 160.35135135135135, "grad_norm": 0.92578125, "learning_rate": 5.18942682948414e-06, "loss": 0.0744, "step": 5933 }, { "epoch": 160.3783783783784, "grad_norm": 0.89453125, "learning_rate": 5.182614143125744e-06, "loss": 0.109, "step": 5934 }, { "epoch": 160.40540540540542, "grad_norm": 1.0625, "learning_rate": 5.175805414399748e-06, "loss": 0.1015, "step": 5935 }, { "epoch": 160.43243243243242, "grad_norm": 1.140625, "learning_rate": 5.169000644665894e-06, "loss": 0.1602, "step": 5936 }, { "epoch": 160.45945945945945, "grad_norm": 0.953125, "learning_rate": 5.162199835283141e-06, "loss": 0.1211, "step": 5937 }, { "epoch": 160.48648648648648, "grad_norm": 0.91015625, "learning_rate": 5.155402987609628e-06, "loss": 0.1178, "step": 5938 }, { "epoch": 160.51351351351352, "grad_norm": 0.9296875, "learning_rate": 5.148610103002735e-06, "loss": 0.1131, "step": 5939 }, { "epoch": 160.54054054054055, "grad_norm": 1.140625, "learning_rate": 5.14182118281902e-06, "loss": 0.1274, "step": 5940 }, { "epoch": 160.56756756756758, "grad_norm": 1.0703125, "learning_rate": 5.135036228414275e-06, "loss": 0.0927, "step": 5941 }, { "epoch": 160.59459459459458, "grad_norm": 1.0234375, "learning_rate": 5.128255241143493e-06, "loss": 0.1011, "step": 5942 }, { "epoch": 160.6216216216216, "grad_norm": 1.046875, "learning_rate": 5.121478222360859e-06, "loss": 0.119, "step": 5943 }, { "epoch": 160.64864864864865, "grad_norm": 0.67578125, "learning_rate": 5.114705173419787e-06, "loss": 0.0742, "step": 5944 }, { "epoch": 160.67567567567568, "grad_norm": 0.5390625, "learning_rate": 5.1079360956728915e-06, "loss": 0.0747, "step": 5945 }, { "epoch": 160.7027027027027, "grad_norm": 0.8046875, "learning_rate": 5.1011709904719816e-06, "loss": 0.1008, "step": 5946 }, { "epoch": 160.72972972972974, "grad_norm": 0.94140625, "learning_rate": 5.09440985916809e-06, "loss": 0.1102, "step": 5947 }, { "epoch": 160.75675675675674, "grad_norm": 1.1796875, "learning_rate": 5.087652703111453e-06, "loss": 0.1505, "step": 5948 }, { "epoch": 160.78378378378378, "grad_norm": 1.0234375, "learning_rate": 5.080899523651497e-06, "loss": 0.1233, "step": 5949 }, { "epoch": 160.8108108108108, "grad_norm": 1.265625, "learning_rate": 5.074150322136881e-06, "loss": 0.1202, "step": 5950 }, { "epoch": 160.83783783783784, "grad_norm": 1.0546875, "learning_rate": 5.06740509991544e-06, "loss": 0.1291, "step": 5951 }, { "epoch": 160.86486486486487, "grad_norm": 0.90625, "learning_rate": 5.060663858334241e-06, "loss": 0.0919, "step": 5952 }, { "epoch": 160.8918918918919, "grad_norm": 1.25, "learning_rate": 5.053926598739534e-06, "loss": 0.1833, "step": 5953 }, { "epoch": 160.9189189189189, "grad_norm": 0.76953125, "learning_rate": 5.047193322476792e-06, "loss": 0.0741, "step": 5954 }, { "epoch": 160.94594594594594, "grad_norm": 0.9296875, "learning_rate": 5.040464030890679e-06, "loss": 0.1031, "step": 5955 }, { "epoch": 160.97297297297297, "grad_norm": 0.6796875, "learning_rate": 5.033738725325069e-06, "loss": 0.0661, "step": 5956 }, { "epoch": 161.0, "grad_norm": 1.0390625, "learning_rate": 5.027017407123047e-06, "loss": 0.1105, "step": 5957 }, { "epoch": 161.02702702702703, "grad_norm": 0.7265625, "learning_rate": 5.0203000776268825e-06, "loss": 0.0756, "step": 5958 }, { "epoch": 161.05405405405406, "grad_norm": 1.1484375, "learning_rate": 5.013586738178075e-06, "loss": 0.1242, "step": 5959 }, { "epoch": 161.0810810810811, "grad_norm": 0.6640625, "learning_rate": 5.006877390117295e-06, "loss": 0.0832, "step": 5960 }, { "epoch": 161.1081081081081, "grad_norm": 0.953125, "learning_rate": 5.000172034784442e-06, "loss": 0.1142, "step": 5961 }, { "epoch": 161.13513513513513, "grad_norm": 1.046875, "learning_rate": 4.993470673518619e-06, "loss": 0.1013, "step": 5962 }, { "epoch": 161.16216216216216, "grad_norm": 1.0625, "learning_rate": 4.986773307658105e-06, "loss": 0.1289, "step": 5963 }, { "epoch": 161.1891891891892, "grad_norm": 1.484375, "learning_rate": 4.980079938540403e-06, "loss": 0.1624, "step": 5964 }, { "epoch": 161.21621621621622, "grad_norm": 1.3046875, "learning_rate": 4.973390567502223e-06, "loss": 0.0934, "step": 5965 }, { "epoch": 161.24324324324326, "grad_norm": 0.8359375, "learning_rate": 4.966705195879454e-06, "loss": 0.0956, "step": 5966 }, { "epoch": 161.27027027027026, "grad_norm": 0.9609375, "learning_rate": 4.960023825007198e-06, "loss": 0.1199, "step": 5967 }, { "epoch": 161.2972972972973, "grad_norm": 0.9765625, "learning_rate": 4.953346456219776e-06, "loss": 0.1105, "step": 5968 }, { "epoch": 161.32432432432432, "grad_norm": 1.2265625, "learning_rate": 4.94667309085067e-06, "loss": 0.1231, "step": 5969 }, { "epoch": 161.35135135135135, "grad_norm": 0.75, "learning_rate": 4.940003730232604e-06, "loss": 0.0936, "step": 5970 }, { "epoch": 161.3783783783784, "grad_norm": 0.5390625, "learning_rate": 4.933338375697464e-06, "loss": 0.0719, "step": 5971 }, { "epoch": 161.40540540540542, "grad_norm": 0.73828125, "learning_rate": 4.926677028576368e-06, "loss": 0.0782, "step": 5972 }, { "epoch": 161.43243243243242, "grad_norm": 0.62890625, "learning_rate": 4.920019690199623e-06, "loss": 0.0772, "step": 5973 }, { "epoch": 161.45945945945945, "grad_norm": 0.9609375, "learning_rate": 4.91336636189672e-06, "loss": 0.0995, "step": 5974 }, { "epoch": 161.48648648648648, "grad_norm": 0.54296875, "learning_rate": 4.906717044996375e-06, "loss": 0.0701, "step": 5975 }, { "epoch": 161.51351351351352, "grad_norm": 0.6171875, "learning_rate": 4.900071740826489e-06, "loss": 0.0697, "step": 5976 }, { "epoch": 161.54054054054055, "grad_norm": 0.94140625, "learning_rate": 4.8934304507141535e-06, "loss": 0.1119, "step": 5977 }, { "epoch": 161.56756756756758, "grad_norm": 0.80078125, "learning_rate": 4.886793175985677e-06, "loss": 0.0874, "step": 5978 }, { "epoch": 161.59459459459458, "grad_norm": 0.99609375, "learning_rate": 4.8801599179665566e-06, "loss": 0.1228, "step": 5979 }, { "epoch": 161.6216216216216, "grad_norm": 0.98828125, "learning_rate": 4.87353067798148e-06, "loss": 0.1205, "step": 5980 }, { "epoch": 161.64864864864865, "grad_norm": 0.578125, "learning_rate": 4.866905457354346e-06, "loss": 0.0565, "step": 5981 }, { "epoch": 161.67567567567568, "grad_norm": 1.234375, "learning_rate": 4.8602842574082485e-06, "loss": 0.1411, "step": 5982 }, { "epoch": 161.7027027027027, "grad_norm": 1.09375, "learning_rate": 4.853667079465465e-06, "loss": 0.1358, "step": 5983 }, { "epoch": 161.72972972972974, "grad_norm": 1.421875, "learning_rate": 4.8470539248474834e-06, "loss": 0.1807, "step": 5984 }, { "epoch": 161.75675675675674, "grad_norm": 1.0234375, "learning_rate": 4.8404447948749934e-06, "loss": 0.1018, "step": 5985 }, { "epoch": 161.78378378378378, "grad_norm": 1.0625, "learning_rate": 4.833839690867853e-06, "loss": 0.1185, "step": 5986 }, { "epoch": 161.8108108108108, "grad_norm": 1.0234375, "learning_rate": 4.827238614145155e-06, "loss": 0.1132, "step": 5987 }, { "epoch": 161.83783783783784, "grad_norm": 0.98046875, "learning_rate": 4.820641566025152e-06, "loss": 0.1191, "step": 5988 }, { "epoch": 161.86486486486487, "grad_norm": 0.875, "learning_rate": 4.81404854782532e-06, "loss": 0.0903, "step": 5989 }, { "epoch": 161.8918918918919, "grad_norm": 1.1171875, "learning_rate": 4.807459560862304e-06, "loss": 0.1598, "step": 5990 }, { "epoch": 161.9189189189189, "grad_norm": 0.7109375, "learning_rate": 4.800874606451969e-06, "loss": 0.0783, "step": 5991 }, { "epoch": 161.94594594594594, "grad_norm": 1.2265625, "learning_rate": 4.794293685909368e-06, "loss": 0.1465, "step": 5992 }, { "epoch": 161.97297297297297, "grad_norm": 0.74609375, "learning_rate": 4.787716800548731e-06, "loss": 0.0836, "step": 5993 }, { "epoch": 162.0, "grad_norm": 1.0546875, "learning_rate": 4.781143951683506e-06, "loss": 0.1652, "step": 5994 }, { "epoch": 162.02702702702703, "grad_norm": 1.1640625, "learning_rate": 4.7745751406263165e-06, "loss": 0.1503, "step": 5995 }, { "epoch": 162.05405405405406, "grad_norm": 0.8984375, "learning_rate": 4.768010368688996e-06, "loss": 0.1107, "step": 5996 }, { "epoch": 162.0810810810811, "grad_norm": 1.15625, "learning_rate": 4.761449637182552e-06, "loss": 0.1692, "step": 5997 }, { "epoch": 162.1081081081081, "grad_norm": 0.9453125, "learning_rate": 4.7548929474172e-06, "loss": 0.1082, "step": 5998 }, { "epoch": 162.13513513513513, "grad_norm": 0.79296875, "learning_rate": 4.748340300702353e-06, "loss": 0.0906, "step": 5999 }, { "epoch": 162.16216216216216, "grad_norm": 1.3359375, "learning_rate": 4.7417916983465926e-06, "loss": 0.1633, "step": 6000 }, { "epoch": 162.1891891891892, "grad_norm": 1.390625, "learning_rate": 4.735247141657725e-06, "loss": 0.1533, "step": 6001 }, { "epoch": 162.21621621621622, "grad_norm": 0.9765625, "learning_rate": 4.728706631942714e-06, "loss": 0.1321, "step": 6002 }, { "epoch": 162.24324324324326, "grad_norm": 0.89453125, "learning_rate": 4.722170170507739e-06, "loss": 0.0973, "step": 6003 }, { "epoch": 162.27027027027026, "grad_norm": 1.0078125, "learning_rate": 4.715637758658176e-06, "loss": 0.1181, "step": 6004 }, { "epoch": 162.2972972972973, "grad_norm": 0.8984375, "learning_rate": 4.709109397698564e-06, "loss": 0.1221, "step": 6005 }, { "epoch": 162.32432432432432, "grad_norm": 0.4765625, "learning_rate": 4.702585088932654e-06, "loss": 0.0616, "step": 6006 }, { "epoch": 162.35135135135135, "grad_norm": 1.140625, "learning_rate": 4.696064833663394e-06, "loss": 0.153, "step": 6007 }, { "epoch": 162.3783783783784, "grad_norm": 0.87890625, "learning_rate": 4.689548633192898e-06, "loss": 0.1029, "step": 6008 }, { "epoch": 162.40540540540542, "grad_norm": 1.1953125, "learning_rate": 4.6830364888224894e-06, "loss": 0.1096, "step": 6009 }, { "epoch": 162.43243243243242, "grad_norm": 0.96875, "learning_rate": 4.676528401852687e-06, "loss": 0.1144, "step": 6010 }, { "epoch": 162.45945945945945, "grad_norm": 0.77734375, "learning_rate": 4.670024373583171e-06, "loss": 0.1156, "step": 6011 }, { "epoch": 162.48648648648648, "grad_norm": 0.5234375, "learning_rate": 4.663524405312835e-06, "loss": 0.0696, "step": 6012 }, { "epoch": 162.51351351351352, "grad_norm": 1.2109375, "learning_rate": 4.657028498339766e-06, "loss": 0.1552, "step": 6013 }, { "epoch": 162.54054054054055, "grad_norm": 0.4765625, "learning_rate": 4.650536653961215e-06, "loss": 0.0613, "step": 6014 }, { "epoch": 162.56756756756758, "grad_norm": 0.9453125, "learning_rate": 4.644048873473647e-06, "loss": 0.0997, "step": 6015 }, { "epoch": 162.59459459459458, "grad_norm": 1.046875, "learning_rate": 4.637565158172697e-06, "loss": 0.117, "step": 6016 }, { "epoch": 162.6216216216216, "grad_norm": 0.50390625, "learning_rate": 4.631085509353198e-06, "loss": 0.0561, "step": 6017 }, { "epoch": 162.64864864864865, "grad_norm": 0.8203125, "learning_rate": 4.624609928309173e-06, "loss": 0.1222, "step": 6018 }, { "epoch": 162.67567567567568, "grad_norm": 0.5859375, "learning_rate": 4.61813841633382e-06, "loss": 0.0741, "step": 6019 }, { "epoch": 162.7027027027027, "grad_norm": 1.28125, "learning_rate": 4.6116709747195394e-06, "loss": 0.2103, "step": 6020 }, { "epoch": 162.72972972972974, "grad_norm": 1.0546875, "learning_rate": 4.605207604757916e-06, "loss": 0.1221, "step": 6021 }, { "epoch": 162.75675675675674, "grad_norm": 0.86328125, "learning_rate": 4.598748307739706e-06, "loss": 0.0998, "step": 6022 }, { "epoch": 162.78378378378378, "grad_norm": 0.76953125, "learning_rate": 4.592293084954869e-06, "loss": 0.0895, "step": 6023 }, { "epoch": 162.8108108108108, "grad_norm": 1.703125, "learning_rate": 4.585841937692553e-06, "loss": 0.2333, "step": 6024 }, { "epoch": 162.83783783783784, "grad_norm": 0.890625, "learning_rate": 4.579394867241071e-06, "loss": 0.0982, "step": 6025 }, { "epoch": 162.86486486486487, "grad_norm": 1.125, "learning_rate": 4.572951874887943e-06, "loss": 0.1412, "step": 6026 }, { "epoch": 162.8918918918919, "grad_norm": 0.79296875, "learning_rate": 4.5665129619198745e-06, "loss": 0.0846, "step": 6027 }, { "epoch": 162.9189189189189, "grad_norm": 1.0625, "learning_rate": 4.560078129622736e-06, "loss": 0.1358, "step": 6028 }, { "epoch": 162.94594594594594, "grad_norm": 0.87109375, "learning_rate": 4.553647379281606e-06, "loss": 0.1319, "step": 6029 }, { "epoch": 162.97297297297297, "grad_norm": 0.609375, "learning_rate": 4.547220712180731e-06, "loss": 0.0734, "step": 6030 }, { "epoch": 163.0, "grad_norm": 0.8125, "learning_rate": 4.540798129603554e-06, "loss": 0.0822, "step": 6031 }, { "epoch": 163.02702702702703, "grad_norm": 1.4921875, "learning_rate": 4.534379632832692e-06, "loss": 0.1973, "step": 6032 }, { "epoch": 163.05405405405406, "grad_norm": 1.0546875, "learning_rate": 4.527965223149957e-06, "loss": 0.1665, "step": 6033 }, { "epoch": 163.0810810810811, "grad_norm": 1.3125, "learning_rate": 4.521554901836333e-06, "loss": 0.1447, "step": 6034 }, { "epoch": 163.1081081081081, "grad_norm": 1.171875, "learning_rate": 4.515148670172001e-06, "loss": 0.1412, "step": 6035 }, { "epoch": 163.13513513513513, "grad_norm": 1.1171875, "learning_rate": 4.508746529436311e-06, "loss": 0.102, "step": 6036 }, { "epoch": 163.16216216216216, "grad_norm": 0.50390625, "learning_rate": 4.502348480907803e-06, "loss": 0.0636, "step": 6037 }, { "epoch": 163.1891891891892, "grad_norm": 0.8125, "learning_rate": 4.49595452586421e-06, "loss": 0.0901, "step": 6038 }, { "epoch": 163.21621621621622, "grad_norm": 0.92578125, "learning_rate": 4.489564665582424e-06, "loss": 0.1187, "step": 6039 }, { "epoch": 163.24324324324326, "grad_norm": 1.2109375, "learning_rate": 4.483178901338539e-06, "loss": 0.1841, "step": 6040 }, { "epoch": 163.27027027027026, "grad_norm": 0.384765625, "learning_rate": 4.476797234407831e-06, "loss": 0.0563, "step": 6041 }, { "epoch": 163.2972972972973, "grad_norm": 0.75390625, "learning_rate": 4.4704196660647374e-06, "loss": 0.087, "step": 6042 }, { "epoch": 163.32432432432432, "grad_norm": 0.83984375, "learning_rate": 4.4640461975828964e-06, "loss": 0.1043, "step": 6043 }, { "epoch": 163.35135135135135, "grad_norm": 0.6953125, "learning_rate": 4.45767683023513e-06, "loss": 0.0752, "step": 6044 }, { "epoch": 163.3783783783784, "grad_norm": 1.390625, "learning_rate": 4.45131156529342e-06, "loss": 0.1457, "step": 6045 }, { "epoch": 163.40540540540542, "grad_norm": 1.0703125, "learning_rate": 4.444950404028955e-06, "loss": 0.0856, "step": 6046 }, { "epoch": 163.43243243243242, "grad_norm": 1.59375, "learning_rate": 4.438593347712081e-06, "loss": 0.1516, "step": 6047 }, { "epoch": 163.45945945945945, "grad_norm": 1.296875, "learning_rate": 4.432240397612333e-06, "loss": 0.1862, "step": 6048 }, { "epoch": 163.48648648648648, "grad_norm": 1.1640625, "learning_rate": 4.425891554998443e-06, "loss": 0.142, "step": 6049 }, { "epoch": 163.51351351351352, "grad_norm": 1.25, "learning_rate": 4.4195468211382876e-06, "loss": 0.1485, "step": 6050 }, { "epoch": 163.54054054054055, "grad_norm": 0.375, "learning_rate": 4.4132061972989525e-06, "loss": 0.054, "step": 6051 }, { "epoch": 163.56756756756758, "grad_norm": 0.8046875, "learning_rate": 4.4068696847466975e-06, "loss": 0.1245, "step": 6052 }, { "epoch": 163.59459459459458, "grad_norm": 1.046875, "learning_rate": 4.400537284746941e-06, "loss": 0.1312, "step": 6053 }, { "epoch": 163.6216216216216, "grad_norm": 0.65625, "learning_rate": 4.394208998564306e-06, "loss": 0.0843, "step": 6054 }, { "epoch": 163.64864864864865, "grad_norm": 1.1171875, "learning_rate": 4.387884827462588e-06, "loss": 0.1317, "step": 6055 }, { "epoch": 163.67567567567568, "grad_norm": 0.4921875, "learning_rate": 4.3815647727047435e-06, "loss": 0.061, "step": 6056 }, { "epoch": 163.7027027027027, "grad_norm": 0.64453125, "learning_rate": 4.375248835552926e-06, "loss": 0.0762, "step": 6057 }, { "epoch": 163.72972972972974, "grad_norm": 0.8828125, "learning_rate": 4.368937017268465e-06, "loss": 0.0918, "step": 6058 }, { "epoch": 163.75675675675674, "grad_norm": 0.6875, "learning_rate": 4.362629319111852e-06, "loss": 0.0816, "step": 6059 }, { "epoch": 163.78378378378378, "grad_norm": 1.15625, "learning_rate": 4.35632574234277e-06, "loss": 0.1533, "step": 6060 }, { "epoch": 163.8108108108108, "grad_norm": 1.0625, "learning_rate": 4.350026288220082e-06, "loss": 0.1356, "step": 6061 }, { "epoch": 163.83783783783784, "grad_norm": 0.97265625, "learning_rate": 4.343730958001813e-06, "loss": 0.0996, "step": 6062 }, { "epoch": 163.86486486486487, "grad_norm": 0.80859375, "learning_rate": 4.33743975294518e-06, "loss": 0.085, "step": 6063 }, { "epoch": 163.8918918918919, "grad_norm": 1.2109375, "learning_rate": 4.3311526743065575e-06, "loss": 0.1778, "step": 6064 }, { "epoch": 163.9189189189189, "grad_norm": 0.69140625, "learning_rate": 4.324869723341513e-06, "loss": 0.0898, "step": 6065 }, { "epoch": 163.94594594594594, "grad_norm": 1.0703125, "learning_rate": 4.318590901304789e-06, "loss": 0.1249, "step": 6066 }, { "epoch": 163.97297297297297, "grad_norm": 0.8984375, "learning_rate": 4.312316209450287e-06, "loss": 0.0934, "step": 6067 }, { "epoch": 164.0, "grad_norm": 0.82421875, "learning_rate": 4.306045649031107e-06, "loss": 0.0879, "step": 6068 }, { "epoch": 164.02702702702703, "grad_norm": 0.53125, "learning_rate": 4.299779221299499e-06, "loss": 0.0677, "step": 6069 }, { "epoch": 164.05405405405406, "grad_norm": 0.92578125, "learning_rate": 4.293516927506913e-06, "loss": 0.1045, "step": 6070 }, { "epoch": 164.0810810810811, "grad_norm": 0.73046875, "learning_rate": 4.2872587689039484e-06, "loss": 0.0675, "step": 6071 }, { "epoch": 164.1081081081081, "grad_norm": 1.015625, "learning_rate": 4.281004746740405e-06, "loss": 0.1366, "step": 6072 }, { "epoch": 164.13513513513513, "grad_norm": 1.0078125, "learning_rate": 4.2747548622652264e-06, "loss": 0.1369, "step": 6073 }, { "epoch": 164.16216216216216, "grad_norm": 0.53125, "learning_rate": 4.268509116726555e-06, "loss": 0.0647, "step": 6074 }, { "epoch": 164.1891891891892, "grad_norm": 1.1484375, "learning_rate": 4.262267511371706e-06, "loss": 0.1141, "step": 6075 }, { "epoch": 164.21621621621622, "grad_norm": 0.5234375, "learning_rate": 4.2560300474471434e-06, "loss": 0.0675, "step": 6076 }, { "epoch": 164.24324324324326, "grad_norm": 0.9609375, "learning_rate": 4.249796726198535e-06, "loss": 0.1119, "step": 6077 }, { "epoch": 164.27027027027026, "grad_norm": 1.078125, "learning_rate": 4.243567548870695e-06, "loss": 0.1208, "step": 6078 }, { "epoch": 164.2972972972973, "grad_norm": 0.482421875, "learning_rate": 4.237342516707624e-06, "loss": 0.0644, "step": 6079 }, { "epoch": 164.32432432432432, "grad_norm": 0.69921875, "learning_rate": 4.231121630952503e-06, "loss": 0.0872, "step": 6080 }, { "epoch": 164.35135135135135, "grad_norm": 0.96875, "learning_rate": 4.2249048928476584e-06, "loss": 0.1043, "step": 6081 }, { "epoch": 164.3783783783784, "grad_norm": 0.85546875, "learning_rate": 4.218692303634614e-06, "loss": 0.0874, "step": 6082 }, { "epoch": 164.40540540540542, "grad_norm": 1.25, "learning_rate": 4.212483864554059e-06, "loss": 0.1431, "step": 6083 }, { "epoch": 164.43243243243242, "grad_norm": 0.9375, "learning_rate": 4.20627957684584e-06, "loss": 0.1116, "step": 6084 }, { "epoch": 164.45945945945945, "grad_norm": 0.65234375, "learning_rate": 4.200079441748989e-06, "loss": 0.0743, "step": 6085 }, { "epoch": 164.48648648648648, "grad_norm": 0.89453125, "learning_rate": 4.193883460501713e-06, "loss": 0.1476, "step": 6086 }, { "epoch": 164.51351351351352, "grad_norm": 0.82421875, "learning_rate": 4.187691634341365e-06, "loss": 0.0967, "step": 6087 }, { "epoch": 164.54054054054055, "grad_norm": 0.890625, "learning_rate": 4.18150396450449e-06, "loss": 0.0841, "step": 6088 }, { "epoch": 164.56756756756758, "grad_norm": 1.328125, "learning_rate": 4.175320452226808e-06, "loss": 0.1748, "step": 6089 }, { "epoch": 164.59459459459458, "grad_norm": 0.99609375, "learning_rate": 4.169141098743182e-06, "loss": 0.1225, "step": 6090 }, { "epoch": 164.6216216216216, "grad_norm": 0.9921875, "learning_rate": 4.162965905287672e-06, "loss": 0.0998, "step": 6091 }, { "epoch": 164.64864864864865, "grad_norm": 0.48828125, "learning_rate": 4.156794873093486e-06, "loss": 0.0691, "step": 6092 }, { "epoch": 164.67567567567568, "grad_norm": 0.78515625, "learning_rate": 4.1506280033930146e-06, "loss": 0.0925, "step": 6093 }, { "epoch": 164.7027027027027, "grad_norm": 0.828125, "learning_rate": 4.144465297417818e-06, "loss": 0.0931, "step": 6094 }, { "epoch": 164.72972972972974, "grad_norm": 1.03125, "learning_rate": 4.1383067563986105e-06, "loss": 0.1462, "step": 6095 }, { "epoch": 164.75675675675674, "grad_norm": 1.171875, "learning_rate": 4.132152381565288e-06, "loss": 0.1664, "step": 6096 }, { "epoch": 164.78378378378378, "grad_norm": 1.0390625, "learning_rate": 4.126002174146915e-06, "loss": 0.1403, "step": 6097 }, { "epoch": 164.8108108108108, "grad_norm": 1.1015625, "learning_rate": 4.119856135371711e-06, "loss": 0.1594, "step": 6098 }, { "epoch": 164.83783783783784, "grad_norm": 1.03125, "learning_rate": 4.1137142664670726e-06, "loss": 0.1163, "step": 6099 }, { "epoch": 164.86486486486487, "grad_norm": 0.78515625, "learning_rate": 4.107576568659571e-06, "loss": 0.0832, "step": 6100 }, { "epoch": 164.8918918918919, "grad_norm": 1.109375, "learning_rate": 4.101443043174924e-06, "loss": 0.1474, "step": 6101 }, { "epoch": 164.9189189189189, "grad_norm": 1.546875, "learning_rate": 4.095313691238034e-06, "loss": 0.2532, "step": 6102 }, { "epoch": 164.94594594594594, "grad_norm": 1.1171875, "learning_rate": 4.089188514072967e-06, "loss": 0.1357, "step": 6103 }, { "epoch": 164.97297297297297, "grad_norm": 0.6875, "learning_rate": 4.083067512902941e-06, "loss": 0.0869, "step": 6104 }, { "epoch": 165.0, "grad_norm": 0.6640625, "learning_rate": 4.076950688950365e-06, "loss": 0.0829, "step": 6105 }, { "epoch": 165.02702702702703, "grad_norm": 0.83203125, "learning_rate": 4.070838043436786e-06, "loss": 0.0933, "step": 6106 }, { "epoch": 165.05405405405406, "grad_norm": 1.515625, "learning_rate": 4.06472957758294e-06, "loss": 0.1414, "step": 6107 }, { "epoch": 165.0810810810811, "grad_norm": 0.96875, "learning_rate": 4.058625292608711e-06, "loss": 0.1055, "step": 6108 }, { "epoch": 165.1081081081081, "grad_norm": 1.0078125, "learning_rate": 4.052525189733167e-06, "loss": 0.1131, "step": 6109 }, { "epoch": 165.13513513513513, "grad_norm": 0.84375, "learning_rate": 4.046429270174512e-06, "loss": 0.0739, "step": 6110 }, { "epoch": 165.16216216216216, "grad_norm": 0.9375, "learning_rate": 4.040337535150151e-06, "loss": 0.0879, "step": 6111 }, { "epoch": 165.1891891891892, "grad_norm": 1.2890625, "learning_rate": 4.034249985876618e-06, "loss": 0.1297, "step": 6112 }, { "epoch": 165.21621621621622, "grad_norm": 1.515625, "learning_rate": 4.028166623569638e-06, "loss": 0.198, "step": 6113 }, { "epoch": 165.24324324324326, "grad_norm": 1.0859375, "learning_rate": 4.02208744944409e-06, "loss": 0.1154, "step": 6114 }, { "epoch": 165.27027027027026, "grad_norm": 0.78125, "learning_rate": 4.016012464714006e-06, "loss": 0.0917, "step": 6115 }, { "epoch": 165.2972972972973, "grad_norm": 0.80859375, "learning_rate": 4.009941670592595e-06, "loss": 0.1046, "step": 6116 }, { "epoch": 165.32432432432432, "grad_norm": 1.0390625, "learning_rate": 4.003875068292237e-06, "loss": 0.1282, "step": 6117 }, { "epoch": 165.35135135135135, "grad_norm": 0.67578125, "learning_rate": 3.997812659024447e-06, "loss": 0.0645, "step": 6118 }, { "epoch": 165.3783783783784, "grad_norm": 1.0546875, "learning_rate": 3.991754443999926e-06, "loss": 0.1266, "step": 6119 }, { "epoch": 165.40540540540542, "grad_norm": 0.4921875, "learning_rate": 3.985700424428537e-06, "loss": 0.0634, "step": 6120 }, { "epoch": 165.43243243243242, "grad_norm": 1.0859375, "learning_rate": 3.979650601519283e-06, "loss": 0.1353, "step": 6121 }, { "epoch": 165.45945945945945, "grad_norm": 1.53125, "learning_rate": 3.973604976480361e-06, "loss": 0.2804, "step": 6122 }, { "epoch": 165.48648648648648, "grad_norm": 0.81640625, "learning_rate": 3.967563550519099e-06, "loss": 0.0918, "step": 6123 }, { "epoch": 165.51351351351352, "grad_norm": 0.91796875, "learning_rate": 3.961526324842004e-06, "loss": 0.1032, "step": 6124 }, { "epoch": 165.54054054054055, "grad_norm": 0.78125, "learning_rate": 3.95549330065475e-06, "loss": 0.0975, "step": 6125 }, { "epoch": 165.56756756756758, "grad_norm": 1.1953125, "learning_rate": 3.949464479162149e-06, "loss": 0.1833, "step": 6126 }, { "epoch": 165.59459459459458, "grad_norm": 0.80859375, "learning_rate": 3.943439861568193e-06, "loss": 0.083, "step": 6127 }, { "epoch": 165.6216216216216, "grad_norm": 1.234375, "learning_rate": 3.937419449076035e-06, "loss": 0.1173, "step": 6128 }, { "epoch": 165.64864864864865, "grad_norm": 0.89453125, "learning_rate": 3.931403242887971e-06, "loss": 0.1285, "step": 6129 }, { "epoch": 165.67567567567568, "grad_norm": 1.6015625, "learning_rate": 3.9253912442054704e-06, "loss": 0.1579, "step": 6130 }, { "epoch": 165.7027027027027, "grad_norm": 1.375, "learning_rate": 3.9193834542291694e-06, "loss": 0.1814, "step": 6131 }, { "epoch": 165.72972972972974, "grad_norm": 0.90625, "learning_rate": 3.9133798741588425e-06, "loss": 0.1055, "step": 6132 }, { "epoch": 165.75675675675674, "grad_norm": 0.93359375, "learning_rate": 3.907380505193439e-06, "loss": 0.1103, "step": 6133 }, { "epoch": 165.78378378378378, "grad_norm": 0.9921875, "learning_rate": 3.90138534853107e-06, "loss": 0.1306, "step": 6134 }, { "epoch": 165.8108108108108, "grad_norm": 0.59375, "learning_rate": 3.895394405368985e-06, "loss": 0.0646, "step": 6135 }, { "epoch": 165.83783783783784, "grad_norm": 0.890625, "learning_rate": 3.88940767690362e-06, "loss": 0.0914, "step": 6136 }, { "epoch": 165.86486486486487, "grad_norm": 1.171875, "learning_rate": 3.883425164330542e-06, "loss": 0.1311, "step": 6137 }, { "epoch": 165.8918918918919, "grad_norm": 0.94921875, "learning_rate": 3.877446868844495e-06, "loss": 0.1097, "step": 6138 }, { "epoch": 165.9189189189189, "grad_norm": 0.953125, "learning_rate": 3.871472791639383e-06, "loss": 0.0957, "step": 6139 }, { "epoch": 165.94594594594594, "grad_norm": 0.875, "learning_rate": 3.865502933908244e-06, "loss": 0.1176, "step": 6140 }, { "epoch": 165.97297297297297, "grad_norm": 1.375, "learning_rate": 3.8595372968432965e-06, "loss": 0.2182, "step": 6141 }, { "epoch": 166.0, "grad_norm": 0.6953125, "learning_rate": 3.853575881635915e-06, "loss": 0.0836, "step": 6142 }, { "epoch": 166.02702702702703, "grad_norm": 0.99609375, "learning_rate": 3.847618689476612e-06, "loss": 0.12, "step": 6143 }, { "epoch": 166.05405405405406, "grad_norm": 0.99609375, "learning_rate": 3.841665721555082e-06, "loss": 0.1146, "step": 6144 }, { "epoch": 166.0810810810811, "grad_norm": 1.0390625, "learning_rate": 3.835716979060147e-06, "loss": 0.125, "step": 6145 }, { "epoch": 166.1081081081081, "grad_norm": 0.8828125, "learning_rate": 3.829772463179817e-06, "loss": 0.0867, "step": 6146 }, { "epoch": 166.13513513513513, "grad_norm": 0.6640625, "learning_rate": 3.82383217510123e-06, "loss": 0.0747, "step": 6147 }, { "epoch": 166.16216216216216, "grad_norm": 0.70703125, "learning_rate": 3.8178961160107025e-06, "loss": 0.0879, "step": 6148 }, { "epoch": 166.1891891891892, "grad_norm": 1.0078125, "learning_rate": 3.8119642870936804e-06, "loss": 0.097, "step": 6149 }, { "epoch": 166.21621621621622, "grad_norm": 0.87890625, "learning_rate": 3.806036689534792e-06, "loss": 0.1103, "step": 6150 }, { "epoch": 166.24324324324326, "grad_norm": 1.2109375, "learning_rate": 3.8001133245178093e-06, "loss": 0.1385, "step": 6151 }, { "epoch": 166.27027027027026, "grad_norm": 0.68359375, "learning_rate": 3.7941941932256505e-06, "loss": 0.0828, "step": 6152 }, { "epoch": 166.2972972972973, "grad_norm": 1.15625, "learning_rate": 3.788279296840405e-06, "loss": 0.1255, "step": 6153 }, { "epoch": 166.32432432432432, "grad_norm": 0.75390625, "learning_rate": 3.7823686365432985e-06, "loss": 0.083, "step": 6154 }, { "epoch": 166.35135135135135, "grad_norm": 0.87109375, "learning_rate": 3.7764622135147227e-06, "loss": 0.0978, "step": 6155 }, { "epoch": 166.3783783783784, "grad_norm": 0.83984375, "learning_rate": 3.770560028934228e-06, "loss": 0.0943, "step": 6156 }, { "epoch": 166.40540540540542, "grad_norm": 1.453125, "learning_rate": 3.764662083980497e-06, "loss": 0.2342, "step": 6157 }, { "epoch": 166.43243243243242, "grad_norm": 0.53515625, "learning_rate": 3.758768379831387e-06, "loss": 0.0631, "step": 6158 }, { "epoch": 166.45945945945945, "grad_norm": 1.3203125, "learning_rate": 3.752878917663907e-06, "loss": 0.1542, "step": 6159 }, { "epoch": 166.48648648648648, "grad_norm": 0.9453125, "learning_rate": 3.7469936986541996e-06, "loss": 0.127, "step": 6160 }, { "epoch": 166.51351351351352, "grad_norm": 0.796875, "learning_rate": 3.7411127239775774e-06, "loss": 0.084, "step": 6161 }, { "epoch": 166.54054054054055, "grad_norm": 0.58203125, "learning_rate": 3.7352359948085088e-06, "loss": 0.0739, "step": 6162 }, { "epoch": 166.56756756756758, "grad_norm": 0.92578125, "learning_rate": 3.729363512320594e-06, "loss": 0.1166, "step": 6163 }, { "epoch": 166.59459459459458, "grad_norm": 1.1796875, "learning_rate": 3.7234952776866033e-06, "loss": 0.16, "step": 6164 }, { "epoch": 166.6216216216216, "grad_norm": 0.83984375, "learning_rate": 3.7176312920784604e-06, "loss": 0.094, "step": 6165 }, { "epoch": 166.64864864864865, "grad_norm": 1.0234375, "learning_rate": 3.711771556667218e-06, "loss": 0.1243, "step": 6166 }, { "epoch": 166.67567567567568, "grad_norm": 0.7421875, "learning_rate": 3.7059160726231086e-06, "loss": 0.0925, "step": 6167 }, { "epoch": 166.7027027027027, "grad_norm": 0.8046875, "learning_rate": 3.7000648411154885e-06, "loss": 0.0939, "step": 6168 }, { "epoch": 166.72972972972974, "grad_norm": 1.34375, "learning_rate": 3.694217863312888e-06, "loss": 0.209, "step": 6169 }, { "epoch": 166.75675675675674, "grad_norm": 1.0625, "learning_rate": 3.6883751403829797e-06, "loss": 0.1266, "step": 6170 }, { "epoch": 166.78378378378378, "grad_norm": 0.62109375, "learning_rate": 3.6825366734925787e-06, "loss": 0.0785, "step": 6171 }, { "epoch": 166.8108108108108, "grad_norm": 0.95703125, "learning_rate": 3.676702463807655e-06, "loss": 0.1283, "step": 6172 }, { "epoch": 166.83783783783784, "grad_norm": 0.9453125, "learning_rate": 3.6708725124933426e-06, "loss": 0.1385, "step": 6173 }, { "epoch": 166.86486486486487, "grad_norm": 1.140625, "learning_rate": 3.665046820713897e-06, "loss": 0.1421, "step": 6174 }, { "epoch": 166.8918918918919, "grad_norm": 0.70703125, "learning_rate": 3.6592253896327422e-06, "loss": 0.0853, "step": 6175 }, { "epoch": 166.9189189189189, "grad_norm": 1.4453125, "learning_rate": 3.6534082204124574e-06, "loss": 0.1856, "step": 6176 }, { "epoch": 166.94594594594594, "grad_norm": 1.0078125, "learning_rate": 3.6475953142147497e-06, "loss": 0.1598, "step": 6177 }, { "epoch": 166.97297297297297, "grad_norm": 0.65625, "learning_rate": 3.6417866722004867e-06, "loss": 0.082, "step": 6178 }, { "epoch": 167.0, "grad_norm": 0.51171875, "learning_rate": 3.635982295529694e-06, "loss": 0.0699, "step": 6179 }, { "epoch": 167.02702702702703, "grad_norm": 0.90234375, "learning_rate": 3.630182185361522e-06, "loss": 0.108, "step": 6180 }, { "epoch": 167.05405405405406, "grad_norm": 1.0234375, "learning_rate": 3.624386342854294e-06, "loss": 0.1149, "step": 6181 }, { "epoch": 167.0810810810811, "grad_norm": 0.60546875, "learning_rate": 3.6185947691654594e-06, "loss": 0.0703, "step": 6182 }, { "epoch": 167.1081081081081, "grad_norm": 0.9375, "learning_rate": 3.6128074654516314e-06, "loss": 0.1254, "step": 6183 }, { "epoch": 167.13513513513513, "grad_norm": 0.87890625, "learning_rate": 3.6070244328685553e-06, "loss": 0.1029, "step": 6184 }, { "epoch": 167.16216216216216, "grad_norm": 0.5703125, "learning_rate": 3.6012456725711437e-06, "loss": 0.0732, "step": 6185 }, { "epoch": 167.1891891891892, "grad_norm": 1.0859375, "learning_rate": 3.595471185713431e-06, "loss": 0.1089, "step": 6186 }, { "epoch": 167.21621621621622, "grad_norm": 0.80078125, "learning_rate": 3.58970097344862e-06, "loss": 0.0931, "step": 6187 }, { "epoch": 167.24324324324326, "grad_norm": 1.0546875, "learning_rate": 3.5839350369290564e-06, "loss": 0.1513, "step": 6188 }, { "epoch": 167.27027027027026, "grad_norm": 1.1953125, "learning_rate": 3.5781733773062128e-06, "loss": 0.1149, "step": 6189 }, { "epoch": 167.2972972972973, "grad_norm": 0.9296875, "learning_rate": 3.572415995730738e-06, "loss": 0.1098, "step": 6190 }, { "epoch": 167.32432432432432, "grad_norm": 1.3828125, "learning_rate": 3.566662893352396e-06, "loss": 0.1432, "step": 6191 }, { "epoch": 167.35135135135135, "grad_norm": 0.87109375, "learning_rate": 3.560914071320115e-06, "loss": 0.0923, "step": 6192 }, { "epoch": 167.3783783783784, "grad_norm": 1.4140625, "learning_rate": 3.555169530781971e-06, "loss": 0.182, "step": 6193 }, { "epoch": 167.40540540540542, "grad_norm": 1.484375, "learning_rate": 3.5494292728851657e-06, "loss": 0.1694, "step": 6194 }, { "epoch": 167.43243243243242, "grad_norm": 0.984375, "learning_rate": 3.5436932987760657e-06, "loss": 0.1075, "step": 6195 }, { "epoch": 167.45945945945945, "grad_norm": 1.1015625, "learning_rate": 3.5379616096001794e-06, "loss": 0.1408, "step": 6196 }, { "epoch": 167.48648648648648, "grad_norm": 0.7265625, "learning_rate": 3.5322342065021415e-06, "loss": 0.0896, "step": 6197 }, { "epoch": 167.51351351351352, "grad_norm": 1.34375, "learning_rate": 3.526511090625753e-06, "loss": 0.1721, "step": 6198 }, { "epoch": 167.54054054054055, "grad_norm": 1.0, "learning_rate": 3.5207922631139397e-06, "loss": 0.1039, "step": 6199 }, { "epoch": 167.56756756756758, "grad_norm": 1.1953125, "learning_rate": 3.5150777251087895e-06, "loss": 0.1804, "step": 6200 }, { "epoch": 167.59459459459458, "grad_norm": 1.4765625, "learning_rate": 3.509367477751524e-06, "loss": 0.2024, "step": 6201 }, { "epoch": 167.6216216216216, "grad_norm": 0.90625, "learning_rate": 3.5036615221824996e-06, "loss": 0.1099, "step": 6202 }, { "epoch": 167.64864864864865, "grad_norm": 0.890625, "learning_rate": 3.4979598595412333e-06, "loss": 0.1086, "step": 6203 }, { "epoch": 167.67567567567568, "grad_norm": 0.9375, "learning_rate": 3.4922624909663774e-06, "loss": 0.1155, "step": 6204 }, { "epoch": 167.7027027027027, "grad_norm": 1.0078125, "learning_rate": 3.486569417595717e-06, "loss": 0.1639, "step": 6205 }, { "epoch": 167.72972972972974, "grad_norm": 0.953125, "learning_rate": 3.480880640566192e-06, "loss": 0.0949, "step": 6206 }, { "epoch": 167.75675675675674, "grad_norm": 0.640625, "learning_rate": 3.4751961610138856e-06, "loss": 0.0675, "step": 6207 }, { "epoch": 167.78378378378378, "grad_norm": 0.890625, "learning_rate": 3.469515980074006e-06, "loss": 0.0826, "step": 6208 }, { "epoch": 167.8108108108108, "grad_norm": 1.21875, "learning_rate": 3.4638400988809187e-06, "loss": 0.1185, "step": 6209 }, { "epoch": 167.83783783783784, "grad_norm": 1.0546875, "learning_rate": 3.4581685185681335e-06, "loss": 0.1153, "step": 6210 }, { "epoch": 167.86486486486487, "grad_norm": 1.25, "learning_rate": 3.4525012402682827e-06, "loss": 0.1498, "step": 6211 }, { "epoch": 167.8918918918919, "grad_norm": 1.2265625, "learning_rate": 3.4468382651131585e-06, "loss": 0.1433, "step": 6212 }, { "epoch": 167.9189189189189, "grad_norm": 1.0859375, "learning_rate": 3.4411795942336787e-06, "loss": 0.1222, "step": 6213 }, { "epoch": 167.94594594594594, "grad_norm": 1.21875, "learning_rate": 3.4355252287599114e-06, "loss": 0.1431, "step": 6214 }, { "epoch": 167.97297297297297, "grad_norm": 1.1015625, "learning_rate": 3.42987516982107e-06, "loss": 0.163, "step": 6215 }, { "epoch": 168.0, "grad_norm": 1.2109375, "learning_rate": 3.4242294185454854e-06, "loss": 0.1777, "step": 6216 }, { "epoch": 168.02702702702703, "grad_norm": 1.0546875, "learning_rate": 3.418587976060653e-06, "loss": 0.1565, "step": 6217 }, { "epoch": 168.05405405405406, "grad_norm": 1.2734375, "learning_rate": 3.412950843493196e-06, "loss": 0.1572, "step": 6218 }, { "epoch": 168.0810810810811, "grad_norm": 0.71484375, "learning_rate": 3.407318021968875e-06, "loss": 0.0765, "step": 6219 }, { "epoch": 168.1081081081081, "grad_norm": 1.03125, "learning_rate": 3.4016895126125963e-06, "loss": 0.1119, "step": 6220 }, { "epoch": 168.13513513513513, "grad_norm": 1.03125, "learning_rate": 3.3960653165483996e-06, "loss": 0.1309, "step": 6221 }, { "epoch": 168.16216216216216, "grad_norm": 1.0, "learning_rate": 3.390445434899464e-06, "loss": 0.1118, "step": 6222 }, { "epoch": 168.1891891891892, "grad_norm": 0.8046875, "learning_rate": 3.3848298687881143e-06, "loss": 0.101, "step": 6223 }, { "epoch": 168.21621621621622, "grad_norm": 0.60546875, "learning_rate": 3.3792186193358e-06, "loss": 0.0743, "step": 6224 }, { "epoch": 168.24324324324326, "grad_norm": 0.8359375, "learning_rate": 3.373611687663125e-06, "loss": 0.1275, "step": 6225 }, { "epoch": 168.27027027027026, "grad_norm": 0.72265625, "learning_rate": 3.3680090748898135e-06, "loss": 0.0851, "step": 6226 }, { "epoch": 168.2972972972973, "grad_norm": 0.75390625, "learning_rate": 3.3624107821347423e-06, "loss": 0.0927, "step": 6227 }, { "epoch": 168.32432432432432, "grad_norm": 0.75, "learning_rate": 3.3568168105159087e-06, "loss": 0.0882, "step": 6228 }, { "epoch": 168.35135135135135, "grad_norm": 0.76953125, "learning_rate": 3.351227161150472e-06, "loss": 0.1057, "step": 6229 }, { "epoch": 168.3783783783784, "grad_norm": 1.1328125, "learning_rate": 3.3456418351546975e-06, "loss": 0.1643, "step": 6230 }, { "epoch": 168.40540540540542, "grad_norm": 1.140625, "learning_rate": 3.3400608336440124e-06, "loss": 0.1514, "step": 6231 }, { "epoch": 168.43243243243242, "grad_norm": 0.73828125, "learning_rate": 3.3344841577329753e-06, "loss": 0.0993, "step": 6232 }, { "epoch": 168.45945945945945, "grad_norm": 0.96875, "learning_rate": 3.328911808535265e-06, "loss": 0.0967, "step": 6233 }, { "epoch": 168.48648648648648, "grad_norm": 0.80078125, "learning_rate": 3.323343787163716e-06, "loss": 0.0927, "step": 6234 }, { "epoch": 168.51351351351352, "grad_norm": 0.52734375, "learning_rate": 3.3177800947302913e-06, "loss": 0.0668, "step": 6235 }, { "epoch": 168.54054054054055, "grad_norm": 0.82421875, "learning_rate": 3.3122207323460804e-06, "loss": 0.0984, "step": 6236 }, { "epoch": 168.56756756756758, "grad_norm": 0.7890625, "learning_rate": 3.3066657011213227e-06, "loss": 0.0996, "step": 6237 }, { "epoch": 168.59459459459458, "grad_norm": 0.69921875, "learning_rate": 3.301115002165389e-06, "loss": 0.0816, "step": 6238 }, { "epoch": 168.6216216216216, "grad_norm": 0.69140625, "learning_rate": 3.295568636586771e-06, "loss": 0.0837, "step": 6239 }, { "epoch": 168.64864864864865, "grad_norm": 0.66015625, "learning_rate": 3.2900266054931126e-06, "loss": 0.0748, "step": 6240 }, { "epoch": 168.67567567567568, "grad_norm": 0.984375, "learning_rate": 3.284488909991193e-06, "loss": 0.1137, "step": 6241 }, { "epoch": 168.7027027027027, "grad_norm": 0.94921875, "learning_rate": 3.2789555511869047e-06, "loss": 0.1131, "step": 6242 }, { "epoch": 168.72972972972974, "grad_norm": 0.94140625, "learning_rate": 3.2734265301853e-06, "loss": 0.1072, "step": 6243 }, { "epoch": 168.75675675675674, "grad_norm": 0.796875, "learning_rate": 3.2679018480905377e-06, "loss": 0.0985, "step": 6244 }, { "epoch": 168.78378378378378, "grad_norm": 0.828125, "learning_rate": 3.2623815060059347e-06, "loss": 0.1012, "step": 6245 }, { "epoch": 168.8108108108108, "grad_norm": 1.234375, "learning_rate": 3.256865505033935e-06, "loss": 0.1615, "step": 6246 }, { "epoch": 168.83783783783784, "grad_norm": 0.8828125, "learning_rate": 3.2513538462761005e-06, "loss": 0.1404, "step": 6247 }, { "epoch": 168.86486486486487, "grad_norm": 0.64453125, "learning_rate": 3.2458465308331436e-06, "loss": 0.0796, "step": 6248 }, { "epoch": 168.8918918918919, "grad_norm": 0.94921875, "learning_rate": 3.2403435598049065e-06, "loss": 0.1084, "step": 6249 }, { "epoch": 168.9189189189189, "grad_norm": 0.5390625, "learning_rate": 3.234844934290354e-06, "loss": 0.0718, "step": 6250 }, { "epoch": 168.94594594594594, "grad_norm": 0.81640625, "learning_rate": 3.229350655387592e-06, "loss": 0.0856, "step": 6251 }, { "epoch": 168.97297297297297, "grad_norm": 0.78125, "learning_rate": 3.2238607241938613e-06, "loss": 0.0933, "step": 6252 }, { "epoch": 169.0, "grad_norm": 1.1171875, "learning_rate": 3.218375141805516e-06, "loss": 0.167, "step": 6253 }, { "epoch": 169.02702702702703, "grad_norm": 1.421875, "learning_rate": 3.2128939093180655e-06, "loss": 0.1982, "step": 6254 }, { "epoch": 169.05405405405406, "grad_norm": 1.5234375, "learning_rate": 3.20741702782614e-06, "loss": 0.1714, "step": 6255 }, { "epoch": 169.0810810810811, "grad_norm": 0.92578125, "learning_rate": 3.2019444984234954e-06, "loss": 0.092, "step": 6256 }, { "epoch": 169.1081081081081, "grad_norm": 1.5859375, "learning_rate": 3.1964763222030208e-06, "loss": 0.2024, "step": 6257 }, { "epoch": 169.13513513513513, "grad_norm": 0.8046875, "learning_rate": 3.1910125002567514e-06, "loss": 0.0894, "step": 6258 }, { "epoch": 169.16216216216216, "grad_norm": 0.56640625, "learning_rate": 3.1855530336758283e-06, "loss": 0.0786, "step": 6259 }, { "epoch": 169.1891891891892, "grad_norm": 1.1796875, "learning_rate": 3.18009792355054e-06, "loss": 0.1379, "step": 6260 }, { "epoch": 169.21621621621622, "grad_norm": 1.1953125, "learning_rate": 3.1746471709702964e-06, "loss": 0.1471, "step": 6261 }, { "epoch": 169.24324324324326, "grad_norm": 0.9296875, "learning_rate": 3.169200777023645e-06, "loss": 0.1044, "step": 6262 }, { "epoch": 169.27027027027026, "grad_norm": 1.2265625, "learning_rate": 3.1637587427982508e-06, "loss": 0.1562, "step": 6263 }, { "epoch": 169.2972972972973, "grad_norm": 1.015625, "learning_rate": 3.1583210693809234e-06, "loss": 0.1257, "step": 6264 }, { "epoch": 169.32432432432432, "grad_norm": 0.53125, "learning_rate": 3.1528877578575864e-06, "loss": 0.0685, "step": 6265 }, { "epoch": 169.35135135135135, "grad_norm": 0.6484375, "learning_rate": 3.147458809313311e-06, "loss": 0.0686, "step": 6266 }, { "epoch": 169.3783783783784, "grad_norm": 0.96484375, "learning_rate": 3.1420342248322715e-06, "loss": 0.1426, "step": 6267 }, { "epoch": 169.40540540540542, "grad_norm": 1.109375, "learning_rate": 3.136614005497793e-06, "loss": 0.126, "step": 6268 }, { "epoch": 169.43243243243242, "grad_norm": 1.0546875, "learning_rate": 3.1311981523923226e-06, "loss": 0.1536, "step": 6269 }, { "epoch": 169.45945945945945, "grad_norm": 1.3046875, "learning_rate": 3.125786666597427e-06, "loss": 0.1297, "step": 6270 }, { "epoch": 169.48648648648648, "grad_norm": 0.87890625, "learning_rate": 3.120379549193814e-06, "loss": 0.101, "step": 6271 }, { "epoch": 169.51351351351352, "grad_norm": 1.0546875, "learning_rate": 3.11497680126131e-06, "loss": 0.1246, "step": 6272 }, { "epoch": 169.54054054054055, "grad_norm": 1.1640625, "learning_rate": 3.109578423878867e-06, "loss": 0.1766, "step": 6273 }, { "epoch": 169.56756756756758, "grad_norm": 0.62890625, "learning_rate": 3.104184418124578e-06, "loss": 0.0781, "step": 6274 }, { "epoch": 169.59459459459458, "grad_norm": 0.9765625, "learning_rate": 3.09879478507564e-06, "loss": 0.1144, "step": 6275 }, { "epoch": 169.6216216216216, "grad_norm": 1.0, "learning_rate": 3.0934095258083994e-06, "loss": 0.1114, "step": 6276 }, { "epoch": 169.64864864864865, "grad_norm": 0.7734375, "learning_rate": 3.088028641398322e-06, "loss": 0.1038, "step": 6277 }, { "epoch": 169.67567567567568, "grad_norm": 0.98828125, "learning_rate": 3.0826521329199885e-06, "loss": 0.1149, "step": 6278 }, { "epoch": 169.7027027027027, "grad_norm": 0.61328125, "learning_rate": 3.0772800014471194e-06, "loss": 0.0766, "step": 6279 }, { "epoch": 169.72972972972974, "grad_norm": 0.9609375, "learning_rate": 3.0719122480525607e-06, "loss": 0.1321, "step": 6280 }, { "epoch": 169.75675675675674, "grad_norm": 1.328125, "learning_rate": 3.0665488738082726e-06, "loss": 0.1642, "step": 6281 }, { "epoch": 169.78378378378378, "grad_norm": 0.91796875, "learning_rate": 3.0611898797853506e-06, "loss": 0.0905, "step": 6282 }, { "epoch": 169.8108108108108, "grad_norm": 1.015625, "learning_rate": 3.0558352670540193e-06, "loss": 0.1145, "step": 6283 }, { "epoch": 169.83783783783784, "grad_norm": 0.91796875, "learning_rate": 3.0504850366836117e-06, "loss": 0.1062, "step": 6284 }, { "epoch": 169.86486486486487, "grad_norm": 1.015625, "learning_rate": 3.045139189742602e-06, "loss": 0.1107, "step": 6285 }, { "epoch": 169.8918918918919, "grad_norm": 0.98046875, "learning_rate": 3.039797727298585e-06, "loss": 0.1233, "step": 6286 }, { "epoch": 169.9189189189189, "grad_norm": 1.15625, "learning_rate": 3.034460650418272e-06, "loss": 0.1242, "step": 6287 }, { "epoch": 169.94594594594594, "grad_norm": 1.1953125, "learning_rate": 3.0291279601675106e-06, "loss": 0.121, "step": 6288 }, { "epoch": 169.97297297297297, "grad_norm": 0.5, "learning_rate": 3.0237996576112563e-06, "loss": 0.0681, "step": 6289 }, { "epoch": 170.0, "grad_norm": 1.0234375, "learning_rate": 3.0184757438136063e-06, "loss": 0.1132, "step": 6290 }, { "epoch": 170.02702702702703, "grad_norm": 1.15625, "learning_rate": 3.013156219837776e-06, "loss": 0.1524, "step": 6291 }, { "epoch": 170.05405405405406, "grad_norm": 1.375, "learning_rate": 3.007841086746091e-06, "loss": 0.187, "step": 6292 }, { "epoch": 170.0810810810811, "grad_norm": 1.0859375, "learning_rate": 3.002530345600016e-06, "loss": 0.1121, "step": 6293 }, { "epoch": 170.1081081081081, "grad_norm": 0.58203125, "learning_rate": 2.997223997460136e-06, "loss": 0.07, "step": 6294 }, { "epoch": 170.13513513513513, "grad_norm": 0.60546875, "learning_rate": 2.99192204338615e-06, "loss": 0.0788, "step": 6295 }, { "epoch": 170.16216216216216, "grad_norm": 0.62109375, "learning_rate": 2.9866244844368867e-06, "loss": 0.0728, "step": 6296 }, { "epoch": 170.1891891891892, "grad_norm": 1.296875, "learning_rate": 2.9813313216702997e-06, "loss": 0.1672, "step": 6297 }, { "epoch": 170.21621621621622, "grad_norm": 0.76171875, "learning_rate": 2.9760425561434545e-06, "loss": 0.0929, "step": 6298 }, { "epoch": 170.24324324324326, "grad_norm": 1.1015625, "learning_rate": 2.970758188912551e-06, "loss": 0.1408, "step": 6299 }, { "epoch": 170.27027027027026, "grad_norm": 1.1640625, "learning_rate": 2.965478221032894e-06, "loss": 0.1572, "step": 6300 }, { "epoch": 170.2972972972973, "grad_norm": 0.9140625, "learning_rate": 2.96020265355893e-06, "loss": 0.1056, "step": 6301 }, { "epoch": 170.32432432432432, "grad_norm": 0.85546875, "learning_rate": 2.9549314875442075e-06, "loss": 0.0745, "step": 6302 }, { "epoch": 170.35135135135135, "grad_norm": 0.765625, "learning_rate": 2.9496647240414176e-06, "loss": 0.0814, "step": 6303 }, { "epoch": 170.3783783783784, "grad_norm": 0.875, "learning_rate": 2.9444023641023443e-06, "loss": 0.1076, "step": 6304 }, { "epoch": 170.40540540540542, "grad_norm": 0.84375, "learning_rate": 2.939144408777922e-06, "loss": 0.0862, "step": 6305 }, { "epoch": 170.43243243243242, "grad_norm": 0.94921875, "learning_rate": 2.9338908591181797e-06, "loss": 0.1023, "step": 6306 }, { "epoch": 170.45945945945945, "grad_norm": 0.890625, "learning_rate": 2.9286417161722813e-06, "loss": 0.0896, "step": 6307 }, { "epoch": 170.48648648648648, "grad_norm": 0.69140625, "learning_rate": 2.923396980988513e-06, "loss": 0.0824, "step": 6308 }, { "epoch": 170.51351351351352, "grad_norm": 1.09375, "learning_rate": 2.918156654614268e-06, "loss": 0.1356, "step": 6309 }, { "epoch": 170.54054054054055, "grad_norm": 1.09375, "learning_rate": 2.9129207380960643e-06, "loss": 0.1038, "step": 6310 }, { "epoch": 170.56756756756758, "grad_norm": 0.9140625, "learning_rate": 2.9076892324795545e-06, "loss": 0.1022, "step": 6311 }, { "epoch": 170.59459459459458, "grad_norm": 0.51953125, "learning_rate": 2.902462138809481e-06, "loss": 0.0661, "step": 6312 }, { "epoch": 170.6216216216216, "grad_norm": 0.8125, "learning_rate": 2.8972394581297245e-06, "loss": 0.0971, "step": 6313 }, { "epoch": 170.64864864864865, "grad_norm": 0.67578125, "learning_rate": 2.8920211914832906e-06, "loss": 0.0824, "step": 6314 }, { "epoch": 170.67567567567568, "grad_norm": 0.8515625, "learning_rate": 2.8868073399122842e-06, "loss": 0.095, "step": 6315 }, { "epoch": 170.7027027027027, "grad_norm": 1.1328125, "learning_rate": 2.881597904457936e-06, "loss": 0.134, "step": 6316 }, { "epoch": 170.72972972972974, "grad_norm": 0.95703125, "learning_rate": 2.876392886160609e-06, "loss": 0.1158, "step": 6317 }, { "epoch": 170.75675675675674, "grad_norm": 1.046875, "learning_rate": 2.8711922860597593e-06, "loss": 0.1344, "step": 6318 }, { "epoch": 170.78378378378378, "grad_norm": 1.28125, "learning_rate": 2.8659961051939798e-06, "loss": 0.1822, "step": 6319 }, { "epoch": 170.8108108108108, "grad_norm": 1.40625, "learning_rate": 2.86080434460097e-06, "loss": 0.1941, "step": 6320 }, { "epoch": 170.83783783783784, "grad_norm": 0.74609375, "learning_rate": 2.8556170053175514e-06, "loss": 0.0917, "step": 6321 }, { "epoch": 170.86486486486487, "grad_norm": 0.9296875, "learning_rate": 2.8504340883796667e-06, "loss": 0.134, "step": 6322 }, { "epoch": 170.8918918918919, "grad_norm": 0.59765625, "learning_rate": 2.845255594822363e-06, "loss": 0.0725, "step": 6323 }, { "epoch": 170.9189189189189, "grad_norm": 0.93359375, "learning_rate": 2.8400815256798163e-06, "loss": 0.1072, "step": 6324 }, { "epoch": 170.94594594594594, "grad_norm": 0.8359375, "learning_rate": 2.8349118819853195e-06, "loss": 0.1068, "step": 6325 }, { "epoch": 170.97297297297297, "grad_norm": 0.51171875, "learning_rate": 2.8297466647712655e-06, "loss": 0.0641, "step": 6326 }, { "epoch": 171.0, "grad_norm": 0.458984375, "learning_rate": 2.82458587506918e-06, "loss": 0.0526, "step": 6327 }, { "epoch": 171.02702702702703, "grad_norm": 0.89453125, "learning_rate": 2.8194295139097048e-06, "loss": 0.1141, "step": 6328 }, { "epoch": 171.05405405405406, "grad_norm": 1.2890625, "learning_rate": 2.8142775823225803e-06, "loss": 0.1927, "step": 6329 }, { "epoch": 171.0810810810811, "grad_norm": 1.5546875, "learning_rate": 2.8091300813366804e-06, "loss": 0.2621, "step": 6330 }, { "epoch": 171.1081081081081, "grad_norm": 1.1015625, "learning_rate": 2.803987011979989e-06, "loss": 0.1183, "step": 6331 }, { "epoch": 171.13513513513513, "grad_norm": 1.0390625, "learning_rate": 2.798848375279592e-06, "loss": 0.1136, "step": 6332 }, { "epoch": 171.16216216216216, "grad_norm": 0.7265625, "learning_rate": 2.793714172261716e-06, "loss": 0.0868, "step": 6333 }, { "epoch": 171.1891891891892, "grad_norm": 1.1171875, "learning_rate": 2.7885844039516747e-06, "loss": 0.136, "step": 6334 }, { "epoch": 171.21621621621622, "grad_norm": 0.9140625, "learning_rate": 2.7834590713739146e-06, "loss": 0.1126, "step": 6335 }, { "epoch": 171.24324324324326, "grad_norm": 0.828125, "learning_rate": 2.778338175551995e-06, "loss": 0.0967, "step": 6336 }, { "epoch": 171.27027027027026, "grad_norm": 0.87109375, "learning_rate": 2.773221717508573e-06, "loss": 0.1042, "step": 6337 }, { "epoch": 171.2972972972973, "grad_norm": 1.234375, "learning_rate": 2.768109698265445e-06, "loss": 0.16, "step": 6338 }, { "epoch": 171.32432432432432, "grad_norm": 1.0625, "learning_rate": 2.763002118843494e-06, "loss": 0.1327, "step": 6339 }, { "epoch": 171.35135135135135, "grad_norm": 0.45703125, "learning_rate": 2.7578989802627407e-06, "loss": 0.0582, "step": 6340 }, { "epoch": 171.3783783783784, "grad_norm": 1.2421875, "learning_rate": 2.7528002835423006e-06, "loss": 0.1498, "step": 6341 }, { "epoch": 171.40540540540542, "grad_norm": 0.94921875, "learning_rate": 2.7477060297004142e-06, "loss": 0.0956, "step": 6342 }, { "epoch": 171.43243243243242, "grad_norm": 0.8359375, "learning_rate": 2.7426162197544208e-06, "loss": 0.118, "step": 6343 }, { "epoch": 171.45945945945945, "grad_norm": 1.1640625, "learning_rate": 2.7375308547207907e-06, "loss": 0.1019, "step": 6344 }, { "epoch": 171.48648648648648, "grad_norm": 0.75390625, "learning_rate": 2.7324499356150974e-06, "loss": 0.0904, "step": 6345 }, { "epoch": 171.51351351351352, "grad_norm": 0.9609375, "learning_rate": 2.7273734634520152e-06, "loss": 0.1131, "step": 6346 }, { "epoch": 171.54054054054055, "grad_norm": 0.96484375, "learning_rate": 2.7223014392453533e-06, "loss": 0.1064, "step": 6347 }, { "epoch": 171.56756756756758, "grad_norm": 0.953125, "learning_rate": 2.7172338640080176e-06, "loss": 0.1106, "step": 6348 }, { "epoch": 171.59459459459458, "grad_norm": 0.83984375, "learning_rate": 2.7121707387520234e-06, "loss": 0.0947, "step": 6349 }, { "epoch": 171.6216216216216, "grad_norm": 1.1640625, "learning_rate": 2.7071120644885123e-06, "loss": 0.159, "step": 6350 }, { "epoch": 171.64864864864865, "grad_norm": 0.6640625, "learning_rate": 2.7020578422277147e-06, "loss": 0.0816, "step": 6351 }, { "epoch": 171.67567567567568, "grad_norm": 0.6484375, "learning_rate": 2.6970080729789903e-06, "loss": 0.0781, "step": 6352 }, { "epoch": 171.7027027027027, "grad_norm": 1.0703125, "learning_rate": 2.69196275775081e-06, "loss": 0.1159, "step": 6353 }, { "epoch": 171.72972972972974, "grad_norm": 1.1796875, "learning_rate": 2.6869218975507397e-06, "loss": 0.1352, "step": 6354 }, { "epoch": 171.75675675675674, "grad_norm": 0.99609375, "learning_rate": 2.6818854933854664e-06, "loss": 0.1148, "step": 6355 }, { "epoch": 171.78378378378378, "grad_norm": 1.703125, "learning_rate": 2.676853546260791e-06, "loss": 0.1994, "step": 6356 }, { "epoch": 171.8108108108108, "grad_norm": 0.6640625, "learning_rate": 2.671826057181612e-06, "loss": 0.0746, "step": 6357 }, { "epoch": 171.83783783783784, "grad_norm": 0.67578125, "learning_rate": 2.6668030271519455e-06, "loss": 0.0769, "step": 6358 }, { "epoch": 171.86486486486487, "grad_norm": 0.67578125, "learning_rate": 2.6617844571749232e-06, "loss": 0.0725, "step": 6359 }, { "epoch": 171.8918918918919, "grad_norm": 1.0703125, "learning_rate": 2.6567703482527683e-06, "loss": 0.1405, "step": 6360 }, { "epoch": 171.9189189189189, "grad_norm": 0.6796875, "learning_rate": 2.651760701386832e-06, "loss": 0.0789, "step": 6361 }, { "epoch": 171.94594594594594, "grad_norm": 0.52734375, "learning_rate": 2.6467555175775648e-06, "loss": 0.0622, "step": 6362 }, { "epoch": 171.97297297297297, "grad_norm": 0.8984375, "learning_rate": 2.641754797824525e-06, "loss": 0.1092, "step": 6363 }, { "epoch": 172.0, "grad_norm": 0.58203125, "learning_rate": 2.636758543126383e-06, "loss": 0.0801, "step": 6364 }, { "epoch": 172.02702702702703, "grad_norm": 1.2734375, "learning_rate": 2.6317667544809134e-06, "loss": 0.1809, "step": 6365 }, { "epoch": 172.05405405405406, "grad_norm": 1.140625, "learning_rate": 2.626779432885004e-06, "loss": 0.1376, "step": 6366 }, { "epoch": 172.0810810810811, "grad_norm": 1.15625, "learning_rate": 2.621796579334654e-06, "loss": 0.1601, "step": 6367 }, { "epoch": 172.1081081081081, "grad_norm": 1.1640625, "learning_rate": 2.616818194824952e-06, "loss": 0.137, "step": 6368 }, { "epoch": 172.13513513513513, "grad_norm": 1.1953125, "learning_rate": 2.6118442803501145e-06, "loss": 0.1374, "step": 6369 }, { "epoch": 172.16216216216216, "grad_norm": 0.60546875, "learning_rate": 2.6068748369034586e-06, "loss": 0.077, "step": 6370 }, { "epoch": 172.1891891891892, "grad_norm": 0.9375, "learning_rate": 2.601909865477403e-06, "loss": 0.1107, "step": 6371 }, { "epoch": 172.21621621621622, "grad_norm": 0.83984375, "learning_rate": 2.596949367063478e-06, "loss": 0.0936, "step": 6372 }, { "epoch": 172.24324324324326, "grad_norm": 0.6640625, "learning_rate": 2.59199334265233e-06, "loss": 0.0823, "step": 6373 }, { "epoch": 172.27027027027026, "grad_norm": 1.15625, "learning_rate": 2.5870417932336856e-06, "loss": 0.1137, "step": 6374 }, { "epoch": 172.2972972972973, "grad_norm": 0.470703125, "learning_rate": 2.5820947197964103e-06, "loss": 0.0621, "step": 6375 }, { "epoch": 172.32432432432432, "grad_norm": 1.421875, "learning_rate": 2.577152123328444e-06, "loss": 0.189, "step": 6376 }, { "epoch": 172.35135135135135, "grad_norm": 1.1796875, "learning_rate": 2.5722140048168643e-06, "loss": 0.1573, "step": 6377 }, { "epoch": 172.3783783783784, "grad_norm": 0.55859375, "learning_rate": 2.567280365247826e-06, "loss": 0.0766, "step": 6378 }, { "epoch": 172.40540540540542, "grad_norm": 1.2109375, "learning_rate": 2.5623512056066113e-06, "loss": 0.1414, "step": 6379 }, { "epoch": 172.43243243243242, "grad_norm": 1.203125, "learning_rate": 2.5574265268775877e-06, "loss": 0.1977, "step": 6380 }, { "epoch": 172.45945945945945, "grad_norm": 1.28125, "learning_rate": 2.5525063300442465e-06, "loss": 0.2124, "step": 6381 }, { "epoch": 172.48648648648648, "grad_norm": 1.140625, "learning_rate": 2.5475906160891767e-06, "loss": 0.1372, "step": 6382 }, { "epoch": 172.51351351351352, "grad_norm": 1.328125, "learning_rate": 2.542679385994065e-06, "loss": 0.1473, "step": 6383 }, { "epoch": 172.54054054054055, "grad_norm": 1.078125, "learning_rate": 2.5377726407397163e-06, "loss": 0.1189, "step": 6384 }, { "epoch": 172.56756756756758, "grad_norm": 1.125, "learning_rate": 2.5328703813060266e-06, "loss": 0.1306, "step": 6385 }, { "epoch": 172.59459459459458, "grad_norm": 1.0, "learning_rate": 2.5279726086720024e-06, "loss": 0.1387, "step": 6386 }, { "epoch": 172.6216216216216, "grad_norm": 0.7265625, "learning_rate": 2.5230793238157606e-06, "loss": 0.0895, "step": 6387 }, { "epoch": 172.64864864864865, "grad_norm": 1.1484375, "learning_rate": 2.5181905277145036e-06, "loss": 0.1469, "step": 6388 }, { "epoch": 172.67567567567568, "grad_norm": 0.7734375, "learning_rate": 2.5133062213445554e-06, "loss": 0.099, "step": 6389 }, { "epoch": 172.7027027027027, "grad_norm": 1.1171875, "learning_rate": 2.508426405681344e-06, "loss": 0.1452, "step": 6390 }, { "epoch": 172.72972972972974, "grad_norm": 0.83984375, "learning_rate": 2.503551081699379e-06, "loss": 0.1302, "step": 6391 }, { "epoch": 172.75675675675674, "grad_norm": 1.140625, "learning_rate": 2.4986802503722926e-06, "loss": 0.1468, "step": 6392 }, { "epoch": 172.78378378378378, "grad_norm": 1.1015625, "learning_rate": 2.493813912672821e-06, "loss": 0.1347, "step": 6393 }, { "epoch": 172.8108108108108, "grad_norm": 0.8515625, "learning_rate": 2.488952069572789e-06, "loss": 0.0917, "step": 6394 }, { "epoch": 172.83783783783784, "grad_norm": 0.890625, "learning_rate": 2.484094722043137e-06, "loss": 0.1066, "step": 6395 }, { "epoch": 172.86486486486487, "grad_norm": 0.953125, "learning_rate": 2.479241871053892e-06, "loss": 0.0871, "step": 6396 }, { "epoch": 172.8918918918919, "grad_norm": 0.890625, "learning_rate": 2.474393517574203e-06, "loss": 0.1186, "step": 6397 }, { "epoch": 172.9189189189189, "grad_norm": 0.69140625, "learning_rate": 2.46954966257231e-06, "loss": 0.079, "step": 6398 }, { "epoch": 172.94594594594594, "grad_norm": 1.328125, "learning_rate": 2.464710307015547e-06, "loss": 0.195, "step": 6399 }, { "epoch": 172.97297297297297, "grad_norm": 1.1640625, "learning_rate": 2.4598754518703665e-06, "loss": 0.1046, "step": 6400 }, { "epoch": 173.0, "grad_norm": 0.953125, "learning_rate": 2.455045098102313e-06, "loss": 0.1091, "step": 6401 }, { "epoch": 173.02702702702703, "grad_norm": 0.875, "learning_rate": 2.4502192466760276e-06, "loss": 0.1149, "step": 6402 }, { "epoch": 173.05405405405406, "grad_norm": 0.88671875, "learning_rate": 2.4453978985552573e-06, "loss": 0.1042, "step": 6403 }, { "epoch": 173.0810810810811, "grad_norm": 1.296875, "learning_rate": 2.440581054702859e-06, "loss": 0.1538, "step": 6404 }, { "epoch": 173.1081081081081, "grad_norm": 1.078125, "learning_rate": 2.435768716080769e-06, "loss": 0.1442, "step": 6405 }, { "epoch": 173.13513513513513, "grad_norm": 1.0234375, "learning_rate": 2.430960883650041e-06, "loss": 0.1318, "step": 6406 }, { "epoch": 173.16216216216216, "grad_norm": 1.1015625, "learning_rate": 2.4261575583708286e-06, "loss": 0.1457, "step": 6407 }, { "epoch": 173.1891891891892, "grad_norm": 1.03125, "learning_rate": 2.42135874120237e-06, "loss": 0.1544, "step": 6408 }, { "epoch": 173.21621621621622, "grad_norm": 0.7578125, "learning_rate": 2.4165644331030245e-06, "loss": 0.0863, "step": 6409 }, { "epoch": 173.24324324324326, "grad_norm": 0.640625, "learning_rate": 2.4117746350302255e-06, "loss": 0.0772, "step": 6410 }, { "epoch": 173.27027027027026, "grad_norm": 1.1171875, "learning_rate": 2.40698934794053e-06, "loss": 0.133, "step": 6411 }, { "epoch": 173.2972972972973, "grad_norm": 1.0859375, "learning_rate": 2.4022085727895883e-06, "loss": 0.1169, "step": 6412 }, { "epoch": 173.32432432432432, "grad_norm": 1.078125, "learning_rate": 2.397432310532133e-06, "loss": 0.1026, "step": 6413 }, { "epoch": 173.35135135135135, "grad_norm": 0.77734375, "learning_rate": 2.3926605621220205e-06, "loss": 0.0871, "step": 6414 }, { "epoch": 173.3783783783784, "grad_norm": 1.0390625, "learning_rate": 2.387893328512181e-06, "loss": 0.1413, "step": 6415 }, { "epoch": 173.40540540540542, "grad_norm": 0.8828125, "learning_rate": 2.3831306106546603e-06, "loss": 0.0859, "step": 6416 }, { "epoch": 173.43243243243242, "grad_norm": 0.93359375, "learning_rate": 2.378372409500604e-06, "loss": 0.1051, "step": 6417 }, { "epoch": 173.45945945945945, "grad_norm": 0.609375, "learning_rate": 2.3736187260002376e-06, "loss": 0.0701, "step": 6418 }, { "epoch": 173.48648648648648, "grad_norm": 1.1875, "learning_rate": 2.3688695611029042e-06, "loss": 0.1677, "step": 6419 }, { "epoch": 173.51351351351352, "grad_norm": 0.80078125, "learning_rate": 2.3641249157570296e-06, "loss": 0.0734, "step": 6420 }, { "epoch": 173.54054054054055, "grad_norm": 0.6328125, "learning_rate": 2.3593847909101546e-06, "loss": 0.0792, "step": 6421 }, { "epoch": 173.56756756756758, "grad_norm": 1.296875, "learning_rate": 2.3546491875088894e-06, "loss": 0.1614, "step": 6422 }, { "epoch": 173.59459459459458, "grad_norm": 0.828125, "learning_rate": 2.349918106498972e-06, "loss": 0.0823, "step": 6423 }, { "epoch": 173.6216216216216, "grad_norm": 0.9375, "learning_rate": 2.345191548825218e-06, "loss": 0.1166, "step": 6424 }, { "epoch": 173.64864864864865, "grad_norm": 0.765625, "learning_rate": 2.3404695154315454e-06, "loss": 0.083, "step": 6425 }, { "epoch": 173.67567567567568, "grad_norm": 0.8984375, "learning_rate": 2.335752007260969e-06, "loss": 0.1347, "step": 6426 }, { "epoch": 173.7027027027027, "grad_norm": 1.1796875, "learning_rate": 2.3310390252555962e-06, "loss": 0.1255, "step": 6427 }, { "epoch": 173.72972972972974, "grad_norm": 1.34375, "learning_rate": 2.326330570356636e-06, "loss": 0.168, "step": 6428 }, { "epoch": 173.75675675675674, "grad_norm": 1.0234375, "learning_rate": 2.321626643504393e-06, "loss": 0.1002, "step": 6429 }, { "epoch": 173.78378378378378, "grad_norm": 1.0390625, "learning_rate": 2.316927245638256e-06, "loss": 0.105, "step": 6430 }, { "epoch": 173.8108108108108, "grad_norm": 0.54296875, "learning_rate": 2.3122323776967296e-06, "loss": 0.0679, "step": 6431 }, { "epoch": 173.83783783783784, "grad_norm": 0.875, "learning_rate": 2.3075420406173997e-06, "loss": 0.0974, "step": 6432 }, { "epoch": 173.86486486486487, "grad_norm": 0.953125, "learning_rate": 2.302856235336945e-06, "loss": 0.0948, "step": 6433 }, { "epoch": 173.8918918918919, "grad_norm": 0.859375, "learning_rate": 2.298174962791147e-06, "loss": 0.0854, "step": 6434 }, { "epoch": 173.9189189189189, "grad_norm": 1.0078125, "learning_rate": 2.293498223914889e-06, "loss": 0.1321, "step": 6435 }, { "epoch": 173.94594594594594, "grad_norm": 1.0703125, "learning_rate": 2.2888260196421234e-06, "loss": 0.1252, "step": 6436 }, { "epoch": 173.97297297297297, "grad_norm": 0.427734375, "learning_rate": 2.2841583509059237e-06, "loss": 0.0598, "step": 6437 }, { "epoch": 174.0, "grad_norm": 0.62109375, "learning_rate": 2.279495218638447e-06, "loss": 0.067, "step": 6438 }, { "epoch": 174.02702702702703, "grad_norm": 1.0625, "learning_rate": 2.2748366237709374e-06, "loss": 0.1311, "step": 6439 }, { "epoch": 174.05405405405406, "grad_norm": 0.80078125, "learning_rate": 2.27018256723375e-06, "loss": 0.0929, "step": 6440 }, { "epoch": 174.0810810810811, "grad_norm": 0.671875, "learning_rate": 2.2655330499563117e-06, "loss": 0.0708, "step": 6441 }, { "epoch": 174.1081081081081, "grad_norm": 1.0703125, "learning_rate": 2.260888072867162e-06, "loss": 0.1513, "step": 6442 }, { "epoch": 174.13513513513513, "grad_norm": 1.0625, "learning_rate": 2.2562476368939295e-06, "loss": 0.1252, "step": 6443 }, { "epoch": 174.16216216216216, "grad_norm": 0.921875, "learning_rate": 2.251611742963325e-06, "loss": 0.1273, "step": 6444 }, { "epoch": 174.1891891891892, "grad_norm": 1.015625, "learning_rate": 2.2469803920011607e-06, "loss": 0.1133, "step": 6445 }, { "epoch": 174.21621621621622, "grad_norm": 1.328125, "learning_rate": 2.2423535849323496e-06, "loss": 0.188, "step": 6446 }, { "epoch": 174.24324324324326, "grad_norm": 1.3125, "learning_rate": 2.2377313226808803e-06, "loss": 0.1754, "step": 6447 }, { "epoch": 174.27027027027026, "grad_norm": 1.1171875, "learning_rate": 2.2331136061698397e-06, "loss": 0.1484, "step": 6448 }, { "epoch": 174.2972972972973, "grad_norm": 1.453125, "learning_rate": 2.2285004363214234e-06, "loss": 0.2019, "step": 6449 }, { "epoch": 174.32432432432432, "grad_norm": 0.5546875, "learning_rate": 2.223891814056886e-06, "loss": 0.0649, "step": 6450 }, { "epoch": 174.35135135135135, "grad_norm": 0.64453125, "learning_rate": 2.219287740296605e-06, "loss": 0.0747, "step": 6451 }, { "epoch": 174.3783783783784, "grad_norm": 0.85546875, "learning_rate": 2.2146882159600367e-06, "loss": 0.0995, "step": 6452 }, { "epoch": 174.40540540540542, "grad_norm": 1.203125, "learning_rate": 2.2100932419657245e-06, "loss": 0.1402, "step": 6453 }, { "epoch": 174.43243243243242, "grad_norm": 0.71875, "learning_rate": 2.205502819231314e-06, "loss": 0.0869, "step": 6454 }, { "epoch": 174.45945945945945, "grad_norm": 0.67578125, "learning_rate": 2.200916948673526e-06, "loss": 0.0853, "step": 6455 }, { "epoch": 174.48648648648648, "grad_norm": 0.5078125, "learning_rate": 2.1963356312081933e-06, "loss": 0.0546, "step": 6456 }, { "epoch": 174.51351351351352, "grad_norm": 0.490234375, "learning_rate": 2.1917588677502208e-06, "loss": 0.0584, "step": 6457 }, { "epoch": 174.54054054054055, "grad_norm": 0.875, "learning_rate": 2.1871866592136166e-06, "loss": 0.101, "step": 6458 }, { "epoch": 174.56756756756758, "grad_norm": 0.625, "learning_rate": 2.1826190065114688e-06, "loss": 0.0748, "step": 6459 }, { "epoch": 174.59459459459458, "grad_norm": 0.7109375, "learning_rate": 2.1780559105559652e-06, "loss": 0.0807, "step": 6460 }, { "epoch": 174.6216216216216, "grad_norm": 0.99609375, "learning_rate": 2.1734973722583737e-06, "loss": 0.1048, "step": 6461 }, { "epoch": 174.64864864864865, "grad_norm": 0.70703125, "learning_rate": 2.16894339252906e-06, "loss": 0.0764, "step": 6462 }, { "epoch": 174.67567567567568, "grad_norm": 0.98046875, "learning_rate": 2.1643939722774843e-06, "loss": 0.1029, "step": 6463 }, { "epoch": 174.7027027027027, "grad_norm": 1.1953125, "learning_rate": 2.1598491124121767e-06, "loss": 0.1326, "step": 6464 }, { "epoch": 174.72972972972974, "grad_norm": 0.7109375, "learning_rate": 2.155308813840773e-06, "loss": 0.0642, "step": 6465 }, { "epoch": 174.75675675675674, "grad_norm": 0.91015625, "learning_rate": 2.150773077470003e-06, "loss": 0.1106, "step": 6466 }, { "epoch": 174.78378378378378, "grad_norm": 0.91015625, "learning_rate": 2.146241904205662e-06, "loss": 0.1088, "step": 6467 }, { "epoch": 174.8108108108108, "grad_norm": 0.640625, "learning_rate": 2.1417152949526562e-06, "loss": 0.0778, "step": 6468 }, { "epoch": 174.83783783783784, "grad_norm": 1.140625, "learning_rate": 2.1371932506149746e-06, "loss": 0.1474, "step": 6469 }, { "epoch": 174.86486486486487, "grad_norm": 1.1171875, "learning_rate": 2.1326757720956827e-06, "loss": 0.1355, "step": 6470 }, { "epoch": 174.8918918918919, "grad_norm": 0.86328125, "learning_rate": 2.128162860296956e-06, "loss": 0.1056, "step": 6471 }, { "epoch": 174.9189189189189, "grad_norm": 0.77734375, "learning_rate": 2.1236545161200384e-06, "loss": 0.0884, "step": 6472 }, { "epoch": 174.94594594594594, "grad_norm": 0.55078125, "learning_rate": 2.119150740465267e-06, "loss": 0.0745, "step": 6473 }, { "epoch": 174.97297297297297, "grad_norm": 0.8359375, "learning_rate": 2.114651534232076e-06, "loss": 0.0691, "step": 6474 }, { "epoch": 175.0, "grad_norm": 0.62109375, "learning_rate": 2.110156898318974e-06, "loss": 0.0712, "step": 6475 }, { "epoch": 175.02702702702703, "grad_norm": 0.56640625, "learning_rate": 2.1056668336235622e-06, "loss": 0.0691, "step": 6476 }, { "epoch": 175.05405405405406, "grad_norm": 1.03125, "learning_rate": 2.101181341042538e-06, "loss": 0.1145, "step": 6477 }, { "epoch": 175.0810810810811, "grad_norm": 0.8203125, "learning_rate": 2.0967004214716647e-06, "loss": 0.0914, "step": 6478 }, { "epoch": 175.1081081081081, "grad_norm": 0.96484375, "learning_rate": 2.0922240758058128e-06, "loss": 0.0903, "step": 6479 }, { "epoch": 175.13513513513513, "grad_norm": 0.90625, "learning_rate": 2.087752304938931e-06, "loss": 0.1162, "step": 6480 }, { "epoch": 175.16216216216216, "grad_norm": 1.1171875, "learning_rate": 2.0832851097640492e-06, "loss": 0.1323, "step": 6481 }, { "epoch": 175.1891891891892, "grad_norm": 0.72265625, "learning_rate": 2.0788224911732905e-06, "loss": 0.0826, "step": 6482 }, { "epoch": 175.21621621621622, "grad_norm": 0.7421875, "learning_rate": 2.0743644500578697e-06, "loss": 0.0968, "step": 6483 }, { "epoch": 175.24324324324326, "grad_norm": 0.8046875, "learning_rate": 2.0699109873080714e-06, "loss": 0.0819, "step": 6484 }, { "epoch": 175.27027027027026, "grad_norm": 1.1015625, "learning_rate": 2.0654621038132815e-06, "loss": 0.1256, "step": 6485 }, { "epoch": 175.2972972972973, "grad_norm": 0.455078125, "learning_rate": 2.061017800461956e-06, "loss": 0.0646, "step": 6486 }, { "epoch": 175.32432432432432, "grad_norm": 0.76171875, "learning_rate": 2.056578078141652e-06, "loss": 0.096, "step": 6487 }, { "epoch": 175.35135135135135, "grad_norm": 0.875, "learning_rate": 2.0521429377390045e-06, "loss": 0.1024, "step": 6488 }, { "epoch": 175.3783783783784, "grad_norm": 0.9140625, "learning_rate": 2.047712380139727e-06, "loss": 0.0946, "step": 6489 }, { "epoch": 175.40540540540542, "grad_norm": 0.640625, "learning_rate": 2.043286406228631e-06, "loss": 0.0747, "step": 6490 }, { "epoch": 175.43243243243242, "grad_norm": 1.0, "learning_rate": 2.038865016889607e-06, "loss": 0.1313, "step": 6491 }, { "epoch": 175.45945945945945, "grad_norm": 0.8203125, "learning_rate": 2.0344482130056186e-06, "loss": 0.1019, "step": 6492 }, { "epoch": 175.48648648648648, "grad_norm": 0.7265625, "learning_rate": 2.03003599545874e-06, "loss": 0.094, "step": 6493 }, { "epoch": 175.51351351351352, "grad_norm": 0.73046875, "learning_rate": 2.025628365130097e-06, "loss": 0.0867, "step": 6494 }, { "epoch": 175.54054054054055, "grad_norm": 0.625, "learning_rate": 2.021225322899928e-06, "loss": 0.0717, "step": 6495 }, { "epoch": 175.56756756756758, "grad_norm": 0.95703125, "learning_rate": 2.016826869647531e-06, "loss": 0.1126, "step": 6496 }, { "epoch": 175.59459459459458, "grad_norm": 1.28125, "learning_rate": 2.012433006251313e-06, "loss": 0.1778, "step": 6497 }, { "epoch": 175.6216216216216, "grad_norm": 1.0390625, "learning_rate": 2.008043733588741e-06, "loss": 0.0884, "step": 6498 }, { "epoch": 175.64864864864865, "grad_norm": 0.78125, "learning_rate": 2.003659052536375e-06, "loss": 0.0737, "step": 6499 }, { "epoch": 175.67567567567568, "grad_norm": 1.0703125, "learning_rate": 1.999278963969867e-06, "loss": 0.1201, "step": 6500 }, { "epoch": 175.7027027027027, "grad_norm": 0.515625, "learning_rate": 1.994903468763934e-06, "loss": 0.0744, "step": 6501 }, { "epoch": 175.72972972972974, "grad_norm": 0.890625, "learning_rate": 1.9905325677923886e-06, "loss": 0.1248, "step": 6502 }, { "epoch": 175.75675675675674, "grad_norm": 1.140625, "learning_rate": 1.9861662619281186e-06, "loss": 0.1554, "step": 6503 }, { "epoch": 175.78378378378378, "grad_norm": 1.3046875, "learning_rate": 1.981804552043101e-06, "loss": 0.1454, "step": 6504 }, { "epoch": 175.8108108108108, "grad_norm": 1.234375, "learning_rate": 1.9774474390083926e-06, "loss": 0.1896, "step": 6505 }, { "epoch": 175.83783783783784, "grad_norm": 0.984375, "learning_rate": 1.973094923694124e-06, "loss": 0.0879, "step": 6506 }, { "epoch": 175.86486486486487, "grad_norm": 0.89453125, "learning_rate": 1.968747006969518e-06, "loss": 0.0824, "step": 6507 }, { "epoch": 175.8918918918919, "grad_norm": 0.90234375, "learning_rate": 1.964403689702882e-06, "loss": 0.1416, "step": 6508 }, { "epoch": 175.9189189189189, "grad_norm": 0.87890625, "learning_rate": 1.960064972761588e-06, "loss": 0.1123, "step": 6509 }, { "epoch": 175.94594594594594, "grad_norm": 1.640625, "learning_rate": 1.955730857012106e-06, "loss": 0.2546, "step": 6510 }, { "epoch": 175.97297297297297, "grad_norm": 1.6953125, "learning_rate": 1.9514013433199836e-06, "loss": 0.2071, "step": 6511 }, { "epoch": 176.0, "grad_norm": 0.9375, "learning_rate": 1.947076432549838e-06, "loss": 0.1365, "step": 6512 }, { "epoch": 176.02702702702703, "grad_norm": 1.1796875, "learning_rate": 1.9427561255653816e-06, "loss": 0.1651, "step": 6513 }, { "epoch": 176.05405405405406, "grad_norm": 1.203125, "learning_rate": 1.938440423229404e-06, "loss": 0.1414, "step": 6514 }, { "epoch": 176.0810810810811, "grad_norm": 1.03125, "learning_rate": 1.9341293264037634e-06, "loss": 0.1299, "step": 6515 }, { "epoch": 176.1081081081081, "grad_norm": 0.75, "learning_rate": 1.9298228359494215e-06, "loss": 0.0866, "step": 6516 }, { "epoch": 176.13513513513513, "grad_norm": 0.82421875, "learning_rate": 1.9255209527263924e-06, "loss": 0.0925, "step": 6517 }, { "epoch": 176.16216216216216, "grad_norm": 0.74609375, "learning_rate": 1.9212236775937893e-06, "loss": 0.0774, "step": 6518 }, { "epoch": 176.1891891891892, "grad_norm": 0.87109375, "learning_rate": 1.916931011409803e-06, "loss": 0.1126, "step": 6519 }, { "epoch": 176.21621621621622, "grad_norm": 1.0859375, "learning_rate": 1.912642955031696e-06, "loss": 0.13, "step": 6520 }, { "epoch": 176.24324324324326, "grad_norm": 0.6640625, "learning_rate": 1.9083595093158186e-06, "loss": 0.0794, "step": 6521 }, { "epoch": 176.27027027027026, "grad_norm": 0.8359375, "learning_rate": 1.9040806751175976e-06, "loss": 0.1152, "step": 6522 }, { "epoch": 176.2972972972973, "grad_norm": 0.5546875, "learning_rate": 1.8998064532915305e-06, "loss": 0.0687, "step": 6523 }, { "epoch": 176.32432432432432, "grad_norm": 0.6796875, "learning_rate": 1.8955368446912063e-06, "loss": 0.0868, "step": 6524 }, { "epoch": 176.35135135135135, "grad_norm": 0.65234375, "learning_rate": 1.8912718501692933e-06, "loss": 0.0778, "step": 6525 }, { "epoch": 176.3783783783784, "grad_norm": 0.57421875, "learning_rate": 1.887011470577521e-06, "loss": 0.0728, "step": 6526 }, { "epoch": 176.40540540540542, "grad_norm": 0.87109375, "learning_rate": 1.8827557067667146e-06, "loss": 0.1011, "step": 6527 }, { "epoch": 176.43243243243242, "grad_norm": 0.828125, "learning_rate": 1.8785045595867745e-06, "loss": 0.095, "step": 6528 }, { "epoch": 176.45945945945945, "grad_norm": 0.8671875, "learning_rate": 1.8742580298866691e-06, "loss": 0.1059, "step": 6529 }, { "epoch": 176.48648648648648, "grad_norm": 1.2890625, "learning_rate": 1.8700161185144587e-06, "loss": 0.159, "step": 6530 }, { "epoch": 176.51351351351352, "grad_norm": 0.890625, "learning_rate": 1.8657788263172715e-06, "loss": 0.1159, "step": 6531 }, { "epoch": 176.54054054054055, "grad_norm": 1.078125, "learning_rate": 1.8615461541413165e-06, "loss": 0.1215, "step": 6532 }, { "epoch": 176.56756756756758, "grad_norm": 0.86328125, "learning_rate": 1.8573181028318787e-06, "loss": 0.1011, "step": 6533 }, { "epoch": 176.59459459459458, "grad_norm": 1.1640625, "learning_rate": 1.8530946732333243e-06, "loss": 0.1586, "step": 6534 }, { "epoch": 176.6216216216216, "grad_norm": 1.375, "learning_rate": 1.848875866189087e-06, "loss": 0.1893, "step": 6535 }, { "epoch": 176.64864864864865, "grad_norm": 0.828125, "learning_rate": 1.8446616825416956e-06, "loss": 0.0902, "step": 6536 }, { "epoch": 176.67567567567568, "grad_norm": 0.875, "learning_rate": 1.8404521231327327e-06, "loss": 0.094, "step": 6537 }, { "epoch": 176.7027027027027, "grad_norm": 0.9296875, "learning_rate": 1.8362471888028699e-06, "loss": 0.1322, "step": 6538 }, { "epoch": 176.72972972972974, "grad_norm": 0.52734375, "learning_rate": 1.8320468803918633e-06, "loss": 0.0678, "step": 6539 }, { "epoch": 176.75675675675674, "grad_norm": 1.15625, "learning_rate": 1.8278511987385256e-06, "loss": 0.1697, "step": 6540 }, { "epoch": 176.78378378378378, "grad_norm": 0.78125, "learning_rate": 1.8236601446807583e-06, "loss": 0.0991, "step": 6541 }, { "epoch": 176.8108108108108, "grad_norm": 0.5625, "learning_rate": 1.8194737190555422e-06, "loss": 0.069, "step": 6542 }, { "epoch": 176.83783783783784, "grad_norm": 1.2109375, "learning_rate": 1.8152919226989169e-06, "loss": 0.1729, "step": 6543 }, { "epoch": 176.86486486486487, "grad_norm": 0.9453125, "learning_rate": 1.8111147564460167e-06, "loss": 0.1086, "step": 6544 }, { "epoch": 176.8918918918919, "grad_norm": 1.0703125, "learning_rate": 1.8069422211310444e-06, "loss": 0.1177, "step": 6545 }, { "epoch": 176.9189189189189, "grad_norm": 0.81640625, "learning_rate": 1.8027743175872664e-06, "loss": 0.0998, "step": 6546 }, { "epoch": 176.94594594594594, "grad_norm": 0.875, "learning_rate": 1.7986110466470475e-06, "loss": 0.0945, "step": 6547 }, { "epoch": 176.97297297297297, "grad_norm": 1.3828125, "learning_rate": 1.7944524091418003e-06, "loss": 0.2147, "step": 6548 }, { "epoch": 177.0, "grad_norm": 0.7265625, "learning_rate": 1.7902984059020328e-06, "loss": 0.0775, "step": 6549 }, { "epoch": 177.02702702702703, "grad_norm": 0.59375, "learning_rate": 1.7861490377573258e-06, "loss": 0.0649, "step": 6550 }, { "epoch": 177.05405405405406, "grad_norm": 0.90625, "learning_rate": 1.7820043055363168e-06, "loss": 0.0938, "step": 6551 }, { "epoch": 177.0810810810811, "grad_norm": 0.578125, "learning_rate": 1.7778642100667374e-06, "loss": 0.0745, "step": 6552 }, { "epoch": 177.1081081081081, "grad_norm": 0.5390625, "learning_rate": 1.773728752175388e-06, "loss": 0.0746, "step": 6553 }, { "epoch": 177.13513513513513, "grad_norm": 1.0390625, "learning_rate": 1.7695979326881356e-06, "loss": 0.1255, "step": 6554 }, { "epoch": 177.16216216216216, "grad_norm": 1.1015625, "learning_rate": 1.7654717524299259e-06, "loss": 0.1063, "step": 6555 }, { "epoch": 177.1891891891892, "grad_norm": 0.66796875, "learning_rate": 1.7613502122247826e-06, "loss": 0.0791, "step": 6556 }, { "epoch": 177.21621621621622, "grad_norm": 0.921875, "learning_rate": 1.7572333128957897e-06, "loss": 0.1172, "step": 6557 }, { "epoch": 177.24324324324326, "grad_norm": 0.74609375, "learning_rate": 1.7531210552651222e-06, "loss": 0.071, "step": 6558 }, { "epoch": 177.27027027027026, "grad_norm": 0.92578125, "learning_rate": 1.7490134401540153e-06, "loss": 0.1025, "step": 6559 }, { "epoch": 177.2972972972973, "grad_norm": 0.640625, "learning_rate": 1.7449104683827766e-06, "loss": 0.0756, "step": 6560 }, { "epoch": 177.32432432432432, "grad_norm": 0.66015625, "learning_rate": 1.7408121407708005e-06, "loss": 0.0718, "step": 6561 }, { "epoch": 177.35135135135135, "grad_norm": 1.5546875, "learning_rate": 1.7367184581365297e-06, "loss": 0.2687, "step": 6562 }, { "epoch": 177.3783783783784, "grad_norm": 1.1640625, "learning_rate": 1.732629421297502e-06, "loss": 0.1389, "step": 6563 }, { "epoch": 177.40540540540542, "grad_norm": 1.0390625, "learning_rate": 1.7285450310703222e-06, "loss": 0.1427, "step": 6564 }, { "epoch": 177.43243243243242, "grad_norm": 0.72265625, "learning_rate": 1.7244652882706546e-06, "loss": 0.0898, "step": 6565 }, { "epoch": 177.45945945945945, "grad_norm": 1.5703125, "learning_rate": 1.7203901937132477e-06, "loss": 0.1457, "step": 6566 }, { "epoch": 177.48648648648648, "grad_norm": 1.0625, "learning_rate": 1.7163197482119254e-06, "loss": 0.1407, "step": 6567 }, { "epoch": 177.51351351351352, "grad_norm": 1.109375, "learning_rate": 1.7122539525795678e-06, "loss": 0.1213, "step": 6568 }, { "epoch": 177.54054054054055, "grad_norm": 0.73828125, "learning_rate": 1.7081928076281396e-06, "loss": 0.1051, "step": 6569 }, { "epoch": 177.56756756756758, "grad_norm": 0.890625, "learning_rate": 1.7041363141686666e-06, "loss": 0.0984, "step": 6570 }, { "epoch": 177.59459459459458, "grad_norm": 0.70703125, "learning_rate": 1.7000844730112598e-06, "loss": 0.0733, "step": 6571 }, { "epoch": 177.6216216216216, "grad_norm": 1.4140625, "learning_rate": 1.6960372849650823e-06, "loss": 0.1639, "step": 6572 }, { "epoch": 177.64864864864865, "grad_norm": 1.078125, "learning_rate": 1.6919947508383882e-06, "loss": 0.1243, "step": 6573 }, { "epoch": 177.67567567567568, "grad_norm": 0.51171875, "learning_rate": 1.6879568714384813e-06, "loss": 0.0736, "step": 6574 }, { "epoch": 177.7027027027027, "grad_norm": 1.359375, "learning_rate": 1.6839236475717557e-06, "loss": 0.1845, "step": 6575 }, { "epoch": 177.72972972972974, "grad_norm": 0.9765625, "learning_rate": 1.679895080043664e-06, "loss": 0.0988, "step": 6576 }, { "epoch": 177.75675675675674, "grad_norm": 0.439453125, "learning_rate": 1.675871169658727e-06, "loss": 0.0554, "step": 6577 }, { "epoch": 177.78378378378378, "grad_norm": 0.8671875, "learning_rate": 1.6718519172205487e-06, "loss": 0.0906, "step": 6578 }, { "epoch": 177.8108108108108, "grad_norm": 0.96875, "learning_rate": 1.6678373235317846e-06, "loss": 0.0793, "step": 6579 }, { "epoch": 177.83783783783784, "grad_norm": 0.58203125, "learning_rate": 1.6638273893941763e-06, "loss": 0.0663, "step": 6580 }, { "epoch": 177.86486486486487, "grad_norm": 0.74609375, "learning_rate": 1.659822115608528e-06, "loss": 0.0885, "step": 6581 }, { "epoch": 177.8918918918919, "grad_norm": 1.3828125, "learning_rate": 1.6558215029747053e-06, "loss": 0.1596, "step": 6582 }, { "epoch": 177.9189189189189, "grad_norm": 0.8671875, "learning_rate": 1.6518255522916582e-06, "loss": 0.0969, "step": 6583 }, { "epoch": 177.94594594594594, "grad_norm": 0.95703125, "learning_rate": 1.6478342643574008e-06, "loss": 0.0863, "step": 6584 }, { "epoch": 177.97297297297297, "grad_norm": 0.625, "learning_rate": 1.6438476399690067e-06, "loss": 0.0777, "step": 6585 }, { "epoch": 178.0, "grad_norm": 0.9453125, "learning_rate": 1.6398656799226252e-06, "loss": 0.1332, "step": 6586 }, { "epoch": 178.02702702702703, "grad_norm": 0.9921875, "learning_rate": 1.6358883850134816e-06, "loss": 0.1095, "step": 6587 }, { "epoch": 178.05405405405406, "grad_norm": 0.875, "learning_rate": 1.631915756035851e-06, "loss": 0.102, "step": 6588 }, { "epoch": 178.0810810810811, "grad_norm": 0.70703125, "learning_rate": 1.627947793783094e-06, "loss": 0.0736, "step": 6589 }, { "epoch": 178.1081081081081, "grad_norm": 0.6875, "learning_rate": 1.6239844990476372e-06, "loss": 0.0803, "step": 6590 }, { "epoch": 178.13513513513513, "grad_norm": 0.92578125, "learning_rate": 1.6200258726209588e-06, "loss": 0.0899, "step": 6591 }, { "epoch": 178.16216216216216, "grad_norm": 0.5625, "learning_rate": 1.6160719152936294e-06, "loss": 0.0776, "step": 6592 }, { "epoch": 178.1891891891892, "grad_norm": 1.0625, "learning_rate": 1.6121226278552615e-06, "loss": 0.1242, "step": 6593 }, { "epoch": 178.21621621621622, "grad_norm": 0.69140625, "learning_rate": 1.6081780110945578e-06, "loss": 0.0825, "step": 6594 }, { "epoch": 178.24324324324326, "grad_norm": 0.421875, "learning_rate": 1.6042380657992767e-06, "loss": 0.0619, "step": 6595 }, { "epoch": 178.27027027027026, "grad_norm": 0.67578125, "learning_rate": 1.6003027927562392e-06, "loss": 0.0672, "step": 6596 }, { "epoch": 178.2972972972973, "grad_norm": 0.90625, "learning_rate": 1.5963721927513415e-06, "loss": 0.1118, "step": 6597 }, { "epoch": 178.32432432432432, "grad_norm": 0.5859375, "learning_rate": 1.5924462665695527e-06, "loss": 0.0755, "step": 6598 }, { "epoch": 178.35135135135135, "grad_norm": 0.94140625, "learning_rate": 1.5885250149948904e-06, "loss": 0.1271, "step": 6599 }, { "epoch": 178.3783783783784, "grad_norm": 1.3359375, "learning_rate": 1.5846084388104504e-06, "loss": 0.1993, "step": 6600 }, { "epoch": 178.40540540540542, "grad_norm": 0.77734375, "learning_rate": 1.5806965387983985e-06, "loss": 0.0767, "step": 6601 }, { "epoch": 178.43243243243242, "grad_norm": 0.73828125, "learning_rate": 1.5767893157399517e-06, "loss": 0.0819, "step": 6602 }, { "epoch": 178.45945945945945, "grad_norm": 0.58203125, "learning_rate": 1.5728867704154077e-06, "loss": 0.0738, "step": 6603 }, { "epoch": 178.48648648648648, "grad_norm": 0.498046875, "learning_rate": 1.5689889036041293e-06, "loss": 0.0581, "step": 6604 }, { "epoch": 178.51351351351352, "grad_norm": 0.6640625, "learning_rate": 1.5650957160845303e-06, "loss": 0.0702, "step": 6605 }, { "epoch": 178.54054054054055, "grad_norm": 0.56640625, "learning_rate": 1.5612072086341078e-06, "loss": 0.0714, "step": 6606 }, { "epoch": 178.56756756756758, "grad_norm": 0.94921875, "learning_rate": 1.5573233820294103e-06, "loss": 0.1112, "step": 6607 }, { "epoch": 178.59459459459458, "grad_norm": 1.125, "learning_rate": 1.5534442370460645e-06, "loss": 0.1035, "step": 6608 }, { "epoch": 178.6216216216216, "grad_norm": 0.85546875, "learning_rate": 1.5495697744587477e-06, "loss": 0.0971, "step": 6609 }, { "epoch": 178.64864864864865, "grad_norm": 0.703125, "learning_rate": 1.5456999950412166e-06, "loss": 0.0725, "step": 6610 }, { "epoch": 178.67567567567568, "grad_norm": 1.3046875, "learning_rate": 1.5418348995662774e-06, "loss": 0.1595, "step": 6611 }, { "epoch": 178.7027027027027, "grad_norm": 0.8359375, "learning_rate": 1.5379744888058184e-06, "loss": 0.1014, "step": 6612 }, { "epoch": 178.72972972972974, "grad_norm": 1.015625, "learning_rate": 1.5341187635307814e-06, "loss": 0.1421, "step": 6613 }, { "epoch": 178.75675675675674, "grad_norm": 0.92578125, "learning_rate": 1.5302677245111668e-06, "loss": 0.1137, "step": 6614 }, { "epoch": 178.78378378378378, "grad_norm": 0.69921875, "learning_rate": 1.526421372516057e-06, "loss": 0.0862, "step": 6615 }, { "epoch": 178.8108108108108, "grad_norm": 1.15625, "learning_rate": 1.522579708313579e-06, "loss": 0.1757, "step": 6616 }, { "epoch": 178.83783783783784, "grad_norm": 0.447265625, "learning_rate": 1.5187427326709386e-06, "loss": 0.0592, "step": 6617 }, { "epoch": 178.86486486486487, "grad_norm": 1.390625, "learning_rate": 1.5149104463543978e-06, "loss": 0.1692, "step": 6618 }, { "epoch": 178.8918918918919, "grad_norm": 0.6328125, "learning_rate": 1.5110828501292833e-06, "loss": 0.0703, "step": 6619 }, { "epoch": 178.9189189189189, "grad_norm": 1.0625, "learning_rate": 1.5072599447599812e-06, "loss": 0.1342, "step": 6620 }, { "epoch": 178.94594594594594, "grad_norm": 1.015625, "learning_rate": 1.5034417310099551e-06, "loss": 0.0767, "step": 6621 }, { "epoch": 178.97297297297297, "grad_norm": 0.96484375, "learning_rate": 1.4996282096417125e-06, "loss": 0.1064, "step": 6622 }, { "epoch": 179.0, "grad_norm": 0.8359375, "learning_rate": 1.495819381416838e-06, "loss": 0.1113, "step": 6623 }, { "epoch": 179.02702702702703, "grad_norm": 0.77734375, "learning_rate": 1.4920152470959707e-06, "loss": 0.0936, "step": 6624 }, { "epoch": 179.05405405405406, "grad_norm": 0.86328125, "learning_rate": 1.4882158074388142e-06, "loss": 0.0917, "step": 6625 }, { "epoch": 179.0810810810811, "grad_norm": 0.69921875, "learning_rate": 1.4844210632041445e-06, "loss": 0.0833, "step": 6626 }, { "epoch": 179.1081081081081, "grad_norm": 0.56640625, "learning_rate": 1.4806310151497804e-06, "loss": 0.0739, "step": 6627 }, { "epoch": 179.13513513513513, "grad_norm": 1.3359375, "learning_rate": 1.4768456640326195e-06, "loss": 0.1456, "step": 6628 }, { "epoch": 179.16216216216216, "grad_norm": 1.0390625, "learning_rate": 1.473065010608618e-06, "loss": 0.1092, "step": 6629 }, { "epoch": 179.1891891891892, "grad_norm": 1.4765625, "learning_rate": 1.4692890556327832e-06, "loss": 0.1844, "step": 6630 }, { "epoch": 179.21621621621622, "grad_norm": 1.2265625, "learning_rate": 1.465517799859198e-06, "loss": 0.1779, "step": 6631 }, { "epoch": 179.24324324324326, "grad_norm": 0.6171875, "learning_rate": 1.4617512440410069e-06, "loss": 0.0781, "step": 6632 }, { "epoch": 179.27027027027026, "grad_norm": 0.765625, "learning_rate": 1.4579893889303974e-06, "loss": 0.0927, "step": 6633 }, { "epoch": 179.2972972972973, "grad_norm": 0.82421875, "learning_rate": 1.454232235278638e-06, "loss": 0.0974, "step": 6634 }, { "epoch": 179.32432432432432, "grad_norm": 0.65625, "learning_rate": 1.4504797838360562e-06, "loss": 0.0778, "step": 6635 }, { "epoch": 179.35135135135135, "grad_norm": 0.96875, "learning_rate": 1.4467320353520274e-06, "loss": 0.107, "step": 6636 }, { "epoch": 179.3783783783784, "grad_norm": 0.99609375, "learning_rate": 1.4429889905750006e-06, "loss": 0.1094, "step": 6637 }, { "epoch": 179.40540540540542, "grad_norm": 0.82421875, "learning_rate": 1.4392506502524773e-06, "loss": 0.0992, "step": 6638 }, { "epoch": 179.43243243243242, "grad_norm": 0.5625, "learning_rate": 1.4355170151310214e-06, "loss": 0.0767, "step": 6639 }, { "epoch": 179.45945945945945, "grad_norm": 0.83203125, "learning_rate": 1.4317880859562704e-06, "loss": 0.1101, "step": 6640 }, { "epoch": 179.48648648648648, "grad_norm": 0.671875, "learning_rate": 1.428063863472895e-06, "loss": 0.0829, "step": 6641 }, { "epoch": 179.51351351351352, "grad_norm": 0.439453125, "learning_rate": 1.42434434842465e-06, "loss": 0.0618, "step": 6642 }, { "epoch": 179.54054054054055, "grad_norm": 0.96484375, "learning_rate": 1.4206295415543442e-06, "loss": 0.1146, "step": 6643 }, { "epoch": 179.56756756756758, "grad_norm": 1.421875, "learning_rate": 1.4169194436038342e-06, "loss": 0.1594, "step": 6644 }, { "epoch": 179.59459459459458, "grad_norm": 1.015625, "learning_rate": 1.413214055314055e-06, "loss": 0.1276, "step": 6645 }, { "epoch": 179.6216216216216, "grad_norm": 0.9765625, "learning_rate": 1.4095133774249841e-06, "loss": 0.1175, "step": 6646 }, { "epoch": 179.64864864864865, "grad_norm": 0.447265625, "learning_rate": 1.4058174106756665e-06, "loss": 0.0565, "step": 6647 }, { "epoch": 179.67567567567568, "grad_norm": 0.484375, "learning_rate": 1.402126155804212e-06, "loss": 0.0616, "step": 6648 }, { "epoch": 179.7027027027027, "grad_norm": 1.15625, "learning_rate": 1.3984396135477722e-06, "loss": 0.1559, "step": 6649 }, { "epoch": 179.72972972972974, "grad_norm": 0.69140625, "learning_rate": 1.3947577846425807e-06, "loss": 0.0812, "step": 6650 }, { "epoch": 179.75675675675674, "grad_norm": 0.625, "learning_rate": 1.391080669823905e-06, "loss": 0.0766, "step": 6651 }, { "epoch": 179.78378378378378, "grad_norm": 1.140625, "learning_rate": 1.3874082698260959e-06, "loss": 0.1572, "step": 6652 }, { "epoch": 179.8108108108108, "grad_norm": 1.0390625, "learning_rate": 1.383740585382537e-06, "loss": 0.0947, "step": 6653 }, { "epoch": 179.83783783783784, "grad_norm": 1.2578125, "learning_rate": 1.3800776172256946e-06, "loss": 0.1719, "step": 6654 }, { "epoch": 179.86486486486487, "grad_norm": 1.0859375, "learning_rate": 1.3764193660870756e-06, "loss": 0.1265, "step": 6655 }, { "epoch": 179.8918918918919, "grad_norm": 0.92578125, "learning_rate": 1.3727658326972538e-06, "loss": 0.1048, "step": 6656 }, { "epoch": 179.9189189189189, "grad_norm": 1.3984375, "learning_rate": 1.3691170177858591e-06, "loss": 0.183, "step": 6657 }, { "epoch": 179.94594594594594, "grad_norm": 1.046875, "learning_rate": 1.365472922081576e-06, "loss": 0.1017, "step": 6658 }, { "epoch": 179.97297297297297, "grad_norm": 1.0703125, "learning_rate": 1.3618335463121496e-06, "loss": 0.126, "step": 6659 }, { "epoch": 180.0, "grad_norm": 1.5078125, "learning_rate": 1.3581988912043847e-06, "loss": 0.2475, "step": 6660 }, { "epoch": 180.02702702702703, "grad_norm": 1.203125, "learning_rate": 1.3545689574841342e-06, "loss": 0.1791, "step": 6661 }, { "epoch": 180.05405405405406, "grad_norm": 0.89453125, "learning_rate": 1.350943745876318e-06, "loss": 0.1026, "step": 6662 }, { "epoch": 180.0810810810811, "grad_norm": 0.7265625, "learning_rate": 1.3473232571049126e-06, "loss": 0.0792, "step": 6663 }, { "epoch": 180.1081081081081, "grad_norm": 0.62890625, "learning_rate": 1.3437074918929427e-06, "loss": 0.0807, "step": 6664 }, { "epoch": 180.13513513513513, "grad_norm": 1.3359375, "learning_rate": 1.340096450962497e-06, "loss": 0.1481, "step": 6665 }, { "epoch": 180.16216216216216, "grad_norm": 1.046875, "learning_rate": 1.3364901350347181e-06, "loss": 0.1382, "step": 6666 }, { "epoch": 180.1891891891892, "grad_norm": 1.0625, "learning_rate": 1.332888544829805e-06, "loss": 0.1529, "step": 6667 }, { "epoch": 180.21621621621622, "grad_norm": 0.70703125, "learning_rate": 1.3292916810670186e-06, "loss": 0.0765, "step": 6668 }, { "epoch": 180.24324324324326, "grad_norm": 0.9765625, "learning_rate": 1.3256995444646618e-06, "loss": 0.1118, "step": 6669 }, { "epoch": 180.27027027027026, "grad_norm": 0.8203125, "learning_rate": 1.3221121357401085e-06, "loss": 0.0902, "step": 6670 }, { "epoch": 180.2972972972973, "grad_norm": 0.91015625, "learning_rate": 1.318529455609785e-06, "loss": 0.1166, "step": 6671 }, { "epoch": 180.32432432432432, "grad_norm": 0.62109375, "learning_rate": 1.3149515047891609e-06, "loss": 0.0765, "step": 6672 }, { "epoch": 180.35135135135135, "grad_norm": 0.94921875, "learning_rate": 1.3113782839927785e-06, "loss": 0.0854, "step": 6673 }, { "epoch": 180.3783783783784, "grad_norm": 0.7265625, "learning_rate": 1.307809793934231e-06, "loss": 0.0914, "step": 6674 }, { "epoch": 180.40540540540542, "grad_norm": 0.91796875, "learning_rate": 1.3042460353261538e-06, "loss": 0.1108, "step": 6675 }, { "epoch": 180.43243243243242, "grad_norm": 1.0234375, "learning_rate": 1.3006870088802553e-06, "loss": 0.1186, "step": 6676 }, { "epoch": 180.45945945945945, "grad_norm": 0.96875, "learning_rate": 1.2971327153072921e-06, "loss": 0.1246, "step": 6677 }, { "epoch": 180.48648648648648, "grad_norm": 0.6171875, "learning_rate": 1.2935831553170685e-06, "loss": 0.0729, "step": 6678 }, { "epoch": 180.51351351351352, "grad_norm": 1.3125, "learning_rate": 1.2900383296184536e-06, "loss": 0.1983, "step": 6679 }, { "epoch": 180.54054054054055, "grad_norm": 0.8359375, "learning_rate": 1.2864982389193702e-06, "loss": 0.0832, "step": 6680 }, { "epoch": 180.56756756756758, "grad_norm": 0.77734375, "learning_rate": 1.2829628839267832e-06, "loss": 0.0904, "step": 6681 }, { "epoch": 180.59459459459458, "grad_norm": 0.80859375, "learning_rate": 1.2794322653467306e-06, "loss": 0.116, "step": 6682 }, { "epoch": 180.6216216216216, "grad_norm": 1.3515625, "learning_rate": 1.2759063838842873e-06, "loss": 0.1791, "step": 6683 }, { "epoch": 180.64864864864865, "grad_norm": 0.83203125, "learning_rate": 1.2723852402435926e-06, "loss": 0.1179, "step": 6684 }, { "epoch": 180.67567567567568, "grad_norm": 0.53515625, "learning_rate": 1.2688688351278426e-06, "loss": 0.0664, "step": 6685 }, { "epoch": 180.7027027027027, "grad_norm": 0.70703125, "learning_rate": 1.2653571692392702e-06, "loss": 0.0758, "step": 6686 }, { "epoch": 180.72972972972974, "grad_norm": 0.65625, "learning_rate": 1.2618502432791808e-06, "loss": 0.0838, "step": 6687 }, { "epoch": 180.75675675675674, "grad_norm": 0.72265625, "learning_rate": 1.2583480579479195e-06, "loss": 0.0805, "step": 6688 }, { "epoch": 180.78378378378378, "grad_norm": 0.73046875, "learning_rate": 1.2548506139448995e-06, "loss": 0.0848, "step": 6689 }, { "epoch": 180.8108108108108, "grad_norm": 0.875, "learning_rate": 1.2513579119685675e-06, "loss": 0.1293, "step": 6690 }, { "epoch": 180.83783783783784, "grad_norm": 1.1640625, "learning_rate": 1.2478699527164406e-06, "loss": 0.139, "step": 6691 }, { "epoch": 180.86486486486487, "grad_norm": 0.890625, "learning_rate": 1.2443867368850781e-06, "loss": 0.1135, "step": 6692 }, { "epoch": 180.8918918918919, "grad_norm": 1.4140625, "learning_rate": 1.2409082651700987e-06, "loss": 0.1766, "step": 6693 }, { "epoch": 180.9189189189189, "grad_norm": 1.078125, "learning_rate": 1.2374345382661718e-06, "loss": 0.1304, "step": 6694 }, { "epoch": 180.94594594594594, "grad_norm": 0.98828125, "learning_rate": 1.2339655568670117e-06, "loss": 0.1157, "step": 6695 }, { "epoch": 180.97297297297297, "grad_norm": 0.859375, "learning_rate": 1.2305013216653977e-06, "loss": 0.0942, "step": 6696 }, { "epoch": 181.0, "grad_norm": 1.2734375, "learning_rate": 1.227041833353157e-06, "loss": 0.2063, "step": 6697 }, { "epoch": 181.02702702702703, "grad_norm": 1.0, "learning_rate": 1.2235870926211619e-06, "loss": 0.1023, "step": 6698 }, { "epoch": 181.05405405405406, "grad_norm": 1.390625, "learning_rate": 1.2201371001593437e-06, "loss": 0.1997, "step": 6699 }, { "epoch": 181.0810810810811, "grad_norm": 0.47265625, "learning_rate": 1.216691856656682e-06, "loss": 0.0655, "step": 6700 }, { "epoch": 181.1081081081081, "grad_norm": 1.1953125, "learning_rate": 1.2132513628012098e-06, "loss": 0.0891, "step": 6701 }, { "epoch": 181.13513513513513, "grad_norm": 1.28125, "learning_rate": 1.2098156192800137e-06, "loss": 0.1604, "step": 6702 }, { "epoch": 181.16216216216216, "grad_norm": 0.796875, "learning_rate": 1.2063846267792251e-06, "loss": 0.0874, "step": 6703 }, { "epoch": 181.1891891891892, "grad_norm": 1.0390625, "learning_rate": 1.202958385984035e-06, "loss": 0.1116, "step": 6704 }, { "epoch": 181.21621621621622, "grad_norm": 0.93359375, "learning_rate": 1.1995368975786792e-06, "loss": 0.1007, "step": 6705 }, { "epoch": 181.24324324324326, "grad_norm": 0.625, "learning_rate": 1.1961201622464419e-06, "loss": 0.0737, "step": 6706 }, { "epoch": 181.27027027027026, "grad_norm": 1.265625, "learning_rate": 1.192708180669666e-06, "loss": 0.1346, "step": 6707 }, { "epoch": 181.2972972972973, "grad_norm": 1.015625, "learning_rate": 1.1893009535297483e-06, "loss": 0.1498, "step": 6708 }, { "epoch": 181.32432432432432, "grad_norm": 1.125, "learning_rate": 1.1858984815071168e-06, "loss": 0.133, "step": 6709 }, { "epoch": 181.35135135135135, "grad_norm": 1.0390625, "learning_rate": 1.1825007652812692e-06, "loss": 0.0969, "step": 6710 }, { "epoch": 181.3783783783784, "grad_norm": 0.68359375, "learning_rate": 1.1791078055307493e-06, "loss": 0.0824, "step": 6711 }, { "epoch": 181.40540540540542, "grad_norm": 1.09375, "learning_rate": 1.1757196029331425e-06, "loss": 0.1107, "step": 6712 }, { "epoch": 181.43243243243242, "grad_norm": 0.86328125, "learning_rate": 1.1723361581650965e-06, "loss": 0.1077, "step": 6713 }, { "epoch": 181.45945945945945, "grad_norm": 0.80078125, "learning_rate": 1.1689574719022933e-06, "loss": 0.0801, "step": 6714 }, { "epoch": 181.48648648648648, "grad_norm": 1.0546875, "learning_rate": 1.165583544819479e-06, "loss": 0.1368, "step": 6715 }, { "epoch": 181.51351351351352, "grad_norm": 0.70703125, "learning_rate": 1.162214377590448e-06, "loss": 0.0833, "step": 6716 }, { "epoch": 181.54054054054055, "grad_norm": 1.1796875, "learning_rate": 1.158849970888032e-06, "loss": 0.1636, "step": 6717 }, { "epoch": 181.56756756756758, "grad_norm": 0.4921875, "learning_rate": 1.1554903253841232e-06, "loss": 0.0697, "step": 6718 }, { "epoch": 181.59459459459458, "grad_norm": 0.9609375, "learning_rate": 1.152135441749666e-06, "loss": 0.1033, "step": 6719 }, { "epoch": 181.6216216216216, "grad_norm": 1.0703125, "learning_rate": 1.1487853206546356e-06, "loss": 0.1265, "step": 6720 }, { "epoch": 181.64864864864865, "grad_norm": 0.84765625, "learning_rate": 1.1454399627680774e-06, "loss": 0.0968, "step": 6721 }, { "epoch": 181.67567567567568, "grad_norm": 1.421875, "learning_rate": 1.1420993687580761e-06, "loss": 0.1794, "step": 6722 }, { "epoch": 181.7027027027027, "grad_norm": 0.62890625, "learning_rate": 1.1387635392917594e-06, "loss": 0.0787, "step": 6723 }, { "epoch": 181.72972972972974, "grad_norm": 1.5859375, "learning_rate": 1.1354324750353167e-06, "loss": 0.2405, "step": 6724 }, { "epoch": 181.75675675675674, "grad_norm": 1.3828125, "learning_rate": 1.1321061766539714e-06, "loss": 0.1856, "step": 6725 }, { "epoch": 181.78378378378378, "grad_norm": 0.95703125, "learning_rate": 1.1287846448120086e-06, "loss": 0.1425, "step": 6726 }, { "epoch": 181.8108108108108, "grad_norm": 1.0, "learning_rate": 1.1254678801727482e-06, "loss": 0.1264, "step": 6727 }, { "epoch": 181.83783783783784, "grad_norm": 1.046875, "learning_rate": 1.1221558833985707e-06, "loss": 0.1207, "step": 6728 }, { "epoch": 181.86486486486487, "grad_norm": 1.25, "learning_rate": 1.118848655150892e-06, "loss": 0.1351, "step": 6729 }, { "epoch": 181.8918918918919, "grad_norm": 0.80859375, "learning_rate": 1.1155461960901919e-06, "loss": 0.0876, "step": 6730 }, { "epoch": 181.9189189189189, "grad_norm": 1.234375, "learning_rate": 1.1122485068759791e-06, "loss": 0.195, "step": 6731 }, { "epoch": 181.94594594594594, "grad_norm": 0.921875, "learning_rate": 1.108955588166824e-06, "loss": 0.1099, "step": 6732 }, { "epoch": 181.97297297297297, "grad_norm": 1.21875, "learning_rate": 1.1056674406203393e-06, "loss": 0.1889, "step": 6733 }, { "epoch": 182.0, "grad_norm": 0.94921875, "learning_rate": 1.1023840648931831e-06, "loss": 0.1027, "step": 6734 }, { "epoch": 182.02702702702703, "grad_norm": 0.9296875, "learning_rate": 1.0991054616410589e-06, "loss": 0.1135, "step": 6735 }, { "epoch": 182.05405405405406, "grad_norm": 0.6640625, "learning_rate": 1.095831631518729e-06, "loss": 0.0768, "step": 6736 }, { "epoch": 182.0810810810811, "grad_norm": 1.0, "learning_rate": 1.0925625751799867e-06, "loss": 0.1168, "step": 6737 }, { "epoch": 182.1081081081081, "grad_norm": 1.046875, "learning_rate": 1.0892982932776796e-06, "loss": 0.0961, "step": 6738 }, { "epoch": 182.13513513513513, "grad_norm": 0.8046875, "learning_rate": 1.0860387864637056e-06, "loss": 0.0935, "step": 6739 }, { "epoch": 182.16216216216216, "grad_norm": 0.9375, "learning_rate": 1.0827840553889996e-06, "loss": 0.1323, "step": 6740 }, { "epoch": 182.1891891891892, "grad_norm": 1.4921875, "learning_rate": 1.0795341007035498e-06, "loss": 0.2106, "step": 6741 }, { "epoch": 182.21621621621622, "grad_norm": 0.6796875, "learning_rate": 1.0762889230563928e-06, "loss": 0.0862, "step": 6742 }, { "epoch": 182.24324324324326, "grad_norm": 1.0546875, "learning_rate": 1.0730485230956017e-06, "loss": 0.1345, "step": 6743 }, { "epoch": 182.27027027027026, "grad_norm": 0.75, "learning_rate": 1.0698129014683034e-06, "loss": 0.0779, "step": 6744 }, { "epoch": 182.2972972972973, "grad_norm": 1.1875, "learning_rate": 1.0665820588206644e-06, "loss": 0.1472, "step": 6745 }, { "epoch": 182.32432432432432, "grad_norm": 0.419921875, "learning_rate": 1.0633559957979044e-06, "loss": 0.0566, "step": 6746 }, { "epoch": 182.35135135135135, "grad_norm": 0.76953125, "learning_rate": 1.0601347130442863e-06, "loss": 0.0752, "step": 6747 }, { "epoch": 182.3783783783784, "grad_norm": 0.58203125, "learning_rate": 1.056918211203109e-06, "loss": 0.077, "step": 6748 }, { "epoch": 182.40540540540542, "grad_norm": 1.078125, "learning_rate": 1.0537064909167283e-06, "loss": 0.1223, "step": 6749 }, { "epoch": 182.43243243243242, "grad_norm": 0.5390625, "learning_rate": 1.050499552826545e-06, "loss": 0.0732, "step": 6750 }, { "epoch": 182.45945945945945, "grad_norm": 1.0625, "learning_rate": 1.0472973975729937e-06, "loss": 0.1206, "step": 6751 }, { "epoch": 182.48648648648648, "grad_norm": 0.9453125, "learning_rate": 1.0441000257955662e-06, "loss": 0.1062, "step": 6752 }, { "epoch": 182.51351351351352, "grad_norm": 1.703125, "learning_rate": 1.040907438132796e-06, "loss": 0.2049, "step": 6753 }, { "epoch": 182.54054054054055, "grad_norm": 0.859375, "learning_rate": 1.0377196352222506e-06, "loss": 0.0924, "step": 6754 }, { "epoch": 182.56756756756758, "grad_norm": 1.1953125, "learning_rate": 1.0345366177005544e-06, "loss": 0.1661, "step": 6755 }, { "epoch": 182.59459459459458, "grad_norm": 0.75390625, "learning_rate": 1.0313583862033766e-06, "loss": 0.0994, "step": 6756 }, { "epoch": 182.6216216216216, "grad_norm": 1.3671875, "learning_rate": 1.0281849413654205e-06, "loss": 0.2098, "step": 6757 }, { "epoch": 182.64864864864865, "grad_norm": 1.0546875, "learning_rate": 1.025016283820443e-06, "loss": 0.1408, "step": 6758 }, { "epoch": 182.67567567567568, "grad_norm": 0.94921875, "learning_rate": 1.0218524142012354e-06, "loss": 0.0838, "step": 6759 }, { "epoch": 182.7027027027027, "grad_norm": 0.8515625, "learning_rate": 1.0186933331396448e-06, "loss": 0.0846, "step": 6760 }, { "epoch": 182.72972972972974, "grad_norm": 1.03125, "learning_rate": 1.0155390412665527e-06, "loss": 0.1502, "step": 6761 }, { "epoch": 182.75675675675674, "grad_norm": 1.109375, "learning_rate": 1.012389539211886e-06, "loss": 0.1772, "step": 6762 }, { "epoch": 182.78378378378378, "grad_norm": 0.66015625, "learning_rate": 1.0092448276046217e-06, "loss": 0.0795, "step": 6763 }, { "epoch": 182.8108108108108, "grad_norm": 1.1015625, "learning_rate": 1.0061049070727664e-06, "loss": 0.1264, "step": 6764 }, { "epoch": 182.83783783783784, "grad_norm": 0.90234375, "learning_rate": 1.0029697782433873e-06, "loss": 0.1031, "step": 6765 }, { "epoch": 182.86486486486487, "grad_norm": 0.88671875, "learning_rate": 9.998394417425755e-07, "loss": 0.1238, "step": 6766 }, { "epoch": 182.8918918918919, "grad_norm": 1.0078125, "learning_rate": 9.967138981954837e-07, "loss": 0.1531, "step": 6767 }, { "epoch": 182.9189189189189, "grad_norm": 0.76171875, "learning_rate": 9.935931482262934e-07, "loss": 0.0733, "step": 6768 }, { "epoch": 182.94594594594594, "grad_norm": 0.671875, "learning_rate": 9.904771924582363e-07, "loss": 0.0718, "step": 6769 }, { "epoch": 182.97297297297297, "grad_norm": 1.0078125, "learning_rate": 9.87366031513584e-07, "loss": 0.1199, "step": 6770 }, { "epoch": 183.0, "grad_norm": 1.15625, "learning_rate": 9.84259666013651e-07, "loss": 0.1448, "step": 6771 }, { "epoch": 183.02702702702703, "grad_norm": 1.09375, "learning_rate": 9.811580965787965e-07, "loss": 0.1005, "step": 6772 }, { "epoch": 183.05405405405406, "grad_norm": 0.921875, "learning_rate": 9.780613238284192e-07, "loss": 0.1006, "step": 6773 }, { "epoch": 183.0810810810811, "grad_norm": 0.74609375, "learning_rate": 9.749693483809552e-07, "loss": 0.0918, "step": 6774 }, { "epoch": 183.1081081081081, "grad_norm": 0.578125, "learning_rate": 9.718821708538967e-07, "loss": 0.0649, "step": 6775 }, { "epoch": 183.13513513513513, "grad_norm": 0.921875, "learning_rate": 9.687997918637586e-07, "loss": 0.107, "step": 6776 }, { "epoch": 183.16216216216216, "grad_norm": 1.390625, "learning_rate": 9.657222120261128e-07, "loss": 0.188, "step": 6777 }, { "epoch": 183.1891891891892, "grad_norm": 0.75390625, "learning_rate": 9.6264943195557e-07, "loss": 0.0917, "step": 6778 }, { "epoch": 183.21621621621622, "grad_norm": 1.2265625, "learning_rate": 9.595814522657758e-07, "loss": 0.1822, "step": 6779 }, { "epoch": 183.24324324324326, "grad_norm": 0.60546875, "learning_rate": 9.565182735694206e-07, "loss": 0.0774, "step": 6780 }, { "epoch": 183.27027027027026, "grad_norm": 1.140625, "learning_rate": 9.534598964782426e-07, "loss": 0.1084, "step": 6781 }, { "epoch": 183.2972972972973, "grad_norm": 0.84765625, "learning_rate": 9.50406321603009e-07, "loss": 0.1144, "step": 6782 }, { "epoch": 183.32432432432432, "grad_norm": 0.65625, "learning_rate": 9.473575495535347e-07, "loss": 0.0709, "step": 6783 }, { "epoch": 183.35135135135135, "grad_norm": 0.94140625, "learning_rate": 9.443135809386827e-07, "loss": 0.1017, "step": 6784 }, { "epoch": 183.3783783783784, "grad_norm": 0.78515625, "learning_rate": 9.412744163663389e-07, "loss": 0.0756, "step": 6785 }, { "epoch": 183.40540540540542, "grad_norm": 0.75, "learning_rate": 9.382400564434429e-07, "loss": 0.0869, "step": 6786 }, { "epoch": 183.43243243243242, "grad_norm": 1.0859375, "learning_rate": 9.352105017759766e-07, "loss": 0.1103, "step": 6787 }, { "epoch": 183.45945945945945, "grad_norm": 0.77734375, "learning_rate": 9.321857529689532e-07, "loss": 0.0906, "step": 6788 }, { "epoch": 183.48648648648648, "grad_norm": 0.96875, "learning_rate": 9.291658106264312e-07, "loss": 0.1467, "step": 6789 }, { "epoch": 183.51351351351352, "grad_norm": 0.97265625, "learning_rate": 9.261506753515059e-07, "loss": 0.1211, "step": 6790 }, { "epoch": 183.54054054054055, "grad_norm": 1.3671875, "learning_rate": 9.231403477463152e-07, "loss": 0.2429, "step": 6791 }, { "epoch": 183.56756756756758, "grad_norm": 0.68359375, "learning_rate": 9.201348284120448e-07, "loss": 0.0794, "step": 6792 }, { "epoch": 183.59459459459458, "grad_norm": 0.8515625, "learning_rate": 9.171341179489034e-07, "loss": 0.0929, "step": 6793 }, { "epoch": 183.6216216216216, "grad_norm": 1.3359375, "learning_rate": 9.141382169561479e-07, "loss": 0.1253, "step": 6794 }, { "epoch": 183.64864864864865, "grad_norm": 1.078125, "learning_rate": 9.111471260320831e-07, "loss": 0.122, "step": 6795 }, { "epoch": 183.67567567567568, "grad_norm": 1.78125, "learning_rate": 9.081608457740392e-07, "loss": 0.2592, "step": 6796 }, { "epoch": 183.7027027027027, "grad_norm": 1.2578125, "learning_rate": 9.051793767783895e-07, "loss": 0.181, "step": 6797 }, { "epoch": 183.72972972972974, "grad_norm": 0.85546875, "learning_rate": 9.022027196405547e-07, "loss": 0.1075, "step": 6798 }, { "epoch": 183.75675675675674, "grad_norm": 1.2265625, "learning_rate": 8.992308749549788e-07, "loss": 0.1576, "step": 6799 }, { "epoch": 183.78378378378378, "grad_norm": 1.078125, "learning_rate": 8.962638433151676e-07, "loss": 0.1267, "step": 6800 }, { "epoch": 183.8108108108108, "grad_norm": 0.91796875, "learning_rate": 8.933016253136389e-07, "loss": 0.1064, "step": 6801 }, { "epoch": 183.83783783783784, "grad_norm": 1.3984375, "learning_rate": 8.90344221541975e-07, "loss": 0.1344, "step": 6802 }, { "epoch": 183.86486486486487, "grad_norm": 0.419921875, "learning_rate": 8.873916325907728e-07, "loss": 0.0608, "step": 6803 }, { "epoch": 183.8918918918919, "grad_norm": 0.91015625, "learning_rate": 8.844438590496885e-07, "loss": 0.1038, "step": 6804 }, { "epoch": 183.9189189189189, "grad_norm": 1.015625, "learning_rate": 8.815009015073983e-07, "loss": 0.0887, "step": 6805 }, { "epoch": 183.94594594594594, "grad_norm": 0.91796875, "learning_rate": 8.78562760551635e-07, "loss": 0.1216, "step": 6806 }, { "epoch": 183.97297297297297, "grad_norm": 0.9765625, "learning_rate": 8.756294367691542e-07, "loss": 0.0978, "step": 6807 }, { "epoch": 184.0, "grad_norm": 1.3359375, "learning_rate": 8.727009307457596e-07, "loss": 0.1503, "step": 6808 }, { "epoch": 184.02702702702703, "grad_norm": 1.375, "learning_rate": 8.697772430662859e-07, "loss": 0.195, "step": 6809 }, { "epoch": 184.05405405405406, "grad_norm": 1.2421875, "learning_rate": 8.66858374314608e-07, "loss": 0.1319, "step": 6810 }, { "epoch": 184.0810810810811, "grad_norm": 1.03125, "learning_rate": 8.6394432507364e-07, "loss": 0.127, "step": 6811 }, { "epoch": 184.1081081081081, "grad_norm": 0.72265625, "learning_rate": 8.610350959253332e-07, "loss": 0.0747, "step": 6812 }, { "epoch": 184.13513513513513, "grad_norm": 0.92578125, "learning_rate": 8.581306874506728e-07, "loss": 0.1128, "step": 6813 }, { "epoch": 184.16216216216216, "grad_norm": 1.0859375, "learning_rate": 8.552311002296865e-07, "loss": 0.1268, "step": 6814 }, { "epoch": 184.1891891891892, "grad_norm": 0.9296875, "learning_rate": 8.523363348414359e-07, "loss": 0.1113, "step": 6815 }, { "epoch": 184.21621621621622, "grad_norm": 0.60546875, "learning_rate": 8.4944639186402e-07, "loss": 0.0634, "step": 6816 }, { "epoch": 184.24324324324326, "grad_norm": 1.171875, "learning_rate": 8.465612718745741e-07, "loss": 0.1421, "step": 6817 }, { "epoch": 184.27027027027026, "grad_norm": 0.96484375, "learning_rate": 8.436809754492736e-07, "loss": 0.11, "step": 6818 }, { "epoch": 184.2972972972973, "grad_norm": 1.203125, "learning_rate": 8.408055031633249e-07, "loss": 0.1306, "step": 6819 }, { "epoch": 184.32432432432432, "grad_norm": 0.921875, "learning_rate": 8.379348555909799e-07, "loss": 0.1168, "step": 6820 }, { "epoch": 184.35135135135135, "grad_norm": 0.98828125, "learning_rate": 8.350690333055161e-07, "loss": 0.1219, "step": 6821 }, { "epoch": 184.3783783783784, "grad_norm": 0.8203125, "learning_rate": 8.322080368792534e-07, "loss": 0.11, "step": 6822 }, { "epoch": 184.40540540540542, "grad_norm": 1.0546875, "learning_rate": 8.293518668835487e-07, "loss": 0.1224, "step": 6823 }, { "epoch": 184.43243243243242, "grad_norm": 1.125, "learning_rate": 8.265005238887957e-07, "loss": 0.1311, "step": 6824 }, { "epoch": 184.45945945945945, "grad_norm": 1.2734375, "learning_rate": 8.236540084644168e-07, "loss": 0.18, "step": 6825 }, { "epoch": 184.48648648648648, "grad_norm": 0.859375, "learning_rate": 8.208123211788821e-07, "loss": 0.0986, "step": 6826 }, { "epoch": 184.51351351351352, "grad_norm": 1.390625, "learning_rate": 8.179754625996821e-07, "loss": 0.1992, "step": 6827 }, { "epoch": 184.54054054054055, "grad_norm": 0.98828125, "learning_rate": 8.151434332933611e-07, "loss": 0.1216, "step": 6828 }, { "epoch": 184.56756756756758, "grad_norm": 0.7265625, "learning_rate": 8.123162338254859e-07, "loss": 0.0806, "step": 6829 }, { "epoch": 184.59459459459458, "grad_norm": 1.265625, "learning_rate": 8.094938647606604e-07, "loss": 0.1758, "step": 6830 }, { "epoch": 184.6216216216216, "grad_norm": 0.71484375, "learning_rate": 8.066763266625282e-07, "loss": 0.0978, "step": 6831 }, { "epoch": 184.64864864864865, "grad_norm": 0.74609375, "learning_rate": 8.038636200937672e-07, "loss": 0.0805, "step": 6832 }, { "epoch": 184.67567567567568, "grad_norm": 0.94921875, "learning_rate": 8.01055745616086e-07, "loss": 0.0928, "step": 6833 }, { "epoch": 184.7027027027027, "grad_norm": 1.2578125, "learning_rate": 7.982527037902365e-07, "loss": 0.1613, "step": 6834 }, { "epoch": 184.72972972972974, "grad_norm": 1.03125, "learning_rate": 7.95454495175993e-07, "loss": 0.1194, "step": 6835 }, { "epoch": 184.75675675675674, "grad_norm": 0.7421875, "learning_rate": 7.926611203321777e-07, "loss": 0.088, "step": 6836 }, { "epoch": 184.78378378378378, "grad_norm": 0.79296875, "learning_rate": 7.898725798166417e-07, "loss": 0.0911, "step": 6837 }, { "epoch": 184.8108108108108, "grad_norm": 1.2890625, "learning_rate": 7.870888741862648e-07, "loss": 0.1565, "step": 6838 }, { "epoch": 184.83783783783784, "grad_norm": 0.84375, "learning_rate": 7.843100039969742e-07, "loss": 0.1077, "step": 6839 }, { "epoch": 184.86486486486487, "grad_norm": 0.61328125, "learning_rate": 7.815359698037205e-07, "loss": 0.0616, "step": 6840 }, { "epoch": 184.8918918918919, "grad_norm": 0.859375, "learning_rate": 7.787667721604941e-07, "loss": 0.0986, "step": 6841 }, { "epoch": 184.9189189189189, "grad_norm": 0.96484375, "learning_rate": 7.760024116203162e-07, "loss": 0.1134, "step": 6842 }, { "epoch": 184.94594594594594, "grad_norm": 0.58984375, "learning_rate": 7.732428887352427e-07, "loss": 0.0638, "step": 6843 }, { "epoch": 184.97297297297297, "grad_norm": 0.6328125, "learning_rate": 7.704882040563688e-07, "loss": 0.0867, "step": 6844 }, { "epoch": 185.0, "grad_norm": 1.2421875, "learning_rate": 7.677383581338154e-07, "loss": 0.1616, "step": 6845 }, { "epoch": 185.02702702702703, "grad_norm": 0.83203125, "learning_rate": 7.649933515167407e-07, "loss": 0.0889, "step": 6846 }, { "epoch": 185.05405405405406, "grad_norm": 1.25, "learning_rate": 7.622531847533365e-07, "loss": 0.1588, "step": 6847 }, { "epoch": 185.0810810810811, "grad_norm": 1.0390625, "learning_rate": 7.595178583908319e-07, "loss": 0.1279, "step": 6848 }, { "epoch": 185.1081081081081, "grad_norm": 0.76171875, "learning_rate": 7.567873729754788e-07, "loss": 0.0743, "step": 6849 }, { "epoch": 185.13513513513513, "grad_norm": 0.984375, "learning_rate": 7.540617290525743e-07, "loss": 0.131, "step": 6850 }, { "epoch": 185.16216216216216, "grad_norm": 0.65234375, "learning_rate": 7.513409271664413e-07, "loss": 0.0751, "step": 6851 }, { "epoch": 185.1891891891892, "grad_norm": 0.6171875, "learning_rate": 7.486249678604368e-07, "loss": 0.0774, "step": 6852 }, { "epoch": 185.21621621621622, "grad_norm": 1.015625, "learning_rate": 7.459138516769492e-07, "loss": 0.1253, "step": 6853 }, { "epoch": 185.24324324324326, "grad_norm": 1.171875, "learning_rate": 7.432075791574123e-07, "loss": 0.1683, "step": 6854 }, { "epoch": 185.27027027027026, "grad_norm": 0.81640625, "learning_rate": 7.405061508422712e-07, "loss": 0.1008, "step": 6855 }, { "epoch": 185.2972972972973, "grad_norm": 0.49609375, "learning_rate": 7.378095672710194e-07, "loss": 0.0573, "step": 6856 }, { "epoch": 185.32432432432432, "grad_norm": 1.2265625, "learning_rate": 7.35117828982182e-07, "loss": 0.167, "step": 6857 }, { "epoch": 185.35135135135135, "grad_norm": 0.87109375, "learning_rate": 7.324309365133064e-07, "loss": 0.0752, "step": 6858 }, { "epoch": 185.3783783783784, "grad_norm": 0.9609375, "learning_rate": 7.297488904009803e-07, "loss": 0.114, "step": 6859 }, { "epoch": 185.40540540540542, "grad_norm": 0.78125, "learning_rate": 7.270716911808279e-07, "loss": 0.0887, "step": 6860 }, { "epoch": 185.43243243243242, "grad_norm": 1.046875, "learning_rate": 7.243993393874882e-07, "loss": 0.1163, "step": 6861 }, { "epoch": 185.45945945945945, "grad_norm": 0.404296875, "learning_rate": 7.217318355546509e-07, "loss": 0.0595, "step": 6862 }, { "epoch": 185.48648648648648, "grad_norm": 0.66796875, "learning_rate": 7.190691802150317e-07, "loss": 0.0777, "step": 6863 }, { "epoch": 185.51351351351352, "grad_norm": 0.63671875, "learning_rate": 7.16411373900372e-07, "loss": 0.0804, "step": 6864 }, { "epoch": 185.54054054054055, "grad_norm": 1.03125, "learning_rate": 7.137584171414524e-07, "loss": 0.1546, "step": 6865 }, { "epoch": 185.56756756756758, "grad_norm": 0.65625, "learning_rate": 7.111103104680772e-07, "loss": 0.0785, "step": 6866 }, { "epoch": 185.59459459459458, "grad_norm": 0.7890625, "learning_rate": 7.084670544090899e-07, "loss": 0.0727, "step": 6867 }, { "epoch": 185.6216216216216, "grad_norm": 1.2265625, "learning_rate": 7.058286494923655e-07, "loss": 0.1269, "step": 6868 }, { "epoch": 185.64864864864865, "grad_norm": 0.7265625, "learning_rate": 7.031950962447992e-07, "loss": 0.0835, "step": 6869 }, { "epoch": 185.67567567567568, "grad_norm": 1.015625, "learning_rate": 7.005663951923285e-07, "loss": 0.1178, "step": 6870 }, { "epoch": 185.7027027027027, "grad_norm": 0.6015625, "learning_rate": 6.979425468599227e-07, "loss": 0.0601, "step": 6871 }, { "epoch": 185.72972972972974, "grad_norm": 0.875, "learning_rate": 6.953235517715678e-07, "loss": 0.1171, "step": 6872 }, { "epoch": 185.75675675675674, "grad_norm": 0.95703125, "learning_rate": 6.927094104503012e-07, "loss": 0.1336, "step": 6873 }, { "epoch": 185.78378378378378, "grad_norm": 0.80859375, "learning_rate": 6.901001234181748e-07, "loss": 0.0761, "step": 6874 }, { "epoch": 185.8108108108108, "grad_norm": 0.97265625, "learning_rate": 6.874956911962743e-07, "loss": 0.0894, "step": 6875 }, { "epoch": 185.83783783783784, "grad_norm": 0.9375, "learning_rate": 6.848961143047227e-07, "loss": 0.0851, "step": 6876 }, { "epoch": 185.86486486486487, "grad_norm": 0.984375, "learning_rate": 6.82301393262666e-07, "loss": 0.1583, "step": 6877 }, { "epoch": 185.8918918918919, "grad_norm": 0.80078125, "learning_rate": 6.79711528588281e-07, "loss": 0.0969, "step": 6878 }, { "epoch": 185.9189189189189, "grad_norm": 1.28125, "learning_rate": 6.771265207987848e-07, "loss": 0.2151, "step": 6879 }, { "epoch": 185.94594594594594, "grad_norm": 1.609375, "learning_rate": 6.74546370410406e-07, "loss": 0.217, "step": 6880 }, { "epoch": 185.97297297297297, "grad_norm": 0.7890625, "learning_rate": 6.719710779384242e-07, "loss": 0.1017, "step": 6881 }, { "epoch": 186.0, "grad_norm": 0.9453125, "learning_rate": 6.694006438971279e-07, "loss": 0.1075, "step": 6882 }, { "epoch": 186.02702702702703, "grad_norm": 0.73828125, "learning_rate": 6.668350687998565e-07, "loss": 0.0816, "step": 6883 }, { "epoch": 186.05405405405406, "grad_norm": 1.1953125, "learning_rate": 6.642743531589585e-07, "loss": 0.1526, "step": 6884 }, { "epoch": 186.0810810810811, "grad_norm": 0.51171875, "learning_rate": 6.617184974858304e-07, "loss": 0.0699, "step": 6885 }, { "epoch": 186.1081081081081, "grad_norm": 0.78515625, "learning_rate": 6.591675022908806e-07, "loss": 0.0878, "step": 6886 }, { "epoch": 186.13513513513513, "grad_norm": 1.234375, "learning_rate": 6.566213680835598e-07, "loss": 0.1329, "step": 6887 }, { "epoch": 186.16216216216216, "grad_norm": 1.03125, "learning_rate": 6.540800953723503e-07, "loss": 0.1083, "step": 6888 }, { "epoch": 186.1891891891892, "grad_norm": 0.6796875, "learning_rate": 6.51543684664746e-07, "loss": 0.0732, "step": 6889 }, { "epoch": 186.21621621621622, "grad_norm": 0.87109375, "learning_rate": 6.49012136467289e-07, "loss": 0.1062, "step": 6890 }, { "epoch": 186.24324324324326, "grad_norm": 1.1875, "learning_rate": 6.46485451285539e-07, "loss": 0.1213, "step": 6891 }, { "epoch": 186.27027027027026, "grad_norm": 0.68359375, "learning_rate": 6.439636296240892e-07, "loss": 0.0749, "step": 6892 }, { "epoch": 186.2972972972973, "grad_norm": 1.140625, "learning_rate": 6.414466719865592e-07, "loss": 0.1577, "step": 6893 }, { "epoch": 186.32432432432432, "grad_norm": 0.82421875, "learning_rate": 6.389345788755996e-07, "loss": 0.0908, "step": 6894 }, { "epoch": 186.35135135135135, "grad_norm": 1.2578125, "learning_rate": 6.364273507928842e-07, "loss": 0.133, "step": 6895 }, { "epoch": 186.3783783783784, "grad_norm": 0.74609375, "learning_rate": 6.339249882391263e-07, "loss": 0.0862, "step": 6896 }, { "epoch": 186.40540540540542, "grad_norm": 1.1328125, "learning_rate": 6.314274917140511e-07, "loss": 0.1274, "step": 6897 }, { "epoch": 186.43243243243242, "grad_norm": 0.8828125, "learning_rate": 6.289348617164292e-07, "loss": 0.1196, "step": 6898 }, { "epoch": 186.45945945945945, "grad_norm": 0.6015625, "learning_rate": 6.264470987440485e-07, "loss": 0.0635, "step": 6899 }, { "epoch": 186.48648648648648, "grad_norm": 1.1953125, "learning_rate": 6.239642032937282e-07, "loss": 0.1431, "step": 6900 }, { "epoch": 186.51351351351352, "grad_norm": 0.63671875, "learning_rate": 6.214861758613106e-07, "loss": 0.0809, "step": 6901 }, { "epoch": 186.54054054054055, "grad_norm": 0.9296875, "learning_rate": 6.190130169416802e-07, "loss": 0.1284, "step": 6902 }, { "epoch": 186.56756756756758, "grad_norm": 0.5078125, "learning_rate": 6.165447270287311e-07, "loss": 0.0694, "step": 6903 }, { "epoch": 186.59459459459458, "grad_norm": 1.2109375, "learning_rate": 6.140813066153967e-07, "loss": 0.1449, "step": 6904 }, { "epoch": 186.6216216216216, "grad_norm": 0.765625, "learning_rate": 6.116227561936333e-07, "loss": 0.0945, "step": 6905 }, { "epoch": 186.64864864864865, "grad_norm": 1.046875, "learning_rate": 6.091690762544261e-07, "loss": 0.1052, "step": 6906 }, { "epoch": 186.67567567567568, "grad_norm": 0.91015625, "learning_rate": 6.067202672877886e-07, "loss": 0.0977, "step": 6907 }, { "epoch": 186.7027027027027, "grad_norm": 0.54296875, "learning_rate": 6.042763297827603e-07, "loss": 0.0698, "step": 6908 }, { "epoch": 186.72972972972974, "grad_norm": 1.1953125, "learning_rate": 6.018372642274034e-07, "loss": 0.1612, "step": 6909 }, { "epoch": 186.75675675675674, "grad_norm": 0.98828125, "learning_rate": 5.9940307110882e-07, "loss": 0.1035, "step": 6910 }, { "epoch": 186.78378378378378, "grad_norm": 1.09375, "learning_rate": 5.96973750913124e-07, "loss": 0.1467, "step": 6911 }, { "epoch": 186.8108108108108, "grad_norm": 0.765625, "learning_rate": 5.945493041254635e-07, "loss": 0.0996, "step": 6912 }, { "epoch": 186.83783783783784, "grad_norm": 1.3828125, "learning_rate": 5.921297312300178e-07, "loss": 0.1992, "step": 6913 }, { "epoch": 186.86486486486487, "grad_norm": 1.28125, "learning_rate": 5.897150327099809e-07, "loss": 0.1149, "step": 6914 }, { "epoch": 186.8918918918919, "grad_norm": 0.73828125, "learning_rate": 5.873052090475839e-07, "loss": 0.0867, "step": 6915 }, { "epoch": 186.9189189189189, "grad_norm": 0.9921875, "learning_rate": 5.849002607240834e-07, "loss": 0.1402, "step": 6916 }, { "epoch": 186.94594594594594, "grad_norm": 1.1953125, "learning_rate": 5.825001882197561e-07, "loss": 0.1647, "step": 6917 }, { "epoch": 186.97297297297297, "grad_norm": 0.9765625, "learning_rate": 5.801049920139107e-07, "loss": 0.0976, "step": 6918 }, { "epoch": 187.0, "grad_norm": 0.93359375, "learning_rate": 5.777146725848781e-07, "loss": 0.1073, "step": 6919 }, { "epoch": 187.02702702702703, "grad_norm": 1.265625, "learning_rate": 5.753292304100183e-07, "loss": 0.1325, "step": 6920 }, { "epoch": 187.05405405405406, "grad_norm": 1.0, "learning_rate": 5.729486659657169e-07, "loss": 0.0981, "step": 6921 }, { "epoch": 187.0810810810811, "grad_norm": 0.34375, "learning_rate": 5.705729797273824e-07, "loss": 0.0522, "step": 6922 }, { "epoch": 187.1081081081081, "grad_norm": 0.91015625, "learning_rate": 5.682021721694547e-07, "loss": 0.107, "step": 6923 }, { "epoch": 187.13513513513513, "grad_norm": 0.95703125, "learning_rate": 5.658362437653941e-07, "loss": 0.0891, "step": 6924 }, { "epoch": 187.16216216216216, "grad_norm": 0.953125, "learning_rate": 5.634751949876866e-07, "loss": 0.1341, "step": 6925 }, { "epoch": 187.1891891891892, "grad_norm": 1.09375, "learning_rate": 5.611190263078464e-07, "loss": 0.1262, "step": 6926 }, { "epoch": 187.21621621621622, "grad_norm": 0.8046875, "learning_rate": 5.587677381964168e-07, "loss": 0.0864, "step": 6927 }, { "epoch": 187.24324324324326, "grad_norm": 1.453125, "learning_rate": 5.564213311229555e-07, "loss": 0.2128, "step": 6928 }, { "epoch": 187.27027027027026, "grad_norm": 0.69140625, "learning_rate": 5.540798055560569e-07, "loss": 0.0837, "step": 6929 }, { "epoch": 187.2972972972973, "grad_norm": 0.4453125, "learning_rate": 5.517431619633335e-07, "loss": 0.0594, "step": 6930 }, { "epoch": 187.32432432432432, "grad_norm": 0.83984375, "learning_rate": 5.494114008114254e-07, "loss": 0.0942, "step": 6931 }, { "epoch": 187.35135135135135, "grad_norm": 0.9375, "learning_rate": 5.470845225659965e-07, "loss": 0.1314, "step": 6932 }, { "epoch": 187.3783783783784, "grad_norm": 1.625, "learning_rate": 5.447625276917362e-07, "loss": 0.2465, "step": 6933 }, { "epoch": 187.40540540540542, "grad_norm": 0.96875, "learning_rate": 5.424454166523596e-07, "loss": 0.1142, "step": 6934 }, { "epoch": 187.43243243243242, "grad_norm": 0.4140625, "learning_rate": 5.401331899106022e-07, "loss": 0.0528, "step": 6935 }, { "epoch": 187.45945945945945, "grad_norm": 0.87890625, "learning_rate": 5.378258479282334e-07, "loss": 0.0881, "step": 6936 }, { "epoch": 187.48648648648648, "grad_norm": 1.0078125, "learning_rate": 5.355233911660373e-07, "loss": 0.1096, "step": 6937 }, { "epoch": 187.51351351351352, "grad_norm": 0.765625, "learning_rate": 5.332258200838242e-07, "loss": 0.0888, "step": 6938 }, { "epoch": 187.54054054054055, "grad_norm": 0.56640625, "learning_rate": 5.309331351404351e-07, "loss": 0.0719, "step": 6939 }, { "epoch": 187.56756756756758, "grad_norm": 0.490234375, "learning_rate": 5.286453367937289e-07, "loss": 0.0625, "step": 6940 }, { "epoch": 187.59459459459458, "grad_norm": 0.9296875, "learning_rate": 5.2636242550059e-07, "loss": 0.0997, "step": 6941 }, { "epoch": 187.6216216216216, "grad_norm": 1.03125, "learning_rate": 5.240844017169288e-07, "loss": 0.1111, "step": 6942 }, { "epoch": 187.64864864864865, "grad_norm": 0.94921875, "learning_rate": 5.218112658976732e-07, "loss": 0.133, "step": 6943 }, { "epoch": 187.67567567567568, "grad_norm": 1.1015625, "learning_rate": 5.195430184967903e-07, "loss": 0.1654, "step": 6944 }, { "epoch": 187.7027027027027, "grad_norm": 1.21875, "learning_rate": 5.172796599672485e-07, "loss": 0.1675, "step": 6945 }, { "epoch": 187.72972972972974, "grad_norm": 0.71484375, "learning_rate": 5.150211907610586e-07, "loss": 0.0848, "step": 6946 }, { "epoch": 187.75675675675674, "grad_norm": 0.6328125, "learning_rate": 5.127676113292513e-07, "loss": 0.0826, "step": 6947 }, { "epoch": 187.78378378378378, "grad_norm": 0.7265625, "learning_rate": 5.105189221218692e-07, "loss": 0.0846, "step": 6948 }, { "epoch": 187.8108108108108, "grad_norm": 0.84375, "learning_rate": 5.082751235879951e-07, "loss": 0.0933, "step": 6949 }, { "epoch": 187.83783783783784, "grad_norm": 0.609375, "learning_rate": 5.060362161757232e-07, "loss": 0.0739, "step": 6950 }, { "epoch": 187.86486486486487, "grad_norm": 1.0859375, "learning_rate": 5.038022003321735e-07, "loss": 0.1139, "step": 6951 }, { "epoch": 187.8918918918919, "grad_norm": 1.1875, "learning_rate": 5.015730765034921e-07, "loss": 0.1623, "step": 6952 }, { "epoch": 187.9189189189189, "grad_norm": 0.5703125, "learning_rate": 4.993488451348477e-07, "loss": 0.0701, "step": 6953 }, { "epoch": 187.94594594594594, "grad_norm": 0.484375, "learning_rate": 4.971295066704267e-07, "loss": 0.0605, "step": 6954 }, { "epoch": 187.97297297297297, "grad_norm": 1.140625, "learning_rate": 4.94915061553447e-07, "loss": 0.1294, "step": 6955 }, { "epoch": 188.0, "grad_norm": 0.828125, "learning_rate": 4.927055102261379e-07, "loss": 0.1314, "step": 6956 }, { "epoch": 188.02702702702703, "grad_norm": 1.484375, "learning_rate": 4.905008531297661e-07, "loss": 0.1927, "step": 6957 }, { "epoch": 188.05405405405406, "grad_norm": 1.2421875, "learning_rate": 4.883010907046071e-07, "loss": 0.1182, "step": 6958 }, { "epoch": 188.0810810810811, "grad_norm": 0.87890625, "learning_rate": 4.861062233899677e-07, "loss": 0.1037, "step": 6959 }, { "epoch": 188.1081081081081, "grad_norm": 0.83203125, "learning_rate": 4.839162516241725e-07, "loss": 0.0924, "step": 6960 }, { "epoch": 188.13513513513513, "grad_norm": 0.498046875, "learning_rate": 4.817311758445686e-07, "loss": 0.069, "step": 6961 }, { "epoch": 188.16216216216216, "grad_norm": 1.171875, "learning_rate": 4.795509964875267e-07, "loss": 0.115, "step": 6962 }, { "epoch": 188.1891891891892, "grad_norm": 1.0078125, "learning_rate": 4.773757139884427e-07, "loss": 0.1376, "step": 6963 }, { "epoch": 188.21621621621622, "grad_norm": 1.2109375, "learning_rate": 4.752053287817332e-07, "loss": 0.1597, "step": 6964 }, { "epoch": 188.24324324324326, "grad_norm": 0.85546875, "learning_rate": 4.7303984130082636e-07, "loss": 0.0987, "step": 6965 }, { "epoch": 188.27027027027026, "grad_norm": 1.0078125, "learning_rate": 4.708792519781874e-07, "loss": 0.1105, "step": 6966 }, { "epoch": 188.2972972972973, "grad_norm": 1.109375, "learning_rate": 4.687235612452989e-07, "loss": 0.1543, "step": 6967 }, { "epoch": 188.32432432432432, "grad_norm": 0.57421875, "learning_rate": 4.66572769532661e-07, "loss": 0.0727, "step": 6968 }, { "epoch": 188.35135135135135, "grad_norm": 0.7890625, "learning_rate": 4.6442687726979406e-07, "loss": 0.0951, "step": 6969 }, { "epoch": 188.3783783783784, "grad_norm": 0.921875, "learning_rate": 4.622858848852496e-07, "loss": 0.0898, "step": 6970 }, { "epoch": 188.40540540540542, "grad_norm": 0.91015625, "learning_rate": 4.6014979280658844e-07, "loss": 0.0964, "step": 6971 }, { "epoch": 188.43243243243242, "grad_norm": 0.6640625, "learning_rate": 4.580186014604054e-07, "loss": 0.0797, "step": 6972 }, { "epoch": 188.45945945945945, "grad_norm": 0.97265625, "learning_rate": 4.558923112723046e-07, "loss": 0.1129, "step": 6973 }, { "epoch": 188.48648648648648, "grad_norm": 1.015625, "learning_rate": 4.537709226669212e-07, "loss": 0.1465, "step": 6974 }, { "epoch": 188.51351351351352, "grad_norm": 0.94140625, "learning_rate": 4.5165443606790536e-07, "loss": 0.1123, "step": 6975 }, { "epoch": 188.54054054054055, "grad_norm": 1.2109375, "learning_rate": 4.4954285189793e-07, "loss": 0.1609, "step": 6976 }, { "epoch": 188.56756756756758, "grad_norm": 0.875, "learning_rate": 4.474361705786884e-07, "loss": 0.1046, "step": 6977 }, { "epoch": 188.59459459459458, "grad_norm": 0.62890625, "learning_rate": 4.453343925308995e-07, "loss": 0.0747, "step": 6978 }, { "epoch": 188.6216216216216, "grad_norm": 0.78515625, "learning_rate": 4.432375181742943e-07, "loss": 0.0886, "step": 6979 }, { "epoch": 188.64864864864865, "grad_norm": 0.95703125, "learning_rate": 4.411455479276294e-07, "loss": 0.1388, "step": 6980 }, { "epoch": 188.67567567567568, "grad_norm": 0.96484375, "learning_rate": 4.390584822086874e-07, "loss": 0.12, "step": 6981 }, { "epoch": 188.7027027027027, "grad_norm": 1.0625, "learning_rate": 4.369763214342598e-07, "loss": 0.1417, "step": 6982 }, { "epoch": 188.72972972972974, "grad_norm": 1.1953125, "learning_rate": 4.3489906602016686e-07, "loss": 0.1585, "step": 6983 }, { "epoch": 188.75675675675674, "grad_norm": 1.3984375, "learning_rate": 4.3282671638124883e-07, "loss": 0.135, "step": 6984 }, { "epoch": 188.78378378378378, "grad_norm": 1.34375, "learning_rate": 4.307592729313609e-07, "loss": 0.1724, "step": 6985 }, { "epoch": 188.8108108108108, "grad_norm": 0.95703125, "learning_rate": 4.2869673608338657e-07, "loss": 0.1075, "step": 6986 }, { "epoch": 188.83783783783784, "grad_norm": 1.5234375, "learning_rate": 4.2663910624921864e-07, "loss": 0.1849, "step": 6987 }, { "epoch": 188.86486486486487, "grad_norm": 1.1328125, "learning_rate": 4.245863838397812e-07, "loss": 0.1361, "step": 6988 }, { "epoch": 188.8918918918919, "grad_norm": 1.125, "learning_rate": 4.22538569265013e-07, "loss": 0.1339, "step": 6989 }, { "epoch": 188.9189189189189, "grad_norm": 1.078125, "learning_rate": 4.204956629338702e-07, "loss": 0.1582, "step": 6990 }, { "epoch": 188.94594594594594, "grad_norm": 1.375, "learning_rate": 4.184576652543321e-07, "loss": 0.1774, "step": 6991 }, { "epoch": 188.97297297297297, "grad_norm": 0.88671875, "learning_rate": 4.1642457663340075e-07, "loss": 0.1083, "step": 6992 }, { "epoch": 189.0, "grad_norm": 1.25, "learning_rate": 4.1439639747709037e-07, "loss": 0.1368, "step": 6993 }, { "epoch": 189.02702702702703, "grad_norm": 0.80859375, "learning_rate": 4.1237312819044085e-07, "loss": 0.084, "step": 6994 }, { "epoch": 189.05405405405406, "grad_norm": 0.6796875, "learning_rate": 4.1035476917750947e-07, "loss": 0.0863, "step": 6995 }, { "epoch": 189.0810810810811, "grad_norm": 0.494140625, "learning_rate": 4.083413208413711e-07, "loss": 0.0724, "step": 6996 }, { "epoch": 189.1081081081081, "grad_norm": 1.0234375, "learning_rate": 4.0633278358412075e-07, "loss": 0.1451, "step": 6997 }, { "epoch": 189.13513513513513, "grad_norm": 0.83203125, "learning_rate": 4.043291578068764e-07, "loss": 0.094, "step": 6998 }, { "epoch": 189.16216216216216, "grad_norm": 0.8671875, "learning_rate": 4.023304439097708e-07, "loss": 0.1055, "step": 6999 }, { "epoch": 189.1891891891892, "grad_norm": 1.0, "learning_rate": 4.003366422919569e-07, "loss": 0.1192, "step": 7000 }, { "epoch": 189.21621621621622, "grad_norm": 1.234375, "learning_rate": 3.9834775335160504e-07, "loss": 0.1641, "step": 7001 }, { "epoch": 189.24324324324326, "grad_norm": 0.6484375, "learning_rate": 3.963637774859114e-07, "loss": 0.0731, "step": 7002 }, { "epoch": 189.27027027027026, "grad_norm": 0.6015625, "learning_rate": 3.94384715091084e-07, "loss": 0.0741, "step": 7003 }, { "epoch": 189.2972972972973, "grad_norm": 0.68359375, "learning_rate": 3.9241056656234845e-07, "loss": 0.0881, "step": 7004 }, { "epoch": 189.32432432432432, "grad_norm": 1.0078125, "learning_rate": 3.9044133229395596e-07, "loss": 0.1255, "step": 7005 }, { "epoch": 189.35135135135135, "grad_norm": 0.81640625, "learning_rate": 3.8847701267917257e-07, "loss": 0.1179, "step": 7006 }, { "epoch": 189.3783783783784, "grad_norm": 1.140625, "learning_rate": 3.865176081102817e-07, "loss": 0.1543, "step": 7007 }, { "epoch": 189.40540540540542, "grad_norm": 1.671875, "learning_rate": 3.8456311897858424e-07, "loss": 0.2337, "step": 7008 }, { "epoch": 189.43243243243242, "grad_norm": 1.0546875, "learning_rate": 3.826135456744068e-07, "loss": 0.1243, "step": 7009 }, { "epoch": 189.45945945945945, "grad_norm": 1.3203125, "learning_rate": 3.8066888858708237e-07, "loss": 0.1408, "step": 7010 }, { "epoch": 189.48648648648648, "grad_norm": 0.9765625, "learning_rate": 3.7872914810497537e-07, "loss": 0.1133, "step": 7011 }, { "epoch": 189.51351351351352, "grad_norm": 1.0, "learning_rate": 3.7679432461545926e-07, "loss": 0.1221, "step": 7012 }, { "epoch": 189.54054054054055, "grad_norm": 0.8203125, "learning_rate": 3.748644185049249e-07, "loss": 0.0935, "step": 7013 }, { "epoch": 189.56756756756758, "grad_norm": 1.2890625, "learning_rate": 3.729394301587863e-07, "loss": 0.183, "step": 7014 }, { "epoch": 189.59459459459458, "grad_norm": 1.1640625, "learning_rate": 3.710193599614803e-07, "loss": 0.1457, "step": 7015 }, { "epoch": 189.6216216216216, "grad_norm": 0.546875, "learning_rate": 3.691042082964419e-07, "loss": 0.0721, "step": 7016 }, { "epoch": 189.64864864864865, "grad_norm": 0.921875, "learning_rate": 3.6719397554614844e-07, "loss": 0.0937, "step": 7017 }, { "epoch": 189.67567567567568, "grad_norm": 1.03125, "learning_rate": 3.6528866209207537e-07, "loss": 0.0899, "step": 7018 }, { "epoch": 189.7027027027027, "grad_norm": 0.76953125, "learning_rate": 3.633882683147238e-07, "loss": 0.0815, "step": 7019 }, { "epoch": 189.72972972972974, "grad_norm": 1.015625, "learning_rate": 3.6149279459361807e-07, "loss": 0.1177, "step": 7020 }, { "epoch": 189.75675675675674, "grad_norm": 0.55078125, "learning_rate": 3.5960224130728857e-07, "loss": 0.0668, "step": 7021 }, { "epoch": 189.78378378378378, "grad_norm": 1.1484375, "learning_rate": 3.5771660883328897e-07, "loss": 0.0947, "step": 7022 }, { "epoch": 189.8108108108108, "grad_norm": 0.97265625, "learning_rate": 3.5583589754819293e-07, "loss": 0.1139, "step": 7023 }, { "epoch": 189.83783783783784, "grad_norm": 0.72265625, "learning_rate": 3.5396010782758337e-07, "loss": 0.0726, "step": 7024 }, { "epoch": 189.86486486486487, "grad_norm": 1.0390625, "learning_rate": 3.520892400460662e-07, "loss": 0.1121, "step": 7025 }, { "epoch": 189.8918918918919, "grad_norm": 0.77734375, "learning_rate": 3.502232945772677e-07, "loss": 0.086, "step": 7026 }, { "epoch": 189.9189189189189, "grad_norm": 0.87109375, "learning_rate": 3.4836227179382007e-07, "loss": 0.1006, "step": 7027 }, { "epoch": 189.94594594594594, "grad_norm": 1.2265625, "learning_rate": 3.465061720673846e-07, "loss": 0.1694, "step": 7028 }, { "epoch": 189.97297297297297, "grad_norm": 0.83984375, "learning_rate": 3.446549957686285e-07, "loss": 0.1002, "step": 7029 }, { "epoch": 190.0, "grad_norm": 0.86328125, "learning_rate": 3.42808743267245e-07, "loss": 0.0925, "step": 7030 }, { "epoch": 190.02702702702703, "grad_norm": 1.03125, "learning_rate": 3.4096741493194197e-07, "loss": 0.1269, "step": 7031 }, { "epoch": 190.05405405405406, "grad_norm": 1.1328125, "learning_rate": 3.3913101113043355e-07, "loss": 0.135, "step": 7032 }, { "epoch": 190.0810810810811, "grad_norm": 1.015625, "learning_rate": 3.372995322294681e-07, "loss": 0.1148, "step": 7033 }, { "epoch": 190.1081081081081, "grad_norm": 0.5625, "learning_rate": 3.354729785947974e-07, "loss": 0.0695, "step": 7034 }, { "epoch": 190.13513513513513, "grad_norm": 0.796875, "learning_rate": 3.336513505911937e-07, "loss": 0.1146, "step": 7035 }, { "epoch": 190.16216216216216, "grad_norm": 0.86328125, "learning_rate": 3.318346485824436e-07, "loss": 0.1134, "step": 7036 }, { "epoch": 190.1891891891892, "grad_norm": 1.15625, "learning_rate": 3.3002287293135426e-07, "loss": 0.1576, "step": 7037 }, { "epoch": 190.21621621621622, "grad_norm": 0.5859375, "learning_rate": 3.282160239997445e-07, "loss": 0.0671, "step": 7038 }, { "epoch": 190.24324324324326, "grad_norm": 0.61328125, "learning_rate": 3.264141021484507e-07, "loss": 0.0672, "step": 7039 }, { "epoch": 190.27027027027026, "grad_norm": 0.9296875, "learning_rate": 3.2461710773732947e-07, "loss": 0.1155, "step": 7040 }, { "epoch": 190.2972972972973, "grad_norm": 0.9921875, "learning_rate": 3.2282504112524647e-07, "loss": 0.1457, "step": 7041 }, { "epoch": 190.32432432432432, "grad_norm": 0.6875, "learning_rate": 3.210379026700877e-07, "loss": 0.0816, "step": 7042 }, { "epoch": 190.35135135135135, "grad_norm": 0.7265625, "learning_rate": 3.1925569272875655e-07, "loss": 0.0839, "step": 7043 }, { "epoch": 190.3783783783784, "grad_norm": 0.51171875, "learning_rate": 3.1747841165716274e-07, "loss": 0.0695, "step": 7044 }, { "epoch": 190.40540540540542, "grad_norm": 0.6796875, "learning_rate": 3.1570605981024457e-07, "loss": 0.0846, "step": 7045 }, { "epoch": 190.43243243243242, "grad_norm": 0.796875, "learning_rate": 3.1393863754194396e-07, "loss": 0.0927, "step": 7046 }, { "epoch": 190.45945945945945, "grad_norm": 0.671875, "learning_rate": 3.1217614520522854e-07, "loss": 0.0801, "step": 7047 }, { "epoch": 190.48648648648648, "grad_norm": 1.2890625, "learning_rate": 3.104185831520778e-07, "loss": 0.2124, "step": 7048 }, { "epoch": 190.51351351351352, "grad_norm": 0.78515625, "learning_rate": 3.0866595173348056e-07, "loss": 0.096, "step": 7049 }, { "epoch": 190.54054054054055, "grad_norm": 0.83984375, "learning_rate": 3.069182512994512e-07, "loss": 0.0901, "step": 7050 }, { "epoch": 190.56756756756758, "grad_norm": 0.3828125, "learning_rate": 3.051754821990105e-07, "loss": 0.0538, "step": 7051 }, { "epoch": 190.59459459459458, "grad_norm": 0.90625, "learning_rate": 3.034376447802023e-07, "loss": 0.1039, "step": 7052 }, { "epoch": 190.6216216216216, "grad_norm": 1.4921875, "learning_rate": 3.017047393900768e-07, "loss": 0.1777, "step": 7053 }, { "epoch": 190.64864864864865, "grad_norm": 1.015625, "learning_rate": 2.9997676637470994e-07, "loss": 0.1152, "step": 7054 }, { "epoch": 190.67567567567568, "grad_norm": 1.2265625, "learning_rate": 2.9825372607918125e-07, "loss": 0.1136, "step": 7055 }, { "epoch": 190.7027027027027, "grad_norm": 0.71875, "learning_rate": 2.9653561884759317e-07, "loss": 0.0782, "step": 7056 }, { "epoch": 190.72972972972974, "grad_norm": 0.79296875, "learning_rate": 2.94822445023063e-07, "loss": 0.0975, "step": 7057 }, { "epoch": 190.75675675675674, "grad_norm": 0.71875, "learning_rate": 2.9311420494771145e-07, "loss": 0.0777, "step": 7058 }, { "epoch": 190.78378378378378, "grad_norm": 1.1484375, "learning_rate": 2.9141089896269346e-07, "loss": 0.1288, "step": 7059 }, { "epoch": 190.8108108108108, "grad_norm": 1.0546875, "learning_rate": 2.897125274081619e-07, "loss": 0.1279, "step": 7060 }, { "epoch": 190.83783783783784, "grad_norm": 0.7265625, "learning_rate": 2.880190906232899e-07, "loss": 0.0892, "step": 7061 }, { "epoch": 190.86486486486487, "grad_norm": 0.984375, "learning_rate": 2.8633058894626806e-07, "loss": 0.1068, "step": 7062 }, { "epoch": 190.8918918918919, "grad_norm": 0.43359375, "learning_rate": 2.846470227142961e-07, "loss": 0.0575, "step": 7063 }, { "epoch": 190.9189189189189, "grad_norm": 1.078125, "learning_rate": 2.829683922635912e-07, "loss": 0.1616, "step": 7064 }, { "epoch": 190.94594594594594, "grad_norm": 1.390625, "learning_rate": 2.812946979293879e-07, "loss": 0.1538, "step": 7065 }, { "epoch": 190.97297297297297, "grad_norm": 0.6328125, "learning_rate": 2.7962594004592444e-07, "loss": 0.0772, "step": 7066 }, { "epoch": 191.0, "grad_norm": 1.0703125, "learning_rate": 2.779621189464676e-07, "loss": 0.1643, "step": 7067 }, { "epoch": 191.02702702702703, "grad_norm": 1.6328125, "learning_rate": 2.763032349632877e-07, "loss": 0.192, "step": 7068 }, { "epoch": 191.05405405405406, "grad_norm": 0.69140625, "learning_rate": 2.7464928842767244e-07, "loss": 0.0868, "step": 7069 }, { "epoch": 191.0810810810811, "grad_norm": 0.8359375, "learning_rate": 2.7300027966992437e-07, "loss": 0.0881, "step": 7070 }, { "epoch": 191.1081081081081, "grad_norm": 0.67578125, "learning_rate": 2.71356209019355e-07, "loss": 0.0764, "step": 7071 }, { "epoch": 191.13513513513513, "grad_norm": 0.5390625, "learning_rate": 2.697170768042989e-07, "loss": 0.0719, "step": 7072 }, { "epoch": 191.16216216216216, "grad_norm": 0.984375, "learning_rate": 2.680828833520943e-07, "loss": 0.1282, "step": 7073 }, { "epoch": 191.1891891891892, "grad_norm": 1.109375, "learning_rate": 2.664536289890995e-07, "loss": 0.134, "step": 7074 }, { "epoch": 191.21621621621622, "grad_norm": 0.5859375, "learning_rate": 2.648293140406877e-07, "loss": 0.0705, "step": 7075 }, { "epoch": 191.24324324324326, "grad_norm": 0.91015625, "learning_rate": 2.632099388312409e-07, "loss": 0.1065, "step": 7076 }, { "epoch": 191.27027027027026, "grad_norm": 1.34375, "learning_rate": 2.615955036841561e-07, "loss": 0.1683, "step": 7077 }, { "epoch": 191.2972972972973, "grad_norm": 1.5, "learning_rate": 2.59986008921842e-07, "loss": 0.2156, "step": 7078 }, { "epoch": 191.32432432432432, "grad_norm": 0.71875, "learning_rate": 2.5838145486572765e-07, "loss": 0.0883, "step": 7079 }, { "epoch": 191.35135135135135, "grad_norm": 1.1953125, "learning_rate": 2.567818418362455e-07, "loss": 0.1664, "step": 7080 }, { "epoch": 191.3783783783784, "grad_norm": 1.2421875, "learning_rate": 2.5518717015285133e-07, "loss": 0.1625, "step": 7081 }, { "epoch": 191.40540540540542, "grad_norm": 0.73046875, "learning_rate": 2.535974401340069e-07, "loss": 0.0867, "step": 7082 }, { "epoch": 191.43243243243242, "grad_norm": 0.482421875, "learning_rate": 2.5201265209718885e-07, "loss": 0.0659, "step": 7083 }, { "epoch": 191.45945945945945, "grad_norm": 1.53125, "learning_rate": 2.5043280635888565e-07, "loss": 0.1921, "step": 7084 }, { "epoch": 191.48648648648648, "grad_norm": 0.87890625, "learning_rate": 2.4885790323460614e-07, "loss": 0.1192, "step": 7085 }, { "epoch": 191.51351351351352, "grad_norm": 1.0234375, "learning_rate": 2.472879430388625e-07, "loss": 0.1223, "step": 7086 }, { "epoch": 191.54054054054055, "grad_norm": 1.1796875, "learning_rate": 2.457229260851818e-07, "loss": 0.1253, "step": 7087 }, { "epoch": 191.56756756756758, "grad_norm": 0.671875, "learning_rate": 2.4416285268611115e-07, "loss": 0.0792, "step": 7088 }, { "epoch": 191.59459459459458, "grad_norm": 0.9453125, "learning_rate": 2.426077231531987e-07, "loss": 0.1004, "step": 7089 }, { "epoch": 191.6216216216216, "grad_norm": 1.28125, "learning_rate": 2.410575377970181e-07, "loss": 0.155, "step": 7090 }, { "epoch": 191.64864864864865, "grad_norm": 1.0703125, "learning_rate": 2.3951229692714683e-07, "loss": 0.1421, "step": 7091 }, { "epoch": 191.67567567567568, "grad_norm": 1.0625, "learning_rate": 2.3797200085217685e-07, "loss": 0.1386, "step": 7092 }, { "epoch": 191.7027027027027, "grad_norm": 1.046875, "learning_rate": 2.3643664987971215e-07, "loss": 0.1223, "step": 7093 }, { "epoch": 191.72972972972974, "grad_norm": 0.84375, "learning_rate": 2.3490624431637132e-07, "loss": 0.1062, "step": 7094 }, { "epoch": 191.75675675675674, "grad_norm": 1.0625, "learning_rate": 2.333807844677821e-07, "loss": 0.1376, "step": 7095 }, { "epoch": 191.78378378378378, "grad_norm": 1.3359375, "learning_rate": 2.318602706385925e-07, "loss": 0.1646, "step": 7096 }, { "epoch": 191.8108108108108, "grad_norm": 0.89453125, "learning_rate": 2.303447031324485e-07, "loss": 0.1075, "step": 7097 }, { "epoch": 191.83783783783784, "grad_norm": 0.9375, "learning_rate": 2.2883408225202184e-07, "loss": 0.1051, "step": 7098 }, { "epoch": 191.86486486486487, "grad_norm": 0.75390625, "learning_rate": 2.2732840829899348e-07, "loss": 0.0904, "step": 7099 }, { "epoch": 191.8918918918919, "grad_norm": 1.015625, "learning_rate": 2.2582768157404787e-07, "loss": 0.1187, "step": 7100 }, { "epoch": 191.9189189189189, "grad_norm": 1.2734375, "learning_rate": 2.2433190237688973e-07, "loss": 0.122, "step": 7101 }, { "epoch": 191.94594594594594, "grad_norm": 0.9609375, "learning_rate": 2.2284107100623842e-07, "loss": 0.1058, "step": 7102 }, { "epoch": 191.97297297297297, "grad_norm": 1.0390625, "learning_rate": 2.2135518775981413e-07, "loss": 0.1051, "step": 7103 }, { "epoch": 192.0, "grad_norm": 1.2578125, "learning_rate": 2.1987425293435727e-07, "loss": 0.1415, "step": 7104 }, { "epoch": 192.02702702702703, "grad_norm": 0.94140625, "learning_rate": 2.1839826682562015e-07, "loss": 0.1006, "step": 7105 }, { "epoch": 192.05405405405406, "grad_norm": 1.1171875, "learning_rate": 2.1692722972836144e-07, "loss": 0.1464, "step": 7106 }, { "epoch": 192.0810810810811, "grad_norm": 1.4453125, "learning_rate": 2.1546114193636002e-07, "loss": 0.1964, "step": 7107 }, { "epoch": 192.1081081081081, "grad_norm": 0.77734375, "learning_rate": 2.1400000374239282e-07, "loss": 0.0939, "step": 7108 }, { "epoch": 192.13513513513513, "grad_norm": 0.82421875, "learning_rate": 2.125438154382653e-07, "loss": 0.0902, "step": 7109 }, { "epoch": 192.16216216216216, "grad_norm": 0.6640625, "learning_rate": 2.1109257731477817e-07, "loss": 0.0817, "step": 7110 }, { "epoch": 192.1891891891892, "grad_norm": 0.765625, "learning_rate": 2.0964628966175792e-07, "loss": 0.067, "step": 7111 }, { "epoch": 192.21621621621622, "grad_norm": 1.2890625, "learning_rate": 2.082049527680291e-07, "loss": 0.1909, "step": 7112 }, { "epoch": 192.24324324324326, "grad_norm": 0.90625, "learning_rate": 2.067685669214392e-07, "loss": 0.0996, "step": 7113 }, { "epoch": 192.27027027027026, "grad_norm": 0.53515625, "learning_rate": 2.0533713240883933e-07, "loss": 0.0727, "step": 7114 }, { "epoch": 192.2972972972973, "grad_norm": 0.953125, "learning_rate": 2.0391064951609252e-07, "loss": 0.1125, "step": 7115 }, { "epoch": 192.32432432432432, "grad_norm": 0.8828125, "learning_rate": 2.0248911852807916e-07, "loss": 0.1049, "step": 7116 }, { "epoch": 192.35135135135135, "grad_norm": 1.0078125, "learning_rate": 2.0107253972868057e-07, "loss": 0.1382, "step": 7117 }, { "epoch": 192.3783783783784, "grad_norm": 0.57421875, "learning_rate": 1.9966091340079818e-07, "loss": 0.0689, "step": 7118 }, { "epoch": 192.40540540540542, "grad_norm": 0.828125, "learning_rate": 1.9825423982634262e-07, "loss": 0.1079, "step": 7119 }, { "epoch": 192.43243243243242, "grad_norm": 1.1796875, "learning_rate": 1.9685251928623083e-07, "loss": 0.1366, "step": 7120 }, { "epoch": 192.45945945945945, "grad_norm": 0.953125, "learning_rate": 1.9545575206039446e-07, "loss": 0.105, "step": 7121 }, { "epoch": 192.48648648648648, "grad_norm": 1.3828125, "learning_rate": 1.9406393842777427e-07, "loss": 0.2219, "step": 7122 }, { "epoch": 192.51351351351352, "grad_norm": 0.9921875, "learning_rate": 1.926770786663229e-07, "loss": 0.1111, "step": 7123 }, { "epoch": 192.54054054054055, "grad_norm": 0.86328125, "learning_rate": 1.9129517305300215e-07, "loss": 0.1089, "step": 7124 }, { "epoch": 192.56756756756758, "grad_norm": 1.171875, "learning_rate": 1.899182218637885e-07, "loss": 0.1533, "step": 7125 }, { "epoch": 192.59459459459458, "grad_norm": 1.109375, "learning_rate": 1.8854622537366196e-07, "loss": 0.1249, "step": 7126 }, { "epoch": 192.6216216216216, "grad_norm": 0.87890625, "learning_rate": 1.871791838566228e-07, "loss": 0.0995, "step": 7127 }, { "epoch": 192.64864864864865, "grad_norm": 1.1953125, "learning_rate": 1.8581709758567213e-07, "loss": 0.1617, "step": 7128 }, { "epoch": 192.67567567567568, "grad_norm": 0.86328125, "learning_rate": 1.8445996683282562e-07, "loss": 0.096, "step": 7129 }, { "epoch": 192.7027027027027, "grad_norm": 0.81640625, "learning_rate": 1.8310779186911098e-07, "loss": 0.0919, "step": 7130 }, { "epoch": 192.72972972972974, "grad_norm": 1.34375, "learning_rate": 1.817605729645594e-07, "loss": 0.1996, "step": 7131 }, { "epoch": 192.75675675675674, "grad_norm": 1.1484375, "learning_rate": 1.804183103882251e-07, "loss": 0.1424, "step": 7132 }, { "epoch": 192.78378378378378, "grad_norm": 1.2109375, "learning_rate": 1.7908100440816032e-07, "loss": 0.1531, "step": 7133 }, { "epoch": 192.8108108108108, "grad_norm": 1.4453125, "learning_rate": 1.7774865529143204e-07, "loss": 0.1944, "step": 7134 }, { "epoch": 192.83783783783784, "grad_norm": 1.046875, "learning_rate": 1.7642126330411624e-07, "loss": 0.1183, "step": 7135 }, { "epoch": 192.86486486486487, "grad_norm": 0.8984375, "learning_rate": 1.750988287113009e-07, "loss": 0.0998, "step": 7136 }, { "epoch": 192.8918918918919, "grad_norm": 0.6484375, "learning_rate": 1.7378135177708588e-07, "loss": 0.0863, "step": 7137 }, { "epoch": 192.9189189189189, "grad_norm": 1.328125, "learning_rate": 1.724688327645718e-07, "loss": 0.1668, "step": 7138 }, { "epoch": 192.94594594594594, "grad_norm": 0.69140625, "learning_rate": 1.7116127193587962e-07, "loss": 0.0807, "step": 7139 }, { "epoch": 192.97297297297297, "grad_norm": 1.28125, "learning_rate": 1.6985866955213647e-07, "loss": 0.1713, "step": 7140 }, { "epoch": 193.0, "grad_norm": 0.98046875, "learning_rate": 1.68561025873476e-07, "loss": 0.0887, "step": 7141 }, { "epoch": 193.02702702702703, "grad_norm": 0.74609375, "learning_rate": 1.6726834115904643e-07, "loss": 0.094, "step": 7142 }, { "epoch": 193.05405405405406, "grad_norm": 1.046875, "learning_rate": 1.6598061566699963e-07, "loss": 0.1168, "step": 7143 }, { "epoch": 193.0810810810811, "grad_norm": 0.6328125, "learning_rate": 1.6469784965450762e-07, "loss": 0.0684, "step": 7144 }, { "epoch": 193.1081081081081, "grad_norm": 1.0546875, "learning_rate": 1.6342004337773774e-07, "loss": 0.125, "step": 7145 }, { "epoch": 193.13513513513513, "grad_norm": 0.82421875, "learning_rate": 1.6214719709188031e-07, "loss": 0.0968, "step": 7146 }, { "epoch": 193.16216216216216, "grad_norm": 1.1875, "learning_rate": 1.6087931105112642e-07, "loss": 0.1523, "step": 7147 }, { "epoch": 193.1891891891892, "grad_norm": 1.1171875, "learning_rate": 1.5961638550867908e-07, "loss": 0.1276, "step": 7148 }, { "epoch": 193.21621621621622, "grad_norm": 1.1328125, "learning_rate": 1.5835842071675045e-07, "loss": 0.1763, "step": 7149 }, { "epoch": 193.24324324324326, "grad_norm": 0.84375, "learning_rate": 1.5710541692656455e-07, "loss": 0.0933, "step": 7150 }, { "epoch": 193.27027027027026, "grad_norm": 0.7421875, "learning_rate": 1.5585737438835457e-07, "loss": 0.0957, "step": 7151 }, { "epoch": 193.2972972972973, "grad_norm": 0.640625, "learning_rate": 1.5461429335135447e-07, "loss": 0.0722, "step": 7152 }, { "epoch": 193.32432432432432, "grad_norm": 0.86328125, "learning_rate": 1.5337617406381843e-07, "loss": 0.1151, "step": 7153 }, { "epoch": 193.35135135135135, "grad_norm": 0.96484375, "learning_rate": 1.5214301677300425e-07, "loss": 0.1329, "step": 7154 }, { "epoch": 193.3783783783784, "grad_norm": 0.82421875, "learning_rate": 1.509148217251788e-07, "loss": 0.097, "step": 7155 }, { "epoch": 193.40540540540542, "grad_norm": 0.73828125, "learning_rate": 1.4969158916562086e-07, "loss": 0.0998, "step": 7156 }, { "epoch": 193.43243243243242, "grad_norm": 1.4921875, "learning_rate": 1.4847331933861286e-07, "loss": 0.2195, "step": 7157 }, { "epoch": 193.45945945945945, "grad_norm": 0.99609375, "learning_rate": 1.4726001248745457e-07, "loss": 0.1023, "step": 7158 }, { "epoch": 193.48648648648648, "grad_norm": 0.9609375, "learning_rate": 1.4605166885444388e-07, "loss": 0.1157, "step": 7159 }, { "epoch": 193.51351351351352, "grad_norm": 0.828125, "learning_rate": 1.4484828868089605e-07, "loss": 0.0814, "step": 7160 }, { "epoch": 193.54054054054055, "grad_norm": 1.046875, "learning_rate": 1.4364987220713278e-07, "loss": 0.1245, "step": 7161 }, { "epoch": 193.56756756756758, "grad_norm": 1.109375, "learning_rate": 1.4245641967248204e-07, "loss": 0.1472, "step": 7162 }, { "epoch": 193.59459459459458, "grad_norm": 0.62890625, "learning_rate": 1.4126793131528372e-07, "loss": 0.0781, "step": 7163 }, { "epoch": 193.6216216216216, "grad_norm": 0.90234375, "learning_rate": 1.400844073728841e-07, "loss": 0.1039, "step": 7164 }, { "epoch": 193.64864864864865, "grad_norm": 0.79296875, "learning_rate": 1.3890584808163854e-07, "loss": 0.0913, "step": 7165 }, { "epoch": 193.67567567567568, "grad_norm": 0.984375, "learning_rate": 1.3773225367691434e-07, "loss": 0.1267, "step": 7166 }, { "epoch": 193.7027027027027, "grad_norm": 0.640625, "learning_rate": 1.3656362439308236e-07, "loss": 0.0789, "step": 7167 }, { "epoch": 193.72972972972974, "grad_norm": 0.5625, "learning_rate": 1.3539996046352255e-07, "loss": 0.0708, "step": 7168 }, { "epoch": 193.75675675675674, "grad_norm": 1.546875, "learning_rate": 1.3424126212062404e-07, "loss": 0.1601, "step": 7169 }, { "epoch": 193.78378378378378, "grad_norm": 0.8515625, "learning_rate": 1.3308752959578785e-07, "loss": 0.1142, "step": 7170 }, { "epoch": 193.8108108108108, "grad_norm": 0.7421875, "learning_rate": 1.3193876311941578e-07, "loss": 0.067, "step": 7171 }, { "epoch": 193.83783783783784, "grad_norm": 0.92578125, "learning_rate": 1.3079496292092984e-07, "loss": 0.1398, "step": 7172 }, { "epoch": 193.86486486486487, "grad_norm": 0.8671875, "learning_rate": 1.296561292287446e-07, "loss": 0.0941, "step": 7173 }, { "epoch": 193.8918918918919, "grad_norm": 0.89453125, "learning_rate": 1.2852226227029474e-07, "loss": 0.1072, "step": 7174 }, { "epoch": 193.9189189189189, "grad_norm": 1.0703125, "learning_rate": 1.273933622720186e-07, "loss": 0.1683, "step": 7175 }, { "epoch": 193.94594594594594, "grad_norm": 0.470703125, "learning_rate": 1.262694294593636e-07, "loss": 0.0669, "step": 7176 }, { "epoch": 193.97297297297297, "grad_norm": 0.875, "learning_rate": 1.2515046405678355e-07, "loss": 0.0962, "step": 7177 }, { "epoch": 194.0, "grad_norm": 1.3203125, "learning_rate": 1.2403646628774412e-07, "loss": 0.1468, "step": 7178 }, { "epoch": 194.02702702702703, "grad_norm": 0.83203125, "learning_rate": 1.229274363747146e-07, "loss": 0.0976, "step": 7179 }, { "epoch": 194.05405405405406, "grad_norm": 0.7734375, "learning_rate": 1.2182337453917337e-07, "loss": 0.081, "step": 7180 }, { "epoch": 194.0810810810811, "grad_norm": 1.3828125, "learning_rate": 1.2072428100160794e-07, "loss": 0.1223, "step": 7181 }, { "epoch": 194.1081081081081, "grad_norm": 1.3359375, "learning_rate": 1.1963015598151494e-07, "loss": 0.1599, "step": 7182 }, { "epoch": 194.13513513513513, "grad_norm": 1.0390625, "learning_rate": 1.1854099969739186e-07, "loss": 0.1271, "step": 7183 }, { "epoch": 194.16216216216216, "grad_norm": 0.734375, "learning_rate": 1.1745681236675077e-07, "loss": 0.0767, "step": 7184 }, { "epoch": 194.1891891891892, "grad_norm": 0.98828125, "learning_rate": 1.1637759420611294e-07, "loss": 0.1277, "step": 7185 }, { "epoch": 194.21621621621622, "grad_norm": 0.84765625, "learning_rate": 1.1530334543099763e-07, "loss": 0.078, "step": 7186 }, { "epoch": 194.24324324324326, "grad_norm": 1.28125, "learning_rate": 1.1423406625594435e-07, "loss": 0.1558, "step": 7187 }, { "epoch": 194.27027027027026, "grad_norm": 0.87890625, "learning_rate": 1.1316975689448784e-07, "loss": 0.1116, "step": 7188 }, { "epoch": 194.2972972972973, "grad_norm": 1.1796875, "learning_rate": 1.121104175591775e-07, "loss": 0.1412, "step": 7189 }, { "epoch": 194.32432432432432, "grad_norm": 0.96875, "learning_rate": 1.1105604846157192e-07, "loss": 0.0991, "step": 7190 }, { "epoch": 194.35135135135135, "grad_norm": 0.55859375, "learning_rate": 1.1000664981223041e-07, "loss": 0.066, "step": 7191 }, { "epoch": 194.3783783783784, "grad_norm": 1.203125, "learning_rate": 1.0896222182072424e-07, "loss": 0.1656, "step": 7192 }, { "epoch": 194.40540540540542, "grad_norm": 1.2265625, "learning_rate": 1.07922764695631e-07, "loss": 0.1507, "step": 7193 }, { "epoch": 194.43243243243242, "grad_norm": 1.28125, "learning_rate": 1.0688827864453743e-07, "loss": 0.2134, "step": 7194 }, { "epoch": 194.45945945945945, "grad_norm": 0.94140625, "learning_rate": 1.0585876387403381e-07, "loss": 0.1309, "step": 7195 }, { "epoch": 194.48648648648648, "grad_norm": 0.9453125, "learning_rate": 1.0483422058971682e-07, "loss": 0.1189, "step": 7196 }, { "epoch": 194.51351351351352, "grad_norm": 0.6953125, "learning_rate": 1.0381464899619775e-07, "loss": 0.0819, "step": 7197 }, { "epoch": 194.54054054054055, "grad_norm": 0.8125, "learning_rate": 1.0280004929708875e-07, "loss": 0.1214, "step": 7198 }, { "epoch": 194.56756756756758, "grad_norm": 0.447265625, "learning_rate": 1.017904216950083e-07, "loss": 0.0586, "step": 7199 }, { "epoch": 194.59459459459458, "grad_norm": 1.140625, "learning_rate": 1.0078576639158677e-07, "loss": 0.1577, "step": 7200 }, { "epoch": 194.6216216216216, "grad_norm": 0.423828125, "learning_rate": 9.978608358745533e-08, "loss": 0.062, "step": 7201 }, { "epoch": 194.64864864864865, "grad_norm": 1.0703125, "learning_rate": 9.879137348225987e-08, "loss": 0.132, "step": 7202 }, { "epoch": 194.67567567567568, "grad_norm": 0.56640625, "learning_rate": 9.780163627464978e-08, "loss": 0.0672, "step": 7203 }, { "epoch": 194.7027027027027, "grad_norm": 0.9921875, "learning_rate": 9.681687216227531e-08, "loss": 0.1348, "step": 7204 }, { "epoch": 194.72972972972974, "grad_norm": 0.8828125, "learning_rate": 9.583708134180413e-08, "loss": 0.1013, "step": 7205 }, { "epoch": 194.75675675675674, "grad_norm": 0.9921875, "learning_rate": 9.486226400890197e-08, "loss": 0.1308, "step": 7206 }, { "epoch": 194.78378378378378, "grad_norm": 0.8515625, "learning_rate": 9.389242035824641e-08, "loss": 0.1033, "step": 7207 }, { "epoch": 194.8108108108108, "grad_norm": 0.5546875, "learning_rate": 9.292755058351865e-08, "loss": 0.0725, "step": 7208 }, { "epoch": 194.83783783783784, "grad_norm": 1.0546875, "learning_rate": 9.196765487740899e-08, "loss": 0.1154, "step": 7209 }, { "epoch": 194.86486486486487, "grad_norm": 0.73046875, "learning_rate": 9.101273343161409e-08, "loss": 0.089, "step": 7210 }, { "epoch": 194.8918918918919, "grad_norm": 0.91796875, "learning_rate": 9.006278643683696e-08, "loss": 0.1001, "step": 7211 }, { "epoch": 194.9189189189189, "grad_norm": 0.69140625, "learning_rate": 8.911781408278697e-08, "loss": 0.0843, "step": 7212 }, { "epoch": 194.94594594594594, "grad_norm": 0.86328125, "learning_rate": 8.817781655817704e-08, "loss": 0.0978, "step": 7213 }, { "epoch": 194.97297297297297, "grad_norm": 1.4609375, "learning_rate": 8.7242794050732e-08, "loss": 0.1648, "step": 7214 }, { "epoch": 195.0, "grad_norm": 1.328125, "learning_rate": 8.631274674717748e-08, "loss": 0.1362, "step": 7215 }, { "epoch": 195.02702702702703, "grad_norm": 1.0859375, "learning_rate": 8.538767483325383e-08, "loss": 0.1268, "step": 7216 }, { "epoch": 195.05405405405406, "grad_norm": 0.734375, "learning_rate": 8.446757849369935e-08, "loss": 0.0918, "step": 7217 }, { "epoch": 195.0810810810811, "grad_norm": 0.87109375, "learning_rate": 8.355245791226152e-08, "loss": 0.0976, "step": 7218 }, { "epoch": 195.1081081081081, "grad_norm": 0.8515625, "learning_rate": 8.264231327169691e-08, "loss": 0.098, "step": 7219 }, { "epoch": 195.13513513513513, "grad_norm": 0.6640625, "learning_rate": 8.173714475376293e-08, "loss": 0.0764, "step": 7220 }, { "epoch": 195.16216216216216, "grad_norm": 1.0859375, "learning_rate": 8.083695253922885e-08, "loss": 0.1373, "step": 7221 }, { "epoch": 195.1891891891892, "grad_norm": 1.1640625, "learning_rate": 7.994173680786476e-08, "loss": 0.1439, "step": 7222 }, { "epoch": 195.21621621621622, "grad_norm": 0.828125, "learning_rate": 7.905149773845543e-08, "loss": 0.0821, "step": 7223 }, { "epoch": 195.24324324324326, "grad_norm": 0.9609375, "learning_rate": 7.81662355087781e-08, "loss": 0.0973, "step": 7224 }, { "epoch": 195.27027027027026, "grad_norm": 1.1953125, "learning_rate": 7.728595029563301e-08, "loss": 0.1467, "step": 7225 }, { "epoch": 195.2972972972973, "grad_norm": 1.296875, "learning_rate": 7.641064227481009e-08, "loss": 0.1506, "step": 7226 }, { "epoch": 195.32432432432432, "grad_norm": 1.1875, "learning_rate": 7.554031162111674e-08, "loss": 0.1395, "step": 7227 }, { "epoch": 195.35135135135135, "grad_norm": 0.78515625, "learning_rate": 7.467495850836115e-08, "loss": 0.0871, "step": 7228 }, { "epoch": 195.3783783783784, "grad_norm": 0.66015625, "learning_rate": 7.381458310935786e-08, "loss": 0.0777, "step": 7229 }, { "epoch": 195.40540540540542, "grad_norm": 1.15625, "learning_rate": 7.295918559593051e-08, "loss": 0.1159, "step": 7230 }, { "epoch": 195.43243243243242, "grad_norm": 0.5234375, "learning_rate": 7.210876613890361e-08, "loss": 0.065, "step": 7231 }, { "epoch": 195.45945945945945, "grad_norm": 1.109375, "learning_rate": 7.126332490811349e-08, "loss": 0.1172, "step": 7232 }, { "epoch": 195.48648648648648, "grad_norm": 1.0703125, "learning_rate": 7.042286207239735e-08, "loss": 0.1274, "step": 7233 }, { "epoch": 195.51351351351352, "grad_norm": 1.3828125, "learning_rate": 6.958737779959867e-08, "loss": 0.1878, "step": 7234 }, { "epoch": 195.54054054054055, "grad_norm": 0.54296875, "learning_rate": 6.875687225656735e-08, "loss": 0.0685, "step": 7235 }, { "epoch": 195.56756756756758, "grad_norm": 0.92578125, "learning_rate": 6.793134560916514e-08, "loss": 0.0929, "step": 7236 }, { "epoch": 195.59459459459458, "grad_norm": 0.765625, "learning_rate": 6.71107980222463e-08, "loss": 0.0895, "step": 7237 }, { "epoch": 195.6216216216216, "grad_norm": 1.0390625, "learning_rate": 6.629522965968527e-08, "loss": 0.131, "step": 7238 }, { "epoch": 195.64864864864865, "grad_norm": 0.859375, "learning_rate": 6.5484640684349e-08, "loss": 0.0805, "step": 7239 }, { "epoch": 195.67567567567568, "grad_norm": 1.0859375, "learning_rate": 6.467903125812191e-08, "loss": 0.1253, "step": 7240 }, { "epoch": 195.7027027027027, "grad_norm": 0.94140625, "learning_rate": 6.387840154188362e-08, "loss": 0.0968, "step": 7241 }, { "epoch": 195.72972972972974, "grad_norm": 1.1484375, "learning_rate": 6.308275169552569e-08, "loss": 0.1461, "step": 7242 }, { "epoch": 195.75675675675674, "grad_norm": 0.80859375, "learning_rate": 6.2292081877946e-08, "loss": 0.0978, "step": 7243 }, { "epoch": 195.78378378378378, "grad_norm": 0.7109375, "learning_rate": 6.150639224704047e-08, "loss": 0.0955, "step": 7244 }, { "epoch": 195.8108108108108, "grad_norm": 1.2421875, "learning_rate": 6.07256829597197e-08, "loss": 0.1485, "step": 7245 }, { "epoch": 195.83783783783784, "grad_norm": 1.2890625, "learning_rate": 5.994995417189231e-08, "loss": 0.2435, "step": 7246 }, { "epoch": 195.86486486486487, "grad_norm": 0.86328125, "learning_rate": 5.9179206038476044e-08, "loss": 0.0992, "step": 7247 }, { "epoch": 195.8918918918919, "grad_norm": 0.73046875, "learning_rate": 5.841343871339222e-08, "loss": 0.0866, "step": 7248 }, { "epoch": 195.9189189189189, "grad_norm": 0.7421875, "learning_rate": 5.765265234957129e-08, "loss": 0.0721, "step": 7249 }, { "epoch": 195.94594594594594, "grad_norm": 1.0625, "learning_rate": 5.68968470989445e-08, "loss": 0.1472, "step": 7250 }, { "epoch": 195.97297297297297, "grad_norm": 1.203125, "learning_rate": 5.614602311244943e-08, "loss": 0.157, "step": 7251 }, { "epoch": 196.0, "grad_norm": 0.98046875, "learning_rate": 5.540018054003004e-08, "loss": 0.119, "step": 7252 }, { "epoch": 196.02702702702703, "grad_norm": 1.1171875, "learning_rate": 5.4659319530636633e-08, "loss": 0.1369, "step": 7253 }, { "epoch": 196.05405405405406, "grad_norm": 0.890625, "learning_rate": 5.392344023222029e-08, "loss": 0.094, "step": 7254 }, { "epoch": 196.0810810810811, "grad_norm": 1.09375, "learning_rate": 5.319254279173846e-08, "loss": 0.1386, "step": 7255 }, { "epoch": 196.1081081081081, "grad_norm": 0.69921875, "learning_rate": 5.2466627355160504e-08, "loss": 0.0839, "step": 7256 }, { "epoch": 196.13513513513513, "grad_norm": 0.86328125, "learning_rate": 5.1745694067451e-08, "loss": 0.1055, "step": 7257 }, { "epoch": 196.16216216216216, "grad_norm": 0.88671875, "learning_rate": 5.102974307258368e-08, "loss": 0.0975, "step": 7258 }, { "epoch": 196.1891891891892, "grad_norm": 0.83203125, "learning_rate": 5.031877451354139e-08, "loss": 0.1105, "step": 7259 }, { "epoch": 196.21621621621622, "grad_norm": 0.69921875, "learning_rate": 4.9612788532305e-08, "loss": 0.0778, "step": 7260 }, { "epoch": 196.24324324324326, "grad_norm": 1.09375, "learning_rate": 4.891178526986451e-08, "loss": 0.1109, "step": 7261 }, { "epoch": 196.27027027027026, "grad_norm": 1.4765625, "learning_rate": 4.821576486621349e-08, "loss": 0.273, "step": 7262 }, { "epoch": 196.2972972972973, "grad_norm": 1.15625, "learning_rate": 4.7524727460349085e-08, "loss": 0.1426, "step": 7263 }, { "epoch": 196.32432432432432, "grad_norm": 0.87109375, "learning_rate": 4.6838673190280345e-08, "loss": 0.1147, "step": 7264 }, { "epoch": 196.35135135135135, "grad_norm": 1.0546875, "learning_rate": 4.615760219301157e-08, "loss": 0.101, "step": 7265 }, { "epoch": 196.3783783783784, "grad_norm": 0.55078125, "learning_rate": 4.548151460455619e-08, "loss": 0.0717, "step": 7266 }, { "epoch": 196.40540540540542, "grad_norm": 0.859375, "learning_rate": 4.481041055993118e-08, "loss": 0.1014, "step": 7267 }, { "epoch": 196.43243243243242, "grad_norm": 0.99609375, "learning_rate": 4.414429019316268e-08, "loss": 0.0984, "step": 7268 }, { "epoch": 196.45945945945945, "grad_norm": 1.015625, "learning_rate": 4.348315363727762e-08, "loss": 0.0998, "step": 7269 }, { "epoch": 196.48648648648648, "grad_norm": 0.89453125, "learning_rate": 4.2827001024303705e-08, "loss": 0.118, "step": 7270 }, { "epoch": 196.51351351351352, "grad_norm": 1.0078125, "learning_rate": 4.217583248528611e-08, "loss": 0.1167, "step": 7271 }, { "epoch": 196.54054054054055, "grad_norm": 0.71875, "learning_rate": 4.152964815025972e-08, "loss": 0.08, "step": 7272 }, { "epoch": 196.56756756756758, "grad_norm": 0.62109375, "learning_rate": 4.088844814827408e-08, "loss": 0.0832, "step": 7273 }, { "epoch": 196.59459459459458, "grad_norm": 1.0546875, "learning_rate": 4.0252232607379536e-08, "loss": 0.1001, "step": 7274 }, { "epoch": 196.6216216216216, "grad_norm": 0.93359375, "learning_rate": 3.962100165463001e-08, "loss": 0.1094, "step": 7275 }, { "epoch": 196.64864864864865, "grad_norm": 0.81640625, "learning_rate": 3.899475541608855e-08, "loss": 0.093, "step": 7276 }, { "epoch": 196.67567567567568, "grad_norm": 1.2265625, "learning_rate": 3.8373494016816225e-08, "loss": 0.1661, "step": 7277 }, { "epoch": 196.7027027027027, "grad_norm": 0.6640625, "learning_rate": 3.775721758088324e-08, "loss": 0.0839, "step": 7278 }, { "epoch": 196.72972972972974, "grad_norm": 0.8828125, "learning_rate": 3.7145926231366146e-08, "loss": 0.1089, "step": 7279 }, { "epoch": 196.75675675675674, "grad_norm": 1.03125, "learning_rate": 3.653962009033673e-08, "loss": 0.1358, "step": 7280 }, { "epoch": 196.78378378378378, "grad_norm": 0.4609375, "learning_rate": 3.5938299278884254e-08, "loss": 0.0621, "step": 7281 }, { "epoch": 196.8108108108108, "grad_norm": 0.5390625, "learning_rate": 3.534196391709044e-08, "loss": 0.0687, "step": 7282 }, { "epoch": 196.83783783783784, "grad_norm": 0.91015625, "learning_rate": 3.4750614124048896e-08, "loss": 0.1088, "step": 7283 }, { "epoch": 196.86486486486487, "grad_norm": 1.1171875, "learning_rate": 3.416425001785684e-08, "loss": 0.1131, "step": 7284 }, { "epoch": 196.8918918918919, "grad_norm": 0.51171875, "learning_rate": 3.358287171560948e-08, "loss": 0.0649, "step": 7285 }, { "epoch": 196.9189189189189, "grad_norm": 0.89453125, "learning_rate": 3.300647933341394e-08, "loss": 0.1104, "step": 7286 }, { "epoch": 196.94594594594594, "grad_norm": 0.875, "learning_rate": 3.2435072986378154e-08, "loss": 0.1268, "step": 7287 }, { "epoch": 196.97297297297297, "grad_norm": 1.21875, "learning_rate": 3.1868652788616394e-08, "loss": 0.1548, "step": 7288 }, { "epoch": 197.0, "grad_norm": 0.9921875, "learning_rate": 3.1307218853243724e-08, "loss": 0.1084, "step": 7289 }, { "epoch": 197.02702702702703, "grad_norm": 1.21875, "learning_rate": 3.075077129238158e-08, "loss": 0.1396, "step": 7290 }, { "epoch": 197.05405405405406, "grad_norm": 0.8671875, "learning_rate": 3.019931021715494e-08, "loss": 0.1181, "step": 7291 }, { "epoch": 197.0810810810811, "grad_norm": 0.953125, "learning_rate": 2.9652835737692397e-08, "loss": 0.1249, "step": 7292 }, { "epoch": 197.1081081081081, "grad_norm": 0.7890625, "learning_rate": 2.911134796313164e-08, "loss": 0.108, "step": 7293 }, { "epoch": 197.13513513513513, "grad_norm": 0.94140625, "learning_rate": 2.8574847001608397e-08, "loss": 0.1129, "step": 7294 }, { "epoch": 197.16216216216216, "grad_norm": 1.4453125, "learning_rate": 2.8043332960264734e-08, "loss": 0.184, "step": 7295 }, { "epoch": 197.1891891891892, "grad_norm": 0.8828125, "learning_rate": 2.7516805945246303e-08, "loss": 0.1053, "step": 7296 }, { "epoch": 197.21621621621622, "grad_norm": 0.58203125, "learning_rate": 2.699526606170233e-08, "loss": 0.0735, "step": 7297 }, { "epoch": 197.24324324324326, "grad_norm": 1.0078125, "learning_rate": 2.647871341378838e-08, "loss": 0.1579, "step": 7298 }, { "epoch": 197.27027027027026, "grad_norm": 0.75, "learning_rate": 2.5967148104663607e-08, "loss": 0.0986, "step": 7299 }, { "epoch": 197.2972972972973, "grad_norm": 0.69921875, "learning_rate": 2.5460570236487957e-08, "loss": 0.0803, "step": 7300 }, { "epoch": 197.32432432432432, "grad_norm": 0.53125, "learning_rate": 2.4958979910427727e-08, "loss": 0.0696, "step": 7301 }, { "epoch": 197.35135135135135, "grad_norm": 0.84765625, "learning_rate": 2.4462377226652787e-08, "loss": 0.0803, "step": 7302 }, { "epoch": 197.3783783783784, "grad_norm": 0.7109375, "learning_rate": 2.397076228434214e-08, "loss": 0.0916, "step": 7303 }, { "epoch": 197.40540540540542, "grad_norm": 1.09375, "learning_rate": 2.3484135181667256e-08, "loss": 0.1352, "step": 7304 }, { "epoch": 197.43243243243242, "grad_norm": 0.87890625, "learning_rate": 2.3002496015811504e-08, "loss": 0.0857, "step": 7305 }, { "epoch": 197.45945945945945, "grad_norm": 0.96484375, "learning_rate": 2.2525844882964607e-08, "loss": 0.1025, "step": 7306 }, { "epoch": 197.48648648648648, "grad_norm": 0.7578125, "learning_rate": 2.2054181878311543e-08, "loss": 0.0899, "step": 7307 }, { "epoch": 197.51351351351352, "grad_norm": 1.0546875, "learning_rate": 2.15875070960464e-08, "loss": 0.1165, "step": 7308 }, { "epoch": 197.54054054054055, "grad_norm": 0.9765625, "learning_rate": 2.112582062936963e-08, "loss": 0.1138, "step": 7309 }, { "epoch": 197.56756756756758, "grad_norm": 0.90625, "learning_rate": 2.066912257048248e-08, "loss": 0.1051, "step": 7310 }, { "epoch": 197.59459459459458, "grad_norm": 0.67578125, "learning_rate": 2.0217413010584217e-08, "loss": 0.0814, "step": 7311 }, { "epoch": 197.6216216216216, "grad_norm": 0.5625, "learning_rate": 1.9770692039888793e-08, "loss": 0.0623, "step": 7312 }, { "epoch": 197.64864864864865, "grad_norm": 0.734375, "learning_rate": 1.9328959747608178e-08, "loss": 0.0882, "step": 7313 }, { "epoch": 197.67567567567568, "grad_norm": 0.9453125, "learning_rate": 1.8892216221955138e-08, "loss": 0.0917, "step": 7314 }, { "epoch": 197.7027027027027, "grad_norm": 1.1953125, "learning_rate": 1.8460461550151574e-08, "loss": 0.1721, "step": 7315 }, { "epoch": 197.72972972972974, "grad_norm": 1.1484375, "learning_rate": 1.8033695818422957e-08, "loss": 0.1099, "step": 7316 }, { "epoch": 197.75675675675674, "grad_norm": 1.1796875, "learning_rate": 1.7611919111995556e-08, "loss": 0.1342, "step": 7317 }, { "epoch": 197.78378378378378, "grad_norm": 0.55078125, "learning_rate": 1.7195131515099216e-08, "loss": 0.0785, "step": 7318 }, { "epoch": 197.8108108108108, "grad_norm": 0.50390625, "learning_rate": 1.678333311096736e-08, "loss": 0.0639, "step": 7319 }, { "epoch": 197.83783783783784, "grad_norm": 1.0546875, "learning_rate": 1.6376523981842527e-08, "loss": 0.1118, "step": 7320 }, { "epoch": 197.86486486486487, "grad_norm": 1.078125, "learning_rate": 1.5974704208962523e-08, "loss": 0.1084, "step": 7321 }, { "epoch": 197.8918918918919, "grad_norm": 0.625, "learning_rate": 1.5577873872574254e-08, "loss": 0.0732, "step": 7322 }, { "epoch": 197.9189189189189, "grad_norm": 0.875, "learning_rate": 1.518603305192545e-08, "loss": 0.0994, "step": 7323 }, { "epoch": 197.94594594594594, "grad_norm": 1.375, "learning_rate": 1.4799181825272956e-08, "loss": 0.1764, "step": 7324 }, { "epoch": 197.97297297297297, "grad_norm": 1.0, "learning_rate": 1.4417320269868861e-08, "loss": 0.1441, "step": 7325 }, { "epoch": 198.0, "grad_norm": 0.765625, "learning_rate": 1.4040448461971611e-08, "loss": 0.0916, "step": 7326 }, { "epoch": 198.02702702702703, "grad_norm": 1.21875, "learning_rate": 1.3668566476848777e-08, "loss": 0.1388, "step": 7327 }, { "epoch": 198.05405405405406, "grad_norm": 0.6953125, "learning_rate": 1.330167438876595e-08, "loss": 0.0754, "step": 7328 }, { "epoch": 198.0810810810811, "grad_norm": 1.015625, "learning_rate": 1.29397722709923e-08, "loss": 0.1196, "step": 7329 }, { "epoch": 198.1081081081081, "grad_norm": 1.0, "learning_rate": 1.2582860195800572e-08, "loss": 0.1056, "step": 7330 }, { "epoch": 198.13513513513513, "grad_norm": 1.265625, "learning_rate": 1.2230938234467081e-08, "loss": 0.1966, "step": 7331 }, { "epoch": 198.16216216216216, "grad_norm": 1.296875, "learning_rate": 1.1884006457277274e-08, "loss": 0.1751, "step": 7332 }, { "epoch": 198.1891891891892, "grad_norm": 0.96484375, "learning_rate": 1.1542064933511842e-08, "loss": 0.143, "step": 7333 }, { "epoch": 198.21621621621622, "grad_norm": 0.67578125, "learning_rate": 1.1205113731457828e-08, "loss": 0.0773, "step": 7334 }, { "epoch": 198.24324324324326, "grad_norm": 0.83203125, "learning_rate": 1.0873152918405849e-08, "loss": 0.0936, "step": 7335 }, { "epoch": 198.27027027027026, "grad_norm": 0.6484375, "learning_rate": 1.0546182560652874e-08, "loss": 0.0771, "step": 7336 }, { "epoch": 198.2972972972973, "grad_norm": 1.1953125, "learning_rate": 1.0224202723493893e-08, "loss": 0.1295, "step": 7337 }, { "epoch": 198.32432432432432, "grad_norm": 0.94921875, "learning_rate": 9.907213471233023e-09, "loss": 0.111, "step": 7338 }, { "epoch": 198.35135135135135, "grad_norm": 0.57421875, "learning_rate": 9.595214867169633e-09, "loss": 0.0776, "step": 7339 }, { "epoch": 198.3783783783784, "grad_norm": 1.2265625, "learning_rate": 9.288206973617764e-09, "loss": 0.1615, "step": 7340 }, { "epoch": 198.40540540540542, "grad_norm": 0.80078125, "learning_rate": 8.986189851881155e-09, "loss": 0.0953, "step": 7341 }, { "epoch": 198.43243243243242, "grad_norm": 0.56640625, "learning_rate": 8.689163562281e-09, "loss": 0.0719, "step": 7342 }, { "epoch": 198.45945945945945, "grad_norm": 1.34375, "learning_rate": 8.397128164130962e-09, "loss": 0.1885, "step": 7343 }, { "epoch": 198.48648648648648, "grad_norm": 1.0546875, "learning_rate": 8.11008371575106e-09, "loss": 0.1078, "step": 7344 }, { "epoch": 198.51351351351352, "grad_norm": 1.140625, "learning_rate": 7.828030274470432e-09, "loss": 0.1417, "step": 7345 }, { "epoch": 198.54054054054055, "grad_norm": 1.09375, "learning_rate": 7.55096789661347e-09, "loss": 0.1422, "step": 7346 }, { "epoch": 198.56756756756758, "grad_norm": 1.109375, "learning_rate": 7.278896637508137e-09, "loss": 0.1106, "step": 7347 }, { "epoch": 198.59459459459458, "grad_norm": 0.4453125, "learning_rate": 7.011816551494299e-09, "loss": 0.0533, "step": 7348 }, { "epoch": 198.6216216216216, "grad_norm": 0.76171875, "learning_rate": 6.749727691904295e-09, "loss": 0.0831, "step": 7349 }, { "epoch": 198.64864864864865, "grad_norm": 1.21875, "learning_rate": 6.492630111082365e-09, "loss": 0.1744, "step": 7350 }, { "epoch": 198.67567567567568, "grad_norm": 0.9453125, "learning_rate": 6.2405238603679975e-09, "loss": 0.0771, "step": 7351 }, { "epoch": 198.7027027027027, "grad_norm": 1.140625, "learning_rate": 5.993408990112581e-09, "loss": 0.1081, "step": 7352 }, { "epoch": 198.72972972972974, "grad_norm": 0.98046875, "learning_rate": 5.751285549662755e-09, "loss": 0.1317, "step": 7353 }, { "epoch": 198.75675675675674, "grad_norm": 1.4765625, "learning_rate": 5.514153587374282e-09, "loss": 0.1863, "step": 7354 }, { "epoch": 198.78378378378378, "grad_norm": 0.79296875, "learning_rate": 5.282013150600951e-09, "loss": 0.1326, "step": 7355 }, { "epoch": 198.8108108108108, "grad_norm": 1.3671875, "learning_rate": 5.054864285705674e-09, "loss": 0.118, "step": 7356 }, { "epoch": 198.83783783783784, "grad_norm": 1.296875, "learning_rate": 4.832707038046613e-09, "loss": 0.191, "step": 7357 }, { "epoch": 198.86486486486487, "grad_norm": 1.0625, "learning_rate": 4.615541451996608e-09, "loss": 0.1216, "step": 7358 }, { "epoch": 198.8918918918919, "grad_norm": 1.1953125, "learning_rate": 4.403367570918193e-09, "loss": 0.1579, "step": 7359 }, { "epoch": 198.9189189189189, "grad_norm": 0.490234375, "learning_rate": 4.196185437185806e-09, "loss": 0.0608, "step": 7360 }, { "epoch": 198.94594594594594, "grad_norm": 0.84765625, "learning_rate": 3.993995092177461e-09, "loss": 0.0996, "step": 7361 }, { "epoch": 198.97297297297297, "grad_norm": 1.0625, "learning_rate": 3.7967965762664145e-09, "loss": 0.1301, "step": 7362 }, { "epoch": 199.0, "grad_norm": 0.57421875, "learning_rate": 3.6045899288378317e-09, "loss": 0.0684, "step": 7363 }, { "epoch": 199.02702702702703, "grad_norm": 1.078125, "learning_rate": 3.417375188274896e-09, "loss": 0.1191, "step": 7364 }, { "epoch": 199.05405405405406, "grad_norm": 0.82421875, "learning_rate": 3.2351523919671443e-09, "loss": 0.1224, "step": 7365 }, { "epoch": 199.0810810810811, "grad_norm": 1.3203125, "learning_rate": 3.057921576304912e-09, "loss": 0.1329, "step": 7366 }, { "epoch": 199.1081081081081, "grad_norm": 1.3984375, "learning_rate": 2.8856827766793325e-09, "loss": 0.2223, "step": 7367 }, { "epoch": 199.13513513513513, "grad_norm": 1.203125, "learning_rate": 2.7184360274906672e-09, "loss": 0.1501, "step": 7368 }, { "epoch": 199.16216216216216, "grad_norm": 1.203125, "learning_rate": 2.5561813621372e-09, "loss": 0.1332, "step": 7369 }, { "epoch": 199.1891891891892, "grad_norm": 0.90625, "learning_rate": 2.3989188130207897e-09, "loss": 0.0963, "step": 7370 }, { "epoch": 199.21621621621622, "grad_norm": 0.5859375, "learning_rate": 2.246648411552421e-09, "loss": 0.0721, "step": 7371 }, { "epoch": 199.24324324324326, "grad_norm": 0.8203125, "learning_rate": 2.099370188135552e-09, "loss": 0.0994, "step": 7372 }, { "epoch": 199.27027027027026, "grad_norm": 1.1953125, "learning_rate": 1.9570841721855417e-09, "loss": 0.1479, "step": 7373 }, { "epoch": 199.2972972972973, "grad_norm": 1.328125, "learning_rate": 1.8197903921185477e-09, "loss": 0.1943, "step": 7374 }, { "epoch": 199.32432432432432, "grad_norm": 0.61328125, "learning_rate": 1.6874888753487528e-09, "loss": 0.071, "step": 7375 }, { "epoch": 199.35135135135135, "grad_norm": 1.0703125, "learning_rate": 1.56017964830224e-09, "loss": 0.1123, "step": 7376 }, { "epoch": 199.3783783783784, "grad_norm": 1.1484375, "learning_rate": 1.4378627364003416e-09, "loss": 0.1509, "step": 7377 }, { "epoch": 199.40540540540542, "grad_norm": 1.0, "learning_rate": 1.3205381640707394e-09, "loss": 0.1387, "step": 7378 }, { "epoch": 199.43243243243242, "grad_norm": 0.99609375, "learning_rate": 1.20820595474469e-09, "loss": 0.1487, "step": 7379 }, { "epoch": 199.45945945945945, "grad_norm": 0.64453125, "learning_rate": 1.10086613085425e-09, "loss": 0.0784, "step": 7380 }, { "epoch": 199.48648648648648, "grad_norm": 1.0703125, "learning_rate": 9.98518713837826e-10, "loss": 0.1284, "step": 7381 }, { "epoch": 199.51351351351352, "grad_norm": 1.0078125, "learning_rate": 9.011637241318483e-10, "loss": 0.1208, "step": 7382 }, { "epoch": 199.54054054054055, "grad_norm": 0.84765625, "learning_rate": 8.088011811790974e-10, "loss": 0.0972, "step": 7383 }, { "epoch": 199.56756756756758, "grad_norm": 1.1171875, "learning_rate": 7.214311034259291e-10, "loss": 0.1221, "step": 7384 }, { "epoch": 199.59459459459458, "grad_norm": 0.9609375, "learning_rate": 6.390535083222737e-10, "loss": 0.1179, "step": 7385 }, { "epoch": 199.6216216216216, "grad_norm": 0.74609375, "learning_rate": 5.616684123160854e-10, "loss": 0.0869, "step": 7386 }, { "epoch": 199.64864864864865, "grad_norm": 1.078125, "learning_rate": 4.892758308644441e-10, "loss": 0.1357, "step": 7387 }, { "epoch": 199.67567567567568, "grad_norm": 0.6796875, "learning_rate": 4.2187577841967807e-10, "loss": 0.0774, "step": 7388 }, { "epoch": 199.7027027027027, "grad_norm": 1.328125, "learning_rate": 3.594682684487927e-10, "loss": 0.1513, "step": 7389 }, { "epoch": 199.72972972972974, "grad_norm": 1.15625, "learning_rate": 3.0205331341126577e-10, "loss": 0.119, "step": 7390 }, { "epoch": 199.75675675675674, "grad_norm": 0.98046875, "learning_rate": 2.4963092477015006e-10, "loss": 0.1064, "step": 7391 }, { "epoch": 199.78378378378378, "grad_norm": 0.671875, "learning_rate": 2.0220111300039979e-10, "loss": 0.0909, "step": 7392 }, { "epoch": 199.8108108108108, "grad_norm": 0.7109375, "learning_rate": 1.5976388756944184e-10, "loss": 0.0879, "step": 7393 }, { "epoch": 199.83783783783784, "grad_norm": 0.84375, "learning_rate": 1.223192569566045e-10, "loss": 0.081, "step": 7394 }, { "epoch": 199.86486486486487, "grad_norm": 0.75390625, "learning_rate": 8.98672286336888e-11, "loss": 0.0755, "step": 7395 }, { "epoch": 199.8918918918919, "grad_norm": 0.98828125, "learning_rate": 6.24078090871727e-11, "loss": 0.1088, "step": 7396 }, { "epoch": 199.9189189189189, "grad_norm": 0.8515625, "learning_rate": 3.994100379878241e-11, "loss": 0.1119, "step": 7397 }, { "epoch": 199.94594594594594, "grad_norm": 0.59765625, "learning_rate": 2.246681725381894e-11, "loss": 0.0834, "step": 7398 }, { "epoch": 199.97297297297297, "grad_norm": 0.72265625, "learning_rate": 9.98525294393371e-12, "loss": 0.0934, "step": 7399 }, { "epoch": 200.0, "grad_norm": 1.359375, "learning_rate": 2.496313361577407e-12, "loss": 0.1973, "step": 7400 }, { "epoch": 200.0, "step": 7400, "total_flos": 0.0, "train_loss": 0.21357366642335782, "train_runtime": 5213.3985, "train_samples_per_second": 91.764, "train_steps_per_second": 1.419 } ], "logging_steps": 1, "max_steps": 7400, "num_input_tokens_seen": 0, "num_train_epochs": 200, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }