{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4117647058823529, "eval_steps": 1000, "global_step": 17500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.3529411764705884e-05, "grad_norm": 1.90625, "learning_rate": 1.9607843137254903e-05, "loss": 3.9568, "step": 1 }, { "epoch": 4.705882352941177e-05, "grad_norm": 2.234375, "learning_rate": 3.9215686274509805e-05, "loss": 3.9789, "step": 2 }, { "epoch": 7.058823529411764e-05, "grad_norm": 2.28125, "learning_rate": 5.882352941176471e-05, "loss": 3.7759, "step": 3 }, { "epoch": 9.411764705882353e-05, "grad_norm": 3.21875, "learning_rate": 7.843137254901961e-05, "loss": 3.8232, "step": 4 }, { "epoch": 0.00011764705882352942, "grad_norm": 2.25, "learning_rate": 9.80392156862745e-05, "loss": 4.1094, "step": 5 }, { "epoch": 0.00014117647058823528, "grad_norm": 1.5546875, "learning_rate": 0.00011764705882352942, "loss": 3.7314, "step": 6 }, { "epoch": 0.0001647058823529412, "grad_norm": 2.625, "learning_rate": 0.0001372549019607843, "loss": 4.0436, "step": 7 }, { "epoch": 0.00018823529411764707, "grad_norm": 2.140625, "learning_rate": 0.00015686274509803922, "loss": 3.7547, "step": 8 }, { "epoch": 0.00021176470588235295, "grad_norm": 1.953125, "learning_rate": 0.00017647058823529413, "loss": 3.8313, "step": 9 }, { "epoch": 0.00023529411764705883, "grad_norm": 2.125, "learning_rate": 0.000196078431372549, "loss": 3.7643, "step": 10 }, { "epoch": 0.0002588235294117647, "grad_norm": 2.390625, "learning_rate": 0.00021568627450980392, "loss": 3.7204, "step": 11 }, { "epoch": 0.00028235294117647056, "grad_norm": 2.53125, "learning_rate": 0.00023529411764705883, "loss": 4.0806, "step": 12 }, { "epoch": 0.00030588235294117644, "grad_norm": 7.53125, "learning_rate": 0.00025490196078431374, "loss": 4.1665, "step": 13 }, { "epoch": 0.0003294117647058824, "grad_norm": 2.0625, "learning_rate": 0.0002745098039215686, "loss": 3.5025, "step": 14 }, { "epoch": 0.00035294117647058826, "grad_norm": 1.5390625, "learning_rate": 0.00029411764705882356, "loss": 3.551, "step": 15 }, { "epoch": 0.00037647058823529414, "grad_norm": 2.375, "learning_rate": 0.00031372549019607844, "loss": 3.9256, "step": 16 }, { "epoch": 0.0004, "grad_norm": 2.21875, "learning_rate": 0.0003333333333333333, "loss": 3.4917, "step": 17 }, { "epoch": 0.0004235294117647059, "grad_norm": 3.25, "learning_rate": 0.00035294117647058826, "loss": 3.7274, "step": 18 }, { "epoch": 0.0004470588235294118, "grad_norm": 3.015625, "learning_rate": 0.00037254901960784314, "loss": 3.9154, "step": 19 }, { "epoch": 0.00047058823529411766, "grad_norm": 7.5, "learning_rate": 0.000392156862745098, "loss": 3.8858, "step": 20 }, { "epoch": 0.0004941176470588235, "grad_norm": 9.375, "learning_rate": 0.00041176470588235296, "loss": 3.5193, "step": 21 }, { "epoch": 0.0005176470588235294, "grad_norm": 8.375, "learning_rate": 0.00043137254901960784, "loss": 3.8833, "step": 22 }, { "epoch": 0.0005411764705882352, "grad_norm": 6.1875, "learning_rate": 0.0004509803921568627, "loss": 3.4274, "step": 23 }, { "epoch": 0.0005647058823529411, "grad_norm": 8.875, "learning_rate": 0.00047058823529411766, "loss": 3.9125, "step": 24 }, { "epoch": 0.000588235294117647, "grad_norm": 5.09375, "learning_rate": 0.0004901960784313725, "loss": 3.642, "step": 25 }, { "epoch": 0.0006117647058823529, "grad_norm": 3.5625, "learning_rate": 0.0005098039215686275, "loss": 3.5938, "step": 26 }, { "epoch": 0.0006352941176470589, "grad_norm": 2.5, "learning_rate": 0.0005294117647058823, "loss": 3.5931, "step": 27 }, { "epoch": 0.0006588235294117648, "grad_norm": 2.71875, "learning_rate": 0.0005490196078431372, "loss": 3.4867, "step": 28 }, { "epoch": 0.0006823529411764706, "grad_norm": 1.3359375, "learning_rate": 0.0005686274509803922, "loss": 3.6586, "step": 29 }, { "epoch": 0.0007058823529411765, "grad_norm": 1.484375, "learning_rate": 0.0005882352941176471, "loss": 3.7071, "step": 30 }, { "epoch": 0.0007294117647058824, "grad_norm": 1.4609375, "learning_rate": 0.0006078431372549019, "loss": 3.6543, "step": 31 }, { "epoch": 0.0007529411764705883, "grad_norm": 1.578125, "learning_rate": 0.0006274509803921569, "loss": 3.7594, "step": 32 }, { "epoch": 0.0007764705882352942, "grad_norm": 1.4375, "learning_rate": 0.0006470588235294118, "loss": 3.5442, "step": 33 }, { "epoch": 0.0008, "grad_norm": 1.46875, "learning_rate": 0.0006666666666666666, "loss": 3.7202, "step": 34 }, { "epoch": 0.0008235294117647059, "grad_norm": 1.15625, "learning_rate": 0.0006862745098039217, "loss": 3.7258, "step": 35 }, { "epoch": 0.0008470588235294118, "grad_norm": 1.1796875, "learning_rate": 0.0007058823529411765, "loss": 3.699, "step": 36 }, { "epoch": 0.0008705882352941177, "grad_norm": 1.2421875, "learning_rate": 0.0007254901960784315, "loss": 3.8727, "step": 37 }, { "epoch": 0.0008941176470588236, "grad_norm": 1.25, "learning_rate": 0.0007450980392156863, "loss": 3.5435, "step": 38 }, { "epoch": 0.0009176470588235294, "grad_norm": 1.203125, "learning_rate": 0.0007647058823529412, "loss": 3.5568, "step": 39 }, { "epoch": 0.0009411764705882353, "grad_norm": 1.1015625, "learning_rate": 0.000784313725490196, "loss": 3.6674, "step": 40 }, { "epoch": 0.0009647058823529412, "grad_norm": 1.0703125, "learning_rate": 0.0008039215686274511, "loss": 3.464, "step": 41 }, { "epoch": 0.000988235294117647, "grad_norm": 0.93359375, "learning_rate": 0.0008235294117647059, "loss": 3.0771, "step": 42 }, { "epoch": 0.0010117647058823529, "grad_norm": 0.98828125, "learning_rate": 0.0008431372549019609, "loss": 3.4763, "step": 43 }, { "epoch": 0.0010352941176470587, "grad_norm": 1.1015625, "learning_rate": 0.0008627450980392157, "loss": 3.699, "step": 44 }, { "epoch": 0.0010588235294117646, "grad_norm": 1.09375, "learning_rate": 0.0008823529411764706, "loss": 3.6161, "step": 45 }, { "epoch": 0.0010823529411764705, "grad_norm": 1.015625, "learning_rate": 0.0009019607843137254, "loss": 3.8047, "step": 46 }, { "epoch": 0.0011058823529411764, "grad_norm": 1.046875, "learning_rate": 0.0009215686274509805, "loss": 3.3518, "step": 47 }, { "epoch": 0.0011294117647058823, "grad_norm": 0.984375, "learning_rate": 0.0009411764705882353, "loss": 3.5424, "step": 48 }, { "epoch": 0.0011529411764705881, "grad_norm": 0.9375, "learning_rate": 0.0009607843137254903, "loss": 3.2536, "step": 49 }, { "epoch": 0.001176470588235294, "grad_norm": 1.0859375, "learning_rate": 0.000980392156862745, "loss": 3.776, "step": 50 }, { "epoch": 0.0012, "grad_norm": 1.1015625, "learning_rate": 0.001, "loss": 3.7646, "step": 51 }, { "epoch": 0.0012235294117647058, "grad_norm": 0.9765625, "learning_rate": 0.001019607843137255, "loss": 3.6662, "step": 52 }, { "epoch": 0.0012470588235294119, "grad_norm": 0.9609375, "learning_rate": 0.00103921568627451, "loss": 3.3299, "step": 53 }, { "epoch": 0.0012705882352941178, "grad_norm": 0.9765625, "learning_rate": 0.0010588235294117646, "loss": 3.5746, "step": 54 }, { "epoch": 0.0012941176470588236, "grad_norm": 0.83203125, "learning_rate": 0.0010784313725490198, "loss": 3.0822, "step": 55 }, { "epoch": 0.0013176470588235295, "grad_norm": 1.2578125, "learning_rate": 0.0010980392156862745, "loss": 3.0851, "step": 56 }, { "epoch": 0.0013411764705882354, "grad_norm": 0.87109375, "learning_rate": 0.0011176470588235294, "loss": 3.4332, "step": 57 }, { "epoch": 0.0013647058823529413, "grad_norm": 0.86328125, "learning_rate": 0.0011372549019607844, "loss": 3.3422, "step": 58 }, { "epoch": 0.0013882352941176472, "grad_norm": 0.84765625, "learning_rate": 0.0011568627450980393, "loss": 3.1687, "step": 59 }, { "epoch": 0.001411764705882353, "grad_norm": 0.828125, "learning_rate": 0.0011764705882352942, "loss": 3.3994, "step": 60 }, { "epoch": 0.001435294117647059, "grad_norm": 0.80078125, "learning_rate": 0.0011960784313725492, "loss": 3.3964, "step": 61 }, { "epoch": 0.0014588235294117648, "grad_norm": 0.71875, "learning_rate": 0.0012156862745098039, "loss": 3.201, "step": 62 }, { "epoch": 0.0014823529411764707, "grad_norm": 0.83984375, "learning_rate": 0.0012352941176470588, "loss": 3.5362, "step": 63 }, { "epoch": 0.0015058823529411766, "grad_norm": 1.1484375, "learning_rate": 0.0012549019607843138, "loss": 3.1059, "step": 64 }, { "epoch": 0.0015294117647058824, "grad_norm": 0.640625, "learning_rate": 0.0012745098039215685, "loss": 3.3991, "step": 65 }, { "epoch": 0.0015529411764705883, "grad_norm": 0.890625, "learning_rate": 0.0012941176470588236, "loss": 3.4027, "step": 66 }, { "epoch": 0.0015764705882352942, "grad_norm": 0.703125, "learning_rate": 0.0013137254901960786, "loss": 3.1496, "step": 67 }, { "epoch": 0.0016, "grad_norm": 0.90625, "learning_rate": 0.0013333333333333333, "loss": 3.3433, "step": 68 }, { "epoch": 0.001623529411764706, "grad_norm": 0.6953125, "learning_rate": 0.0013529411764705882, "loss": 3.1302, "step": 69 }, { "epoch": 0.0016470588235294118, "grad_norm": 0.7578125, "learning_rate": 0.0013725490196078434, "loss": 3.2126, "step": 70 }, { "epoch": 0.0016705882352941177, "grad_norm": 0.69921875, "learning_rate": 0.001392156862745098, "loss": 3.2959, "step": 71 }, { "epoch": 0.0016941176470588236, "grad_norm": 0.6640625, "learning_rate": 0.001411764705882353, "loss": 2.9175, "step": 72 }, { "epoch": 0.0017176470588235295, "grad_norm": 0.6015625, "learning_rate": 0.0014313725490196078, "loss": 3.3352, "step": 73 }, { "epoch": 0.0017411764705882354, "grad_norm": 0.60546875, "learning_rate": 0.001450980392156863, "loss": 3.3455, "step": 74 }, { "epoch": 0.0017647058823529412, "grad_norm": 0.76171875, "learning_rate": 0.0014705882352941176, "loss": 3.3827, "step": 75 }, { "epoch": 0.0017882352941176471, "grad_norm": 0.609375, "learning_rate": 0.0014901960784313726, "loss": 3.2903, "step": 76 }, { "epoch": 0.001811764705882353, "grad_norm": 0.484375, "learning_rate": 0.0015098039215686275, "loss": 2.8939, "step": 77 }, { "epoch": 0.0018352941176470589, "grad_norm": 0.498046875, "learning_rate": 0.0015294117647058824, "loss": 3.1135, "step": 78 }, { "epoch": 0.0018588235294117648, "grad_norm": 0.5, "learning_rate": 0.0015490196078431374, "loss": 3.0559, "step": 79 }, { "epoch": 0.0018823529411764706, "grad_norm": 0.55859375, "learning_rate": 0.001568627450980392, "loss": 3.079, "step": 80 }, { "epoch": 0.0019058823529411765, "grad_norm": 0.578125, "learning_rate": 0.001588235294117647, "loss": 3.2016, "step": 81 }, { "epoch": 0.0019294117647058824, "grad_norm": 0.63671875, "learning_rate": 0.0016078431372549022, "loss": 3.1778, "step": 82 }, { "epoch": 0.0019529411764705883, "grad_norm": 0.55859375, "learning_rate": 0.001627450980392157, "loss": 2.9344, "step": 83 }, { "epoch": 0.001976470588235294, "grad_norm": 0.57421875, "learning_rate": 0.0016470588235294118, "loss": 3.1187, "step": 84 }, { "epoch": 0.002, "grad_norm": 0.45703125, "learning_rate": 0.0016666666666666666, "loss": 2.7453, "step": 85 }, { "epoch": 0.0020235294117647057, "grad_norm": 3.796875, "learning_rate": 0.0016862745098039217, "loss": 3.1954, "step": 86 }, { "epoch": 0.002047058823529412, "grad_norm": 0.44921875, "learning_rate": 0.0017058823529411766, "loss": 2.8589, "step": 87 }, { "epoch": 0.0020705882352941175, "grad_norm": 0.515625, "learning_rate": 0.0017254901960784314, "loss": 3.0293, "step": 88 }, { "epoch": 0.0020941176470588236, "grad_norm": 0.443359375, "learning_rate": 0.0017450980392156863, "loss": 2.9419, "step": 89 }, { "epoch": 0.0021176470588235292, "grad_norm": 0.57421875, "learning_rate": 0.0017647058823529412, "loss": 3.217, "step": 90 }, { "epoch": 0.0021411764705882353, "grad_norm": 0.5390625, "learning_rate": 0.0017843137254901962, "loss": 2.8917, "step": 91 }, { "epoch": 0.002164705882352941, "grad_norm": 0.4921875, "learning_rate": 0.001803921568627451, "loss": 2.9018, "step": 92 }, { "epoch": 0.002188235294117647, "grad_norm": 0.5078125, "learning_rate": 0.0018235294117647058, "loss": 2.9433, "step": 93 }, { "epoch": 0.0022117647058823527, "grad_norm": 0.53125, "learning_rate": 0.001843137254901961, "loss": 3.0387, "step": 94 }, { "epoch": 0.002235294117647059, "grad_norm": 0.482421875, "learning_rate": 0.0018627450980392157, "loss": 3.0548, "step": 95 }, { "epoch": 0.0022588235294117645, "grad_norm": 0.51171875, "learning_rate": 0.0018823529411764706, "loss": 3.1088, "step": 96 }, { "epoch": 0.0022823529411764706, "grad_norm": 0.6328125, "learning_rate": 0.0019019607843137254, "loss": 2.9728, "step": 97 }, { "epoch": 0.0023058823529411763, "grad_norm": 0.55859375, "learning_rate": 0.0019215686274509805, "loss": 2.9939, "step": 98 }, { "epoch": 0.0023294117647058824, "grad_norm": 0.4375, "learning_rate": 0.0019411764705882354, "loss": 2.9188, "step": 99 }, { "epoch": 0.002352941176470588, "grad_norm": 0.388671875, "learning_rate": 0.00196078431372549, "loss": 2.7921, "step": 100 }, { "epoch": 0.002376470588235294, "grad_norm": 0.400390625, "learning_rate": 0.001980392156862745, "loss": 2.7044, "step": 101 }, { "epoch": 0.0024, "grad_norm": 0.4140625, "learning_rate": 0.002, "loss": 2.804, "step": 102 }, { "epoch": 0.002423529411764706, "grad_norm": 0.39453125, "learning_rate": 0.002019607843137255, "loss": 2.9178, "step": 103 }, { "epoch": 0.0024470588235294116, "grad_norm": 0.453125, "learning_rate": 0.00203921568627451, "loss": 2.9348, "step": 104 }, { "epoch": 0.0024705882352941176, "grad_norm": 0.390625, "learning_rate": 0.002058823529411765, "loss": 2.7403, "step": 105 }, { "epoch": 0.0024941176470588237, "grad_norm": 0.470703125, "learning_rate": 0.00207843137254902, "loss": 2.7532, "step": 106 }, { "epoch": 0.0025176470588235294, "grad_norm": 0.37890625, "learning_rate": 0.0020980392156862747, "loss": 2.6425, "step": 107 }, { "epoch": 0.0025411764705882355, "grad_norm": 0.5859375, "learning_rate": 0.0021176470588235292, "loss": 2.7924, "step": 108 }, { "epoch": 0.002564705882352941, "grad_norm": 0.37890625, "learning_rate": 0.002137254901960784, "loss": 2.8035, "step": 109 }, { "epoch": 0.0025882352941176473, "grad_norm": 0.462890625, "learning_rate": 0.0021568627450980395, "loss": 3.0268, "step": 110 }, { "epoch": 0.002611764705882353, "grad_norm": 0.447265625, "learning_rate": 0.002176470588235294, "loss": 2.5819, "step": 111 }, { "epoch": 0.002635294117647059, "grad_norm": 0.357421875, "learning_rate": 0.002196078431372549, "loss": 2.4956, "step": 112 }, { "epoch": 0.0026588235294117647, "grad_norm": 0.416015625, "learning_rate": 0.002215686274509804, "loss": 2.9831, "step": 113 }, { "epoch": 0.002682352941176471, "grad_norm": 0.41015625, "learning_rate": 0.002235294117647059, "loss": 2.6408, "step": 114 }, { "epoch": 0.0027058823529411765, "grad_norm": 0.49609375, "learning_rate": 0.0022549019607843138, "loss": 2.5874, "step": 115 }, { "epoch": 0.0027294117647058825, "grad_norm": 0.400390625, "learning_rate": 0.0022745098039215687, "loss": 2.7487, "step": 116 }, { "epoch": 0.002752941176470588, "grad_norm": 0.427734375, "learning_rate": 0.0022941176470588237, "loss": 2.6498, "step": 117 }, { "epoch": 0.0027764705882352943, "grad_norm": 0.408203125, "learning_rate": 0.0023137254901960786, "loss": 2.9084, "step": 118 }, { "epoch": 0.0028, "grad_norm": 0.56640625, "learning_rate": 0.0023333333333333335, "loss": 2.8356, "step": 119 }, { "epoch": 0.002823529411764706, "grad_norm": 0.408203125, "learning_rate": 0.0023529411764705885, "loss": 2.6952, "step": 120 }, { "epoch": 0.0028470588235294117, "grad_norm": 0.33984375, "learning_rate": 0.002372549019607843, "loss": 2.6401, "step": 121 }, { "epoch": 0.002870588235294118, "grad_norm": 0.388671875, "learning_rate": 0.0023921568627450983, "loss": 2.6351, "step": 122 }, { "epoch": 0.0028941176470588235, "grad_norm": 0.375, "learning_rate": 0.002411764705882353, "loss": 2.7059, "step": 123 }, { "epoch": 0.0029176470588235296, "grad_norm": 0.42578125, "learning_rate": 0.0024313725490196078, "loss": 2.7688, "step": 124 }, { "epoch": 0.0029411764705882353, "grad_norm": 0.330078125, "learning_rate": 0.0024509803921568627, "loss": 2.5525, "step": 125 }, { "epoch": 0.0029647058823529414, "grad_norm": 0.3203125, "learning_rate": 0.0024705882352941176, "loss": 2.5386, "step": 126 }, { "epoch": 0.002988235294117647, "grad_norm": 0.376953125, "learning_rate": 0.0024901960784313726, "loss": 2.768, "step": 127 }, { "epoch": 0.003011764705882353, "grad_norm": 0.34375, "learning_rate": 0.0025098039215686275, "loss": 2.6744, "step": 128 }, { "epoch": 0.0030352941176470588, "grad_norm": 0.41796875, "learning_rate": 0.0025294117647058825, "loss": 2.6531, "step": 129 }, { "epoch": 0.003058823529411765, "grad_norm": 0.31640625, "learning_rate": 0.002549019607843137, "loss": 2.3649, "step": 130 }, { "epoch": 0.0030823529411764705, "grad_norm": 0.349609375, "learning_rate": 0.002568627450980392, "loss": 2.6988, "step": 131 }, { "epoch": 0.0031058823529411766, "grad_norm": 0.294921875, "learning_rate": 0.0025882352941176473, "loss": 2.3315, "step": 132 }, { "epoch": 0.0031294117647058823, "grad_norm": 0.318359375, "learning_rate": 0.002607843137254902, "loss": 2.4722, "step": 133 }, { "epoch": 0.0031529411764705884, "grad_norm": 0.349609375, "learning_rate": 0.002627450980392157, "loss": 2.5248, "step": 134 }, { "epoch": 0.003176470588235294, "grad_norm": 0.33203125, "learning_rate": 0.0026470588235294116, "loss": 2.551, "step": 135 }, { "epoch": 0.0032, "grad_norm": 0.3359375, "learning_rate": 0.0026666666666666666, "loss": 2.5369, "step": 136 }, { "epoch": 0.003223529411764706, "grad_norm": 0.33203125, "learning_rate": 0.0026862745098039215, "loss": 2.8454, "step": 137 }, { "epoch": 0.003247058823529412, "grad_norm": 0.302734375, "learning_rate": 0.0027058823529411765, "loss": 2.4885, "step": 138 }, { "epoch": 0.0032705882352941176, "grad_norm": 0.404296875, "learning_rate": 0.0027254901960784314, "loss": 2.7896, "step": 139 }, { "epoch": 0.0032941176470588237, "grad_norm": 0.326171875, "learning_rate": 0.0027450980392156868, "loss": 2.5455, "step": 140 }, { "epoch": 0.0033176470588235293, "grad_norm": 0.365234375, "learning_rate": 0.0027647058823529413, "loss": 2.5221, "step": 141 }, { "epoch": 0.0033411764705882354, "grad_norm": 0.296875, "learning_rate": 0.002784313725490196, "loss": 2.6032, "step": 142 }, { "epoch": 0.003364705882352941, "grad_norm": 0.306640625, "learning_rate": 0.002803921568627451, "loss": 2.5689, "step": 143 }, { "epoch": 0.003388235294117647, "grad_norm": 0.3125, "learning_rate": 0.002823529411764706, "loss": 2.6036, "step": 144 }, { "epoch": 0.003411764705882353, "grad_norm": 0.341796875, "learning_rate": 0.0028431372549019606, "loss": 2.5662, "step": 145 }, { "epoch": 0.003435294117647059, "grad_norm": 0.302734375, "learning_rate": 0.0028627450980392155, "loss": 2.4867, "step": 146 }, { "epoch": 0.0034588235294117646, "grad_norm": 0.28125, "learning_rate": 0.0028823529411764704, "loss": 2.3913, "step": 147 }, { "epoch": 0.0034823529411764707, "grad_norm": 0.283203125, "learning_rate": 0.002901960784313726, "loss": 2.5443, "step": 148 }, { "epoch": 0.0035058823529411764, "grad_norm": 0.30078125, "learning_rate": 0.0029215686274509808, "loss": 2.3521, "step": 149 }, { "epoch": 0.0035294117647058825, "grad_norm": 0.302734375, "learning_rate": 0.0029411764705882353, "loss": 2.6995, "step": 150 }, { "epoch": 0.003552941176470588, "grad_norm": 0.294921875, "learning_rate": 0.00296078431372549, "loss": 2.3555, "step": 151 }, { "epoch": 0.0035764705882352942, "grad_norm": 0.275390625, "learning_rate": 0.002980392156862745, "loss": 2.6314, "step": 152 }, { "epoch": 0.0036, "grad_norm": 0.271484375, "learning_rate": 0.003, "loss": 2.5629, "step": 153 }, { "epoch": 0.003623529411764706, "grad_norm": 0.2734375, "learning_rate": 0.003019607843137255, "loss": 2.5639, "step": 154 }, { "epoch": 0.0036470588235294117, "grad_norm": 0.28125, "learning_rate": 0.0030392156862745095, "loss": 2.4456, "step": 155 }, { "epoch": 0.0036705882352941178, "grad_norm": 0.26953125, "learning_rate": 0.003058823529411765, "loss": 2.5811, "step": 156 }, { "epoch": 0.0036941176470588234, "grad_norm": 0.251953125, "learning_rate": 0.00307843137254902, "loss": 2.3388, "step": 157 }, { "epoch": 0.0037176470588235295, "grad_norm": 0.265625, "learning_rate": 0.0030980392156862747, "loss": 2.6341, "step": 158 }, { "epoch": 0.003741176470588235, "grad_norm": 0.267578125, "learning_rate": 0.0031176470588235297, "loss": 2.6175, "step": 159 }, { "epoch": 0.0037647058823529413, "grad_norm": 0.265625, "learning_rate": 0.003137254901960784, "loss": 2.379, "step": 160 }, { "epoch": 0.003788235294117647, "grad_norm": 0.26171875, "learning_rate": 0.003156862745098039, "loss": 2.3883, "step": 161 }, { "epoch": 0.003811764705882353, "grad_norm": 0.2578125, "learning_rate": 0.003176470588235294, "loss": 2.4857, "step": 162 }, { "epoch": 0.0038352941176470587, "grad_norm": 0.26171875, "learning_rate": 0.003196078431372549, "loss": 2.4179, "step": 163 }, { "epoch": 0.003858823529411765, "grad_norm": 0.234375, "learning_rate": 0.0032156862745098044, "loss": 2.338, "step": 164 }, { "epoch": 0.0038823529411764705, "grad_norm": 0.248046875, "learning_rate": 0.003235294117647059, "loss": 2.3605, "step": 165 }, { "epoch": 0.0039058823529411766, "grad_norm": 0.255859375, "learning_rate": 0.003254901960784314, "loss": 2.437, "step": 166 }, { "epoch": 0.003929411764705883, "grad_norm": 0.25390625, "learning_rate": 0.0032745098039215687, "loss": 2.378, "step": 167 }, { "epoch": 0.003952941176470588, "grad_norm": 0.251953125, "learning_rate": 0.0032941176470588237, "loss": 2.3987, "step": 168 }, { "epoch": 0.003976470588235294, "grad_norm": 0.255859375, "learning_rate": 0.003313725490196078, "loss": 2.5146, "step": 169 }, { "epoch": 0.004, "grad_norm": 0.2451171875, "learning_rate": 0.003333333333333333, "loss": 2.3232, "step": 170 }, { "epoch": 0.004023529411764706, "grad_norm": 0.263671875, "learning_rate": 0.003352941176470588, "loss": 2.3866, "step": 171 }, { "epoch": 0.004047058823529411, "grad_norm": 0.25, "learning_rate": 0.0033725490196078434, "loss": 2.3696, "step": 172 }, { "epoch": 0.0040705882352941175, "grad_norm": 0.2451171875, "learning_rate": 0.0033921568627450984, "loss": 2.3369, "step": 173 }, { "epoch": 0.004094117647058824, "grad_norm": 0.23828125, "learning_rate": 0.0034117647058823533, "loss": 2.4021, "step": 174 }, { "epoch": 0.00411764705882353, "grad_norm": 0.23828125, "learning_rate": 0.003431372549019608, "loss": 2.5087, "step": 175 }, { "epoch": 0.004141176470588235, "grad_norm": 0.2333984375, "learning_rate": 0.0034509803921568627, "loss": 2.5508, "step": 176 }, { "epoch": 0.004164705882352941, "grad_norm": 0.2236328125, "learning_rate": 0.0034705882352941177, "loss": 2.3707, "step": 177 }, { "epoch": 0.004188235294117647, "grad_norm": 0.251953125, "learning_rate": 0.0034901960784313726, "loss": 2.4821, "step": 178 }, { "epoch": 0.004211764705882353, "grad_norm": 0.2470703125, "learning_rate": 0.003509803921568627, "loss": 2.3921, "step": 179 }, { "epoch": 0.0042352941176470585, "grad_norm": 0.2431640625, "learning_rate": 0.0035294117647058825, "loss": 2.4739, "step": 180 }, { "epoch": 0.0042588235294117645, "grad_norm": 0.2578125, "learning_rate": 0.0035490196078431374, "loss": 2.4282, "step": 181 }, { "epoch": 0.004282352941176471, "grad_norm": 0.2578125, "learning_rate": 0.0035686274509803924, "loss": 2.4806, "step": 182 }, { "epoch": 0.004305882352941177, "grad_norm": 0.244140625, "learning_rate": 0.0035882352941176473, "loss": 2.4293, "step": 183 }, { "epoch": 0.004329411764705882, "grad_norm": 0.2578125, "learning_rate": 0.003607843137254902, "loss": 2.3393, "step": 184 }, { "epoch": 0.004352941176470588, "grad_norm": 0.25, "learning_rate": 0.0036274509803921567, "loss": 2.362, "step": 185 }, { "epoch": 0.004376470588235294, "grad_norm": 0.2412109375, "learning_rate": 0.0036470588235294117, "loss": 2.514, "step": 186 }, { "epoch": 0.0044, "grad_norm": 0.2353515625, "learning_rate": 0.0036666666666666666, "loss": 2.2548, "step": 187 }, { "epoch": 0.0044235294117647055, "grad_norm": 0.224609375, "learning_rate": 0.003686274509803922, "loss": 2.3184, "step": 188 }, { "epoch": 0.004447058823529412, "grad_norm": 0.2255859375, "learning_rate": 0.003705882352941177, "loss": 2.2887, "step": 189 }, { "epoch": 0.004470588235294118, "grad_norm": 0.2314453125, "learning_rate": 0.0037254901960784314, "loss": 2.4153, "step": 190 }, { "epoch": 0.004494117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.0037450980392156863, "loss": 2.3524, "step": 191 }, { "epoch": 0.004517647058823529, "grad_norm": 0.23046875, "learning_rate": 0.0037647058823529413, "loss": 2.3794, "step": 192 }, { "epoch": 0.004541176470588235, "grad_norm": 0.2353515625, "learning_rate": 0.003784313725490196, "loss": 2.3653, "step": 193 }, { "epoch": 0.004564705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.0038039215686274507, "loss": 2.3105, "step": 194 }, { "epoch": 0.004588235294117647, "grad_norm": 0.228515625, "learning_rate": 0.0038235294117647057, "loss": 2.4153, "step": 195 }, { "epoch": 0.0046117647058823525, "grad_norm": 0.2373046875, "learning_rate": 0.003843137254901961, "loss": 2.3851, "step": 196 }, { "epoch": 0.004635294117647059, "grad_norm": 0.2353515625, "learning_rate": 0.003862745098039216, "loss": 2.1889, "step": 197 }, { "epoch": 0.004658823529411765, "grad_norm": 0.2392578125, "learning_rate": 0.003882352941176471, "loss": 2.4044, "step": 198 }, { "epoch": 0.004682352941176471, "grad_norm": 0.236328125, "learning_rate": 0.0039019607843137254, "loss": 2.3428, "step": 199 }, { "epoch": 0.004705882352941176, "grad_norm": 0.23828125, "learning_rate": 0.00392156862745098, "loss": 2.3869, "step": 200 }, { "epoch": 0.004729411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.003941176470588235, "loss": 2.3408, "step": 201 }, { "epoch": 0.004752941176470588, "grad_norm": 0.232421875, "learning_rate": 0.00396078431372549, "loss": 2.2928, "step": 202 }, { "epoch": 0.004776470588235294, "grad_norm": 0.232421875, "learning_rate": 0.003980392156862745, "loss": 2.4375, "step": 203 }, { "epoch": 0.0048, "grad_norm": 0.21484375, "learning_rate": 0.004, "loss": 2.1251, "step": 204 }, { "epoch": 0.004823529411764706, "grad_norm": 0.2265625, "learning_rate": 0.004019607843137255, "loss": 2.565, "step": 205 }, { "epoch": 0.004847058823529412, "grad_norm": 0.228515625, "learning_rate": 0.00403921568627451, "loss": 2.3847, "step": 206 }, { "epoch": 0.004870588235294118, "grad_norm": 0.2412109375, "learning_rate": 0.004058823529411765, "loss": 2.2639, "step": 207 }, { "epoch": 0.004894117647058823, "grad_norm": 0.220703125, "learning_rate": 0.00407843137254902, "loss": 2.2048, "step": 208 }, { "epoch": 0.004917647058823529, "grad_norm": 0.2255859375, "learning_rate": 0.004098039215686275, "loss": 2.2819, "step": 209 }, { "epoch": 0.004941176470588235, "grad_norm": 0.2236328125, "learning_rate": 0.00411764705882353, "loss": 2.212, "step": 210 }, { "epoch": 0.004964705882352941, "grad_norm": 0.2138671875, "learning_rate": 0.004137254901960784, "loss": 2.3711, "step": 211 }, { "epoch": 0.0049882352941176475, "grad_norm": 0.2236328125, "learning_rate": 0.00415686274509804, "loss": 2.4142, "step": 212 }, { "epoch": 0.005011764705882353, "grad_norm": 0.22265625, "learning_rate": 0.0041764705882352945, "loss": 2.2355, "step": 213 }, { "epoch": 0.005035294117647059, "grad_norm": 0.2236328125, "learning_rate": 0.0041960784313725494, "loss": 2.2607, "step": 214 }, { "epoch": 0.005058823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.004215686274509804, "loss": 2.2491, "step": 215 }, { "epoch": 0.005082352941176471, "grad_norm": 0.2236328125, "learning_rate": 0.0042352941176470585, "loss": 2.1752, "step": 216 }, { "epoch": 0.005105882352941176, "grad_norm": 0.2353515625, "learning_rate": 0.004254901960784313, "loss": 2.3886, "step": 217 }, { "epoch": 0.005129411764705882, "grad_norm": 0.2216796875, "learning_rate": 0.004274509803921568, "loss": 2.2018, "step": 218 }, { "epoch": 0.0051529411764705884, "grad_norm": 0.220703125, "learning_rate": 0.004294117647058823, "loss": 2.1634, "step": 219 }, { "epoch": 0.0051764705882352945, "grad_norm": 0.2197265625, "learning_rate": 0.004313725490196079, "loss": 2.1835, "step": 220 }, { "epoch": 0.0052, "grad_norm": 0.21875, "learning_rate": 0.004333333333333334, "loss": 2.227, "step": 221 }, { "epoch": 0.005223529411764706, "grad_norm": 0.2275390625, "learning_rate": 0.004352941176470588, "loss": 2.3413, "step": 222 }, { "epoch": 0.005247058823529412, "grad_norm": 0.212890625, "learning_rate": 0.004372549019607843, "loss": 2.2824, "step": 223 }, { "epoch": 0.005270588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.004392156862745098, "loss": 2.3083, "step": 224 }, { "epoch": 0.005294117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.004411764705882353, "loss": 2.2657, "step": 225 }, { "epoch": 0.005317647058823529, "grad_norm": 0.22265625, "learning_rate": 0.004431372549019608, "loss": 2.3261, "step": 226 }, { "epoch": 0.0053411764705882355, "grad_norm": 0.2236328125, "learning_rate": 0.004450980392156863, "loss": 2.2492, "step": 227 }, { "epoch": 0.005364705882352942, "grad_norm": 0.212890625, "learning_rate": 0.004470588235294118, "loss": 2.2707, "step": 228 }, { "epoch": 0.005388235294117647, "grad_norm": 0.21875, "learning_rate": 0.004490196078431373, "loss": 2.22, "step": 229 }, { "epoch": 0.005411764705882353, "grad_norm": 0.208984375, "learning_rate": 0.0045098039215686276, "loss": 2.2627, "step": 230 }, { "epoch": 0.005435294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.0045294117647058825, "loss": 2.2264, "step": 231 }, { "epoch": 0.005458823529411765, "grad_norm": 0.220703125, "learning_rate": 0.0045490196078431374, "loss": 2.4973, "step": 232 }, { "epoch": 0.00548235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.004568627450980392, "loss": 2.2939, "step": 233 }, { "epoch": 0.005505882352941176, "grad_norm": 0.22265625, "learning_rate": 0.004588235294117647, "loss": 2.3832, "step": 234 }, { "epoch": 0.0055294117647058825, "grad_norm": 0.232421875, "learning_rate": 0.004607843137254901, "loss": 2.3509, "step": 235 }, { "epoch": 0.005552941176470589, "grad_norm": 0.228515625, "learning_rate": 0.004627450980392157, "loss": 2.4068, "step": 236 }, { "epoch": 0.005576470588235294, "grad_norm": 0.203125, "learning_rate": 0.004647058823529412, "loss": 2.2272, "step": 237 }, { "epoch": 0.0056, "grad_norm": 0.2080078125, "learning_rate": 0.004666666666666667, "loss": 2.2508, "step": 238 }, { "epoch": 0.005623529411764706, "grad_norm": 0.212890625, "learning_rate": 0.004686274509803922, "loss": 2.3443, "step": 239 }, { "epoch": 0.005647058823529412, "grad_norm": 0.2412109375, "learning_rate": 0.004705882352941177, "loss": 2.2683, "step": 240 }, { "epoch": 0.005670588235294117, "grad_norm": 0.2177734375, "learning_rate": 0.004725490196078431, "loss": 2.2034, "step": 241 }, { "epoch": 0.0056941176470588235, "grad_norm": 0.21484375, "learning_rate": 0.004745098039215686, "loss": 2.2424, "step": 242 }, { "epoch": 0.0057176470588235296, "grad_norm": 0.22265625, "learning_rate": 0.004764705882352941, "loss": 2.2204, "step": 243 }, { "epoch": 0.005741176470588236, "grad_norm": 0.201171875, "learning_rate": 0.004784313725490197, "loss": 2.2765, "step": 244 }, { "epoch": 0.005764705882352941, "grad_norm": 0.21484375, "learning_rate": 0.004803921568627452, "loss": 2.3654, "step": 245 }, { "epoch": 0.005788235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.004823529411764706, "loss": 2.2633, "step": 246 }, { "epoch": 0.005811764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.004843137254901961, "loss": 2.3213, "step": 247 }, { "epoch": 0.005835294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.0048627450980392155, "loss": 2.3858, "step": 248 }, { "epoch": 0.005858823529411764, "grad_norm": 0.1904296875, "learning_rate": 0.0048823529411764705, "loss": 2.1756, "step": 249 }, { "epoch": 0.0058823529411764705, "grad_norm": 0.2060546875, "learning_rate": 0.004901960784313725, "loss": 2.2957, "step": 250 }, { "epoch": 0.005905882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.00492156862745098, "loss": 2.068, "step": 251 }, { "epoch": 0.005929411764705883, "grad_norm": 0.248046875, "learning_rate": 0.004941176470588235, "loss": 2.4052, "step": 252 }, { "epoch": 0.005952941176470588, "grad_norm": 0.205078125, "learning_rate": 0.00496078431372549, "loss": 2.3512, "step": 253 }, { "epoch": 0.005976470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.004980392156862745, "loss": 2.2266, "step": 254 }, { "epoch": 0.006, "grad_norm": 0.19921875, "learning_rate": 0.005, "loss": 2.267, "step": 255 }, { "epoch": 0.006023529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.005019607843137255, "loss": 2.1559, "step": 256 }, { "epoch": 0.0060470588235294115, "grad_norm": 0.220703125, "learning_rate": 0.00503921568627451, "loss": 2.1198, "step": 257 }, { "epoch": 0.0060705882352941175, "grad_norm": 0.197265625, "learning_rate": 0.005058823529411765, "loss": 2.2206, "step": 258 }, { "epoch": 0.006094117647058824, "grad_norm": 0.400390625, "learning_rate": 0.00507843137254902, "loss": 2.2863, "step": 259 }, { "epoch": 0.00611764705882353, "grad_norm": 0.18359375, "learning_rate": 0.005098039215686274, "loss": 2.2522, "step": 260 }, { "epoch": 0.006141176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.005117647058823529, "loss": 2.0909, "step": 261 }, { "epoch": 0.006164705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.005137254901960784, "loss": 2.0993, "step": 262 }, { "epoch": 0.006188235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.00515686274509804, "loss": 2.1303, "step": 263 }, { "epoch": 0.006211764705882353, "grad_norm": 0.19140625, "learning_rate": 0.0051764705882352945, "loss": 2.1485, "step": 264 }, { "epoch": 0.0062352941176470585, "grad_norm": 0.2080078125, "learning_rate": 0.0051960784313725495, "loss": 2.2073, "step": 265 }, { "epoch": 0.006258823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.005215686274509804, "loss": 2.1251, "step": 266 }, { "epoch": 0.006282352941176471, "grad_norm": 0.2041015625, "learning_rate": 0.005235294117647059, "loss": 2.2986, "step": 267 }, { "epoch": 0.006305882352941177, "grad_norm": 0.19921875, "learning_rate": 0.005254901960784314, "loss": 2.3288, "step": 268 }, { "epoch": 0.006329411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.005274509803921569, "loss": 2.2303, "step": 269 }, { "epoch": 0.006352941176470588, "grad_norm": 0.201171875, "learning_rate": 0.005294117647058823, "loss": 2.2022, "step": 270 }, { "epoch": 0.006376470588235294, "grad_norm": 0.205078125, "learning_rate": 0.005313725490196078, "loss": 2.1487, "step": 271 }, { "epoch": 0.0064, "grad_norm": 0.2109375, "learning_rate": 0.005333333333333333, "loss": 2.3544, "step": 272 }, { "epoch": 0.0064235294117647055, "grad_norm": 0.1865234375, "learning_rate": 0.005352941176470588, "loss": 2.1653, "step": 273 }, { "epoch": 0.006447058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.005372549019607843, "loss": 2.3084, "step": 274 }, { "epoch": 0.006470588235294118, "grad_norm": 0.20703125, "learning_rate": 0.005392156862745098, "loss": 2.3033, "step": 275 }, { "epoch": 0.006494117647058824, "grad_norm": 0.197265625, "learning_rate": 0.005411764705882353, "loss": 2.1827, "step": 276 }, { "epoch": 0.006517647058823529, "grad_norm": 0.212890625, "learning_rate": 0.005431372549019608, "loss": 2.2299, "step": 277 }, { "epoch": 0.006541176470588235, "grad_norm": 0.21484375, "learning_rate": 0.005450980392156863, "loss": 2.1967, "step": 278 }, { "epoch": 0.006564705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.005470588235294119, "loss": 2.1228, "step": 279 }, { "epoch": 0.006588235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.0054901960784313735, "loss": 2.1909, "step": 280 }, { "epoch": 0.006611764705882353, "grad_norm": 0.203125, "learning_rate": 0.005509803921568628, "loss": 2.1586, "step": 281 }, { "epoch": 0.006635294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.0055294117647058825, "loss": 2.3327, "step": 282 }, { "epoch": 0.006658823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.0055490196078431375, "loss": 2.1708, "step": 283 }, { "epoch": 0.006682352941176471, "grad_norm": 0.2294921875, "learning_rate": 0.005568627450980392, "loss": 2.2662, "step": 284 }, { "epoch": 0.006705882352941176, "grad_norm": 0.197265625, "learning_rate": 0.005588235294117647, "loss": 2.2464, "step": 285 }, { "epoch": 0.006729411764705882, "grad_norm": 0.2041015625, "learning_rate": 0.005607843137254902, "loss": 2.0931, "step": 286 }, { "epoch": 0.006752941176470588, "grad_norm": 0.212890625, "learning_rate": 0.005627450980392157, "loss": 2.1943, "step": 287 }, { "epoch": 0.006776470588235294, "grad_norm": 0.197265625, "learning_rate": 0.005647058823529412, "loss": 2.3344, "step": 288 }, { "epoch": 0.0068, "grad_norm": 0.2119140625, "learning_rate": 0.005666666666666666, "loss": 2.2258, "step": 289 }, { "epoch": 0.006823529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.005686274509803921, "loss": 2.2853, "step": 290 }, { "epoch": 0.006847058823529412, "grad_norm": 0.205078125, "learning_rate": 0.005705882352941176, "loss": 2.256, "step": 291 }, { "epoch": 0.006870588235294118, "grad_norm": 0.197265625, "learning_rate": 0.005725490196078431, "loss": 2.1996, "step": 292 }, { "epoch": 0.006894117647058823, "grad_norm": 0.20703125, "learning_rate": 0.005745098039215686, "loss": 2.1479, "step": 293 }, { "epoch": 0.006917647058823529, "grad_norm": 0.2001953125, "learning_rate": 0.005764705882352941, "loss": 2.214, "step": 294 }, { "epoch": 0.006941176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.005784313725490197, "loss": 2.2882, "step": 295 }, { "epoch": 0.0069647058823529414, "grad_norm": 0.208984375, "learning_rate": 0.005803921568627452, "loss": 2.207, "step": 296 }, { "epoch": 0.006988235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.0058235294117647066, "loss": 2.2478, "step": 297 }, { "epoch": 0.007011764705882353, "grad_norm": 0.212890625, "learning_rate": 0.0058431372549019615, "loss": 2.2355, "step": 298 }, { "epoch": 0.007035294117647059, "grad_norm": 0.177734375, "learning_rate": 0.0058627450980392164, "loss": 2.1455, "step": 299 }, { "epoch": 0.007058823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.0058823529411764705, "loss": 2.2455, "step": 300 }, { "epoch": 0.00708235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.0059019607843137254, "loss": 2.0384, "step": 301 }, { "epoch": 0.007105882352941176, "grad_norm": 0.181640625, "learning_rate": 0.00592156862745098, "loss": 2.2009, "step": 302 }, { "epoch": 0.007129411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.005941176470588235, "loss": 2.0704, "step": 303 }, { "epoch": 0.0071529411764705885, "grad_norm": 0.1806640625, "learning_rate": 0.00596078431372549, "loss": 2.0397, "step": 304 }, { "epoch": 0.007176470588235294, "grad_norm": 0.1875, "learning_rate": 0.005980392156862745, "loss": 2.1747, "step": 305 }, { "epoch": 0.0072, "grad_norm": 0.1826171875, "learning_rate": 0.006, "loss": 2.2924, "step": 306 }, { "epoch": 0.007223529411764706, "grad_norm": 0.1875, "learning_rate": 0.006019607843137255, "loss": 2.1607, "step": 307 }, { "epoch": 0.007247058823529412, "grad_norm": 0.18359375, "learning_rate": 0.00603921568627451, "loss": 2.2654, "step": 308 }, { "epoch": 0.007270588235294117, "grad_norm": 0.1884765625, "learning_rate": 0.006058823529411764, "loss": 1.9992, "step": 309 }, { "epoch": 0.007294117647058823, "grad_norm": 0.197265625, "learning_rate": 0.006078431372549019, "loss": 2.1464, "step": 310 }, { "epoch": 0.007317647058823529, "grad_norm": 0.208984375, "learning_rate": 0.006098039215686275, "loss": 2.3234, "step": 311 }, { "epoch": 0.0073411764705882355, "grad_norm": 0.173828125, "learning_rate": 0.00611764705882353, "loss": 2.2331, "step": 312 }, { "epoch": 0.007364705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.006137254901960785, "loss": 2.1579, "step": 313 }, { "epoch": 0.007388235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.00615686274509804, "loss": 2.2339, "step": 314 }, { "epoch": 0.007411764705882353, "grad_norm": 0.193359375, "learning_rate": 0.0061764705882352946, "loss": 2.2379, "step": 315 }, { "epoch": 0.007435294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.0061960784313725495, "loss": 2.1149, "step": 316 }, { "epoch": 0.007458823529411765, "grad_norm": 0.177734375, "learning_rate": 0.006215686274509804, "loss": 2.267, "step": 317 }, { "epoch": 0.00748235294117647, "grad_norm": 0.193359375, "learning_rate": 0.006235294117647059, "loss": 2.1016, "step": 318 }, { "epoch": 0.0075058823529411765, "grad_norm": 0.17578125, "learning_rate": 0.006254901960784313, "loss": 2.2415, "step": 319 }, { "epoch": 0.0075294117647058826, "grad_norm": 0.2001953125, "learning_rate": 0.006274509803921568, "loss": 2.2038, "step": 320 }, { "epoch": 0.007552941176470589, "grad_norm": 0.1923828125, "learning_rate": 0.006294117647058823, "loss": 2.0857, "step": 321 }, { "epoch": 0.007576470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.006313725490196078, "loss": 2.216, "step": 322 }, { "epoch": 0.0076, "grad_norm": 0.201171875, "learning_rate": 0.006333333333333333, "loss": 2.0823, "step": 323 }, { "epoch": 0.007623529411764706, "grad_norm": 0.205078125, "learning_rate": 0.006352941176470588, "loss": 2.2043, "step": 324 }, { "epoch": 0.007647058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.006372549019607843, "loss": 2.1453, "step": 325 }, { "epoch": 0.007670588235294117, "grad_norm": 0.1748046875, "learning_rate": 0.006392156862745098, "loss": 1.9719, "step": 326 }, { "epoch": 0.0076941176470588235, "grad_norm": 0.1875, "learning_rate": 0.006411764705882354, "loss": 2.1663, "step": 327 }, { "epoch": 0.00771764705882353, "grad_norm": 0.19140625, "learning_rate": 0.006431372549019609, "loss": 1.9677, "step": 328 }, { "epoch": 0.007741176470588236, "grad_norm": 0.1884765625, "learning_rate": 0.006450980392156864, "loss": 2.0481, "step": 329 }, { "epoch": 0.007764705882352941, "grad_norm": 0.18359375, "learning_rate": 0.006470588235294118, "loss": 2.2803, "step": 330 }, { "epoch": 0.007788235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.006490196078431373, "loss": 2.1173, "step": 331 }, { "epoch": 0.007811764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.006509803921568628, "loss": 2.1792, "step": 332 }, { "epoch": 0.007835294117647058, "grad_norm": 0.19140625, "learning_rate": 0.0065294117647058825, "loss": 2.2146, "step": 333 }, { "epoch": 0.007858823529411765, "grad_norm": 0.228515625, "learning_rate": 0.0065490196078431375, "loss": 2.1554, "step": 334 }, { "epoch": 0.00788235294117647, "grad_norm": 0.216796875, "learning_rate": 0.006568627450980392, "loss": 2.2285, "step": 335 }, { "epoch": 0.007905882352941176, "grad_norm": 0.2138671875, "learning_rate": 0.006588235294117647, "loss": 2.2655, "step": 336 }, { "epoch": 0.007929411764705883, "grad_norm": 0.2119140625, "learning_rate": 0.006607843137254902, "loss": 2.2599, "step": 337 }, { "epoch": 0.007952941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.006627450980392156, "loss": 2.1056, "step": 338 }, { "epoch": 0.007976470588235295, "grad_norm": 0.19921875, "learning_rate": 0.006647058823529411, "loss": 2.0897, "step": 339 }, { "epoch": 0.008, "grad_norm": 0.2197265625, "learning_rate": 0.006666666666666666, "loss": 2.0884, "step": 340 }, { "epoch": 0.008023529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.006686274509803921, "loss": 2.1943, "step": 341 }, { "epoch": 0.008047058823529412, "grad_norm": 0.2158203125, "learning_rate": 0.006705882352941176, "loss": 2.1829, "step": 342 }, { "epoch": 0.008070588235294118, "grad_norm": 0.1904296875, "learning_rate": 0.006725490196078432, "loss": 2.0665, "step": 343 }, { "epoch": 0.008094117647058823, "grad_norm": 0.2060546875, "learning_rate": 0.006745098039215687, "loss": 2.3008, "step": 344 }, { "epoch": 0.00811764705882353, "grad_norm": 0.205078125, "learning_rate": 0.006764705882352942, "loss": 2.0112, "step": 345 }, { "epoch": 0.008141176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.006784313725490197, "loss": 2.2294, "step": 346 }, { "epoch": 0.008164705882352942, "grad_norm": 0.1796875, "learning_rate": 0.006803921568627452, "loss": 2.2759, "step": 347 }, { "epoch": 0.008188235294117647, "grad_norm": 0.203125, "learning_rate": 0.006823529411764707, "loss": 2.2057, "step": 348 }, { "epoch": 0.008211764705882352, "grad_norm": 0.181640625, "learning_rate": 0.006843137254901961, "loss": 2.09, "step": 349 }, { "epoch": 0.00823529411764706, "grad_norm": 0.205078125, "learning_rate": 0.006862745098039216, "loss": 2.0605, "step": 350 }, { "epoch": 0.008258823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.0068823529411764705, "loss": 2.2348, "step": 351 }, { "epoch": 0.00828235294117647, "grad_norm": 0.201171875, "learning_rate": 0.0069019607843137255, "loss": 2.1495, "step": 352 }, { "epoch": 0.008305882352941177, "grad_norm": 0.21484375, "learning_rate": 0.00692156862745098, "loss": 2.1784, "step": 353 }, { "epoch": 0.008329411764705882, "grad_norm": 0.1953125, "learning_rate": 0.006941176470588235, "loss": 2.0765, "step": 354 }, { "epoch": 0.008352941176470589, "grad_norm": 0.1943359375, "learning_rate": 0.00696078431372549, "loss": 2.2231, "step": 355 }, { "epoch": 0.008376470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.006980392156862745, "loss": 2.146, "step": 356 }, { "epoch": 0.0084, "grad_norm": 0.1962890625, "learning_rate": 0.006999999999999999, "loss": 2.0444, "step": 357 }, { "epoch": 0.008423529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.007019607843137254, "loss": 2.1239, "step": 358 }, { "epoch": 0.008447058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.007039215686274511, "loss": 2.1227, "step": 359 }, { "epoch": 0.008470588235294117, "grad_norm": 0.1787109375, "learning_rate": 0.007058823529411765, "loss": 2.1576, "step": 360 }, { "epoch": 0.008494117647058824, "grad_norm": 0.2099609375, "learning_rate": 0.00707843137254902, "loss": 2.065, "step": 361 }, { "epoch": 0.008517647058823529, "grad_norm": 0.1845703125, "learning_rate": 0.007098039215686275, "loss": 2.1478, "step": 362 }, { "epoch": 0.008541176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.00711764705882353, "loss": 2.0753, "step": 363 }, { "epoch": 0.008564705882352941, "grad_norm": 0.1953125, "learning_rate": 0.007137254901960785, "loss": 2.1687, "step": 364 }, { "epoch": 0.008588235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.00715686274509804, "loss": 2.2874, "step": 365 }, { "epoch": 0.008611764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.007176470588235295, "loss": 2.225, "step": 366 }, { "epoch": 0.008635294117647059, "grad_norm": 0.236328125, "learning_rate": 0.0071960784313725495, "loss": 2.3942, "step": 367 }, { "epoch": 0.008658823529411764, "grad_norm": 0.2109375, "learning_rate": 0.007215686274509804, "loss": 2.2125, "step": 368 }, { "epoch": 0.008682352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.0072352941176470585, "loss": 2.0872, "step": 369 }, { "epoch": 0.008705882352941176, "grad_norm": 0.1884765625, "learning_rate": 0.0072549019607843135, "loss": 2.141, "step": 370 }, { "epoch": 0.008729411764705883, "grad_norm": 0.19140625, "learning_rate": 0.007274509803921568, "loss": 2.1336, "step": 371 }, { "epoch": 0.008752941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.007294117647058823, "loss": 2.1631, "step": 372 }, { "epoch": 0.008776470588235294, "grad_norm": 0.1875, "learning_rate": 0.007313725490196078, "loss": 2.209, "step": 373 }, { "epoch": 0.0088, "grad_norm": 0.2236328125, "learning_rate": 0.007333333333333333, "loss": 2.1608, "step": 374 }, { "epoch": 0.008823529411764706, "grad_norm": 0.216796875, "learning_rate": 0.007352941176470589, "loss": 2.0835, "step": 375 }, { "epoch": 0.008847058823529411, "grad_norm": 0.21875, "learning_rate": 0.007372549019607844, "loss": 2.1893, "step": 376 }, { "epoch": 0.008870588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.007392156862745099, "loss": 2.185, "step": 377 }, { "epoch": 0.008894117647058823, "grad_norm": 0.193359375, "learning_rate": 0.007411764705882354, "loss": 2.3353, "step": 378 }, { "epoch": 0.00891764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.007431372549019608, "loss": 2.3497, "step": 379 }, { "epoch": 0.008941176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.007450980392156863, "loss": 2.0108, "step": 380 }, { "epoch": 0.00896470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.007470588235294118, "loss": 2.1573, "step": 381 }, { "epoch": 0.008988235294117648, "grad_norm": 0.185546875, "learning_rate": 0.007490196078431373, "loss": 2.1313, "step": 382 }, { "epoch": 0.009011764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.007509803921568628, "loss": 2.1006, "step": 383 }, { "epoch": 0.009035294117647058, "grad_norm": 0.1875, "learning_rate": 0.0075294117647058826, "loss": 2.1571, "step": 384 }, { "epoch": 0.009058823529411765, "grad_norm": 0.1796875, "learning_rate": 0.0075490196078431375, "loss": 2.0449, "step": 385 }, { "epoch": 0.00908235294117647, "grad_norm": 0.201171875, "learning_rate": 0.007568627450980392, "loss": 2.1997, "step": 386 }, { "epoch": 0.009105882352941177, "grad_norm": 0.1884765625, "learning_rate": 0.0075882352941176465, "loss": 2.1927, "step": 387 }, { "epoch": 0.009129411764705882, "grad_norm": 0.181640625, "learning_rate": 0.0076078431372549014, "loss": 2.1963, "step": 388 }, { "epoch": 0.009152941176470588, "grad_norm": 0.193359375, "learning_rate": 0.007627450980392156, "loss": 2.0641, "step": 389 }, { "epoch": 0.009176470588235295, "grad_norm": 0.162109375, "learning_rate": 0.007647058823529411, "loss": 2.2003, "step": 390 }, { "epoch": 0.0092, "grad_norm": 0.205078125, "learning_rate": 0.007666666666666667, "loss": 2.1684, "step": 391 }, { "epoch": 0.009223529411764705, "grad_norm": 0.1708984375, "learning_rate": 0.007686274509803922, "loss": 2.1669, "step": 392 }, { "epoch": 0.009247058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.007705882352941177, "loss": 2.1013, "step": 393 }, { "epoch": 0.009270588235294117, "grad_norm": 0.1806640625, "learning_rate": 0.007725490196078432, "loss": 2.2286, "step": 394 }, { "epoch": 0.009294117647058824, "grad_norm": 0.205078125, "learning_rate": 0.007745098039215687, "loss": 2.0893, "step": 395 }, { "epoch": 0.00931764705882353, "grad_norm": 0.16796875, "learning_rate": 0.007764705882352942, "loss": 2.127, "step": 396 }, { "epoch": 0.009341176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.007784313725490197, "loss": 2.217, "step": 397 }, { "epoch": 0.009364705882352942, "grad_norm": 0.2080078125, "learning_rate": 0.007803921568627451, "loss": 2.1797, "step": 398 }, { "epoch": 0.009388235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.007823529411764707, "loss": 2.1499, "step": 399 }, { "epoch": 0.009411764705882352, "grad_norm": 0.1845703125, "learning_rate": 0.00784313725490196, "loss": 2.1353, "step": 400 }, { "epoch": 0.009435294117647059, "grad_norm": 0.2138671875, "learning_rate": 0.007862745098039216, "loss": 2.1124, "step": 401 }, { "epoch": 0.009458823529411764, "grad_norm": 0.1943359375, "learning_rate": 0.00788235294117647, "loss": 2.0926, "step": 402 }, { "epoch": 0.009482352941176471, "grad_norm": 0.1943359375, "learning_rate": 0.007901960784313725, "loss": 2.1036, "step": 403 }, { "epoch": 0.009505882352941177, "grad_norm": 0.2138671875, "learning_rate": 0.00792156862745098, "loss": 2.2467, "step": 404 }, { "epoch": 0.009529411764705882, "grad_norm": 0.2041015625, "learning_rate": 0.007941176470588234, "loss": 2.1827, "step": 405 }, { "epoch": 0.009552941176470589, "grad_norm": 0.2021484375, "learning_rate": 0.00796078431372549, "loss": 2.1607, "step": 406 }, { "epoch": 0.009576470588235294, "grad_norm": 0.1953125, "learning_rate": 0.007980392156862746, "loss": 2.1403, "step": 407 }, { "epoch": 0.0096, "grad_norm": 0.1845703125, "learning_rate": 0.008, "loss": 2.0227, "step": 408 }, { "epoch": 0.009623529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.008019607843137256, "loss": 2.0715, "step": 409 }, { "epoch": 0.009647058823529411, "grad_norm": 0.19921875, "learning_rate": 0.00803921568627451, "loss": 2.1582, "step": 410 }, { "epoch": 0.009670588235294118, "grad_norm": 0.189453125, "learning_rate": 0.008058823529411766, "loss": 2.1764, "step": 411 }, { "epoch": 0.009694117647058824, "grad_norm": 0.203125, "learning_rate": 0.00807843137254902, "loss": 2.145, "step": 412 }, { "epoch": 0.009717647058823529, "grad_norm": 0.205078125, "learning_rate": 0.008098039215686274, "loss": 1.7898, "step": 413 }, { "epoch": 0.009741176470588236, "grad_norm": 0.1787109375, "learning_rate": 0.00811764705882353, "loss": 1.9961, "step": 414 }, { "epoch": 0.009764705882352941, "grad_norm": 0.2138671875, "learning_rate": 0.008137254901960784, "loss": 2.0484, "step": 415 }, { "epoch": 0.009788235294117646, "grad_norm": 0.1904296875, "learning_rate": 0.00815686274509804, "loss": 2.1118, "step": 416 }, { "epoch": 0.009811764705882353, "grad_norm": 0.19921875, "learning_rate": 0.008176470588235294, "loss": 2.1203, "step": 417 }, { "epoch": 0.009835294117647058, "grad_norm": 0.1982421875, "learning_rate": 0.00819607843137255, "loss": 2.15, "step": 418 }, { "epoch": 0.009858823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.008215686274509804, "loss": 2.0821, "step": 419 }, { "epoch": 0.00988235294117647, "grad_norm": 0.201171875, "learning_rate": 0.00823529411764706, "loss": 2.1261, "step": 420 }, { "epoch": 0.009905882352941176, "grad_norm": 0.193359375, "learning_rate": 0.008254901960784313, "loss": 2.0952, "step": 421 }, { "epoch": 0.009929411764705883, "grad_norm": 0.220703125, "learning_rate": 0.008274509803921568, "loss": 2.1504, "step": 422 }, { "epoch": 0.009952941176470588, "grad_norm": 0.201171875, "learning_rate": 0.008294117647058823, "loss": 2.0334, "step": 423 }, { "epoch": 0.009976470588235295, "grad_norm": 0.19921875, "learning_rate": 0.00831372549019608, "loss": 2.0421, "step": 424 }, { "epoch": 0.01, "grad_norm": 0.1826171875, "learning_rate": 0.008333333333333333, "loss": 2.1192, "step": 425 }, { "epoch": 0.010023529411764705, "grad_norm": 0.2041015625, "learning_rate": 0.008352941176470589, "loss": 2.1195, "step": 426 }, { "epoch": 0.010047058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.008372549019607843, "loss": 2.1695, "step": 427 }, { "epoch": 0.010070588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.008392156862745099, "loss": 1.9726, "step": 428 }, { "epoch": 0.010094117647058823, "grad_norm": 0.1767578125, "learning_rate": 0.008411764705882353, "loss": 2.1306, "step": 429 }, { "epoch": 0.01011764705882353, "grad_norm": 0.17578125, "learning_rate": 0.008431372549019609, "loss": 2.0452, "step": 430 }, { "epoch": 0.010141176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.008450980392156863, "loss": 2.2273, "step": 431 }, { "epoch": 0.010164705882352942, "grad_norm": 0.189453125, "learning_rate": 0.008470588235294117, "loss": 2.0373, "step": 432 }, { "epoch": 0.010188235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.008490196078431373, "loss": 1.8611, "step": 433 }, { "epoch": 0.010211764705882352, "grad_norm": 0.19921875, "learning_rate": 0.008509803921568627, "loss": 2.1983, "step": 434 }, { "epoch": 0.01023529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.008529411764705883, "loss": 2.1181, "step": 435 }, { "epoch": 0.010258823529411765, "grad_norm": 0.236328125, "learning_rate": 0.008549019607843137, "loss": 2.1015, "step": 436 }, { "epoch": 0.01028235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.008568627450980392, "loss": 2.0905, "step": 437 }, { "epoch": 0.010305882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.008588235294117647, "loss": 2.2497, "step": 438 }, { "epoch": 0.010329411764705882, "grad_norm": 0.2060546875, "learning_rate": 0.008607843137254902, "loss": 2.1347, "step": 439 }, { "epoch": 0.010352941176470589, "grad_norm": 0.1884765625, "learning_rate": 0.008627450980392158, "loss": 2.1154, "step": 440 }, { "epoch": 0.010376470588235294, "grad_norm": 0.18359375, "learning_rate": 0.008647058823529412, "loss": 1.9484, "step": 441 }, { "epoch": 0.0104, "grad_norm": 0.197265625, "learning_rate": 0.008666666666666668, "loss": 1.9952, "step": 442 }, { "epoch": 0.010423529411764706, "grad_norm": 0.19140625, "learning_rate": 0.008686274509803922, "loss": 2.0503, "step": 443 }, { "epoch": 0.010447058823529412, "grad_norm": 0.216796875, "learning_rate": 0.008705882352941176, "loss": 2.1388, "step": 444 }, { "epoch": 0.010470588235294117, "grad_norm": 0.1982421875, "learning_rate": 0.008725490196078432, "loss": 2.0707, "step": 445 }, { "epoch": 0.010494117647058824, "grad_norm": 0.19921875, "learning_rate": 0.008745098039215686, "loss": 2.1499, "step": 446 }, { "epoch": 0.01051764705882353, "grad_norm": 0.228515625, "learning_rate": 0.008764705882352942, "loss": 2.1149, "step": 447 }, { "epoch": 0.010541176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.008784313725490196, "loss": 2.1019, "step": 448 }, { "epoch": 0.010564705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.008803921568627452, "loss": 2.0561, "step": 449 }, { "epoch": 0.010588235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.008823529411764706, "loss": 2.1767, "step": 450 }, { "epoch": 0.010611764705882354, "grad_norm": 0.193359375, "learning_rate": 0.00884313725490196, "loss": 2.1577, "step": 451 }, { "epoch": 0.010635294117647059, "grad_norm": 0.185546875, "learning_rate": 0.008862745098039216, "loss": 2.115, "step": 452 }, { "epoch": 0.010658823529411764, "grad_norm": 0.1923828125, "learning_rate": 0.00888235294117647, "loss": 2.1383, "step": 453 }, { "epoch": 0.010682352941176471, "grad_norm": 0.234375, "learning_rate": 0.008901960784313726, "loss": 2.1398, "step": 454 }, { "epoch": 0.010705882352941176, "grad_norm": 0.1748046875, "learning_rate": 0.008921568627450981, "loss": 2.1097, "step": 455 }, { "epoch": 0.010729411764705883, "grad_norm": 0.2060546875, "learning_rate": 0.008941176470588235, "loss": 2.1297, "step": 456 }, { "epoch": 0.010752941176470588, "grad_norm": 0.189453125, "learning_rate": 0.008960784313725491, "loss": 2.1422, "step": 457 }, { "epoch": 0.010776470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.008980392156862745, "loss": 2.1612, "step": 458 }, { "epoch": 0.0108, "grad_norm": 0.240234375, "learning_rate": 0.009000000000000001, "loss": 2.1037, "step": 459 }, { "epoch": 0.010823529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.009019607843137255, "loss": 1.9064, "step": 460 }, { "epoch": 0.010847058823529411, "grad_norm": 0.216796875, "learning_rate": 0.009039215686274511, "loss": 2.0484, "step": 461 }, { "epoch": 0.010870588235294118, "grad_norm": 0.21875, "learning_rate": 0.009058823529411765, "loss": 2.1581, "step": 462 }, { "epoch": 0.010894117647058823, "grad_norm": 0.1923828125, "learning_rate": 0.009078431372549019, "loss": 2.0081, "step": 463 }, { "epoch": 0.01091764705882353, "grad_norm": 0.2265625, "learning_rate": 0.009098039215686275, "loss": 2.3011, "step": 464 }, { "epoch": 0.010941176470588235, "grad_norm": 0.22265625, "learning_rate": 0.009117647058823529, "loss": 2.1539, "step": 465 }, { "epoch": 0.01096470588235294, "grad_norm": 0.23046875, "learning_rate": 0.009137254901960785, "loss": 2.2157, "step": 466 }, { "epoch": 0.010988235294117648, "grad_norm": 0.228515625, "learning_rate": 0.009156862745098039, "loss": 2.1755, "step": 467 }, { "epoch": 0.011011764705882353, "grad_norm": 0.21484375, "learning_rate": 0.009176470588235295, "loss": 2.108, "step": 468 }, { "epoch": 0.011035294117647058, "grad_norm": 0.2138671875, "learning_rate": 0.009196078431372549, "loss": 2.1063, "step": 469 }, { "epoch": 0.011058823529411765, "grad_norm": 0.20703125, "learning_rate": 0.009215686274509803, "loss": 2.175, "step": 470 }, { "epoch": 0.01108235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.00923529411764706, "loss": 2.0269, "step": 471 }, { "epoch": 0.011105882352941177, "grad_norm": 0.177734375, "learning_rate": 0.009254901960784314, "loss": 2.0749, "step": 472 }, { "epoch": 0.011129411764705882, "grad_norm": 0.20703125, "learning_rate": 0.009274509803921568, "loss": 2.1214, "step": 473 }, { "epoch": 0.011152941176470588, "grad_norm": 0.193359375, "learning_rate": 0.009294117647058824, "loss": 2.2554, "step": 474 }, { "epoch": 0.011176470588235295, "grad_norm": 0.2216796875, "learning_rate": 0.009313725490196078, "loss": 2.1121, "step": 475 }, { "epoch": 0.0112, "grad_norm": 0.20703125, "learning_rate": 0.009333333333333334, "loss": 2.2039, "step": 476 }, { "epoch": 0.011223529411764705, "grad_norm": 0.2294921875, "learning_rate": 0.009352941176470588, "loss": 1.9848, "step": 477 }, { "epoch": 0.011247058823529412, "grad_norm": 0.22265625, "learning_rate": 0.009372549019607844, "loss": 2.0259, "step": 478 }, { "epoch": 0.011270588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.009392156862745098, "loss": 2.1038, "step": 479 }, { "epoch": 0.011294117647058824, "grad_norm": 0.1748046875, "learning_rate": 0.009411764705882354, "loss": 2.1103, "step": 480 }, { "epoch": 0.01131764705882353, "grad_norm": 0.1875, "learning_rate": 0.009431372549019608, "loss": 2.067, "step": 481 }, { "epoch": 0.011341176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.009450980392156862, "loss": 2.0848, "step": 482 }, { "epoch": 0.011364705882352942, "grad_norm": 0.2041015625, "learning_rate": 0.009470588235294118, "loss": 2.1269, "step": 483 }, { "epoch": 0.011388235294117647, "grad_norm": 0.205078125, "learning_rate": 0.009490196078431372, "loss": 2.0856, "step": 484 }, { "epoch": 0.011411764705882352, "grad_norm": 0.201171875, "learning_rate": 0.009509803921568628, "loss": 2.0485, "step": 485 }, { "epoch": 0.011435294117647059, "grad_norm": 0.201171875, "learning_rate": 0.009529411764705882, "loss": 2.0014, "step": 486 }, { "epoch": 0.011458823529411764, "grad_norm": 0.1689453125, "learning_rate": 0.009549019607843138, "loss": 2.1104, "step": 487 }, { "epoch": 0.011482352941176471, "grad_norm": 0.18359375, "learning_rate": 0.009568627450980393, "loss": 2.1822, "step": 488 }, { "epoch": 0.011505882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.009588235294117647, "loss": 1.8574, "step": 489 }, { "epoch": 0.011529411764705882, "grad_norm": 0.1796875, "learning_rate": 0.009607843137254903, "loss": 2.045, "step": 490 }, { "epoch": 0.011552941176470589, "grad_norm": 0.193359375, "learning_rate": 0.009627450980392157, "loss": 2.1607, "step": 491 }, { "epoch": 0.011576470588235294, "grad_norm": 0.21484375, "learning_rate": 0.009647058823529411, "loss": 2.1248, "step": 492 }, { "epoch": 0.0116, "grad_norm": 0.251953125, "learning_rate": 0.009666666666666667, "loss": 2.2034, "step": 493 }, { "epoch": 0.011623529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.009686274509803921, "loss": 2.1445, "step": 494 }, { "epoch": 0.011647058823529411, "grad_norm": 0.1884765625, "learning_rate": 0.009705882352941177, "loss": 2.1803, "step": 495 }, { "epoch": 0.011670588235294118, "grad_norm": 0.197265625, "learning_rate": 0.009725490196078431, "loss": 1.9335, "step": 496 }, { "epoch": 0.011694117647058824, "grad_norm": 0.2138671875, "learning_rate": 0.009745098039215687, "loss": 2.0651, "step": 497 }, { "epoch": 0.011717647058823529, "grad_norm": 0.1953125, "learning_rate": 0.009764705882352941, "loss": 2.0316, "step": 498 }, { "epoch": 0.011741176470588236, "grad_norm": 0.2119140625, "learning_rate": 0.009784313725490197, "loss": 2.1402, "step": 499 }, { "epoch": 0.011764705882352941, "grad_norm": 0.208984375, "learning_rate": 0.00980392156862745, "loss": 2.0787, "step": 500 }, { "epoch": 0.011788235294117646, "grad_norm": 0.19921875, "learning_rate": 0.009823529411764705, "loss": 2.0241, "step": 501 }, { "epoch": 0.011811764705882353, "grad_norm": 0.26953125, "learning_rate": 0.00984313725490196, "loss": 2.0326, "step": 502 }, { "epoch": 0.011835294117647058, "grad_norm": 0.197265625, "learning_rate": 0.009862745098039217, "loss": 1.9835, "step": 503 }, { "epoch": 0.011858823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.00988235294117647, "loss": 2.0104, "step": 504 }, { "epoch": 0.01188235294117647, "grad_norm": 0.1953125, "learning_rate": 0.009901960784313726, "loss": 2.2219, "step": 505 }, { "epoch": 0.011905882352941176, "grad_norm": 0.19140625, "learning_rate": 0.00992156862745098, "loss": 2.1326, "step": 506 }, { "epoch": 0.011929411764705883, "grad_norm": 0.18359375, "learning_rate": 0.009941176470588236, "loss": 2.0285, "step": 507 }, { "epoch": 0.011952941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.00996078431372549, "loss": 2.2001, "step": 508 }, { "epoch": 0.011976470588235293, "grad_norm": 0.1875, "learning_rate": 0.009980392156862746, "loss": 2.0764, "step": 509 }, { "epoch": 0.012, "grad_norm": 0.20703125, "learning_rate": 0.01, "loss": 2.1242, "step": 510 }, { "epoch": 0.012023529411764705, "grad_norm": 0.1865234375, "learning_rate": 0.010019607843137254, "loss": 2.0826, "step": 511 }, { "epoch": 0.012047058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.01003921568627451, "loss": 2.0804, "step": 512 }, { "epoch": 0.012070588235294118, "grad_norm": 0.224609375, "learning_rate": 0.010058823529411764, "loss": 2.091, "step": 513 }, { "epoch": 0.012094117647058823, "grad_norm": 0.197265625, "learning_rate": 0.01007843137254902, "loss": 2.0525, "step": 514 }, { "epoch": 0.01211764705882353, "grad_norm": 0.216796875, "learning_rate": 0.010098039215686274, "loss": 1.9225, "step": 515 }, { "epoch": 0.012141176470588235, "grad_norm": 0.1640625, "learning_rate": 0.01011764705882353, "loss": 2.0344, "step": 516 }, { "epoch": 0.01216470588235294, "grad_norm": 0.181640625, "learning_rate": 0.010137254901960784, "loss": 2.1709, "step": 517 }, { "epoch": 0.012188235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.01015686274509804, "loss": 2.0741, "step": 518 }, { "epoch": 0.012211764705882353, "grad_norm": 0.203125, "learning_rate": 0.010176470588235294, "loss": 2.1261, "step": 519 }, { "epoch": 0.01223529411764706, "grad_norm": 0.19140625, "learning_rate": 0.010196078431372548, "loss": 2.0721, "step": 520 }, { "epoch": 0.012258823529411765, "grad_norm": 0.2294921875, "learning_rate": 0.010215686274509804, "loss": 1.9618, "step": 521 }, { "epoch": 0.01228235294117647, "grad_norm": 0.19140625, "learning_rate": 0.010235294117647058, "loss": 2.1736, "step": 522 }, { "epoch": 0.012305882352941177, "grad_norm": 0.189453125, "learning_rate": 0.010254901960784314, "loss": 2.0893, "step": 523 }, { "epoch": 0.012329411764705882, "grad_norm": 0.2060546875, "learning_rate": 0.010274509803921568, "loss": 1.9437, "step": 524 }, { "epoch": 0.012352941176470587, "grad_norm": 0.1826171875, "learning_rate": 0.010294117647058823, "loss": 2.0714, "step": 525 }, { "epoch": 0.012376470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.01031372549019608, "loss": 2.0922, "step": 526 }, { "epoch": 0.0124, "grad_norm": 0.1943359375, "learning_rate": 0.010333333333333335, "loss": 2.2482, "step": 527 }, { "epoch": 0.012423529411764707, "grad_norm": 0.322265625, "learning_rate": 0.010352941176470589, "loss": 2.0776, "step": 528 }, { "epoch": 0.012447058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.010372549019607845, "loss": 2.0218, "step": 529 }, { "epoch": 0.012470588235294117, "grad_norm": 0.1845703125, "learning_rate": 0.010392156862745099, "loss": 2.1595, "step": 530 }, { "epoch": 0.012494117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.010411764705882355, "loss": 2.0498, "step": 531 }, { "epoch": 0.01251764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.010431372549019609, "loss": 2.1099, "step": 532 }, { "epoch": 0.012541176470588236, "grad_norm": 0.162109375, "learning_rate": 0.010450980392156863, "loss": 2.1428, "step": 533 }, { "epoch": 0.012564705882352941, "grad_norm": 0.1953125, "learning_rate": 0.010470588235294119, "loss": 2.0563, "step": 534 }, { "epoch": 0.012588235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.010490196078431373, "loss": 2.0932, "step": 535 }, { "epoch": 0.012611764705882354, "grad_norm": 0.177734375, "learning_rate": 0.010509803921568629, "loss": 2.033, "step": 536 }, { "epoch": 0.012635294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.010529411764705883, "loss": 2.0231, "step": 537 }, { "epoch": 0.012658823529411764, "grad_norm": 0.1767578125, "learning_rate": 0.010549019607843138, "loss": 2.059, "step": 538 }, { "epoch": 0.012682352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.010568627450980392, "loss": 2.1413, "step": 539 }, { "epoch": 0.012705882352941176, "grad_norm": 0.1748046875, "learning_rate": 0.010588235294117647, "loss": 2.204, "step": 540 }, { "epoch": 0.012729411764705883, "grad_norm": 0.1826171875, "learning_rate": 0.010607843137254902, "loss": 2.1414, "step": 541 }, { "epoch": 0.012752941176470588, "grad_norm": 0.1953125, "learning_rate": 0.010627450980392156, "loss": 2.1974, "step": 542 }, { "epoch": 0.012776470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.010647058823529412, "loss": 2.0953, "step": 543 }, { "epoch": 0.0128, "grad_norm": 0.2041015625, "learning_rate": 0.010666666666666666, "loss": 2.1846, "step": 544 }, { "epoch": 0.012823529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.010686274509803922, "loss": 2.1334, "step": 545 }, { "epoch": 0.012847058823529411, "grad_norm": 0.197265625, "learning_rate": 0.010705882352941176, "loss": 2.2247, "step": 546 }, { "epoch": 0.012870588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.010725490196078432, "loss": 2.0523, "step": 547 }, { "epoch": 0.012894117647058823, "grad_norm": 0.1650390625, "learning_rate": 0.010745098039215686, "loss": 2.0604, "step": 548 }, { "epoch": 0.01291764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.01076470588235294, "loss": 2.1265, "step": 549 }, { "epoch": 0.012941176470588235, "grad_norm": 0.1796875, "learning_rate": 0.010784313725490196, "loss": 2.017, "step": 550 }, { "epoch": 0.01296470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.01080392156862745, "loss": 2.1126, "step": 551 }, { "epoch": 0.012988235294117648, "grad_norm": 0.216796875, "learning_rate": 0.010823529411764706, "loss": 2.139, "step": 552 }, { "epoch": 0.013011764705882353, "grad_norm": 0.216796875, "learning_rate": 0.01084313725490196, "loss": 2.1673, "step": 553 }, { "epoch": 0.013035294117647058, "grad_norm": 0.205078125, "learning_rate": 0.010862745098039216, "loss": 2.0313, "step": 554 }, { "epoch": 0.013058823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.01088235294117647, "loss": 2.0061, "step": 555 }, { "epoch": 0.01308235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.010901960784313726, "loss": 2.0618, "step": 556 }, { "epoch": 0.013105882352941177, "grad_norm": 0.2158203125, "learning_rate": 0.01092156862745098, "loss": 2.1524, "step": 557 }, { "epoch": 0.013129411764705883, "grad_norm": 0.1982421875, "learning_rate": 0.010941176470588237, "loss": 2.0503, "step": 558 }, { "epoch": 0.013152941176470588, "grad_norm": 0.189453125, "learning_rate": 0.010960784313725491, "loss": 2.0985, "step": 559 }, { "epoch": 0.013176470588235295, "grad_norm": 0.234375, "learning_rate": 0.010980392156862747, "loss": 2.1575, "step": 560 }, { "epoch": 0.0132, "grad_norm": 0.2041015625, "learning_rate": 0.011000000000000001, "loss": 1.8644, "step": 561 }, { "epoch": 0.013223529411764705, "grad_norm": 0.212890625, "learning_rate": 0.011019607843137255, "loss": 2.0956, "step": 562 }, { "epoch": 0.013247058823529412, "grad_norm": 0.25, "learning_rate": 0.011039215686274511, "loss": 2.2369, "step": 563 }, { "epoch": 0.013270588235294117, "grad_norm": 0.224609375, "learning_rate": 0.011058823529411765, "loss": 1.9664, "step": 564 }, { "epoch": 0.013294117647058824, "grad_norm": 0.2314453125, "learning_rate": 0.01107843137254902, "loss": 2.1718, "step": 565 }, { "epoch": 0.01331764705882353, "grad_norm": 0.212890625, "learning_rate": 0.011098039215686275, "loss": 2.1139, "step": 566 }, { "epoch": 0.013341176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.01111764705882353, "loss": 2.024, "step": 567 }, { "epoch": 0.013364705882352942, "grad_norm": 0.185546875, "learning_rate": 0.011137254901960785, "loss": 1.9504, "step": 568 }, { "epoch": 0.013388235294117647, "grad_norm": 0.1953125, "learning_rate": 0.01115686274509804, "loss": 1.9977, "step": 569 }, { "epoch": 0.013411764705882352, "grad_norm": 0.212890625, "learning_rate": 0.011176470588235295, "loss": 2.2212, "step": 570 }, { "epoch": 0.01343529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.011196078431372549, "loss": 2.1077, "step": 571 }, { "epoch": 0.013458823529411764, "grad_norm": 0.1748046875, "learning_rate": 0.011215686274509805, "loss": 1.9784, "step": 572 }, { "epoch": 0.013482352941176471, "grad_norm": 0.1982421875, "learning_rate": 0.011235294117647059, "loss": 2.1405, "step": 573 }, { "epoch": 0.013505882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.011254901960784314, "loss": 1.9762, "step": 574 }, { "epoch": 0.013529411764705882, "grad_norm": 0.193359375, "learning_rate": 0.011274509803921568, "loss": 2.1075, "step": 575 }, { "epoch": 0.013552941176470589, "grad_norm": 0.1826171875, "learning_rate": 0.011294117647058824, "loss": 2.0681, "step": 576 }, { "epoch": 0.013576470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.011313725490196078, "loss": 2.3306, "step": 577 }, { "epoch": 0.0136, "grad_norm": 0.2236328125, "learning_rate": 0.011333333333333332, "loss": 2.0423, "step": 578 }, { "epoch": 0.013623529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.011352941176470588, "loss": 2.1421, "step": 579 }, { "epoch": 0.013647058823529411, "grad_norm": 0.2099609375, "learning_rate": 0.011372549019607842, "loss": 2.1155, "step": 580 }, { "epoch": 0.013670588235294118, "grad_norm": 0.201171875, "learning_rate": 0.011392156862745098, "loss": 2.028, "step": 581 }, { "epoch": 0.013694117647058824, "grad_norm": 0.2041015625, "learning_rate": 0.011411764705882352, "loss": 2.1249, "step": 582 }, { "epoch": 0.013717647058823529, "grad_norm": 0.2041015625, "learning_rate": 0.011431372549019608, "loss": 2.0044, "step": 583 }, { "epoch": 0.013741176470588236, "grad_norm": 0.2099609375, "learning_rate": 0.011450980392156862, "loss": 2.199, "step": 584 }, { "epoch": 0.013764705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.011470588235294118, "loss": 2.028, "step": 585 }, { "epoch": 0.013788235294117646, "grad_norm": 0.23046875, "learning_rate": 0.011490196078431372, "loss": 2.1397, "step": 586 }, { "epoch": 0.013811764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.011509803921568626, "loss": 2.0652, "step": 587 }, { "epoch": 0.013835294117647058, "grad_norm": 0.20703125, "learning_rate": 0.011529411764705882, "loss": 2.0602, "step": 588 }, { "epoch": 0.013858823529411765, "grad_norm": 0.189453125, "learning_rate": 0.011549019607843136, "loss": 2.0649, "step": 589 }, { "epoch": 0.01388235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.011568627450980393, "loss": 2.0808, "step": 590 }, { "epoch": 0.013905882352941176, "grad_norm": 0.1904296875, "learning_rate": 0.011588235294117647, "loss": 2.137, "step": 591 }, { "epoch": 0.013929411764705883, "grad_norm": 0.2041015625, "learning_rate": 0.011607843137254903, "loss": 2.1129, "step": 592 }, { "epoch": 0.013952941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.011627450980392157, "loss": 2.2098, "step": 593 }, { "epoch": 0.013976470588235293, "grad_norm": 0.2197265625, "learning_rate": 0.011647058823529413, "loss": 2.1169, "step": 594 }, { "epoch": 0.014, "grad_norm": 0.228515625, "learning_rate": 0.011666666666666667, "loss": 2.053, "step": 595 }, { "epoch": 0.014023529411764706, "grad_norm": 0.18359375, "learning_rate": 0.011686274509803923, "loss": 2.0214, "step": 596 }, { "epoch": 0.014047058823529412, "grad_norm": 0.1962890625, "learning_rate": 0.011705882352941177, "loss": 2.0643, "step": 597 }, { "epoch": 0.014070588235294118, "grad_norm": 0.1875, "learning_rate": 0.011725490196078433, "loss": 2.0185, "step": 598 }, { "epoch": 0.014094117647058823, "grad_norm": 0.19921875, "learning_rate": 0.011745098039215687, "loss": 2.1595, "step": 599 }, { "epoch": 0.01411764705882353, "grad_norm": 7.375, "learning_rate": 0.011764705882352941, "loss": 2.2205, "step": 600 }, { "epoch": 0.014141176470588235, "grad_norm": 0.16796875, "learning_rate": 0.011784313725490197, "loss": 1.9963, "step": 601 }, { "epoch": 0.01416470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.011803921568627451, "loss": 2.193, "step": 602 }, { "epoch": 0.014188235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.011823529411764707, "loss": 2.0049, "step": 603 }, { "epoch": 0.014211764705882353, "grad_norm": 0.203125, "learning_rate": 0.01184313725490196, "loss": 2.1838, "step": 604 }, { "epoch": 0.01423529411764706, "grad_norm": 0.20703125, "learning_rate": 0.011862745098039217, "loss": 2.0535, "step": 605 }, { "epoch": 0.014258823529411765, "grad_norm": 0.1708984375, "learning_rate": 0.01188235294117647, "loss": 1.9982, "step": 606 }, { "epoch": 0.01428235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.011901960784313726, "loss": 2.0949, "step": 607 }, { "epoch": 0.014305882352941177, "grad_norm": 0.17578125, "learning_rate": 0.01192156862745098, "loss": 2.0606, "step": 608 }, { "epoch": 0.014329411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.011941176470588235, "loss": 2.136, "step": 609 }, { "epoch": 0.014352941176470587, "grad_norm": 0.1806640625, "learning_rate": 0.01196078431372549, "loss": 2.1132, "step": 610 }, { "epoch": 0.014376470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.011980392156862744, "loss": 2.0711, "step": 611 }, { "epoch": 0.0144, "grad_norm": 0.2138671875, "learning_rate": 0.012, "loss": 2.1503, "step": 612 }, { "epoch": 0.014423529411764707, "grad_norm": 0.203125, "learning_rate": 0.012019607843137254, "loss": 1.9778, "step": 613 }, { "epoch": 0.014447058823529412, "grad_norm": 0.1796875, "learning_rate": 0.01203921568627451, "loss": 2.1092, "step": 614 }, { "epoch": 0.014470588235294117, "grad_norm": 0.1787109375, "learning_rate": 0.012058823529411764, "loss": 2.0752, "step": 615 }, { "epoch": 0.014494117647058824, "grad_norm": 0.169921875, "learning_rate": 0.01207843137254902, "loss": 2.0316, "step": 616 }, { "epoch": 0.01451764705882353, "grad_norm": 0.185546875, "learning_rate": 0.012098039215686274, "loss": 2.0265, "step": 617 }, { "epoch": 0.014541176470588234, "grad_norm": 0.1953125, "learning_rate": 0.012117647058823528, "loss": 2.0905, "step": 618 }, { "epoch": 0.014564705882352941, "grad_norm": 0.2060546875, "learning_rate": 0.012137254901960784, "loss": 2.0174, "step": 619 }, { "epoch": 0.014588235294117647, "grad_norm": 0.203125, "learning_rate": 0.012156862745098038, "loss": 2.1529, "step": 620 }, { "epoch": 0.014611764705882354, "grad_norm": 0.208984375, "learning_rate": 0.012176470588235294, "loss": 1.9191, "step": 621 }, { "epoch": 0.014635294117647059, "grad_norm": 0.197265625, "learning_rate": 0.01219607843137255, "loss": 2.1311, "step": 622 }, { "epoch": 0.014658823529411764, "grad_norm": 0.189453125, "learning_rate": 0.012215686274509805, "loss": 2.0927, "step": 623 }, { "epoch": 0.014682352941176471, "grad_norm": 0.2451171875, "learning_rate": 0.01223529411764706, "loss": 1.945, "step": 624 }, { "epoch": 0.014705882352941176, "grad_norm": 0.2109375, "learning_rate": 0.012254901960784315, "loss": 2.0937, "step": 625 }, { "epoch": 0.014729411764705881, "grad_norm": 0.1904296875, "learning_rate": 0.01227450980392157, "loss": 2.0055, "step": 626 }, { "epoch": 0.014752941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.012294117647058825, "loss": 2.2028, "step": 627 }, { "epoch": 0.014776470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.01231372549019608, "loss": 1.9753, "step": 628 }, { "epoch": 0.0148, "grad_norm": 0.201171875, "learning_rate": 0.012333333333333335, "loss": 2.0311, "step": 629 }, { "epoch": 0.014823529411764706, "grad_norm": 0.189453125, "learning_rate": 0.012352941176470589, "loss": 2.0592, "step": 630 }, { "epoch": 0.014847058823529411, "grad_norm": 0.2080078125, "learning_rate": 0.012372549019607843, "loss": 2.1233, "step": 631 }, { "epoch": 0.014870588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.012392156862745099, "loss": 2.084, "step": 632 }, { "epoch": 0.014894117647058823, "grad_norm": 0.22265625, "learning_rate": 0.012411764705882353, "loss": 2.1377, "step": 633 }, { "epoch": 0.01491764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.012431372549019609, "loss": 2.0817, "step": 634 }, { "epoch": 0.014941176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.012450980392156863, "loss": 1.9436, "step": 635 }, { "epoch": 0.01496470588235294, "grad_norm": 0.212890625, "learning_rate": 0.012470588235294119, "loss": 1.9902, "step": 636 }, { "epoch": 0.014988235294117648, "grad_norm": 0.2060546875, "learning_rate": 0.012490196078431373, "loss": 2.0502, "step": 637 }, { "epoch": 0.015011764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.012509803921568627, "loss": 2.1683, "step": 638 }, { "epoch": 0.015035294117647058, "grad_norm": 0.201171875, "learning_rate": 0.012529411764705883, "loss": 2.0576, "step": 639 }, { "epoch": 0.015058823529411765, "grad_norm": 0.18359375, "learning_rate": 0.012549019607843137, "loss": 2.092, "step": 640 }, { "epoch": 0.01508235294117647, "grad_norm": 0.203125, "learning_rate": 0.012568627450980393, "loss": 2.0635, "step": 641 }, { "epoch": 0.015105882352941177, "grad_norm": 0.1884765625, "learning_rate": 0.012588235294117647, "loss": 2.0515, "step": 642 }, { "epoch": 0.015129411764705883, "grad_norm": 0.1865234375, "learning_rate": 0.012607843137254902, "loss": 1.8519, "step": 643 }, { "epoch": 0.015152941176470588, "grad_norm": 0.17578125, "learning_rate": 0.012627450980392156, "loss": 2.0834, "step": 644 }, { "epoch": 0.015176470588235295, "grad_norm": 0.193359375, "learning_rate": 0.012647058823529412, "loss": 2.1503, "step": 645 }, { "epoch": 0.0152, "grad_norm": 0.208984375, "learning_rate": 0.012666666666666666, "loss": 2.0831, "step": 646 }, { "epoch": 0.015223529411764705, "grad_norm": 0.220703125, "learning_rate": 0.01268627450980392, "loss": 2.1905, "step": 647 }, { "epoch": 0.015247058823529412, "grad_norm": 0.25390625, "learning_rate": 0.012705882352941176, "loss": 2.036, "step": 648 }, { "epoch": 0.015270588235294117, "grad_norm": 0.1865234375, "learning_rate": 0.01272549019607843, "loss": 2.0934, "step": 649 }, { "epoch": 0.015294117647058824, "grad_norm": 0.201171875, "learning_rate": 0.012745098039215686, "loss": 2.0621, "step": 650 }, { "epoch": 0.01531764705882353, "grad_norm": 0.18359375, "learning_rate": 0.01276470588235294, "loss": 1.9856, "step": 651 }, { "epoch": 0.015341176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.012784313725490196, "loss": 2.133, "step": 652 }, { "epoch": 0.015364705882352942, "grad_norm": 0.2314453125, "learning_rate": 0.01280392156862745, "loss": 2.2082, "step": 653 }, { "epoch": 0.015388235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.012823529411764708, "loss": 2.0193, "step": 654 }, { "epoch": 0.015411764705882352, "grad_norm": 0.193359375, "learning_rate": 0.012843137254901962, "loss": 1.9874, "step": 655 }, { "epoch": 0.01543529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.012862745098039217, "loss": 2.0638, "step": 656 }, { "epoch": 0.015458823529411764, "grad_norm": 0.21484375, "learning_rate": 0.012882352941176472, "loss": 1.9419, "step": 657 }, { "epoch": 0.015482352941176471, "grad_norm": 0.21484375, "learning_rate": 0.012901960784313727, "loss": 2.0068, "step": 658 }, { "epoch": 0.015505882352941177, "grad_norm": 0.203125, "learning_rate": 0.012921568627450981, "loss": 2.0021, "step": 659 }, { "epoch": 0.015529411764705882, "grad_norm": 0.21484375, "learning_rate": 0.012941176470588235, "loss": 2.0206, "step": 660 }, { "epoch": 0.015552941176470589, "grad_norm": 0.2216796875, "learning_rate": 0.012960784313725491, "loss": 2.1862, "step": 661 }, { "epoch": 0.015576470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.012980392156862745, "loss": 2.2162, "step": 662 }, { "epoch": 0.0156, "grad_norm": 0.2080078125, "learning_rate": 0.013000000000000001, "loss": 2.1281, "step": 663 }, { "epoch": 0.015623529411764706, "grad_norm": 0.19921875, "learning_rate": 0.013019607843137255, "loss": 2.1647, "step": 664 }, { "epoch": 0.015647058823529413, "grad_norm": 0.203125, "learning_rate": 0.013039215686274511, "loss": 2.1162, "step": 665 }, { "epoch": 0.015670588235294117, "grad_norm": 0.2265625, "learning_rate": 0.013058823529411765, "loss": 2.1944, "step": 666 }, { "epoch": 0.015694117647058824, "grad_norm": 0.1943359375, "learning_rate": 0.013078431372549021, "loss": 2.1525, "step": 667 }, { "epoch": 0.01571764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.013098039215686275, "loss": 2.0534, "step": 668 }, { "epoch": 0.015741176470588234, "grad_norm": 0.1875, "learning_rate": 0.013117647058823529, "loss": 1.9543, "step": 669 }, { "epoch": 0.01576470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.013137254901960785, "loss": 1.9969, "step": 670 }, { "epoch": 0.015788235294117648, "grad_norm": 0.189453125, "learning_rate": 0.013156862745098039, "loss": 2.0765, "step": 671 }, { "epoch": 0.01581176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.013176470588235295, "loss": 2.0456, "step": 672 }, { "epoch": 0.01583529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.013196078431372549, "loss": 2.0345, "step": 673 }, { "epoch": 0.015858823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.013215686274509805, "loss": 1.9678, "step": 674 }, { "epoch": 0.01588235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.013235294117647059, "loss": 2.0594, "step": 675 }, { "epoch": 0.015905882352941176, "grad_norm": 0.21484375, "learning_rate": 0.013254901960784313, "loss": 2.0095, "step": 676 }, { "epoch": 0.015929411764705883, "grad_norm": 0.1865234375, "learning_rate": 0.013274509803921569, "loss": 2.0772, "step": 677 }, { "epoch": 0.01595294117647059, "grad_norm": 0.19921875, "learning_rate": 0.013294117647058823, "loss": 2.0467, "step": 678 }, { "epoch": 0.015976470588235293, "grad_norm": 0.1923828125, "learning_rate": 0.013313725490196078, "loss": 2.1912, "step": 679 }, { "epoch": 0.016, "grad_norm": 0.1796875, "learning_rate": 0.013333333333333332, "loss": 1.9899, "step": 680 }, { "epoch": 0.016023529411764707, "grad_norm": 0.208984375, "learning_rate": 0.013352941176470588, "loss": 2.1917, "step": 681 }, { "epoch": 0.01604705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.013372549019607842, "loss": 1.9918, "step": 682 }, { "epoch": 0.016070588235294118, "grad_norm": 0.1875, "learning_rate": 0.013392156862745098, "loss": 2.0937, "step": 683 }, { "epoch": 0.016094117647058825, "grad_norm": 0.2138671875, "learning_rate": 0.013411764705882352, "loss": 2.0918, "step": 684 }, { "epoch": 0.016117647058823528, "grad_norm": 0.1904296875, "learning_rate": 0.013431372549019606, "loss": 2.0576, "step": 685 }, { "epoch": 0.016141176470588235, "grad_norm": 0.2080078125, "learning_rate": 0.013450980392156864, "loss": 1.9445, "step": 686 }, { "epoch": 0.016164705882352942, "grad_norm": 0.1953125, "learning_rate": 0.01347058823529412, "loss": 2.1343, "step": 687 }, { "epoch": 0.016188235294117646, "grad_norm": 0.1884765625, "learning_rate": 0.013490196078431374, "loss": 2.1482, "step": 688 }, { "epoch": 0.016211764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.013509803921568628, "loss": 2.0476, "step": 689 }, { "epoch": 0.01623529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.013529411764705884, "loss": 2.0645, "step": 690 }, { "epoch": 0.016258823529411763, "grad_norm": 0.2001953125, "learning_rate": 0.013549019607843138, "loss": 2.1038, "step": 691 }, { "epoch": 0.01628235294117647, "grad_norm": 0.205078125, "learning_rate": 0.013568627450980393, "loss": 1.9731, "step": 692 }, { "epoch": 0.016305882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.013588235294117647, "loss": 2.1135, "step": 693 }, { "epoch": 0.016329411764705884, "grad_norm": 0.1767578125, "learning_rate": 0.013607843137254903, "loss": 2.0914, "step": 694 }, { "epoch": 0.016352941176470587, "grad_norm": 0.1943359375, "learning_rate": 0.013627450980392157, "loss": 2.0653, "step": 695 }, { "epoch": 0.016376470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.013647058823529413, "loss": 2.013, "step": 696 }, { "epoch": 0.0164, "grad_norm": 0.2353515625, "learning_rate": 0.013666666666666667, "loss": 2.0461, "step": 697 }, { "epoch": 0.016423529411764705, "grad_norm": 0.208984375, "learning_rate": 0.013686274509803921, "loss": 2.0954, "step": 698 }, { "epoch": 0.016447058823529412, "grad_norm": 0.185546875, "learning_rate": 0.013705882352941177, "loss": 1.8668, "step": 699 }, { "epoch": 0.01647058823529412, "grad_norm": 0.19921875, "learning_rate": 0.013725490196078431, "loss": 1.9492, "step": 700 }, { "epoch": 0.016494117647058822, "grad_norm": 0.2109375, "learning_rate": 0.013745098039215687, "loss": 1.9823, "step": 701 }, { "epoch": 0.01651764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.013764705882352941, "loss": 1.9439, "step": 702 }, { "epoch": 0.016541176470588236, "grad_norm": 0.1865234375, "learning_rate": 0.013784313725490197, "loss": 2.0239, "step": 703 }, { "epoch": 0.01656470588235294, "grad_norm": 0.19921875, "learning_rate": 0.013803921568627451, "loss": 1.982, "step": 704 }, { "epoch": 0.016588235294117647, "grad_norm": 0.234375, "learning_rate": 0.013823529411764707, "loss": 2.0539, "step": 705 }, { "epoch": 0.016611764705882354, "grad_norm": 0.224609375, "learning_rate": 0.01384313725490196, "loss": 2.037, "step": 706 }, { "epoch": 0.016635294117647057, "grad_norm": 0.2197265625, "learning_rate": 0.013862745098039215, "loss": 2.0561, "step": 707 }, { "epoch": 0.016658823529411764, "grad_norm": 0.2001953125, "learning_rate": 0.01388235294117647, "loss": 2.0332, "step": 708 }, { "epoch": 0.01668235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.013901960784313725, "loss": 2.1291, "step": 709 }, { "epoch": 0.016705882352941178, "grad_norm": 0.21484375, "learning_rate": 0.01392156862745098, "loss": 2.2497, "step": 710 }, { "epoch": 0.01672941176470588, "grad_norm": 0.23046875, "learning_rate": 0.013941176470588235, "loss": 1.9734, "step": 711 }, { "epoch": 0.01675294117647059, "grad_norm": 0.205078125, "learning_rate": 0.01396078431372549, "loss": 2.1375, "step": 712 }, { "epoch": 0.016776470588235295, "grad_norm": 0.228515625, "learning_rate": 0.013980392156862744, "loss": 2.0593, "step": 713 }, { "epoch": 0.0168, "grad_norm": 0.203125, "learning_rate": 0.013999999999999999, "loss": 2.0199, "step": 714 }, { "epoch": 0.016823529411764706, "grad_norm": 0.2109375, "learning_rate": 0.014019607843137254, "loss": 2.1901, "step": 715 }, { "epoch": 0.016847058823529413, "grad_norm": 0.21875, "learning_rate": 0.014039215686274508, "loss": 2.1708, "step": 716 }, { "epoch": 0.016870588235294116, "grad_norm": 0.2021484375, "learning_rate": 0.014058823529411764, "loss": 1.9762, "step": 717 }, { "epoch": 0.016894117647058823, "grad_norm": 0.185546875, "learning_rate": 0.014078431372549022, "loss": 1.9248, "step": 718 }, { "epoch": 0.01691764705882353, "grad_norm": 0.185546875, "learning_rate": 0.014098039215686276, "loss": 2.0737, "step": 719 }, { "epoch": 0.016941176470588234, "grad_norm": 0.1787109375, "learning_rate": 0.01411764705882353, "loss": 1.925, "step": 720 }, { "epoch": 0.01696470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.014137254901960786, "loss": 1.8835, "step": 721 }, { "epoch": 0.016988235294117648, "grad_norm": 0.1689453125, "learning_rate": 0.01415686274509804, "loss": 2.0038, "step": 722 }, { "epoch": 0.01701176470588235, "grad_norm": 0.193359375, "learning_rate": 0.014176470588235296, "loss": 1.9385, "step": 723 }, { "epoch": 0.017035294117647058, "grad_norm": 0.1767578125, "learning_rate": 0.01419607843137255, "loss": 1.9318, "step": 724 }, { "epoch": 0.017058823529411765, "grad_norm": 0.1728515625, "learning_rate": 0.014215686274509805, "loss": 2.0836, "step": 725 }, { "epoch": 0.017082352941176472, "grad_norm": 0.2001953125, "learning_rate": 0.01423529411764706, "loss": 2.0464, "step": 726 }, { "epoch": 0.017105882352941176, "grad_norm": 0.2099609375, "learning_rate": 0.014254901960784314, "loss": 2.0533, "step": 727 }, { "epoch": 0.017129411764705883, "grad_norm": 0.16796875, "learning_rate": 0.01427450980392157, "loss": 2.1183, "step": 728 }, { "epoch": 0.01715294117647059, "grad_norm": 0.1796875, "learning_rate": 0.014294117647058823, "loss": 1.9216, "step": 729 }, { "epoch": 0.017176470588235293, "grad_norm": 0.17578125, "learning_rate": 0.01431372549019608, "loss": 1.9485, "step": 730 }, { "epoch": 0.0172, "grad_norm": 0.1708984375, "learning_rate": 0.014333333333333333, "loss": 2.0948, "step": 731 }, { "epoch": 0.017223529411764707, "grad_norm": 0.2001953125, "learning_rate": 0.01435294117647059, "loss": 2.0591, "step": 732 }, { "epoch": 0.01724705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.014372549019607843, "loss": 2.1161, "step": 733 }, { "epoch": 0.017270588235294117, "grad_norm": 0.1904296875, "learning_rate": 0.014392156862745099, "loss": 1.9023, "step": 734 }, { "epoch": 0.017294117647058824, "grad_norm": 0.1796875, "learning_rate": 0.014411764705882353, "loss": 1.9241, "step": 735 }, { "epoch": 0.017317647058823528, "grad_norm": 0.189453125, "learning_rate": 0.014431372549019607, "loss": 2.2231, "step": 736 }, { "epoch": 0.017341176470588235, "grad_norm": 0.189453125, "learning_rate": 0.014450980392156863, "loss": 2.0856, "step": 737 }, { "epoch": 0.017364705882352942, "grad_norm": 0.17578125, "learning_rate": 0.014470588235294117, "loss": 1.9643, "step": 738 }, { "epoch": 0.01738823529411765, "grad_norm": 0.169921875, "learning_rate": 0.014490196078431373, "loss": 1.9748, "step": 739 }, { "epoch": 0.017411764705882352, "grad_norm": 0.201171875, "learning_rate": 0.014509803921568627, "loss": 1.9859, "step": 740 }, { "epoch": 0.01743529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.014529411764705883, "loss": 2.1208, "step": 741 }, { "epoch": 0.017458823529411766, "grad_norm": 0.1875, "learning_rate": 0.014549019607843137, "loss": 2.0727, "step": 742 }, { "epoch": 0.01748235294117647, "grad_norm": 0.197265625, "learning_rate": 0.014568627450980393, "loss": 1.9939, "step": 743 }, { "epoch": 0.017505882352941177, "grad_norm": 0.19140625, "learning_rate": 0.014588235294117647, "loss": 2.036, "step": 744 }, { "epoch": 0.017529411764705884, "grad_norm": 0.181640625, "learning_rate": 0.0146078431372549, "loss": 1.9722, "step": 745 }, { "epoch": 0.017552941176470587, "grad_norm": 0.2099609375, "learning_rate": 0.014627450980392157, "loss": 2.0815, "step": 746 }, { "epoch": 0.017576470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.01464705882352941, "loss": 2.1432, "step": 747 }, { "epoch": 0.0176, "grad_norm": 0.2060546875, "learning_rate": 0.014666666666666666, "loss": 2.0126, "step": 748 }, { "epoch": 0.017623529411764705, "grad_norm": 0.2275390625, "learning_rate": 0.01468627450980392, "loss": 1.8929, "step": 749 }, { "epoch": 0.01764705882352941, "grad_norm": 0.255859375, "learning_rate": 0.014705882352941178, "loss": 2.0487, "step": 750 }, { "epoch": 0.01767058823529412, "grad_norm": 0.193359375, "learning_rate": 0.014725490196078432, "loss": 1.9343, "step": 751 }, { "epoch": 0.017694117647058822, "grad_norm": 0.203125, "learning_rate": 0.014745098039215688, "loss": 1.8906, "step": 752 }, { "epoch": 0.01771764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.014764705882352942, "loss": 1.9338, "step": 753 }, { "epoch": 0.017741176470588236, "grad_norm": 0.2353515625, "learning_rate": 0.014784313725490198, "loss": 1.9124, "step": 754 }, { "epoch": 0.017764705882352943, "grad_norm": 0.2177734375, "learning_rate": 0.014803921568627452, "loss": 2.024, "step": 755 }, { "epoch": 0.017788235294117646, "grad_norm": 0.189453125, "learning_rate": 0.014823529411764708, "loss": 1.9727, "step": 756 }, { "epoch": 0.017811764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.014843137254901962, "loss": 2.0323, "step": 757 }, { "epoch": 0.01783529411764706, "grad_norm": 0.23828125, "learning_rate": 0.014862745098039216, "loss": 1.8184, "step": 758 }, { "epoch": 0.017858823529411764, "grad_norm": 0.21875, "learning_rate": 0.014882352941176472, "loss": 1.9484, "step": 759 }, { "epoch": 0.01788235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.014901960784313726, "loss": 2.0152, "step": 760 }, { "epoch": 0.017905882352941178, "grad_norm": 0.1904296875, "learning_rate": 0.014921568627450981, "loss": 1.8354, "step": 761 }, { "epoch": 0.01792941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.014941176470588236, "loss": 2.0598, "step": 762 }, { "epoch": 0.017952941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.014960784313725491, "loss": 2.0777, "step": 763 }, { "epoch": 0.017976470588235295, "grad_norm": 0.21484375, "learning_rate": 0.014980392156862745, "loss": 2.1139, "step": 764 }, { "epoch": 0.018, "grad_norm": 0.1962890625, "learning_rate": 0.015, "loss": 2.0121, "step": 765 }, { "epoch": 0.018023529411764706, "grad_norm": 0.2109375, "learning_rate": 0.015019607843137255, "loss": 2.1161, "step": 766 }, { "epoch": 0.018047058823529413, "grad_norm": 0.15625, "learning_rate": 0.01503921568627451, "loss": 2.0359, "step": 767 }, { "epoch": 0.018070588235294116, "grad_norm": 0.1875, "learning_rate": 0.015058823529411765, "loss": 2.1287, "step": 768 }, { "epoch": 0.018094117647058823, "grad_norm": 0.1826171875, "learning_rate": 0.01507843137254902, "loss": 1.9984, "step": 769 }, { "epoch": 0.01811764705882353, "grad_norm": 0.171875, "learning_rate": 0.015098039215686275, "loss": 2.1423, "step": 770 }, { "epoch": 0.018141176470588237, "grad_norm": 0.205078125, "learning_rate": 0.015117647058823529, "loss": 2.1058, "step": 771 }, { "epoch": 0.01816470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.015137254901960785, "loss": 2.1351, "step": 772 }, { "epoch": 0.018188235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.015156862745098039, "loss": 2.102, "step": 773 }, { "epoch": 0.018211764705882354, "grad_norm": 0.216796875, "learning_rate": 0.015176470588235293, "loss": 2.165, "step": 774 }, { "epoch": 0.018235294117647058, "grad_norm": 0.2001953125, "learning_rate": 0.015196078431372549, "loss": 1.9474, "step": 775 }, { "epoch": 0.018258823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.015215686274509803, "loss": 1.8934, "step": 776 }, { "epoch": 0.018282352941176472, "grad_norm": 0.1865234375, "learning_rate": 0.015235294117647059, "loss": 2.0634, "step": 777 }, { "epoch": 0.018305882352941175, "grad_norm": 0.1865234375, "learning_rate": 0.015254901960784313, "loss": 2.0288, "step": 778 }, { "epoch": 0.018329411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.015274509803921569, "loss": 2.0515, "step": 779 }, { "epoch": 0.01835294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.015294117647058823, "loss": 2.0081, "step": 780 }, { "epoch": 0.018376470588235293, "grad_norm": 0.212890625, "learning_rate": 0.01531372549019608, "loss": 2.2536, "step": 781 }, { "epoch": 0.0184, "grad_norm": 0.1845703125, "learning_rate": 0.015333333333333334, "loss": 1.8673, "step": 782 }, { "epoch": 0.018423529411764707, "grad_norm": 0.1904296875, "learning_rate": 0.01535294117647059, "loss": 2.0765, "step": 783 }, { "epoch": 0.01844705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.015372549019607844, "loss": 1.8582, "step": 784 }, { "epoch": 0.018470588235294117, "grad_norm": 0.224609375, "learning_rate": 0.0153921568627451, "loss": 2.0207, "step": 785 }, { "epoch": 0.018494117647058824, "grad_norm": 0.21484375, "learning_rate": 0.015411764705882354, "loss": 1.9912, "step": 786 }, { "epoch": 0.01851764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.015431372549019608, "loss": 2.1676, "step": 787 }, { "epoch": 0.018541176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.015450980392156864, "loss": 2.0695, "step": 788 }, { "epoch": 0.01856470588235294, "grad_norm": 0.208984375, "learning_rate": 0.015470588235294118, "loss": 1.9578, "step": 789 }, { "epoch": 0.01858823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.015490196078431374, "loss": 1.9133, "step": 790 }, { "epoch": 0.018611764705882352, "grad_norm": 0.1767578125, "learning_rate": 0.015509803921568628, "loss": 2.139, "step": 791 }, { "epoch": 0.01863529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.015529411764705884, "loss": 1.9071, "step": 792 }, { "epoch": 0.018658823529411766, "grad_norm": 0.2119140625, "learning_rate": 0.015549019607843138, "loss": 1.9151, "step": 793 }, { "epoch": 0.01868235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.015568627450980393, "loss": 1.9431, "step": 794 }, { "epoch": 0.018705882352941176, "grad_norm": 0.208984375, "learning_rate": 0.015588235294117648, "loss": 1.9113, "step": 795 }, { "epoch": 0.018729411764705883, "grad_norm": 0.2021484375, "learning_rate": 0.015607843137254902, "loss": 1.9021, "step": 796 }, { "epoch": 0.018752941176470587, "grad_norm": 0.185546875, "learning_rate": 0.015627450980392157, "loss": 2.1068, "step": 797 }, { "epoch": 0.018776470588235294, "grad_norm": 0.1953125, "learning_rate": 0.015647058823529413, "loss": 1.9939, "step": 798 }, { "epoch": 0.0188, "grad_norm": 0.193359375, "learning_rate": 0.015666666666666666, "loss": 2.0982, "step": 799 }, { "epoch": 0.018823529411764704, "grad_norm": 0.2001953125, "learning_rate": 0.01568627450980392, "loss": 2.0035, "step": 800 }, { "epoch": 0.01884705882352941, "grad_norm": 0.189453125, "learning_rate": 0.015705882352941177, "loss": 1.9293, "step": 801 }, { "epoch": 0.018870588235294118, "grad_norm": 0.1943359375, "learning_rate": 0.015725490196078433, "loss": 1.9186, "step": 802 }, { "epoch": 0.018894117647058825, "grad_norm": 0.1884765625, "learning_rate": 0.015745098039215685, "loss": 1.9852, "step": 803 }, { "epoch": 0.01891764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.01576470588235294, "loss": 1.9696, "step": 804 }, { "epoch": 0.018941176470588236, "grad_norm": 0.1796875, "learning_rate": 0.015784313725490197, "loss": 2.0689, "step": 805 }, { "epoch": 0.018964705882352943, "grad_norm": 0.1953125, "learning_rate": 0.01580392156862745, "loss": 1.9725, "step": 806 }, { "epoch": 0.018988235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.015823529411764705, "loss": 1.8992, "step": 807 }, { "epoch": 0.019011764705882353, "grad_norm": 0.173828125, "learning_rate": 0.01584313725490196, "loss": 2.0191, "step": 808 }, { "epoch": 0.01903529411764706, "grad_norm": 0.181640625, "learning_rate": 0.015862745098039217, "loss": 2.0215, "step": 809 }, { "epoch": 0.019058823529411763, "grad_norm": 0.21875, "learning_rate": 0.01588235294117647, "loss": 2.0093, "step": 810 }, { "epoch": 0.01908235294117647, "grad_norm": 0.177734375, "learning_rate": 0.015901960784313725, "loss": 2.0319, "step": 811 }, { "epoch": 0.019105882352941177, "grad_norm": 0.1982421875, "learning_rate": 0.01592156862745098, "loss": 1.8627, "step": 812 }, { "epoch": 0.01912941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.015941176470588236, "loss": 1.9777, "step": 813 }, { "epoch": 0.019152941176470588, "grad_norm": 0.224609375, "learning_rate": 0.015960784313725492, "loss": 1.8873, "step": 814 }, { "epoch": 0.019176470588235295, "grad_norm": 0.1669921875, "learning_rate": 0.015980392156862748, "loss": 2.1264, "step": 815 }, { "epoch": 0.0192, "grad_norm": 0.1875, "learning_rate": 0.016, "loss": 1.826, "step": 816 }, { "epoch": 0.019223529411764705, "grad_norm": 0.181640625, "learning_rate": 0.016019607843137256, "loss": 2.0196, "step": 817 }, { "epoch": 0.019247058823529412, "grad_norm": 0.197265625, "learning_rate": 0.016039215686274512, "loss": 2.0454, "step": 818 }, { "epoch": 0.01927058823529412, "grad_norm": 0.201171875, "learning_rate": 0.016058823529411764, "loss": 1.9986, "step": 819 }, { "epoch": 0.019294117647058823, "grad_norm": 0.21875, "learning_rate": 0.01607843137254902, "loss": 2.1162, "step": 820 }, { "epoch": 0.01931764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.016098039215686276, "loss": 2.0113, "step": 821 }, { "epoch": 0.019341176470588237, "grad_norm": 0.1865234375, "learning_rate": 0.01611764705882353, "loss": 1.9895, "step": 822 }, { "epoch": 0.01936470588235294, "grad_norm": 0.244140625, "learning_rate": 0.016137254901960784, "loss": 1.94, "step": 823 }, { "epoch": 0.019388235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.01615686274509804, "loss": 2.1669, "step": 824 }, { "epoch": 0.019411764705882354, "grad_norm": 0.201171875, "learning_rate": 0.016176470588235296, "loss": 2.0231, "step": 825 }, { "epoch": 0.019435294117647058, "grad_norm": 0.1962890625, "learning_rate": 0.016196078431372548, "loss": 1.9223, "step": 826 }, { "epoch": 0.019458823529411765, "grad_norm": 0.17578125, "learning_rate": 0.016215686274509804, "loss": 1.9286, "step": 827 }, { "epoch": 0.01948235294117647, "grad_norm": 0.17578125, "learning_rate": 0.01623529411764706, "loss": 2.079, "step": 828 }, { "epoch": 0.019505882352941175, "grad_norm": 0.2158203125, "learning_rate": 0.016254901960784315, "loss": 2.05, "step": 829 }, { "epoch": 0.019529411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.016274509803921568, "loss": 2.0234, "step": 830 }, { "epoch": 0.01955294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.016294117647058824, "loss": 1.8585, "step": 831 }, { "epoch": 0.019576470588235292, "grad_norm": 0.1904296875, "learning_rate": 0.01631372549019608, "loss": 2.0067, "step": 832 }, { "epoch": 0.0196, "grad_norm": 0.203125, "learning_rate": 0.01633333333333333, "loss": 2.1679, "step": 833 }, { "epoch": 0.019623529411764706, "grad_norm": 0.212890625, "learning_rate": 0.016352941176470587, "loss": 1.9871, "step": 834 }, { "epoch": 0.019647058823529413, "grad_norm": 0.1748046875, "learning_rate": 0.016372549019607843, "loss": 2.1213, "step": 835 }, { "epoch": 0.019670588235294117, "grad_norm": 0.181640625, "learning_rate": 0.0163921568627451, "loss": 2.056, "step": 836 }, { "epoch": 0.019694117647058824, "grad_norm": 0.19921875, "learning_rate": 0.01641176470588235, "loss": 1.9997, "step": 837 }, { "epoch": 0.01971764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.016431372549019607, "loss": 1.9433, "step": 838 }, { "epoch": 0.019741176470588234, "grad_norm": 0.1982421875, "learning_rate": 0.016450980392156863, "loss": 1.7692, "step": 839 }, { "epoch": 0.01976470588235294, "grad_norm": 0.181640625, "learning_rate": 0.01647058823529412, "loss": 2.013, "step": 840 }, { "epoch": 0.019788235294117648, "grad_norm": 0.19140625, "learning_rate": 0.01649019607843137, "loss": 2.0949, "step": 841 }, { "epoch": 0.01981176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.016509803921568627, "loss": 1.9545, "step": 842 }, { "epoch": 0.01983529411764706, "grad_norm": 0.18359375, "learning_rate": 0.016529411764705883, "loss": 2.0266, "step": 843 }, { "epoch": 0.019858823529411766, "grad_norm": 0.2099609375, "learning_rate": 0.016549019607843135, "loss": 1.8543, "step": 844 }, { "epoch": 0.01988235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.016568627450980394, "loss": 1.9835, "step": 845 }, { "epoch": 0.019905882352941176, "grad_norm": 0.185546875, "learning_rate": 0.016588235294117647, "loss": 2.0109, "step": 846 }, { "epoch": 0.019929411764705883, "grad_norm": 0.197265625, "learning_rate": 0.016607843137254902, "loss": 2.1187, "step": 847 }, { "epoch": 0.01995294117647059, "grad_norm": 0.1953125, "learning_rate": 0.01662745098039216, "loss": 2.0659, "step": 848 }, { "epoch": 0.019976470588235293, "grad_norm": 0.2138671875, "learning_rate": 0.016647058823529414, "loss": 1.9427, "step": 849 }, { "epoch": 0.02, "grad_norm": 0.1845703125, "learning_rate": 0.016666666666666666, "loss": 1.9498, "step": 850 }, { "epoch": 0.020023529411764707, "grad_norm": 0.1884765625, "learning_rate": 0.016686274509803922, "loss": 2.0393, "step": 851 }, { "epoch": 0.02004705882352941, "grad_norm": 0.220703125, "learning_rate": 0.016705882352941178, "loss": 2.0923, "step": 852 }, { "epoch": 0.020070588235294118, "grad_norm": 0.205078125, "learning_rate": 0.016725490196078434, "loss": 1.9783, "step": 853 }, { "epoch": 0.020094117647058825, "grad_norm": 0.2138671875, "learning_rate": 0.016745098039215686, "loss": 1.7334, "step": 854 }, { "epoch": 0.02011764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.016764705882352942, "loss": 2.0357, "step": 855 }, { "epoch": 0.020141176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.016784313725490198, "loss": 2.0396, "step": 856 }, { "epoch": 0.020164705882352942, "grad_norm": 0.2109375, "learning_rate": 0.01680392156862745, "loss": 1.9563, "step": 857 }, { "epoch": 0.020188235294117646, "grad_norm": 0.2138671875, "learning_rate": 0.016823529411764706, "loss": 2.1334, "step": 858 }, { "epoch": 0.020211764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.01684313725490196, "loss": 2.0088, "step": 859 }, { "epoch": 0.02023529411764706, "grad_norm": 0.18359375, "learning_rate": 0.016862745098039218, "loss": 1.9871, "step": 860 }, { "epoch": 0.020258823529411763, "grad_norm": 0.208984375, "learning_rate": 0.01688235294117647, "loss": 2.1158, "step": 861 }, { "epoch": 0.02028235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.016901960784313726, "loss": 2.0962, "step": 862 }, { "epoch": 0.020305882352941177, "grad_norm": 0.1962890625, "learning_rate": 0.01692156862745098, "loss": 1.9923, "step": 863 }, { "epoch": 0.020329411764705884, "grad_norm": 0.1923828125, "learning_rate": 0.016941176470588234, "loss": 1.8632, "step": 864 }, { "epoch": 0.020352941176470588, "grad_norm": 0.2109375, "learning_rate": 0.01696078431372549, "loss": 2.0502, "step": 865 }, { "epoch": 0.020376470588235295, "grad_norm": 0.2041015625, "learning_rate": 0.016980392156862745, "loss": 2.0106, "step": 866 }, { "epoch": 0.0204, "grad_norm": 0.1962890625, "learning_rate": 0.017, "loss": 1.8715, "step": 867 }, { "epoch": 0.020423529411764705, "grad_norm": 0.2060546875, "learning_rate": 0.017019607843137254, "loss": 1.8703, "step": 868 }, { "epoch": 0.020447058823529412, "grad_norm": 0.2236328125, "learning_rate": 0.01703921568627451, "loss": 1.9605, "step": 869 }, { "epoch": 0.02047058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.017058823529411765, "loss": 2.0035, "step": 870 }, { "epoch": 0.020494117647058822, "grad_norm": 0.20703125, "learning_rate": 0.01707843137254902, "loss": 1.9216, "step": 871 }, { "epoch": 0.02051764705882353, "grad_norm": 0.177734375, "learning_rate": 0.017098039215686273, "loss": 2.0194, "step": 872 }, { "epoch": 0.020541176470588236, "grad_norm": 0.173828125, "learning_rate": 0.01711764705882353, "loss": 1.8921, "step": 873 }, { "epoch": 0.02056470588235294, "grad_norm": 0.22265625, "learning_rate": 0.017137254901960785, "loss": 1.8747, "step": 874 }, { "epoch": 0.020588235294117647, "grad_norm": 0.208984375, "learning_rate": 0.017156862745098037, "loss": 2.1082, "step": 875 }, { "epoch": 0.020611764705882354, "grad_norm": 0.240234375, "learning_rate": 0.017176470588235293, "loss": 2.1249, "step": 876 }, { "epoch": 0.020635294117647057, "grad_norm": 0.2138671875, "learning_rate": 0.01719607843137255, "loss": 2.0714, "step": 877 }, { "epoch": 0.020658823529411764, "grad_norm": 0.2314453125, "learning_rate": 0.017215686274509805, "loss": 2.0462, "step": 878 }, { "epoch": 0.02068235294117647, "grad_norm": 0.25, "learning_rate": 0.01723529411764706, "loss": 2.0486, "step": 879 }, { "epoch": 0.020705882352941178, "grad_norm": 0.2275390625, "learning_rate": 0.017254901960784316, "loss": 2.0468, "step": 880 }, { "epoch": 0.02072941176470588, "grad_norm": 0.205078125, "learning_rate": 0.01727450980392157, "loss": 1.8853, "step": 881 }, { "epoch": 0.02075294117647059, "grad_norm": 0.208984375, "learning_rate": 0.017294117647058824, "loss": 1.9967, "step": 882 }, { "epoch": 0.020776470588235296, "grad_norm": 0.212890625, "learning_rate": 0.01731372549019608, "loss": 2.0732, "step": 883 }, { "epoch": 0.0208, "grad_norm": 0.185546875, "learning_rate": 0.017333333333333336, "loss": 1.9702, "step": 884 }, { "epoch": 0.020823529411764706, "grad_norm": 0.21484375, "learning_rate": 0.01735294117647059, "loss": 2.0623, "step": 885 }, { "epoch": 0.020847058823529413, "grad_norm": 0.208984375, "learning_rate": 0.017372549019607844, "loss": 2.1191, "step": 886 }, { "epoch": 0.020870588235294116, "grad_norm": 0.2158203125, "learning_rate": 0.0173921568627451, "loss": 1.9991, "step": 887 }, { "epoch": 0.020894117647058823, "grad_norm": 0.208984375, "learning_rate": 0.017411764705882352, "loss": 2.0862, "step": 888 }, { "epoch": 0.02091764705882353, "grad_norm": 0.1875, "learning_rate": 0.017431372549019608, "loss": 1.906, "step": 889 }, { "epoch": 0.020941176470588234, "grad_norm": 0.19921875, "learning_rate": 0.017450980392156864, "loss": 1.8141, "step": 890 }, { "epoch": 0.02096470588235294, "grad_norm": 0.19140625, "learning_rate": 0.01747058823529412, "loss": 1.9398, "step": 891 }, { "epoch": 0.020988235294117648, "grad_norm": 0.1845703125, "learning_rate": 0.017490196078431372, "loss": 1.8402, "step": 892 }, { "epoch": 0.02101176470588235, "grad_norm": 0.203125, "learning_rate": 0.017509803921568628, "loss": 2.1111, "step": 893 }, { "epoch": 0.02103529411764706, "grad_norm": 0.23046875, "learning_rate": 0.017529411764705884, "loss": 1.9251, "step": 894 }, { "epoch": 0.021058823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.017549019607843136, "loss": 2.0184, "step": 895 }, { "epoch": 0.021082352941176472, "grad_norm": 0.2177734375, "learning_rate": 0.017568627450980392, "loss": 2.0534, "step": 896 }, { "epoch": 0.021105882352941176, "grad_norm": 0.19140625, "learning_rate": 0.017588235294117648, "loss": 1.8513, "step": 897 }, { "epoch": 0.021129411764705883, "grad_norm": 0.1923828125, "learning_rate": 0.017607843137254903, "loss": 1.9517, "step": 898 }, { "epoch": 0.02115294117647059, "grad_norm": 0.21484375, "learning_rate": 0.017627450980392156, "loss": 2.099, "step": 899 }, { "epoch": 0.021176470588235293, "grad_norm": 0.23046875, "learning_rate": 0.01764705882352941, "loss": 1.9641, "step": 900 }, { "epoch": 0.0212, "grad_norm": 0.1875, "learning_rate": 0.017666666666666667, "loss": 2.0201, "step": 901 }, { "epoch": 0.021223529411764707, "grad_norm": 0.189453125, "learning_rate": 0.01768627450980392, "loss": 1.9719, "step": 902 }, { "epoch": 0.02124705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.017705882352941175, "loss": 1.9524, "step": 903 }, { "epoch": 0.021270588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.01772549019607843, "loss": 2.0481, "step": 904 }, { "epoch": 0.021294117647058824, "grad_norm": 0.1982421875, "learning_rate": 0.017745098039215687, "loss": 2.0472, "step": 905 }, { "epoch": 0.021317647058823528, "grad_norm": 0.197265625, "learning_rate": 0.01776470588235294, "loss": 1.8897, "step": 906 }, { "epoch": 0.021341176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.017784313725490195, "loss": 2.0546, "step": 907 }, { "epoch": 0.021364705882352942, "grad_norm": 0.2109375, "learning_rate": 0.01780392156862745, "loss": 1.8933, "step": 908 }, { "epoch": 0.021388235294117645, "grad_norm": 0.1826171875, "learning_rate": 0.017823529411764707, "loss": 1.9716, "step": 909 }, { "epoch": 0.021411764705882352, "grad_norm": 0.212890625, "learning_rate": 0.017843137254901963, "loss": 1.8304, "step": 910 }, { "epoch": 0.02143529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.01786274509803922, "loss": 2.0366, "step": 911 }, { "epoch": 0.021458823529411766, "grad_norm": 0.2001953125, "learning_rate": 0.01788235294117647, "loss": 2.0673, "step": 912 }, { "epoch": 0.02148235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.017901960784313727, "loss": 1.9314, "step": 913 }, { "epoch": 0.021505882352941177, "grad_norm": 0.1875, "learning_rate": 0.017921568627450982, "loss": 1.9498, "step": 914 }, { "epoch": 0.021529411764705884, "grad_norm": 0.2001953125, "learning_rate": 0.017941176470588235, "loss": 2.0704, "step": 915 }, { "epoch": 0.021552941176470587, "grad_norm": 0.197265625, "learning_rate": 0.01796078431372549, "loss": 1.84, "step": 916 }, { "epoch": 0.021576470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.017980392156862746, "loss": 1.902, "step": 917 }, { "epoch": 0.0216, "grad_norm": 0.19140625, "learning_rate": 0.018000000000000002, "loss": 1.9741, "step": 918 }, { "epoch": 0.021623529411764705, "grad_norm": 0.20703125, "learning_rate": 0.018019607843137254, "loss": 2.0209, "step": 919 }, { "epoch": 0.02164705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.01803921568627451, "loss": 1.9545, "step": 920 }, { "epoch": 0.02167058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.018058823529411766, "loss": 1.9845, "step": 921 }, { "epoch": 0.021694117647058822, "grad_norm": 0.2177734375, "learning_rate": 0.018078431372549022, "loss": 1.9355, "step": 922 }, { "epoch": 0.02171764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.018098039215686274, "loss": 1.8894, "step": 923 }, { "epoch": 0.021741176470588236, "grad_norm": 0.1904296875, "learning_rate": 0.01811764705882353, "loss": 1.8536, "step": 924 }, { "epoch": 0.02176470588235294, "grad_norm": 0.212890625, "learning_rate": 0.018137254901960786, "loss": 1.9452, "step": 925 }, { "epoch": 0.021788235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.018156862745098038, "loss": 1.9626, "step": 926 }, { "epoch": 0.021811764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.018176470588235294, "loss": 1.9735, "step": 927 }, { "epoch": 0.02183529411764706, "grad_norm": 0.20703125, "learning_rate": 0.01819607843137255, "loss": 1.9846, "step": 928 }, { "epoch": 0.021858823529411764, "grad_norm": 0.224609375, "learning_rate": 0.018215686274509806, "loss": 1.9758, "step": 929 }, { "epoch": 0.02188235294117647, "grad_norm": 0.251953125, "learning_rate": 0.018235294117647058, "loss": 1.7638, "step": 930 }, { "epoch": 0.021905882352941178, "grad_norm": 0.1845703125, "learning_rate": 0.018254901960784314, "loss": 2.0394, "step": 931 }, { "epoch": 0.02192941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.01827450980392157, "loss": 1.9298, "step": 932 }, { "epoch": 0.021952941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.018294117647058822, "loss": 1.9837, "step": 933 }, { "epoch": 0.021976470588235295, "grad_norm": 0.2119140625, "learning_rate": 0.018313725490196078, "loss": 2.0315, "step": 934 }, { "epoch": 0.022, "grad_norm": 0.22265625, "learning_rate": 0.018333333333333333, "loss": 2.0284, "step": 935 }, { "epoch": 0.022023529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.01835294117647059, "loss": 1.8585, "step": 936 }, { "epoch": 0.022047058823529413, "grad_norm": 0.197265625, "learning_rate": 0.01837254901960784, "loss": 2.0577, "step": 937 }, { "epoch": 0.022070588235294116, "grad_norm": 0.201171875, "learning_rate": 0.018392156862745097, "loss": 1.8081, "step": 938 }, { "epoch": 0.022094117647058823, "grad_norm": 0.2060546875, "learning_rate": 0.018411764705882353, "loss": 2.0614, "step": 939 }, { "epoch": 0.02211764705882353, "grad_norm": 0.17578125, "learning_rate": 0.018431372549019606, "loss": 2.0538, "step": 940 }, { "epoch": 0.022141176470588234, "grad_norm": 0.2041015625, "learning_rate": 0.018450980392156865, "loss": 1.9271, "step": 941 }, { "epoch": 0.02216470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.01847058823529412, "loss": 2.002, "step": 942 }, { "epoch": 0.022188235294117648, "grad_norm": 0.2041015625, "learning_rate": 0.018490196078431373, "loss": 1.9416, "step": 943 }, { "epoch": 0.022211764705882354, "grad_norm": 0.193359375, "learning_rate": 0.01850980392156863, "loss": 1.8982, "step": 944 }, { "epoch": 0.022235294117647058, "grad_norm": 0.20703125, "learning_rate": 0.018529411764705885, "loss": 1.8526, "step": 945 }, { "epoch": 0.022258823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.018549019607843137, "loss": 2.0232, "step": 946 }, { "epoch": 0.022282352941176472, "grad_norm": 0.1982421875, "learning_rate": 0.018568627450980393, "loss": 1.9455, "step": 947 }, { "epoch": 0.022305882352941175, "grad_norm": 0.2060546875, "learning_rate": 0.01858823529411765, "loss": 1.9882, "step": 948 }, { "epoch": 0.022329411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.018607843137254904, "loss": 1.8928, "step": 949 }, { "epoch": 0.02235294117647059, "grad_norm": 0.228515625, "learning_rate": 0.018627450980392157, "loss": 1.8783, "step": 950 }, { "epoch": 0.022376470588235293, "grad_norm": 0.21484375, "learning_rate": 0.018647058823529412, "loss": 1.9617, "step": 951 }, { "epoch": 0.0224, "grad_norm": 0.224609375, "learning_rate": 0.018666666666666668, "loss": 2.0043, "step": 952 }, { "epoch": 0.022423529411764707, "grad_norm": 0.1982421875, "learning_rate": 0.01868627450980392, "loss": 2.0664, "step": 953 }, { "epoch": 0.02244705882352941, "grad_norm": 0.21484375, "learning_rate": 0.018705882352941176, "loss": 2.0095, "step": 954 }, { "epoch": 0.022470588235294117, "grad_norm": 0.220703125, "learning_rate": 0.018725490196078432, "loss": 2.1061, "step": 955 }, { "epoch": 0.022494117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.018745098039215688, "loss": 1.8346, "step": 956 }, { "epoch": 0.02251764705882353, "grad_norm": 0.185546875, "learning_rate": 0.01876470588235294, "loss": 1.9552, "step": 957 }, { "epoch": 0.022541176470588235, "grad_norm": 0.203125, "learning_rate": 0.018784313725490196, "loss": 1.8551, "step": 958 }, { "epoch": 0.02256470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.018803921568627452, "loss": 1.8887, "step": 959 }, { "epoch": 0.02258823529411765, "grad_norm": 0.169921875, "learning_rate": 0.018823529411764708, "loss": 2.1027, "step": 960 }, { "epoch": 0.022611764705882352, "grad_norm": 0.2138671875, "learning_rate": 0.01884313725490196, "loss": 2.0738, "step": 961 }, { "epoch": 0.02263529411764706, "grad_norm": 0.228515625, "learning_rate": 0.018862745098039216, "loss": 1.9634, "step": 962 }, { "epoch": 0.022658823529411766, "grad_norm": 0.2080078125, "learning_rate": 0.01888235294117647, "loss": 1.9743, "step": 963 }, { "epoch": 0.02268235294117647, "grad_norm": 0.212890625, "learning_rate": 0.018901960784313724, "loss": 1.8804, "step": 964 }, { "epoch": 0.022705882352941176, "grad_norm": 0.212890625, "learning_rate": 0.01892156862745098, "loss": 2.1187, "step": 965 }, { "epoch": 0.022729411764705883, "grad_norm": 0.2216796875, "learning_rate": 0.018941176470588236, "loss": 1.9367, "step": 966 }, { "epoch": 0.022752941176470587, "grad_norm": 0.22265625, "learning_rate": 0.01896078431372549, "loss": 2.1467, "step": 967 }, { "epoch": 0.022776470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.018980392156862744, "loss": 1.9395, "step": 968 }, { "epoch": 0.0228, "grad_norm": 0.2216796875, "learning_rate": 0.019, "loss": 1.8324, "step": 969 }, { "epoch": 0.022823529411764704, "grad_norm": 0.2158203125, "learning_rate": 0.019019607843137255, "loss": 1.8566, "step": 970 }, { "epoch": 0.02284705882352941, "grad_norm": 0.205078125, "learning_rate": 0.019039215686274508, "loss": 1.9015, "step": 971 }, { "epoch": 0.022870588235294118, "grad_norm": 0.19140625, "learning_rate": 0.019058823529411763, "loss": 1.9128, "step": 972 }, { "epoch": 0.022894117647058825, "grad_norm": 0.22265625, "learning_rate": 0.019078431372549023, "loss": 1.9646, "step": 973 }, { "epoch": 0.02291764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.019098039215686275, "loss": 2.0401, "step": 974 }, { "epoch": 0.022941176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.01911764705882353, "loss": 1.9694, "step": 975 }, { "epoch": 0.022964705882352943, "grad_norm": 0.18359375, "learning_rate": 0.019137254901960787, "loss": 2.0224, "step": 976 }, { "epoch": 0.022988235294117646, "grad_norm": 0.21484375, "learning_rate": 0.01915686274509804, "loss": 2.0175, "step": 977 }, { "epoch": 0.023011764705882353, "grad_norm": 0.19921875, "learning_rate": 0.019176470588235295, "loss": 2.0317, "step": 978 }, { "epoch": 0.02303529411764706, "grad_norm": 0.1875, "learning_rate": 0.01919607843137255, "loss": 1.8926, "step": 979 }, { "epoch": 0.023058823529411764, "grad_norm": 0.18359375, "learning_rate": 0.019215686274509806, "loss": 2.0854, "step": 980 }, { "epoch": 0.02308235294117647, "grad_norm": 0.193359375, "learning_rate": 0.01923529411764706, "loss": 1.9954, "step": 981 }, { "epoch": 0.023105882352941177, "grad_norm": 0.1748046875, "learning_rate": 0.019254901960784315, "loss": 1.8605, "step": 982 }, { "epoch": 0.02312941176470588, "grad_norm": 0.1953125, "learning_rate": 0.01927450980392157, "loss": 1.8874, "step": 983 }, { "epoch": 0.023152941176470588, "grad_norm": 0.181640625, "learning_rate": 0.019294117647058823, "loss": 1.9408, "step": 984 }, { "epoch": 0.023176470588235295, "grad_norm": 0.2236328125, "learning_rate": 0.01931372549019608, "loss": 2.1186, "step": 985 }, { "epoch": 0.0232, "grad_norm": 0.216796875, "learning_rate": 0.019333333333333334, "loss": 1.8886, "step": 986 }, { "epoch": 0.023223529411764705, "grad_norm": 0.216796875, "learning_rate": 0.01935294117647059, "loss": 1.9675, "step": 987 }, { "epoch": 0.023247058823529412, "grad_norm": 0.2421875, "learning_rate": 0.019372549019607842, "loss": 2.0989, "step": 988 }, { "epoch": 0.02327058823529412, "grad_norm": 0.2412109375, "learning_rate": 0.019392156862745098, "loss": 1.9019, "step": 989 }, { "epoch": 0.023294117647058823, "grad_norm": 0.197265625, "learning_rate": 0.019411764705882354, "loss": 2.0025, "step": 990 }, { "epoch": 0.02331764705882353, "grad_norm": 0.19140625, "learning_rate": 0.019431372549019606, "loss": 2.0342, "step": 991 }, { "epoch": 0.023341176470588237, "grad_norm": 0.2373046875, "learning_rate": 0.019450980392156862, "loss": 1.9791, "step": 992 }, { "epoch": 0.02336470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.019470588235294118, "loss": 1.9265, "step": 993 }, { "epoch": 0.023388235294117647, "grad_norm": 0.171875, "learning_rate": 0.019490196078431374, "loss": 2.108, "step": 994 }, { "epoch": 0.023411764705882354, "grad_norm": 0.1923828125, "learning_rate": 0.019509803921568626, "loss": 1.7812, "step": 995 }, { "epoch": 0.023435294117647058, "grad_norm": 0.1728515625, "learning_rate": 0.019529411764705882, "loss": 2.0016, "step": 996 }, { "epoch": 0.023458823529411765, "grad_norm": 0.189453125, "learning_rate": 0.019549019607843138, "loss": 1.9207, "step": 997 }, { "epoch": 0.02348235294117647, "grad_norm": 0.212890625, "learning_rate": 0.019568627450980394, "loss": 1.9535, "step": 998 }, { "epoch": 0.023505882352941175, "grad_norm": 0.2001953125, "learning_rate": 0.019588235294117646, "loss": 1.9885, "step": 999 }, { "epoch": 0.023529411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.0196078431372549, "loss": 1.9422, "step": 1000 }, { "epoch": 0.023529411764705882, "eval_loss": 2.3058552742004395, "eval_runtime": 572.1549, "eval_samples_per_second": 14.856, "eval_steps_per_second": 3.714, "step": 1000 }, { "epoch": 0.02355294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.019627450980392157, "loss": 1.857, "step": 1001 }, { "epoch": 0.023576470588235292, "grad_norm": 0.2216796875, "learning_rate": 0.01964705882352941, "loss": 1.9268, "step": 1002 }, { "epoch": 0.0236, "grad_norm": 0.1904296875, "learning_rate": 0.019666666666666666, "loss": 1.9579, "step": 1003 }, { "epoch": 0.023623529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.01968627450980392, "loss": 1.8477, "step": 1004 }, { "epoch": 0.023647058823529413, "grad_norm": 0.2021484375, "learning_rate": 0.019705882352941177, "loss": 2.0614, "step": 1005 }, { "epoch": 0.023670588235294117, "grad_norm": 0.2197265625, "learning_rate": 0.019725490196078433, "loss": 2.0462, "step": 1006 }, { "epoch": 0.023694117647058824, "grad_norm": 0.2099609375, "learning_rate": 0.01974509803921569, "loss": 1.7971, "step": 1007 }, { "epoch": 0.02371764705882353, "grad_norm": 0.1875, "learning_rate": 0.01976470588235294, "loss": 2.1321, "step": 1008 }, { "epoch": 0.023741176470588234, "grad_norm": 0.201171875, "learning_rate": 0.019784313725490197, "loss": 1.9477, "step": 1009 }, { "epoch": 0.02376470588235294, "grad_norm": 0.205078125, "learning_rate": 0.019803921568627453, "loss": 1.9775, "step": 1010 }, { "epoch": 0.023788235294117648, "grad_norm": 0.1904296875, "learning_rate": 0.01982352941176471, "loss": 1.9135, "step": 1011 }, { "epoch": 0.02381176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.01984313725490196, "loss": 1.9539, "step": 1012 }, { "epoch": 0.02383529411764706, "grad_norm": 0.177734375, "learning_rate": 0.019862745098039217, "loss": 1.9487, "step": 1013 }, { "epoch": 0.023858823529411766, "grad_norm": 0.2216796875, "learning_rate": 0.019882352941176473, "loss": 1.8243, "step": 1014 }, { "epoch": 0.02388235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.019901960784313725, "loss": 1.9229, "step": 1015 }, { "epoch": 0.023905882352941176, "grad_norm": 0.2041015625, "learning_rate": 0.01992156862745098, "loss": 1.9341, "step": 1016 }, { "epoch": 0.023929411764705883, "grad_norm": 0.177734375, "learning_rate": 0.019941176470588236, "loss": 2.0764, "step": 1017 }, { "epoch": 0.023952941176470587, "grad_norm": 0.21484375, "learning_rate": 0.019960784313725492, "loss": 1.7523, "step": 1018 }, { "epoch": 0.023976470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.019980392156862745, "loss": 2.1098, "step": 1019 }, { "epoch": 0.024, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7288, "step": 1020 }, { "epoch": 0.024023529411764707, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.9493, "step": 1021 }, { "epoch": 0.02404705882352941, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.9629, "step": 1022 }, { "epoch": 0.024070588235294118, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9951, "step": 1023 }, { "epoch": 0.024094117647058825, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.9186, "step": 1024 }, { "epoch": 0.02411764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.9262, "step": 1025 }, { "epoch": 0.024141176470588235, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.8377, "step": 1026 }, { "epoch": 0.024164705882352942, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.9567, "step": 1027 }, { "epoch": 0.024188235294117646, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.889, "step": 1028 }, { "epoch": 0.024211764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.8378, "step": 1029 }, { "epoch": 0.02423529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8462, "step": 1030 }, { "epoch": 0.024258823529411763, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8657, "step": 1031 }, { "epoch": 0.02428235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 2.0112, "step": 1032 }, { "epoch": 0.024305882352941177, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 2.0628, "step": 1033 }, { "epoch": 0.02432941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9651, "step": 1034 }, { "epoch": 0.024352941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.9476, "step": 1035 }, { "epoch": 0.024376470588235295, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 2.0951, "step": 1036 }, { "epoch": 0.0244, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 2.0272, "step": 1037 }, { "epoch": 0.024423529411764705, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 2.0416, "step": 1038 }, { "epoch": 0.024447058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.9444, "step": 1039 }, { "epoch": 0.02447058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8359, "step": 1040 }, { "epoch": 0.024494117647058822, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 2.031, "step": 1041 }, { "epoch": 0.02451764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.9973, "step": 1042 }, { "epoch": 0.024541176470588236, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8839, "step": 1043 }, { "epoch": 0.02456470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.9332, "step": 1044 }, { "epoch": 0.024588235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.8953, "step": 1045 }, { "epoch": 0.024611764705882354, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.9026, "step": 1046 }, { "epoch": 0.024635294117647057, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 2.0087, "step": 1047 }, { "epoch": 0.024658823529411764, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.9301, "step": 1048 }, { "epoch": 0.02468235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.941, "step": 1049 }, { "epoch": 0.024705882352941175, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.9793, "step": 1050 }, { "epoch": 0.02472941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 2.0626, "step": 1051 }, { "epoch": 0.02475294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 2.0155, "step": 1052 }, { "epoch": 0.024776470588235296, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.9646, "step": 1053 }, { "epoch": 0.0248, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.9844, "step": 1054 }, { "epoch": 0.024823529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 2.016, "step": 1055 }, { "epoch": 0.024847058823529413, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.8959, "step": 1056 }, { "epoch": 0.024870588235294117, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9972, "step": 1057 }, { "epoch": 0.024894117647058824, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 2.0685, "step": 1058 }, { "epoch": 0.02491764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.979, "step": 1059 }, { "epoch": 0.024941176470588234, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.9986, "step": 1060 }, { "epoch": 0.02496470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 2.0373, "step": 1061 }, { "epoch": 0.024988235294117648, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 2.1247, "step": 1062 }, { "epoch": 0.02501176470588235, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 2.1275, "step": 1063 }, { "epoch": 0.02503529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8895, "step": 1064 }, { "epoch": 0.025058823529411765, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 2.0197, "step": 1065 }, { "epoch": 0.025082352941176472, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8136, "step": 1066 }, { "epoch": 0.025105882352941176, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 2.0453, "step": 1067 }, { "epoch": 0.025129411764705883, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9473, "step": 1068 }, { "epoch": 0.02515294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8901, "step": 1069 }, { "epoch": 0.025176470588235293, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.9596, "step": 1070 }, { "epoch": 0.0252, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 2.1105, "step": 1071 }, { "epoch": 0.025223529411764707, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 2.0538, "step": 1072 }, { "epoch": 0.02524705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 2.0291, "step": 1073 }, { "epoch": 0.025270588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.9061, "step": 1074 }, { "epoch": 0.025294117647058825, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8564, "step": 1075 }, { "epoch": 0.025317647058823528, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.947, "step": 1076 }, { "epoch": 0.025341176470588235, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 2.0139, "step": 1077 }, { "epoch": 0.025364705882352942, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9677, "step": 1078 }, { "epoch": 0.025388235294117645, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8774, "step": 1079 }, { "epoch": 0.025411764705882352, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.857, "step": 1080 }, { "epoch": 0.02543529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.8704, "step": 1081 }, { "epoch": 0.025458823529411766, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.9408, "step": 1082 }, { "epoch": 0.02548235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.936, "step": 1083 }, { "epoch": 0.025505882352941177, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.8778, "step": 1084 }, { "epoch": 0.025529411764705884, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7381, "step": 1085 }, { "epoch": 0.025552941176470587, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.9207, "step": 1086 }, { "epoch": 0.025576470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.9872, "step": 1087 }, { "epoch": 0.0256, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.9436, "step": 1088 }, { "epoch": 0.025623529411764705, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.9544, "step": 1089 }, { "epoch": 0.02564705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.9332, "step": 1090 }, { "epoch": 0.02567058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.9302, "step": 1091 }, { "epoch": 0.025694117647058822, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.9461, "step": 1092 }, { "epoch": 0.02571764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.9969, "step": 1093 }, { "epoch": 0.025741176470588236, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.9057, "step": 1094 }, { "epoch": 0.02576470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8942, "step": 1095 }, { "epoch": 0.025788235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.9731, "step": 1096 }, { "epoch": 0.025811764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 2.0203, "step": 1097 }, { "epoch": 0.02583529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.8863, "step": 1098 }, { "epoch": 0.025858823529411764, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9604, "step": 1099 }, { "epoch": 0.02588235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9243, "step": 1100 }, { "epoch": 0.025905882352941178, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.916, "step": 1101 }, { "epoch": 0.02592941176470588, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 2.0526, "step": 1102 }, { "epoch": 0.02595294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8843, "step": 1103 }, { "epoch": 0.025976470588235295, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8811, "step": 1104 }, { "epoch": 0.026, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8291, "step": 1105 }, { "epoch": 0.026023529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8268, "step": 1106 }, { "epoch": 0.026047058823529413, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 2.0167, "step": 1107 }, { "epoch": 0.026070588235294116, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 2.0091, "step": 1108 }, { "epoch": 0.026094117647058823, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7639, "step": 1109 }, { "epoch": 0.02611764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.9038, "step": 1110 }, { "epoch": 0.026141176470588234, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.9152, "step": 1111 }, { "epoch": 0.02616470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9819, "step": 1112 }, { "epoch": 0.026188235294117648, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.858, "step": 1113 }, { "epoch": 0.026211764705882355, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8218, "step": 1114 }, { "epoch": 0.026235294117647058, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9914, "step": 1115 }, { "epoch": 0.026258823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 2.011, "step": 1116 }, { "epoch": 0.026282352941176472, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7053, "step": 1117 }, { "epoch": 0.026305882352941175, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.91, "step": 1118 }, { "epoch": 0.026329411764705882, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 2.0429, "step": 1119 }, { "epoch": 0.02635294117647059, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.913, "step": 1120 }, { "epoch": 0.026376470588235293, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 2.056, "step": 1121 }, { "epoch": 0.0264, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8456, "step": 1122 }, { "epoch": 0.026423529411764707, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 2.0101, "step": 1123 }, { "epoch": 0.02644705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.8263, "step": 1124 }, { "epoch": 0.026470588235294117, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.9263, "step": 1125 }, { "epoch": 0.026494117647058824, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9009, "step": 1126 }, { "epoch": 0.026517647058823528, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9271, "step": 1127 }, { "epoch": 0.026541176470588235, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.832, "step": 1128 }, { "epoch": 0.02656470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.9479, "step": 1129 }, { "epoch": 0.02658823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 2.1429, "step": 1130 }, { "epoch": 0.026611764705882352, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.922, "step": 1131 }, { "epoch": 0.02663529411764706, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.9165, "step": 1132 }, { "epoch": 0.026658823529411766, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9407, "step": 1133 }, { "epoch": 0.02668235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 2.0616, "step": 1134 }, { "epoch": 0.026705882352941177, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.955, "step": 1135 }, { "epoch": 0.026729411764705883, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9398, "step": 1136 }, { "epoch": 0.026752941176470587, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9259, "step": 1137 }, { "epoch": 0.026776470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8154, "step": 1138 }, { "epoch": 0.0268, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8929, "step": 1139 }, { "epoch": 0.026823529411764704, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 2.0221, "step": 1140 }, { "epoch": 0.02684705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9407, "step": 1141 }, { "epoch": 0.02687058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9032, "step": 1142 }, { "epoch": 0.026894117647058822, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 2.0695, "step": 1143 }, { "epoch": 0.02691764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.7735, "step": 1144 }, { "epoch": 0.026941176470588236, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.9316, "step": 1145 }, { "epoch": 0.026964705882352943, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8993, "step": 1146 }, { "epoch": 0.026988235294117646, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9745, "step": 1147 }, { "epoch": 0.027011764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8822, "step": 1148 }, { "epoch": 0.02703529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7975, "step": 1149 }, { "epoch": 0.027058823529411764, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9406, "step": 1150 }, { "epoch": 0.02708235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8658, "step": 1151 }, { "epoch": 0.027105882352941178, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 2.0293, "step": 1152 }, { "epoch": 0.02712941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.958, "step": 1153 }, { "epoch": 0.027152941176470588, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8089, "step": 1154 }, { "epoch": 0.027176470588235295, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.9032, "step": 1155 }, { "epoch": 0.0272, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9676, "step": 1156 }, { "epoch": 0.027223529411764705, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.9631, "step": 1157 }, { "epoch": 0.027247058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.954, "step": 1158 }, { "epoch": 0.02727058823529412, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7312, "step": 1159 }, { "epoch": 0.027294117647058823, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 2.0528, "step": 1160 }, { "epoch": 0.02731764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8244, "step": 1161 }, { "epoch": 0.027341176470588237, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9033, "step": 1162 }, { "epoch": 0.02736470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9466, "step": 1163 }, { "epoch": 0.027388235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 2.0559, "step": 1164 }, { "epoch": 0.027411764705882354, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.9012, "step": 1165 }, { "epoch": 0.027435294117647058, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.8892, "step": 1166 }, { "epoch": 0.027458823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 2.0467, "step": 1167 }, { "epoch": 0.02748235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.9126, "step": 1168 }, { "epoch": 0.027505882352941175, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.8699, "step": 1169 }, { "epoch": 0.027529411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8969, "step": 1170 }, { "epoch": 0.02755294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 2.0053, "step": 1171 }, { "epoch": 0.027576470588235293, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8532, "step": 1172 }, { "epoch": 0.0276, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 2.0508, "step": 1173 }, { "epoch": 0.027623529411764706, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.9737, "step": 1174 }, { "epoch": 0.027647058823529413, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8795, "step": 1175 }, { "epoch": 0.027670588235294117, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.9353, "step": 1176 }, { "epoch": 0.027694117647058824, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 2.0001, "step": 1177 }, { "epoch": 0.02771764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8145, "step": 1178 }, { "epoch": 0.027741176470588234, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 2.093, "step": 1179 }, { "epoch": 0.02776470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 2.0559, "step": 1180 }, { "epoch": 0.02778823529411765, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 2.0231, "step": 1181 }, { "epoch": 0.027811764705882352, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 2.0635, "step": 1182 }, { "epoch": 0.02783529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8673, "step": 1183 }, { "epoch": 0.027858823529411766, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.983, "step": 1184 }, { "epoch": 0.02788235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9078, "step": 1185 }, { "epoch": 0.027905882352941176, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 2.0545, "step": 1186 }, { "epoch": 0.027929411764705883, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 2.0036, "step": 1187 }, { "epoch": 0.027952941176470587, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8972, "step": 1188 }, { "epoch": 0.027976470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.9474, "step": 1189 }, { "epoch": 0.028, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.9386, "step": 1190 }, { "epoch": 0.028023529411764708, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9375, "step": 1191 }, { "epoch": 0.02804705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8775, "step": 1192 }, { "epoch": 0.028070588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8315, "step": 1193 }, { "epoch": 0.028094117647058825, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8974, "step": 1194 }, { "epoch": 0.02811764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 2.0015, "step": 1195 }, { "epoch": 0.028141176470588235, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.8078, "step": 1196 }, { "epoch": 0.028164705882352942, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 2.0218, "step": 1197 }, { "epoch": 0.028188235294117646, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9564, "step": 1198 }, { "epoch": 0.028211764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 2.0103, "step": 1199 }, { "epoch": 0.02823529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8691, "step": 1200 }, { "epoch": 0.028258823529411763, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.8882, "step": 1201 }, { "epoch": 0.02828235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9827, "step": 1202 }, { "epoch": 0.028305882352941177, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 2.0295, "step": 1203 }, { "epoch": 0.02832941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8286, "step": 1204 }, { "epoch": 0.028352941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8466, "step": 1205 }, { "epoch": 0.028376470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8541, "step": 1206 }, { "epoch": 0.0284, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.9738, "step": 1207 }, { "epoch": 0.028423529411764705, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8387, "step": 1208 }, { "epoch": 0.028447058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8891, "step": 1209 }, { "epoch": 0.02847058823529412, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 2.0786, "step": 1210 }, { "epoch": 0.028494117647058823, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8114, "step": 1211 }, { "epoch": 0.02851764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 2.0073, "step": 1212 }, { "epoch": 0.028541176470588236, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 1.7755, "step": 1213 }, { "epoch": 0.02856470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 2.0919, "step": 1214 }, { "epoch": 0.028588235294117647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.9492, "step": 1215 }, { "epoch": 0.028611764705882354, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8933, "step": 1216 }, { "epoch": 0.028635294117647057, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.9423, "step": 1217 }, { "epoch": 0.028658823529411764, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8912, "step": 1218 }, { "epoch": 0.02868235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.915, "step": 1219 }, { "epoch": 0.028705882352941175, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.9718, "step": 1220 }, { "epoch": 0.028729411764705882, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 2.0172, "step": 1221 }, { "epoch": 0.02875294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8054, "step": 1222 }, { "epoch": 0.028776470588235296, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 2.0373, "step": 1223 }, { "epoch": 0.0288, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8637, "step": 1224 }, { "epoch": 0.028823529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.9214, "step": 1225 }, { "epoch": 0.028847058823529413, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.8096, "step": 1226 }, { "epoch": 0.028870588235294117, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.8689, "step": 1227 }, { "epoch": 0.028894117647058824, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8975, "step": 1228 }, { "epoch": 0.02891764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8934, "step": 1229 }, { "epoch": 0.028941176470588234, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.773, "step": 1230 }, { "epoch": 0.02896470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.7809, "step": 1231 }, { "epoch": 0.028988235294117648, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 2.0221, "step": 1232 }, { "epoch": 0.02901176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8286, "step": 1233 }, { "epoch": 0.02903529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 2.0086, "step": 1234 }, { "epoch": 0.029058823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.9378, "step": 1235 }, { "epoch": 0.02908235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9897, "step": 1236 }, { "epoch": 0.029105882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7497, "step": 1237 }, { "epoch": 0.029129411764705883, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 2.0394, "step": 1238 }, { "epoch": 0.02915294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.774, "step": 1239 }, { "epoch": 0.029176470588235293, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.9507, "step": 1240 }, { "epoch": 0.0292, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.7825, "step": 1241 }, { "epoch": 0.029223529411764707, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7701, "step": 1242 }, { "epoch": 0.02924705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8572, "step": 1243 }, { "epoch": 0.029270588235294118, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.652, "step": 1244 }, { "epoch": 0.029294117647058825, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9165, "step": 1245 }, { "epoch": 0.029317647058823528, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6806, "step": 1246 }, { "epoch": 0.029341176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.9528, "step": 1247 }, { "epoch": 0.029364705882352942, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.9356, "step": 1248 }, { "epoch": 0.029388235294117646, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8374, "step": 1249 }, { "epoch": 0.029411764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.998, "step": 1250 }, { "epoch": 0.02943529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8688, "step": 1251 }, { "epoch": 0.029458823529411763, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 2.0496, "step": 1252 }, { "epoch": 0.02948235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8791, "step": 1253 }, { "epoch": 0.029505882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.7936, "step": 1254 }, { "epoch": 0.029529411764705884, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.7743, "step": 1255 }, { "epoch": 0.029552941176470587, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.826, "step": 1256 }, { "epoch": 0.029576470588235294, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.896, "step": 1257 }, { "epoch": 0.0296, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 2.071, "step": 1258 }, { "epoch": 0.029623529411764705, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7952, "step": 1259 }, { "epoch": 0.029647058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8289, "step": 1260 }, { "epoch": 0.02967058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 2.0351, "step": 1261 }, { "epoch": 0.029694117647058822, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.908, "step": 1262 }, { "epoch": 0.02971764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8617, "step": 1263 }, { "epoch": 0.029741176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7765, "step": 1264 }, { "epoch": 0.02976470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.9335, "step": 1265 }, { "epoch": 0.029788235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9285, "step": 1266 }, { "epoch": 0.029811764705882354, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.9569, "step": 1267 }, { "epoch": 0.02983529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.9513, "step": 1268 }, { "epoch": 0.029858823529411764, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.851, "step": 1269 }, { "epoch": 0.02988235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8625, "step": 1270 }, { "epoch": 0.029905882352941178, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.9501, "step": 1271 }, { "epoch": 0.02992941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9853, "step": 1272 }, { "epoch": 0.02995294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.9222, "step": 1273 }, { "epoch": 0.029976470588235295, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9675, "step": 1274 }, { "epoch": 0.03, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6688, "step": 1275 }, { "epoch": 0.030023529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8341, "step": 1276 }, { "epoch": 0.030047058823529413, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9166, "step": 1277 }, { "epoch": 0.030070588235294116, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8891, "step": 1278 }, { "epoch": 0.030094117647058823, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.9042, "step": 1279 }, { "epoch": 0.03011764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.847, "step": 1280 }, { "epoch": 0.030141176470588234, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.9794, "step": 1281 }, { "epoch": 0.03016470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8156, "step": 1282 }, { "epoch": 0.030188235294117648, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.9576, "step": 1283 }, { "epoch": 0.030211764705882355, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9686, "step": 1284 }, { "epoch": 0.030235294117647058, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7986, "step": 1285 }, { "epoch": 0.030258823529411765, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.9225, "step": 1286 }, { "epoch": 0.030282352941176472, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.9082, "step": 1287 }, { "epoch": 0.030305882352941176, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8693, "step": 1288 }, { "epoch": 0.030329411764705883, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8447, "step": 1289 }, { "epoch": 0.03035294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.8463, "step": 1290 }, { "epoch": 0.030376470588235293, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7309, "step": 1291 }, { "epoch": 0.0304, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8608, "step": 1292 }, { "epoch": 0.030423529411764707, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8943, "step": 1293 }, { "epoch": 0.03044705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.7828, "step": 1294 }, { "epoch": 0.030470588235294117, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9869, "step": 1295 }, { "epoch": 0.030494117647058824, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8906, "step": 1296 }, { "epoch": 0.030517647058823528, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.8894, "step": 1297 }, { "epoch": 0.030541176470588235, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.9362, "step": 1298 }, { "epoch": 0.030564705882352942, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8148, "step": 1299 }, { "epoch": 0.03058823529411765, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.8871, "step": 1300 }, { "epoch": 0.030611764705882352, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8993, "step": 1301 }, { "epoch": 0.03063529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7538, "step": 1302 }, { "epoch": 0.030658823529411766, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9199, "step": 1303 }, { "epoch": 0.03068235294117647, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.9213, "step": 1304 }, { "epoch": 0.030705882352941177, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.9403, "step": 1305 }, { "epoch": 0.030729411764705884, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.6331, "step": 1306 }, { "epoch": 0.030752941176470587, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.7785, "step": 1307 }, { "epoch": 0.030776470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8978, "step": 1308 }, { "epoch": 0.0308, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7674, "step": 1309 }, { "epoch": 0.030823529411764704, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7592, "step": 1310 }, { "epoch": 0.03084705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 2.1095, "step": 1311 }, { "epoch": 0.03087058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.78, "step": 1312 }, { "epoch": 0.030894117647058822, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 2.053, "step": 1313 }, { "epoch": 0.03091764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.8532, "step": 1314 }, { "epoch": 0.030941176470588236, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8371, "step": 1315 }, { "epoch": 0.030964705882352943, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.9257, "step": 1316 }, { "epoch": 0.030988235294117646, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 2.0202, "step": 1317 }, { "epoch": 0.031011764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.9338, "step": 1318 }, { "epoch": 0.03103529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 2.0083, "step": 1319 }, { "epoch": 0.031058823529411764, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8316, "step": 1320 }, { "epoch": 0.03108235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8893, "step": 1321 }, { "epoch": 0.031105882352941178, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.9612, "step": 1322 }, { "epoch": 0.03112941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8145, "step": 1323 }, { "epoch": 0.031152941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8743, "step": 1324 }, { "epoch": 0.031176470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8329, "step": 1325 }, { "epoch": 0.0312, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9633, "step": 1326 }, { "epoch": 0.031223529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.9646, "step": 1327 }, { "epoch": 0.031247058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.7206, "step": 1328 }, { "epoch": 0.03127058823529412, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8229, "step": 1329 }, { "epoch": 0.031294117647058826, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.5176, "step": 1330 }, { "epoch": 0.031317647058823526, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.753, "step": 1331 }, { "epoch": 0.03134117647058823, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9995, "step": 1332 }, { "epoch": 0.03136470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8198, "step": 1333 }, { "epoch": 0.03138823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7953, "step": 1334 }, { "epoch": 0.031411764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9692, "step": 1335 }, { "epoch": 0.03143529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8083, "step": 1336 }, { "epoch": 0.03145882352941176, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8179, "step": 1337 }, { "epoch": 0.03148235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7661, "step": 1338 }, { "epoch": 0.031505882352941175, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.959, "step": 1339 }, { "epoch": 0.03152941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7562, "step": 1340 }, { "epoch": 0.03155294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8364, "step": 1341 }, { "epoch": 0.031576470588235296, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.9643, "step": 1342 }, { "epoch": 0.0316, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7297, "step": 1343 }, { "epoch": 0.0316235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7377, "step": 1344 }, { "epoch": 0.03164705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.9377, "step": 1345 }, { "epoch": 0.03167058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.9264, "step": 1346 }, { "epoch": 0.031694117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.7272, "step": 1347 }, { "epoch": 0.03171764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.9769, "step": 1348 }, { "epoch": 0.03174117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.8958, "step": 1349 }, { "epoch": 0.03176470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 2.0399, "step": 1350 }, { "epoch": 0.031788235294117645, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8783, "step": 1351 }, { "epoch": 0.03181176470588235, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8479, "step": 1352 }, { "epoch": 0.03183529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.7601, "step": 1353 }, { "epoch": 0.031858823529411766, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.763, "step": 1354 }, { "epoch": 0.03188235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 2.0047, "step": 1355 }, { "epoch": 0.03190588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9238, "step": 1356 }, { "epoch": 0.03192941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.9403, "step": 1357 }, { "epoch": 0.03195294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8969, "step": 1358 }, { "epoch": 0.031976470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.9566, "step": 1359 }, { "epoch": 0.032, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8877, "step": 1360 }, { "epoch": 0.03202352941176471, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8933, "step": 1361 }, { "epoch": 0.032047058823529415, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7583, "step": 1362 }, { "epoch": 0.032070588235294115, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8998, "step": 1363 }, { "epoch": 0.03209411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.9069, "step": 1364 }, { "epoch": 0.03211764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.9893, "step": 1365 }, { "epoch": 0.032141176470588236, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8113, "step": 1366 }, { "epoch": 0.03216470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.9584, "step": 1367 }, { "epoch": 0.03218823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8457, "step": 1368 }, { "epoch": 0.032211764705882356, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8257, "step": 1369 }, { "epoch": 0.032235294117647056, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.9569, "step": 1370 }, { "epoch": 0.03225882352941176, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8158, "step": 1371 }, { "epoch": 0.03228235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.9479, "step": 1372 }, { "epoch": 0.03230588235294118, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 2.0768, "step": 1373 }, { "epoch": 0.032329411764705884, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9014, "step": 1374 }, { "epoch": 0.03235294117647059, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.9558, "step": 1375 }, { "epoch": 0.03237647058823529, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.808, "step": 1376 }, { "epoch": 0.0324, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.9386, "step": 1377 }, { "epoch": 0.032423529411764705, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8793, "step": 1378 }, { "epoch": 0.03244705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8397, "step": 1379 }, { "epoch": 0.03247058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 2.0215, "step": 1380 }, { "epoch": 0.032494117647058826, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.7505, "step": 1381 }, { "epoch": 0.032517647058823526, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8556, "step": 1382 }, { "epoch": 0.03254117647058823, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8629, "step": 1383 }, { "epoch": 0.03256470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7135, "step": 1384 }, { "epoch": 0.03258823529411765, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.8359, "step": 1385 }, { "epoch": 0.032611764705882354, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8277, "step": 1386 }, { "epoch": 0.03263529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8116, "step": 1387 }, { "epoch": 0.03265882352941177, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8104, "step": 1388 }, { "epoch": 0.03268235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7837, "step": 1389 }, { "epoch": 0.032705882352941175, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.841, "step": 1390 }, { "epoch": 0.03272941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8835, "step": 1391 }, { "epoch": 0.03275294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.9048, "step": 1392 }, { "epoch": 0.032776470588235296, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8586, "step": 1393 }, { "epoch": 0.0328, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8034, "step": 1394 }, { "epoch": 0.0328235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5946, "step": 1395 }, { "epoch": 0.03284705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 2.0251, "step": 1396 }, { "epoch": 0.03287058823529412, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8204, "step": 1397 }, { "epoch": 0.032894117647058824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7593, "step": 1398 }, { "epoch": 0.03291764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9034, "step": 1399 }, { "epoch": 0.03294117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8431, "step": 1400 }, { "epoch": 0.032964705882352945, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.8735, "step": 1401 }, { "epoch": 0.032988235294117645, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 2.0146, "step": 1402 }, { "epoch": 0.03301176470588235, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.886, "step": 1403 }, { "epoch": 0.03303529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8649, "step": 1404 }, { "epoch": 0.033058823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7449, "step": 1405 }, { "epoch": 0.03308235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.9267, "step": 1406 }, { "epoch": 0.03310588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9877, "step": 1407 }, { "epoch": 0.03312941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8087, "step": 1408 }, { "epoch": 0.033152941176470586, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8419, "step": 1409 }, { "epoch": 0.03317647058823529, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8992, "step": 1410 }, { "epoch": 0.0332, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 2.0818, "step": 1411 }, { "epoch": 0.03322352941176471, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8694, "step": 1412 }, { "epoch": 0.033247058823529414, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7578, "step": 1413 }, { "epoch": 0.033270588235294114, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8499, "step": 1414 }, { "epoch": 0.03329411764705882, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.747, "step": 1415 }, { "epoch": 0.03331764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.7318, "step": 1416 }, { "epoch": 0.033341176470588235, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8938, "step": 1417 }, { "epoch": 0.03336470588235294, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.8873, "step": 1418 }, { "epoch": 0.03338823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.9225, "step": 1419 }, { "epoch": 0.033411764705882356, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.9327, "step": 1420 }, { "epoch": 0.033435294117647056, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9287, "step": 1421 }, { "epoch": 0.03345882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8321, "step": 1422 }, { "epoch": 0.03348235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9743, "step": 1423 }, { "epoch": 0.03350588235294118, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7627, "step": 1424 }, { "epoch": 0.033529411764705884, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8592, "step": 1425 }, { "epoch": 0.03355294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.6683, "step": 1426 }, { "epoch": 0.03357647058823529, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.833, "step": 1427 }, { "epoch": 0.0336, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8692, "step": 1428 }, { "epoch": 0.033623529411764705, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8034, "step": 1429 }, { "epoch": 0.03364705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.9014, "step": 1430 }, { "epoch": 0.03367058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7485, "step": 1431 }, { "epoch": 0.033694117647058826, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9786, "step": 1432 }, { "epoch": 0.03371764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9134, "step": 1433 }, { "epoch": 0.03374117647058823, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.889, "step": 1434 }, { "epoch": 0.03376470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8892, "step": 1435 }, { "epoch": 0.03378823529411765, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.9979, "step": 1436 }, { "epoch": 0.033811764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9049, "step": 1437 }, { "epoch": 0.03383529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.877, "step": 1438 }, { "epoch": 0.03385882352941177, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.907, "step": 1439 }, { "epoch": 0.03388235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7624, "step": 1440 }, { "epoch": 0.033905882352941175, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6559, "step": 1441 }, { "epoch": 0.03392941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8333, "step": 1442 }, { "epoch": 0.03395294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8827, "step": 1443 }, { "epoch": 0.033976470588235295, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.9542, "step": 1444 }, { "epoch": 0.034, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8911, "step": 1445 }, { "epoch": 0.0340235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8066, "step": 1446 }, { "epoch": 0.03404705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7652, "step": 1447 }, { "epoch": 0.034070588235294116, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.8634, "step": 1448 }, { "epoch": 0.03409411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8792, "step": 1449 }, { "epoch": 0.03411764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7992, "step": 1450 }, { "epoch": 0.03414117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6901, "step": 1451 }, { "epoch": 0.034164705882352944, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9741, "step": 1452 }, { "epoch": 0.034188235294117644, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7054, "step": 1453 }, { "epoch": 0.03421176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6302, "step": 1454 }, { "epoch": 0.03423529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.7145, "step": 1455 }, { "epoch": 0.034258823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8461, "step": 1456 }, { "epoch": 0.03428235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8102, "step": 1457 }, { "epoch": 0.03430588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8036, "step": 1458 }, { "epoch": 0.03432941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.7898, "step": 1459 }, { "epoch": 0.034352941176470586, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.7085, "step": 1460 }, { "epoch": 0.03437647058823529, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8041, "step": 1461 }, { "epoch": 0.0344, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.9066, "step": 1462 }, { "epoch": 0.03442352941176471, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.9179, "step": 1463 }, { "epoch": 0.034447058823529414, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.8681, "step": 1464 }, { "epoch": 0.03447058823529412, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.7115, "step": 1465 }, { "epoch": 0.03449411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9169, "step": 1466 }, { "epoch": 0.03451764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.9165, "step": 1467 }, { "epoch": 0.034541176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8628, "step": 1468 }, { "epoch": 0.03456470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8766, "step": 1469 }, { "epoch": 0.03458823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9333, "step": 1470 }, { "epoch": 0.034611764705882356, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8325, "step": 1471 }, { "epoch": 0.034635294117647056, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9774, "step": 1472 }, { "epoch": 0.03465882352941176, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8906, "step": 1473 }, { "epoch": 0.03468235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7487, "step": 1474 }, { "epoch": 0.03470588235294118, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.8411, "step": 1475 }, { "epoch": 0.034729411764705884, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7697, "step": 1476 }, { "epoch": 0.03475294117647059, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.8043, "step": 1477 }, { "epoch": 0.0347764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.839, "step": 1478 }, { "epoch": 0.0348, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7517, "step": 1479 }, { "epoch": 0.034823529411764705, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.851, "step": 1480 }, { "epoch": 0.03484705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8858, "step": 1481 }, { "epoch": 0.03487058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8501, "step": 1482 }, { "epoch": 0.034894117647058825, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8402, "step": 1483 }, { "epoch": 0.03491764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8463, "step": 1484 }, { "epoch": 0.03494117647058823, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.9314, "step": 1485 }, { "epoch": 0.03496470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.9673, "step": 1486 }, { "epoch": 0.034988235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7734, "step": 1487 }, { "epoch": 0.03501176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7383, "step": 1488 }, { "epoch": 0.03503529411764706, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.9822, "step": 1489 }, { "epoch": 0.03505882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8156, "step": 1490 }, { "epoch": 0.03508235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8592, "step": 1491 }, { "epoch": 0.035105882352941174, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.709, "step": 1492 }, { "epoch": 0.03512941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.678, "step": 1493 }, { "epoch": 0.03515294117647059, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7386, "step": 1494 }, { "epoch": 0.035176470588235295, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.9468, "step": 1495 }, { "epoch": 0.0352, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6516, "step": 1496 }, { "epoch": 0.03522352941176471, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7478, "step": 1497 }, { "epoch": 0.03524705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7643, "step": 1498 }, { "epoch": 0.035270588235294116, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8045, "step": 1499 }, { "epoch": 0.03529411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8625, "step": 1500 }, { "epoch": 0.03531764705882353, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7011, "step": 1501 }, { "epoch": 0.03534117647058824, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.9256, "step": 1502 }, { "epoch": 0.035364705882352944, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.841, "step": 1503 }, { "epoch": 0.035388235294117644, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8116, "step": 1504 }, { "epoch": 0.03541176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8596, "step": 1505 }, { "epoch": 0.03543529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.782, "step": 1506 }, { "epoch": 0.035458823529411765, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8814, "step": 1507 }, { "epoch": 0.03548235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9823, "step": 1508 }, { "epoch": 0.03550588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.817, "step": 1509 }, { "epoch": 0.035529411764705886, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7486, "step": 1510 }, { "epoch": 0.035552941176470586, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7581, "step": 1511 }, { "epoch": 0.03557647058823529, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7578, "step": 1512 }, { "epoch": 0.0356, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6568, "step": 1513 }, { "epoch": 0.03562352941176471, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9138, "step": 1514 }, { "epoch": 0.035647058823529414, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8923, "step": 1515 }, { "epoch": 0.03567058823529412, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8502, "step": 1516 }, { "epoch": 0.03569411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7559, "step": 1517 }, { "epoch": 0.03571764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8343, "step": 1518 }, { "epoch": 0.035741176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.787, "step": 1519 }, { "epoch": 0.03576470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7017, "step": 1520 }, { "epoch": 0.03578823529411765, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 2.0124, "step": 1521 }, { "epoch": 0.035811764705882355, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8259, "step": 1522 }, { "epoch": 0.035835294117647055, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7354, "step": 1523 }, { "epoch": 0.03585882352941176, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7633, "step": 1524 }, { "epoch": 0.03588235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7913, "step": 1525 }, { "epoch": 0.035905882352941176, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8911, "step": 1526 }, { "epoch": 0.03592941176470588, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7096, "step": 1527 }, { "epoch": 0.03595294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8543, "step": 1528 }, { "epoch": 0.0359764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.7425, "step": 1529 }, { "epoch": 0.036, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8122, "step": 1530 }, { "epoch": 0.036023529411764704, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8803, "step": 1531 }, { "epoch": 0.03604705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8157, "step": 1532 }, { "epoch": 0.03607058823529412, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8255, "step": 1533 }, { "epoch": 0.036094117647058825, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.9734, "step": 1534 }, { "epoch": 0.03611764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6169, "step": 1535 }, { "epoch": 0.03614117647058823, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.8097, "step": 1536 }, { "epoch": 0.03616470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7141, "step": 1537 }, { "epoch": 0.036188235294117646, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.6655, "step": 1538 }, { "epoch": 0.03621176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8344, "step": 1539 }, { "epoch": 0.03623529411764706, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.9494, "step": 1540 }, { "epoch": 0.03625882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.9202, "step": 1541 }, { "epoch": 0.036282352941176474, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9247, "step": 1542 }, { "epoch": 0.036305882352941174, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.8813, "step": 1543 }, { "epoch": 0.03632941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8104, "step": 1544 }, { "epoch": 0.03635294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6938, "step": 1545 }, { "epoch": 0.036376470588235295, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8015, "step": 1546 }, { "epoch": 0.0364, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6279, "step": 1547 }, { "epoch": 0.03642352941176471, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.912, "step": 1548 }, { "epoch": 0.03644705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8825, "step": 1549 }, { "epoch": 0.036470588235294116, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8035, "step": 1550 }, { "epoch": 0.03649411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8851, "step": 1551 }, { "epoch": 0.03651764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8019, "step": 1552 }, { "epoch": 0.03654117647058824, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 2.0053, "step": 1553 }, { "epoch": 0.036564705882352944, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8381, "step": 1554 }, { "epoch": 0.036588235294117644, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.8019, "step": 1555 }, { "epoch": 0.03661176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8835, "step": 1556 }, { "epoch": 0.03663529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6789, "step": 1557 }, { "epoch": 0.036658823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6519, "step": 1558 }, { "epoch": 0.03668235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8392, "step": 1559 }, { "epoch": 0.03670588235294118, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.7167, "step": 1560 }, { "epoch": 0.036729411764705885, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.9657, "step": 1561 }, { "epoch": 0.036752941176470585, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.9614, "step": 1562 }, { "epoch": 0.03677647058823529, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8661, "step": 1563 }, { "epoch": 0.0368, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6673, "step": 1564 }, { "epoch": 0.036823529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.619, "step": 1565 }, { "epoch": 0.03684705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.9296, "step": 1566 }, { "epoch": 0.03687058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6427, "step": 1567 }, { "epoch": 0.03689411764705882, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7524, "step": 1568 }, { "epoch": 0.03691764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.6458, "step": 1569 }, { "epoch": 0.036941176470588234, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6635, "step": 1570 }, { "epoch": 0.03696470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6952, "step": 1571 }, { "epoch": 0.03698823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.9219, "step": 1572 }, { "epoch": 0.037011764705882355, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7922, "step": 1573 }, { "epoch": 0.03703529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6855, "step": 1574 }, { "epoch": 0.03705882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7392, "step": 1575 }, { "epoch": 0.03708235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.778, "step": 1576 }, { "epoch": 0.037105882352941176, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7095, "step": 1577 }, { "epoch": 0.03712941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8812, "step": 1578 }, { "epoch": 0.03715294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6939, "step": 1579 }, { "epoch": 0.0371764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8686, "step": 1580 }, { "epoch": 0.0372, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.9545, "step": 1581 }, { "epoch": 0.037223529411764704, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8037, "step": 1582 }, { "epoch": 0.03724705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.756, "step": 1583 }, { "epoch": 0.03727058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7626, "step": 1584 }, { "epoch": 0.037294117647058825, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8703, "step": 1585 }, { "epoch": 0.03731764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7699, "step": 1586 }, { "epoch": 0.03734117647058824, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5686, "step": 1587 }, { "epoch": 0.03736470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.683, "step": 1588 }, { "epoch": 0.037388235294117646, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8856, "step": 1589 }, { "epoch": 0.03741176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.829, "step": 1590 }, { "epoch": 0.03743529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6599, "step": 1591 }, { "epoch": 0.03745882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 2.0123, "step": 1592 }, { "epoch": 0.037482352941176474, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.5786, "step": 1593 }, { "epoch": 0.037505882352941174, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6509, "step": 1594 }, { "epoch": 0.03752941176470588, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.755, "step": 1595 }, { "epoch": 0.03755294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.719, "step": 1596 }, { "epoch": 0.037576470588235295, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.7484, "step": 1597 }, { "epoch": 0.0376, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8189, "step": 1598 }, { "epoch": 0.03762352941176471, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7057, "step": 1599 }, { "epoch": 0.03764705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6573, "step": 1600 }, { "epoch": 0.037670588235294115, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.851, "step": 1601 }, { "epoch": 0.03769411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6904, "step": 1602 }, { "epoch": 0.03771764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7872, "step": 1603 }, { "epoch": 0.037741176470588236, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.6256, "step": 1604 }, { "epoch": 0.03776470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5533, "step": 1605 }, { "epoch": 0.03778823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.9465, "step": 1606 }, { "epoch": 0.03781176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.5709, "step": 1607 }, { "epoch": 0.03783529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6877, "step": 1608 }, { "epoch": 0.037858823529411764, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7238, "step": 1609 }, { "epoch": 0.03788235294117647, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.7625, "step": 1610 }, { "epoch": 0.03790588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8294, "step": 1611 }, { "epoch": 0.037929411764705885, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8414, "step": 1612 }, { "epoch": 0.037952941176470585, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.9132, "step": 1613 }, { "epoch": 0.03797647058823529, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8155, "step": 1614 }, { "epoch": 0.038, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7573, "step": 1615 }, { "epoch": 0.038023529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.762, "step": 1616 }, { "epoch": 0.03804705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7399, "step": 1617 }, { "epoch": 0.03807058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6245, "step": 1618 }, { "epoch": 0.03809411764705883, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6463, "step": 1619 }, { "epoch": 0.03811764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7623, "step": 1620 }, { "epoch": 0.038141176470588234, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7834, "step": 1621 }, { "epoch": 0.03816470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.9126, "step": 1622 }, { "epoch": 0.03818823529411765, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.817, "step": 1623 }, { "epoch": 0.038211764705882355, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8729, "step": 1624 }, { "epoch": 0.03823529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7729, "step": 1625 }, { "epoch": 0.03825882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7579, "step": 1626 }, { "epoch": 0.03828235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7094, "step": 1627 }, { "epoch": 0.038305882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8712, "step": 1628 }, { "epoch": 0.03832941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7274, "step": 1629 }, { "epoch": 0.03835294117647059, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.7634, "step": 1630 }, { "epoch": 0.0383764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7714, "step": 1631 }, { "epoch": 0.0384, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.834, "step": 1632 }, { "epoch": 0.038423529411764704, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.7503, "step": 1633 }, { "epoch": 0.03844705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.9602, "step": 1634 }, { "epoch": 0.03847058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7399, "step": 1635 }, { "epoch": 0.038494117647058824, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6096, "step": 1636 }, { "epoch": 0.03851764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.917, "step": 1637 }, { "epoch": 0.03854117647058824, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7724, "step": 1638 }, { "epoch": 0.03856470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.849, "step": 1639 }, { "epoch": 0.038588235294117645, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.7389, "step": 1640 }, { "epoch": 0.03861176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.7321, "step": 1641 }, { "epoch": 0.03863529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.9412, "step": 1642 }, { "epoch": 0.038658823529411766, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7599, "step": 1643 }, { "epoch": 0.03868235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.752, "step": 1644 }, { "epoch": 0.03870588235294117, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9771, "step": 1645 }, { "epoch": 0.03872941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7148, "step": 1646 }, { "epoch": 0.03875294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7313, "step": 1647 }, { "epoch": 0.038776470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.8099, "step": 1648 }, { "epoch": 0.0388, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8597, "step": 1649 }, { "epoch": 0.03882352941176471, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5945, "step": 1650 }, { "epoch": 0.038847058823529415, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6459, "step": 1651 }, { "epoch": 0.038870588235294115, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6166, "step": 1652 }, { "epoch": 0.03889411764705882, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8168, "step": 1653 }, { "epoch": 0.03891764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8002, "step": 1654 }, { "epoch": 0.038941176470588236, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7018, "step": 1655 }, { "epoch": 0.03896470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7522, "step": 1656 }, { "epoch": 0.03898823529411765, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8522, "step": 1657 }, { "epoch": 0.03901176470588235, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.6938, "step": 1658 }, { "epoch": 0.03903529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8438, "step": 1659 }, { "epoch": 0.039058823529411764, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8874, "step": 1660 }, { "epoch": 0.03908235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.697, "step": 1661 }, { "epoch": 0.03910588235294118, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.6171, "step": 1662 }, { "epoch": 0.039129411764705885, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.9464, "step": 1663 }, { "epoch": 0.039152941176470585, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8553, "step": 1664 }, { "epoch": 0.03917647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.845, "step": 1665 }, { "epoch": 0.0392, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6808, "step": 1666 }, { "epoch": 0.039223529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7192, "step": 1667 }, { "epoch": 0.03924705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8622, "step": 1668 }, { "epoch": 0.03927058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7091, "step": 1669 }, { "epoch": 0.03929411764705883, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.6621, "step": 1670 }, { "epoch": 0.03931764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.64, "step": 1671 }, { "epoch": 0.039341176470588234, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7286, "step": 1672 }, { "epoch": 0.03936470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.8421, "step": 1673 }, { "epoch": 0.03938823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.8589, "step": 1674 }, { "epoch": 0.039411764705882354, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8814, "step": 1675 }, { "epoch": 0.03943529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.603, "step": 1676 }, { "epoch": 0.03945882352941176, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.856, "step": 1677 }, { "epoch": 0.03948235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7699, "step": 1678 }, { "epoch": 0.039505882352941175, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6377, "step": 1679 }, { "epoch": 0.03952941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.9513, "step": 1680 }, { "epoch": 0.03955294117647059, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7575, "step": 1681 }, { "epoch": 0.039576470588235296, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7494, "step": 1682 }, { "epoch": 0.0396, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 2.0159, "step": 1683 }, { "epoch": 0.0396235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8192, "step": 1684 }, { "epoch": 0.03964705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6582, "step": 1685 }, { "epoch": 0.03967058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7685, "step": 1686 }, { "epoch": 0.039694117647058824, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7971, "step": 1687 }, { "epoch": 0.03971764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7382, "step": 1688 }, { "epoch": 0.03974117647058824, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.72, "step": 1689 }, { "epoch": 0.03976470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7329, "step": 1690 }, { "epoch": 0.039788235294117645, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7099, "step": 1691 }, { "epoch": 0.03981176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8635, "step": 1692 }, { "epoch": 0.03983529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.7404, "step": 1693 }, { "epoch": 0.039858823529411766, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6337, "step": 1694 }, { "epoch": 0.03988235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7272, "step": 1695 }, { "epoch": 0.03990588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5986, "step": 1696 }, { "epoch": 0.03992941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9215, "step": 1697 }, { "epoch": 0.03995294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7491, "step": 1698 }, { "epoch": 0.039976470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7596, "step": 1699 }, { "epoch": 0.04, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.6718, "step": 1700 }, { "epoch": 0.04002352941176471, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7229, "step": 1701 }, { "epoch": 0.040047058823529415, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8041, "step": 1702 }, { "epoch": 0.040070588235294115, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.695, "step": 1703 }, { "epoch": 0.04009411764705882, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.6359, "step": 1704 }, { "epoch": 0.04011764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8555, "step": 1705 }, { "epoch": 0.040141176470588236, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7762, "step": 1706 }, { "epoch": 0.04016470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.7366, "step": 1707 }, { "epoch": 0.04018823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7424, "step": 1708 }, { "epoch": 0.04021176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7558, "step": 1709 }, { "epoch": 0.04023529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7456, "step": 1710 }, { "epoch": 0.040258823529411764, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.7561, "step": 1711 }, { "epoch": 0.04028235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7751, "step": 1712 }, { "epoch": 0.04030588235294118, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.718, "step": 1713 }, { "epoch": 0.040329411764705884, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.608, "step": 1714 }, { "epoch": 0.04035294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7623, "step": 1715 }, { "epoch": 0.04037647058823529, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8498, "step": 1716 }, { "epoch": 0.0404, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8028, "step": 1717 }, { "epoch": 0.040423529411764705, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7073, "step": 1718 }, { "epoch": 0.04044705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8818, "step": 1719 }, { "epoch": 0.04047058823529412, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8928, "step": 1720 }, { "epoch": 0.040494117647058826, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7961, "step": 1721 }, { "epoch": 0.040517647058823526, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8332, "step": 1722 }, { "epoch": 0.04054117647058823, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8113, "step": 1723 }, { "epoch": 0.04056470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7196, "step": 1724 }, { "epoch": 0.04058823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.8356, "step": 1725 }, { "epoch": 0.040611764705882354, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7765, "step": 1726 }, { "epoch": 0.04063529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.7946, "step": 1727 }, { "epoch": 0.04065882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7002, "step": 1728 }, { "epoch": 0.04068235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6871, "step": 1729 }, { "epoch": 0.040705882352941175, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8279, "step": 1730 }, { "epoch": 0.04072941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6798, "step": 1731 }, { "epoch": 0.04075294117647059, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7944, "step": 1732 }, { "epoch": 0.040776470588235296, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7252, "step": 1733 }, { "epoch": 0.0408, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8188, "step": 1734 }, { "epoch": 0.0408235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.669, "step": 1735 }, { "epoch": 0.04084705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.5971, "step": 1736 }, { "epoch": 0.04087058823529412, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.7152, "step": 1737 }, { "epoch": 0.040894117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6852, "step": 1738 }, { "epoch": 0.04091764705882353, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.9235, "step": 1739 }, { "epoch": 0.04094117647058824, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.8056, "step": 1740 }, { "epoch": 0.04096470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.9228, "step": 1741 }, { "epoch": 0.040988235294117645, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6506, "step": 1742 }, { "epoch": 0.04101176470588235, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.9238, "step": 1743 }, { "epoch": 0.04103529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.8973, "step": 1744 }, { "epoch": 0.041058823529411766, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8516, "step": 1745 }, { "epoch": 0.04108235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7167, "step": 1746 }, { "epoch": 0.04110588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7077, "step": 1747 }, { "epoch": 0.04112941176470588, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 2.0164, "step": 1748 }, { "epoch": 0.04115294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8412, "step": 1749 }, { "epoch": 0.041176470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.6344, "step": 1750 }, { "epoch": 0.0412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7844, "step": 1751 }, { "epoch": 0.04122352941176471, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 2.0221, "step": 1752 }, { "epoch": 0.041247058823529414, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7453, "step": 1753 }, { "epoch": 0.041270588235294114, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7363, "step": 1754 }, { "epoch": 0.04129411764705882, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.9846, "step": 1755 }, { "epoch": 0.04131764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6048, "step": 1756 }, { "epoch": 0.041341176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7354, "step": 1757 }, { "epoch": 0.04136470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.7062, "step": 1758 }, { "epoch": 0.04138823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.6621, "step": 1759 }, { "epoch": 0.041411764705882356, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.557, "step": 1760 }, { "epoch": 0.041435294117647056, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.7535, "step": 1761 }, { "epoch": 0.04145882352941176, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.8961, "step": 1762 }, { "epoch": 0.04148235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7942, "step": 1763 }, { "epoch": 0.04150588235294118, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.7048, "step": 1764 }, { "epoch": 0.041529411764705884, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6849, "step": 1765 }, { "epoch": 0.04155294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6511, "step": 1766 }, { "epoch": 0.04157647058823529, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7932, "step": 1767 }, { "epoch": 0.0416, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8086, "step": 1768 }, { "epoch": 0.041623529411764705, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.9317, "step": 1769 }, { "epoch": 0.04164705882352941, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.7543, "step": 1770 }, { "epoch": 0.04167058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7038, "step": 1771 }, { "epoch": 0.041694117647058826, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7436, "step": 1772 }, { "epoch": 0.041717647058823526, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6342, "step": 1773 }, { "epoch": 0.04174117647058823, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6748, "step": 1774 }, { "epoch": 0.04176470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8443, "step": 1775 }, { "epoch": 0.04178823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8179, "step": 1776 }, { "epoch": 0.041811764705882354, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8081, "step": 1777 }, { "epoch": 0.04183529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6166, "step": 1778 }, { "epoch": 0.04185882352941177, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.8144, "step": 1779 }, { "epoch": 0.04188235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5059, "step": 1780 }, { "epoch": 0.041905882352941175, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.5508, "step": 1781 }, { "epoch": 0.04192941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.6734, "step": 1782 }, { "epoch": 0.04195294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5736, "step": 1783 }, { "epoch": 0.041976470588235296, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.7523, "step": 1784 }, { "epoch": 0.042, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6223, "step": 1785 }, { "epoch": 0.0420235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8251, "step": 1786 }, { "epoch": 0.04204705882352941, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.896, "step": 1787 }, { "epoch": 0.04207058823529412, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.787, "step": 1788 }, { "epoch": 0.042094117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.793, "step": 1789 }, { "epoch": 0.04211764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5486, "step": 1790 }, { "epoch": 0.04214117647058824, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.8334, "step": 1791 }, { "epoch": 0.042164705882352944, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9185, "step": 1792 }, { "epoch": 0.042188235294117644, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7309, "step": 1793 }, { "epoch": 0.04221176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8251, "step": 1794 }, { "epoch": 0.04223529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7706, "step": 1795 }, { "epoch": 0.042258823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.572, "step": 1796 }, { "epoch": 0.04228235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7351, "step": 1797 }, { "epoch": 0.04230588235294118, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.6562, "step": 1798 }, { "epoch": 0.04232941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6811, "step": 1799 }, { "epoch": 0.042352941176470586, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.8185, "step": 1800 }, { "epoch": 0.04237647058823529, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6458, "step": 1801 }, { "epoch": 0.0424, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.7266, "step": 1802 }, { "epoch": 0.04242352941176471, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7895, "step": 1803 }, { "epoch": 0.042447058823529414, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7364, "step": 1804 }, { "epoch": 0.04247058823529412, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.8823, "step": 1805 }, { "epoch": 0.04249411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5452, "step": 1806 }, { "epoch": 0.04251764705882353, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.8889, "step": 1807 }, { "epoch": 0.042541176470588235, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.9327, "step": 1808 }, { "epoch": 0.04256470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6519, "step": 1809 }, { "epoch": 0.04258823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6755, "step": 1810 }, { "epoch": 0.042611764705882356, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4529, "step": 1811 }, { "epoch": 0.042635294117647056, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7593, "step": 1812 }, { "epoch": 0.04265882352941176, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.9076, "step": 1813 }, { "epoch": 0.04268235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7374, "step": 1814 }, { "epoch": 0.04270588235294118, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.6895, "step": 1815 }, { "epoch": 0.042729411764705884, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.555, "step": 1816 }, { "epoch": 0.04275294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6041, "step": 1817 }, { "epoch": 0.04277647058823529, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.5219, "step": 1818 }, { "epoch": 0.0428, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7913, "step": 1819 }, { "epoch": 0.042823529411764705, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8167, "step": 1820 }, { "epoch": 0.04284705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7688, "step": 1821 }, { "epoch": 0.04287058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7961, "step": 1822 }, { "epoch": 0.042894117647058826, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7308, "step": 1823 }, { "epoch": 0.04291764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6021, "step": 1824 }, { "epoch": 0.04294117647058823, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5424, "step": 1825 }, { "epoch": 0.04296470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.712, "step": 1826 }, { "epoch": 0.04298823529411765, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.6622, "step": 1827 }, { "epoch": 0.043011764705882354, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7609, "step": 1828 }, { "epoch": 0.04303529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7132, "step": 1829 }, { "epoch": 0.04305882352941177, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.634, "step": 1830 }, { "epoch": 0.04308235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7223, "step": 1831 }, { "epoch": 0.043105882352941174, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6728, "step": 1832 }, { "epoch": 0.04312941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6125, "step": 1833 }, { "epoch": 0.04315294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6886, "step": 1834 }, { "epoch": 0.043176470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7982, "step": 1835 }, { "epoch": 0.0432, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.827, "step": 1836 }, { "epoch": 0.04322352941176471, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.9562, "step": 1837 }, { "epoch": 0.04324705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.7239, "step": 1838 }, { "epoch": 0.043270588235294116, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8785, "step": 1839 }, { "epoch": 0.04329411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8146, "step": 1840 }, { "epoch": 0.04331764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8746, "step": 1841 }, { "epoch": 0.04334117647058824, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.6913, "step": 1842 }, { "epoch": 0.043364705882352944, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7639, "step": 1843 }, { "epoch": 0.043388235294117644, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.8306, "step": 1844 }, { "epoch": 0.04341176470588235, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.7732, "step": 1845 }, { "epoch": 0.04343529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7088, "step": 1846 }, { "epoch": 0.043458823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8005, "step": 1847 }, { "epoch": 0.04348235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6405, "step": 1848 }, { "epoch": 0.04350588235294118, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.6081, "step": 1849 }, { "epoch": 0.04352941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.7342, "step": 1850 }, { "epoch": 0.043552941176470586, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6723, "step": 1851 }, { "epoch": 0.04357647058823529, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.75, "step": 1852 }, { "epoch": 0.0436, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5895, "step": 1853 }, { "epoch": 0.04362352941176471, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.9268, "step": 1854 }, { "epoch": 0.043647058823529414, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6062, "step": 1855 }, { "epoch": 0.04367058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.6705, "step": 1856 }, { "epoch": 0.04369411764705882, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6328, "step": 1857 }, { "epoch": 0.04371764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.6232, "step": 1858 }, { "epoch": 0.043741176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7068, "step": 1859 }, { "epoch": 0.04376470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5679, "step": 1860 }, { "epoch": 0.04378823529411765, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.7174, "step": 1861 }, { "epoch": 0.043811764705882356, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8167, "step": 1862 }, { "epoch": 0.043835294117647056, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.6732, "step": 1863 }, { "epoch": 0.04385882352941176, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6085, "step": 1864 }, { "epoch": 0.04388235294117647, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.7985, "step": 1865 }, { "epoch": 0.043905882352941177, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.827, "step": 1866 }, { "epoch": 0.043929411764705883, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8385, "step": 1867 }, { "epoch": 0.04395294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.9128, "step": 1868 }, { "epoch": 0.0439764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5851, "step": 1869 }, { "epoch": 0.044, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.8933, "step": 1870 }, { "epoch": 0.044023529411764704, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6939, "step": 1871 }, { "epoch": 0.04404705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7504, "step": 1872 }, { "epoch": 0.04407058823529412, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6518, "step": 1873 }, { "epoch": 0.044094117647058825, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7169, "step": 1874 }, { "epoch": 0.04411764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.815, "step": 1875 }, { "epoch": 0.04414117647058823, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.8283, "step": 1876 }, { "epoch": 0.04416470588235294, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.9374, "step": 1877 }, { "epoch": 0.044188235294117646, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.518, "step": 1878 }, { "epoch": 0.04421176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7628, "step": 1879 }, { "epoch": 0.04423529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6043, "step": 1880 }, { "epoch": 0.04425882352941177, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8122, "step": 1881 }, { "epoch": 0.04428235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6895, "step": 1882 }, { "epoch": 0.044305882352941174, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.6294, "step": 1883 }, { "epoch": 0.04432941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8311, "step": 1884 }, { "epoch": 0.04435294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7962, "step": 1885 }, { "epoch": 0.044376470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7985, "step": 1886 }, { "epoch": 0.0444, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.8256, "step": 1887 }, { "epoch": 0.04442352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7376, "step": 1888 }, { "epoch": 0.04444705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.7068, "step": 1889 }, { "epoch": 0.044470588235294116, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8372, "step": 1890 }, { "epoch": 0.04449411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.8333, "step": 1891 }, { "epoch": 0.04451764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6861, "step": 1892 }, { "epoch": 0.04454117647058824, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7761, "step": 1893 }, { "epoch": 0.044564705882352944, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8767, "step": 1894 }, { "epoch": 0.044588235294117644, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4273, "step": 1895 }, { "epoch": 0.04461176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6447, "step": 1896 }, { "epoch": 0.04463529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6893, "step": 1897 }, { "epoch": 0.044658823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3945, "step": 1898 }, { "epoch": 0.04468235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6837, "step": 1899 }, { "epoch": 0.04470588235294118, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8064, "step": 1900 }, { "epoch": 0.044729411764705886, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8655, "step": 1901 }, { "epoch": 0.044752941176470586, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8871, "step": 1902 }, { "epoch": 0.04477647058823529, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.9252, "step": 1903 }, { "epoch": 0.0448, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4706, "step": 1904 }, { "epoch": 0.044823529411764707, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5738, "step": 1905 }, { "epoch": 0.044847058823529413, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.7938, "step": 1906 }, { "epoch": 0.04487058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.9, "step": 1907 }, { "epoch": 0.04489411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.9591, "step": 1908 }, { "epoch": 0.04491764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7051, "step": 1909 }, { "epoch": 0.044941176470588234, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7355, "step": 1910 }, { "epoch": 0.04496470588235294, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.6996, "step": 1911 }, { "epoch": 0.04498823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7551, "step": 1912 }, { "epoch": 0.045011764705882355, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4988, "step": 1913 }, { "epoch": 0.04503529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7678, "step": 1914 }, { "epoch": 0.04505882352941176, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.7003, "step": 1915 }, { "epoch": 0.04508235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7063, "step": 1916 }, { "epoch": 0.045105882352941176, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.6948, "step": 1917 }, { "epoch": 0.04512941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7859, "step": 1918 }, { "epoch": 0.04515294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6116, "step": 1919 }, { "epoch": 0.0451764705882353, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.6542, "step": 1920 }, { "epoch": 0.0452, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7784, "step": 1921 }, { "epoch": 0.045223529411764704, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7128, "step": 1922 }, { "epoch": 0.04524705882352941, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.8106, "step": 1923 }, { "epoch": 0.04527058823529412, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.8393, "step": 1924 }, { "epoch": 0.045294117647058825, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5892, "step": 1925 }, { "epoch": 0.04531764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5395, "step": 1926 }, { "epoch": 0.04534117647058823, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4699, "step": 1927 }, { "epoch": 0.04536470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7973, "step": 1928 }, { "epoch": 0.045388235294117646, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4265, "step": 1929 }, { "epoch": 0.04541176470588235, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.646, "step": 1930 }, { "epoch": 0.04543529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6654, "step": 1931 }, { "epoch": 0.04545882352941177, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4828, "step": 1932 }, { "epoch": 0.045482352941176474, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7838, "step": 1933 }, { "epoch": 0.045505882352941174, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6654, "step": 1934 }, { "epoch": 0.04552941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6904, "step": 1935 }, { "epoch": 0.04555294117647059, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6971, "step": 1936 }, { "epoch": 0.045576470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7965, "step": 1937 }, { "epoch": 0.0456, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8097, "step": 1938 }, { "epoch": 0.04562352941176471, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.8018, "step": 1939 }, { "epoch": 0.04564705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8416, "step": 1940 }, { "epoch": 0.045670588235294116, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7306, "step": 1941 }, { "epoch": 0.04569411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6124, "step": 1942 }, { "epoch": 0.04571764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7455, "step": 1943 }, { "epoch": 0.045741176470588236, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7733, "step": 1944 }, { "epoch": 0.04576470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.8008, "step": 1945 }, { "epoch": 0.04578823529411765, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6879, "step": 1946 }, { "epoch": 0.04581176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8412, "step": 1947 }, { "epoch": 0.04583529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6983, "step": 1948 }, { "epoch": 0.045858823529411764, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.8291, "step": 1949 }, { "epoch": 0.04588235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4111, "step": 1950 }, { "epoch": 0.04590588235294118, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7158, "step": 1951 }, { "epoch": 0.045929411764705885, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.8756, "step": 1952 }, { "epoch": 0.045952941176470585, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5916, "step": 1953 }, { "epoch": 0.04597647058823529, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.8903, "step": 1954 }, { "epoch": 0.046, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5145, "step": 1955 }, { "epoch": 0.046023529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6289, "step": 1956 }, { "epoch": 0.04604705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8445, "step": 1957 }, { "epoch": 0.04607058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.6467, "step": 1958 }, { "epoch": 0.04609411764705882, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5227, "step": 1959 }, { "epoch": 0.04611764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7938, "step": 1960 }, { "epoch": 0.046141176470588234, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7994, "step": 1961 }, { "epoch": 0.04616470588235294, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.7311, "step": 1962 }, { "epoch": 0.04618823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7886, "step": 1963 }, { "epoch": 0.046211764705882355, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.6314, "step": 1964 }, { "epoch": 0.04623529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6972, "step": 1965 }, { "epoch": 0.04625882352941176, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.849, "step": 1966 }, { "epoch": 0.04628235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6085, "step": 1967 }, { "epoch": 0.046305882352941176, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.604, "step": 1968 }, { "epoch": 0.04632941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.6546, "step": 1969 }, { "epoch": 0.04635294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6, "step": 1970 }, { "epoch": 0.0463764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6286, "step": 1971 }, { "epoch": 0.0464, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5962, "step": 1972 }, { "epoch": 0.046423529411764704, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6415, "step": 1973 }, { "epoch": 0.04644705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6281, "step": 1974 }, { "epoch": 0.04647058823529412, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7965, "step": 1975 }, { "epoch": 0.046494117647058825, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6771, "step": 1976 }, { "epoch": 0.04651764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6782, "step": 1977 }, { "epoch": 0.04654117647058824, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6681, "step": 1978 }, { "epoch": 0.04656470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7492, "step": 1979 }, { "epoch": 0.046588235294117646, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.9723, "step": 1980 }, { "epoch": 0.04661176470588235, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3911, "step": 1981 }, { "epoch": 0.04663529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4312, "step": 1982 }, { "epoch": 0.046658823529411766, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.6826, "step": 1983 }, { "epoch": 0.04668235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4577, "step": 1984 }, { "epoch": 0.04670588235294117, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.8454, "step": 1985 }, { "epoch": 0.04672941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7134, "step": 1986 }, { "epoch": 0.04675294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6398, "step": 1987 }, { "epoch": 0.046776470588235294, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.874, "step": 1988 }, { "epoch": 0.0468, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7384, "step": 1989 }, { "epoch": 0.04682352941176471, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7503, "step": 1990 }, { "epoch": 0.04684705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7857, "step": 1991 }, { "epoch": 0.046870588235294115, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6339, "step": 1992 }, { "epoch": 0.04689411764705882, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.6722, "step": 1993 }, { "epoch": 0.04691764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.8102, "step": 1994 }, { "epoch": 0.046941176470588236, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.6712, "step": 1995 }, { "epoch": 0.04696470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6506, "step": 1996 }, { "epoch": 0.04698823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.708, "step": 1997 }, { "epoch": 0.04701176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7189, "step": 1998 }, { "epoch": 0.04703529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8909, "step": 1999 }, { "epoch": 0.047058823529411764, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6441, "step": 2000 }, { "epoch": 0.047058823529411764, "eval_loss": 2.263021469116211, "eval_runtime": 676.2315, "eval_samples_per_second": 12.57, "eval_steps_per_second": 3.142, "step": 2000 }, { "epoch": 0.04708235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7192, "step": 2001 }, { "epoch": 0.04710588235294118, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5818, "step": 2002 }, { "epoch": 0.047129411764705885, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7346, "step": 2003 }, { "epoch": 0.047152941176470585, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6061, "step": 2004 }, { "epoch": 0.04717647058823529, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5946, "step": 2005 }, { "epoch": 0.0472, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.8624, "step": 2006 }, { "epoch": 0.047223529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7715, "step": 2007 }, { "epoch": 0.04724705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.8046, "step": 2008 }, { "epoch": 0.04727058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.8892, "step": 2009 }, { "epoch": 0.04729411764705883, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7292, "step": 2010 }, { "epoch": 0.04731764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.8077, "step": 2011 }, { "epoch": 0.047341176470588234, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.789, "step": 2012 }, { "epoch": 0.04736470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.79, "step": 2013 }, { "epoch": 0.04738823529411765, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.771, "step": 2014 }, { "epoch": 0.047411764705882355, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5452, "step": 2015 }, { "epoch": 0.04743529411764706, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.7471, "step": 2016 }, { "epoch": 0.04745882352941176, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6742, "step": 2017 }, { "epoch": 0.04748235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6303, "step": 2018 }, { "epoch": 0.047505882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4539, "step": 2019 }, { "epoch": 0.04752941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6728, "step": 2020 }, { "epoch": 0.04755294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6091, "step": 2021 }, { "epoch": 0.047576470588235296, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6893, "step": 2022 }, { "epoch": 0.0476, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6719, "step": 2023 }, { "epoch": 0.0476235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7912, "step": 2024 }, { "epoch": 0.04764705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7486, "step": 2025 }, { "epoch": 0.04767058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5003, "step": 2026 }, { "epoch": 0.047694117647058824, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6424, "step": 2027 }, { "epoch": 0.04771764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6649, "step": 2028 }, { "epoch": 0.04774117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5891, "step": 2029 }, { "epoch": 0.04776470588235294, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.7431, "step": 2030 }, { "epoch": 0.047788235294117645, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.8176, "step": 2031 }, { "epoch": 0.04781176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7544, "step": 2032 }, { "epoch": 0.04783529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5835, "step": 2033 }, { "epoch": 0.047858823529411766, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5895, "step": 2034 }, { "epoch": 0.04788235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6684, "step": 2035 }, { "epoch": 0.04790588235294117, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7845, "step": 2036 }, { "epoch": 0.04792941176470588, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6105, "step": 2037 }, { "epoch": 0.04795294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.7949, "step": 2038 }, { "epoch": 0.047976470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.6844, "step": 2039 }, { "epoch": 0.048, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.6095, "step": 2040 }, { "epoch": 0.04802352941176471, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.7058, "step": 2041 }, { "epoch": 0.048047058823529415, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7747, "step": 2042 }, { "epoch": 0.048070588235294115, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.4196, "step": 2043 }, { "epoch": 0.04809411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6599, "step": 2044 }, { "epoch": 0.04811764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7213, "step": 2045 }, { "epoch": 0.048141176470588236, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7273, "step": 2046 }, { "epoch": 0.04816470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7196, "step": 2047 }, { "epoch": 0.04818823529411765, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7889, "step": 2048 }, { "epoch": 0.04821176470588235, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.7086, "step": 2049 }, { "epoch": 0.04823529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6154, "step": 2050 }, { "epoch": 0.048258823529411764, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7005, "step": 2051 }, { "epoch": 0.04828235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.654, "step": 2052 }, { "epoch": 0.04830588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8394, "step": 2053 }, { "epoch": 0.048329411764705885, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6701, "step": 2054 }, { "epoch": 0.04835294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6679, "step": 2055 }, { "epoch": 0.04837647058823529, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7906, "step": 2056 }, { "epoch": 0.0484, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6368, "step": 2057 }, { "epoch": 0.048423529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.681, "step": 2058 }, { "epoch": 0.04844705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.7783, "step": 2059 }, { "epoch": 0.04847058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7206, "step": 2060 }, { "epoch": 0.048494117647058826, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.9258, "step": 2061 }, { "epoch": 0.048517647058823526, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6462, "step": 2062 }, { "epoch": 0.04854117647058823, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.6584, "step": 2063 }, { "epoch": 0.04856470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6514, "step": 2064 }, { "epoch": 0.04858823529411765, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6638, "step": 2065 }, { "epoch": 0.048611764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6079, "step": 2066 }, { "epoch": 0.04863529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6951, "step": 2067 }, { "epoch": 0.04865882352941176, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5287, "step": 2068 }, { "epoch": 0.04868235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6251, "step": 2069 }, { "epoch": 0.048705882352941175, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.7078, "step": 2070 }, { "epoch": 0.04872941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6178, "step": 2071 }, { "epoch": 0.04875294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7471, "step": 2072 }, { "epoch": 0.048776470588235296, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6949, "step": 2073 }, { "epoch": 0.0488, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8773, "step": 2074 }, { "epoch": 0.0488235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.7017, "step": 2075 }, { "epoch": 0.04884705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5856, "step": 2076 }, { "epoch": 0.04887058823529412, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.6496, "step": 2077 }, { "epoch": 0.048894117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6763, "step": 2078 }, { "epoch": 0.04891764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6183, "step": 2079 }, { "epoch": 0.04894117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7232, "step": 2080 }, { "epoch": 0.04896470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.7327, "step": 2081 }, { "epoch": 0.048988235294117645, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5099, "step": 2082 }, { "epoch": 0.04901176470588235, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6583, "step": 2083 }, { "epoch": 0.04903529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6489, "step": 2084 }, { "epoch": 0.049058823529411766, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.6374, "step": 2085 }, { "epoch": 0.04908235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4352, "step": 2086 }, { "epoch": 0.04910588235294118, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6706, "step": 2087 }, { "epoch": 0.04912941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6737, "step": 2088 }, { "epoch": 0.04915294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7219, "step": 2089 }, { "epoch": 0.049176470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6221, "step": 2090 }, { "epoch": 0.0492, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.631, "step": 2091 }, { "epoch": 0.04922352941176471, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6467, "step": 2092 }, { "epoch": 0.049247058823529415, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5579, "step": 2093 }, { "epoch": 0.049270588235294115, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6748, "step": 2094 }, { "epoch": 0.04929411764705882, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5768, "step": 2095 }, { "epoch": 0.04931764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.6541, "step": 2096 }, { "epoch": 0.049341176470588236, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7136, "step": 2097 }, { "epoch": 0.04936470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4011, "step": 2098 }, { "epoch": 0.04938823529411765, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3921, "step": 2099 }, { "epoch": 0.04941176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5924, "step": 2100 }, { "epoch": 0.049435294117647056, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.7148, "step": 2101 }, { "epoch": 0.04945882352941176, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.7045, "step": 2102 }, { "epoch": 0.04948235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6938, "step": 2103 }, { "epoch": 0.04950588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6452, "step": 2104 }, { "epoch": 0.049529411764705884, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7483, "step": 2105 }, { "epoch": 0.04955294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.542, "step": 2106 }, { "epoch": 0.04957647058823529, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6839, "step": 2107 }, { "epoch": 0.0496, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7769, "step": 2108 }, { "epoch": 0.049623529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6568, "step": 2109 }, { "epoch": 0.04964705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7362, "step": 2110 }, { "epoch": 0.04967058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.6851, "step": 2111 }, { "epoch": 0.049694117647058826, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6514, "step": 2112 }, { "epoch": 0.049717647058823526, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.6557, "step": 2113 }, { "epoch": 0.04974117647058823, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7111, "step": 2114 }, { "epoch": 0.04976470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8645, "step": 2115 }, { "epoch": 0.04978823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5223, "step": 2116 }, { "epoch": 0.049811764705882354, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.8329, "step": 2117 }, { "epoch": 0.04983529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7049, "step": 2118 }, { "epoch": 0.04985882352941177, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6561, "step": 2119 }, { "epoch": 0.04988235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5797, "step": 2120 }, { "epoch": 0.049905882352941175, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6962, "step": 2121 }, { "epoch": 0.04992941176470588, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8252, "step": 2122 }, { "epoch": 0.04995294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5319, "step": 2123 }, { "epoch": 0.049976470588235296, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.5843, "step": 2124 }, { "epoch": 0.05, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6235, "step": 2125 }, { "epoch": 0.0500235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5048, "step": 2126 }, { "epoch": 0.05004705882352941, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6297, "step": 2127 }, { "epoch": 0.05007058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.385, "step": 2128 }, { "epoch": 0.050094117647058824, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8255, "step": 2129 }, { "epoch": 0.05011764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.8841, "step": 2130 }, { "epoch": 0.05014117647058824, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7885, "step": 2131 }, { "epoch": 0.050164705882352945, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5494, "step": 2132 }, { "epoch": 0.050188235294117645, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4071, "step": 2133 }, { "epoch": 0.05021176470588235, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6295, "step": 2134 }, { "epoch": 0.05023529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5653, "step": 2135 }, { "epoch": 0.050258823529411766, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.727, "step": 2136 }, { "epoch": 0.05028235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.6208, "step": 2137 }, { "epoch": 0.05030588235294118, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.48, "step": 2138 }, { "epoch": 0.05032941176470588, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.5317, "step": 2139 }, { "epoch": 0.050352941176470586, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6503, "step": 2140 }, { "epoch": 0.05037647058823529, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8597, "step": 2141 }, { "epoch": 0.0504, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7032, "step": 2142 }, { "epoch": 0.05042352941176471, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.86, "step": 2143 }, { "epoch": 0.050447058823529414, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4363, "step": 2144 }, { "epoch": 0.050470588235294114, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7457, "step": 2145 }, { "epoch": 0.05049411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5571, "step": 2146 }, { "epoch": 0.05051764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6995, "step": 2147 }, { "epoch": 0.050541176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6069, "step": 2148 }, { "epoch": 0.05056470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5446, "step": 2149 }, { "epoch": 0.05058823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6093, "step": 2150 }, { "epoch": 0.050611764705882356, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4714, "step": 2151 }, { "epoch": 0.050635294117647056, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.6334, "step": 2152 }, { "epoch": 0.05065882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5897, "step": 2153 }, { "epoch": 0.05068235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.7626, "step": 2154 }, { "epoch": 0.05070588235294118, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.7158, "step": 2155 }, { "epoch": 0.050729411764705884, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7515, "step": 2156 }, { "epoch": 0.05075294117647059, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2999, "step": 2157 }, { "epoch": 0.05077647058823529, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.7856, "step": 2158 }, { "epoch": 0.0508, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.719, "step": 2159 }, { "epoch": 0.050823529411764705, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7783, "step": 2160 }, { "epoch": 0.05084705882352941, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.8757, "step": 2161 }, { "epoch": 0.05087058823529412, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.698, "step": 2162 }, { "epoch": 0.050894117647058826, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7593, "step": 2163 }, { "epoch": 0.05091764705882353, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.5855, "step": 2164 }, { "epoch": 0.05094117647058823, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7019, "step": 2165 }, { "epoch": 0.05096470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.503, "step": 2166 }, { "epoch": 0.05098823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6532, "step": 2167 }, { "epoch": 0.051011764705882354, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6467, "step": 2168 }, { "epoch": 0.05103529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5472, "step": 2169 }, { "epoch": 0.05105882352941177, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7615, "step": 2170 }, { "epoch": 0.05108235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4389, "step": 2171 }, { "epoch": 0.051105882352941175, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6574, "step": 2172 }, { "epoch": 0.05112941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7905, "step": 2173 }, { "epoch": 0.05115294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6638, "step": 2174 }, { "epoch": 0.051176470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5326, "step": 2175 }, { "epoch": 0.0512, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6883, "step": 2176 }, { "epoch": 0.0512235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7306, "step": 2177 }, { "epoch": 0.05124705882352941, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.6769, "step": 2178 }, { "epoch": 0.051270588235294116, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5943, "step": 2179 }, { "epoch": 0.05129411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.511, "step": 2180 }, { "epoch": 0.05131764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.6215, "step": 2181 }, { "epoch": 0.05134117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4711, "step": 2182 }, { "epoch": 0.051364705882352944, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6441, "step": 2183 }, { "epoch": 0.051388235294117644, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.8125, "step": 2184 }, { "epoch": 0.05141176470588235, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6089, "step": 2185 }, { "epoch": 0.05143529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.629, "step": 2186 }, { "epoch": 0.051458823529411765, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.445, "step": 2187 }, { "epoch": 0.05148235294117647, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.706, "step": 2188 }, { "epoch": 0.05150588235294118, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.8115, "step": 2189 }, { "epoch": 0.05152941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5523, "step": 2190 }, { "epoch": 0.051552941176470586, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.6836, "step": 2191 }, { "epoch": 0.05157647058823529, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5895, "step": 2192 }, { "epoch": 0.0516, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.581, "step": 2193 }, { "epoch": 0.05162352941176471, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6909, "step": 2194 }, { "epoch": 0.051647058823529414, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.8205, "step": 2195 }, { "epoch": 0.05167058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7581, "step": 2196 }, { "epoch": 0.05169411764705882, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.5263, "step": 2197 }, { "epoch": 0.05171764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.4769, "step": 2198 }, { "epoch": 0.051741176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.5687, "step": 2199 }, { "epoch": 0.05176470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.6618, "step": 2200 }, { "epoch": 0.05178823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.5429, "step": 2201 }, { "epoch": 0.051811764705882356, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.6152, "step": 2202 }, { "epoch": 0.051835294117647056, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.7711, "step": 2203 }, { "epoch": 0.05185882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6872, "step": 2204 }, { "epoch": 0.05188235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6615, "step": 2205 }, { "epoch": 0.05190588235294118, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4917, "step": 2206 }, { "epoch": 0.051929411764705884, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6449, "step": 2207 }, { "epoch": 0.05195294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7936, "step": 2208 }, { "epoch": 0.05197647058823529, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5404, "step": 2209 }, { "epoch": 0.052, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8003, "step": 2210 }, { "epoch": 0.052023529411764705, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6294, "step": 2211 }, { "epoch": 0.05204705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5158, "step": 2212 }, { "epoch": 0.05207058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.8349, "step": 2213 }, { "epoch": 0.052094117647058825, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6518, "step": 2214 }, { "epoch": 0.05211764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7245, "step": 2215 }, { "epoch": 0.05214117647058823, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.637, "step": 2216 }, { "epoch": 0.05216470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7291, "step": 2217 }, { "epoch": 0.052188235294117646, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6507, "step": 2218 }, { "epoch": 0.05221176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5237, "step": 2219 }, { "epoch": 0.05223529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8264, "step": 2220 }, { "epoch": 0.05225882352941177, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7701, "step": 2221 }, { "epoch": 0.05228235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5588, "step": 2222 }, { "epoch": 0.052305882352941174, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.606, "step": 2223 }, { "epoch": 0.05232941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.582, "step": 2224 }, { "epoch": 0.05235294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6646, "step": 2225 }, { "epoch": 0.052376470588235295, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6875, "step": 2226 }, { "epoch": 0.0524, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 1.3057, "step": 2227 }, { "epoch": 0.05242352941176471, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.638, "step": 2228 }, { "epoch": 0.05244705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7163, "step": 2229 }, { "epoch": 0.052470588235294116, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5944, "step": 2230 }, { "epoch": 0.05249411764705882, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5185, "step": 2231 }, { "epoch": 0.05251764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6441, "step": 2232 }, { "epoch": 0.05254117647058824, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4904, "step": 2233 }, { "epoch": 0.052564705882352944, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7279, "step": 2234 }, { "epoch": 0.052588235294117644, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5105, "step": 2235 }, { "epoch": 0.05261176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5254, "step": 2236 }, { "epoch": 0.05263529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.7533, "step": 2237 }, { "epoch": 0.052658823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6801, "step": 2238 }, { "epoch": 0.05268235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6431, "step": 2239 }, { "epoch": 0.05270588235294118, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.48, "step": 2240 }, { "epoch": 0.052729411764705886, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.5848, "step": 2241 }, { "epoch": 0.052752941176470586, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4242, "step": 2242 }, { "epoch": 0.05277647058823529, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4496, "step": 2243 }, { "epoch": 0.0528, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7428, "step": 2244 }, { "epoch": 0.05282352941176471, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4896, "step": 2245 }, { "epoch": 0.052847058823529414, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4211, "step": 2246 }, { "epoch": 0.05287058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.7671, "step": 2247 }, { "epoch": 0.05289411764705882, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3942, "step": 2248 }, { "epoch": 0.05291764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5028, "step": 2249 }, { "epoch": 0.052941176470588235, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4805, "step": 2250 }, { "epoch": 0.05296470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5707, "step": 2251 }, { "epoch": 0.05298823529411765, "grad_norm": 0.33203125, "learning_rate": 0.02, "loss": 1.7902, "step": 2252 }, { "epoch": 0.053011764705882355, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7185, "step": 2253 }, { "epoch": 0.053035294117647055, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.759, "step": 2254 }, { "epoch": 0.05305882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6259, "step": 2255 }, { "epoch": 0.05308235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.5491, "step": 2256 }, { "epoch": 0.053105882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6749, "step": 2257 }, { "epoch": 0.05312941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.6161, "step": 2258 }, { "epoch": 0.05315294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6414, "step": 2259 }, { "epoch": 0.0531764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7299, "step": 2260 }, { "epoch": 0.0532, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3872, "step": 2261 }, { "epoch": 0.053223529411764704, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.9144, "step": 2262 }, { "epoch": 0.05324705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.527, "step": 2263 }, { "epoch": 0.05327058823529412, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6669, "step": 2264 }, { "epoch": 0.053294117647058825, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6082, "step": 2265 }, { "epoch": 0.05331764705882353, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5571, "step": 2266 }, { "epoch": 0.05334117647058823, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.6269, "step": 2267 }, { "epoch": 0.05336470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4604, "step": 2268 }, { "epoch": 0.053388235294117646, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7286, "step": 2269 }, { "epoch": 0.05341176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6916, "step": 2270 }, { "epoch": 0.05343529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7427, "step": 2271 }, { "epoch": 0.05345882352941177, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7295, "step": 2272 }, { "epoch": 0.053482352941176474, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6067, "step": 2273 }, { "epoch": 0.053505882352941174, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4387, "step": 2274 }, { "epoch": 0.05352941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6617, "step": 2275 }, { "epoch": 0.05355294117647059, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.5391, "step": 2276 }, { "epoch": 0.053576470588235295, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4529, "step": 2277 }, { "epoch": 0.0536, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4277, "step": 2278 }, { "epoch": 0.05362352941176471, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.6615, "step": 2279 }, { "epoch": 0.05364705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.431, "step": 2280 }, { "epoch": 0.053670588235294116, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.7013, "step": 2281 }, { "epoch": 0.05369411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.6214, "step": 2282 }, { "epoch": 0.05371764705882353, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.4532, "step": 2283 }, { "epoch": 0.05374117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6699, "step": 2284 }, { "epoch": 0.053764705882352944, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.5944, "step": 2285 }, { "epoch": 0.053788235294117644, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.619, "step": 2286 }, { "epoch": 0.05381176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6427, "step": 2287 }, { "epoch": 0.05383529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4588, "step": 2288 }, { "epoch": 0.053858823529411765, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6636, "step": 2289 }, { "epoch": 0.05388235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.6914, "step": 2290 }, { "epoch": 0.05390588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.8605, "step": 2291 }, { "epoch": 0.053929411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7274, "step": 2292 }, { "epoch": 0.053952941176470585, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4878, "step": 2293 }, { "epoch": 0.05397647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7203, "step": 2294 }, { "epoch": 0.054, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6421, "step": 2295 }, { "epoch": 0.054023529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.643, "step": 2296 }, { "epoch": 0.05404705882352941, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.5895, "step": 2297 }, { "epoch": 0.05407058823529412, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7337, "step": 2298 }, { "epoch": 0.05409411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6695, "step": 2299 }, { "epoch": 0.05411764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3718, "step": 2300 }, { "epoch": 0.054141176470588234, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4991, "step": 2301 }, { "epoch": 0.05416470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4232, "step": 2302 }, { "epoch": 0.05418823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6758, "step": 2303 }, { "epoch": 0.054211764705882355, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6331, "step": 2304 }, { "epoch": 0.05423529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7037, "step": 2305 }, { "epoch": 0.05425882352941176, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.8062, "step": 2306 }, { "epoch": 0.05428235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5874, "step": 2307 }, { "epoch": 0.054305882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6445, "step": 2308 }, { "epoch": 0.05432941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4705, "step": 2309 }, { "epoch": 0.05435294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.8071, "step": 2310 }, { "epoch": 0.0543764705882353, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.4212, "step": 2311 }, { "epoch": 0.0544, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4857, "step": 2312 }, { "epoch": 0.054423529411764704, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7939, "step": 2313 }, { "epoch": 0.05444705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4887, "step": 2314 }, { "epoch": 0.05447058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5795, "step": 2315 }, { "epoch": 0.054494117647058825, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.7796, "step": 2316 }, { "epoch": 0.05451764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.7945, "step": 2317 }, { "epoch": 0.05454117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6666, "step": 2318 }, { "epoch": 0.05456470588235294, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.4297, "step": 2319 }, { "epoch": 0.054588235294117646, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4365, "step": 2320 }, { "epoch": 0.05461176470588235, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.5105, "step": 2321 }, { "epoch": 0.05463529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.6015, "step": 2322 }, { "epoch": 0.05465882352941177, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6264, "step": 2323 }, { "epoch": 0.054682352941176474, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6533, "step": 2324 }, { "epoch": 0.054705882352941174, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5586, "step": 2325 }, { "epoch": 0.05472941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6738, "step": 2326 }, { "epoch": 0.05475294117647059, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.4458, "step": 2327 }, { "epoch": 0.054776470588235295, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.8443, "step": 2328 }, { "epoch": 0.0548, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6764, "step": 2329 }, { "epoch": 0.05482352941176471, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5252, "step": 2330 }, { "epoch": 0.05484705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.586, "step": 2331 }, { "epoch": 0.054870588235294115, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.8323, "step": 2332 }, { "epoch": 0.05489411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5322, "step": 2333 }, { "epoch": 0.05491764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6051, "step": 2334 }, { "epoch": 0.054941176470588236, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.5107, "step": 2335 }, { "epoch": 0.05496470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.6336, "step": 2336 }, { "epoch": 0.05498823529411765, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.6934, "step": 2337 }, { "epoch": 0.05501176470588235, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4177, "step": 2338 }, { "epoch": 0.05503529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6242, "step": 2339 }, { "epoch": 0.055058823529411764, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.6815, "step": 2340 }, { "epoch": 0.05508235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6318, "step": 2341 }, { "epoch": 0.05510588235294118, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.7333, "step": 2342 }, { "epoch": 0.055129411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.732, "step": 2343 }, { "epoch": 0.055152941176470585, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4659, "step": 2344 }, { "epoch": 0.05517647058823529, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.737, "step": 2345 }, { "epoch": 0.0552, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3961, "step": 2346 }, { "epoch": 0.055223529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8172, "step": 2347 }, { "epoch": 0.05524705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5983, "step": 2348 }, { "epoch": 0.05527058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.498, "step": 2349 }, { "epoch": 0.05529411764705883, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.6584, "step": 2350 }, { "epoch": 0.05531764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5367, "step": 2351 }, { "epoch": 0.055341176470588234, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.6522, "step": 2352 }, { "epoch": 0.05536470588235294, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7597, "step": 2353 }, { "epoch": 0.05538823529411765, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4553, "step": 2354 }, { "epoch": 0.055411764705882355, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6742, "step": 2355 }, { "epoch": 0.05543529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6798, "step": 2356 }, { "epoch": 0.05545882352941176, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.5784, "step": 2357 }, { "epoch": 0.05548235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5269, "step": 2358 }, { "epoch": 0.055505882352941176, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4862, "step": 2359 }, { "epoch": 0.05552941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6926, "step": 2360 }, { "epoch": 0.05555294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5118, "step": 2361 }, { "epoch": 0.0555764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.623, "step": 2362 }, { "epoch": 0.0556, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6114, "step": 2363 }, { "epoch": 0.055623529411764704, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6732, "step": 2364 }, { "epoch": 0.05564705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7916, "step": 2365 }, { "epoch": 0.05567058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6332, "step": 2366 }, { "epoch": 0.055694117647058825, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7017, "step": 2367 }, { "epoch": 0.05571764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5623, "step": 2368 }, { "epoch": 0.05574117647058824, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5589, "step": 2369 }, { "epoch": 0.05576470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4929, "step": 2370 }, { "epoch": 0.055788235294117645, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6689, "step": 2371 }, { "epoch": 0.05581176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4186, "step": 2372 }, { "epoch": 0.05583529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5398, "step": 2373 }, { "epoch": 0.055858823529411766, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7499, "step": 2374 }, { "epoch": 0.05588235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.5923, "step": 2375 }, { "epoch": 0.05590588235294117, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6023, "step": 2376 }, { "epoch": 0.05592941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6987, "step": 2377 }, { "epoch": 0.05595294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5529, "step": 2378 }, { "epoch": 0.055976470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7554, "step": 2379 }, { "epoch": 0.056, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6689, "step": 2380 }, { "epoch": 0.05602352941176471, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.6583, "step": 2381 }, { "epoch": 0.056047058823529415, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5373, "step": 2382 }, { "epoch": 0.056070588235294115, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5862, "step": 2383 }, { "epoch": 0.05609411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6866, "step": 2384 }, { "epoch": 0.05611764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5624, "step": 2385 }, { "epoch": 0.056141176470588236, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5747, "step": 2386 }, { "epoch": 0.05616470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.5599, "step": 2387 }, { "epoch": 0.05618823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7842, "step": 2388 }, { "epoch": 0.05621176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7524, "step": 2389 }, { "epoch": 0.05623529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.7304, "step": 2390 }, { "epoch": 0.056258823529411764, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5746, "step": 2391 }, { "epoch": 0.05628235294117647, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4327, "step": 2392 }, { "epoch": 0.05630588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6659, "step": 2393 }, { "epoch": 0.056329411764705885, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5571, "step": 2394 }, { "epoch": 0.056352941176470585, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5965, "step": 2395 }, { "epoch": 0.05637647058823529, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.7415, "step": 2396 }, { "epoch": 0.0564, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5526, "step": 2397 }, { "epoch": 0.056423529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5094, "step": 2398 }, { "epoch": 0.05644705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6507, "step": 2399 }, { "epoch": 0.05647058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6117, "step": 2400 }, { "epoch": 0.05649411764705883, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5353, "step": 2401 }, { "epoch": 0.05651764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7111, "step": 2402 }, { "epoch": 0.056541176470588234, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.5165, "step": 2403 }, { "epoch": 0.05656470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5512, "step": 2404 }, { "epoch": 0.05658823529411765, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.318, "step": 2405 }, { "epoch": 0.056611764705882354, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6636, "step": 2406 }, { "epoch": 0.05663529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4985, "step": 2407 }, { "epoch": 0.05665882352941176, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5431, "step": 2408 }, { "epoch": 0.05668235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6306, "step": 2409 }, { "epoch": 0.056705882352941175, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7726, "step": 2410 }, { "epoch": 0.05672941176470588, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6789, "step": 2411 }, { "epoch": 0.05675294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6654, "step": 2412 }, { "epoch": 0.056776470588235296, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5526, "step": 2413 }, { "epoch": 0.0568, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6662, "step": 2414 }, { "epoch": 0.0568235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6571, "step": 2415 }, { "epoch": 0.05684705882352941, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.6011, "step": 2416 }, { "epoch": 0.05687058823529412, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7143, "step": 2417 }, { "epoch": 0.056894117647058824, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7267, "step": 2418 }, { "epoch": 0.05691764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5993, "step": 2419 }, { "epoch": 0.05694117647058824, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5161, "step": 2420 }, { "epoch": 0.05696470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4724, "step": 2421 }, { "epoch": 0.056988235294117645, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6034, "step": 2422 }, { "epoch": 0.05701176470588235, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3007, "step": 2423 }, { "epoch": 0.05703529411764706, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.515, "step": 2424 }, { "epoch": 0.057058823529411766, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6909, "step": 2425 }, { "epoch": 0.05708235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5961, "step": 2426 }, { "epoch": 0.05710588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.6075, "step": 2427 }, { "epoch": 0.05712941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.611, "step": 2428 }, { "epoch": 0.05715294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4243, "step": 2429 }, { "epoch": 0.057176470588235294, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.5541, "step": 2430 }, { "epoch": 0.0572, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.4424, "step": 2431 }, { "epoch": 0.05722352941176471, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6903, "step": 2432 }, { "epoch": 0.057247058823529415, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6325, "step": 2433 }, { "epoch": 0.057270588235294115, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5195, "step": 2434 }, { "epoch": 0.05729411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6607, "step": 2435 }, { "epoch": 0.05731764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.7293, "step": 2436 }, { "epoch": 0.057341176470588236, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5116, "step": 2437 }, { "epoch": 0.05736470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4582, "step": 2438 }, { "epoch": 0.05738823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5973, "step": 2439 }, { "epoch": 0.05741176470588235, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.607, "step": 2440 }, { "epoch": 0.05743529411764706, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4216, "step": 2441 }, { "epoch": 0.057458823529411764, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.4716, "step": 2442 }, { "epoch": 0.05748235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5696, "step": 2443 }, { "epoch": 0.05750588235294118, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5873, "step": 2444 }, { "epoch": 0.057529411764705884, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5658, "step": 2445 }, { "epoch": 0.05755294117647059, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6513, "step": 2446 }, { "epoch": 0.05757647058823529, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.546, "step": 2447 }, { "epoch": 0.0576, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5221, "step": 2448 }, { "epoch": 0.057623529411764705, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4991, "step": 2449 }, { "epoch": 0.05764705882352941, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.877, "step": 2450 }, { "epoch": 0.05767058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5293, "step": 2451 }, { "epoch": 0.057694117647058826, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.465, "step": 2452 }, { "epoch": 0.057717647058823526, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6887, "step": 2453 }, { "epoch": 0.05774117647058823, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.607, "step": 2454 }, { "epoch": 0.05776470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4599, "step": 2455 }, { "epoch": 0.05778823529411765, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.5624, "step": 2456 }, { "epoch": 0.057811764705882354, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5395, "step": 2457 }, { "epoch": 0.05783529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5799, "step": 2458 }, { "epoch": 0.05785882352941177, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.647, "step": 2459 }, { "epoch": 0.05788235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6623, "step": 2460 }, { "epoch": 0.057905882352941175, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5695, "step": 2461 }, { "epoch": 0.05792941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6033, "step": 2462 }, { "epoch": 0.05795294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3443, "step": 2463 }, { "epoch": 0.057976470588235296, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5344, "step": 2464 }, { "epoch": 0.058, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5716, "step": 2465 }, { "epoch": 0.0580235294117647, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.8171, "step": 2466 }, { "epoch": 0.05804705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4429, "step": 2467 }, { "epoch": 0.05807058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3993, "step": 2468 }, { "epoch": 0.058094117647058824, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6655, "step": 2469 }, { "epoch": 0.05811764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.8674, "step": 2470 }, { "epoch": 0.05814117647058824, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6874, "step": 2471 }, { "epoch": 0.05816470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6654, "step": 2472 }, { "epoch": 0.058188235294117645, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3484, "step": 2473 }, { "epoch": 0.05821176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5456, "step": 2474 }, { "epoch": 0.05823529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.575, "step": 2475 }, { "epoch": 0.058258823529411766, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5584, "step": 2476 }, { "epoch": 0.05828235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.6619, "step": 2477 }, { "epoch": 0.05830588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8014, "step": 2478 }, { "epoch": 0.05832941176470588, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4826, "step": 2479 }, { "epoch": 0.05835294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.6729, "step": 2480 }, { "epoch": 0.058376470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.695, "step": 2481 }, { "epoch": 0.0584, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4471, "step": 2482 }, { "epoch": 0.05842352941176471, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.5236, "step": 2483 }, { "epoch": 0.058447058823529414, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5219, "step": 2484 }, { "epoch": 0.058470588235294114, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6314, "step": 2485 }, { "epoch": 0.05849411764705882, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.485, "step": 2486 }, { "epoch": 0.05851764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5114, "step": 2487 }, { "epoch": 0.058541176470588235, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.5278, "step": 2488 }, { "epoch": 0.05856470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6885, "step": 2489 }, { "epoch": 0.05858823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.7687, "step": 2490 }, { "epoch": 0.058611764705882356, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5782, "step": 2491 }, { "epoch": 0.058635294117647056, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4009, "step": 2492 }, { "epoch": 0.05865882352941176, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5771, "step": 2493 }, { "epoch": 0.05868235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.7405, "step": 2494 }, { "epoch": 0.05870588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7873, "step": 2495 }, { "epoch": 0.058729411764705884, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5693, "step": 2496 }, { "epoch": 0.05875294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.5194, "step": 2497 }, { "epoch": 0.05877647058823529, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5604, "step": 2498 }, { "epoch": 0.0588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.7258, "step": 2499 }, { "epoch": 0.058823529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6687, "step": 2500 }, { "epoch": 0.05884705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5841, "step": 2501 }, { "epoch": 0.05887058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4378, "step": 2502 }, { "epoch": 0.058894117647058826, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4911, "step": 2503 }, { "epoch": 0.058917647058823526, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.6673, "step": 2504 }, { "epoch": 0.05894117647058823, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4535, "step": 2505 }, { "epoch": 0.05896470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5691, "step": 2506 }, { "epoch": 0.05898823529411765, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6715, "step": 2507 }, { "epoch": 0.059011764705882354, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.487, "step": 2508 }, { "epoch": 0.05903529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5877, "step": 2509 }, { "epoch": 0.05905882352941177, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.738, "step": 2510 }, { "epoch": 0.05908235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5079, "step": 2511 }, { "epoch": 0.059105882352941175, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5544, "step": 2512 }, { "epoch": 0.05912941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7977, "step": 2513 }, { "epoch": 0.05915294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5177, "step": 2514 }, { "epoch": 0.059176470588235296, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4568, "step": 2515 }, { "epoch": 0.0592, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.3152, "step": 2516 }, { "epoch": 0.0592235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6707, "step": 2517 }, { "epoch": 0.05924705882352941, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3648, "step": 2518 }, { "epoch": 0.05927058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3791, "step": 2519 }, { "epoch": 0.059294117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4239, "step": 2520 }, { "epoch": 0.05931764705882353, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4816, "step": 2521 }, { "epoch": 0.05934117647058824, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5519, "step": 2522 }, { "epoch": 0.059364705882352944, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.439, "step": 2523 }, { "epoch": 0.059388235294117644, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.6217, "step": 2524 }, { "epoch": 0.05941176470588235, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5328, "step": 2525 }, { "epoch": 0.05943529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6589, "step": 2526 }, { "epoch": 0.059458823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6958, "step": 2527 }, { "epoch": 0.05948235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5472, "step": 2528 }, { "epoch": 0.05950588235294118, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5418, "step": 2529 }, { "epoch": 0.05952941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5874, "step": 2530 }, { "epoch": 0.059552941176470586, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6284, "step": 2531 }, { "epoch": 0.05957647058823529, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7194, "step": 2532 }, { "epoch": 0.0596, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6991, "step": 2533 }, { "epoch": 0.05962352941176471, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.5672, "step": 2534 }, { "epoch": 0.059647058823529414, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6455, "step": 2535 }, { "epoch": 0.05967058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.6142, "step": 2536 }, { "epoch": 0.05969411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6525, "step": 2537 }, { "epoch": 0.05971764705882353, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1834, "step": 2538 }, { "epoch": 0.059741176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4627, "step": 2539 }, { "epoch": 0.05976470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7075, "step": 2540 }, { "epoch": 0.05978823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.7067, "step": 2541 }, { "epoch": 0.059811764705882356, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6277, "step": 2542 }, { "epoch": 0.059835294117647056, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6632, "step": 2543 }, { "epoch": 0.05985882352941176, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5214, "step": 2544 }, { "epoch": 0.05988235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.7595, "step": 2545 }, { "epoch": 0.05990588235294118, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6297, "step": 2546 }, { "epoch": 0.059929411764705884, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4846, "step": 2547 }, { "epoch": 0.05995294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6373, "step": 2548 }, { "epoch": 0.05997647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.7806, "step": 2549 }, { "epoch": 0.06, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.7355, "step": 2550 }, { "epoch": 0.060023529411764705, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.783, "step": 2551 }, { "epoch": 0.06004705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6245, "step": 2552 }, { "epoch": 0.06007058823529412, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4924, "step": 2553 }, { "epoch": 0.060094117647058826, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.5727, "step": 2554 }, { "epoch": 0.06011764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5285, "step": 2555 }, { "epoch": 0.06014117647058823, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5453, "step": 2556 }, { "epoch": 0.06016470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6484, "step": 2557 }, { "epoch": 0.06018823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6009, "step": 2558 }, { "epoch": 0.060211764705882354, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5682, "step": 2559 }, { "epoch": 0.06023529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4061, "step": 2560 }, { "epoch": 0.06025882352941177, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5784, "step": 2561 }, { "epoch": 0.06028235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4453, "step": 2562 }, { "epoch": 0.060305882352941174, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4144, "step": 2563 }, { "epoch": 0.06032941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4871, "step": 2564 }, { "epoch": 0.06035294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5515, "step": 2565 }, { "epoch": 0.060376470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6429, "step": 2566 }, { "epoch": 0.0604, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5646, "step": 2567 }, { "epoch": 0.06042352941176471, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5855, "step": 2568 }, { "epoch": 0.06044705882352941, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.481, "step": 2569 }, { "epoch": 0.060470588235294116, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3148, "step": 2570 }, { "epoch": 0.06049411764705882, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3313, "step": 2571 }, { "epoch": 0.06051764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5118, "step": 2572 }, { "epoch": 0.06054117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.7791, "step": 2573 }, { "epoch": 0.060564705882352944, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.6725, "step": 2574 }, { "epoch": 0.060588235294117644, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.555, "step": 2575 }, { "epoch": 0.06061176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5988, "step": 2576 }, { "epoch": 0.06063529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4562, "step": 2577 }, { "epoch": 0.060658823529411765, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.285, "step": 2578 }, { "epoch": 0.06068235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6174, "step": 2579 }, { "epoch": 0.06070588235294118, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7247, "step": 2580 }, { "epoch": 0.06072941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5737, "step": 2581 }, { "epoch": 0.060752941176470586, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5579, "step": 2582 }, { "epoch": 0.06077647058823529, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.6271, "step": 2583 }, { "epoch": 0.0608, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.5431, "step": 2584 }, { "epoch": 0.06082352941176471, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6452, "step": 2585 }, { "epoch": 0.060847058823529414, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6787, "step": 2586 }, { "epoch": 0.06087058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5321, "step": 2587 }, { "epoch": 0.06089411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.7363, "step": 2588 }, { "epoch": 0.06091764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5892, "step": 2589 }, { "epoch": 0.060941176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4349, "step": 2590 }, { "epoch": 0.06096470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6533, "step": 2591 }, { "epoch": 0.06098823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5564, "step": 2592 }, { "epoch": 0.061011764705882356, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4826, "step": 2593 }, { "epoch": 0.061035294117647056, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5485, "step": 2594 }, { "epoch": 0.06105882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5274, "step": 2595 }, { "epoch": 0.06108235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.5798, "step": 2596 }, { "epoch": 0.06110588235294118, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.6287, "step": 2597 }, { "epoch": 0.061129411764705884, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6075, "step": 2598 }, { "epoch": 0.06115294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6299, "step": 2599 }, { "epoch": 0.0611764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.7792, "step": 2600 }, { "epoch": 0.0612, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.5354, "step": 2601 }, { "epoch": 0.061223529411764704, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.141, "step": 2602 }, { "epoch": 0.06124705882352941, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.7231, "step": 2603 }, { "epoch": 0.06127058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5162, "step": 2604 }, { "epoch": 0.061294117647058825, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.4662, "step": 2605 }, { "epoch": 0.06131764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3146, "step": 2606 }, { "epoch": 0.06134117647058823, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5693, "step": 2607 }, { "epoch": 0.06136470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4902, "step": 2608 }, { "epoch": 0.061388235294117646, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.8654, "step": 2609 }, { "epoch": 0.06141176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6584, "step": 2610 }, { "epoch": 0.06143529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6807, "step": 2611 }, { "epoch": 0.06145882352941177, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4579, "step": 2612 }, { "epoch": 0.06148235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5117, "step": 2613 }, { "epoch": 0.061505882352941174, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3024, "step": 2614 }, { "epoch": 0.06152941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4379, "step": 2615 }, { "epoch": 0.06155294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6063, "step": 2616 }, { "epoch": 0.061576470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5104, "step": 2617 }, { "epoch": 0.0616, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2378, "step": 2618 }, { "epoch": 0.06162352941176471, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5852, "step": 2619 }, { "epoch": 0.06164705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.6974, "step": 2620 }, { "epoch": 0.061670588235294116, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.214, "step": 2621 }, { "epoch": 0.06169411764705882, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.6071, "step": 2622 }, { "epoch": 0.06171764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6047, "step": 2623 }, { "epoch": 0.06174117647058824, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6185, "step": 2624 }, { "epoch": 0.061764705882352944, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5325, "step": 2625 }, { "epoch": 0.061788235294117644, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.741, "step": 2626 }, { "epoch": 0.06181176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.634, "step": 2627 }, { "epoch": 0.06183529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3323, "step": 2628 }, { "epoch": 0.061858823529411765, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3913, "step": 2629 }, { "epoch": 0.06188235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4772, "step": 2630 }, { "epoch": 0.06190588235294118, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.544, "step": 2631 }, { "epoch": 0.061929411764705886, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5045, "step": 2632 }, { "epoch": 0.061952941176470586, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.5407, "step": 2633 }, { "epoch": 0.06197647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7285, "step": 2634 }, { "epoch": 0.062, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6805, "step": 2635 }, { "epoch": 0.062023529411764707, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5162, "step": 2636 }, { "epoch": 0.062047058823529413, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3843, "step": 2637 }, { "epoch": 0.06207058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4651, "step": 2638 }, { "epoch": 0.06209411764705882, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.6672, "step": 2639 }, { "epoch": 0.06211764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5478, "step": 2640 }, { "epoch": 0.062141176470588234, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.575, "step": 2641 }, { "epoch": 0.06216470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6691, "step": 2642 }, { "epoch": 0.06218823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5624, "step": 2643 }, { "epoch": 0.062211764705882355, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7001, "step": 2644 }, { "epoch": 0.06223529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5598, "step": 2645 }, { "epoch": 0.06225882352941176, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6877, "step": 2646 }, { "epoch": 0.06228235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.4521, "step": 2647 }, { "epoch": 0.062305882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6457, "step": 2648 }, { "epoch": 0.06232941176470588, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6294, "step": 2649 }, { "epoch": 0.06235294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4222, "step": 2650 }, { "epoch": 0.0623764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.765, "step": 2651 }, { "epoch": 0.0624, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4729, "step": 2652 }, { "epoch": 0.062423529411764704, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3119, "step": 2653 }, { "epoch": 0.06244705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.4495, "step": 2654 }, { "epoch": 0.06247058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6124, "step": 2655 }, { "epoch": 0.062494117647058825, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.5538, "step": 2656 }, { "epoch": 0.06251764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.7565, "step": 2657 }, { "epoch": 0.06254117647058824, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5642, "step": 2658 }, { "epoch": 0.06256470588235294, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.5675, "step": 2659 }, { "epoch": 0.06258823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6479, "step": 2660 }, { "epoch": 0.06261176470588235, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.491, "step": 2661 }, { "epoch": 0.06263529411764705, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7817, "step": 2662 }, { "epoch": 0.06265882352941177, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.7575, "step": 2663 }, { "epoch": 0.06268235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7065, "step": 2664 }, { "epoch": 0.06270588235294118, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6727, "step": 2665 }, { "epoch": 0.06272941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5561, "step": 2666 }, { "epoch": 0.0627529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5246, "step": 2667 }, { "epoch": 0.0627764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.7576, "step": 2668 }, { "epoch": 0.0628, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.643, "step": 2669 }, { "epoch": 0.06282352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5686, "step": 2670 }, { "epoch": 0.06284705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.5111, "step": 2671 }, { "epoch": 0.06287058823529412, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.474, "step": 2672 }, { "epoch": 0.06289411764705882, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4876, "step": 2673 }, { "epoch": 0.06291764705882352, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2953, "step": 2674 }, { "epoch": 0.06294117647058824, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5756, "step": 2675 }, { "epoch": 0.06296470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6455, "step": 2676 }, { "epoch": 0.06298823529411765, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3846, "step": 2677 }, { "epoch": 0.06301176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3216, "step": 2678 }, { "epoch": 0.06303529411764706, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2938, "step": 2679 }, { "epoch": 0.06305882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5632, "step": 2680 }, { "epoch": 0.06308235294117646, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5272, "step": 2681 }, { "epoch": 0.06310588235294118, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4838, "step": 2682 }, { "epoch": 0.06312941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4668, "step": 2683 }, { "epoch": 0.06315294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5628, "step": 2684 }, { "epoch": 0.06317647058823529, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4812, "step": 2685 }, { "epoch": 0.0632, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5839, "step": 2686 }, { "epoch": 0.0632235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5182, "step": 2687 }, { "epoch": 0.0632470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4096, "step": 2688 }, { "epoch": 0.06327058823529412, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.68, "step": 2689 }, { "epoch": 0.06329411764705882, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.7542, "step": 2690 }, { "epoch": 0.06331764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5164, "step": 2691 }, { "epoch": 0.06334117647058823, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6466, "step": 2692 }, { "epoch": 0.06336470588235295, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.777, "step": 2693 }, { "epoch": 0.06338823529411765, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2895, "step": 2694 }, { "epoch": 0.06341176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.437, "step": 2695 }, { "epoch": 0.06343529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.5095, "step": 2696 }, { "epoch": 0.06345882352941176, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3927, "step": 2697 }, { "epoch": 0.06348235294117648, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.7286, "step": 2698 }, { "epoch": 0.06350588235294118, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.3344, "step": 2699 }, { "epoch": 0.06352941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4605, "step": 2700 }, { "epoch": 0.06355294117647059, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.368, "step": 2701 }, { "epoch": 0.06357647058823529, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4981, "step": 2702 }, { "epoch": 0.0636, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.7345, "step": 2703 }, { "epoch": 0.0636235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4194, "step": 2704 }, { "epoch": 0.06364705882352942, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3701, "step": 2705 }, { "epoch": 0.06367058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.4201, "step": 2706 }, { "epoch": 0.06369411764705882, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.5464, "step": 2707 }, { "epoch": 0.06371764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.7028, "step": 2708 }, { "epoch": 0.06374117647058823, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.492, "step": 2709 }, { "epoch": 0.06376470588235295, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.5573, "step": 2710 }, { "epoch": 0.06378823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2392, "step": 2711 }, { "epoch": 0.06381176470588236, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5485, "step": 2712 }, { "epoch": 0.06383529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.376, "step": 2713 }, { "epoch": 0.06385882352941176, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.296, "step": 2714 }, { "epoch": 0.06388235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4744, "step": 2715 }, { "epoch": 0.06390588235294117, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6621, "step": 2716 }, { "epoch": 0.06392941176470589, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5699, "step": 2717 }, { "epoch": 0.06395294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.7623, "step": 2718 }, { "epoch": 0.06397647058823529, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6936, "step": 2719 }, { "epoch": 0.064, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3839, "step": 2720 }, { "epoch": 0.0640235294117647, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2903, "step": 2721 }, { "epoch": 0.06404705882352942, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5679, "step": 2722 }, { "epoch": 0.06407058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4283, "step": 2723 }, { "epoch": 0.06409411764705883, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.627, "step": 2724 }, { "epoch": 0.06411764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.7428, "step": 2725 }, { "epoch": 0.06414117647058823, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.3165, "step": 2726 }, { "epoch": 0.06416470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.588, "step": 2727 }, { "epoch": 0.06418823529411764, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5037, "step": 2728 }, { "epoch": 0.06421176470588236, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4249, "step": 2729 }, { "epoch": 0.06423529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5163, "step": 2730 }, { "epoch": 0.06425882352941177, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5199, "step": 2731 }, { "epoch": 0.06428235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3652, "step": 2732 }, { "epoch": 0.06430588235294117, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.4198, "step": 2733 }, { "epoch": 0.06432941176470588, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.5926, "step": 2734 }, { "epoch": 0.06435294117647058, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6474, "step": 2735 }, { "epoch": 0.0643764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3128, "step": 2736 }, { "epoch": 0.0644, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.6937, "step": 2737 }, { "epoch": 0.06442352941176471, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4744, "step": 2738 }, { "epoch": 0.06444705882352941, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2669, "step": 2739 }, { "epoch": 0.06447058823529411, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5496, "step": 2740 }, { "epoch": 0.06449411764705883, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6446, "step": 2741 }, { "epoch": 0.06451764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.6975, "step": 2742 }, { "epoch": 0.06454117647058824, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2008, "step": 2743 }, { "epoch": 0.06456470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3526, "step": 2744 }, { "epoch": 0.06458823529411764, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6442, "step": 2745 }, { "epoch": 0.06461176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6286, "step": 2746 }, { "epoch": 0.06463529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6873, "step": 2747 }, { "epoch": 0.06465882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6463, "step": 2748 }, { "epoch": 0.06468235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6913, "step": 2749 }, { "epoch": 0.06470588235294118, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.7035, "step": 2750 }, { "epoch": 0.06472941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6871, "step": 2751 }, { "epoch": 0.06475294117647058, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6801, "step": 2752 }, { "epoch": 0.0647764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.7352, "step": 2753 }, { "epoch": 0.0648, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2216, "step": 2754 }, { "epoch": 0.06482352941176471, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6423, "step": 2755 }, { "epoch": 0.06484705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5841, "step": 2756 }, { "epoch": 0.06487058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4588, "step": 2757 }, { "epoch": 0.06489411764705882, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5653, "step": 2758 }, { "epoch": 0.06491764705882352, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5463, "step": 2759 }, { "epoch": 0.06494117647058824, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1798, "step": 2760 }, { "epoch": 0.06496470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6164, "step": 2761 }, { "epoch": 0.06498823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5263, "step": 2762 }, { "epoch": 0.06501176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6462, "step": 2763 }, { "epoch": 0.06503529411764705, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5398, "step": 2764 }, { "epoch": 0.06505882352941177, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1499, "step": 2765 }, { "epoch": 0.06508235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7306, "step": 2766 }, { "epoch": 0.06510588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.557, "step": 2767 }, { "epoch": 0.06512941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3936, "step": 2768 }, { "epoch": 0.0651529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.6835, "step": 2769 }, { "epoch": 0.0651764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6842, "step": 2770 }, { "epoch": 0.0652, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.5701, "step": 2771 }, { "epoch": 0.06522352941176471, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5466, "step": 2772 }, { "epoch": 0.06524705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.397, "step": 2773 }, { "epoch": 0.06527058823529412, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4545, "step": 2774 }, { "epoch": 0.06529411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4159, "step": 2775 }, { "epoch": 0.06531764705882354, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.6606, "step": 2776 }, { "epoch": 0.06534117647058824, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5999, "step": 2777 }, { "epoch": 0.06536470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4554, "step": 2778 }, { "epoch": 0.06538823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5316, "step": 2779 }, { "epoch": 0.06541176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.325, "step": 2780 }, { "epoch": 0.06543529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5485, "step": 2781 }, { "epoch": 0.06545882352941176, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5017, "step": 2782 }, { "epoch": 0.06548235294117646, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6012, "step": 2783 }, { "epoch": 0.06550588235294118, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4511, "step": 2784 }, { "epoch": 0.06552941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4149, "step": 2785 }, { "epoch": 0.06555294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4801, "step": 2786 }, { "epoch": 0.06557647058823529, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4539, "step": 2787 }, { "epoch": 0.0656, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.691, "step": 2788 }, { "epoch": 0.0656235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.6134, "step": 2789 }, { "epoch": 0.0656470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2866, "step": 2790 }, { "epoch": 0.06567058823529412, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.4517, "step": 2791 }, { "epoch": 0.06569411764705882, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4414, "step": 2792 }, { "epoch": 0.06571764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5224, "step": 2793 }, { "epoch": 0.06574117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3954, "step": 2794 }, { "epoch": 0.06576470588235295, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2115, "step": 2795 }, { "epoch": 0.06578823529411765, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.5572, "step": 2796 }, { "epoch": 0.06581176470588235, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4641, "step": 2797 }, { "epoch": 0.06583529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.6752, "step": 2798 }, { "epoch": 0.06585882352941176, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2997, "step": 2799 }, { "epoch": 0.06588235294117648, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.3056, "step": 2800 }, { "epoch": 0.06590588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5866, "step": 2801 }, { "epoch": 0.06592941176470589, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6488, "step": 2802 }, { "epoch": 0.06595294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5204, "step": 2803 }, { "epoch": 0.06597647058823529, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.294, "step": 2804 }, { "epoch": 0.066, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6448, "step": 2805 }, { "epoch": 0.0660235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3678, "step": 2806 }, { "epoch": 0.06604705882352942, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.5577, "step": 2807 }, { "epoch": 0.06607058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6876, "step": 2808 }, { "epoch": 0.06609411764705882, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4724, "step": 2809 }, { "epoch": 0.06611764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5268, "step": 2810 }, { "epoch": 0.06614117647058823, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.5552, "step": 2811 }, { "epoch": 0.06616470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5522, "step": 2812 }, { "epoch": 0.06618823529411764, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6023, "step": 2813 }, { "epoch": 0.06621176470588236, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4787, "step": 2814 }, { "epoch": 0.06623529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2282, "step": 2815 }, { "epoch": 0.06625882352941176, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7715, "step": 2816 }, { "epoch": 0.06628235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4698, "step": 2817 }, { "epoch": 0.06630588235294117, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.5989, "step": 2818 }, { "epoch": 0.06632941176470589, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4924, "step": 2819 }, { "epoch": 0.06635294117647059, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.5726, "step": 2820 }, { "epoch": 0.0663764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.461, "step": 2821 }, { "epoch": 0.0664, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6378, "step": 2822 }, { "epoch": 0.0664235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4247, "step": 2823 }, { "epoch": 0.06644705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4932, "step": 2824 }, { "epoch": 0.06647058823529411, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.4266, "step": 2825 }, { "epoch": 0.06649411764705883, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6682, "step": 2826 }, { "epoch": 0.06651764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.5453, "step": 2827 }, { "epoch": 0.06654117647058823, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5969, "step": 2828 }, { "epoch": 0.06656470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5847, "step": 2829 }, { "epoch": 0.06658823529411764, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3014, "step": 2830 }, { "epoch": 0.06661176470588236, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3876, "step": 2831 }, { "epoch": 0.06663529411764706, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2189, "step": 2832 }, { "epoch": 0.06665882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5789, "step": 2833 }, { "epoch": 0.06668235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4844, "step": 2834 }, { "epoch": 0.06670588235294117, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.5893, "step": 2835 }, { "epoch": 0.06672941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3325, "step": 2836 }, { "epoch": 0.06675294117647058, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5734, "step": 2837 }, { "epoch": 0.0667764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5122, "step": 2838 }, { "epoch": 0.0668, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4356, "step": 2839 }, { "epoch": 0.06682352941176471, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.7035, "step": 2840 }, { "epoch": 0.06684705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4805, "step": 2841 }, { "epoch": 0.06687058823529411, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3347, "step": 2842 }, { "epoch": 0.06689411764705883, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4986, "step": 2843 }, { "epoch": 0.06691764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6632, "step": 2844 }, { "epoch": 0.06694117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6858, "step": 2845 }, { "epoch": 0.06696470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5431, "step": 2846 }, { "epoch": 0.06698823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5602, "step": 2847 }, { "epoch": 0.06701176470588235, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2786, "step": 2848 }, { "epoch": 0.06703529411764705, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5192, "step": 2849 }, { "epoch": 0.06705882352941177, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4538, "step": 2850 }, { "epoch": 0.06708235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5964, "step": 2851 }, { "epoch": 0.06710588235294118, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5778, "step": 2852 }, { "epoch": 0.06712941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4485, "step": 2853 }, { "epoch": 0.06715294117647058, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4472, "step": 2854 }, { "epoch": 0.0671764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.7192, "step": 2855 }, { "epoch": 0.0672, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4563, "step": 2856 }, { "epoch": 0.06722352941176471, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5294, "step": 2857 }, { "epoch": 0.06724705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6018, "step": 2858 }, { "epoch": 0.06727058823529412, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.6316, "step": 2859 }, { "epoch": 0.06729411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3925, "step": 2860 }, { "epoch": 0.06731764705882352, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.8493, "step": 2861 }, { "epoch": 0.06734117647058824, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7608, "step": 2862 }, { "epoch": 0.06736470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5374, "step": 2863 }, { "epoch": 0.06738823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2909, "step": 2864 }, { "epoch": 0.06741176470588235, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2295, "step": 2865 }, { "epoch": 0.06743529411764707, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6358, "step": 2866 }, { "epoch": 0.06745882352941177, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4892, "step": 2867 }, { "epoch": 0.06748235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.571, "step": 2868 }, { "epoch": 0.06750588235294118, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3077, "step": 2869 }, { "epoch": 0.06752941176470588, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.553, "step": 2870 }, { "epoch": 0.0675529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.7406, "step": 2871 }, { "epoch": 0.0675764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6444, "step": 2872 }, { "epoch": 0.0676, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.6593, "step": 2873 }, { "epoch": 0.06762352941176471, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3586, "step": 2874 }, { "epoch": 0.06764705882352941, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.3166, "step": 2875 }, { "epoch": 0.06767058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4238, "step": 2876 }, { "epoch": 0.06769411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7171, "step": 2877 }, { "epoch": 0.06771764705882354, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4733, "step": 2878 }, { "epoch": 0.06774117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5071, "step": 2879 }, { "epoch": 0.06776470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6977, "step": 2880 }, { "epoch": 0.06778823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6856, "step": 2881 }, { "epoch": 0.06781176470588235, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5112, "step": 2882 }, { "epoch": 0.06783529411764706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5905, "step": 2883 }, { "epoch": 0.06785882352941176, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.468, "step": 2884 }, { "epoch": 0.06788235294117648, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.3427, "step": 2885 }, { "epoch": 0.06790588235294118, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5308, "step": 2886 }, { "epoch": 0.06792941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3681, "step": 2887 }, { "epoch": 0.06795294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4546, "step": 2888 }, { "epoch": 0.06797647058823529, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4498, "step": 2889 }, { "epoch": 0.068, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4427, "step": 2890 }, { "epoch": 0.0680235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6328, "step": 2891 }, { "epoch": 0.0680470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4556, "step": 2892 }, { "epoch": 0.06807058823529412, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4788, "step": 2893 }, { "epoch": 0.06809411764705882, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.4449, "step": 2894 }, { "epoch": 0.06811764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5493, "step": 2895 }, { "epoch": 0.06814117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5079, "step": 2896 }, { "epoch": 0.06816470588235295, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3412, "step": 2897 }, { "epoch": 0.06818823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5458, "step": 2898 }, { "epoch": 0.06821176470588235, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.52, "step": 2899 }, { "epoch": 0.06823529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.677, "step": 2900 }, { "epoch": 0.06825882352941176, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5168, "step": 2901 }, { "epoch": 0.06828235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4775, "step": 2902 }, { "epoch": 0.06830588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4171, "step": 2903 }, { "epoch": 0.06832941176470589, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5137, "step": 2904 }, { "epoch": 0.06835294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6161, "step": 2905 }, { "epoch": 0.06837647058823529, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5345, "step": 2906 }, { "epoch": 0.0684, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5462, "step": 2907 }, { "epoch": 0.0684235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.576, "step": 2908 }, { "epoch": 0.06844705882352942, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3865, "step": 2909 }, { "epoch": 0.06847058823529412, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5879, "step": 2910 }, { "epoch": 0.06849411764705883, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.6034, "step": 2911 }, { "epoch": 0.06851764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6637, "step": 2912 }, { "epoch": 0.06854117647058823, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3453, "step": 2913 }, { "epoch": 0.06856470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4615, "step": 2914 }, { "epoch": 0.06858823529411764, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.6289, "step": 2915 }, { "epoch": 0.06861176470588236, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.62, "step": 2916 }, { "epoch": 0.06863529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5374, "step": 2917 }, { "epoch": 0.06865882352941176, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5686, "step": 2918 }, { "epoch": 0.06868235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4515, "step": 2919 }, { "epoch": 0.06870588235294117, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.6666, "step": 2920 }, { "epoch": 0.06872941176470589, "grad_norm": 0.37109375, "learning_rate": 0.02, "loss": 1.3161, "step": 2921 }, { "epoch": 0.06875294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5861, "step": 2922 }, { "epoch": 0.0687764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.705, "step": 2923 }, { "epoch": 0.0688, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5582, "step": 2924 }, { "epoch": 0.0688235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4775, "step": 2925 }, { "epoch": 0.06884705882352941, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4143, "step": 2926 }, { "epoch": 0.06887058823529411, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.382, "step": 2927 }, { "epoch": 0.06889411764705883, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.5062, "step": 2928 }, { "epoch": 0.06891764705882353, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.33, "step": 2929 }, { "epoch": 0.06894117647058824, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.5108, "step": 2930 }, { "epoch": 0.06896470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5501, "step": 2931 }, { "epoch": 0.06898823529411764, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 1.233, "step": 2932 }, { "epoch": 0.06901176470588236, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3459, "step": 2933 }, { "epoch": 0.06903529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.7025, "step": 2934 }, { "epoch": 0.06905882352941177, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7169, "step": 2935 }, { "epoch": 0.06908235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.5306, "step": 2936 }, { "epoch": 0.06910588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3446, "step": 2937 }, { "epoch": 0.06912941176470588, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.3988, "step": 2938 }, { "epoch": 0.06915294117647058, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.6346, "step": 2939 }, { "epoch": 0.0691764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.478, "step": 2940 }, { "epoch": 0.0692, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.5236, "step": 2941 }, { "epoch": 0.06922352941176471, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5266, "step": 2942 }, { "epoch": 0.06924705882352941, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5402, "step": 2943 }, { "epoch": 0.06927058823529411, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.5968, "step": 2944 }, { "epoch": 0.06929411764705883, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4532, "step": 2945 }, { "epoch": 0.06931764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.6082, "step": 2946 }, { "epoch": 0.06934117647058824, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2997, "step": 2947 }, { "epoch": 0.06936470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.7442, "step": 2948 }, { "epoch": 0.06938823529411765, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3408, "step": 2949 }, { "epoch": 0.06941176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5361, "step": 2950 }, { "epoch": 0.06943529411764705, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.769, "step": 2951 }, { "epoch": 0.06945882352941177, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.634, "step": 2952 }, { "epoch": 0.06948235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6556, "step": 2953 }, { "epoch": 0.06950588235294118, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6923, "step": 2954 }, { "epoch": 0.06952941176470588, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4009, "step": 2955 }, { "epoch": 0.0695529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7097, "step": 2956 }, { "epoch": 0.0695764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3485, "step": 2957 }, { "epoch": 0.0696, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2648, "step": 2958 }, { "epoch": 0.06962352941176471, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.436, "step": 2959 }, { "epoch": 0.06964705882352941, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3278, "step": 2960 }, { "epoch": 0.06967058823529412, "grad_norm": 0.4375, "learning_rate": 0.02, "loss": 1.6172, "step": 2961 }, { "epoch": 0.06969411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5453, "step": 2962 }, { "epoch": 0.06971764705882352, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4988, "step": 2963 }, { "epoch": 0.06974117647058824, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.5962, "step": 2964 }, { "epoch": 0.06976470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5296, "step": 2965 }, { "epoch": 0.06978823529411765, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.5864, "step": 2966 }, { "epoch": 0.06981176470588235, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.501, "step": 2967 }, { "epoch": 0.06983529411764706, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.5445, "step": 2968 }, { "epoch": 0.06985882352941176, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2118, "step": 2969 }, { "epoch": 0.06988235294117646, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.6086, "step": 2970 }, { "epoch": 0.06990588235294118, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3997, "step": 2971 }, { "epoch": 0.06992941176470588, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5826, "step": 2972 }, { "epoch": 0.06995294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3975, "step": 2973 }, { "epoch": 0.06997647058823529, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.485, "step": 2974 }, { "epoch": 0.07, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6995, "step": 2975 }, { "epoch": 0.0700235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4854, "step": 2976 }, { "epoch": 0.0700470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7225, "step": 2977 }, { "epoch": 0.07007058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4405, "step": 2978 }, { "epoch": 0.07009411764705882, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5537, "step": 2979 }, { "epoch": 0.07011764705882353, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2317, "step": 2980 }, { "epoch": 0.07014117647058823, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4486, "step": 2981 }, { "epoch": 0.07016470588235293, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.456, "step": 2982 }, { "epoch": 0.07018823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.533, "step": 2983 }, { "epoch": 0.07021176470588235, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2994, "step": 2984 }, { "epoch": 0.07023529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5392, "step": 2985 }, { "epoch": 0.07025882352941176, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.3845, "step": 2986 }, { "epoch": 0.07028235294117648, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4638, "step": 2987 }, { "epoch": 0.07030588235294118, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.6061, "step": 2988 }, { "epoch": 0.07032941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5678, "step": 2989 }, { "epoch": 0.07035294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.6513, "step": 2990 }, { "epoch": 0.07037647058823529, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4113, "step": 2991 }, { "epoch": 0.0704, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5177, "step": 2992 }, { "epoch": 0.0704235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4709, "step": 2993 }, { "epoch": 0.07044705882352942, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2854, "step": 2994 }, { "epoch": 0.07047058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2966, "step": 2995 }, { "epoch": 0.07049411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3667, "step": 2996 }, { "epoch": 0.07051764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6361, "step": 2997 }, { "epoch": 0.07054117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5696, "step": 2998 }, { "epoch": 0.07056470588235295, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.8359, "step": 2999 }, { "epoch": 0.07058823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6019, "step": 3000 }, { "epoch": 0.07058823529411765, "eval_loss": 2.2543089389801025, "eval_runtime": 677.5565, "eval_samples_per_second": 12.545, "eval_steps_per_second": 3.136, "step": 3000 }, { "epoch": 0.07061176470588235, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4226, "step": 3001 }, { "epoch": 0.07063529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6905, "step": 3002 }, { "epoch": 0.07065882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4736, "step": 3003 }, { "epoch": 0.07068235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.4373, "step": 3004 }, { "epoch": 0.07070588235294117, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6568, "step": 3005 }, { "epoch": 0.07072941176470589, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.6615, "step": 3006 }, { "epoch": 0.07075294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5764, "step": 3007 }, { "epoch": 0.07077647058823529, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.5196, "step": 3008 }, { "epoch": 0.0708, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.6577, "step": 3009 }, { "epoch": 0.0708235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3937, "step": 3010 }, { "epoch": 0.07084705882352942, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3001, "step": 3011 }, { "epoch": 0.07087058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5784, "step": 3012 }, { "epoch": 0.07089411764705883, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4928, "step": 3013 }, { "epoch": 0.07091764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.5267, "step": 3014 }, { "epoch": 0.07094117647058823, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4248, "step": 3015 }, { "epoch": 0.07096470588235294, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3291, "step": 3016 }, { "epoch": 0.07098823529411764, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3293, "step": 3017 }, { "epoch": 0.07101176470588236, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.6009, "step": 3018 }, { "epoch": 0.07103529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5609, "step": 3019 }, { "epoch": 0.07105882352941177, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3903, "step": 3020 }, { "epoch": 0.07108235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5069, "step": 3021 }, { "epoch": 0.07110588235294117, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5436, "step": 3022 }, { "epoch": 0.07112941176470589, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4315, "step": 3023 }, { "epoch": 0.07115294117647059, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.635, "step": 3024 }, { "epoch": 0.0711764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5003, "step": 3025 }, { "epoch": 0.0712, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5362, "step": 3026 }, { "epoch": 0.0712235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.535, "step": 3027 }, { "epoch": 0.07124705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4863, "step": 3028 }, { "epoch": 0.07127058823529411, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3953, "step": 3029 }, { "epoch": 0.07129411764705883, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1818, "step": 3030 }, { "epoch": 0.07131764705882353, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.1104, "step": 3031 }, { "epoch": 0.07134117647058824, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5388, "step": 3032 }, { "epoch": 0.07136470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.6368, "step": 3033 }, { "epoch": 0.07138823529411764, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4729, "step": 3034 }, { "epoch": 0.07141176470588236, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.5555, "step": 3035 }, { "epoch": 0.07143529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5434, "step": 3036 }, { "epoch": 0.07145882352941177, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3799, "step": 3037 }, { "epoch": 0.07148235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3448, "step": 3038 }, { "epoch": 0.07150588235294118, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.624, "step": 3039 }, { "epoch": 0.07152941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5355, "step": 3040 }, { "epoch": 0.07155294117647058, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4633, "step": 3041 }, { "epoch": 0.0715764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5704, "step": 3042 }, { "epoch": 0.0716, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.45, "step": 3043 }, { "epoch": 0.07162352941176471, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.3567, "step": 3044 }, { "epoch": 0.07164705882352941, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4621, "step": 3045 }, { "epoch": 0.07167058823529411, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5654, "step": 3046 }, { "epoch": 0.07169411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3935, "step": 3047 }, { "epoch": 0.07171764705882352, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4701, "step": 3048 }, { "epoch": 0.07174117647058824, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.7536, "step": 3049 }, { "epoch": 0.07176470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.5196, "step": 3050 }, { "epoch": 0.07178823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3376, "step": 3051 }, { "epoch": 0.07181176470588235, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3834, "step": 3052 }, { "epoch": 0.07183529411764705, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.6447, "step": 3053 }, { "epoch": 0.07185882352941177, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.479, "step": 3054 }, { "epoch": 0.07188235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3962, "step": 3055 }, { "epoch": 0.07190588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5119, "step": 3056 }, { "epoch": 0.07192941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5368, "step": 3057 }, { "epoch": 0.0719529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4431, "step": 3058 }, { "epoch": 0.0719764705882353, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.4289, "step": 3059 }, { "epoch": 0.072, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6237, "step": 3060 }, { "epoch": 0.07202352941176471, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6027, "step": 3061 }, { "epoch": 0.07204705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4917, "step": 3062 }, { "epoch": 0.07207058823529412, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.3922, "step": 3063 }, { "epoch": 0.07209411764705882, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.6006, "step": 3064 }, { "epoch": 0.07211764705882354, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4909, "step": 3065 }, { "epoch": 0.07214117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4703, "step": 3066 }, { "epoch": 0.07216470588235294, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.5021, "step": 3067 }, { "epoch": 0.07218823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5925, "step": 3068 }, { "epoch": 0.07221176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3903, "step": 3069 }, { "epoch": 0.07223529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3428, "step": 3070 }, { "epoch": 0.07225882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4716, "step": 3071 }, { "epoch": 0.07228235294117646, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5302, "step": 3072 }, { "epoch": 0.07230588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.5268, "step": 3073 }, { "epoch": 0.07232941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6536, "step": 3074 }, { "epoch": 0.07235294117647059, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1692, "step": 3075 }, { "epoch": 0.07237647058823529, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9233, "step": 3076 }, { "epoch": 0.0724, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5731, "step": 3077 }, { "epoch": 0.0724235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2626, "step": 3078 }, { "epoch": 0.0724470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6858, "step": 3079 }, { "epoch": 0.07247058823529412, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4447, "step": 3080 }, { "epoch": 0.07249411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4539, "step": 3081 }, { "epoch": 0.07251764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7181, "step": 3082 }, { "epoch": 0.07254117647058823, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2294, "step": 3083 }, { "epoch": 0.07256470588235295, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.42, "step": 3084 }, { "epoch": 0.07258823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4517, "step": 3085 }, { "epoch": 0.07261176470588235, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5752, "step": 3086 }, { "epoch": 0.07263529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4507, "step": 3087 }, { "epoch": 0.07265882352941176, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.2776, "step": 3088 }, { "epoch": 0.07268235294117648, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4956, "step": 3089 }, { "epoch": 0.07270588235294118, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6403, "step": 3090 }, { "epoch": 0.07272941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3938, "step": 3091 }, { "epoch": 0.07275294117647059, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3442, "step": 3092 }, { "epoch": 0.07277647058823529, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4066, "step": 3093 }, { "epoch": 0.0728, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.4942, "step": 3094 }, { "epoch": 0.0728235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4299, "step": 3095 }, { "epoch": 0.07284705882352942, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.7208, "step": 3096 }, { "epoch": 0.07287058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3378, "step": 3097 }, { "epoch": 0.07289411764705882, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6165, "step": 3098 }, { "epoch": 0.07291764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4866, "step": 3099 }, { "epoch": 0.07294117647058823, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3398, "step": 3100 }, { "epoch": 0.07296470588235295, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3307, "step": 3101 }, { "epoch": 0.07298823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4773, "step": 3102 }, { "epoch": 0.07301176470588236, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3931, "step": 3103 }, { "epoch": 0.07303529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2927, "step": 3104 }, { "epoch": 0.07305882352941176, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.3631, "step": 3105 }, { "epoch": 0.07308235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.6272, "step": 3106 }, { "epoch": 0.07310588235294117, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5896, "step": 3107 }, { "epoch": 0.07312941176470589, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.327, "step": 3108 }, { "epoch": 0.07315294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3867, "step": 3109 }, { "epoch": 0.07317647058823529, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3185, "step": 3110 }, { "epoch": 0.0732, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1905, "step": 3111 }, { "epoch": 0.0732235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2968, "step": 3112 }, { "epoch": 0.07324705882352942, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7025, "step": 3113 }, { "epoch": 0.07327058823529412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.5129, "step": 3114 }, { "epoch": 0.07329411764705883, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.317, "step": 3115 }, { "epoch": 0.07331764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.4017, "step": 3116 }, { "epoch": 0.07334117647058823, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2855, "step": 3117 }, { "epoch": 0.07336470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.461, "step": 3118 }, { "epoch": 0.07338823529411764, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6821, "step": 3119 }, { "epoch": 0.07341176470588236, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.401, "step": 3120 }, { "epoch": 0.07343529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5732, "step": 3121 }, { "epoch": 0.07345882352941177, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.4949, "step": 3122 }, { "epoch": 0.07348235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.697, "step": 3123 }, { "epoch": 0.07350588235294117, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.5023, "step": 3124 }, { "epoch": 0.07352941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5814, "step": 3125 }, { "epoch": 0.07355294117647058, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.6569, "step": 3126 }, { "epoch": 0.0735764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4316, "step": 3127 }, { "epoch": 0.0736, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.413, "step": 3128 }, { "epoch": 0.07362352941176471, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.4943, "step": 3129 }, { "epoch": 0.07364705882352941, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4439, "step": 3130 }, { "epoch": 0.07367058823529411, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6934, "step": 3131 }, { "epoch": 0.07369411764705883, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4579, "step": 3132 }, { "epoch": 0.07371764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4235, "step": 3133 }, { "epoch": 0.07374117647058824, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3512, "step": 3134 }, { "epoch": 0.07376470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5311, "step": 3135 }, { "epoch": 0.07378823529411764, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4756, "step": 3136 }, { "epoch": 0.07381176470588235, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4748, "step": 3137 }, { "epoch": 0.07383529411764705, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.4289, "step": 3138 }, { "epoch": 0.07385882352941177, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5612, "step": 3139 }, { "epoch": 0.07388235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.5199, "step": 3140 }, { "epoch": 0.07390588235294118, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2295, "step": 3141 }, { "epoch": 0.07392941176470588, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0363, "step": 3142 }, { "epoch": 0.07395294117647058, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5577, "step": 3143 }, { "epoch": 0.0739764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4101, "step": 3144 }, { "epoch": 0.074, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3591, "step": 3145 }, { "epoch": 0.07402352941176471, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.305, "step": 3146 }, { "epoch": 0.07404705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3554, "step": 3147 }, { "epoch": 0.07407058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4261, "step": 3148 }, { "epoch": 0.07409411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6002, "step": 3149 }, { "epoch": 0.07411764705882352, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5022, "step": 3150 }, { "epoch": 0.07414117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4715, "step": 3151 }, { "epoch": 0.07416470588235294, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1711, "step": 3152 }, { "epoch": 0.07418823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6272, "step": 3153 }, { "epoch": 0.07421176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.314, "step": 3154 }, { "epoch": 0.07423529411764705, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.634, "step": 3155 }, { "epoch": 0.07425882352941177, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6458, "step": 3156 }, { "epoch": 0.07428235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5754, "step": 3157 }, { "epoch": 0.07430588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4831, "step": 3158 }, { "epoch": 0.07432941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4453, "step": 3159 }, { "epoch": 0.0743529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.5052, "step": 3160 }, { "epoch": 0.0743764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.6214, "step": 3161 }, { "epoch": 0.0744, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6689, "step": 3162 }, { "epoch": 0.07442352941176471, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.5626, "step": 3163 }, { "epoch": 0.07444705882352941, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3058, "step": 3164 }, { "epoch": 0.07447058823529412, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2675, "step": 3165 }, { "epoch": 0.07449411764705882, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5955, "step": 3166 }, { "epoch": 0.07451764705882354, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.4064, "step": 3167 }, { "epoch": 0.07454117647058824, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.3706, "step": 3168 }, { "epoch": 0.07456470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3683, "step": 3169 }, { "epoch": 0.07458823529411765, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3849, "step": 3170 }, { "epoch": 0.07461176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6331, "step": 3171 }, { "epoch": 0.07463529411764706, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.4577, "step": 3172 }, { "epoch": 0.07465882352941176, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3686, "step": 3173 }, { "epoch": 0.07468235294117648, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4439, "step": 3174 }, { "epoch": 0.07470588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6298, "step": 3175 }, { "epoch": 0.07472941176470588, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.5029, "step": 3176 }, { "epoch": 0.07475294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.6037, "step": 3177 }, { "epoch": 0.07477647058823529, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4732, "step": 3178 }, { "epoch": 0.0748, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3337, "step": 3179 }, { "epoch": 0.0748235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.5022, "step": 3180 }, { "epoch": 0.0748470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2928, "step": 3181 }, { "epoch": 0.07487058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2945, "step": 3182 }, { "epoch": 0.07489411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.621, "step": 3183 }, { "epoch": 0.07491764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1935, "step": 3184 }, { "epoch": 0.07494117647058823, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5839, "step": 3185 }, { "epoch": 0.07496470588235295, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.4223, "step": 3186 }, { "epoch": 0.07498823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5692, "step": 3187 }, { "epoch": 0.07501176470588235, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5474, "step": 3188 }, { "epoch": 0.07503529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4981, "step": 3189 }, { "epoch": 0.07505882352941176, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2305, "step": 3190 }, { "epoch": 0.07508235294117648, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4718, "step": 3191 }, { "epoch": 0.07510588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2643, "step": 3192 }, { "epoch": 0.07512941176470589, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.3164, "step": 3193 }, { "epoch": 0.07515294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.575, "step": 3194 }, { "epoch": 0.07517647058823529, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.5499, "step": 3195 }, { "epoch": 0.0752, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3992, "step": 3196 }, { "epoch": 0.0752235294117647, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4521, "step": 3197 }, { "epoch": 0.07524705882352942, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4311, "step": 3198 }, { "epoch": 0.07527058823529412, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.5552, "step": 3199 }, { "epoch": 0.07529411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3523, "step": 3200 }, { "epoch": 0.07531764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4386, "step": 3201 }, { "epoch": 0.07534117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3983, "step": 3202 }, { "epoch": 0.07536470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4747, "step": 3203 }, { "epoch": 0.07538823529411764, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.8639, "step": 3204 }, { "epoch": 0.07541176470588236, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.413, "step": 3205 }, { "epoch": 0.07543529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.428, "step": 3206 }, { "epoch": 0.07545882352941176, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5913, "step": 3207 }, { "epoch": 0.07548235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.7188, "step": 3208 }, { "epoch": 0.07550588235294117, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4363, "step": 3209 }, { "epoch": 0.07552941176470589, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3953, "step": 3210 }, { "epoch": 0.07555294117647059, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5278, "step": 3211 }, { "epoch": 0.0755764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4783, "step": 3212 }, { "epoch": 0.0756, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2233, "step": 3213 }, { "epoch": 0.0756235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4532, "step": 3214 }, { "epoch": 0.07564705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.6873, "step": 3215 }, { "epoch": 0.07567058823529411, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6463, "step": 3216 }, { "epoch": 0.07569411764705883, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.529, "step": 3217 }, { "epoch": 0.07571764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.5214, "step": 3218 }, { "epoch": 0.07574117647058823, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2503, "step": 3219 }, { "epoch": 0.07576470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5059, "step": 3220 }, { "epoch": 0.07578823529411764, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.578, "step": 3221 }, { "epoch": 0.07581176470588236, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4335, "step": 3222 }, { "epoch": 0.07583529411764706, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3474, "step": 3223 }, { "epoch": 0.07585882352941177, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6597, "step": 3224 }, { "epoch": 0.07588235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1983, "step": 3225 }, { "epoch": 0.07590588235294117, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3343, "step": 3226 }, { "epoch": 0.07592941176470588, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2876, "step": 3227 }, { "epoch": 0.07595294117647058, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5323, "step": 3228 }, { "epoch": 0.0759764705882353, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.329, "step": 3229 }, { "epoch": 0.076, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4034, "step": 3230 }, { "epoch": 0.07602352941176471, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.3337, "step": 3231 }, { "epoch": 0.07604705882352941, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3032, "step": 3232 }, { "epoch": 0.07607058823529411, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.4817, "step": 3233 }, { "epoch": 0.07609411764705883, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4649, "step": 3234 }, { "epoch": 0.07611764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.6682, "step": 3235 }, { "epoch": 0.07614117647058824, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3806, "step": 3236 }, { "epoch": 0.07616470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4833, "step": 3237 }, { "epoch": 0.07618823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5406, "step": 3238 }, { "epoch": 0.07621176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4517, "step": 3239 }, { "epoch": 0.07623529411764705, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.5196, "step": 3240 }, { "epoch": 0.07625882352941177, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2505, "step": 3241 }, { "epoch": 0.07628235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2592, "step": 3242 }, { "epoch": 0.07630588235294118, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4065, "step": 3243 }, { "epoch": 0.07632941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.6052, "step": 3244 }, { "epoch": 0.07635294117647058, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.277, "step": 3245 }, { "epoch": 0.0763764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5152, "step": 3246 }, { "epoch": 0.0764, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4709, "step": 3247 }, { "epoch": 0.07642352941176471, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4639, "step": 3248 }, { "epoch": 0.07644705882352941, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1088, "step": 3249 }, { "epoch": 0.07647058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2599, "step": 3250 }, { "epoch": 0.07649411764705882, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.4614, "step": 3251 }, { "epoch": 0.07651764705882352, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2749, "step": 3252 }, { "epoch": 0.07654117647058824, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.4031, "step": 3253 }, { "epoch": 0.07656470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4591, "step": 3254 }, { "epoch": 0.07658823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4163, "step": 3255 }, { "epoch": 0.07661176470588235, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4553, "step": 3256 }, { "epoch": 0.07663529411764707, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.603, "step": 3257 }, { "epoch": 0.07665882352941177, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2078, "step": 3258 }, { "epoch": 0.07668235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.4169, "step": 3259 }, { "epoch": 0.07670588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.5216, "step": 3260 }, { "epoch": 0.07672941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3995, "step": 3261 }, { "epoch": 0.0767529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5833, "step": 3262 }, { "epoch": 0.0767764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.568, "step": 3263 }, { "epoch": 0.0768, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.3163, "step": 3264 }, { "epoch": 0.07682352941176471, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6406, "step": 3265 }, { "epoch": 0.07684705882352941, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2362, "step": 3266 }, { "epoch": 0.07687058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4111, "step": 3267 }, { "epoch": 0.07689411764705882, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1575, "step": 3268 }, { "epoch": 0.07691764705882354, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3639, "step": 3269 }, { "epoch": 0.07694117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6111, "step": 3270 }, { "epoch": 0.07696470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.288, "step": 3271 }, { "epoch": 0.07698823529411765, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4855, "step": 3272 }, { "epoch": 0.07701176470588235, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2514, "step": 3273 }, { "epoch": 0.07703529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5455, "step": 3274 }, { "epoch": 0.07705882352941176, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3237, "step": 3275 }, { "epoch": 0.07708235294117648, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6192, "step": 3276 }, { "epoch": 0.07710588235294118, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3695, "step": 3277 }, { "epoch": 0.07712941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4347, "step": 3278 }, { "epoch": 0.07715294117647059, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2175, "step": 3279 }, { "epoch": 0.07717647058823529, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4102, "step": 3280 }, { "epoch": 0.0772, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.5096, "step": 3281 }, { "epoch": 0.0772235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5301, "step": 3282 }, { "epoch": 0.07724705882352942, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2901, "step": 3283 }, { "epoch": 0.07727058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5366, "step": 3284 }, { "epoch": 0.07729411764705882, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.5858, "step": 3285 }, { "epoch": 0.07731764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.7137, "step": 3286 }, { "epoch": 0.07734117647058823, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.4781, "step": 3287 }, { "epoch": 0.07736470588235295, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5425, "step": 3288 }, { "epoch": 0.07738823529411765, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5168, "step": 3289 }, { "epoch": 0.07741176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4691, "step": 3290 }, { "epoch": 0.07743529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5786, "step": 3291 }, { "epoch": 0.07745882352941176, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.7252, "step": 3292 }, { "epoch": 0.07748235294117647, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1714, "step": 3293 }, { "epoch": 0.07750588235294117, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5537, "step": 3294 }, { "epoch": 0.07752941176470589, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2212, "step": 3295 }, { "epoch": 0.07755294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6727, "step": 3296 }, { "epoch": 0.07757647058823529, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3651, "step": 3297 }, { "epoch": 0.0776, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4156, "step": 3298 }, { "epoch": 0.0776235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5379, "step": 3299 }, { "epoch": 0.07764705882352942, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4056, "step": 3300 }, { "epoch": 0.07767058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3458, "step": 3301 }, { "epoch": 0.07769411764705883, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5515, "step": 3302 }, { "epoch": 0.07771764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3971, "step": 3303 }, { "epoch": 0.07774117647058823, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4251, "step": 3304 }, { "epoch": 0.07776470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3081, "step": 3305 }, { "epoch": 0.07778823529411764, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5508, "step": 3306 }, { "epoch": 0.07781176470588236, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5419, "step": 3307 }, { "epoch": 0.07783529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.734, "step": 3308 }, { "epoch": 0.07785882352941176, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.158, "step": 3309 }, { "epoch": 0.07788235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5018, "step": 3310 }, { "epoch": 0.07790588235294117, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1853, "step": 3311 }, { "epoch": 0.07792941176470589, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.564, "step": 3312 }, { "epoch": 0.07795294117647059, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.4411, "step": 3313 }, { "epoch": 0.0779764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5521, "step": 3314 }, { "epoch": 0.078, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6477, "step": 3315 }, { "epoch": 0.0780235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5502, "step": 3316 }, { "epoch": 0.07804705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.508, "step": 3317 }, { "epoch": 0.07807058823529411, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4499, "step": 3318 }, { "epoch": 0.07809411764705883, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.645, "step": 3319 }, { "epoch": 0.07811764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.4081, "step": 3320 }, { "epoch": 0.07814117647058824, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3305, "step": 3321 }, { "epoch": 0.07816470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3893, "step": 3322 }, { "epoch": 0.07818823529411764, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2064, "step": 3323 }, { "epoch": 0.07821176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4192, "step": 3324 }, { "epoch": 0.07823529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4862, "step": 3325 }, { "epoch": 0.07825882352941177, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4738, "step": 3326 }, { "epoch": 0.07828235294117647, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.3746, "step": 3327 }, { "epoch": 0.07830588235294117, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4167, "step": 3328 }, { "epoch": 0.07832941176470588, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3019, "step": 3329 }, { "epoch": 0.07835294117647058, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4746, "step": 3330 }, { "epoch": 0.0783764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6707, "step": 3331 }, { "epoch": 0.0784, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3245, "step": 3332 }, { "epoch": 0.07842352941176471, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4762, "step": 3333 }, { "epoch": 0.07844705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.488, "step": 3334 }, { "epoch": 0.07847058823529411, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2288, "step": 3335 }, { "epoch": 0.07849411764705883, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4675, "step": 3336 }, { "epoch": 0.07851764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3368, "step": 3337 }, { "epoch": 0.07854117647058824, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3775, "step": 3338 }, { "epoch": 0.07856470588235294, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2256, "step": 3339 }, { "epoch": 0.07858823529411765, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2303, "step": 3340 }, { "epoch": 0.07861176470588235, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3092, "step": 3341 }, { "epoch": 0.07863529411764705, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3118, "step": 3342 }, { "epoch": 0.07865882352941177, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3685, "step": 3343 }, { "epoch": 0.07868235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5654, "step": 3344 }, { "epoch": 0.07870588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.6085, "step": 3345 }, { "epoch": 0.07872941176470588, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2372, "step": 3346 }, { "epoch": 0.0787529411764706, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.2622, "step": 3347 }, { "epoch": 0.0787764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.4992, "step": 3348 }, { "epoch": 0.0788, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2159, "step": 3349 }, { "epoch": 0.07882352941176471, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4387, "step": 3350 }, { "epoch": 0.07884705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.614, "step": 3351 }, { "epoch": 0.07887058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1573, "step": 3352 }, { "epoch": 0.07889411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3232, "step": 3353 }, { "epoch": 0.07891764705882352, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2347, "step": 3354 }, { "epoch": 0.07894117647058824, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2938, "step": 3355 }, { "epoch": 0.07896470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.6805, "step": 3356 }, { "epoch": 0.07898823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6099, "step": 3357 }, { "epoch": 0.07901176470588235, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5185, "step": 3358 }, { "epoch": 0.07903529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4069, "step": 3359 }, { "epoch": 0.07905882352941176, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3253, "step": 3360 }, { "epoch": 0.07908235294117646, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5556, "step": 3361 }, { "epoch": 0.07910588235294118, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.5311, "step": 3362 }, { "epoch": 0.07912941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6211, "step": 3363 }, { "epoch": 0.07915294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5252, "step": 3364 }, { "epoch": 0.07917647058823529, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4884, "step": 3365 }, { "epoch": 0.0792, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1427, "step": 3366 }, { "epoch": 0.0792235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2267, "step": 3367 }, { "epoch": 0.0792470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4096, "step": 3368 }, { "epoch": 0.07927058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4654, "step": 3369 }, { "epoch": 0.07929411764705882, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1758, "step": 3370 }, { "epoch": 0.07931764705882353, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1783, "step": 3371 }, { "epoch": 0.07934117647058823, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5476, "step": 3372 }, { "epoch": 0.07936470588235293, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2308, "step": 3373 }, { "epoch": 0.07938823529411765, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2498, "step": 3374 }, { "epoch": 0.07941176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5682, "step": 3375 }, { "epoch": 0.07943529411764706, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.0912, "step": 3376 }, { "epoch": 0.07945882352941176, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.6998, "step": 3377 }, { "epoch": 0.07948235294117648, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3134, "step": 3378 }, { "epoch": 0.07950588235294118, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3327, "step": 3379 }, { "epoch": 0.07952941176470588, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2661, "step": 3380 }, { "epoch": 0.07955294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3834, "step": 3381 }, { "epoch": 0.07957647058823529, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.38, "step": 3382 }, { "epoch": 0.0796, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4526, "step": 3383 }, { "epoch": 0.0796235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3758, "step": 3384 }, { "epoch": 0.07964705882352942, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5298, "step": 3385 }, { "epoch": 0.07967058823529412, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2365, "step": 3386 }, { "epoch": 0.07969411764705882, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2323, "step": 3387 }, { "epoch": 0.07971764705882353, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2493, "step": 3388 }, { "epoch": 0.07974117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.615, "step": 3389 }, { "epoch": 0.07976470588235295, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4763, "step": 3390 }, { "epoch": 0.07978823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4885, "step": 3391 }, { "epoch": 0.07981176470588236, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4427, "step": 3392 }, { "epoch": 0.07983529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5823, "step": 3393 }, { "epoch": 0.07985882352941176, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.3037, "step": 3394 }, { "epoch": 0.07988235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3441, "step": 3395 }, { "epoch": 0.07990588235294117, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3713, "step": 3396 }, { "epoch": 0.07992941176470589, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5041, "step": 3397 }, { "epoch": 0.07995294117647059, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1925, "step": 3398 }, { "epoch": 0.07997647058823529, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4141, "step": 3399 }, { "epoch": 0.08, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4185, "step": 3400 }, { "epoch": 0.0800235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3265, "step": 3401 }, { "epoch": 0.08004705882352942, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4447, "step": 3402 }, { "epoch": 0.08007058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4633, "step": 3403 }, { "epoch": 0.08009411764705883, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3229, "step": 3404 }, { "epoch": 0.08011764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5838, "step": 3405 }, { "epoch": 0.08014117647058823, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5638, "step": 3406 }, { "epoch": 0.08016470588235294, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3843, "step": 3407 }, { "epoch": 0.08018823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2729, "step": 3408 }, { "epoch": 0.08021176470588236, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3752, "step": 3409 }, { "epoch": 0.08023529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3325, "step": 3410 }, { "epoch": 0.08025882352941177, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4778, "step": 3411 }, { "epoch": 0.08028235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3312, "step": 3412 }, { "epoch": 0.08030588235294117, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2947, "step": 3413 }, { "epoch": 0.08032941176470589, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3187, "step": 3414 }, { "epoch": 0.08035294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.499, "step": 3415 }, { "epoch": 0.0803764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1749, "step": 3416 }, { "epoch": 0.0804, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2988, "step": 3417 }, { "epoch": 0.0804235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.4304, "step": 3418 }, { "epoch": 0.08044705882352941, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3887, "step": 3419 }, { "epoch": 0.08047058823529411, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.3284, "step": 3420 }, { "epoch": 0.08049411764705883, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2542, "step": 3421 }, { "epoch": 0.08051764705882353, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 1.1075, "step": 3422 }, { "epoch": 0.08054117647058824, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2947, "step": 3423 }, { "epoch": 0.08056470588235294, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 1.188, "step": 3424 }, { "epoch": 0.08058823529411764, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.6481, "step": 3425 }, { "epoch": 0.08061176470588235, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5723, "step": 3426 }, { "epoch": 0.08063529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5134, "step": 3427 }, { "epoch": 0.08065882352941177, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.5026, "step": 3428 }, { "epoch": 0.08068235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3747, "step": 3429 }, { "epoch": 0.08070588235294118, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4649, "step": 3430 }, { "epoch": 0.08072941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4892, "step": 3431 }, { "epoch": 0.08075294117647058, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5444, "step": 3432 }, { "epoch": 0.0807764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4931, "step": 3433 }, { "epoch": 0.0808, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3487, "step": 3434 }, { "epoch": 0.08082352941176471, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6582, "step": 3435 }, { "epoch": 0.08084705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4487, "step": 3436 }, { "epoch": 0.08087058823529411, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1374, "step": 3437 }, { "epoch": 0.08089411764705882, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3924, "step": 3438 }, { "epoch": 0.08091764705882352, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4411, "step": 3439 }, { "epoch": 0.08094117647058824, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.5068, "step": 3440 }, { "epoch": 0.08096470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.241, "step": 3441 }, { "epoch": 0.08098823529411765, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3238, "step": 3442 }, { "epoch": 0.08101176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3589, "step": 3443 }, { "epoch": 0.08103529411764705, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.5133, "step": 3444 }, { "epoch": 0.08105882352941177, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3019, "step": 3445 }, { "epoch": 0.08108235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4423, "step": 3446 }, { "epoch": 0.08110588235294118, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4647, "step": 3447 }, { "epoch": 0.08112941176470588, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.2797, "step": 3448 }, { "epoch": 0.0811529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6465, "step": 3449 }, { "epoch": 0.0811764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2745, "step": 3450 }, { "epoch": 0.0812, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.539, "step": 3451 }, { "epoch": 0.08122352941176471, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2488, "step": 3452 }, { "epoch": 0.08124705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5127, "step": 3453 }, { "epoch": 0.08127058823529412, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3055, "step": 3454 }, { "epoch": 0.08129411764705882, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1725, "step": 3455 }, { "epoch": 0.08131764705882354, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4522, "step": 3456 }, { "epoch": 0.08134117647058824, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1551, "step": 3457 }, { "epoch": 0.08136470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5098, "step": 3458 }, { "epoch": 0.08138823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.577, "step": 3459 }, { "epoch": 0.08141176470588235, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3266, "step": 3460 }, { "epoch": 0.08143529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4336, "step": 3461 }, { "epoch": 0.08145882352941176, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4515, "step": 3462 }, { "epoch": 0.08148235294117646, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2026, "step": 3463 }, { "epoch": 0.08150588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5929, "step": 3464 }, { "epoch": 0.08152941176470588, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1113, "step": 3465 }, { "epoch": 0.08155294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3304, "step": 3466 }, { "epoch": 0.08157647058823529, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.4042, "step": 3467 }, { "epoch": 0.0816, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4713, "step": 3468 }, { "epoch": 0.0816235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5133, "step": 3469 }, { "epoch": 0.0816470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3913, "step": 3470 }, { "epoch": 0.08167058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.526, "step": 3471 }, { "epoch": 0.08169411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.4421, "step": 3472 }, { "epoch": 0.08171764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5372, "step": 3473 }, { "epoch": 0.08174117647058823, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1965, "step": 3474 }, { "epoch": 0.08176470588235295, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3059, "step": 3475 }, { "epoch": 0.08178823529411765, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4995, "step": 3476 }, { "epoch": 0.08181176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4826, "step": 3477 }, { "epoch": 0.08183529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2651, "step": 3478 }, { "epoch": 0.08185882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5776, "step": 3479 }, { "epoch": 0.08188235294117648, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4566, "step": 3480 }, { "epoch": 0.08190588235294118, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1908, "step": 3481 }, { "epoch": 0.08192941176470588, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3692, "step": 3482 }, { "epoch": 0.08195294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3464, "step": 3483 }, { "epoch": 0.08197647058823529, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3471, "step": 3484 }, { "epoch": 0.082, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6082, "step": 3485 }, { "epoch": 0.0820235294117647, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3567, "step": 3486 }, { "epoch": 0.08204705882352942, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5277, "step": 3487 }, { "epoch": 0.08207058823529412, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3093, "step": 3488 }, { "epoch": 0.08209411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5058, "step": 3489 }, { "epoch": 0.08211764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.432, "step": 3490 }, { "epoch": 0.08214117647058823, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3303, "step": 3491 }, { "epoch": 0.08216470588235295, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2651, "step": 3492 }, { "epoch": 0.08218823529411765, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5387, "step": 3493 }, { "epoch": 0.08221176470588236, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2075, "step": 3494 }, { "epoch": 0.08223529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5436, "step": 3495 }, { "epoch": 0.08225882352941176, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2598, "step": 3496 }, { "epoch": 0.08228235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5012, "step": 3497 }, { "epoch": 0.08230588235294117, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.464, "step": 3498 }, { "epoch": 0.08232941176470589, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3667, "step": 3499 }, { "epoch": 0.08235294117647059, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5279, "step": 3500 }, { "epoch": 0.0823764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3635, "step": 3501 }, { "epoch": 0.0824, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4032, "step": 3502 }, { "epoch": 0.0824235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3973, "step": 3503 }, { "epoch": 0.08244705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3753, "step": 3504 }, { "epoch": 0.08247058823529411, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3696, "step": 3505 }, { "epoch": 0.08249411764705883, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4164, "step": 3506 }, { "epoch": 0.08251764705882353, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2223, "step": 3507 }, { "epoch": 0.08254117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5089, "step": 3508 }, { "epoch": 0.08256470588235294, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.1958, "step": 3509 }, { "epoch": 0.08258823529411764, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4334, "step": 3510 }, { "epoch": 0.08261176470588236, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4658, "step": 3511 }, { "epoch": 0.08263529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3201, "step": 3512 }, { "epoch": 0.08265882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5541, "step": 3513 }, { "epoch": 0.08268235294117647, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.1412, "step": 3514 }, { "epoch": 0.08270588235294117, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.554, "step": 3515 }, { "epoch": 0.08272941176470588, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.4457, "step": 3516 }, { "epoch": 0.08275294117647058, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5521, "step": 3517 }, { "epoch": 0.0827764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.557, "step": 3518 }, { "epoch": 0.0828, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4411, "step": 3519 }, { "epoch": 0.08282352941176471, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4297, "step": 3520 }, { "epoch": 0.08284705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3112, "step": 3521 }, { "epoch": 0.08287058823529411, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2156, "step": 3522 }, { "epoch": 0.08289411764705883, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3849, "step": 3523 }, { "epoch": 0.08291764705882353, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0023, "step": 3524 }, { "epoch": 0.08294117647058824, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4033, "step": 3525 }, { "epoch": 0.08296470588235294, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.3995, "step": 3526 }, { "epoch": 0.08298823529411764, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4203, "step": 3527 }, { "epoch": 0.08301176470588235, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.1742, "step": 3528 }, { "epoch": 0.08303529411764705, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4062, "step": 3529 }, { "epoch": 0.08305882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5096, "step": 3530 }, { "epoch": 0.08308235294117647, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.5208, "step": 3531 }, { "epoch": 0.08310588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5993, "step": 3532 }, { "epoch": 0.08312941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2063, "step": 3533 }, { "epoch": 0.08315294117647058, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.3356, "step": 3534 }, { "epoch": 0.0831764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.6359, "step": 3535 }, { "epoch": 0.0832, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.7033, "step": 3536 }, { "epoch": 0.08322352941176471, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4026, "step": 3537 }, { "epoch": 0.08324705882352941, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.291, "step": 3538 }, { "epoch": 0.08327058823529412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.428, "step": 3539 }, { "epoch": 0.08329411764705882, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.4304, "step": 3540 }, { "epoch": 0.08331764705882352, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6129, "step": 3541 }, { "epoch": 0.08334117647058824, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2646, "step": 3542 }, { "epoch": 0.08336470588235294, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1103, "step": 3543 }, { "epoch": 0.08338823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3992, "step": 3544 }, { "epoch": 0.08341176470588235, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1899, "step": 3545 }, { "epoch": 0.08343529411764705, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4984, "step": 3546 }, { "epoch": 0.08345882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.624, "step": 3547 }, { "epoch": 0.08348235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3134, "step": 3548 }, { "epoch": 0.08350588235294118, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.4133, "step": 3549 }, { "epoch": 0.08352941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3931, "step": 3550 }, { "epoch": 0.0835529411764706, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.4287, "step": 3551 }, { "epoch": 0.0835764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.6502, "step": 3552 }, { "epoch": 0.0836, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2099, "step": 3553 }, { "epoch": 0.08362352941176471, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.4693, "step": 3554 }, { "epoch": 0.08364705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5794, "step": 3555 }, { "epoch": 0.08367058823529412, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.4317, "step": 3556 }, { "epoch": 0.08369411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.552, "step": 3557 }, { "epoch": 0.08371764705882354, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.4623, "step": 3558 }, { "epoch": 0.08374117647058824, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5024, "step": 3559 }, { "epoch": 0.08376470588235294, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2536, "step": 3560 }, { "epoch": 0.08378823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2871, "step": 3561 }, { "epoch": 0.08381176470588235, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.336, "step": 3562 }, { "epoch": 0.08383529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.5385, "step": 3563 }, { "epoch": 0.08385882352941176, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2554, "step": 3564 }, { "epoch": 0.08388235294117648, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2181, "step": 3565 }, { "epoch": 0.08390588235294118, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.3536, "step": 3566 }, { "epoch": 0.08392941176470588, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.9547, "step": 3567 }, { "epoch": 0.08395294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6346, "step": 3568 }, { "epoch": 0.08397647058823529, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3974, "step": 3569 }, { "epoch": 0.084, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 1.1059, "step": 3570 }, { "epoch": 0.0840235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4769, "step": 3571 }, { "epoch": 0.0840470588235294, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.4089, "step": 3572 }, { "epoch": 0.08407058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.6489, "step": 3573 }, { "epoch": 0.08409411764705882, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2774, "step": 3574 }, { "epoch": 0.08411764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.6142, "step": 3575 }, { "epoch": 0.08414117647058823, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4495, "step": 3576 }, { "epoch": 0.08416470588235295, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.4377, "step": 3577 }, { "epoch": 0.08418823529411765, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3709, "step": 3578 }, { "epoch": 0.08421176470588235, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3378, "step": 3579 }, { "epoch": 0.08423529411764706, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 1.0326, "step": 3580 }, { "epoch": 0.08425882352941176, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1697, "step": 3581 }, { "epoch": 0.08428235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4047, "step": 3582 }, { "epoch": 0.08430588235294117, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1314, "step": 3583 }, { "epoch": 0.08432941176470589, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.5646, "step": 3584 }, { "epoch": 0.08435294117647059, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2827, "step": 3585 }, { "epoch": 0.08437647058823529, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 1.255, "step": 3586 }, { "epoch": 0.0844, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3843, "step": 3587 }, { "epoch": 0.0844235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3397, "step": 3588 }, { "epoch": 0.08444705882352942, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5174, "step": 3589 }, { "epoch": 0.08447058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.242, "step": 3590 }, { "epoch": 0.08449411764705882, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4984, "step": 3591 }, { "epoch": 0.08451764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6232, "step": 3592 }, { "epoch": 0.08454117647058823, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.4057, "step": 3593 }, { "epoch": 0.08456470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3029, "step": 3594 }, { "epoch": 0.08458823529411764, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1939, "step": 3595 }, { "epoch": 0.08461176470588236, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2118, "step": 3596 }, { "epoch": 0.08463529411764706, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9865, "step": 3597 }, { "epoch": 0.08465882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3213, "step": 3598 }, { "epoch": 0.08468235294117647, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.5676, "step": 3599 }, { "epoch": 0.08470588235294117, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.6395, "step": 3600 }, { "epoch": 0.08472941176470589, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.2301, "step": 3601 }, { "epoch": 0.08475294117647059, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.5236, "step": 3602 }, { "epoch": 0.0847764705882353, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.5189, "step": 3603 }, { "epoch": 0.0848, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.6573, "step": 3604 }, { "epoch": 0.0848235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.556, "step": 3605 }, { "epoch": 0.08484705882352941, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4704, "step": 3606 }, { "epoch": 0.08487058823529411, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2078, "step": 3607 }, { "epoch": 0.08489411764705883, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4991, "step": 3608 }, { "epoch": 0.08491764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4952, "step": 3609 }, { "epoch": 0.08494117647058824, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.308, "step": 3610 }, { "epoch": 0.08496470588235294, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3282, "step": 3611 }, { "epoch": 0.08498823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2656, "step": 3612 }, { "epoch": 0.08501176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2974, "step": 3613 }, { "epoch": 0.08503529411764706, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.3512, "step": 3614 }, { "epoch": 0.08505882352941177, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2643, "step": 3615 }, { "epoch": 0.08508235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6791, "step": 3616 }, { "epoch": 0.08510588235294117, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4217, "step": 3617 }, { "epoch": 0.08512941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3035, "step": 3618 }, { "epoch": 0.08515294117647058, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3178, "step": 3619 }, { "epoch": 0.0851764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3735, "step": 3620 }, { "epoch": 0.0852, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5329, "step": 3621 }, { "epoch": 0.08522352941176471, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3239, "step": 3622 }, { "epoch": 0.08524705882352941, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3464, "step": 3623 }, { "epoch": 0.08527058823529411, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2629, "step": 3624 }, { "epoch": 0.08529411764705883, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.554, "step": 3625 }, { "epoch": 0.08531764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.3162, "step": 3626 }, { "epoch": 0.08534117647058824, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3825, "step": 3627 }, { "epoch": 0.08536470588235294, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.229, "step": 3628 }, { "epoch": 0.08538823529411765, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3048, "step": 3629 }, { "epoch": 0.08541176470588235, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3599, "step": 3630 }, { "epoch": 0.08543529411764705, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2856, "step": 3631 }, { "epoch": 0.08545882352941177, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3892, "step": 3632 }, { "epoch": 0.08548235294117647, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3175, "step": 3633 }, { "epoch": 0.08550588235294118, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.5825, "step": 3634 }, { "epoch": 0.08552941176470588, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.4156, "step": 3635 }, { "epoch": 0.08555294117647058, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.5903, "step": 3636 }, { "epoch": 0.0855764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.535, "step": 3637 }, { "epoch": 0.0856, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3623, "step": 3638 }, { "epoch": 0.08562352941176471, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.5087, "step": 3639 }, { "epoch": 0.08564705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4316, "step": 3640 }, { "epoch": 0.08567058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.7218, "step": 3641 }, { "epoch": 0.08569411764705882, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0369, "step": 3642 }, { "epoch": 0.08571764705882352, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3101, "step": 3643 }, { "epoch": 0.08574117647058824, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4646, "step": 3644 }, { "epoch": 0.08576470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4663, "step": 3645 }, { "epoch": 0.08578823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5607, "step": 3646 }, { "epoch": 0.08581176470588235, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4514, "step": 3647 }, { "epoch": 0.08583529411764707, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.5513, "step": 3648 }, { "epoch": 0.08585882352941177, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4534, "step": 3649 }, { "epoch": 0.08588235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3119, "step": 3650 }, { "epoch": 0.08590588235294118, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6107, "step": 3651 }, { "epoch": 0.08592941176470588, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2649, "step": 3652 }, { "epoch": 0.0859529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5635, "step": 3653 }, { "epoch": 0.0859764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2513, "step": 3654 }, { "epoch": 0.086, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4216, "step": 3655 }, { "epoch": 0.08602352941176471, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.7263, "step": 3656 }, { "epoch": 0.08604705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3139, "step": 3657 }, { "epoch": 0.08607058823529412, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.5184, "step": 3658 }, { "epoch": 0.08609411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3665, "step": 3659 }, { "epoch": 0.08611764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5659, "step": 3660 }, { "epoch": 0.08614117647058823, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.528, "step": 3661 }, { "epoch": 0.08616470588235293, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1063, "step": 3662 }, { "epoch": 0.08618823529411765, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2178, "step": 3663 }, { "epoch": 0.08621176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4432, "step": 3664 }, { "epoch": 0.08623529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3425, "step": 3665 }, { "epoch": 0.08625882352941176, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.2113, "step": 3666 }, { "epoch": 0.08628235294117648, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4296, "step": 3667 }, { "epoch": 0.08630588235294118, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3742, "step": 3668 }, { "epoch": 0.08632941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2978, "step": 3669 }, { "epoch": 0.08635294117647059, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4006, "step": 3670 }, { "epoch": 0.08637647058823529, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2136, "step": 3671 }, { "epoch": 0.0864, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3638, "step": 3672 }, { "epoch": 0.0864235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2514, "step": 3673 }, { "epoch": 0.08644705882352942, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3827, "step": 3674 }, { "epoch": 0.08647058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4242, "step": 3675 }, { "epoch": 0.08649411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2825, "step": 3676 }, { "epoch": 0.08651764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2722, "step": 3677 }, { "epoch": 0.08654117647058823, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4401, "step": 3678 }, { "epoch": 0.08656470588235295, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.6128, "step": 3679 }, { "epoch": 0.08658823529411765, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1999, "step": 3680 }, { "epoch": 0.08661176470588235, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2274, "step": 3681 }, { "epoch": 0.08663529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.5795, "step": 3682 }, { "epoch": 0.08665882352941176, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2653, "step": 3683 }, { "epoch": 0.08668235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4854, "step": 3684 }, { "epoch": 0.08670588235294117, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3511, "step": 3685 }, { "epoch": 0.08672941176470589, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3818, "step": 3686 }, { "epoch": 0.08675294117647059, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2704, "step": 3687 }, { "epoch": 0.08677647058823529, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2636, "step": 3688 }, { "epoch": 0.0868, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5298, "step": 3689 }, { "epoch": 0.0868235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4205, "step": 3690 }, { "epoch": 0.08684705882352942, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4033, "step": 3691 }, { "epoch": 0.08687058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3322, "step": 3692 }, { "epoch": 0.08689411764705883, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.322, "step": 3693 }, { "epoch": 0.08691764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3522, "step": 3694 }, { "epoch": 0.08694117647058823, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5582, "step": 3695 }, { "epoch": 0.08696470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.7104, "step": 3696 }, { "epoch": 0.08698823529411764, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3979, "step": 3697 }, { "epoch": 0.08701176470588236, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2897, "step": 3698 }, { "epoch": 0.08703529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3673, "step": 3699 }, { "epoch": 0.08705882352941176, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3366, "step": 3700 }, { "epoch": 0.08708235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.554, "step": 3701 }, { "epoch": 0.08710588235294117, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2916, "step": 3702 }, { "epoch": 0.08712941176470589, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.4955, "step": 3703 }, { "epoch": 0.08715294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.714, "step": 3704 }, { "epoch": 0.0871764705882353, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2497, "step": 3705 }, { "epoch": 0.0872, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.5399, "step": 3706 }, { "epoch": 0.0872235294117647, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1702, "step": 3707 }, { "epoch": 0.08724705882352941, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4175, "step": 3708 }, { "epoch": 0.08727058823529411, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3609, "step": 3709 }, { "epoch": 0.08729411764705883, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.302, "step": 3710 }, { "epoch": 0.08731764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.6191, "step": 3711 }, { "epoch": 0.08734117647058824, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4343, "step": 3712 }, { "epoch": 0.08736470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5301, "step": 3713 }, { "epoch": 0.08738823529411764, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3866, "step": 3714 }, { "epoch": 0.08741176470588236, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.414, "step": 3715 }, { "epoch": 0.08743529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3252, "step": 3716 }, { "epoch": 0.08745882352941177, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3797, "step": 3717 }, { "epoch": 0.08748235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.5651, "step": 3718 }, { "epoch": 0.08750588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2904, "step": 3719 }, { "epoch": 0.08752941176470588, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.2368, "step": 3720 }, { "epoch": 0.08755294117647058, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2669, "step": 3721 }, { "epoch": 0.0875764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5924, "step": 3722 }, { "epoch": 0.0876, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4391, "step": 3723 }, { "epoch": 0.08762352941176471, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.3289, "step": 3724 }, { "epoch": 0.08764705882352941, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1213, "step": 3725 }, { "epoch": 0.08767058823529411, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.816, "step": 3726 }, { "epoch": 0.08769411764705883, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3568, "step": 3727 }, { "epoch": 0.08771764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4442, "step": 3728 }, { "epoch": 0.08774117647058824, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2856, "step": 3729 }, { "epoch": 0.08776470588235294, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.3621, "step": 3730 }, { "epoch": 0.08778823529411765, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2362, "step": 3731 }, { "epoch": 0.08781176470588235, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.34, "step": 3732 }, { "epoch": 0.08783529411764705, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2459, "step": 3733 }, { "epoch": 0.08785882352941177, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.4392, "step": 3734 }, { "epoch": 0.08788235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3632, "step": 3735 }, { "epoch": 0.08790588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.372, "step": 3736 }, { "epoch": 0.08792941176470588, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4577, "step": 3737 }, { "epoch": 0.0879529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4487, "step": 3738 }, { "epoch": 0.0879764705882353, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1823, "step": 3739 }, { "epoch": 0.088, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3954, "step": 3740 }, { "epoch": 0.08802352941176471, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.6559, "step": 3741 }, { "epoch": 0.08804705882352941, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3858, "step": 3742 }, { "epoch": 0.08807058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5021, "step": 3743 }, { "epoch": 0.08809411764705882, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5009, "step": 3744 }, { "epoch": 0.08811764705882352, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4344, "step": 3745 }, { "epoch": 0.08814117647058824, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3021, "step": 3746 }, { "epoch": 0.08816470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4081, "step": 3747 }, { "epoch": 0.08818823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.5671, "step": 3748 }, { "epoch": 0.08821176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4049, "step": 3749 }, { "epoch": 0.08823529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4711, "step": 3750 }, { "epoch": 0.08825882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3224, "step": 3751 }, { "epoch": 0.08828235294117646, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.437, "step": 3752 }, { "epoch": 0.08830588235294118, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5199, "step": 3753 }, { "epoch": 0.08832941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3902, "step": 3754 }, { "epoch": 0.08835294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5328, "step": 3755 }, { "epoch": 0.08837647058823529, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4534, "step": 3756 }, { "epoch": 0.0884, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6264, "step": 3757 }, { "epoch": 0.0884235294117647, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9784, "step": 3758 }, { "epoch": 0.0884470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5538, "step": 3759 }, { "epoch": 0.08847058823529412, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5603, "step": 3760 }, { "epoch": 0.08849411764705882, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.486, "step": 3761 }, { "epoch": 0.08851764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4039, "step": 3762 }, { "epoch": 0.08854117647058823, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3645, "step": 3763 }, { "epoch": 0.08856470588235293, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2865, "step": 3764 }, { "epoch": 0.08858823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4585, "step": 3765 }, { "epoch": 0.08861176470588235, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9395, "step": 3766 }, { "epoch": 0.08863529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3285, "step": 3767 }, { "epoch": 0.08865882352941176, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3679, "step": 3768 }, { "epoch": 0.08868235294117648, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4953, "step": 3769 }, { "epoch": 0.08870588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4511, "step": 3770 }, { "epoch": 0.08872941176470588, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.471, "step": 3771 }, { "epoch": 0.08875294117647059, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3205, "step": 3772 }, { "epoch": 0.08877647058823529, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.456, "step": 3773 }, { "epoch": 0.0888, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1986, "step": 3774 }, { "epoch": 0.0888235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.337, "step": 3775 }, { "epoch": 0.08884705882352942, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6449, "step": 3776 }, { "epoch": 0.08887058823529412, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4609, "step": 3777 }, { "epoch": 0.08889411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2045, "step": 3778 }, { "epoch": 0.08891764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5196, "step": 3779 }, { "epoch": 0.08894117647058823, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2701, "step": 3780 }, { "epoch": 0.08896470588235295, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.357, "step": 3781 }, { "epoch": 0.08898823529411765, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.332, "step": 3782 }, { "epoch": 0.08901176470588236, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4961, "step": 3783 }, { "epoch": 0.08903529411764706, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.5313, "step": 3784 }, { "epoch": 0.08905882352941176, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6119, "step": 3785 }, { "epoch": 0.08908235294117647, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 1.1529, "step": 3786 }, { "epoch": 0.08910588235294117, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2681, "step": 3787 }, { "epoch": 0.08912941176470589, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.5864, "step": 3788 }, { "epoch": 0.08915294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.515, "step": 3789 }, { "epoch": 0.08917647058823529, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2141, "step": 3790 }, { "epoch": 0.0892, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2412, "step": 3791 }, { "epoch": 0.0892235294117647, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.2786, "step": 3792 }, { "epoch": 0.08924705882352942, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3499, "step": 3793 }, { "epoch": 0.08927058823529412, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1464, "step": 3794 }, { "epoch": 0.08929411764705883, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4281, "step": 3795 }, { "epoch": 0.08931764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3766, "step": 3796 }, { "epoch": 0.08934117647058823, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.276, "step": 3797 }, { "epoch": 0.08936470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.265, "step": 3798 }, { "epoch": 0.08938823529411764, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4199, "step": 3799 }, { "epoch": 0.08941176470588236, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3001, "step": 3800 }, { "epoch": 0.08943529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3975, "step": 3801 }, { "epoch": 0.08945882352941177, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2631, "step": 3802 }, { "epoch": 0.08948235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2153, "step": 3803 }, { "epoch": 0.08950588235294117, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2662, "step": 3804 }, { "epoch": 0.08952941176470589, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4943, "step": 3805 }, { "epoch": 0.08955294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4267, "step": 3806 }, { "epoch": 0.0895764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1844, "step": 3807 }, { "epoch": 0.0896, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3844, "step": 3808 }, { "epoch": 0.0896235294117647, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2903, "step": 3809 }, { "epoch": 0.08964705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.535, "step": 3810 }, { "epoch": 0.08967058823529411, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2301, "step": 3811 }, { "epoch": 0.08969411764705883, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3694, "step": 3812 }, { "epoch": 0.08971764705882353, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1552, "step": 3813 }, { "epoch": 0.08974117647058824, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.509, "step": 3814 }, { "epoch": 0.08976470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4254, "step": 3815 }, { "epoch": 0.08978823529411764, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4947, "step": 3816 }, { "epoch": 0.08981176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3094, "step": 3817 }, { "epoch": 0.08983529411764705, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.6437, "step": 3818 }, { "epoch": 0.08985882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4658, "step": 3819 }, { "epoch": 0.08988235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1684, "step": 3820 }, { "epoch": 0.08990588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5021, "step": 3821 }, { "epoch": 0.08992941176470588, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2012, "step": 3822 }, { "epoch": 0.08995294117647058, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4313, "step": 3823 }, { "epoch": 0.0899764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4812, "step": 3824 }, { "epoch": 0.09, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2596, "step": 3825 }, { "epoch": 0.09002352941176471, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4374, "step": 3826 }, { "epoch": 0.09004705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.5625, "step": 3827 }, { "epoch": 0.09007058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4448, "step": 3828 }, { "epoch": 0.09009411764705882, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3517, "step": 3829 }, { "epoch": 0.09011764705882352, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1787, "step": 3830 }, { "epoch": 0.09014117647058824, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3842, "step": 3831 }, { "epoch": 0.09016470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2655, "step": 3832 }, { "epoch": 0.09018823529411765, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.3425, "step": 3833 }, { "epoch": 0.09021176470588235, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4611, "step": 3834 }, { "epoch": 0.09023529411764705, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.543, "step": 3835 }, { "epoch": 0.09025882352941177, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3613, "step": 3836 }, { "epoch": 0.09028235294117647, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9595, "step": 3837 }, { "epoch": 0.09030588235294118, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5275, "step": 3838 }, { "epoch": 0.09032941176470588, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.4609, "step": 3839 }, { "epoch": 0.0903529411764706, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.1802, "step": 3840 }, { "epoch": 0.0903764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.458, "step": 3841 }, { "epoch": 0.0904, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.3129, "step": 3842 }, { "epoch": 0.09042352941176471, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1454, "step": 3843 }, { "epoch": 0.09044705882352941, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3313, "step": 3844 }, { "epoch": 0.09047058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3864, "step": 3845 }, { "epoch": 0.09049411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.6103, "step": 3846 }, { "epoch": 0.09051764705882354, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2626, "step": 3847 }, { "epoch": 0.09054117647058824, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4944, "step": 3848 }, { "epoch": 0.09056470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.3955, "step": 3849 }, { "epoch": 0.09058823529411765, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0746, "step": 3850 }, { "epoch": 0.09061176470588235, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1077, "step": 3851 }, { "epoch": 0.09063529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5096, "step": 3852 }, { "epoch": 0.09065882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4267, "step": 3853 }, { "epoch": 0.09068235294117646, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0713, "step": 3854 }, { "epoch": 0.09070588235294118, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2315, "step": 3855 }, { "epoch": 0.09072941176470588, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2419, "step": 3856 }, { "epoch": 0.09075294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.26, "step": 3857 }, { "epoch": 0.09077647058823529, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1648, "step": 3858 }, { "epoch": 0.0908, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0613, "step": 3859 }, { "epoch": 0.0908235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4599, "step": 3860 }, { "epoch": 0.0908470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2106, "step": 3861 }, { "epoch": 0.09087058823529412, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4289, "step": 3862 }, { "epoch": 0.09089411764705882, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.222, "step": 3863 }, { "epoch": 0.09091764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4522, "step": 3864 }, { "epoch": 0.09094117647058823, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3264, "step": 3865 }, { "epoch": 0.09096470588235295, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.22, "step": 3866 }, { "epoch": 0.09098823529411765, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4252, "step": 3867 }, { "epoch": 0.09101176470588235, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2286, "step": 3868 }, { "epoch": 0.09103529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5433, "step": 3869 }, { "epoch": 0.09105882352941176, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1464, "step": 3870 }, { "epoch": 0.09108235294117648, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.4615, "step": 3871 }, { "epoch": 0.09110588235294118, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.7398, "step": 3872 }, { "epoch": 0.09112941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4197, "step": 3873 }, { "epoch": 0.09115294117647059, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3514, "step": 3874 }, { "epoch": 0.09117647058823529, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4335, "step": 3875 }, { "epoch": 0.0912, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2339, "step": 3876 }, { "epoch": 0.0912235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4352, "step": 3877 }, { "epoch": 0.09124705882352942, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.5982, "step": 3878 }, { "epoch": 0.09127058823529412, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.5283, "step": 3879 }, { "epoch": 0.09129411764705882, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4564, "step": 3880 }, { "epoch": 0.09131764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5872, "step": 3881 }, { "epoch": 0.09134117647058823, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1904, "step": 3882 }, { "epoch": 0.09136470588235295, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3184, "step": 3883 }, { "epoch": 0.09138823529411765, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5574, "step": 3884 }, { "epoch": 0.09141176470588236, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3523, "step": 3885 }, { "epoch": 0.09143529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3583, "step": 3886 }, { "epoch": 0.09145882352941176, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0027, "step": 3887 }, { "epoch": 0.09148235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2863, "step": 3888 }, { "epoch": 0.09150588235294117, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4723, "step": 3889 }, { "epoch": 0.09152941176470589, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3283, "step": 3890 }, { "epoch": 0.09155294117647059, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0342, "step": 3891 }, { "epoch": 0.0915764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4876, "step": 3892 }, { "epoch": 0.0916, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3764, "step": 3893 }, { "epoch": 0.0916235294117647, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.4191, "step": 3894 }, { "epoch": 0.09164705882352941, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.6584, "step": 3895 }, { "epoch": 0.09167058823529411, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1857, "step": 3896 }, { "epoch": 0.09169411764705883, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2393, "step": 3897 }, { "epoch": 0.09171764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5783, "step": 3898 }, { "epoch": 0.09174117647058823, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3782, "step": 3899 }, { "epoch": 0.09176470588235294, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1542, "step": 3900 }, { "epoch": 0.09178823529411764, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.5784, "step": 3901 }, { "epoch": 0.09181176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4398, "step": 3902 }, { "epoch": 0.09183529411764706, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9794, "step": 3903 }, { "epoch": 0.09185882352941177, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5388, "step": 3904 }, { "epoch": 0.09188235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4182, "step": 3905 }, { "epoch": 0.09190588235294117, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4183, "step": 3906 }, { "epoch": 0.09192941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5792, "step": 3907 }, { "epoch": 0.09195294117647058, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.6703, "step": 3908 }, { "epoch": 0.0919764705882353, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2134, "step": 3909 }, { "epoch": 0.092, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4609, "step": 3910 }, { "epoch": 0.09202352941176471, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2879, "step": 3911 }, { "epoch": 0.09204705882352941, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3605, "step": 3912 }, { "epoch": 0.09207058823529411, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2382, "step": 3913 }, { "epoch": 0.09209411764705883, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.5373, "step": 3914 }, { "epoch": 0.09211764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3352, "step": 3915 }, { "epoch": 0.09214117647058824, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2594, "step": 3916 }, { "epoch": 0.09216470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4461, "step": 3917 }, { "epoch": 0.09218823529411764, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4243, "step": 3918 }, { "epoch": 0.09221176470588235, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3827, "step": 3919 }, { "epoch": 0.09223529411764705, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2781, "step": 3920 }, { "epoch": 0.09225882352941177, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.166, "step": 3921 }, { "epoch": 0.09228235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5131, "step": 3922 }, { "epoch": 0.09230588235294118, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.5831, "step": 3923 }, { "epoch": 0.09232941176470588, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2425, "step": 3924 }, { "epoch": 0.09235294117647058, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3255, "step": 3925 }, { "epoch": 0.0923764705882353, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3644, "step": 3926 }, { "epoch": 0.0924, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1866, "step": 3927 }, { "epoch": 0.09242352941176471, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2378, "step": 3928 }, { "epoch": 0.09244705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4971, "step": 3929 }, { "epoch": 0.09247058823529412, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9662, "step": 3930 }, { "epoch": 0.09249411764705882, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.0032, "step": 3931 }, { "epoch": 0.09251764705882352, "grad_norm": 1.53125, "learning_rate": 0.02, "loss": 1.1461, "step": 3932 }, { "epoch": 0.09254117647058824, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2306, "step": 3933 }, { "epoch": 0.09256470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.424, "step": 3934 }, { "epoch": 0.09258823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4104, "step": 3935 }, { "epoch": 0.09261176470588235, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6269, "step": 3936 }, { "epoch": 0.09263529411764707, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2919, "step": 3937 }, { "epoch": 0.09265882352941177, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5958, "step": 3938 }, { "epoch": 0.09268235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3573, "step": 3939 }, { "epoch": 0.09270588235294118, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2882, "step": 3940 }, { "epoch": 0.09272941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1957, "step": 3941 }, { "epoch": 0.0927529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3671, "step": 3942 }, { "epoch": 0.0927764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3928, "step": 3943 }, { "epoch": 0.0928, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.4069, "step": 3944 }, { "epoch": 0.09282352941176471, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3734, "step": 3945 }, { "epoch": 0.09284705882352941, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2902, "step": 3946 }, { "epoch": 0.09287058823529412, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9306, "step": 3947 }, { "epoch": 0.09289411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5928, "step": 3948 }, { "epoch": 0.09291764705882354, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1842, "step": 3949 }, { "epoch": 0.09294117647058824, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.0463, "step": 3950 }, { "epoch": 0.09296470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4844, "step": 3951 }, { "epoch": 0.09298823529411765, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.075, "step": 3952 }, { "epoch": 0.09301176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3898, "step": 3953 }, { "epoch": 0.09303529411764706, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2784, "step": 3954 }, { "epoch": 0.09305882352941176, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2699, "step": 3955 }, { "epoch": 0.09308235294117648, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4345, "step": 3956 }, { "epoch": 0.09310588235294118, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3781, "step": 3957 }, { "epoch": 0.09312941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3658, "step": 3958 }, { "epoch": 0.09315294117647059, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2143, "step": 3959 }, { "epoch": 0.09317647058823529, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3713, "step": 3960 }, { "epoch": 0.0932, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.3148, "step": 3961 }, { "epoch": 0.0932235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.5304, "step": 3962 }, { "epoch": 0.0932470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3132, "step": 3963 }, { "epoch": 0.09327058823529412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.375, "step": 3964 }, { "epoch": 0.09329411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4422, "step": 3965 }, { "epoch": 0.09331764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2464, "step": 3966 }, { "epoch": 0.09334117647058823, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3312, "step": 3967 }, { "epoch": 0.09336470588235295, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4311, "step": 3968 }, { "epoch": 0.09338823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1349, "step": 3969 }, { "epoch": 0.09341176470588235, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3736, "step": 3970 }, { "epoch": 0.09343529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2293, "step": 3971 }, { "epoch": 0.09345882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4389, "step": 3972 }, { "epoch": 0.09348235294117647, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2227, "step": 3973 }, { "epoch": 0.09350588235294117, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3618, "step": 3974 }, { "epoch": 0.09352941176470589, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4226, "step": 3975 }, { "epoch": 0.09355294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5262, "step": 3976 }, { "epoch": 0.09357647058823529, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.4641, "step": 3977 }, { "epoch": 0.0936, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2983, "step": 3978 }, { "epoch": 0.0936235294117647, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.4609, "step": 3979 }, { "epoch": 0.09364705882352942, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3486, "step": 3980 }, { "epoch": 0.09367058823529412, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1808, "step": 3981 }, { "epoch": 0.09369411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3083, "step": 3982 }, { "epoch": 0.09371764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.355, "step": 3983 }, { "epoch": 0.09374117647058823, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3296, "step": 3984 }, { "epoch": 0.09376470588235294, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.3607, "step": 3985 }, { "epoch": 0.09378823529411764, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3918, "step": 3986 }, { "epoch": 0.09381176470588236, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9987, "step": 3987 }, { "epoch": 0.09383529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.408, "step": 3988 }, { "epoch": 0.09385882352941176, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4606, "step": 3989 }, { "epoch": 0.09388235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3409, "step": 3990 }, { "epoch": 0.09390588235294117, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4761, "step": 3991 }, { "epoch": 0.09392941176470589, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4, "step": 3992 }, { "epoch": 0.09395294117647059, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.4677, "step": 3993 }, { "epoch": 0.0939764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4106, "step": 3994 }, { "epoch": 0.094, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.3188, "step": 3995 }, { "epoch": 0.0940235294117647, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0789, "step": 3996 }, { "epoch": 0.09404705882352941, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.4762, "step": 3997 }, { "epoch": 0.09407058823529411, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5691, "step": 3998 }, { "epoch": 0.09409411764705883, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3529, "step": 3999 }, { "epoch": 0.09411764705882353, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2231, "step": 4000 }, { "epoch": 0.09411764705882353, "eval_loss": 2.247562885284424, "eval_runtime": 679.3602, "eval_samples_per_second": 12.512, "eval_steps_per_second": 3.128, "step": 4000 }, { "epoch": 0.09414117647058824, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3159, "step": 4001 }, { "epoch": 0.09416470588235294, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.2281, "step": 4002 }, { "epoch": 0.09418823529411764, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5315, "step": 4003 }, { "epoch": 0.09421176470588236, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1394, "step": 4004 }, { "epoch": 0.09423529411764706, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1955, "step": 4005 }, { "epoch": 0.09425882352941177, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3713, "step": 4006 }, { "epoch": 0.09428235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3337, "step": 4007 }, { "epoch": 0.09430588235294117, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.223, "step": 4008 }, { "epoch": 0.09432941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4469, "step": 4009 }, { "epoch": 0.09435294117647058, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1236, "step": 4010 }, { "epoch": 0.0943764705882353, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.3553, "step": 4011 }, { "epoch": 0.0944, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2207, "step": 4012 }, { "epoch": 0.09442352941176471, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5981, "step": 4013 }, { "epoch": 0.09444705882352941, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1513, "step": 4014 }, { "epoch": 0.09447058823529411, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3182, "step": 4015 }, { "epoch": 0.09449411764705883, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3779, "step": 4016 }, { "epoch": 0.09451764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4293, "step": 4017 }, { "epoch": 0.09454117647058824, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.3111, "step": 4018 }, { "epoch": 0.09456470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2927, "step": 4019 }, { "epoch": 0.09458823529411765, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4257, "step": 4020 }, { "epoch": 0.09461176470588235, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2225, "step": 4021 }, { "epoch": 0.09463529411764705, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4639, "step": 4022 }, { "epoch": 0.09465882352941177, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.6524, "step": 4023 }, { "epoch": 0.09468235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.6222, "step": 4024 }, { "epoch": 0.09470588235294118, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2166, "step": 4025 }, { "epoch": 0.09472941176470588, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.6217, "step": 4026 }, { "epoch": 0.09475294117647058, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3302, "step": 4027 }, { "epoch": 0.0947764705882353, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2594, "step": 4028 }, { "epoch": 0.0948, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4964, "step": 4029 }, { "epoch": 0.09482352941176471, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3473, "step": 4030 }, { "epoch": 0.09484705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3116, "step": 4031 }, { "epoch": 0.09487058823529412, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0456, "step": 4032 }, { "epoch": 0.09489411764705882, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4366, "step": 4033 }, { "epoch": 0.09491764705882352, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3584, "step": 4034 }, { "epoch": 0.09494117647058824, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4148, "step": 4035 }, { "epoch": 0.09496470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.5063, "step": 4036 }, { "epoch": 0.09498823529411765, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.3153, "step": 4037 }, { "epoch": 0.09501176470588235, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2266, "step": 4038 }, { "epoch": 0.09503529411764707, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4304, "step": 4039 }, { "epoch": 0.09505882352941177, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5222, "step": 4040 }, { "epoch": 0.09508235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2399, "step": 4041 }, { "epoch": 0.09510588235294118, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3338, "step": 4042 }, { "epoch": 0.09512941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3359, "step": 4043 }, { "epoch": 0.09515294117647059, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1339, "step": 4044 }, { "epoch": 0.09517647058823529, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9399, "step": 4045 }, { "epoch": 0.0952, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4809, "step": 4046 }, { "epoch": 0.0952235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.5217, "step": 4047 }, { "epoch": 0.0952470588235294, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1601, "step": 4048 }, { "epoch": 0.09527058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3725, "step": 4049 }, { "epoch": 0.09529411764705882, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3222, "step": 4050 }, { "epoch": 0.09531764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.298, "step": 4051 }, { "epoch": 0.09534117647058823, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4012, "step": 4052 }, { "epoch": 0.09536470588235293, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.301, "step": 4053 }, { "epoch": 0.09538823529411765, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.417, "step": 4054 }, { "epoch": 0.09541176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4214, "step": 4055 }, { "epoch": 0.09543529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4102, "step": 4056 }, { "epoch": 0.09545882352941176, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2373, "step": 4057 }, { "epoch": 0.09548235294117648, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0895, "step": 4058 }, { "epoch": 0.09550588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3389, "step": 4059 }, { "epoch": 0.09552941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3462, "step": 4060 }, { "epoch": 0.09555294117647059, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4124, "step": 4061 }, { "epoch": 0.09557647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.6887, "step": 4062 }, { "epoch": 0.0956, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3054, "step": 4063 }, { "epoch": 0.0956235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.375, "step": 4064 }, { "epoch": 0.09564705882352942, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1082, "step": 4065 }, { "epoch": 0.09567058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2158, "step": 4066 }, { "epoch": 0.09569411764705882, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0962, "step": 4067 }, { "epoch": 0.09571764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2274, "step": 4068 }, { "epoch": 0.09574117647058823, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4446, "step": 4069 }, { "epoch": 0.09576470588235295, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3554, "step": 4070 }, { "epoch": 0.09578823529411765, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.276, "step": 4071 }, { "epoch": 0.09581176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3368, "step": 4072 }, { "epoch": 0.09583529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5874, "step": 4073 }, { "epoch": 0.09585882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5454, "step": 4074 }, { "epoch": 0.09588235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6291, "step": 4075 }, { "epoch": 0.09590588235294117, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2361, "step": 4076 }, { "epoch": 0.09592941176470589, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.0681, "step": 4077 }, { "epoch": 0.09595294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4149, "step": 4078 }, { "epoch": 0.09597647058823529, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.9431, "step": 4079 }, { "epoch": 0.096, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.5397, "step": 4080 }, { "epoch": 0.0960235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3475, "step": 4081 }, { "epoch": 0.09604705882352942, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3702, "step": 4082 }, { "epoch": 0.09607058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3336, "step": 4083 }, { "epoch": 0.09609411764705883, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.6716, "step": 4084 }, { "epoch": 0.09611764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4174, "step": 4085 }, { "epoch": 0.09614117647058823, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.5, "step": 4086 }, { "epoch": 0.09616470588235294, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.31, "step": 4087 }, { "epoch": 0.09618823529411764, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.2562, "step": 4088 }, { "epoch": 0.09621176470588236, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4994, "step": 4089 }, { "epoch": 0.09623529411764706, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3037, "step": 4090 }, { "epoch": 0.09625882352941176, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2163, "step": 4091 }, { "epoch": 0.09628235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.491, "step": 4092 }, { "epoch": 0.09630588235294117, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3689, "step": 4093 }, { "epoch": 0.09632941176470589, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4795, "step": 4094 }, { "epoch": 0.09635294117647059, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1587, "step": 4095 }, { "epoch": 0.0963764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.206, "step": 4096 }, { "epoch": 0.0964, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3925, "step": 4097 }, { "epoch": 0.0964235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3982, "step": 4098 }, { "epoch": 0.09644705882352941, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.5652, "step": 4099 }, { "epoch": 0.09647058823529411, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2988, "step": 4100 }, { "epoch": 0.09649411764705883, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.2942, "step": 4101 }, { "epoch": 0.09651764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4106, "step": 4102 }, { "epoch": 0.09654117647058824, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3547, "step": 4103 }, { "epoch": 0.09656470588235294, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3491, "step": 4104 }, { "epoch": 0.09658823529411764, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3587, "step": 4105 }, { "epoch": 0.09661176470588236, "grad_norm": 0.306640625, "learning_rate": 0.02, "loss": 1.1712, "step": 4106 }, { "epoch": 0.09663529411764706, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3446, "step": 4107 }, { "epoch": 0.09665882352941177, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.3509, "step": 4108 }, { "epoch": 0.09668235294117647, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3018, "step": 4109 }, { "epoch": 0.09670588235294118, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.1491, "step": 4110 }, { "epoch": 0.09672941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2709, "step": 4111 }, { "epoch": 0.09675294117647058, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 1.2372, "step": 4112 }, { "epoch": 0.0967764705882353, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.136, "step": 4113 }, { "epoch": 0.0968, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5731, "step": 4114 }, { "epoch": 0.09682352941176471, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.3941, "step": 4115 }, { "epoch": 0.09684705882352941, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3892, "step": 4116 }, { "epoch": 0.09687058823529411, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2828, "step": 4117 }, { "epoch": 0.09689411764705883, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.42, "step": 4118 }, { "epoch": 0.09691764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4016, "step": 4119 }, { "epoch": 0.09694117647058824, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.4791, "step": 4120 }, { "epoch": 0.09696470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5257, "step": 4121 }, { "epoch": 0.09698823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.344, "step": 4122 }, { "epoch": 0.09701176470588235, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4108, "step": 4123 }, { "epoch": 0.09703529411764705, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3017, "step": 4124 }, { "epoch": 0.09705882352941177, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.285, "step": 4125 }, { "epoch": 0.09708235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1896, "step": 4126 }, { "epoch": 0.09710588235294118, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.5443, "step": 4127 }, { "epoch": 0.09712941176470588, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.4068, "step": 4128 }, { "epoch": 0.0971529411764706, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3169, "step": 4129 }, { "epoch": 0.0971764705882353, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.3153, "step": 4130 }, { "epoch": 0.0972, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.13, "step": 4131 }, { "epoch": 0.09722352941176471, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.6509, "step": 4132 }, { "epoch": 0.09724705882352941, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0419, "step": 4133 }, { "epoch": 0.09727058823529412, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1775, "step": 4134 }, { "epoch": 0.09729411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2701, "step": 4135 }, { "epoch": 0.09731764705882352, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.3126, "step": 4136 }, { "epoch": 0.09734117647058824, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2364, "step": 4137 }, { "epoch": 0.09736470588235294, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3594, "step": 4138 }, { "epoch": 0.09738823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.559, "step": 4139 }, { "epoch": 0.09741176470588235, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.4089, "step": 4140 }, { "epoch": 0.09743529411764706, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 1.2216, "step": 4141 }, { "epoch": 0.09745882352941176, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3991, "step": 4142 }, { "epoch": 0.09748235294117646, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.476, "step": 4143 }, { "epoch": 0.09750588235294118, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2846, "step": 4144 }, { "epoch": 0.09752941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3981, "step": 4145 }, { "epoch": 0.09755294117647059, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2151, "step": 4146 }, { "epoch": 0.09757647058823529, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4391, "step": 4147 }, { "epoch": 0.0976, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.3659, "step": 4148 }, { "epoch": 0.0976235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3545, "step": 4149 }, { "epoch": 0.0976470588235294, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2945, "step": 4150 }, { "epoch": 0.09767058823529412, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1247, "step": 4151 }, { "epoch": 0.09769411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3705, "step": 4152 }, { "epoch": 0.09771764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3348, "step": 4153 }, { "epoch": 0.09774117647058823, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1307, "step": 4154 }, { "epoch": 0.09776470588235295, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.6142, "step": 4155 }, { "epoch": 0.09778823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4196, "step": 4156 }, { "epoch": 0.09781176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5039, "step": 4157 }, { "epoch": 0.09783529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.6139, "step": 4158 }, { "epoch": 0.09785882352941176, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.526, "step": 4159 }, { "epoch": 0.09788235294117648, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2597, "step": 4160 }, { "epoch": 0.09790588235294118, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4365, "step": 4161 }, { "epoch": 0.09792941176470588, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1199, "step": 4162 }, { "epoch": 0.09795294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4135, "step": 4163 }, { "epoch": 0.09797647058823529, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4363, "step": 4164 }, { "epoch": 0.098, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3182, "step": 4165 }, { "epoch": 0.0980235294117647, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.184, "step": 4166 }, { "epoch": 0.09804705882352942, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.5492, "step": 4167 }, { "epoch": 0.09807058823529412, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2288, "step": 4168 }, { "epoch": 0.09809411764705882, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0529, "step": 4169 }, { "epoch": 0.09811764705882353, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3534, "step": 4170 }, { "epoch": 0.09814117647058823, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3413, "step": 4171 }, { "epoch": 0.09816470588235295, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0964, "step": 4172 }, { "epoch": 0.09818823529411765, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3139, "step": 4173 }, { "epoch": 0.09821176470588236, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4365, "step": 4174 }, { "epoch": 0.09823529411764706, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2565, "step": 4175 }, { "epoch": 0.09825882352941176, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4684, "step": 4176 }, { "epoch": 0.09828235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.353, "step": 4177 }, { "epoch": 0.09830588235294117, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.4059, "step": 4178 }, { "epoch": 0.09832941176470589, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2813, "step": 4179 }, { "epoch": 0.09835294117647059, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3442, "step": 4180 }, { "epoch": 0.09837647058823529, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3912, "step": 4181 }, { "epoch": 0.0984, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.433, "step": 4182 }, { "epoch": 0.0984235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2719, "step": 4183 }, { "epoch": 0.09844705882352942, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0693, "step": 4184 }, { "epoch": 0.09847058823529412, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3665, "step": 4185 }, { "epoch": 0.09849411764705883, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1917, "step": 4186 }, { "epoch": 0.09851764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4511, "step": 4187 }, { "epoch": 0.09854117647058823, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.7665, "step": 4188 }, { "epoch": 0.09856470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4962, "step": 4189 }, { "epoch": 0.09858823529411764, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5639, "step": 4190 }, { "epoch": 0.09861176470588236, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3159, "step": 4191 }, { "epoch": 0.09863529411764706, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4123, "step": 4192 }, { "epoch": 0.09865882352941177, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4653, "step": 4193 }, { "epoch": 0.09868235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.6195, "step": 4194 }, { "epoch": 0.09870588235294117, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1732, "step": 4195 }, { "epoch": 0.09872941176470588, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.4168, "step": 4196 }, { "epoch": 0.09875294117647059, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0854, "step": 4197 }, { "epoch": 0.0987764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4475, "step": 4198 }, { "epoch": 0.0988, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5135, "step": 4199 }, { "epoch": 0.0988235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3736, "step": 4200 }, { "epoch": 0.09884705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4432, "step": 4201 }, { "epoch": 0.09887058823529411, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.085, "step": 4202 }, { "epoch": 0.09889411764705883, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4077, "step": 4203 }, { "epoch": 0.09891764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3615, "step": 4204 }, { "epoch": 0.09894117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4109, "step": 4205 }, { "epoch": 0.09896470588235294, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3615, "step": 4206 }, { "epoch": 0.09898823529411764, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1183, "step": 4207 }, { "epoch": 0.09901176470588235, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3878, "step": 4208 }, { "epoch": 0.09903529411764705, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5344, "step": 4209 }, { "epoch": 0.09905882352941177, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.0728, "step": 4210 }, { "epoch": 0.09908235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5686, "step": 4211 }, { "epoch": 0.09910588235294118, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1008, "step": 4212 }, { "epoch": 0.09912941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4804, "step": 4213 }, { "epoch": 0.09915294117647058, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2866, "step": 4214 }, { "epoch": 0.0991764705882353, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.5086, "step": 4215 }, { "epoch": 0.0992, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.4923, "step": 4216 }, { "epoch": 0.09922352941176471, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1072, "step": 4217 }, { "epoch": 0.09924705882352941, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.381, "step": 4218 }, { "epoch": 0.09927058823529412, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3739, "step": 4219 }, { "epoch": 0.09929411764705882, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.331, "step": 4220 }, { "epoch": 0.09931764705882352, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.3788, "step": 4221 }, { "epoch": 0.09934117647058824, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3415, "step": 4222 }, { "epoch": 0.09936470588235294, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1256, "step": 4223 }, { "epoch": 0.09938823529411765, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3981, "step": 4224 }, { "epoch": 0.09941176470588235, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4704, "step": 4225 }, { "epoch": 0.09943529411764705, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4885, "step": 4226 }, { "epoch": 0.09945882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1565, "step": 4227 }, { "epoch": 0.09948235294117647, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3259, "step": 4228 }, { "epoch": 0.09950588235294118, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3698, "step": 4229 }, { "epoch": 0.09952941176470588, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.3253, "step": 4230 }, { "epoch": 0.0995529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3207, "step": 4231 }, { "epoch": 0.0995764705882353, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1027, "step": 4232 }, { "epoch": 0.0996, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5623, "step": 4233 }, { "epoch": 0.09962352941176471, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4372, "step": 4234 }, { "epoch": 0.09964705882352941, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.444, "step": 4235 }, { "epoch": 0.09967058823529412, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3384, "step": 4236 }, { "epoch": 0.09969411764705882, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.575, "step": 4237 }, { "epoch": 0.09971764705882354, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2381, "step": 4238 }, { "epoch": 0.09974117647058824, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.2656, "step": 4239 }, { "epoch": 0.09976470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2503, "step": 4240 }, { "epoch": 0.09978823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.7421, "step": 4241 }, { "epoch": 0.09981176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.259, "step": 4242 }, { "epoch": 0.09983529411764706, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.2935, "step": 4243 }, { "epoch": 0.09985882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4361, "step": 4244 }, { "epoch": 0.09988235294117646, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2712, "step": 4245 }, { "epoch": 0.09990588235294118, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3412, "step": 4246 }, { "epoch": 0.09992941176470588, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2231, "step": 4247 }, { "epoch": 0.09995294117647059, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.209, "step": 4248 }, { "epoch": 0.09997647058823529, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2109, "step": 4249 }, { "epoch": 0.1, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1838, "step": 4250 }, { "epoch": 0.1000235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.205, "step": 4251 }, { "epoch": 0.1000470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1823, "step": 4252 }, { "epoch": 0.10007058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1726, "step": 4253 }, { "epoch": 0.10009411764705882, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3867, "step": 4254 }, { "epoch": 0.10011764705882353, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2761, "step": 4255 }, { "epoch": 0.10014117647058823, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.091, "step": 4256 }, { "epoch": 0.10016470588235295, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3948, "step": 4257 }, { "epoch": 0.10018823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1602, "step": 4258 }, { "epoch": 0.10021176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3003, "step": 4259 }, { "epoch": 0.10023529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4466, "step": 4260 }, { "epoch": 0.10025882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4948, "step": 4261 }, { "epoch": 0.10028235294117648, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.175, "step": 4262 }, { "epoch": 0.10030588235294118, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.278, "step": 4263 }, { "epoch": 0.10032941176470589, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2682, "step": 4264 }, { "epoch": 0.10035294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.5596, "step": 4265 }, { "epoch": 0.10037647058823529, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3917, "step": 4266 }, { "epoch": 0.1004, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1045, "step": 4267 }, { "epoch": 0.1004235294117647, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1815, "step": 4268 }, { "epoch": 0.10044705882352942, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1567, "step": 4269 }, { "epoch": 0.10047058823529412, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4569, "step": 4270 }, { "epoch": 0.10049411764705882, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2674, "step": 4271 }, { "epoch": 0.10051764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3429, "step": 4272 }, { "epoch": 0.10054117647058823, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1682, "step": 4273 }, { "epoch": 0.10056470588235294, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4472, "step": 4274 }, { "epoch": 0.10058823529411764, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.146, "step": 4275 }, { "epoch": 0.10061176470588236, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0837, "step": 4276 }, { "epoch": 0.10063529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5192, "step": 4277 }, { "epoch": 0.10065882352941176, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2742, "step": 4278 }, { "epoch": 0.10068235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0814, "step": 4279 }, { "epoch": 0.10070588235294117, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1528, "step": 4280 }, { "epoch": 0.10072941176470589, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3644, "step": 4281 }, { "epoch": 0.10075294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3237, "step": 4282 }, { "epoch": 0.1007764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.6213, "step": 4283 }, { "epoch": 0.1008, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4278, "step": 4284 }, { "epoch": 0.1008235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3208, "step": 4285 }, { "epoch": 0.10084705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5712, "step": 4286 }, { "epoch": 0.10087058823529411, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3085, "step": 4287 }, { "epoch": 0.10089411764705883, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3719, "step": 4288 }, { "epoch": 0.10091764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2344, "step": 4289 }, { "epoch": 0.10094117647058823, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4281, "step": 4290 }, { "epoch": 0.10096470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.381, "step": 4291 }, { "epoch": 0.10098823529411764, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4332, "step": 4292 }, { "epoch": 0.10101176470588236, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3343, "step": 4293 }, { "epoch": 0.10103529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2892, "step": 4294 }, { "epoch": 0.10105882352941177, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4921, "step": 4295 }, { "epoch": 0.10108235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2762, "step": 4296 }, { "epoch": 0.10110588235294117, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4553, "step": 4297 }, { "epoch": 0.10112941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4487, "step": 4298 }, { "epoch": 0.10115294117647058, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5143, "step": 4299 }, { "epoch": 0.1011764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3101, "step": 4300 }, { "epoch": 0.1012, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2974, "step": 4301 }, { "epoch": 0.10122352941176471, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4353, "step": 4302 }, { "epoch": 0.10124705882352941, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.5445, "step": 4303 }, { "epoch": 0.10127058823529411, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0365, "step": 4304 }, { "epoch": 0.10129411764705883, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3369, "step": 4305 }, { "epoch": 0.10131764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4975, "step": 4306 }, { "epoch": 0.10134117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3822, "step": 4307 }, { "epoch": 0.10136470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5116, "step": 4308 }, { "epoch": 0.10138823529411764, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.0353, "step": 4309 }, { "epoch": 0.10141176470588235, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.182, "step": 4310 }, { "epoch": 0.10143529411764705, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.2221, "step": 4311 }, { "epoch": 0.10145882352941177, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.5932, "step": 4312 }, { "epoch": 0.10148235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3415, "step": 4313 }, { "epoch": 0.10150588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.5099, "step": 4314 }, { "epoch": 0.10152941176470588, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.165, "step": 4315 }, { "epoch": 0.10155294117647058, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2695, "step": 4316 }, { "epoch": 0.1015764705882353, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.138, "step": 4317 }, { "epoch": 0.1016, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0121, "step": 4318 }, { "epoch": 0.10162352941176471, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2049, "step": 4319 }, { "epoch": 0.10164705882352941, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3445, "step": 4320 }, { "epoch": 0.10167058823529412, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9156, "step": 4321 }, { "epoch": 0.10169411764705882, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3136, "step": 4322 }, { "epoch": 0.10171764705882352, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1196, "step": 4323 }, { "epoch": 0.10174117647058824, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3184, "step": 4324 }, { "epoch": 0.10176470588235294, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0092, "step": 4325 }, { "epoch": 0.10178823529411765, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.346, "step": 4326 }, { "epoch": 0.10181176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.5397, "step": 4327 }, { "epoch": 0.10183529411764707, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2705, "step": 4328 }, { "epoch": 0.10185882352941177, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2411, "step": 4329 }, { "epoch": 0.10188235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2903, "step": 4330 }, { "epoch": 0.10190588235294118, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2774, "step": 4331 }, { "epoch": 0.10192941176470588, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.213, "step": 4332 }, { "epoch": 0.1019529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3613, "step": 4333 }, { "epoch": 0.1019764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.4506, "step": 4334 }, { "epoch": 0.102, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1584, "step": 4335 }, { "epoch": 0.10202352941176471, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.1953, "step": 4336 }, { "epoch": 0.10204705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2031, "step": 4337 }, { "epoch": 0.10207058823529412, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.1958, "step": 4338 }, { "epoch": 0.10209411764705882, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.544, "step": 4339 }, { "epoch": 0.10211764705882354, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1877, "step": 4340 }, { "epoch": 0.10214117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4394, "step": 4341 }, { "epoch": 0.10216470588235294, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3034, "step": 4342 }, { "epoch": 0.10218823529411765, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3666, "step": 4343 }, { "epoch": 0.10221176470588235, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.1542, "step": 4344 }, { "epoch": 0.10223529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2259, "step": 4345 }, { "epoch": 0.10225882352941176, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2913, "step": 4346 }, { "epoch": 0.10228235294117648, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1626, "step": 4347 }, { "epoch": 0.10230588235294118, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4988, "step": 4348 }, { "epoch": 0.10232941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.49, "step": 4349 }, { "epoch": 0.10235294117647059, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2996, "step": 4350 }, { "epoch": 0.10237647058823529, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2543, "step": 4351 }, { "epoch": 0.1024, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1678, "step": 4352 }, { "epoch": 0.1024235294117647, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3418, "step": 4353 }, { "epoch": 0.1024470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2958, "step": 4354 }, { "epoch": 0.10247058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4256, "step": 4355 }, { "epoch": 0.10249411764705882, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1916, "step": 4356 }, { "epoch": 0.10251764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2116, "step": 4357 }, { "epoch": 0.10254117647058823, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4695, "step": 4358 }, { "epoch": 0.10256470588235295, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.5098, "step": 4359 }, { "epoch": 0.10258823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4266, "step": 4360 }, { "epoch": 0.10261176470588235, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2897, "step": 4361 }, { "epoch": 0.10263529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3982, "step": 4362 }, { "epoch": 0.10265882352941176, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3191, "step": 4363 }, { "epoch": 0.10268235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2455, "step": 4364 }, { "epoch": 0.10270588235294117, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3807, "step": 4365 }, { "epoch": 0.10272941176470589, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3484, "step": 4366 }, { "epoch": 0.10275294117647059, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1734, "step": 4367 }, { "epoch": 0.10277647058823529, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.958, "step": 4368 }, { "epoch": 0.1028, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9601, "step": 4369 }, { "epoch": 0.1028235294117647, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1799, "step": 4370 }, { "epoch": 0.10284705882352942, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3232, "step": 4371 }, { "epoch": 0.10287058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4815, "step": 4372 }, { "epoch": 0.10289411764705883, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.218, "step": 4373 }, { "epoch": 0.10291764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4827, "step": 4374 }, { "epoch": 0.10294117647058823, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3456, "step": 4375 }, { "epoch": 0.10296470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5334, "step": 4376 }, { "epoch": 0.10298823529411764, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2032, "step": 4377 }, { "epoch": 0.10301176470588236, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3008, "step": 4378 }, { "epoch": 0.10303529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4611, "step": 4379 }, { "epoch": 0.10305882352941176, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1467, "step": 4380 }, { "epoch": 0.10308235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4603, "step": 4381 }, { "epoch": 0.10310588235294117, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.6028, "step": 4382 }, { "epoch": 0.10312941176470589, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1702, "step": 4383 }, { "epoch": 0.10315294117647059, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3555, "step": 4384 }, { "epoch": 0.1031764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3558, "step": 4385 }, { "epoch": 0.1032, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0948, "step": 4386 }, { "epoch": 0.1032235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4257, "step": 4387 }, { "epoch": 0.10324705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3803, "step": 4388 }, { "epoch": 0.10327058823529411, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1041, "step": 4389 }, { "epoch": 0.10329411764705883, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0502, "step": 4390 }, { "epoch": 0.10331764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4975, "step": 4391 }, { "epoch": 0.10334117647058824, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.197, "step": 4392 }, { "epoch": 0.10336470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.5084, "step": 4393 }, { "epoch": 0.10338823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4208, "step": 4394 }, { "epoch": 0.10341176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4037, "step": 4395 }, { "epoch": 0.10343529411764706, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.99, "step": 4396 }, { "epoch": 0.10345882352941177, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2229, "step": 4397 }, { "epoch": 0.10348235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3444, "step": 4398 }, { "epoch": 0.10350588235294117, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5114, "step": 4399 }, { "epoch": 0.10352941176470588, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.413, "step": 4400 }, { "epoch": 0.10355294117647058, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.1768, "step": 4401 }, { "epoch": 0.1035764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.372, "step": 4402 }, { "epoch": 0.1036, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2448, "step": 4403 }, { "epoch": 0.10362352941176471, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5577, "step": 4404 }, { "epoch": 0.10364705882352941, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.4488, "step": 4405 }, { "epoch": 0.10367058823529411, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.438, "step": 4406 }, { "epoch": 0.10369411764705883, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1093, "step": 4407 }, { "epoch": 0.10371764705882353, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1499, "step": 4408 }, { "epoch": 0.10374117647058824, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1876, "step": 4409 }, { "epoch": 0.10376470588235294, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1524, "step": 4410 }, { "epoch": 0.10378823529411765, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.5474, "step": 4411 }, { "epoch": 0.10381176470588235, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.3953, "step": 4412 }, { "epoch": 0.10383529411764705, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1791, "step": 4413 }, { "epoch": 0.10385882352941177, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.247, "step": 4414 }, { "epoch": 0.10388235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3392, "step": 4415 }, { "epoch": 0.10390588235294118, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3482, "step": 4416 }, { "epoch": 0.10392941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3489, "step": 4417 }, { "epoch": 0.10395294117647058, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0674, "step": 4418 }, { "epoch": 0.1039764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3126, "step": 4419 }, { "epoch": 0.104, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0801, "step": 4420 }, { "epoch": 0.10402352941176471, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2109, "step": 4421 }, { "epoch": 0.10404705882352941, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.3584, "step": 4422 }, { "epoch": 0.10407058823529412, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4881, "step": 4423 }, { "epoch": 0.10409411764705882, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3518, "step": 4424 }, { "epoch": 0.10411764705882352, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2439, "step": 4425 }, { "epoch": 0.10414117647058824, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2364, "step": 4426 }, { "epoch": 0.10416470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.5965, "step": 4427 }, { "epoch": 0.10418823529411765, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1459, "step": 4428 }, { "epoch": 0.10421176470588235, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.5427, "step": 4429 }, { "epoch": 0.10423529411764706, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0608, "step": 4430 }, { "epoch": 0.10425882352941176, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.29, "step": 4431 }, { "epoch": 0.10428235294117646, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1609, "step": 4432 }, { "epoch": 0.10430588235294118, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3149, "step": 4433 }, { "epoch": 0.10432941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5091, "step": 4434 }, { "epoch": 0.10435294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.349, "step": 4435 }, { "epoch": 0.10437647058823529, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.0931, "step": 4436 }, { "epoch": 0.1044, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1822, "step": 4437 }, { "epoch": 0.1044235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3339, "step": 4438 }, { "epoch": 0.1044470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0013, "step": 4439 }, { "epoch": 0.10447058823529412, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5192, "step": 4440 }, { "epoch": 0.10449411764705882, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4428, "step": 4441 }, { "epoch": 0.10451764705882353, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3095, "step": 4442 }, { "epoch": 0.10454117647058823, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4516, "step": 4443 }, { "epoch": 0.10456470588235293, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9806, "step": 4444 }, { "epoch": 0.10458823529411765, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4297, "step": 4445 }, { "epoch": 0.10461176470588235, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4033, "step": 4446 }, { "epoch": 0.10463529411764706, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2437, "step": 4447 }, { "epoch": 0.10465882352941176, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.2465, "step": 4448 }, { "epoch": 0.10468235294117648, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 1.0181, "step": 4449 }, { "epoch": 0.10470588235294118, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2566, "step": 4450 }, { "epoch": 0.10472941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3373, "step": 4451 }, { "epoch": 0.10475294117647059, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2337, "step": 4452 }, { "epoch": 0.10477647058823529, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2057, "step": 4453 }, { "epoch": 0.1048, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2028, "step": 4454 }, { "epoch": 0.1048235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3223, "step": 4455 }, { "epoch": 0.10484705882352942, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2338, "step": 4456 }, { "epoch": 0.10487058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3944, "step": 4457 }, { "epoch": 0.10489411764705882, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3734, "step": 4458 }, { "epoch": 0.10491764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2767, "step": 4459 }, { "epoch": 0.10494117647058823, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3083, "step": 4460 }, { "epoch": 0.10496470588235295, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4956, "step": 4461 }, { "epoch": 0.10498823529411765, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4943, "step": 4462 }, { "epoch": 0.10501176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4855, "step": 4463 }, { "epoch": 0.10503529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3245, "step": 4464 }, { "epoch": 0.10505882352941176, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4529, "step": 4465 }, { "epoch": 0.10508235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2923, "step": 4466 }, { "epoch": 0.10510588235294117, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.9987, "step": 4467 }, { "epoch": 0.10512941176470589, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2438, "step": 4468 }, { "epoch": 0.10515294117647059, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3249, "step": 4469 }, { "epoch": 0.10517647058823529, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2665, "step": 4470 }, { "epoch": 0.1052, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3144, "step": 4471 }, { "epoch": 0.1052235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3039, "step": 4472 }, { "epoch": 0.10524705882352942, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3368, "step": 4473 }, { "epoch": 0.10527058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2067, "step": 4474 }, { "epoch": 0.10529411764705883, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2524, "step": 4475 }, { "epoch": 0.10531764705882353, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1342, "step": 4476 }, { "epoch": 0.10534117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.379, "step": 4477 }, { "epoch": 0.10536470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2117, "step": 4478 }, { "epoch": 0.10538823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3415, "step": 4479 }, { "epoch": 0.10541176470588236, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2777, "step": 4480 }, { "epoch": 0.10543529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2167, "step": 4481 }, { "epoch": 0.10545882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5508, "step": 4482 }, { "epoch": 0.10548235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3201, "step": 4483 }, { "epoch": 0.10550588235294117, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3851, "step": 4484 }, { "epoch": 0.10552941176470589, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 0.8748, "step": 4485 }, { "epoch": 0.10555294117647059, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2465, "step": 4486 }, { "epoch": 0.1055764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4111, "step": 4487 }, { "epoch": 0.1056, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3672, "step": 4488 }, { "epoch": 0.1056235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5435, "step": 4489 }, { "epoch": 0.10564705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4836, "step": 4490 }, { "epoch": 0.10567058823529411, "grad_norm": 0.3046875, "learning_rate": 0.02, "loss": 0.8126, "step": 4491 }, { "epoch": 0.10569411764705883, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1486, "step": 4492 }, { "epoch": 0.10571764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3582, "step": 4493 }, { "epoch": 0.10574117647058824, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.0778, "step": 4494 }, { "epoch": 0.10576470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.389, "step": 4495 }, { "epoch": 0.10578823529411764, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5841, "step": 4496 }, { "epoch": 0.10581176470588236, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3946, "step": 4497 }, { "epoch": 0.10583529411764706, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0633, "step": 4498 }, { "epoch": 0.10585882352941177, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1892, "step": 4499 }, { "epoch": 0.10588235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2309, "step": 4500 }, { "epoch": 0.10590588235294118, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1148, "step": 4501 }, { "epoch": 0.10592941176470588, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.41, "step": 4502 }, { "epoch": 0.10595294117647058, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2874, "step": 4503 }, { "epoch": 0.1059764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2861, "step": 4504 }, { "epoch": 0.106, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.138, "step": 4505 }, { "epoch": 0.10602352941176471, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 0.9786, "step": 4506 }, { "epoch": 0.10604705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.6451, "step": 4507 }, { "epoch": 0.10607058823529411, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1784, "step": 4508 }, { "epoch": 0.10609411764705882, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8376, "step": 4509 }, { "epoch": 0.10611764705882352, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3758, "step": 4510 }, { "epoch": 0.10614117647058824, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3967, "step": 4511 }, { "epoch": 0.10616470588235294, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9266, "step": 4512 }, { "epoch": 0.10618823529411765, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.0922, "step": 4513 }, { "epoch": 0.10621176470588235, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2045, "step": 4514 }, { "epoch": 0.10623529411764705, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2113, "step": 4515 }, { "epoch": 0.10625882352941177, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5162, "step": 4516 }, { "epoch": 0.10628235294117647, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 1.0409, "step": 4517 }, { "epoch": 0.10630588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2739, "step": 4518 }, { "epoch": 0.10632941176470588, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.2268, "step": 4519 }, { "epoch": 0.1063529411764706, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3361, "step": 4520 }, { "epoch": 0.1063764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1509, "step": 4521 }, { "epoch": 0.1064, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3491, "step": 4522 }, { "epoch": 0.10642352941176471, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.279, "step": 4523 }, { "epoch": 0.10644705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3261, "step": 4524 }, { "epoch": 0.10647058823529412, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3044, "step": 4525 }, { "epoch": 0.10649411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3944, "step": 4526 }, { "epoch": 0.10651764705882354, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3169, "step": 4527 }, { "epoch": 0.10654117647058824, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3841, "step": 4528 }, { "epoch": 0.10656470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1691, "step": 4529 }, { "epoch": 0.10658823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3026, "step": 4530 }, { "epoch": 0.10661176470588235, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1224, "step": 4531 }, { "epoch": 0.10663529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4961, "step": 4532 }, { "epoch": 0.10665882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3958, "step": 4533 }, { "epoch": 0.10668235294117646, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4851, "step": 4534 }, { "epoch": 0.10670588235294118, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4215, "step": 4535 }, { "epoch": 0.10672941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2619, "step": 4536 }, { "epoch": 0.10675294117647059, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.226, "step": 4537 }, { "epoch": 0.10677647058823529, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4722, "step": 4538 }, { "epoch": 0.1068, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.006, "step": 4539 }, { "epoch": 0.1068235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3293, "step": 4540 }, { "epoch": 0.1068470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1897, "step": 4541 }, { "epoch": 0.10687058823529412, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2792, "step": 4542 }, { "epoch": 0.10689411764705882, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4154, "step": 4543 }, { "epoch": 0.10691764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3732, "step": 4544 }, { "epoch": 0.10694117647058823, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2429, "step": 4545 }, { "epoch": 0.10696470588235295, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.311, "step": 4546 }, { "epoch": 0.10698823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2238, "step": 4547 }, { "epoch": 0.10701176470588235, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.47, "step": 4548 }, { "epoch": 0.10703529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1993, "step": 4549 }, { "epoch": 0.10705882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2255, "step": 4550 }, { "epoch": 0.10708235294117648, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2584, "step": 4551 }, { "epoch": 0.10710588235294118, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1403, "step": 4552 }, { "epoch": 0.10712941176470588, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2511, "step": 4553 }, { "epoch": 0.10715294117647059, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9762, "step": 4554 }, { "epoch": 0.10717647058823529, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4594, "step": 4555 }, { "epoch": 0.1072, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5063, "step": 4556 }, { "epoch": 0.1072235294117647, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0082, "step": 4557 }, { "epoch": 0.10724705882352942, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3375, "step": 4558 }, { "epoch": 0.10727058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1539, "step": 4559 }, { "epoch": 0.10729411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3549, "step": 4560 }, { "epoch": 0.10731764705882353, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1882, "step": 4561 }, { "epoch": 0.10734117647058823, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4391, "step": 4562 }, { "epoch": 0.10736470588235295, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.4178, "step": 4563 }, { "epoch": 0.10738823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2617, "step": 4564 }, { "epoch": 0.10741176470588236, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0696, "step": 4565 }, { "epoch": 0.10743529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.235, "step": 4566 }, { "epoch": 0.10745882352941176, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0562, "step": 4567 }, { "epoch": 0.10748235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3405, "step": 4568 }, { "epoch": 0.10750588235294117, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3195, "step": 4569 }, { "epoch": 0.10752941176470589, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3109, "step": 4570 }, { "epoch": 0.10755294117647059, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1357, "step": 4571 }, { "epoch": 0.10757647058823529, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1144, "step": 4572 }, { "epoch": 0.1076, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3391, "step": 4573 }, { "epoch": 0.1076235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4354, "step": 4574 }, { "epoch": 0.10764705882352942, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4608, "step": 4575 }, { "epoch": 0.10767058823529412, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2602, "step": 4576 }, { "epoch": 0.10769411764705883, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0194, "step": 4577 }, { "epoch": 0.10771764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2927, "step": 4578 }, { "epoch": 0.10774117647058823, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3458, "step": 4579 }, { "epoch": 0.10776470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3084, "step": 4580 }, { "epoch": 0.10778823529411764, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2541, "step": 4581 }, { "epoch": 0.10781176470588236, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1576, "step": 4582 }, { "epoch": 0.10783529411764706, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0971, "step": 4583 }, { "epoch": 0.10785882352941177, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2608, "step": 4584 }, { "epoch": 0.10788235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3873, "step": 4585 }, { "epoch": 0.10790588235294117, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2434, "step": 4586 }, { "epoch": 0.10792941176470588, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1802, "step": 4587 }, { "epoch": 0.10795294117647058, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0869, "step": 4588 }, { "epoch": 0.1079764705882353, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.0216, "step": 4589 }, { "epoch": 0.108, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3886, "step": 4590 }, { "epoch": 0.10802352941176471, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.293, "step": 4591 }, { "epoch": 0.10804705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.155, "step": 4592 }, { "epoch": 0.10807058823529411, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.2809, "step": 4593 }, { "epoch": 0.10809411764705883, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0681, "step": 4594 }, { "epoch": 0.10811764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.3243, "step": 4595 }, { "epoch": 0.10814117647058824, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2631, "step": 4596 }, { "epoch": 0.10816470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2672, "step": 4597 }, { "epoch": 0.10818823529411764, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2509, "step": 4598 }, { "epoch": 0.10821176470588235, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.2629, "step": 4599 }, { "epoch": 0.10823529411764705, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.4851, "step": 4600 }, { "epoch": 0.10825882352941177, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.376, "step": 4601 }, { "epoch": 0.10828235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.329, "step": 4602 }, { "epoch": 0.10830588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.4483, "step": 4603 }, { "epoch": 0.10832941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4359, "step": 4604 }, { "epoch": 0.10835294117647058, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.0864, "step": 4605 }, { "epoch": 0.1083764705882353, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.356, "step": 4606 }, { "epoch": 0.1084, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3145, "step": 4607 }, { "epoch": 0.10842352941176471, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1337, "step": 4608 }, { "epoch": 0.10844705882352941, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.277, "step": 4609 }, { "epoch": 0.10847058823529412, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.209, "step": 4610 }, { "epoch": 0.10849411764705882, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2323, "step": 4611 }, { "epoch": 0.10851764705882352, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3221, "step": 4612 }, { "epoch": 0.10854117647058824, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.7745, "step": 4613 }, { "epoch": 0.10856470588235294, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2684, "step": 4614 }, { "epoch": 0.10858823529411765, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1934, "step": 4615 }, { "epoch": 0.10861176470588235, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3548, "step": 4616 }, { "epoch": 0.10863529411764705, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1408, "step": 4617 }, { "epoch": 0.10865882352941177, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2381, "step": 4618 }, { "epoch": 0.10868235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3089, "step": 4619 }, { "epoch": 0.10870588235294118, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1212, "step": 4620 }, { "epoch": 0.10872941176470588, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1143, "step": 4621 }, { "epoch": 0.1087529411764706, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0678, "step": 4622 }, { "epoch": 0.1087764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5258, "step": 4623 }, { "epoch": 0.1088, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3115, "step": 4624 }, { "epoch": 0.10882352941176471, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.0892, "step": 4625 }, { "epoch": 0.10884705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2448, "step": 4626 }, { "epoch": 0.10887058823529412, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.3128, "step": 4627 }, { "epoch": 0.10889411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3507, "step": 4628 }, { "epoch": 0.10891764705882354, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1485, "step": 4629 }, { "epoch": 0.10894117647058824, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3127, "step": 4630 }, { "epoch": 0.10896470588235294, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1869, "step": 4631 }, { "epoch": 0.10898823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.3146, "step": 4632 }, { "epoch": 0.10901176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3009, "step": 4633 }, { "epoch": 0.10903529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.557, "step": 4634 }, { "epoch": 0.10905882352941176, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3607, "step": 4635 }, { "epoch": 0.10908235294117648, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3484, "step": 4636 }, { "epoch": 0.10910588235294118, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1739, "step": 4637 }, { "epoch": 0.10912941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4161, "step": 4638 }, { "epoch": 0.10915294117647059, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1017, "step": 4639 }, { "epoch": 0.10917647058823529, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2912, "step": 4640 }, { "epoch": 0.1092, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.2625, "step": 4641 }, { "epoch": 0.1092235294117647, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1747, "step": 4642 }, { "epoch": 0.1092470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4382, "step": 4643 }, { "epoch": 0.10927058823529412, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1772, "step": 4644 }, { "epoch": 0.10929411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3385, "step": 4645 }, { "epoch": 0.10931764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0679, "step": 4646 }, { "epoch": 0.10934117647058823, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4023, "step": 4647 }, { "epoch": 0.10936470588235295, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9918, "step": 4648 }, { "epoch": 0.10938823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1291, "step": 4649 }, { "epoch": 0.10941176470588235, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0697, "step": 4650 }, { "epoch": 0.10943529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4895, "step": 4651 }, { "epoch": 0.10945882352941176, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.132, "step": 4652 }, { "epoch": 0.10948235294117648, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1291, "step": 4653 }, { "epoch": 0.10950588235294118, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.403, "step": 4654 }, { "epoch": 0.10952941176470589, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1188, "step": 4655 }, { "epoch": 0.10955294117647059, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1735, "step": 4656 }, { "epoch": 0.10957647058823529, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3072, "step": 4657 }, { "epoch": 0.1096, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.364, "step": 4658 }, { "epoch": 0.1096235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1688, "step": 4659 }, { "epoch": 0.10964705882352942, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2369, "step": 4660 }, { "epoch": 0.10967058823529412, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.4147, "step": 4661 }, { "epoch": 0.10969411764705882, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.2313, "step": 4662 }, { "epoch": 0.10971764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4934, "step": 4663 }, { "epoch": 0.10974117647058823, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.1079, "step": 4664 }, { "epoch": 0.10976470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2685, "step": 4665 }, { "epoch": 0.10978823529411764, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.487, "step": 4666 }, { "epoch": 0.10981176470588236, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.4113, "step": 4667 }, { "epoch": 0.10983529411764706, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2824, "step": 4668 }, { "epoch": 0.10985882352941176, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1249, "step": 4669 }, { "epoch": 0.10988235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3102, "step": 4670 }, { "epoch": 0.10990588235294117, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.0227, "step": 4671 }, { "epoch": 0.10992941176470589, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4039, "step": 4672 }, { "epoch": 0.10995294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3117, "step": 4673 }, { "epoch": 0.1099764705882353, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.345, "step": 4674 }, { "epoch": 0.11, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.7072, "step": 4675 }, { "epoch": 0.1100235294117647, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2617, "step": 4676 }, { "epoch": 0.11004705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2462, "step": 4677 }, { "epoch": 0.11007058823529411, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2901, "step": 4678 }, { "epoch": 0.11009411764705883, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4903, "step": 4679 }, { "epoch": 0.11011764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.3365, "step": 4680 }, { "epoch": 0.11014117647058823, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1133, "step": 4681 }, { "epoch": 0.11016470588235294, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.26, "step": 4682 }, { "epoch": 0.11018823529411764, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.361, "step": 4683 }, { "epoch": 0.11021176470588236, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4524, "step": 4684 }, { "epoch": 0.11023529411764706, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.413, "step": 4685 }, { "epoch": 0.11025882352941177, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4775, "step": 4686 }, { "epoch": 0.11028235294117647, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 0.9563, "step": 4687 }, { "epoch": 0.11030588235294117, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2397, "step": 4688 }, { "epoch": 0.11032941176470588, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3318, "step": 4689 }, { "epoch": 0.11035294117647058, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4206, "step": 4690 }, { "epoch": 0.1103764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2286, "step": 4691 }, { "epoch": 0.1104, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3588, "step": 4692 }, { "epoch": 0.11042352941176471, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.4884, "step": 4693 }, { "epoch": 0.11044705882352941, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.4329, "step": 4694 }, { "epoch": 0.11047058823529411, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.7069, "step": 4695 }, { "epoch": 0.11049411764705883, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 1.0771, "step": 4696 }, { "epoch": 0.11051764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3204, "step": 4697 }, { "epoch": 0.11054117647058824, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2635, "step": 4698 }, { "epoch": 0.11056470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1974, "step": 4699 }, { "epoch": 0.11058823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.7533, "step": 4700 }, { "epoch": 0.11061176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4941, "step": 4701 }, { "epoch": 0.11063529411764705, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.135, "step": 4702 }, { "epoch": 0.11065882352941177, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1579, "step": 4703 }, { "epoch": 0.11068235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1096, "step": 4704 }, { "epoch": 0.11070588235294118, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2452, "step": 4705 }, { "epoch": 0.11072941176470588, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1635, "step": 4706 }, { "epoch": 0.11075294117647058, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4796, "step": 4707 }, { "epoch": 0.1107764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1961, "step": 4708 }, { "epoch": 0.1108, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.33, "step": 4709 }, { "epoch": 0.11082352941176471, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4983, "step": 4710 }, { "epoch": 0.11084705882352941, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2743, "step": 4711 }, { "epoch": 0.11087058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3198, "step": 4712 }, { "epoch": 0.11089411764705882, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2101, "step": 4713 }, { "epoch": 0.11091764705882352, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9631, "step": 4714 }, { "epoch": 0.11094117647058824, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2818, "step": 4715 }, { "epoch": 0.11096470588235294, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.2853, "step": 4716 }, { "epoch": 0.11098823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5435, "step": 4717 }, { "epoch": 0.11101176470588235, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2686, "step": 4718 }, { "epoch": 0.11103529411764707, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3162, "step": 4719 }, { "epoch": 0.11105882352941177, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4349, "step": 4720 }, { "epoch": 0.11108235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3978, "step": 4721 }, { "epoch": 0.11110588235294118, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0583, "step": 4722 }, { "epoch": 0.11112941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2481, "step": 4723 }, { "epoch": 0.1111529411764706, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2307, "step": 4724 }, { "epoch": 0.1111764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3233, "step": 4725 }, { "epoch": 0.1112, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0013, "step": 4726 }, { "epoch": 0.11122352941176471, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2754, "step": 4727 }, { "epoch": 0.11124705882352941, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2082, "step": 4728 }, { "epoch": 0.11127058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.34, "step": 4729 }, { "epoch": 0.11129411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3502, "step": 4730 }, { "epoch": 0.11131764705882354, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2404, "step": 4731 }, { "epoch": 0.11134117647058824, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1675, "step": 4732 }, { "epoch": 0.11136470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2721, "step": 4733 }, { "epoch": 0.11138823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4399, "step": 4734 }, { "epoch": 0.11141176470588235, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2736, "step": 4735 }, { "epoch": 0.11143529411764706, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2418, "step": 4736 }, { "epoch": 0.11145882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2376, "step": 4737 }, { "epoch": 0.11148235294117648, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2675, "step": 4738 }, { "epoch": 0.11150588235294118, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1795, "step": 4739 }, { "epoch": 0.11152941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3667, "step": 4740 }, { "epoch": 0.11155294117647059, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4782, "step": 4741 }, { "epoch": 0.11157647058823529, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1476, "step": 4742 }, { "epoch": 0.1116, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5402, "step": 4743 }, { "epoch": 0.1116235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3126, "step": 4744 }, { "epoch": 0.11164705882352942, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3253, "step": 4745 }, { "epoch": 0.11167058823529412, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1198, "step": 4746 }, { "epoch": 0.11169411764705882, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1961, "step": 4747 }, { "epoch": 0.11171764705882353, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2841, "step": 4748 }, { "epoch": 0.11174117647058823, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4069, "step": 4749 }, { "epoch": 0.11176470588235295, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3216, "step": 4750 }, { "epoch": 0.11178823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.282, "step": 4751 }, { "epoch": 0.11181176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1545, "step": 4752 }, { "epoch": 0.11183529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2879, "step": 4753 }, { "epoch": 0.11185882352941176, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.5987, "step": 4754 }, { "epoch": 0.11188235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0774, "step": 4755 }, { "epoch": 0.11190588235294117, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1561, "step": 4756 }, { "epoch": 0.11192941176470589, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2394, "step": 4757 }, { "epoch": 0.11195294117647059, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.4412, "step": 4758 }, { "epoch": 0.11197647058823529, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0471, "step": 4759 }, { "epoch": 0.112, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.9937, "step": 4760 }, { "epoch": 0.1120235294117647, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0634, "step": 4761 }, { "epoch": 0.11204705882352942, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5823, "step": 4762 }, { "epoch": 0.11207058823529412, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2326, "step": 4763 }, { "epoch": 0.11209411764705883, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1584, "step": 4764 }, { "epoch": 0.11211764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4859, "step": 4765 }, { "epoch": 0.11214117647058823, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2154, "step": 4766 }, { "epoch": 0.11216470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.234, "step": 4767 }, { "epoch": 0.11218823529411764, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3698, "step": 4768 }, { "epoch": 0.11221176470588236, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3879, "step": 4769 }, { "epoch": 0.11223529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4226, "step": 4770 }, { "epoch": 0.11225882352941176, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2971, "step": 4771 }, { "epoch": 0.11228235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2371, "step": 4772 }, { "epoch": 0.11230588235294117, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.164, "step": 4773 }, { "epoch": 0.11232941176470589, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1492, "step": 4774 }, { "epoch": 0.11235294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2764, "step": 4775 }, { "epoch": 0.1123764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3504, "step": 4776 }, { "epoch": 0.1124, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.131, "step": 4777 }, { "epoch": 0.1124235294117647, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2555, "step": 4778 }, { "epoch": 0.11244705882352941, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2485, "step": 4779 }, { "epoch": 0.11247058823529411, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.3705, "step": 4780 }, { "epoch": 0.11249411764705883, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3711, "step": 4781 }, { "epoch": 0.11251764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5381, "step": 4782 }, { "epoch": 0.11254117647058824, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.5151, "step": 4783 }, { "epoch": 0.11256470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3558, "step": 4784 }, { "epoch": 0.11258823529411764, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3066, "step": 4785 }, { "epoch": 0.11261176470588236, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.265, "step": 4786 }, { "epoch": 0.11263529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2029, "step": 4787 }, { "epoch": 0.11265882352941177, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.5934, "step": 4788 }, { "epoch": 0.11268235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1925, "step": 4789 }, { "epoch": 0.11270588235294117, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3723, "step": 4790 }, { "epoch": 0.11272941176470588, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2248, "step": 4791 }, { "epoch": 0.11275294117647058, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 0.9182, "step": 4792 }, { "epoch": 0.1127764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1733, "step": 4793 }, { "epoch": 0.1128, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.2633, "step": 4794 }, { "epoch": 0.11282352941176471, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9218, "step": 4795 }, { "epoch": 0.11284705882352941, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 1.0455, "step": 4796 }, { "epoch": 0.11287058823529411, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4985, "step": 4797 }, { "epoch": 0.11289411764705883, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3855, "step": 4798 }, { "epoch": 0.11291764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2855, "step": 4799 }, { "epoch": 0.11294117647058824, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1828, "step": 4800 }, { "epoch": 0.11296470588235294, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0393, "step": 4801 }, { "epoch": 0.11298823529411765, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.3818, "step": 4802 }, { "epoch": 0.11301176470588235, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3176, "step": 4803 }, { "epoch": 0.11303529411764705, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3184, "step": 4804 }, { "epoch": 0.11305882352941177, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.3687, "step": 4805 }, { "epoch": 0.11308235294117647, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1672, "step": 4806 }, { "epoch": 0.11310588235294118, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2493, "step": 4807 }, { "epoch": 0.11312941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4686, "step": 4808 }, { "epoch": 0.1131529411764706, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 1.0683, "step": 4809 }, { "epoch": 0.1131764705882353, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9387, "step": 4810 }, { "epoch": 0.1132, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.483, "step": 4811 }, { "epoch": 0.11322352941176471, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.338, "step": 4812 }, { "epoch": 0.11324705882352941, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.639, "step": 4813 }, { "epoch": 0.11327058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2637, "step": 4814 }, { "epoch": 0.11329411764705882, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9796, "step": 4815 }, { "epoch": 0.11331764705882352, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.391, "step": 4816 }, { "epoch": 0.11334117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2249, "step": 4817 }, { "epoch": 0.11336470588235294, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.326, "step": 4818 }, { "epoch": 0.11338823529411765, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9492, "step": 4819 }, { "epoch": 0.11341176470588235, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0318, "step": 4820 }, { "epoch": 0.11343529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4656, "step": 4821 }, { "epoch": 0.11345882352941176, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5516, "step": 4822 }, { "epoch": 0.11348235294117646, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0324, "step": 4823 }, { "epoch": 0.11350588235294118, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.1394, "step": 4824 }, { "epoch": 0.11352941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4121, "step": 4825 }, { "epoch": 0.11355294117647059, "grad_norm": 0.302734375, "learning_rate": 0.02, "loss": 1.0748, "step": 4826 }, { "epoch": 0.11357647058823529, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2057, "step": 4827 }, { "epoch": 0.1136, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2471, "step": 4828 }, { "epoch": 0.1136235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3131, "step": 4829 }, { "epoch": 0.1136470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2357, "step": 4830 }, { "epoch": 0.11367058823529412, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3338, "step": 4831 }, { "epoch": 0.11369411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.388, "step": 4832 }, { "epoch": 0.11371764705882353, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.158, "step": 4833 }, { "epoch": 0.11374117647058823, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2156, "step": 4834 }, { "epoch": 0.11376470588235293, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1378, "step": 4835 }, { "epoch": 0.11378823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3101, "step": 4836 }, { "epoch": 0.11381176470588235, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.957, "step": 4837 }, { "epoch": 0.11383529411764706, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1315, "step": 4838 }, { "epoch": 0.11385882352941176, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1974, "step": 4839 }, { "epoch": 0.11388235294117648, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3491, "step": 4840 }, { "epoch": 0.11390588235294118, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1801, "step": 4841 }, { "epoch": 0.11392941176470588, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3679, "step": 4842 }, { "epoch": 0.11395294117647059, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1539, "step": 4843 }, { "epoch": 0.11397647058823529, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3248, "step": 4844 }, { "epoch": 0.114, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1297, "step": 4845 }, { "epoch": 0.1140235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1599, "step": 4846 }, { "epoch": 0.11404705882352942, "grad_norm": 0.314453125, "learning_rate": 0.02, "loss": 0.9385, "step": 4847 }, { "epoch": 0.11407058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.5653, "step": 4848 }, { "epoch": 0.11409411764705882, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0489, "step": 4849 }, { "epoch": 0.11411764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3029, "step": 4850 }, { "epoch": 0.11414117647058823, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2033, "step": 4851 }, { "epoch": 0.11416470588235295, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2037, "step": 4852 }, { "epoch": 0.11418823529411765, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2909, "step": 4853 }, { "epoch": 0.11421176470588236, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0958, "step": 4854 }, { "epoch": 0.11423529411764706, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5299, "step": 4855 }, { "epoch": 0.11425882352941176, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1856, "step": 4856 }, { "epoch": 0.11428235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5178, "step": 4857 }, { "epoch": 0.11430588235294117, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0802, "step": 4858 }, { "epoch": 0.11432941176470589, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2356, "step": 4859 }, { "epoch": 0.11435294117647059, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0071, "step": 4860 }, { "epoch": 0.11437647058823529, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2818, "step": 4861 }, { "epoch": 0.1144, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2873, "step": 4862 }, { "epoch": 0.1144235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2561, "step": 4863 }, { "epoch": 0.11444705882352942, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1614, "step": 4864 }, { "epoch": 0.11447058823529412, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1735, "step": 4865 }, { "epoch": 0.11449411764705883, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.4788, "step": 4866 }, { "epoch": 0.11451764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9505, "step": 4867 }, { "epoch": 0.11454117647058823, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.298, "step": 4868 }, { "epoch": 0.11456470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3962, "step": 4869 }, { "epoch": 0.11458823529411764, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3798, "step": 4870 }, { "epoch": 0.11461176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2764, "step": 4871 }, { "epoch": 0.11463529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3512, "step": 4872 }, { "epoch": 0.11465882352941177, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3772, "step": 4873 }, { "epoch": 0.11468235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2505, "step": 4874 }, { "epoch": 0.11470588235294117, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2835, "step": 4875 }, { "epoch": 0.11472941176470589, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3855, "step": 4876 }, { "epoch": 0.11475294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4842, "step": 4877 }, { "epoch": 0.1147764705882353, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1332, "step": 4878 }, { "epoch": 0.1148, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.3108, "step": 4879 }, { "epoch": 0.1148235294117647, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1721, "step": 4880 }, { "epoch": 0.11484705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3053, "step": 4881 }, { "epoch": 0.11487058823529411, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4271, "step": 4882 }, { "epoch": 0.11489411764705883, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4267, "step": 4883 }, { "epoch": 0.11491764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3337, "step": 4884 }, { "epoch": 0.11494117647058824, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0238, "step": 4885 }, { "epoch": 0.11496470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.5905, "step": 4886 }, { "epoch": 0.11498823529411764, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.0551, "step": 4887 }, { "epoch": 0.11501176470588236, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.128, "step": 4888 }, { "epoch": 0.11503529411764706, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2596, "step": 4889 }, { "epoch": 0.11505882352941177, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1201, "step": 4890 }, { "epoch": 0.11508235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1035, "step": 4891 }, { "epoch": 0.11510588235294118, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1907, "step": 4892 }, { "epoch": 0.11512941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2375, "step": 4893 }, { "epoch": 0.11515294117647058, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.5076, "step": 4894 }, { "epoch": 0.1151764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1678, "step": 4895 }, { "epoch": 0.1152, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2153, "step": 4896 }, { "epoch": 0.11522352941176471, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3636, "step": 4897 }, { "epoch": 0.11524705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2063, "step": 4898 }, { "epoch": 0.11527058823529411, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.378, "step": 4899 }, { "epoch": 0.11529411764705882, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0356, "step": 4900 }, { "epoch": 0.11531764705882352, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1817, "step": 4901 }, { "epoch": 0.11534117647058824, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4248, "step": 4902 }, { "epoch": 0.11536470588235294, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2352, "step": 4903 }, { "epoch": 0.11538823529411765, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0391, "step": 4904 }, { "epoch": 0.11541176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3592, "step": 4905 }, { "epoch": 0.11543529411764705, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2196, "step": 4906 }, { "epoch": 0.11545882352941177, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5717, "step": 4907 }, { "epoch": 0.11548235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1955, "step": 4908 }, { "epoch": 0.11550588235294118, "grad_norm": 0.314453125, "learning_rate": 0.02, "loss": 0.8435, "step": 4909 }, { "epoch": 0.11552941176470588, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1624, "step": 4910 }, { "epoch": 0.1155529411764706, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0956, "step": 4911 }, { "epoch": 0.1155764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0893, "step": 4912 }, { "epoch": 0.1156, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2266, "step": 4913 }, { "epoch": 0.11562352941176471, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4319, "step": 4914 }, { "epoch": 0.11564705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3728, "step": 4915 }, { "epoch": 0.11567058823529412, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0122, "step": 4916 }, { "epoch": 0.11569411764705882, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9912, "step": 4917 }, { "epoch": 0.11571764705882354, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1211, "step": 4918 }, { "epoch": 0.11574117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.4081, "step": 4919 }, { "epoch": 0.11576470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3496, "step": 4920 }, { "epoch": 0.11578823529411765, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1774, "step": 4921 }, { "epoch": 0.11581176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1546, "step": 4922 }, { "epoch": 0.11583529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4476, "step": 4923 }, { "epoch": 0.11585882352941176, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2622, "step": 4924 }, { "epoch": 0.11588235294117646, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.4074, "step": 4925 }, { "epoch": 0.11590588235294118, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.0206, "step": 4926 }, { "epoch": 0.11592941176470588, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2582, "step": 4927 }, { "epoch": 0.11595294117647059, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4626, "step": 4928 }, { "epoch": 0.11597647058823529, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3433, "step": 4929 }, { "epoch": 0.116, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2652, "step": 4930 }, { "epoch": 0.1160235294117647, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.1055, "step": 4931 }, { "epoch": 0.1160470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3271, "step": 4932 }, { "epoch": 0.11607058823529412, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1049, "step": 4933 }, { "epoch": 0.11609411764705882, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2839, "step": 4934 }, { "epoch": 0.11611764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3155, "step": 4935 }, { "epoch": 0.11614117647058823, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.4207, "step": 4936 }, { "epoch": 0.11616470588235295, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.5089, "step": 4937 }, { "epoch": 0.11618823529411765, "grad_norm": 0.3046875, "learning_rate": 0.02, "loss": 0.9728, "step": 4938 }, { "epoch": 0.11621176470588235, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.2174, "step": 4939 }, { "epoch": 0.11623529411764706, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.4475, "step": 4940 }, { "epoch": 0.11625882352941176, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3881, "step": 4941 }, { "epoch": 0.11628235294117648, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3187, "step": 4942 }, { "epoch": 0.11630588235294118, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1624, "step": 4943 }, { "epoch": 0.11632941176470588, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.014, "step": 4944 }, { "epoch": 0.11635294117647059, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1287, "step": 4945 }, { "epoch": 0.11637647058823529, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.4001, "step": 4946 }, { "epoch": 0.1164, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0769, "step": 4947 }, { "epoch": 0.1164235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.5206, "step": 4948 }, { "epoch": 0.11644705882352942, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3189, "step": 4949 }, { "epoch": 0.11647058823529412, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2412, "step": 4950 }, { "epoch": 0.11649411764705882, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.3439, "step": 4951 }, { "epoch": 0.11651764705882353, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1748, "step": 4952 }, { "epoch": 0.11654117647058823, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 1.0044, "step": 4953 }, { "epoch": 0.11656470588235295, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1804, "step": 4954 }, { "epoch": 0.11658823529411765, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2348, "step": 4955 }, { "epoch": 0.11661176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4127, "step": 4956 }, { "epoch": 0.11663529411764706, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3169, "step": 4957 }, { "epoch": 0.11665882352941176, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0392, "step": 4958 }, { "epoch": 0.11668235294117647, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.263, "step": 4959 }, { "epoch": 0.11670588235294117, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9841, "step": 4960 }, { "epoch": 0.11672941176470589, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.398, "step": 4961 }, { "epoch": 0.11675294117647059, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2931, "step": 4962 }, { "epoch": 0.1167764705882353, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 0.9807, "step": 4963 }, { "epoch": 0.1168, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3823, "step": 4964 }, { "epoch": 0.1168235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3904, "step": 4965 }, { "epoch": 0.11684705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3519, "step": 4966 }, { "epoch": 0.11687058823529411, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3678, "step": 4967 }, { "epoch": 0.11689411764705883, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1331, "step": 4968 }, { "epoch": 0.11691764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.296, "step": 4969 }, { "epoch": 0.11694117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1538, "step": 4970 }, { "epoch": 0.11696470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.6094, "step": 4971 }, { "epoch": 0.11698823529411764, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1928, "step": 4972 }, { "epoch": 0.11701176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2955, "step": 4973 }, { "epoch": 0.11703529411764706, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1989, "step": 4974 }, { "epoch": 0.11705882352941177, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 0.8988, "step": 4975 }, { "epoch": 0.11708235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1341, "step": 4976 }, { "epoch": 0.11710588235294117, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2968, "step": 4977 }, { "epoch": 0.11712941176470588, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1488, "step": 4978 }, { "epoch": 0.11715294117647058, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3191, "step": 4979 }, { "epoch": 0.1171764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1764, "step": 4980 }, { "epoch": 0.1172, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2372, "step": 4981 }, { "epoch": 0.11722352941176471, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3056, "step": 4982 }, { "epoch": 0.11724705882352941, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1058, "step": 4983 }, { "epoch": 0.11727058823529411, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1855, "step": 4984 }, { "epoch": 0.11729411764705883, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2688, "step": 4985 }, { "epoch": 0.11731764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1982, "step": 4986 }, { "epoch": 0.11734117647058824, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.188, "step": 4987 }, { "epoch": 0.11736470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5343, "step": 4988 }, { "epoch": 0.11738823529411764, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2409, "step": 4989 }, { "epoch": 0.11741176470588235, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3499, "step": 4990 }, { "epoch": 0.11743529411764705, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1927, "step": 4991 }, { "epoch": 0.11745882352941177, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2077, "step": 4992 }, { "epoch": 0.11748235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4638, "step": 4993 }, { "epoch": 0.11750588235294118, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2783, "step": 4994 }, { "epoch": 0.11752941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4301, "step": 4995 }, { "epoch": 0.11755294117647058, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.501, "step": 4996 }, { "epoch": 0.1175764705882353, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9273, "step": 4997 }, { "epoch": 0.1176, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3094, "step": 4998 }, { "epoch": 0.11762352941176471, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4136, "step": 4999 }, { "epoch": 0.11764705882352941, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2358, "step": 5000 }, { "epoch": 0.11764705882352941, "eval_loss": 2.2437431812286377, "eval_runtime": 683.2742, "eval_samples_per_second": 12.44, "eval_steps_per_second": 3.11, "step": 5000 }, { "epoch": 0.11767058823529412, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2597, "step": 5001 }, { "epoch": 0.11769411764705882, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.8423, "step": 5002 }, { "epoch": 0.11771764705882352, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1649, "step": 5003 }, { "epoch": 0.11774117647058824, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9196, "step": 5004 }, { "epoch": 0.11776470588235294, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1877, "step": 5005 }, { "epoch": 0.11778823529411765, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0179, "step": 5006 }, { "epoch": 0.11781176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2971, "step": 5007 }, { "epoch": 0.11783529411764705, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3667, "step": 5008 }, { "epoch": 0.11785882352941177, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5043, "step": 5009 }, { "epoch": 0.11788235294117647, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2029, "step": 5010 }, { "epoch": 0.11790588235294118, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0568, "step": 5011 }, { "epoch": 0.11792941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2814, "step": 5012 }, { "epoch": 0.1179529411764706, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1265, "step": 5013 }, { "epoch": 0.1179764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.5833, "step": 5014 }, { "epoch": 0.118, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1212, "step": 5015 }, { "epoch": 0.11802352941176471, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3748, "step": 5016 }, { "epoch": 0.11804705882352941, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3151, "step": 5017 }, { "epoch": 0.11807058823529412, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3463, "step": 5018 }, { "epoch": 0.11809411764705882, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.165, "step": 5019 }, { "epoch": 0.11811764705882354, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1303, "step": 5020 }, { "epoch": 0.11814117647058824, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0238, "step": 5021 }, { "epoch": 0.11816470588235294, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0182, "step": 5022 }, { "epoch": 0.11818823529411765, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0626, "step": 5023 }, { "epoch": 0.11821176470588235, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.033, "step": 5024 }, { "epoch": 0.11823529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2053, "step": 5025 }, { "epoch": 0.11825882352941176, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0523, "step": 5026 }, { "epoch": 0.11828235294117648, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2847, "step": 5027 }, { "epoch": 0.11830588235294118, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3618, "step": 5028 }, { "epoch": 0.11832941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1278, "step": 5029 }, { "epoch": 0.11835294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6021, "step": 5030 }, { "epoch": 0.11837647058823529, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0449, "step": 5031 }, { "epoch": 0.1184, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2792, "step": 5032 }, { "epoch": 0.1184235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1723, "step": 5033 }, { "epoch": 0.1184470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2522, "step": 5034 }, { "epoch": 0.11847058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3111, "step": 5035 }, { "epoch": 0.11849411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3048, "step": 5036 }, { "epoch": 0.11851764705882353, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2625, "step": 5037 }, { "epoch": 0.11854117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3193, "step": 5038 }, { "epoch": 0.11856470588235295, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.411, "step": 5039 }, { "epoch": 0.11858823529411765, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1793, "step": 5040 }, { "epoch": 0.11861176470588235, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1418, "step": 5041 }, { "epoch": 0.11863529411764706, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2283, "step": 5042 }, { "epoch": 0.11865882352941176, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.9548, "step": 5043 }, { "epoch": 0.11868235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3935, "step": 5044 }, { "epoch": 0.11870588235294117, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3853, "step": 5045 }, { "epoch": 0.11872941176470589, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9595, "step": 5046 }, { "epoch": 0.11875294117647059, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.024, "step": 5047 }, { "epoch": 0.11877647058823529, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.337, "step": 5048 }, { "epoch": 0.1188, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.5503, "step": 5049 }, { "epoch": 0.1188235294117647, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1929, "step": 5050 }, { "epoch": 0.11884705882352942, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0402, "step": 5051 }, { "epoch": 0.11887058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1622, "step": 5052 }, { "epoch": 0.11889411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1884, "step": 5053 }, { "epoch": 0.11891764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4038, "step": 5054 }, { "epoch": 0.11894117647058823, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1992, "step": 5055 }, { "epoch": 0.11896470588235294, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8754, "step": 5056 }, { "epoch": 0.11898823529411764, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3388, "step": 5057 }, { "epoch": 0.11901176470588236, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.4189, "step": 5058 }, { "epoch": 0.11903529411764706, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0094, "step": 5059 }, { "epoch": 0.11905882352941176, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0164, "step": 5060 }, { "epoch": 0.11908235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2842, "step": 5061 }, { "epoch": 0.11910588235294117, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.4449, "step": 5062 }, { "epoch": 0.11912941176470589, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1842, "step": 5063 }, { "epoch": 0.11915294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4438, "step": 5064 }, { "epoch": 0.1191764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3254, "step": 5065 }, { "epoch": 0.1192, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2349, "step": 5066 }, { "epoch": 0.1192235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3326, "step": 5067 }, { "epoch": 0.11924705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2825, "step": 5068 }, { "epoch": 0.11927058823529411, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3975, "step": 5069 }, { "epoch": 0.11929411764705883, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6341, "step": 5070 }, { "epoch": 0.11931764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.3715, "step": 5071 }, { "epoch": 0.11934117647058824, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3576, "step": 5072 }, { "epoch": 0.11936470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2828, "step": 5073 }, { "epoch": 0.11938823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2958, "step": 5074 }, { "epoch": 0.11941176470588236, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1952, "step": 5075 }, { "epoch": 0.11943529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.455, "step": 5076 }, { "epoch": 0.11945882352941177, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.309, "step": 5077 }, { "epoch": 0.11948235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.5287, "step": 5078 }, { "epoch": 0.11950588235294117, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3976, "step": 5079 }, { "epoch": 0.11952941176470588, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2918, "step": 5080 }, { "epoch": 0.11955294117647058, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2928, "step": 5081 }, { "epoch": 0.1195764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.5671, "step": 5082 }, { "epoch": 0.1196, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.504, "step": 5083 }, { "epoch": 0.11962352941176471, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4039, "step": 5084 }, { "epoch": 0.11964705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3902, "step": 5085 }, { "epoch": 0.11967058823529411, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.319, "step": 5086 }, { "epoch": 0.11969411764705883, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1345, "step": 5087 }, { "epoch": 0.11971764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1822, "step": 5088 }, { "epoch": 0.11974117647058824, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.125, "step": 5089 }, { "epoch": 0.11976470588235294, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0807, "step": 5090 }, { "epoch": 0.11978823529411765, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3457, "step": 5091 }, { "epoch": 0.11981176470588235, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1406, "step": 5092 }, { "epoch": 0.11983529411764705, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3503, "step": 5093 }, { "epoch": 0.11985882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1622, "step": 5094 }, { "epoch": 0.11988235294117647, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1092, "step": 5095 }, { "epoch": 0.11990588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.376, "step": 5096 }, { "epoch": 0.11992941176470588, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2308, "step": 5097 }, { "epoch": 0.11995294117647058, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9896, "step": 5098 }, { "epoch": 0.1199764705882353, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1433, "step": 5099 }, { "epoch": 0.12, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3859, "step": 5100 }, { "epoch": 0.12002352941176471, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3425, "step": 5101 }, { "epoch": 0.12004705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2871, "step": 5102 }, { "epoch": 0.12007058823529412, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1838, "step": 5103 }, { "epoch": 0.12009411764705882, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0963, "step": 5104 }, { "epoch": 0.12011764705882352, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3749, "step": 5105 }, { "epoch": 0.12014117647058824, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.012, "step": 5106 }, { "epoch": 0.12016470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4512, "step": 5107 }, { "epoch": 0.12018823529411765, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.5418, "step": 5108 }, { "epoch": 0.12021176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4481, "step": 5109 }, { "epoch": 0.12023529411764707, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1572, "step": 5110 }, { "epoch": 0.12025882352941177, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2004, "step": 5111 }, { "epoch": 0.12028235294117647, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.0206, "step": 5112 }, { "epoch": 0.12030588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1011, "step": 5113 }, { "epoch": 0.12032941176470588, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3691, "step": 5114 }, { "epoch": 0.1203529411764706, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1361, "step": 5115 }, { "epoch": 0.1203764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.5173, "step": 5116 }, { "epoch": 0.1204, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1105, "step": 5117 }, { "epoch": 0.12042352941176471, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.423, "step": 5118 }, { "epoch": 0.12044705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3983, "step": 5119 }, { "epoch": 0.12047058823529412, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3349, "step": 5120 }, { "epoch": 0.12049411764705882, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.271, "step": 5121 }, { "epoch": 0.12051764705882353, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.1631, "step": 5122 }, { "epoch": 0.12054117647058823, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9336, "step": 5123 }, { "epoch": 0.12056470588235293, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2167, "step": 5124 }, { "epoch": 0.12058823529411765, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0479, "step": 5125 }, { "epoch": 0.12061176470588235, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3492, "step": 5126 }, { "epoch": 0.12063529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.237, "step": 5127 }, { "epoch": 0.12065882352941176, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3247, "step": 5128 }, { "epoch": 0.12068235294117648, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1049, "step": 5129 }, { "epoch": 0.12070588235294118, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.172, "step": 5130 }, { "epoch": 0.12072941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2485, "step": 5131 }, { "epoch": 0.12075294117647059, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0128, "step": 5132 }, { "epoch": 0.12077647058823529, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0727, "step": 5133 }, { "epoch": 0.1208, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3275, "step": 5134 }, { "epoch": 0.1208235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1625, "step": 5135 }, { "epoch": 0.12084705882352942, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2644, "step": 5136 }, { "epoch": 0.12087058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2693, "step": 5137 }, { "epoch": 0.12089411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3035, "step": 5138 }, { "epoch": 0.12091764705882353, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.9806, "step": 5139 }, { "epoch": 0.12094117647058823, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0988, "step": 5140 }, { "epoch": 0.12096470588235295, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2412, "step": 5141 }, { "epoch": 0.12098823529411765, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1291, "step": 5142 }, { "epoch": 0.12101176470588235, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9404, "step": 5143 }, { "epoch": 0.12103529411764706, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0731, "step": 5144 }, { "epoch": 0.12105882352941176, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1886, "step": 5145 }, { "epoch": 0.12108235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.16, "step": 5146 }, { "epoch": 0.12110588235294117, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1394, "step": 5147 }, { "epoch": 0.12112941176470589, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9572, "step": 5148 }, { "epoch": 0.12115294117647059, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.502, "step": 5149 }, { "epoch": 0.12117647058823529, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1676, "step": 5150 }, { "epoch": 0.1212, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4295, "step": 5151 }, { "epoch": 0.1212235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5941, "step": 5152 }, { "epoch": 0.12124705882352942, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3161, "step": 5153 }, { "epoch": 0.12127058823529412, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.359, "step": 5154 }, { "epoch": 0.12129411764705883, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9044, "step": 5155 }, { "epoch": 0.12131764705882353, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 0.8632, "step": 5156 }, { "epoch": 0.12134117647058823, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2829, "step": 5157 }, { "epoch": 0.12136470588235294, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3904, "step": 5158 }, { "epoch": 0.12138823529411764, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.4473, "step": 5159 }, { "epoch": 0.12141176470588236, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.163, "step": 5160 }, { "epoch": 0.12143529411764706, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2612, "step": 5161 }, { "epoch": 0.12145882352941176, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2805, "step": 5162 }, { "epoch": 0.12148235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.283, "step": 5163 }, { "epoch": 0.12150588235294117, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.4405, "step": 5164 }, { "epoch": 0.12152941176470589, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0571, "step": 5165 }, { "epoch": 0.12155294117647059, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2294, "step": 5166 }, { "epoch": 0.1215764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3353, "step": 5167 }, { "epoch": 0.1216, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3152, "step": 5168 }, { "epoch": 0.1216235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2312, "step": 5169 }, { "epoch": 0.12164705882352941, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.3393, "step": 5170 }, { "epoch": 0.12167058823529411, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3515, "step": 5171 }, { "epoch": 0.12169411764705883, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1896, "step": 5172 }, { "epoch": 0.12171764705882353, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1475, "step": 5173 }, { "epoch": 0.12174117647058824, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9786, "step": 5174 }, { "epoch": 0.12176470588235294, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9838, "step": 5175 }, { "epoch": 0.12178823529411764, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9764, "step": 5176 }, { "epoch": 0.12181176470588236, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3073, "step": 5177 }, { "epoch": 0.12183529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5472, "step": 5178 }, { "epoch": 0.12185882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.524, "step": 5179 }, { "epoch": 0.12188235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2591, "step": 5180 }, { "epoch": 0.12190588235294118, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1609, "step": 5181 }, { "epoch": 0.12192941176470588, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2175, "step": 5182 }, { "epoch": 0.12195294117647058, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0747, "step": 5183 }, { "epoch": 0.1219764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.5241, "step": 5184 }, { "epoch": 0.122, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2949, "step": 5185 }, { "epoch": 0.12202352941176471, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.6126, "step": 5186 }, { "epoch": 0.12204705882352941, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2827, "step": 5187 }, { "epoch": 0.12207058823529411, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3436, "step": 5188 }, { "epoch": 0.12209411764705883, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2765, "step": 5189 }, { "epoch": 0.12211764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3521, "step": 5190 }, { "epoch": 0.12214117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.45, "step": 5191 }, { "epoch": 0.12216470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2283, "step": 5192 }, { "epoch": 0.12218823529411765, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1028, "step": 5193 }, { "epoch": 0.12221176470588235, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2771, "step": 5194 }, { "epoch": 0.12223529411764705, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2158, "step": 5195 }, { "epoch": 0.12225882352941177, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4634, "step": 5196 }, { "epoch": 0.12228235294117647, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0816, "step": 5197 }, { "epoch": 0.12230588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.463, "step": 5198 }, { "epoch": 0.12232941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.4577, "step": 5199 }, { "epoch": 0.1223529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.9513, "step": 5200 }, { "epoch": 0.1223764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1225, "step": 5201 }, { "epoch": 0.1224, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1865, "step": 5202 }, { "epoch": 0.12242352941176471, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4553, "step": 5203 }, { "epoch": 0.12244705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3879, "step": 5204 }, { "epoch": 0.12247058823529412, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2169, "step": 5205 }, { "epoch": 0.12249411764705882, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9994, "step": 5206 }, { "epoch": 0.12251764705882352, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0708, "step": 5207 }, { "epoch": 0.12254117647058824, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0624, "step": 5208 }, { "epoch": 0.12256470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3834, "step": 5209 }, { "epoch": 0.12258823529411765, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0841, "step": 5210 }, { "epoch": 0.12261176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2584, "step": 5211 }, { "epoch": 0.12263529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2479, "step": 5212 }, { "epoch": 0.12265882352941176, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2214, "step": 5213 }, { "epoch": 0.12268235294117646, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4619, "step": 5214 }, { "epoch": 0.12270588235294118, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1145, "step": 5215 }, { "epoch": 0.12272941176470588, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3101, "step": 5216 }, { "epoch": 0.12275294117647059, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3432, "step": 5217 }, { "epoch": 0.12277647058823529, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0912, "step": 5218 }, { "epoch": 0.1228, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3248, "step": 5219 }, { "epoch": 0.1228235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2596, "step": 5220 }, { "epoch": 0.1228470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0491, "step": 5221 }, { "epoch": 0.12287058823529412, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1721, "step": 5222 }, { "epoch": 0.12289411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3598, "step": 5223 }, { "epoch": 0.12291764705882353, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.205, "step": 5224 }, { "epoch": 0.12294117647058823, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4471, "step": 5225 }, { "epoch": 0.12296470588235293, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2942, "step": 5226 }, { "epoch": 0.12298823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.411, "step": 5227 }, { "epoch": 0.12301176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5092, "step": 5228 }, { "epoch": 0.12303529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2326, "step": 5229 }, { "epoch": 0.12305882352941176, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.8436, "step": 5230 }, { "epoch": 0.12308235294117648, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.2015, "step": 5231 }, { "epoch": 0.12310588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.499, "step": 5232 }, { "epoch": 0.12312941176470588, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.2667, "step": 5233 }, { "epoch": 0.12315294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3079, "step": 5234 }, { "epoch": 0.12317647058823529, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2218, "step": 5235 }, { "epoch": 0.1232, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.3326, "step": 5236 }, { "epoch": 0.1232235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2836, "step": 5237 }, { "epoch": 0.12324705882352942, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3853, "step": 5238 }, { "epoch": 0.12327058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1754, "step": 5239 }, { "epoch": 0.12329411764705882, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1121, "step": 5240 }, { "epoch": 0.12331764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9751, "step": 5241 }, { "epoch": 0.12334117647058823, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2691, "step": 5242 }, { "epoch": 0.12336470588235295, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1899, "step": 5243 }, { "epoch": 0.12338823529411765, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2897, "step": 5244 }, { "epoch": 0.12341176470588236, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3562, "step": 5245 }, { "epoch": 0.12343529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3918, "step": 5246 }, { "epoch": 0.12345882352941176, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2737, "step": 5247 }, { "epoch": 0.12348235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4087, "step": 5248 }, { "epoch": 0.12350588235294117, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9594, "step": 5249 }, { "epoch": 0.12352941176470589, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1839, "step": 5250 }, { "epoch": 0.12355294117647059, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2881, "step": 5251 }, { "epoch": 0.12357647058823529, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3181, "step": 5252 }, { "epoch": 0.1236, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.0288, "step": 5253 }, { "epoch": 0.1236235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2764, "step": 5254 }, { "epoch": 0.12364705882352942, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1266, "step": 5255 }, { "epoch": 0.12367058823529412, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2393, "step": 5256 }, { "epoch": 0.12369411764705883, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3332, "step": 5257 }, { "epoch": 0.12371764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4464, "step": 5258 }, { "epoch": 0.12374117647058823, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0068, "step": 5259 }, { "epoch": 0.12376470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1999, "step": 5260 }, { "epoch": 0.12378823529411764, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3199, "step": 5261 }, { "epoch": 0.12381176470588236, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.6028, "step": 5262 }, { "epoch": 0.12383529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2001, "step": 5263 }, { "epoch": 0.12385882352941177, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2744, "step": 5264 }, { "epoch": 0.12388235294117647, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 1.1838, "step": 5265 }, { "epoch": 0.12390588235294117, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.158, "step": 5266 }, { "epoch": 0.12392941176470589, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1931, "step": 5267 }, { "epoch": 0.12395294117647059, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0653, "step": 5268 }, { "epoch": 0.1239764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2986, "step": 5269 }, { "epoch": 0.124, "grad_norm": 0.3046875, "learning_rate": 0.02, "loss": 0.9983, "step": 5270 }, { "epoch": 0.1240235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.4704, "step": 5271 }, { "epoch": 0.12404705882352941, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0481, "step": 5272 }, { "epoch": 0.12407058823529411, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3353, "step": 5273 }, { "epoch": 0.12409411764705883, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2492, "step": 5274 }, { "epoch": 0.12411764705882353, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.2538, "step": 5275 }, { "epoch": 0.12414117647058824, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2459, "step": 5276 }, { "epoch": 0.12416470588235294, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1833, "step": 5277 }, { "epoch": 0.12418823529411764, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.4902, "step": 5278 }, { "epoch": 0.12421176470588235, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3297, "step": 5279 }, { "epoch": 0.12423529411764705, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3526, "step": 5280 }, { "epoch": 0.12425882352941177, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1667, "step": 5281 }, { "epoch": 0.12428235294117647, "grad_norm": 0.328125, "learning_rate": 0.02, "loss": 0.9828, "step": 5282 }, { "epoch": 0.12430588235294118, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3181, "step": 5283 }, { "epoch": 0.12432941176470588, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.3456, "step": 5284 }, { "epoch": 0.12435294117647058, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3489, "step": 5285 }, { "epoch": 0.1243764705882353, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9643, "step": 5286 }, { "epoch": 0.1244, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.5592, "step": 5287 }, { "epoch": 0.12442352941176471, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2258, "step": 5288 }, { "epoch": 0.12444705882352941, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.3121, "step": 5289 }, { "epoch": 0.12447058823529412, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2818, "step": 5290 }, { "epoch": 0.12449411764705882, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1195, "step": 5291 }, { "epoch": 0.12451764705882352, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.0531, "step": 5292 }, { "epoch": 0.12454117647058824, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1663, "step": 5293 }, { "epoch": 0.12456470588235294, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.1577, "step": 5294 }, { "epoch": 0.12458823529411765, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.07, "step": 5295 }, { "epoch": 0.12461176470588235, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3055, "step": 5296 }, { "epoch": 0.12463529411764705, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.5451, "step": 5297 }, { "epoch": 0.12465882352941177, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 0.9829, "step": 5298 }, { "epoch": 0.12468235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3154, "step": 5299 }, { "epoch": 0.12470588235294118, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1783, "step": 5300 }, { "epoch": 0.12472941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.439, "step": 5301 }, { "epoch": 0.1247529411764706, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1329, "step": 5302 }, { "epoch": 0.1247764705882353, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.8392, "step": 5303 }, { "epoch": 0.1248, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 1.1631, "step": 5304 }, { "epoch": 0.12482352941176471, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2749, "step": 5305 }, { "epoch": 0.12484705882352941, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3197, "step": 5306 }, { "epoch": 0.12487058823529412, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.961, "step": 5307 }, { "epoch": 0.12489411764705882, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2203, "step": 5308 }, { "epoch": 0.12491764705882354, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0807, "step": 5309 }, { "epoch": 0.12494117647058824, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4383, "step": 5310 }, { "epoch": 0.12496470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0512, "step": 5311 }, { "epoch": 0.12498823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.6075, "step": 5312 }, { "epoch": 0.12501176470588235, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1546, "step": 5313 }, { "epoch": 0.12503529411764705, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.502, "step": 5314 }, { "epoch": 0.12505882352941178, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2968, "step": 5315 }, { "epoch": 0.12508235294117648, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1686, "step": 5316 }, { "epoch": 0.12510588235294118, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1766, "step": 5317 }, { "epoch": 0.12512941176470588, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.1343, "step": 5318 }, { "epoch": 0.12515294117647058, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3529, "step": 5319 }, { "epoch": 0.1251764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2766, "step": 5320 }, { "epoch": 0.1252, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.318, "step": 5321 }, { "epoch": 0.1252235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3046, "step": 5322 }, { "epoch": 0.1252470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1758, "step": 5323 }, { "epoch": 0.1252705882352941, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.4344, "step": 5324 }, { "epoch": 0.12529411764705883, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1931, "step": 5325 }, { "epoch": 0.12531764705882353, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0169, "step": 5326 }, { "epoch": 0.12534117647058823, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2584, "step": 5327 }, { "epoch": 0.12536470588235293, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3039, "step": 5328 }, { "epoch": 0.12538823529411763, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3582, "step": 5329 }, { "epoch": 0.12541176470588236, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.3237, "step": 5330 }, { "epoch": 0.12543529411764706, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1095, "step": 5331 }, { "epoch": 0.12545882352941176, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 1.0651, "step": 5332 }, { "epoch": 0.12548235294117646, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2065, "step": 5333 }, { "epoch": 0.1255058823529412, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.8362, "step": 5334 }, { "epoch": 0.1255294117647059, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2577, "step": 5335 }, { "epoch": 0.1255529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5078, "step": 5336 }, { "epoch": 0.1255764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1156, "step": 5337 }, { "epoch": 0.1256, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4164, "step": 5338 }, { "epoch": 0.12562352941176472, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.3022, "step": 5339 }, { "epoch": 0.12564705882352942, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4748, "step": 5340 }, { "epoch": 0.12567058823529412, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1374, "step": 5341 }, { "epoch": 0.12569411764705882, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1461, "step": 5342 }, { "epoch": 0.12571764705882352, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.607, "step": 5343 }, { "epoch": 0.12574117647058825, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0173, "step": 5344 }, { "epoch": 0.12576470588235295, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2252, "step": 5345 }, { "epoch": 0.12578823529411765, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 1.01, "step": 5346 }, { "epoch": 0.12581176470588235, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2146, "step": 5347 }, { "epoch": 0.12583529411764705, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0818, "step": 5348 }, { "epoch": 0.12585882352941177, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1135, "step": 5349 }, { "epoch": 0.12588235294117647, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1362, "step": 5350 }, { "epoch": 0.12590588235294117, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.2045, "step": 5351 }, { "epoch": 0.12592941176470587, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3792, "step": 5352 }, { "epoch": 0.1259529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2634, "step": 5353 }, { "epoch": 0.1259764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1538, "step": 5354 }, { "epoch": 0.126, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.149, "step": 5355 }, { "epoch": 0.1260235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3188, "step": 5356 }, { "epoch": 0.1260470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3908, "step": 5357 }, { "epoch": 0.12607058823529413, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0795, "step": 5358 }, { "epoch": 0.12609411764705883, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4172, "step": 5359 }, { "epoch": 0.12611764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1204, "step": 5360 }, { "epoch": 0.12614117647058823, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2062, "step": 5361 }, { "epoch": 0.12616470588235293, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2205, "step": 5362 }, { "epoch": 0.12618823529411766, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3004, "step": 5363 }, { "epoch": 0.12621176470588236, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4129, "step": 5364 }, { "epoch": 0.12623529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3171, "step": 5365 }, { "epoch": 0.12625882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4894, "step": 5366 }, { "epoch": 0.12628235294117648, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.5065, "step": 5367 }, { "epoch": 0.12630588235294118, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.5586, "step": 5368 }, { "epoch": 0.12632941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2644, "step": 5369 }, { "epoch": 0.12635294117647058, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1167, "step": 5370 }, { "epoch": 0.12637647058823528, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3036, "step": 5371 }, { "epoch": 0.1264, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2162, "step": 5372 }, { "epoch": 0.1264235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2742, "step": 5373 }, { "epoch": 0.1264470588235294, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2996, "step": 5374 }, { "epoch": 0.1264705882352941, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2839, "step": 5375 }, { "epoch": 0.1264941176470588, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0503, "step": 5376 }, { "epoch": 0.12651764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4807, "step": 5377 }, { "epoch": 0.12654117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1805, "step": 5378 }, { "epoch": 0.12656470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3304, "step": 5379 }, { "epoch": 0.12658823529411764, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3996, "step": 5380 }, { "epoch": 0.12661176470588234, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3488, "step": 5381 }, { "epoch": 0.12663529411764707, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2633, "step": 5382 }, { "epoch": 0.12665882352941177, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2075, "step": 5383 }, { "epoch": 0.12668235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4204, "step": 5384 }, { "epoch": 0.12670588235294117, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2721, "step": 5385 }, { "epoch": 0.1267294117647059, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2707, "step": 5386 }, { "epoch": 0.1267529411764706, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0953, "step": 5387 }, { "epoch": 0.1267764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.6061, "step": 5388 }, { "epoch": 0.1268, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1464, "step": 5389 }, { "epoch": 0.1268235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.302, "step": 5390 }, { "epoch": 0.12684705882352942, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0943, "step": 5391 }, { "epoch": 0.12687058823529412, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2166, "step": 5392 }, { "epoch": 0.12689411764705882, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.1236, "step": 5393 }, { "epoch": 0.12691764705882352, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.354, "step": 5394 }, { "epoch": 0.12694117647058822, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2743, "step": 5395 }, { "epoch": 0.12696470588235295, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3578, "step": 5396 }, { "epoch": 0.12698823529411765, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3118, "step": 5397 }, { "epoch": 0.12701176470588235, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0721, "step": 5398 }, { "epoch": 0.12703529411764705, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2588, "step": 5399 }, { "epoch": 0.12705882352941175, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0442, "step": 5400 }, { "epoch": 0.12708235294117648, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2988, "step": 5401 }, { "epoch": 0.12710588235294118, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4935, "step": 5402 }, { "epoch": 0.12712941176470588, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2903, "step": 5403 }, { "epoch": 0.12715294117647058, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4698, "step": 5404 }, { "epoch": 0.1271764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1783, "step": 5405 }, { "epoch": 0.1272, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0249, "step": 5406 }, { "epoch": 0.1272235294117647, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1395, "step": 5407 }, { "epoch": 0.1272470588235294, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2614, "step": 5408 }, { "epoch": 0.1272705882352941, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1905, "step": 5409 }, { "epoch": 0.12729411764705884, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2717, "step": 5410 }, { "epoch": 0.12731764705882354, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1359, "step": 5411 }, { "epoch": 0.12734117647058824, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2035, "step": 5412 }, { "epoch": 0.12736470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2253, "step": 5413 }, { "epoch": 0.12738823529411764, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3765, "step": 5414 }, { "epoch": 0.12741176470588236, "grad_norm": 0.33984375, "learning_rate": 0.02, "loss": 0.9812, "step": 5415 }, { "epoch": 0.12743529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3994, "step": 5416 }, { "epoch": 0.12745882352941176, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2525, "step": 5417 }, { "epoch": 0.12748235294117646, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2304, "step": 5418 }, { "epoch": 0.12750588235294116, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9666, "step": 5419 }, { "epoch": 0.1275294117647059, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.267, "step": 5420 }, { "epoch": 0.1275529411764706, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.2517, "step": 5421 }, { "epoch": 0.1275764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2511, "step": 5422 }, { "epoch": 0.1276, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1995, "step": 5423 }, { "epoch": 0.12762352941176472, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.452, "step": 5424 }, { "epoch": 0.12764705882352942, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2119, "step": 5425 }, { "epoch": 0.12767058823529412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.085, "step": 5426 }, { "epoch": 0.12769411764705882, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8983, "step": 5427 }, { "epoch": 0.12771764705882352, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2, "step": 5428 }, { "epoch": 0.12774117647058825, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0822, "step": 5429 }, { "epoch": 0.12776470588235295, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2784, "step": 5430 }, { "epoch": 0.12778823529411765, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0452, "step": 5431 }, { "epoch": 0.12781176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2624, "step": 5432 }, { "epoch": 0.12783529411764705, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2862, "step": 5433 }, { "epoch": 0.12785882352941177, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9386, "step": 5434 }, { "epoch": 0.12788235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3692, "step": 5435 }, { "epoch": 0.12790588235294117, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0929, "step": 5436 }, { "epoch": 0.12792941176470587, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.9816, "step": 5437 }, { "epoch": 0.12795294117647057, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4385, "step": 5438 }, { "epoch": 0.1279764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3361, "step": 5439 }, { "epoch": 0.128, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1848, "step": 5440 }, { "epoch": 0.1280235294117647, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2168, "step": 5441 }, { "epoch": 0.1280470588235294, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.8653, "step": 5442 }, { "epoch": 0.12807058823529413, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.6001, "step": 5443 }, { "epoch": 0.12809411764705883, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.8564, "step": 5444 }, { "epoch": 0.12811764705882353, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.064, "step": 5445 }, { "epoch": 0.12814117647058823, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.016, "step": 5446 }, { "epoch": 0.12816470588235293, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.5564, "step": 5447 }, { "epoch": 0.12818823529411766, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3762, "step": 5448 }, { "epoch": 0.12821176470588236, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3134, "step": 5449 }, { "epoch": 0.12823529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.4463, "step": 5450 }, { "epoch": 0.12825882352941176, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2274, "step": 5451 }, { "epoch": 0.12828235294117646, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2763, "step": 5452 }, { "epoch": 0.1283058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4464, "step": 5453 }, { "epoch": 0.1283294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2647, "step": 5454 }, { "epoch": 0.1283529411764706, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2888, "step": 5455 }, { "epoch": 0.1283764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0924, "step": 5456 }, { "epoch": 0.1284, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.8313, "step": 5457 }, { "epoch": 0.12842352941176471, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3737, "step": 5458 }, { "epoch": 0.12844705882352941, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9644, "step": 5459 }, { "epoch": 0.12847058823529411, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2154, "step": 5460 }, { "epoch": 0.12849411764705881, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2433, "step": 5461 }, { "epoch": 0.12851764705882354, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3678, "step": 5462 }, { "epoch": 0.12854117647058824, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1425, "step": 5463 }, { "epoch": 0.12856470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3268, "step": 5464 }, { "epoch": 0.12858823529411764, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.221, "step": 5465 }, { "epoch": 0.12861176470588234, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2719, "step": 5466 }, { "epoch": 0.12863529411764707, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2447, "step": 5467 }, { "epoch": 0.12865882352941177, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0692, "step": 5468 }, { "epoch": 0.12868235294117647, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.2137, "step": 5469 }, { "epoch": 0.12870588235294117, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1239, "step": 5470 }, { "epoch": 0.12872941176470587, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.307, "step": 5471 }, { "epoch": 0.1287529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3965, "step": 5472 }, { "epoch": 0.1287764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.3162, "step": 5473 }, { "epoch": 0.1288, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0996, "step": 5474 }, { "epoch": 0.1288235294117647, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1287, "step": 5475 }, { "epoch": 0.12884705882352943, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2168, "step": 5476 }, { "epoch": 0.12887058823529413, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 1.0129, "step": 5477 }, { "epoch": 0.12889411764705883, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3328, "step": 5478 }, { "epoch": 0.12891764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2225, "step": 5479 }, { "epoch": 0.12894117647058823, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4812, "step": 5480 }, { "epoch": 0.12896470588235295, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2728, "step": 5481 }, { "epoch": 0.12898823529411765, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2932, "step": 5482 }, { "epoch": 0.12901176470588235, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2478, "step": 5483 }, { "epoch": 0.12903529411764705, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3765, "step": 5484 }, { "epoch": 0.12905882352941175, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1449, "step": 5485 }, { "epoch": 0.12908235294117648, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1551, "step": 5486 }, { "epoch": 0.12910588235294118, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1464, "step": 5487 }, { "epoch": 0.12912941176470588, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1908, "step": 5488 }, { "epoch": 0.12915294117647058, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3021, "step": 5489 }, { "epoch": 0.12917647058823528, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4339, "step": 5490 }, { "epoch": 0.1292, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1016, "step": 5491 }, { "epoch": 0.1292235294117647, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1637, "step": 5492 }, { "epoch": 0.1292470588235294, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.0438, "step": 5493 }, { "epoch": 0.1292705882352941, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2533, "step": 5494 }, { "epoch": 0.12929411764705884, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1077, "step": 5495 }, { "epoch": 0.12931764705882354, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0613, "step": 5496 }, { "epoch": 0.12934117647058824, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2704, "step": 5497 }, { "epoch": 0.12936470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1788, "step": 5498 }, { "epoch": 0.12938823529411764, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9519, "step": 5499 }, { "epoch": 0.12941176470588237, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.7766, "step": 5500 }, { "epoch": 0.12943529411764707, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.456, "step": 5501 }, { "epoch": 0.12945882352941177, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2663, "step": 5502 }, { "epoch": 0.12948235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3836, "step": 5503 }, { "epoch": 0.12950588235294117, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0913, "step": 5504 }, { "epoch": 0.1295294117647059, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1108, "step": 5505 }, { "epoch": 0.1295529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1167, "step": 5506 }, { "epoch": 0.1295764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1765, "step": 5507 }, { "epoch": 0.1296, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2763, "step": 5508 }, { "epoch": 0.1296235294117647, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.156, "step": 5509 }, { "epoch": 0.12964705882352942, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.311, "step": 5510 }, { "epoch": 0.12967058823529412, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1209, "step": 5511 }, { "epoch": 0.12969411764705882, "grad_norm": 0.30859375, "learning_rate": 0.02, "loss": 0.6357, "step": 5512 }, { "epoch": 0.12971764705882352, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4466, "step": 5513 }, { "epoch": 0.12974117647058825, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3218, "step": 5514 }, { "epoch": 0.12976470588235295, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3552, "step": 5515 }, { "epoch": 0.12978823529411765, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3083, "step": 5516 }, { "epoch": 0.12981176470588235, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1205, "step": 5517 }, { "epoch": 0.12983529411764705, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3599, "step": 5518 }, { "epoch": 0.12985882352941178, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.194, "step": 5519 }, { "epoch": 0.12988235294117648, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2703, "step": 5520 }, { "epoch": 0.12990588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2413, "step": 5521 }, { "epoch": 0.12992941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4571, "step": 5522 }, { "epoch": 0.12995294117647058, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1979, "step": 5523 }, { "epoch": 0.1299764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0645, "step": 5524 }, { "epoch": 0.13, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.105, "step": 5525 }, { "epoch": 0.1300235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5698, "step": 5526 }, { "epoch": 0.1300470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2058, "step": 5527 }, { "epoch": 0.1300705882352941, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1417, "step": 5528 }, { "epoch": 0.13009411764705883, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3075, "step": 5529 }, { "epoch": 0.13011764705882353, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3593, "step": 5530 }, { "epoch": 0.13014117647058823, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1307, "step": 5531 }, { "epoch": 0.13016470588235293, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0861, "step": 5532 }, { "epoch": 0.13018823529411766, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9956, "step": 5533 }, { "epoch": 0.13021176470588236, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2407, "step": 5534 }, { "epoch": 0.13023529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2945, "step": 5535 }, { "epoch": 0.13025882352941176, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1553, "step": 5536 }, { "epoch": 0.13028235294117646, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.197, "step": 5537 }, { "epoch": 0.1303058823529412, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0782, "step": 5538 }, { "epoch": 0.1303294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4038, "step": 5539 }, { "epoch": 0.1303529411764706, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 1.0198, "step": 5540 }, { "epoch": 0.1303764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4384, "step": 5541 }, { "epoch": 0.1304, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2902, "step": 5542 }, { "epoch": 0.13042352941176472, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4787, "step": 5543 }, { "epoch": 0.13044705882352942, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4065, "step": 5544 }, { "epoch": 0.13047058823529412, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2505, "step": 5545 }, { "epoch": 0.13049411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.3438, "step": 5546 }, { "epoch": 0.13051764705882352, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1754, "step": 5547 }, { "epoch": 0.13054117647058824, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2252, "step": 5548 }, { "epoch": 0.13056470588235294, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2835, "step": 5549 }, { "epoch": 0.13058823529411764, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3248, "step": 5550 }, { "epoch": 0.13061176470588234, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2128, "step": 5551 }, { "epoch": 0.13063529411764707, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0824, "step": 5552 }, { "epoch": 0.13065882352941177, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.0457, "step": 5553 }, { "epoch": 0.13068235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2992, "step": 5554 }, { "epoch": 0.13070588235294117, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2158, "step": 5555 }, { "epoch": 0.13072941176470587, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2488, "step": 5556 }, { "epoch": 0.1307529411764706, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9084, "step": 5557 }, { "epoch": 0.1307764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2372, "step": 5558 }, { "epoch": 0.1308, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1184, "step": 5559 }, { "epoch": 0.1308235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1584, "step": 5560 }, { "epoch": 0.1308470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.285, "step": 5561 }, { "epoch": 0.13087058823529413, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.4107, "step": 5562 }, { "epoch": 0.13089411764705883, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4522, "step": 5563 }, { "epoch": 0.13091764705882353, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.8484, "step": 5564 }, { "epoch": 0.13094117647058823, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2338, "step": 5565 }, { "epoch": 0.13096470588235293, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.096, "step": 5566 }, { "epoch": 0.13098823529411766, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 0.981, "step": 5567 }, { "epoch": 0.13101176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3083, "step": 5568 }, { "epoch": 0.13103529411764706, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2807, "step": 5569 }, { "epoch": 0.13105882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3131, "step": 5570 }, { "epoch": 0.13108235294117648, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3096, "step": 5571 }, { "epoch": 0.13110588235294118, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0212, "step": 5572 }, { "epoch": 0.13112941176470588, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3786, "step": 5573 }, { "epoch": 0.13115294117647058, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0499, "step": 5574 }, { "epoch": 0.13117647058823528, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4578, "step": 5575 }, { "epoch": 0.1312, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2703, "step": 5576 }, { "epoch": 0.1312235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0792, "step": 5577 }, { "epoch": 0.1312470588235294, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.3019, "step": 5578 }, { "epoch": 0.1312705882352941, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3593, "step": 5579 }, { "epoch": 0.1312941176470588, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2498, "step": 5580 }, { "epoch": 0.13131764705882354, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0325, "step": 5581 }, { "epoch": 0.13134117647058824, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1798, "step": 5582 }, { "epoch": 0.13136470588235294, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.3262, "step": 5583 }, { "epoch": 0.13138823529411764, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2424, "step": 5584 }, { "epoch": 0.13141176470588237, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0609, "step": 5585 }, { "epoch": 0.13143529411764707, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1952, "step": 5586 }, { "epoch": 0.13145882352941177, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3507, "step": 5587 }, { "epoch": 0.13148235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1183, "step": 5588 }, { "epoch": 0.13150588235294117, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.4356, "step": 5589 }, { "epoch": 0.1315294117647059, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.993, "step": 5590 }, { "epoch": 0.1315529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2432, "step": 5591 }, { "epoch": 0.1315764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.4725, "step": 5592 }, { "epoch": 0.1316, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.243, "step": 5593 }, { "epoch": 0.1316235294117647, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.173, "step": 5594 }, { "epoch": 0.13164705882352942, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.139, "step": 5595 }, { "epoch": 0.13167058823529412, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 0.9287, "step": 5596 }, { "epoch": 0.13169411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3018, "step": 5597 }, { "epoch": 0.13171764705882352, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1727, "step": 5598 }, { "epoch": 0.13174117647058822, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.4797, "step": 5599 }, { "epoch": 0.13176470588235295, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2318, "step": 5600 }, { "epoch": 0.13178823529411765, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9513, "step": 5601 }, { "epoch": 0.13181176470588235, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.2822, "step": 5602 }, { "epoch": 0.13183529411764705, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.9552, "step": 5603 }, { "epoch": 0.13185882352941178, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3519, "step": 5604 }, { "epoch": 0.13188235294117648, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3852, "step": 5605 }, { "epoch": 0.13190588235294118, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2721, "step": 5606 }, { "epoch": 0.13192941176470588, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2059, "step": 5607 }, { "epoch": 0.13195294117647058, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2395, "step": 5608 }, { "epoch": 0.1319764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3762, "step": 5609 }, { "epoch": 0.132, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.5343, "step": 5610 }, { "epoch": 0.1320235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.123, "step": 5611 }, { "epoch": 0.1320470588235294, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0211, "step": 5612 }, { "epoch": 0.1320705882352941, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2934, "step": 5613 }, { "epoch": 0.13209411764705883, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3416, "step": 5614 }, { "epoch": 0.13211764705882353, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1831, "step": 5615 }, { "epoch": 0.13214117647058823, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1526, "step": 5616 }, { "epoch": 0.13216470588235293, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1571, "step": 5617 }, { "epoch": 0.13218823529411763, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9325, "step": 5618 }, { "epoch": 0.13221176470588236, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4921, "step": 5619 }, { "epoch": 0.13223529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9569, "step": 5620 }, { "epoch": 0.13225882352941176, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.9134, "step": 5621 }, { "epoch": 0.13228235294117646, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3364, "step": 5622 }, { "epoch": 0.1323058823529412, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1489, "step": 5623 }, { "epoch": 0.1323294117647059, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2364, "step": 5624 }, { "epoch": 0.1323529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2932, "step": 5625 }, { "epoch": 0.1323764705882353, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1351, "step": 5626 }, { "epoch": 0.1324, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2472, "step": 5627 }, { "epoch": 0.13242352941176472, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2022, "step": 5628 }, { "epoch": 0.13244705882352942, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.1136, "step": 5629 }, { "epoch": 0.13247058823529412, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0473, "step": 5630 }, { "epoch": 0.13249411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.4675, "step": 5631 }, { "epoch": 0.13251764705882352, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.401, "step": 5632 }, { "epoch": 0.13254117647058825, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3069, "step": 5633 }, { "epoch": 0.13256470588235295, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3807, "step": 5634 }, { "epoch": 0.13258823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4643, "step": 5635 }, { "epoch": 0.13261176470588235, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1391, "step": 5636 }, { "epoch": 0.13263529411764705, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0613, "step": 5637 }, { "epoch": 0.13265882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3741, "step": 5638 }, { "epoch": 0.13268235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2687, "step": 5639 }, { "epoch": 0.13270588235294117, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3832, "step": 5640 }, { "epoch": 0.13272941176470587, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3622, "step": 5641 }, { "epoch": 0.1327529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0984, "step": 5642 }, { "epoch": 0.1327764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1077, "step": 5643 }, { "epoch": 0.1328, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2254, "step": 5644 }, { "epoch": 0.1328235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1, "step": 5645 }, { "epoch": 0.1328470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2861, "step": 5646 }, { "epoch": 0.13287058823529413, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.858, "step": 5647 }, { "epoch": 0.13289411764705883, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3019, "step": 5648 }, { "epoch": 0.13291764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1919, "step": 5649 }, { "epoch": 0.13294117647058823, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2885, "step": 5650 }, { "epoch": 0.13296470588235293, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3508, "step": 5651 }, { "epoch": 0.13298823529411766, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3111, "step": 5652 }, { "epoch": 0.13301176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.101, "step": 5653 }, { "epoch": 0.13303529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3274, "step": 5654 }, { "epoch": 0.13305882352941176, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0133, "step": 5655 }, { "epoch": 0.13308235294117646, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3864, "step": 5656 }, { "epoch": 0.13310588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4322, "step": 5657 }, { "epoch": 0.13312941176470589, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0785, "step": 5658 }, { "epoch": 0.13315294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.387, "step": 5659 }, { "epoch": 0.13317647058823529, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1107, "step": 5660 }, { "epoch": 0.1332, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.53, "step": 5661 }, { "epoch": 0.1332235294117647, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.168, "step": 5662 }, { "epoch": 0.1332470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2857, "step": 5663 }, { "epoch": 0.1332705882352941, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0653, "step": 5664 }, { "epoch": 0.1332941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0472, "step": 5665 }, { "epoch": 0.13331764705882354, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9358, "step": 5666 }, { "epoch": 0.13334117647058824, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2589, "step": 5667 }, { "epoch": 0.13336470588235294, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2203, "step": 5668 }, { "epoch": 0.13338823529411764, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0501, "step": 5669 }, { "epoch": 0.13341176470588234, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1873, "step": 5670 }, { "epoch": 0.13343529411764707, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1769, "step": 5671 }, { "epoch": 0.13345882352941177, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1445, "step": 5672 }, { "epoch": 0.13348235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2194, "step": 5673 }, { "epoch": 0.13350588235294117, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3068, "step": 5674 }, { "epoch": 0.13352941176470587, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0122, "step": 5675 }, { "epoch": 0.1335529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.351, "step": 5676 }, { "epoch": 0.1335764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0737, "step": 5677 }, { "epoch": 0.1336, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1246, "step": 5678 }, { "epoch": 0.1336235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0751, "step": 5679 }, { "epoch": 0.13364705882352942, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.907, "step": 5680 }, { "epoch": 0.13367058823529412, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2501, "step": 5681 }, { "epoch": 0.13369411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4822, "step": 5682 }, { "epoch": 0.13371764705882352, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.6988, "step": 5683 }, { "epoch": 0.13374117647058822, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3112, "step": 5684 }, { "epoch": 0.13376470588235295, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3201, "step": 5685 }, { "epoch": 0.13378823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2664, "step": 5686 }, { "epoch": 0.13381176470588235, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1641, "step": 5687 }, { "epoch": 0.13383529411764705, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1731, "step": 5688 }, { "epoch": 0.13385882352941175, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2111, "step": 5689 }, { "epoch": 0.13388235294117648, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1826, "step": 5690 }, { "epoch": 0.13390588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2382, "step": 5691 }, { "epoch": 0.13392941176470588, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2541, "step": 5692 }, { "epoch": 0.13395294117647058, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0247, "step": 5693 }, { "epoch": 0.1339764705882353, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.0261, "step": 5694 }, { "epoch": 0.134, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2083, "step": 5695 }, { "epoch": 0.1340235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2646, "step": 5696 }, { "epoch": 0.1340470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4552, "step": 5697 }, { "epoch": 0.1340705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2084, "step": 5698 }, { "epoch": 0.13409411764705884, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4268, "step": 5699 }, { "epoch": 0.13411764705882354, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1019, "step": 5700 }, { "epoch": 0.13414117647058824, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2838, "step": 5701 }, { "epoch": 0.13416470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3518, "step": 5702 }, { "epoch": 0.13418823529411764, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1067, "step": 5703 }, { "epoch": 0.13421176470588236, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.5227, "step": 5704 }, { "epoch": 0.13423529411764706, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2473, "step": 5705 }, { "epoch": 0.13425882352941176, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.288, "step": 5706 }, { "epoch": 0.13428235294117646, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2217, "step": 5707 }, { "epoch": 0.13430588235294116, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9589, "step": 5708 }, { "epoch": 0.1343294117647059, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1925, "step": 5709 }, { "epoch": 0.1343529411764706, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1981, "step": 5710 }, { "epoch": 0.1343764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.063, "step": 5711 }, { "epoch": 0.1344, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3901, "step": 5712 }, { "epoch": 0.13442352941176472, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.403, "step": 5713 }, { "epoch": 0.13444705882352942, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0468, "step": 5714 }, { "epoch": 0.13447058823529412, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1245, "step": 5715 }, { "epoch": 0.13449411764705882, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3645, "step": 5716 }, { "epoch": 0.13451764705882352, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1853, "step": 5717 }, { "epoch": 0.13454117647058825, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3311, "step": 5718 }, { "epoch": 0.13456470588235295, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.7808, "step": 5719 }, { "epoch": 0.13458823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2523, "step": 5720 }, { "epoch": 0.13461176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3962, "step": 5721 }, { "epoch": 0.13463529411764705, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2229, "step": 5722 }, { "epoch": 0.13465882352941178, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3343, "step": 5723 }, { "epoch": 0.13468235294117648, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2531, "step": 5724 }, { "epoch": 0.13470588235294118, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1364, "step": 5725 }, { "epoch": 0.13472941176470588, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2499, "step": 5726 }, { "epoch": 0.13475294117647058, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2106, "step": 5727 }, { "epoch": 0.1347764705882353, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0065, "step": 5728 }, { "epoch": 0.1348, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.312, "step": 5729 }, { "epoch": 0.1348235294117647, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1785, "step": 5730 }, { "epoch": 0.1348470588235294, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.2172, "step": 5731 }, { "epoch": 0.13487058823529413, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2066, "step": 5732 }, { "epoch": 0.13489411764705883, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2815, "step": 5733 }, { "epoch": 0.13491764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0892, "step": 5734 }, { "epoch": 0.13494117647058823, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1115, "step": 5735 }, { "epoch": 0.13496470588235293, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2286, "step": 5736 }, { "epoch": 0.13498823529411766, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9697, "step": 5737 }, { "epoch": 0.13501176470588236, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.208, "step": 5738 }, { "epoch": 0.13503529411764706, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.8912, "step": 5739 }, { "epoch": 0.13505882352941176, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0325, "step": 5740 }, { "epoch": 0.13508235294117646, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1692, "step": 5741 }, { "epoch": 0.1351058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3344, "step": 5742 }, { "epoch": 0.1351294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4497, "step": 5743 }, { "epoch": 0.1351529411764706, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.1317, "step": 5744 }, { "epoch": 0.1351764705882353, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1176, "step": 5745 }, { "epoch": 0.1352, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1314, "step": 5746 }, { "epoch": 0.13522352941176471, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1106, "step": 5747 }, { "epoch": 0.13524705882352941, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1823, "step": 5748 }, { "epoch": 0.13527058823529411, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1754, "step": 5749 }, { "epoch": 0.13529411764705881, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0559, "step": 5750 }, { "epoch": 0.13531764705882354, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2868, "step": 5751 }, { "epoch": 0.13534117647058824, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.0652, "step": 5752 }, { "epoch": 0.13536470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1839, "step": 5753 }, { "epoch": 0.13538823529411764, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.8123, "step": 5754 }, { "epoch": 0.13541176470588234, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2007, "step": 5755 }, { "epoch": 0.13543529411764707, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1415, "step": 5756 }, { "epoch": 0.13545882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4848, "step": 5757 }, { "epoch": 0.13548235294117647, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.105, "step": 5758 }, { "epoch": 0.13550588235294117, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3465, "step": 5759 }, { "epoch": 0.13552941176470587, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.1321, "step": 5760 }, { "epoch": 0.1355529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2134, "step": 5761 }, { "epoch": 0.1355764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.07, "step": 5762 }, { "epoch": 0.1356, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9904, "step": 5763 }, { "epoch": 0.1356235294117647, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.005, "step": 5764 }, { "epoch": 0.1356470588235294, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1834, "step": 5765 }, { "epoch": 0.13567058823529413, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9786, "step": 5766 }, { "epoch": 0.13569411764705883, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9635, "step": 5767 }, { "epoch": 0.13571764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0908, "step": 5768 }, { "epoch": 0.13574117647058823, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4056, "step": 5769 }, { "epoch": 0.13576470588235295, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3464, "step": 5770 }, { "epoch": 0.13578823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0282, "step": 5771 }, { "epoch": 0.13581176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3307, "step": 5772 }, { "epoch": 0.13583529411764705, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9673, "step": 5773 }, { "epoch": 0.13585882352941175, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9651, "step": 5774 }, { "epoch": 0.13588235294117648, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.917, "step": 5775 }, { "epoch": 0.13590588235294118, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0536, "step": 5776 }, { "epoch": 0.13592941176470588, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2208, "step": 5777 }, { "epoch": 0.13595294117647058, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3335, "step": 5778 }, { "epoch": 0.13597647058823528, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3057, "step": 5779 }, { "epoch": 0.136, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2967, "step": 5780 }, { "epoch": 0.1360235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0242, "step": 5781 }, { "epoch": 0.1360470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3554, "step": 5782 }, { "epoch": 0.1360705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.4438, "step": 5783 }, { "epoch": 0.1360941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2907, "step": 5784 }, { "epoch": 0.13611764705882354, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9465, "step": 5785 }, { "epoch": 0.13614117647058824, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2307, "step": 5786 }, { "epoch": 0.13616470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4145, "step": 5787 }, { "epoch": 0.13618823529411764, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2797, "step": 5788 }, { "epoch": 0.13621176470588237, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1442, "step": 5789 }, { "epoch": 0.13623529411764707, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.0611, "step": 5790 }, { "epoch": 0.13625882352941177, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1738, "step": 5791 }, { "epoch": 0.13628235294117647, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2303, "step": 5792 }, { "epoch": 0.13630588235294117, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.3023, "step": 5793 }, { "epoch": 0.1363294117647059, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1626, "step": 5794 }, { "epoch": 0.1363529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3158, "step": 5795 }, { "epoch": 0.1363764705882353, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1535, "step": 5796 }, { "epoch": 0.1364, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1435, "step": 5797 }, { "epoch": 0.1364235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1771, "step": 5798 }, { "epoch": 0.13644705882352942, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2519, "step": 5799 }, { "epoch": 0.13647058823529412, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.2149, "step": 5800 }, { "epoch": 0.13649411764705882, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 0.8256, "step": 5801 }, { "epoch": 0.13651764705882352, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2096, "step": 5802 }, { "epoch": 0.13654117647058825, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2719, "step": 5803 }, { "epoch": 0.13656470588235295, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.2058, "step": 5804 }, { "epoch": 0.13658823529411765, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.9896, "step": 5805 }, { "epoch": 0.13661176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.5795, "step": 5806 }, { "epoch": 0.13663529411764705, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0162, "step": 5807 }, { "epoch": 0.13665882352941178, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4632, "step": 5808 }, { "epoch": 0.13668235294117648, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1348, "step": 5809 }, { "epoch": 0.13670588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3849, "step": 5810 }, { "epoch": 0.13672941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4714, "step": 5811 }, { "epoch": 0.13675294117647058, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1689, "step": 5812 }, { "epoch": 0.1367764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3837, "step": 5813 }, { "epoch": 0.1368, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2594, "step": 5814 }, { "epoch": 0.1368235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3215, "step": 5815 }, { "epoch": 0.1368470588235294, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0772, "step": 5816 }, { "epoch": 0.1368705882352941, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 1.0778, "step": 5817 }, { "epoch": 0.13689411764705883, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0438, "step": 5818 }, { "epoch": 0.13691764705882353, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1369, "step": 5819 }, { "epoch": 0.13694117647058823, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2283, "step": 5820 }, { "epoch": 0.13696470588235293, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2801, "step": 5821 }, { "epoch": 0.13698823529411766, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.1139, "step": 5822 }, { "epoch": 0.13701176470588236, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0831, "step": 5823 }, { "epoch": 0.13703529411764706, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8397, "step": 5824 }, { "epoch": 0.13705882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2546, "step": 5825 }, { "epoch": 0.13708235294117646, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.194, "step": 5826 }, { "epoch": 0.1371058823529412, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0918, "step": 5827 }, { "epoch": 0.1371294117647059, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2322, "step": 5828 }, { "epoch": 0.1371529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4063, "step": 5829 }, { "epoch": 0.1371764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2542, "step": 5830 }, { "epoch": 0.1372, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0264, "step": 5831 }, { "epoch": 0.13722352941176472, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0302, "step": 5832 }, { "epoch": 0.13724705882352942, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8846, "step": 5833 }, { "epoch": 0.13727058823529412, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1416, "step": 5834 }, { "epoch": 0.13729411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2598, "step": 5835 }, { "epoch": 0.13731764705882352, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1479, "step": 5836 }, { "epoch": 0.13734117647058824, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.097, "step": 5837 }, { "epoch": 0.13736470588235294, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.818, "step": 5838 }, { "epoch": 0.13738823529411764, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2454, "step": 5839 }, { "epoch": 0.13741176470588234, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0606, "step": 5840 }, { "epoch": 0.13743529411764707, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2163, "step": 5841 }, { "epoch": 0.13745882352941177, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4938, "step": 5842 }, { "epoch": 0.13748235294117647, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0047, "step": 5843 }, { "epoch": 0.13750588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1951, "step": 5844 }, { "epoch": 0.13752941176470587, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1741, "step": 5845 }, { "epoch": 0.1375529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2134, "step": 5846 }, { "epoch": 0.1375764705882353, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2335, "step": 5847 }, { "epoch": 0.1376, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3507, "step": 5848 }, { "epoch": 0.1376235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4746, "step": 5849 }, { "epoch": 0.1376470588235294, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1843, "step": 5850 }, { "epoch": 0.13767058823529413, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2831, "step": 5851 }, { "epoch": 0.13769411764705883, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1039, "step": 5852 }, { "epoch": 0.13771764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.123, "step": 5853 }, { "epoch": 0.13774117647058823, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3135, "step": 5854 }, { "epoch": 0.13776470588235293, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3037, "step": 5855 }, { "epoch": 0.13778823529411766, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.8553, "step": 5856 }, { "epoch": 0.13781176470588236, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3302, "step": 5857 }, { "epoch": 0.13783529411764706, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.965, "step": 5858 }, { "epoch": 0.13785882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.501, "step": 5859 }, { "epoch": 0.13788235294117648, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3338, "step": 5860 }, { "epoch": 0.13790588235294118, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.9933, "step": 5861 }, { "epoch": 0.13792941176470588, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1316, "step": 5862 }, { "epoch": 0.13795294117647058, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2656, "step": 5863 }, { "epoch": 0.13797647058823528, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2634, "step": 5864 }, { "epoch": 0.138, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2082, "step": 5865 }, { "epoch": 0.1380235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9912, "step": 5866 }, { "epoch": 0.1380470588235294, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 0.8676, "step": 5867 }, { "epoch": 0.1380705882352941, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8849, "step": 5868 }, { "epoch": 0.1380941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2814, "step": 5869 }, { "epoch": 0.13811764705882354, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9895, "step": 5870 }, { "epoch": 0.13814117647058824, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.4009, "step": 5871 }, { "epoch": 0.13816470588235294, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0541, "step": 5872 }, { "epoch": 0.13818823529411764, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.3017, "step": 5873 }, { "epoch": 0.13821176470588234, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4144, "step": 5874 }, { "epoch": 0.13823529411764707, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2958, "step": 5875 }, { "epoch": 0.13825882352941177, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2372, "step": 5876 }, { "epoch": 0.13828235294117647, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 0.9008, "step": 5877 }, { "epoch": 0.13830588235294117, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.4399, "step": 5878 }, { "epoch": 0.1383294117647059, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.045, "step": 5879 }, { "epoch": 0.1383529411764706, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.2177, "step": 5880 }, { "epoch": 0.1383764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2109, "step": 5881 }, { "epoch": 0.1384, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0499, "step": 5882 }, { "epoch": 0.1384235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1738, "step": 5883 }, { "epoch": 0.13844705882352942, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2028, "step": 5884 }, { "epoch": 0.13847058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3131, "step": 5885 }, { "epoch": 0.13849411764705882, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4604, "step": 5886 }, { "epoch": 0.13851764705882352, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2206, "step": 5887 }, { "epoch": 0.13854117647058822, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0338, "step": 5888 }, { "epoch": 0.13856470588235295, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0911, "step": 5889 }, { "epoch": 0.13858823529411765, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2785, "step": 5890 }, { "epoch": 0.13861176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2574, "step": 5891 }, { "epoch": 0.13863529411764705, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2808, "step": 5892 }, { "epoch": 0.13865882352941175, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4669, "step": 5893 }, { "epoch": 0.13868235294117648, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9762, "step": 5894 }, { "epoch": 0.13870588235294118, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1151, "step": 5895 }, { "epoch": 0.13872941176470588, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2466, "step": 5896 }, { "epoch": 0.13875294117647058, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0197, "step": 5897 }, { "epoch": 0.1387764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9393, "step": 5898 }, { "epoch": 0.1388, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.191, "step": 5899 }, { "epoch": 0.1388235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3126, "step": 5900 }, { "epoch": 0.1388470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3388, "step": 5901 }, { "epoch": 0.1388705882352941, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0266, "step": 5902 }, { "epoch": 0.13889411764705883, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1732, "step": 5903 }, { "epoch": 0.13891764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0944, "step": 5904 }, { "epoch": 0.13894117647058823, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1503, "step": 5905 }, { "epoch": 0.13896470588235293, "grad_norm": 0.302734375, "learning_rate": 0.02, "loss": 0.8935, "step": 5906 }, { "epoch": 0.13898823529411763, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2306, "step": 5907 }, { "epoch": 0.13901176470588236, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1543, "step": 5908 }, { "epoch": 0.13903529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.4487, "step": 5909 }, { "epoch": 0.13905882352941176, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.077, "step": 5910 }, { "epoch": 0.13908235294117646, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 1.1721, "step": 5911 }, { "epoch": 0.1391058823529412, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2536, "step": 5912 }, { "epoch": 0.1391294117647059, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1938, "step": 5913 }, { "epoch": 0.1391529411764706, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.0138, "step": 5914 }, { "epoch": 0.1391764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3553, "step": 5915 }, { "epoch": 0.1392, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9105, "step": 5916 }, { "epoch": 0.13922352941176472, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1367, "step": 5917 }, { "epoch": 0.13924705882352942, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.998, "step": 5918 }, { "epoch": 0.13927058823529412, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 0.8598, "step": 5919 }, { "epoch": 0.13929411764705882, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.275, "step": 5920 }, { "epoch": 0.13931764705882352, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0371, "step": 5921 }, { "epoch": 0.13934117647058825, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4162, "step": 5922 }, { "epoch": 0.13936470588235295, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.027, "step": 5923 }, { "epoch": 0.13938823529411765, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3936, "step": 5924 }, { "epoch": 0.13941176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3843, "step": 5925 }, { "epoch": 0.13943529411764705, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1649, "step": 5926 }, { "epoch": 0.13945882352941177, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2523, "step": 5927 }, { "epoch": 0.13948235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2297, "step": 5928 }, { "epoch": 0.13950588235294117, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1108, "step": 5929 }, { "epoch": 0.13952941176470587, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1478, "step": 5930 }, { "epoch": 0.1395529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1111, "step": 5931 }, { "epoch": 0.1395764705882353, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.2302, "step": 5932 }, { "epoch": 0.1396, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0175, "step": 5933 }, { "epoch": 0.1396235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1553, "step": 5934 }, { "epoch": 0.1396470588235294, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1449, "step": 5935 }, { "epoch": 0.13967058823529413, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.2609, "step": 5936 }, { "epoch": 0.13969411764705883, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1984, "step": 5937 }, { "epoch": 0.13971764705882353, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.8172, "step": 5938 }, { "epoch": 0.13974117647058823, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0526, "step": 5939 }, { "epoch": 0.13976470588235293, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.4027, "step": 5940 }, { "epoch": 0.13978823529411766, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2213, "step": 5941 }, { "epoch": 0.13981176470588236, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2573, "step": 5942 }, { "epoch": 0.13983529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4724, "step": 5943 }, { "epoch": 0.13985882352941176, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1576, "step": 5944 }, { "epoch": 0.13988235294117646, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5667, "step": 5945 }, { "epoch": 0.13990588235294119, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1533, "step": 5946 }, { "epoch": 0.13992941176470589, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2866, "step": 5947 }, { "epoch": 0.13995294117647059, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9699, "step": 5948 }, { "epoch": 0.13997647058823529, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1808, "step": 5949 }, { "epoch": 0.14, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2977, "step": 5950 }, { "epoch": 0.1400235294117647, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0474, "step": 5951 }, { "epoch": 0.1400470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2169, "step": 5952 }, { "epoch": 0.1400705882352941, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0253, "step": 5953 }, { "epoch": 0.1400941176470588, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2976, "step": 5954 }, { "epoch": 0.14011764705882354, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.2444, "step": 5955 }, { "epoch": 0.14014117647058824, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2468, "step": 5956 }, { "epoch": 0.14016470588235294, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3177, "step": 5957 }, { "epoch": 0.14018823529411764, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1987, "step": 5958 }, { "epoch": 0.14021176470588234, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2379, "step": 5959 }, { "epoch": 0.14023529411764707, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2272, "step": 5960 }, { "epoch": 0.14025882352941177, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2351, "step": 5961 }, { "epoch": 0.14028235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0522, "step": 5962 }, { "epoch": 0.14030588235294117, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4882, "step": 5963 }, { "epoch": 0.14032941176470587, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2134, "step": 5964 }, { "epoch": 0.1403529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2531, "step": 5965 }, { "epoch": 0.1403764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.108, "step": 5966 }, { "epoch": 0.1404, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0431, "step": 5967 }, { "epoch": 0.1404235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1852, "step": 5968 }, { "epoch": 0.14044705882352942, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1335, "step": 5969 }, { "epoch": 0.14047058823529412, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1374, "step": 5970 }, { "epoch": 0.14049411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2469, "step": 5971 }, { "epoch": 0.14051764705882352, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9478, "step": 5972 }, { "epoch": 0.14054117647058822, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8063, "step": 5973 }, { "epoch": 0.14056470588235295, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.5331, "step": 5974 }, { "epoch": 0.14058823529411765, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3015, "step": 5975 }, { "epoch": 0.14061176470588235, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1005, "step": 5976 }, { "epoch": 0.14063529411764705, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2829, "step": 5977 }, { "epoch": 0.14065882352941175, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1581, "step": 5978 }, { "epoch": 0.14068235294117648, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4231, "step": 5979 }, { "epoch": 0.14070588235294118, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2459, "step": 5980 }, { "epoch": 0.14072941176470588, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4044, "step": 5981 }, { "epoch": 0.14075294117647058, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3992, "step": 5982 }, { "epoch": 0.14077647058823528, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1157, "step": 5983 }, { "epoch": 0.1408, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0807, "step": 5984 }, { "epoch": 0.1408235294117647, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 0.6182, "step": 5985 }, { "epoch": 0.1408470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3203, "step": 5986 }, { "epoch": 0.1408705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3039, "step": 5987 }, { "epoch": 0.14089411764705884, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4387, "step": 5988 }, { "epoch": 0.14091764705882354, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0614, "step": 5989 }, { "epoch": 0.14094117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3242, "step": 5990 }, { "epoch": 0.14096470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.118, "step": 5991 }, { "epoch": 0.14098823529411764, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2449, "step": 5992 }, { "epoch": 0.14101176470588236, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1576, "step": 5993 }, { "epoch": 0.14103529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4822, "step": 5994 }, { "epoch": 0.14105882352941176, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1714, "step": 5995 }, { "epoch": 0.14108235294117646, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1963, "step": 5996 }, { "epoch": 0.14110588235294116, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.6587, "step": 5997 }, { "epoch": 0.1411294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2898, "step": 5998 }, { "epoch": 0.1411529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1491, "step": 5999 }, { "epoch": 0.1411764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9342, "step": 6000 }, { "epoch": 0.1411764705882353, "eval_loss": 2.2358450889587402, "eval_runtime": 681.3738, "eval_samples_per_second": 12.475, "eval_steps_per_second": 3.119, "step": 6000 }, { "epoch": 0.1412, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2199, "step": 6001 }, { "epoch": 0.1412235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9824, "step": 6002 }, { "epoch": 0.14124705882352942, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3711, "step": 6003 }, { "epoch": 0.14127058823529412, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9074, "step": 6004 }, { "epoch": 0.14129411764705882, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.8751, "step": 6005 }, { "epoch": 0.14131764705882352, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1636, "step": 6006 }, { "epoch": 0.14134117647058825, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2235, "step": 6007 }, { "epoch": 0.14136470588235295, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.343, "step": 6008 }, { "epoch": 0.14138823529411765, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.325, "step": 6009 }, { "epoch": 0.14141176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4879, "step": 6010 }, { "epoch": 0.14143529411764705, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0329, "step": 6011 }, { "epoch": 0.14145882352941178, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1575, "step": 6012 }, { "epoch": 0.14148235294117648, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1314, "step": 6013 }, { "epoch": 0.14150588235294118, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0706, "step": 6014 }, { "epoch": 0.14152941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2803, "step": 6015 }, { "epoch": 0.14155294117647058, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0212, "step": 6016 }, { "epoch": 0.1415764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8992, "step": 6017 }, { "epoch": 0.1416, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9778, "step": 6018 }, { "epoch": 0.1416235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.112, "step": 6019 }, { "epoch": 0.1416470588235294, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2504, "step": 6020 }, { "epoch": 0.14167058823529413, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.549, "step": 6021 }, { "epoch": 0.14169411764705883, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3433, "step": 6022 }, { "epoch": 0.14171764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9879, "step": 6023 }, { "epoch": 0.14174117647058823, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0223, "step": 6024 }, { "epoch": 0.14176470588235293, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.4187, "step": 6025 }, { "epoch": 0.14178823529411766, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3208, "step": 6026 }, { "epoch": 0.14181176470588236, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8172, "step": 6027 }, { "epoch": 0.14183529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2057, "step": 6028 }, { "epoch": 0.14185882352941176, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4078, "step": 6029 }, { "epoch": 0.14188235294117646, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3668, "step": 6030 }, { "epoch": 0.1419058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.5308, "step": 6031 }, { "epoch": 0.1419294117647059, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0101, "step": 6032 }, { "epoch": 0.1419529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1042, "step": 6033 }, { "epoch": 0.1419764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1274, "step": 6034 }, { "epoch": 0.142, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.277, "step": 6035 }, { "epoch": 0.14202352941176472, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.076, "step": 6036 }, { "epoch": 0.14204705882352942, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2526, "step": 6037 }, { "epoch": 0.14207058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2965, "step": 6038 }, { "epoch": 0.14209411764705882, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1951, "step": 6039 }, { "epoch": 0.14211764705882354, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2977, "step": 6040 }, { "epoch": 0.14214117647058824, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4423, "step": 6041 }, { "epoch": 0.14216470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3425, "step": 6042 }, { "epoch": 0.14218823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.032, "step": 6043 }, { "epoch": 0.14221176470588234, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1362, "step": 6044 }, { "epoch": 0.14223529411764707, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2346, "step": 6045 }, { "epoch": 0.14225882352941177, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9355, "step": 6046 }, { "epoch": 0.14228235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2876, "step": 6047 }, { "epoch": 0.14230588235294117, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3334, "step": 6048 }, { "epoch": 0.14232941176470587, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2725, "step": 6049 }, { "epoch": 0.1423529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4234, "step": 6050 }, { "epoch": 0.1423764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2791, "step": 6051 }, { "epoch": 0.1424, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2367, "step": 6052 }, { "epoch": 0.1424235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3301, "step": 6053 }, { "epoch": 0.1424470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3402, "step": 6054 }, { "epoch": 0.14247058823529413, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3998, "step": 6055 }, { "epoch": 0.14249411764705883, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1493, "step": 6056 }, { "epoch": 0.14251764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0177, "step": 6057 }, { "epoch": 0.14254117647058823, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.4125, "step": 6058 }, { "epoch": 0.14256470588235295, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.9963, "step": 6059 }, { "epoch": 0.14258823529411765, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0222, "step": 6060 }, { "epoch": 0.14261176470588235, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1753, "step": 6061 }, { "epoch": 0.14263529411764705, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1362, "step": 6062 }, { "epoch": 0.14265882352941175, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3403, "step": 6063 }, { "epoch": 0.14268235294117648, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0733, "step": 6064 }, { "epoch": 0.14270588235294118, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.249, "step": 6065 }, { "epoch": 0.14272941176470588, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1416, "step": 6066 }, { "epoch": 0.14275294117647058, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1728, "step": 6067 }, { "epoch": 0.14277647058823528, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2097, "step": 6068 }, { "epoch": 0.1428, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2913, "step": 6069 }, { "epoch": 0.1428235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.3833, "step": 6070 }, { "epoch": 0.1428470588235294, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8831, "step": 6071 }, { "epoch": 0.1428705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1733, "step": 6072 }, { "epoch": 0.1428941176470588, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.4938, "step": 6073 }, { "epoch": 0.14291764705882354, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.8995, "step": 6074 }, { "epoch": 0.14294117647058824, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0834, "step": 6075 }, { "epoch": 0.14296470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1892, "step": 6076 }, { "epoch": 0.14298823529411764, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9974, "step": 6077 }, { "epoch": 0.14301176470588237, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.4003, "step": 6078 }, { "epoch": 0.14303529411764707, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.8289, "step": 6079 }, { "epoch": 0.14305882352941177, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 0.7318, "step": 6080 }, { "epoch": 0.14308235294117647, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 1.1668, "step": 6081 }, { "epoch": 0.14310588235294117, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1864, "step": 6082 }, { "epoch": 0.1431294117647059, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1945, "step": 6083 }, { "epoch": 0.1431529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.056, "step": 6084 }, { "epoch": 0.1431764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3073, "step": 6085 }, { "epoch": 0.1432, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1257, "step": 6086 }, { "epoch": 0.1432235294117647, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.672, "step": 6087 }, { "epoch": 0.14324705882352942, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.125, "step": 6088 }, { "epoch": 0.14327058823529412, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 0.8496, "step": 6089 }, { "epoch": 0.14329411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1752, "step": 6090 }, { "epoch": 0.14331764705882352, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.235, "step": 6091 }, { "epoch": 0.14334117647058822, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.165, "step": 6092 }, { "epoch": 0.14336470588235295, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2581, "step": 6093 }, { "epoch": 0.14338823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.5268, "step": 6094 }, { "epoch": 0.14341176470588235, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9688, "step": 6095 }, { "epoch": 0.14343529411764705, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.169, "step": 6096 }, { "epoch": 0.14345882352941178, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1326, "step": 6097 }, { "epoch": 0.14348235294117648, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0029, "step": 6098 }, { "epoch": 0.14350588235294118, "grad_norm": 0.30078125, "learning_rate": 0.02, "loss": 0.7574, "step": 6099 }, { "epoch": 0.14352941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3714, "step": 6100 }, { "epoch": 0.14355294117647058, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9753, "step": 6101 }, { "epoch": 0.1435764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2063, "step": 6102 }, { "epoch": 0.1436, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0754, "step": 6103 }, { "epoch": 0.1436235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3239, "step": 6104 }, { "epoch": 0.1436470588235294, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2136, "step": 6105 }, { "epoch": 0.1436705882352941, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1087, "step": 6106 }, { "epoch": 0.14369411764705883, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2716, "step": 6107 }, { "epoch": 0.14371764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2404, "step": 6108 }, { "epoch": 0.14374117647058823, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.2304, "step": 6109 }, { "epoch": 0.14376470588235293, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3895, "step": 6110 }, { "epoch": 0.14378823529411763, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 0.9111, "step": 6111 }, { "epoch": 0.14381176470588236, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2597, "step": 6112 }, { "epoch": 0.14383529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2684, "step": 6113 }, { "epoch": 0.14385882352941176, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1307, "step": 6114 }, { "epoch": 0.14388235294117646, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.2243, "step": 6115 }, { "epoch": 0.1439058823529412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.146, "step": 6116 }, { "epoch": 0.1439294117647059, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.1269, "step": 6117 }, { "epoch": 0.1439529411764706, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1666, "step": 6118 }, { "epoch": 0.1439764705882353, "grad_norm": 0.32421875, "learning_rate": 0.02, "loss": 0.7756, "step": 6119 }, { "epoch": 0.144, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2106, "step": 6120 }, { "epoch": 0.14402352941176472, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3836, "step": 6121 }, { "epoch": 0.14404705882352942, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2296, "step": 6122 }, { "epoch": 0.14407058823529412, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1137, "step": 6123 }, { "epoch": 0.14409411764705882, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.7328, "step": 6124 }, { "epoch": 0.14411764705882352, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1823, "step": 6125 }, { "epoch": 0.14414117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.637, "step": 6126 }, { "epoch": 0.14416470588235294, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.2907, "step": 6127 }, { "epoch": 0.14418823529411764, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.3423, "step": 6128 }, { "epoch": 0.14421176470588234, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 0.8475, "step": 6129 }, { "epoch": 0.14423529411764707, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5942, "step": 6130 }, { "epoch": 0.14425882352941177, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2126, "step": 6131 }, { "epoch": 0.14428235294117647, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0715, "step": 6132 }, { "epoch": 0.14430588235294117, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2044, "step": 6133 }, { "epoch": 0.14432941176470587, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.899, "step": 6134 }, { "epoch": 0.1443529411764706, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1587, "step": 6135 }, { "epoch": 0.1443764705882353, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1698, "step": 6136 }, { "epoch": 0.1444, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0403, "step": 6137 }, { "epoch": 0.1444235294117647, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1676, "step": 6138 }, { "epoch": 0.1444470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1711, "step": 6139 }, { "epoch": 0.14447058823529413, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3488, "step": 6140 }, { "epoch": 0.14449411764705883, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0352, "step": 6141 }, { "epoch": 0.14451764705882353, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 0.9516, "step": 6142 }, { "epoch": 0.14454117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3653, "step": 6143 }, { "epoch": 0.14456470588235293, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0466, "step": 6144 }, { "epoch": 0.14458823529411766, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0865, "step": 6145 }, { "epoch": 0.14461176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1837, "step": 6146 }, { "epoch": 0.14463529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.129, "step": 6147 }, { "epoch": 0.14465882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0982, "step": 6148 }, { "epoch": 0.14468235294117648, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 0.6924, "step": 6149 }, { "epoch": 0.14470588235294118, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3486, "step": 6150 }, { "epoch": 0.14472941176470588, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3128, "step": 6151 }, { "epoch": 0.14475294117647058, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0833, "step": 6152 }, { "epoch": 0.14477647058823528, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1531, "step": 6153 }, { "epoch": 0.1448, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3148, "step": 6154 }, { "epoch": 0.1448235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4343, "step": 6155 }, { "epoch": 0.1448470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3117, "step": 6156 }, { "epoch": 0.1448705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2386, "step": 6157 }, { "epoch": 0.1448941176470588, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2124, "step": 6158 }, { "epoch": 0.14491764705882354, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.2417, "step": 6159 }, { "epoch": 0.14494117647058824, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1795, "step": 6160 }, { "epoch": 0.14496470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.363, "step": 6161 }, { "epoch": 0.14498823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1748, "step": 6162 }, { "epoch": 0.14501176470588234, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2243, "step": 6163 }, { "epoch": 0.14503529411764707, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 0.8262, "step": 6164 }, { "epoch": 0.14505882352941177, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0502, "step": 6165 }, { "epoch": 0.14508235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2145, "step": 6166 }, { "epoch": 0.14510588235294117, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2326, "step": 6167 }, { "epoch": 0.1451294117647059, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0499, "step": 6168 }, { "epoch": 0.1451529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2593, "step": 6169 }, { "epoch": 0.1451764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.5985, "step": 6170 }, { "epoch": 0.1452, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3335, "step": 6171 }, { "epoch": 0.1452235294117647, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0042, "step": 6172 }, { "epoch": 0.14524705882352942, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.8719, "step": 6173 }, { "epoch": 0.14527058823529412, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1324, "step": 6174 }, { "epoch": 0.14529411764705882, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9926, "step": 6175 }, { "epoch": 0.14531764705882352, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2137, "step": 6176 }, { "epoch": 0.14534117647058822, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1588, "step": 6177 }, { "epoch": 0.14536470588235295, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1716, "step": 6178 }, { "epoch": 0.14538823529411765, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0806, "step": 6179 }, { "epoch": 0.14541176470588235, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0874, "step": 6180 }, { "epoch": 0.14543529411764705, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2075, "step": 6181 }, { "epoch": 0.14545882352941175, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2247, "step": 6182 }, { "epoch": 0.14548235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1257, "step": 6183 }, { "epoch": 0.14550588235294118, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9638, "step": 6184 }, { "epoch": 0.14552941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0045, "step": 6185 }, { "epoch": 0.14555294117647058, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1206, "step": 6186 }, { "epoch": 0.1455764705882353, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2854, "step": 6187 }, { "epoch": 0.1456, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.3064, "step": 6188 }, { "epoch": 0.1456235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0279, "step": 6189 }, { "epoch": 0.1456470588235294, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9609, "step": 6190 }, { "epoch": 0.1456705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2209, "step": 6191 }, { "epoch": 0.14569411764705884, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2612, "step": 6192 }, { "epoch": 0.14571764705882354, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.2061, "step": 6193 }, { "epoch": 0.14574117647058824, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1119, "step": 6194 }, { "epoch": 0.14576470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1623, "step": 6195 }, { "epoch": 0.14578823529411764, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2718, "step": 6196 }, { "epoch": 0.14581176470588236, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1057, "step": 6197 }, { "epoch": 0.14583529411764706, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0698, "step": 6198 }, { "epoch": 0.14585882352941176, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.801, "step": 6199 }, { "epoch": 0.14588235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3062, "step": 6200 }, { "epoch": 0.14590588235294116, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3743, "step": 6201 }, { "epoch": 0.1459294117647059, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 0.7144, "step": 6202 }, { "epoch": 0.1459529411764706, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9698, "step": 6203 }, { "epoch": 0.1459764705882353, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.8837, "step": 6204 }, { "epoch": 0.146, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2078, "step": 6205 }, { "epoch": 0.14602352941176472, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.215, "step": 6206 }, { "epoch": 0.14604705882352942, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8998, "step": 6207 }, { "epoch": 0.14607058823529412, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2473, "step": 6208 }, { "epoch": 0.14609411764705882, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1856, "step": 6209 }, { "epoch": 0.14611764705882352, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3392, "step": 6210 }, { "epoch": 0.14614117647058825, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1336, "step": 6211 }, { "epoch": 0.14616470588235295, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3755, "step": 6212 }, { "epoch": 0.14618823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1716, "step": 6213 }, { "epoch": 0.14621176470588235, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3169, "step": 6214 }, { "epoch": 0.14623529411764705, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2995, "step": 6215 }, { "epoch": 0.14625882352941177, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0348, "step": 6216 }, { "epoch": 0.14628235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0635, "step": 6217 }, { "epoch": 0.14630588235294117, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2179, "step": 6218 }, { "epoch": 0.14632941176470587, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1184, "step": 6219 }, { "epoch": 0.14635294117647057, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.326, "step": 6220 }, { "epoch": 0.1463764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.318, "step": 6221 }, { "epoch": 0.1464, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0967, "step": 6222 }, { "epoch": 0.1464235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.186, "step": 6223 }, { "epoch": 0.1464470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2152, "step": 6224 }, { "epoch": 0.14647058823529413, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0601, "step": 6225 }, { "epoch": 0.14649411764705883, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2505, "step": 6226 }, { "epoch": 0.14651764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3055, "step": 6227 }, { "epoch": 0.14654117647058823, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3699, "step": 6228 }, { "epoch": 0.14656470588235293, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.845, "step": 6229 }, { "epoch": 0.14658823529411766, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9382, "step": 6230 }, { "epoch": 0.14661176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2088, "step": 6231 }, { "epoch": 0.14663529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4049, "step": 6232 }, { "epoch": 0.14665882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2562, "step": 6233 }, { "epoch": 0.14668235294117646, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2859, "step": 6234 }, { "epoch": 0.14670588235294119, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3587, "step": 6235 }, { "epoch": 0.14672941176470589, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0094, "step": 6236 }, { "epoch": 0.14675294117647059, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3251, "step": 6237 }, { "epoch": 0.14677647058823529, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.287, "step": 6238 }, { "epoch": 0.1468, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2528, "step": 6239 }, { "epoch": 0.1468235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1825, "step": 6240 }, { "epoch": 0.1468470588235294, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9012, "step": 6241 }, { "epoch": 0.14687058823529411, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1557, "step": 6242 }, { "epoch": 0.14689411764705881, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9811, "step": 6243 }, { "epoch": 0.14691764705882354, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8469, "step": 6244 }, { "epoch": 0.14694117647058824, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1684, "step": 6245 }, { "epoch": 0.14696470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1992, "step": 6246 }, { "epoch": 0.14698823529411764, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2617, "step": 6247 }, { "epoch": 0.14701176470588234, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.2148, "step": 6248 }, { "epoch": 0.14703529411764707, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.3715, "step": 6249 }, { "epoch": 0.14705882352941177, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3849, "step": 6250 }, { "epoch": 0.14708235294117647, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1344, "step": 6251 }, { "epoch": 0.14710588235294117, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.484, "step": 6252 }, { "epoch": 0.14712941176470587, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2152, "step": 6253 }, { "epoch": 0.1471529411764706, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0181, "step": 6254 }, { "epoch": 0.1471764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1115, "step": 6255 }, { "epoch": 0.1472, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2623, "step": 6256 }, { "epoch": 0.1472235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1809, "step": 6257 }, { "epoch": 0.14724705882352943, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2329, "step": 6258 }, { "epoch": 0.14727058823529413, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0157, "step": 6259 }, { "epoch": 0.14729411764705883, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.1651, "step": 6260 }, { "epoch": 0.14731764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2591, "step": 6261 }, { "epoch": 0.14734117647058823, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1705, "step": 6262 }, { "epoch": 0.14736470588235295, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0433, "step": 6263 }, { "epoch": 0.14738823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1832, "step": 6264 }, { "epoch": 0.14741176470588235, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0624, "step": 6265 }, { "epoch": 0.14743529411764705, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1097, "step": 6266 }, { "epoch": 0.14745882352941175, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1384, "step": 6267 }, { "epoch": 0.14748235294117648, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3905, "step": 6268 }, { "epoch": 0.14750588235294118, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3389, "step": 6269 }, { "epoch": 0.14752941176470588, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.7343, "step": 6270 }, { "epoch": 0.14755294117647058, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.9475, "step": 6271 }, { "epoch": 0.14757647058823528, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.387, "step": 6272 }, { "epoch": 0.1476, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0985, "step": 6273 }, { "epoch": 0.1476235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9705, "step": 6274 }, { "epoch": 0.1476470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4433, "step": 6275 }, { "epoch": 0.1476705882352941, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8691, "step": 6276 }, { "epoch": 0.14769411764705884, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1485, "step": 6277 }, { "epoch": 0.14771764705882354, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.137, "step": 6278 }, { "epoch": 0.14774117647058824, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0074, "step": 6279 }, { "epoch": 0.14776470588235294, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.1557, "step": 6280 }, { "epoch": 0.14778823529411764, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1639, "step": 6281 }, { "epoch": 0.14781176470588236, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3959, "step": 6282 }, { "epoch": 0.14783529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2194, "step": 6283 }, { "epoch": 0.14785882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1603, "step": 6284 }, { "epoch": 0.14788235294117646, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1364, "step": 6285 }, { "epoch": 0.14790588235294116, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0263, "step": 6286 }, { "epoch": 0.1479294117647059, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1212, "step": 6287 }, { "epoch": 0.1479529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.4946, "step": 6288 }, { "epoch": 0.1479764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0307, "step": 6289 }, { "epoch": 0.148, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1748, "step": 6290 }, { "epoch": 0.1480235294117647, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1853, "step": 6291 }, { "epoch": 0.14804705882352942, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1021, "step": 6292 }, { "epoch": 0.14807058823529412, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0739, "step": 6293 }, { "epoch": 0.14809411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1273, "step": 6294 }, { "epoch": 0.14811764705882352, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9413, "step": 6295 }, { "epoch": 0.14814117647058825, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.374, "step": 6296 }, { "epoch": 0.14816470588235295, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.5013, "step": 6297 }, { "epoch": 0.14818823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1092, "step": 6298 }, { "epoch": 0.14821176470588235, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2311, "step": 6299 }, { "epoch": 0.14823529411764705, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2842, "step": 6300 }, { "epoch": 0.14825882352941178, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2574, "step": 6301 }, { "epoch": 0.14828235294117648, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1606, "step": 6302 }, { "epoch": 0.14830588235294118, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.399, "step": 6303 }, { "epoch": 0.14832941176470588, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2428, "step": 6304 }, { "epoch": 0.14835294117647058, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.4689, "step": 6305 }, { "epoch": 0.1483764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3203, "step": 6306 }, { "epoch": 0.1484, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.3386, "step": 6307 }, { "epoch": 0.1484235294117647, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9377, "step": 6308 }, { "epoch": 0.1484470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4964, "step": 6309 }, { "epoch": 0.1484705882352941, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3878, "step": 6310 }, { "epoch": 0.14849411764705883, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9871, "step": 6311 }, { "epoch": 0.14851764705882353, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.9144, "step": 6312 }, { "epoch": 0.14854117647058823, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.351, "step": 6313 }, { "epoch": 0.14856470588235293, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.295, "step": 6314 }, { "epoch": 0.14858823529411766, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9646, "step": 6315 }, { "epoch": 0.14861176470588236, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2825, "step": 6316 }, { "epoch": 0.14863529411764706, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 0.7013, "step": 6317 }, { "epoch": 0.14865882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3126, "step": 6318 }, { "epoch": 0.14868235294117646, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 0.9408, "step": 6319 }, { "epoch": 0.1487058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1246, "step": 6320 }, { "epoch": 0.1487294117647059, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9901, "step": 6321 }, { "epoch": 0.1487529411764706, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1904, "step": 6322 }, { "epoch": 0.1487764705882353, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.9664, "step": 6323 }, { "epoch": 0.1488, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2761, "step": 6324 }, { "epoch": 0.14882352941176472, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0955, "step": 6325 }, { "epoch": 0.14884705882352942, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2318, "step": 6326 }, { "epoch": 0.14887058823529412, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2094, "step": 6327 }, { "epoch": 0.14889411764705882, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2565, "step": 6328 }, { "epoch": 0.14891764705882352, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2914, "step": 6329 }, { "epoch": 0.14894117647058824, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.6143, "step": 6330 }, { "epoch": 0.14896470588235294, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0632, "step": 6331 }, { "epoch": 0.14898823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2155, "step": 6332 }, { "epoch": 0.14901176470588234, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.7396, "step": 6333 }, { "epoch": 0.14903529411764707, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.284, "step": 6334 }, { "epoch": 0.14905882352941177, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3136, "step": 6335 }, { "epoch": 0.14908235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2573, "step": 6336 }, { "epoch": 0.14910588235294117, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.295, "step": 6337 }, { "epoch": 0.14912941176470587, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9919, "step": 6338 }, { "epoch": 0.1491529411764706, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9455, "step": 6339 }, { "epoch": 0.1491764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2752, "step": 6340 }, { "epoch": 0.1492, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.8838, "step": 6341 }, { "epoch": 0.1492235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0556, "step": 6342 }, { "epoch": 0.1492470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3072, "step": 6343 }, { "epoch": 0.14927058823529413, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1902, "step": 6344 }, { "epoch": 0.14929411764705883, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.211, "step": 6345 }, { "epoch": 0.14931764705882353, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.6828, "step": 6346 }, { "epoch": 0.14934117647058823, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4312, "step": 6347 }, { "epoch": 0.14936470588235295, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9873, "step": 6348 }, { "epoch": 0.14938823529411766, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1047, "step": 6349 }, { "epoch": 0.14941176470588236, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2936, "step": 6350 }, { "epoch": 0.14943529411764706, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9469, "step": 6351 }, { "epoch": 0.14945882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3606, "step": 6352 }, { "epoch": 0.14948235294117648, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0087, "step": 6353 }, { "epoch": 0.14950588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1239, "step": 6354 }, { "epoch": 0.14952941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1759, "step": 6355 }, { "epoch": 0.14955294117647058, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.219, "step": 6356 }, { "epoch": 0.14957647058823528, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.4283, "step": 6357 }, { "epoch": 0.1496, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1713, "step": 6358 }, { "epoch": 0.1496235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2364, "step": 6359 }, { "epoch": 0.1496470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.26, "step": 6360 }, { "epoch": 0.1496705882352941, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1742, "step": 6361 }, { "epoch": 0.1496941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.237, "step": 6362 }, { "epoch": 0.14971764705882354, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 0.7401, "step": 6363 }, { "epoch": 0.14974117647058824, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0022, "step": 6364 }, { "epoch": 0.14976470588235294, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1388, "step": 6365 }, { "epoch": 0.14978823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4466, "step": 6366 }, { "epoch": 0.14981176470588237, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.1262, "step": 6367 }, { "epoch": 0.14983529411764707, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1447, "step": 6368 }, { "epoch": 0.14985882352941177, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9908, "step": 6369 }, { "epoch": 0.14988235294117647, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.5918, "step": 6370 }, { "epoch": 0.14990588235294117, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2042, "step": 6371 }, { "epoch": 0.1499294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4266, "step": 6372 }, { "epoch": 0.1499529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2716, "step": 6373 }, { "epoch": 0.1499764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1638, "step": 6374 }, { "epoch": 0.15, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4797, "step": 6375 }, { "epoch": 0.1500235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3379, "step": 6376 }, { "epoch": 0.15004705882352942, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.1057, "step": 6377 }, { "epoch": 0.15007058823529412, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9346, "step": 6378 }, { "epoch": 0.15009411764705882, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.8034, "step": 6379 }, { "epoch": 0.15011764705882352, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4213, "step": 6380 }, { "epoch": 0.15014117647058822, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2796, "step": 6381 }, { "epoch": 0.15016470588235295, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1301, "step": 6382 }, { "epoch": 0.15018823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2609, "step": 6383 }, { "epoch": 0.15021176470588235, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8219, "step": 6384 }, { "epoch": 0.15023529411764705, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0454, "step": 6385 }, { "epoch": 0.15025882352941178, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9607, "step": 6386 }, { "epoch": 0.15028235294117648, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2643, "step": 6387 }, { "epoch": 0.15030588235294118, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0974, "step": 6388 }, { "epoch": 0.15032941176470588, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2098, "step": 6389 }, { "epoch": 0.15035294117647058, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1228, "step": 6390 }, { "epoch": 0.1503764705882353, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 1.0929, "step": 6391 }, { "epoch": 0.1504, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2775, "step": 6392 }, { "epoch": 0.1504235294117647, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 1.016, "step": 6393 }, { "epoch": 0.1504470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3783, "step": 6394 }, { "epoch": 0.1504705882352941, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1624, "step": 6395 }, { "epoch": 0.15049411764705883, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0345, "step": 6396 }, { "epoch": 0.15051764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3568, "step": 6397 }, { "epoch": 0.15054117647058823, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2861, "step": 6398 }, { "epoch": 0.15056470588235293, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0691, "step": 6399 }, { "epoch": 0.15058823529411763, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2533, "step": 6400 }, { "epoch": 0.15061176470588236, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4907, "step": 6401 }, { "epoch": 0.15063529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2907, "step": 6402 }, { "epoch": 0.15065882352941176, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0732, "step": 6403 }, { "epoch": 0.15068235294117646, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0629, "step": 6404 }, { "epoch": 0.1507058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2391, "step": 6405 }, { "epoch": 0.1507294117647059, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2772, "step": 6406 }, { "epoch": 0.1507529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0465, "step": 6407 }, { "epoch": 0.1507764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3852, "step": 6408 }, { "epoch": 0.1508, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1549, "step": 6409 }, { "epoch": 0.15082352941176472, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2513, "step": 6410 }, { "epoch": 0.15084705882352942, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2723, "step": 6411 }, { "epoch": 0.15087058823529412, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9707, "step": 6412 }, { "epoch": 0.15089411764705882, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.102, "step": 6413 }, { "epoch": 0.15091764705882352, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9784, "step": 6414 }, { "epoch": 0.15094117647058825, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9785, "step": 6415 }, { "epoch": 0.15096470588235295, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3353, "step": 6416 }, { "epoch": 0.15098823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4344, "step": 6417 }, { "epoch": 0.15101176470588235, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1546, "step": 6418 }, { "epoch": 0.15103529411764705, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.084, "step": 6419 }, { "epoch": 0.15105882352941177, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3593, "step": 6420 }, { "epoch": 0.15108235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1742, "step": 6421 }, { "epoch": 0.15110588235294117, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3645, "step": 6422 }, { "epoch": 0.15112941176470587, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.898, "step": 6423 }, { "epoch": 0.1511529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3725, "step": 6424 }, { "epoch": 0.1511764705882353, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.171, "step": 6425 }, { "epoch": 0.1512, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.885, "step": 6426 }, { "epoch": 0.1512235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4877, "step": 6427 }, { "epoch": 0.1512470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0154, "step": 6428 }, { "epoch": 0.15127058823529413, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.2466, "step": 6429 }, { "epoch": 0.15129411764705883, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0591, "step": 6430 }, { "epoch": 0.15131764705882353, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8697, "step": 6431 }, { "epoch": 0.15134117647058823, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2493, "step": 6432 }, { "epoch": 0.15136470588235293, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3146, "step": 6433 }, { "epoch": 0.15138823529411766, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1814, "step": 6434 }, { "epoch": 0.15141176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2511, "step": 6435 }, { "epoch": 0.15143529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3585, "step": 6436 }, { "epoch": 0.15145882352941176, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1617, "step": 6437 }, { "epoch": 0.15148235294117646, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4076, "step": 6438 }, { "epoch": 0.15150588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.4349, "step": 6439 }, { "epoch": 0.15152941176470588, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2537, "step": 6440 }, { "epoch": 0.15155294117647058, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0762, "step": 6441 }, { "epoch": 0.15157647058823528, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.8812, "step": 6442 }, { "epoch": 0.1516, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1946, "step": 6443 }, { "epoch": 0.1516235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1936, "step": 6444 }, { "epoch": 0.1516470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4556, "step": 6445 }, { "epoch": 0.1516705882352941, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0163, "step": 6446 }, { "epoch": 0.1516941176470588, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.091, "step": 6447 }, { "epoch": 0.15171764705882354, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8842, "step": 6448 }, { "epoch": 0.15174117647058824, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3534, "step": 6449 }, { "epoch": 0.15176470588235294, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9097, "step": 6450 }, { "epoch": 0.15178823529411764, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.08, "step": 6451 }, { "epoch": 0.15181176470588234, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1221, "step": 6452 }, { "epoch": 0.15183529411764707, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.9074, "step": 6453 }, { "epoch": 0.15185882352941177, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9496, "step": 6454 }, { "epoch": 0.15188235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3469, "step": 6455 }, { "epoch": 0.15190588235294117, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0947, "step": 6456 }, { "epoch": 0.1519294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2935, "step": 6457 }, { "epoch": 0.1519529411764706, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9667, "step": 6458 }, { "epoch": 0.1519764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2827, "step": 6459 }, { "epoch": 0.152, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2417, "step": 6460 }, { "epoch": 0.1520235294117647, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1877, "step": 6461 }, { "epoch": 0.15204705882352942, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2101, "step": 6462 }, { "epoch": 0.15207058823529412, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4218, "step": 6463 }, { "epoch": 0.15209411764705882, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9357, "step": 6464 }, { "epoch": 0.15211764705882352, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3265, "step": 6465 }, { "epoch": 0.15214117647058822, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.207, "step": 6466 }, { "epoch": 0.15216470588235295, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 0.8173, "step": 6467 }, { "epoch": 0.15218823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2147, "step": 6468 }, { "epoch": 0.15221176470588235, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1307, "step": 6469 }, { "epoch": 0.15223529411764705, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9866, "step": 6470 }, { "epoch": 0.15225882352941175, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2178, "step": 6471 }, { "epoch": 0.15228235294117648, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1974, "step": 6472 }, { "epoch": 0.15230588235294118, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.119, "step": 6473 }, { "epoch": 0.15232941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4574, "step": 6474 }, { "epoch": 0.15235294117647058, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.9053, "step": 6475 }, { "epoch": 0.1523764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1354, "step": 6476 }, { "epoch": 0.1524, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.237, "step": 6477 }, { "epoch": 0.1524235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3424, "step": 6478 }, { "epoch": 0.1524470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2147, "step": 6479 }, { "epoch": 0.1524705882352941, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9131, "step": 6480 }, { "epoch": 0.15249411764705884, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0456, "step": 6481 }, { "epoch": 0.15251764705882354, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1703, "step": 6482 }, { "epoch": 0.15254117647058824, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.3574, "step": 6483 }, { "epoch": 0.15256470588235294, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2836, "step": 6484 }, { "epoch": 0.15258823529411764, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1338, "step": 6485 }, { "epoch": 0.15261176470588236, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2085, "step": 6486 }, { "epoch": 0.15263529411764706, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9122, "step": 6487 }, { "epoch": 0.15265882352941176, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9266, "step": 6488 }, { "epoch": 0.15268235294117646, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3598, "step": 6489 }, { "epoch": 0.15270588235294116, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1897, "step": 6490 }, { "epoch": 0.1527294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1446, "step": 6491 }, { "epoch": 0.1527529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.263, "step": 6492 }, { "epoch": 0.1527764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1603, "step": 6493 }, { "epoch": 0.1528, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2004, "step": 6494 }, { "epoch": 0.15282352941176472, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.9181, "step": 6495 }, { "epoch": 0.15284705882352942, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9563, "step": 6496 }, { "epoch": 0.15287058823529412, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2554, "step": 6497 }, { "epoch": 0.15289411764705882, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0245, "step": 6498 }, { "epoch": 0.15291764705882352, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3338, "step": 6499 }, { "epoch": 0.15294117647058825, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0738, "step": 6500 }, { "epoch": 0.15296470588235295, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2909, "step": 6501 }, { "epoch": 0.15298823529411765, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1166, "step": 6502 }, { "epoch": 0.15301176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2969, "step": 6503 }, { "epoch": 0.15303529411764705, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.804, "step": 6504 }, { "epoch": 0.15305882352941177, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9439, "step": 6505 }, { "epoch": 0.15308235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4003, "step": 6506 }, { "epoch": 0.15310588235294117, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4535, "step": 6507 }, { "epoch": 0.15312941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9711, "step": 6508 }, { "epoch": 0.15315294117647058, "grad_norm": 0.302734375, "learning_rate": 0.02, "loss": 0.9065, "step": 6509 }, { "epoch": 0.1531764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.318, "step": 6510 }, { "epoch": 0.1532, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3432, "step": 6511 }, { "epoch": 0.1532235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3437, "step": 6512 }, { "epoch": 0.1532470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2476, "step": 6513 }, { "epoch": 0.15327058823529413, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2449, "step": 6514 }, { "epoch": 0.15329411764705883, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1176, "step": 6515 }, { "epoch": 0.15331764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3273, "step": 6516 }, { "epoch": 0.15334117647058823, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1972, "step": 6517 }, { "epoch": 0.15336470588235293, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2887, "step": 6518 }, { "epoch": 0.15338823529411766, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0685, "step": 6519 }, { "epoch": 0.15341176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.286, "step": 6520 }, { "epoch": 0.15343529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2357, "step": 6521 }, { "epoch": 0.15345882352941176, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0051, "step": 6522 }, { "epoch": 0.15348235294117646, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0602, "step": 6523 }, { "epoch": 0.1535058823529412, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0542, "step": 6524 }, { "epoch": 0.1535294117647059, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1746, "step": 6525 }, { "epoch": 0.1535529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0652, "step": 6526 }, { "epoch": 0.1535764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0766, "step": 6527 }, { "epoch": 0.1536, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1279, "step": 6528 }, { "epoch": 0.15362352941176471, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.077, "step": 6529 }, { "epoch": 0.15364705882352941, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3445, "step": 6530 }, { "epoch": 0.15367058823529411, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3008, "step": 6531 }, { "epoch": 0.15369411764705881, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0738, "step": 6532 }, { "epoch": 0.15371764705882354, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1286, "step": 6533 }, { "epoch": 0.15374117647058824, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1922, "step": 6534 }, { "epoch": 0.15376470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1821, "step": 6535 }, { "epoch": 0.15378823529411764, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.333, "step": 6536 }, { "epoch": 0.15381176470588234, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1739, "step": 6537 }, { "epoch": 0.15383529411764707, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.246, "step": 6538 }, { "epoch": 0.15385882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2934, "step": 6539 }, { "epoch": 0.15388235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.3574, "step": 6540 }, { "epoch": 0.15390588235294117, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0625, "step": 6541 }, { "epoch": 0.15392941176470587, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9866, "step": 6542 }, { "epoch": 0.1539529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3887, "step": 6543 }, { "epoch": 0.1539764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.9068, "step": 6544 }, { "epoch": 0.154, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0817, "step": 6545 }, { "epoch": 0.1540235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4021, "step": 6546 }, { "epoch": 0.1540470588235294, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1202, "step": 6547 }, { "epoch": 0.15407058823529413, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.091, "step": 6548 }, { "epoch": 0.15409411764705883, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1769, "step": 6549 }, { "epoch": 0.15411764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2774, "step": 6550 }, { "epoch": 0.15414117647058823, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.068, "step": 6551 }, { "epoch": 0.15416470588235295, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3023, "step": 6552 }, { "epoch": 0.15418823529411765, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1442, "step": 6553 }, { "epoch": 0.15421176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2145, "step": 6554 }, { "epoch": 0.15423529411764705, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9588, "step": 6555 }, { "epoch": 0.15425882352941175, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2586, "step": 6556 }, { "epoch": 0.15428235294117648, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8996, "step": 6557 }, { "epoch": 0.15430588235294118, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.974, "step": 6558 }, { "epoch": 0.15432941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1725, "step": 6559 }, { "epoch": 0.15435294117647058, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2979, "step": 6560 }, { "epoch": 0.15437647058823528, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.088, "step": 6561 }, { "epoch": 0.1544, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9334, "step": 6562 }, { "epoch": 0.1544235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2422, "step": 6563 }, { "epoch": 0.1544470588235294, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1117, "step": 6564 }, { "epoch": 0.1544705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.3434, "step": 6565 }, { "epoch": 0.15449411764705884, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9089, "step": 6566 }, { "epoch": 0.15451764705882354, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3121, "step": 6567 }, { "epoch": 0.15454117647058824, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9113, "step": 6568 }, { "epoch": 0.15456470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2311, "step": 6569 }, { "epoch": 0.15458823529411764, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.279, "step": 6570 }, { "epoch": 0.15461176470588237, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2219, "step": 6571 }, { "epoch": 0.15463529411764707, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0584, "step": 6572 }, { "epoch": 0.15465882352941177, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1904, "step": 6573 }, { "epoch": 0.15468235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.321, "step": 6574 }, { "epoch": 0.15470588235294117, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8298, "step": 6575 }, { "epoch": 0.1547294117647059, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2609, "step": 6576 }, { "epoch": 0.1547529411764706, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.9099, "step": 6577 }, { "epoch": 0.1547764705882353, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1726, "step": 6578 }, { "epoch": 0.1548, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2171, "step": 6579 }, { "epoch": 0.1548235294117647, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.3379, "step": 6580 }, { "epoch": 0.15484705882352942, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9058, "step": 6581 }, { "epoch": 0.15487058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2383, "step": 6582 }, { "epoch": 0.15489411764705882, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1491, "step": 6583 }, { "epoch": 0.15491764705882352, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0572, "step": 6584 }, { "epoch": 0.15494117647058825, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2187, "step": 6585 }, { "epoch": 0.15496470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4573, "step": 6586 }, { "epoch": 0.15498823529411765, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9413, "step": 6587 }, { "epoch": 0.15501176470588235, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1159, "step": 6588 }, { "epoch": 0.15503529411764705, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1723, "step": 6589 }, { "epoch": 0.15505882352941178, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2682, "step": 6590 }, { "epoch": 0.15508235294117648, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 0.7345, "step": 6591 }, { "epoch": 0.15510588235294118, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1835, "step": 6592 }, { "epoch": 0.15512941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4889, "step": 6593 }, { "epoch": 0.15515294117647058, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3346, "step": 6594 }, { "epoch": 0.1551764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1686, "step": 6595 }, { "epoch": 0.1552, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.4405, "step": 6596 }, { "epoch": 0.1552235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4471, "step": 6597 }, { "epoch": 0.1552470588235294, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2613, "step": 6598 }, { "epoch": 0.1552705882352941, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1336, "step": 6599 }, { "epoch": 0.15529411764705883, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1757, "step": 6600 }, { "epoch": 0.15531764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2537, "step": 6601 }, { "epoch": 0.15534117647058823, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8881, "step": 6602 }, { "epoch": 0.15536470588235293, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1788, "step": 6603 }, { "epoch": 0.15538823529411766, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2875, "step": 6604 }, { "epoch": 0.15541176470588236, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.6426, "step": 6605 }, { "epoch": 0.15543529411764706, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9154, "step": 6606 }, { "epoch": 0.15545882352941176, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1649, "step": 6607 }, { "epoch": 0.15548235294117646, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0627, "step": 6608 }, { "epoch": 0.1555058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1206, "step": 6609 }, { "epoch": 0.1555294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3458, "step": 6610 }, { "epoch": 0.1555529411764706, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9906, "step": 6611 }, { "epoch": 0.1555764705882353, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.8872, "step": 6612 }, { "epoch": 0.1556, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.035, "step": 6613 }, { "epoch": 0.15562352941176472, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3583, "step": 6614 }, { "epoch": 0.15564705882352942, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0183, "step": 6615 }, { "epoch": 0.15567058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3076, "step": 6616 }, { "epoch": 0.15569411764705882, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0996, "step": 6617 }, { "epoch": 0.15571764705882352, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.4651, "step": 6618 }, { "epoch": 0.15574117647058824, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0015, "step": 6619 }, { "epoch": 0.15576470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4143, "step": 6620 }, { "epoch": 0.15578823529411764, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1975, "step": 6621 }, { "epoch": 0.15581176470588234, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3747, "step": 6622 }, { "epoch": 0.15583529411764707, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3834, "step": 6623 }, { "epoch": 0.15585882352941177, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3364, "step": 6624 }, { "epoch": 0.15588235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1494, "step": 6625 }, { "epoch": 0.15590588235294117, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2206, "step": 6626 }, { "epoch": 0.15592941176470587, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0727, "step": 6627 }, { "epoch": 0.1559529411764706, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.9227, "step": 6628 }, { "epoch": 0.1559764705882353, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 1.0449, "step": 6629 }, { "epoch": 0.156, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1209, "step": 6630 }, { "epoch": 0.1560235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3087, "step": 6631 }, { "epoch": 0.1560470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2142, "step": 6632 }, { "epoch": 0.15607058823529413, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8028, "step": 6633 }, { "epoch": 0.15609411764705883, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9463, "step": 6634 }, { "epoch": 0.15611764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1492, "step": 6635 }, { "epoch": 0.15614117647058823, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9201, "step": 6636 }, { "epoch": 0.15616470588235293, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.5295, "step": 6637 }, { "epoch": 0.15618823529411766, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.304, "step": 6638 }, { "epoch": 0.15621176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.3347, "step": 6639 }, { "epoch": 0.15623529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2571, "step": 6640 }, { "epoch": 0.15625882352941176, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2023, "step": 6641 }, { "epoch": 0.15628235294117648, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1413, "step": 6642 }, { "epoch": 0.15630588235294118, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0525, "step": 6643 }, { "epoch": 0.15632941176470588, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1478, "step": 6644 }, { "epoch": 0.15635294117647058, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.055, "step": 6645 }, { "epoch": 0.15637647058823528, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1737, "step": 6646 }, { "epoch": 0.1564, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.251, "step": 6647 }, { "epoch": 0.1564235294117647, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0268, "step": 6648 }, { "epoch": 0.1564470588235294, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.0053, "step": 6649 }, { "epoch": 0.1564705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3649, "step": 6650 }, { "epoch": 0.1564941176470588, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1379, "step": 6651 }, { "epoch": 0.15651764705882354, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9883, "step": 6652 }, { "epoch": 0.15654117647058824, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1182, "step": 6653 }, { "epoch": 0.15656470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3976, "step": 6654 }, { "epoch": 0.15658823529411764, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1181, "step": 6655 }, { "epoch": 0.15661176470588234, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.292, "step": 6656 }, { "epoch": 0.15663529411764707, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1699, "step": 6657 }, { "epoch": 0.15665882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1769, "step": 6658 }, { "epoch": 0.15668235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0456, "step": 6659 }, { "epoch": 0.15670588235294117, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3124, "step": 6660 }, { "epoch": 0.1567294117647059, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1448, "step": 6661 }, { "epoch": 0.1567529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2222, "step": 6662 }, { "epoch": 0.1567764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0035, "step": 6663 }, { "epoch": 0.1568, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3377, "step": 6664 }, { "epoch": 0.1568235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0088, "step": 6665 }, { "epoch": 0.15684705882352942, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2907, "step": 6666 }, { "epoch": 0.15687058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3158, "step": 6667 }, { "epoch": 0.15689411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2061, "step": 6668 }, { "epoch": 0.15691764705882352, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3179, "step": 6669 }, { "epoch": 0.15694117647058822, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3624, "step": 6670 }, { "epoch": 0.15696470588235295, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9948, "step": 6671 }, { "epoch": 0.15698823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2831, "step": 6672 }, { "epoch": 0.15701176470588235, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1809, "step": 6673 }, { "epoch": 0.15703529411764705, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3031, "step": 6674 }, { "epoch": 0.15705882352941178, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1512, "step": 6675 }, { "epoch": 0.15708235294117648, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.206, "step": 6676 }, { "epoch": 0.15710588235294118, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2827, "step": 6677 }, { "epoch": 0.15712941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2958, "step": 6678 }, { "epoch": 0.15715294117647058, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1973, "step": 6679 }, { "epoch": 0.1571764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8947, "step": 6680 }, { "epoch": 0.1572, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2653, "step": 6681 }, { "epoch": 0.1572235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9976, "step": 6682 }, { "epoch": 0.1572470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.3685, "step": 6683 }, { "epoch": 0.1572705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3078, "step": 6684 }, { "epoch": 0.15729411764705883, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1501, "step": 6685 }, { "epoch": 0.15731764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1962, "step": 6686 }, { "epoch": 0.15734117647058823, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.2391, "step": 6687 }, { "epoch": 0.15736470588235293, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9742, "step": 6688 }, { "epoch": 0.15738823529411763, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0917, "step": 6689 }, { "epoch": 0.15741176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1902, "step": 6690 }, { "epoch": 0.15743529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3805, "step": 6691 }, { "epoch": 0.15745882352941176, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0433, "step": 6692 }, { "epoch": 0.15748235294117646, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.002, "step": 6693 }, { "epoch": 0.1575058823529412, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9166, "step": 6694 }, { "epoch": 0.1575294117647059, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2482, "step": 6695 }, { "epoch": 0.1575529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3956, "step": 6696 }, { "epoch": 0.1575764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1924, "step": 6697 }, { "epoch": 0.1576, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3954, "step": 6698 }, { "epoch": 0.15762352941176472, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2466, "step": 6699 }, { "epoch": 0.15764705882352942, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9835, "step": 6700 }, { "epoch": 0.15767058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3232, "step": 6701 }, { "epoch": 0.15769411764705882, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2114, "step": 6702 }, { "epoch": 0.15771764705882352, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9919, "step": 6703 }, { "epoch": 0.15774117647058825, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2374, "step": 6704 }, { "epoch": 0.15776470588235295, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3663, "step": 6705 }, { "epoch": 0.15778823529411765, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1805, "step": 6706 }, { "epoch": 0.15781176470588235, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1791, "step": 6707 }, { "epoch": 0.15783529411764705, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1346, "step": 6708 }, { "epoch": 0.15785882352941177, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3411, "step": 6709 }, { "epoch": 0.15788235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0253, "step": 6710 }, { "epoch": 0.15790588235294117, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2239, "step": 6711 }, { "epoch": 0.15792941176470587, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2859, "step": 6712 }, { "epoch": 0.1579529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.198, "step": 6713 }, { "epoch": 0.1579764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2353, "step": 6714 }, { "epoch": 0.158, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3663, "step": 6715 }, { "epoch": 0.1580235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.309, "step": 6716 }, { "epoch": 0.1580470588235294, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9659, "step": 6717 }, { "epoch": 0.15807058823529413, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1544, "step": 6718 }, { "epoch": 0.15809411764705883, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4318, "step": 6719 }, { "epoch": 0.15811764705882353, "grad_norm": 0.298828125, "learning_rate": 0.02, "loss": 0.8909, "step": 6720 }, { "epoch": 0.15814117647058823, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1711, "step": 6721 }, { "epoch": 0.15816470588235293, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0805, "step": 6722 }, { "epoch": 0.15818823529411766, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1525, "step": 6723 }, { "epoch": 0.15821176470588236, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.5135, "step": 6724 }, { "epoch": 0.15823529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2439, "step": 6725 }, { "epoch": 0.15825882352941176, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1213, "step": 6726 }, { "epoch": 0.15828235294117646, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0379, "step": 6727 }, { "epoch": 0.15830588235294119, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1422, "step": 6728 }, { "epoch": 0.15832941176470589, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0858, "step": 6729 }, { "epoch": 0.15835294117647059, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2259, "step": 6730 }, { "epoch": 0.15837647058823529, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1599, "step": 6731 }, { "epoch": 0.1584, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.835, "step": 6732 }, { "epoch": 0.1584235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2792, "step": 6733 }, { "epoch": 0.1584470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1915, "step": 6734 }, { "epoch": 0.1584705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0527, "step": 6735 }, { "epoch": 0.1584941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.961, "step": 6736 }, { "epoch": 0.15851764705882354, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2626, "step": 6737 }, { "epoch": 0.15854117647058824, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1955, "step": 6738 }, { "epoch": 0.15856470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1459, "step": 6739 }, { "epoch": 0.15858823529411764, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3568, "step": 6740 }, { "epoch": 0.15861176470588234, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2867, "step": 6741 }, { "epoch": 0.15863529411764707, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0826, "step": 6742 }, { "epoch": 0.15865882352941177, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0029, "step": 6743 }, { "epoch": 0.15868235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2887, "step": 6744 }, { "epoch": 0.15870588235294117, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9057, "step": 6745 }, { "epoch": 0.15872941176470587, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2181, "step": 6746 }, { "epoch": 0.1587529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2179, "step": 6747 }, { "epoch": 0.1587764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7322, "step": 6748 }, { "epoch": 0.1588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.3079, "step": 6749 }, { "epoch": 0.1588235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0574, "step": 6750 }, { "epoch": 0.15884705882352942, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.5714, "step": 6751 }, { "epoch": 0.15887058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4509, "step": 6752 }, { "epoch": 0.15889411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4116, "step": 6753 }, { "epoch": 0.15891764705882352, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3797, "step": 6754 }, { "epoch": 0.15894117647058822, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0919, "step": 6755 }, { "epoch": 0.15896470588235295, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2304, "step": 6756 }, { "epoch": 0.15898823529411765, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1685, "step": 6757 }, { "epoch": 0.15901176470588235, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0487, "step": 6758 }, { "epoch": 0.15903529411764705, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2013, "step": 6759 }, { "epoch": 0.15905882352941175, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.3113, "step": 6760 }, { "epoch": 0.15908235294117648, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0827, "step": 6761 }, { "epoch": 0.15910588235294118, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.4478, "step": 6762 }, { "epoch": 0.15912941176470588, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2048, "step": 6763 }, { "epoch": 0.15915294117647058, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.3212, "step": 6764 }, { "epoch": 0.15917647058823528, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2459, "step": 6765 }, { "epoch": 0.1592, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2789, "step": 6766 }, { "epoch": 0.1592235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1624, "step": 6767 }, { "epoch": 0.1592470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1541, "step": 6768 }, { "epoch": 0.1592705882352941, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0681, "step": 6769 }, { "epoch": 0.15929411764705884, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1628, "step": 6770 }, { "epoch": 0.15931764705882354, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1224, "step": 6771 }, { "epoch": 0.15934117647058824, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1863, "step": 6772 }, { "epoch": 0.15936470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1963, "step": 6773 }, { "epoch": 0.15938823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1692, "step": 6774 }, { "epoch": 0.15941176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0676, "step": 6775 }, { "epoch": 0.15943529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1637, "step": 6776 }, { "epoch": 0.15945882352941176, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0351, "step": 6777 }, { "epoch": 0.15948235294117646, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0467, "step": 6778 }, { "epoch": 0.15950588235294116, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0273, "step": 6779 }, { "epoch": 0.1595294117647059, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1564, "step": 6780 }, { "epoch": 0.1595529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2795, "step": 6781 }, { "epoch": 0.1595764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9694, "step": 6782 }, { "epoch": 0.1596, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.2389, "step": 6783 }, { "epoch": 0.15962352941176472, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0066, "step": 6784 }, { "epoch": 0.15964705882352942, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1985, "step": 6785 }, { "epoch": 0.15967058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4615, "step": 6786 }, { "epoch": 0.15969411764705882, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0387, "step": 6787 }, { "epoch": 0.15971764705882352, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0153, "step": 6788 }, { "epoch": 0.15974117647058825, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9095, "step": 6789 }, { "epoch": 0.15976470588235295, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.308, "step": 6790 }, { "epoch": 0.15978823529411765, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2829, "step": 6791 }, { "epoch": 0.15981176470588235, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0833, "step": 6792 }, { "epoch": 0.15983529411764705, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9381, "step": 6793 }, { "epoch": 0.15985882352941178, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9999, "step": 6794 }, { "epoch": 0.15988235294117648, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0092, "step": 6795 }, { "epoch": 0.15990588235294118, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1481, "step": 6796 }, { "epoch": 0.15992941176470588, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1051, "step": 6797 }, { "epoch": 0.15995294117647058, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.064, "step": 6798 }, { "epoch": 0.1599764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9464, "step": 6799 }, { "epoch": 0.16, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4323, "step": 6800 }, { "epoch": 0.1600235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9377, "step": 6801 }, { "epoch": 0.1600470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1311, "step": 6802 }, { "epoch": 0.16007058823529413, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2904, "step": 6803 }, { "epoch": 0.16009411764705883, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.2149, "step": 6804 }, { "epoch": 0.16011764705882353, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9334, "step": 6805 }, { "epoch": 0.16014117647058823, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.9301, "step": 6806 }, { "epoch": 0.16016470588235293, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2453, "step": 6807 }, { "epoch": 0.16018823529411766, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8196, "step": 6808 }, { "epoch": 0.16021176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1355, "step": 6809 }, { "epoch": 0.16023529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1044, "step": 6810 }, { "epoch": 0.16025882352941176, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0229, "step": 6811 }, { "epoch": 0.16028235294117646, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1028, "step": 6812 }, { "epoch": 0.1603058823529412, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.9048, "step": 6813 }, { "epoch": 0.1603294117647059, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2444, "step": 6814 }, { "epoch": 0.1603529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3918, "step": 6815 }, { "epoch": 0.1603764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1473, "step": 6816 }, { "epoch": 0.1604, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2856, "step": 6817 }, { "epoch": 0.16042352941176471, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.215, "step": 6818 }, { "epoch": 0.16044705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2519, "step": 6819 }, { "epoch": 0.16047058823529411, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3681, "step": 6820 }, { "epoch": 0.16049411764705881, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0584, "step": 6821 }, { "epoch": 0.16051764705882354, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1016, "step": 6822 }, { "epoch": 0.16054117647058824, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2026, "step": 6823 }, { "epoch": 0.16056470588235294, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8452, "step": 6824 }, { "epoch": 0.16058823529411764, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3715, "step": 6825 }, { "epoch": 0.16061176470588234, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2273, "step": 6826 }, { "epoch": 0.16063529411764707, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.916, "step": 6827 }, { "epoch": 0.16065882352941177, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2699, "step": 6828 }, { "epoch": 0.16068235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9824, "step": 6829 }, { "epoch": 0.16070588235294117, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.17, "step": 6830 }, { "epoch": 0.16072941176470587, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.799, "step": 6831 }, { "epoch": 0.1607529411764706, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8836, "step": 6832 }, { "epoch": 0.1607764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1621, "step": 6833 }, { "epoch": 0.1608, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9403, "step": 6834 }, { "epoch": 0.1608235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9337, "step": 6835 }, { "epoch": 0.1608470588235294, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1184, "step": 6836 }, { "epoch": 0.16087058823529413, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1259, "step": 6837 }, { "epoch": 0.16089411764705883, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4044, "step": 6838 }, { "epoch": 0.16091764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.2678, "step": 6839 }, { "epoch": 0.16094117647058823, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.4302, "step": 6840 }, { "epoch": 0.16096470588235295, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3452, "step": 6841 }, { "epoch": 0.16098823529411765, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2041, "step": 6842 }, { "epoch": 0.16101176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3276, "step": 6843 }, { "epoch": 0.16103529411764705, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1034, "step": 6844 }, { "epoch": 0.16105882352941175, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3832, "step": 6845 }, { "epoch": 0.16108235294117648, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4847, "step": 6846 }, { "epoch": 0.16110588235294118, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1838, "step": 6847 }, { "epoch": 0.16112941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3695, "step": 6848 }, { "epoch": 0.16115294117647058, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.4533, "step": 6849 }, { "epoch": 0.16117647058823528, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.469, "step": 6850 }, { "epoch": 0.1612, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.0998, "step": 6851 }, { "epoch": 0.1612235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.274, "step": 6852 }, { "epoch": 0.1612470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2167, "step": 6853 }, { "epoch": 0.1612705882352941, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2941, "step": 6854 }, { "epoch": 0.1612941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2702, "step": 6855 }, { "epoch": 0.16131764705882354, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8625, "step": 6856 }, { "epoch": 0.16134117647058824, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1721, "step": 6857 }, { "epoch": 0.16136470588235294, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1769, "step": 6858 }, { "epoch": 0.16138823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0539, "step": 6859 }, { "epoch": 0.16141176470588237, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.3668, "step": 6860 }, { "epoch": 0.16143529411764707, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2048, "step": 6861 }, { "epoch": 0.16145882352941177, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.084, "step": 6862 }, { "epoch": 0.16148235294117647, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.1066, "step": 6863 }, { "epoch": 0.16150588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3312, "step": 6864 }, { "epoch": 0.1615294117647059, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9029, "step": 6865 }, { "epoch": 0.1615529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0976, "step": 6866 }, { "epoch": 0.1615764705882353, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8095, "step": 6867 }, { "epoch": 0.1616, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1983, "step": 6868 }, { "epoch": 0.1616235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3423, "step": 6869 }, { "epoch": 0.16164705882352942, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.856, "step": 6870 }, { "epoch": 0.16167058823529412, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0015, "step": 6871 }, { "epoch": 0.16169411764705882, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9936, "step": 6872 }, { "epoch": 0.16171764705882352, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3656, "step": 6873 }, { "epoch": 0.16174117647058822, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2138, "step": 6874 }, { "epoch": 0.16176470588235295, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1078, "step": 6875 }, { "epoch": 0.16178823529411765, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0561, "step": 6876 }, { "epoch": 0.16181176470588235, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9899, "step": 6877 }, { "epoch": 0.16183529411764705, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1463, "step": 6878 }, { "epoch": 0.16185882352941178, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.3272, "step": 6879 }, { "epoch": 0.16188235294117648, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9689, "step": 6880 }, { "epoch": 0.16190588235294118, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.3427, "step": 6881 }, { "epoch": 0.16192941176470588, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0128, "step": 6882 }, { "epoch": 0.16195294117647058, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9298, "step": 6883 }, { "epoch": 0.1619764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2573, "step": 6884 }, { "epoch": 0.162, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0072, "step": 6885 }, { "epoch": 0.1620235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1356, "step": 6886 }, { "epoch": 0.1620470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3614, "step": 6887 }, { "epoch": 0.1620705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3085, "step": 6888 }, { "epoch": 0.16209411764705883, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0051, "step": 6889 }, { "epoch": 0.16211764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4004, "step": 6890 }, { "epoch": 0.16214117647058823, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.922, "step": 6891 }, { "epoch": 0.16216470588235293, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4473, "step": 6892 }, { "epoch": 0.16218823529411766, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1071, "step": 6893 }, { "epoch": 0.16221176470588236, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1813, "step": 6894 }, { "epoch": 0.16223529411764706, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0949, "step": 6895 }, { "epoch": 0.16225882352941176, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0145, "step": 6896 }, { "epoch": 0.16228235294117646, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0347, "step": 6897 }, { "epoch": 0.1623058823529412, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1305, "step": 6898 }, { "epoch": 0.1623294117647059, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.106, "step": 6899 }, { "epoch": 0.1623529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.068, "step": 6900 }, { "epoch": 0.1623764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2726, "step": 6901 }, { "epoch": 0.1624, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0506, "step": 6902 }, { "epoch": 0.16242352941176472, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0341, "step": 6903 }, { "epoch": 0.16244705882352942, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.547, "step": 6904 }, { "epoch": 0.16247058823529412, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2309, "step": 6905 }, { "epoch": 0.16249411764705882, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.218, "step": 6906 }, { "epoch": 0.16251764705882352, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.083, "step": 6907 }, { "epoch": 0.16254117647058824, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.242, "step": 6908 }, { "epoch": 0.16256470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.4019, "step": 6909 }, { "epoch": 0.16258823529411764, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7976, "step": 6910 }, { "epoch": 0.16261176470588234, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3751, "step": 6911 }, { "epoch": 0.16263529411764707, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2847, "step": 6912 }, { "epoch": 0.16265882352941177, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0642, "step": 6913 }, { "epoch": 0.16268235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0693, "step": 6914 }, { "epoch": 0.16270588235294117, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1743, "step": 6915 }, { "epoch": 0.16272941176470587, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8942, "step": 6916 }, { "epoch": 0.1627529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2366, "step": 6917 }, { "epoch": 0.1627764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1716, "step": 6918 }, { "epoch": 0.1628, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0319, "step": 6919 }, { "epoch": 0.1628235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3661, "step": 6920 }, { "epoch": 0.1628470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.4276, "step": 6921 }, { "epoch": 0.16287058823529413, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.843, "step": 6922 }, { "epoch": 0.16289411764705883, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3242, "step": 6923 }, { "epoch": 0.16291764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.373, "step": 6924 }, { "epoch": 0.16294117647058823, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0557, "step": 6925 }, { "epoch": 0.16296470588235293, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1081, "step": 6926 }, { "epoch": 0.16298823529411766, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0997, "step": 6927 }, { "epoch": 0.16301176470588236, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8116, "step": 6928 }, { "epoch": 0.16303529411764706, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1801, "step": 6929 }, { "epoch": 0.16305882352941176, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0132, "step": 6930 }, { "epoch": 0.16308235294117648, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4384, "step": 6931 }, { "epoch": 0.16310588235294118, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.97, "step": 6932 }, { "epoch": 0.16312941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2952, "step": 6933 }, { "epoch": 0.16315294117647058, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7362, "step": 6934 }, { "epoch": 0.16317647058823528, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9906, "step": 6935 }, { "epoch": 0.1632, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2542, "step": 6936 }, { "epoch": 0.1632235294117647, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.3112, "step": 6937 }, { "epoch": 0.1632470588235294, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2733, "step": 6938 }, { "epoch": 0.1632705882352941, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7287, "step": 6939 }, { "epoch": 0.1632941176470588, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1821, "step": 6940 }, { "epoch": 0.16331764705882354, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2496, "step": 6941 }, { "epoch": 0.16334117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.191, "step": 6942 }, { "epoch": 0.16336470588235294, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9708, "step": 6943 }, { "epoch": 0.16338823529411764, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1013, "step": 6944 }, { "epoch": 0.16341176470588234, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9773, "step": 6945 }, { "epoch": 0.16343529411764707, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.1851, "step": 6946 }, { "epoch": 0.16345882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1387, "step": 6947 }, { "epoch": 0.16348235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.8762, "step": 6948 }, { "epoch": 0.16350588235294117, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.7392, "step": 6949 }, { "epoch": 0.1635294117647059, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1672, "step": 6950 }, { "epoch": 0.1635529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1439, "step": 6951 }, { "epoch": 0.1635764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2928, "step": 6952 }, { "epoch": 0.1636, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.879, "step": 6953 }, { "epoch": 0.1636235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.109, "step": 6954 }, { "epoch": 0.16364705882352942, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2787, "step": 6955 }, { "epoch": 0.16367058823529412, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.2388, "step": 6956 }, { "epoch": 0.16369411764705882, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9249, "step": 6957 }, { "epoch": 0.16371764705882352, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0658, "step": 6958 }, { "epoch": 0.16374117647058822, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3108, "step": 6959 }, { "epoch": 0.16376470588235295, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.7276, "step": 6960 }, { "epoch": 0.16378823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.165, "step": 6961 }, { "epoch": 0.16381176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.367, "step": 6962 }, { "epoch": 0.16383529411764705, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2583, "step": 6963 }, { "epoch": 0.16385882352941175, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1394, "step": 6964 }, { "epoch": 0.16388235294117648, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3918, "step": 6965 }, { "epoch": 0.16390588235294118, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 0.7416, "step": 6966 }, { "epoch": 0.16392941176470588, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9227, "step": 6967 }, { "epoch": 0.16395294117647058, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1578, "step": 6968 }, { "epoch": 0.1639764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1839, "step": 6969 }, { "epoch": 0.164, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9295, "step": 6970 }, { "epoch": 0.1640235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.259, "step": 6971 }, { "epoch": 0.1640470588235294, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9238, "step": 6972 }, { "epoch": 0.1640705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1834, "step": 6973 }, { "epoch": 0.16409411764705883, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2019, "step": 6974 }, { "epoch": 0.16411764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9897, "step": 6975 }, { "epoch": 0.16414117647058823, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0967, "step": 6976 }, { "epoch": 0.16416470588235293, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1508, "step": 6977 }, { "epoch": 0.16418823529411763, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1353, "step": 6978 }, { "epoch": 0.16421176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2414, "step": 6979 }, { "epoch": 0.16423529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0913, "step": 6980 }, { "epoch": 0.16425882352941176, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1969, "step": 6981 }, { "epoch": 0.16428235294117646, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.231, "step": 6982 }, { "epoch": 0.16430588235294116, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0033, "step": 6983 }, { "epoch": 0.1643294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4841, "step": 6984 }, { "epoch": 0.1643529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1022, "step": 6985 }, { "epoch": 0.1643764705882353, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8902, "step": 6986 }, { "epoch": 0.1644, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0164, "step": 6987 }, { "epoch": 0.16442352941176472, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2235, "step": 6988 }, { "epoch": 0.16444705882352942, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0272, "step": 6989 }, { "epoch": 0.16447058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0799, "step": 6990 }, { "epoch": 0.16449411764705882, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0815, "step": 6991 }, { "epoch": 0.16451764705882352, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2796, "step": 6992 }, { "epoch": 0.16454117647058825, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1723, "step": 6993 }, { "epoch": 0.16456470588235295, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1146, "step": 6994 }, { "epoch": 0.16458823529411765, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9575, "step": 6995 }, { "epoch": 0.16461176470588235, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8878, "step": 6996 }, { "epoch": 0.16463529411764705, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1555, "step": 6997 }, { "epoch": 0.16465882352941177, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8369, "step": 6998 }, { "epoch": 0.16468235294117647, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8084, "step": 6999 }, { "epoch": 0.16470588235294117, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1206, "step": 7000 }, { "epoch": 0.16470588235294117, "eval_loss": 2.23186993598938, "eval_runtime": 689.5698, "eval_samples_per_second": 12.327, "eval_steps_per_second": 3.082, "step": 7000 }, { "epoch": 0.16472941176470587, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9277, "step": 7001 }, { "epoch": 0.1647529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2097, "step": 7002 }, { "epoch": 0.1647764705882353, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.719, "step": 7003 }, { "epoch": 0.1648, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.143, "step": 7004 }, { "epoch": 0.1648235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.338, "step": 7005 }, { "epoch": 0.1648470588235294, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.7349, "step": 7006 }, { "epoch": 0.16487058823529413, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1482, "step": 7007 }, { "epoch": 0.16489411764705883, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 0.7177, "step": 7008 }, { "epoch": 0.16491764705882353, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.7084, "step": 7009 }, { "epoch": 0.16494117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1474, "step": 7010 }, { "epoch": 0.16496470588235293, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8675, "step": 7011 }, { "epoch": 0.16498823529411766, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.169, "step": 7012 }, { "epoch": 0.16501176470588236, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3363, "step": 7013 }, { "epoch": 0.16503529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1725, "step": 7014 }, { "epoch": 0.16505882352941176, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.874, "step": 7015 }, { "epoch": 0.16508235294117646, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3682, "step": 7016 }, { "epoch": 0.16510588235294119, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9103, "step": 7017 }, { "epoch": 0.16512941176470589, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9262, "step": 7018 }, { "epoch": 0.16515294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3153, "step": 7019 }, { "epoch": 0.16517647058823529, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0581, "step": 7020 }, { "epoch": 0.1652, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9754, "step": 7021 }, { "epoch": 0.1652235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1515, "step": 7022 }, { "epoch": 0.1652470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3339, "step": 7023 }, { "epoch": 0.1652705882352941, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9872, "step": 7024 }, { "epoch": 0.1652941176470588, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8388, "step": 7025 }, { "epoch": 0.16531764705882354, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.934, "step": 7026 }, { "epoch": 0.16534117647058824, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2253, "step": 7027 }, { "epoch": 0.16536470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1632, "step": 7028 }, { "epoch": 0.16538823529411764, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.7497, "step": 7029 }, { "epoch": 0.16541176470588234, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.378, "step": 7030 }, { "epoch": 0.16543529411764707, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.3533, "step": 7031 }, { "epoch": 0.16545882352941177, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1216, "step": 7032 }, { "epoch": 0.16548235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.4115, "step": 7033 }, { "epoch": 0.16550588235294117, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0526, "step": 7034 }, { "epoch": 0.16552941176470587, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.302, "step": 7035 }, { "epoch": 0.1655529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2736, "step": 7036 }, { "epoch": 0.1655764705882353, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.0391, "step": 7037 }, { "epoch": 0.1656, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3811, "step": 7038 }, { "epoch": 0.1656235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2945, "step": 7039 }, { "epoch": 0.16564705882352943, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1966, "step": 7040 }, { "epoch": 0.16567058823529413, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1941, "step": 7041 }, { "epoch": 0.16569411764705883, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.8376, "step": 7042 }, { "epoch": 0.16571764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2695, "step": 7043 }, { "epoch": 0.16574117647058823, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2844, "step": 7044 }, { "epoch": 0.16576470588235295, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1262, "step": 7045 }, { "epoch": 0.16578823529411765, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0431, "step": 7046 }, { "epoch": 0.16581176470588235, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9415, "step": 7047 }, { "epoch": 0.16583529411764705, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.3083, "step": 7048 }, { "epoch": 0.16585882352941175, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.1465, "step": 7049 }, { "epoch": 0.16588235294117648, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.3658, "step": 7050 }, { "epoch": 0.16590588235294118, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 1.0649, "step": 7051 }, { "epoch": 0.16592941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1304, "step": 7052 }, { "epoch": 0.16595294117647058, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1564, "step": 7053 }, { "epoch": 0.16597647058823528, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2324, "step": 7054 }, { "epoch": 0.166, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0473, "step": 7055 }, { "epoch": 0.1660235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4079, "step": 7056 }, { "epoch": 0.1660470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.219, "step": 7057 }, { "epoch": 0.1660705882352941, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1901, "step": 7058 }, { "epoch": 0.16609411764705884, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2337, "step": 7059 }, { "epoch": 0.16611764705882354, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1496, "step": 7060 }, { "epoch": 0.16614117647058824, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.973, "step": 7061 }, { "epoch": 0.16616470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0444, "step": 7062 }, { "epoch": 0.16618823529411764, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0398, "step": 7063 }, { "epoch": 0.16621176470588236, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.2994, "step": 7064 }, { "epoch": 0.16623529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3147, "step": 7065 }, { "epoch": 0.16625882352941176, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.08, "step": 7066 }, { "epoch": 0.16628235294117646, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9083, "step": 7067 }, { "epoch": 0.16630588235294116, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9336, "step": 7068 }, { "epoch": 0.1663294117647059, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1888, "step": 7069 }, { "epoch": 0.1663529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1264, "step": 7070 }, { "epoch": 0.1663764705882353, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.2961, "step": 7071 }, { "epoch": 0.1664, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.2211, "step": 7072 }, { "epoch": 0.1664235294117647, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0713, "step": 7073 }, { "epoch": 0.16644705882352942, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.193, "step": 7074 }, { "epoch": 0.16647058823529412, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.168, "step": 7075 }, { "epoch": 0.16649411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4382, "step": 7076 }, { "epoch": 0.16651764705882352, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1312, "step": 7077 }, { "epoch": 0.16654117647058825, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1189, "step": 7078 }, { "epoch": 0.16656470588235295, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1915, "step": 7079 }, { "epoch": 0.16658823529411765, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1734, "step": 7080 }, { "epoch": 0.16661176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2907, "step": 7081 }, { "epoch": 0.16663529411764705, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3221, "step": 7082 }, { "epoch": 0.16665882352941178, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 0.7238, "step": 7083 }, { "epoch": 0.16668235294117648, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.3058, "step": 7084 }, { "epoch": 0.16670588235294118, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0111, "step": 7085 }, { "epoch": 0.16672941176470588, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0184, "step": 7086 }, { "epoch": 0.16675294117647058, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2124, "step": 7087 }, { "epoch": 0.1667764705882353, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8875, "step": 7088 }, { "epoch": 0.1668, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2754, "step": 7089 }, { "epoch": 0.1668235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4699, "step": 7090 }, { "epoch": 0.1668470588235294, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0411, "step": 7091 }, { "epoch": 0.1668705882352941, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8907, "step": 7092 }, { "epoch": 0.16689411764705883, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1257, "step": 7093 }, { "epoch": 0.16691764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2271, "step": 7094 }, { "epoch": 0.16694117647058823, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1062, "step": 7095 }, { "epoch": 0.16696470588235293, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.8357, "step": 7096 }, { "epoch": 0.16698823529411766, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2912, "step": 7097 }, { "epoch": 0.16701176470588236, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9643, "step": 7098 }, { "epoch": 0.16703529411764706, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2351, "step": 7099 }, { "epoch": 0.16705882352941176, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8487, "step": 7100 }, { "epoch": 0.16708235294117646, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.8015, "step": 7101 }, { "epoch": 0.1671058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0987, "step": 7102 }, { "epoch": 0.1671294117647059, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1239, "step": 7103 }, { "epoch": 0.1671529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1558, "step": 7104 }, { "epoch": 0.1671764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1278, "step": 7105 }, { "epoch": 0.1672, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8098, "step": 7106 }, { "epoch": 0.16722352941176472, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3765, "step": 7107 }, { "epoch": 0.16724705882352942, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8643, "step": 7108 }, { "epoch": 0.16727058823529412, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3317, "step": 7109 }, { "epoch": 0.16729411764705882, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3535, "step": 7110 }, { "epoch": 0.16731764705882354, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2154, "step": 7111 }, { "epoch": 0.16734117647058824, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0366, "step": 7112 }, { "epoch": 0.16736470588235294, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2672, "step": 7113 }, { "epoch": 0.16738823529411764, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0174, "step": 7114 }, { "epoch": 0.16741176470588234, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1141, "step": 7115 }, { "epoch": 0.16743529411764707, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1652, "step": 7116 }, { "epoch": 0.16745882352941177, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.9864, "step": 7117 }, { "epoch": 0.16748235294117647, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.9577, "step": 7118 }, { "epoch": 0.16750588235294117, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2531, "step": 7119 }, { "epoch": 0.16752941176470587, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.269, "step": 7120 }, { "epoch": 0.1675529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.254, "step": 7121 }, { "epoch": 0.1675764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.4411, "step": 7122 }, { "epoch": 0.1676, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.339, "step": 7123 }, { "epoch": 0.1676235294117647, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.9108, "step": 7124 }, { "epoch": 0.1676470588235294, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1078, "step": 7125 }, { "epoch": 0.16767058823529413, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0037, "step": 7126 }, { "epoch": 0.16769411764705883, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.5537, "step": 7127 }, { "epoch": 0.16771764705882353, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.7684, "step": 7128 }, { "epoch": 0.16774117647058823, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1253, "step": 7129 }, { "epoch": 0.16776470588235295, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.8843, "step": 7130 }, { "epoch": 0.16778823529411765, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1491, "step": 7131 }, { "epoch": 0.16781176470588235, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0734, "step": 7132 }, { "epoch": 0.16783529411764705, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0921, "step": 7133 }, { "epoch": 0.16785882352941175, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3641, "step": 7134 }, { "epoch": 0.16788235294117648, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2918, "step": 7135 }, { "epoch": 0.16790588235294118, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0264, "step": 7136 }, { "epoch": 0.16792941176470588, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1372, "step": 7137 }, { "epoch": 0.16795294117647058, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3331, "step": 7138 }, { "epoch": 0.16797647058823528, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3494, "step": 7139 }, { "epoch": 0.168, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1489, "step": 7140 }, { "epoch": 0.1680235294117647, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0927, "step": 7141 }, { "epoch": 0.1680470588235294, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2105, "step": 7142 }, { "epoch": 0.1680705882352941, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8473, "step": 7143 }, { "epoch": 0.1680941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2018, "step": 7144 }, { "epoch": 0.16811764705882354, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.4299, "step": 7145 }, { "epoch": 0.16814117647058824, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7958, "step": 7146 }, { "epoch": 0.16816470588235294, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.2055, "step": 7147 }, { "epoch": 0.16818823529411764, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.977, "step": 7148 }, { "epoch": 0.16821176470588237, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0899, "step": 7149 }, { "epoch": 0.16823529411764707, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0589, "step": 7150 }, { "epoch": 0.16825882352941177, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1225, "step": 7151 }, { "epoch": 0.16828235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.056, "step": 7152 }, { "epoch": 0.16830588235294117, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.7176, "step": 7153 }, { "epoch": 0.1683294117647059, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2512, "step": 7154 }, { "epoch": 0.1683529411764706, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8349, "step": 7155 }, { "epoch": 0.1683764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2255, "step": 7156 }, { "epoch": 0.1684, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1244, "step": 7157 }, { "epoch": 0.1684235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4553, "step": 7158 }, { "epoch": 0.16844705882352942, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1581, "step": 7159 }, { "epoch": 0.16847058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2628, "step": 7160 }, { "epoch": 0.16849411764705882, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0461, "step": 7161 }, { "epoch": 0.16851764705882352, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1047, "step": 7162 }, { "epoch": 0.16854117647058822, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1494, "step": 7163 }, { "epoch": 0.16856470588235295, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1978, "step": 7164 }, { "epoch": 0.16858823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1866, "step": 7165 }, { "epoch": 0.16861176470588235, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.0419, "step": 7166 }, { "epoch": 0.16863529411764705, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2326, "step": 7167 }, { "epoch": 0.16865882352941178, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3646, "step": 7168 }, { "epoch": 0.16868235294117648, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.308, "step": 7169 }, { "epoch": 0.16870588235294118, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9944, "step": 7170 }, { "epoch": 0.16872941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3496, "step": 7171 }, { "epoch": 0.16875294117647058, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2747, "step": 7172 }, { "epoch": 0.1687764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2533, "step": 7173 }, { "epoch": 0.1688, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0418, "step": 7174 }, { "epoch": 0.1688235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1893, "step": 7175 }, { "epoch": 0.1688470588235294, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9453, "step": 7176 }, { "epoch": 0.1688705882352941, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0219, "step": 7177 }, { "epoch": 0.16889411764705883, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1875, "step": 7178 }, { "epoch": 0.16891764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2832, "step": 7179 }, { "epoch": 0.16894117647058823, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0758, "step": 7180 }, { "epoch": 0.16896470588235293, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 1.1182, "step": 7181 }, { "epoch": 0.16898823529411763, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1469, "step": 7182 }, { "epoch": 0.16901176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2189, "step": 7183 }, { "epoch": 0.16903529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.305, "step": 7184 }, { "epoch": 0.16905882352941176, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1309, "step": 7185 }, { "epoch": 0.16908235294117646, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.412, "step": 7186 }, { "epoch": 0.1691058823529412, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4415, "step": 7187 }, { "epoch": 0.1691294117647059, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2096, "step": 7188 }, { "epoch": 0.1691529411764706, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.8884, "step": 7189 }, { "epoch": 0.1691764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2037, "step": 7190 }, { "epoch": 0.1692, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2943, "step": 7191 }, { "epoch": 0.16922352941176472, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0864, "step": 7192 }, { "epoch": 0.16924705882352942, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.4383, "step": 7193 }, { "epoch": 0.16927058823529412, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1815, "step": 7194 }, { "epoch": 0.16929411764705882, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9809, "step": 7195 }, { "epoch": 0.16931764705882352, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2758, "step": 7196 }, { "epoch": 0.16934117647058824, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2229, "step": 7197 }, { "epoch": 0.16936470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1841, "step": 7198 }, { "epoch": 0.16938823529411765, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.102, "step": 7199 }, { "epoch": 0.16941176470588235, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3943, "step": 7200 }, { "epoch": 0.16943529411764705, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2624, "step": 7201 }, { "epoch": 0.16945882352941177, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.983, "step": 7202 }, { "epoch": 0.16948235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.1154, "step": 7203 }, { "epoch": 0.16950588235294117, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4067, "step": 7204 }, { "epoch": 0.16952941176470587, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4568, "step": 7205 }, { "epoch": 0.1695529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1097, "step": 7206 }, { "epoch": 0.1695764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2726, "step": 7207 }, { "epoch": 0.1696, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1857, "step": 7208 }, { "epoch": 0.1696235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1045, "step": 7209 }, { "epoch": 0.1696470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0562, "step": 7210 }, { "epoch": 0.16967058823529413, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1347, "step": 7211 }, { "epoch": 0.16969411764705883, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4048, "step": 7212 }, { "epoch": 0.16971764705882353, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9333, "step": 7213 }, { "epoch": 0.16974117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.5146, "step": 7214 }, { "epoch": 0.16976470588235293, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.925, "step": 7215 }, { "epoch": 0.16978823529411766, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0815, "step": 7216 }, { "epoch": 0.16981176470588236, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.4096, "step": 7217 }, { "epoch": 0.16983529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8819, "step": 7218 }, { "epoch": 0.16985882352941176, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.074, "step": 7219 }, { "epoch": 0.16988235294117648, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8801, "step": 7220 }, { "epoch": 0.16990588235294118, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2881, "step": 7221 }, { "epoch": 0.16992941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.481, "step": 7222 }, { "epoch": 0.16995294117647058, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2386, "step": 7223 }, { "epoch": 0.16997647058823528, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1293, "step": 7224 }, { "epoch": 0.17, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9941, "step": 7225 }, { "epoch": 0.1700235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.8692, "step": 7226 }, { "epoch": 0.1700470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.3361, "step": 7227 }, { "epoch": 0.1700705882352941, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.105, "step": 7228 }, { "epoch": 0.1700941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1134, "step": 7229 }, { "epoch": 0.17011764705882354, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3221, "step": 7230 }, { "epoch": 0.17014117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1916, "step": 7231 }, { "epoch": 0.17016470588235294, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9199, "step": 7232 }, { "epoch": 0.17018823529411764, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3432, "step": 7233 }, { "epoch": 0.17021176470588234, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2594, "step": 7234 }, { "epoch": 0.17023529411764707, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2333, "step": 7235 }, { "epoch": 0.17025882352941177, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.3158, "step": 7236 }, { "epoch": 0.17028235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0557, "step": 7237 }, { "epoch": 0.17030588235294117, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.4296, "step": 7238 }, { "epoch": 0.1703294117647059, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0761, "step": 7239 }, { "epoch": 0.1703529411764706, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1552, "step": 7240 }, { "epoch": 0.1703764705882353, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0258, "step": 7241 }, { "epoch": 0.1704, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.1515, "step": 7242 }, { "epoch": 0.1704235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1848, "step": 7243 }, { "epoch": 0.17044705882352942, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0894, "step": 7244 }, { "epoch": 0.17047058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2562, "step": 7245 }, { "epoch": 0.17049411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2904, "step": 7246 }, { "epoch": 0.17051764705882352, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2158, "step": 7247 }, { "epoch": 0.17054117647058822, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0399, "step": 7248 }, { "epoch": 0.17056470588235295, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1922, "step": 7249 }, { "epoch": 0.17058823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.3169, "step": 7250 }, { "epoch": 0.17061176470588235, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2286, "step": 7251 }, { "epoch": 0.17063529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3736, "step": 7252 }, { "epoch": 0.17065882352941175, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3218, "step": 7253 }, { "epoch": 0.17068235294117648, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3179, "step": 7254 }, { "epoch": 0.17070588235294118, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.14, "step": 7255 }, { "epoch": 0.17072941176470588, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.8642, "step": 7256 }, { "epoch": 0.17075294117647058, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9128, "step": 7257 }, { "epoch": 0.1707764705882353, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2184, "step": 7258 }, { "epoch": 0.1708, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.4645, "step": 7259 }, { "epoch": 0.1708235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.1671, "step": 7260 }, { "epoch": 0.1708470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.3043, "step": 7261 }, { "epoch": 0.1708705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2002, "step": 7262 }, { "epoch": 0.17089411764705884, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4231, "step": 7263 }, { "epoch": 0.17091764705882354, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4367, "step": 7264 }, { "epoch": 0.17094117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3872, "step": 7265 }, { "epoch": 0.17096470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0828, "step": 7266 }, { "epoch": 0.17098823529411764, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1992, "step": 7267 }, { "epoch": 0.17101176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.154, "step": 7268 }, { "epoch": 0.17103529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1306, "step": 7269 }, { "epoch": 0.17105882352941176, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1009, "step": 7270 }, { "epoch": 0.17108235294117646, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8483, "step": 7271 }, { "epoch": 0.17110588235294116, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.164, "step": 7272 }, { "epoch": 0.1711294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3487, "step": 7273 }, { "epoch": 0.1711529411764706, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0207, "step": 7274 }, { "epoch": 0.1711764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.446, "step": 7275 }, { "epoch": 0.1712, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2289, "step": 7276 }, { "epoch": 0.17122352941176472, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9321, "step": 7277 }, { "epoch": 0.17124705882352942, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9423, "step": 7278 }, { "epoch": 0.17127058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0235, "step": 7279 }, { "epoch": 0.17129411764705882, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1417, "step": 7280 }, { "epoch": 0.17131764705882352, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.192, "step": 7281 }, { "epoch": 0.17134117647058825, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3115, "step": 7282 }, { "epoch": 0.17136470588235295, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0396, "step": 7283 }, { "epoch": 0.17138823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1551, "step": 7284 }, { "epoch": 0.17141176470588235, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.8093, "step": 7285 }, { "epoch": 0.17143529411764705, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0948, "step": 7286 }, { "epoch": 0.17145882352941177, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.9776, "step": 7287 }, { "epoch": 0.17148235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1379, "step": 7288 }, { "epoch": 0.17150588235294117, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.973, "step": 7289 }, { "epoch": 0.17152941176470587, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2958, "step": 7290 }, { "epoch": 0.17155294117647057, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2182, "step": 7291 }, { "epoch": 0.1715764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.4351, "step": 7292 }, { "epoch": 0.1716, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2426, "step": 7293 }, { "epoch": 0.1716235294117647, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.1962, "step": 7294 }, { "epoch": 0.1716470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2089, "step": 7295 }, { "epoch": 0.17167058823529413, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0395, "step": 7296 }, { "epoch": 0.17169411764705883, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2981, "step": 7297 }, { "epoch": 0.17171764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2689, "step": 7298 }, { "epoch": 0.17174117647058823, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1204, "step": 7299 }, { "epoch": 0.17176470588235293, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.258, "step": 7300 }, { "epoch": 0.17178823529411766, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0504, "step": 7301 }, { "epoch": 0.17181176470588236, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1107, "step": 7302 }, { "epoch": 0.17183529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2584, "step": 7303 }, { "epoch": 0.17185882352941176, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.1988, "step": 7304 }, { "epoch": 0.17188235294117646, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0254, "step": 7305 }, { "epoch": 0.1719058823529412, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.011, "step": 7306 }, { "epoch": 0.1719294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2146, "step": 7307 }, { "epoch": 0.1719529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3823, "step": 7308 }, { "epoch": 0.1719764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1442, "step": 7309 }, { "epoch": 0.172, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1827, "step": 7310 }, { "epoch": 0.17202352941176471, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1527, "step": 7311 }, { "epoch": 0.17204705882352941, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1538, "step": 7312 }, { "epoch": 0.17207058823529411, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9701, "step": 7313 }, { "epoch": 0.17209411764705881, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1472, "step": 7314 }, { "epoch": 0.17211764705882354, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1993, "step": 7315 }, { "epoch": 0.17214117647058824, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2187, "step": 7316 }, { "epoch": 0.17216470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2799, "step": 7317 }, { "epoch": 0.17218823529411764, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.227, "step": 7318 }, { "epoch": 0.17221176470588234, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2952, "step": 7319 }, { "epoch": 0.17223529411764707, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.3056, "step": 7320 }, { "epoch": 0.17225882352941177, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2942, "step": 7321 }, { "epoch": 0.17228235294117647, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.8576, "step": 7322 }, { "epoch": 0.17230588235294117, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3094, "step": 7323 }, { "epoch": 0.17232941176470587, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1085, "step": 7324 }, { "epoch": 0.1723529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4201, "step": 7325 }, { "epoch": 0.1723764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1038, "step": 7326 }, { "epoch": 0.1724, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2898, "step": 7327 }, { "epoch": 0.1724235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2324, "step": 7328 }, { "epoch": 0.17244705882352943, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1797, "step": 7329 }, { "epoch": 0.17247058823529413, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2222, "step": 7330 }, { "epoch": 0.17249411764705883, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1101, "step": 7331 }, { "epoch": 0.17251764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3244, "step": 7332 }, { "epoch": 0.17254117647058823, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2446, "step": 7333 }, { "epoch": 0.17256470588235295, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.8783, "step": 7334 }, { "epoch": 0.17258823529411765, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0056, "step": 7335 }, { "epoch": 0.17261176470588235, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.1484, "step": 7336 }, { "epoch": 0.17263529411764705, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1649, "step": 7337 }, { "epoch": 0.17265882352941175, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9896, "step": 7338 }, { "epoch": 0.17268235294117648, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2103, "step": 7339 }, { "epoch": 0.17270588235294118, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.8599, "step": 7340 }, { "epoch": 0.17272941176470588, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0786, "step": 7341 }, { "epoch": 0.17275294117647058, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2441, "step": 7342 }, { "epoch": 0.17277647058823528, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.021, "step": 7343 }, { "epoch": 0.1728, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.3129, "step": 7344 }, { "epoch": 0.1728235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2463, "step": 7345 }, { "epoch": 0.1728470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2933, "step": 7346 }, { "epoch": 0.1728705882352941, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9635, "step": 7347 }, { "epoch": 0.17289411764705884, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.6048, "step": 7348 }, { "epoch": 0.17291764705882354, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0303, "step": 7349 }, { "epoch": 0.17294117647058824, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.3796, "step": 7350 }, { "epoch": 0.17296470588235294, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.813, "step": 7351 }, { "epoch": 0.17298823529411764, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7419, "step": 7352 }, { "epoch": 0.17301176470588236, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3601, "step": 7353 }, { "epoch": 0.17303529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1764, "step": 7354 }, { "epoch": 0.17305882352941176, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1161, "step": 7355 }, { "epoch": 0.17308235294117646, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0272, "step": 7356 }, { "epoch": 0.17310588235294116, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1898, "step": 7357 }, { "epoch": 0.1731294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0847, "step": 7358 }, { "epoch": 0.1731529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9133, "step": 7359 }, { "epoch": 0.1731764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2832, "step": 7360 }, { "epoch": 0.1732, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1193, "step": 7361 }, { "epoch": 0.1732235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4155, "step": 7362 }, { "epoch": 0.17324705882352942, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1776, "step": 7363 }, { "epoch": 0.17327058823529412, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2381, "step": 7364 }, { "epoch": 0.17329411764705882, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9967, "step": 7365 }, { "epoch": 0.17331764705882352, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0048, "step": 7366 }, { "epoch": 0.17334117647058825, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.06, "step": 7367 }, { "epoch": 0.17336470588235295, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2457, "step": 7368 }, { "epoch": 0.17338823529411765, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8375, "step": 7369 }, { "epoch": 0.17341176470588235, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1506, "step": 7370 }, { "epoch": 0.17343529411764705, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2735, "step": 7371 }, { "epoch": 0.17345882352941178, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.203, "step": 7372 }, { "epoch": 0.17348235294117648, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.6027, "step": 7373 }, { "epoch": 0.17350588235294118, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0741, "step": 7374 }, { "epoch": 0.17352941176470588, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.06, "step": 7375 }, { "epoch": 0.17355294117647058, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.5007, "step": 7376 }, { "epoch": 0.1735764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9858, "step": 7377 }, { "epoch": 0.1736, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3514, "step": 7378 }, { "epoch": 0.1736235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2075, "step": 7379 }, { "epoch": 0.1736470588235294, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0632, "step": 7380 }, { "epoch": 0.1736705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1025, "step": 7381 }, { "epoch": 0.17369411764705883, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0891, "step": 7382 }, { "epoch": 0.17371764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9017, "step": 7383 }, { "epoch": 0.17374117647058823, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9279, "step": 7384 }, { "epoch": 0.17376470588235293, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.12, "step": 7385 }, { "epoch": 0.17378823529411766, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.1172, "step": 7386 }, { "epoch": 0.17381176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4402, "step": 7387 }, { "epoch": 0.17383529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.179, "step": 7388 }, { "epoch": 0.17385882352941176, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9127, "step": 7389 }, { "epoch": 0.17388235294117646, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7546, "step": 7390 }, { "epoch": 0.1739058823529412, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0923, "step": 7391 }, { "epoch": 0.1739294117647059, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2038, "step": 7392 }, { "epoch": 0.1739529411764706, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9548, "step": 7393 }, { "epoch": 0.1739764705882353, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0082, "step": 7394 }, { "epoch": 0.174, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0561, "step": 7395 }, { "epoch": 0.17402352941176472, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.5703, "step": 7396 }, { "epoch": 0.17404705882352942, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.046, "step": 7397 }, { "epoch": 0.17407058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3511, "step": 7398 }, { "epoch": 0.17409411764705882, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0686, "step": 7399 }, { "epoch": 0.17411764705882352, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0038, "step": 7400 }, { "epoch": 0.17414117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9739, "step": 7401 }, { "epoch": 0.17416470588235294, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9474, "step": 7402 }, { "epoch": 0.17418823529411764, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0201, "step": 7403 }, { "epoch": 0.17421176470588234, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.7672, "step": 7404 }, { "epoch": 0.17423529411764707, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3086, "step": 7405 }, { "epoch": 0.17425882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0221, "step": 7406 }, { "epoch": 0.17428235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2576, "step": 7407 }, { "epoch": 0.17430588235294117, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8656, "step": 7408 }, { "epoch": 0.17432941176470587, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0751, "step": 7409 }, { "epoch": 0.1743529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0625, "step": 7410 }, { "epoch": 0.1743764705882353, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9182, "step": 7411 }, { "epoch": 0.1744, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1973, "step": 7412 }, { "epoch": 0.1744235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.4026, "step": 7413 }, { "epoch": 0.1744470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0768, "step": 7414 }, { "epoch": 0.17447058823529413, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2845, "step": 7415 }, { "epoch": 0.17449411764705883, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.357, "step": 7416 }, { "epoch": 0.17451764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0341, "step": 7417 }, { "epoch": 0.17454117647058823, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3051, "step": 7418 }, { "epoch": 0.17456470588235293, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2908, "step": 7419 }, { "epoch": 0.17458823529411766, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0973, "step": 7420 }, { "epoch": 0.17461176470588236, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1564, "step": 7421 }, { "epoch": 0.17463529411764706, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9247, "step": 7422 }, { "epoch": 0.17465882352941176, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8758, "step": 7423 }, { "epoch": 0.17468235294117648, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2131, "step": 7424 }, { "epoch": 0.17470588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2188, "step": 7425 }, { "epoch": 0.17472941176470588, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9448, "step": 7426 }, { "epoch": 0.17475294117647058, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0064, "step": 7427 }, { "epoch": 0.17477647058823528, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3311, "step": 7428 }, { "epoch": 0.1748, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2602, "step": 7429 }, { "epoch": 0.1748235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3019, "step": 7430 }, { "epoch": 0.1748470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0185, "step": 7431 }, { "epoch": 0.1748705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2818, "step": 7432 }, { "epoch": 0.1748941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0606, "step": 7433 }, { "epoch": 0.17491764705882354, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1921, "step": 7434 }, { "epoch": 0.17494117647058824, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1099, "step": 7435 }, { "epoch": 0.17496470588235294, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.1435, "step": 7436 }, { "epoch": 0.17498823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0557, "step": 7437 }, { "epoch": 0.17501176470588237, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.007, "step": 7438 }, { "epoch": 0.17503529411764707, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.8944, "step": 7439 }, { "epoch": 0.17505882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.4559, "step": 7440 }, { "epoch": 0.17508235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4576, "step": 7441 }, { "epoch": 0.17510588235294117, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1404, "step": 7442 }, { "epoch": 0.1751294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.4808, "step": 7443 }, { "epoch": 0.1751529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.23, "step": 7444 }, { "epoch": 0.1751764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9948, "step": 7445 }, { "epoch": 0.1752, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.784, "step": 7446 }, { "epoch": 0.1752235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9295, "step": 7447 }, { "epoch": 0.17524705882352942, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 1.1357, "step": 7448 }, { "epoch": 0.17527058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2872, "step": 7449 }, { "epoch": 0.17529411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4127, "step": 7450 }, { "epoch": 0.17531764705882352, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0031, "step": 7451 }, { "epoch": 0.17534117647058822, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.2003, "step": 7452 }, { "epoch": 0.17536470588235295, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2244, "step": 7453 }, { "epoch": 0.17538823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.179, "step": 7454 }, { "epoch": 0.17541176470588235, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0721, "step": 7455 }, { "epoch": 0.17543529411764705, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.8452, "step": 7456 }, { "epoch": 0.17545882352941178, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.309, "step": 7457 }, { "epoch": 0.17548235294117648, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.134, "step": 7458 }, { "epoch": 0.17550588235294118, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9683, "step": 7459 }, { "epoch": 0.17552941176470588, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0041, "step": 7460 }, { "epoch": 0.17555294117647058, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1653, "step": 7461 }, { "epoch": 0.1755764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2986, "step": 7462 }, { "epoch": 0.1756, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0316, "step": 7463 }, { "epoch": 0.1756235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3852, "step": 7464 }, { "epoch": 0.1756470588235294, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.962, "step": 7465 }, { "epoch": 0.1756705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3607, "step": 7466 }, { "epoch": 0.17569411764705883, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5702, "step": 7467 }, { "epoch": 0.17571764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8315, "step": 7468 }, { "epoch": 0.17574117647058823, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0193, "step": 7469 }, { "epoch": 0.17576470588235293, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8239, "step": 7470 }, { "epoch": 0.17578823529411763, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8216, "step": 7471 }, { "epoch": 0.17581176470588236, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2868, "step": 7472 }, { "epoch": 0.17583529411764706, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8589, "step": 7473 }, { "epoch": 0.17585882352941176, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.0057, "step": 7474 }, { "epoch": 0.17588235294117646, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9377, "step": 7475 }, { "epoch": 0.1759058823529412, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.4713, "step": 7476 }, { "epoch": 0.1759294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3797, "step": 7477 }, { "epoch": 0.1759529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1666, "step": 7478 }, { "epoch": 0.1759764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1452, "step": 7479 }, { "epoch": 0.176, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.347, "step": 7480 }, { "epoch": 0.17602352941176472, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2497, "step": 7481 }, { "epoch": 0.17604705882352942, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9117, "step": 7482 }, { "epoch": 0.17607058823529412, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.7489, "step": 7483 }, { "epoch": 0.17609411764705882, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2276, "step": 7484 }, { "epoch": 0.17611764705882352, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2726, "step": 7485 }, { "epoch": 0.17614117647058825, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1904, "step": 7486 }, { "epoch": 0.17616470588235295, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.159, "step": 7487 }, { "epoch": 0.17618823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0785, "step": 7488 }, { "epoch": 0.17621176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.203, "step": 7489 }, { "epoch": 0.17623529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2689, "step": 7490 }, { "epoch": 0.17625882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3363, "step": 7491 }, { "epoch": 0.17628235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9754, "step": 7492 }, { "epoch": 0.17630588235294117, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2838, "step": 7493 }, { "epoch": 0.17632941176470587, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2438, "step": 7494 }, { "epoch": 0.1763529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0824, "step": 7495 }, { "epoch": 0.1763764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.931, "step": 7496 }, { "epoch": 0.1764, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0551, "step": 7497 }, { "epoch": 0.1764235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0716, "step": 7498 }, { "epoch": 0.1764470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3131, "step": 7499 }, { "epoch": 0.17647058823529413, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0804, "step": 7500 }, { "epoch": 0.17649411764705883, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.8271, "step": 7501 }, { "epoch": 0.17651764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1481, "step": 7502 }, { "epoch": 0.17654117647058823, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1654, "step": 7503 }, { "epoch": 0.17656470588235293, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9137, "step": 7504 }, { "epoch": 0.17658823529411766, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1325, "step": 7505 }, { "epoch": 0.17661176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0395, "step": 7506 }, { "epoch": 0.17663529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3163, "step": 7507 }, { "epoch": 0.17665882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.934, "step": 7508 }, { "epoch": 0.17668235294117646, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2995, "step": 7509 }, { "epoch": 0.17670588235294118, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.4271, "step": 7510 }, { "epoch": 0.17672941176470588, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9484, "step": 7511 }, { "epoch": 0.17675294117647058, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2875, "step": 7512 }, { "epoch": 0.17677647058823528, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9892, "step": 7513 }, { "epoch": 0.1768, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9336, "step": 7514 }, { "epoch": 0.1768235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3705, "step": 7515 }, { "epoch": 0.1768470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.4165, "step": 7516 }, { "epoch": 0.1768705882352941, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9548, "step": 7517 }, { "epoch": 0.1768941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3761, "step": 7518 }, { "epoch": 0.17691764705882354, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2201, "step": 7519 }, { "epoch": 0.17694117647058824, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1712, "step": 7520 }, { "epoch": 0.17696470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3057, "step": 7521 }, { "epoch": 0.17698823529411764, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9048, "step": 7522 }, { "epoch": 0.17701176470588234, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1838, "step": 7523 }, { "epoch": 0.17703529411764707, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2443, "step": 7524 }, { "epoch": 0.17705882352941177, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1183, "step": 7525 }, { "epoch": 0.17708235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9555, "step": 7526 }, { "epoch": 0.17710588235294117, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3082, "step": 7527 }, { "epoch": 0.17712941176470587, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1365, "step": 7528 }, { "epoch": 0.1771529411764706, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.916, "step": 7529 }, { "epoch": 0.1771764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2529, "step": 7530 }, { "epoch": 0.1772, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1958, "step": 7531 }, { "epoch": 0.1772235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.233, "step": 7532 }, { "epoch": 0.17724705882352942, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2959, "step": 7533 }, { "epoch": 0.17727058823529412, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1141, "step": 7534 }, { "epoch": 0.17729411764705882, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1732, "step": 7535 }, { "epoch": 0.17731764705882352, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1916, "step": 7536 }, { "epoch": 0.17734117647058822, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2336, "step": 7537 }, { "epoch": 0.17736470588235295, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2094, "step": 7538 }, { "epoch": 0.17738823529411765, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0088, "step": 7539 }, { "epoch": 0.17741176470588235, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8424, "step": 7540 }, { "epoch": 0.17743529411764705, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0556, "step": 7541 }, { "epoch": 0.17745882352941175, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.7421, "step": 7542 }, { "epoch": 0.17748235294117648, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1784, "step": 7543 }, { "epoch": 0.17750588235294118, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8953, "step": 7544 }, { "epoch": 0.17752941176470588, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9518, "step": 7545 }, { "epoch": 0.17755294117647058, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0565, "step": 7546 }, { "epoch": 0.1775764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.332, "step": 7547 }, { "epoch": 0.1776, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.2021, "step": 7548 }, { "epoch": 0.1776235294117647, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9391, "step": 7549 }, { "epoch": 0.1776470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1974, "step": 7550 }, { "epoch": 0.1776705882352941, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1083, "step": 7551 }, { "epoch": 0.17769411764705884, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.9184, "step": 7552 }, { "epoch": 0.17771764705882354, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4704, "step": 7553 }, { "epoch": 0.17774117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.4583, "step": 7554 }, { "epoch": 0.17776470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0345, "step": 7555 }, { "epoch": 0.17778823529411764, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9682, "step": 7556 }, { "epoch": 0.17781176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.4511, "step": 7557 }, { "epoch": 0.17783529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1758, "step": 7558 }, { "epoch": 0.17785882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2462, "step": 7559 }, { "epoch": 0.17788235294117646, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1419, "step": 7560 }, { "epoch": 0.17790588235294116, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0345, "step": 7561 }, { "epoch": 0.1779294117647059, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1856, "step": 7562 }, { "epoch": 0.1779529411764706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2402, "step": 7563 }, { "epoch": 0.1779764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1123, "step": 7564 }, { "epoch": 0.178, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2159, "step": 7565 }, { "epoch": 0.17802352941176472, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2704, "step": 7566 }, { "epoch": 0.17804705882352942, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2491, "step": 7567 }, { "epoch": 0.17807058823529412, "grad_norm": 0.3984375, "learning_rate": 0.02, "loss": 1.4345, "step": 7568 }, { "epoch": 0.17809411764705882, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2531, "step": 7569 }, { "epoch": 0.17811764705882352, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7926, "step": 7570 }, { "epoch": 0.17814117647058825, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0148, "step": 7571 }, { "epoch": 0.17816470588235295, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0292, "step": 7572 }, { "epoch": 0.17818823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.3507, "step": 7573 }, { "epoch": 0.17821176470588235, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9701, "step": 7574 }, { "epoch": 0.17823529411764705, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1518, "step": 7575 }, { "epoch": 0.17825882352941178, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2578, "step": 7576 }, { "epoch": 0.17828235294117648, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2884, "step": 7577 }, { "epoch": 0.17830588235294118, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9148, "step": 7578 }, { "epoch": 0.17832941176470588, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1754, "step": 7579 }, { "epoch": 0.17835294117647058, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0135, "step": 7580 }, { "epoch": 0.1783764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2693, "step": 7581 }, { "epoch": 0.1784, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2699, "step": 7582 }, { "epoch": 0.1784235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9633, "step": 7583 }, { "epoch": 0.1784470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1266, "step": 7584 }, { "epoch": 0.17847058823529413, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0757, "step": 7585 }, { "epoch": 0.17849411764705883, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.129, "step": 7586 }, { "epoch": 0.17851764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0297, "step": 7587 }, { "epoch": 0.17854117647058823, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.91, "step": 7588 }, { "epoch": 0.17856470588235293, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.168, "step": 7589 }, { "epoch": 0.17858823529411766, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9293, "step": 7590 }, { "epoch": 0.17861176470588236, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.2401, "step": 7591 }, { "epoch": 0.17863529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1601, "step": 7592 }, { "epoch": 0.17865882352941176, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0682, "step": 7593 }, { "epoch": 0.17868235294117646, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.1258, "step": 7594 }, { "epoch": 0.1787058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4466, "step": 7595 }, { "epoch": 0.1787294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2431, "step": 7596 }, { "epoch": 0.1787529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0777, "step": 7597 }, { "epoch": 0.1787764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9297, "step": 7598 }, { "epoch": 0.1788, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0187, "step": 7599 }, { "epoch": 0.17882352941176471, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1345, "step": 7600 }, { "epoch": 0.17884705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3087, "step": 7601 }, { "epoch": 0.17887058823529411, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2774, "step": 7602 }, { "epoch": 0.17889411764705881, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2552, "step": 7603 }, { "epoch": 0.17891764705882354, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2249, "step": 7604 }, { "epoch": 0.17894117647058824, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9524, "step": 7605 }, { "epoch": 0.17896470588235294, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.9464, "step": 7606 }, { "epoch": 0.17898823529411764, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1037, "step": 7607 }, { "epoch": 0.17901176470588234, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 1.003, "step": 7608 }, { "epoch": 0.17903529411764707, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2255, "step": 7609 }, { "epoch": 0.17905882352941177, "grad_norm": 0.2890625, "learning_rate": 0.02, "loss": 0.8339, "step": 7610 }, { "epoch": 0.17908235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2992, "step": 7611 }, { "epoch": 0.17910588235294117, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0538, "step": 7612 }, { "epoch": 0.17912941176470587, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0796, "step": 7613 }, { "epoch": 0.1791529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.9659, "step": 7614 }, { "epoch": 0.1791764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0834, "step": 7615 }, { "epoch": 0.1792, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.9294, "step": 7616 }, { "epoch": 0.1792235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1115, "step": 7617 }, { "epoch": 0.1792470588235294, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0272, "step": 7618 }, { "epoch": 0.17927058823529413, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.223, "step": 7619 }, { "epoch": 0.17929411764705883, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1095, "step": 7620 }, { "epoch": 0.17931764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3545, "step": 7621 }, { "epoch": 0.17934117647058823, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3973, "step": 7622 }, { "epoch": 0.17936470588235295, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.163, "step": 7623 }, { "epoch": 0.17938823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.235, "step": 7624 }, { "epoch": 0.17941176470588235, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0815, "step": 7625 }, { "epoch": 0.17943529411764705, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.5665, "step": 7626 }, { "epoch": 0.17945882352941175, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3757, "step": 7627 }, { "epoch": 0.17948235294117648, "grad_norm": 0.27734375, "learning_rate": 0.02, "loss": 0.9, "step": 7628 }, { "epoch": 0.17950588235294118, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2628, "step": 7629 }, { "epoch": 0.17952941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.094, "step": 7630 }, { "epoch": 0.17955294117647058, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2668, "step": 7631 }, { "epoch": 0.17957647058823528, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7604, "step": 7632 }, { "epoch": 0.1796, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1627, "step": 7633 }, { "epoch": 0.1796235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.283, "step": 7634 }, { "epoch": 0.1796470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2438, "step": 7635 }, { "epoch": 0.1796705882352941, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9677, "step": 7636 }, { "epoch": 0.1796941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2182, "step": 7637 }, { "epoch": 0.17971764705882354, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.9102, "step": 7638 }, { "epoch": 0.17974117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2189, "step": 7639 }, { "epoch": 0.17976470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3009, "step": 7640 }, { "epoch": 0.17978823529411764, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.0676, "step": 7641 }, { "epoch": 0.17981176470588237, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1564, "step": 7642 }, { "epoch": 0.17983529411764707, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.2043, "step": 7643 }, { "epoch": 0.17985882352941177, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0503, "step": 7644 }, { "epoch": 0.17988235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2235, "step": 7645 }, { "epoch": 0.17990588235294117, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0333, "step": 7646 }, { "epoch": 0.1799294117647059, "grad_norm": 0.28515625, "learning_rate": 0.02, "loss": 0.6261, "step": 7647 }, { "epoch": 0.1799529411764706, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0508, "step": 7648 }, { "epoch": 0.1799764705882353, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.8493, "step": 7649 }, { "epoch": 0.18, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1014, "step": 7650 }, { "epoch": 0.1800235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0625, "step": 7651 }, { "epoch": 0.18004705882352942, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.7912, "step": 7652 }, { "epoch": 0.18007058823529412, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0721, "step": 7653 }, { "epoch": 0.18009411764705882, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0504, "step": 7654 }, { "epoch": 0.18011764705882352, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1085, "step": 7655 }, { "epoch": 0.18014117647058825, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1726, "step": 7656 }, { "epoch": 0.18016470588235295, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3185, "step": 7657 }, { "epoch": 0.18018823529411765, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0527, "step": 7658 }, { "epoch": 0.18021176470588235, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.142, "step": 7659 }, { "epoch": 0.18023529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2355, "step": 7660 }, { "epoch": 0.18025882352941178, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.8339, "step": 7661 }, { "epoch": 0.18028235294117648, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2018, "step": 7662 }, { "epoch": 0.18030588235294118, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0802, "step": 7663 }, { "epoch": 0.18032941176470588, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.8887, "step": 7664 }, { "epoch": 0.18035294117647058, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9913, "step": 7665 }, { "epoch": 0.1803764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1396, "step": 7666 }, { "epoch": 0.1804, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2329, "step": 7667 }, { "epoch": 0.1804235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0076, "step": 7668 }, { "epoch": 0.1804470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2688, "step": 7669 }, { "epoch": 0.1804705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1083, "step": 7670 }, { "epoch": 0.18049411764705883, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3419, "step": 7671 }, { "epoch": 0.18051764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1175, "step": 7672 }, { "epoch": 0.18054117647058823, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0072, "step": 7673 }, { "epoch": 0.18056470588235293, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3413, "step": 7674 }, { "epoch": 0.18058823529411766, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.1827, "step": 7675 }, { "epoch": 0.18061176470588236, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.005, "step": 7676 }, { "epoch": 0.18063529411764706, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 1.0051, "step": 7677 }, { "epoch": 0.18065882352941176, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8523, "step": 7678 }, { "epoch": 0.18068235294117646, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2206, "step": 7679 }, { "epoch": 0.1807058823529412, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9366, "step": 7680 }, { "epoch": 0.1807294117647059, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0909, "step": 7681 }, { "epoch": 0.1807529411764706, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.8955, "step": 7682 }, { "epoch": 0.1807764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1786, "step": 7683 }, { "epoch": 0.1808, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8438, "step": 7684 }, { "epoch": 0.18082352941176472, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0545, "step": 7685 }, { "epoch": 0.18084705882352942, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9157, "step": 7686 }, { "epoch": 0.18087058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2286, "step": 7687 }, { "epoch": 0.18089411764705882, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7661, "step": 7688 }, { "epoch": 0.18091764705882352, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1256, "step": 7689 }, { "epoch": 0.18094117647058824, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.3029, "step": 7690 }, { "epoch": 0.18096470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9663, "step": 7691 }, { "epoch": 0.18098823529411764, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.288, "step": 7692 }, { "epoch": 0.18101176470588234, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9799, "step": 7693 }, { "epoch": 0.18103529411764707, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0887, "step": 7694 }, { "epoch": 0.18105882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2539, "step": 7695 }, { "epoch": 0.18108235294117647, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0652, "step": 7696 }, { "epoch": 0.18110588235294117, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3497, "step": 7697 }, { "epoch": 0.18112941176470587, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2578, "step": 7698 }, { "epoch": 0.1811529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.335, "step": 7699 }, { "epoch": 0.1811764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.1144, "step": 7700 }, { "epoch": 0.1812, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1957, "step": 7701 }, { "epoch": 0.1812235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1545, "step": 7702 }, { "epoch": 0.1812470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1893, "step": 7703 }, { "epoch": 0.18127058823529413, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.8587, "step": 7704 }, { "epoch": 0.18129411764705883, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3059, "step": 7705 }, { "epoch": 0.18131764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.25, "step": 7706 }, { "epoch": 0.18134117647058823, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.8441, "step": 7707 }, { "epoch": 0.18136470588235293, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9792, "step": 7708 }, { "epoch": 0.18138823529411766, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0817, "step": 7709 }, { "epoch": 0.18141176470588236, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2328, "step": 7710 }, { "epoch": 0.18143529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3538, "step": 7711 }, { "epoch": 0.18145882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.216, "step": 7712 }, { "epoch": 0.18148235294117648, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3917, "step": 7713 }, { "epoch": 0.18150588235294118, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1715, "step": 7714 }, { "epoch": 0.18152941176470588, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0426, "step": 7715 }, { "epoch": 0.18155294117647058, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8303, "step": 7716 }, { "epoch": 0.18157647058823528, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0972, "step": 7717 }, { "epoch": 0.1816, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9383, "step": 7718 }, { "epoch": 0.1816235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2059, "step": 7719 }, { "epoch": 0.1816470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0905, "step": 7720 }, { "epoch": 0.1816705882352941, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9646, "step": 7721 }, { "epoch": 0.1816941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0945, "step": 7722 }, { "epoch": 0.18171764705882354, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9786, "step": 7723 }, { "epoch": 0.18174117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2525, "step": 7724 }, { "epoch": 0.18176470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0223, "step": 7725 }, { "epoch": 0.18178823529411764, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2811, "step": 7726 }, { "epoch": 0.18181176470588234, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.177, "step": 7727 }, { "epoch": 0.18183529411764707, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0491, "step": 7728 }, { "epoch": 0.18185882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3752, "step": 7729 }, { "epoch": 0.18188235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2543, "step": 7730 }, { "epoch": 0.18190588235294117, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0131, "step": 7731 }, { "epoch": 0.1819294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2727, "step": 7732 }, { "epoch": 0.1819529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2109, "step": 7733 }, { "epoch": 0.1819764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1834, "step": 7734 }, { "epoch": 0.182, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4288, "step": 7735 }, { "epoch": 0.1820235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2972, "step": 7736 }, { "epoch": 0.18204705882352942, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.311, "step": 7737 }, { "epoch": 0.18207058823529412, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.8686, "step": 7738 }, { "epoch": 0.18209411764705882, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2979, "step": 7739 }, { "epoch": 0.18211764705882352, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0604, "step": 7740 }, { "epoch": 0.18214117647058822, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9891, "step": 7741 }, { "epoch": 0.18216470588235295, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.4422, "step": 7742 }, { "epoch": 0.18218823529411765, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.7166, "step": 7743 }, { "epoch": 0.18221176470588235, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.8445, "step": 7744 }, { "epoch": 0.18223529411764705, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.988, "step": 7745 }, { "epoch": 0.18225882352941175, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0987, "step": 7746 }, { "epoch": 0.18228235294117648, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3583, "step": 7747 }, { "epoch": 0.18230588235294118, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2366, "step": 7748 }, { "epoch": 0.18232941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.3922, "step": 7749 }, { "epoch": 0.18235294117647058, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2731, "step": 7750 }, { "epoch": 0.1823764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.4536, "step": 7751 }, { "epoch": 0.1824, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0253, "step": 7752 }, { "epoch": 0.1824235294117647, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.6429, "step": 7753 }, { "epoch": 0.1824470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.2438, "step": 7754 }, { "epoch": 0.1824705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3374, "step": 7755 }, { "epoch": 0.18249411764705883, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0682, "step": 7756 }, { "epoch": 0.18251764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.111, "step": 7757 }, { "epoch": 0.18254117647058823, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9966, "step": 7758 }, { "epoch": 0.18256470588235293, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.3275, "step": 7759 }, { "epoch": 0.18258823529411763, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.2337, "step": 7760 }, { "epoch": 0.18261176470588236, "grad_norm": 0.310546875, "learning_rate": 0.02, "loss": 0.6701, "step": 7761 }, { "epoch": 0.18263529411764706, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9786, "step": 7762 }, { "epoch": 0.18265882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2292, "step": 7763 }, { "epoch": 0.18268235294117646, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2634, "step": 7764 }, { "epoch": 0.1827058823529412, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0114, "step": 7765 }, { "epoch": 0.1827294117647059, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9907, "step": 7766 }, { "epoch": 0.1827529411764706, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2009, "step": 7767 }, { "epoch": 0.1827764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2067, "step": 7768 }, { "epoch": 0.1828, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3838, "step": 7769 }, { "epoch": 0.18282352941176472, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3671, "step": 7770 }, { "epoch": 0.18284705882352942, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2585, "step": 7771 }, { "epoch": 0.18287058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2154, "step": 7772 }, { "epoch": 0.18289411764705882, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.1144, "step": 7773 }, { "epoch": 0.18291764705882352, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1461, "step": 7774 }, { "epoch": 0.18294117647058825, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9473, "step": 7775 }, { "epoch": 0.18296470588235295, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3001, "step": 7776 }, { "epoch": 0.18298823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1804, "step": 7777 }, { "epoch": 0.18301176470588235, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0077, "step": 7778 }, { "epoch": 0.18303529411764705, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7563, "step": 7779 }, { "epoch": 0.18305882352941177, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0146, "step": 7780 }, { "epoch": 0.18308235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0767, "step": 7781 }, { "epoch": 0.18310588235294117, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 1.0168, "step": 7782 }, { "epoch": 0.18312941176470587, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0065, "step": 7783 }, { "epoch": 0.1831529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0829, "step": 7784 }, { "epoch": 0.1831764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9874, "step": 7785 }, { "epoch": 0.1832, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0839, "step": 7786 }, { "epoch": 0.1832235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1589, "step": 7787 }, { "epoch": 0.1832470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3139, "step": 7788 }, { "epoch": 0.18327058823529413, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.7273, "step": 7789 }, { "epoch": 0.18329411764705883, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0367, "step": 7790 }, { "epoch": 0.18331764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3332, "step": 7791 }, { "epoch": 0.18334117647058823, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.114, "step": 7792 }, { "epoch": 0.18336470588235293, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0531, "step": 7793 }, { "epoch": 0.18338823529411766, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1379, "step": 7794 }, { "epoch": 0.18341176470588236, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.311, "step": 7795 }, { "epoch": 0.18343529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3922, "step": 7796 }, { "epoch": 0.18345882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.141, "step": 7797 }, { "epoch": 0.18348235294117646, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9049, "step": 7798 }, { "epoch": 0.18350588235294119, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2187, "step": 7799 }, { "epoch": 0.18352941176470589, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2022, "step": 7800 }, { "epoch": 0.18355294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2497, "step": 7801 }, { "epoch": 0.18357647058823529, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9281, "step": 7802 }, { "epoch": 0.1836, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0983, "step": 7803 }, { "epoch": 0.1836235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0347, "step": 7804 }, { "epoch": 0.1836470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3129, "step": 7805 }, { "epoch": 0.1836705882352941, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.982, "step": 7806 }, { "epoch": 0.1836941176470588, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1861, "step": 7807 }, { "epoch": 0.18371764705882354, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1695, "step": 7808 }, { "epoch": 0.18374117647058824, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.089, "step": 7809 }, { "epoch": 0.18376470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2376, "step": 7810 }, { "epoch": 0.18378823529411764, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1459, "step": 7811 }, { "epoch": 0.18381176470588234, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2227, "step": 7812 }, { "epoch": 0.18383529411764707, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0208, "step": 7813 }, { "epoch": 0.18385882352941177, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1822, "step": 7814 }, { "epoch": 0.18388235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2044, "step": 7815 }, { "epoch": 0.18390588235294117, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1993, "step": 7816 }, { "epoch": 0.18392941176470587, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.6076, "step": 7817 }, { "epoch": 0.1839529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2801, "step": 7818 }, { "epoch": 0.1839764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2065, "step": 7819 }, { "epoch": 0.184, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0316, "step": 7820 }, { "epoch": 0.1840235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3641, "step": 7821 }, { "epoch": 0.18404705882352942, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1456, "step": 7822 }, { "epoch": 0.18407058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1761, "step": 7823 }, { "epoch": 0.18409411764705882, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3959, "step": 7824 }, { "epoch": 0.18411764705882352, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1507, "step": 7825 }, { "epoch": 0.18414117647058822, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0214, "step": 7826 }, { "epoch": 0.18416470588235295, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1518, "step": 7827 }, { "epoch": 0.18418823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3701, "step": 7828 }, { "epoch": 0.18421176470588235, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8368, "step": 7829 }, { "epoch": 0.18423529411764705, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.7909, "step": 7830 }, { "epoch": 0.18425882352941175, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0279, "step": 7831 }, { "epoch": 0.18428235294117648, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 1.2286, "step": 7832 }, { "epoch": 0.18430588235294118, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2338, "step": 7833 }, { "epoch": 0.18432941176470588, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2439, "step": 7834 }, { "epoch": 0.18435294117647058, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9388, "step": 7835 }, { "epoch": 0.18437647058823528, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.989, "step": 7836 }, { "epoch": 0.1844, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.4619, "step": 7837 }, { "epoch": 0.1844235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1724, "step": 7838 }, { "epoch": 0.1844470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1984, "step": 7839 }, { "epoch": 0.1844705882352941, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8647, "step": 7840 }, { "epoch": 0.18449411764705884, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2102, "step": 7841 }, { "epoch": 0.18451764705882354, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2823, "step": 7842 }, { "epoch": 0.18454117647058824, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0657, "step": 7843 }, { "epoch": 0.18456470588235294, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0179, "step": 7844 }, { "epoch": 0.18458823529411764, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3597, "step": 7845 }, { "epoch": 0.18461176470588236, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0419, "step": 7846 }, { "epoch": 0.18463529411764706, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9274, "step": 7847 }, { "epoch": 0.18465882352941176, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0106, "step": 7848 }, { "epoch": 0.18468235294117646, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9949, "step": 7849 }, { "epoch": 0.18470588235294116, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2592, "step": 7850 }, { "epoch": 0.1847294117647059, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0254, "step": 7851 }, { "epoch": 0.1847529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.333, "step": 7852 }, { "epoch": 0.1847764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1144, "step": 7853 }, { "epoch": 0.1848, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9995, "step": 7854 }, { "epoch": 0.1848235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2508, "step": 7855 }, { "epoch": 0.18484705882352942, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7847, "step": 7856 }, { "epoch": 0.18487058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1032, "step": 7857 }, { "epoch": 0.18489411764705882, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.896, "step": 7858 }, { "epoch": 0.18491764705882352, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1308, "step": 7859 }, { "epoch": 0.18494117647058825, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1167, "step": 7860 }, { "epoch": 0.18496470588235295, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9318, "step": 7861 }, { "epoch": 0.18498823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2778, "step": 7862 }, { "epoch": 0.18501176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.163, "step": 7863 }, { "epoch": 0.18503529411764705, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0728, "step": 7864 }, { "epoch": 0.18505882352941178, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1116, "step": 7865 }, { "epoch": 0.18508235294117648, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8735, "step": 7866 }, { "epoch": 0.18510588235294118, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3753, "step": 7867 }, { "epoch": 0.18512941176470588, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3535, "step": 7868 }, { "epoch": 0.18515294117647058, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1626, "step": 7869 }, { "epoch": 0.1851764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.169, "step": 7870 }, { "epoch": 0.1852, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1625, "step": 7871 }, { "epoch": 0.1852235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9511, "step": 7872 }, { "epoch": 0.1852470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5829, "step": 7873 }, { "epoch": 0.18527058823529413, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3286, "step": 7874 }, { "epoch": 0.18529411764705883, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9426, "step": 7875 }, { "epoch": 0.18531764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3016, "step": 7876 }, { "epoch": 0.18534117647058823, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9326, "step": 7877 }, { "epoch": 0.18536470588235293, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3932, "step": 7878 }, { "epoch": 0.18538823529411766, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9714, "step": 7879 }, { "epoch": 0.18541176470588236, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0106, "step": 7880 }, { "epoch": 0.18543529411764706, "grad_norm": 0.287109375, "learning_rate": 0.02, "loss": 0.6407, "step": 7881 }, { "epoch": 0.18545882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.245, "step": 7882 }, { "epoch": 0.18548235294117646, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0173, "step": 7883 }, { "epoch": 0.1855058823529412, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.9586, "step": 7884 }, { "epoch": 0.1855294117647059, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0471, "step": 7885 }, { "epoch": 0.1855529411764706, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0153, "step": 7886 }, { "epoch": 0.1855764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0588, "step": 7887 }, { "epoch": 0.1856, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0217, "step": 7888 }, { "epoch": 0.18562352941176471, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0216, "step": 7889 }, { "epoch": 0.18564705882352942, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.3042, "step": 7890 }, { "epoch": 0.18567058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0901, "step": 7891 }, { "epoch": 0.18569411764705882, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1352, "step": 7892 }, { "epoch": 0.18571764705882354, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1644, "step": 7893 }, { "epoch": 0.18574117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1812, "step": 7894 }, { "epoch": 0.18576470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1722, "step": 7895 }, { "epoch": 0.18578823529411764, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.8431, "step": 7896 }, { "epoch": 0.18581176470588234, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.135, "step": 7897 }, { "epoch": 0.18583529411764707, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.7216, "step": 7898 }, { "epoch": 0.18585882352941177, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9513, "step": 7899 }, { "epoch": 0.18588235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.359, "step": 7900 }, { "epoch": 0.18590588235294117, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9264, "step": 7901 }, { "epoch": 0.18592941176470587, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1886, "step": 7902 }, { "epoch": 0.1859529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1964, "step": 7903 }, { "epoch": 0.1859764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2367, "step": 7904 }, { "epoch": 0.186, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9468, "step": 7905 }, { "epoch": 0.1860235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9275, "step": 7906 }, { "epoch": 0.1860470588235294, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1725, "step": 7907 }, { "epoch": 0.18607058823529413, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0511, "step": 7908 }, { "epoch": 0.18609411764705883, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1056, "step": 7909 }, { "epoch": 0.18611764705882353, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9773, "step": 7910 }, { "epoch": 0.18614117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2441, "step": 7911 }, { "epoch": 0.18616470588235295, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7528, "step": 7912 }, { "epoch": 0.18618823529411765, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0006, "step": 7913 }, { "epoch": 0.18621176470588235, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0981, "step": 7914 }, { "epoch": 0.18623529411764705, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.082, "step": 7915 }, { "epoch": 0.18625882352941175, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3688, "step": 7916 }, { "epoch": 0.18628235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1577, "step": 7917 }, { "epoch": 0.18630588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8395, "step": 7918 }, { "epoch": 0.18632941176470588, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0321, "step": 7919 }, { "epoch": 0.18635294117647058, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.984, "step": 7920 }, { "epoch": 0.18637647058823528, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0744, "step": 7921 }, { "epoch": 0.1864, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2812, "step": 7922 }, { "epoch": 0.1864235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.399, "step": 7923 }, { "epoch": 0.1864470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1503, "step": 7924 }, { "epoch": 0.1864705882352941, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9542, "step": 7925 }, { "epoch": 0.1864941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2761, "step": 7926 }, { "epoch": 0.18651764705882354, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8009, "step": 7927 }, { "epoch": 0.18654117647058824, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2735, "step": 7928 }, { "epoch": 0.18656470588235294, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9512, "step": 7929 }, { "epoch": 0.18658823529411764, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9632, "step": 7930 }, { "epoch": 0.18661176470588237, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1288, "step": 7931 }, { "epoch": 0.18663529411764707, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0019, "step": 7932 }, { "epoch": 0.18665882352941177, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0086, "step": 7933 }, { "epoch": 0.18668235294117647, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9612, "step": 7934 }, { "epoch": 0.18670588235294117, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8931, "step": 7935 }, { "epoch": 0.1867294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.4024, "step": 7936 }, { "epoch": 0.1867529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8947, "step": 7937 }, { "epoch": 0.1867764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.744, "step": 7938 }, { "epoch": 0.1868, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2215, "step": 7939 }, { "epoch": 0.1868235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1887, "step": 7940 }, { "epoch": 0.18684705882352942, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1105, "step": 7941 }, { "epoch": 0.18687058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3665, "step": 7942 }, { "epoch": 0.18689411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.4248, "step": 7943 }, { "epoch": 0.18691764705882352, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2927, "step": 7944 }, { "epoch": 0.18694117647058822, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8137, "step": 7945 }, { "epoch": 0.18696470588235295, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4314, "step": 7946 }, { "epoch": 0.18698823529411765, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 1.0531, "step": 7947 }, { "epoch": 0.18701176470588235, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9377, "step": 7948 }, { "epoch": 0.18703529411764705, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9382, "step": 7949 }, { "epoch": 0.18705882352941178, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9036, "step": 7950 }, { "epoch": 0.18708235294117648, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0504, "step": 7951 }, { "epoch": 0.18710588235294118, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8212, "step": 7952 }, { "epoch": 0.18712941176470588, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8858, "step": 7953 }, { "epoch": 0.18715294117647058, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2185, "step": 7954 }, { "epoch": 0.1871764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.27, "step": 7955 }, { "epoch": 0.1872, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0501, "step": 7956 }, { "epoch": 0.1872235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1822, "step": 7957 }, { "epoch": 0.1872470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8291, "step": 7958 }, { "epoch": 0.1872705882352941, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0123, "step": 7959 }, { "epoch": 0.18729411764705883, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1776, "step": 7960 }, { "epoch": 0.18731764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8727, "step": 7961 }, { "epoch": 0.18734117647058823, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1924, "step": 7962 }, { "epoch": 0.18736470588235293, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8738, "step": 7963 }, { "epoch": 0.18738823529411763, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.6559, "step": 7964 }, { "epoch": 0.18741176470588236, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.019, "step": 7965 }, { "epoch": 0.18743529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1491, "step": 7966 }, { "epoch": 0.18745882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1842, "step": 7967 }, { "epoch": 0.18748235294117646, "grad_norm": 0.294921875, "learning_rate": 0.02, "loss": 0.5633, "step": 7968 }, { "epoch": 0.1875058823529412, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.6617, "step": 7969 }, { "epoch": 0.1875294117647059, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0548, "step": 7970 }, { "epoch": 0.1875529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1211, "step": 7971 }, { "epoch": 0.1875764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0153, "step": 7972 }, { "epoch": 0.1876, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1804, "step": 7973 }, { "epoch": 0.18762352941176472, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9217, "step": 7974 }, { "epoch": 0.18764705882352942, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0376, "step": 7975 }, { "epoch": 0.18767058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3113, "step": 7976 }, { "epoch": 0.18769411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3368, "step": 7977 }, { "epoch": 0.18771764705882352, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.107, "step": 7978 }, { "epoch": 0.18774117647058824, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8167, "step": 7979 }, { "epoch": 0.18776470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2496, "step": 7980 }, { "epoch": 0.18778823529411764, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.952, "step": 7981 }, { "epoch": 0.18781176470588234, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0707, "step": 7982 }, { "epoch": 0.18783529411764707, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 0.7749, "step": 7983 }, { "epoch": 0.18785882352941177, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2702, "step": 7984 }, { "epoch": 0.18788235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1994, "step": 7985 }, { "epoch": 0.18790588235294117, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.7469, "step": 7986 }, { "epoch": 0.18792941176470587, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8918, "step": 7987 }, { "epoch": 0.1879529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0291, "step": 7988 }, { "epoch": 0.1879764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.7947, "step": 7989 }, { "epoch": 0.188, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2851, "step": 7990 }, { "epoch": 0.1880235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.8793, "step": 7991 }, { "epoch": 0.1880470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1074, "step": 7992 }, { "epoch": 0.18807058823529413, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0252, "step": 7993 }, { "epoch": 0.18809411764705883, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9613, "step": 7994 }, { "epoch": 0.18811764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1617, "step": 7995 }, { "epoch": 0.18814117647058823, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.7004, "step": 7996 }, { "epoch": 0.18816470588235293, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1697, "step": 7997 }, { "epoch": 0.18818823529411766, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8933, "step": 7998 }, { "epoch": 0.18821176470588236, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8488, "step": 7999 }, { "epoch": 0.18823529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3995, "step": 8000 }, { "epoch": 0.18823529411764706, "eval_loss": 2.22733736038208, "eval_runtime": 681.8209, "eval_samples_per_second": 12.467, "eval_steps_per_second": 3.117, "step": 8000 }, { "epoch": 0.18825882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2597, "step": 8001 }, { "epoch": 0.18828235294117648, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0184, "step": 8002 }, { "epoch": 0.18830588235294118, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0399, "step": 8003 }, { "epoch": 0.18832941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.319, "step": 8004 }, { "epoch": 0.18835294117647058, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0124, "step": 8005 }, { "epoch": 0.18837647058823528, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1241, "step": 8006 }, { "epoch": 0.1884, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1945, "step": 8007 }, { "epoch": 0.1884235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3156, "step": 8008 }, { "epoch": 0.1884470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2599, "step": 8009 }, { "epoch": 0.1884705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.006, "step": 8010 }, { "epoch": 0.1884941176470588, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.09, "step": 8011 }, { "epoch": 0.18851764705882354, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0945, "step": 8012 }, { "epoch": 0.18854117647058824, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.7707, "step": 8013 }, { "epoch": 0.18856470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1758, "step": 8014 }, { "epoch": 0.18858823529411764, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.4409, "step": 8015 }, { "epoch": 0.18861176470588234, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2759, "step": 8016 }, { "epoch": 0.18863529411764707, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3461, "step": 8017 }, { "epoch": 0.18865882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.3604, "step": 8018 }, { "epoch": 0.18868235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0227, "step": 8019 }, { "epoch": 0.18870588235294117, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4038, "step": 8020 }, { "epoch": 0.1887294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2434, "step": 8021 }, { "epoch": 0.1887529411764706, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9571, "step": 8022 }, { "epoch": 0.1887764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1479, "step": 8023 }, { "epoch": 0.1888, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2509, "step": 8024 }, { "epoch": 0.1888235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3035, "step": 8025 }, { "epoch": 0.18884705882352942, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9743, "step": 8026 }, { "epoch": 0.18887058823529412, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0636, "step": 8027 }, { "epoch": 0.18889411764705882, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 1.0824, "step": 8028 }, { "epoch": 0.18891764705882352, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.7221, "step": 8029 }, { "epoch": 0.18894117647058822, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 1.0579, "step": 8030 }, { "epoch": 0.18896470588235295, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8576, "step": 8031 }, { "epoch": 0.18898823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1043, "step": 8032 }, { "epoch": 0.18901176470588235, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9538, "step": 8033 }, { "epoch": 0.18903529411764705, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1808, "step": 8034 }, { "epoch": 0.18905882352941175, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0883, "step": 8035 }, { "epoch": 0.18908235294117648, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.0312, "step": 8036 }, { "epoch": 0.18910588235294118, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0952, "step": 8037 }, { "epoch": 0.18912941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.045, "step": 8038 }, { "epoch": 0.18915294117647058, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0741, "step": 8039 }, { "epoch": 0.1891764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2727, "step": 8040 }, { "epoch": 0.1892, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3448, "step": 8041 }, { "epoch": 0.1892235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9929, "step": 8042 }, { "epoch": 0.1892470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3053, "step": 8043 }, { "epoch": 0.1892705882352941, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8974, "step": 8044 }, { "epoch": 0.18929411764705883, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1886, "step": 8045 }, { "epoch": 0.18931764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1067, "step": 8046 }, { "epoch": 0.18934117647058823, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.99, "step": 8047 }, { "epoch": 0.18936470588235293, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1118, "step": 8048 }, { "epoch": 0.18938823529411764, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.874, "step": 8049 }, { "epoch": 0.18941176470588236, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2379, "step": 8050 }, { "epoch": 0.18943529411764706, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.078, "step": 8051 }, { "epoch": 0.18945882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1776, "step": 8052 }, { "epoch": 0.18948235294117646, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0383, "step": 8053 }, { "epoch": 0.18950588235294116, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3891, "step": 8054 }, { "epoch": 0.1895294117647059, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8437, "step": 8055 }, { "epoch": 0.1895529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1486, "step": 8056 }, { "epoch": 0.1895764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4342, "step": 8057 }, { "epoch": 0.1896, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8377, "step": 8058 }, { "epoch": 0.18962352941176472, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.8619, "step": 8059 }, { "epoch": 0.18964705882352942, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0163, "step": 8060 }, { "epoch": 0.18967058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2349, "step": 8061 }, { "epoch": 0.18969411764705882, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.6922, "step": 8062 }, { "epoch": 0.18971764705882352, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8937, "step": 8063 }, { "epoch": 0.18974117647058825, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1955, "step": 8064 }, { "epoch": 0.18976470588235295, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9719, "step": 8065 }, { "epoch": 0.18978823529411765, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9838, "step": 8066 }, { "epoch": 0.18981176470588235, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.6091, "step": 8067 }, { "epoch": 0.18983529411764705, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.5902, "step": 8068 }, { "epoch": 0.18985882352941177, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1364, "step": 8069 }, { "epoch": 0.18988235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8778, "step": 8070 }, { "epoch": 0.18990588235294117, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0188, "step": 8071 }, { "epoch": 0.18992941176470587, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.25, "step": 8072 }, { "epoch": 0.18995294117647057, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0975, "step": 8073 }, { "epoch": 0.1899764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0536, "step": 8074 }, { "epoch": 0.19, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0065, "step": 8075 }, { "epoch": 0.1900235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8838, "step": 8076 }, { "epoch": 0.1900470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0142, "step": 8077 }, { "epoch": 0.19007058823529413, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0166, "step": 8078 }, { "epoch": 0.19009411764705883, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3846, "step": 8079 }, { "epoch": 0.19011764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3329, "step": 8080 }, { "epoch": 0.19014117647058823, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2006, "step": 8081 }, { "epoch": 0.19016470588235293, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8083, "step": 8082 }, { "epoch": 0.19018823529411766, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0877, "step": 8083 }, { "epoch": 0.19021176470588236, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9526, "step": 8084 }, { "epoch": 0.19023529411764706, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8309, "step": 8085 }, { "epoch": 0.19025882352941176, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1652, "step": 8086 }, { "epoch": 0.19028235294117646, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 1.0343, "step": 8087 }, { "epoch": 0.19030588235294119, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0233, "step": 8088 }, { "epoch": 0.19032941176470589, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0157, "step": 8089 }, { "epoch": 0.19035294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3246, "step": 8090 }, { "epoch": 0.19037647058823529, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.7191, "step": 8091 }, { "epoch": 0.1904, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1991, "step": 8092 }, { "epoch": 0.1904235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8977, "step": 8093 }, { "epoch": 0.1904470588235294, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9458, "step": 8094 }, { "epoch": 0.1904705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2208, "step": 8095 }, { "epoch": 0.1904941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2957, "step": 8096 }, { "epoch": 0.19051764705882354, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2653, "step": 8097 }, { "epoch": 0.19054117647058824, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2209, "step": 8098 }, { "epoch": 0.19056470588235294, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1087, "step": 8099 }, { "epoch": 0.19058823529411764, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9293, "step": 8100 }, { "epoch": 0.19061176470588234, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.7736, "step": 8101 }, { "epoch": 0.19063529411764707, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9209, "step": 8102 }, { "epoch": 0.19065882352941177, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1053, "step": 8103 }, { "epoch": 0.19068235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1805, "step": 8104 }, { "epoch": 0.19070588235294117, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1816, "step": 8105 }, { "epoch": 0.19072941176470587, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9992, "step": 8106 }, { "epoch": 0.1907529411764706, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.7047, "step": 8107 }, { "epoch": 0.1907764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0111, "step": 8108 }, { "epoch": 0.1908, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1701, "step": 8109 }, { "epoch": 0.1908235294117647, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9782, "step": 8110 }, { "epoch": 0.19084705882352943, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9385, "step": 8111 }, { "epoch": 0.19087058823529413, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.186, "step": 8112 }, { "epoch": 0.19089411764705883, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9031, "step": 8113 }, { "epoch": 0.19091764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2493, "step": 8114 }, { "epoch": 0.19094117647058823, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.9319, "step": 8115 }, { "epoch": 0.19096470588235295, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9559, "step": 8116 }, { "epoch": 0.19098823529411765, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.972, "step": 8117 }, { "epoch": 0.19101176470588235, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.984, "step": 8118 }, { "epoch": 0.19103529411764705, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9919, "step": 8119 }, { "epoch": 0.19105882352941175, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1572, "step": 8120 }, { "epoch": 0.19108235294117648, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1487, "step": 8121 }, { "epoch": 0.19110588235294118, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9236, "step": 8122 }, { "epoch": 0.19112941176470588, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.1432, "step": 8123 }, { "epoch": 0.19115294117647058, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4656, "step": 8124 }, { "epoch": 0.19117647058823528, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9645, "step": 8125 }, { "epoch": 0.1912, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8586, "step": 8126 }, { "epoch": 0.1912235294117647, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.7093, "step": 8127 }, { "epoch": 0.1912470588235294, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7702, "step": 8128 }, { "epoch": 0.1912705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0878, "step": 8129 }, { "epoch": 0.19129411764705884, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1649, "step": 8130 }, { "epoch": 0.19131764705882354, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.8681, "step": 8131 }, { "epoch": 0.19134117647058824, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9459, "step": 8132 }, { "epoch": 0.19136470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.2005, "step": 8133 }, { "epoch": 0.19138823529411764, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9732, "step": 8134 }, { "epoch": 0.19141176470588236, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1106, "step": 8135 }, { "epoch": 0.19143529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.102, "step": 8136 }, { "epoch": 0.19145882352941176, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2626, "step": 8137 }, { "epoch": 0.19148235294117646, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2448, "step": 8138 }, { "epoch": 0.19150588235294116, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.3409, "step": 8139 }, { "epoch": 0.1915294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2118, "step": 8140 }, { "epoch": 0.1915529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2916, "step": 8141 }, { "epoch": 0.1915764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9286, "step": 8142 }, { "epoch": 0.1916, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9703, "step": 8143 }, { "epoch": 0.1916235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1924, "step": 8144 }, { "epoch": 0.19164705882352942, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1294, "step": 8145 }, { "epoch": 0.19167058823529412, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.088, "step": 8146 }, { "epoch": 0.19169411764705882, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.986, "step": 8147 }, { "epoch": 0.19171764705882352, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0622, "step": 8148 }, { "epoch": 0.19174117647058825, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0893, "step": 8149 }, { "epoch": 0.19176470588235295, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0714, "step": 8150 }, { "epoch": 0.19178823529411765, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9096, "step": 8151 }, { "epoch": 0.19181176470588235, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1486, "step": 8152 }, { "epoch": 0.19183529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2282, "step": 8153 }, { "epoch": 0.19185882352941178, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1113, "step": 8154 }, { "epoch": 0.19188235294117648, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1924, "step": 8155 }, { "epoch": 0.19190588235294118, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9288, "step": 8156 }, { "epoch": 0.19192941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1976, "step": 8157 }, { "epoch": 0.19195294117647058, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.3604, "step": 8158 }, { "epoch": 0.1919764705882353, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0061, "step": 8159 }, { "epoch": 0.192, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1792, "step": 8160 }, { "epoch": 0.1920235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0817, "step": 8161 }, { "epoch": 0.1920470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2655, "step": 8162 }, { "epoch": 0.1920705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2974, "step": 8163 }, { "epoch": 0.19209411764705883, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0848, "step": 8164 }, { "epoch": 0.19211764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2897, "step": 8165 }, { "epoch": 0.19214117647058823, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2552, "step": 8166 }, { "epoch": 0.19216470588235293, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.7649, "step": 8167 }, { "epoch": 0.19218823529411766, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.089, "step": 8168 }, { "epoch": 0.19221176470588236, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8488, "step": 8169 }, { "epoch": 0.19223529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2486, "step": 8170 }, { "epoch": 0.19225882352941176, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.883, "step": 8171 }, { "epoch": 0.19228235294117646, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.027, "step": 8172 }, { "epoch": 0.1923058823529412, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9004, "step": 8173 }, { "epoch": 0.1923294117647059, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2755, "step": 8174 }, { "epoch": 0.1923529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0658, "step": 8175 }, { "epoch": 0.1923764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2392, "step": 8176 }, { "epoch": 0.1924, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.022, "step": 8177 }, { "epoch": 0.19242352941176472, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.6949, "step": 8178 }, { "epoch": 0.19244705882352942, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9009, "step": 8179 }, { "epoch": 0.19247058823529412, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7058, "step": 8180 }, { "epoch": 0.19249411764705882, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8356, "step": 8181 }, { "epoch": 0.19251764705882352, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2078, "step": 8182 }, { "epoch": 0.19254117647058824, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0027, "step": 8183 }, { "epoch": 0.19256470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.033, "step": 8184 }, { "epoch": 0.19258823529411764, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0616, "step": 8185 }, { "epoch": 0.19261176470588234, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2486, "step": 8186 }, { "epoch": 0.19263529411764707, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.077, "step": 8187 }, { "epoch": 0.19265882352941177, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3421, "step": 8188 }, { "epoch": 0.19268235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.196, "step": 8189 }, { "epoch": 0.19270588235294117, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0043, "step": 8190 }, { "epoch": 0.19272941176470587, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9528, "step": 8191 }, { "epoch": 0.1927529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3219, "step": 8192 }, { "epoch": 0.1927764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9858, "step": 8193 }, { "epoch": 0.1928, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2454, "step": 8194 }, { "epoch": 0.1928235294117647, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 1.0941, "step": 8195 }, { "epoch": 0.1928470588235294, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9767, "step": 8196 }, { "epoch": 0.19287058823529413, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2082, "step": 8197 }, { "epoch": 0.19289411764705883, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0928, "step": 8198 }, { "epoch": 0.19291764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8322, "step": 8199 }, { "epoch": 0.19294117647058823, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1832, "step": 8200 }, { "epoch": 0.19296470588235295, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0631, "step": 8201 }, { "epoch": 0.19298823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0872, "step": 8202 }, { "epoch": 0.19301176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2335, "step": 8203 }, { "epoch": 0.19303529411764705, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.034, "step": 8204 }, { "epoch": 0.19305882352941175, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.2831, "step": 8205 }, { "epoch": 0.19308235294117648, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0648, "step": 8206 }, { "epoch": 0.19310588235294118, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0964, "step": 8207 }, { "epoch": 0.19312941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1949, "step": 8208 }, { "epoch": 0.19315294117647058, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1856, "step": 8209 }, { "epoch": 0.19317647058823528, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.99, "step": 8210 }, { "epoch": 0.1932, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1455, "step": 8211 }, { "epoch": 0.1932235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1598, "step": 8212 }, { "epoch": 0.1932470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8904, "step": 8213 }, { "epoch": 0.1932705882352941, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9657, "step": 8214 }, { "epoch": 0.1932941176470588, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1341, "step": 8215 }, { "epoch": 0.19331764705882354, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0986, "step": 8216 }, { "epoch": 0.19334117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0551, "step": 8217 }, { "epoch": 0.19336470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2119, "step": 8218 }, { "epoch": 0.19338823529411764, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2751, "step": 8219 }, { "epoch": 0.19341176470588237, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1596, "step": 8220 }, { "epoch": 0.19343529411764707, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.8838, "step": 8221 }, { "epoch": 0.19345882352941177, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2593, "step": 8222 }, { "epoch": 0.19348235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9002, "step": 8223 }, { "epoch": 0.19350588235294117, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0457, "step": 8224 }, { "epoch": 0.1935294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2734, "step": 8225 }, { "epoch": 0.1935529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1151, "step": 8226 }, { "epoch": 0.1935764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9305, "step": 8227 }, { "epoch": 0.1936, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2119, "step": 8228 }, { "epoch": 0.1936235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.3197, "step": 8229 }, { "epoch": 0.19364705882352942, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3422, "step": 8230 }, { "epoch": 0.19367058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2663, "step": 8231 }, { "epoch": 0.19369411764705882, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.8338, "step": 8232 }, { "epoch": 0.19371764705882352, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2955, "step": 8233 }, { "epoch": 0.19374117647058822, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0015, "step": 8234 }, { "epoch": 0.19376470588235295, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0717, "step": 8235 }, { "epoch": 0.19378823529411765, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.9326, "step": 8236 }, { "epoch": 0.19381176470588235, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2354, "step": 8237 }, { "epoch": 0.19383529411764705, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8408, "step": 8238 }, { "epoch": 0.19385882352941178, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.8612, "step": 8239 }, { "epoch": 0.19388235294117648, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1886, "step": 8240 }, { "epoch": 0.19390588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2158, "step": 8241 }, { "epoch": 0.19392941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1638, "step": 8242 }, { "epoch": 0.19395294117647058, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.8925, "step": 8243 }, { "epoch": 0.1939764705882353, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.0214, "step": 8244 }, { "epoch": 0.194, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0522, "step": 8245 }, { "epoch": 0.1940235294117647, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.834, "step": 8246 }, { "epoch": 0.1940470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1239, "step": 8247 }, { "epoch": 0.1940705882352941, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.0492, "step": 8248 }, { "epoch": 0.19409411764705883, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2092, "step": 8249 }, { "epoch": 0.19411764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.3683, "step": 8250 }, { "epoch": 0.19414117647058823, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.9401, "step": 8251 }, { "epoch": 0.19416470588235293, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0998, "step": 8252 }, { "epoch": 0.19418823529411763, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8028, "step": 8253 }, { "epoch": 0.19421176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.7968, "step": 8254 }, { "epoch": 0.19423529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4368, "step": 8255 }, { "epoch": 0.19425882352941176, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9751, "step": 8256 }, { "epoch": 0.19428235294117646, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8873, "step": 8257 }, { "epoch": 0.1943058823529412, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0654, "step": 8258 }, { "epoch": 0.1943294117647059, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9828, "step": 8259 }, { "epoch": 0.1943529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1043, "step": 8260 }, { "epoch": 0.1943764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3081, "step": 8261 }, { "epoch": 0.1944, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9736, "step": 8262 }, { "epoch": 0.19442352941176472, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0239, "step": 8263 }, { "epoch": 0.19444705882352942, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.8443, "step": 8264 }, { "epoch": 0.19447058823529412, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0683, "step": 8265 }, { "epoch": 0.19449411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2954, "step": 8266 }, { "epoch": 0.19451764705882352, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1478, "step": 8267 }, { "epoch": 0.19454117647058825, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.251, "step": 8268 }, { "epoch": 0.19456470588235295, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.3727, "step": 8269 }, { "epoch": 0.19458823529411765, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2823, "step": 8270 }, { "epoch": 0.19461176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1636, "step": 8271 }, { "epoch": 0.19463529411764705, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0214, "step": 8272 }, { "epoch": 0.19465882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2088, "step": 8273 }, { "epoch": 0.19468235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.169, "step": 8274 }, { "epoch": 0.19470588235294117, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0433, "step": 8275 }, { "epoch": 0.19472941176470587, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3899, "step": 8276 }, { "epoch": 0.1947529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9599, "step": 8277 }, { "epoch": 0.1947764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9273, "step": 8278 }, { "epoch": 0.1948, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8659, "step": 8279 }, { "epoch": 0.1948235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0085, "step": 8280 }, { "epoch": 0.1948470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1842, "step": 8281 }, { "epoch": 0.19487058823529413, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0585, "step": 8282 }, { "epoch": 0.19489411764705883, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3603, "step": 8283 }, { "epoch": 0.19491764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3021, "step": 8284 }, { "epoch": 0.19494117647058823, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2672, "step": 8285 }, { "epoch": 0.19496470588235293, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1748, "step": 8286 }, { "epoch": 0.19498823529411766, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1846, "step": 8287 }, { "epoch": 0.19501176470588236, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9475, "step": 8288 }, { "epoch": 0.19503529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.235, "step": 8289 }, { "epoch": 0.19505882352941176, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1268, "step": 8290 }, { "epoch": 0.19508235294117646, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0718, "step": 8291 }, { "epoch": 0.19510588235294118, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9939, "step": 8292 }, { "epoch": 0.19512941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8601, "step": 8293 }, { "epoch": 0.19515294117647058, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.3067, "step": 8294 }, { "epoch": 0.19517647058823528, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1274, "step": 8295 }, { "epoch": 0.1952, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.2507, "step": 8296 }, { "epoch": 0.1952235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2884, "step": 8297 }, { "epoch": 0.1952470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0626, "step": 8298 }, { "epoch": 0.1952705882352941, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8626, "step": 8299 }, { "epoch": 0.1952941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1646, "step": 8300 }, { "epoch": 0.19531764705882354, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0149, "step": 8301 }, { "epoch": 0.19534117647058824, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0554, "step": 8302 }, { "epoch": 0.19536470588235294, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0719, "step": 8303 }, { "epoch": 0.19538823529411764, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9937, "step": 8304 }, { "epoch": 0.19541176470588234, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0073, "step": 8305 }, { "epoch": 0.19543529411764707, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1979, "step": 8306 }, { "epoch": 0.19545882352941177, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0054, "step": 8307 }, { "epoch": 0.19548235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1269, "step": 8308 }, { "epoch": 0.19550588235294117, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0943, "step": 8309 }, { "epoch": 0.1955294117647059, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.9784, "step": 8310 }, { "epoch": 0.1955529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.4211, "step": 8311 }, { "epoch": 0.1955764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0992, "step": 8312 }, { "epoch": 0.1956, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0939, "step": 8313 }, { "epoch": 0.1956235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9934, "step": 8314 }, { "epoch": 0.19564705882352942, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1314, "step": 8315 }, { "epoch": 0.19567058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2345, "step": 8316 }, { "epoch": 0.19569411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1107, "step": 8317 }, { "epoch": 0.19571764705882352, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3135, "step": 8318 }, { "epoch": 0.19574117647058822, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2427, "step": 8319 }, { "epoch": 0.19576470588235295, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.86, "step": 8320 }, { "epoch": 0.19578823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9184, "step": 8321 }, { "epoch": 0.19581176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1191, "step": 8322 }, { "epoch": 0.19583529411764705, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.4185, "step": 8323 }, { "epoch": 0.19585882352941175, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8748, "step": 8324 }, { "epoch": 0.19588235294117648, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1734, "step": 8325 }, { "epoch": 0.19590588235294118, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1135, "step": 8326 }, { "epoch": 0.19592941176470588, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8535, "step": 8327 }, { "epoch": 0.19595294117647058, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.7795, "step": 8328 }, { "epoch": 0.1959764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1908, "step": 8329 }, { "epoch": 0.196, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9374, "step": 8330 }, { "epoch": 0.1960235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0481, "step": 8331 }, { "epoch": 0.1960470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9867, "step": 8332 }, { "epoch": 0.1960705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2028, "step": 8333 }, { "epoch": 0.19609411764705884, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.515, "step": 8334 }, { "epoch": 0.19611764705882354, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1074, "step": 8335 }, { "epoch": 0.19614117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1989, "step": 8336 }, { "epoch": 0.19616470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3785, "step": 8337 }, { "epoch": 0.19618823529411764, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9166, "step": 8338 }, { "epoch": 0.19621176470588236, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.2003, "step": 8339 }, { "epoch": 0.19623529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.4451, "step": 8340 }, { "epoch": 0.19625882352941176, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 0.4881, "step": 8341 }, { "epoch": 0.19628235294117646, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2585, "step": 8342 }, { "epoch": 0.19630588235294116, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9646, "step": 8343 }, { "epoch": 0.1963294117647059, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9591, "step": 8344 }, { "epoch": 0.1963529411764706, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8791, "step": 8345 }, { "epoch": 0.1963764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3245, "step": 8346 }, { "epoch": 0.1964, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.905, "step": 8347 }, { "epoch": 0.19642352941176472, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1582, "step": 8348 }, { "epoch": 0.19644705882352942, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9353, "step": 8349 }, { "epoch": 0.19647058823529412, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.3468, "step": 8350 }, { "epoch": 0.19649411764705882, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8055, "step": 8351 }, { "epoch": 0.19651764705882352, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.073, "step": 8352 }, { "epoch": 0.19654117647058825, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.6452, "step": 8353 }, { "epoch": 0.19656470588235295, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.8222, "step": 8354 }, { "epoch": 0.19658823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2178, "step": 8355 }, { "epoch": 0.19661176470588235, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1769, "step": 8356 }, { "epoch": 0.19663529411764705, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9649, "step": 8357 }, { "epoch": 0.19665882352941177, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0383, "step": 8358 }, { "epoch": 0.19668235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2407, "step": 8359 }, { "epoch": 0.19670588235294117, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0015, "step": 8360 }, { "epoch": 0.19672941176470587, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2905, "step": 8361 }, { "epoch": 0.19675294117647057, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2571, "step": 8362 }, { "epoch": 0.1967764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2903, "step": 8363 }, { "epoch": 0.1968, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.1615, "step": 8364 }, { "epoch": 0.1968235294117647, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.7459, "step": 8365 }, { "epoch": 0.1968470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2035, "step": 8366 }, { "epoch": 0.19687058823529413, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0862, "step": 8367 }, { "epoch": 0.19689411764705883, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.3154, "step": 8368 }, { "epoch": 0.19691764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2844, "step": 8369 }, { "epoch": 0.19694117647058823, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.7885, "step": 8370 }, { "epoch": 0.19696470588235293, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1473, "step": 8371 }, { "epoch": 0.19698823529411766, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1777, "step": 8372 }, { "epoch": 0.19701176470588236, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2859, "step": 8373 }, { "epoch": 0.19703529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0504, "step": 8374 }, { "epoch": 0.19705882352941176, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1703, "step": 8375 }, { "epoch": 0.19708235294117646, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1706, "step": 8376 }, { "epoch": 0.1971058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1997, "step": 8377 }, { "epoch": 0.1971294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3038, "step": 8378 }, { "epoch": 0.1971529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1997, "step": 8379 }, { "epoch": 0.1971764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1099, "step": 8380 }, { "epoch": 0.1972, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2461, "step": 8381 }, { "epoch": 0.19722352941176471, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9782, "step": 8382 }, { "epoch": 0.19724705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9965, "step": 8383 }, { "epoch": 0.19727058823529411, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8424, "step": 8384 }, { "epoch": 0.19729411764705881, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1094, "step": 8385 }, { "epoch": 0.19731764705882354, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7968, "step": 8386 }, { "epoch": 0.19734117647058824, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3414, "step": 8387 }, { "epoch": 0.19736470588235294, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.8648, "step": 8388 }, { "epoch": 0.19738823529411764, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9577, "step": 8389 }, { "epoch": 0.19741176470588234, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1136, "step": 8390 }, { "epoch": 0.19743529411764707, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1081, "step": 8391 }, { "epoch": 0.19745882352941177, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1393, "step": 8392 }, { "epoch": 0.19748235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.928, "step": 8393 }, { "epoch": 0.19750588235294117, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2073, "step": 8394 }, { "epoch": 0.19752941176470587, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1072, "step": 8395 }, { "epoch": 0.1975529411764706, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9423, "step": 8396 }, { "epoch": 0.1975764705882353, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8725, "step": 8397 }, { "epoch": 0.1976, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2008, "step": 8398 }, { "epoch": 0.1976235294117647, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.9026, "step": 8399 }, { "epoch": 0.1976470588235294, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.604, "step": 8400 }, { "epoch": 0.19767058823529413, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0825, "step": 8401 }, { "epoch": 0.19769411764705883, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0005, "step": 8402 }, { "epoch": 0.19771764705882353, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8833, "step": 8403 }, { "epoch": 0.19774117647058823, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2573, "step": 8404 }, { "epoch": 0.19776470588235295, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0924, "step": 8405 }, { "epoch": 0.19778823529411765, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1577, "step": 8406 }, { "epoch": 0.19781176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0758, "step": 8407 }, { "epoch": 0.19783529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1328, "step": 8408 }, { "epoch": 0.19785882352941175, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.933, "step": 8409 }, { "epoch": 0.19788235294117648, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.116, "step": 8410 }, { "epoch": 0.19790588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.3224, "step": 8411 }, { "epoch": 0.19792941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1207, "step": 8412 }, { "epoch": 0.19795294117647058, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9746, "step": 8413 }, { "epoch": 0.19797647058823528, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2503, "step": 8414 }, { "epoch": 0.198, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2752, "step": 8415 }, { "epoch": 0.1980235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3337, "step": 8416 }, { "epoch": 0.1980470588235294, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9092, "step": 8417 }, { "epoch": 0.1980705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2773, "step": 8418 }, { "epoch": 0.19809411764705884, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8392, "step": 8419 }, { "epoch": 0.19811764705882354, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.333, "step": 8420 }, { "epoch": 0.19814117647058824, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1007, "step": 8421 }, { "epoch": 0.19816470588235294, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.8153, "step": 8422 }, { "epoch": 0.19818823529411764, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9628, "step": 8423 }, { "epoch": 0.19821176470588237, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0275, "step": 8424 }, { "epoch": 0.19823529411764707, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2946, "step": 8425 }, { "epoch": 0.19825882352941177, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1736, "step": 8426 }, { "epoch": 0.19828235294117647, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.7179, "step": 8427 }, { "epoch": 0.19830588235294117, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3291, "step": 8428 }, { "epoch": 0.1983294117647059, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3016, "step": 8429 }, { "epoch": 0.1983529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0836, "step": 8430 }, { "epoch": 0.1983764705882353, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.955, "step": 8431 }, { "epoch": 0.1984, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1745, "step": 8432 }, { "epoch": 0.1984235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7948, "step": 8433 }, { "epoch": 0.19844705882352942, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1367, "step": 8434 }, { "epoch": 0.19847058823529412, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9145, "step": 8435 }, { "epoch": 0.19849411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.4656, "step": 8436 }, { "epoch": 0.19851764705882352, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0938, "step": 8437 }, { "epoch": 0.19854117647058825, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.7603, "step": 8438 }, { "epoch": 0.19856470588235295, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0605, "step": 8439 }, { "epoch": 0.19858823529411765, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8105, "step": 8440 }, { "epoch": 0.19861176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1263, "step": 8441 }, { "epoch": 0.19863529411764705, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0486, "step": 8442 }, { "epoch": 0.19865882352941178, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9156, "step": 8443 }, { "epoch": 0.19868235294117648, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.3072, "step": 8444 }, { "epoch": 0.19870588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1078, "step": 8445 }, { "epoch": 0.19872941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2107, "step": 8446 }, { "epoch": 0.19875294117647058, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.377, "step": 8447 }, { "epoch": 0.1987764705882353, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9414, "step": 8448 }, { "epoch": 0.1988, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1255, "step": 8449 }, { "epoch": 0.1988235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1365, "step": 8450 }, { "epoch": 0.1988470588235294, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8297, "step": 8451 }, { "epoch": 0.1988705882352941, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9559, "step": 8452 }, { "epoch": 0.19889411764705883, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.6551, "step": 8453 }, { "epoch": 0.19891764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2864, "step": 8454 }, { "epoch": 0.19894117647058823, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2027, "step": 8455 }, { "epoch": 0.19896470588235293, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.7678, "step": 8456 }, { "epoch": 0.19898823529411766, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8205, "step": 8457 }, { "epoch": 0.19901176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8638, "step": 8458 }, { "epoch": 0.19903529411764706, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.965, "step": 8459 }, { "epoch": 0.19905882352941176, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7978, "step": 8460 }, { "epoch": 0.19908235294117646, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1731, "step": 8461 }, { "epoch": 0.1991058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.4978, "step": 8462 }, { "epoch": 0.1991294117647059, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0675, "step": 8463 }, { "epoch": 0.1991529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0717, "step": 8464 }, { "epoch": 0.1991764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.135, "step": 8465 }, { "epoch": 0.1992, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1192, "step": 8466 }, { "epoch": 0.19922352941176472, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1995, "step": 8467 }, { "epoch": 0.19924705882352942, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8711, "step": 8468 }, { "epoch": 0.19927058823529412, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0467, "step": 8469 }, { "epoch": 0.19929411764705882, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8727, "step": 8470 }, { "epoch": 0.19931764705882352, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.853, "step": 8471 }, { "epoch": 0.19934117647058824, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1618, "step": 8472 }, { "epoch": 0.19936470588235294, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.084, "step": 8473 }, { "epoch": 0.19938823529411764, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4249, "step": 8474 }, { "epoch": 0.19941176470588234, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.241, "step": 8475 }, { "epoch": 0.19943529411764707, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1717, "step": 8476 }, { "epoch": 0.19945882352941177, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9401, "step": 8477 }, { "epoch": 0.19948235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9725, "step": 8478 }, { "epoch": 0.19950588235294117, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0964, "step": 8479 }, { "epoch": 0.19952941176470587, "grad_norm": 0.296875, "learning_rate": 0.02, "loss": 0.5611, "step": 8480 }, { "epoch": 0.1995529411764706, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.8922, "step": 8481 }, { "epoch": 0.1995764705882353, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0237, "step": 8482 }, { "epoch": 0.1996, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0183, "step": 8483 }, { "epoch": 0.1996235294117647, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.763, "step": 8484 }, { "epoch": 0.1996470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0463, "step": 8485 }, { "epoch": 0.19967058823529413, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.4891, "step": 8486 }, { "epoch": 0.19969411764705883, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9122, "step": 8487 }, { "epoch": 0.19971764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2153, "step": 8488 }, { "epoch": 0.19974117647058823, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2024, "step": 8489 }, { "epoch": 0.19976470588235293, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9376, "step": 8490 }, { "epoch": 0.19978823529411766, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0013, "step": 8491 }, { "epoch": 0.19981176470588236, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1189, "step": 8492 }, { "epoch": 0.19983529411764706, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7523, "step": 8493 }, { "epoch": 0.19985882352941176, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0788, "step": 8494 }, { "epoch": 0.19988235294117648, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9722, "step": 8495 }, { "epoch": 0.19990588235294118, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.8548, "step": 8496 }, { "epoch": 0.19992941176470588, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9533, "step": 8497 }, { "epoch": 0.19995294117647058, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2787, "step": 8498 }, { "epoch": 0.19997647058823528, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7569, "step": 8499 }, { "epoch": 0.2, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1074, "step": 8500 }, { "epoch": 0.2000235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.132, "step": 8501 }, { "epoch": 0.2000470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2849, "step": 8502 }, { "epoch": 0.2000705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1642, "step": 8503 }, { "epoch": 0.2000941176470588, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0052, "step": 8504 }, { "epoch": 0.20011764705882354, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1029, "step": 8505 }, { "epoch": 0.20014117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2282, "step": 8506 }, { "epoch": 0.20016470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9467, "step": 8507 }, { "epoch": 0.20018823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9031, "step": 8508 }, { "epoch": 0.20021176470588234, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1515, "step": 8509 }, { "epoch": 0.20023529411764707, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0887, "step": 8510 }, { "epoch": 0.20025882352941177, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0487, "step": 8511 }, { "epoch": 0.20028235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3359, "step": 8512 }, { "epoch": 0.20030588235294117, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.829, "step": 8513 }, { "epoch": 0.2003294117647059, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2734, "step": 8514 }, { "epoch": 0.2003529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3897, "step": 8515 }, { "epoch": 0.2003764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9262, "step": 8516 }, { "epoch": 0.2004, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9899, "step": 8517 }, { "epoch": 0.2004235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.3199, "step": 8518 }, { "epoch": 0.20044705882352942, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.8523, "step": 8519 }, { "epoch": 0.20047058823529412, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.059, "step": 8520 }, { "epoch": 0.20049411764705882, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0127, "step": 8521 }, { "epoch": 0.20051764705882352, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9006, "step": 8522 }, { "epoch": 0.20054117647058822, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0944, "step": 8523 }, { "epoch": 0.20056470588235295, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9632, "step": 8524 }, { "epoch": 0.20058823529411765, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.016, "step": 8525 }, { "epoch": 0.20061176470588235, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9925, "step": 8526 }, { "epoch": 0.20063529411764705, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1261, "step": 8527 }, { "epoch": 0.20065882352941178, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1717, "step": 8528 }, { "epoch": 0.20068235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0329, "step": 8529 }, { "epoch": 0.20070588235294118, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1927, "step": 8530 }, { "epoch": 0.20072941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.239, "step": 8531 }, { "epoch": 0.20075294117647058, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.9109, "step": 8532 }, { "epoch": 0.2007764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3335, "step": 8533 }, { "epoch": 0.2008, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3315, "step": 8534 }, { "epoch": 0.2008235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2506, "step": 8535 }, { "epoch": 0.2008470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0679, "step": 8536 }, { "epoch": 0.2008705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2272, "step": 8537 }, { "epoch": 0.20089411764705883, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9937, "step": 8538 }, { "epoch": 0.20091764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2012, "step": 8539 }, { "epoch": 0.20094117647058823, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0341, "step": 8540 }, { "epoch": 0.20096470588235293, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0777, "step": 8541 }, { "epoch": 0.20098823529411763, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.007, "step": 8542 }, { "epoch": 0.20101176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1367, "step": 8543 }, { "epoch": 0.20103529411764706, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.985, "step": 8544 }, { "epoch": 0.20105882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2399, "step": 8545 }, { "epoch": 0.20108235294117646, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.6363, "step": 8546 }, { "epoch": 0.2011058823529412, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9737, "step": 8547 }, { "epoch": 0.2011294117647059, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.2329, "step": 8548 }, { "epoch": 0.2011529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2531, "step": 8549 }, { "epoch": 0.2011764705882353, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 1.0544, "step": 8550 }, { "epoch": 0.2012, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2995, "step": 8551 }, { "epoch": 0.20122352941176472, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9379, "step": 8552 }, { "epoch": 0.20124705882352942, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0545, "step": 8553 }, { "epoch": 0.20127058823529412, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9782, "step": 8554 }, { "epoch": 0.20129411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0696, "step": 8555 }, { "epoch": 0.20131764705882352, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3143, "step": 8556 }, { "epoch": 0.20134117647058825, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2936, "step": 8557 }, { "epoch": 0.20136470588235295, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8658, "step": 8558 }, { "epoch": 0.20138823529411765, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3344, "step": 8559 }, { "epoch": 0.20141176470588235, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8786, "step": 8560 }, { "epoch": 0.20143529411764705, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1311, "step": 8561 }, { "epoch": 0.20145882352941177, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9643, "step": 8562 }, { "epoch": 0.20148235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1594, "step": 8563 }, { "epoch": 0.20150588235294117, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3444, "step": 8564 }, { "epoch": 0.20152941176470587, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9977, "step": 8565 }, { "epoch": 0.2015529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9604, "step": 8566 }, { "epoch": 0.2015764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0666, "step": 8567 }, { "epoch": 0.2016, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0151, "step": 8568 }, { "epoch": 0.2016235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2834, "step": 8569 }, { "epoch": 0.2016470588235294, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 1.0739, "step": 8570 }, { "epoch": 0.20167058823529413, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2855, "step": 8571 }, { "epoch": 0.20169411764705883, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3473, "step": 8572 }, { "epoch": 0.20171764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9456, "step": 8573 }, { "epoch": 0.20174117647058823, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9733, "step": 8574 }, { "epoch": 0.20176470588235293, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.057, "step": 8575 }, { "epoch": 0.20178823529411766, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9437, "step": 8576 }, { "epoch": 0.20181176470588236, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9517, "step": 8577 }, { "epoch": 0.20183529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1527, "step": 8578 }, { "epoch": 0.20185882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1127, "step": 8579 }, { "epoch": 0.20188235294117646, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1688, "step": 8580 }, { "epoch": 0.20190588235294119, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9546, "step": 8581 }, { "epoch": 0.20192941176470589, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9429, "step": 8582 }, { "epoch": 0.20195294117647059, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9212, "step": 8583 }, { "epoch": 0.20197647058823529, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.417, "step": 8584 }, { "epoch": 0.202, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0577, "step": 8585 }, { "epoch": 0.2020235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0251, "step": 8586 }, { "epoch": 0.2020470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2286, "step": 8587 }, { "epoch": 0.2020705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9662, "step": 8588 }, { "epoch": 0.2020941176470588, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0426, "step": 8589 }, { "epoch": 0.20211764705882354, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2161, "step": 8590 }, { "epoch": 0.20214117647058824, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9048, "step": 8591 }, { "epoch": 0.20216470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2305, "step": 8592 }, { "epoch": 0.20218823529411764, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2467, "step": 8593 }, { "epoch": 0.20221176470588234, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 1.1667, "step": 8594 }, { "epoch": 0.20223529411764707, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.106, "step": 8595 }, { "epoch": 0.20225882352941177, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0291, "step": 8596 }, { "epoch": 0.20228235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.7398, "step": 8597 }, { "epoch": 0.20230588235294117, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0331, "step": 8598 }, { "epoch": 0.20232941176470587, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.7802, "step": 8599 }, { "epoch": 0.2023529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8528, "step": 8600 }, { "epoch": 0.2023764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2741, "step": 8601 }, { "epoch": 0.2024, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8723, "step": 8602 }, { "epoch": 0.2024235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.2725, "step": 8603 }, { "epoch": 0.20244705882352942, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 1.0683, "step": 8604 }, { "epoch": 0.20247058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3115, "step": 8605 }, { "epoch": 0.20249411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.048, "step": 8606 }, { "epoch": 0.20251764705882352, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0231, "step": 8607 }, { "epoch": 0.20254117647058822, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3895, "step": 8608 }, { "epoch": 0.20256470588235295, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1504, "step": 8609 }, { "epoch": 0.20258823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2355, "step": 8610 }, { "epoch": 0.20261176470588235, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0981, "step": 8611 }, { "epoch": 0.20263529411764705, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2134, "step": 8612 }, { "epoch": 0.20265882352941175, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1644, "step": 8613 }, { "epoch": 0.20268235294117648, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.8753, "step": 8614 }, { "epoch": 0.20270588235294118, "grad_norm": 0.291015625, "learning_rate": 0.02, "loss": 0.7616, "step": 8615 }, { "epoch": 0.20272941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3538, "step": 8616 }, { "epoch": 0.20275294117647058, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9203, "step": 8617 }, { "epoch": 0.20277647058823528, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.7222, "step": 8618 }, { "epoch": 0.2028, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7635, "step": 8619 }, { "epoch": 0.2028235294117647, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.8657, "step": 8620 }, { "epoch": 0.2028470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9755, "step": 8621 }, { "epoch": 0.2028705882352941, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0459, "step": 8622 }, { "epoch": 0.20289411764705884, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1517, "step": 8623 }, { "epoch": 0.20291764705882354, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.8143, "step": 8624 }, { "epoch": 0.20294117647058824, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9773, "step": 8625 }, { "epoch": 0.20296470588235294, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.8742, "step": 8626 }, { "epoch": 0.20298823529411764, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9892, "step": 8627 }, { "epoch": 0.20301176470588236, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.4202, "step": 8628 }, { "epoch": 0.20303529411764706, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7414, "step": 8629 }, { "epoch": 0.20305882352941176, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.6095, "step": 8630 }, { "epoch": 0.20308235294117646, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2248, "step": 8631 }, { "epoch": 0.20310588235294116, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9551, "step": 8632 }, { "epoch": 0.2031294117647059, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1558, "step": 8633 }, { "epoch": 0.2031529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9235, "step": 8634 }, { "epoch": 0.2031764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0354, "step": 8635 }, { "epoch": 0.2032, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1786, "step": 8636 }, { "epoch": 0.20322352941176472, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2824, "step": 8637 }, { "epoch": 0.20324705882352942, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9417, "step": 8638 }, { "epoch": 0.20327058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2423, "step": 8639 }, { "epoch": 0.20329411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2519, "step": 8640 }, { "epoch": 0.20331764705882352, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2065, "step": 8641 }, { "epoch": 0.20334117647058825, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2235, "step": 8642 }, { "epoch": 0.20336470588235295, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2701, "step": 8643 }, { "epoch": 0.20338823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1227, "step": 8644 }, { "epoch": 0.20341176470588235, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8672, "step": 8645 }, { "epoch": 0.20343529411764705, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0042, "step": 8646 }, { "epoch": 0.20345882352941178, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1583, "step": 8647 }, { "epoch": 0.20348235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.101, "step": 8648 }, { "epoch": 0.20350588235294118, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.7577, "step": 8649 }, { "epoch": 0.20352941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0967, "step": 8650 }, { "epoch": 0.20355294117647058, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.5878, "step": 8651 }, { "epoch": 0.2035764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.4672, "step": 8652 }, { "epoch": 0.2036, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.7637, "step": 8653 }, { "epoch": 0.2036235294117647, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.9135, "step": 8654 }, { "epoch": 0.2036470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.191, "step": 8655 }, { "epoch": 0.20367058823529413, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2611, "step": 8656 }, { "epoch": 0.20369411764705883, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9751, "step": 8657 }, { "epoch": 0.20371764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9793, "step": 8658 }, { "epoch": 0.20374117647058823, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9066, "step": 8659 }, { "epoch": 0.20376470588235293, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1464, "step": 8660 }, { "epoch": 0.20378823529411766, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9199, "step": 8661 }, { "epoch": 0.20381176470588236, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0357, "step": 8662 }, { "epoch": 0.20383529411764706, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.8556, "step": 8663 }, { "epoch": 0.20385882352941176, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.219, "step": 8664 }, { "epoch": 0.20388235294117646, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.4921, "step": 8665 }, { "epoch": 0.2039058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3257, "step": 8666 }, { "epoch": 0.2039294117647059, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.4558, "step": 8667 }, { "epoch": 0.2039529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9069, "step": 8668 }, { "epoch": 0.2039764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0929, "step": 8669 }, { "epoch": 0.204, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1043, "step": 8670 }, { "epoch": 0.20402352941176471, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1657, "step": 8671 }, { "epoch": 0.20404705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.177, "step": 8672 }, { "epoch": 0.20407058823529411, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1234, "step": 8673 }, { "epoch": 0.20409411764705881, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2053, "step": 8674 }, { "epoch": 0.20411764705882354, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0922, "step": 8675 }, { "epoch": 0.20414117647058824, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8879, "step": 8676 }, { "epoch": 0.20416470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0704, "step": 8677 }, { "epoch": 0.20418823529411764, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1352, "step": 8678 }, { "epoch": 0.20421176470588234, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.6791, "step": 8679 }, { "epoch": 0.20423529411764707, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2886, "step": 8680 }, { "epoch": 0.20425882352941177, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9337, "step": 8681 }, { "epoch": 0.20428235294117647, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9384, "step": 8682 }, { "epoch": 0.20430588235294117, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0412, "step": 8683 }, { "epoch": 0.20432941176470587, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0555, "step": 8684 }, { "epoch": 0.2043529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2636, "step": 8685 }, { "epoch": 0.2043764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.6577, "step": 8686 }, { "epoch": 0.2044, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0509, "step": 8687 }, { "epoch": 0.2044235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.3304, "step": 8688 }, { "epoch": 0.2044470588235294, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.7981, "step": 8689 }, { "epoch": 0.20447058823529413, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 1.2482, "step": 8690 }, { "epoch": 0.20449411764705883, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.974, "step": 8691 }, { "epoch": 0.20451764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1599, "step": 8692 }, { "epoch": 0.20454117647058823, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.3173, "step": 8693 }, { "epoch": 0.20456470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2798, "step": 8694 }, { "epoch": 0.20458823529411765, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.963, "step": 8695 }, { "epoch": 0.20461176470588235, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0439, "step": 8696 }, { "epoch": 0.20463529411764705, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9804, "step": 8697 }, { "epoch": 0.20465882352941175, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9954, "step": 8698 }, { "epoch": 0.20468235294117648, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.1186, "step": 8699 }, { "epoch": 0.20470588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1937, "step": 8700 }, { "epoch": 0.20472941176470588, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8717, "step": 8701 }, { "epoch": 0.20475294117647058, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9363, "step": 8702 }, { "epoch": 0.20477647058823528, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.976, "step": 8703 }, { "epoch": 0.2048, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3201, "step": 8704 }, { "epoch": 0.2048235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0987, "step": 8705 }, { "epoch": 0.2048470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0964, "step": 8706 }, { "epoch": 0.2048705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9002, "step": 8707 }, { "epoch": 0.2048941176470588, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0275, "step": 8708 }, { "epoch": 0.20491764705882354, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9412, "step": 8709 }, { "epoch": 0.20494117647058824, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0371, "step": 8710 }, { "epoch": 0.20496470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2682, "step": 8711 }, { "epoch": 0.20498823529411764, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0288, "step": 8712 }, { "epoch": 0.20501176470588237, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0048, "step": 8713 }, { "epoch": 0.20503529411764707, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1903, "step": 8714 }, { "epoch": 0.20505882352941177, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1218, "step": 8715 }, { "epoch": 0.20508235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.2969, "step": 8716 }, { "epoch": 0.20510588235294117, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1101, "step": 8717 }, { "epoch": 0.2051294117647059, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9268, "step": 8718 }, { "epoch": 0.2051529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2163, "step": 8719 }, { "epoch": 0.2051764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0584, "step": 8720 }, { "epoch": 0.2052, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0789, "step": 8721 }, { "epoch": 0.2052235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1729, "step": 8722 }, { "epoch": 0.20524705882352942, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.7927, "step": 8723 }, { "epoch": 0.20527058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0981, "step": 8724 }, { "epoch": 0.20529411764705882, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2156, "step": 8725 }, { "epoch": 0.20531764705882352, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0959, "step": 8726 }, { "epoch": 0.20534117647058822, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9774, "step": 8727 }, { "epoch": 0.20536470588235295, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.322, "step": 8728 }, { "epoch": 0.20538823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0138, "step": 8729 }, { "epoch": 0.20541176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0996, "step": 8730 }, { "epoch": 0.20543529411764705, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1239, "step": 8731 }, { "epoch": 0.20545882352941178, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1629, "step": 8732 }, { "epoch": 0.20548235294117648, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1387, "step": 8733 }, { "epoch": 0.20550588235294118, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0324, "step": 8734 }, { "epoch": 0.20552941176470588, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2211, "step": 8735 }, { "epoch": 0.20555294117647058, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.7409, "step": 8736 }, { "epoch": 0.2055764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4255, "step": 8737 }, { "epoch": 0.2056, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1962, "step": 8738 }, { "epoch": 0.2056235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.7666, "step": 8739 }, { "epoch": 0.2056470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0391, "step": 8740 }, { "epoch": 0.2056705882352941, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0441, "step": 8741 }, { "epoch": 0.20569411764705883, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0366, "step": 8742 }, { "epoch": 0.20571764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0848, "step": 8743 }, { "epoch": 0.20574117647058823, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8471, "step": 8744 }, { "epoch": 0.20576470588235293, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.8993, "step": 8745 }, { "epoch": 0.20578823529411766, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1671, "step": 8746 }, { "epoch": 0.20581176470588236, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2025, "step": 8747 }, { "epoch": 0.20583529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1537, "step": 8748 }, { "epoch": 0.20585882352941176, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.263, "step": 8749 }, { "epoch": 0.20588235294117646, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0427, "step": 8750 }, { "epoch": 0.2059058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0908, "step": 8751 }, { "epoch": 0.2059294117647059, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0714, "step": 8752 }, { "epoch": 0.2059529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.172, "step": 8753 }, { "epoch": 0.2059764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2552, "step": 8754 }, { "epoch": 0.206, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.4003, "step": 8755 }, { "epoch": 0.20602352941176472, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0609, "step": 8756 }, { "epoch": 0.20604705882352942, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9205, "step": 8757 }, { "epoch": 0.20607058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9317, "step": 8758 }, { "epoch": 0.20609411764705882, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1727, "step": 8759 }, { "epoch": 0.20611764705882352, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1831, "step": 8760 }, { "epoch": 0.20614117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0418, "step": 8761 }, { "epoch": 0.20616470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1535, "step": 8762 }, { "epoch": 0.20618823529411764, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2365, "step": 8763 }, { "epoch": 0.20621176470588234, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.1043, "step": 8764 }, { "epoch": 0.20623529411764707, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1781, "step": 8765 }, { "epoch": 0.20625882352941177, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.219, "step": 8766 }, { "epoch": 0.20628235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2512, "step": 8767 }, { "epoch": 0.20630588235294117, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1055, "step": 8768 }, { "epoch": 0.20632941176470587, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2252, "step": 8769 }, { "epoch": 0.2063529411764706, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8794, "step": 8770 }, { "epoch": 0.2063764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.265, "step": 8771 }, { "epoch": 0.2064, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.777, "step": 8772 }, { "epoch": 0.2064235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9944, "step": 8773 }, { "epoch": 0.2064470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0264, "step": 8774 }, { "epoch": 0.20647058823529413, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2705, "step": 8775 }, { "epoch": 0.20649411764705883, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0233, "step": 8776 }, { "epoch": 0.20651764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1271, "step": 8777 }, { "epoch": 0.20654117647058823, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3198, "step": 8778 }, { "epoch": 0.20656470588235293, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1662, "step": 8779 }, { "epoch": 0.20658823529411766, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1731, "step": 8780 }, { "epoch": 0.20661176470588236, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1356, "step": 8781 }, { "epoch": 0.20663529411764706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.2679, "step": 8782 }, { "epoch": 0.20665882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4591, "step": 8783 }, { "epoch": 0.20668235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2164, "step": 8784 }, { "epoch": 0.20670588235294118, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9196, "step": 8785 }, { "epoch": 0.20672941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9841, "step": 8786 }, { "epoch": 0.20675294117647058, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0347, "step": 8787 }, { "epoch": 0.20677647058823528, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0083, "step": 8788 }, { "epoch": 0.2068, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0614, "step": 8789 }, { "epoch": 0.2068235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.158, "step": 8790 }, { "epoch": 0.2068470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1191, "step": 8791 }, { "epoch": 0.2068705882352941, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1427, "step": 8792 }, { "epoch": 0.2068941176470588, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1956, "step": 8793 }, { "epoch": 0.20691764705882354, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.733, "step": 8794 }, { "epoch": 0.20694117647058824, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2505, "step": 8795 }, { "epoch": 0.20696470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1199, "step": 8796 }, { "epoch": 0.20698823529411764, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.2613, "step": 8797 }, { "epoch": 0.20701176470588234, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8953, "step": 8798 }, { "epoch": 0.20703529411764707, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2964, "step": 8799 }, { "epoch": 0.20705882352941177, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0135, "step": 8800 }, { "epoch": 0.20708235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3661, "step": 8801 }, { "epoch": 0.20710588235294117, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3478, "step": 8802 }, { "epoch": 0.2071294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.161, "step": 8803 }, { "epoch": 0.2071529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1453, "step": 8804 }, { "epoch": 0.2071764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1665, "step": 8805 }, { "epoch": 0.2072, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1573, "step": 8806 }, { "epoch": 0.2072235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4064, "step": 8807 }, { "epoch": 0.20724705882352942, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.924, "step": 8808 }, { "epoch": 0.20727058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2007, "step": 8809 }, { "epoch": 0.20729411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1963, "step": 8810 }, { "epoch": 0.20731764705882352, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9652, "step": 8811 }, { "epoch": 0.20734117647058822, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.7669, "step": 8812 }, { "epoch": 0.20736470588235295, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1785, "step": 8813 }, { "epoch": 0.20738823529411765, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.2016, "step": 8814 }, { "epoch": 0.20741176470588235, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2502, "step": 8815 }, { "epoch": 0.20743529411764705, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8759, "step": 8816 }, { "epoch": 0.20745882352941175, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.6684, "step": 8817 }, { "epoch": 0.20748235294117648, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9715, "step": 8818 }, { "epoch": 0.20750588235294118, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9607, "step": 8819 }, { "epoch": 0.20752941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1867, "step": 8820 }, { "epoch": 0.20755294117647058, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.4644, "step": 8821 }, { "epoch": 0.2075764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0796, "step": 8822 }, { "epoch": 0.2076, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9844, "step": 8823 }, { "epoch": 0.2076235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.8066, "step": 8824 }, { "epoch": 0.2076470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8921, "step": 8825 }, { "epoch": 0.2076705882352941, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9105, "step": 8826 }, { "epoch": 0.20769411764705883, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1452, "step": 8827 }, { "epoch": 0.20771764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3046, "step": 8828 }, { "epoch": 0.20774117647058823, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0985, "step": 8829 }, { "epoch": 0.20776470588235293, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.3017, "step": 8830 }, { "epoch": 0.20778823529411763, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0868, "step": 8831 }, { "epoch": 0.20781176470588236, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2127, "step": 8832 }, { "epoch": 0.20783529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1454, "step": 8833 }, { "epoch": 0.20785882352941176, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0414, "step": 8834 }, { "epoch": 0.20788235294117646, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8131, "step": 8835 }, { "epoch": 0.20790588235294116, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2984, "step": 8836 }, { "epoch": 0.2079294117647059, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1807, "step": 8837 }, { "epoch": 0.2079529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.283, "step": 8838 }, { "epoch": 0.2079764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2815, "step": 8839 }, { "epoch": 0.208, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0399, "step": 8840 }, { "epoch": 0.20802352941176472, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3791, "step": 8841 }, { "epoch": 0.20804705882352942, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8579, "step": 8842 }, { "epoch": 0.20807058823529412, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9669, "step": 8843 }, { "epoch": 0.20809411764705882, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0367, "step": 8844 }, { "epoch": 0.20811764705882352, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1884, "step": 8845 }, { "epoch": 0.20814117647058825, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7967, "step": 8846 }, { "epoch": 0.20816470588235295, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.6028, "step": 8847 }, { "epoch": 0.20818823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3627, "step": 8848 }, { "epoch": 0.20821176470588235, "grad_norm": 0.279296875, "learning_rate": 0.02, "loss": 0.9811, "step": 8849 }, { "epoch": 0.20823529411764705, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2211, "step": 8850 }, { "epoch": 0.20825882352941177, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2096, "step": 8851 }, { "epoch": 0.20828235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0969, "step": 8852 }, { "epoch": 0.20830588235294117, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0059, "step": 8853 }, { "epoch": 0.20832941176470587, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9811, "step": 8854 }, { "epoch": 0.2083529411764706, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.5895, "step": 8855 }, { "epoch": 0.2083764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1753, "step": 8856 }, { "epoch": 0.2084, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3442, "step": 8857 }, { "epoch": 0.2084235294117647, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.6698, "step": 8858 }, { "epoch": 0.2084470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3263, "step": 8859 }, { "epoch": 0.20847058823529413, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0534, "step": 8860 }, { "epoch": 0.20849411764705883, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.927, "step": 8861 }, { "epoch": 0.20851764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1289, "step": 8862 }, { "epoch": 0.20854117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2415, "step": 8863 }, { "epoch": 0.20856470588235293, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0719, "step": 8864 }, { "epoch": 0.20858823529411766, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.8405, "step": 8865 }, { "epoch": 0.20861176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8783, "step": 8866 }, { "epoch": 0.20863529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.075, "step": 8867 }, { "epoch": 0.20865882352941176, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.255, "step": 8868 }, { "epoch": 0.20868235294117646, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9357, "step": 8869 }, { "epoch": 0.20870588235294119, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2368, "step": 8870 }, { "epoch": 0.20872941176470589, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9524, "step": 8871 }, { "epoch": 0.20875294117647059, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0475, "step": 8872 }, { "epoch": 0.20877647058823529, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8775, "step": 8873 }, { "epoch": 0.2088, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.227, "step": 8874 }, { "epoch": 0.2088235294117647, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.9021, "step": 8875 }, { "epoch": 0.2088470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1488, "step": 8876 }, { "epoch": 0.2088705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.168, "step": 8877 }, { "epoch": 0.2088941176470588, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9301, "step": 8878 }, { "epoch": 0.20891764705882354, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4346, "step": 8879 }, { "epoch": 0.20894117647058824, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.7658, "step": 8880 }, { "epoch": 0.20896470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.948, "step": 8881 }, { "epoch": 0.20898823529411764, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3373, "step": 8882 }, { "epoch": 0.20901176470588234, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0495, "step": 8883 }, { "epoch": 0.20903529411764707, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.8676, "step": 8884 }, { "epoch": 0.20905882352941177, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3283, "step": 8885 }, { "epoch": 0.20908235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1108, "step": 8886 }, { "epoch": 0.20910588235294117, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8802, "step": 8887 }, { "epoch": 0.20912941176470587, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.167, "step": 8888 }, { "epoch": 0.2091529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1399, "step": 8889 }, { "epoch": 0.2091764705882353, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.5803, "step": 8890 }, { "epoch": 0.2092, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1951, "step": 8891 }, { "epoch": 0.2092235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0772, "step": 8892 }, { "epoch": 0.20924705882352942, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0103, "step": 8893 }, { "epoch": 0.20927058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2134, "step": 8894 }, { "epoch": 0.20929411764705882, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3332, "step": 8895 }, { "epoch": 0.20931764705882352, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0845, "step": 8896 }, { "epoch": 0.20934117647058822, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1305, "step": 8897 }, { "epoch": 0.20936470588235295, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0348, "step": 8898 }, { "epoch": 0.20938823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0891, "step": 8899 }, { "epoch": 0.20941176470588235, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9471, "step": 8900 }, { "epoch": 0.20943529411764705, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.7835, "step": 8901 }, { "epoch": 0.20945882352941175, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2105, "step": 8902 }, { "epoch": 0.20948235294117648, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2327, "step": 8903 }, { "epoch": 0.20950588235294118, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.4634, "step": 8904 }, { "epoch": 0.20952941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1032, "step": 8905 }, { "epoch": 0.20955294117647058, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8081, "step": 8906 }, { "epoch": 0.20957647058823528, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4385, "step": 8907 }, { "epoch": 0.2096, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9022, "step": 8908 }, { "epoch": 0.2096235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.4351, "step": 8909 }, { "epoch": 0.2096470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8547, "step": 8910 }, { "epoch": 0.2096705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8483, "step": 8911 }, { "epoch": 0.20969411764705884, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7469, "step": 8912 }, { "epoch": 0.20971764705882354, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.063, "step": 8913 }, { "epoch": 0.20974117647058824, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9878, "step": 8914 }, { "epoch": 0.20976470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3497, "step": 8915 }, { "epoch": 0.20978823529411764, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.02, "step": 8916 }, { "epoch": 0.20981176470588236, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8043, "step": 8917 }, { "epoch": 0.20983529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0084, "step": 8918 }, { "epoch": 0.20985882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1669, "step": 8919 }, { "epoch": 0.20988235294117646, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0651, "step": 8920 }, { "epoch": 0.20990588235294116, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9741, "step": 8921 }, { "epoch": 0.2099294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1942, "step": 8922 }, { "epoch": 0.2099529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3397, "step": 8923 }, { "epoch": 0.2099764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1973, "step": 8924 }, { "epoch": 0.21, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.5264, "step": 8925 }, { "epoch": 0.2100235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1762, "step": 8926 }, { "epoch": 0.21004705882352942, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.074, "step": 8927 }, { "epoch": 0.21007058823529412, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0906, "step": 8928 }, { "epoch": 0.21009411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.2113, "step": 8929 }, { "epoch": 0.21011764705882352, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3633, "step": 8930 }, { "epoch": 0.21014117647058825, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1175, "step": 8931 }, { "epoch": 0.21016470588235295, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3556, "step": 8932 }, { "epoch": 0.21018823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2121, "step": 8933 }, { "epoch": 0.21021176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.302, "step": 8934 }, { "epoch": 0.21023529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0257, "step": 8935 }, { "epoch": 0.21025882352941178, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0745, "step": 8936 }, { "epoch": 0.21028235294117648, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1435, "step": 8937 }, { "epoch": 0.21030588235294118, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9561, "step": 8938 }, { "epoch": 0.21032941176470588, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.8801, "step": 8939 }, { "epoch": 0.21035294117647058, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1176, "step": 8940 }, { "epoch": 0.2103764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1354, "step": 8941 }, { "epoch": 0.2104, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1232, "step": 8942 }, { "epoch": 0.2104235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9774, "step": 8943 }, { "epoch": 0.2104470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0925, "step": 8944 }, { "epoch": 0.21047058823529413, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9417, "step": 8945 }, { "epoch": 0.21049411764705883, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.339, "step": 8946 }, { "epoch": 0.21051764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2419, "step": 8947 }, { "epoch": 0.21054117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9875, "step": 8948 }, { "epoch": 0.21056470588235293, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0048, "step": 8949 }, { "epoch": 0.21058823529411766, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2114, "step": 8950 }, { "epoch": 0.21061176470588236, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.8354, "step": 8951 }, { "epoch": 0.21063529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1232, "step": 8952 }, { "epoch": 0.21065882352941176, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.7746, "step": 8953 }, { "epoch": 0.21068235294117646, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.7292, "step": 8954 }, { "epoch": 0.2107058823529412, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.9008, "step": 8955 }, { "epoch": 0.2107294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0723, "step": 8956 }, { "epoch": 0.2107529411764706, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9547, "step": 8957 }, { "epoch": 0.2107764705882353, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.6663, "step": 8958 }, { "epoch": 0.2108, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2815, "step": 8959 }, { "epoch": 0.21082352941176472, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3554, "step": 8960 }, { "epoch": 0.21084705882352942, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0368, "step": 8961 }, { "epoch": 0.21087058823529412, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0963, "step": 8962 }, { "epoch": 0.21089411764705882, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.9848, "step": 8963 }, { "epoch": 0.21091764705882354, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9713, "step": 8964 }, { "epoch": 0.21094117647058824, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4931, "step": 8965 }, { "epoch": 0.21096470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9426, "step": 8966 }, { "epoch": 0.21098823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9954, "step": 8967 }, { "epoch": 0.21101176470588234, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2801, "step": 8968 }, { "epoch": 0.21103529411764707, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.0853, "step": 8969 }, { "epoch": 0.21105882352941177, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0062, "step": 8970 }, { "epoch": 0.21108235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.044, "step": 8971 }, { "epoch": 0.21110588235294117, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.2782, "step": 8972 }, { "epoch": 0.21112941176470587, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.073, "step": 8973 }, { "epoch": 0.2111529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3492, "step": 8974 }, { "epoch": 0.2111764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9296, "step": 8975 }, { "epoch": 0.2112, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.155, "step": 8976 }, { "epoch": 0.2112235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.2563, "step": 8977 }, { "epoch": 0.2112470588235294, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.9921, "step": 8978 }, { "epoch": 0.21127058823529413, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9065, "step": 8979 }, { "epoch": 0.21129411764705883, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.8302, "step": 8980 }, { "epoch": 0.21131764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.844, "step": 8981 }, { "epoch": 0.21134117647058823, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1409, "step": 8982 }, { "epoch": 0.21136470588235295, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.975, "step": 8983 }, { "epoch": 0.21138823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1546, "step": 8984 }, { "epoch": 0.21141176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.4401, "step": 8985 }, { "epoch": 0.21143529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.3922, "step": 8986 }, { "epoch": 0.21145882352941175, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1437, "step": 8987 }, { "epoch": 0.21148235294117648, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0032, "step": 8988 }, { "epoch": 0.21150588235294118, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1251, "step": 8989 }, { "epoch": 0.21152941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3978, "step": 8990 }, { "epoch": 0.21155294117647058, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0865, "step": 8991 }, { "epoch": 0.21157647058823528, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9566, "step": 8992 }, { "epoch": 0.2116, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2578, "step": 8993 }, { "epoch": 0.2116235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0342, "step": 8994 }, { "epoch": 0.2116470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1083, "step": 8995 }, { "epoch": 0.2116705882352941, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.8127, "step": 8996 }, { "epoch": 0.2116941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0843, "step": 8997 }, { "epoch": 0.21171764705882354, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1439, "step": 8998 }, { "epoch": 0.21174117647058824, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3062, "step": 8999 }, { "epoch": 0.21176470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8355, "step": 9000 }, { "epoch": 0.21176470588235294, "eval_loss": 2.2205469608306885, "eval_runtime": 684.3701, "eval_samples_per_second": 12.42, "eval_steps_per_second": 3.105, "step": 9000 }, { "epoch": 0.21178823529411764, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0235, "step": 9001 }, { "epoch": 0.21181176470588237, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9811, "step": 9002 }, { "epoch": 0.21183529411764707, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9065, "step": 9003 }, { "epoch": 0.21185882352941177, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7212, "step": 9004 }, { "epoch": 0.21188235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2669, "step": 9005 }, { "epoch": 0.21190588235294117, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9723, "step": 9006 }, { "epoch": 0.2119294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4144, "step": 9007 }, { "epoch": 0.2119529411764706, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.7712, "step": 9008 }, { "epoch": 0.2119764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9941, "step": 9009 }, { "epoch": 0.212, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1954, "step": 9010 }, { "epoch": 0.2120235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1142, "step": 9011 }, { "epoch": 0.21204705882352942, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8414, "step": 9012 }, { "epoch": 0.21207058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2087, "step": 9013 }, { "epoch": 0.21209411764705882, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9266, "step": 9014 }, { "epoch": 0.21211764705882352, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2768, "step": 9015 }, { "epoch": 0.21214117647058822, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0287, "step": 9016 }, { "epoch": 0.21216470588235295, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.213, "step": 9017 }, { "epoch": 0.21218823529411765, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9164, "step": 9018 }, { "epoch": 0.21221176470588235, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9066, "step": 9019 }, { "epoch": 0.21223529411764705, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8303, "step": 9020 }, { "epoch": 0.21225882352941178, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8534, "step": 9021 }, { "epoch": 0.21228235294117648, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9464, "step": 9022 }, { "epoch": 0.21230588235294118, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.4002, "step": 9023 }, { "epoch": 0.21232941176470588, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9728, "step": 9024 }, { "epoch": 0.21235294117647058, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2317, "step": 9025 }, { "epoch": 0.2123764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0442, "step": 9026 }, { "epoch": 0.2124, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.6182, "step": 9027 }, { "epoch": 0.2124235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0772, "step": 9028 }, { "epoch": 0.2124470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1884, "step": 9029 }, { "epoch": 0.2124705882352941, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8473, "step": 9030 }, { "epoch": 0.21249411764705883, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7719, "step": 9031 }, { "epoch": 0.21251764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0357, "step": 9032 }, { "epoch": 0.21254117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1446, "step": 9033 }, { "epoch": 0.21256470588235293, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1556, "step": 9034 }, { "epoch": 0.21258823529411763, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0299, "step": 9035 }, { "epoch": 0.21261176470588236, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.131, "step": 9036 }, { "epoch": 0.21263529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3549, "step": 9037 }, { "epoch": 0.21265882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.965, "step": 9038 }, { "epoch": 0.21268235294117646, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.111, "step": 9039 }, { "epoch": 0.2127058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.159, "step": 9040 }, { "epoch": 0.2127294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1186, "step": 9041 }, { "epoch": 0.2127529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9502, "step": 9042 }, { "epoch": 0.2127764705882353, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7067, "step": 9043 }, { "epoch": 0.2128, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0693, "step": 9044 }, { "epoch": 0.21282352941176472, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9778, "step": 9045 }, { "epoch": 0.21284705882352942, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2791, "step": 9046 }, { "epoch": 0.21287058823529412, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.0044, "step": 9047 }, { "epoch": 0.21289411764705882, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9809, "step": 9048 }, { "epoch": 0.21291764705882352, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8485, "step": 9049 }, { "epoch": 0.21294117647058824, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2667, "step": 9050 }, { "epoch": 0.21296470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1268, "step": 9051 }, { "epoch": 0.21298823529411764, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.8848, "step": 9052 }, { "epoch": 0.21301176470588234, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2545, "step": 9053 }, { "epoch": 0.21303529411764707, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2333, "step": 9054 }, { "epoch": 0.21305882352941177, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8964, "step": 9055 }, { "epoch": 0.21308235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1942, "step": 9056 }, { "epoch": 0.21310588235294117, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.1014, "step": 9057 }, { "epoch": 0.21312941176470587, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1494, "step": 9058 }, { "epoch": 0.2131529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.4789, "step": 9059 }, { "epoch": 0.2131764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.056, "step": 9060 }, { "epoch": 0.2132, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.4996, "step": 9061 }, { "epoch": 0.2132235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9326, "step": 9062 }, { "epoch": 0.2132470588235294, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9126, "step": 9063 }, { "epoch": 0.21327058823529413, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.827, "step": 9064 }, { "epoch": 0.21329411764705883, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0252, "step": 9065 }, { "epoch": 0.21331764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0711, "step": 9066 }, { "epoch": 0.21334117647058823, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0493, "step": 9067 }, { "epoch": 0.21336470588235293, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0318, "step": 9068 }, { "epoch": 0.21338823529411766, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1592, "step": 9069 }, { "epoch": 0.21341176470588236, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7906, "step": 9070 }, { "epoch": 0.21343529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3108, "step": 9071 }, { "epoch": 0.21345882352941176, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9405, "step": 9072 }, { "epoch": 0.21348235294117648, "grad_norm": 0.26171875, "learning_rate": 0.02, "loss": 0.8822, "step": 9073 }, { "epoch": 0.21350588235294118, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0129, "step": 9074 }, { "epoch": 0.21352941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0875, "step": 9075 }, { "epoch": 0.21355294117647058, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.6713, "step": 9076 }, { "epoch": 0.21357647058823528, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9426, "step": 9077 }, { "epoch": 0.2136, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2064, "step": 9078 }, { "epoch": 0.2136235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3389, "step": 9079 }, { "epoch": 0.2136470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0486, "step": 9080 }, { "epoch": 0.2136705882352941, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.1028, "step": 9081 }, { "epoch": 0.2136941176470588, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9034, "step": 9082 }, { "epoch": 0.21371764705882354, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2171, "step": 9083 }, { "epoch": 0.21374117647058824, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2301, "step": 9084 }, { "epoch": 0.21376470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1881, "step": 9085 }, { "epoch": 0.21378823529411764, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1394, "step": 9086 }, { "epoch": 0.21381176470588234, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9961, "step": 9087 }, { "epoch": 0.21383529411764707, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0934, "step": 9088 }, { "epoch": 0.21385882352941177, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 1.0561, "step": 9089 }, { "epoch": 0.21388235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1491, "step": 9090 }, { "epoch": 0.21390588235294117, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.135, "step": 9091 }, { "epoch": 0.2139294117647059, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9354, "step": 9092 }, { "epoch": 0.2139529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1258, "step": 9093 }, { "epoch": 0.2139764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1477, "step": 9094 }, { "epoch": 0.214, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8742, "step": 9095 }, { "epoch": 0.2140235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2198, "step": 9096 }, { "epoch": 0.21404705882352942, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3219, "step": 9097 }, { "epoch": 0.21407058823529412, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7725, "step": 9098 }, { "epoch": 0.21409411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9896, "step": 9099 }, { "epoch": 0.21411764705882352, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7865, "step": 9100 }, { "epoch": 0.21414117647058822, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1434, "step": 9101 }, { "epoch": 0.21416470588235295, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2792, "step": 9102 }, { "epoch": 0.21418823529411765, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.6237, "step": 9103 }, { "epoch": 0.21421176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1401, "step": 9104 }, { "epoch": 0.21423529411764705, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0031, "step": 9105 }, { "epoch": 0.21425882352941175, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.968, "step": 9106 }, { "epoch": 0.21428235294117648, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2564, "step": 9107 }, { "epoch": 0.21430588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2621, "step": 9108 }, { "epoch": 0.21432941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.165, "step": 9109 }, { "epoch": 0.21435294117647058, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0496, "step": 9110 }, { "epoch": 0.2143764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2764, "step": 9111 }, { "epoch": 0.2144, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0852, "step": 9112 }, { "epoch": 0.2144235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3038, "step": 9113 }, { "epoch": 0.2144470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3852, "step": 9114 }, { "epoch": 0.2144705882352941, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.7755, "step": 9115 }, { "epoch": 0.21449411764705884, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1028, "step": 9116 }, { "epoch": 0.21451764705882354, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1619, "step": 9117 }, { "epoch": 0.21454117647058824, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8583, "step": 9118 }, { "epoch": 0.21456470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3557, "step": 9119 }, { "epoch": 0.21458823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.3776, "step": 9120 }, { "epoch": 0.21461176470588236, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1156, "step": 9121 }, { "epoch": 0.21463529411764706, "grad_norm": 0.255859375, "learning_rate": 0.02, "loss": 0.646, "step": 9122 }, { "epoch": 0.21465882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9828, "step": 9123 }, { "epoch": 0.21468235294117646, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.8186, "step": 9124 }, { "epoch": 0.21470588235294116, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3838, "step": 9125 }, { "epoch": 0.2147294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8439, "step": 9126 }, { "epoch": 0.2147529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0525, "step": 9127 }, { "epoch": 0.2147764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3521, "step": 9128 }, { "epoch": 0.2148, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0403, "step": 9129 }, { "epoch": 0.21482352941176472, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.4644, "step": 9130 }, { "epoch": 0.21484705882352942, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.7302, "step": 9131 }, { "epoch": 0.21487058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2459, "step": 9132 }, { "epoch": 0.21489411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0112, "step": 9133 }, { "epoch": 0.21491764705882352, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9542, "step": 9134 }, { "epoch": 0.21494117647058825, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1079, "step": 9135 }, { "epoch": 0.21496470588235295, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1859, "step": 9136 }, { "epoch": 0.21498823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.4782, "step": 9137 }, { "epoch": 0.21501176470588235, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0303, "step": 9138 }, { "epoch": 0.21503529411764705, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1893, "step": 9139 }, { "epoch": 0.21505882352941177, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2385, "step": 9140 }, { "epoch": 0.21508235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2854, "step": 9141 }, { "epoch": 0.21510588235294117, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0066, "step": 9142 }, { "epoch": 0.21512941176470587, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1938, "step": 9143 }, { "epoch": 0.21515294117647057, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.988, "step": 9144 }, { "epoch": 0.2151764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.3414, "step": 9145 }, { "epoch": 0.2152, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.086, "step": 9146 }, { "epoch": 0.2152235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0099, "step": 9147 }, { "epoch": 0.2152470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1775, "step": 9148 }, { "epoch": 0.21527058823529413, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.234, "step": 9149 }, { "epoch": 0.21529411764705883, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2045, "step": 9150 }, { "epoch": 0.21531764705882353, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.6611, "step": 9151 }, { "epoch": 0.21534117647058823, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8029, "step": 9152 }, { "epoch": 0.21536470588235293, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9144, "step": 9153 }, { "epoch": 0.21538823529411766, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2526, "step": 9154 }, { "epoch": 0.21541176470588236, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8964, "step": 9155 }, { "epoch": 0.21543529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8967, "step": 9156 }, { "epoch": 0.21545882352941176, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1038, "step": 9157 }, { "epoch": 0.21548235294117646, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0567, "step": 9158 }, { "epoch": 0.21550588235294119, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.3549, "step": 9159 }, { "epoch": 0.21552941176470589, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1394, "step": 9160 }, { "epoch": 0.21555294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.4007, "step": 9161 }, { "epoch": 0.21557647058823529, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9955, "step": 9162 }, { "epoch": 0.2156, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1651, "step": 9163 }, { "epoch": 0.21562352941176471, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1193, "step": 9164 }, { "epoch": 0.21564705882352941, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0123, "step": 9165 }, { "epoch": 0.21567058823529411, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2642, "step": 9166 }, { "epoch": 0.21569411764705881, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2058, "step": 9167 }, { "epoch": 0.21571764705882354, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2314, "step": 9168 }, { "epoch": 0.21574117647058824, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1136, "step": 9169 }, { "epoch": 0.21576470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0013, "step": 9170 }, { "epoch": 0.21578823529411764, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.257, "step": 9171 }, { "epoch": 0.21581176470588234, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0053, "step": 9172 }, { "epoch": 0.21583529411764707, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2426, "step": 9173 }, { "epoch": 0.21585882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1962, "step": 9174 }, { "epoch": 0.21588235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9307, "step": 9175 }, { "epoch": 0.21590588235294117, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.7943, "step": 9176 }, { "epoch": 0.21592941176470587, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9187, "step": 9177 }, { "epoch": 0.2159529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1569, "step": 9178 }, { "epoch": 0.2159764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0502, "step": 9179 }, { "epoch": 0.216, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.16, "step": 9180 }, { "epoch": 0.2160235294117647, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.8819, "step": 9181 }, { "epoch": 0.21604705882352943, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0187, "step": 9182 }, { "epoch": 0.21607058823529413, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0117, "step": 9183 }, { "epoch": 0.21609411764705883, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8919, "step": 9184 }, { "epoch": 0.21611764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1106, "step": 9185 }, { "epoch": 0.21614117647058823, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2352, "step": 9186 }, { "epoch": 0.21616470588235295, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.6301, "step": 9187 }, { "epoch": 0.21618823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2732, "step": 9188 }, { "epoch": 0.21621176470588235, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1234, "step": 9189 }, { "epoch": 0.21623529411764705, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1634, "step": 9190 }, { "epoch": 0.21625882352941175, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.337, "step": 9191 }, { "epoch": 0.21628235294117648, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.3491, "step": 9192 }, { "epoch": 0.21630588235294118, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9323, "step": 9193 }, { "epoch": 0.21632941176470588, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0445, "step": 9194 }, { "epoch": 0.21635294117647058, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.026, "step": 9195 }, { "epoch": 0.21637647058823528, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.067, "step": 9196 }, { "epoch": 0.2164, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9915, "step": 9197 }, { "epoch": 0.2164235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.8879, "step": 9198 }, { "epoch": 0.2164470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3929, "step": 9199 }, { "epoch": 0.2164705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0445, "step": 9200 }, { "epoch": 0.21649411764705884, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2606, "step": 9201 }, { "epoch": 0.21651764705882354, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1579, "step": 9202 }, { "epoch": 0.21654117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2508, "step": 9203 }, { "epoch": 0.21656470588235294, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.2439, "step": 9204 }, { "epoch": 0.21658823529411764, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1121, "step": 9205 }, { "epoch": 0.21661176470588236, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 1.1288, "step": 9206 }, { "epoch": 0.21663529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0249, "step": 9207 }, { "epoch": 0.21665882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9204, "step": 9208 }, { "epoch": 0.21668235294117646, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.7536, "step": 9209 }, { "epoch": 0.21670588235294116, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2795, "step": 9210 }, { "epoch": 0.2167294117647059, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.7205, "step": 9211 }, { "epoch": 0.2167529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0761, "step": 9212 }, { "epoch": 0.2167764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1302, "step": 9213 }, { "epoch": 0.2168, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1695, "step": 9214 }, { "epoch": 0.2168235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.4987, "step": 9215 }, { "epoch": 0.21684705882352942, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1195, "step": 9216 }, { "epoch": 0.21687058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.4232, "step": 9217 }, { "epoch": 0.21689411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1683, "step": 9218 }, { "epoch": 0.21691764705882352, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2093, "step": 9219 }, { "epoch": 0.21694117647058825, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3462, "step": 9220 }, { "epoch": 0.21696470588235295, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1064, "step": 9221 }, { "epoch": 0.21698823529411765, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.9447, "step": 9222 }, { "epoch": 0.21701176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.4982, "step": 9223 }, { "epoch": 0.21703529411764705, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9354, "step": 9224 }, { "epoch": 0.21705882352941178, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9877, "step": 9225 }, { "epoch": 0.21708235294117648, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7535, "step": 9226 }, { "epoch": 0.21710588235294118, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9688, "step": 9227 }, { "epoch": 0.21712941176470588, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.257, "step": 9228 }, { "epoch": 0.21715294117647058, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.5144, "step": 9229 }, { "epoch": 0.2171764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.6733, "step": 9230 }, { "epoch": 0.2172, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1698, "step": 9231 }, { "epoch": 0.2172235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1993, "step": 9232 }, { "epoch": 0.2172470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0444, "step": 9233 }, { "epoch": 0.2172705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0086, "step": 9234 }, { "epoch": 0.21729411764705883, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9816, "step": 9235 }, { "epoch": 0.21731764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0054, "step": 9236 }, { "epoch": 0.21734117647058823, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.5881, "step": 9237 }, { "epoch": 0.21736470588235293, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8857, "step": 9238 }, { "epoch": 0.21738823529411766, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1806, "step": 9239 }, { "epoch": 0.21741176470588236, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.5653, "step": 9240 }, { "epoch": 0.21743529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.901, "step": 9241 }, { "epoch": 0.21745882352941176, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.6611, "step": 9242 }, { "epoch": 0.21748235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1587, "step": 9243 }, { "epoch": 0.2175058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0259, "step": 9244 }, { "epoch": 0.2175294117647059, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8543, "step": 9245 }, { "epoch": 0.2175529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1661, "step": 9246 }, { "epoch": 0.2175764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1829, "step": 9247 }, { "epoch": 0.2176, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9121, "step": 9248 }, { "epoch": 0.21762352941176472, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8226, "step": 9249 }, { "epoch": 0.21764705882352942, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2469, "step": 9250 }, { "epoch": 0.21767058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1621, "step": 9251 }, { "epoch": 0.21769411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2747, "step": 9252 }, { "epoch": 0.21771764705882352, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3465, "step": 9253 }, { "epoch": 0.21774117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2195, "step": 9254 }, { "epoch": 0.21776470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0028, "step": 9255 }, { "epoch": 0.21778823529411764, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1328, "step": 9256 }, { "epoch": 0.21781176470588234, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3608, "step": 9257 }, { "epoch": 0.21783529411764707, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.117, "step": 9258 }, { "epoch": 0.21785882352941177, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1027, "step": 9259 }, { "epoch": 0.21788235294117647, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.96, "step": 9260 }, { "epoch": 0.21790588235294117, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.921, "step": 9261 }, { "epoch": 0.21792941176470587, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1315, "step": 9262 }, { "epoch": 0.2179529411764706, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.7815, "step": 9263 }, { "epoch": 0.2179764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1371, "step": 9264 }, { "epoch": 0.218, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2206, "step": 9265 }, { "epoch": 0.2180235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7049, "step": 9266 }, { "epoch": 0.2180470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7949, "step": 9267 }, { "epoch": 0.21807058823529413, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.741, "step": 9268 }, { "epoch": 0.21809411764705883, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1292, "step": 9269 }, { "epoch": 0.21811764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.104, "step": 9270 }, { "epoch": 0.21814117647058823, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0567, "step": 9271 }, { "epoch": 0.21816470588235296, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0369, "step": 9272 }, { "epoch": 0.21818823529411766, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.36, "step": 9273 }, { "epoch": 0.21821176470588236, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1183, "step": 9274 }, { "epoch": 0.21823529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2045, "step": 9275 }, { "epoch": 0.21825882352941176, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4081, "step": 9276 }, { "epoch": 0.21828235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2963, "step": 9277 }, { "epoch": 0.21830588235294118, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9506, "step": 9278 }, { "epoch": 0.21832941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0903, "step": 9279 }, { "epoch": 0.21835294117647058, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8988, "step": 9280 }, { "epoch": 0.21837647058823528, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0014, "step": 9281 }, { "epoch": 0.2184, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.8238, "step": 9282 }, { "epoch": 0.2184235294117647, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8159, "step": 9283 }, { "epoch": 0.2184470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9208, "step": 9284 }, { "epoch": 0.2184705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2843, "step": 9285 }, { "epoch": 0.2184941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9771, "step": 9286 }, { "epoch": 0.21851764705882354, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0282, "step": 9287 }, { "epoch": 0.21854117647058824, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.862, "step": 9288 }, { "epoch": 0.21856470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.129, "step": 9289 }, { "epoch": 0.21858823529411764, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2043, "step": 9290 }, { "epoch": 0.21861176470588237, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.27, "step": 9291 }, { "epoch": 0.21863529411764707, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.8847, "step": 9292 }, { "epoch": 0.21865882352941177, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1136, "step": 9293 }, { "epoch": 0.21868235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.188, "step": 9294 }, { "epoch": 0.21870588235294117, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1895, "step": 9295 }, { "epoch": 0.2187294117647059, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1212, "step": 9296 }, { "epoch": 0.2187529411764706, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.8739, "step": 9297 }, { "epoch": 0.2187764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.262, "step": 9298 }, { "epoch": 0.2188, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2773, "step": 9299 }, { "epoch": 0.2188235294117647, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.8777, "step": 9300 }, { "epoch": 0.21884705882352942, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.067, "step": 9301 }, { "epoch": 0.21887058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1785, "step": 9302 }, { "epoch": 0.21889411764705882, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1482, "step": 9303 }, { "epoch": 0.21891764705882352, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2342, "step": 9304 }, { "epoch": 0.21894117647058822, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0107, "step": 9305 }, { "epoch": 0.21896470588235295, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9825, "step": 9306 }, { "epoch": 0.21898823529411765, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.015, "step": 9307 }, { "epoch": 0.21901176470588235, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1015, "step": 9308 }, { "epoch": 0.21903529411764705, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 1.1714, "step": 9309 }, { "epoch": 0.21905882352941178, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.929, "step": 9310 }, { "epoch": 0.21908235294117648, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.8427, "step": 9311 }, { "epoch": 0.21910588235294118, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2282, "step": 9312 }, { "epoch": 0.21912941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2407, "step": 9313 }, { "epoch": 0.21915294117647058, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9565, "step": 9314 }, { "epoch": 0.2191764705882353, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.8884, "step": 9315 }, { "epoch": 0.2192, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0952, "step": 9316 }, { "epoch": 0.2192235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1758, "step": 9317 }, { "epoch": 0.2192470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3507, "step": 9318 }, { "epoch": 0.2192705882352941, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0285, "step": 9319 }, { "epoch": 0.21929411764705883, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9809, "step": 9320 }, { "epoch": 0.21931764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.986, "step": 9321 }, { "epoch": 0.21934117647058823, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1586, "step": 9322 }, { "epoch": 0.21936470588235293, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8712, "step": 9323 }, { "epoch": 0.21938823529411763, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.223, "step": 9324 }, { "epoch": 0.21941176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1611, "step": 9325 }, { "epoch": 0.21943529411764706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0415, "step": 9326 }, { "epoch": 0.21945882352941176, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0543, "step": 9327 }, { "epoch": 0.21948235294117646, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1237, "step": 9328 }, { "epoch": 0.2195058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8777, "step": 9329 }, { "epoch": 0.2195294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1386, "step": 9330 }, { "epoch": 0.2195529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.025, "step": 9331 }, { "epoch": 0.2195764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2428, "step": 9332 }, { "epoch": 0.2196, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1107, "step": 9333 }, { "epoch": 0.21962352941176472, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.1388, "step": 9334 }, { "epoch": 0.21964705882352942, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 1.0159, "step": 9335 }, { "epoch": 0.21967058823529412, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7712, "step": 9336 }, { "epoch": 0.21969411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2104, "step": 9337 }, { "epoch": 0.21971764705882352, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0258, "step": 9338 }, { "epoch": 0.21974117647058825, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2145, "step": 9339 }, { "epoch": 0.21976470588235295, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9733, "step": 9340 }, { "epoch": 0.21978823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1541, "step": 9341 }, { "epoch": 0.21981176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.7926, "step": 9342 }, { "epoch": 0.21983529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2093, "step": 9343 }, { "epoch": 0.21985882352941177, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4173, "step": 9344 }, { "epoch": 0.21988235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9956, "step": 9345 }, { "epoch": 0.21990588235294117, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1664, "step": 9346 }, { "epoch": 0.21992941176470587, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.103, "step": 9347 }, { "epoch": 0.2199529411764706, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9963, "step": 9348 }, { "epoch": 0.2199764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8012, "step": 9349 }, { "epoch": 0.22, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2382, "step": 9350 }, { "epoch": 0.2200235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1453, "step": 9351 }, { "epoch": 0.2200470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.772, "step": 9352 }, { "epoch": 0.22007058823529413, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1687, "step": 9353 }, { "epoch": 0.22009411764705883, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0399, "step": 9354 }, { "epoch": 0.22011764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2129, "step": 9355 }, { "epoch": 0.22014117647058823, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9408, "step": 9356 }, { "epoch": 0.22016470588235293, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1045, "step": 9357 }, { "epoch": 0.22018823529411766, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.3186, "step": 9358 }, { "epoch": 0.22021176470588236, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0882, "step": 9359 }, { "epoch": 0.22023529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.919, "step": 9360 }, { "epoch": 0.22025882352941176, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9913, "step": 9361 }, { "epoch": 0.22028235294117646, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.7093, "step": 9362 }, { "epoch": 0.22030588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8263, "step": 9363 }, { "epoch": 0.22032941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2814, "step": 9364 }, { "epoch": 0.22035294117647058, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9801, "step": 9365 }, { "epoch": 0.22037647058823528, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2411, "step": 9366 }, { "epoch": 0.2204, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1627, "step": 9367 }, { "epoch": 0.2204235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0494, "step": 9368 }, { "epoch": 0.2204470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0152, "step": 9369 }, { "epoch": 0.2204705882352941, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9238, "step": 9370 }, { "epoch": 0.2204941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1487, "step": 9371 }, { "epoch": 0.22051764705882354, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9143, "step": 9372 }, { "epoch": 0.22054117647058824, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0922, "step": 9373 }, { "epoch": 0.22056470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 1.0301, "step": 9374 }, { "epoch": 0.22058823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8222, "step": 9375 }, { "epoch": 0.22061176470588234, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9282, "step": 9376 }, { "epoch": 0.22063529411764707, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.7291, "step": 9377 }, { "epoch": 0.22065882352941177, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.4147, "step": 9378 }, { "epoch": 0.22068235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2029, "step": 9379 }, { "epoch": 0.22070588235294117, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.6804, "step": 9380 }, { "epoch": 0.2207294117647059, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.6924, "step": 9381 }, { "epoch": 0.2207529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1414, "step": 9382 }, { "epoch": 0.2207764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0913, "step": 9383 }, { "epoch": 0.2208, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.8755, "step": 9384 }, { "epoch": 0.2208235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1743, "step": 9385 }, { "epoch": 0.22084705882352942, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.7582, "step": 9386 }, { "epoch": 0.22087058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1152, "step": 9387 }, { "epoch": 0.22089411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2274, "step": 9388 }, { "epoch": 0.22091764705882352, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8996, "step": 9389 }, { "epoch": 0.22094117647058822, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9183, "step": 9390 }, { "epoch": 0.22096470588235295, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2823, "step": 9391 }, { "epoch": 0.22098823529411765, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.7035, "step": 9392 }, { "epoch": 0.22101176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9844, "step": 9393 }, { "epoch": 0.22103529411764705, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7538, "step": 9394 }, { "epoch": 0.22105882352941175, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0323, "step": 9395 }, { "epoch": 0.22108235294117648, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1356, "step": 9396 }, { "epoch": 0.22110588235294118, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9426, "step": 9397 }, { "epoch": 0.22112941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2331, "step": 9398 }, { "epoch": 0.22115294117647058, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9795, "step": 9399 }, { "epoch": 0.2211764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2002, "step": 9400 }, { "epoch": 0.2212, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 1.1828, "step": 9401 }, { "epoch": 0.2212235294117647, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8472, "step": 9402 }, { "epoch": 0.2212470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.2235, "step": 9403 }, { "epoch": 0.2212705882352941, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1086, "step": 9404 }, { "epoch": 0.22129411764705884, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7661, "step": 9405 }, { "epoch": 0.22131764705882354, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1041, "step": 9406 }, { "epoch": 0.22134117647058824, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0027, "step": 9407 }, { "epoch": 0.22136470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9724, "step": 9408 }, { "epoch": 0.22138823529411764, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8016, "step": 9409 }, { "epoch": 0.22141176470588236, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0477, "step": 9410 }, { "epoch": 0.22143529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2564, "step": 9411 }, { "epoch": 0.22145882352941176, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0584, "step": 9412 }, { "epoch": 0.22148235294117646, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0484, "step": 9413 }, { "epoch": 0.22150588235294116, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.8073, "step": 9414 }, { "epoch": 0.2215294117647059, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0515, "step": 9415 }, { "epoch": 0.2215529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.4785, "step": 9416 }, { "epoch": 0.2215764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1325, "step": 9417 }, { "epoch": 0.2216, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0493, "step": 9418 }, { "epoch": 0.22162352941176472, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1056, "step": 9419 }, { "epoch": 0.22164705882352942, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0665, "step": 9420 }, { "epoch": 0.22167058823529412, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0012, "step": 9421 }, { "epoch": 0.22169411764705882, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.025, "step": 9422 }, { "epoch": 0.22171764705882352, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2651, "step": 9423 }, { "epoch": 0.22174117647058825, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1171, "step": 9424 }, { "epoch": 0.22176470588235295, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.951, "step": 9425 }, { "epoch": 0.22178823529411765, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0804, "step": 9426 }, { "epoch": 0.22181176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0569, "step": 9427 }, { "epoch": 0.22183529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0408, "step": 9428 }, { "epoch": 0.22185882352941177, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0597, "step": 9429 }, { "epoch": 0.22188235294117648, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8211, "step": 9430 }, { "epoch": 0.22190588235294118, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2056, "step": 9431 }, { "epoch": 0.22192941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.4765, "step": 9432 }, { "epoch": 0.22195294117647058, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1503, "step": 9433 }, { "epoch": 0.2219764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9868, "step": 9434 }, { "epoch": 0.222, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3362, "step": 9435 }, { "epoch": 0.2220235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0017, "step": 9436 }, { "epoch": 0.2220470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2034, "step": 9437 }, { "epoch": 0.22207058823529413, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.6048, "step": 9438 }, { "epoch": 0.22209411764705883, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.7914, "step": 9439 }, { "epoch": 0.22211764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9848, "step": 9440 }, { "epoch": 0.22214117647058823, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1614, "step": 9441 }, { "epoch": 0.22216470588235293, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2135, "step": 9442 }, { "epoch": 0.22218823529411766, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.207, "step": 9443 }, { "epoch": 0.22221176470588236, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9409, "step": 9444 }, { "epoch": 0.22223529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.023, "step": 9445 }, { "epoch": 0.22225882352941176, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9329, "step": 9446 }, { "epoch": 0.22228235294117646, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0079, "step": 9447 }, { "epoch": 0.2223058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0453, "step": 9448 }, { "epoch": 0.2223294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2846, "step": 9449 }, { "epoch": 0.2223529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0978, "step": 9450 }, { "epoch": 0.2223764705882353, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9499, "step": 9451 }, { "epoch": 0.2224, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1746, "step": 9452 }, { "epoch": 0.22242352941176471, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0826, "step": 9453 }, { "epoch": 0.22244705882352941, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.704, "step": 9454 }, { "epoch": 0.22247058823529411, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3452, "step": 9455 }, { "epoch": 0.22249411764705881, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.7491, "step": 9456 }, { "epoch": 0.22251764705882354, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9804, "step": 9457 }, { "epoch": 0.22254117647058824, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.007, "step": 9458 }, { "epoch": 0.22256470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2742, "step": 9459 }, { "epoch": 0.22258823529411764, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0084, "step": 9460 }, { "epoch": 0.22261176470588234, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.7521, "step": 9461 }, { "epoch": 0.22263529411764707, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.2254, "step": 9462 }, { "epoch": 0.22265882352941177, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.124, "step": 9463 }, { "epoch": 0.22268235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0135, "step": 9464 }, { "epoch": 0.22270588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1898, "step": 9465 }, { "epoch": 0.22272941176470587, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1426, "step": 9466 }, { "epoch": 0.2227529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0351, "step": 9467 }, { "epoch": 0.2227764705882353, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9134, "step": 9468 }, { "epoch": 0.2228, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1082, "step": 9469 }, { "epoch": 0.2228235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2315, "step": 9470 }, { "epoch": 0.2228470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2554, "step": 9471 }, { "epoch": 0.22287058823529413, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3795, "step": 9472 }, { "epoch": 0.22289411764705883, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0222, "step": 9473 }, { "epoch": 0.22291764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2942, "step": 9474 }, { "epoch": 0.22294117647058823, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1639, "step": 9475 }, { "epoch": 0.22296470588235295, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2695, "step": 9476 }, { "epoch": 0.22298823529411765, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.085, "step": 9477 }, { "epoch": 0.22301176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1051, "step": 9478 }, { "epoch": 0.22303529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.087, "step": 9479 }, { "epoch": 0.22305882352941175, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0576, "step": 9480 }, { "epoch": 0.22308235294117648, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2591, "step": 9481 }, { "epoch": 0.22310588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9446, "step": 9482 }, { "epoch": 0.22312941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1189, "step": 9483 }, { "epoch": 0.22315294117647058, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0506, "step": 9484 }, { "epoch": 0.22317647058823528, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.0555, "step": 9485 }, { "epoch": 0.2232, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0044, "step": 9486 }, { "epoch": 0.2232235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0315, "step": 9487 }, { "epoch": 0.2232470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9502, "step": 9488 }, { "epoch": 0.2232705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2223, "step": 9489 }, { "epoch": 0.22329411764705884, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0309, "step": 9490 }, { "epoch": 0.22331764705882354, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1209, "step": 9491 }, { "epoch": 0.22334117647058824, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9792, "step": 9492 }, { "epoch": 0.22336470588235294, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.8282, "step": 9493 }, { "epoch": 0.22338823529411764, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8556, "step": 9494 }, { "epoch": 0.22341176470588237, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 1.2858, "step": 9495 }, { "epoch": 0.22343529411764707, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2289, "step": 9496 }, { "epoch": 0.22345882352941177, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1563, "step": 9497 }, { "epoch": 0.22348235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.877, "step": 9498 }, { "epoch": 0.22350588235294117, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2462, "step": 9499 }, { "epoch": 0.2235294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1178, "step": 9500 }, { "epoch": 0.2235529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8725, "step": 9501 }, { "epoch": 0.2235764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7885, "step": 9502 }, { "epoch": 0.2236, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9314, "step": 9503 }, { "epoch": 0.2236235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3542, "step": 9504 }, { "epoch": 0.22364705882352942, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.7218, "step": 9505 }, { "epoch": 0.22367058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0571, "step": 9506 }, { "epoch": 0.22369411764705882, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.417, "step": 9507 }, { "epoch": 0.22371764705882352, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9224, "step": 9508 }, { "epoch": 0.22374117647058825, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8703, "step": 9509 }, { "epoch": 0.22376470588235295, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0366, "step": 9510 }, { "epoch": 0.22378823529411765, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.846, "step": 9511 }, { "epoch": 0.22381176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.089, "step": 9512 }, { "epoch": 0.22383529411764705, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.4186, "step": 9513 }, { "epoch": 0.22385882352941178, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8464, "step": 9514 }, { "epoch": 0.22388235294117648, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9253, "step": 9515 }, { "epoch": 0.22390588235294118, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1556, "step": 9516 }, { "epoch": 0.22392941176470588, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0257, "step": 9517 }, { "epoch": 0.22395294117647058, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.8929, "step": 9518 }, { "epoch": 0.2239764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.132, "step": 9519 }, { "epoch": 0.224, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0486, "step": 9520 }, { "epoch": 0.2240235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0364, "step": 9521 }, { "epoch": 0.2240470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8836, "step": 9522 }, { "epoch": 0.2240705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2854, "step": 9523 }, { "epoch": 0.22409411764705883, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9036, "step": 9524 }, { "epoch": 0.22411764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1524, "step": 9525 }, { "epoch": 0.22414117647058823, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9888, "step": 9526 }, { "epoch": 0.22416470588235293, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9349, "step": 9527 }, { "epoch": 0.22418823529411766, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8934, "step": 9528 }, { "epoch": 0.22421176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1548, "step": 9529 }, { "epoch": 0.22423529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1941, "step": 9530 }, { "epoch": 0.22425882352941176, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.821, "step": 9531 }, { "epoch": 0.22428235294117646, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3558, "step": 9532 }, { "epoch": 0.2243058823529412, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.9054, "step": 9533 }, { "epoch": 0.2243294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0996, "step": 9534 }, { "epoch": 0.2243529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0326, "step": 9535 }, { "epoch": 0.2243764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4701, "step": 9536 }, { "epoch": 0.2244, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2855, "step": 9537 }, { "epoch": 0.22442352941176472, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8987, "step": 9538 }, { "epoch": 0.22444705882352942, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.8864, "step": 9539 }, { "epoch": 0.22447058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0347, "step": 9540 }, { "epoch": 0.22449411764705882, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2326, "step": 9541 }, { "epoch": 0.22451764705882352, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2114, "step": 9542 }, { "epoch": 0.22454117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1106, "step": 9543 }, { "epoch": 0.22456470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0177, "step": 9544 }, { "epoch": 0.22458823529411764, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8352, "step": 9545 }, { "epoch": 0.22461176470588234, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.08, "step": 9546 }, { "epoch": 0.22463529411764707, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.045, "step": 9547 }, { "epoch": 0.22465882352941177, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9504, "step": 9548 }, { "epoch": 0.22468235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1973, "step": 9549 }, { "epoch": 0.22470588235294117, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2117, "step": 9550 }, { "epoch": 0.22472941176470587, "grad_norm": 0.341796875, "learning_rate": 0.02, "loss": 1.0492, "step": 9551 }, { "epoch": 0.2247529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2812, "step": 9552 }, { "epoch": 0.2247764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.4493, "step": 9553 }, { "epoch": 0.2248, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8727, "step": 9554 }, { "epoch": 0.2248235294117647, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.7385, "step": 9555 }, { "epoch": 0.2248470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.4614, "step": 9556 }, { "epoch": 0.22487058823529413, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.3368, "step": 9557 }, { "epoch": 0.22489411764705883, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1643, "step": 9558 }, { "epoch": 0.22491764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9348, "step": 9559 }, { "epoch": 0.22494117647058823, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9072, "step": 9560 }, { "epoch": 0.22496470588235293, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0906, "step": 9561 }, { "epoch": 0.22498823529411766, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9718, "step": 9562 }, { "epoch": 0.22501176470588236, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0068, "step": 9563 }, { "epoch": 0.22503529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2094, "step": 9564 }, { "epoch": 0.22505882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9448, "step": 9565 }, { "epoch": 0.22508235294117648, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9989, "step": 9566 }, { "epoch": 0.22510588235294118, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0034, "step": 9567 }, { "epoch": 0.22512941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1026, "step": 9568 }, { "epoch": 0.22515294117647058, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0583, "step": 9569 }, { "epoch": 0.22517647058823528, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1497, "step": 9570 }, { "epoch": 0.2252, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9386, "step": 9571 }, { "epoch": 0.2252235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1425, "step": 9572 }, { "epoch": 0.2252470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1832, "step": 9573 }, { "epoch": 0.2252705882352941, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8417, "step": 9574 }, { "epoch": 0.2252941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1566, "step": 9575 }, { "epoch": 0.22531764705882354, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2204, "step": 9576 }, { "epoch": 0.22534117647058824, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1273, "step": 9577 }, { "epoch": 0.22536470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0047, "step": 9578 }, { "epoch": 0.22538823529411764, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.852, "step": 9579 }, { "epoch": 0.22541176470588234, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8656, "step": 9580 }, { "epoch": 0.22543529411764707, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0186, "step": 9581 }, { "epoch": 0.22545882352941177, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9383, "step": 9582 }, { "epoch": 0.22548235294117647, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.3007, "step": 9583 }, { "epoch": 0.22550588235294117, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1177, "step": 9584 }, { "epoch": 0.2255294117647059, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8887, "step": 9585 }, { "epoch": 0.2255529411764706, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9044, "step": 9586 }, { "epoch": 0.2255764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.4088, "step": 9587 }, { "epoch": 0.2256, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0652, "step": 9588 }, { "epoch": 0.2256235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3443, "step": 9589 }, { "epoch": 0.22564705882352942, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7962, "step": 9590 }, { "epoch": 0.22567058823529412, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9481, "step": 9591 }, { "epoch": 0.22569411764705882, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0422, "step": 9592 }, { "epoch": 0.22571764705882352, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.4547, "step": 9593 }, { "epoch": 0.22574117647058822, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.123, "step": 9594 }, { "epoch": 0.22576470588235295, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1314, "step": 9595 }, { "epoch": 0.22578823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1988, "step": 9596 }, { "epoch": 0.22581176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2321, "step": 9597 }, { "epoch": 0.22583529411764705, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1821, "step": 9598 }, { "epoch": 0.22585882352941178, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1266, "step": 9599 }, { "epoch": 0.22588235294117648, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9563, "step": 9600 }, { "epoch": 0.22590588235294118, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8752, "step": 9601 }, { "epoch": 0.22592941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3495, "step": 9602 }, { "epoch": 0.22595294117647058, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8432, "step": 9603 }, { "epoch": 0.2259764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1349, "step": 9604 }, { "epoch": 0.226, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0084, "step": 9605 }, { "epoch": 0.2260235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0102, "step": 9606 }, { "epoch": 0.2260470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8182, "step": 9607 }, { "epoch": 0.2260705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1928, "step": 9608 }, { "epoch": 0.22609411764705883, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1009, "step": 9609 }, { "epoch": 0.22611764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9131, "step": 9610 }, { "epoch": 0.22614117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1273, "step": 9611 }, { "epoch": 0.22616470588235293, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1052, "step": 9612 }, { "epoch": 0.22618823529411763, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.094, "step": 9613 }, { "epoch": 0.22621176470588236, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1954, "step": 9614 }, { "epoch": 0.22623529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4209, "step": 9615 }, { "epoch": 0.22625882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.057, "step": 9616 }, { "epoch": 0.22628235294117646, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9786, "step": 9617 }, { "epoch": 0.2263058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.048, "step": 9618 }, { "epoch": 0.2263294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0801, "step": 9619 }, { "epoch": 0.2263529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1836, "step": 9620 }, { "epoch": 0.2263764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2434, "step": 9621 }, { "epoch": 0.2264, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.4463, "step": 9622 }, { "epoch": 0.22642352941176472, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1575, "step": 9623 }, { "epoch": 0.22644705882352942, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2469, "step": 9624 }, { "epoch": 0.22647058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0785, "step": 9625 }, { "epoch": 0.22649411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8978, "step": 9626 }, { "epoch": 0.22651764705882352, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9209, "step": 9627 }, { "epoch": 0.22654117647058825, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2405, "step": 9628 }, { "epoch": 0.22656470588235295, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1302, "step": 9629 }, { "epoch": 0.22658823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0884, "step": 9630 }, { "epoch": 0.22661176470588235, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9269, "step": 9631 }, { "epoch": 0.22663529411764705, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.266, "step": 9632 }, { "epoch": 0.22665882352941177, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9375, "step": 9633 }, { "epoch": 0.22668235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1933, "step": 9634 }, { "epoch": 0.22670588235294117, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0611, "step": 9635 }, { "epoch": 0.22672941176470587, "grad_norm": 0.28125, "learning_rate": 0.02, "loss": 0.4584, "step": 9636 }, { "epoch": 0.2267529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1834, "step": 9637 }, { "epoch": 0.2267764705882353, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.9438, "step": 9638 }, { "epoch": 0.2268, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0996, "step": 9639 }, { "epoch": 0.2268235294117647, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9574, "step": 9640 }, { "epoch": 0.2268470588235294, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.9047, "step": 9641 }, { "epoch": 0.22687058823529413, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1955, "step": 9642 }, { "epoch": 0.22689411764705883, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1414, "step": 9643 }, { "epoch": 0.22691764705882353, "grad_norm": 0.275390625, "learning_rate": 0.02, "loss": 0.6246, "step": 9644 }, { "epoch": 0.22694117647058823, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.2877, "step": 9645 }, { "epoch": 0.22696470588235293, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.791, "step": 9646 }, { "epoch": 0.22698823529411766, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.7053, "step": 9647 }, { "epoch": 0.22701176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1625, "step": 9648 }, { "epoch": 0.22703529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9267, "step": 9649 }, { "epoch": 0.22705882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8917, "step": 9650 }, { "epoch": 0.22708235294117646, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7772, "step": 9651 }, { "epoch": 0.22710588235294119, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5119, "step": 9652 }, { "epoch": 0.22712941176470589, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0712, "step": 9653 }, { "epoch": 0.22715294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1583, "step": 9654 }, { "epoch": 0.22717647058823529, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1536, "step": 9655 }, { "epoch": 0.2272, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9658, "step": 9656 }, { "epoch": 0.2272235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1986, "step": 9657 }, { "epoch": 0.2272470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0934, "step": 9658 }, { "epoch": 0.2272705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2422, "step": 9659 }, { "epoch": 0.2272941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8406, "step": 9660 }, { "epoch": 0.22731764705882354, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1178, "step": 9661 }, { "epoch": 0.22734117647058824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9794, "step": 9662 }, { "epoch": 0.22736470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8163, "step": 9663 }, { "epoch": 0.22738823529411764, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.234, "step": 9664 }, { "epoch": 0.22741176470588234, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.5318, "step": 9665 }, { "epoch": 0.22743529411764707, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9779, "step": 9666 }, { "epoch": 0.22745882352941177, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0122, "step": 9667 }, { "epoch": 0.22748235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0736, "step": 9668 }, { "epoch": 0.22750588235294117, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1712, "step": 9669 }, { "epoch": 0.22752941176470587, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8926, "step": 9670 }, { "epoch": 0.2275529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0835, "step": 9671 }, { "epoch": 0.2275764705882353, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9152, "step": 9672 }, { "epoch": 0.2276, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0508, "step": 9673 }, { "epoch": 0.2276235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0137, "step": 9674 }, { "epoch": 0.22764705882352942, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3392, "step": 9675 }, { "epoch": 0.22767058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.087, "step": 9676 }, { "epoch": 0.22769411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8847, "step": 9677 }, { "epoch": 0.22771764705882352, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9053, "step": 9678 }, { "epoch": 0.22774117647058822, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3222, "step": 9679 }, { "epoch": 0.22776470588235295, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1273, "step": 9680 }, { "epoch": 0.22778823529411765, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0512, "step": 9681 }, { "epoch": 0.22781176470588235, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0874, "step": 9682 }, { "epoch": 0.22783529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0745, "step": 9683 }, { "epoch": 0.22785882352941175, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0533, "step": 9684 }, { "epoch": 0.22788235294117648, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1026, "step": 9685 }, { "epoch": 0.22790588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.4309, "step": 9686 }, { "epoch": 0.22792941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1092, "step": 9687 }, { "epoch": 0.22795294117647058, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9914, "step": 9688 }, { "epoch": 0.22797647058823528, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.7291, "step": 9689 }, { "epoch": 0.228, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9878, "step": 9690 }, { "epoch": 0.2280235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9147, "step": 9691 }, { "epoch": 0.2280470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0497, "step": 9692 }, { "epoch": 0.2280705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1639, "step": 9693 }, { "epoch": 0.22809411764705884, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9825, "step": 9694 }, { "epoch": 0.22811764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.25, "step": 9695 }, { "epoch": 0.22814117647058824, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2007, "step": 9696 }, { "epoch": 0.22816470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9286, "step": 9697 }, { "epoch": 0.22818823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0255, "step": 9698 }, { "epoch": 0.22821176470588236, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9471, "step": 9699 }, { "epoch": 0.22823529411764706, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8772, "step": 9700 }, { "epoch": 0.22825882352941176, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0414, "step": 9701 }, { "epoch": 0.22828235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1643, "step": 9702 }, { "epoch": 0.22830588235294116, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0116, "step": 9703 }, { "epoch": 0.2283294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2301, "step": 9704 }, { "epoch": 0.2283529411764706, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8185, "step": 9705 }, { "epoch": 0.2283764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1609, "step": 9706 }, { "epoch": 0.2284, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8493, "step": 9707 }, { "epoch": 0.22842352941176472, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0752, "step": 9708 }, { "epoch": 0.22844705882352942, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.3674, "step": 9709 }, { "epoch": 0.22847058823529412, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8831, "step": 9710 }, { "epoch": 0.22849411764705882, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.105, "step": 9711 }, { "epoch": 0.22851764705882352, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2297, "step": 9712 }, { "epoch": 0.22854117647058825, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1796, "step": 9713 }, { "epoch": 0.22856470588235295, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.4221, "step": 9714 }, { "epoch": 0.22858823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0472, "step": 9715 }, { "epoch": 0.22861176470588235, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.6437, "step": 9716 }, { "epoch": 0.22863529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1745, "step": 9717 }, { "epoch": 0.22865882352941178, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.9212, "step": 9718 }, { "epoch": 0.22868235294117648, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.8111, "step": 9719 }, { "epoch": 0.22870588235294118, "grad_norm": 0.26953125, "learning_rate": 0.02, "loss": 0.6161, "step": 9720 }, { "epoch": 0.22872941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0121, "step": 9721 }, { "epoch": 0.22875294117647058, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.029, "step": 9722 }, { "epoch": 0.2287764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9402, "step": 9723 }, { "epoch": 0.2288, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2312, "step": 9724 }, { "epoch": 0.2288235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1681, "step": 9725 }, { "epoch": 0.2288470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9721, "step": 9726 }, { "epoch": 0.22887058823529413, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2538, "step": 9727 }, { "epoch": 0.22889411764705883, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8597, "step": 9728 }, { "epoch": 0.22891764705882353, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9369, "step": 9729 }, { "epoch": 0.22894117647058823, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1083, "step": 9730 }, { "epoch": 0.22896470588235293, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.0047, "step": 9731 }, { "epoch": 0.22898823529411766, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9728, "step": 9732 }, { "epoch": 0.22901176470588236, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8809, "step": 9733 }, { "epoch": 0.22903529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9816, "step": 9734 }, { "epoch": 0.22905882352941176, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.059, "step": 9735 }, { "epoch": 0.22908235294117646, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3391, "step": 9736 }, { "epoch": 0.2291058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3049, "step": 9737 }, { "epoch": 0.2291294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3391, "step": 9738 }, { "epoch": 0.2291529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.095, "step": 9739 }, { "epoch": 0.2291764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0899, "step": 9740 }, { "epoch": 0.2292, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0688, "step": 9741 }, { "epoch": 0.22922352941176471, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9894, "step": 9742 }, { "epoch": 0.22924705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4684, "step": 9743 }, { "epoch": 0.22927058823529411, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.797, "step": 9744 }, { "epoch": 0.22929411764705881, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1919, "step": 9745 }, { "epoch": 0.22931764705882354, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0609, "step": 9746 }, { "epoch": 0.22934117647058824, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9592, "step": 9747 }, { "epoch": 0.22936470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1304, "step": 9748 }, { "epoch": 0.22938823529411764, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1162, "step": 9749 }, { "epoch": 0.22941176470588234, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1125, "step": 9750 }, { "epoch": 0.22943529411764707, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1187, "step": 9751 }, { "epoch": 0.22945882352941177, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1484, "step": 9752 }, { "epoch": 0.22948235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1411, "step": 9753 }, { "epoch": 0.22950588235294117, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9202, "step": 9754 }, { "epoch": 0.22952941176470587, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9634, "step": 9755 }, { "epoch": 0.2295529411764706, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8829, "step": 9756 }, { "epoch": 0.2295764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1189, "step": 9757 }, { "epoch": 0.2296, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0688, "step": 9758 }, { "epoch": 0.2296235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0253, "step": 9759 }, { "epoch": 0.2296470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0619, "step": 9760 }, { "epoch": 0.22967058823529413, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1076, "step": 9761 }, { "epoch": 0.22969411764705883, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.267, "step": 9762 }, { "epoch": 0.22971764705882353, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.7449, "step": 9763 }, { "epoch": 0.22974117647058823, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8964, "step": 9764 }, { "epoch": 0.22976470588235295, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0568, "step": 9765 }, { "epoch": 0.22978823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0999, "step": 9766 }, { "epoch": 0.22981176470588235, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0696, "step": 9767 }, { "epoch": 0.22983529411764705, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2619, "step": 9768 }, { "epoch": 0.22985882352941175, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9435, "step": 9769 }, { "epoch": 0.22988235294117648, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0371, "step": 9770 }, { "epoch": 0.22990588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0, "step": 9771 }, { "epoch": 0.22992941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1233, "step": 9772 }, { "epoch": 0.22995294117647058, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9804, "step": 9773 }, { "epoch": 0.22997647058823528, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9091, "step": 9774 }, { "epoch": 0.23, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2872, "step": 9775 }, { "epoch": 0.2300235294117647, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7121, "step": 9776 }, { "epoch": 0.2300470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1715, "step": 9777 }, { "epoch": 0.2300705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1383, "step": 9778 }, { "epoch": 0.2300941176470588, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0795, "step": 9779 }, { "epoch": 0.23011764705882354, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.087, "step": 9780 }, { "epoch": 0.23014117647058824, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3045, "step": 9781 }, { "epoch": 0.23016470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0141, "step": 9782 }, { "epoch": 0.23018823529411764, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1665, "step": 9783 }, { "epoch": 0.23021176470588237, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2903, "step": 9784 }, { "epoch": 0.23023529411764707, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2829, "step": 9785 }, { "epoch": 0.23025882352941177, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1255, "step": 9786 }, { "epoch": 0.23028235294117647, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9775, "step": 9787 }, { "epoch": 0.23030588235294117, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9345, "step": 9788 }, { "epoch": 0.2303294117647059, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1514, "step": 9789 }, { "epoch": 0.2303529411764706, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.1511, "step": 9790 }, { "epoch": 0.2303764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.1412, "step": 9791 }, { "epoch": 0.2304, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.8529, "step": 9792 }, { "epoch": 0.2304235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2183, "step": 9793 }, { "epoch": 0.23044705882352942, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.8109, "step": 9794 }, { "epoch": 0.23047058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.833, "step": 9795 }, { "epoch": 0.23049411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9283, "step": 9796 }, { "epoch": 0.23051764705882352, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.1363, "step": 9797 }, { "epoch": 0.23054117647058822, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9152, "step": 9798 }, { "epoch": 0.23056470588235295, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0624, "step": 9799 }, { "epoch": 0.23058823529411765, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8116, "step": 9800 }, { "epoch": 0.23061176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3378, "step": 9801 }, { "epoch": 0.23063529411764705, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9798, "step": 9802 }, { "epoch": 0.23065882352941178, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7927, "step": 9803 }, { "epoch": 0.23068235294117648, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0023, "step": 9804 }, { "epoch": 0.23070588235294118, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1095, "step": 9805 }, { "epoch": 0.23072941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2475, "step": 9806 }, { "epoch": 0.23075294117647058, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0582, "step": 9807 }, { "epoch": 0.2307764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1795, "step": 9808 }, { "epoch": 0.2308, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1294, "step": 9809 }, { "epoch": 0.2308235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9646, "step": 9810 }, { "epoch": 0.2308470588235294, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3494, "step": 9811 }, { "epoch": 0.2308705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1218, "step": 9812 }, { "epoch": 0.23089411764705883, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0982, "step": 9813 }, { "epoch": 0.23091764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1632, "step": 9814 }, { "epoch": 0.23094117647058823, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1003, "step": 9815 }, { "epoch": 0.23096470588235293, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0038, "step": 9816 }, { "epoch": 0.23098823529411766, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0844, "step": 9817 }, { "epoch": 0.23101176470588236, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2348, "step": 9818 }, { "epoch": 0.23103529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2552, "step": 9819 }, { "epoch": 0.23105882352941176, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0743, "step": 9820 }, { "epoch": 0.23108235294117646, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8309, "step": 9821 }, { "epoch": 0.2311058823529412, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9534, "step": 9822 }, { "epoch": 0.2311294117647059, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7438, "step": 9823 }, { "epoch": 0.2311529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.027, "step": 9824 }, { "epoch": 0.2311764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0486, "step": 9825 }, { "epoch": 0.2312, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0887, "step": 9826 }, { "epoch": 0.23122352941176472, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.011, "step": 9827 }, { "epoch": 0.23124705882352942, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1812, "step": 9828 }, { "epoch": 0.23127058823529412, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4715, "step": 9829 }, { "epoch": 0.23129411764705882, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9928, "step": 9830 }, { "epoch": 0.23131764705882352, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9146, "step": 9831 }, { "epoch": 0.23134117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.2295, "step": 9832 }, { "epoch": 0.23136470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9017, "step": 9833 }, { "epoch": 0.23138823529411764, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8479, "step": 9834 }, { "epoch": 0.23141176470588234, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.306, "step": 9835 }, { "epoch": 0.23143529411764707, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8086, "step": 9836 }, { "epoch": 0.23145882352941177, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7283, "step": 9837 }, { "epoch": 0.23148235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8436, "step": 9838 }, { "epoch": 0.23150588235294117, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2472, "step": 9839 }, { "epoch": 0.23152941176470587, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8508, "step": 9840 }, { "epoch": 0.2315529411764706, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1153, "step": 9841 }, { "epoch": 0.2315764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1808, "step": 9842 }, { "epoch": 0.2316, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.1568, "step": 9843 }, { "epoch": 0.2316235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7922, "step": 9844 }, { "epoch": 0.2316470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.999, "step": 9845 }, { "epoch": 0.23167058823529413, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.725, "step": 9846 }, { "epoch": 0.23169411764705883, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2678, "step": 9847 }, { "epoch": 0.23171764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.261, "step": 9848 }, { "epoch": 0.23174117647058823, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8523, "step": 9849 }, { "epoch": 0.23176470588235293, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0741, "step": 9850 }, { "epoch": 0.23178823529411766, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1614, "step": 9851 }, { "epoch": 0.23181176470588236, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.888, "step": 9852 }, { "epoch": 0.23183529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2198, "step": 9853 }, { "epoch": 0.23185882352941176, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1733, "step": 9854 }, { "epoch": 0.23188235294117648, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8637, "step": 9855 }, { "epoch": 0.23190588235294118, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8232, "step": 9856 }, { "epoch": 0.23192941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2227, "step": 9857 }, { "epoch": 0.23195294117647058, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.294, "step": 9858 }, { "epoch": 0.23197647058823528, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.7728, "step": 9859 }, { "epoch": 0.232, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2025, "step": 9860 }, { "epoch": 0.2320235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2676, "step": 9861 }, { "epoch": 0.2320470588235294, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9897, "step": 9862 }, { "epoch": 0.2320705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.905, "step": 9863 }, { "epoch": 0.2320941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0739, "step": 9864 }, { "epoch": 0.23211764705882354, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9391, "step": 9865 }, { "epoch": 0.23214117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2479, "step": 9866 }, { "epoch": 0.23216470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1789, "step": 9867 }, { "epoch": 0.23218823529411764, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3257, "step": 9868 }, { "epoch": 0.23221176470588234, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2, "step": 9869 }, { "epoch": 0.23223529411764707, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.1118, "step": 9870 }, { "epoch": 0.23225882352941177, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1424, "step": 9871 }, { "epoch": 0.23228235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0012, "step": 9872 }, { "epoch": 0.23230588235294117, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2628, "step": 9873 }, { "epoch": 0.2323294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0492, "step": 9874 }, { "epoch": 0.2323529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9427, "step": 9875 }, { "epoch": 0.2323764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0184, "step": 9876 }, { "epoch": 0.2324, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 1.0173, "step": 9877 }, { "epoch": 0.2324235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.915, "step": 9878 }, { "epoch": 0.23244705882352942, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1709, "step": 9879 }, { "epoch": 0.23247058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9979, "step": 9880 }, { "epoch": 0.23249411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1681, "step": 9881 }, { "epoch": 0.23251764705882352, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8545, "step": 9882 }, { "epoch": 0.23254117647058822, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7363, "step": 9883 }, { "epoch": 0.23256470588235295, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.7538, "step": 9884 }, { "epoch": 0.23258823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.155, "step": 9885 }, { "epoch": 0.23261176470588235, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3154, "step": 9886 }, { "epoch": 0.23263529411764705, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0902, "step": 9887 }, { "epoch": 0.23265882352941175, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1924, "step": 9888 }, { "epoch": 0.23268235294117648, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0134, "step": 9889 }, { "epoch": 0.23270588235294118, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1213, "step": 9890 }, { "epoch": 0.23272941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0755, "step": 9891 }, { "epoch": 0.23275294117647058, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0949, "step": 9892 }, { "epoch": 0.2327764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0877, "step": 9893 }, { "epoch": 0.2328, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8685, "step": 9894 }, { "epoch": 0.2328235294117647, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.958, "step": 9895 }, { "epoch": 0.2328470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0598, "step": 9896 }, { "epoch": 0.2328705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9658, "step": 9897 }, { "epoch": 0.23289411764705883, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.7611, "step": 9898 }, { "epoch": 0.23291764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2272, "step": 9899 }, { "epoch": 0.23294117647058823, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8833, "step": 9900 }, { "epoch": 0.23296470588235293, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1347, "step": 9901 }, { "epoch": 0.23298823529411763, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1685, "step": 9902 }, { "epoch": 0.23301176470588236, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.6405, "step": 9903 }, { "epoch": 0.23303529411764706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.893, "step": 9904 }, { "epoch": 0.23305882352941176, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2433, "step": 9905 }, { "epoch": 0.23308235294117646, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9052, "step": 9906 }, { "epoch": 0.23310588235294116, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9619, "step": 9907 }, { "epoch": 0.2331294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9356, "step": 9908 }, { "epoch": 0.2331529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1102, "step": 9909 }, { "epoch": 0.2331764705882353, "grad_norm": 0.29296875, "learning_rate": 0.02, "loss": 0.5685, "step": 9910 }, { "epoch": 0.2332, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0063, "step": 9911 }, { "epoch": 0.23322352941176472, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8673, "step": 9912 }, { "epoch": 0.23324705882352942, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.345, "step": 9913 }, { "epoch": 0.23327058823529412, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1546, "step": 9914 }, { "epoch": 0.23329411764705882, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0831, "step": 9915 }, { "epoch": 0.23331764705882352, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0369, "step": 9916 }, { "epoch": 0.23334117647058825, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0503, "step": 9917 }, { "epoch": 0.23336470588235295, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.002, "step": 9918 }, { "epoch": 0.23338823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0656, "step": 9919 }, { "epoch": 0.23341176470588235, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8851, "step": 9920 }, { "epoch": 0.23343529411764705, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9075, "step": 9921 }, { "epoch": 0.23345882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1131, "step": 9922 }, { "epoch": 0.23348235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3199, "step": 9923 }, { "epoch": 0.23350588235294117, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2069, "step": 9924 }, { "epoch": 0.23352941176470587, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9041, "step": 9925 }, { "epoch": 0.2335529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3487, "step": 9926 }, { "epoch": 0.2335764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1502, "step": 9927 }, { "epoch": 0.2336, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.812, "step": 9928 }, { "epoch": 0.2336235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1039, "step": 9929 }, { "epoch": 0.2336470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1085, "step": 9930 }, { "epoch": 0.23367058823529413, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.9534, "step": 9931 }, { "epoch": 0.23369411764705883, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9402, "step": 9932 }, { "epoch": 0.23371764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.8916, "step": 9933 }, { "epoch": 0.23374117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9585, "step": 9934 }, { "epoch": 0.23376470588235293, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9508, "step": 9935 }, { "epoch": 0.23378823529411766, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2254, "step": 9936 }, { "epoch": 0.23381176470588236, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.9101, "step": 9937 }, { "epoch": 0.23383529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1386, "step": 9938 }, { "epoch": 0.23385882352941176, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3668, "step": 9939 }, { "epoch": 0.23388235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1159, "step": 9940 }, { "epoch": 0.23390588235294119, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3898, "step": 9941 }, { "epoch": 0.23392941176470589, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1457, "step": 9942 }, { "epoch": 0.23395294117647059, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1715, "step": 9943 }, { "epoch": 0.23397647058823529, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9164, "step": 9944 }, { "epoch": 0.234, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1386, "step": 9945 }, { "epoch": 0.2340235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2398, "step": 9946 }, { "epoch": 0.2340470588235294, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.6258, "step": 9947 }, { "epoch": 0.2340705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1764, "step": 9948 }, { "epoch": 0.2340941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0764, "step": 9949 }, { "epoch": 0.23411764705882354, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0386, "step": 9950 }, { "epoch": 0.23414117647058824, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.953, "step": 9951 }, { "epoch": 0.23416470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1522, "step": 9952 }, { "epoch": 0.23418823529411764, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0257, "step": 9953 }, { "epoch": 0.23421176470588234, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9966, "step": 9954 }, { "epoch": 0.23423529411764707, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9285, "step": 9955 }, { "epoch": 0.23425882352941177, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0026, "step": 9956 }, { "epoch": 0.23428235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2884, "step": 9957 }, { "epoch": 0.23430588235294117, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0853, "step": 9958 }, { "epoch": 0.23432941176470587, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1515, "step": 9959 }, { "epoch": 0.2343529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9786, "step": 9960 }, { "epoch": 0.2343764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1042, "step": 9961 }, { "epoch": 0.2344, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2287, "step": 9962 }, { "epoch": 0.2344235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9065, "step": 9963 }, { "epoch": 0.23444705882352943, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 1.0019, "step": 9964 }, { "epoch": 0.23447058823529413, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7555, "step": 9965 }, { "epoch": 0.23449411764705883, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9981, "step": 9966 }, { "epoch": 0.23451764705882353, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.7459, "step": 9967 }, { "epoch": 0.23454117647058823, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.803, "step": 9968 }, { "epoch": 0.23456470588235295, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2011, "step": 9969 }, { "epoch": 0.23458823529411765, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.4982, "step": 9970 }, { "epoch": 0.23461176470588235, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0867, "step": 9971 }, { "epoch": 0.23463529411764705, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.892, "step": 9972 }, { "epoch": 0.23465882352941175, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.84, "step": 9973 }, { "epoch": 0.23468235294117648, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1401, "step": 9974 }, { "epoch": 0.23470588235294118, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8585, "step": 9975 }, { "epoch": 0.23472941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2936, "step": 9976 }, { "epoch": 0.23475294117647058, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2545, "step": 9977 }, { "epoch": 0.23477647058823528, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8979, "step": 9978 }, { "epoch": 0.2348, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0879, "step": 9979 }, { "epoch": 0.2348235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2426, "step": 9980 }, { "epoch": 0.2348470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9225, "step": 9981 }, { "epoch": 0.2348705882352941, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0187, "step": 9982 }, { "epoch": 0.23489411764705884, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0448, "step": 9983 }, { "epoch": 0.23491764705882354, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1514, "step": 9984 }, { "epoch": 0.23494117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.163, "step": 9985 }, { "epoch": 0.23496470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8804, "step": 9986 }, { "epoch": 0.23498823529411764, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0604, "step": 9987 }, { "epoch": 0.23501176470588236, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 1.0615, "step": 9988 }, { "epoch": 0.23503529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0984, "step": 9989 }, { "epoch": 0.23505882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1008, "step": 9990 }, { "epoch": 0.23508235294117646, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.8487, "step": 9991 }, { "epoch": 0.23510588235294116, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9274, "step": 9992 }, { "epoch": 0.2351294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1463, "step": 9993 }, { "epoch": 0.2351529411764706, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9231, "step": 9994 }, { "epoch": 0.2351764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.1027, "step": 9995 }, { "epoch": 0.2352, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9043, "step": 9996 }, { "epoch": 0.2352235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.042, "step": 9997 }, { "epoch": 0.23524705882352942, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2801, "step": 9998 }, { "epoch": 0.23527058823529412, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.6812, "step": 9999 }, { "epoch": 0.23529411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.124, "step": 10000 }, { "epoch": 0.23529411764705882, "eval_loss": 2.219313383102417, "eval_runtime": 686.0292, "eval_samples_per_second": 12.39, "eval_steps_per_second": 3.098, "step": 10000 }, { "epoch": 0.23531764705882352, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1444, "step": 10001 }, { "epoch": 0.23534117647058825, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.7349, "step": 10002 }, { "epoch": 0.23536470588235295, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0465, "step": 10003 }, { "epoch": 0.23538823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7281, "step": 10004 }, { "epoch": 0.23541176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1431, "step": 10005 }, { "epoch": 0.23543529411764705, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0678, "step": 10006 }, { "epoch": 0.23545882352941178, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1104, "step": 10007 }, { "epoch": 0.23548235294117648, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.994, "step": 10008 }, { "epoch": 0.23550588235294118, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0719, "step": 10009 }, { "epoch": 0.23552941176470588, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9659, "step": 10010 }, { "epoch": 0.23555294117647058, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1642, "step": 10011 }, { "epoch": 0.2355764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3394, "step": 10012 }, { "epoch": 0.2356, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9419, "step": 10013 }, { "epoch": 0.2356235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1303, "step": 10014 }, { "epoch": 0.2356470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9233, "step": 10015 }, { "epoch": 0.2356705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.946, "step": 10016 }, { "epoch": 0.23569411764705883, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8979, "step": 10017 }, { "epoch": 0.23571764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0079, "step": 10018 }, { "epoch": 0.23574117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2172, "step": 10019 }, { "epoch": 0.23576470588235293, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2519, "step": 10020 }, { "epoch": 0.23578823529411766, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3118, "step": 10021 }, { "epoch": 0.23581176470588236, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0633, "step": 10022 }, { "epoch": 0.23583529411764706, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0856, "step": 10023 }, { "epoch": 0.23585882352941176, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.8719, "step": 10024 }, { "epoch": 0.23588235294117646, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2692, "step": 10025 }, { "epoch": 0.2359058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0278, "step": 10026 }, { "epoch": 0.2359294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.875, "step": 10027 }, { "epoch": 0.2359529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.154, "step": 10028 }, { "epoch": 0.2359764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.129, "step": 10029 }, { "epoch": 0.236, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.099, "step": 10030 }, { "epoch": 0.23602352941176472, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9711, "step": 10031 }, { "epoch": 0.23604705882352942, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8237, "step": 10032 }, { "epoch": 0.23607058823529412, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.6936, "step": 10033 }, { "epoch": 0.23609411764705882, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.6024, "step": 10034 }, { "epoch": 0.23611764705882354, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.6447, "step": 10035 }, { "epoch": 0.23614117647058824, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.066, "step": 10036 }, { "epoch": 0.23616470588235294, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.7706, "step": 10037 }, { "epoch": 0.23618823529411764, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8848, "step": 10038 }, { "epoch": 0.23621176470588234, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.148, "step": 10039 }, { "epoch": 0.23623529411764707, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0541, "step": 10040 }, { "epoch": 0.23625882352941177, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2567, "step": 10041 }, { "epoch": 0.23628235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.866, "step": 10042 }, { "epoch": 0.23630588235294117, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8984, "step": 10043 }, { "epoch": 0.23632941176470587, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1894, "step": 10044 }, { "epoch": 0.2363529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1564, "step": 10045 }, { "epoch": 0.2363764705882353, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8633, "step": 10046 }, { "epoch": 0.2364, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8222, "step": 10047 }, { "epoch": 0.2364235294117647, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9093, "step": 10048 }, { "epoch": 0.2364470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9137, "step": 10049 }, { "epoch": 0.23647058823529413, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.502, "step": 10050 }, { "epoch": 0.23649411764705883, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0863, "step": 10051 }, { "epoch": 0.23651764705882353, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8318, "step": 10052 }, { "epoch": 0.23654117647058823, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1379, "step": 10053 }, { "epoch": 0.23656470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0071, "step": 10054 }, { "epoch": 0.23658823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9023, "step": 10055 }, { "epoch": 0.23661176470588235, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0259, "step": 10056 }, { "epoch": 0.23663529411764705, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1064, "step": 10057 }, { "epoch": 0.23665882352941175, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.842, "step": 10058 }, { "epoch": 0.23668235294117648, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0536, "step": 10059 }, { "epoch": 0.23670588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0652, "step": 10060 }, { "epoch": 0.23672941176470588, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.4084, "step": 10061 }, { "epoch": 0.23675294117647058, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9434, "step": 10062 }, { "epoch": 0.23677647058823528, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2385, "step": 10063 }, { "epoch": 0.2368, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.4367, "step": 10064 }, { "epoch": 0.2368235294117647, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.7498, "step": 10065 }, { "epoch": 0.2368470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1096, "step": 10066 }, { "epoch": 0.2368705882352941, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8823, "step": 10067 }, { "epoch": 0.2368941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2572, "step": 10068 }, { "epoch": 0.23691764705882354, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1494, "step": 10069 }, { "epoch": 0.23694117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.2993, "step": 10070 }, { "epoch": 0.23696470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.1739, "step": 10071 }, { "epoch": 0.23698823529411764, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.0034, "step": 10072 }, { "epoch": 0.23701176470588237, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9201, "step": 10073 }, { "epoch": 0.23703529411764707, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.3181, "step": 10074 }, { "epoch": 0.23705882352941177, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.8128, "step": 10075 }, { "epoch": 0.23708235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0901, "step": 10076 }, { "epoch": 0.23710588235294117, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1598, "step": 10077 }, { "epoch": 0.2371294117647059, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9845, "step": 10078 }, { "epoch": 0.2371529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0436, "step": 10079 }, { "epoch": 0.2371764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0752, "step": 10080 }, { "epoch": 0.2372, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5281, "step": 10081 }, { "epoch": 0.2372235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2373, "step": 10082 }, { "epoch": 0.23724705882352942, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1431, "step": 10083 }, { "epoch": 0.23727058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0812, "step": 10084 }, { "epoch": 0.23729411764705882, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9188, "step": 10085 }, { "epoch": 0.23731764705882352, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8779, "step": 10086 }, { "epoch": 0.23734117647058822, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0291, "step": 10087 }, { "epoch": 0.23736470588235295, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0992, "step": 10088 }, { "epoch": 0.23738823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0207, "step": 10089 }, { "epoch": 0.23741176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1369, "step": 10090 }, { "epoch": 0.23743529411764705, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.5127, "step": 10091 }, { "epoch": 0.23745882352941178, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9429, "step": 10092 }, { "epoch": 0.23748235294117648, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.209, "step": 10093 }, { "epoch": 0.23750588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2912, "step": 10094 }, { "epoch": 0.23752941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.7612, "step": 10095 }, { "epoch": 0.23755294117647058, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2739, "step": 10096 }, { "epoch": 0.2375764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2039, "step": 10097 }, { "epoch": 0.2376, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0507, "step": 10098 }, { "epoch": 0.2376235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.998, "step": 10099 }, { "epoch": 0.2376470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2465, "step": 10100 }, { "epoch": 0.2376705882352941, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3149, "step": 10101 }, { "epoch": 0.23769411764705883, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2602, "step": 10102 }, { "epoch": 0.23771764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9521, "step": 10103 }, { "epoch": 0.23774117647058823, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9657, "step": 10104 }, { "epoch": 0.23776470588235293, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.6626, "step": 10105 }, { "epoch": 0.23778823529411763, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.3021, "step": 10106 }, { "epoch": 0.23781176470588236, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1935, "step": 10107 }, { "epoch": 0.23783529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1544, "step": 10108 }, { "epoch": 0.23785882352941176, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.976, "step": 10109 }, { "epoch": 0.23788235294117646, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.7743, "step": 10110 }, { "epoch": 0.2379058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9156, "step": 10111 }, { "epoch": 0.2379294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9646, "step": 10112 }, { "epoch": 0.2379529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9167, "step": 10113 }, { "epoch": 0.2379764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1141, "step": 10114 }, { "epoch": 0.238, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9518, "step": 10115 }, { "epoch": 0.23802352941176472, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1921, "step": 10116 }, { "epoch": 0.23804705882352942, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.129, "step": 10117 }, { "epoch": 0.23807058823529412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2956, "step": 10118 }, { "epoch": 0.23809411764705882, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7299, "step": 10119 }, { "epoch": 0.23811764705882352, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1392, "step": 10120 }, { "epoch": 0.23814117647058825, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0897, "step": 10121 }, { "epoch": 0.23816470588235295, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.937, "step": 10122 }, { "epoch": 0.23818823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.224, "step": 10123 }, { "epoch": 0.23821176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.237, "step": 10124 }, { "epoch": 0.23823529411764705, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2174, "step": 10125 }, { "epoch": 0.23825882352941177, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9535, "step": 10126 }, { "epoch": 0.23828235294117647, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8366, "step": 10127 }, { "epoch": 0.23830588235294117, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2294, "step": 10128 }, { "epoch": 0.23832941176470587, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0402, "step": 10129 }, { "epoch": 0.2383529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2471, "step": 10130 }, { "epoch": 0.2383764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3236, "step": 10131 }, { "epoch": 0.2384, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8326, "step": 10132 }, { "epoch": 0.2384235294117647, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7933, "step": 10133 }, { "epoch": 0.2384470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9218, "step": 10134 }, { "epoch": 0.23847058823529413, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9343, "step": 10135 }, { "epoch": 0.23849411764705883, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0363, "step": 10136 }, { "epoch": 0.23851764705882353, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.2446, "step": 10137 }, { "epoch": 0.23854117647058823, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0706, "step": 10138 }, { "epoch": 0.23856470588235293, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9, "step": 10139 }, { "epoch": 0.23858823529411766, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2824, "step": 10140 }, { "epoch": 0.23861176470588236, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.5726, "step": 10141 }, { "epoch": 0.23863529411764706, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0896, "step": 10142 }, { "epoch": 0.23865882352941176, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1167, "step": 10143 }, { "epoch": 0.23868235294117648, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2483, "step": 10144 }, { "epoch": 0.23870588235294118, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8831, "step": 10145 }, { "epoch": 0.23872941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0695, "step": 10146 }, { "epoch": 0.23875294117647058, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9467, "step": 10147 }, { "epoch": 0.23877647058823528, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8174, "step": 10148 }, { "epoch": 0.2388, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1532, "step": 10149 }, { "epoch": 0.2388235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2997, "step": 10150 }, { "epoch": 0.2388470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0526, "step": 10151 }, { "epoch": 0.2388705882352941, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2757, "step": 10152 }, { "epoch": 0.2388941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1197, "step": 10153 }, { "epoch": 0.23891764705882354, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0337, "step": 10154 }, { "epoch": 0.23894117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9824, "step": 10155 }, { "epoch": 0.23896470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8328, "step": 10156 }, { "epoch": 0.23898823529411764, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0513, "step": 10157 }, { "epoch": 0.23901176470588234, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0251, "step": 10158 }, { "epoch": 0.23903529411764707, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2163, "step": 10159 }, { "epoch": 0.23905882352941177, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9782, "step": 10160 }, { "epoch": 0.23908235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8792, "step": 10161 }, { "epoch": 0.23910588235294117, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1973, "step": 10162 }, { "epoch": 0.2391294117647059, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9698, "step": 10163 }, { "epoch": 0.2391529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0646, "step": 10164 }, { "epoch": 0.2391764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.853, "step": 10165 }, { "epoch": 0.2392, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8241, "step": 10166 }, { "epoch": 0.2392235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8062, "step": 10167 }, { "epoch": 0.23924705882352942, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0658, "step": 10168 }, { "epoch": 0.23927058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0849, "step": 10169 }, { "epoch": 0.23929411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2617, "step": 10170 }, { "epoch": 0.23931764705882352, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8358, "step": 10171 }, { "epoch": 0.23934117647058822, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9266, "step": 10172 }, { "epoch": 0.23936470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1039, "step": 10173 }, { "epoch": 0.23938823529411765, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2775, "step": 10174 }, { "epoch": 0.23941176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1437, "step": 10175 }, { "epoch": 0.23943529411764705, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9794, "step": 10176 }, { "epoch": 0.23945882352941175, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0758, "step": 10177 }, { "epoch": 0.23948235294117648, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1516, "step": 10178 }, { "epoch": 0.23950588235294118, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.8137, "step": 10179 }, { "epoch": 0.23952941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0786, "step": 10180 }, { "epoch": 0.23955294117647058, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1603, "step": 10181 }, { "epoch": 0.2395764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0769, "step": 10182 }, { "epoch": 0.2396, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1799, "step": 10183 }, { "epoch": 0.2396235294117647, "grad_norm": 0.267578125, "learning_rate": 0.02, "loss": 0.6649, "step": 10184 }, { "epoch": 0.2396470588235294, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2472, "step": 10185 }, { "epoch": 0.2396705882352941, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1914, "step": 10186 }, { "epoch": 0.23969411764705884, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.7508, "step": 10187 }, { "epoch": 0.23971764705882354, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9607, "step": 10188 }, { "epoch": 0.23974117647058824, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.866, "step": 10189 }, { "epoch": 0.23976470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9559, "step": 10190 }, { "epoch": 0.23978823529411764, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1446, "step": 10191 }, { "epoch": 0.23981176470588236, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9559, "step": 10192 }, { "epoch": 0.23983529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3304, "step": 10193 }, { "epoch": 0.23985882352941176, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9741, "step": 10194 }, { "epoch": 0.23988235294117646, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.8891, "step": 10195 }, { "epoch": 0.23990588235294116, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0372, "step": 10196 }, { "epoch": 0.2399294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0343, "step": 10197 }, { "epoch": 0.2399529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1818, "step": 10198 }, { "epoch": 0.2399764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1971, "step": 10199 }, { "epoch": 0.24, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9902, "step": 10200 }, { "epoch": 0.24002352941176472, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0894, "step": 10201 }, { "epoch": 0.24004705882352942, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.957, "step": 10202 }, { "epoch": 0.24007058823529412, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0634, "step": 10203 }, { "epoch": 0.24009411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.855, "step": 10204 }, { "epoch": 0.24011764705882352, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9504, "step": 10205 }, { "epoch": 0.24014117647058825, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1976, "step": 10206 }, { "epoch": 0.24016470588235295, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1222, "step": 10207 }, { "epoch": 0.24018823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1169, "step": 10208 }, { "epoch": 0.24021176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2257, "step": 10209 }, { "epoch": 0.24023529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0849, "step": 10210 }, { "epoch": 0.24025882352941177, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0776, "step": 10211 }, { "epoch": 0.24028235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0536, "step": 10212 }, { "epoch": 0.24030588235294117, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1587, "step": 10213 }, { "epoch": 0.24032941176470587, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1266, "step": 10214 }, { "epoch": 0.24035294117647057, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.952, "step": 10215 }, { "epoch": 0.2403764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0214, "step": 10216 }, { "epoch": 0.2404, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3648, "step": 10217 }, { "epoch": 0.2404235294117647, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7125, "step": 10218 }, { "epoch": 0.2404470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0872, "step": 10219 }, { "epoch": 0.24047058823529413, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0876, "step": 10220 }, { "epoch": 0.24049411764705883, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1042, "step": 10221 }, { "epoch": 0.24051764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.131, "step": 10222 }, { "epoch": 0.24054117647058823, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1064, "step": 10223 }, { "epoch": 0.24056470588235293, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0098, "step": 10224 }, { "epoch": 0.24058823529411766, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.681, "step": 10225 }, { "epoch": 0.24061176470588236, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7995, "step": 10226 }, { "epoch": 0.24063529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1803, "step": 10227 }, { "epoch": 0.24065882352941176, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2007, "step": 10228 }, { "epoch": 0.24068235294117646, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.051, "step": 10229 }, { "epoch": 0.2407058823529412, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1456, "step": 10230 }, { "epoch": 0.2407294117647059, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.966, "step": 10231 }, { "epoch": 0.2407529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9693, "step": 10232 }, { "epoch": 0.2407764705882353, "grad_norm": 0.2734375, "learning_rate": 0.02, "loss": 0.732, "step": 10233 }, { "epoch": 0.2408, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1552, "step": 10234 }, { "epoch": 0.24082352941176471, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.279, "step": 10235 }, { "epoch": 0.24084705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1312, "step": 10236 }, { "epoch": 0.24087058823529411, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1619, "step": 10237 }, { "epoch": 0.24089411764705881, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.257, "step": 10238 }, { "epoch": 0.24091764705882354, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3822, "step": 10239 }, { "epoch": 0.24094117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0847, "step": 10240 }, { "epoch": 0.24096470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9847, "step": 10241 }, { "epoch": 0.24098823529411764, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.8214, "step": 10242 }, { "epoch": 0.24101176470588234, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2248, "step": 10243 }, { "epoch": 0.24103529411764707, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.1914, "step": 10244 }, { "epoch": 0.24105882352941177, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9818, "step": 10245 }, { "epoch": 0.24108235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.9177, "step": 10246 }, { "epoch": 0.24110588235294117, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3808, "step": 10247 }, { "epoch": 0.24112941176470587, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0026, "step": 10248 }, { "epoch": 0.2411529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0572, "step": 10249 }, { "epoch": 0.2411764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7708, "step": 10250 }, { "epoch": 0.2412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2706, "step": 10251 }, { "epoch": 0.2412235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.985, "step": 10252 }, { "epoch": 0.24124705882352943, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.067, "step": 10253 }, { "epoch": 0.24127058823529413, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1119, "step": 10254 }, { "epoch": 0.24129411764705883, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2153, "step": 10255 }, { "epoch": 0.24131764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8084, "step": 10256 }, { "epoch": 0.24134117647058823, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0933, "step": 10257 }, { "epoch": 0.24136470588235295, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 1.0401, "step": 10258 }, { "epoch": 0.24138823529411765, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.8056, "step": 10259 }, { "epoch": 0.24141176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0446, "step": 10260 }, { "epoch": 0.24143529411764705, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.7925, "step": 10261 }, { "epoch": 0.24145882352941175, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.3668, "step": 10262 }, { "epoch": 0.24148235294117648, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1206, "step": 10263 }, { "epoch": 0.24150588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1851, "step": 10264 }, { "epoch": 0.24152941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0727, "step": 10265 }, { "epoch": 0.24155294117647058, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2565, "step": 10266 }, { "epoch": 0.24157647058823528, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 1.0677, "step": 10267 }, { "epoch": 0.2416, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0278, "step": 10268 }, { "epoch": 0.2416235294117647, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.7137, "step": 10269 }, { "epoch": 0.2416470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9215, "step": 10270 }, { "epoch": 0.2416705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0754, "step": 10271 }, { "epoch": 0.24169411764705884, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9659, "step": 10272 }, { "epoch": 0.24171764705882354, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.757, "step": 10273 }, { "epoch": 0.24174117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0737, "step": 10274 }, { "epoch": 0.24176470588235294, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.8049, "step": 10275 }, { "epoch": 0.24178823529411764, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.4005, "step": 10276 }, { "epoch": 0.24181176470588236, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0495, "step": 10277 }, { "epoch": 0.24183529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1636, "step": 10278 }, { "epoch": 0.24185882352941176, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0338, "step": 10279 }, { "epoch": 0.24188235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1289, "step": 10280 }, { "epoch": 0.24190588235294117, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.8134, "step": 10281 }, { "epoch": 0.2419294117647059, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.928, "step": 10282 }, { "epoch": 0.2419529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1291, "step": 10283 }, { "epoch": 0.2419764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9256, "step": 10284 }, { "epoch": 0.242, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.1124, "step": 10285 }, { "epoch": 0.2420235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.4903, "step": 10286 }, { "epoch": 0.24204705882352942, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9871, "step": 10287 }, { "epoch": 0.24207058823529412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.4001, "step": 10288 }, { "epoch": 0.24209411764705882, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.5076, "step": 10289 }, { "epoch": 0.24211764705882352, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.119, "step": 10290 }, { "epoch": 0.24214117647058825, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9453, "step": 10291 }, { "epoch": 0.24216470588235295, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.8591, "step": 10292 }, { "epoch": 0.24218823529411765, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9143, "step": 10293 }, { "epoch": 0.24221176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2946, "step": 10294 }, { "epoch": 0.24223529411764705, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.1156, "step": 10295 }, { "epoch": 0.24225882352941178, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.7892, "step": 10296 }, { "epoch": 0.24228235294117648, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9591, "step": 10297 }, { "epoch": 0.24230588235294118, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0675, "step": 10298 }, { "epoch": 0.24232941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8665, "step": 10299 }, { "epoch": 0.24235294117647058, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0876, "step": 10300 }, { "epoch": 0.2423764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8393, "step": 10301 }, { "epoch": 0.2424, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1047, "step": 10302 }, { "epoch": 0.2424235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8806, "step": 10303 }, { "epoch": 0.2424470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.378, "step": 10304 }, { "epoch": 0.2424705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0437, "step": 10305 }, { "epoch": 0.24249411764705883, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.8022, "step": 10306 }, { "epoch": 0.24251764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9865, "step": 10307 }, { "epoch": 0.24254117647058823, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0582, "step": 10308 }, { "epoch": 0.24256470588235293, "grad_norm": 0.25390625, "learning_rate": 0.02, "loss": 0.8693, "step": 10309 }, { "epoch": 0.24258823529411766, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2981, "step": 10310 }, { "epoch": 0.24261176470588236, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9647, "step": 10311 }, { "epoch": 0.24263529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1906, "step": 10312 }, { "epoch": 0.24265882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.226, "step": 10313 }, { "epoch": 0.24268235294117646, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2626, "step": 10314 }, { "epoch": 0.2427058823529412, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.829, "step": 10315 }, { "epoch": 0.2427294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9931, "step": 10316 }, { "epoch": 0.2427529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0577, "step": 10317 }, { "epoch": 0.2427764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.4823, "step": 10318 }, { "epoch": 0.2428, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1502, "step": 10319 }, { "epoch": 0.24282352941176472, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2421, "step": 10320 }, { "epoch": 0.24284705882352942, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9538, "step": 10321 }, { "epoch": 0.24287058823529412, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0162, "step": 10322 }, { "epoch": 0.24289411764705882, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7795, "step": 10323 }, { "epoch": 0.24291764705882352, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1545, "step": 10324 }, { "epoch": 0.24294117647058824, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2521, "step": 10325 }, { "epoch": 0.24296470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8557, "step": 10326 }, { "epoch": 0.24298823529411764, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.8402, "step": 10327 }, { "epoch": 0.24301176470588234, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.966, "step": 10328 }, { "epoch": 0.24303529411764707, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9499, "step": 10329 }, { "epoch": 0.24305882352941177, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1424, "step": 10330 }, { "epoch": 0.24308235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2292, "step": 10331 }, { "epoch": 0.24310588235294117, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8159, "step": 10332 }, { "epoch": 0.24312941176470587, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.3828, "step": 10333 }, { "epoch": 0.2431529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9883, "step": 10334 }, { "epoch": 0.2431764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1348, "step": 10335 }, { "epoch": 0.2432, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 1.0611, "step": 10336 }, { "epoch": 0.2432235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.934, "step": 10337 }, { "epoch": 0.2432470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0727, "step": 10338 }, { "epoch": 0.24327058823529413, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3728, "step": 10339 }, { "epoch": 0.24329411764705883, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0902, "step": 10340 }, { "epoch": 0.24331764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9735, "step": 10341 }, { "epoch": 0.24334117647058823, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3739, "step": 10342 }, { "epoch": 0.24336470588235293, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.789, "step": 10343 }, { "epoch": 0.24338823529411766, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8828, "step": 10344 }, { "epoch": 0.24341176470588236, "grad_norm": 1.390625, "learning_rate": 0.02, "loss": 1.191, "step": 10345 }, { "epoch": 0.24343529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.169, "step": 10346 }, { "epoch": 0.24345882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.043, "step": 10347 }, { "epoch": 0.24348235294117648, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.5737, "step": 10348 }, { "epoch": 0.24350588235294118, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9372, "step": 10349 }, { "epoch": 0.24352941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8261, "step": 10350 }, { "epoch": 0.24355294117647058, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0235, "step": 10351 }, { "epoch": 0.24357647058823528, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0772, "step": 10352 }, { "epoch": 0.2436, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0193, "step": 10353 }, { "epoch": 0.2436235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2876, "step": 10354 }, { "epoch": 0.2436470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9893, "step": 10355 }, { "epoch": 0.2436705882352941, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.895, "step": 10356 }, { "epoch": 0.2436941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9596, "step": 10357 }, { "epoch": 0.24371764705882354, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.7406, "step": 10358 }, { "epoch": 0.24374117647058824, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0596, "step": 10359 }, { "epoch": 0.24376470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1505, "step": 10360 }, { "epoch": 0.24378823529411764, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0043, "step": 10361 }, { "epoch": 0.24381176470588237, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1614, "step": 10362 }, { "epoch": 0.24383529411764707, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.283, "step": 10363 }, { "epoch": 0.24385882352941177, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.1628, "step": 10364 }, { "epoch": 0.24388235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.4055, "step": 10365 }, { "epoch": 0.24390588235294117, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1613, "step": 10366 }, { "epoch": 0.2439294117647059, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1474, "step": 10367 }, { "epoch": 0.2439529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.7212, "step": 10368 }, { "epoch": 0.2439764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3401, "step": 10369 }, { "epoch": 0.244, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2592, "step": 10370 }, { "epoch": 0.2440235294117647, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.7469, "step": 10371 }, { "epoch": 0.24404705882352942, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0088, "step": 10372 }, { "epoch": 0.24407058823529412, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.7891, "step": 10373 }, { "epoch": 0.24409411764705882, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2273, "step": 10374 }, { "epoch": 0.24411764705882352, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 1.0185, "step": 10375 }, { "epoch": 0.24414117647058822, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9441, "step": 10376 }, { "epoch": 0.24416470588235295, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.4959, "step": 10377 }, { "epoch": 0.24418823529411765, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.1398, "step": 10378 }, { "epoch": 0.24421176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0265, "step": 10379 }, { "epoch": 0.24423529411764705, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9963, "step": 10380 }, { "epoch": 0.24425882352941178, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8433, "step": 10381 }, { "epoch": 0.24428235294117648, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2978, "step": 10382 }, { "epoch": 0.24430588235294118, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.9047, "step": 10383 }, { "epoch": 0.24432941176470588, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0805, "step": 10384 }, { "epoch": 0.24435294117647058, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1688, "step": 10385 }, { "epoch": 0.2443764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1365, "step": 10386 }, { "epoch": 0.2444, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7918, "step": 10387 }, { "epoch": 0.2444235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.345, "step": 10388 }, { "epoch": 0.2444470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1773, "step": 10389 }, { "epoch": 0.2444705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0961, "step": 10390 }, { "epoch": 0.24449411764705883, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2521, "step": 10391 }, { "epoch": 0.24451764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8902, "step": 10392 }, { "epoch": 0.24454117647058823, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1358, "step": 10393 }, { "epoch": 0.24456470588235293, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0905, "step": 10394 }, { "epoch": 0.24458823529411763, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3109, "step": 10395 }, { "epoch": 0.24461176470588236, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1043, "step": 10396 }, { "epoch": 0.24463529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8765, "step": 10397 }, { "epoch": 0.24465882352941176, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0215, "step": 10398 }, { "epoch": 0.24468235294117646, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.459, "step": 10399 }, { "epoch": 0.2447058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.228, "step": 10400 }, { "epoch": 0.2447294117647059, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2112, "step": 10401 }, { "epoch": 0.2447529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1551, "step": 10402 }, { "epoch": 0.2447764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0304, "step": 10403 }, { "epoch": 0.2448, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2905, "step": 10404 }, { "epoch": 0.24482352941176472, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.893, "step": 10405 }, { "epoch": 0.24484705882352942, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3331, "step": 10406 }, { "epoch": 0.24487058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1619, "step": 10407 }, { "epoch": 0.24489411764705882, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3827, "step": 10408 }, { "epoch": 0.24491764705882352, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8646, "step": 10409 }, { "epoch": 0.24494117647058825, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1105, "step": 10410 }, { "epoch": 0.24496470588235295, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0203, "step": 10411 }, { "epoch": 0.24498823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2071, "step": 10412 }, { "epoch": 0.24501176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0514, "step": 10413 }, { "epoch": 0.24503529411764705, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9394, "step": 10414 }, { "epoch": 0.24505882352941177, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8493, "step": 10415 }, { "epoch": 0.24508235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.1626, "step": 10416 }, { "epoch": 0.24510588235294117, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.052, "step": 10417 }, { "epoch": 0.24512941176470587, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7791, "step": 10418 }, { "epoch": 0.2451529411764706, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3723, "step": 10419 }, { "epoch": 0.2451764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1052, "step": 10420 }, { "epoch": 0.2452, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8298, "step": 10421 }, { "epoch": 0.2452235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1508, "step": 10422 }, { "epoch": 0.2452470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1613, "step": 10423 }, { "epoch": 0.24527058823529413, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9891, "step": 10424 }, { "epoch": 0.24529411764705883, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8093, "step": 10425 }, { "epoch": 0.24531764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0295, "step": 10426 }, { "epoch": 0.24534117647058823, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0492, "step": 10427 }, { "epoch": 0.24536470588235293, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2409, "step": 10428 }, { "epoch": 0.24538823529411766, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.935, "step": 10429 }, { "epoch": 0.24541176470588236, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2969, "step": 10430 }, { "epoch": 0.24543529411764706, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1421, "step": 10431 }, { "epoch": 0.24545882352941176, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0634, "step": 10432 }, { "epoch": 0.24548235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1427, "step": 10433 }, { "epoch": 0.24550588235294118, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2265, "step": 10434 }, { "epoch": 0.24552941176470588, "grad_norm": 0.271484375, "learning_rate": 0.02, "loss": 0.7041, "step": 10435 }, { "epoch": 0.24555294117647058, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0161, "step": 10436 }, { "epoch": 0.24557647058823528, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1297, "step": 10437 }, { "epoch": 0.2456, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3037, "step": 10438 }, { "epoch": 0.2456235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0548, "step": 10439 }, { "epoch": 0.2456470588235294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.6518, "step": 10440 }, { "epoch": 0.2456705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0355, "step": 10441 }, { "epoch": 0.2456941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9076, "step": 10442 }, { "epoch": 0.24571764705882354, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2699, "step": 10443 }, { "epoch": 0.24574117647058824, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1726, "step": 10444 }, { "epoch": 0.24576470588235294, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 1.2877, "step": 10445 }, { "epoch": 0.24578823529411764, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0045, "step": 10446 }, { "epoch": 0.24581176470588234, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2001, "step": 10447 }, { "epoch": 0.24583529411764707, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9224, "step": 10448 }, { "epoch": 0.24585882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0601, "step": 10449 }, { "epoch": 0.24588235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0846, "step": 10450 }, { "epoch": 0.24590588235294117, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2192, "step": 10451 }, { "epoch": 0.24592941176470587, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1537, "step": 10452 }, { "epoch": 0.2459529411764706, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9464, "step": 10453 }, { "epoch": 0.2459764705882353, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8905, "step": 10454 }, { "epoch": 0.246, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0928, "step": 10455 }, { "epoch": 0.2460235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0043, "step": 10456 }, { "epoch": 0.24604705882352942, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2126, "step": 10457 }, { "epoch": 0.24607058823529412, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.5398, "step": 10458 }, { "epoch": 0.24609411764705882, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7249, "step": 10459 }, { "epoch": 0.24611764705882352, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2326, "step": 10460 }, { "epoch": 0.24614117647058822, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.6625, "step": 10461 }, { "epoch": 0.24616470588235295, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1082, "step": 10462 }, { "epoch": 0.24618823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0203, "step": 10463 }, { "epoch": 0.24621176470588235, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3133, "step": 10464 }, { "epoch": 0.24623529411764705, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2104, "step": 10465 }, { "epoch": 0.24625882352941175, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1882, "step": 10466 }, { "epoch": 0.24628235294117648, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1003, "step": 10467 }, { "epoch": 0.24630588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2404, "step": 10468 }, { "epoch": 0.24632941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.018, "step": 10469 }, { "epoch": 0.24635294117647058, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.7723, "step": 10470 }, { "epoch": 0.2463764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0854, "step": 10471 }, { "epoch": 0.2464, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0111, "step": 10472 }, { "epoch": 0.2464235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1268, "step": 10473 }, { "epoch": 0.2464470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0791, "step": 10474 }, { "epoch": 0.2464705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1914, "step": 10475 }, { "epoch": 0.24649411764705884, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0754, "step": 10476 }, { "epoch": 0.24651764705882354, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1745, "step": 10477 }, { "epoch": 0.24654117647058824, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9446, "step": 10478 }, { "epoch": 0.24656470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0155, "step": 10479 }, { "epoch": 0.24658823529411764, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2461, "step": 10480 }, { "epoch": 0.24661176470588236, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0657, "step": 10481 }, { "epoch": 0.24663529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0036, "step": 10482 }, { "epoch": 0.24665882352941176, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9547, "step": 10483 }, { "epoch": 0.24668235294117646, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1117, "step": 10484 }, { "epoch": 0.24670588235294116, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9082, "step": 10485 }, { "epoch": 0.2467294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2461, "step": 10486 }, { "epoch": 0.2467529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.4223, "step": 10487 }, { "epoch": 0.2467764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0612, "step": 10488 }, { "epoch": 0.2468, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.121, "step": 10489 }, { "epoch": 0.24682352941176472, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2525, "step": 10490 }, { "epoch": 0.24684705882352942, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9422, "step": 10491 }, { "epoch": 0.24687058823529412, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0968, "step": 10492 }, { "epoch": 0.24689411764705882, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1183, "step": 10493 }, { "epoch": 0.24691764705882352, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3286, "step": 10494 }, { "epoch": 0.24694117647058825, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8676, "step": 10495 }, { "epoch": 0.24696470588235295, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.6758, "step": 10496 }, { "epoch": 0.24698823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.2523, "step": 10497 }, { "epoch": 0.24701176470588235, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.7694, "step": 10498 }, { "epoch": 0.24703529411764705, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2462, "step": 10499 }, { "epoch": 0.24705882352941178, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0833, "step": 10500 }, { "epoch": 0.24708235294117648, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.793, "step": 10501 }, { "epoch": 0.24710588235294118, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1605, "step": 10502 }, { "epoch": 0.24712941176470588, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9382, "step": 10503 }, { "epoch": 0.24715294117647058, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1953, "step": 10504 }, { "epoch": 0.2471764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9405, "step": 10505 }, { "epoch": 0.2472, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0312, "step": 10506 }, { "epoch": 0.2472235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2782, "step": 10507 }, { "epoch": 0.2472470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3997, "step": 10508 }, { "epoch": 0.24727058823529413, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0798, "step": 10509 }, { "epoch": 0.24729411764705883, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1761, "step": 10510 }, { "epoch": 0.24731764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.159, "step": 10511 }, { "epoch": 0.24734117647058823, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3197, "step": 10512 }, { "epoch": 0.24736470588235293, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2455, "step": 10513 }, { "epoch": 0.24738823529411766, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0077, "step": 10514 }, { "epoch": 0.24741176470588236, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.763, "step": 10515 }, { "epoch": 0.24743529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7818, "step": 10516 }, { "epoch": 0.24745882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0051, "step": 10517 }, { "epoch": 0.24748235294117646, "grad_norm": 0.263671875, "learning_rate": 0.02, "loss": 0.7656, "step": 10518 }, { "epoch": 0.2475058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0424, "step": 10519 }, { "epoch": 0.2475294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1665, "step": 10520 }, { "epoch": 0.2475529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0737, "step": 10521 }, { "epoch": 0.2475764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1221, "step": 10522 }, { "epoch": 0.2476, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1871, "step": 10523 }, { "epoch": 0.24762352941176471, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.733, "step": 10524 }, { "epoch": 0.24764705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1173, "step": 10525 }, { "epoch": 0.24767058823529411, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3685, "step": 10526 }, { "epoch": 0.24769411764705881, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0038, "step": 10527 }, { "epoch": 0.24771764705882354, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8395, "step": 10528 }, { "epoch": 0.24774117647058824, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8252, "step": 10529 }, { "epoch": 0.24776470588235294, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 1.0428, "step": 10530 }, { "epoch": 0.24778823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9601, "step": 10531 }, { "epoch": 0.24781176470588234, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9715, "step": 10532 }, { "epoch": 0.24783529411764707, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1471, "step": 10533 }, { "epoch": 0.24785882352941177, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2908, "step": 10534 }, { "epoch": 0.24788235294117647, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.903, "step": 10535 }, { "epoch": 0.24790588235294117, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1113, "step": 10536 }, { "epoch": 0.24792941176470587, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.6634, "step": 10537 }, { "epoch": 0.2479529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2362, "step": 10538 }, { "epoch": 0.2479764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0348, "step": 10539 }, { "epoch": 0.248, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0228, "step": 10540 }, { "epoch": 0.2480235294117647, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.6826, "step": 10541 }, { "epoch": 0.2480470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0425, "step": 10542 }, { "epoch": 0.24807058823529413, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.2269, "step": 10543 }, { "epoch": 0.24809411764705883, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.9441, "step": 10544 }, { "epoch": 0.24811764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2294, "step": 10545 }, { "epoch": 0.24814117647058823, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.39, "step": 10546 }, { "epoch": 0.24816470588235295, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9487, "step": 10547 }, { "epoch": 0.24818823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8742, "step": 10548 }, { "epoch": 0.24821176470588235, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8963, "step": 10549 }, { "epoch": 0.24823529411764705, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.6955, "step": 10550 }, { "epoch": 0.24825882352941175, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3553, "step": 10551 }, { "epoch": 0.24828235294117648, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.0166, "step": 10552 }, { "epoch": 0.24830588235294118, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2463, "step": 10553 }, { "epoch": 0.24832941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.3122, "step": 10554 }, { "epoch": 0.24835294117647058, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8628, "step": 10555 }, { "epoch": 0.24837647058823528, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0414, "step": 10556 }, { "epoch": 0.2484, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0841, "step": 10557 }, { "epoch": 0.2484235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.07, "step": 10558 }, { "epoch": 0.2484470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0176, "step": 10559 }, { "epoch": 0.2484705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9014, "step": 10560 }, { "epoch": 0.2484941176470588, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9218, "step": 10561 }, { "epoch": 0.24851764705882354, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2679, "step": 10562 }, { "epoch": 0.24854117647058824, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.3171, "step": 10563 }, { "epoch": 0.24856470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2413, "step": 10564 }, { "epoch": 0.24858823529411764, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0911, "step": 10565 }, { "epoch": 0.24861176470588237, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.3043, "step": 10566 }, { "epoch": 0.24863529411764707, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1796, "step": 10567 }, { "epoch": 0.24865882352941177, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1218, "step": 10568 }, { "epoch": 0.24868235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2605, "step": 10569 }, { "epoch": 0.24870588235294117, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9563, "step": 10570 }, { "epoch": 0.2487294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1383, "step": 10571 }, { "epoch": 0.2487529411764706, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.7943, "step": 10572 }, { "epoch": 0.2487764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2418, "step": 10573 }, { "epoch": 0.2488, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1479, "step": 10574 }, { "epoch": 0.2488235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0275, "step": 10575 }, { "epoch": 0.24884705882352942, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8622, "step": 10576 }, { "epoch": 0.24887058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.093, "step": 10577 }, { "epoch": 0.24889411764705882, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.7628, "step": 10578 }, { "epoch": 0.24891764705882352, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2096, "step": 10579 }, { "epoch": 0.24894117647058825, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.185, "step": 10580 }, { "epoch": 0.24896470588235295, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0973, "step": 10581 }, { "epoch": 0.24898823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0758, "step": 10582 }, { "epoch": 0.24901176470588235, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8322, "step": 10583 }, { "epoch": 0.24903529411764705, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0526, "step": 10584 }, { "epoch": 0.24905882352941178, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9569, "step": 10585 }, { "epoch": 0.24908235294117648, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3676, "step": 10586 }, { "epoch": 0.24910588235294118, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9202, "step": 10587 }, { "epoch": 0.24912941176470588, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9029, "step": 10588 }, { "epoch": 0.24915294117647058, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3134, "step": 10589 }, { "epoch": 0.2491764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.113, "step": 10590 }, { "epoch": 0.2492, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.3193, "step": 10591 }, { "epoch": 0.2492235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0197, "step": 10592 }, { "epoch": 0.2492470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1587, "step": 10593 }, { "epoch": 0.2492705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1152, "step": 10594 }, { "epoch": 0.24929411764705883, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0468, "step": 10595 }, { "epoch": 0.24931764705882353, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7924, "step": 10596 }, { "epoch": 0.24934117647058823, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9098, "step": 10597 }, { "epoch": 0.24936470588235293, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.9518, "step": 10598 }, { "epoch": 0.24938823529411766, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0926, "step": 10599 }, { "epoch": 0.24941176470588236, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.157, "step": 10600 }, { "epoch": 0.24943529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1661, "step": 10601 }, { "epoch": 0.24945882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1549, "step": 10602 }, { "epoch": 0.24948235294117646, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1536, "step": 10603 }, { "epoch": 0.2495058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9593, "step": 10604 }, { "epoch": 0.2495294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1946, "step": 10605 }, { "epoch": 0.2495529411764706, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9395, "step": 10606 }, { "epoch": 0.2495764705882353, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8818, "step": 10607 }, { "epoch": 0.2496, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9652, "step": 10608 }, { "epoch": 0.24962352941176472, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2233, "step": 10609 }, { "epoch": 0.24964705882352942, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9423, "step": 10610 }, { "epoch": 0.24967058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0967, "step": 10611 }, { "epoch": 0.24969411764705882, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0297, "step": 10612 }, { "epoch": 0.24971764705882352, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.9839, "step": 10613 }, { "epoch": 0.24974117647058824, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0643, "step": 10614 }, { "epoch": 0.24976470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9746, "step": 10615 }, { "epoch": 0.24978823529411764, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7433, "step": 10616 }, { "epoch": 0.24981176470588234, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0186, "step": 10617 }, { "epoch": 0.24983529411764707, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1672, "step": 10618 }, { "epoch": 0.24985882352941177, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7784, "step": 10619 }, { "epoch": 0.24988235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1676, "step": 10620 }, { "epoch": 0.24990588235294117, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0617, "step": 10621 }, { "epoch": 0.24992941176470587, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2353, "step": 10622 }, { "epoch": 0.2499529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.3126, "step": 10623 }, { "epoch": 0.2499764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.7374, "step": 10624 }, { "epoch": 0.25, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1822, "step": 10625 }, { "epoch": 0.2500235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.3186, "step": 10626 }, { "epoch": 0.2500470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9668, "step": 10627 }, { "epoch": 0.2500705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.965, "step": 10628 }, { "epoch": 0.2500941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0794, "step": 10629 }, { "epoch": 0.25011764705882356, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8439, "step": 10630 }, { "epoch": 0.25014117647058826, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9831, "step": 10631 }, { "epoch": 0.25016470588235296, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1543, "step": 10632 }, { "epoch": 0.25018823529411766, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8325, "step": 10633 }, { "epoch": 0.25021176470588236, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8755, "step": 10634 }, { "epoch": 0.25023529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.951, "step": 10635 }, { "epoch": 0.25025882352941176, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1409, "step": 10636 }, { "epoch": 0.25028235294117646, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9692, "step": 10637 }, { "epoch": 0.25030588235294116, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0981, "step": 10638 }, { "epoch": 0.25032941176470586, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0117, "step": 10639 }, { "epoch": 0.2503529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 1.0096, "step": 10640 }, { "epoch": 0.2503764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1625, "step": 10641 }, { "epoch": 0.2504, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.202, "step": 10642 }, { "epoch": 0.2504235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9742, "step": 10643 }, { "epoch": 0.2504470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0017, "step": 10644 }, { "epoch": 0.2504705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2307, "step": 10645 }, { "epoch": 0.2504941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3988, "step": 10646 }, { "epoch": 0.2505176470588235, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.183, "step": 10647 }, { "epoch": 0.2505411764705882, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1727, "step": 10648 }, { "epoch": 0.25056470588235297, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2374, "step": 10649 }, { "epoch": 0.25058823529411767, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1, "step": 10650 }, { "epoch": 0.25061176470588237, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0419, "step": 10651 }, { "epoch": 0.25063529411764707, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0845, "step": 10652 }, { "epoch": 0.25065882352941177, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0079, "step": 10653 }, { "epoch": 0.25068235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0746, "step": 10654 }, { "epoch": 0.25070588235294117, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8438, "step": 10655 }, { "epoch": 0.25072941176470587, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0775, "step": 10656 }, { "epoch": 0.25075294117647057, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9846, "step": 10657 }, { "epoch": 0.25077647058823527, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9754, "step": 10658 }, { "epoch": 0.2508, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1652, "step": 10659 }, { "epoch": 0.2508235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9916, "step": 10660 }, { "epoch": 0.2508470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9918, "step": 10661 }, { "epoch": 0.2508705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9745, "step": 10662 }, { "epoch": 0.2508941176470588, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8228, "step": 10663 }, { "epoch": 0.2509176470588235, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2722, "step": 10664 }, { "epoch": 0.2509411764705882, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.7521, "step": 10665 }, { "epoch": 0.2509647058823529, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1293, "step": 10666 }, { "epoch": 0.2509882352941176, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0423, "step": 10667 }, { "epoch": 0.2510117647058824, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2179, "step": 10668 }, { "epoch": 0.2510352941176471, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9011, "step": 10669 }, { "epoch": 0.2510588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9234, "step": 10670 }, { "epoch": 0.2510823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0011, "step": 10671 }, { "epoch": 0.2511058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3013, "step": 10672 }, { "epoch": 0.2511294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2432, "step": 10673 }, { "epoch": 0.2511529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0818, "step": 10674 }, { "epoch": 0.2511764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9342, "step": 10675 }, { "epoch": 0.2512, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9534, "step": 10676 }, { "epoch": 0.2512235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2306, "step": 10677 }, { "epoch": 0.25124705882352943, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8177, "step": 10678 }, { "epoch": 0.25127058823529413, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1092, "step": 10679 }, { "epoch": 0.25129411764705883, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0421, "step": 10680 }, { "epoch": 0.25131764705882353, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7222, "step": 10681 }, { "epoch": 0.25134117647058823, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9779, "step": 10682 }, { "epoch": 0.25136470588235293, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9756, "step": 10683 }, { "epoch": 0.25138823529411763, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0835, "step": 10684 }, { "epoch": 0.25141176470588233, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7254, "step": 10685 }, { "epoch": 0.25143529411764703, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0189, "step": 10686 }, { "epoch": 0.2514588235294118, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7556, "step": 10687 }, { "epoch": 0.2514823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1306, "step": 10688 }, { "epoch": 0.2515058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2478, "step": 10689 }, { "epoch": 0.2515294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0363, "step": 10690 }, { "epoch": 0.2515529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9744, "step": 10691 }, { "epoch": 0.2515764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1108, "step": 10692 }, { "epoch": 0.2516, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.6121, "step": 10693 }, { "epoch": 0.2516235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0954, "step": 10694 }, { "epoch": 0.2516470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2826, "step": 10695 }, { "epoch": 0.2516705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2497, "step": 10696 }, { "epoch": 0.25169411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0378, "step": 10697 }, { "epoch": 0.25171764705882355, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0203, "step": 10698 }, { "epoch": 0.25174117647058825, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1441, "step": 10699 }, { "epoch": 0.25176470588235295, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1865, "step": 10700 }, { "epoch": 0.25178823529411765, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1944, "step": 10701 }, { "epoch": 0.25181176470588235, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7733, "step": 10702 }, { "epoch": 0.25183529411764705, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8673, "step": 10703 }, { "epoch": 0.25185882352941175, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8411, "step": 10704 }, { "epoch": 0.25188235294117645, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9393, "step": 10705 }, { "epoch": 0.2519058823529412, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.3215, "step": 10706 }, { "epoch": 0.2519294117647059, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0087, "step": 10707 }, { "epoch": 0.2519529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8242, "step": 10708 }, { "epoch": 0.2519764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9798, "step": 10709 }, { "epoch": 0.252, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3032, "step": 10710 }, { "epoch": 0.2520235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.3997, "step": 10711 }, { "epoch": 0.2520470588235294, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3985, "step": 10712 }, { "epoch": 0.2520705882352941, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 1.0404, "step": 10713 }, { "epoch": 0.2520941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0019, "step": 10714 }, { "epoch": 0.2521176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1826, "step": 10715 }, { "epoch": 0.25214117647058826, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.869, "step": 10716 }, { "epoch": 0.25216470588235296, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.823, "step": 10717 }, { "epoch": 0.25218823529411766, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.7177, "step": 10718 }, { "epoch": 0.25221176470588236, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1415, "step": 10719 }, { "epoch": 0.25223529411764706, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2672, "step": 10720 }, { "epoch": 0.25225882352941176, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8938, "step": 10721 }, { "epoch": 0.25228235294117646, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2456, "step": 10722 }, { "epoch": 0.25230588235294116, "grad_norm": 0.24609375, "learning_rate": 0.02, "loss": 0.728, "step": 10723 }, { "epoch": 0.25232941176470586, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2323, "step": 10724 }, { "epoch": 0.2523529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2671, "step": 10725 }, { "epoch": 0.2523764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9499, "step": 10726 }, { "epoch": 0.2524, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8447, "step": 10727 }, { "epoch": 0.2524235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9508, "step": 10728 }, { "epoch": 0.2524470588235294, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.716, "step": 10729 }, { "epoch": 0.2524705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0217, "step": 10730 }, { "epoch": 0.2524941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.933, "step": 10731 }, { "epoch": 0.2525176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8584, "step": 10732 }, { "epoch": 0.2525411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2331, "step": 10733 }, { "epoch": 0.25256470588235297, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1237, "step": 10734 }, { "epoch": 0.25258823529411767, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0603, "step": 10735 }, { "epoch": 0.25261176470588237, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7937, "step": 10736 }, { "epoch": 0.25263529411764707, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9763, "step": 10737 }, { "epoch": 0.25265882352941177, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1493, "step": 10738 }, { "epoch": 0.25268235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.4689, "step": 10739 }, { "epoch": 0.25270588235294117, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8577, "step": 10740 }, { "epoch": 0.25272941176470587, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2818, "step": 10741 }, { "epoch": 0.25275294117647057, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9906, "step": 10742 }, { "epoch": 0.25277647058823527, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9753, "step": 10743 }, { "epoch": 0.2528, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.879, "step": 10744 }, { "epoch": 0.2528235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.843, "step": 10745 }, { "epoch": 0.2528470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.6525, "step": 10746 }, { "epoch": 0.2528705882352941, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.9111, "step": 10747 }, { "epoch": 0.2528941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8552, "step": 10748 }, { "epoch": 0.2529176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2322, "step": 10749 }, { "epoch": 0.2529411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9341, "step": 10750 }, { "epoch": 0.2529647058823529, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9254, "step": 10751 }, { "epoch": 0.2529882352941176, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0008, "step": 10752 }, { "epoch": 0.2530117647058824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0854, "step": 10753 }, { "epoch": 0.2530352941176471, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1918, "step": 10754 }, { "epoch": 0.2530588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2048, "step": 10755 }, { "epoch": 0.2530823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0208, "step": 10756 }, { "epoch": 0.2531058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1337, "step": 10757 }, { "epoch": 0.2531294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0621, "step": 10758 }, { "epoch": 0.2531529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9482, "step": 10759 }, { "epoch": 0.2531764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8861, "step": 10760 }, { "epoch": 0.2532, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2492, "step": 10761 }, { "epoch": 0.2532235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1729, "step": 10762 }, { "epoch": 0.25324705882352944, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2542, "step": 10763 }, { "epoch": 0.25327058823529414, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1383, "step": 10764 }, { "epoch": 0.25329411764705884, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7289, "step": 10765 }, { "epoch": 0.25331764705882354, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0473, "step": 10766 }, { "epoch": 0.25334117647058824, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8771, "step": 10767 }, { "epoch": 0.25336470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.362, "step": 10768 }, { "epoch": 0.25338823529411764, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.9949, "step": 10769 }, { "epoch": 0.25341176470588234, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2296, "step": 10770 }, { "epoch": 0.25343529411764704, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2152, "step": 10771 }, { "epoch": 0.2534588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9989, "step": 10772 }, { "epoch": 0.2534823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1042, "step": 10773 }, { "epoch": 0.2535058823529412, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.3174, "step": 10774 }, { "epoch": 0.2535294117647059, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1335, "step": 10775 }, { "epoch": 0.2535529411764706, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8, "step": 10776 }, { "epoch": 0.2535764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0763, "step": 10777 }, { "epoch": 0.2536, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0579, "step": 10778 }, { "epoch": 0.2536235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0839, "step": 10779 }, { "epoch": 0.2536470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2521, "step": 10780 }, { "epoch": 0.2536705882352941, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.6556, "step": 10781 }, { "epoch": 0.25369411764705885, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1552, "step": 10782 }, { "epoch": 0.25371764705882355, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0395, "step": 10783 }, { "epoch": 0.25374117647058825, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3215, "step": 10784 }, { "epoch": 0.25376470588235295, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0871, "step": 10785 }, { "epoch": 0.25378823529411765, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8771, "step": 10786 }, { "epoch": 0.25381176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.4243, "step": 10787 }, { "epoch": 0.25383529411764705, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0722, "step": 10788 }, { "epoch": 0.25385882352941175, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2034, "step": 10789 }, { "epoch": 0.25388235294117645, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9015, "step": 10790 }, { "epoch": 0.2539058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1, "step": 10791 }, { "epoch": 0.2539294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0381, "step": 10792 }, { "epoch": 0.2539529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1502, "step": 10793 }, { "epoch": 0.2539764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3092, "step": 10794 }, { "epoch": 0.254, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1394, "step": 10795 }, { "epoch": 0.2540235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3321, "step": 10796 }, { "epoch": 0.2540470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1172, "step": 10797 }, { "epoch": 0.2540705882352941, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9851, "step": 10798 }, { "epoch": 0.2540941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8593, "step": 10799 }, { "epoch": 0.2541176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0084, "step": 10800 }, { "epoch": 0.25414117647058826, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0879, "step": 10801 }, { "epoch": 0.25416470588235296, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1143, "step": 10802 }, { "epoch": 0.25418823529411766, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2659, "step": 10803 }, { "epoch": 0.25421176470588236, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0366, "step": 10804 }, { "epoch": 0.25423529411764706, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.7689, "step": 10805 }, { "epoch": 0.25425882352941176, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9819, "step": 10806 }, { "epoch": 0.25428235294117646, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7888, "step": 10807 }, { "epoch": 0.25430588235294116, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2579, "step": 10808 }, { "epoch": 0.25432941176470586, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0094, "step": 10809 }, { "epoch": 0.2543529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0306, "step": 10810 }, { "epoch": 0.2543764705882353, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.8656, "step": 10811 }, { "epoch": 0.2544, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0145, "step": 10812 }, { "epoch": 0.2544235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1828, "step": 10813 }, { "epoch": 0.2544470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2304, "step": 10814 }, { "epoch": 0.2544705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2001, "step": 10815 }, { "epoch": 0.2544941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1144, "step": 10816 }, { "epoch": 0.2545176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1629, "step": 10817 }, { "epoch": 0.2545411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.8602, "step": 10818 }, { "epoch": 0.2545647058823529, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.5614, "step": 10819 }, { "epoch": 0.25458823529411767, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1763, "step": 10820 }, { "epoch": 0.25461176470588237, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0049, "step": 10821 }, { "epoch": 0.25463529411764707, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1152, "step": 10822 }, { "epoch": 0.25465882352941177, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0917, "step": 10823 }, { "epoch": 0.25468235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0293, "step": 10824 }, { "epoch": 0.25470588235294117, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0945, "step": 10825 }, { "epoch": 0.25472941176470587, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1404, "step": 10826 }, { "epoch": 0.25475294117647057, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2743, "step": 10827 }, { "epoch": 0.25477647058823527, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.149, "step": 10828 }, { "epoch": 0.2548, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.4384, "step": 10829 }, { "epoch": 0.2548235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1176, "step": 10830 }, { "epoch": 0.2548470588235294, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7063, "step": 10831 }, { "epoch": 0.2548705882352941, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0242, "step": 10832 }, { "epoch": 0.2548941176470588, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9312, "step": 10833 }, { "epoch": 0.2549176470588235, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8667, "step": 10834 }, { "epoch": 0.2549411764705882, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1141, "step": 10835 }, { "epoch": 0.2549647058823529, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7678, "step": 10836 }, { "epoch": 0.2549882352941176, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.124, "step": 10837 }, { "epoch": 0.2550117647058823, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8433, "step": 10838 }, { "epoch": 0.2550352941176471, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8565, "step": 10839 }, { "epoch": 0.2550588235294118, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8414, "step": 10840 }, { "epoch": 0.2550823529411765, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1215, "step": 10841 }, { "epoch": 0.2551058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0746, "step": 10842 }, { "epoch": 0.2551294117647059, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2017, "step": 10843 }, { "epoch": 0.2551529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9514, "step": 10844 }, { "epoch": 0.2551764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0938, "step": 10845 }, { "epoch": 0.2552, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0513, "step": 10846 }, { "epoch": 0.2552235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9436, "step": 10847 }, { "epoch": 0.25524705882352944, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9341, "step": 10848 }, { "epoch": 0.25527058823529414, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0454, "step": 10849 }, { "epoch": 0.25529411764705884, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.18, "step": 10850 }, { "epoch": 0.25531764705882354, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9887, "step": 10851 }, { "epoch": 0.25534117647058824, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2517, "step": 10852 }, { "epoch": 0.25536470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2076, "step": 10853 }, { "epoch": 0.25538823529411764, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0978, "step": 10854 }, { "epoch": 0.25541176470588234, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0316, "step": 10855 }, { "epoch": 0.25543529411764704, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9979, "step": 10856 }, { "epoch": 0.25545882352941174, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 1.3681, "step": 10857 }, { "epoch": 0.2554823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1569, "step": 10858 }, { "epoch": 0.2555058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1541, "step": 10859 }, { "epoch": 0.2555294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2626, "step": 10860 }, { "epoch": 0.2555529411764706, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.7709, "step": 10861 }, { "epoch": 0.2555764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.23, "step": 10862 }, { "epoch": 0.2556, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8267, "step": 10863 }, { "epoch": 0.2556235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0188, "step": 10864 }, { "epoch": 0.2556470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8042, "step": 10865 }, { "epoch": 0.2556705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0132, "step": 10866 }, { "epoch": 0.25569411764705885, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0065, "step": 10867 }, { "epoch": 0.25571764705882355, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0165, "step": 10868 }, { "epoch": 0.25574117647058825, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.941, "step": 10869 }, { "epoch": 0.25576470588235295, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1638, "step": 10870 }, { "epoch": 0.25578823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8651, "step": 10871 }, { "epoch": 0.25581176470588235, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.805, "step": 10872 }, { "epoch": 0.25583529411764705, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3143, "step": 10873 }, { "epoch": 0.25585882352941175, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8777, "step": 10874 }, { "epoch": 0.25588235294117645, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3674, "step": 10875 }, { "epoch": 0.25590588235294115, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0787, "step": 10876 }, { "epoch": 0.2559294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1306, "step": 10877 }, { "epoch": 0.2559529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.357, "step": 10878 }, { "epoch": 0.2559764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0719, "step": 10879 }, { "epoch": 0.256, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.042, "step": 10880 }, { "epoch": 0.2560235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2409, "step": 10881 }, { "epoch": 0.2560470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1293, "step": 10882 }, { "epoch": 0.2560705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8498, "step": 10883 }, { "epoch": 0.2560941176470588, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.891, "step": 10884 }, { "epoch": 0.2561176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1433, "step": 10885 }, { "epoch": 0.25614117647058826, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8249, "step": 10886 }, { "epoch": 0.25616470588235296, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0073, "step": 10887 }, { "epoch": 0.25618823529411766, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.4256, "step": 10888 }, { "epoch": 0.25621176470588236, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0235, "step": 10889 }, { "epoch": 0.25623529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0134, "step": 10890 }, { "epoch": 0.25625882352941176, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.1217, "step": 10891 }, { "epoch": 0.25628235294117646, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9182, "step": 10892 }, { "epoch": 0.25630588235294116, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0703, "step": 10893 }, { "epoch": 0.25632941176470586, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1503, "step": 10894 }, { "epoch": 0.25635294117647056, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2059, "step": 10895 }, { "epoch": 0.2563764705882353, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3329, "step": 10896 }, { "epoch": 0.2564, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0442, "step": 10897 }, { "epoch": 0.2564235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0902, "step": 10898 }, { "epoch": 0.2564470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1962, "step": 10899 }, { "epoch": 0.2564705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0358, "step": 10900 }, { "epoch": 0.2564941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2769, "step": 10901 }, { "epoch": 0.2565176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2719, "step": 10902 }, { "epoch": 0.2565411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8874, "step": 10903 }, { "epoch": 0.2565647058823529, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.673, "step": 10904 }, { "epoch": 0.2565882352941177, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9465, "step": 10905 }, { "epoch": 0.2566117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0034, "step": 10906 }, { "epoch": 0.2566352941176471, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0608, "step": 10907 }, { "epoch": 0.2566588235294118, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8862, "step": 10908 }, { "epoch": 0.2566823529411765, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0016, "step": 10909 }, { "epoch": 0.2567058823529412, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8954, "step": 10910 }, { "epoch": 0.2567294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0863, "step": 10911 }, { "epoch": 0.2567529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0319, "step": 10912 }, { "epoch": 0.2567764705882353, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.7088, "step": 10913 }, { "epoch": 0.2568, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1767, "step": 10914 }, { "epoch": 0.25682352941176473, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.6796, "step": 10915 }, { "epoch": 0.25684705882352943, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.7248, "step": 10916 }, { "epoch": 0.25687058823529413, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2145, "step": 10917 }, { "epoch": 0.25689411764705883, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2707, "step": 10918 }, { "epoch": 0.25691764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2021, "step": 10919 }, { "epoch": 0.25694117647058823, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2667, "step": 10920 }, { "epoch": 0.25696470588235293, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1454, "step": 10921 }, { "epoch": 0.25698823529411763, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1779, "step": 10922 }, { "epoch": 0.25701176470588233, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0172, "step": 10923 }, { "epoch": 0.2570352941176471, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2114, "step": 10924 }, { "epoch": 0.2570588235294118, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0719, "step": 10925 }, { "epoch": 0.2570823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1689, "step": 10926 }, { "epoch": 0.2571058823529412, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.5942, "step": 10927 }, { "epoch": 0.2571294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2858, "step": 10928 }, { "epoch": 0.2571529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8975, "step": 10929 }, { "epoch": 0.2571764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0612, "step": 10930 }, { "epoch": 0.2572, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0803, "step": 10931 }, { "epoch": 0.2572235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9059, "step": 10932 }, { "epoch": 0.2572470588235294, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9919, "step": 10933 }, { "epoch": 0.25727058823529414, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.982, "step": 10934 }, { "epoch": 0.25729411764705884, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0628, "step": 10935 }, { "epoch": 0.25731764705882354, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2742, "step": 10936 }, { "epoch": 0.25734117647058824, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.6821, "step": 10937 }, { "epoch": 0.25736470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.161, "step": 10938 }, { "epoch": 0.25738823529411764, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.98, "step": 10939 }, { "epoch": 0.25741176470588234, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1111, "step": 10940 }, { "epoch": 0.25743529411764704, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9067, "step": 10941 }, { "epoch": 0.25745882352941174, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.049, "step": 10942 }, { "epoch": 0.2574823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9163, "step": 10943 }, { "epoch": 0.2575058823529412, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8402, "step": 10944 }, { "epoch": 0.2575294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1118, "step": 10945 }, { "epoch": 0.2575529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0483, "step": 10946 }, { "epoch": 0.2575764705882353, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3646, "step": 10947 }, { "epoch": 0.2576, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9841, "step": 10948 }, { "epoch": 0.2576235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0307, "step": 10949 }, { "epoch": 0.2576470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.153, "step": 10950 }, { "epoch": 0.2576705882352941, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2728, "step": 10951 }, { "epoch": 0.25769411764705885, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2588, "step": 10952 }, { "epoch": 0.25771764705882355, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2038, "step": 10953 }, { "epoch": 0.25774117647058825, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3351, "step": 10954 }, { "epoch": 0.25776470588235295, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7961, "step": 10955 }, { "epoch": 0.25778823529411765, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1812, "step": 10956 }, { "epoch": 0.25781176470588235, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7757, "step": 10957 }, { "epoch": 0.25783529411764705, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9232, "step": 10958 }, { "epoch": 0.25785882352941175, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1575, "step": 10959 }, { "epoch": 0.25788235294117645, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1588, "step": 10960 }, { "epoch": 0.25790588235294115, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9091, "step": 10961 }, { "epoch": 0.2579294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2523, "step": 10962 }, { "epoch": 0.2579529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2095, "step": 10963 }, { "epoch": 0.2579764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9216, "step": 10964 }, { "epoch": 0.258, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 1.0276, "step": 10965 }, { "epoch": 0.2580235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0405, "step": 10966 }, { "epoch": 0.2580470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0962, "step": 10967 }, { "epoch": 0.2580705882352941, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.6758, "step": 10968 }, { "epoch": 0.2580941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1927, "step": 10969 }, { "epoch": 0.2581176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0358, "step": 10970 }, { "epoch": 0.25814117647058826, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8347, "step": 10971 }, { "epoch": 0.25816470588235296, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1509, "step": 10972 }, { "epoch": 0.25818823529411766, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.5738, "step": 10973 }, { "epoch": 0.25821176470588236, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0838, "step": 10974 }, { "epoch": 0.25823529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0051, "step": 10975 }, { "epoch": 0.25825882352941176, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.8327, "step": 10976 }, { "epoch": 0.25828235294117646, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1561, "step": 10977 }, { "epoch": 0.25830588235294116, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9271, "step": 10978 }, { "epoch": 0.25832941176470586, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1135, "step": 10979 }, { "epoch": 0.25835294117647056, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.058, "step": 10980 }, { "epoch": 0.2583764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.063, "step": 10981 }, { "epoch": 0.2584, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9887, "step": 10982 }, { "epoch": 0.2584235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0547, "step": 10983 }, { "epoch": 0.2584470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0131, "step": 10984 }, { "epoch": 0.2584705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0574, "step": 10985 }, { "epoch": 0.2584941176470588, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0511, "step": 10986 }, { "epoch": 0.2585176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2816, "step": 10987 }, { "epoch": 0.2585411764705882, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8454, "step": 10988 }, { "epoch": 0.2585647058823529, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1219, "step": 10989 }, { "epoch": 0.2585882352941177, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.4536, "step": 10990 }, { "epoch": 0.2586117647058824, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.906, "step": 10991 }, { "epoch": 0.2586352941176471, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1301, "step": 10992 }, { "epoch": 0.2586588235294118, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1139, "step": 10993 }, { "epoch": 0.2586823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1283, "step": 10994 }, { "epoch": 0.2587058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1903, "step": 10995 }, { "epoch": 0.2587294117647059, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.9714, "step": 10996 }, { "epoch": 0.2587529411764706, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8174, "step": 10997 }, { "epoch": 0.2587764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0736, "step": 10998 }, { "epoch": 0.2588, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1086, "step": 10999 }, { "epoch": 0.25882352941176473, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2304, "step": 11000 }, { "epoch": 0.25882352941176473, "eval_loss": 2.2128984928131104, "eval_runtime": 680.7307, "eval_samples_per_second": 12.487, "eval_steps_per_second": 3.122, "step": 11000 }, { "epoch": 0.25884705882352943, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0904, "step": 11001 }, { "epoch": 0.25887058823529413, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9866, "step": 11002 }, { "epoch": 0.25889411764705883, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3266, "step": 11003 }, { "epoch": 0.25891764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.3377, "step": 11004 }, { "epoch": 0.25894117647058823, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1361, "step": 11005 }, { "epoch": 0.25896470588235293, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9552, "step": 11006 }, { "epoch": 0.25898823529411763, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2066, "step": 11007 }, { "epoch": 0.25901176470588233, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8306, "step": 11008 }, { "epoch": 0.2590352941176471, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0703, "step": 11009 }, { "epoch": 0.2590588235294118, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.32, "step": 11010 }, { "epoch": 0.2590823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9174, "step": 11011 }, { "epoch": 0.2591058823529412, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.7084, "step": 11012 }, { "epoch": 0.2591294117647059, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.6489, "step": 11013 }, { "epoch": 0.2591529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0152, "step": 11014 }, { "epoch": 0.2591764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8665, "step": 11015 }, { "epoch": 0.2592, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8922, "step": 11016 }, { "epoch": 0.2592235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9809, "step": 11017 }, { "epoch": 0.2592470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.3177, "step": 11018 }, { "epoch": 0.25927058823529414, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.6035, "step": 11019 }, { "epoch": 0.25929411764705884, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1138, "step": 11020 }, { "epoch": 0.25931764705882354, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.962, "step": 11021 }, { "epoch": 0.25934117647058824, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7569, "step": 11022 }, { "epoch": 0.25936470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9401, "step": 11023 }, { "epoch": 0.25938823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9442, "step": 11024 }, { "epoch": 0.25941176470588234, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0301, "step": 11025 }, { "epoch": 0.25943529411764704, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3259, "step": 11026 }, { "epoch": 0.25945882352941174, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8663, "step": 11027 }, { "epoch": 0.2594823529411765, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7449, "step": 11028 }, { "epoch": 0.2595058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1123, "step": 11029 }, { "epoch": 0.2595294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1018, "step": 11030 }, { "epoch": 0.2595529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1789, "step": 11031 }, { "epoch": 0.2595764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2712, "step": 11032 }, { "epoch": 0.2596, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1911, "step": 11033 }, { "epoch": 0.2596235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3445, "step": 11034 }, { "epoch": 0.2596470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1779, "step": 11035 }, { "epoch": 0.2596705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9904, "step": 11036 }, { "epoch": 0.2596941176470588, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8994, "step": 11037 }, { "epoch": 0.25971764705882355, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.118, "step": 11038 }, { "epoch": 0.25974117647058825, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3124, "step": 11039 }, { "epoch": 0.25976470588235295, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0241, "step": 11040 }, { "epoch": 0.25978823529411765, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2153, "step": 11041 }, { "epoch": 0.25981176470588235, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.7235, "step": 11042 }, { "epoch": 0.25983529411764705, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1123, "step": 11043 }, { "epoch": 0.25985882352941175, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9282, "step": 11044 }, { "epoch": 0.25988235294117645, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.962, "step": 11045 }, { "epoch": 0.25990588235294115, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0191, "step": 11046 }, { "epoch": 0.2599294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2136, "step": 11047 }, { "epoch": 0.2599529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.957, "step": 11048 }, { "epoch": 0.2599764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0915, "step": 11049 }, { "epoch": 0.26, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2507, "step": 11050 }, { "epoch": 0.2600235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9992, "step": 11051 }, { "epoch": 0.2600470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2842, "step": 11052 }, { "epoch": 0.2600705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1631, "step": 11053 }, { "epoch": 0.2600941176470588, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3751, "step": 11054 }, { "epoch": 0.2601176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9645, "step": 11055 }, { "epoch": 0.2601411764705882, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3282, "step": 11056 }, { "epoch": 0.26016470588235296, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9706, "step": 11057 }, { "epoch": 0.26018823529411766, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1229, "step": 11058 }, { "epoch": 0.26021176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0418, "step": 11059 }, { "epoch": 0.26023529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.075, "step": 11060 }, { "epoch": 0.26025882352941176, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1614, "step": 11061 }, { "epoch": 0.26028235294117646, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.738, "step": 11062 }, { "epoch": 0.26030588235294116, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9689, "step": 11063 }, { "epoch": 0.26032941176470586, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9149, "step": 11064 }, { "epoch": 0.26035294117647056, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9774, "step": 11065 }, { "epoch": 0.2603764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.054, "step": 11066 }, { "epoch": 0.2604, "grad_norm": 0.248046875, "learning_rate": 0.02, "loss": 0.7348, "step": 11067 }, { "epoch": 0.2604235294117647, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1911, "step": 11068 }, { "epoch": 0.2604470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2739, "step": 11069 }, { "epoch": 0.2604705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1018, "step": 11070 }, { "epoch": 0.2604941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9493, "step": 11071 }, { "epoch": 0.2605176470588235, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0, "step": 11072 }, { "epoch": 0.2605411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9072, "step": 11073 }, { "epoch": 0.2605647058823529, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0349, "step": 11074 }, { "epoch": 0.2605882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1829, "step": 11075 }, { "epoch": 0.2606117647058824, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1934, "step": 11076 }, { "epoch": 0.2606352941176471, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9023, "step": 11077 }, { "epoch": 0.2606588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8155, "step": 11078 }, { "epoch": 0.2606823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2031, "step": 11079 }, { "epoch": 0.2607058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2596, "step": 11080 }, { "epoch": 0.2607294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 1.1313, "step": 11081 }, { "epoch": 0.2607529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.7833, "step": 11082 }, { "epoch": 0.2607764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3638, "step": 11083 }, { "epoch": 0.2608, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8728, "step": 11084 }, { "epoch": 0.26082352941176473, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.128, "step": 11085 }, { "epoch": 0.26084705882352943, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1615, "step": 11086 }, { "epoch": 0.26087058823529413, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3125, "step": 11087 }, { "epoch": 0.26089411764705883, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0749, "step": 11088 }, { "epoch": 0.26091764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0479, "step": 11089 }, { "epoch": 0.26094117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1151, "step": 11090 }, { "epoch": 0.26096470588235293, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0746, "step": 11091 }, { "epoch": 0.26098823529411763, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2575, "step": 11092 }, { "epoch": 0.26101176470588233, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0773, "step": 11093 }, { "epoch": 0.26103529411764703, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 1.0518, "step": 11094 }, { "epoch": 0.2610588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.055, "step": 11095 }, { "epoch": 0.2610823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.3439, "step": 11096 }, { "epoch": 0.2611058823529412, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1518, "step": 11097 }, { "epoch": 0.2611294117647059, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9228, "step": 11098 }, { "epoch": 0.2611529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1402, "step": 11099 }, { "epoch": 0.2611764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8374, "step": 11100 }, { "epoch": 0.2612, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1198, "step": 11101 }, { "epoch": 0.2612235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1012, "step": 11102 }, { "epoch": 0.2612470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0288, "step": 11103 }, { "epoch": 0.26127058823529414, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2389, "step": 11104 }, { "epoch": 0.26129411764705884, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0892, "step": 11105 }, { "epoch": 0.26131764705882354, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 1.0019, "step": 11106 }, { "epoch": 0.26134117647058824, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0997, "step": 11107 }, { "epoch": 0.26136470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.144, "step": 11108 }, { "epoch": 0.26138823529411764, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1046, "step": 11109 }, { "epoch": 0.26141176470588234, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3628, "step": 11110 }, { "epoch": 0.26143529411764704, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2166, "step": 11111 }, { "epoch": 0.26145882352941174, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9059, "step": 11112 }, { "epoch": 0.26148235294117644, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9907, "step": 11113 }, { "epoch": 0.2615058823529412, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8165, "step": 11114 }, { "epoch": 0.2615294117647059, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9803, "step": 11115 }, { "epoch": 0.2615529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1353, "step": 11116 }, { "epoch": 0.2615764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7684, "step": 11117 }, { "epoch": 0.2616, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8649, "step": 11118 }, { "epoch": 0.2616235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1548, "step": 11119 }, { "epoch": 0.2616470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.943, "step": 11120 }, { "epoch": 0.2616705882352941, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1613, "step": 11121 }, { "epoch": 0.2616941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9612, "step": 11122 }, { "epoch": 0.26171764705882355, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1377, "step": 11123 }, { "epoch": 0.26174117647058825, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1856, "step": 11124 }, { "epoch": 0.26176470588235295, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3709, "step": 11125 }, { "epoch": 0.26178823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9157, "step": 11126 }, { "epoch": 0.26181176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1901, "step": 11127 }, { "epoch": 0.26183529411764705, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.839, "step": 11128 }, { "epoch": 0.26185882352941175, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9424, "step": 11129 }, { "epoch": 0.26188235294117646, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1357, "step": 11130 }, { "epoch": 0.26190588235294116, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9195, "step": 11131 }, { "epoch": 0.26192941176470586, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0672, "step": 11132 }, { "epoch": 0.2619529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.992, "step": 11133 }, { "epoch": 0.2619764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1994, "step": 11134 }, { "epoch": 0.262, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0352, "step": 11135 }, { "epoch": 0.2620235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0841, "step": 11136 }, { "epoch": 0.2620470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8869, "step": 11137 }, { "epoch": 0.2620705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9263, "step": 11138 }, { "epoch": 0.2620941176470588, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8811, "step": 11139 }, { "epoch": 0.2621176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0926, "step": 11140 }, { "epoch": 0.2621411764705882, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9423, "step": 11141 }, { "epoch": 0.26216470588235297, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1023, "step": 11142 }, { "epoch": 0.26218823529411767, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1447, "step": 11143 }, { "epoch": 0.26221176470588237, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1461, "step": 11144 }, { "epoch": 0.26223529411764707, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3643, "step": 11145 }, { "epoch": 0.26225882352941177, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8166, "step": 11146 }, { "epoch": 0.26228235294117647, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.4393, "step": 11147 }, { "epoch": 0.26230588235294117, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8239, "step": 11148 }, { "epoch": 0.26232941176470587, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0422, "step": 11149 }, { "epoch": 0.26235294117647057, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2713, "step": 11150 }, { "epoch": 0.26237647058823527, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.188, "step": 11151 }, { "epoch": 0.2624, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0717, "step": 11152 }, { "epoch": 0.2624235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.123, "step": 11153 }, { "epoch": 0.2624470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9868, "step": 11154 }, { "epoch": 0.2624705882352941, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.629, "step": 11155 }, { "epoch": 0.2624941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9827, "step": 11156 }, { "epoch": 0.2625176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1009, "step": 11157 }, { "epoch": 0.2625411764705882, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.5852, "step": 11158 }, { "epoch": 0.2625647058823529, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9287, "step": 11159 }, { "epoch": 0.2625882352941176, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9549, "step": 11160 }, { "epoch": 0.2626117647058824, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8629, "step": 11161 }, { "epoch": 0.2626352941176471, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9906, "step": 11162 }, { "epoch": 0.2626588235294118, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.619, "step": 11163 }, { "epoch": 0.2626823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.3194, "step": 11164 }, { "epoch": 0.2627058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2111, "step": 11165 }, { "epoch": 0.2627294117647059, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9215, "step": 11166 }, { "epoch": 0.2627529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.3458, "step": 11167 }, { "epoch": 0.2627764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.6366, "step": 11168 }, { "epoch": 0.2628, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1408, "step": 11169 }, { "epoch": 0.26282352941176473, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2342, "step": 11170 }, { "epoch": 0.26284705882352943, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9163, "step": 11171 }, { "epoch": 0.26287058823529413, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.821, "step": 11172 }, { "epoch": 0.26289411764705883, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1538, "step": 11173 }, { "epoch": 0.26291764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0208, "step": 11174 }, { "epoch": 0.26294117647058823, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0384, "step": 11175 }, { "epoch": 0.26296470588235293, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.6907, "step": 11176 }, { "epoch": 0.26298823529411763, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2474, "step": 11177 }, { "epoch": 0.26301176470588233, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1528, "step": 11178 }, { "epoch": 0.26303529411764703, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9308, "step": 11179 }, { "epoch": 0.2630588235294118, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9137, "step": 11180 }, { "epoch": 0.2630823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8745, "step": 11181 }, { "epoch": 0.2631058823529412, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2452, "step": 11182 }, { "epoch": 0.2631294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1674, "step": 11183 }, { "epoch": 0.2631529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2802, "step": 11184 }, { "epoch": 0.2631764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7606, "step": 11185 }, { "epoch": 0.2632, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3191, "step": 11186 }, { "epoch": 0.2632235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9163, "step": 11187 }, { "epoch": 0.2632470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3212, "step": 11188 }, { "epoch": 0.26327058823529415, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8891, "step": 11189 }, { "epoch": 0.26329411764705885, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1487, "step": 11190 }, { "epoch": 0.26331764705882355, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2081, "step": 11191 }, { "epoch": 0.26334117647058825, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9174, "step": 11192 }, { "epoch": 0.26336470588235295, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0103, "step": 11193 }, { "epoch": 0.26338823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.816, "step": 11194 }, { "epoch": 0.26341176470588235, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.5499, "step": 11195 }, { "epoch": 0.26343529411764705, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1263, "step": 11196 }, { "epoch": 0.26345882352941175, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2184, "step": 11197 }, { "epoch": 0.26348235294117645, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9624, "step": 11198 }, { "epoch": 0.2635058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2993, "step": 11199 }, { "epoch": 0.2635294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2427, "step": 11200 }, { "epoch": 0.2635529411764706, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.7482, "step": 11201 }, { "epoch": 0.2635764705882353, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.7017, "step": 11202 }, { "epoch": 0.2636, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.969, "step": 11203 }, { "epoch": 0.2636235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0664, "step": 11204 }, { "epoch": 0.2636470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1159, "step": 11205 }, { "epoch": 0.2636705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.942, "step": 11206 }, { "epoch": 0.2636941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2697, "step": 11207 }, { "epoch": 0.26371764705882356, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9057, "step": 11208 }, { "epoch": 0.26374117647058826, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9498, "step": 11209 }, { "epoch": 0.26376470588235296, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9903, "step": 11210 }, { "epoch": 0.26378823529411766, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1848, "step": 11211 }, { "epoch": 0.26381176470588236, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2978, "step": 11212 }, { "epoch": 0.26383529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1132, "step": 11213 }, { "epoch": 0.26385882352941176, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.9096, "step": 11214 }, { "epoch": 0.26388235294117646, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8255, "step": 11215 }, { "epoch": 0.26390588235294116, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.5531, "step": 11216 }, { "epoch": 0.26392941176470586, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1772, "step": 11217 }, { "epoch": 0.2639529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3751, "step": 11218 }, { "epoch": 0.2639764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0813, "step": 11219 }, { "epoch": 0.264, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.3744, "step": 11220 }, { "epoch": 0.2640235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0397, "step": 11221 }, { "epoch": 0.2640470588235294, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.6078, "step": 11222 }, { "epoch": 0.2640705882352941, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7858, "step": 11223 }, { "epoch": 0.2640941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3689, "step": 11224 }, { "epoch": 0.2641176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2083, "step": 11225 }, { "epoch": 0.2641411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9285, "step": 11226 }, { "epoch": 0.26416470588235297, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9186, "step": 11227 }, { "epoch": 0.26418823529411767, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7852, "step": 11228 }, { "epoch": 0.26421176470588237, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0423, "step": 11229 }, { "epoch": 0.26423529411764707, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1139, "step": 11230 }, { "epoch": 0.26425882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9077, "step": 11231 }, { "epoch": 0.26428235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9167, "step": 11232 }, { "epoch": 0.26430588235294117, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1144, "step": 11233 }, { "epoch": 0.26432941176470587, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0625, "step": 11234 }, { "epoch": 0.26435294117647057, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0379, "step": 11235 }, { "epoch": 0.26437647058823527, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.4356, "step": 11236 }, { "epoch": 0.2644, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1259, "step": 11237 }, { "epoch": 0.2644235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1499, "step": 11238 }, { "epoch": 0.2644470588235294, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.9318, "step": 11239 }, { "epoch": 0.2644705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.146, "step": 11240 }, { "epoch": 0.2644941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0216, "step": 11241 }, { "epoch": 0.2645176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.989, "step": 11242 }, { "epoch": 0.2645411764705882, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3618, "step": 11243 }, { "epoch": 0.2645647058823529, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8553, "step": 11244 }, { "epoch": 0.2645882352941176, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3232, "step": 11245 }, { "epoch": 0.2646117647058824, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9605, "step": 11246 }, { "epoch": 0.2646352941176471, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9078, "step": 11247 }, { "epoch": 0.2646588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0677, "step": 11248 }, { "epoch": 0.2646823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2186, "step": 11249 }, { "epoch": 0.2647058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1978, "step": 11250 }, { "epoch": 0.2647294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9359, "step": 11251 }, { "epoch": 0.2647529411764706, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0172, "step": 11252 }, { "epoch": 0.2647764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.225, "step": 11253 }, { "epoch": 0.2648, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1368, "step": 11254 }, { "epoch": 0.2648235294117647, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0642, "step": 11255 }, { "epoch": 0.26484705882352944, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2373, "step": 11256 }, { "epoch": 0.26487058823529414, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8762, "step": 11257 }, { "epoch": 0.26489411764705884, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0431, "step": 11258 }, { "epoch": 0.26491764705882354, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.013, "step": 11259 }, { "epoch": 0.26494117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9224, "step": 11260 }, { "epoch": 0.26496470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9402, "step": 11261 }, { "epoch": 0.26498823529411764, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8993, "step": 11262 }, { "epoch": 0.26501176470588234, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1854, "step": 11263 }, { "epoch": 0.26503529411764704, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8947, "step": 11264 }, { "epoch": 0.2650588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1594, "step": 11265 }, { "epoch": 0.2650823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.867, "step": 11266 }, { "epoch": 0.2651058823529412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2641, "step": 11267 }, { "epoch": 0.2651294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8658, "step": 11268 }, { "epoch": 0.2651529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1422, "step": 11269 }, { "epoch": 0.2651764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2658, "step": 11270 }, { "epoch": 0.2652, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8308, "step": 11271 }, { "epoch": 0.2652235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0782, "step": 11272 }, { "epoch": 0.2652470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.247, "step": 11273 }, { "epoch": 0.2652705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1682, "step": 11274 }, { "epoch": 0.26529411764705885, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1111, "step": 11275 }, { "epoch": 0.26531764705882355, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.3736, "step": 11276 }, { "epoch": 0.26534117647058825, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0137, "step": 11277 }, { "epoch": 0.26536470588235295, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.275, "step": 11278 }, { "epoch": 0.26538823529411765, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.7857, "step": 11279 }, { "epoch": 0.26541176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1766, "step": 11280 }, { "epoch": 0.26543529411764705, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8594, "step": 11281 }, { "epoch": 0.26545882352941175, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1871, "step": 11282 }, { "epoch": 0.26548235294117645, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3866, "step": 11283 }, { "epoch": 0.2655058823529412, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9893, "step": 11284 }, { "epoch": 0.2655294117647059, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2744, "step": 11285 }, { "epoch": 0.2655529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7681, "step": 11286 }, { "epoch": 0.2655764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9646, "step": 11287 }, { "epoch": 0.2656, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.256, "step": 11288 }, { "epoch": 0.2656235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8745, "step": 11289 }, { "epoch": 0.2656470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1692, "step": 11290 }, { "epoch": 0.2656705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2934, "step": 11291 }, { "epoch": 0.2656941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9761, "step": 11292 }, { "epoch": 0.2657176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8384, "step": 11293 }, { "epoch": 0.26574117647058826, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0146, "step": 11294 }, { "epoch": 0.26576470588235296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0566, "step": 11295 }, { "epoch": 0.26578823529411766, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1184, "step": 11296 }, { "epoch": 0.26581176470588236, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0465, "step": 11297 }, { "epoch": 0.26583529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9708, "step": 11298 }, { "epoch": 0.26585882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1767, "step": 11299 }, { "epoch": 0.26588235294117646, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9335, "step": 11300 }, { "epoch": 0.26590588235294116, "grad_norm": 0.265625, "learning_rate": 0.02, "loss": 0.7235, "step": 11301 }, { "epoch": 0.26592941176470586, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.8998, "step": 11302 }, { "epoch": 0.2659529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1069, "step": 11303 }, { "epoch": 0.2659764705882353, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8211, "step": 11304 }, { "epoch": 0.266, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.6308, "step": 11305 }, { "epoch": 0.2660235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8755, "step": 11306 }, { "epoch": 0.2660470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1366, "step": 11307 }, { "epoch": 0.2660705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0348, "step": 11308 }, { "epoch": 0.2660941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.097, "step": 11309 }, { "epoch": 0.2661176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9582, "step": 11310 }, { "epoch": 0.2661411764705882, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0592, "step": 11311 }, { "epoch": 0.2661647058823529, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.1434, "step": 11312 }, { "epoch": 0.26618823529411767, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8243, "step": 11313 }, { "epoch": 0.26621176470588237, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9868, "step": 11314 }, { "epoch": 0.26623529411764707, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1106, "step": 11315 }, { "epoch": 0.26625882352941177, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1599, "step": 11316 }, { "epoch": 0.26628235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1546, "step": 11317 }, { "epoch": 0.26630588235294117, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2413, "step": 11318 }, { "epoch": 0.26632941176470587, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0093, "step": 11319 }, { "epoch": 0.26635294117647057, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0169, "step": 11320 }, { "epoch": 0.26637647058823527, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8621, "step": 11321 }, { "epoch": 0.2664, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.6646, "step": 11322 }, { "epoch": 0.2664235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0431, "step": 11323 }, { "epoch": 0.2664470588235294, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9006, "step": 11324 }, { "epoch": 0.2664705882352941, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9024, "step": 11325 }, { "epoch": 0.2664941176470588, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9743, "step": 11326 }, { "epoch": 0.2665176470588235, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0598, "step": 11327 }, { "epoch": 0.2665411764705882, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0459, "step": 11328 }, { "epoch": 0.2665647058823529, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1201, "step": 11329 }, { "epoch": 0.2665882352941176, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0099, "step": 11330 }, { "epoch": 0.2666117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8616, "step": 11331 }, { "epoch": 0.2666352941176471, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1544, "step": 11332 }, { "epoch": 0.2666588235294118, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0269, "step": 11333 }, { "epoch": 0.2666823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0535, "step": 11334 }, { "epoch": 0.2667058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7548, "step": 11335 }, { "epoch": 0.2667294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8712, "step": 11336 }, { "epoch": 0.2667529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0438, "step": 11337 }, { "epoch": 0.2667764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8883, "step": 11338 }, { "epoch": 0.2668, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.982, "step": 11339 }, { "epoch": 0.2668235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0338, "step": 11340 }, { "epoch": 0.26684705882352944, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7856, "step": 11341 }, { "epoch": 0.26687058823529414, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0582, "step": 11342 }, { "epoch": 0.26689411764705884, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.178, "step": 11343 }, { "epoch": 0.26691764705882354, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9993, "step": 11344 }, { "epoch": 0.26694117647058824, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9468, "step": 11345 }, { "epoch": 0.26696470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0519, "step": 11346 }, { "epoch": 0.26698823529411764, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.6991, "step": 11347 }, { "epoch": 0.26701176470588234, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.048, "step": 11348 }, { "epoch": 0.26703529411764704, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9432, "step": 11349 }, { "epoch": 0.26705882352941174, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9127, "step": 11350 }, { "epoch": 0.2670823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0621, "step": 11351 }, { "epoch": 0.2671058823529412, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0492, "step": 11352 }, { "epoch": 0.2671294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0639, "step": 11353 }, { "epoch": 0.2671529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8173, "step": 11354 }, { "epoch": 0.2671764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8241, "step": 11355 }, { "epoch": 0.2672, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0904, "step": 11356 }, { "epoch": 0.2672235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8791, "step": 11357 }, { "epoch": 0.2672470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1235, "step": 11358 }, { "epoch": 0.2672705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2476, "step": 11359 }, { "epoch": 0.26729411764705885, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0053, "step": 11360 }, { "epoch": 0.26731764705882355, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.161, "step": 11361 }, { "epoch": 0.26734117647058825, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1589, "step": 11362 }, { "epoch": 0.26736470588235295, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1312, "step": 11363 }, { "epoch": 0.26738823529411765, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0289, "step": 11364 }, { "epoch": 0.26741176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1901, "step": 11365 }, { "epoch": 0.26743529411764705, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2061, "step": 11366 }, { "epoch": 0.26745882352941175, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8971, "step": 11367 }, { "epoch": 0.26748235294117645, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9604, "step": 11368 }, { "epoch": 0.26750588235294115, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0573, "step": 11369 }, { "epoch": 0.2675294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9839, "step": 11370 }, { "epoch": 0.2675529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7956, "step": 11371 }, { "epoch": 0.2675764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.7788, "step": 11372 }, { "epoch": 0.2676, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1612, "step": 11373 }, { "epoch": 0.2676235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9005, "step": 11374 }, { "epoch": 0.2676470588235294, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7847, "step": 11375 }, { "epoch": 0.2676705882352941, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2014, "step": 11376 }, { "epoch": 0.2676941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9073, "step": 11377 }, { "epoch": 0.2677176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0289, "step": 11378 }, { "epoch": 0.26774117647058826, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9428, "step": 11379 }, { "epoch": 0.26776470588235296, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.7471, "step": 11380 }, { "epoch": 0.26778823529411766, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3687, "step": 11381 }, { "epoch": 0.26781176470588236, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9536, "step": 11382 }, { "epoch": 0.26783529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1551, "step": 11383 }, { "epoch": 0.26785882352941176, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0596, "step": 11384 }, { "epoch": 0.26788235294117646, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2381, "step": 11385 }, { "epoch": 0.26790588235294116, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9722, "step": 11386 }, { "epoch": 0.26792941176470586, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0026, "step": 11387 }, { "epoch": 0.2679529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2065, "step": 11388 }, { "epoch": 0.2679764705882353, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.9453, "step": 11389 }, { "epoch": 0.268, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1224, "step": 11390 }, { "epoch": 0.2680235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1596, "step": 11391 }, { "epoch": 0.2680470588235294, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.745, "step": 11392 }, { "epoch": 0.2680705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8948, "step": 11393 }, { "epoch": 0.2680941176470588, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8544, "step": 11394 }, { "epoch": 0.2681176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1796, "step": 11395 }, { "epoch": 0.2681411764705882, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8036, "step": 11396 }, { "epoch": 0.2681647058823529, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2128, "step": 11397 }, { "epoch": 0.26818823529411767, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0138, "step": 11398 }, { "epoch": 0.26821176470588237, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7035, "step": 11399 }, { "epoch": 0.26823529411764707, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8617, "step": 11400 }, { "epoch": 0.26825882352941177, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9805, "step": 11401 }, { "epoch": 0.26828235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2599, "step": 11402 }, { "epoch": 0.26830588235294117, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.7585, "step": 11403 }, { "epoch": 0.26832941176470587, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0852, "step": 11404 }, { "epoch": 0.26835294117647057, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.157, "step": 11405 }, { "epoch": 0.26837647058823527, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9471, "step": 11406 }, { "epoch": 0.2684, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7588, "step": 11407 }, { "epoch": 0.2684235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1083, "step": 11408 }, { "epoch": 0.2684470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2664, "step": 11409 }, { "epoch": 0.2684705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0537, "step": 11410 }, { "epoch": 0.2684941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.955, "step": 11411 }, { "epoch": 0.2685176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8265, "step": 11412 }, { "epoch": 0.2685411764705882, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7227, "step": 11413 }, { "epoch": 0.2685647058823529, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9959, "step": 11414 }, { "epoch": 0.2685882352941176, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2192, "step": 11415 }, { "epoch": 0.2686117647058823, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8721, "step": 11416 }, { "epoch": 0.2686352941176471, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0374, "step": 11417 }, { "epoch": 0.2686588235294118, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1158, "step": 11418 }, { "epoch": 0.2686823529411765, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2848, "step": 11419 }, { "epoch": 0.2687058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.3181, "step": 11420 }, { "epoch": 0.2687294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2834, "step": 11421 }, { "epoch": 0.2687529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0922, "step": 11422 }, { "epoch": 0.2687764705882353, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.7115, "step": 11423 }, { "epoch": 0.2688, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.3314, "step": 11424 }, { "epoch": 0.2688235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8489, "step": 11425 }, { "epoch": 0.26884705882352944, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.2442, "step": 11426 }, { "epoch": 0.26887058823529414, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9171, "step": 11427 }, { "epoch": 0.26889411764705884, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0225, "step": 11428 }, { "epoch": 0.26891764705882354, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0745, "step": 11429 }, { "epoch": 0.26894117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9269, "step": 11430 }, { "epoch": 0.26896470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1231, "step": 11431 }, { "epoch": 0.26898823529411764, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1357, "step": 11432 }, { "epoch": 0.26901176470588234, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1208, "step": 11433 }, { "epoch": 0.26903529411764704, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1399, "step": 11434 }, { "epoch": 0.26905882352941174, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0477, "step": 11435 }, { "epoch": 0.2690823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9729, "step": 11436 }, { "epoch": 0.2691058823529412, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.345, "step": 11437 }, { "epoch": 0.2691294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1892, "step": 11438 }, { "epoch": 0.2691529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1074, "step": 11439 }, { "epoch": 0.2691764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.96, "step": 11440 }, { "epoch": 0.2692, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.902, "step": 11441 }, { "epoch": 0.2692235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1162, "step": 11442 }, { "epoch": 0.2692470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9941, "step": 11443 }, { "epoch": 0.2692705882352941, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9168, "step": 11444 }, { "epoch": 0.26929411764705885, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.1482, "step": 11445 }, { "epoch": 0.26931764705882355, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9162, "step": 11446 }, { "epoch": 0.26934117647058825, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.984, "step": 11447 }, { "epoch": 0.26936470588235295, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2177, "step": 11448 }, { "epoch": 0.26938823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9734, "step": 11449 }, { "epoch": 0.26941176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1006, "step": 11450 }, { "epoch": 0.26943529411764705, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1555, "step": 11451 }, { "epoch": 0.26945882352941175, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.7259, "step": 11452 }, { "epoch": 0.26948235294117645, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0489, "step": 11453 }, { "epoch": 0.26950588235294115, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1205, "step": 11454 }, { "epoch": 0.2695294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.168, "step": 11455 }, { "epoch": 0.2695529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9073, "step": 11456 }, { "epoch": 0.2695764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9469, "step": 11457 }, { "epoch": 0.2696, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7847, "step": 11458 }, { "epoch": 0.2696235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1429, "step": 11459 }, { "epoch": 0.2696470588235294, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9126, "step": 11460 }, { "epoch": 0.2696705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0869, "step": 11461 }, { "epoch": 0.2696941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2113, "step": 11462 }, { "epoch": 0.2697176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2556, "step": 11463 }, { "epoch": 0.26974117647058826, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.3164, "step": 11464 }, { "epoch": 0.26976470588235296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1684, "step": 11465 }, { "epoch": 0.26978823529411766, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 0.6541, "step": 11466 }, { "epoch": 0.26981176470588236, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0955, "step": 11467 }, { "epoch": 0.26983529411764706, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.5225, "step": 11468 }, { "epoch": 0.26985882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1363, "step": 11469 }, { "epoch": 0.26988235294117646, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0684, "step": 11470 }, { "epoch": 0.26990588235294116, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.7523, "step": 11471 }, { "epoch": 0.26992941176470586, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8035, "step": 11472 }, { "epoch": 0.26995294117647056, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2701, "step": 11473 }, { "epoch": 0.2699764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1338, "step": 11474 }, { "epoch": 0.27, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.013, "step": 11475 }, { "epoch": 0.2700235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2899, "step": 11476 }, { "epoch": 0.2700470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.792, "step": 11477 }, { "epoch": 0.2700705882352941, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2324, "step": 11478 }, { "epoch": 0.2700941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.7748, "step": 11479 }, { "epoch": 0.2701176470588235, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.5307, "step": 11480 }, { "epoch": 0.2701411764705882, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9984, "step": 11481 }, { "epoch": 0.2701647058823529, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2434, "step": 11482 }, { "epoch": 0.2701882352941177, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3396, "step": 11483 }, { "epoch": 0.2702117647058824, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.251, "step": 11484 }, { "epoch": 0.2702352941176471, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.462, "step": 11485 }, { "epoch": 0.2702588235294118, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.95, "step": 11486 }, { "epoch": 0.2702823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0374, "step": 11487 }, { "epoch": 0.2703058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0235, "step": 11488 }, { "epoch": 0.2703294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0673, "step": 11489 }, { "epoch": 0.2703529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0376, "step": 11490 }, { "epoch": 0.2703764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8872, "step": 11491 }, { "epoch": 0.2704, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1263, "step": 11492 }, { "epoch": 0.27042352941176473, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8277, "step": 11493 }, { "epoch": 0.27044705882352943, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0716, "step": 11494 }, { "epoch": 0.27047058823529413, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3206, "step": 11495 }, { "epoch": 0.27049411764705883, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7337, "step": 11496 }, { "epoch": 0.27051764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.5774, "step": 11497 }, { "epoch": 0.27054117647058823, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2405, "step": 11498 }, { "epoch": 0.27056470588235293, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2337, "step": 11499 }, { "epoch": 0.27058823529411763, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.7478, "step": 11500 }, { "epoch": 0.27061176470588233, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9298, "step": 11501 }, { "epoch": 0.2706352941176471, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2377, "step": 11502 }, { "epoch": 0.2706588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9466, "step": 11503 }, { "epoch": 0.2706823529411765, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9732, "step": 11504 }, { "epoch": 0.2707058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2239, "step": 11505 }, { "epoch": 0.2707294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1537, "step": 11506 }, { "epoch": 0.2707529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.178, "step": 11507 }, { "epoch": 0.2707764705882353, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.5365, "step": 11508 }, { "epoch": 0.2708, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9436, "step": 11509 }, { "epoch": 0.2708235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9113, "step": 11510 }, { "epoch": 0.2708470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9294, "step": 11511 }, { "epoch": 0.27087058823529414, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9605, "step": 11512 }, { "epoch": 0.27089411764705884, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9256, "step": 11513 }, { "epoch": 0.27091764705882354, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0877, "step": 11514 }, { "epoch": 0.27094117647058824, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9695, "step": 11515 }, { "epoch": 0.27096470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.895, "step": 11516 }, { "epoch": 0.27098823529411764, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8987, "step": 11517 }, { "epoch": 0.27101176470588234, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.146, "step": 11518 }, { "epoch": 0.27103529411764704, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0679, "step": 11519 }, { "epoch": 0.27105882352941174, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 1.0068, "step": 11520 }, { "epoch": 0.2710823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9281, "step": 11521 }, { "epoch": 0.2711058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7241, "step": 11522 }, { "epoch": 0.2711294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9394, "step": 11523 }, { "epoch": 0.2711529411764706, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.262, "step": 11524 }, { "epoch": 0.2711764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0561, "step": 11525 }, { "epoch": 0.2712, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0241, "step": 11526 }, { "epoch": 0.2712235294117647, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3369, "step": 11527 }, { "epoch": 0.2712470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1096, "step": 11528 }, { "epoch": 0.2712705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9184, "step": 11529 }, { "epoch": 0.2712941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9478, "step": 11530 }, { "epoch": 0.27131764705882355, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0133, "step": 11531 }, { "epoch": 0.27134117647058825, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2551, "step": 11532 }, { "epoch": 0.27136470588235295, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1693, "step": 11533 }, { "epoch": 0.27138823529411765, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8999, "step": 11534 }, { "epoch": 0.27141176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.16, "step": 11535 }, { "epoch": 0.27143529411764705, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2632, "step": 11536 }, { "epoch": 0.27145882352941175, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2023, "step": 11537 }, { "epoch": 0.27148235294117645, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9446, "step": 11538 }, { "epoch": 0.27150588235294115, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.778, "step": 11539 }, { "epoch": 0.2715294117647059, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1704, "step": 11540 }, { "epoch": 0.2715529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.142, "step": 11541 }, { "epoch": 0.2715764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8319, "step": 11542 }, { "epoch": 0.2716, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9816, "step": 11543 }, { "epoch": 0.2716235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7186, "step": 11544 }, { "epoch": 0.2716470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9368, "step": 11545 }, { "epoch": 0.2716705882352941, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8218, "step": 11546 }, { "epoch": 0.2716941176470588, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4663, "step": 11547 }, { "epoch": 0.2717176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2956, "step": 11548 }, { "epoch": 0.2717411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.146, "step": 11549 }, { "epoch": 0.27176470588235296, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1846, "step": 11550 }, { "epoch": 0.27178823529411766, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0111, "step": 11551 }, { "epoch": 0.27181176470588236, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3045, "step": 11552 }, { "epoch": 0.27183529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7517, "step": 11553 }, { "epoch": 0.27185882352941176, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8918, "step": 11554 }, { "epoch": 0.27188235294117646, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2934, "step": 11555 }, { "epoch": 0.27190588235294116, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1859, "step": 11556 }, { "epoch": 0.27192941176470586, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2673, "step": 11557 }, { "epoch": 0.27195294117647056, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.987, "step": 11558 }, { "epoch": 0.2719764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8428, "step": 11559 }, { "epoch": 0.272, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.8809, "step": 11560 }, { "epoch": 0.2720235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1242, "step": 11561 }, { "epoch": 0.2720470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2332, "step": 11562 }, { "epoch": 0.2720705882352941, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0204, "step": 11563 }, { "epoch": 0.2720941176470588, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.8424, "step": 11564 }, { "epoch": 0.2721176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0968, "step": 11565 }, { "epoch": 0.2721411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0327, "step": 11566 }, { "epoch": 0.2721647058823529, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0215, "step": 11567 }, { "epoch": 0.2721882352941176, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0129, "step": 11568 }, { "epoch": 0.2722117647058824, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0813, "step": 11569 }, { "epoch": 0.2722352941176471, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9239, "step": 11570 }, { "epoch": 0.2722588235294118, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9896, "step": 11571 }, { "epoch": 0.2722823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9388, "step": 11572 }, { "epoch": 0.2723058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0634, "step": 11573 }, { "epoch": 0.2723294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9546, "step": 11574 }, { "epoch": 0.2723529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3395, "step": 11575 }, { "epoch": 0.2723764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1694, "step": 11576 }, { "epoch": 0.2724, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9947, "step": 11577 }, { "epoch": 0.27242352941176473, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.6846, "step": 11578 }, { "epoch": 0.27244705882352943, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0699, "step": 11579 }, { "epoch": 0.27247058823529413, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5458, "step": 11580 }, { "epoch": 0.27249411764705883, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8817, "step": 11581 }, { "epoch": 0.27251764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2843, "step": 11582 }, { "epoch": 0.27254117647058823, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9361, "step": 11583 }, { "epoch": 0.27256470588235293, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 1.0085, "step": 11584 }, { "epoch": 0.27258823529411763, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0339, "step": 11585 }, { "epoch": 0.27261176470588233, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1165, "step": 11586 }, { "epoch": 0.27263529411764703, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9429, "step": 11587 }, { "epoch": 0.2726588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0569, "step": 11588 }, { "epoch": 0.2726823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9708, "step": 11589 }, { "epoch": 0.2727058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1425, "step": 11590 }, { "epoch": 0.2727294117647059, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9431, "step": 11591 }, { "epoch": 0.2727529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0194, "step": 11592 }, { "epoch": 0.2727764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.588, "step": 11593 }, { "epoch": 0.2728, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1999, "step": 11594 }, { "epoch": 0.2728235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1689, "step": 11595 }, { "epoch": 0.2728470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.121, "step": 11596 }, { "epoch": 0.27287058823529414, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.044, "step": 11597 }, { "epoch": 0.27289411764705884, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3233, "step": 11598 }, { "epoch": 0.27291764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2225, "step": 11599 }, { "epoch": 0.27294117647058824, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1698, "step": 11600 }, { "epoch": 0.27296470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9677, "step": 11601 }, { "epoch": 0.27298823529411764, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1981, "step": 11602 }, { "epoch": 0.27301176470588234, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0384, "step": 11603 }, { "epoch": 0.27303529411764704, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.6626, "step": 11604 }, { "epoch": 0.27305882352941174, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1482, "step": 11605 }, { "epoch": 0.2730823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1038, "step": 11606 }, { "epoch": 0.2731058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8063, "step": 11607 }, { "epoch": 0.2731294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8338, "step": 11608 }, { "epoch": 0.2731529411764706, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3598, "step": 11609 }, { "epoch": 0.2731764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.2244, "step": 11610 }, { "epoch": 0.2732, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.706, "step": 11611 }, { "epoch": 0.2732235294117647, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.7318, "step": 11612 }, { "epoch": 0.2732470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2053, "step": 11613 }, { "epoch": 0.2732705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1171, "step": 11614 }, { "epoch": 0.2732941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1898, "step": 11615 }, { "epoch": 0.27331764705882355, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8408, "step": 11616 }, { "epoch": 0.27334117647058825, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0953, "step": 11617 }, { "epoch": 0.27336470588235295, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3185, "step": 11618 }, { "epoch": 0.27338823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2178, "step": 11619 }, { "epoch": 0.27341176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1323, "step": 11620 }, { "epoch": 0.27343529411764705, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.4145, "step": 11621 }, { "epoch": 0.27345882352941175, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0205, "step": 11622 }, { "epoch": 0.27348235294117645, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0705, "step": 11623 }, { "epoch": 0.27350588235294115, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2433, "step": 11624 }, { "epoch": 0.2735294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0119, "step": 11625 }, { "epoch": 0.2735529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1422, "step": 11626 }, { "epoch": 0.2735764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8321, "step": 11627 }, { "epoch": 0.2736, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.057, "step": 11628 }, { "epoch": 0.2736235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1331, "step": 11629 }, { "epoch": 0.2736470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2641, "step": 11630 }, { "epoch": 0.2736705882352941, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9808, "step": 11631 }, { "epoch": 0.2736941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1829, "step": 11632 }, { "epoch": 0.2737176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1312, "step": 11633 }, { "epoch": 0.2737411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9572, "step": 11634 }, { "epoch": 0.27376470588235297, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.7593, "step": 11635 }, { "epoch": 0.27378823529411767, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0291, "step": 11636 }, { "epoch": 0.27381176470588237, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.8518, "step": 11637 }, { "epoch": 0.27383529411764707, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7162, "step": 11638 }, { "epoch": 0.27385882352941177, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.127, "step": 11639 }, { "epoch": 0.27388235294117647, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9638, "step": 11640 }, { "epoch": 0.27390588235294117, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.726, "step": 11641 }, { "epoch": 0.27392941176470587, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3323, "step": 11642 }, { "epoch": 0.27395294117647057, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9334, "step": 11643 }, { "epoch": 0.2739764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.6978, "step": 11644 }, { "epoch": 0.274, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1106, "step": 11645 }, { "epoch": 0.2740235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.6714, "step": 11646 }, { "epoch": 0.2740470588235294, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.7414, "step": 11647 }, { "epoch": 0.2740705882352941, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2415, "step": 11648 }, { "epoch": 0.2740941176470588, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7395, "step": 11649 }, { "epoch": 0.2741176470588235, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3503, "step": 11650 }, { "epoch": 0.2741411764705882, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8213, "step": 11651 }, { "epoch": 0.2741647058823529, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0262, "step": 11652 }, { "epoch": 0.2741882352941176, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2077, "step": 11653 }, { "epoch": 0.2742117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.107, "step": 11654 }, { "epoch": 0.2742352941176471, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1869, "step": 11655 }, { "epoch": 0.2742588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9131, "step": 11656 }, { "epoch": 0.2742823529411765, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3635, "step": 11657 }, { "epoch": 0.2743058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.2263, "step": 11658 }, { "epoch": 0.2743294117647059, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.407, "step": 11659 }, { "epoch": 0.2743529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0905, "step": 11660 }, { "epoch": 0.2743764705882353, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7986, "step": 11661 }, { "epoch": 0.2744, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1262, "step": 11662 }, { "epoch": 0.27442352941176473, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.7419, "step": 11663 }, { "epoch": 0.27444705882352943, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0386, "step": 11664 }, { "epoch": 0.27447058823529413, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1122, "step": 11665 }, { "epoch": 0.27449411764705883, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2997, "step": 11666 }, { "epoch": 0.27451764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.3715, "step": 11667 }, { "epoch": 0.27454117647058823, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0226, "step": 11668 }, { "epoch": 0.27456470588235293, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.292, "step": 11669 }, { "epoch": 0.27458823529411763, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 1.0309, "step": 11670 }, { "epoch": 0.27461176470588233, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.987, "step": 11671 }, { "epoch": 0.27463529411764703, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8645, "step": 11672 }, { "epoch": 0.2746588235294118, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2259, "step": 11673 }, { "epoch": 0.2746823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.331, "step": 11674 }, { "epoch": 0.2747058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.009, "step": 11675 }, { "epoch": 0.2747294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9794, "step": 11676 }, { "epoch": 0.2747529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9428, "step": 11677 }, { "epoch": 0.2747764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1406, "step": 11678 }, { "epoch": 0.2748, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2708, "step": 11679 }, { "epoch": 0.2748235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0442, "step": 11680 }, { "epoch": 0.2748470588235294, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.7919, "step": 11681 }, { "epoch": 0.27487058823529414, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9075, "step": 11682 }, { "epoch": 0.27489411764705884, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9011, "step": 11683 }, { "epoch": 0.27491764705882354, "grad_norm": 0.259765625, "learning_rate": 0.02, "loss": 0.7846, "step": 11684 }, { "epoch": 0.27494117647058824, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8397, "step": 11685 }, { "epoch": 0.27496470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.306, "step": 11686 }, { "epoch": 0.27498823529411764, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9364, "step": 11687 }, { "epoch": 0.27501176470588234, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2143, "step": 11688 }, { "epoch": 0.27503529411764704, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.5873, "step": 11689 }, { "epoch": 0.27505882352941174, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8496, "step": 11690 }, { "epoch": 0.27508235294117644, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0846, "step": 11691 }, { "epoch": 0.2751058823529412, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9347, "step": 11692 }, { "epoch": 0.2751294117647059, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.7268, "step": 11693 }, { "epoch": 0.2751529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2551, "step": 11694 }, { "epoch": 0.2751764705882353, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2714, "step": 11695 }, { "epoch": 0.2752, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0063, "step": 11696 }, { "epoch": 0.2752235294117647, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9754, "step": 11697 }, { "epoch": 0.2752470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1967, "step": 11698 }, { "epoch": 0.2752705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2019, "step": 11699 }, { "epoch": 0.2752941176470588, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9422, "step": 11700 }, { "epoch": 0.27531764705882356, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0384, "step": 11701 }, { "epoch": 0.27534117647058826, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9922, "step": 11702 }, { "epoch": 0.27536470588235296, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.6817, "step": 11703 }, { "epoch": 0.27538823529411766, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9571, "step": 11704 }, { "epoch": 0.27541176470588236, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0666, "step": 11705 }, { "epoch": 0.27543529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0163, "step": 11706 }, { "epoch": 0.27545882352941176, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1279, "step": 11707 }, { "epoch": 0.27548235294117646, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0728, "step": 11708 }, { "epoch": 0.27550588235294116, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1782, "step": 11709 }, { "epoch": 0.27552941176470586, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1789, "step": 11710 }, { "epoch": 0.2755529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9352, "step": 11711 }, { "epoch": 0.2755764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8881, "step": 11712 }, { "epoch": 0.2756, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2719, "step": 11713 }, { "epoch": 0.2756235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.176, "step": 11714 }, { "epoch": 0.2756470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.963, "step": 11715 }, { "epoch": 0.2756705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7237, "step": 11716 }, { "epoch": 0.2756941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1581, "step": 11717 }, { "epoch": 0.2757176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3708, "step": 11718 }, { "epoch": 0.2757411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0352, "step": 11719 }, { "epoch": 0.27576470588235297, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1721, "step": 11720 }, { "epoch": 0.27578823529411767, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8101, "step": 11721 }, { "epoch": 0.27581176470588237, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.4292, "step": 11722 }, { "epoch": 0.27583529411764707, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1172, "step": 11723 }, { "epoch": 0.27585882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0598, "step": 11724 }, { "epoch": 0.27588235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9851, "step": 11725 }, { "epoch": 0.27590588235294117, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0622, "step": 11726 }, { "epoch": 0.27592941176470587, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9857, "step": 11727 }, { "epoch": 0.27595294117647057, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.8301, "step": 11728 }, { "epoch": 0.27597647058823527, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9431, "step": 11729 }, { "epoch": 0.276, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0595, "step": 11730 }, { "epoch": 0.2760235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.6929, "step": 11731 }, { "epoch": 0.2760470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8581, "step": 11732 }, { "epoch": 0.2760705882352941, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9353, "step": 11733 }, { "epoch": 0.2760941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0915, "step": 11734 }, { "epoch": 0.2761176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9283, "step": 11735 }, { "epoch": 0.2761411764705882, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.8632, "step": 11736 }, { "epoch": 0.2761647058823529, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7861, "step": 11737 }, { "epoch": 0.2761882352941176, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1116, "step": 11738 }, { "epoch": 0.2762117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0977, "step": 11739 }, { "epoch": 0.2762352941176471, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2552, "step": 11740 }, { "epoch": 0.2762588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0902, "step": 11741 }, { "epoch": 0.2762823529411765, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7634, "step": 11742 }, { "epoch": 0.2763058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8784, "step": 11743 }, { "epoch": 0.2763294117647059, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7279, "step": 11744 }, { "epoch": 0.2763529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.356, "step": 11745 }, { "epoch": 0.2763764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9991, "step": 11746 }, { "epoch": 0.2764, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1725, "step": 11747 }, { "epoch": 0.2764235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9894, "step": 11748 }, { "epoch": 0.27644705882352943, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2154, "step": 11749 }, { "epoch": 0.27647058823529413, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8499, "step": 11750 }, { "epoch": 0.27649411764705883, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1061, "step": 11751 }, { "epoch": 0.27651764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.309, "step": 11752 }, { "epoch": 0.27654117647058823, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.8169, "step": 11753 }, { "epoch": 0.27656470588235293, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.1287, "step": 11754 }, { "epoch": 0.27658823529411763, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.965, "step": 11755 }, { "epoch": 0.27661176470588233, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0656, "step": 11756 }, { "epoch": 0.27663529411764703, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.7234, "step": 11757 }, { "epoch": 0.2766588235294118, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1649, "step": 11758 }, { "epoch": 0.2766823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9265, "step": 11759 }, { "epoch": 0.2767058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2406, "step": 11760 }, { "epoch": 0.2767294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2155, "step": 11761 }, { "epoch": 0.2767529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8698, "step": 11762 }, { "epoch": 0.2767764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8546, "step": 11763 }, { "epoch": 0.2768, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9287, "step": 11764 }, { "epoch": 0.2768235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.101, "step": 11765 }, { "epoch": 0.2768470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1184, "step": 11766 }, { "epoch": 0.2768705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0632, "step": 11767 }, { "epoch": 0.27689411764705885, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1409, "step": 11768 }, { "epoch": 0.27691764705882355, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1145, "step": 11769 }, { "epoch": 0.27694117647058825, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9075, "step": 11770 }, { "epoch": 0.27696470588235295, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9089, "step": 11771 }, { "epoch": 0.27698823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0405, "step": 11772 }, { "epoch": 0.27701176470588235, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0955, "step": 11773 }, { "epoch": 0.27703529411764705, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8421, "step": 11774 }, { "epoch": 0.27705882352941175, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0114, "step": 11775 }, { "epoch": 0.27708235294117645, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0234, "step": 11776 }, { "epoch": 0.2771058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.059, "step": 11777 }, { "epoch": 0.2771294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0979, "step": 11778 }, { "epoch": 0.2771529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1224, "step": 11779 }, { "epoch": 0.2771764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.022, "step": 11780 }, { "epoch": 0.2772, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3697, "step": 11781 }, { "epoch": 0.2772235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0152, "step": 11782 }, { "epoch": 0.2772470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2471, "step": 11783 }, { "epoch": 0.2772705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0954, "step": 11784 }, { "epoch": 0.2772941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8407, "step": 11785 }, { "epoch": 0.2773176470588235, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.8578, "step": 11786 }, { "epoch": 0.27734117647058826, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.6582, "step": 11787 }, { "epoch": 0.27736470588235296, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2049, "step": 11788 }, { "epoch": 0.27738823529411766, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2137, "step": 11789 }, { "epoch": 0.27741176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9056, "step": 11790 }, { "epoch": 0.27743529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0641, "step": 11791 }, { "epoch": 0.27745882352941176, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2699, "step": 11792 }, { "epoch": 0.27748235294117646, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.11, "step": 11793 }, { "epoch": 0.27750588235294116, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1767, "step": 11794 }, { "epoch": 0.27752941176470586, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9111, "step": 11795 }, { "epoch": 0.2775529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2299, "step": 11796 }, { "epoch": 0.2775764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1307, "step": 11797 }, { "epoch": 0.2776, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0868, "step": 11798 }, { "epoch": 0.2776235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.096, "step": 11799 }, { "epoch": 0.2776470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.941, "step": 11800 }, { "epoch": 0.2776705882352941, "grad_norm": 0.25, "learning_rate": 0.02, "loss": 0.8265, "step": 11801 }, { "epoch": 0.2776941176470588, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1547, "step": 11802 }, { "epoch": 0.2777176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7752, "step": 11803 }, { "epoch": 0.2777411764705882, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0911, "step": 11804 }, { "epoch": 0.2777647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3444, "step": 11805 }, { "epoch": 0.27778823529411767, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1123, "step": 11806 }, { "epoch": 0.27781176470588237, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9818, "step": 11807 }, { "epoch": 0.27783529411764707, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.8749, "step": 11808 }, { "epoch": 0.27785882352941177, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1341, "step": 11809 }, { "epoch": 0.27788235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8329, "step": 11810 }, { "epoch": 0.27790588235294117, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.094, "step": 11811 }, { "epoch": 0.27792941176470587, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.9581, "step": 11812 }, { "epoch": 0.27795294117647057, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0911, "step": 11813 }, { "epoch": 0.27797647058823527, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0597, "step": 11814 }, { "epoch": 0.278, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.6962, "step": 11815 }, { "epoch": 0.2780235294117647, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.6799, "step": 11816 }, { "epoch": 0.2780470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1245, "step": 11817 }, { "epoch": 0.2780705882352941, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8384, "step": 11818 }, { "epoch": 0.2780941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1989, "step": 11819 }, { "epoch": 0.2781176470588235, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.765, "step": 11820 }, { "epoch": 0.2781411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9349, "step": 11821 }, { "epoch": 0.2781647058823529, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1193, "step": 11822 }, { "epoch": 0.2781882352941176, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8643, "step": 11823 }, { "epoch": 0.2782117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9229, "step": 11824 }, { "epoch": 0.2782352941176471, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.218, "step": 11825 }, { "epoch": 0.2782588235294118, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9474, "step": 11826 }, { "epoch": 0.2782823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3033, "step": 11827 }, { "epoch": 0.2783058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8453, "step": 11828 }, { "epoch": 0.2783294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9192, "step": 11829 }, { "epoch": 0.2783529411764706, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2817, "step": 11830 }, { "epoch": 0.2783764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.5551, "step": 11831 }, { "epoch": 0.2784, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6864, "step": 11832 }, { "epoch": 0.2784235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1051, "step": 11833 }, { "epoch": 0.27844705882352944, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8965, "step": 11834 }, { "epoch": 0.27847058823529414, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.8113, "step": 11835 }, { "epoch": 0.27849411764705884, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1343, "step": 11836 }, { "epoch": 0.27851764705882354, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1484, "step": 11837 }, { "epoch": 0.27854117647058824, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1825, "step": 11838 }, { "epoch": 0.27856470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.17, "step": 11839 }, { "epoch": 0.27858823529411764, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0904, "step": 11840 }, { "epoch": 0.27861176470588234, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.447, "step": 11841 }, { "epoch": 0.27863529411764704, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1676, "step": 11842 }, { "epoch": 0.2786588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2495, "step": 11843 }, { "epoch": 0.2786823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1032, "step": 11844 }, { "epoch": 0.2787058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9686, "step": 11845 }, { "epoch": 0.2787294117647059, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8827, "step": 11846 }, { "epoch": 0.2787529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3716, "step": 11847 }, { "epoch": 0.2787764705882353, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1951, "step": 11848 }, { "epoch": 0.2788, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0112, "step": 11849 }, { "epoch": 0.2788235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8311, "step": 11850 }, { "epoch": 0.2788470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2064, "step": 11851 }, { "epoch": 0.2788705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.272, "step": 11852 }, { "epoch": 0.27889411764705885, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.773, "step": 11853 }, { "epoch": 0.27891764705882355, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0724, "step": 11854 }, { "epoch": 0.27894117647058825, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8476, "step": 11855 }, { "epoch": 0.27896470588235295, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.979, "step": 11856 }, { "epoch": 0.27898823529411765, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4387, "step": 11857 }, { "epoch": 0.27901176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2725, "step": 11858 }, { "epoch": 0.27903529411764705, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3242, "step": 11859 }, { "epoch": 0.27905882352941175, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1703, "step": 11860 }, { "epoch": 0.27908235294117645, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.9029, "step": 11861 }, { "epoch": 0.2791058823529412, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9077, "step": 11862 }, { "epoch": 0.2791294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0451, "step": 11863 }, { "epoch": 0.2791529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0652, "step": 11864 }, { "epoch": 0.2791764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9921, "step": 11865 }, { "epoch": 0.2792, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0848, "step": 11866 }, { "epoch": 0.2792235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0801, "step": 11867 }, { "epoch": 0.2792470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.12, "step": 11868 }, { "epoch": 0.2792705882352941, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7505, "step": 11869 }, { "epoch": 0.2792941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0343, "step": 11870 }, { "epoch": 0.2793176470588235, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8491, "step": 11871 }, { "epoch": 0.27934117647058826, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0251, "step": 11872 }, { "epoch": 0.27936470588235296, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1722, "step": 11873 }, { "epoch": 0.27938823529411766, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8942, "step": 11874 }, { "epoch": 0.27941176470588236, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1248, "step": 11875 }, { "epoch": 0.27943529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1212, "step": 11876 }, { "epoch": 0.27945882352941176, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0468, "step": 11877 }, { "epoch": 0.27948235294117646, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.6832, "step": 11878 }, { "epoch": 0.27950588235294116, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9786, "step": 11879 }, { "epoch": 0.27952941176470586, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1181, "step": 11880 }, { "epoch": 0.2795529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1236, "step": 11881 }, { "epoch": 0.2795764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3408, "step": 11882 }, { "epoch": 0.2796, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2737, "step": 11883 }, { "epoch": 0.2796235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1258, "step": 11884 }, { "epoch": 0.2796470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0967, "step": 11885 }, { "epoch": 0.2796705882352941, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.853, "step": 11886 }, { "epoch": 0.2796941176470588, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5148, "step": 11887 }, { "epoch": 0.2797176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1018, "step": 11888 }, { "epoch": 0.2797411764705882, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2525, "step": 11889 }, { "epoch": 0.2797647058823529, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0585, "step": 11890 }, { "epoch": 0.27978823529411767, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1915, "step": 11891 }, { "epoch": 0.27981176470588237, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1926, "step": 11892 }, { "epoch": 0.27983529411764707, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8683, "step": 11893 }, { "epoch": 0.27985882352941177, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9376, "step": 11894 }, { "epoch": 0.27988235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.984, "step": 11895 }, { "epoch": 0.27990588235294117, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1118, "step": 11896 }, { "epoch": 0.27992941176470587, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9453, "step": 11897 }, { "epoch": 0.27995294117647057, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0867, "step": 11898 }, { "epoch": 0.27997647058823527, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.223, "step": 11899 }, { "epoch": 0.28, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1023, "step": 11900 }, { "epoch": 0.2800235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1389, "step": 11901 }, { "epoch": 0.2800470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.3451, "step": 11902 }, { "epoch": 0.2800705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9787, "step": 11903 }, { "epoch": 0.2800941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1785, "step": 11904 }, { "epoch": 0.2801176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0411, "step": 11905 }, { "epoch": 0.2801411764705882, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0385, "step": 11906 }, { "epoch": 0.2801647058823529, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0898, "step": 11907 }, { "epoch": 0.2801882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8951, "step": 11908 }, { "epoch": 0.2802117647058823, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.5363, "step": 11909 }, { "epoch": 0.2802352941176471, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1594, "step": 11910 }, { "epoch": 0.2802588235294118, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0256, "step": 11911 }, { "epoch": 0.2802823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1636, "step": 11912 }, { "epoch": 0.2803058823529412, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0363, "step": 11913 }, { "epoch": 0.2803294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2509, "step": 11914 }, { "epoch": 0.2803529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9525, "step": 11915 }, { "epoch": 0.2803764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1566, "step": 11916 }, { "epoch": 0.2804, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8125, "step": 11917 }, { "epoch": 0.2804235294117647, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.7389, "step": 11918 }, { "epoch": 0.28044705882352944, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0574, "step": 11919 }, { "epoch": 0.28047058823529414, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8187, "step": 11920 }, { "epoch": 0.28049411764705884, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9149, "step": 11921 }, { "epoch": 0.28051764705882354, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1057, "step": 11922 }, { "epoch": 0.28054117647058824, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2352, "step": 11923 }, { "epoch": 0.28056470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9803, "step": 11924 }, { "epoch": 0.28058823529411764, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9178, "step": 11925 }, { "epoch": 0.28061176470588234, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0539, "step": 11926 }, { "epoch": 0.28063529411764704, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.916, "step": 11927 }, { "epoch": 0.28065882352941174, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8427, "step": 11928 }, { "epoch": 0.2806823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1607, "step": 11929 }, { "epoch": 0.2807058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0763, "step": 11930 }, { "epoch": 0.2807294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1135, "step": 11931 }, { "epoch": 0.2807529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9578, "step": 11932 }, { "epoch": 0.2807764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9113, "step": 11933 }, { "epoch": 0.2808, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.497, "step": 11934 }, { "epoch": 0.2808235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7562, "step": 11935 }, { "epoch": 0.2808470588235294, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.6681, "step": 11936 }, { "epoch": 0.2808705882352941, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1694, "step": 11937 }, { "epoch": 0.28089411764705885, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0879, "step": 11938 }, { "epoch": 0.28091764705882355, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8378, "step": 11939 }, { "epoch": 0.28094117647058825, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1683, "step": 11940 }, { "epoch": 0.28096470588235295, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2407, "step": 11941 }, { "epoch": 0.28098823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.902, "step": 11942 }, { "epoch": 0.28101176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2735, "step": 11943 }, { "epoch": 0.28103529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0824, "step": 11944 }, { "epoch": 0.28105882352941175, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0779, "step": 11945 }, { "epoch": 0.28108235294117645, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2251, "step": 11946 }, { "epoch": 0.28110588235294115, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.147, "step": 11947 }, { "epoch": 0.2811294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1456, "step": 11948 }, { "epoch": 0.2811529411764706, "grad_norm": 0.2470703125, "learning_rate": 0.02, "loss": 0.936, "step": 11949 }, { "epoch": 0.2811764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9677, "step": 11950 }, { "epoch": 0.2812, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9541, "step": 11951 }, { "epoch": 0.2812235294117647, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3846, "step": 11952 }, { "epoch": 0.2812470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2864, "step": 11953 }, { "epoch": 0.2812705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0389, "step": 11954 }, { "epoch": 0.2812941176470588, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2869, "step": 11955 }, { "epoch": 0.2813176470588235, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.4307, "step": 11956 }, { "epoch": 0.28134117647058826, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9605, "step": 11957 }, { "epoch": 0.28136470588235296, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0361, "step": 11958 }, { "epoch": 0.28138823529411766, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9206, "step": 11959 }, { "epoch": 0.28141176470588236, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.8328, "step": 11960 }, { "epoch": 0.28143529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0756, "step": 11961 }, { "epoch": 0.28145882352941176, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.6494, "step": 11962 }, { "epoch": 0.28148235294117646, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1651, "step": 11963 }, { "epoch": 0.28150588235294116, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1662, "step": 11964 }, { "epoch": 0.28152941176470586, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9722, "step": 11965 }, { "epoch": 0.28155294117647056, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7802, "step": 11966 }, { "epoch": 0.2815764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8241, "step": 11967 }, { "epoch": 0.2816, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8531, "step": 11968 }, { "epoch": 0.2816235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9964, "step": 11969 }, { "epoch": 0.2816470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0809, "step": 11970 }, { "epoch": 0.2816705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9895, "step": 11971 }, { "epoch": 0.2816941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.014, "step": 11972 }, { "epoch": 0.2817176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.004, "step": 11973 }, { "epoch": 0.2817411764705882, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2395, "step": 11974 }, { "epoch": 0.2817647058823529, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8247, "step": 11975 }, { "epoch": 0.2817882352941177, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9587, "step": 11976 }, { "epoch": 0.2818117647058824, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9539, "step": 11977 }, { "epoch": 0.2818352941176471, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1304, "step": 11978 }, { "epoch": 0.2818588235294118, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.6783, "step": 11979 }, { "epoch": 0.2818823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.284, "step": 11980 }, { "epoch": 0.2819058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9184, "step": 11981 }, { "epoch": 0.2819294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1057, "step": 11982 }, { "epoch": 0.2819529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7952, "step": 11983 }, { "epoch": 0.2819764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 1.0132, "step": 11984 }, { "epoch": 0.282, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.5107, "step": 11985 }, { "epoch": 0.28202352941176473, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1549, "step": 11986 }, { "epoch": 0.28204705882352943, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1934, "step": 11987 }, { "epoch": 0.28207058823529413, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9363, "step": 11988 }, { "epoch": 0.28209411764705883, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.06, "step": 11989 }, { "epoch": 0.28211764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2003, "step": 11990 }, { "epoch": 0.28214117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0317, "step": 11991 }, { "epoch": 0.28216470588235293, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8635, "step": 11992 }, { "epoch": 0.28218823529411763, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0304, "step": 11993 }, { "epoch": 0.28221176470588233, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.143, "step": 11994 }, { "epoch": 0.2822352941176471, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9634, "step": 11995 }, { "epoch": 0.2822588235294118, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9196, "step": 11996 }, { "epoch": 0.2822823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9848, "step": 11997 }, { "epoch": 0.2823058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2279, "step": 11998 }, { "epoch": 0.2823294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.014, "step": 11999 }, { "epoch": 0.2823529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1547, "step": 12000 }, { "epoch": 0.2823529411764706, "eval_loss": 2.211031675338745, "eval_runtime": 684.4193, "eval_samples_per_second": 12.419, "eval_steps_per_second": 3.105, "step": 12000 }, { "epoch": 0.2823764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.688, "step": 12001 }, { "epoch": 0.2824, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0545, "step": 12002 }, { "epoch": 0.2824235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6519, "step": 12003 }, { "epoch": 0.2824470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.033, "step": 12004 }, { "epoch": 0.28247058823529414, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2179, "step": 12005 }, { "epoch": 0.28249411764705884, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8302, "step": 12006 }, { "epoch": 0.28251764705882354, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0665, "step": 12007 }, { "epoch": 0.28254117647058824, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8198, "step": 12008 }, { "epoch": 0.28256470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9093, "step": 12009 }, { "epoch": 0.28258823529411764, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0075, "step": 12010 }, { "epoch": 0.28261176470588234, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1493, "step": 12011 }, { "epoch": 0.28263529411764704, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9909, "step": 12012 }, { "epoch": 0.28265882352941174, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0264, "step": 12013 }, { "epoch": 0.2826823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0935, "step": 12014 }, { "epoch": 0.2827058823529412, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8191, "step": 12015 }, { "epoch": 0.2827294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0133, "step": 12016 }, { "epoch": 0.2827529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.01, "step": 12017 }, { "epoch": 0.2827764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1794, "step": 12018 }, { "epoch": 0.2828, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1812, "step": 12019 }, { "epoch": 0.2828235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9427, "step": 12020 }, { "epoch": 0.2828470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1919, "step": 12021 }, { "epoch": 0.2828705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9036, "step": 12022 }, { "epoch": 0.2828941176470588, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2165, "step": 12023 }, { "epoch": 0.28291764705882355, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7866, "step": 12024 }, { "epoch": 0.28294117647058825, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1939, "step": 12025 }, { "epoch": 0.28296470588235295, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3418, "step": 12026 }, { "epoch": 0.28298823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9618, "step": 12027 }, { "epoch": 0.28301176470588235, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3104, "step": 12028 }, { "epoch": 0.28303529411764705, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1104, "step": 12029 }, { "epoch": 0.28305882352941175, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2939, "step": 12030 }, { "epoch": 0.28308235294117645, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3414, "step": 12031 }, { "epoch": 0.28310588235294115, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8649, "step": 12032 }, { "epoch": 0.2831294117647059, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0274, "step": 12033 }, { "epoch": 0.2831529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.191, "step": 12034 }, { "epoch": 0.2831764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1682, "step": 12035 }, { "epoch": 0.2832, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8304, "step": 12036 }, { "epoch": 0.2832235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.933, "step": 12037 }, { "epoch": 0.2832470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1144, "step": 12038 }, { "epoch": 0.2832705882352941, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8563, "step": 12039 }, { "epoch": 0.2832941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9821, "step": 12040 }, { "epoch": 0.2833176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9275, "step": 12041 }, { "epoch": 0.28334117647058826, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8675, "step": 12042 }, { "epoch": 0.28336470588235296, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9205, "step": 12043 }, { "epoch": 0.28338823529411766, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3119, "step": 12044 }, { "epoch": 0.28341176470588236, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1735, "step": 12045 }, { "epoch": 0.28343529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.2037, "step": 12046 }, { "epoch": 0.28345882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9997, "step": 12047 }, { "epoch": 0.28348235294117646, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8932, "step": 12048 }, { "epoch": 0.28350588235294116, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.6335, "step": 12049 }, { "epoch": 0.28352941176470586, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1748, "step": 12050 }, { "epoch": 0.28355294117647056, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1489, "step": 12051 }, { "epoch": 0.2835764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2678, "step": 12052 }, { "epoch": 0.2836, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8731, "step": 12053 }, { "epoch": 0.2836235294117647, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7146, "step": 12054 }, { "epoch": 0.2836470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8602, "step": 12055 }, { "epoch": 0.2836705882352941, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7953, "step": 12056 }, { "epoch": 0.2836941176470588, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9727, "step": 12057 }, { "epoch": 0.2837176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9712, "step": 12058 }, { "epoch": 0.2837411764705882, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9097, "step": 12059 }, { "epoch": 0.2837647058823529, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0435, "step": 12060 }, { "epoch": 0.2837882352941177, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2216, "step": 12061 }, { "epoch": 0.2838117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0624, "step": 12062 }, { "epoch": 0.2838352941176471, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1343, "step": 12063 }, { "epoch": 0.2838588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8375, "step": 12064 }, { "epoch": 0.2838823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.733, "step": 12065 }, { "epoch": 0.2839058823529412, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8805, "step": 12066 }, { "epoch": 0.2839294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1933, "step": 12067 }, { "epoch": 0.2839529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2418, "step": 12068 }, { "epoch": 0.2839764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0653, "step": 12069 }, { "epoch": 0.284, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.068, "step": 12070 }, { "epoch": 0.28402352941176473, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9846, "step": 12071 }, { "epoch": 0.28404705882352943, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.908, "step": 12072 }, { "epoch": 0.28407058823529413, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3292, "step": 12073 }, { "epoch": 0.28409411764705883, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0613, "step": 12074 }, { "epoch": 0.28411764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0924, "step": 12075 }, { "epoch": 0.28414117647058823, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.188, "step": 12076 }, { "epoch": 0.28416470588235293, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0095, "step": 12077 }, { "epoch": 0.28418823529411763, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2856, "step": 12078 }, { "epoch": 0.28421176470588233, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.6578, "step": 12079 }, { "epoch": 0.2842352941176471, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9046, "step": 12080 }, { "epoch": 0.2842588235294118, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9117, "step": 12081 }, { "epoch": 0.2842823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9802, "step": 12082 }, { "epoch": 0.2843058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8819, "step": 12083 }, { "epoch": 0.2843294117647059, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2141, "step": 12084 }, { "epoch": 0.2843529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9859, "step": 12085 }, { "epoch": 0.2843764705882353, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3251, "step": 12086 }, { "epoch": 0.2844, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1208, "step": 12087 }, { "epoch": 0.2844235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7107, "step": 12088 }, { "epoch": 0.2844470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1872, "step": 12089 }, { "epoch": 0.28447058823529414, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7772, "step": 12090 }, { "epoch": 0.28449411764705884, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9533, "step": 12091 }, { "epoch": 0.28451764705882354, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.7817, "step": 12092 }, { "epoch": 0.28454117647058824, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.995, "step": 12093 }, { "epoch": 0.28456470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0792, "step": 12094 }, { "epoch": 0.28458823529411764, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1191, "step": 12095 }, { "epoch": 0.28461176470588234, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.903, "step": 12096 }, { "epoch": 0.28463529411764704, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2053, "step": 12097 }, { "epoch": 0.28465882352941174, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1118, "step": 12098 }, { "epoch": 0.2846823529411765, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1691, "step": 12099 }, { "epoch": 0.2847058823529412, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7997, "step": 12100 }, { "epoch": 0.2847294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.058, "step": 12101 }, { "epoch": 0.2847529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9522, "step": 12102 }, { "epoch": 0.2847764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9314, "step": 12103 }, { "epoch": 0.2848, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0643, "step": 12104 }, { "epoch": 0.2848235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8493, "step": 12105 }, { "epoch": 0.2848470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.051, "step": 12106 }, { "epoch": 0.2848705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0539, "step": 12107 }, { "epoch": 0.2848941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1448, "step": 12108 }, { "epoch": 0.28491764705882355, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2064, "step": 12109 }, { "epoch": 0.28494117647058825, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8822, "step": 12110 }, { "epoch": 0.28496470588235295, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9775, "step": 12111 }, { "epoch": 0.28498823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1464, "step": 12112 }, { "epoch": 0.28501176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0064, "step": 12113 }, { "epoch": 0.28503529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7958, "step": 12114 }, { "epoch": 0.28505882352941175, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.355, "step": 12115 }, { "epoch": 0.28508235294117645, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8607, "step": 12116 }, { "epoch": 0.28510588235294115, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2417, "step": 12117 }, { "epoch": 0.2851294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9265, "step": 12118 }, { "epoch": 0.2851529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0574, "step": 12119 }, { "epoch": 0.2851764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0734, "step": 12120 }, { "epoch": 0.2852, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3955, "step": 12121 }, { "epoch": 0.2852235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0608, "step": 12122 }, { "epoch": 0.2852470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2022, "step": 12123 }, { "epoch": 0.2852705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1659, "step": 12124 }, { "epoch": 0.2852941176470588, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7589, "step": 12125 }, { "epoch": 0.2853176470588235, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0835, "step": 12126 }, { "epoch": 0.2853411764705882, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0465, "step": 12127 }, { "epoch": 0.28536470588235296, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.0373, "step": 12128 }, { "epoch": 0.28538823529411766, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.9698, "step": 12129 }, { "epoch": 0.28541176470588236, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.2395, "step": 12130 }, { "epoch": 0.28543529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1019, "step": 12131 }, { "epoch": 0.28545882352941176, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.389, "step": 12132 }, { "epoch": 0.28548235294117646, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1126, "step": 12133 }, { "epoch": 0.28550588235294116, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0944, "step": 12134 }, { "epoch": 0.28552941176470586, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.015, "step": 12135 }, { "epoch": 0.28555294117647056, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0905, "step": 12136 }, { "epoch": 0.2855764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0576, "step": 12137 }, { "epoch": 0.2856, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.275, "step": 12138 }, { "epoch": 0.2856235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.1506, "step": 12139 }, { "epoch": 0.2856470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0756, "step": 12140 }, { "epoch": 0.2856705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1196, "step": 12141 }, { "epoch": 0.2856941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0661, "step": 12142 }, { "epoch": 0.2857176470588235, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9359, "step": 12143 }, { "epoch": 0.2857411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9272, "step": 12144 }, { "epoch": 0.2857647058823529, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9719, "step": 12145 }, { "epoch": 0.2857882352941176, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1084, "step": 12146 }, { "epoch": 0.2858117647058824, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9776, "step": 12147 }, { "epoch": 0.2858352941176471, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9273, "step": 12148 }, { "epoch": 0.2858588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2901, "step": 12149 }, { "epoch": 0.2858823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0413, "step": 12150 }, { "epoch": 0.2859058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2328, "step": 12151 }, { "epoch": 0.2859294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1718, "step": 12152 }, { "epoch": 0.2859529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2999, "step": 12153 }, { "epoch": 0.2859764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0591, "step": 12154 }, { "epoch": 0.286, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1034, "step": 12155 }, { "epoch": 0.28602352941176473, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8244, "step": 12156 }, { "epoch": 0.28604705882352943, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.6039, "step": 12157 }, { "epoch": 0.28607058823529413, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9288, "step": 12158 }, { "epoch": 0.28609411764705883, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3678, "step": 12159 }, { "epoch": 0.28611764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0314, "step": 12160 }, { "epoch": 0.28614117647058823, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1125, "step": 12161 }, { "epoch": 0.28616470588235293, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9646, "step": 12162 }, { "epoch": 0.28618823529411763, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.8237, "step": 12163 }, { "epoch": 0.28621176470588233, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1966, "step": 12164 }, { "epoch": 0.28623529411764703, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0078, "step": 12165 }, { "epoch": 0.2862588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2065, "step": 12166 }, { "epoch": 0.2862823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0663, "step": 12167 }, { "epoch": 0.2863058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1776, "step": 12168 }, { "epoch": 0.2863294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9783, "step": 12169 }, { "epoch": 0.2863529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0896, "step": 12170 }, { "epoch": 0.2863764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9075, "step": 12171 }, { "epoch": 0.2864, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2332, "step": 12172 }, { "epoch": 0.2864235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.1209, "step": 12173 }, { "epoch": 0.2864470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1765, "step": 12174 }, { "epoch": 0.28647058823529414, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.6665, "step": 12175 }, { "epoch": 0.28649411764705884, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2078, "step": 12176 }, { "epoch": 0.28651764705882354, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1556, "step": 12177 }, { "epoch": 0.28654117647058824, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9443, "step": 12178 }, { "epoch": 0.28656470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9209, "step": 12179 }, { "epoch": 0.28658823529411764, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3804, "step": 12180 }, { "epoch": 0.28661176470588234, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8314, "step": 12181 }, { "epoch": 0.28663529411764704, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8995, "step": 12182 }, { "epoch": 0.28665882352941174, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1153, "step": 12183 }, { "epoch": 0.28668235294117644, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2098, "step": 12184 }, { "epoch": 0.2867058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1679, "step": 12185 }, { "epoch": 0.2867294117647059, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8668, "step": 12186 }, { "epoch": 0.2867529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1899, "step": 12187 }, { "epoch": 0.2867764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9385, "step": 12188 }, { "epoch": 0.2868, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9514, "step": 12189 }, { "epoch": 0.2868235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7637, "step": 12190 }, { "epoch": 0.2868470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1752, "step": 12191 }, { "epoch": 0.2868705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.2911, "step": 12192 }, { "epoch": 0.2868941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0863, "step": 12193 }, { "epoch": 0.28691764705882356, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1438, "step": 12194 }, { "epoch": 0.28694117647058826, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.6906, "step": 12195 }, { "epoch": 0.28696470588235296, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6674, "step": 12196 }, { "epoch": 0.28698823529411766, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7751, "step": 12197 }, { "epoch": 0.28701176470588236, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1305, "step": 12198 }, { "epoch": 0.28703529411764706, "grad_norm": 0.2451171875, "learning_rate": 0.02, "loss": 0.7515, "step": 12199 }, { "epoch": 0.28705882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9199, "step": 12200 }, { "epoch": 0.28708235294117646, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.9068, "step": 12201 }, { "epoch": 0.28710588235294116, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0076, "step": 12202 }, { "epoch": 0.28712941176470586, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2412, "step": 12203 }, { "epoch": 0.2871529411764706, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.5998, "step": 12204 }, { "epoch": 0.2871764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8519, "step": 12205 }, { "epoch": 0.2872, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9379, "step": 12206 }, { "epoch": 0.2872235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0573, "step": 12207 }, { "epoch": 0.2872470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0903, "step": 12208 }, { "epoch": 0.2872705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.102, "step": 12209 }, { "epoch": 0.2872941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9912, "step": 12210 }, { "epoch": 0.2873176470588235, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2587, "step": 12211 }, { "epoch": 0.2873411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0103, "step": 12212 }, { "epoch": 0.28736470588235297, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0511, "step": 12213 }, { "epoch": 0.28738823529411767, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9317, "step": 12214 }, { "epoch": 0.28741176470588237, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9969, "step": 12215 }, { "epoch": 0.28743529411764707, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9876, "step": 12216 }, { "epoch": 0.28745882352941177, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0313, "step": 12217 }, { "epoch": 0.28748235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9555, "step": 12218 }, { "epoch": 0.28750588235294117, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0354, "step": 12219 }, { "epoch": 0.28752941176470587, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9794, "step": 12220 }, { "epoch": 0.28755294117647057, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0376, "step": 12221 }, { "epoch": 0.28757647058823527, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9967, "step": 12222 }, { "epoch": 0.2876, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8502, "step": 12223 }, { "epoch": 0.2876235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1719, "step": 12224 }, { "epoch": 0.2876470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1521, "step": 12225 }, { "epoch": 0.2876705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9712, "step": 12226 }, { "epoch": 0.2876941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9765, "step": 12227 }, { "epoch": 0.2877176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0709, "step": 12228 }, { "epoch": 0.2877411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0047, "step": 12229 }, { "epoch": 0.2877647058823529, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1227, "step": 12230 }, { "epoch": 0.2877882352941176, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7815, "step": 12231 }, { "epoch": 0.2878117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9356, "step": 12232 }, { "epoch": 0.2878352941176471, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.202, "step": 12233 }, { "epoch": 0.2878588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.052, "step": 12234 }, { "epoch": 0.2878823529411765, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1946, "step": 12235 }, { "epoch": 0.2879058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2284, "step": 12236 }, { "epoch": 0.2879294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2789, "step": 12237 }, { "epoch": 0.2879529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0884, "step": 12238 }, { "epoch": 0.2879764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.007, "step": 12239 }, { "epoch": 0.288, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8075, "step": 12240 }, { "epoch": 0.2880235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.144, "step": 12241 }, { "epoch": 0.28804705882352943, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2526, "step": 12242 }, { "epoch": 0.28807058823529413, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0968, "step": 12243 }, { "epoch": 0.28809411764705883, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1899, "step": 12244 }, { "epoch": 0.28811764705882353, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8568, "step": 12245 }, { "epoch": 0.28814117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0498, "step": 12246 }, { "epoch": 0.28816470588235293, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7501, "step": 12247 }, { "epoch": 0.28818823529411763, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0726, "step": 12248 }, { "epoch": 0.28821176470588233, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2196, "step": 12249 }, { "epoch": 0.28823529411764703, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0276, "step": 12250 }, { "epoch": 0.2882588235294118, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9507, "step": 12251 }, { "epoch": 0.2882823529411765, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2826, "step": 12252 }, { "epoch": 0.2883058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0263, "step": 12253 }, { "epoch": 0.2883294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1363, "step": 12254 }, { "epoch": 0.2883529411764706, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.411, "step": 12255 }, { "epoch": 0.2883764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.385, "step": 12256 }, { "epoch": 0.2884, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1202, "step": 12257 }, { "epoch": 0.2884235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9302, "step": 12258 }, { "epoch": 0.2884470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0134, "step": 12259 }, { "epoch": 0.28847058823529415, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2365, "step": 12260 }, { "epoch": 0.28849411764705885, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3643, "step": 12261 }, { "epoch": 0.28851764705882355, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0149, "step": 12262 }, { "epoch": 0.28854117647058825, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.4163, "step": 12263 }, { "epoch": 0.28856470588235295, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9031, "step": 12264 }, { "epoch": 0.28858823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.182, "step": 12265 }, { "epoch": 0.28861176470588235, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8118, "step": 12266 }, { "epoch": 0.28863529411764705, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9363, "step": 12267 }, { "epoch": 0.28865882352941175, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0042, "step": 12268 }, { "epoch": 0.28868235294117645, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1282, "step": 12269 }, { "epoch": 0.2887058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0894, "step": 12270 }, { "epoch": 0.2887294117647059, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9841, "step": 12271 }, { "epoch": 0.2887529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1587, "step": 12272 }, { "epoch": 0.2887764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9805, "step": 12273 }, { "epoch": 0.2888, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9588, "step": 12274 }, { "epoch": 0.2888235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7407, "step": 12275 }, { "epoch": 0.2888470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0759, "step": 12276 }, { "epoch": 0.2888705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1665, "step": 12277 }, { "epoch": 0.2888941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1521, "step": 12278 }, { "epoch": 0.28891764705882356, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8025, "step": 12279 }, { "epoch": 0.28894117647058826, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0983, "step": 12280 }, { "epoch": 0.28896470588235296, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8017, "step": 12281 }, { "epoch": 0.28898823529411766, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.8168, "step": 12282 }, { "epoch": 0.28901176470588236, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1679, "step": 12283 }, { "epoch": 0.28903529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9229, "step": 12284 }, { "epoch": 0.28905882352941176, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1193, "step": 12285 }, { "epoch": 0.28908235294117646, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0152, "step": 12286 }, { "epoch": 0.28910588235294116, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8738, "step": 12287 }, { "epoch": 0.28912941176470586, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2589, "step": 12288 }, { "epoch": 0.2891529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1646, "step": 12289 }, { "epoch": 0.2891764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1247, "step": 12290 }, { "epoch": 0.2892, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.751, "step": 12291 }, { "epoch": 0.2892235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8917, "step": 12292 }, { "epoch": 0.2892470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8573, "step": 12293 }, { "epoch": 0.2892705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6787, "step": 12294 }, { "epoch": 0.2892941176470588, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.723, "step": 12295 }, { "epoch": 0.2893176470588235, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2165, "step": 12296 }, { "epoch": 0.2893411764705882, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0652, "step": 12297 }, { "epoch": 0.28936470588235297, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1854, "step": 12298 }, { "epoch": 0.28938823529411767, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9277, "step": 12299 }, { "epoch": 0.28941176470588237, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9749, "step": 12300 }, { "epoch": 0.28943529411764707, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2843, "step": 12301 }, { "epoch": 0.28945882352941177, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0785, "step": 12302 }, { "epoch": 0.28948235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0964, "step": 12303 }, { "epoch": 0.28950588235294117, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1199, "step": 12304 }, { "epoch": 0.28952941176470587, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.1202, "step": 12305 }, { "epoch": 0.28955294117647057, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8188, "step": 12306 }, { "epoch": 0.28957647058823527, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1758, "step": 12307 }, { "epoch": 0.2896, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0286, "step": 12308 }, { "epoch": 0.2896235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2065, "step": 12309 }, { "epoch": 0.2896470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9928, "step": 12310 }, { "epoch": 0.2896705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8696, "step": 12311 }, { "epoch": 0.2896941176470588, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1406, "step": 12312 }, { "epoch": 0.2897176470588235, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7713, "step": 12313 }, { "epoch": 0.2897411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1815, "step": 12314 }, { "epoch": 0.2897647058823529, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0205, "step": 12315 }, { "epoch": 0.2897882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.5819, "step": 12316 }, { "epoch": 0.2898117647058824, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1405, "step": 12317 }, { "epoch": 0.2898352941176471, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0634, "step": 12318 }, { "epoch": 0.2898588235294118, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2529, "step": 12319 }, { "epoch": 0.2898823529411765, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3229, "step": 12320 }, { "epoch": 0.2899058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2732, "step": 12321 }, { "epoch": 0.2899294117647059, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3675, "step": 12322 }, { "epoch": 0.2899529411764706, "grad_norm": 0.4140625, "learning_rate": 0.02, "loss": 0.8223, "step": 12323 }, { "epoch": 0.2899764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8112, "step": 12324 }, { "epoch": 0.29, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0177, "step": 12325 }, { "epoch": 0.2900235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9814, "step": 12326 }, { "epoch": 0.29004705882352944, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2121, "step": 12327 }, { "epoch": 0.29007058823529414, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.3721, "step": 12328 }, { "epoch": 0.29009411764705884, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9812, "step": 12329 }, { "epoch": 0.29011764705882354, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.6315, "step": 12330 }, { "epoch": 0.29014117647058824, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2983, "step": 12331 }, { "epoch": 0.29016470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3071, "step": 12332 }, { "epoch": 0.29018823529411764, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0653, "step": 12333 }, { "epoch": 0.29021176470588234, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1906, "step": 12334 }, { "epoch": 0.29023529411764704, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9242, "step": 12335 }, { "epoch": 0.2902588235294118, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1759, "step": 12336 }, { "epoch": 0.2902823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.019, "step": 12337 }, { "epoch": 0.2903058823529412, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1033, "step": 12338 }, { "epoch": 0.2903294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8698, "step": 12339 }, { "epoch": 0.2903529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.2124, "step": 12340 }, { "epoch": 0.2903764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9436, "step": 12341 }, { "epoch": 0.2904, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0893, "step": 12342 }, { "epoch": 0.2904235294117647, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2293, "step": 12343 }, { "epoch": 0.2904470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9785, "step": 12344 }, { "epoch": 0.2904705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9542, "step": 12345 }, { "epoch": 0.29049411764705885, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2059, "step": 12346 }, { "epoch": 0.29051764705882355, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9891, "step": 12347 }, { "epoch": 0.29054117647058825, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0397, "step": 12348 }, { "epoch": 0.29056470588235295, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9964, "step": 12349 }, { "epoch": 0.29058823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8912, "step": 12350 }, { "epoch": 0.29061176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8639, "step": 12351 }, { "epoch": 0.29063529411764705, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9681, "step": 12352 }, { "epoch": 0.29065882352941175, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0172, "step": 12353 }, { "epoch": 0.29068235294117645, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2143, "step": 12354 }, { "epoch": 0.2907058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.914, "step": 12355 }, { "epoch": 0.2907294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7516, "step": 12356 }, { "epoch": 0.2907529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2038, "step": 12357 }, { "epoch": 0.2907764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1837, "step": 12358 }, { "epoch": 0.2908, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.6541, "step": 12359 }, { "epoch": 0.2908235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7577, "step": 12360 }, { "epoch": 0.2908470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0442, "step": 12361 }, { "epoch": 0.2908705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9287, "step": 12362 }, { "epoch": 0.2908941176470588, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9192, "step": 12363 }, { "epoch": 0.2909176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9625, "step": 12364 }, { "epoch": 0.29094117647058826, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0295, "step": 12365 }, { "epoch": 0.29096470588235296, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0543, "step": 12366 }, { "epoch": 0.29098823529411766, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9326, "step": 12367 }, { "epoch": 0.29101176470588236, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0862, "step": 12368 }, { "epoch": 0.29103529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7681, "step": 12369 }, { "epoch": 0.29105882352941176, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2069, "step": 12370 }, { "epoch": 0.29108235294117646, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1731, "step": 12371 }, { "epoch": 0.29110588235294116, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2388, "step": 12372 }, { "epoch": 0.29112941176470586, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.106, "step": 12373 }, { "epoch": 0.2911529411764706, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.736, "step": 12374 }, { "epoch": 0.2911764705882353, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8983, "step": 12375 }, { "epoch": 0.2912, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.945, "step": 12376 }, { "epoch": 0.2912235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8128, "step": 12377 }, { "epoch": 0.2912470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.3458, "step": 12378 }, { "epoch": 0.2912705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8808, "step": 12379 }, { "epoch": 0.2912941176470588, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8283, "step": 12380 }, { "epoch": 0.2913176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9891, "step": 12381 }, { "epoch": 0.2913411764705882, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.6652, "step": 12382 }, { "epoch": 0.2913647058823529, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0809, "step": 12383 }, { "epoch": 0.29138823529411767, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2213, "step": 12384 }, { "epoch": 0.29141176470588237, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0493, "step": 12385 }, { "epoch": 0.29143529411764707, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.184, "step": 12386 }, { "epoch": 0.29145882352941177, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2059, "step": 12387 }, { "epoch": 0.29148235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8253, "step": 12388 }, { "epoch": 0.29150588235294117, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1115, "step": 12389 }, { "epoch": 0.29152941176470587, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7953, "step": 12390 }, { "epoch": 0.29155294117647057, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1282, "step": 12391 }, { "epoch": 0.29157647058823527, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1222, "step": 12392 }, { "epoch": 0.2916, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2732, "step": 12393 }, { "epoch": 0.2916235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7489, "step": 12394 }, { "epoch": 0.2916470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3503, "step": 12395 }, { "epoch": 0.2916705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0436, "step": 12396 }, { "epoch": 0.2916941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8266, "step": 12397 }, { "epoch": 0.2917176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0621, "step": 12398 }, { "epoch": 0.2917411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9719, "step": 12399 }, { "epoch": 0.2917647058823529, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2353, "step": 12400 }, { "epoch": 0.2917882352941176, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0574, "step": 12401 }, { "epoch": 0.2918117647058823, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9867, "step": 12402 }, { "epoch": 0.2918352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.027, "step": 12403 }, { "epoch": 0.2918588235294118, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.225, "step": 12404 }, { "epoch": 0.2918823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.024, "step": 12405 }, { "epoch": 0.2919058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.036, "step": 12406 }, { "epoch": 0.2919294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9963, "step": 12407 }, { "epoch": 0.2919529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9053, "step": 12408 }, { "epoch": 0.2919764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.219, "step": 12409 }, { "epoch": 0.292, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1839, "step": 12410 }, { "epoch": 0.2920235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9203, "step": 12411 }, { "epoch": 0.29204705882352944, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0957, "step": 12412 }, { "epoch": 0.29207058823529414, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.3409, "step": 12413 }, { "epoch": 0.29209411764705884, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2408, "step": 12414 }, { "epoch": 0.29211764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1802, "step": 12415 }, { "epoch": 0.29214117647058824, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9867, "step": 12416 }, { "epoch": 0.29216470588235294, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7679, "step": 12417 }, { "epoch": 0.29218823529411764, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2676, "step": 12418 }, { "epoch": 0.29221176470588234, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1066, "step": 12419 }, { "epoch": 0.29223529411764704, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.6794, "step": 12420 }, { "epoch": 0.29225882352941174, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1771, "step": 12421 }, { "epoch": 0.2922823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1185, "step": 12422 }, { "epoch": 0.2923058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0672, "step": 12423 }, { "epoch": 0.2923294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0308, "step": 12424 }, { "epoch": 0.2923529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2716, "step": 12425 }, { "epoch": 0.2923764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1744, "step": 12426 }, { "epoch": 0.2924, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2326, "step": 12427 }, { "epoch": 0.2924235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.893, "step": 12428 }, { "epoch": 0.2924470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0107, "step": 12429 }, { "epoch": 0.2924705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3133, "step": 12430 }, { "epoch": 0.29249411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9277, "step": 12431 }, { "epoch": 0.29251764705882355, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9915, "step": 12432 }, { "epoch": 0.29254117647058825, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.125, "step": 12433 }, { "epoch": 0.29256470588235295, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2128, "step": 12434 }, { "epoch": 0.29258823529411765, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9999, "step": 12435 }, { "epoch": 0.29261176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7606, "step": 12436 }, { "epoch": 0.29263529411764705, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1841, "step": 12437 }, { "epoch": 0.29265882352941175, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8709, "step": 12438 }, { "epoch": 0.29268235294117645, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.215, "step": 12439 }, { "epoch": 0.29270588235294115, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.276, "step": 12440 }, { "epoch": 0.2927294117647059, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8173, "step": 12441 }, { "epoch": 0.2927529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9886, "step": 12442 }, { "epoch": 0.2927764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1619, "step": 12443 }, { "epoch": 0.2928, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0763, "step": 12444 }, { "epoch": 0.2928235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2196, "step": 12445 }, { "epoch": 0.2928470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0104, "step": 12446 }, { "epoch": 0.2928705882352941, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1378, "step": 12447 }, { "epoch": 0.2928941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8561, "step": 12448 }, { "epoch": 0.2929176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9845, "step": 12449 }, { "epoch": 0.29294117647058826, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.6845, "step": 12450 }, { "epoch": 0.29296470588235296, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2529, "step": 12451 }, { "epoch": 0.29298823529411766, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2175, "step": 12452 }, { "epoch": 0.29301176470588236, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9212, "step": 12453 }, { "epoch": 0.29303529411764706, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9746, "step": 12454 }, { "epoch": 0.29305882352941176, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.8787, "step": 12455 }, { "epoch": 0.29308235294117646, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0095, "step": 12456 }, { "epoch": 0.29310588235294116, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0957, "step": 12457 }, { "epoch": 0.29312941176470586, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9589, "step": 12458 }, { "epoch": 0.29315294117647056, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.114, "step": 12459 }, { "epoch": 0.2931764705882353, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9325, "step": 12460 }, { "epoch": 0.2932, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0886, "step": 12461 }, { "epoch": 0.2932235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9897, "step": 12462 }, { "epoch": 0.2932470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0298, "step": 12463 }, { "epoch": 0.2932705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9277, "step": 12464 }, { "epoch": 0.2932941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9677, "step": 12465 }, { "epoch": 0.2933176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8544, "step": 12466 }, { "epoch": 0.2933411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3074, "step": 12467 }, { "epoch": 0.2933647058823529, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9203, "step": 12468 }, { "epoch": 0.29338823529411767, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3309, "step": 12469 }, { "epoch": 0.29341176470588237, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9765, "step": 12470 }, { "epoch": 0.29343529411764707, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0373, "step": 12471 }, { "epoch": 0.29345882352941177, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0057, "step": 12472 }, { "epoch": 0.29348235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.908, "step": 12473 }, { "epoch": 0.29350588235294117, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8793, "step": 12474 }, { "epoch": 0.29352941176470587, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0447, "step": 12475 }, { "epoch": 0.29355294117647057, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7194, "step": 12476 }, { "epoch": 0.29357647058823527, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0002, "step": 12477 }, { "epoch": 0.2936, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.7503, "step": 12478 }, { "epoch": 0.2936235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9589, "step": 12479 }, { "epoch": 0.2936470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.17, "step": 12480 }, { "epoch": 0.2936705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0197, "step": 12481 }, { "epoch": 0.2936941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0089, "step": 12482 }, { "epoch": 0.2937176470588235, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1703, "step": 12483 }, { "epoch": 0.29374117647058823, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8949, "step": 12484 }, { "epoch": 0.29376470588235293, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2632, "step": 12485 }, { "epoch": 0.29378823529411763, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8828, "step": 12486 }, { "epoch": 0.29381176470588233, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0163, "step": 12487 }, { "epoch": 0.2938352941176471, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0153, "step": 12488 }, { "epoch": 0.2938588235294118, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.3392, "step": 12489 }, { "epoch": 0.2938823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3271, "step": 12490 }, { "epoch": 0.2939058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1211, "step": 12491 }, { "epoch": 0.2939294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1735, "step": 12492 }, { "epoch": 0.2939529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0934, "step": 12493 }, { "epoch": 0.2939764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8701, "step": 12494 }, { "epoch": 0.294, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8031, "step": 12495 }, { "epoch": 0.2940235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2926, "step": 12496 }, { "epoch": 0.29404705882352944, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7837, "step": 12497 }, { "epoch": 0.29407058823529414, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1248, "step": 12498 }, { "epoch": 0.29409411764705884, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.782, "step": 12499 }, { "epoch": 0.29411764705882354, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1619, "step": 12500 }, { "epoch": 0.29414117647058824, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2076, "step": 12501 }, { "epoch": 0.29416470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1421, "step": 12502 }, { "epoch": 0.29418823529411764, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0451, "step": 12503 }, { "epoch": 0.29421176470588234, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2051, "step": 12504 }, { "epoch": 0.29423529411764704, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7692, "step": 12505 }, { "epoch": 0.29425882352941174, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0594, "step": 12506 }, { "epoch": 0.2942823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8525, "step": 12507 }, { "epoch": 0.2943058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1413, "step": 12508 }, { "epoch": 0.2943294117647059, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9015, "step": 12509 }, { "epoch": 0.2943529411764706, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8604, "step": 12510 }, { "epoch": 0.2943764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.927, "step": 12511 }, { "epoch": 0.2944, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9927, "step": 12512 }, { "epoch": 0.2944235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1338, "step": 12513 }, { "epoch": 0.2944470588235294, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3205, "step": 12514 }, { "epoch": 0.2944705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1035, "step": 12515 }, { "epoch": 0.29449411764705885, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8411, "step": 12516 }, { "epoch": 0.29451764705882355, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2409, "step": 12517 }, { "epoch": 0.29454117647058825, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1613, "step": 12518 }, { "epoch": 0.29456470588235295, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9361, "step": 12519 }, { "epoch": 0.29458823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1056, "step": 12520 }, { "epoch": 0.29461176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9381, "step": 12521 }, { "epoch": 0.29463529411764705, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9928, "step": 12522 }, { "epoch": 0.29465882352941175, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7759, "step": 12523 }, { "epoch": 0.29468235294117645, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0891, "step": 12524 }, { "epoch": 0.29470588235294115, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2129, "step": 12525 }, { "epoch": 0.2947294117647059, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1682, "step": 12526 }, { "epoch": 0.2947529411764706, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.3902, "step": 12527 }, { "epoch": 0.2947764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1595, "step": 12528 }, { "epoch": 0.2948, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3675, "step": 12529 }, { "epoch": 0.2948235294117647, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8112, "step": 12530 }, { "epoch": 0.2948470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2527, "step": 12531 }, { "epoch": 0.2948705882352941, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.909, "step": 12532 }, { "epoch": 0.2948941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1803, "step": 12533 }, { "epoch": 0.2949176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0811, "step": 12534 }, { "epoch": 0.29494117647058826, "grad_norm": 0.2333984375, "learning_rate": 0.02, "loss": 0.7381, "step": 12535 }, { "epoch": 0.29496470588235296, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0036, "step": 12536 }, { "epoch": 0.29498823529411766, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0457, "step": 12537 }, { "epoch": 0.29501176470588236, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8259, "step": 12538 }, { "epoch": 0.29503529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9997, "step": 12539 }, { "epoch": 0.29505882352941176, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8812, "step": 12540 }, { "epoch": 0.29508235294117646, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1708, "step": 12541 }, { "epoch": 0.29510588235294116, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7391, "step": 12542 }, { "epoch": 0.29512941176470586, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2958, "step": 12543 }, { "epoch": 0.29515294117647056, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.157, "step": 12544 }, { "epoch": 0.2951764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1781, "step": 12545 }, { "epoch": 0.2952, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9456, "step": 12546 }, { "epoch": 0.2952235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0076, "step": 12547 }, { "epoch": 0.2952470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2092, "step": 12548 }, { "epoch": 0.2952705882352941, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0302, "step": 12549 }, { "epoch": 0.2952941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0603, "step": 12550 }, { "epoch": 0.2953176470588235, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.757, "step": 12551 }, { "epoch": 0.2953411764705882, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0436, "step": 12552 }, { "epoch": 0.2953647058823529, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1749, "step": 12553 }, { "epoch": 0.2953882352941177, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2222, "step": 12554 }, { "epoch": 0.2954117647058824, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0102, "step": 12555 }, { "epoch": 0.2954352941176471, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8893, "step": 12556 }, { "epoch": 0.2954588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0778, "step": 12557 }, { "epoch": 0.2954823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0435, "step": 12558 }, { "epoch": 0.2955058823529412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0829, "step": 12559 }, { "epoch": 0.2955294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.6628, "step": 12560 }, { "epoch": 0.2955529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9373, "step": 12561 }, { "epoch": 0.2955764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7515, "step": 12562 }, { "epoch": 0.2956, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7887, "step": 12563 }, { "epoch": 0.29562352941176473, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9774, "step": 12564 }, { "epoch": 0.29564705882352943, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.3598, "step": 12565 }, { "epoch": 0.29567058823529413, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0831, "step": 12566 }, { "epoch": 0.29569411764705883, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0306, "step": 12567 }, { "epoch": 0.29571764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1067, "step": 12568 }, { "epoch": 0.29574117647058823, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.311, "step": 12569 }, { "epoch": 0.29576470588235293, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.24, "step": 12570 }, { "epoch": 0.29578823529411763, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9348, "step": 12571 }, { "epoch": 0.29581176470588233, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7675, "step": 12572 }, { "epoch": 0.2958352941176471, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3119, "step": 12573 }, { "epoch": 0.2958588235294118, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.4373, "step": 12574 }, { "epoch": 0.2958823529411765, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1025, "step": 12575 }, { "epoch": 0.2959058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1875, "step": 12576 }, { "epoch": 0.2959294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0142, "step": 12577 }, { "epoch": 0.2959529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9854, "step": 12578 }, { "epoch": 0.2959764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2403, "step": 12579 }, { "epoch": 0.296, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8116, "step": 12580 }, { "epoch": 0.2960235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0969, "step": 12581 }, { "epoch": 0.2960470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9752, "step": 12582 }, { "epoch": 0.29607058823529414, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1015, "step": 12583 }, { "epoch": 0.29609411764705884, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1405, "step": 12584 }, { "epoch": 0.29611764705882354, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1304, "step": 12585 }, { "epoch": 0.29614117647058824, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8713, "step": 12586 }, { "epoch": 0.29616470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2149, "step": 12587 }, { "epoch": 0.29618823529411764, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.7891, "step": 12588 }, { "epoch": 0.29621176470588234, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2736, "step": 12589 }, { "epoch": 0.29623529411764704, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1291, "step": 12590 }, { "epoch": 0.29625882352941174, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0859, "step": 12591 }, { "epoch": 0.2962823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0277, "step": 12592 }, { "epoch": 0.2963058823529412, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3273, "step": 12593 }, { "epoch": 0.2963294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.1316, "step": 12594 }, { "epoch": 0.2963529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9965, "step": 12595 }, { "epoch": 0.2963764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0847, "step": 12596 }, { "epoch": 0.2964, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9692, "step": 12597 }, { "epoch": 0.2964235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0681, "step": 12598 }, { "epoch": 0.2964470588235294, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.3652, "step": 12599 }, { "epoch": 0.2964705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2209, "step": 12600 }, { "epoch": 0.2964941176470588, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3104, "step": 12601 }, { "epoch": 0.29651764705882355, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1341, "step": 12602 }, { "epoch": 0.29654117647058825, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.7885, "step": 12603 }, { "epoch": 0.29656470588235295, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2884, "step": 12604 }, { "epoch": 0.29658823529411765, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3417, "step": 12605 }, { "epoch": 0.29661176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1411, "step": 12606 }, { "epoch": 0.29663529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9866, "step": 12607 }, { "epoch": 0.29665882352941175, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1413, "step": 12608 }, { "epoch": 0.29668235294117645, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9627, "step": 12609 }, { "epoch": 0.29670588235294115, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7635, "step": 12610 }, { "epoch": 0.2967294117647059, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.7916, "step": 12611 }, { "epoch": 0.2967529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2338, "step": 12612 }, { "epoch": 0.2967764705882353, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.3646, "step": 12613 }, { "epoch": 0.2968, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9146, "step": 12614 }, { "epoch": 0.2968235294117647, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.6586, "step": 12615 }, { "epoch": 0.2968470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9319, "step": 12616 }, { "epoch": 0.2968705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1382, "step": 12617 }, { "epoch": 0.2968941176470588, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 0.9474, "step": 12618 }, { "epoch": 0.2969176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9052, "step": 12619 }, { "epoch": 0.2969411764705882, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.4343, "step": 12620 }, { "epoch": 0.29696470588235296, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8215, "step": 12621 }, { "epoch": 0.29698823529411766, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8909, "step": 12622 }, { "epoch": 0.29701176470588236, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3505, "step": 12623 }, { "epoch": 0.29703529411764706, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.4423, "step": 12624 }, { "epoch": 0.29705882352941176, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0621, "step": 12625 }, { "epoch": 0.29708235294117646, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0411, "step": 12626 }, { "epoch": 0.29710588235294116, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0628, "step": 12627 }, { "epoch": 0.29712941176470586, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9672, "step": 12628 }, { "epoch": 0.29715294117647056, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.33, "step": 12629 }, { "epoch": 0.2971764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0739, "step": 12630 }, { "epoch": 0.2972, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9746, "step": 12631 }, { "epoch": 0.2972235294117647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1342, "step": 12632 }, { "epoch": 0.2972470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6695, "step": 12633 }, { "epoch": 0.2972705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1157, "step": 12634 }, { "epoch": 0.2972941176470588, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4458, "step": 12635 }, { "epoch": 0.2973176470588235, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.4219, "step": 12636 }, { "epoch": 0.2973411764705882, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0537, "step": 12637 }, { "epoch": 0.2973647058823529, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0448, "step": 12638 }, { "epoch": 0.2973882352941176, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7173, "step": 12639 }, { "epoch": 0.2974117647058824, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1489, "step": 12640 }, { "epoch": 0.2974352941176471, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7361, "step": 12641 }, { "epoch": 0.2974588235294118, "grad_norm": 0.240234375, "learning_rate": 0.02, "loss": 0.6219, "step": 12642 }, { "epoch": 0.2974823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2042, "step": 12643 }, { "epoch": 0.2975058823529412, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0051, "step": 12644 }, { "epoch": 0.2975294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1664, "step": 12645 }, { "epoch": 0.2975529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0204, "step": 12646 }, { "epoch": 0.2975764705882353, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.8307, "step": 12647 }, { "epoch": 0.2976, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0796, "step": 12648 }, { "epoch": 0.29762352941176473, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9855, "step": 12649 }, { "epoch": 0.29764705882352943, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8695, "step": 12650 }, { "epoch": 0.29767058823529413, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0745, "step": 12651 }, { "epoch": 0.29769411764705883, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0594, "step": 12652 }, { "epoch": 0.29771764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.965, "step": 12653 }, { "epoch": 0.29774117647058823, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8405, "step": 12654 }, { "epoch": 0.29776470588235293, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8545, "step": 12655 }, { "epoch": 0.29778823529411763, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9316, "step": 12656 }, { "epoch": 0.29781176470588233, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1233, "step": 12657 }, { "epoch": 0.29783529411764703, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9676, "step": 12658 }, { "epoch": 0.2978588235294118, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1659, "step": 12659 }, { "epoch": 0.2978823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0076, "step": 12660 }, { "epoch": 0.2979058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.099, "step": 12661 }, { "epoch": 0.2979294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0749, "step": 12662 }, { "epoch": 0.2979529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1355, "step": 12663 }, { "epoch": 0.2979764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1049, "step": 12664 }, { "epoch": 0.298, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2347, "step": 12665 }, { "epoch": 0.2980235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8968, "step": 12666 }, { "epoch": 0.2980470588235294, "grad_norm": 0.2392578125, "learning_rate": 0.02, "loss": 0.6143, "step": 12667 }, { "epoch": 0.29807058823529414, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1801, "step": 12668 }, { "epoch": 0.29809411764705884, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0886, "step": 12669 }, { "epoch": 0.29811764705882354, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2529, "step": 12670 }, { "epoch": 0.29814117647058824, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.786, "step": 12671 }, { "epoch": 0.29816470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2013, "step": 12672 }, { "epoch": 0.29818823529411764, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1673, "step": 12673 }, { "epoch": 0.29821176470588234, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7493, "step": 12674 }, { "epoch": 0.29823529411764704, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1709, "step": 12675 }, { "epoch": 0.29825882352941174, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1222, "step": 12676 }, { "epoch": 0.29828235294117644, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8277, "step": 12677 }, { "epoch": 0.2983058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7993, "step": 12678 }, { "epoch": 0.2983294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0757, "step": 12679 }, { "epoch": 0.2983529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7837, "step": 12680 }, { "epoch": 0.2983764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0864, "step": 12681 }, { "epoch": 0.2984, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0245, "step": 12682 }, { "epoch": 0.2984235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0309, "step": 12683 }, { "epoch": 0.2984470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8513, "step": 12684 }, { "epoch": 0.2984705882352941, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9504, "step": 12685 }, { "epoch": 0.2984941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8807, "step": 12686 }, { "epoch": 0.29851764705882355, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9352, "step": 12687 }, { "epoch": 0.29854117647058825, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0035, "step": 12688 }, { "epoch": 0.29856470588235295, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3235, "step": 12689 }, { "epoch": 0.29858823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0478, "step": 12690 }, { "epoch": 0.29861176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1703, "step": 12691 }, { "epoch": 0.29863529411764705, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0812, "step": 12692 }, { "epoch": 0.29865882352941175, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.9193, "step": 12693 }, { "epoch": 0.29868235294117645, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9988, "step": 12694 }, { "epoch": 0.29870588235294115, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8045, "step": 12695 }, { "epoch": 0.2987294117647059, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7607, "step": 12696 }, { "epoch": 0.2987529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8065, "step": 12697 }, { "epoch": 0.2987764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0168, "step": 12698 }, { "epoch": 0.2988, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9818, "step": 12699 }, { "epoch": 0.2988235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.945, "step": 12700 }, { "epoch": 0.2988470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9287, "step": 12701 }, { "epoch": 0.2988705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1567, "step": 12702 }, { "epoch": 0.2988941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1466, "step": 12703 }, { "epoch": 0.2989176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8312, "step": 12704 }, { "epoch": 0.2989411764705882, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0854, "step": 12705 }, { "epoch": 0.29896470588235297, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1437, "step": 12706 }, { "epoch": 0.29898823529411767, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1388, "step": 12707 }, { "epoch": 0.29901176470588237, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1981, "step": 12708 }, { "epoch": 0.29903529411764707, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9152, "step": 12709 }, { "epoch": 0.29905882352941177, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8435, "step": 12710 }, { "epoch": 0.29908235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8927, "step": 12711 }, { "epoch": 0.29910588235294117, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1131, "step": 12712 }, { "epoch": 0.29912941176470587, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0861, "step": 12713 }, { "epoch": 0.29915294117647057, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2048, "step": 12714 }, { "epoch": 0.2991764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8351, "step": 12715 }, { "epoch": 0.2992, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9153, "step": 12716 }, { "epoch": 0.2992235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2173, "step": 12717 }, { "epoch": 0.2992470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9145, "step": 12718 }, { "epoch": 0.2992705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1635, "step": 12719 }, { "epoch": 0.2992941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9858, "step": 12720 }, { "epoch": 0.2993176470588235, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2485, "step": 12721 }, { "epoch": 0.2993411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0174, "step": 12722 }, { "epoch": 0.2993647058823529, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9554, "step": 12723 }, { "epoch": 0.2993882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1496, "step": 12724 }, { "epoch": 0.2994117647058824, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3835, "step": 12725 }, { "epoch": 0.2994352941176471, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.4169, "step": 12726 }, { "epoch": 0.2994588235294118, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0855, "step": 12727 }, { "epoch": 0.2994823529411765, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.3339, "step": 12728 }, { "epoch": 0.2995058823529412, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9893, "step": 12729 }, { "epoch": 0.2995294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0747, "step": 12730 }, { "epoch": 0.2995529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9925, "step": 12731 }, { "epoch": 0.2995764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2306, "step": 12732 }, { "epoch": 0.2996, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0668, "step": 12733 }, { "epoch": 0.29962352941176473, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9197, "step": 12734 }, { "epoch": 0.29964705882352943, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1657, "step": 12735 }, { "epoch": 0.29967058823529413, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9527, "step": 12736 }, { "epoch": 0.29969411764705883, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0839, "step": 12737 }, { "epoch": 0.29971764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.1209, "step": 12738 }, { "epoch": 0.29974117647058823, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.8072, "step": 12739 }, { "epoch": 0.29976470588235293, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9811, "step": 12740 }, { "epoch": 0.29978823529411763, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0594, "step": 12741 }, { "epoch": 0.29981176470588233, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9428, "step": 12742 }, { "epoch": 0.29983529411764703, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1236, "step": 12743 }, { "epoch": 0.2998588235294118, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7654, "step": 12744 }, { "epoch": 0.2998823529411765, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2339, "step": 12745 }, { "epoch": 0.2999058823529412, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9004, "step": 12746 }, { "epoch": 0.2999294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1987, "step": 12747 }, { "epoch": 0.2999529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.13, "step": 12748 }, { "epoch": 0.2999764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0722, "step": 12749 }, { "epoch": 0.3, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2344, "step": 12750 }, { "epoch": 0.3000235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0928, "step": 12751 }, { "epoch": 0.3000470588235294, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.6486, "step": 12752 }, { "epoch": 0.30007058823529414, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9499, "step": 12753 }, { "epoch": 0.30009411764705884, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.017, "step": 12754 }, { "epoch": 0.30011764705882354, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.009, "step": 12755 }, { "epoch": 0.30014117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1891, "step": 12756 }, { "epoch": 0.30016470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1546, "step": 12757 }, { "epoch": 0.30018823529411764, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2248, "step": 12758 }, { "epoch": 0.30021176470588234, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8149, "step": 12759 }, { "epoch": 0.30023529411764704, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0074, "step": 12760 }, { "epoch": 0.30025882352941174, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1368, "step": 12761 }, { "epoch": 0.30028235294117644, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1027, "step": 12762 }, { "epoch": 0.3003058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1295, "step": 12763 }, { "epoch": 0.3003294117647059, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9216, "step": 12764 }, { "epoch": 0.3003529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0449, "step": 12765 }, { "epoch": 0.3003764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1425, "step": 12766 }, { "epoch": 0.3004, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2637, "step": 12767 }, { "epoch": 0.3004235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9298, "step": 12768 }, { "epoch": 0.3004470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8575, "step": 12769 }, { "epoch": 0.3004705882352941, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.1553, "step": 12770 }, { "epoch": 0.3004941176470588, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0104, "step": 12771 }, { "epoch": 0.30051764705882356, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8313, "step": 12772 }, { "epoch": 0.30054117647058826, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1574, "step": 12773 }, { "epoch": 0.30056470588235296, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1284, "step": 12774 }, { "epoch": 0.30058823529411766, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3196, "step": 12775 }, { "epoch": 0.30061176470588236, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1997, "step": 12776 }, { "epoch": 0.30063529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1671, "step": 12777 }, { "epoch": 0.30065882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0324, "step": 12778 }, { "epoch": 0.30068235294117646, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9964, "step": 12779 }, { "epoch": 0.30070588235294116, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.253, "step": 12780 }, { "epoch": 0.30072941176470586, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9301, "step": 12781 }, { "epoch": 0.3007529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.708, "step": 12782 }, { "epoch": 0.3007764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1922, "step": 12783 }, { "epoch": 0.3008, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2388, "step": 12784 }, { "epoch": 0.3008235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7525, "step": 12785 }, { "epoch": 0.3008470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7374, "step": 12786 }, { "epoch": 0.3008705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0474, "step": 12787 }, { "epoch": 0.3008941176470588, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9288, "step": 12788 }, { "epoch": 0.3009176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1333, "step": 12789 }, { "epoch": 0.3009411764705882, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.7867, "step": 12790 }, { "epoch": 0.30096470588235297, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7812, "step": 12791 }, { "epoch": 0.30098823529411767, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1212, "step": 12792 }, { "epoch": 0.30101176470588237, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1411, "step": 12793 }, { "epoch": 0.30103529411764707, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1555, "step": 12794 }, { "epoch": 0.30105882352941177, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0532, "step": 12795 }, { "epoch": 0.30108235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1116, "step": 12796 }, { "epoch": 0.30110588235294117, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1505, "step": 12797 }, { "epoch": 0.30112941176470587, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0874, "step": 12798 }, { "epoch": 0.30115294117647057, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1654, "step": 12799 }, { "epoch": 0.30117647058823527, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0587, "step": 12800 }, { "epoch": 0.3012, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9263, "step": 12801 }, { "epoch": 0.3012235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1296, "step": 12802 }, { "epoch": 0.3012470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9365, "step": 12803 }, { "epoch": 0.3012705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9988, "step": 12804 }, { "epoch": 0.3012941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9301, "step": 12805 }, { "epoch": 0.3013176470588235, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.6182, "step": 12806 }, { "epoch": 0.3013411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9431, "step": 12807 }, { "epoch": 0.3013647058823529, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0009, "step": 12808 }, { "epoch": 0.3013882352941176, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.209, "step": 12809 }, { "epoch": 0.3014117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1799, "step": 12810 }, { "epoch": 0.3014352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0581, "step": 12811 }, { "epoch": 0.3014588235294118, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9028, "step": 12812 }, { "epoch": 0.3014823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9018, "step": 12813 }, { "epoch": 0.3015058823529412, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9045, "step": 12814 }, { "epoch": 0.3015294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7909, "step": 12815 }, { "epoch": 0.3015529411764706, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2275, "step": 12816 }, { "epoch": 0.3015764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1755, "step": 12817 }, { "epoch": 0.3016, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.6684, "step": 12818 }, { "epoch": 0.3016235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0597, "step": 12819 }, { "epoch": 0.30164705882352943, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0066, "step": 12820 }, { "epoch": 0.30167058823529413, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7811, "step": 12821 }, { "epoch": 0.30169411764705883, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0488, "step": 12822 }, { "epoch": 0.30171764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0833, "step": 12823 }, { "epoch": 0.30174117647058823, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0037, "step": 12824 }, { "epoch": 0.30176470588235293, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9945, "step": 12825 }, { "epoch": 0.30178823529411763, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1818, "step": 12826 }, { "epoch": 0.30181176470588233, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0585, "step": 12827 }, { "epoch": 0.30183529411764703, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1038, "step": 12828 }, { "epoch": 0.3018588235294118, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0388, "step": 12829 }, { "epoch": 0.3018823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7887, "step": 12830 }, { "epoch": 0.3019058823529412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1352, "step": 12831 }, { "epoch": 0.3019294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8335, "step": 12832 }, { "epoch": 0.3019529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0581, "step": 12833 }, { "epoch": 0.3019764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.6882, "step": 12834 }, { "epoch": 0.302, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1549, "step": 12835 }, { "epoch": 0.3020235294117647, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.9459, "step": 12836 }, { "epoch": 0.3020470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.123, "step": 12837 }, { "epoch": 0.3020705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9462, "step": 12838 }, { "epoch": 0.30209411764705885, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9344, "step": 12839 }, { "epoch": 0.30211764705882355, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1213, "step": 12840 }, { "epoch": 0.30214117647058825, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9948, "step": 12841 }, { "epoch": 0.30216470588235295, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.056, "step": 12842 }, { "epoch": 0.30218823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8866, "step": 12843 }, { "epoch": 0.30221176470588235, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0935, "step": 12844 }, { "epoch": 0.30223529411764705, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1247, "step": 12845 }, { "epoch": 0.30225882352941175, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8731, "step": 12846 }, { "epoch": 0.30228235294117645, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0159, "step": 12847 }, { "epoch": 0.3023058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9075, "step": 12848 }, { "epoch": 0.3023294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.088, "step": 12849 }, { "epoch": 0.3023529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8519, "step": 12850 }, { "epoch": 0.3023764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1753, "step": 12851 }, { "epoch": 0.3024, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3031, "step": 12852 }, { "epoch": 0.3024235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0034, "step": 12853 }, { "epoch": 0.3024470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0861, "step": 12854 }, { "epoch": 0.3024705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6043, "step": 12855 }, { "epoch": 0.3024941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0452, "step": 12856 }, { "epoch": 0.3025176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0671, "step": 12857 }, { "epoch": 0.30254117647058826, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0845, "step": 12858 }, { "epoch": 0.30256470588235296, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0571, "step": 12859 }, { "epoch": 0.30258823529411766, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1166, "step": 12860 }, { "epoch": 0.30261176470588236, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0555, "step": 12861 }, { "epoch": 0.30263529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9904, "step": 12862 }, { "epoch": 0.30265882352941176, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1295, "step": 12863 }, { "epoch": 0.30268235294117646, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1225, "step": 12864 }, { "epoch": 0.30270588235294116, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9674, "step": 12865 }, { "epoch": 0.30272941176470586, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7986, "step": 12866 }, { "epoch": 0.3027529411764706, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1425, "step": 12867 }, { "epoch": 0.3027764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1646, "step": 12868 }, { "epoch": 0.3028, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1516, "step": 12869 }, { "epoch": 0.3028235294117647, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3338, "step": 12870 }, { "epoch": 0.3028470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1221, "step": 12871 }, { "epoch": 0.3028705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.059, "step": 12872 }, { "epoch": 0.3028941176470588, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.212, "step": 12873 }, { "epoch": 0.3029176470588235, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0321, "step": 12874 }, { "epoch": 0.3029411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.3094, "step": 12875 }, { "epoch": 0.3029647058823529, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8874, "step": 12876 }, { "epoch": 0.30298823529411767, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9053, "step": 12877 }, { "epoch": 0.30301176470588237, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9573, "step": 12878 }, { "epoch": 0.30303529411764707, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9433, "step": 12879 }, { "epoch": 0.30305882352941177, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.2222, "step": 12880 }, { "epoch": 0.30308235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8712, "step": 12881 }, { "epoch": 0.30310588235294117, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.6966, "step": 12882 }, { "epoch": 0.30312941176470587, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0769, "step": 12883 }, { "epoch": 0.30315294117647057, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8697, "step": 12884 }, { "epoch": 0.30317647058823527, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9447, "step": 12885 }, { "epoch": 0.3032, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8258, "step": 12886 }, { "epoch": 0.3032235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9236, "step": 12887 }, { "epoch": 0.3032470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0462, "step": 12888 }, { "epoch": 0.3032705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0321, "step": 12889 }, { "epoch": 0.3032941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.142, "step": 12890 }, { "epoch": 0.3033176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2341, "step": 12891 }, { "epoch": 0.3033411764705882, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8538, "step": 12892 }, { "epoch": 0.3033647058823529, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8425, "step": 12893 }, { "epoch": 0.3033882352941176, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2962, "step": 12894 }, { "epoch": 0.3034117647058823, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9426, "step": 12895 }, { "epoch": 0.3034352941176471, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0322, "step": 12896 }, { "epoch": 0.3034588235294118, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9285, "step": 12897 }, { "epoch": 0.3034823529411765, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0254, "step": 12898 }, { "epoch": 0.3035058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1776, "step": 12899 }, { "epoch": 0.3035294117647059, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.2658, "step": 12900 }, { "epoch": 0.3035529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.4188, "step": 12901 }, { "epoch": 0.3035764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9979, "step": 12902 }, { "epoch": 0.3036, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.7479, "step": 12903 }, { "epoch": 0.3036235294117647, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.7941, "step": 12904 }, { "epoch": 0.30364705882352944, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9072, "step": 12905 }, { "epoch": 0.30367058823529414, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2693, "step": 12906 }, { "epoch": 0.30369411764705884, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8604, "step": 12907 }, { "epoch": 0.30371764705882354, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.028, "step": 12908 }, { "epoch": 0.30374117647058824, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9622, "step": 12909 }, { "epoch": 0.30376470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2345, "step": 12910 }, { "epoch": 0.30378823529411764, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.862, "step": 12911 }, { "epoch": 0.30381176470588234, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1481, "step": 12912 }, { "epoch": 0.30383529411764704, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9647, "step": 12913 }, { "epoch": 0.3038588235294118, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8229, "step": 12914 }, { "epoch": 0.3038823529411765, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2039, "step": 12915 }, { "epoch": 0.3039058823529412, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7789, "step": 12916 }, { "epoch": 0.3039294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1384, "step": 12917 }, { "epoch": 0.3039529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9627, "step": 12918 }, { "epoch": 0.3039764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1783, "step": 12919 }, { "epoch": 0.304, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.258, "step": 12920 }, { "epoch": 0.3040235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1879, "step": 12921 }, { "epoch": 0.3040470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2966, "step": 12922 }, { "epoch": 0.3040705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8565, "step": 12923 }, { "epoch": 0.30409411764705885, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9711, "step": 12924 }, { "epoch": 0.30411764705882355, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6664, "step": 12925 }, { "epoch": 0.30414117647058825, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0046, "step": 12926 }, { "epoch": 0.30416470588235295, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9313, "step": 12927 }, { "epoch": 0.30418823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0621, "step": 12928 }, { "epoch": 0.30421176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.2592, "step": 12929 }, { "epoch": 0.30423529411764705, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 1.0044, "step": 12930 }, { "epoch": 0.30425882352941175, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3356, "step": 12931 }, { "epoch": 0.30428235294117645, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.0077, "step": 12932 }, { "epoch": 0.3043058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9642, "step": 12933 }, { "epoch": 0.3043294117647059, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9539, "step": 12934 }, { "epoch": 0.3043529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.941, "step": 12935 }, { "epoch": 0.3043764705882353, "grad_norm": 0.318359375, "learning_rate": 0.02, "loss": 1.1025, "step": 12936 }, { "epoch": 0.3044, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7394, "step": 12937 }, { "epoch": 0.3044235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0828, "step": 12938 }, { "epoch": 0.3044470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1255, "step": 12939 }, { "epoch": 0.3044705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8771, "step": 12940 }, { "epoch": 0.3044941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0988, "step": 12941 }, { "epoch": 0.3045176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.119, "step": 12942 }, { "epoch": 0.30454117647058826, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8716, "step": 12943 }, { "epoch": 0.30456470588235296, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 1.0113, "step": 12944 }, { "epoch": 0.30458823529411766, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9853, "step": 12945 }, { "epoch": 0.30461176470588236, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8352, "step": 12946 }, { "epoch": 0.30463529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.175, "step": 12947 }, { "epoch": 0.30465882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1495, "step": 12948 }, { "epoch": 0.30468235294117646, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.5224, "step": 12949 }, { "epoch": 0.30470588235294116, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7359, "step": 12950 }, { "epoch": 0.30472941176470586, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.9786, "step": 12951 }, { "epoch": 0.3047529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8613, "step": 12952 }, { "epoch": 0.3047764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 1.0751, "step": 12953 }, { "epoch": 0.3048, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0271, "step": 12954 }, { "epoch": 0.3048235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9774, "step": 12955 }, { "epoch": 0.3048470588235294, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3074, "step": 12956 }, { "epoch": 0.3048705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9372, "step": 12957 }, { "epoch": 0.3048941176470588, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.709, "step": 12958 }, { "epoch": 0.3049176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8804, "step": 12959 }, { "epoch": 0.3049411764705882, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9747, "step": 12960 }, { "epoch": 0.3049647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.4235, "step": 12961 }, { "epoch": 0.30498823529411767, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.8439, "step": 12962 }, { "epoch": 0.30501176470588237, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.9661, "step": 12963 }, { "epoch": 0.30503529411764707, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.8228, "step": 12964 }, { "epoch": 0.30505882352941177, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.3562, "step": 12965 }, { "epoch": 0.30508235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1825, "step": 12966 }, { "epoch": 0.30510588235294117, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9521, "step": 12967 }, { "epoch": 0.30512941176470587, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.6476, "step": 12968 }, { "epoch": 0.30515294117647057, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.6908, "step": 12969 }, { "epoch": 0.30517647058823527, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.271, "step": 12970 }, { "epoch": 0.3052, "grad_norm": 0.73828125, "learning_rate": 0.02, "loss": 0.866, "step": 12971 }, { "epoch": 0.3052235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9377, "step": 12972 }, { "epoch": 0.3052470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1322, "step": 12973 }, { "epoch": 0.3052705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.257, "step": 12974 }, { "epoch": 0.3052941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8166, "step": 12975 }, { "epoch": 0.3053176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1144, "step": 12976 }, { "epoch": 0.3053411764705882, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9485, "step": 12977 }, { "epoch": 0.3053647058823529, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.914, "step": 12978 }, { "epoch": 0.3053882352941176, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0844, "step": 12979 }, { "epoch": 0.3054117647058823, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9871, "step": 12980 }, { "epoch": 0.3054352941176471, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.925, "step": 12981 }, { "epoch": 0.3054588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0536, "step": 12982 }, { "epoch": 0.3054823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0729, "step": 12983 }, { "epoch": 0.3055058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3307, "step": 12984 }, { "epoch": 0.3055294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1362, "step": 12985 }, { "epoch": 0.3055529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7814, "step": 12986 }, { "epoch": 0.3055764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1344, "step": 12987 }, { "epoch": 0.3056, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.5962, "step": 12988 }, { "epoch": 0.3056235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.3194, "step": 12989 }, { "epoch": 0.30564705882352944, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9537, "step": 12990 }, { "epoch": 0.30567058823529414, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9602, "step": 12991 }, { "epoch": 0.30569411764705884, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.834, "step": 12992 }, { "epoch": 0.30571764705882354, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.197, "step": 12993 }, { "epoch": 0.30574117647058824, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3172, "step": 12994 }, { "epoch": 0.30576470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9606, "step": 12995 }, { "epoch": 0.30578823529411764, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1514, "step": 12996 }, { "epoch": 0.30581176470588234, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7119, "step": 12997 }, { "epoch": 0.30583529411764704, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1373, "step": 12998 }, { "epoch": 0.30585882352941174, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2613, "step": 12999 }, { "epoch": 0.3058823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9309, "step": 13000 }, { "epoch": 0.3058823529411765, "eval_loss": 2.2015132904052734, "eval_runtime": 683.9391, "eval_samples_per_second": 12.428, "eval_steps_per_second": 3.107, "step": 13000 }, { "epoch": 0.3059058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1553, "step": 13001 }, { "epoch": 0.3059294117647059, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.288, "step": 13002 }, { "epoch": 0.3059529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0806, "step": 13003 }, { "epoch": 0.3059764705882353, "grad_norm": 0.2578125, "learning_rate": 0.02, "loss": 0.8829, "step": 13004 }, { "epoch": 0.306, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.9516, "step": 13005 }, { "epoch": 0.3060235294117647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.486, "step": 13006 }, { "epoch": 0.3060470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2632, "step": 13007 }, { "epoch": 0.3060705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0298, "step": 13008 }, { "epoch": 0.30609411764705885, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0789, "step": 13009 }, { "epoch": 0.30611764705882355, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9361, "step": 13010 }, { "epoch": 0.30614117647058825, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0789, "step": 13011 }, { "epoch": 0.30616470588235295, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7847, "step": 13012 }, { "epoch": 0.30618823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2502, "step": 13013 }, { "epoch": 0.30621176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2146, "step": 13014 }, { "epoch": 0.30623529411764705, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 1.0331, "step": 13015 }, { "epoch": 0.30625882352941175, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1615, "step": 13016 }, { "epoch": 0.30628235294117645, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1422, "step": 13017 }, { "epoch": 0.30630588235294115, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1882, "step": 13018 }, { "epoch": 0.3063294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.983, "step": 13019 }, { "epoch": 0.3063529411764706, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1149, "step": 13020 }, { "epoch": 0.3063764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.078, "step": 13021 }, { "epoch": 0.3064, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1242, "step": 13022 }, { "epoch": 0.3064235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0373, "step": 13023 }, { "epoch": 0.3064470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.2531, "step": 13024 }, { "epoch": 0.3064705882352941, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0175, "step": 13025 }, { "epoch": 0.3064941176470588, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.8885, "step": 13026 }, { "epoch": 0.3065176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0572, "step": 13027 }, { "epoch": 0.30654117647058826, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9364, "step": 13028 }, { "epoch": 0.30656470588235296, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8199, "step": 13029 }, { "epoch": 0.30658823529411766, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8845, "step": 13030 }, { "epoch": 0.30661176470588236, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9722, "step": 13031 }, { "epoch": 0.30663529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9512, "step": 13032 }, { "epoch": 0.30665882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1062, "step": 13033 }, { "epoch": 0.30668235294117646, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9714, "step": 13034 }, { "epoch": 0.30670588235294116, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7098, "step": 13035 }, { "epoch": 0.30672941176470586, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2746, "step": 13036 }, { "epoch": 0.30675294117647056, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0616, "step": 13037 }, { "epoch": 0.3067764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9803, "step": 13038 }, { "epoch": 0.3068, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2482, "step": 13039 }, { "epoch": 0.3068235294117647, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3368, "step": 13040 }, { "epoch": 0.3068470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9432, "step": 13041 }, { "epoch": 0.3068705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7637, "step": 13042 }, { "epoch": 0.3068941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.011, "step": 13043 }, { "epoch": 0.3069176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9841, "step": 13044 }, { "epoch": 0.3069411764705882, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1128, "step": 13045 }, { "epoch": 0.3069647058823529, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1213, "step": 13046 }, { "epoch": 0.3069882352941177, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.023, "step": 13047 }, { "epoch": 0.3070117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9715, "step": 13048 }, { "epoch": 0.3070352941176471, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.3262, "step": 13049 }, { "epoch": 0.3070588235294118, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8977, "step": 13050 }, { "epoch": 0.3070823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1536, "step": 13051 }, { "epoch": 0.3071058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0293, "step": 13052 }, { "epoch": 0.3071294117647059, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8304, "step": 13053 }, { "epoch": 0.3071529411764706, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7295, "step": 13054 }, { "epoch": 0.3071764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0334, "step": 13055 }, { "epoch": 0.3072, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7655, "step": 13056 }, { "epoch": 0.30722352941176473, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1156, "step": 13057 }, { "epoch": 0.30724705882352943, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0849, "step": 13058 }, { "epoch": 0.30727058823529413, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0885, "step": 13059 }, { "epoch": 0.30729411764705883, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3606, "step": 13060 }, { "epoch": 0.30731764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.1864, "step": 13061 }, { "epoch": 0.30734117647058823, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.2902, "step": 13062 }, { "epoch": 0.30736470588235293, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.5823, "step": 13063 }, { "epoch": 0.30738823529411763, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4154, "step": 13064 }, { "epoch": 0.30741176470588233, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2003, "step": 13065 }, { "epoch": 0.3074352941176471, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0414, "step": 13066 }, { "epoch": 0.3074588235294118, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2341, "step": 13067 }, { "epoch": 0.3074823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0832, "step": 13068 }, { "epoch": 0.3075058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0764, "step": 13069 }, { "epoch": 0.3075294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9011, "step": 13070 }, { "epoch": 0.3075529411764706, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3503, "step": 13071 }, { "epoch": 0.3075764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9237, "step": 13072 }, { "epoch": 0.3076, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1905, "step": 13073 }, { "epoch": 0.3076235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8552, "step": 13074 }, { "epoch": 0.3076470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9966, "step": 13075 }, { "epoch": 0.30767058823529414, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0083, "step": 13076 }, { "epoch": 0.30769411764705884, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1953, "step": 13077 }, { "epoch": 0.30771764705882354, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9882, "step": 13078 }, { "epoch": 0.30774117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0353, "step": 13079 }, { "epoch": 0.30776470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8728, "step": 13080 }, { "epoch": 0.30778823529411764, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1515, "step": 13081 }, { "epoch": 0.30781176470588234, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7724, "step": 13082 }, { "epoch": 0.30783529411764704, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0071, "step": 13083 }, { "epoch": 0.30785882352941174, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1804, "step": 13084 }, { "epoch": 0.3078823529411765, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7728, "step": 13085 }, { "epoch": 0.3079058823529412, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8628, "step": 13086 }, { "epoch": 0.3079294117647059, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0312, "step": 13087 }, { "epoch": 0.3079529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8633, "step": 13088 }, { "epoch": 0.3079764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.069, "step": 13089 }, { "epoch": 0.308, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.94, "step": 13090 }, { "epoch": 0.3080235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1423, "step": 13091 }, { "epoch": 0.3080470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2016, "step": 13092 }, { "epoch": 0.3080705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1838, "step": 13093 }, { "epoch": 0.3080941176470588, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9923, "step": 13094 }, { "epoch": 0.30811764705882355, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.4392, "step": 13095 }, { "epoch": 0.30814117647058825, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0927, "step": 13096 }, { "epoch": 0.30816470588235295, "grad_norm": 0.2373046875, "learning_rate": 0.02, "loss": 0.5387, "step": 13097 }, { "epoch": 0.30818823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8917, "step": 13098 }, { "epoch": 0.30821176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8235, "step": 13099 }, { "epoch": 0.30823529411764705, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0479, "step": 13100 }, { "epoch": 0.30825882352941175, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2776, "step": 13101 }, { "epoch": 0.30828235294117645, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8668, "step": 13102 }, { "epoch": 0.30830588235294115, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2911, "step": 13103 }, { "epoch": 0.3083294117647059, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.4841, "step": 13104 }, { "epoch": 0.3083529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2049, "step": 13105 }, { "epoch": 0.3083764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2981, "step": 13106 }, { "epoch": 0.3084, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9013, "step": 13107 }, { "epoch": 0.3084235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9572, "step": 13108 }, { "epoch": 0.3084470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9223, "step": 13109 }, { "epoch": 0.3084705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9922, "step": 13110 }, { "epoch": 0.3084941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0035, "step": 13111 }, { "epoch": 0.3085176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9459, "step": 13112 }, { "epoch": 0.3085411764705882, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.063, "step": 13113 }, { "epoch": 0.30856470588235296, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.056, "step": 13114 }, { "epoch": 0.30858823529411766, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8297, "step": 13115 }, { "epoch": 0.30861176470588236, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.76, "step": 13116 }, { "epoch": 0.30863529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0367, "step": 13117 }, { "epoch": 0.30865882352941176, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2925, "step": 13118 }, { "epoch": 0.30868235294117646, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1094, "step": 13119 }, { "epoch": 0.30870588235294116, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.849, "step": 13120 }, { "epoch": 0.30872941176470586, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2473, "step": 13121 }, { "epoch": 0.30875294117647056, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0777, "step": 13122 }, { "epoch": 0.3087764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1735, "step": 13123 }, { "epoch": 0.3088, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0684, "step": 13124 }, { "epoch": 0.3088235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9676, "step": 13125 }, { "epoch": 0.3088470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1676, "step": 13126 }, { "epoch": 0.3088705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1387, "step": 13127 }, { "epoch": 0.3088941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1179, "step": 13128 }, { "epoch": 0.3089176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0349, "step": 13129 }, { "epoch": 0.3089411764705882, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0863, "step": 13130 }, { "epoch": 0.3089647058823529, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8528, "step": 13131 }, { "epoch": 0.3089882352941177, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8104, "step": 13132 }, { "epoch": 0.3090117647058824, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9374, "step": 13133 }, { "epoch": 0.3090352941176471, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0462, "step": 13134 }, { "epoch": 0.3090588235294118, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1546, "step": 13135 }, { "epoch": 0.3090823529411765, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.152, "step": 13136 }, { "epoch": 0.3091058823529412, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.7054, "step": 13137 }, { "epoch": 0.3091294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1254, "step": 13138 }, { "epoch": 0.3091529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9398, "step": 13139 }, { "epoch": 0.3091764705882353, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9853, "step": 13140 }, { "epoch": 0.3092, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9558, "step": 13141 }, { "epoch": 0.30922352941176473, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0909, "step": 13142 }, { "epoch": 0.30924705882352943, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.4037, "step": 13143 }, { "epoch": 0.30927058823529413, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0558, "step": 13144 }, { "epoch": 0.30929411764705883, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1198, "step": 13145 }, { "epoch": 0.30931764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0205, "step": 13146 }, { "epoch": 0.30934117647058823, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7695, "step": 13147 }, { "epoch": 0.30936470588235293, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1484, "step": 13148 }, { "epoch": 0.30938823529411763, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9115, "step": 13149 }, { "epoch": 0.30941176470588233, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2006, "step": 13150 }, { "epoch": 0.3094352941176471, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1606, "step": 13151 }, { "epoch": 0.3094588235294118, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0741, "step": 13152 }, { "epoch": 0.3094823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0734, "step": 13153 }, { "epoch": 0.3095058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0402, "step": 13154 }, { "epoch": 0.3095294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0396, "step": 13155 }, { "epoch": 0.3095529411764706, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.172, "step": 13156 }, { "epoch": 0.3095764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1587, "step": 13157 }, { "epoch": 0.3096, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9332, "step": 13158 }, { "epoch": 0.3096235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7946, "step": 13159 }, { "epoch": 0.3096470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0156, "step": 13160 }, { "epoch": 0.30967058823529414, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0291, "step": 13161 }, { "epoch": 0.30969411764705884, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.4091, "step": 13162 }, { "epoch": 0.30971764705882354, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9842, "step": 13163 }, { "epoch": 0.30974117647058824, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7658, "step": 13164 }, { "epoch": 0.30976470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0604, "step": 13165 }, { "epoch": 0.30978823529411764, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0707, "step": 13166 }, { "epoch": 0.30981176470588234, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8709, "step": 13167 }, { "epoch": 0.30983529411764704, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.7656, "step": 13168 }, { "epoch": 0.30985882352941174, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7802, "step": 13169 }, { "epoch": 0.3098823529411765, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1565, "step": 13170 }, { "epoch": 0.3099058823529412, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.2038, "step": 13171 }, { "epoch": 0.3099294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.171, "step": 13172 }, { "epoch": 0.3099529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9294, "step": 13173 }, { "epoch": 0.3099764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0416, "step": 13174 }, { "epoch": 0.31, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8927, "step": 13175 }, { "epoch": 0.3100235294117647, "grad_norm": 0.2412109375, "learning_rate": 0.02, "loss": 0.7533, "step": 13176 }, { "epoch": 0.3100470588235294, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4117, "step": 13177 }, { "epoch": 0.3100705882352941, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2692, "step": 13178 }, { "epoch": 0.3100941176470588, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2114, "step": 13179 }, { "epoch": 0.31011764705882355, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1601, "step": 13180 }, { "epoch": 0.31014117647058825, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3297, "step": 13181 }, { "epoch": 0.31016470588235295, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2502, "step": 13182 }, { "epoch": 0.31018823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1382, "step": 13183 }, { "epoch": 0.31021176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0707, "step": 13184 }, { "epoch": 0.31023529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.125, "step": 13185 }, { "epoch": 0.31025882352941175, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.118, "step": 13186 }, { "epoch": 0.31028235294117645, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7866, "step": 13187 }, { "epoch": 0.31030588235294115, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.035, "step": 13188 }, { "epoch": 0.3103294117647059, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.4753, "step": 13189 }, { "epoch": 0.3103529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.5335, "step": 13190 }, { "epoch": 0.3103764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9127, "step": 13191 }, { "epoch": 0.3104, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0155, "step": 13192 }, { "epoch": 0.3104235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1762, "step": 13193 }, { "epoch": 0.3104470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.259, "step": 13194 }, { "epoch": 0.3104705882352941, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9541, "step": 13195 }, { "epoch": 0.3104941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7768, "step": 13196 }, { "epoch": 0.3105176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0196, "step": 13197 }, { "epoch": 0.3105411764705882, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8232, "step": 13198 }, { "epoch": 0.31056470588235296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2654, "step": 13199 }, { "epoch": 0.31058823529411766, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9218, "step": 13200 }, { "epoch": 0.31061176470588236, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.265, "step": 13201 }, { "epoch": 0.31063529411764707, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0482, "step": 13202 }, { "epoch": 0.31065882352941177, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.9103, "step": 13203 }, { "epoch": 0.31068235294117647, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8915, "step": 13204 }, { "epoch": 0.31070588235294117, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7059, "step": 13205 }, { "epoch": 0.31072941176470587, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.6999, "step": 13206 }, { "epoch": 0.31075294117647057, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7528, "step": 13207 }, { "epoch": 0.3107764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.6809, "step": 13208 }, { "epoch": 0.3108, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1788, "step": 13209 }, { "epoch": 0.3108235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9201, "step": 13210 }, { "epoch": 0.3108470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9315, "step": 13211 }, { "epoch": 0.3108705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8683, "step": 13212 }, { "epoch": 0.3108941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9399, "step": 13213 }, { "epoch": 0.3109176470588235, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.885, "step": 13214 }, { "epoch": 0.3109411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0441, "step": 13215 }, { "epoch": 0.3109647058823529, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.068, "step": 13216 }, { "epoch": 0.3109882352941176, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2377, "step": 13217 }, { "epoch": 0.3110117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8378, "step": 13218 }, { "epoch": 0.3110352941176471, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.201, "step": 13219 }, { "epoch": 0.3110588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9358, "step": 13220 }, { "epoch": 0.3110823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9968, "step": 13221 }, { "epoch": 0.3111058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0043, "step": 13222 }, { "epoch": 0.3111294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9869, "step": 13223 }, { "epoch": 0.3111529411764706, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.4398, "step": 13224 }, { "epoch": 0.3111764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6539, "step": 13225 }, { "epoch": 0.3112, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1876, "step": 13226 }, { "epoch": 0.31122352941176473, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.6521, "step": 13227 }, { "epoch": 0.31124705882352943, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9833, "step": 13228 }, { "epoch": 0.31127058823529413, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.273, "step": 13229 }, { "epoch": 0.31129411764705883, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9859, "step": 13230 }, { "epoch": 0.31131764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1098, "step": 13231 }, { "epoch": 0.31134117647058823, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2308, "step": 13232 }, { "epoch": 0.31136470588235293, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9922, "step": 13233 }, { "epoch": 0.31138823529411763, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2749, "step": 13234 }, { "epoch": 0.31141176470588233, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9742, "step": 13235 }, { "epoch": 0.31143529411764703, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8216, "step": 13236 }, { "epoch": 0.3114588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0488, "step": 13237 }, { "epoch": 0.3114823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0359, "step": 13238 }, { "epoch": 0.3115058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9291, "step": 13239 }, { "epoch": 0.3115294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1928, "step": 13240 }, { "epoch": 0.3115529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8007, "step": 13241 }, { "epoch": 0.3115764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9841, "step": 13242 }, { "epoch": 0.3116, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9943, "step": 13243 }, { "epoch": 0.3116235294117647, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.7025, "step": 13244 }, { "epoch": 0.3116470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.039, "step": 13245 }, { "epoch": 0.31167058823529414, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8477, "step": 13246 }, { "epoch": 0.31169411764705884, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8008, "step": 13247 }, { "epoch": 0.31171764705882354, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9895, "step": 13248 }, { "epoch": 0.31174117647058824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8288, "step": 13249 }, { "epoch": 0.31176470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1297, "step": 13250 }, { "epoch": 0.31178823529411764, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0568, "step": 13251 }, { "epoch": 0.31181176470588234, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1551, "step": 13252 }, { "epoch": 0.31183529411764704, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1945, "step": 13253 }, { "epoch": 0.31185882352941174, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0649, "step": 13254 }, { "epoch": 0.31188235294117644, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8219, "step": 13255 }, { "epoch": 0.3119058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0565, "step": 13256 }, { "epoch": 0.3119294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8148, "step": 13257 }, { "epoch": 0.3119529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6697, "step": 13258 }, { "epoch": 0.3119764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0861, "step": 13259 }, { "epoch": 0.312, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8142, "step": 13260 }, { "epoch": 0.3120235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8151, "step": 13261 }, { "epoch": 0.3120470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.867, "step": 13262 }, { "epoch": 0.3120705882352941, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.5407, "step": 13263 }, { "epoch": 0.3120941176470588, "grad_norm": 0.244140625, "learning_rate": 0.02, "loss": 0.4139, "step": 13264 }, { "epoch": 0.31211764705882356, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0088, "step": 13265 }, { "epoch": 0.31214117647058826, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9926, "step": 13266 }, { "epoch": 0.31216470588235296, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1606, "step": 13267 }, { "epoch": 0.31218823529411766, "grad_norm": 0.2490234375, "learning_rate": 0.02, "loss": 0.5015, "step": 13268 }, { "epoch": 0.31221176470588236, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.178, "step": 13269 }, { "epoch": 0.31223529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9525, "step": 13270 }, { "epoch": 0.31225882352941176, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2405, "step": 13271 }, { "epoch": 0.31228235294117646, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0183, "step": 13272 }, { "epoch": 0.31230588235294116, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.6313, "step": 13273 }, { "epoch": 0.31232941176470586, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.4587, "step": 13274 }, { "epoch": 0.3123529411764706, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1541, "step": 13275 }, { "epoch": 0.3123764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0848, "step": 13276 }, { "epoch": 0.3124, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1828, "step": 13277 }, { "epoch": 0.3124235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0369, "step": 13278 }, { "epoch": 0.3124470588235294, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.72, "step": 13279 }, { "epoch": 0.3124705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0112, "step": 13280 }, { "epoch": 0.3124941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2145, "step": 13281 }, { "epoch": 0.3125176470588235, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.7677, "step": 13282 }, { "epoch": 0.3125411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0474, "step": 13283 }, { "epoch": 0.31256470588235297, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0165, "step": 13284 }, { "epoch": 0.31258823529411767, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1405, "step": 13285 }, { "epoch": 0.31261176470588237, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0203, "step": 13286 }, { "epoch": 0.31263529411764707, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9334, "step": 13287 }, { "epoch": 0.31265882352941177, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1265, "step": 13288 }, { "epoch": 0.31268235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9606, "step": 13289 }, { "epoch": 0.31270588235294117, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0683, "step": 13290 }, { "epoch": 0.31272941176470587, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1086, "step": 13291 }, { "epoch": 0.31275294117647057, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0097, "step": 13292 }, { "epoch": 0.31277647058823527, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8621, "step": 13293 }, { "epoch": 0.3128, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9618, "step": 13294 }, { "epoch": 0.3128235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8502, "step": 13295 }, { "epoch": 0.3128470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8487, "step": 13296 }, { "epoch": 0.3128705882352941, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0404, "step": 13297 }, { "epoch": 0.3128941176470588, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1697, "step": 13298 }, { "epoch": 0.3129176470588235, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2618, "step": 13299 }, { "epoch": 0.3129411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0511, "step": 13300 }, { "epoch": 0.3129647058823529, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0346, "step": 13301 }, { "epoch": 0.3129882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9136, "step": 13302 }, { "epoch": 0.3130117647058824, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2505, "step": 13303 }, { "epoch": 0.3130352941176471, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.004, "step": 13304 }, { "epoch": 0.3130588235294118, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.5187, "step": 13305 }, { "epoch": 0.3130823529411765, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8123, "step": 13306 }, { "epoch": 0.3131058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0164, "step": 13307 }, { "epoch": 0.3131294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9642, "step": 13308 }, { "epoch": 0.3131529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0939, "step": 13309 }, { "epoch": 0.3131764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0636, "step": 13310 }, { "epoch": 0.3132, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0137, "step": 13311 }, { "epoch": 0.3132235294117647, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1926, "step": 13312 }, { "epoch": 0.31324705882352943, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1332, "step": 13313 }, { "epoch": 0.31327058823529413, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3047, "step": 13314 }, { "epoch": 0.31329411764705883, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.08, "step": 13315 }, { "epoch": 0.31331764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9147, "step": 13316 }, { "epoch": 0.31334117647058823, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0917, "step": 13317 }, { "epoch": 0.31336470588235293, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2479, "step": 13318 }, { "epoch": 0.31338823529411763, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1692, "step": 13319 }, { "epoch": 0.31341176470588233, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.807, "step": 13320 }, { "epoch": 0.31343529411764703, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9576, "step": 13321 }, { "epoch": 0.3134588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8545, "step": 13322 }, { "epoch": 0.3134823529411765, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0656, "step": 13323 }, { "epoch": 0.3135058823529412, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2396, "step": 13324 }, { "epoch": 0.3135294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1082, "step": 13325 }, { "epoch": 0.3135529411764706, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1818, "step": 13326 }, { "epoch": 0.3135764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9697, "step": 13327 }, { "epoch": 0.3136, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1736, "step": 13328 }, { "epoch": 0.3136235294117647, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.4204, "step": 13329 }, { "epoch": 0.3136470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1757, "step": 13330 }, { "epoch": 0.3136705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1512, "step": 13331 }, { "epoch": 0.31369411764705885, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.203, "step": 13332 }, { "epoch": 0.31371764705882355, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1877, "step": 13333 }, { "epoch": 0.31374117647058825, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7569, "step": 13334 }, { "epoch": 0.31376470588235295, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8801, "step": 13335 }, { "epoch": 0.31378823529411765, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2919, "step": 13336 }, { "epoch": 0.31381176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9754, "step": 13337 }, { "epoch": 0.31383529411764705, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0594, "step": 13338 }, { "epoch": 0.31385882352941175, "grad_norm": 0.251953125, "learning_rate": 0.02, "loss": 1.0398, "step": 13339 }, { "epoch": 0.31388235294117645, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0016, "step": 13340 }, { "epoch": 0.3139058823529412, "grad_norm": 0.2421875, "learning_rate": 0.02, "loss": 0.2903, "step": 13341 }, { "epoch": 0.3139294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0347, "step": 13342 }, { "epoch": 0.3139529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.207, "step": 13343 }, { "epoch": 0.3139764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1333, "step": 13344 }, { "epoch": 0.314, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9613, "step": 13345 }, { "epoch": 0.3140235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0073, "step": 13346 }, { "epoch": 0.3140470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0509, "step": 13347 }, { "epoch": 0.3140705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0485, "step": 13348 }, { "epoch": 0.3140941176470588, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0016, "step": 13349 }, { "epoch": 0.31411764705882356, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.6472, "step": 13350 }, { "epoch": 0.31414117647058826, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1033, "step": 13351 }, { "epoch": 0.31416470588235296, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6671, "step": 13352 }, { "epoch": 0.31418823529411766, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7475, "step": 13353 }, { "epoch": 0.31421176470588236, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9157, "step": 13354 }, { "epoch": 0.31423529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9717, "step": 13355 }, { "epoch": 0.31425882352941176, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0209, "step": 13356 }, { "epoch": 0.31428235294117646, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.047, "step": 13357 }, { "epoch": 0.31430588235294116, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0838, "step": 13358 }, { "epoch": 0.31432941176470586, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0693, "step": 13359 }, { "epoch": 0.3143529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1275, "step": 13360 }, { "epoch": 0.3143764705882353, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.6648, "step": 13361 }, { "epoch": 0.3144, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0967, "step": 13362 }, { "epoch": 0.3144235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8835, "step": 13363 }, { "epoch": 0.3144470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7232, "step": 13364 }, { "epoch": 0.3144705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8561, "step": 13365 }, { "epoch": 0.3144941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0754, "step": 13366 }, { "epoch": 0.3145176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0594, "step": 13367 }, { "epoch": 0.3145411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9672, "step": 13368 }, { "epoch": 0.31456470588235297, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9772, "step": 13369 }, { "epoch": 0.31458823529411767, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9901, "step": 13370 }, { "epoch": 0.31461176470588237, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0819, "step": 13371 }, { "epoch": 0.31463529411764707, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.038, "step": 13372 }, { "epoch": 0.31465882352941177, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0351, "step": 13373 }, { "epoch": 0.31468235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2095, "step": 13374 }, { "epoch": 0.31470588235294117, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1744, "step": 13375 }, { "epoch": 0.31472941176470587, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.202, "step": 13376 }, { "epoch": 0.31475294117647057, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9923, "step": 13377 }, { "epoch": 0.31477647058823527, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0641, "step": 13378 }, { "epoch": 0.3148, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8372, "step": 13379 }, { "epoch": 0.3148235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0452, "step": 13380 }, { "epoch": 0.3148470588235294, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2681, "step": 13381 }, { "epoch": 0.3148705882352941, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1782, "step": 13382 }, { "epoch": 0.3148941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1974, "step": 13383 }, { "epoch": 0.3149176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1177, "step": 13384 }, { "epoch": 0.3149411764705882, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.9994, "step": 13385 }, { "epoch": 0.3149647058823529, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2316, "step": 13386 }, { "epoch": 0.3149882352941176, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9118, "step": 13387 }, { "epoch": 0.3150117647058824, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4985, "step": 13388 }, { "epoch": 0.3150352941176471, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1489, "step": 13389 }, { "epoch": 0.3150588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0003, "step": 13390 }, { "epoch": 0.3150823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.2243, "step": 13391 }, { "epoch": 0.3151058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9605, "step": 13392 }, { "epoch": 0.3151294117647059, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9996, "step": 13393 }, { "epoch": 0.3151529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9638, "step": 13394 }, { "epoch": 0.3151764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 1.0144, "step": 13395 }, { "epoch": 0.3152, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3949, "step": 13396 }, { "epoch": 0.3152235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9661, "step": 13397 }, { "epoch": 0.31524705882352944, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9076, "step": 13398 }, { "epoch": 0.31527058823529414, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2666, "step": 13399 }, { "epoch": 0.31529411764705884, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0561, "step": 13400 }, { "epoch": 0.31531764705882354, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.116, "step": 13401 }, { "epoch": 0.31534117647058824, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1664, "step": 13402 }, { "epoch": 0.31536470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8376, "step": 13403 }, { "epoch": 0.31538823529411764, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.454, "step": 13404 }, { "epoch": 0.31541176470588234, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.069, "step": 13405 }, { "epoch": 0.31543529411764704, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0263, "step": 13406 }, { "epoch": 0.3154588235294118, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.322, "step": 13407 }, { "epoch": 0.3154823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9602, "step": 13408 }, { "epoch": 0.3155058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9383, "step": 13409 }, { "epoch": 0.3155294117647059, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2292, "step": 13410 }, { "epoch": 0.3155529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7494, "step": 13411 }, { "epoch": 0.3155764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6918, "step": 13412 }, { "epoch": 0.3156, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8421, "step": 13413 }, { "epoch": 0.3156235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3438, "step": 13414 }, { "epoch": 0.3156470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0485, "step": 13415 }, { "epoch": 0.3156705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1717, "step": 13416 }, { "epoch": 0.31569411764705885, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8763, "step": 13417 }, { "epoch": 0.31571764705882355, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7509, "step": 13418 }, { "epoch": 0.31574117647058825, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1, "step": 13419 }, { "epoch": 0.31576470588235295, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9188, "step": 13420 }, { "epoch": 0.31578823529411765, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1426, "step": 13421 }, { "epoch": 0.31581176470588235, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.531, "step": 13422 }, { "epoch": 0.31583529411764705, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2642, "step": 13423 }, { "epoch": 0.31585882352941175, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0348, "step": 13424 }, { "epoch": 0.31588235294117645, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1673, "step": 13425 }, { "epoch": 0.3159058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8642, "step": 13426 }, { "epoch": 0.3159294117647059, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1959, "step": 13427 }, { "epoch": 0.3159529411764706, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.6548, "step": 13428 }, { "epoch": 0.3159764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8746, "step": 13429 }, { "epoch": 0.316, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8468, "step": 13430 }, { "epoch": 0.3160235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7963, "step": 13431 }, { "epoch": 0.3160470588235294, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0554, "step": 13432 }, { "epoch": 0.3160705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2766, "step": 13433 }, { "epoch": 0.3160941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1355, "step": 13434 }, { "epoch": 0.3161176470588235, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.6717, "step": 13435 }, { "epoch": 0.31614117647058826, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8483, "step": 13436 }, { "epoch": 0.31616470588235296, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9434, "step": 13437 }, { "epoch": 0.31618823529411766, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9548, "step": 13438 }, { "epoch": 0.31621176470588236, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9823, "step": 13439 }, { "epoch": 0.31623529411764706, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.6727, "step": 13440 }, { "epoch": 0.31625882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7532, "step": 13441 }, { "epoch": 0.31628235294117646, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9895, "step": 13442 }, { "epoch": 0.31630588235294116, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0838, "step": 13443 }, { "epoch": 0.31632941176470586, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8871, "step": 13444 }, { "epoch": 0.3163529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0216, "step": 13445 }, { "epoch": 0.3163764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0663, "step": 13446 }, { "epoch": 0.3164, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2339, "step": 13447 }, { "epoch": 0.3164235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0029, "step": 13448 }, { "epoch": 0.3164470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1646, "step": 13449 }, { "epoch": 0.3164705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1623, "step": 13450 }, { "epoch": 0.3164941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1697, "step": 13451 }, { "epoch": 0.3165176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8962, "step": 13452 }, { "epoch": 0.3165411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1167, "step": 13453 }, { "epoch": 0.3165647058823529, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1164, "step": 13454 }, { "epoch": 0.31658823529411767, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6782, "step": 13455 }, { "epoch": 0.31661176470588237, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7759, "step": 13456 }, { "epoch": 0.31663529411764707, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0538, "step": 13457 }, { "epoch": 0.31665882352941177, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9676, "step": 13458 }, { "epoch": 0.31668235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2673, "step": 13459 }, { "epoch": 0.31670588235294117, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0915, "step": 13460 }, { "epoch": 0.31672941176470587, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.0754, "step": 13461 }, { "epoch": 0.31675294117647057, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0366, "step": 13462 }, { "epoch": 0.31677647058823527, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9711, "step": 13463 }, { "epoch": 0.3168, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1632, "step": 13464 }, { "epoch": 0.3168235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3409, "step": 13465 }, { "epoch": 0.3168470588235294, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7656, "step": 13466 }, { "epoch": 0.3168705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9807, "step": 13467 }, { "epoch": 0.3168941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9099, "step": 13468 }, { "epoch": 0.3169176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1941, "step": 13469 }, { "epoch": 0.3169411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8477, "step": 13470 }, { "epoch": 0.3169647058823529, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2741, "step": 13471 }, { "epoch": 0.3169882352941176, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8261, "step": 13472 }, { "epoch": 0.3170117647058823, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1443, "step": 13473 }, { "epoch": 0.3170352941176471, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.046, "step": 13474 }, { "epoch": 0.3170588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.7519, "step": 13475 }, { "epoch": 0.3170823529411765, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9106, "step": 13476 }, { "epoch": 0.3171058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8868, "step": 13477 }, { "epoch": 0.3171294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.934, "step": 13478 }, { "epoch": 0.3171529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0813, "step": 13479 }, { "epoch": 0.3171764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2549, "step": 13480 }, { "epoch": 0.3172, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1326, "step": 13481 }, { "epoch": 0.3172235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3126, "step": 13482 }, { "epoch": 0.31724705882352944, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0605, "step": 13483 }, { "epoch": 0.31727058823529414, "grad_norm": 4.9375, "learning_rate": 0.02, "loss": 1.1221, "step": 13484 }, { "epoch": 0.31729411764705884, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.4508, "step": 13485 }, { "epoch": 0.31731764705882354, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9209, "step": 13486 }, { "epoch": 0.31734117647058824, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0151, "step": 13487 }, { "epoch": 0.31736470588235294, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2049, "step": 13488 }, { "epoch": 0.31738823529411764, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9426, "step": 13489 }, { "epoch": 0.31741176470588234, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0663, "step": 13490 }, { "epoch": 0.31743529411764704, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0796, "step": 13491 }, { "epoch": 0.31745882352941174, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2293, "step": 13492 }, { "epoch": 0.3174823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2003, "step": 13493 }, { "epoch": 0.3175058823529412, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1475, "step": 13494 }, { "epoch": 0.3175294117647059, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.008, "step": 13495 }, { "epoch": 0.3175529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7832, "step": 13496 }, { "epoch": 0.3175764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.6665, "step": 13497 }, { "epoch": 0.3176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.93, "step": 13498 }, { "epoch": 0.3176235294117647, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.3379, "step": 13499 }, { "epoch": 0.3176470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0335, "step": 13500 }, { "epoch": 0.3176705882352941, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8366, "step": 13501 }, { "epoch": 0.31769411764705885, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2458, "step": 13502 }, { "epoch": 0.31771764705882355, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9567, "step": 13503 }, { "epoch": 0.31774117647058825, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1251, "step": 13504 }, { "epoch": 0.31776470588235295, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.013, "step": 13505 }, { "epoch": 0.31778823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0504, "step": 13506 }, { "epoch": 0.31781176470588235, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0309, "step": 13507 }, { "epoch": 0.31783529411764705, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9618, "step": 13508 }, { "epoch": 0.31785882352941175, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.649, "step": 13509 }, { "epoch": 0.31788235294117645, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0249, "step": 13510 }, { "epoch": 0.31790588235294115, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0308, "step": 13511 }, { "epoch": 0.3179294117647059, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2121, "step": 13512 }, { "epoch": 0.3179529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0001, "step": 13513 }, { "epoch": 0.3179764705882353, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.3885, "step": 13514 }, { "epoch": 0.318, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2863, "step": 13515 }, { "epoch": 0.3180235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0389, "step": 13516 }, { "epoch": 0.3180470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8869, "step": 13517 }, { "epoch": 0.3180705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.129, "step": 13518 }, { "epoch": 0.3180941176470588, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.233, "step": 13519 }, { "epoch": 0.3181176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9534, "step": 13520 }, { "epoch": 0.31814117647058826, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1412, "step": 13521 }, { "epoch": 0.31816470588235296, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.762, "step": 13522 }, { "epoch": 0.31818823529411766, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3369, "step": 13523 }, { "epoch": 0.31821176470588236, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1807, "step": 13524 }, { "epoch": 0.31823529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8719, "step": 13525 }, { "epoch": 0.31825882352941176, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9958, "step": 13526 }, { "epoch": 0.31828235294117646, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.994, "step": 13527 }, { "epoch": 0.31830588235294116, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8732, "step": 13528 }, { "epoch": 0.31832941176470586, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3488, "step": 13529 }, { "epoch": 0.31835294117647056, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.4585, "step": 13530 }, { "epoch": 0.3183764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.163, "step": 13531 }, { "epoch": 0.3184, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.4478, "step": 13532 }, { "epoch": 0.3184235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0832, "step": 13533 }, { "epoch": 0.3184470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0691, "step": 13534 }, { "epoch": 0.3184705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7402, "step": 13535 }, { "epoch": 0.3184941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3144, "step": 13536 }, { "epoch": 0.3185176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8331, "step": 13537 }, { "epoch": 0.3185411764705882, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9518, "step": 13538 }, { "epoch": 0.3185647058823529, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9867, "step": 13539 }, { "epoch": 0.31858823529411767, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.8405, "step": 13540 }, { "epoch": 0.31861176470588237, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 1.0774, "step": 13541 }, { "epoch": 0.31863529411764707, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.8371, "step": 13542 }, { "epoch": 0.31865882352941177, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8476, "step": 13543 }, { "epoch": 0.31868235294117647, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1699, "step": 13544 }, { "epoch": 0.31870588235294117, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0118, "step": 13545 }, { "epoch": 0.31872941176470587, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2835, "step": 13546 }, { "epoch": 0.31875294117647057, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8984, "step": 13547 }, { "epoch": 0.3187764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1854, "step": 13548 }, { "epoch": 0.3188, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0907, "step": 13549 }, { "epoch": 0.31882352941176473, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1878, "step": 13550 }, { "epoch": 0.31884705882352943, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9062, "step": 13551 }, { "epoch": 0.31887058823529413, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2817, "step": 13552 }, { "epoch": 0.31889411764705883, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9942, "step": 13553 }, { "epoch": 0.31891764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8897, "step": 13554 }, { "epoch": 0.31894117647058823, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9876, "step": 13555 }, { "epoch": 0.31896470588235293, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0472, "step": 13556 }, { "epoch": 0.31898823529411763, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7428, "step": 13557 }, { "epoch": 0.31901176470588233, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9032, "step": 13558 }, { "epoch": 0.3190352941176471, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1118, "step": 13559 }, { "epoch": 0.3190588235294118, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7985, "step": 13560 }, { "epoch": 0.3190823529411765, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.6941, "step": 13561 }, { "epoch": 0.3191058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8438, "step": 13562 }, { "epoch": 0.3191294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8341, "step": 13563 }, { "epoch": 0.3191529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9913, "step": 13564 }, { "epoch": 0.3191764705882353, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.7387, "step": 13565 }, { "epoch": 0.3192, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3856, "step": 13566 }, { "epoch": 0.3192235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1867, "step": 13567 }, { "epoch": 0.31924705882352944, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8568, "step": 13568 }, { "epoch": 0.31927058823529414, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0758, "step": 13569 }, { "epoch": 0.31929411764705884, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8485, "step": 13570 }, { "epoch": 0.31931764705882354, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.137, "step": 13571 }, { "epoch": 0.31934117647058824, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0005, "step": 13572 }, { "epoch": 0.31936470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7491, "step": 13573 }, { "epoch": 0.31938823529411764, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2516, "step": 13574 }, { "epoch": 0.31941176470588234, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8478, "step": 13575 }, { "epoch": 0.31943529411764704, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2181, "step": 13576 }, { "epoch": 0.31945882352941174, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7902, "step": 13577 }, { "epoch": 0.3194823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0845, "step": 13578 }, { "epoch": 0.3195058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2464, "step": 13579 }, { "epoch": 0.3195294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7894, "step": 13580 }, { "epoch": 0.3195529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0104, "step": 13581 }, { "epoch": 0.3195764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.2164, "step": 13582 }, { "epoch": 0.3196, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7547, "step": 13583 }, { "epoch": 0.3196235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7093, "step": 13584 }, { "epoch": 0.3196470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8706, "step": 13585 }, { "epoch": 0.3196705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1532, "step": 13586 }, { "epoch": 0.31969411764705885, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3812, "step": 13587 }, { "epoch": 0.31971764705882355, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9059, "step": 13588 }, { "epoch": 0.31974117647058825, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2763, "step": 13589 }, { "epoch": 0.31976470588235295, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7772, "step": 13590 }, { "epoch": 0.31978823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0088, "step": 13591 }, { "epoch": 0.31981176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.072, "step": 13592 }, { "epoch": 0.31983529411764705, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9559, "step": 13593 }, { "epoch": 0.31985882352941175, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0141, "step": 13594 }, { "epoch": 0.31988235294117645, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9213, "step": 13595 }, { "epoch": 0.31990588235294115, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.081, "step": 13596 }, { "epoch": 0.3199294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7794, "step": 13597 }, { "epoch": 0.3199529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0003, "step": 13598 }, { "epoch": 0.3199764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.205, "step": 13599 }, { "epoch": 0.32, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1681, "step": 13600 }, { "epoch": 0.3200235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2264, "step": 13601 }, { "epoch": 0.3200470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.214, "step": 13602 }, { "epoch": 0.3200705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0568, "step": 13603 }, { "epoch": 0.3200941176470588, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0655, "step": 13604 }, { "epoch": 0.3201176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8505, "step": 13605 }, { "epoch": 0.32014117647058826, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9703, "step": 13606 }, { "epoch": 0.32016470588235296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.7744, "step": 13607 }, { "epoch": 0.32018823529411766, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7033, "step": 13608 }, { "epoch": 0.32021176470588236, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.891, "step": 13609 }, { "epoch": 0.32023529411764706, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2405, "step": 13610 }, { "epoch": 0.32025882352941176, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3586, "step": 13611 }, { "epoch": 0.32028235294117646, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.6609, "step": 13612 }, { "epoch": 0.32030588235294116, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9717, "step": 13613 }, { "epoch": 0.32032941176470586, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9038, "step": 13614 }, { "epoch": 0.32035294117647056, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1344, "step": 13615 }, { "epoch": 0.3203764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0006, "step": 13616 }, { "epoch": 0.3204, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7438, "step": 13617 }, { "epoch": 0.3204235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.043, "step": 13618 }, { "epoch": 0.3204470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.24, "step": 13619 }, { "epoch": 0.3204705882352941, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6767, "step": 13620 }, { "epoch": 0.3204941176470588, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1088, "step": 13621 }, { "epoch": 0.3205176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9959, "step": 13622 }, { "epoch": 0.3205411764705882, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8794, "step": 13623 }, { "epoch": 0.3205647058823529, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9387, "step": 13624 }, { "epoch": 0.3205882352941177, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1727, "step": 13625 }, { "epoch": 0.3206117647058824, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.758, "step": 13626 }, { "epoch": 0.3206352941176471, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1309, "step": 13627 }, { "epoch": 0.3206588235294118, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2539, "step": 13628 }, { "epoch": 0.3206823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9785, "step": 13629 }, { "epoch": 0.3207058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.09, "step": 13630 }, { "epoch": 0.3207294117647059, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9605, "step": 13631 }, { "epoch": 0.3207529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0488, "step": 13632 }, { "epoch": 0.3207764705882353, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1945, "step": 13633 }, { "epoch": 0.3208, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2164, "step": 13634 }, { "epoch": 0.32082352941176473, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1882, "step": 13635 }, { "epoch": 0.32084705882352943, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3934, "step": 13636 }, { "epoch": 0.32087058823529413, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2357, "step": 13637 }, { "epoch": 0.32089411764705883, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7631, "step": 13638 }, { "epoch": 0.32091764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1826, "step": 13639 }, { "epoch": 0.32094117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0361, "step": 13640 }, { "epoch": 0.32096470588235293, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9105, "step": 13641 }, { "epoch": 0.32098823529411763, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9653, "step": 13642 }, { "epoch": 0.32101176470588233, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1732, "step": 13643 }, { "epoch": 0.3210352941176471, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1026, "step": 13644 }, { "epoch": 0.3210588235294118, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9077, "step": 13645 }, { "epoch": 0.3210823529411765, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7371, "step": 13646 }, { "epoch": 0.3211058823529412, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.237, "step": 13647 }, { "epoch": 0.3211294117647059, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0575, "step": 13648 }, { "epoch": 0.3211529411764706, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.8348, "step": 13649 }, { "epoch": 0.3211764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8924, "step": 13650 }, { "epoch": 0.3212, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6874, "step": 13651 }, { "epoch": 0.3212235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0873, "step": 13652 }, { "epoch": 0.3212470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1721, "step": 13653 }, { "epoch": 0.32127058823529414, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8552, "step": 13654 }, { "epoch": 0.32129411764705884, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3781, "step": 13655 }, { "epoch": 0.32131764705882354, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.3569, "step": 13656 }, { "epoch": 0.32134117647058824, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1034, "step": 13657 }, { "epoch": 0.32136470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9429, "step": 13658 }, { "epoch": 0.32138823529411764, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.5361, "step": 13659 }, { "epoch": 0.32141176470588234, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1822, "step": 13660 }, { "epoch": 0.32143529411764704, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0812, "step": 13661 }, { "epoch": 0.32145882352941174, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.878, "step": 13662 }, { "epoch": 0.3214823529411765, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.6903, "step": 13663 }, { "epoch": 0.3215058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9942, "step": 13664 }, { "epoch": 0.3215294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2752, "step": 13665 }, { "epoch": 0.3215529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.108, "step": 13666 }, { "epoch": 0.3215764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9112, "step": 13667 }, { "epoch": 0.3216, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7464, "step": 13668 }, { "epoch": 0.3216235294117647, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8373, "step": 13669 }, { "epoch": 0.3216470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0914, "step": 13670 }, { "epoch": 0.3216705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1572, "step": 13671 }, { "epoch": 0.3216941176470588, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0296, "step": 13672 }, { "epoch": 0.32171764705882355, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0607, "step": 13673 }, { "epoch": 0.32174117647058825, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8652, "step": 13674 }, { "epoch": 0.32176470588235295, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9526, "step": 13675 }, { "epoch": 0.32178823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.97, "step": 13676 }, { "epoch": 0.32181176470588235, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0496, "step": 13677 }, { "epoch": 0.32183529411764705, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7572, "step": 13678 }, { "epoch": 0.32185882352941175, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2396, "step": 13679 }, { "epoch": 0.32188235294117645, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9263, "step": 13680 }, { "epoch": 0.32190588235294115, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9945, "step": 13681 }, { "epoch": 0.3219294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.59, "step": 13682 }, { "epoch": 0.3219529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8792, "step": 13683 }, { "epoch": 0.3219764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.5969, "step": 13684 }, { "epoch": 0.322, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9267, "step": 13685 }, { "epoch": 0.3220235294117647, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.6274, "step": 13686 }, { "epoch": 0.3220470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9587, "step": 13687 }, { "epoch": 0.3220705882352941, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2867, "step": 13688 }, { "epoch": 0.3220941176470588, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.4354, "step": 13689 }, { "epoch": 0.3221176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9362, "step": 13690 }, { "epoch": 0.3221411764705882, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0503, "step": 13691 }, { "epoch": 0.32216470588235296, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1795, "step": 13692 }, { "epoch": 0.32218823529411766, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0459, "step": 13693 }, { "epoch": 0.32221176470588236, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2439, "step": 13694 }, { "epoch": 0.32223529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2051, "step": 13695 }, { "epoch": 0.32225882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8556, "step": 13696 }, { "epoch": 0.32228235294117646, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1227, "step": 13697 }, { "epoch": 0.32230588235294116, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1394, "step": 13698 }, { "epoch": 0.32232941176470586, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8643, "step": 13699 }, { "epoch": 0.32235294117647056, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1059, "step": 13700 }, { "epoch": 0.3223764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9923, "step": 13701 }, { "epoch": 0.3224, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8661, "step": 13702 }, { "epoch": 0.3224235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8367, "step": 13703 }, { "epoch": 0.3224470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0639, "step": 13704 }, { "epoch": 0.3224705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8573, "step": 13705 }, { "epoch": 0.3224941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9094, "step": 13706 }, { "epoch": 0.3225176470588235, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.6792, "step": 13707 }, { "epoch": 0.3225411764705882, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.051, "step": 13708 }, { "epoch": 0.3225647058823529, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0788, "step": 13709 }, { "epoch": 0.3225882352941176, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9953, "step": 13710 }, { "epoch": 0.3226117647058824, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9529, "step": 13711 }, { "epoch": 0.3226352941176471, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3267, "step": 13712 }, { "epoch": 0.3226588235294118, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8007, "step": 13713 }, { "epoch": 0.3226823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0162, "step": 13714 }, { "epoch": 0.3227058823529412, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.096, "step": 13715 }, { "epoch": 0.3227294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9084, "step": 13716 }, { "epoch": 0.3227529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.1571, "step": 13717 }, { "epoch": 0.3227764705882353, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8785, "step": 13718 }, { "epoch": 0.3228, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.6539, "step": 13719 }, { "epoch": 0.32282352941176473, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0655, "step": 13720 }, { "epoch": 0.32284705882352943, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1916, "step": 13721 }, { "epoch": 0.32287058823529413, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9535, "step": 13722 }, { "epoch": 0.32289411764705883, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1999, "step": 13723 }, { "epoch": 0.32291764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8585, "step": 13724 }, { "epoch": 0.32294117647058823, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9389, "step": 13725 }, { "epoch": 0.32296470588235293, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.7364, "step": 13726 }, { "epoch": 0.32298823529411763, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1822, "step": 13727 }, { "epoch": 0.32301176470588233, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0167, "step": 13728 }, { "epoch": 0.32303529411764703, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9884, "step": 13729 }, { "epoch": 0.3230588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0332, "step": 13730 }, { "epoch": 0.3230823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9703, "step": 13731 }, { "epoch": 0.3231058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0208, "step": 13732 }, { "epoch": 0.3231294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1844, "step": 13733 }, { "epoch": 0.3231529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7648, "step": 13734 }, { "epoch": 0.3231764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7906, "step": 13735 }, { "epoch": 0.3232, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1052, "step": 13736 }, { "epoch": 0.3232235294117647, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1525, "step": 13737 }, { "epoch": 0.3232470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2578, "step": 13738 }, { "epoch": 0.32327058823529414, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1667, "step": 13739 }, { "epoch": 0.32329411764705884, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9869, "step": 13740 }, { "epoch": 0.32331764705882354, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7588, "step": 13741 }, { "epoch": 0.32334117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8626, "step": 13742 }, { "epoch": 0.32336470588235294, "grad_norm": 0.236328125, "learning_rate": 0.02, "loss": 0.5378, "step": 13743 }, { "epoch": 0.32338823529411764, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1753, "step": 13744 }, { "epoch": 0.32341176470588234, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0452, "step": 13745 }, { "epoch": 0.32343529411764704, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7923, "step": 13746 }, { "epoch": 0.32345882352941174, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9558, "step": 13747 }, { "epoch": 0.32348235294117644, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8322, "step": 13748 }, { "epoch": 0.3235058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1624, "step": 13749 }, { "epoch": 0.3235294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9646, "step": 13750 }, { "epoch": 0.3235529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0805, "step": 13751 }, { "epoch": 0.3235764705882353, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.3494, "step": 13752 }, { "epoch": 0.3236, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0148, "step": 13753 }, { "epoch": 0.3236235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7484, "step": 13754 }, { "epoch": 0.3236470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0018, "step": 13755 }, { "epoch": 0.3236705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9828, "step": 13756 }, { "epoch": 0.3236941176470588, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9785, "step": 13757 }, { "epoch": 0.32371764705882355, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8206, "step": 13758 }, { "epoch": 0.32374117647058825, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.847, "step": 13759 }, { "epoch": 0.32376470588235295, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.5032, "step": 13760 }, { "epoch": 0.32378823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1757, "step": 13761 }, { "epoch": 0.32381176470588235, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0853, "step": 13762 }, { "epoch": 0.32383529411764705, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9684, "step": 13763 }, { "epoch": 0.32385882352941175, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8859, "step": 13764 }, { "epoch": 0.32388235294117645, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9732, "step": 13765 }, { "epoch": 0.32390588235294115, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9974, "step": 13766 }, { "epoch": 0.32392941176470585, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0821, "step": 13767 }, { "epoch": 0.3239529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9551, "step": 13768 }, { "epoch": 0.3239764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1453, "step": 13769 }, { "epoch": 0.324, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2377, "step": 13770 }, { "epoch": 0.3240235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.328, "step": 13771 }, { "epoch": 0.3240470588235294, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.93, "step": 13772 }, { "epoch": 0.3240705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8425, "step": 13773 }, { "epoch": 0.3240941176470588, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0109, "step": 13774 }, { "epoch": 0.3241176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.9295, "step": 13775 }, { "epoch": 0.3241411764705882, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6719, "step": 13776 }, { "epoch": 0.32416470588235297, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1707, "step": 13777 }, { "epoch": 0.32418823529411767, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.842, "step": 13778 }, { "epoch": 0.32421176470588237, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8026, "step": 13779 }, { "epoch": 0.32423529411764707, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9647, "step": 13780 }, { "epoch": 0.32425882352941177, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9513, "step": 13781 }, { "epoch": 0.32428235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0915, "step": 13782 }, { "epoch": 0.32430588235294117, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1566, "step": 13783 }, { "epoch": 0.32432941176470587, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9996, "step": 13784 }, { "epoch": 0.32435294117647057, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9625, "step": 13785 }, { "epoch": 0.3243764705882353, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.4839, "step": 13786 }, { "epoch": 0.3244, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0329, "step": 13787 }, { "epoch": 0.3244235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0927, "step": 13788 }, { "epoch": 0.3244470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0482, "step": 13789 }, { "epoch": 0.3244705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8389, "step": 13790 }, { "epoch": 0.3244941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.934, "step": 13791 }, { "epoch": 0.3245176470588235, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.2108, "step": 13792 }, { "epoch": 0.3245411764705882, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2087, "step": 13793 }, { "epoch": 0.3245647058823529, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8985, "step": 13794 }, { "epoch": 0.3245882352941176, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1602, "step": 13795 }, { "epoch": 0.3246117647058824, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.618, "step": 13796 }, { "epoch": 0.3246352941176471, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0235, "step": 13797 }, { "epoch": 0.3246588235294118, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.6162, "step": 13798 }, { "epoch": 0.3246823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8619, "step": 13799 }, { "epoch": 0.3247058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1926, "step": 13800 }, { "epoch": 0.3247294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0641, "step": 13801 }, { "epoch": 0.3247529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0014, "step": 13802 }, { "epoch": 0.3247764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0265, "step": 13803 }, { "epoch": 0.3248, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1396, "step": 13804 }, { "epoch": 0.32482352941176473, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2397, "step": 13805 }, { "epoch": 0.32484705882352943, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8879, "step": 13806 }, { "epoch": 0.32487058823529413, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8126, "step": 13807 }, { "epoch": 0.32489411764705883, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0333, "step": 13808 }, { "epoch": 0.32491764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8182, "step": 13809 }, { "epoch": 0.32494117647058823, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2056, "step": 13810 }, { "epoch": 0.32496470588235293, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1433, "step": 13811 }, { "epoch": 0.32498823529411763, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0066, "step": 13812 }, { "epoch": 0.32501176470588233, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8325, "step": 13813 }, { "epoch": 0.32503529411764703, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2743, "step": 13814 }, { "epoch": 0.3250588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0427, "step": 13815 }, { "epoch": 0.3250823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0267, "step": 13816 }, { "epoch": 0.3251058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0535, "step": 13817 }, { "epoch": 0.3251294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9386, "step": 13818 }, { "epoch": 0.3251529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0009, "step": 13819 }, { "epoch": 0.3251764705882353, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.175, "step": 13820 }, { "epoch": 0.3252, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0224, "step": 13821 }, { "epoch": 0.3252235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0938, "step": 13822 }, { "epoch": 0.3252470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7314, "step": 13823 }, { "epoch": 0.32527058823529414, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0129, "step": 13824 }, { "epoch": 0.32529411764705884, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0512, "step": 13825 }, { "epoch": 0.32531764705882354, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9416, "step": 13826 }, { "epoch": 0.32534117647058824, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 0.9714, "step": 13827 }, { "epoch": 0.32536470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1246, "step": 13828 }, { "epoch": 0.32538823529411764, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0052, "step": 13829 }, { "epoch": 0.32541176470588234, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0103, "step": 13830 }, { "epoch": 0.32543529411764704, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.5828, "step": 13831 }, { "epoch": 0.32545882352941174, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0096, "step": 13832 }, { "epoch": 0.32548235294117644, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7424, "step": 13833 }, { "epoch": 0.3255058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1583, "step": 13834 }, { "epoch": 0.3255294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1228, "step": 13835 }, { "epoch": 0.3255529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0064, "step": 13836 }, { "epoch": 0.3255764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0165, "step": 13837 }, { "epoch": 0.3256, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8447, "step": 13838 }, { "epoch": 0.3256235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9802, "step": 13839 }, { "epoch": 0.3256470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0461, "step": 13840 }, { "epoch": 0.3256705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0447, "step": 13841 }, { "epoch": 0.3256941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1328, "step": 13842 }, { "epoch": 0.32571764705882356, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.22, "step": 13843 }, { "epoch": 0.32574117647058826, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0139, "step": 13844 }, { "epoch": 0.32576470588235296, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8776, "step": 13845 }, { "epoch": 0.32578823529411766, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9389, "step": 13846 }, { "epoch": 0.32581176470588236, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1798, "step": 13847 }, { "epoch": 0.32583529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0315, "step": 13848 }, { "epoch": 0.32585882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8525, "step": 13849 }, { "epoch": 0.32588235294117646, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2775, "step": 13850 }, { "epoch": 0.32590588235294116, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9617, "step": 13851 }, { "epoch": 0.32592941176470586, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8927, "step": 13852 }, { "epoch": 0.3259529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.887, "step": 13853 }, { "epoch": 0.3259764705882353, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1215, "step": 13854 }, { "epoch": 0.326, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.062, "step": 13855 }, { "epoch": 0.3260235294117647, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1756, "step": 13856 }, { "epoch": 0.3260470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1742, "step": 13857 }, { "epoch": 0.3260705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0771, "step": 13858 }, { "epoch": 0.3260941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1532, "step": 13859 }, { "epoch": 0.3261176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0307, "step": 13860 }, { "epoch": 0.3261411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0603, "step": 13861 }, { "epoch": 0.32616470588235297, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0656, "step": 13862 }, { "epoch": 0.32618823529411767, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6908, "step": 13863 }, { "epoch": 0.32621176470588237, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1045, "step": 13864 }, { "epoch": 0.32623529411764707, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9149, "step": 13865 }, { "epoch": 0.32625882352941177, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.577, "step": 13866 }, { "epoch": 0.32628235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1538, "step": 13867 }, { "epoch": 0.32630588235294117, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0541, "step": 13868 }, { "epoch": 0.32632941176470587, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0142, "step": 13869 }, { "epoch": 0.32635294117647057, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1759, "step": 13870 }, { "epoch": 0.32637647058823527, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3515, "step": 13871 }, { "epoch": 0.3264, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8772, "step": 13872 }, { "epoch": 0.3264235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.328, "step": 13873 }, { "epoch": 0.3264470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9275, "step": 13874 }, { "epoch": 0.3264705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.7853, "step": 13875 }, { "epoch": 0.3264941176470588, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.3796, "step": 13876 }, { "epoch": 0.3265176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8318, "step": 13877 }, { "epoch": 0.3265411764705882, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2925, "step": 13878 }, { "epoch": 0.3265647058823529, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0989, "step": 13879 }, { "epoch": 0.3265882352941176, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1146, "step": 13880 }, { "epoch": 0.3266117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0055, "step": 13881 }, { "epoch": 0.3266352941176471, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0631, "step": 13882 }, { "epoch": 0.3266588235294118, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9287, "step": 13883 }, { "epoch": 0.3266823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0922, "step": 13884 }, { "epoch": 0.3267058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1958, "step": 13885 }, { "epoch": 0.3267294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0648, "step": 13886 }, { "epoch": 0.3267529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2299, "step": 13887 }, { "epoch": 0.3267764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0328, "step": 13888 }, { "epoch": 0.3268, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2643, "step": 13889 }, { "epoch": 0.3268235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7249, "step": 13890 }, { "epoch": 0.32684705882352943, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0186, "step": 13891 }, { "epoch": 0.32687058823529413, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2126, "step": 13892 }, { "epoch": 0.32689411764705884, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.9009, "step": 13893 }, { "epoch": 0.32691764705882354, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2195, "step": 13894 }, { "epoch": 0.32694117647058824, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9197, "step": 13895 }, { "epoch": 0.32696470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8834, "step": 13896 }, { "epoch": 0.32698823529411764, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7511, "step": 13897 }, { "epoch": 0.32701176470588234, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0468, "step": 13898 }, { "epoch": 0.32703529411764704, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1704, "step": 13899 }, { "epoch": 0.3270588235294118, "grad_norm": 0.2265625, "learning_rate": 0.02, "loss": 0.8028, "step": 13900 }, { "epoch": 0.3270823529411765, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.6665, "step": 13901 }, { "epoch": 0.3271058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0947, "step": 13902 }, { "epoch": 0.3271294117647059, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2838, "step": 13903 }, { "epoch": 0.3271529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2398, "step": 13904 }, { "epoch": 0.3271764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.6469, "step": 13905 }, { "epoch": 0.3272, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7866, "step": 13906 }, { "epoch": 0.3272235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9907, "step": 13907 }, { "epoch": 0.3272470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0838, "step": 13908 }, { "epoch": 0.3272705882352941, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0677, "step": 13909 }, { "epoch": 0.32729411764705885, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9107, "step": 13910 }, { "epoch": 0.32731764705882355, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0966, "step": 13911 }, { "epoch": 0.32734117647058825, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0749, "step": 13912 }, { "epoch": 0.32736470588235295, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9583, "step": 13913 }, { "epoch": 0.32738823529411765, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1723, "step": 13914 }, { "epoch": 0.32741176470588235, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8269, "step": 13915 }, { "epoch": 0.32743529411764705, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.7339, "step": 13916 }, { "epoch": 0.32745882352941175, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2305, "step": 13917 }, { "epoch": 0.32748235294117645, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0484, "step": 13918 }, { "epoch": 0.3275058823529412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.979, "step": 13919 }, { "epoch": 0.3275294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9808, "step": 13920 }, { "epoch": 0.3275529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.046, "step": 13921 }, { "epoch": 0.3275764705882353, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7671, "step": 13922 }, { "epoch": 0.3276, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1081, "step": 13923 }, { "epoch": 0.3276235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9858, "step": 13924 }, { "epoch": 0.3276470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9901, "step": 13925 }, { "epoch": 0.3276705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8885, "step": 13926 }, { "epoch": 0.3276941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0858, "step": 13927 }, { "epoch": 0.3277176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0661, "step": 13928 }, { "epoch": 0.32774117647058826, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.911, "step": 13929 }, { "epoch": 0.32776470588235296, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1558, "step": 13930 }, { "epoch": 0.32778823529411766, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9599, "step": 13931 }, { "epoch": 0.32781176470588236, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4301, "step": 13932 }, { "epoch": 0.32783529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9045, "step": 13933 }, { "epoch": 0.32785882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0901, "step": 13934 }, { "epoch": 0.32788235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8164, "step": 13935 }, { "epoch": 0.32790588235294116, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9317, "step": 13936 }, { "epoch": 0.32792941176470586, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3086, "step": 13937 }, { "epoch": 0.3279529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2405, "step": 13938 }, { "epoch": 0.3279764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0084, "step": 13939 }, { "epoch": 0.328, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1318, "step": 13940 }, { "epoch": 0.3280235294117647, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.002, "step": 13941 }, { "epoch": 0.3280470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9798, "step": 13942 }, { "epoch": 0.3280705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.97, "step": 13943 }, { "epoch": 0.3280941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2798, "step": 13944 }, { "epoch": 0.3281176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9603, "step": 13945 }, { "epoch": 0.3281411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8837, "step": 13946 }, { "epoch": 0.3281647058823529, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9841, "step": 13947 }, { "epoch": 0.32818823529411767, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0247, "step": 13948 }, { "epoch": 0.32821176470588237, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.05, "step": 13949 }, { "epoch": 0.32823529411764707, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0307, "step": 13950 }, { "epoch": 0.32825882352941177, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9466, "step": 13951 }, { "epoch": 0.32828235294117647, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3127, "step": 13952 }, { "epoch": 0.32830588235294117, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2369, "step": 13953 }, { "epoch": 0.32832941176470587, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9453, "step": 13954 }, { "epoch": 0.32835294117647057, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9139, "step": 13955 }, { "epoch": 0.32837647058823527, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9311, "step": 13956 }, { "epoch": 0.3284, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.823, "step": 13957 }, { "epoch": 0.3284235294117647, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.468, "step": 13958 }, { "epoch": 0.3284470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9075, "step": 13959 }, { "epoch": 0.3284705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7579, "step": 13960 }, { "epoch": 0.3284941176470588, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2763, "step": 13961 }, { "epoch": 0.3285176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9833, "step": 13962 }, { "epoch": 0.3285411764705882, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1667, "step": 13963 }, { "epoch": 0.3285647058823529, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.924, "step": 13964 }, { "epoch": 0.3285882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0708, "step": 13965 }, { "epoch": 0.3286117647058823, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7991, "step": 13966 }, { "epoch": 0.3286352941176471, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3119, "step": 13967 }, { "epoch": 0.3286588235294118, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8844, "step": 13968 }, { "epoch": 0.3286823529411765, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1308, "step": 13969 }, { "epoch": 0.3287058823529412, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0914, "step": 13970 }, { "epoch": 0.3287294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8215, "step": 13971 }, { "epoch": 0.3287529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8305, "step": 13972 }, { "epoch": 0.3287764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9856, "step": 13973 }, { "epoch": 0.3288, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0704, "step": 13974 }, { "epoch": 0.3288235294117647, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8941, "step": 13975 }, { "epoch": 0.32884705882352944, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.953, "step": 13976 }, { "epoch": 0.32887058823529414, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9316, "step": 13977 }, { "epoch": 0.32889411764705884, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2216, "step": 13978 }, { "epoch": 0.32891764705882354, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2925, "step": 13979 }, { "epoch": 0.32894117647058824, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7199, "step": 13980 }, { "epoch": 0.32896470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.5844, "step": 13981 }, { "epoch": 0.32898823529411764, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2329, "step": 13982 }, { "epoch": 0.32901176470588234, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0079, "step": 13983 }, { "epoch": 0.32903529411764704, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.121, "step": 13984 }, { "epoch": 0.32905882352941174, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2076, "step": 13985 }, { "epoch": 0.3290823529411765, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0326, "step": 13986 }, { "epoch": 0.3291058823529412, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8291, "step": 13987 }, { "epoch": 0.3291294117647059, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9564, "step": 13988 }, { "epoch": 0.3291529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1793, "step": 13989 }, { "epoch": 0.3291764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8194, "step": 13990 }, { "epoch": 0.3292, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0507, "step": 13991 }, { "epoch": 0.3292235294117647, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0278, "step": 13992 }, { "epoch": 0.3292470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.018, "step": 13993 }, { "epoch": 0.3292705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.171, "step": 13994 }, { "epoch": 0.32929411764705885, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0071, "step": 13995 }, { "epoch": 0.32931764705882355, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8668, "step": 13996 }, { "epoch": 0.32934117647058825, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8699, "step": 13997 }, { "epoch": 0.32936470588235295, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8886, "step": 13998 }, { "epoch": 0.32938823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9135, "step": 13999 }, { "epoch": 0.32941176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1461, "step": 14000 }, { "epoch": 0.32941176470588235, "eval_loss": 2.200066089630127, "eval_runtime": 689.0455, "eval_samples_per_second": 12.336, "eval_steps_per_second": 3.084, "step": 14000 }, { "epoch": 0.32943529411764705, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0601, "step": 14001 }, { "epoch": 0.32945882352941175, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9208, "step": 14002 }, { "epoch": 0.32948235294117645, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9888, "step": 14003 }, { "epoch": 0.3295058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9937, "step": 14004 }, { "epoch": 0.3295294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0289, "step": 14005 }, { "epoch": 0.3295529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8197, "step": 14006 }, { "epoch": 0.3295764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7114, "step": 14007 }, { "epoch": 0.3296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0741, "step": 14008 }, { "epoch": 0.3296235294117647, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1542, "step": 14009 }, { "epoch": 0.3296470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.516, "step": 14010 }, { "epoch": 0.3296705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0267, "step": 14011 }, { "epoch": 0.3296941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9892, "step": 14012 }, { "epoch": 0.3297176470588235, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8875, "step": 14013 }, { "epoch": 0.32974117647058826, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8312, "step": 14014 }, { "epoch": 0.32976470588235296, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7962, "step": 14015 }, { "epoch": 0.32978823529411766, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4697, "step": 14016 }, { "epoch": 0.32981176470588236, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.96, "step": 14017 }, { "epoch": 0.32983529411764706, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7189, "step": 14018 }, { "epoch": 0.32985882352941176, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2392, "step": 14019 }, { "epoch": 0.32988235294117646, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3578, "step": 14020 }, { "epoch": 0.32990588235294116, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6951, "step": 14021 }, { "epoch": 0.32992941176470586, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0871, "step": 14022 }, { "epoch": 0.3299529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1643, "step": 14023 }, { "epoch": 0.3299764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0938, "step": 14024 }, { "epoch": 0.33, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.4558, "step": 14025 }, { "epoch": 0.3300235294117647, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.3296, "step": 14026 }, { "epoch": 0.3300470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0953, "step": 14027 }, { "epoch": 0.3300705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.6118, "step": 14028 }, { "epoch": 0.3300941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8214, "step": 14029 }, { "epoch": 0.3301176470588235, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8097, "step": 14030 }, { "epoch": 0.3301411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8709, "step": 14031 }, { "epoch": 0.3301647058823529, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2729, "step": 14032 }, { "epoch": 0.33018823529411767, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8182, "step": 14033 }, { "epoch": 0.33021176470588237, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8534, "step": 14034 }, { "epoch": 0.33023529411764707, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1262, "step": 14035 }, { "epoch": 0.33025882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.082, "step": 14036 }, { "epoch": 0.33028235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0892, "step": 14037 }, { "epoch": 0.33030588235294117, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1224, "step": 14038 }, { "epoch": 0.33032941176470587, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1186, "step": 14039 }, { "epoch": 0.33035294117647057, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.7945, "step": 14040 }, { "epoch": 0.33037647058823527, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0275, "step": 14041 }, { "epoch": 0.3304, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1233, "step": 14042 }, { "epoch": 0.3304235294117647, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1068, "step": 14043 }, { "epoch": 0.3304470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8049, "step": 14044 }, { "epoch": 0.3304705882352941, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.6965, "step": 14045 }, { "epoch": 0.3304941176470588, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.064, "step": 14046 }, { "epoch": 0.3305176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9165, "step": 14047 }, { "epoch": 0.3305411764705882, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.6463, "step": 14048 }, { "epoch": 0.3305647058823529, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.954, "step": 14049 }, { "epoch": 0.3305882352941176, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.931, "step": 14050 }, { "epoch": 0.3306117647058823, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7945, "step": 14051 }, { "epoch": 0.3306352941176471, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9507, "step": 14052 }, { "epoch": 0.3306588235294118, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1197, "step": 14053 }, { "epoch": 0.3306823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1182, "step": 14054 }, { "epoch": 0.3307058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1849, "step": 14055 }, { "epoch": 0.3307294117647059, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9299, "step": 14056 }, { "epoch": 0.3307529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0041, "step": 14057 }, { "epoch": 0.3307764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9793, "step": 14058 }, { "epoch": 0.3308, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8101, "step": 14059 }, { "epoch": 0.3308235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0314, "step": 14060 }, { "epoch": 0.33084705882352944, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0163, "step": 14061 }, { "epoch": 0.33087058823529414, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0785, "step": 14062 }, { "epoch": 0.33089411764705884, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1742, "step": 14063 }, { "epoch": 0.33091764705882354, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1107, "step": 14064 }, { "epoch": 0.33094117647058824, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1633, "step": 14065 }, { "epoch": 0.33096470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7576, "step": 14066 }, { "epoch": 0.33098823529411764, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9356, "step": 14067 }, { "epoch": 0.33101176470588234, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0481, "step": 14068 }, { "epoch": 0.33103529411764704, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1471, "step": 14069 }, { "epoch": 0.33105882352941174, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0325, "step": 14070 }, { "epoch": 0.3310823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0855, "step": 14071 }, { "epoch": 0.3311058823529412, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8262, "step": 14072 }, { "epoch": 0.3311294117647059, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.6534, "step": 14073 }, { "epoch": 0.3311529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1479, "step": 14074 }, { "epoch": 0.3311764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1535, "step": 14075 }, { "epoch": 0.3312, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1215, "step": 14076 }, { "epoch": 0.3312235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1087, "step": 14077 }, { "epoch": 0.3312470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.3181, "step": 14078 }, { "epoch": 0.3312705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9984, "step": 14079 }, { "epoch": 0.33129411764705885, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.069, "step": 14080 }, { "epoch": 0.33131764705882355, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6299, "step": 14081 }, { "epoch": 0.33134117647058825, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1874, "step": 14082 }, { "epoch": 0.33136470588235295, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8526, "step": 14083 }, { "epoch": 0.33138823529411765, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8655, "step": 14084 }, { "epoch": 0.33141176470588235, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7879, "step": 14085 }, { "epoch": 0.33143529411764705, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0254, "step": 14086 }, { "epoch": 0.33145882352941175, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1419, "step": 14087 }, { "epoch": 0.33148235294117645, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.6273, "step": 14088 }, { "epoch": 0.33150588235294115, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0082, "step": 14089 }, { "epoch": 0.3315294117647059, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.184, "step": 14090 }, { "epoch": 0.3315529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0716, "step": 14091 }, { "epoch": 0.3315764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1922, "step": 14092 }, { "epoch": 0.3316, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0269, "step": 14093 }, { "epoch": 0.3316235294117647, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.4721, "step": 14094 }, { "epoch": 0.3316470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.157, "step": 14095 }, { "epoch": 0.3316705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8651, "step": 14096 }, { "epoch": 0.3316941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1544, "step": 14097 }, { "epoch": 0.3317176470588235, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1091, "step": 14098 }, { "epoch": 0.33174117647058826, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.1347, "step": 14099 }, { "epoch": 0.33176470588235296, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.4505, "step": 14100 }, { "epoch": 0.33178823529411766, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9427, "step": 14101 }, { "epoch": 0.33181176470588236, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9026, "step": 14102 }, { "epoch": 0.33183529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1311, "step": 14103 }, { "epoch": 0.33185882352941176, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0737, "step": 14104 }, { "epoch": 0.33188235294117646, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2376, "step": 14105 }, { "epoch": 0.33190588235294116, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.998, "step": 14106 }, { "epoch": 0.33192941176470586, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0849, "step": 14107 }, { "epoch": 0.33195294117647056, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.5853, "step": 14108 }, { "epoch": 0.3319764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.7479, "step": 14109 }, { "epoch": 0.332, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1328, "step": 14110 }, { "epoch": 0.3320235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9094, "step": 14111 }, { "epoch": 0.3320470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.6293, "step": 14112 }, { "epoch": 0.3320705882352941, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8416, "step": 14113 }, { "epoch": 0.3320941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.6732, "step": 14114 }, { "epoch": 0.3321176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1445, "step": 14115 }, { "epoch": 0.3321411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0498, "step": 14116 }, { "epoch": 0.3321647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.338, "step": 14117 }, { "epoch": 0.3321882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9458, "step": 14118 }, { "epoch": 0.3322117647058824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.066, "step": 14119 }, { "epoch": 0.3322352941176471, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.6965, "step": 14120 }, { "epoch": 0.3322588235294118, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3729, "step": 14121 }, { "epoch": 0.3322823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0533, "step": 14122 }, { "epoch": 0.3323058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8576, "step": 14123 }, { "epoch": 0.3323294117647059, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1829, "step": 14124 }, { "epoch": 0.3323529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2594, "step": 14125 }, { "epoch": 0.3323764705882353, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0085, "step": 14126 }, { "epoch": 0.3324, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1577, "step": 14127 }, { "epoch": 0.33242352941176473, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2667, "step": 14128 }, { "epoch": 0.33244705882352943, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8705, "step": 14129 }, { "epoch": 0.33247058823529413, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0306, "step": 14130 }, { "epoch": 0.33249411764705883, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1368, "step": 14131 }, { "epoch": 0.33251764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1346, "step": 14132 }, { "epoch": 0.33254117647058823, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7813, "step": 14133 }, { "epoch": 0.33256470588235293, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7668, "step": 14134 }, { "epoch": 0.33258823529411763, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.3932, "step": 14135 }, { "epoch": 0.33261176470588233, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.2052, "step": 14136 }, { "epoch": 0.3326352941176471, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9769, "step": 14137 }, { "epoch": 0.3326588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0764, "step": 14138 }, { "epoch": 0.3326823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9094, "step": 14139 }, { "epoch": 0.3327058823529412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.5528, "step": 14140 }, { "epoch": 0.3327294117647059, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6537, "step": 14141 }, { "epoch": 0.3327529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9792, "step": 14142 }, { "epoch": 0.3327764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0961, "step": 14143 }, { "epoch": 0.3328, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9022, "step": 14144 }, { "epoch": 0.3328235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.165, "step": 14145 }, { "epoch": 0.3328470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1436, "step": 14146 }, { "epoch": 0.33287058823529414, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1693, "step": 14147 }, { "epoch": 0.33289411764705884, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1365, "step": 14148 }, { "epoch": 0.33291764705882354, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8836, "step": 14149 }, { "epoch": 0.33294117647058824, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1552, "step": 14150 }, { "epoch": 0.33296470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.8239, "step": 14151 }, { "epoch": 0.33298823529411764, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0829, "step": 14152 }, { "epoch": 0.33301176470588234, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1384, "step": 14153 }, { "epoch": 0.33303529411764704, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.6436, "step": 14154 }, { "epoch": 0.33305882352941174, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.9761, "step": 14155 }, { "epoch": 0.3330823529411765, "grad_norm": 0.2431640625, "learning_rate": 0.02, "loss": 0.7754, "step": 14156 }, { "epoch": 0.3331058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0864, "step": 14157 }, { "epoch": 0.3331294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9564, "step": 14158 }, { "epoch": 0.3331529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8142, "step": 14159 }, { "epoch": 0.3331764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8455, "step": 14160 }, { "epoch": 0.3332, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8743, "step": 14161 }, { "epoch": 0.3332235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0354, "step": 14162 }, { "epoch": 0.3332470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0955, "step": 14163 }, { "epoch": 0.3332705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2471, "step": 14164 }, { "epoch": 0.3332941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0841, "step": 14165 }, { "epoch": 0.33331764705882355, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2859, "step": 14166 }, { "epoch": 0.33334117647058825, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 0.5054, "step": 14167 }, { "epoch": 0.33336470588235295, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3494, "step": 14168 }, { "epoch": 0.33338823529411765, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7812, "step": 14169 }, { "epoch": 0.33341176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8481, "step": 14170 }, { "epoch": 0.33343529411764705, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.5911, "step": 14171 }, { "epoch": 0.33345882352941175, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0376, "step": 14172 }, { "epoch": 0.33348235294117645, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0005, "step": 14173 }, { "epoch": 0.33350588235294115, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4899, "step": 14174 }, { "epoch": 0.3335294117647059, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7859, "step": 14175 }, { "epoch": 0.3335529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.204, "step": 14176 }, { "epoch": 0.3335764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9679, "step": 14177 }, { "epoch": 0.3336, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6424, "step": 14178 }, { "epoch": 0.3336235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1561, "step": 14179 }, { "epoch": 0.3336470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9075, "step": 14180 }, { "epoch": 0.3336705882352941, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.973, "step": 14181 }, { "epoch": 0.3336941176470588, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2497, "step": 14182 }, { "epoch": 0.3337176470588235, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.774, "step": 14183 }, { "epoch": 0.3337411764705882, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.013, "step": 14184 }, { "epoch": 0.33376470588235296, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1095, "step": 14185 }, { "epoch": 0.33378823529411766, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0891, "step": 14186 }, { "epoch": 0.33381176470588236, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1143, "step": 14187 }, { "epoch": 0.33383529411764706, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9041, "step": 14188 }, { "epoch": 0.33385882352941176, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.3618, "step": 14189 }, { "epoch": 0.33388235294117646, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1367, "step": 14190 }, { "epoch": 0.33390588235294116, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8839, "step": 14191 }, { "epoch": 0.33392941176470586, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0969, "step": 14192 }, { "epoch": 0.33395294117647056, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.195, "step": 14193 }, { "epoch": 0.3339764705882353, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0698, "step": 14194 }, { "epoch": 0.334, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8143, "step": 14195 }, { "epoch": 0.3340235294117647, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0899, "step": 14196 }, { "epoch": 0.3340470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1156, "step": 14197 }, { "epoch": 0.3340705882352941, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2367, "step": 14198 }, { "epoch": 0.3340941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8894, "step": 14199 }, { "epoch": 0.3341176470588235, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0106, "step": 14200 }, { "epoch": 0.3341411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7322, "step": 14201 }, { "epoch": 0.3341647058823529, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9548, "step": 14202 }, { "epoch": 0.3341882352941177, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0644, "step": 14203 }, { "epoch": 0.3342117647058824, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8323, "step": 14204 }, { "epoch": 0.3342352941176471, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0184, "step": 14205 }, { "epoch": 0.3342588235294118, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.112, "step": 14206 }, { "epoch": 0.3342823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9301, "step": 14207 }, { "epoch": 0.3343058823529412, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1022, "step": 14208 }, { "epoch": 0.3343294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0194, "step": 14209 }, { "epoch": 0.3343529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7817, "step": 14210 }, { "epoch": 0.3343764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7747, "step": 14211 }, { "epoch": 0.3344, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.084, "step": 14212 }, { "epoch": 0.33442352941176473, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0943, "step": 14213 }, { "epoch": 0.33444705882352943, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1565, "step": 14214 }, { "epoch": 0.33447058823529413, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9634, "step": 14215 }, { "epoch": 0.33449411764705883, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0044, "step": 14216 }, { "epoch": 0.33451764705882353, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.306, "step": 14217 }, { "epoch": 0.33454117647058823, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.243, "step": 14218 }, { "epoch": 0.33456470588235293, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8712, "step": 14219 }, { "epoch": 0.33458823529411763, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.991, "step": 14220 }, { "epoch": 0.33461176470588233, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0198, "step": 14221 }, { "epoch": 0.3346352941176471, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2259, "step": 14222 }, { "epoch": 0.3346588235294118, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0223, "step": 14223 }, { "epoch": 0.3346823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0308, "step": 14224 }, { "epoch": 0.3347058823529412, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0349, "step": 14225 }, { "epoch": 0.3347294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8317, "step": 14226 }, { "epoch": 0.3347529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1271, "step": 14227 }, { "epoch": 0.3347764705882353, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.6339, "step": 14228 }, { "epoch": 0.3348, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7059, "step": 14229 }, { "epoch": 0.3348235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0265, "step": 14230 }, { "epoch": 0.3348470588235294, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2971, "step": 14231 }, { "epoch": 0.33487058823529414, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1031, "step": 14232 }, { "epoch": 0.33489411764705884, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.3655, "step": 14233 }, { "epoch": 0.33491764705882354, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1319, "step": 14234 }, { "epoch": 0.33494117647058824, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.322, "step": 14235 }, { "epoch": 0.33496470588235294, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 1.0183, "step": 14236 }, { "epoch": 0.33498823529411764, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8855, "step": 14237 }, { "epoch": 0.33501176470588234, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.3535, "step": 14238 }, { "epoch": 0.33503529411764704, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9989, "step": 14239 }, { "epoch": 0.33505882352941174, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0023, "step": 14240 }, { "epoch": 0.3350823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9739, "step": 14241 }, { "epoch": 0.3351058823529412, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.4511, "step": 14242 }, { "epoch": 0.3351294117647059, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.998, "step": 14243 }, { "epoch": 0.3351529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0607, "step": 14244 }, { "epoch": 0.3351764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1305, "step": 14245 }, { "epoch": 0.3352, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8227, "step": 14246 }, { "epoch": 0.3352235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0118, "step": 14247 }, { "epoch": 0.3352470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.7601, "step": 14248 }, { "epoch": 0.3352705882352941, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0931, "step": 14249 }, { "epoch": 0.3352941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9494, "step": 14250 }, { "epoch": 0.33531764705882355, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9407, "step": 14251 }, { "epoch": 0.33534117647058825, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9208, "step": 14252 }, { "epoch": 0.33536470588235295, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9476, "step": 14253 }, { "epoch": 0.33538823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0132, "step": 14254 }, { "epoch": 0.33541176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9497, "step": 14255 }, { "epoch": 0.33543529411764705, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9566, "step": 14256 }, { "epoch": 0.33545882352941175, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9102, "step": 14257 }, { "epoch": 0.33548235294117645, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0924, "step": 14258 }, { "epoch": 0.33550588235294115, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8292, "step": 14259 }, { "epoch": 0.3355294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7062, "step": 14260 }, { "epoch": 0.3355529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9906, "step": 14261 }, { "epoch": 0.3355764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1311, "step": 14262 }, { "epoch": 0.3356, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3153, "step": 14263 }, { "epoch": 0.3356235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1749, "step": 14264 }, { "epoch": 0.3356470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1703, "step": 14265 }, { "epoch": 0.3356705882352941, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6835, "step": 14266 }, { "epoch": 0.3356941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9264, "step": 14267 }, { "epoch": 0.3357176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7769, "step": 14268 }, { "epoch": 0.3357411764705882, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.1889, "step": 14269 }, { "epoch": 0.33576470588235297, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9435, "step": 14270 }, { "epoch": 0.33578823529411767, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1478, "step": 14271 }, { "epoch": 0.33581176470588237, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1254, "step": 14272 }, { "epoch": 0.33583529411764707, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0551, "step": 14273 }, { "epoch": 0.33585882352941177, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1039, "step": 14274 }, { "epoch": 0.33588235294117647, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1347, "step": 14275 }, { "epoch": 0.33590588235294117, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9618, "step": 14276 }, { "epoch": 0.33592941176470587, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0342, "step": 14277 }, { "epoch": 0.33595294117647057, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1217, "step": 14278 }, { "epoch": 0.3359764705882353, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.9819, "step": 14279 }, { "epoch": 0.336, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6559, "step": 14280 }, { "epoch": 0.3360235294117647, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2375, "step": 14281 }, { "epoch": 0.3360470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.281, "step": 14282 }, { "epoch": 0.3360705882352941, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0454, "step": 14283 }, { "epoch": 0.3360941176470588, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0542, "step": 14284 }, { "epoch": 0.3361176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7883, "step": 14285 }, { "epoch": 0.3361411764705882, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0348, "step": 14286 }, { "epoch": 0.3361647058823529, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0045, "step": 14287 }, { "epoch": 0.3361882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0714, "step": 14288 }, { "epoch": 0.3362117647058824, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2983, "step": 14289 }, { "epoch": 0.3362352941176471, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.9289, "step": 14290 }, { "epoch": 0.3362588235294118, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.26, "step": 14291 }, { "epoch": 0.3362823529411765, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1233, "step": 14292 }, { "epoch": 0.3363058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2412, "step": 14293 }, { "epoch": 0.3363294117647059, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.8071, "step": 14294 }, { "epoch": 0.3363529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1329, "step": 14295 }, { "epoch": 0.3363764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0729, "step": 14296 }, { "epoch": 0.3364, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9331, "step": 14297 }, { "epoch": 0.33642352941176473, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0687, "step": 14298 }, { "epoch": 0.33644705882352943, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0007, "step": 14299 }, { "epoch": 0.33647058823529413, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8684, "step": 14300 }, { "epoch": 0.33649411764705883, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9844, "step": 14301 }, { "epoch": 0.33651764705882353, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1438, "step": 14302 }, { "epoch": 0.33654117647058823, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0018, "step": 14303 }, { "epoch": 0.33656470588235293, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9723, "step": 14304 }, { "epoch": 0.33658823529411763, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8692, "step": 14305 }, { "epoch": 0.33661176470588233, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2632, "step": 14306 }, { "epoch": 0.33663529411764703, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9216, "step": 14307 }, { "epoch": 0.3366588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9348, "step": 14308 }, { "epoch": 0.3366823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8112, "step": 14309 }, { "epoch": 0.3367058823529412, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.734, "step": 14310 }, { "epoch": 0.3367294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2815, "step": 14311 }, { "epoch": 0.3367529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0667, "step": 14312 }, { "epoch": 0.3367764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0625, "step": 14313 }, { "epoch": 0.3368, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9562, "step": 14314 }, { "epoch": 0.3368235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0148, "step": 14315 }, { "epoch": 0.3368470588235294, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0581, "step": 14316 }, { "epoch": 0.33687058823529414, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9665, "step": 14317 }, { "epoch": 0.33689411764705884, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7071, "step": 14318 }, { "epoch": 0.33691764705882354, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0224, "step": 14319 }, { "epoch": 0.33694117647058824, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9076, "step": 14320 }, { "epoch": 0.33696470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9887, "step": 14321 }, { "epoch": 0.33698823529411764, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2225, "step": 14322 }, { "epoch": 0.33701176470588234, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 0.9965, "step": 14323 }, { "epoch": 0.33703529411764704, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9346, "step": 14324 }, { "epoch": 0.33705882352941174, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1811, "step": 14325 }, { "epoch": 0.33708235294117644, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7036, "step": 14326 }, { "epoch": 0.3371058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9271, "step": 14327 }, { "epoch": 0.3371294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7627, "step": 14328 }, { "epoch": 0.3371529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7894, "step": 14329 }, { "epoch": 0.3371764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9371, "step": 14330 }, { "epoch": 0.3372, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8973, "step": 14331 }, { "epoch": 0.3372235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8777, "step": 14332 }, { "epoch": 0.3372470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.811, "step": 14333 }, { "epoch": 0.3372705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9441, "step": 14334 }, { "epoch": 0.3372941176470588, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.5934, "step": 14335 }, { "epoch": 0.33731764705882356, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.2266, "step": 14336 }, { "epoch": 0.33734117647058826, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.2354, "step": 14337 }, { "epoch": 0.33736470588235296, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7188, "step": 14338 }, { "epoch": 0.33738823529411766, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1483, "step": 14339 }, { "epoch": 0.33741176470588236, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0333, "step": 14340 }, { "epoch": 0.33743529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8755, "step": 14341 }, { "epoch": 0.33745882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.211, "step": 14342 }, { "epoch": 0.33748235294117646, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8077, "step": 14343 }, { "epoch": 0.33750588235294116, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0082, "step": 14344 }, { "epoch": 0.33752941176470586, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0373, "step": 14345 }, { "epoch": 0.3375529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2386, "step": 14346 }, { "epoch": 0.3375764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0053, "step": 14347 }, { "epoch": 0.3376, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.271, "step": 14348 }, { "epoch": 0.3376235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1332, "step": 14349 }, { "epoch": 0.3376470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2396, "step": 14350 }, { "epoch": 0.3376705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2872, "step": 14351 }, { "epoch": 0.3376941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0697, "step": 14352 }, { "epoch": 0.3377176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7517, "step": 14353 }, { "epoch": 0.3377411764705882, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0052, "step": 14354 }, { "epoch": 0.33776470588235297, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0053, "step": 14355 }, { "epoch": 0.33778823529411767, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9991, "step": 14356 }, { "epoch": 0.33781176470588237, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.12, "step": 14357 }, { "epoch": 0.33783529411764707, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1027, "step": 14358 }, { "epoch": 0.33785882352941177, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.1501, "step": 14359 }, { "epoch": 0.33788235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9794, "step": 14360 }, { "epoch": 0.33790588235294117, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9113, "step": 14361 }, { "epoch": 0.33792941176470587, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1845, "step": 14362 }, { "epoch": 0.33795294117647057, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2668, "step": 14363 }, { "epoch": 0.33797647058823527, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0092, "step": 14364 }, { "epoch": 0.338, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0529, "step": 14365 }, { "epoch": 0.3380235294117647, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0361, "step": 14366 }, { "epoch": 0.3380470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.6095, "step": 14367 }, { "epoch": 0.3380705882352941, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2658, "step": 14368 }, { "epoch": 0.3380941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9231, "step": 14369 }, { "epoch": 0.3381176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0554, "step": 14370 }, { "epoch": 0.3381411764705882, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1158, "step": 14371 }, { "epoch": 0.3381647058823529, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3674, "step": 14372 }, { "epoch": 0.3381882352941176, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8224, "step": 14373 }, { "epoch": 0.3382117647058824, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8569, "step": 14374 }, { "epoch": 0.3382352941176471, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9283, "step": 14375 }, { "epoch": 0.3382588235294118, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2407, "step": 14376 }, { "epoch": 0.3382823529411765, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.6795, "step": 14377 }, { "epoch": 0.3383058823529412, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7752, "step": 14378 }, { "epoch": 0.3383294117647059, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0591, "step": 14379 }, { "epoch": 0.3383529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0892, "step": 14380 }, { "epoch": 0.3383764705882353, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3297, "step": 14381 }, { "epoch": 0.3384, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1856, "step": 14382 }, { "epoch": 0.3384235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9101, "step": 14383 }, { "epoch": 0.33844705882352943, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2522, "step": 14384 }, { "epoch": 0.33847058823529413, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8105, "step": 14385 }, { "epoch": 0.33849411764705883, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.3702, "step": 14386 }, { "epoch": 0.33851764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7397, "step": 14387 }, { "epoch": 0.33854117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8885, "step": 14388 }, { "epoch": 0.33856470588235293, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0334, "step": 14389 }, { "epoch": 0.33858823529411763, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3499, "step": 14390 }, { "epoch": 0.33861176470588233, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8582, "step": 14391 }, { "epoch": 0.33863529411764703, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0171, "step": 14392 }, { "epoch": 0.3386588235294118, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1197, "step": 14393 }, { "epoch": 0.3386823529411765, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0285, "step": 14394 }, { "epoch": 0.3387058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0269, "step": 14395 }, { "epoch": 0.3387294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1155, "step": 14396 }, { "epoch": 0.3387529411764706, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.7453, "step": 14397 }, { "epoch": 0.3387764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1789, "step": 14398 }, { "epoch": 0.3388, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7636, "step": 14399 }, { "epoch": 0.3388235294117647, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4081, "step": 14400 }, { "epoch": 0.3388470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8928, "step": 14401 }, { "epoch": 0.3388705882352941, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9863, "step": 14402 }, { "epoch": 0.33889411764705885, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5038, "step": 14403 }, { "epoch": 0.33891764705882355, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8296, "step": 14404 }, { "epoch": 0.33894117647058825, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1047, "step": 14405 }, { "epoch": 0.33896470588235295, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1006, "step": 14406 }, { "epoch": 0.33898823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9348, "step": 14407 }, { "epoch": 0.33901176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1059, "step": 14408 }, { "epoch": 0.33903529411764705, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9104, "step": 14409 }, { "epoch": 0.33905882352941175, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2517, "step": 14410 }, { "epoch": 0.33908235294117645, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1189, "step": 14411 }, { "epoch": 0.3391058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0142, "step": 14412 }, { "epoch": 0.3391294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0884, "step": 14413 }, { "epoch": 0.3391529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0243, "step": 14414 }, { "epoch": 0.3391764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2585, "step": 14415 }, { "epoch": 0.3392, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9646, "step": 14416 }, { "epoch": 0.3392235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8277, "step": 14417 }, { "epoch": 0.3392470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9483, "step": 14418 }, { "epoch": 0.3392705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8518, "step": 14419 }, { "epoch": 0.3392941176470588, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2503, "step": 14420 }, { "epoch": 0.33931764705882356, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0705, "step": 14421 }, { "epoch": 0.33934117647058826, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0174, "step": 14422 }, { "epoch": 0.33936470588235296, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9163, "step": 14423 }, { "epoch": 0.33938823529411766, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9107, "step": 14424 }, { "epoch": 0.33941176470588236, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.6669, "step": 14425 }, { "epoch": 0.33943529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.393, "step": 14426 }, { "epoch": 0.33945882352941176, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9332, "step": 14427 }, { "epoch": 0.33948235294117646, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2103, "step": 14428 }, { "epoch": 0.33950588235294116, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8787, "step": 14429 }, { "epoch": 0.33952941176470586, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1218, "step": 14430 }, { "epoch": 0.3395529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9259, "step": 14431 }, { "epoch": 0.3395764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0809, "step": 14432 }, { "epoch": 0.3396, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0649, "step": 14433 }, { "epoch": 0.3396235294117647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1783, "step": 14434 }, { "epoch": 0.3396470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0711, "step": 14435 }, { "epoch": 0.3396705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0741, "step": 14436 }, { "epoch": 0.3396941176470588, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8733, "step": 14437 }, { "epoch": 0.3397176470588235, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4291, "step": 14438 }, { "epoch": 0.3397411764705882, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1228, "step": 14439 }, { "epoch": 0.33976470588235297, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0908, "step": 14440 }, { "epoch": 0.33978823529411767, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0651, "step": 14441 }, { "epoch": 0.33981176470588237, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.856, "step": 14442 }, { "epoch": 0.33983529411764707, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7634, "step": 14443 }, { "epoch": 0.33985882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9124, "step": 14444 }, { "epoch": 0.33988235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.973, "step": 14445 }, { "epoch": 0.33990588235294117, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1413, "step": 14446 }, { "epoch": 0.33992941176470587, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0899, "step": 14447 }, { "epoch": 0.33995294117647057, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9602, "step": 14448 }, { "epoch": 0.33997647058823527, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9642, "step": 14449 }, { "epoch": 0.34, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8869, "step": 14450 }, { "epoch": 0.3400235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7378, "step": 14451 }, { "epoch": 0.3400470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9976, "step": 14452 }, { "epoch": 0.3400705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.123, "step": 14453 }, { "epoch": 0.3400941176470588, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9082, "step": 14454 }, { "epoch": 0.3401176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8791, "step": 14455 }, { "epoch": 0.3401411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9766, "step": 14456 }, { "epoch": 0.3401647058823529, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.306, "step": 14457 }, { "epoch": 0.3401882352941176, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.733, "step": 14458 }, { "epoch": 0.3402117647058824, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1462, "step": 14459 }, { "epoch": 0.3402352941176471, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0799, "step": 14460 }, { "epoch": 0.3402588235294118, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6825, "step": 14461 }, { "epoch": 0.3402823529411765, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.7755, "step": 14462 }, { "epoch": 0.3403058823529412, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.135, "step": 14463 }, { "epoch": 0.3403294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8217, "step": 14464 }, { "epoch": 0.3403529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1244, "step": 14465 }, { "epoch": 0.3403764705882353, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6905, "step": 14466 }, { "epoch": 0.3404, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.5745, "step": 14467 }, { "epoch": 0.3404235294117647, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1741, "step": 14468 }, { "epoch": 0.34044705882352944, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0934, "step": 14469 }, { "epoch": 0.34047058823529414, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0538, "step": 14470 }, { "epoch": 0.34049411764705884, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2323, "step": 14471 }, { "epoch": 0.34051764705882354, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0684, "step": 14472 }, { "epoch": 0.34054117647058824, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1351, "step": 14473 }, { "epoch": 0.34056470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0546, "step": 14474 }, { "epoch": 0.34058823529411764, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2005, "step": 14475 }, { "epoch": 0.34061176470588234, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7301, "step": 14476 }, { "epoch": 0.34063529411764704, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0517, "step": 14477 }, { "epoch": 0.3406588235294118, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.0687, "step": 14478 }, { "epoch": 0.3406823529411765, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3518, "step": 14479 }, { "epoch": 0.3407058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8735, "step": 14480 }, { "epoch": 0.3407294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.3191, "step": 14481 }, { "epoch": 0.3407529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3635, "step": 14482 }, { "epoch": 0.3407764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.9169, "step": 14483 }, { "epoch": 0.3408, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.5187, "step": 14484 }, { "epoch": 0.3408235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7845, "step": 14485 }, { "epoch": 0.3408470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7615, "step": 14486 }, { "epoch": 0.3408705882352941, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2397, "step": 14487 }, { "epoch": 0.34089411764705885, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8518, "step": 14488 }, { "epoch": 0.34091764705882355, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1421, "step": 14489 }, { "epoch": 0.34094117647058825, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2892, "step": 14490 }, { "epoch": 0.34096470588235295, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.3179, "step": 14491 }, { "epoch": 0.34098823529411765, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.95, "step": 14492 }, { "epoch": 0.34101176470588235, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.357, "step": 14493 }, { "epoch": 0.34103529411764705, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.1018, "step": 14494 }, { "epoch": 0.34105882352941175, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.7779, "step": 14495 }, { "epoch": 0.34108235294117645, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1637, "step": 14496 }, { "epoch": 0.3411058823529412, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.137, "step": 14497 }, { "epoch": 0.3411294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0408, "step": 14498 }, { "epoch": 0.3411529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9601, "step": 14499 }, { "epoch": 0.3411764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7088, "step": 14500 }, { "epoch": 0.3412, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.7907, "step": 14501 }, { "epoch": 0.3412235294117647, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9398, "step": 14502 }, { "epoch": 0.3412470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1187, "step": 14503 }, { "epoch": 0.3412705882352941, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2408, "step": 14504 }, { "epoch": 0.3412941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0762, "step": 14505 }, { "epoch": 0.3413176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.2573, "step": 14506 }, { "epoch": 0.34134117647058826, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8568, "step": 14507 }, { "epoch": 0.34136470588235296, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0798, "step": 14508 }, { "epoch": 0.34138823529411766, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1601, "step": 14509 }, { "epoch": 0.34141176470588236, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0262, "step": 14510 }, { "epoch": 0.34143529411764706, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8317, "step": 14511 }, { "epoch": 0.34145882352941176, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9275, "step": 14512 }, { "epoch": 0.34148235294117646, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1475, "step": 14513 }, { "epoch": 0.34150588235294116, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8806, "step": 14514 }, { "epoch": 0.34152941176470586, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9051, "step": 14515 }, { "epoch": 0.3415529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0464, "step": 14516 }, { "epoch": 0.3415764705882353, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2218, "step": 14517 }, { "epoch": 0.3416, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.101, "step": 14518 }, { "epoch": 0.3416235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0686, "step": 14519 }, { "epoch": 0.3416470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7791, "step": 14520 }, { "epoch": 0.3416705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0825, "step": 14521 }, { "epoch": 0.3416941176470588, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0356, "step": 14522 }, { "epoch": 0.3417176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1406, "step": 14523 }, { "epoch": 0.3417411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.704, "step": 14524 }, { "epoch": 0.3417647058823529, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8678, "step": 14525 }, { "epoch": 0.34178823529411767, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8983, "step": 14526 }, { "epoch": 0.34181176470588237, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8502, "step": 14527 }, { "epoch": 0.34183529411764707, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7845, "step": 14528 }, { "epoch": 0.34185882352941177, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9466, "step": 14529 }, { "epoch": 0.34188235294117647, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.1867, "step": 14530 }, { "epoch": 0.34190588235294117, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.068, "step": 14531 }, { "epoch": 0.34192941176470587, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0974, "step": 14532 }, { "epoch": 0.34195294117647057, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9561, "step": 14533 }, { "epoch": 0.34197647058823527, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0251, "step": 14534 }, { "epoch": 0.342, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0317, "step": 14535 }, { "epoch": 0.3420235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.5805, "step": 14536 }, { "epoch": 0.3420470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9301, "step": 14537 }, { "epoch": 0.3420705882352941, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7511, "step": 14538 }, { "epoch": 0.3420941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9729, "step": 14539 }, { "epoch": 0.3421176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8251, "step": 14540 }, { "epoch": 0.3421411764705882, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1588, "step": 14541 }, { "epoch": 0.3421647058823529, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9044, "step": 14542 }, { "epoch": 0.3421882352941176, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.7665, "step": 14543 }, { "epoch": 0.3422117647058823, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7199, "step": 14544 }, { "epoch": 0.3422352941176471, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9469, "step": 14545 }, { "epoch": 0.3422588235294118, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.994, "step": 14546 }, { "epoch": 0.3422823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1617, "step": 14547 }, { "epoch": 0.3423058823529412, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 0.9652, "step": 14548 }, { "epoch": 0.3423294117647059, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9657, "step": 14549 }, { "epoch": 0.3423529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.038, "step": 14550 }, { "epoch": 0.3423764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0176, "step": 14551 }, { "epoch": 0.3424, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2786, "step": 14552 }, { "epoch": 0.3424235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.876, "step": 14553 }, { "epoch": 0.34244705882352944, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.7261, "step": 14554 }, { "epoch": 0.34247058823529414, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.232, "step": 14555 }, { "epoch": 0.34249411764705884, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.888, "step": 14556 }, { "epoch": 0.34251764705882354, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.983, "step": 14557 }, { "epoch": 0.34254117647058824, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.3497, "step": 14558 }, { "epoch": 0.34256470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0274, "step": 14559 }, { "epoch": 0.34258823529411764, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.9695, "step": 14560 }, { "epoch": 0.34261176470588234, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0703, "step": 14561 }, { "epoch": 0.34263529411764704, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0363, "step": 14562 }, { "epoch": 0.34265882352941174, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1396, "step": 14563 }, { "epoch": 0.3426823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3734, "step": 14564 }, { "epoch": 0.3427058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8198, "step": 14565 }, { "epoch": 0.3427294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0716, "step": 14566 }, { "epoch": 0.3427529411764706, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7571, "step": 14567 }, { "epoch": 0.3427764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8908, "step": 14568 }, { "epoch": 0.3428, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.124, "step": 14569 }, { "epoch": 0.3428235294117647, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.4042, "step": 14570 }, { "epoch": 0.3428470588235294, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2685, "step": 14571 }, { "epoch": 0.3428705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9116, "step": 14572 }, { "epoch": 0.34289411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.081, "step": 14573 }, { "epoch": 0.34291764705882355, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9731, "step": 14574 }, { "epoch": 0.34294117647058825, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8486, "step": 14575 }, { "epoch": 0.34296470588235295, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9307, "step": 14576 }, { "epoch": 0.34298823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1176, "step": 14577 }, { "epoch": 0.34301176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7212, "step": 14578 }, { "epoch": 0.34303529411764705, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8751, "step": 14579 }, { "epoch": 0.34305882352941175, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2005, "step": 14580 }, { "epoch": 0.34308235294117645, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1443, "step": 14581 }, { "epoch": 0.34310588235294115, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9486, "step": 14582 }, { "epoch": 0.3431294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0015, "step": 14583 }, { "epoch": 0.3431529411764706, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.8945, "step": 14584 }, { "epoch": 0.3431764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0056, "step": 14585 }, { "epoch": 0.3432, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3054, "step": 14586 }, { "epoch": 0.3432235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.061, "step": 14587 }, { "epoch": 0.3432470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.763, "step": 14588 }, { "epoch": 0.3432705882352941, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.657, "step": 14589 }, { "epoch": 0.3432941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7981, "step": 14590 }, { "epoch": 0.3433176470588235, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.912, "step": 14591 }, { "epoch": 0.34334117647058826, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7925, "step": 14592 }, { "epoch": 0.34336470588235296, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1285, "step": 14593 }, { "epoch": 0.34338823529411766, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3079, "step": 14594 }, { "epoch": 0.34341176470588236, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9466, "step": 14595 }, { "epoch": 0.34343529411764706, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0663, "step": 14596 }, { "epoch": 0.34345882352941176, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9695, "step": 14597 }, { "epoch": 0.34348235294117646, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1096, "step": 14598 }, { "epoch": 0.34350588235294116, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1427, "step": 14599 }, { "epoch": 0.34352941176470586, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2424, "step": 14600 }, { "epoch": 0.34355294117647056, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.5366, "step": 14601 }, { "epoch": 0.3435764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1405, "step": 14602 }, { "epoch": 0.3436, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.6496, "step": 14603 }, { "epoch": 0.3436235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0091, "step": 14604 }, { "epoch": 0.3436470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9812, "step": 14605 }, { "epoch": 0.3436705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.047, "step": 14606 }, { "epoch": 0.3436941176470588, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2446, "step": 14607 }, { "epoch": 0.3437176470588235, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9667, "step": 14608 }, { "epoch": 0.3437411764705882, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0069, "step": 14609 }, { "epoch": 0.3437647058823529, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1335, "step": 14610 }, { "epoch": 0.34378823529411767, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0452, "step": 14611 }, { "epoch": 0.3438117647058824, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0295, "step": 14612 }, { "epoch": 0.3438352941176471, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0881, "step": 14613 }, { "epoch": 0.3438588235294118, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0171, "step": 14614 }, { "epoch": 0.3438823529411765, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1811, "step": 14615 }, { "epoch": 0.3439058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1416, "step": 14616 }, { "epoch": 0.3439294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1028, "step": 14617 }, { "epoch": 0.3439529411764706, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.933, "step": 14618 }, { "epoch": 0.3439764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2128, "step": 14619 }, { "epoch": 0.344, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0596, "step": 14620 }, { "epoch": 0.34402352941176473, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0448, "step": 14621 }, { "epoch": 0.34404705882352943, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0425, "step": 14622 }, { "epoch": 0.34407058823529413, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3792, "step": 14623 }, { "epoch": 0.34409411764705883, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0806, "step": 14624 }, { "epoch": 0.34411764705882353, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.947, "step": 14625 }, { "epoch": 0.34414117647058823, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1319, "step": 14626 }, { "epoch": 0.34416470588235293, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8667, "step": 14627 }, { "epoch": 0.34418823529411763, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0517, "step": 14628 }, { "epoch": 0.34421176470588233, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.137, "step": 14629 }, { "epoch": 0.3442352941176471, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2278, "step": 14630 }, { "epoch": 0.3442588235294118, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8342, "step": 14631 }, { "epoch": 0.3442823529411765, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0907, "step": 14632 }, { "epoch": 0.3443058823529412, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9802, "step": 14633 }, { "epoch": 0.3443294117647059, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.249, "step": 14634 }, { "epoch": 0.3443529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9922, "step": 14635 }, { "epoch": 0.3443764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0618, "step": 14636 }, { "epoch": 0.3444, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.5592, "step": 14637 }, { "epoch": 0.3444235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0223, "step": 14638 }, { "epoch": 0.34444705882352944, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6981, "step": 14639 }, { "epoch": 0.34447058823529414, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9189, "step": 14640 }, { "epoch": 0.34449411764705884, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8928, "step": 14641 }, { "epoch": 0.34451764705882354, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.946, "step": 14642 }, { "epoch": 0.34454117647058824, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.743, "step": 14643 }, { "epoch": 0.34456470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.056, "step": 14644 }, { "epoch": 0.34458823529411764, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0017, "step": 14645 }, { "epoch": 0.34461176470588234, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8639, "step": 14646 }, { "epoch": 0.34463529411764704, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8214, "step": 14647 }, { "epoch": 0.34465882352941174, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8291, "step": 14648 }, { "epoch": 0.3446823529411765, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2024, "step": 14649 }, { "epoch": 0.3447058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9184, "step": 14650 }, { "epoch": 0.3447294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1356, "step": 14651 }, { "epoch": 0.3447529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1017, "step": 14652 }, { "epoch": 0.3447764705882353, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1469, "step": 14653 }, { "epoch": 0.3448, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2793, "step": 14654 }, { "epoch": 0.3448235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9845, "step": 14655 }, { "epoch": 0.3448470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0224, "step": 14656 }, { "epoch": 0.3448705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0628, "step": 14657 }, { "epoch": 0.34489411764705885, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8561, "step": 14658 }, { "epoch": 0.34491764705882355, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8163, "step": 14659 }, { "epoch": 0.34494117647058825, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2267, "step": 14660 }, { "epoch": 0.34496470588235295, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8916, "step": 14661 }, { "epoch": 0.34498823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1009, "step": 14662 }, { "epoch": 0.34501176470588235, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2426, "step": 14663 }, { "epoch": 0.34503529411764705, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1332, "step": 14664 }, { "epoch": 0.34505882352941175, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0203, "step": 14665 }, { "epoch": 0.34508235294117645, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8969, "step": 14666 }, { "epoch": 0.34510588235294115, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1278, "step": 14667 }, { "epoch": 0.3451294117647059, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.4362, "step": 14668 }, { "epoch": 0.3451529411764706, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8197, "step": 14669 }, { "epoch": 0.3451764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9808, "step": 14670 }, { "epoch": 0.3452, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.4257, "step": 14671 }, { "epoch": 0.3452235294117647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0949, "step": 14672 }, { "epoch": 0.3452470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1862, "step": 14673 }, { "epoch": 0.3452705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1879, "step": 14674 }, { "epoch": 0.3452941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1509, "step": 14675 }, { "epoch": 0.3453176470588235, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 0.5386, "step": 14676 }, { "epoch": 0.34534117647058826, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0038, "step": 14677 }, { "epoch": 0.34536470588235296, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0173, "step": 14678 }, { "epoch": 0.34538823529411766, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8302, "step": 14679 }, { "epoch": 0.34541176470588236, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9359, "step": 14680 }, { "epoch": 0.34543529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2418, "step": 14681 }, { "epoch": 0.34545882352941176, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.693, "step": 14682 }, { "epoch": 0.34548235294117646, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8946, "step": 14683 }, { "epoch": 0.34550588235294116, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0956, "step": 14684 }, { "epoch": 0.34552941176470586, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0787, "step": 14685 }, { "epoch": 0.34555294117647056, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.2569, "step": 14686 }, { "epoch": 0.3455764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9144, "step": 14687 }, { "epoch": 0.3456, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2016, "step": 14688 }, { "epoch": 0.3456235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1427, "step": 14689 }, { "epoch": 0.3456470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0273, "step": 14690 }, { "epoch": 0.3456705882352941, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1368, "step": 14691 }, { "epoch": 0.3456941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0075, "step": 14692 }, { "epoch": 0.3457176470588235, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0701, "step": 14693 }, { "epoch": 0.3457411764705882, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1998, "step": 14694 }, { "epoch": 0.3457647058823529, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.9749, "step": 14695 }, { "epoch": 0.3457882352941177, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.6191, "step": 14696 }, { "epoch": 0.3458117647058824, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2518, "step": 14697 }, { "epoch": 0.3458352941176471, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1053, "step": 14698 }, { "epoch": 0.3458588235294118, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2221, "step": 14699 }, { "epoch": 0.3458823529411765, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7109, "step": 14700 }, { "epoch": 0.3459058823529412, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.683, "step": 14701 }, { "epoch": 0.3459294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1703, "step": 14702 }, { "epoch": 0.3459529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.919, "step": 14703 }, { "epoch": 0.3459764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.6978, "step": 14704 }, { "epoch": 0.346, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.093, "step": 14705 }, { "epoch": 0.34602352941176473, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0188, "step": 14706 }, { "epoch": 0.34604705882352943, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3225, "step": 14707 }, { "epoch": 0.34607058823529413, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.4632, "step": 14708 }, { "epoch": 0.34609411764705883, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0379, "step": 14709 }, { "epoch": 0.34611764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1147, "step": 14710 }, { "epoch": 0.34614117647058823, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2522, "step": 14711 }, { "epoch": 0.34616470588235293, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3635, "step": 14712 }, { "epoch": 0.34618823529411763, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.052, "step": 14713 }, { "epoch": 0.34621176470588233, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0446, "step": 14714 }, { "epoch": 0.3462352941176471, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2707, "step": 14715 }, { "epoch": 0.3462588235294118, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0714, "step": 14716 }, { "epoch": 0.3462823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3447, "step": 14717 }, { "epoch": 0.3463058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0054, "step": 14718 }, { "epoch": 0.3463294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8585, "step": 14719 }, { "epoch": 0.3463529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0814, "step": 14720 }, { "epoch": 0.3463764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.7201, "step": 14721 }, { "epoch": 0.3464, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0054, "step": 14722 }, { "epoch": 0.3464235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9253, "step": 14723 }, { "epoch": 0.3464470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1329, "step": 14724 }, { "epoch": 0.34647058823529414, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1937, "step": 14725 }, { "epoch": 0.34649411764705884, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.6719, "step": 14726 }, { "epoch": 0.34651764705882354, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8591, "step": 14727 }, { "epoch": 0.34654117647058824, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0923, "step": 14728 }, { "epoch": 0.34656470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8764, "step": 14729 }, { "epoch": 0.34658823529411764, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.7486, "step": 14730 }, { "epoch": 0.34661176470588234, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9726, "step": 14731 }, { "epoch": 0.34663529411764704, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2985, "step": 14732 }, { "epoch": 0.34665882352941174, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8898, "step": 14733 }, { "epoch": 0.3466823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.678, "step": 14734 }, { "epoch": 0.3467058823529412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3306, "step": 14735 }, { "epoch": 0.3467294117647059, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9132, "step": 14736 }, { "epoch": 0.3467529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.509, "step": 14737 }, { "epoch": 0.3467764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0282, "step": 14738 }, { "epoch": 0.3468, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.8521, "step": 14739 }, { "epoch": 0.3468235294117647, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.305, "step": 14740 }, { "epoch": 0.3468470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0462, "step": 14741 }, { "epoch": 0.3468705882352941, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1046, "step": 14742 }, { "epoch": 0.3468941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1, "step": 14743 }, { "epoch": 0.34691764705882355, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1247, "step": 14744 }, { "epoch": 0.34694117647058825, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0621, "step": 14745 }, { "epoch": 0.34696470588235295, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2295, "step": 14746 }, { "epoch": 0.34698823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8164, "step": 14747 }, { "epoch": 0.34701176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9106, "step": 14748 }, { "epoch": 0.34703529411764705, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0772, "step": 14749 }, { "epoch": 0.34705882352941175, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1906, "step": 14750 }, { "epoch": 0.34708235294117645, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8949, "step": 14751 }, { "epoch": 0.34710588235294115, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1082, "step": 14752 }, { "epoch": 0.3471294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2494, "step": 14753 }, { "epoch": 0.3471529411764706, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9852, "step": 14754 }, { "epoch": 0.3471764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 0.854, "step": 14755 }, { "epoch": 0.3472, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2139, "step": 14756 }, { "epoch": 0.3472235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9211, "step": 14757 }, { "epoch": 0.3472470588235294, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9735, "step": 14758 }, { "epoch": 0.3472705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.6633, "step": 14759 }, { "epoch": 0.3472941176470588, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.189, "step": 14760 }, { "epoch": 0.3473176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.094, "step": 14761 }, { "epoch": 0.3473411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0271, "step": 14762 }, { "epoch": 0.34736470588235296, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0185, "step": 14763 }, { "epoch": 0.34738823529411766, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.929, "step": 14764 }, { "epoch": 0.34741176470588236, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2649, "step": 14765 }, { "epoch": 0.34743529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2549, "step": 14766 }, { "epoch": 0.34745882352941176, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1218, "step": 14767 }, { "epoch": 0.34748235294117646, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9513, "step": 14768 }, { "epoch": 0.34750588235294116, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9382, "step": 14769 }, { "epoch": 0.34752941176470586, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6983, "step": 14770 }, { "epoch": 0.34755294117647056, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2754, "step": 14771 }, { "epoch": 0.3475764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.6031, "step": 14772 }, { "epoch": 0.3476, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9976, "step": 14773 }, { "epoch": 0.3476235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0481, "step": 14774 }, { "epoch": 0.3476470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9188, "step": 14775 }, { "epoch": 0.3476705882352941, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3436, "step": 14776 }, { "epoch": 0.3476941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1309, "step": 14777 }, { "epoch": 0.3477176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9867, "step": 14778 }, { "epoch": 0.3477411764705882, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.127, "step": 14779 }, { "epoch": 0.3477647058823529, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0643, "step": 14780 }, { "epoch": 0.3477882352941176, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0171, "step": 14781 }, { "epoch": 0.3478117647058824, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0969, "step": 14782 }, { "epoch": 0.3478352941176471, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0601, "step": 14783 }, { "epoch": 0.3478588235294118, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.8184, "step": 14784 }, { "epoch": 0.3478823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.135, "step": 14785 }, { "epoch": 0.3479058823529412, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0338, "step": 14786 }, { "epoch": 0.3479294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9652, "step": 14787 }, { "epoch": 0.3479529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8486, "step": 14788 }, { "epoch": 0.3479764705882353, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.3406, "step": 14789 }, { "epoch": 0.348, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9184, "step": 14790 }, { "epoch": 0.34802352941176473, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1428, "step": 14791 }, { "epoch": 0.34804705882352943, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1012, "step": 14792 }, { "epoch": 0.34807058823529413, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8945, "step": 14793 }, { "epoch": 0.34809411764705883, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.115, "step": 14794 }, { "epoch": 0.34811764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1275, "step": 14795 }, { "epoch": 0.34814117647058823, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2718, "step": 14796 }, { "epoch": 0.34816470588235293, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0154, "step": 14797 }, { "epoch": 0.34818823529411763, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8968, "step": 14798 }, { "epoch": 0.34821176470588233, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2223, "step": 14799 }, { "epoch": 0.34823529411764703, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0379, "step": 14800 }, { "epoch": 0.3482588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9566, "step": 14801 }, { "epoch": 0.3482823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1279, "step": 14802 }, { "epoch": 0.3483058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7977, "step": 14803 }, { "epoch": 0.3483294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1764, "step": 14804 }, { "epoch": 0.3483529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6743, "step": 14805 }, { "epoch": 0.3483764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9875, "step": 14806 }, { "epoch": 0.3484, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.9524, "step": 14807 }, { "epoch": 0.3484235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 1.0309, "step": 14808 }, { "epoch": 0.3484470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0477, "step": 14809 }, { "epoch": 0.34847058823529414, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6996, "step": 14810 }, { "epoch": 0.34849411764705884, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2967, "step": 14811 }, { "epoch": 0.34851764705882354, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0078, "step": 14812 }, { "epoch": 0.34854117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1103, "step": 14813 }, { "epoch": 0.34856470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2477, "step": 14814 }, { "epoch": 0.34858823529411764, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0655, "step": 14815 }, { "epoch": 0.34861176470588234, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.2098, "step": 14816 }, { "epoch": 0.34863529411764704, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0223, "step": 14817 }, { "epoch": 0.34865882352941174, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9102, "step": 14818 }, { "epoch": 0.34868235294117644, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8862, "step": 14819 }, { "epoch": 0.3487058823529412, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3166, "step": 14820 }, { "epoch": 0.3487294117647059, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2102, "step": 14821 }, { "epoch": 0.3487529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.116, "step": 14822 }, { "epoch": 0.3487764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1338, "step": 14823 }, { "epoch": 0.3488, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.386, "step": 14824 }, { "epoch": 0.3488235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.024, "step": 14825 }, { "epoch": 0.3488470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2182, "step": 14826 }, { "epoch": 0.3488705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.086, "step": 14827 }, { "epoch": 0.3488941176470588, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9069, "step": 14828 }, { "epoch": 0.34891764705882355, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0311, "step": 14829 }, { "epoch": 0.34894117647058825, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9018, "step": 14830 }, { "epoch": 0.34896470588235295, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9548, "step": 14831 }, { "epoch": 0.34898823529411765, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.5229, "step": 14832 }, { "epoch": 0.34901176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1805, "step": 14833 }, { "epoch": 0.34903529411764705, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1664, "step": 14834 }, { "epoch": 0.34905882352941175, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9009, "step": 14835 }, { "epoch": 0.34908235294117645, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0779, "step": 14836 }, { "epoch": 0.34910588235294115, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9033, "step": 14837 }, { "epoch": 0.34912941176470585, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1057, "step": 14838 }, { "epoch": 0.3491529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1482, "step": 14839 }, { "epoch": 0.3491764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8617, "step": 14840 }, { "epoch": 0.3492, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.007, "step": 14841 }, { "epoch": 0.3492235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.076, "step": 14842 }, { "epoch": 0.3492470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8552, "step": 14843 }, { "epoch": 0.3492705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1705, "step": 14844 }, { "epoch": 0.3492941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9766, "step": 14845 }, { "epoch": 0.3493176470588235, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.3802, "step": 14846 }, { "epoch": 0.3493411764705882, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0793, "step": 14847 }, { "epoch": 0.34936470588235297, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1925, "step": 14848 }, { "epoch": 0.34938823529411767, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1216, "step": 14849 }, { "epoch": 0.34941176470588237, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8725, "step": 14850 }, { "epoch": 0.34943529411764707, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9556, "step": 14851 }, { "epoch": 0.34945882352941177, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0462, "step": 14852 }, { "epoch": 0.34948235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7518, "step": 14853 }, { "epoch": 0.34950588235294117, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.145, "step": 14854 }, { "epoch": 0.34952941176470587, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.0811, "step": 14855 }, { "epoch": 0.34955294117647057, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0011, "step": 14856 }, { "epoch": 0.3495764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9475, "step": 14857 }, { "epoch": 0.3496, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0993, "step": 14858 }, { "epoch": 0.3496235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.872, "step": 14859 }, { "epoch": 0.3496470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7132, "step": 14860 }, { "epoch": 0.3496705882352941, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0482, "step": 14861 }, { "epoch": 0.3496941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9507, "step": 14862 }, { "epoch": 0.3497176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0628, "step": 14863 }, { "epoch": 0.3497411764705882, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.885, "step": 14864 }, { "epoch": 0.3497647058823529, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8383, "step": 14865 }, { "epoch": 0.3497882352941176, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7516, "step": 14866 }, { "epoch": 0.3498117647058824, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4214, "step": 14867 }, { "epoch": 0.3498352941176471, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0297, "step": 14868 }, { "epoch": 0.3498588235294118, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9269, "step": 14869 }, { "epoch": 0.3498823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9753, "step": 14870 }, { "epoch": 0.3499058823529412, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.224, "step": 14871 }, { "epoch": 0.3499294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9698, "step": 14872 }, { "epoch": 0.3499529411764706, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6965, "step": 14873 }, { "epoch": 0.3499764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1807, "step": 14874 }, { "epoch": 0.35, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0534, "step": 14875 }, { "epoch": 0.35002352941176473, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0338, "step": 14876 }, { "epoch": 0.35004705882352943, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7168, "step": 14877 }, { "epoch": 0.35007058823529413, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8729, "step": 14878 }, { "epoch": 0.35009411764705883, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.6464, "step": 14879 }, { "epoch": 0.35011764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.083, "step": 14880 }, { "epoch": 0.35014117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9225, "step": 14881 }, { "epoch": 0.35016470588235293, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1109, "step": 14882 }, { "epoch": 0.35018823529411763, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9329, "step": 14883 }, { "epoch": 0.35021176470588233, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9628, "step": 14884 }, { "epoch": 0.35023529411764703, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.236, "step": 14885 }, { "epoch": 0.3502588235294118, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.404, "step": 14886 }, { "epoch": 0.3502823529411765, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2126, "step": 14887 }, { "epoch": 0.3503058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.984, "step": 14888 }, { "epoch": 0.3503294117647059, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1583, "step": 14889 }, { "epoch": 0.3503529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0841, "step": 14890 }, { "epoch": 0.3503764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9189, "step": 14891 }, { "epoch": 0.3504, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9016, "step": 14892 }, { "epoch": 0.3504235294117647, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.3158, "step": 14893 }, { "epoch": 0.3504470588235294, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.191, "step": 14894 }, { "epoch": 0.35047058823529414, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9763, "step": 14895 }, { "epoch": 0.35049411764705884, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8922, "step": 14896 }, { "epoch": 0.35051764705882354, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.5032, "step": 14897 }, { "epoch": 0.35054117647058824, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3233, "step": 14898 }, { "epoch": 0.35056470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1183, "step": 14899 }, { "epoch": 0.35058823529411764, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2298, "step": 14900 }, { "epoch": 0.35061176470588234, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.149, "step": 14901 }, { "epoch": 0.35063529411764705, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2519, "step": 14902 }, { "epoch": 0.35065882352941175, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.1933, "step": 14903 }, { "epoch": 0.35068235294117645, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9637, "step": 14904 }, { "epoch": 0.3507058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9738, "step": 14905 }, { "epoch": 0.3507294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7818, "step": 14906 }, { "epoch": 0.3507529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0799, "step": 14907 }, { "epoch": 0.3507764705882353, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2587, "step": 14908 }, { "epoch": 0.3508, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.5152, "step": 14909 }, { "epoch": 0.3508235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.072, "step": 14910 }, { "epoch": 0.3508470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9196, "step": 14911 }, { "epoch": 0.3508705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8131, "step": 14912 }, { "epoch": 0.3508941176470588, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8241, "step": 14913 }, { "epoch": 0.35091764705882356, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9393, "step": 14914 }, { "epoch": 0.35094117647058826, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7808, "step": 14915 }, { "epoch": 0.35096470588235296, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8602, "step": 14916 }, { "epoch": 0.35098823529411766, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7908, "step": 14917 }, { "epoch": 0.35101176470588236, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9801, "step": 14918 }, { "epoch": 0.35103529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.025, "step": 14919 }, { "epoch": 0.35105882352941176, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7558, "step": 14920 }, { "epoch": 0.35108235294117646, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8163, "step": 14921 }, { "epoch": 0.35110588235294116, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.992, "step": 14922 }, { "epoch": 0.35112941176470586, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8717, "step": 14923 }, { "epoch": 0.3511529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7961, "step": 14924 }, { "epoch": 0.3511764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7096, "step": 14925 }, { "epoch": 0.3512, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9874, "step": 14926 }, { "epoch": 0.3512235294117647, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0492, "step": 14927 }, { "epoch": 0.3512470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0856, "step": 14928 }, { "epoch": 0.3512705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0325, "step": 14929 }, { "epoch": 0.3512941176470588, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9077, "step": 14930 }, { "epoch": 0.3513176470588235, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2869, "step": 14931 }, { "epoch": 0.3513411764705882, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1424, "step": 14932 }, { "epoch": 0.35136470588235297, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0267, "step": 14933 }, { "epoch": 0.35138823529411767, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2037, "step": 14934 }, { "epoch": 0.35141176470588237, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2048, "step": 14935 }, { "epoch": 0.35143529411764707, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9723, "step": 14936 }, { "epoch": 0.35145882352941177, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.9902, "step": 14937 }, { "epoch": 0.35148235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8987, "step": 14938 }, { "epoch": 0.35150588235294117, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1945, "step": 14939 }, { "epoch": 0.35152941176470587, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2353, "step": 14940 }, { "epoch": 0.35155294117647057, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 1.2028, "step": 14941 }, { "epoch": 0.35157647058823527, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0349, "step": 14942 }, { "epoch": 0.3516, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8111, "step": 14943 }, { "epoch": 0.3516235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.054, "step": 14944 }, { "epoch": 0.3516470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.6529, "step": 14945 }, { "epoch": 0.3516705882352941, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0445, "step": 14946 }, { "epoch": 0.3516941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9751, "step": 14947 }, { "epoch": 0.3517176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7348, "step": 14948 }, { "epoch": 0.3517411764705882, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.5094, "step": 14949 }, { "epoch": 0.3517647058823529, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0198, "step": 14950 }, { "epoch": 0.3517882352941176, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3446, "step": 14951 }, { "epoch": 0.3518117647058824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8686, "step": 14952 }, { "epoch": 0.3518352941176471, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0063, "step": 14953 }, { "epoch": 0.3518588235294118, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9025, "step": 14954 }, { "epoch": 0.3518823529411765, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.47, "step": 14955 }, { "epoch": 0.3519058823529412, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 0.9338, "step": 14956 }, { "epoch": 0.3519294117647059, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1598, "step": 14957 }, { "epoch": 0.3519529411764706, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2018, "step": 14958 }, { "epoch": 0.3519764705882353, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1263, "step": 14959 }, { "epoch": 0.352, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9481, "step": 14960 }, { "epoch": 0.3520235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1605, "step": 14961 }, { "epoch": 0.35204705882352944, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0171, "step": 14962 }, { "epoch": 0.35207058823529414, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7999, "step": 14963 }, { "epoch": 0.35209411764705884, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1527, "step": 14964 }, { "epoch": 0.35211764705882354, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.6106, "step": 14965 }, { "epoch": 0.35214117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7767, "step": 14966 }, { "epoch": 0.35216470588235294, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.7777, "step": 14967 }, { "epoch": 0.35218823529411764, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.077, "step": 14968 }, { "epoch": 0.35221176470588234, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8662, "step": 14969 }, { "epoch": 0.35223529411764704, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.175, "step": 14970 }, { "epoch": 0.3522588235294118, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.8095, "step": 14971 }, { "epoch": 0.3522823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0571, "step": 14972 }, { "epoch": 0.3523058823529412, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7044, "step": 14973 }, { "epoch": 0.3523294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9643, "step": 14974 }, { "epoch": 0.3523529411764706, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3138, "step": 14975 }, { "epoch": 0.3523764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7996, "step": 14976 }, { "epoch": 0.3524, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.873, "step": 14977 }, { "epoch": 0.3524235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8844, "step": 14978 }, { "epoch": 0.3524470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0275, "step": 14979 }, { "epoch": 0.3524705882352941, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0646, "step": 14980 }, { "epoch": 0.35249411764705885, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8223, "step": 14981 }, { "epoch": 0.35251764705882355, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2169, "step": 14982 }, { "epoch": 0.35254117647058825, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.199, "step": 14983 }, { "epoch": 0.35256470588235295, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8008, "step": 14984 }, { "epoch": 0.35258823529411765, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0038, "step": 14985 }, { "epoch": 0.35261176470588235, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1557, "step": 14986 }, { "epoch": 0.35263529411764705, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2039, "step": 14987 }, { "epoch": 0.35265882352941175, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.106, "step": 14988 }, { "epoch": 0.35268235294117645, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2241, "step": 14989 }, { "epoch": 0.3527058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8938, "step": 14990 }, { "epoch": 0.3527294117647059, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.841, "step": 14991 }, { "epoch": 0.3527529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1678, "step": 14992 }, { "epoch": 0.3527764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1449, "step": 14993 }, { "epoch": 0.3528, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9605, "step": 14994 }, { "epoch": 0.3528235294117647, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1465, "step": 14995 }, { "epoch": 0.3528470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0588, "step": 14996 }, { "epoch": 0.3528705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0308, "step": 14997 }, { "epoch": 0.3528941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.7951, "step": 14998 }, { "epoch": 0.3529176470588235, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8711, "step": 14999 }, { "epoch": 0.35294117647058826, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1968, "step": 15000 }, { "epoch": 0.35294117647058826, "eval_loss": 2.194629430770874, "eval_runtime": 681.2593, "eval_samples_per_second": 12.477, "eval_steps_per_second": 3.119, "step": 15000 }, { "epoch": 0.35296470588235296, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.5946, "step": 15001 }, { "epoch": 0.35298823529411766, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9766, "step": 15002 }, { "epoch": 0.35301176470588236, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1483, "step": 15003 }, { "epoch": 0.35303529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3054, "step": 15004 }, { "epoch": 0.35305882352941176, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2404, "step": 15005 }, { "epoch": 0.35308235294117646, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.5371, "step": 15006 }, { "epoch": 0.35310588235294116, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.8011, "step": 15007 }, { "epoch": 0.35312941176470586, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.896, "step": 15008 }, { "epoch": 0.3531529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3531, "step": 15009 }, { "epoch": 0.3531764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0329, "step": 15010 }, { "epoch": 0.3532, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3041, "step": 15011 }, { "epoch": 0.3532235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.108, "step": 15012 }, { "epoch": 0.3532470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.7981, "step": 15013 }, { "epoch": 0.3532705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7653, "step": 15014 }, { "epoch": 0.3532941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8935, "step": 15015 }, { "epoch": 0.3533176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7329, "step": 15016 }, { "epoch": 0.3533411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0025, "step": 15017 }, { "epoch": 0.3533647058823529, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1722, "step": 15018 }, { "epoch": 0.35338823529411767, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9607, "step": 15019 }, { "epoch": 0.35341176470588237, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.633, "step": 15020 }, { "epoch": 0.35343529411764707, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9984, "step": 15021 }, { "epoch": 0.35345882352941177, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0844, "step": 15022 }, { "epoch": 0.35348235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9873, "step": 15023 }, { "epoch": 0.35350588235294117, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1697, "step": 15024 }, { "epoch": 0.35352941176470587, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.246, "step": 15025 }, { "epoch": 0.35355294117647057, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0904, "step": 15026 }, { "epoch": 0.35357647058823527, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.02, "step": 15027 }, { "epoch": 0.3536, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2146, "step": 15028 }, { "epoch": 0.3536235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.9368, "step": 15029 }, { "epoch": 0.3536470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.5428, "step": 15030 }, { "epoch": 0.3536705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9907, "step": 15031 }, { "epoch": 0.3536941176470588, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0374, "step": 15032 }, { "epoch": 0.3537176470588235, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9562, "step": 15033 }, { "epoch": 0.3537411764705882, "grad_norm": 0.2294921875, "learning_rate": 0.02, "loss": 0.7895, "step": 15034 }, { "epoch": 0.3537647058823529, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0079, "step": 15035 }, { "epoch": 0.3537882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.036, "step": 15036 }, { "epoch": 0.3538117647058823, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.21, "step": 15037 }, { "epoch": 0.3538352941176471, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0431, "step": 15038 }, { "epoch": 0.3538588235294118, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1807, "step": 15039 }, { "epoch": 0.3538823529411765, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.985, "step": 15040 }, { "epoch": 0.3539058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0206, "step": 15041 }, { "epoch": 0.3539294117647059, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8863, "step": 15042 }, { "epoch": 0.3539529411764706, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2395, "step": 15043 }, { "epoch": 0.3539764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3374, "step": 15044 }, { "epoch": 0.354, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.881, "step": 15045 }, { "epoch": 0.3540235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9188, "step": 15046 }, { "epoch": 0.35404705882352944, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2215, "step": 15047 }, { "epoch": 0.35407058823529414, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8985, "step": 15048 }, { "epoch": 0.35409411764705884, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8297, "step": 15049 }, { "epoch": 0.35411764705882354, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.6075, "step": 15050 }, { "epoch": 0.35414117647058824, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2754, "step": 15051 }, { "epoch": 0.35416470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0335, "step": 15052 }, { "epoch": 0.35418823529411764, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1397, "step": 15053 }, { "epoch": 0.35421176470588234, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1665, "step": 15054 }, { "epoch": 0.35423529411764704, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7231, "step": 15055 }, { "epoch": 0.35425882352941174, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9597, "step": 15056 }, { "epoch": 0.3542823529411765, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1333, "step": 15057 }, { "epoch": 0.3543058823529412, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1043, "step": 15058 }, { "epoch": 0.3543294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.953, "step": 15059 }, { "epoch": 0.3543529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6705, "step": 15060 }, { "epoch": 0.3543764705882353, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.3734, "step": 15061 }, { "epoch": 0.3544, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.18, "step": 15062 }, { "epoch": 0.3544235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8419, "step": 15063 }, { "epoch": 0.3544470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.909, "step": 15064 }, { "epoch": 0.3544705882352941, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.2948, "step": 15065 }, { "epoch": 0.35449411764705885, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0274, "step": 15066 }, { "epoch": 0.35451764705882355, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.2405, "step": 15067 }, { "epoch": 0.35454117647058825, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.3404, "step": 15068 }, { "epoch": 0.35456470588235295, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8918, "step": 15069 }, { "epoch": 0.35458823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0125, "step": 15070 }, { "epoch": 0.35461176470588235, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7412, "step": 15071 }, { "epoch": 0.35463529411764705, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9821, "step": 15072 }, { "epoch": 0.35465882352941175, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1975, "step": 15073 }, { "epoch": 0.35468235294117645, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0944, "step": 15074 }, { "epoch": 0.3547058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7678, "step": 15075 }, { "epoch": 0.3547294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8675, "step": 15076 }, { "epoch": 0.3547529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1184, "step": 15077 }, { "epoch": 0.3547764705882353, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2436, "step": 15078 }, { "epoch": 0.3548, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7653, "step": 15079 }, { "epoch": 0.3548235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2284, "step": 15080 }, { "epoch": 0.3548470588235294, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7149, "step": 15081 }, { "epoch": 0.3548705882352941, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0758, "step": 15082 }, { "epoch": 0.3548941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9795, "step": 15083 }, { "epoch": 0.3549176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9963, "step": 15084 }, { "epoch": 0.35494117647058826, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.49, "step": 15085 }, { "epoch": 0.35496470588235296, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8693, "step": 15086 }, { "epoch": 0.35498823529411766, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0924, "step": 15087 }, { "epoch": 0.35501176470588236, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.6385, "step": 15088 }, { "epoch": 0.35503529411764706, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6182, "step": 15089 }, { "epoch": 0.35505882352941176, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2216, "step": 15090 }, { "epoch": 0.35508235294117646, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2079, "step": 15091 }, { "epoch": 0.35510588235294116, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0055, "step": 15092 }, { "epoch": 0.35512941176470586, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3694, "step": 15093 }, { "epoch": 0.3551529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9871, "step": 15094 }, { "epoch": 0.3551764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0159, "step": 15095 }, { "epoch": 0.3552, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7665, "step": 15096 }, { "epoch": 0.3552235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9762, "step": 15097 }, { "epoch": 0.3552470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8926, "step": 15098 }, { "epoch": 0.3552705882352941, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1109, "step": 15099 }, { "epoch": 0.3552941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8573, "step": 15100 }, { "epoch": 0.3553176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0723, "step": 15101 }, { "epoch": 0.3553411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1535, "step": 15102 }, { "epoch": 0.3553647058823529, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9747, "step": 15103 }, { "epoch": 0.35538823529411767, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8114, "step": 15104 }, { "epoch": 0.35541176470588237, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0046, "step": 15105 }, { "epoch": 0.35543529411764707, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8326, "step": 15106 }, { "epoch": 0.35545882352941177, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0298, "step": 15107 }, { "epoch": 0.35548235294117647, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9724, "step": 15108 }, { "epoch": 0.35550588235294117, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3121, "step": 15109 }, { "epoch": 0.35552941176470587, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1791, "step": 15110 }, { "epoch": 0.35555294117647057, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.584, "step": 15111 }, { "epoch": 0.35557647058823527, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9248, "step": 15112 }, { "epoch": 0.3556, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0698, "step": 15113 }, { "epoch": 0.3556235294117647, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.6295, "step": 15114 }, { "epoch": 0.3556470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1837, "step": 15115 }, { "epoch": 0.3556705882352941, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8008, "step": 15116 }, { "epoch": 0.3556941176470588, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.1412, "step": 15117 }, { "epoch": 0.3557176470588235, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1871, "step": 15118 }, { "epoch": 0.3557411764705882, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.781, "step": 15119 }, { "epoch": 0.3557647058823529, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2544, "step": 15120 }, { "epoch": 0.3557882352941176, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9646, "step": 15121 }, { "epoch": 0.3558117647058823, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8578, "step": 15122 }, { "epoch": 0.3558352941176471, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0223, "step": 15123 }, { "epoch": 0.3558588235294118, "grad_norm": 0.2236328125, "learning_rate": 0.02, "loss": 0.5843, "step": 15124 }, { "epoch": 0.3558823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2193, "step": 15125 }, { "epoch": 0.3559058823529412, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.4393, "step": 15126 }, { "epoch": 0.3559294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.016, "step": 15127 }, { "epoch": 0.3559529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0113, "step": 15128 }, { "epoch": 0.3559764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9878, "step": 15129 }, { "epoch": 0.356, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.7319, "step": 15130 }, { "epoch": 0.3560235294117647, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1387, "step": 15131 }, { "epoch": 0.35604705882352944, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.6292, "step": 15132 }, { "epoch": 0.35607058823529414, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.8791, "step": 15133 }, { "epoch": 0.35609411764705884, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8612, "step": 15134 }, { "epoch": 0.35611764705882354, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.8886, "step": 15135 }, { "epoch": 0.35614117647058824, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8827, "step": 15136 }, { "epoch": 0.35616470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9822, "step": 15137 }, { "epoch": 0.35618823529411764, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.4126, "step": 15138 }, { "epoch": 0.35621176470588234, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9525, "step": 15139 }, { "epoch": 0.35623529411764704, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9653, "step": 15140 }, { "epoch": 0.35625882352941174, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.6066, "step": 15141 }, { "epoch": 0.3562823529411765, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0422, "step": 15142 }, { "epoch": 0.3563058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0492, "step": 15143 }, { "epoch": 0.3563294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9499, "step": 15144 }, { "epoch": 0.3563529411764706, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2617, "step": 15145 }, { "epoch": 0.3563764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7234, "step": 15146 }, { "epoch": 0.3564, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9767, "step": 15147 }, { "epoch": 0.3564235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9473, "step": 15148 }, { "epoch": 0.3564470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1262, "step": 15149 }, { "epoch": 0.3564705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9749, "step": 15150 }, { "epoch": 0.35649411764705885, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9159, "step": 15151 }, { "epoch": 0.35651764705882355, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7801, "step": 15152 }, { "epoch": 0.35654117647058825, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0509, "step": 15153 }, { "epoch": 0.35656470588235295, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2851, "step": 15154 }, { "epoch": 0.35658823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2336, "step": 15155 }, { "epoch": 0.35661176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0636, "step": 15156 }, { "epoch": 0.35663529411764705, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2516, "step": 15157 }, { "epoch": 0.35665882352941175, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7907, "step": 15158 }, { "epoch": 0.35668235294117645, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.0591, "step": 15159 }, { "epoch": 0.35670588235294115, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0888, "step": 15160 }, { "epoch": 0.3567294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0374, "step": 15161 }, { "epoch": 0.3567529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9385, "step": 15162 }, { "epoch": 0.3567764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0156, "step": 15163 }, { "epoch": 0.3568, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2951, "step": 15164 }, { "epoch": 0.3568235294117647, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7035, "step": 15165 }, { "epoch": 0.3568470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1776, "step": 15166 }, { "epoch": 0.3568705882352941, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0698, "step": 15167 }, { "epoch": 0.3568941176470588, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1986, "step": 15168 }, { "epoch": 0.3569176470588235, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9722, "step": 15169 }, { "epoch": 0.35694117647058826, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1998, "step": 15170 }, { "epoch": 0.35696470588235296, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3522, "step": 15171 }, { "epoch": 0.35698823529411766, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.888, "step": 15172 }, { "epoch": 0.35701176470588236, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2988, "step": 15173 }, { "epoch": 0.35703529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0903, "step": 15174 }, { "epoch": 0.35705882352941176, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1194, "step": 15175 }, { "epoch": 0.35708235294117646, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9602, "step": 15176 }, { "epoch": 0.35710588235294116, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0945, "step": 15177 }, { "epoch": 0.35712941176470586, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8635, "step": 15178 }, { "epoch": 0.35715294117647056, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9445, "step": 15179 }, { "epoch": 0.3571764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8945, "step": 15180 }, { "epoch": 0.3572, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9407, "step": 15181 }, { "epoch": 0.3572235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0204, "step": 15182 }, { "epoch": 0.3572470588235294, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2743, "step": 15183 }, { "epoch": 0.3572705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8969, "step": 15184 }, { "epoch": 0.3572941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1397, "step": 15185 }, { "epoch": 0.3573176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9767, "step": 15186 }, { "epoch": 0.3573411764705882, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9598, "step": 15187 }, { "epoch": 0.3573647058823529, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3106, "step": 15188 }, { "epoch": 0.3573882352941177, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8418, "step": 15189 }, { "epoch": 0.3574117647058824, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2293, "step": 15190 }, { "epoch": 0.3574352941176471, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0679, "step": 15191 }, { "epoch": 0.3574588235294118, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1764, "step": 15192 }, { "epoch": 0.3574823529411765, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0108, "step": 15193 }, { "epoch": 0.3575058823529412, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2131, "step": 15194 }, { "epoch": 0.3575294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7687, "step": 15195 }, { "epoch": 0.3575529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0906, "step": 15196 }, { "epoch": 0.3575764705882353, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.257, "step": 15197 }, { "epoch": 0.3576, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0881, "step": 15198 }, { "epoch": 0.35762352941176473, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1274, "step": 15199 }, { "epoch": 0.35764705882352943, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.021, "step": 15200 }, { "epoch": 0.35767058823529413, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9459, "step": 15201 }, { "epoch": 0.35769411764705883, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0594, "step": 15202 }, { "epoch": 0.35771764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9195, "step": 15203 }, { "epoch": 0.35774117647058823, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2118, "step": 15204 }, { "epoch": 0.35776470588235293, "grad_norm": 0.283203125, "learning_rate": 0.02, "loss": 0.7427, "step": 15205 }, { "epoch": 0.35778823529411763, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.956, "step": 15206 }, { "epoch": 0.35781176470588233, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9112, "step": 15207 }, { "epoch": 0.3578352941176471, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.2295, "step": 15208 }, { "epoch": 0.3578588235294118, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.5768, "step": 15209 }, { "epoch": 0.3578823529411765, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0264, "step": 15210 }, { "epoch": 0.3579058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1123, "step": 15211 }, { "epoch": 0.3579294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.141, "step": 15212 }, { "epoch": 0.3579529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.953, "step": 15213 }, { "epoch": 0.3579764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0597, "step": 15214 }, { "epoch": 0.358, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9644, "step": 15215 }, { "epoch": 0.3580235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9437, "step": 15216 }, { "epoch": 0.3580470588235294, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9108, "step": 15217 }, { "epoch": 0.35807058823529414, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1062, "step": 15218 }, { "epoch": 0.35809411764705884, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.184, "step": 15219 }, { "epoch": 0.35811764705882354, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.6341, "step": 15220 }, { "epoch": 0.35814117647058824, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.7964, "step": 15221 }, { "epoch": 0.35816470588235294, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9648, "step": 15222 }, { "epoch": 0.35818823529411764, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0082, "step": 15223 }, { "epoch": 0.35821176470588234, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.9396, "step": 15224 }, { "epoch": 0.35823529411764704, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.712, "step": 15225 }, { "epoch": 0.35825882352941174, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9459, "step": 15226 }, { "epoch": 0.3582823529411765, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0825, "step": 15227 }, { "epoch": 0.3583058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0182, "step": 15228 }, { "epoch": 0.3583294117647059, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1818, "step": 15229 }, { "epoch": 0.3583529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2203, "step": 15230 }, { "epoch": 0.3583764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.037, "step": 15231 }, { "epoch": 0.3584, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.191, "step": 15232 }, { "epoch": 0.3584235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0546, "step": 15233 }, { "epoch": 0.3584470588235294, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9174, "step": 15234 }, { "epoch": 0.3584705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8868, "step": 15235 }, { "epoch": 0.3584941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9774, "step": 15236 }, { "epoch": 0.35851764705882355, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0128, "step": 15237 }, { "epoch": 0.35854117647058825, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8745, "step": 15238 }, { "epoch": 0.35856470588235295, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0909, "step": 15239 }, { "epoch": 0.35858823529411765, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0136, "step": 15240 }, { "epoch": 0.35861176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9379, "step": 15241 }, { "epoch": 0.35863529411764705, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8285, "step": 15242 }, { "epoch": 0.35865882352941175, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7728, "step": 15243 }, { "epoch": 0.35868235294117645, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9625, "step": 15244 }, { "epoch": 0.35870588235294115, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8595, "step": 15245 }, { "epoch": 0.3587294117647059, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4461, "step": 15246 }, { "epoch": 0.3587529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3028, "step": 15247 }, { "epoch": 0.3587764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9115, "step": 15248 }, { "epoch": 0.3588, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1502, "step": 15249 }, { "epoch": 0.3588235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0545, "step": 15250 }, { "epoch": 0.3588470588235294, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1245, "step": 15251 }, { "epoch": 0.3588705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9706, "step": 15252 }, { "epoch": 0.3588941176470588, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2477, "step": 15253 }, { "epoch": 0.3589176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0629, "step": 15254 }, { "epoch": 0.3589411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.6081, "step": 15255 }, { "epoch": 0.35896470588235296, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.0565, "step": 15256 }, { "epoch": 0.35898823529411766, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.745, "step": 15257 }, { "epoch": 0.35901176470588236, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9325, "step": 15258 }, { "epoch": 0.35903529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0944, "step": 15259 }, { "epoch": 0.35905882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7439, "step": 15260 }, { "epoch": 0.35908235294117646, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2118, "step": 15261 }, { "epoch": 0.35910588235294116, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9611, "step": 15262 }, { "epoch": 0.35912941176470586, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1369, "step": 15263 }, { "epoch": 0.35915294117647056, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0478, "step": 15264 }, { "epoch": 0.3591764705882353, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.8693, "step": 15265 }, { "epoch": 0.3592, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0203, "step": 15266 }, { "epoch": 0.3592235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0355, "step": 15267 }, { "epoch": 0.3592470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7329, "step": 15268 }, { "epoch": 0.3592705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1007, "step": 15269 }, { "epoch": 0.3592941176470588, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1036, "step": 15270 }, { "epoch": 0.3593176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7851, "step": 15271 }, { "epoch": 0.3593411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8382, "step": 15272 }, { "epoch": 0.3593647058823529, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2705, "step": 15273 }, { "epoch": 0.3593882352941176, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7691, "step": 15274 }, { "epoch": 0.3594117647058824, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2046, "step": 15275 }, { "epoch": 0.3594352941176471, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0354, "step": 15276 }, { "epoch": 0.3594588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.2111, "step": 15277 }, { "epoch": 0.3594823529411765, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.193, "step": 15278 }, { "epoch": 0.3595058823529412, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0785, "step": 15279 }, { "epoch": 0.3595294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9847, "step": 15280 }, { "epoch": 0.3595529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0395, "step": 15281 }, { "epoch": 0.3595764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1757, "step": 15282 }, { "epoch": 0.3596, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.1393, "step": 15283 }, { "epoch": 0.35962352941176473, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9932, "step": 15284 }, { "epoch": 0.35964705882352943, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0306, "step": 15285 }, { "epoch": 0.35967058823529413, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3354, "step": 15286 }, { "epoch": 0.35969411764705883, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9851, "step": 15287 }, { "epoch": 0.35971764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0071, "step": 15288 }, { "epoch": 0.35974117647058823, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1574, "step": 15289 }, { "epoch": 0.35976470588235293, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2364, "step": 15290 }, { "epoch": 0.35978823529411763, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0184, "step": 15291 }, { "epoch": 0.35981176470588233, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.034, "step": 15292 }, { "epoch": 0.3598352941176471, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0714, "step": 15293 }, { "epoch": 0.3598588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0992, "step": 15294 }, { "epoch": 0.3598823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.845, "step": 15295 }, { "epoch": 0.3599058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1906, "step": 15296 }, { "epoch": 0.3599294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2253, "step": 15297 }, { "epoch": 0.3599529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9499, "step": 15298 }, { "epoch": 0.3599764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0674, "step": 15299 }, { "epoch": 0.36, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0464, "step": 15300 }, { "epoch": 0.3600235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.218, "step": 15301 }, { "epoch": 0.3600470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0737, "step": 15302 }, { "epoch": 0.36007058823529414, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0904, "step": 15303 }, { "epoch": 0.36009411764705884, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.133, "step": 15304 }, { "epoch": 0.36011764705882354, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.955, "step": 15305 }, { "epoch": 0.36014117647058824, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0702, "step": 15306 }, { "epoch": 0.36016470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9105, "step": 15307 }, { "epoch": 0.36018823529411764, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0661, "step": 15308 }, { "epoch": 0.36021176470588234, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9992, "step": 15309 }, { "epoch": 0.36023529411764704, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8634, "step": 15310 }, { "epoch": 0.36025882352941174, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.5858, "step": 15311 }, { "epoch": 0.3602823529411765, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.265, "step": 15312 }, { "epoch": 0.3603058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9551, "step": 15313 }, { "epoch": 0.3603294117647059, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.839, "step": 15314 }, { "epoch": 0.3603529411764706, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2956, "step": 15315 }, { "epoch": 0.3603764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9399, "step": 15316 }, { "epoch": 0.3604, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9487, "step": 15317 }, { "epoch": 0.3604235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.041, "step": 15318 }, { "epoch": 0.3604470588235294, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8271, "step": 15319 }, { "epoch": 0.3604705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9581, "step": 15320 }, { "epoch": 0.3604941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0851, "step": 15321 }, { "epoch": 0.36051764705882355, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9214, "step": 15322 }, { "epoch": 0.36054117647058825, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8319, "step": 15323 }, { "epoch": 0.36056470588235295, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0673, "step": 15324 }, { "epoch": 0.36058823529411765, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9584, "step": 15325 }, { "epoch": 0.36061176470588235, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8908, "step": 15326 }, { "epoch": 0.36063529411764705, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0987, "step": 15327 }, { "epoch": 0.36065882352941175, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8045, "step": 15328 }, { "epoch": 0.36068235294117645, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.163, "step": 15329 }, { "epoch": 0.36070588235294115, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0397, "step": 15330 }, { "epoch": 0.3607294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1338, "step": 15331 }, { "epoch": 0.3607529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.2006, "step": 15332 }, { "epoch": 0.3607764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.4768, "step": 15333 }, { "epoch": 0.3608, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0042, "step": 15334 }, { "epoch": 0.3608235294117647, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 0.9399, "step": 15335 }, { "epoch": 0.3608470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9315, "step": 15336 }, { "epoch": 0.3608705882352941, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.074, "step": 15337 }, { "epoch": 0.3608941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9323, "step": 15338 }, { "epoch": 0.3609176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8235, "step": 15339 }, { "epoch": 0.3609411764705882, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.888, "step": 15340 }, { "epoch": 0.36096470588235297, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1717, "step": 15341 }, { "epoch": 0.36098823529411767, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.683, "step": 15342 }, { "epoch": 0.36101176470588237, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8902, "step": 15343 }, { "epoch": 0.36103529411764707, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3675, "step": 15344 }, { "epoch": 0.36105882352941177, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2081, "step": 15345 }, { "epoch": 0.36108235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8224, "step": 15346 }, { "epoch": 0.36110588235294117, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1111, "step": 15347 }, { "epoch": 0.36112941176470587, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.8557, "step": 15348 }, { "epoch": 0.36115294117647057, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0608, "step": 15349 }, { "epoch": 0.3611764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.766, "step": 15350 }, { "epoch": 0.3612, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0158, "step": 15351 }, { "epoch": 0.3612235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8945, "step": 15352 }, { "epoch": 0.3612470588235294, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8526, "step": 15353 }, { "epoch": 0.3612705882352941, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8916, "step": 15354 }, { "epoch": 0.3612941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8329, "step": 15355 }, { "epoch": 0.3613176470588235, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3396, "step": 15356 }, { "epoch": 0.3613411764705882, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9464, "step": 15357 }, { "epoch": 0.3613647058823529, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8541, "step": 15358 }, { "epoch": 0.3613882352941176, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2584, "step": 15359 }, { "epoch": 0.3614117647058824, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0, "step": 15360 }, { "epoch": 0.3614352941176471, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8539, "step": 15361 }, { "epoch": 0.3614588235294118, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9298, "step": 15362 }, { "epoch": 0.3614823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.7898, "step": 15363 }, { "epoch": 0.3615058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7113, "step": 15364 }, { "epoch": 0.3615294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1825, "step": 15365 }, { "epoch": 0.3615529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.969, "step": 15366 }, { "epoch": 0.3615764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7888, "step": 15367 }, { "epoch": 0.3616, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1363, "step": 15368 }, { "epoch": 0.36162352941176473, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0019, "step": 15369 }, { "epoch": 0.36164705882352943, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.196, "step": 15370 }, { "epoch": 0.36167058823529413, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9817, "step": 15371 }, { "epoch": 0.36169411764705883, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9842, "step": 15372 }, { "epoch": 0.36171764705882353, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.903, "step": 15373 }, { "epoch": 0.36174117647058823, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.7158, "step": 15374 }, { "epoch": 0.36176470588235293, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.189, "step": 15375 }, { "epoch": 0.36178823529411763, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.5079, "step": 15376 }, { "epoch": 0.36181176470588233, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1402, "step": 15377 }, { "epoch": 0.36183529411764703, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0314, "step": 15378 }, { "epoch": 0.3618588235294118, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1769, "step": 15379 }, { "epoch": 0.3618823529411765, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 1.0792, "step": 15380 }, { "epoch": 0.3619058823529412, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2468, "step": 15381 }, { "epoch": 0.3619294117647059, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2957, "step": 15382 }, { "epoch": 0.3619529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0221, "step": 15383 }, { "epoch": 0.3619764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.985, "step": 15384 }, { "epoch": 0.362, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.1964, "step": 15385 }, { "epoch": 0.3620235294117647, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0064, "step": 15386 }, { "epoch": 0.3620470588235294, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2036, "step": 15387 }, { "epoch": 0.36207058823529414, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.7616, "step": 15388 }, { "epoch": 0.36209411764705884, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.062, "step": 15389 }, { "epoch": 0.36211764705882354, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.017, "step": 15390 }, { "epoch": 0.36214117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8352, "step": 15391 }, { "epoch": 0.36216470588235294, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1169, "step": 15392 }, { "epoch": 0.36218823529411764, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0106, "step": 15393 }, { "epoch": 0.36221176470588234, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1049, "step": 15394 }, { "epoch": 0.36223529411764704, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0695, "step": 15395 }, { "epoch": 0.36225882352941174, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7356, "step": 15396 }, { "epoch": 0.36228235294117644, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.779, "step": 15397 }, { "epoch": 0.3623058823529412, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2742, "step": 15398 }, { "epoch": 0.3623294117647059, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0393, "step": 15399 }, { "epoch": 0.3623529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0839, "step": 15400 }, { "epoch": 0.3623764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.8034, "step": 15401 }, { "epoch": 0.3624, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8861, "step": 15402 }, { "epoch": 0.3624235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0212, "step": 15403 }, { "epoch": 0.3624470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9477, "step": 15404 }, { "epoch": 0.3624705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.8344, "step": 15405 }, { "epoch": 0.3624941176470588, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9133, "step": 15406 }, { "epoch": 0.36251764705882356, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9773, "step": 15407 }, { "epoch": 0.36254117647058826, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.2982, "step": 15408 }, { "epoch": 0.36256470588235296, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0297, "step": 15409 }, { "epoch": 0.36258823529411766, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8046, "step": 15410 }, { "epoch": 0.36261176470588236, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.6331, "step": 15411 }, { "epoch": 0.36263529411764706, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1666, "step": 15412 }, { "epoch": 0.36265882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.921, "step": 15413 }, { "epoch": 0.36268235294117646, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.8418, "step": 15414 }, { "epoch": 0.36270588235294116, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0636, "step": 15415 }, { "epoch": 0.36272941176470586, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2144, "step": 15416 }, { "epoch": 0.3627529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0721, "step": 15417 }, { "epoch": 0.3627764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9287, "step": 15418 }, { "epoch": 0.3628, "grad_norm": 0.2255859375, "learning_rate": 0.02, "loss": 0.8338, "step": 15419 }, { "epoch": 0.3628235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.209, "step": 15420 }, { "epoch": 0.3628470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1947, "step": 15421 }, { "epoch": 0.3628705882352941, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1665, "step": 15422 }, { "epoch": 0.3628941176470588, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7256, "step": 15423 }, { "epoch": 0.3629176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0492, "step": 15424 }, { "epoch": 0.3629411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0819, "step": 15425 }, { "epoch": 0.36296470588235297, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.6985, "step": 15426 }, { "epoch": 0.36298823529411767, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1016, "step": 15427 }, { "epoch": 0.36301176470588237, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9172, "step": 15428 }, { "epoch": 0.36303529411764707, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9089, "step": 15429 }, { "epoch": 0.36305882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9407, "step": 15430 }, { "epoch": 0.36308235294117647, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8916, "step": 15431 }, { "epoch": 0.36310588235294117, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3394, "step": 15432 }, { "epoch": 0.36312941176470587, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2331, "step": 15433 }, { "epoch": 0.36315294117647057, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0331, "step": 15434 }, { "epoch": 0.36317647058823527, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9244, "step": 15435 }, { "epoch": 0.3632, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9851, "step": 15436 }, { "epoch": 0.3632235294117647, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.5708, "step": 15437 }, { "epoch": 0.3632470588235294, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0206, "step": 15438 }, { "epoch": 0.3632705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0408, "step": 15439 }, { "epoch": 0.3632941176470588, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2548, "step": 15440 }, { "epoch": 0.3633176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7888, "step": 15441 }, { "epoch": 0.3633411764705882, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0543, "step": 15442 }, { "epoch": 0.3633647058823529, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1476, "step": 15443 }, { "epoch": 0.3633882352941176, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.037, "step": 15444 }, { "epoch": 0.3634117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0809, "step": 15445 }, { "epoch": 0.3634352941176471, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2696, "step": 15446 }, { "epoch": 0.3634588235294118, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1891, "step": 15447 }, { "epoch": 0.3634823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9245, "step": 15448 }, { "epoch": 0.3635058823529412, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1745, "step": 15449 }, { "epoch": 0.3635294117647059, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3019, "step": 15450 }, { "epoch": 0.3635529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.6625, "step": 15451 }, { "epoch": 0.3635764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3225, "step": 15452 }, { "epoch": 0.3636, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0991, "step": 15453 }, { "epoch": 0.3636235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8987, "step": 15454 }, { "epoch": 0.36364705882352943, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7428, "step": 15455 }, { "epoch": 0.36367058823529413, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.92, "step": 15456 }, { "epoch": 0.36369411764705883, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0172, "step": 15457 }, { "epoch": 0.36371764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0187, "step": 15458 }, { "epoch": 0.36374117647058823, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8658, "step": 15459 }, { "epoch": 0.36376470588235293, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2233, "step": 15460 }, { "epoch": 0.36378823529411763, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1313, "step": 15461 }, { "epoch": 0.36381176470588233, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.22, "step": 15462 }, { "epoch": 0.36383529411764703, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9932, "step": 15463 }, { "epoch": 0.3638588235294118, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7501, "step": 15464 }, { "epoch": 0.3638823529411765, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0077, "step": 15465 }, { "epoch": 0.3639058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.03, "step": 15466 }, { "epoch": 0.3639294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9774, "step": 15467 }, { "epoch": 0.3639529411764706, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.9862, "step": 15468 }, { "epoch": 0.3639764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.8464, "step": 15469 }, { "epoch": 0.364, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0301, "step": 15470 }, { "epoch": 0.3640235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7126, "step": 15471 }, { "epoch": 0.3640470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8758, "step": 15472 }, { "epoch": 0.3640705882352941, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9391, "step": 15473 }, { "epoch": 0.36409411764705885, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1539, "step": 15474 }, { "epoch": 0.36411764705882355, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2864, "step": 15475 }, { "epoch": 0.36414117647058825, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9295, "step": 15476 }, { "epoch": 0.36416470588235295, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9548, "step": 15477 }, { "epoch": 0.36418823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.0319, "step": 15478 }, { "epoch": 0.36421176470588235, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2017, "step": 15479 }, { "epoch": 0.36423529411764705, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.7777, "step": 15480 }, { "epoch": 0.36425882352941175, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.7599, "step": 15481 }, { "epoch": 0.36428235294117645, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.931, "step": 15482 }, { "epoch": 0.3643058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9929, "step": 15483 }, { "epoch": 0.3643294117647059, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6395, "step": 15484 }, { "epoch": 0.3643529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7839, "step": 15485 }, { "epoch": 0.3643764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7237, "step": 15486 }, { "epoch": 0.3644, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0406, "step": 15487 }, { "epoch": 0.3644235294117647, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.1274, "step": 15488 }, { "epoch": 0.3644470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0032, "step": 15489 }, { "epoch": 0.3644705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.089, "step": 15490 }, { "epoch": 0.3644941176470588, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8091, "step": 15491 }, { "epoch": 0.3645176470588235, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0821, "step": 15492 }, { "epoch": 0.36454117647058826, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.6512, "step": 15493 }, { "epoch": 0.36456470588235296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.6873, "step": 15494 }, { "epoch": 0.36458823529411766, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.3445, "step": 15495 }, { "epoch": 0.36461176470588236, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8374, "step": 15496 }, { "epoch": 0.36463529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.7784, "step": 15497 }, { "epoch": 0.36465882352941176, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2085, "step": 15498 }, { "epoch": 0.36468235294117646, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8824, "step": 15499 }, { "epoch": 0.36470588235294116, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8465, "step": 15500 }, { "epoch": 0.36472941176470586, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0329, "step": 15501 }, { "epoch": 0.3647529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0447, "step": 15502 }, { "epoch": 0.3647764705882353, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0979, "step": 15503 }, { "epoch": 0.3648, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0962, "step": 15504 }, { "epoch": 0.3648235294117647, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9789, "step": 15505 }, { "epoch": 0.3648470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.096, "step": 15506 }, { "epoch": 0.3648705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0291, "step": 15507 }, { "epoch": 0.3648941176470588, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0099, "step": 15508 }, { "epoch": 0.3649176470588235, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.6826, "step": 15509 }, { "epoch": 0.3649411764705882, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9541, "step": 15510 }, { "epoch": 0.36496470588235297, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1, "step": 15511 }, { "epoch": 0.36498823529411767, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1176, "step": 15512 }, { "epoch": 0.36501176470588237, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8072, "step": 15513 }, { "epoch": 0.36503529411764707, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3791, "step": 15514 }, { "epoch": 0.36505882352941177, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0503, "step": 15515 }, { "epoch": 0.36508235294117647, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7254, "step": 15516 }, { "epoch": 0.36510588235294117, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8489, "step": 15517 }, { "epoch": 0.36512941176470587, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9642, "step": 15518 }, { "epoch": 0.36515294117647057, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7369, "step": 15519 }, { "epoch": 0.36517647058823527, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9659, "step": 15520 }, { "epoch": 0.3652, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9163, "step": 15521 }, { "epoch": 0.3652235294117647, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1504, "step": 15522 }, { "epoch": 0.3652470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.8783, "step": 15523 }, { "epoch": 0.3652705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0205, "step": 15524 }, { "epoch": 0.3652941176470588, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0543, "step": 15525 }, { "epoch": 0.3653176470588235, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.6547, "step": 15526 }, { "epoch": 0.3653411764705882, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9581, "step": 15527 }, { "epoch": 0.3653647058823529, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.6954, "step": 15528 }, { "epoch": 0.3653882352941176, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8003, "step": 15529 }, { "epoch": 0.3654117647058824, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1331, "step": 15530 }, { "epoch": 0.3654352941176471, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9974, "step": 15531 }, { "epoch": 0.3654588235294118, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2252, "step": 15532 }, { "epoch": 0.3654823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0943, "step": 15533 }, { "epoch": 0.3655058823529412, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8227, "step": 15534 }, { "epoch": 0.3655294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1346, "step": 15535 }, { "epoch": 0.3655529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9314, "step": 15536 }, { "epoch": 0.3655764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1805, "step": 15537 }, { "epoch": 0.3656, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0676, "step": 15538 }, { "epoch": 0.3656235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1276, "step": 15539 }, { "epoch": 0.36564705882352944, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2842, "step": 15540 }, { "epoch": 0.36567058823529414, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0764, "step": 15541 }, { "epoch": 0.36569411764705884, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 0.9942, "step": 15542 }, { "epoch": 0.36571764705882354, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8289, "step": 15543 }, { "epoch": 0.36574117647058824, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.2589, "step": 15544 }, { "epoch": 0.36576470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7955, "step": 15545 }, { "epoch": 0.36578823529411764, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.8774, "step": 15546 }, { "epoch": 0.36581176470588234, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0959, "step": 15547 }, { "epoch": 0.36583529411764704, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8891, "step": 15548 }, { "epoch": 0.3658588235294118, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1704, "step": 15549 }, { "epoch": 0.3658823529411765, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7385, "step": 15550 }, { "epoch": 0.3659058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1599, "step": 15551 }, { "epoch": 0.3659294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0727, "step": 15552 }, { "epoch": 0.3659529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9604, "step": 15553 }, { "epoch": 0.3659764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.6481, "step": 15554 }, { "epoch": 0.366, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.318, "step": 15555 }, { "epoch": 0.3660235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.926, "step": 15556 }, { "epoch": 0.3660470588235294, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.0935, "step": 15557 }, { "epoch": 0.3660705882352941, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1046, "step": 15558 }, { "epoch": 0.36609411764705885, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7905, "step": 15559 }, { "epoch": 0.36611764705882355, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8231, "step": 15560 }, { "epoch": 0.36614117647058825, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1378, "step": 15561 }, { "epoch": 0.36616470588235295, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8722, "step": 15562 }, { "epoch": 0.36618823529411765, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9918, "step": 15563 }, { "epoch": 0.36621176470588235, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0095, "step": 15564 }, { "epoch": 0.36623529411764705, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3428, "step": 15565 }, { "epoch": 0.36625882352941175, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7878, "step": 15566 }, { "epoch": 0.36628235294117645, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.7861, "step": 15567 }, { "epoch": 0.3663058823529412, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9705, "step": 15568 }, { "epoch": 0.3663294117647059, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0913, "step": 15569 }, { "epoch": 0.3663529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9071, "step": 15570 }, { "epoch": 0.3663764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9241, "step": 15571 }, { "epoch": 0.3664, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9987, "step": 15572 }, { "epoch": 0.3664235294117647, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1167, "step": 15573 }, { "epoch": 0.3664470588235294, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0428, "step": 15574 }, { "epoch": 0.3664705882352941, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3343, "step": 15575 }, { "epoch": 0.3664941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0092, "step": 15576 }, { "epoch": 0.3665176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8218, "step": 15577 }, { "epoch": 0.36654117647058826, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0324, "step": 15578 }, { "epoch": 0.36656470588235296, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3163, "step": 15579 }, { "epoch": 0.36658823529411766, "grad_norm": 0.63671875, "learning_rate": 0.02, "loss": 0.9853, "step": 15580 }, { "epoch": 0.36661176470588236, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9894, "step": 15581 }, { "epoch": 0.36663529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.208, "step": 15582 }, { "epoch": 0.36665882352941176, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.2576, "step": 15583 }, { "epoch": 0.36668235294117646, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2177, "step": 15584 }, { "epoch": 0.36670588235294116, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1382, "step": 15585 }, { "epoch": 0.36672941176470586, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9849, "step": 15586 }, { "epoch": 0.3667529411764706, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8897, "step": 15587 }, { "epoch": 0.3667764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9392, "step": 15588 }, { "epoch": 0.3668, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.355, "step": 15589 }, { "epoch": 0.3668235294117647, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8756, "step": 15590 }, { "epoch": 0.3668470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7062, "step": 15591 }, { "epoch": 0.3668705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7886, "step": 15592 }, { "epoch": 0.3668941176470588, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 0.994, "step": 15593 }, { "epoch": 0.3669176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9768, "step": 15594 }, { "epoch": 0.3669411764705882, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.655, "step": 15595 }, { "epoch": 0.3669647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8589, "step": 15596 }, { "epoch": 0.36698823529411767, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8763, "step": 15597 }, { "epoch": 0.36701176470588237, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9967, "step": 15598 }, { "epoch": 0.36703529411764707, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2423, "step": 15599 }, { "epoch": 0.36705882352941177, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9517, "step": 15600 }, { "epoch": 0.36708235294117647, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0618, "step": 15601 }, { "epoch": 0.36710588235294117, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1217, "step": 15602 }, { "epoch": 0.36712941176470587, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2647, "step": 15603 }, { "epoch": 0.36715294117647057, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0655, "step": 15604 }, { "epoch": 0.36717647058823527, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0449, "step": 15605 }, { "epoch": 0.3672, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.976, "step": 15606 }, { "epoch": 0.3672235294117647, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.3854, "step": 15607 }, { "epoch": 0.3672470588235294, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1776, "step": 15608 }, { "epoch": 0.3672705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9551, "step": 15609 }, { "epoch": 0.3672941176470588, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2539, "step": 15610 }, { "epoch": 0.3673176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.93, "step": 15611 }, { "epoch": 0.3673411764705882, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.039, "step": 15612 }, { "epoch": 0.3673647058823529, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.3163, "step": 15613 }, { "epoch": 0.3673882352941176, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3202, "step": 15614 }, { "epoch": 0.3674117647058823, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8102, "step": 15615 }, { "epoch": 0.3674352941176471, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9429, "step": 15616 }, { "epoch": 0.3674588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1683, "step": 15617 }, { "epoch": 0.3674823529411765, "grad_norm": 0.234375, "learning_rate": 0.02, "loss": 0.9455, "step": 15618 }, { "epoch": 0.3675058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.992, "step": 15619 }, { "epoch": 0.3675294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8521, "step": 15620 }, { "epoch": 0.3675529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9118, "step": 15621 }, { "epoch": 0.3675764705882353, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.656, "step": 15622 }, { "epoch": 0.3676, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8074, "step": 15623 }, { "epoch": 0.3676235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0006, "step": 15624 }, { "epoch": 0.36764705882352944, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1766, "step": 15625 }, { "epoch": 0.36767058823529414, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9021, "step": 15626 }, { "epoch": 0.36769411764705884, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9164, "step": 15627 }, { "epoch": 0.36771764705882354, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.768, "step": 15628 }, { "epoch": 0.36774117647058824, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1303, "step": 15629 }, { "epoch": 0.36776470588235294, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.7334, "step": 15630 }, { "epoch": 0.36778823529411764, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.4687, "step": 15631 }, { "epoch": 0.36781176470588234, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0543, "step": 15632 }, { "epoch": 0.36783529411764704, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0317, "step": 15633 }, { "epoch": 0.36785882352941174, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0894, "step": 15634 }, { "epoch": 0.3678823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7301, "step": 15635 }, { "epoch": 0.3679058823529412, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8292, "step": 15636 }, { "epoch": 0.3679294117647059, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0458, "step": 15637 }, { "epoch": 0.3679529411764706, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1419, "step": 15638 }, { "epoch": 0.3679764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.991, "step": 15639 }, { "epoch": 0.368, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8617, "step": 15640 }, { "epoch": 0.3680235294117647, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0341, "step": 15641 }, { "epoch": 0.3680470588235294, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.119, "step": 15642 }, { "epoch": 0.3680705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7774, "step": 15643 }, { "epoch": 0.36809411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8279, "step": 15644 }, { "epoch": 0.36811764705882355, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1301, "step": 15645 }, { "epoch": 0.36814117647058825, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1516, "step": 15646 }, { "epoch": 0.36816470588235295, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.024, "step": 15647 }, { "epoch": 0.36818823529411765, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2271, "step": 15648 }, { "epoch": 0.36821176470588235, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.6974, "step": 15649 }, { "epoch": 0.36823529411764705, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8948, "step": 15650 }, { "epoch": 0.36825882352941175, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2158, "step": 15651 }, { "epoch": 0.36828235294117645, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.7853, "step": 15652 }, { "epoch": 0.36830588235294115, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1273, "step": 15653 }, { "epoch": 0.3683294117647059, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1206, "step": 15654 }, { "epoch": 0.3683529411764706, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1234, "step": 15655 }, { "epoch": 0.3683764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9254, "step": 15656 }, { "epoch": 0.3684, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.6512, "step": 15657 }, { "epoch": 0.3684235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0061, "step": 15658 }, { "epoch": 0.3684470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2276, "step": 15659 }, { "epoch": 0.3684705882352941, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.2027, "step": 15660 }, { "epoch": 0.3684941176470588, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.8463, "step": 15661 }, { "epoch": 0.3685176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0599, "step": 15662 }, { "epoch": 0.36854117647058826, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0664, "step": 15663 }, { "epoch": 0.36856470588235296, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7127, "step": 15664 }, { "epoch": 0.36858823529411766, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1453, "step": 15665 }, { "epoch": 0.36861176470588236, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.039, "step": 15666 }, { "epoch": 0.36863529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0083, "step": 15667 }, { "epoch": 0.36865882352941176, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2096, "step": 15668 }, { "epoch": 0.36868235294117646, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.7498, "step": 15669 }, { "epoch": 0.36870588235294116, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.4807, "step": 15670 }, { "epoch": 0.36872941176470586, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2082, "step": 15671 }, { "epoch": 0.36875294117647056, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8948, "step": 15672 }, { "epoch": 0.3687764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0013, "step": 15673 }, { "epoch": 0.3688, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.6627, "step": 15674 }, { "epoch": 0.3688235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9437, "step": 15675 }, { "epoch": 0.3688470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1322, "step": 15676 }, { "epoch": 0.3688705882352941, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.6614, "step": 15677 }, { "epoch": 0.3688941176470588, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8111, "step": 15678 }, { "epoch": 0.3689176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.942, "step": 15679 }, { "epoch": 0.3689411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0729, "step": 15680 }, { "epoch": 0.3689647058823529, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.932, "step": 15681 }, { "epoch": 0.3689882352941177, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.951, "step": 15682 }, { "epoch": 0.3690117647058824, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.803, "step": 15683 }, { "epoch": 0.3690352941176471, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0652, "step": 15684 }, { "epoch": 0.3690588235294118, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7209, "step": 15685 }, { "epoch": 0.3690823529411765, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0455, "step": 15686 }, { "epoch": 0.3691058823529412, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.819, "step": 15687 }, { "epoch": 0.3691294117647059, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.7075, "step": 15688 }, { "epoch": 0.3691529411764706, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.7096, "step": 15689 }, { "epoch": 0.3691764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7889, "step": 15690 }, { "epoch": 0.3692, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7005, "step": 15691 }, { "epoch": 0.36922352941176473, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.296, "step": 15692 }, { "epoch": 0.36924705882352943, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0336, "step": 15693 }, { "epoch": 0.36927058823529413, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.247, "step": 15694 }, { "epoch": 0.36929411764705883, "grad_norm": 0.2275390625, "learning_rate": 0.02, "loss": 0.7248, "step": 15695 }, { "epoch": 0.36931764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0682, "step": 15696 }, { "epoch": 0.36934117647058823, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3207, "step": 15697 }, { "epoch": 0.36936470588235293, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9926, "step": 15698 }, { "epoch": 0.36938823529411763, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9906, "step": 15699 }, { "epoch": 0.36941176470588233, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.7504, "step": 15700 }, { "epoch": 0.3694352941176471, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.309, "step": 15701 }, { "epoch": 0.3694588235294118, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.1523, "step": 15702 }, { "epoch": 0.3694823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9454, "step": 15703 }, { "epoch": 0.3695058823529412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0966, "step": 15704 }, { "epoch": 0.3695294117647059, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0347, "step": 15705 }, { "epoch": 0.3695529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0767, "step": 15706 }, { "epoch": 0.3695764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0072, "step": 15707 }, { "epoch": 0.3696, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9186, "step": 15708 }, { "epoch": 0.3696235294117647, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.7586, "step": 15709 }, { "epoch": 0.3696470588235294, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1875, "step": 15710 }, { "epoch": 0.36967058823529414, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0486, "step": 15711 }, { "epoch": 0.36969411764705884, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1762, "step": 15712 }, { "epoch": 0.36971764705882354, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2439, "step": 15713 }, { "epoch": 0.36974117647058824, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7367, "step": 15714 }, { "epoch": 0.36976470588235294, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.9504, "step": 15715 }, { "epoch": 0.36978823529411764, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.3118, "step": 15716 }, { "epoch": 0.36981176470588234, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8864, "step": 15717 }, { "epoch": 0.36983529411764704, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8159, "step": 15718 }, { "epoch": 0.36985882352941174, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.7932, "step": 15719 }, { "epoch": 0.3698823529411765, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.133, "step": 15720 }, { "epoch": 0.3699058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0984, "step": 15721 }, { "epoch": 0.3699294117647059, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.055, "step": 15722 }, { "epoch": 0.3699529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0179, "step": 15723 }, { "epoch": 0.3699764705882353, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 0.901, "step": 15724 }, { "epoch": 0.37, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.2249, "step": 15725 }, { "epoch": 0.3700235294117647, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.205, "step": 15726 }, { "epoch": 0.3700470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1842, "step": 15727 }, { "epoch": 0.3700705882352941, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.4097, "step": 15728 }, { "epoch": 0.37009411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8987, "step": 15729 }, { "epoch": 0.37011764705882355, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9658, "step": 15730 }, { "epoch": 0.37014117647058825, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.177, "step": 15731 }, { "epoch": 0.37016470588235295, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0564, "step": 15732 }, { "epoch": 0.37018823529411765, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.0141, "step": 15733 }, { "epoch": 0.37021176470588235, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.9693, "step": 15734 }, { "epoch": 0.37023529411764705, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.7662, "step": 15735 }, { "epoch": 0.37025882352941175, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9976, "step": 15736 }, { "epoch": 0.37028235294117645, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9728, "step": 15737 }, { "epoch": 0.37030588235294115, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8133, "step": 15738 }, { "epoch": 0.3703294117647059, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0596, "step": 15739 }, { "epoch": 0.3703529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0984, "step": 15740 }, { "epoch": 0.3703764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.989, "step": 15741 }, { "epoch": 0.3704, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7965, "step": 15742 }, { "epoch": 0.3704235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8985, "step": 15743 }, { "epoch": 0.3704470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0313, "step": 15744 }, { "epoch": 0.3704705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0555, "step": 15745 }, { "epoch": 0.3704941176470588, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1604, "step": 15746 }, { "epoch": 0.3705176470588235, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8895, "step": 15747 }, { "epoch": 0.37054117647058826, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.972, "step": 15748 }, { "epoch": 0.37056470588235296, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.651, "step": 15749 }, { "epoch": 0.37058823529411766, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9212, "step": 15750 }, { "epoch": 0.37061176470588236, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0647, "step": 15751 }, { "epoch": 0.37063529411764706, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8883, "step": 15752 }, { "epoch": 0.37065882352941176, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8883, "step": 15753 }, { "epoch": 0.37068235294117646, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0329, "step": 15754 }, { "epoch": 0.37070588235294116, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8457, "step": 15755 }, { "epoch": 0.37072941176470586, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9125, "step": 15756 }, { "epoch": 0.37075294117647056, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0061, "step": 15757 }, { "epoch": 0.3707764705882353, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.8103, "step": 15758 }, { "epoch": 0.3708, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1674, "step": 15759 }, { "epoch": 0.3708235294117647, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2874, "step": 15760 }, { "epoch": 0.3708470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9331, "step": 15761 }, { "epoch": 0.3708705882352941, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.6659, "step": 15762 }, { "epoch": 0.3708941176470588, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.978, "step": 15763 }, { "epoch": 0.3709176470588235, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0992, "step": 15764 }, { "epoch": 0.3709411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0927, "step": 15765 }, { "epoch": 0.3709647058823529, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9188, "step": 15766 }, { "epoch": 0.3709882352941177, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0389, "step": 15767 }, { "epoch": 0.3710117647058824, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9282, "step": 15768 }, { "epoch": 0.3710352941176471, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0679, "step": 15769 }, { "epoch": 0.3710588235294118, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7023, "step": 15770 }, { "epoch": 0.3710823529411765, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6841, "step": 15771 }, { "epoch": 0.3711058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8803, "step": 15772 }, { "epoch": 0.3711294117647059, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1725, "step": 15773 }, { "epoch": 0.3711529411764706, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.115, "step": 15774 }, { "epoch": 0.3711764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2302, "step": 15775 }, { "epoch": 0.3712, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0746, "step": 15776 }, { "epoch": 0.37122352941176473, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8072, "step": 15777 }, { "epoch": 0.37124705882352943, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.8999, "step": 15778 }, { "epoch": 0.37127058823529413, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7154, "step": 15779 }, { "epoch": 0.37129411764705883, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8722, "step": 15780 }, { "epoch": 0.37131764705882353, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.3541, "step": 15781 }, { "epoch": 0.37134117647058823, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.7752, "step": 15782 }, { "epoch": 0.37136470588235293, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2468, "step": 15783 }, { "epoch": 0.37138823529411763, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.163, "step": 15784 }, { "epoch": 0.37141176470588233, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9101, "step": 15785 }, { "epoch": 0.3714352941176471, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8621, "step": 15786 }, { "epoch": 0.3714588235294118, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8286, "step": 15787 }, { "epoch": 0.3714823529411765, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.6638, "step": 15788 }, { "epoch": 0.3715058823529412, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1356, "step": 15789 }, { "epoch": 0.3715294117647059, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0911, "step": 15790 }, { "epoch": 0.3715529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0789, "step": 15791 }, { "epoch": 0.3715764705882353, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.3192, "step": 15792 }, { "epoch": 0.3716, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1536, "step": 15793 }, { "epoch": 0.3716235294117647, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2517, "step": 15794 }, { "epoch": 0.3716470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0427, "step": 15795 }, { "epoch": 0.37167058823529414, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.232, "step": 15796 }, { "epoch": 0.37169411764705884, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0454, "step": 15797 }, { "epoch": 0.37171764705882354, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.368, "step": 15798 }, { "epoch": 0.37174117647058824, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9938, "step": 15799 }, { "epoch": 0.37176470588235294, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1573, "step": 15800 }, { "epoch": 0.37178823529411764, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.1413, "step": 15801 }, { "epoch": 0.37181176470588234, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0646, "step": 15802 }, { "epoch": 0.37183529411764704, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0524, "step": 15803 }, { "epoch": 0.37185882352941174, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0713, "step": 15804 }, { "epoch": 0.3718823529411765, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1358, "step": 15805 }, { "epoch": 0.3719058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.1013, "step": 15806 }, { "epoch": 0.3719294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0185, "step": 15807 }, { "epoch": 0.3719529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8478, "step": 15808 }, { "epoch": 0.3719764705882353, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.2285, "step": 15809 }, { "epoch": 0.372, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9153, "step": 15810 }, { "epoch": 0.3720235294117647, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3481, "step": 15811 }, { "epoch": 0.3720470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8719, "step": 15812 }, { "epoch": 0.3720705882352941, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2639, "step": 15813 }, { "epoch": 0.3720941176470588, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.7691, "step": 15814 }, { "epoch": 0.37211764705882355, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8695, "step": 15815 }, { "epoch": 0.37214117647058825, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.971, "step": 15816 }, { "epoch": 0.37216470588235295, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1408, "step": 15817 }, { "epoch": 0.37218823529411765, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9577, "step": 15818 }, { "epoch": 0.37221176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1631, "step": 15819 }, { "epoch": 0.37223529411764705, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8809, "step": 15820 }, { "epoch": 0.37225882352941175, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.575, "step": 15821 }, { "epoch": 0.37228235294117645, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0173, "step": 15822 }, { "epoch": 0.37230588235294115, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9249, "step": 15823 }, { "epoch": 0.3723294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9787, "step": 15824 }, { "epoch": 0.3723529411764706, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2344, "step": 15825 }, { "epoch": 0.3723764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.214, "step": 15826 }, { "epoch": 0.3724, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8639, "step": 15827 }, { "epoch": 0.3724235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7948, "step": 15828 }, { "epoch": 0.3724470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0114, "step": 15829 }, { "epoch": 0.3724705882352941, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.3721, "step": 15830 }, { "epoch": 0.3724941176470588, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.015, "step": 15831 }, { "epoch": 0.3725176470588235, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.841, "step": 15832 }, { "epoch": 0.3725411764705882, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.7359, "step": 15833 }, { "epoch": 0.37256470588235296, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7634, "step": 15834 }, { "epoch": 0.37258823529411766, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7483, "step": 15835 }, { "epoch": 0.37261176470588236, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2974, "step": 15836 }, { "epoch": 0.37263529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.4037, "step": 15837 }, { "epoch": 0.37265882352941176, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0407, "step": 15838 }, { "epoch": 0.37268235294117646, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1853, "step": 15839 }, { "epoch": 0.37270588235294116, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2016, "step": 15840 }, { "epoch": 0.37272941176470586, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.0296, "step": 15841 }, { "epoch": 0.37275294117647056, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9051, "step": 15842 }, { "epoch": 0.3727764705882353, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7652, "step": 15843 }, { "epoch": 0.3728, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 1.1345, "step": 15844 }, { "epoch": 0.3728235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.2866, "step": 15845 }, { "epoch": 0.3728470588235294, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.5107, "step": 15846 }, { "epoch": 0.3728705882352941, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1791, "step": 15847 }, { "epoch": 0.3728941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9008, "step": 15848 }, { "epoch": 0.3729176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9269, "step": 15849 }, { "epoch": 0.3729411764705882, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1251, "step": 15850 }, { "epoch": 0.3729647058823529, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.6432, "step": 15851 }, { "epoch": 0.3729882352941176, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1359, "step": 15852 }, { "epoch": 0.3730117647058824, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.4529, "step": 15853 }, { "epoch": 0.3730352941176471, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7722, "step": 15854 }, { "epoch": 0.3730588235294118, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8821, "step": 15855 }, { "epoch": 0.3730823529411765, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2219, "step": 15856 }, { "epoch": 0.3731058823529412, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9996, "step": 15857 }, { "epoch": 0.3731294117647059, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8291, "step": 15858 }, { "epoch": 0.3731529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9577, "step": 15859 }, { "epoch": 0.3731764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8244, "step": 15860 }, { "epoch": 0.3732, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2096, "step": 15861 }, { "epoch": 0.37322352941176473, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2566, "step": 15862 }, { "epoch": 0.37324705882352943, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9381, "step": 15863 }, { "epoch": 0.37327058823529413, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1208, "step": 15864 }, { "epoch": 0.37329411764705883, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7325, "step": 15865 }, { "epoch": 0.37331764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9169, "step": 15866 }, { "epoch": 0.37334117647058823, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0093, "step": 15867 }, { "epoch": 0.37336470588235293, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9062, "step": 15868 }, { "epoch": 0.37338823529411763, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9097, "step": 15869 }, { "epoch": 0.37341176470588233, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2216, "step": 15870 }, { "epoch": 0.37343529411764703, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1811, "step": 15871 }, { "epoch": 0.3734588235294118, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2201, "step": 15872 }, { "epoch": 0.3734823529411765, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9584, "step": 15873 }, { "epoch": 0.3735058823529412, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.877, "step": 15874 }, { "epoch": 0.3735294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1718, "step": 15875 }, { "epoch": 0.3735529411764706, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9752, "step": 15876 }, { "epoch": 0.3735764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.091, "step": 15877 }, { "epoch": 0.3736, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2677, "step": 15878 }, { "epoch": 0.3736235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.3123, "step": 15879 }, { "epoch": 0.3736470588235294, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1797, "step": 15880 }, { "epoch": 0.37367058823529414, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9104, "step": 15881 }, { "epoch": 0.37369411764705884, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.713, "step": 15882 }, { "epoch": 0.37371764705882354, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0381, "step": 15883 }, { "epoch": 0.37374117647058824, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8929, "step": 15884 }, { "epoch": 0.37376470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7086, "step": 15885 }, { "epoch": 0.37378823529411764, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1483, "step": 15886 }, { "epoch": 0.37381176470588234, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9238, "step": 15887 }, { "epoch": 0.37383529411764704, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1056, "step": 15888 }, { "epoch": 0.37385882352941174, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1608, "step": 15889 }, { "epoch": 0.37388235294117644, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8884, "step": 15890 }, { "epoch": 0.3739058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.8527, "step": 15891 }, { "epoch": 0.3739294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 1.2635, "step": 15892 }, { "epoch": 0.3739529411764706, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.9908, "step": 15893 }, { "epoch": 0.3739764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0102, "step": 15894 }, { "epoch": 0.374, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.5101, "step": 15895 }, { "epoch": 0.3740235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.2542, "step": 15896 }, { "epoch": 0.3740470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.6424, "step": 15897 }, { "epoch": 0.3740705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7216, "step": 15898 }, { "epoch": 0.3740941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9613, "step": 15899 }, { "epoch": 0.37411764705882355, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.6883, "step": 15900 }, { "epoch": 0.37414117647058825, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2474, "step": 15901 }, { "epoch": 0.37416470588235295, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0768, "step": 15902 }, { "epoch": 0.37418823529411765, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.082, "step": 15903 }, { "epoch": 0.37421176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1066, "step": 15904 }, { "epoch": 0.37423529411764705, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.9214, "step": 15905 }, { "epoch": 0.37425882352941175, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.6546, "step": 15906 }, { "epoch": 0.37428235294117645, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2799, "step": 15907 }, { "epoch": 0.37430588235294115, "grad_norm": 0.2314453125, "learning_rate": 0.02, "loss": 0.4886, "step": 15908 }, { "epoch": 0.37432941176470585, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8672, "step": 15909 }, { "epoch": 0.3743529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9782, "step": 15910 }, { "epoch": 0.3743764705882353, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.809, "step": 15911 }, { "epoch": 0.3744, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2159, "step": 15912 }, { "epoch": 0.3744235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.8906, "step": 15913 }, { "epoch": 0.3744470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9858, "step": 15914 }, { "epoch": 0.3744705882352941, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.045, "step": 15915 }, { "epoch": 0.3744941176470588, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3596, "step": 15916 }, { "epoch": 0.3745176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2401, "step": 15917 }, { "epoch": 0.3745411764705882, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0497, "step": 15918 }, { "epoch": 0.37456470588235297, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9461, "step": 15919 }, { "epoch": 0.37458823529411767, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7111, "step": 15920 }, { "epoch": 0.37461176470588237, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9929, "step": 15921 }, { "epoch": 0.37463529411764707, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.34, "step": 15922 }, { "epoch": 0.37465882352941177, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3372, "step": 15923 }, { "epoch": 0.37468235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8565, "step": 15924 }, { "epoch": 0.37470588235294117, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2598, "step": 15925 }, { "epoch": 0.37472941176470587, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3032, "step": 15926 }, { "epoch": 0.37475294117647057, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.927, "step": 15927 }, { "epoch": 0.37477647058823527, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0569, "step": 15928 }, { "epoch": 0.3748, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0016, "step": 15929 }, { "epoch": 0.3748235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9849, "step": 15930 }, { "epoch": 0.3748470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.7602, "step": 15931 }, { "epoch": 0.3748705882352941, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.2079, "step": 15932 }, { "epoch": 0.3748941176470588, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7263, "step": 15933 }, { "epoch": 0.3749176470588235, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8252, "step": 15934 }, { "epoch": 0.3749411764705882, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9192, "step": 15935 }, { "epoch": 0.3749647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0548, "step": 15936 }, { "epoch": 0.3749882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1165, "step": 15937 }, { "epoch": 0.3750117647058824, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9929, "step": 15938 }, { "epoch": 0.3750352941176471, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9724, "step": 15939 }, { "epoch": 0.3750588235294118, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1931, "step": 15940 }, { "epoch": 0.3750823529411765, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.9947, "step": 15941 }, { "epoch": 0.3751058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8759, "step": 15942 }, { "epoch": 0.3751294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8807, "step": 15943 }, { "epoch": 0.3751529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1647, "step": 15944 }, { "epoch": 0.3751764705882353, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2389, "step": 15945 }, { "epoch": 0.3752, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0818, "step": 15946 }, { "epoch": 0.37522352941176473, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8691, "step": 15947 }, { "epoch": 0.37524705882352943, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0219, "step": 15948 }, { "epoch": 0.37527058823529413, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9208, "step": 15949 }, { "epoch": 0.37529411764705883, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1534, "step": 15950 }, { "epoch": 0.37531764705882353, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9318, "step": 15951 }, { "epoch": 0.37534117647058823, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2561, "step": 15952 }, { "epoch": 0.37536470588235293, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0711, "step": 15953 }, { "epoch": 0.37538823529411763, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7229, "step": 15954 }, { "epoch": 0.37541176470588233, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0917, "step": 15955 }, { "epoch": 0.37543529411764703, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9986, "step": 15956 }, { "epoch": 0.3754588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9358, "step": 15957 }, { "epoch": 0.3754823529411765, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.3146, "step": 15958 }, { "epoch": 0.3755058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9685, "step": 15959 }, { "epoch": 0.3755294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9082, "step": 15960 }, { "epoch": 0.3755529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1877, "step": 15961 }, { "epoch": 0.3755764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9504, "step": 15962 }, { "epoch": 0.3756, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8263, "step": 15963 }, { "epoch": 0.3756235294117647, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3582, "step": 15964 }, { "epoch": 0.3756470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0895, "step": 15965 }, { "epoch": 0.37567058823529415, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0071, "step": 15966 }, { "epoch": 0.37569411764705885, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9697, "step": 15967 }, { "epoch": 0.37571764705882355, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0207, "step": 15968 }, { "epoch": 0.37574117647058825, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0649, "step": 15969 }, { "epoch": 0.37576470588235295, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0864, "step": 15970 }, { "epoch": 0.37578823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9022, "step": 15971 }, { "epoch": 0.37581176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1542, "step": 15972 }, { "epoch": 0.37583529411764705, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.963, "step": 15973 }, { "epoch": 0.37585882352941175, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0027, "step": 15974 }, { "epoch": 0.37588235294117645, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.1502, "step": 15975 }, { "epoch": 0.3759058823529412, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.252, "step": 15976 }, { "epoch": 0.3759294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8725, "step": 15977 }, { "epoch": 0.3759529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1806, "step": 15978 }, { "epoch": 0.3759764705882353, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9197, "step": 15979 }, { "epoch": 0.376, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0364, "step": 15980 }, { "epoch": 0.3760235294117647, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9751, "step": 15981 }, { "epoch": 0.3760470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0746, "step": 15982 }, { "epoch": 0.3760705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.1015, "step": 15983 }, { "epoch": 0.3760941176470588, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1579, "step": 15984 }, { "epoch": 0.37611764705882356, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0219, "step": 15985 }, { "epoch": 0.37614117647058826, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2535, "step": 15986 }, { "epoch": 0.37616470588235296, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7836, "step": 15987 }, { "epoch": 0.37618823529411766, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1926, "step": 15988 }, { "epoch": 0.37621176470588236, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0737, "step": 15989 }, { "epoch": 0.37623529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0235, "step": 15990 }, { "epoch": 0.37625882352941176, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3884, "step": 15991 }, { "epoch": 0.37628235294117646, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1994, "step": 15992 }, { "epoch": 0.37630588235294116, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7312, "step": 15993 }, { "epoch": 0.37632941176470586, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0224, "step": 15994 }, { "epoch": 0.3763529411764706, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.581, "step": 15995 }, { "epoch": 0.3763764705882353, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.5856, "step": 15996 }, { "epoch": 0.3764, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1183, "step": 15997 }, { "epoch": 0.3764235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2512, "step": 15998 }, { "epoch": 0.3764470588235294, "grad_norm": 0.2197265625, "learning_rate": 0.02, "loss": 0.6554, "step": 15999 }, { "epoch": 0.3764705882352941, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.707, "step": 16000 }, { "epoch": 0.3764705882352941, "eval_loss": 2.1887595653533936, "eval_runtime": 679.3218, "eval_samples_per_second": 12.512, "eval_steps_per_second": 3.128, "step": 16000 }, { "epoch": 0.3764941176470588, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.785, "step": 16001 }, { "epoch": 0.3765176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1141, "step": 16002 }, { "epoch": 0.3765411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9296, "step": 16003 }, { "epoch": 0.37656470588235297, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9441, "step": 16004 }, { "epoch": 0.37658823529411767, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0682, "step": 16005 }, { "epoch": 0.37661176470588237, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0076, "step": 16006 }, { "epoch": 0.37663529411764707, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0257, "step": 16007 }, { "epoch": 0.37665882352941177, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0939, "step": 16008 }, { "epoch": 0.37668235294117647, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3631, "step": 16009 }, { "epoch": 0.37670588235294117, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1608, "step": 16010 }, { "epoch": 0.37672941176470587, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7462, "step": 16011 }, { "epoch": 0.37675294117647057, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7852, "step": 16012 }, { "epoch": 0.37677647058823527, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9878, "step": 16013 }, { "epoch": 0.3768, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.8971, "step": 16014 }, { "epoch": 0.3768235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.6234, "step": 16015 }, { "epoch": 0.3768470588235294, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.0817, "step": 16016 }, { "epoch": 0.3768705882352941, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0346, "step": 16017 }, { "epoch": 0.3768941176470588, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0991, "step": 16018 }, { "epoch": 0.3769176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9831, "step": 16019 }, { "epoch": 0.3769411764705882, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0149, "step": 16020 }, { "epoch": 0.3769647058823529, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2224, "step": 16021 }, { "epoch": 0.3769882352941176, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0567, "step": 16022 }, { "epoch": 0.3770117647058824, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0021, "step": 16023 }, { "epoch": 0.3770352941176471, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8997, "step": 16024 }, { "epoch": 0.3770588235294118, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3875, "step": 16025 }, { "epoch": 0.3770823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.6998, "step": 16026 }, { "epoch": 0.3771058823529412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1264, "step": 16027 }, { "epoch": 0.3771294117647059, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3073, "step": 16028 }, { "epoch": 0.3771529411764706, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.303, "step": 16029 }, { "epoch": 0.3771764705882353, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0954, "step": 16030 }, { "epoch": 0.3772, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8882, "step": 16031 }, { "epoch": 0.3772235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0054, "step": 16032 }, { "epoch": 0.37724705882352944, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.025, "step": 16033 }, { "epoch": 0.37727058823529414, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.3355, "step": 16034 }, { "epoch": 0.37729411764705884, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9477, "step": 16035 }, { "epoch": 0.37731764705882354, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0899, "step": 16036 }, { "epoch": 0.37734117647058824, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0526, "step": 16037 }, { "epoch": 0.37736470588235294, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7737, "step": 16038 }, { "epoch": 0.37738823529411764, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6937, "step": 16039 }, { "epoch": 0.37741176470588234, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8623, "step": 16040 }, { "epoch": 0.37743529411764704, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0242, "step": 16041 }, { "epoch": 0.3774588235294118, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1636, "step": 16042 }, { "epoch": 0.3774823529411765, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0398, "step": 16043 }, { "epoch": 0.3775058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9567, "step": 16044 }, { "epoch": 0.3775294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1541, "step": 16045 }, { "epoch": 0.3775529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8141, "step": 16046 }, { "epoch": 0.3775764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1109, "step": 16047 }, { "epoch": 0.3776, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8065, "step": 16048 }, { "epoch": 0.3776235294117647, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8225, "step": 16049 }, { "epoch": 0.3776470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1431, "step": 16050 }, { "epoch": 0.3776705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.956, "step": 16051 }, { "epoch": 0.37769411764705885, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9259, "step": 16052 }, { "epoch": 0.37771764705882355, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9604, "step": 16053 }, { "epoch": 0.37774117647058825, "grad_norm": 0.220703125, "learning_rate": 0.02, "loss": 0.5828, "step": 16054 }, { "epoch": 0.37776470588235295, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0216, "step": 16055 }, { "epoch": 0.37778823529411765, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.943, "step": 16056 }, { "epoch": 0.37781176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0593, "step": 16057 }, { "epoch": 0.37783529411764705, "grad_norm": 0.2353515625, "learning_rate": 0.02, "loss": 0.2771, "step": 16058 }, { "epoch": 0.37785882352941175, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9756, "step": 16059 }, { "epoch": 0.37788235294117645, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.2674, "step": 16060 }, { "epoch": 0.3779058823529412, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2226, "step": 16061 }, { "epoch": 0.3779294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9653, "step": 16062 }, { "epoch": 0.3779529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0097, "step": 16063 }, { "epoch": 0.3779764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.8841, "step": 16064 }, { "epoch": 0.378, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1301, "step": 16065 }, { "epoch": 0.3780235294117647, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.82, "step": 16066 }, { "epoch": 0.3780470588235294, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1283, "step": 16067 }, { "epoch": 0.3780705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9523, "step": 16068 }, { "epoch": 0.3780941176470588, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.2622, "step": 16069 }, { "epoch": 0.3781176470588235, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0812, "step": 16070 }, { "epoch": 0.37814117647058826, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2531, "step": 16071 }, { "epoch": 0.37816470588235296, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8077, "step": 16072 }, { "epoch": 0.37818823529411766, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0842, "step": 16073 }, { "epoch": 0.37821176470588236, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8952, "step": 16074 }, { "epoch": 0.37823529411764706, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7889, "step": 16075 }, { "epoch": 0.37825882352941176, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1324, "step": 16076 }, { "epoch": 0.37828235294117646, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1381, "step": 16077 }, { "epoch": 0.37830588235294116, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0805, "step": 16078 }, { "epoch": 0.37832941176470586, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2671, "step": 16079 }, { "epoch": 0.3783529411764706, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0603, "step": 16080 }, { "epoch": 0.3783764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1091, "step": 16081 }, { "epoch": 0.3784, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3307, "step": 16082 }, { "epoch": 0.3784235294117647, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0439, "step": 16083 }, { "epoch": 0.3784470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.999, "step": 16084 }, { "epoch": 0.3784705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.6319, "step": 16085 }, { "epoch": 0.3784941176470588, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.3559, "step": 16086 }, { "epoch": 0.3785176470588235, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9833, "step": 16087 }, { "epoch": 0.3785411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7256, "step": 16088 }, { "epoch": 0.3785647058823529, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8687, "step": 16089 }, { "epoch": 0.37858823529411767, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8482, "step": 16090 }, { "epoch": 0.37861176470588237, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.808, "step": 16091 }, { "epoch": 0.37863529411764707, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0569, "step": 16092 }, { "epoch": 0.37865882352941177, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0379, "step": 16093 }, { "epoch": 0.37868235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8041, "step": 16094 }, { "epoch": 0.37870588235294117, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0198, "step": 16095 }, { "epoch": 0.37872941176470587, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1105, "step": 16096 }, { "epoch": 0.37875294117647057, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1798, "step": 16097 }, { "epoch": 0.37877647058823527, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1318, "step": 16098 }, { "epoch": 0.3788, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8416, "step": 16099 }, { "epoch": 0.3788235294117647, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.091, "step": 16100 }, { "epoch": 0.3788470588235294, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1109, "step": 16101 }, { "epoch": 0.3788705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1279, "step": 16102 }, { "epoch": 0.3788941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9346, "step": 16103 }, { "epoch": 0.3789176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0972, "step": 16104 }, { "epoch": 0.3789411764705882, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0831, "step": 16105 }, { "epoch": 0.3789647058823529, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8602, "step": 16106 }, { "epoch": 0.3789882352941176, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1212, "step": 16107 }, { "epoch": 0.3790117647058823, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0068, "step": 16108 }, { "epoch": 0.3790352941176471, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.784, "step": 16109 }, { "epoch": 0.3790588235294118, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9411, "step": 16110 }, { "epoch": 0.3790823529411765, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.0761, "step": 16111 }, { "epoch": 0.3791058823529412, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0117, "step": 16112 }, { "epoch": 0.3791294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1803, "step": 16113 }, { "epoch": 0.3791529411764706, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1595, "step": 16114 }, { "epoch": 0.3791764705882353, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8662, "step": 16115 }, { "epoch": 0.3792, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3003, "step": 16116 }, { "epoch": 0.3792235294117647, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 1.0551, "step": 16117 }, { "epoch": 0.37924705882352944, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2058, "step": 16118 }, { "epoch": 0.37927058823529414, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8915, "step": 16119 }, { "epoch": 0.37929411764705884, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.925, "step": 16120 }, { "epoch": 0.37931764705882354, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2837, "step": 16121 }, { "epoch": 0.37934117647058824, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8921, "step": 16122 }, { "epoch": 0.37936470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9161, "step": 16123 }, { "epoch": 0.37938823529411764, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1593, "step": 16124 }, { "epoch": 0.37941176470588234, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0604, "step": 16125 }, { "epoch": 0.37943529411764704, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9057, "step": 16126 }, { "epoch": 0.37945882352941174, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1926, "step": 16127 }, { "epoch": 0.3794823529411765, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.6366, "step": 16128 }, { "epoch": 0.3795058823529412, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.1226, "step": 16129 }, { "epoch": 0.3795294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8431, "step": 16130 }, { "epoch": 0.3795529411764706, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1293, "step": 16131 }, { "epoch": 0.3795764705882353, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 0.9678, "step": 16132 }, { "epoch": 0.3796, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.2529, "step": 16133 }, { "epoch": 0.3796235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.817, "step": 16134 }, { "epoch": 0.3796470588235294, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9891, "step": 16135 }, { "epoch": 0.3796705882352941, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8659, "step": 16136 }, { "epoch": 0.37969411764705885, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0978, "step": 16137 }, { "epoch": 0.37971764705882355, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.075, "step": 16138 }, { "epoch": 0.37974117647058825, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9245, "step": 16139 }, { "epoch": 0.37976470588235295, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7328, "step": 16140 }, { "epoch": 0.37978823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0932, "step": 16141 }, { "epoch": 0.37981176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1367, "step": 16142 }, { "epoch": 0.37983529411764705, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9962, "step": 16143 }, { "epoch": 0.37985882352941175, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9535, "step": 16144 }, { "epoch": 0.37988235294117645, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8588, "step": 16145 }, { "epoch": 0.37990588235294115, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9648, "step": 16146 }, { "epoch": 0.3799294117647059, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 0.8463, "step": 16147 }, { "epoch": 0.3799529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8535, "step": 16148 }, { "epoch": 0.3799764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9793, "step": 16149 }, { "epoch": 0.38, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7205, "step": 16150 }, { "epoch": 0.3800235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0685, "step": 16151 }, { "epoch": 0.3800470588235294, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8222, "step": 16152 }, { "epoch": 0.3800705882352941, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2688, "step": 16153 }, { "epoch": 0.3800941176470588, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8868, "step": 16154 }, { "epoch": 0.3801176470588235, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.3019, "step": 16155 }, { "epoch": 0.38014117647058826, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.7545, "step": 16156 }, { "epoch": 0.38016470588235296, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8422, "step": 16157 }, { "epoch": 0.38018823529411766, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0835, "step": 16158 }, { "epoch": 0.38021176470588236, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8952, "step": 16159 }, { "epoch": 0.38023529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8667, "step": 16160 }, { "epoch": 0.38025882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9293, "step": 16161 }, { "epoch": 0.38028235294117646, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9667, "step": 16162 }, { "epoch": 0.38030588235294116, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.6207, "step": 16163 }, { "epoch": 0.38032941176470586, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.9642, "step": 16164 }, { "epoch": 0.3803529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2212, "step": 16165 }, { "epoch": 0.3803764705882353, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1989, "step": 16166 }, { "epoch": 0.3804, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7733, "step": 16167 }, { "epoch": 0.3804235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0126, "step": 16168 }, { "epoch": 0.3804470588235294, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.066, "step": 16169 }, { "epoch": 0.3804705882352941, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1103, "step": 16170 }, { "epoch": 0.3804941176470588, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0606, "step": 16171 }, { "epoch": 0.3805176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9198, "step": 16172 }, { "epoch": 0.3805411764705882, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.993, "step": 16173 }, { "epoch": 0.3805647058823529, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9957, "step": 16174 }, { "epoch": 0.38058823529411767, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2512, "step": 16175 }, { "epoch": 0.38061176470588237, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0457, "step": 16176 }, { "epoch": 0.38063529411764707, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.777, "step": 16177 }, { "epoch": 0.38065882352941177, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8797, "step": 16178 }, { "epoch": 0.38068235294117647, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0654, "step": 16179 }, { "epoch": 0.38070588235294117, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 1.0298, "step": 16180 }, { "epoch": 0.38072941176470587, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3132, "step": 16181 }, { "epoch": 0.38075294117647057, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1276, "step": 16182 }, { "epoch": 0.38077647058823527, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.0466, "step": 16183 }, { "epoch": 0.3808, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.082, "step": 16184 }, { "epoch": 0.3808235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1294, "step": 16185 }, { "epoch": 0.3808470588235294, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8285, "step": 16186 }, { "epoch": 0.3808705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1322, "step": 16187 }, { "epoch": 0.3808941176470588, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0586, "step": 16188 }, { "epoch": 0.3809176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1, "step": 16189 }, { "epoch": 0.3809411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9584, "step": 16190 }, { "epoch": 0.3809647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.177, "step": 16191 }, { "epoch": 0.3809882352941176, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.9272, "step": 16192 }, { "epoch": 0.3810117647058823, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9045, "step": 16193 }, { "epoch": 0.3810352941176471, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7305, "step": 16194 }, { "epoch": 0.3810588235294118, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7707, "step": 16195 }, { "epoch": 0.3810823529411765, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9826, "step": 16196 }, { "epoch": 0.3811058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0626, "step": 16197 }, { "epoch": 0.3811294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0914, "step": 16198 }, { "epoch": 0.3811529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1346, "step": 16199 }, { "epoch": 0.3811764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9903, "step": 16200 }, { "epoch": 0.3812, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0268, "step": 16201 }, { "epoch": 0.3812235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0401, "step": 16202 }, { "epoch": 0.38124705882352944, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8745, "step": 16203 }, { "epoch": 0.38127058823529414, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1349, "step": 16204 }, { "epoch": 0.38129411764705884, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9087, "step": 16205 }, { "epoch": 0.38131764705882354, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8757, "step": 16206 }, { "epoch": 0.38134117647058824, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2248, "step": 16207 }, { "epoch": 0.38136470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1474, "step": 16208 }, { "epoch": 0.38138823529411764, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0408, "step": 16209 }, { "epoch": 0.38141176470588234, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.0821, "step": 16210 }, { "epoch": 0.38143529411764704, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9785, "step": 16211 }, { "epoch": 0.38145882352941174, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1644, "step": 16212 }, { "epoch": 0.3814823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8784, "step": 16213 }, { "epoch": 0.3815058823529412, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.6372, "step": 16214 }, { "epoch": 0.3815294117647059, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1443, "step": 16215 }, { "epoch": 0.3815529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9694, "step": 16216 }, { "epoch": 0.3815764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9962, "step": 16217 }, { "epoch": 0.3816, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1745, "step": 16218 }, { "epoch": 0.3816235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9793, "step": 16219 }, { "epoch": 0.3816470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0326, "step": 16220 }, { "epoch": 0.3816705882352941, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0306, "step": 16221 }, { "epoch": 0.38169411764705885, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8015, "step": 16222 }, { "epoch": 0.38171764705882355, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3449, "step": 16223 }, { "epoch": 0.38174117647058825, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1505, "step": 16224 }, { "epoch": 0.38176470588235295, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1509, "step": 16225 }, { "epoch": 0.38178823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0558, "step": 16226 }, { "epoch": 0.38181176470588235, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9403, "step": 16227 }, { "epoch": 0.38183529411764705, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1068, "step": 16228 }, { "epoch": 0.38185882352941175, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.7776, "step": 16229 }, { "epoch": 0.38188235294117645, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 1.0085, "step": 16230 }, { "epoch": 0.38190588235294115, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2374, "step": 16231 }, { "epoch": 0.3819294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.91, "step": 16232 }, { "epoch": 0.3819529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1045, "step": 16233 }, { "epoch": 0.3819764705882353, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.4921, "step": 16234 }, { "epoch": 0.382, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3399, "step": 16235 }, { "epoch": 0.3820235294117647, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.0893, "step": 16236 }, { "epoch": 0.3820470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.2062, "step": 16237 }, { "epoch": 0.3820705882352941, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.8409, "step": 16238 }, { "epoch": 0.3820941176470588, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.1404, "step": 16239 }, { "epoch": 0.3821176470588235, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1126, "step": 16240 }, { "epoch": 0.38214117647058826, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9709, "step": 16241 }, { "epoch": 0.38216470588235296, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7607, "step": 16242 }, { "epoch": 0.38218823529411766, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3298, "step": 16243 }, { "epoch": 0.38221176470588236, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2013, "step": 16244 }, { "epoch": 0.38223529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0733, "step": 16245 }, { "epoch": 0.38225882352941176, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3167, "step": 16246 }, { "epoch": 0.38228235294117646, "grad_norm": 0.224609375, "learning_rate": 0.02, "loss": 0.5858, "step": 16247 }, { "epoch": 0.38230588235294116, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1202, "step": 16248 }, { "epoch": 0.38232941176470586, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9575, "step": 16249 }, { "epoch": 0.38235294117647056, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.6828, "step": 16250 }, { "epoch": 0.3823764705882353, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.4131, "step": 16251 }, { "epoch": 0.3824, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8131, "step": 16252 }, { "epoch": 0.3824235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9767, "step": 16253 }, { "epoch": 0.3824470588235294, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7442, "step": 16254 }, { "epoch": 0.3824705882352941, "grad_norm": 0.23828125, "learning_rate": 0.02, "loss": 0.7401, "step": 16255 }, { "epoch": 0.3824941176470588, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.2326, "step": 16256 }, { "epoch": 0.3825176470588235, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8588, "step": 16257 }, { "epoch": 0.3825411764705882, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0153, "step": 16258 }, { "epoch": 0.3825647058823529, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0252, "step": 16259 }, { "epoch": 0.3825882352941177, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3578, "step": 16260 }, { "epoch": 0.3826117647058824, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8942, "step": 16261 }, { "epoch": 0.3826352941176471, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1539, "step": 16262 }, { "epoch": 0.3826588235294118, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8186, "step": 16263 }, { "epoch": 0.3826823529411765, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0318, "step": 16264 }, { "epoch": 0.3827058823529412, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.8907, "step": 16265 }, { "epoch": 0.3827294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1557, "step": 16266 }, { "epoch": 0.3827529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1344, "step": 16267 }, { "epoch": 0.3827764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.7121, "step": 16268 }, { "epoch": 0.3828, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7309, "step": 16269 }, { "epoch": 0.38282352941176473, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2473, "step": 16270 }, { "epoch": 0.38284705882352943, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.038, "step": 16271 }, { "epoch": 0.38287058823529413, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1051, "step": 16272 }, { "epoch": 0.38289411764705883, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0847, "step": 16273 }, { "epoch": 0.38291764705882353, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8229, "step": 16274 }, { "epoch": 0.38294117647058823, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.6137, "step": 16275 }, { "epoch": 0.38296470588235293, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7055, "step": 16276 }, { "epoch": 0.38298823529411763, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8069, "step": 16277 }, { "epoch": 0.38301176470588233, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0593, "step": 16278 }, { "epoch": 0.3830352941176471, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0725, "step": 16279 }, { "epoch": 0.3830588235294118, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1543, "step": 16280 }, { "epoch": 0.3830823529411765, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0165, "step": 16281 }, { "epoch": 0.3831058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0335, "step": 16282 }, { "epoch": 0.3831294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8317, "step": 16283 }, { "epoch": 0.3831529411764706, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9482, "step": 16284 }, { "epoch": 0.3831764705882353, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.3405, "step": 16285 }, { "epoch": 0.3832, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1371, "step": 16286 }, { "epoch": 0.3832235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3035, "step": 16287 }, { "epoch": 0.3832470588235294, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3179, "step": 16288 }, { "epoch": 0.38327058823529414, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8122, "step": 16289 }, { "epoch": 0.38329411764705884, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.2365, "step": 16290 }, { "epoch": 0.38331764705882354, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8094, "step": 16291 }, { "epoch": 0.38334117647058824, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1129, "step": 16292 }, { "epoch": 0.38336470588235294, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0024, "step": 16293 }, { "epoch": 0.38338823529411764, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.7914, "step": 16294 }, { "epoch": 0.38341176470588234, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2366, "step": 16295 }, { "epoch": 0.38343529411764704, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9998, "step": 16296 }, { "epoch": 0.38345882352941174, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8084, "step": 16297 }, { "epoch": 0.3834823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.2275, "step": 16298 }, { "epoch": 0.3835058823529412, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8016, "step": 16299 }, { "epoch": 0.3835294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8482, "step": 16300 }, { "epoch": 0.3835529411764706, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8791, "step": 16301 }, { "epoch": 0.3835764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 0.9391, "step": 16302 }, { "epoch": 0.3836, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7869, "step": 16303 }, { "epoch": 0.3836235294117647, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7994, "step": 16304 }, { "epoch": 0.3836470588235294, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2169, "step": 16305 }, { "epoch": 0.3836705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.803, "step": 16306 }, { "epoch": 0.3836941176470588, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.091, "step": 16307 }, { "epoch": 0.38371764705882355, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.8988, "step": 16308 }, { "epoch": 0.38374117647058825, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0296, "step": 16309 }, { "epoch": 0.38376470588235295, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.2276, "step": 16310 }, { "epoch": 0.38378823529411765, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8004, "step": 16311 }, { "epoch": 0.38381176470588235, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2559, "step": 16312 }, { "epoch": 0.38383529411764705, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0309, "step": 16313 }, { "epoch": 0.38385882352941175, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9358, "step": 16314 }, { "epoch": 0.38388235294117645, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7231, "step": 16315 }, { "epoch": 0.38390588235294115, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8824, "step": 16316 }, { "epoch": 0.3839294117647059, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.27, "step": 16317 }, { "epoch": 0.3839529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1324, "step": 16318 }, { "epoch": 0.3839764705882353, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7081, "step": 16319 }, { "epoch": 0.384, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9098, "step": 16320 }, { "epoch": 0.3840235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0781, "step": 16321 }, { "epoch": 0.3840470588235294, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2048, "step": 16322 }, { "epoch": 0.3840705882352941, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2047, "step": 16323 }, { "epoch": 0.3840941176470588, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1113, "step": 16324 }, { "epoch": 0.3841176470588235, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0232, "step": 16325 }, { "epoch": 0.3841411764705882, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8684, "step": 16326 }, { "epoch": 0.38416470588235296, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2304, "step": 16327 }, { "epoch": 0.38418823529411766, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0183, "step": 16328 }, { "epoch": 0.38421176470588236, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0286, "step": 16329 }, { "epoch": 0.38423529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7924, "step": 16330 }, { "epoch": 0.38425882352941176, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.874, "step": 16331 }, { "epoch": 0.38428235294117646, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1492, "step": 16332 }, { "epoch": 0.38430588235294116, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.6757, "step": 16333 }, { "epoch": 0.38432941176470586, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.083, "step": 16334 }, { "epoch": 0.38435294117647056, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8586, "step": 16335 }, { "epoch": 0.3843764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9033, "step": 16336 }, { "epoch": 0.3844, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1198, "step": 16337 }, { "epoch": 0.3844235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.098, "step": 16338 }, { "epoch": 0.3844470588235294, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3585, "step": 16339 }, { "epoch": 0.3844705882352941, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1468, "step": 16340 }, { "epoch": 0.3844941176470588, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.2308, "step": 16341 }, { "epoch": 0.3845176470588235, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.965, "step": 16342 }, { "epoch": 0.3845411764705882, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0482, "step": 16343 }, { "epoch": 0.3845647058823529, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1055, "step": 16344 }, { "epoch": 0.3845882352941176, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1992, "step": 16345 }, { "epoch": 0.3846117647058824, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9773, "step": 16346 }, { "epoch": 0.3846352941176471, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1256, "step": 16347 }, { "epoch": 0.3846588235294118, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1683, "step": 16348 }, { "epoch": 0.3846823529411765, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.6862, "step": 16349 }, { "epoch": 0.3847058823529412, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.8372, "step": 16350 }, { "epoch": 0.3847294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8921, "step": 16351 }, { "epoch": 0.3847529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9168, "step": 16352 }, { "epoch": 0.3847764705882353, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9245, "step": 16353 }, { "epoch": 0.3848, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.6051, "step": 16354 }, { "epoch": 0.38482352941176473, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0836, "step": 16355 }, { "epoch": 0.38484705882352943, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8655, "step": 16356 }, { "epoch": 0.38487058823529413, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1325, "step": 16357 }, { "epoch": 0.38489411764705883, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.9186, "step": 16358 }, { "epoch": 0.38491764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0327, "step": 16359 }, { "epoch": 0.38494117647058823, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9943, "step": 16360 }, { "epoch": 0.38496470588235293, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.9529, "step": 16361 }, { "epoch": 0.38498823529411763, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8332, "step": 16362 }, { "epoch": 0.38501176470588233, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8551, "step": 16363 }, { "epoch": 0.38503529411764703, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9212, "step": 16364 }, { "epoch": 0.3850588235294118, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0025, "step": 16365 }, { "epoch": 0.3850823529411765, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0809, "step": 16366 }, { "epoch": 0.3851058823529412, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9987, "step": 16367 }, { "epoch": 0.3851294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9054, "step": 16368 }, { "epoch": 0.3851529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.7586, "step": 16369 }, { "epoch": 0.3851764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0396, "step": 16370 }, { "epoch": 0.3852, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9724, "step": 16371 }, { "epoch": 0.3852235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9932, "step": 16372 }, { "epoch": 0.3852470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0637, "step": 16373 }, { "epoch": 0.38527058823529414, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0449, "step": 16374 }, { "epoch": 0.38529411764705884, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0911, "step": 16375 }, { "epoch": 0.38531764705882354, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.5061, "step": 16376 }, { "epoch": 0.38534117647058824, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8025, "step": 16377 }, { "epoch": 0.38536470588235294, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.0877, "step": 16378 }, { "epoch": 0.38538823529411764, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3227, "step": 16379 }, { "epoch": 0.38541176470588234, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1354, "step": 16380 }, { "epoch": 0.38543529411764704, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2112, "step": 16381 }, { "epoch": 0.38545882352941174, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8521, "step": 16382 }, { "epoch": 0.3854823529411765, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0496, "step": 16383 }, { "epoch": 0.3855058823529412, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0056, "step": 16384 }, { "epoch": 0.3855294117647059, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7023, "step": 16385 }, { "epoch": 0.3855529411764706, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6639, "step": 16386 }, { "epoch": 0.3855764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9047, "step": 16387 }, { "epoch": 0.3856, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0882, "step": 16388 }, { "epoch": 0.3856235294117647, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8233, "step": 16389 }, { "epoch": 0.3856470588235294, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.105, "step": 16390 }, { "epoch": 0.3856705882352941, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0034, "step": 16391 }, { "epoch": 0.3856941176470588, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6684, "step": 16392 }, { "epoch": 0.38571764705882355, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7092, "step": 16393 }, { "epoch": 0.38574117647058825, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1173, "step": 16394 }, { "epoch": 0.38576470588235295, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8763, "step": 16395 }, { "epoch": 0.38578823529411765, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1478, "step": 16396 }, { "epoch": 0.38581176470588235, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 0.9711, "step": 16397 }, { "epoch": 0.38583529411764705, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9924, "step": 16398 }, { "epoch": 0.38585882352941175, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8533, "step": 16399 }, { "epoch": 0.38588235294117645, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9092, "step": 16400 }, { "epoch": 0.38590588235294115, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.4722, "step": 16401 }, { "epoch": 0.3859294117647059, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.175, "step": 16402 }, { "epoch": 0.3859529411764706, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.0445, "step": 16403 }, { "epoch": 0.3859764705882353, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.6673, "step": 16404 }, { "epoch": 0.386, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.6598, "step": 16405 }, { "epoch": 0.3860235294117647, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.333, "step": 16406 }, { "epoch": 0.3860470588235294, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.2971, "step": 16407 }, { "epoch": 0.3860705882352941, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.0359, "step": 16408 }, { "epoch": 0.3860941176470588, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.127, "step": 16409 }, { "epoch": 0.3861176470588235, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7723, "step": 16410 }, { "epoch": 0.3861411764705882, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1386, "step": 16411 }, { "epoch": 0.38616470588235297, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.589, "step": 16412 }, { "epoch": 0.38618823529411767, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.2233, "step": 16413 }, { "epoch": 0.38621176470588237, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1355, "step": 16414 }, { "epoch": 0.38623529411764707, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2813, "step": 16415 }, { "epoch": 0.38625882352941177, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.987, "step": 16416 }, { "epoch": 0.38628235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 1.0223, "step": 16417 }, { "epoch": 0.38630588235294117, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.455, "step": 16418 }, { "epoch": 0.38632941176470587, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2431, "step": 16419 }, { "epoch": 0.38635294117647057, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.8345, "step": 16420 }, { "epoch": 0.3863764705882353, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.8123, "step": 16421 }, { "epoch": 0.3864, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0193, "step": 16422 }, { "epoch": 0.3864235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1112, "step": 16423 }, { "epoch": 0.3864470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2774, "step": 16424 }, { "epoch": 0.3864705882352941, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7543, "step": 16425 }, { "epoch": 0.3864941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.216, "step": 16426 }, { "epoch": 0.3865176470588235, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8591, "step": 16427 }, { "epoch": 0.3865411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9755, "step": 16428 }, { "epoch": 0.3865647058823529, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1323, "step": 16429 }, { "epoch": 0.3865882352941176, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1977, "step": 16430 }, { "epoch": 0.3866117647058824, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9755, "step": 16431 }, { "epoch": 0.3866352941176471, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8802, "step": 16432 }, { "epoch": 0.3866588235294118, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1748, "step": 16433 }, { "epoch": 0.3866823529411765, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8173, "step": 16434 }, { "epoch": 0.3867058823529412, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.864, "step": 16435 }, { "epoch": 0.3867294117647059, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1183, "step": 16436 }, { "epoch": 0.3867529411764706, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8835, "step": 16437 }, { "epoch": 0.3867764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0679, "step": 16438 }, { "epoch": 0.3868, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9636, "step": 16439 }, { "epoch": 0.38682352941176473, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9268, "step": 16440 }, { "epoch": 0.38684705882352943, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.8573, "step": 16441 }, { "epoch": 0.38687058823529413, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.2429, "step": 16442 }, { "epoch": 0.38689411764705883, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8123, "step": 16443 }, { "epoch": 0.38691764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9236, "step": 16444 }, { "epoch": 0.38694117647058823, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8825, "step": 16445 }, { "epoch": 0.38696470588235293, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.946, "step": 16446 }, { "epoch": 0.38698823529411763, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9466, "step": 16447 }, { "epoch": 0.38701176470588233, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7787, "step": 16448 }, { "epoch": 0.38703529411764703, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9074, "step": 16449 }, { "epoch": 0.3870588235294118, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0341, "step": 16450 }, { "epoch": 0.3870823529411765, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8858, "step": 16451 }, { "epoch": 0.3871058823529412, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2844, "step": 16452 }, { "epoch": 0.3871294117647059, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9385, "step": 16453 }, { "epoch": 0.3871529411764706, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9256, "step": 16454 }, { "epoch": 0.3871764705882353, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9833, "step": 16455 }, { "epoch": 0.3872, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.217, "step": 16456 }, { "epoch": 0.3872235294117647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9825, "step": 16457 }, { "epoch": 0.3872470588235294, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.1983, "step": 16458 }, { "epoch": 0.38727058823529414, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1868, "step": 16459 }, { "epoch": 0.38729411764705884, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3447, "step": 16460 }, { "epoch": 0.38731764705882354, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9945, "step": 16461 }, { "epoch": 0.38734117647058824, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9723, "step": 16462 }, { "epoch": 0.38736470588235294, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.2676, "step": 16463 }, { "epoch": 0.38738823529411764, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0168, "step": 16464 }, { "epoch": 0.38741176470588234, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.5456, "step": 16465 }, { "epoch": 0.38743529411764704, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9068, "step": 16466 }, { "epoch": 0.38745882352941174, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1215, "step": 16467 }, { "epoch": 0.38748235294117644, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1714, "step": 16468 }, { "epoch": 0.3875058823529412, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0933, "step": 16469 }, { "epoch": 0.3875294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0415, "step": 16470 }, { "epoch": 0.3875529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8946, "step": 16471 }, { "epoch": 0.3875764705882353, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.76, "step": 16472 }, { "epoch": 0.3876, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0525, "step": 16473 }, { "epoch": 0.3876235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0708, "step": 16474 }, { "epoch": 0.3876470588235294, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8866, "step": 16475 }, { "epoch": 0.3876705882352941, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7921, "step": 16476 }, { "epoch": 0.3876941176470588, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.005, "step": 16477 }, { "epoch": 0.38771764705882356, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1647, "step": 16478 }, { "epoch": 0.38774117647058826, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8147, "step": 16479 }, { "epoch": 0.38776470588235296, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.731, "step": 16480 }, { "epoch": 0.38778823529411766, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0595, "step": 16481 }, { "epoch": 0.38781176470588236, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0761, "step": 16482 }, { "epoch": 0.38783529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1093, "step": 16483 }, { "epoch": 0.38785882352941176, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8892, "step": 16484 }, { "epoch": 0.38788235294117646, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1305, "step": 16485 }, { "epoch": 0.38790588235294116, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 0.9942, "step": 16486 }, { "epoch": 0.38792941176470586, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.1589, "step": 16487 }, { "epoch": 0.3879529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.728, "step": 16488 }, { "epoch": 0.3879764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8642, "step": 16489 }, { "epoch": 0.388, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1227, "step": 16490 }, { "epoch": 0.3880235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0882, "step": 16491 }, { "epoch": 0.3880470588235294, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.8227, "step": 16492 }, { "epoch": 0.3880705882352941, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1548, "step": 16493 }, { "epoch": 0.3880941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.095, "step": 16494 }, { "epoch": 0.3881176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0696, "step": 16495 }, { "epoch": 0.3881411764705882, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1151, "step": 16496 }, { "epoch": 0.38816470588235297, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1905, "step": 16497 }, { "epoch": 0.38818823529411767, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8779, "step": 16498 }, { "epoch": 0.38821176470588237, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.2162, "step": 16499 }, { "epoch": 0.38823529411764707, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1285, "step": 16500 }, { "epoch": 0.38825882352941177, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8795, "step": 16501 }, { "epoch": 0.38828235294117647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1971, "step": 16502 }, { "epoch": 0.38830588235294117, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.5896, "step": 16503 }, { "epoch": 0.38832941176470587, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0227, "step": 16504 }, { "epoch": 0.38835294117647057, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.016, "step": 16505 }, { "epoch": 0.38837647058823527, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0553, "step": 16506 }, { "epoch": 0.3884, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7392, "step": 16507 }, { "epoch": 0.3884235294117647, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.2052, "step": 16508 }, { "epoch": 0.3884470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8276, "step": 16509 }, { "epoch": 0.3884705882352941, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9473, "step": 16510 }, { "epoch": 0.3884941176470588, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.2865, "step": 16511 }, { "epoch": 0.3885176470588235, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9377, "step": 16512 }, { "epoch": 0.3885411764705882, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8557, "step": 16513 }, { "epoch": 0.3885647058823529, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9394, "step": 16514 }, { "epoch": 0.3885882352941176, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0062, "step": 16515 }, { "epoch": 0.3886117647058824, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.6945, "step": 16516 }, { "epoch": 0.3886352941176471, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2094, "step": 16517 }, { "epoch": 0.3886588235294118, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0186, "step": 16518 }, { "epoch": 0.3886823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0496, "step": 16519 }, { "epoch": 0.3887058823529412, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0356, "step": 16520 }, { "epoch": 0.3887294117647059, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.781, "step": 16521 }, { "epoch": 0.3887529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8703, "step": 16522 }, { "epoch": 0.3887764705882353, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8155, "step": 16523 }, { "epoch": 0.3888, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.77, "step": 16524 }, { "epoch": 0.3888235294117647, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.6988, "step": 16525 }, { "epoch": 0.38884705882352943, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.7295, "step": 16526 }, { "epoch": 0.38887058823529413, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9785, "step": 16527 }, { "epoch": 0.38889411764705883, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.087, "step": 16528 }, { "epoch": 0.38891764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8425, "step": 16529 }, { "epoch": 0.38894117647058823, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8525, "step": 16530 }, { "epoch": 0.38896470588235293, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1751, "step": 16531 }, { "epoch": 0.38898823529411763, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0465, "step": 16532 }, { "epoch": 0.38901176470588233, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.143, "step": 16533 }, { "epoch": 0.38903529411764703, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9703, "step": 16534 }, { "epoch": 0.3890588235294118, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.063, "step": 16535 }, { "epoch": 0.3890823529411765, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0276, "step": 16536 }, { "epoch": 0.3891058823529412, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0911, "step": 16537 }, { "epoch": 0.3891294117647059, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6346, "step": 16538 }, { "epoch": 0.3891529411764706, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0473, "step": 16539 }, { "epoch": 0.3891764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.7886, "step": 16540 }, { "epoch": 0.3892, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.8515, "step": 16541 }, { "epoch": 0.3892235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9544, "step": 16542 }, { "epoch": 0.3892470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0497, "step": 16543 }, { "epoch": 0.3892705882352941, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0768, "step": 16544 }, { "epoch": 0.38929411764705885, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1452, "step": 16545 }, { "epoch": 0.38931764705882355, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.0338, "step": 16546 }, { "epoch": 0.38934117647058825, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0391, "step": 16547 }, { "epoch": 0.38936470588235295, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1854, "step": 16548 }, { "epoch": 0.38938823529411765, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.9483, "step": 16549 }, { "epoch": 0.38941176470588235, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9914, "step": 16550 }, { "epoch": 0.38943529411764705, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0444, "step": 16551 }, { "epoch": 0.38945882352941175, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.098, "step": 16552 }, { "epoch": 0.38948235294117645, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9406, "step": 16553 }, { "epoch": 0.3895058823529412, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9163, "step": 16554 }, { "epoch": 0.3895294117647059, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.6352, "step": 16555 }, { "epoch": 0.3895529411764706, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1382, "step": 16556 }, { "epoch": 0.3895764705882353, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8018, "step": 16557 }, { "epoch": 0.3896, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0358, "step": 16558 }, { "epoch": 0.3896235294117647, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.246, "step": 16559 }, { "epoch": 0.3896470588235294, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.7771, "step": 16560 }, { "epoch": 0.3896705882352941, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0438, "step": 16561 }, { "epoch": 0.3896941176470588, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9489, "step": 16562 }, { "epoch": 0.3897176470588235, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3187, "step": 16563 }, { "epoch": 0.38974117647058826, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9581, "step": 16564 }, { "epoch": 0.38976470588235296, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9423, "step": 16565 }, { "epoch": 0.38978823529411766, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0481, "step": 16566 }, { "epoch": 0.38981176470588236, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9656, "step": 16567 }, { "epoch": 0.38983529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1367, "step": 16568 }, { "epoch": 0.38985882352941176, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1961, "step": 16569 }, { "epoch": 0.38988235294117646, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.6735, "step": 16570 }, { "epoch": 0.38990588235294116, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9676, "step": 16571 }, { "epoch": 0.38992941176470586, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2077, "step": 16572 }, { "epoch": 0.3899529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0624, "step": 16573 }, { "epoch": 0.3899764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9891, "step": 16574 }, { "epoch": 0.39, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.986, "step": 16575 }, { "epoch": 0.3900235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0052, "step": 16576 }, { "epoch": 0.3900470588235294, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0565, "step": 16577 }, { "epoch": 0.3900705882352941, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2285, "step": 16578 }, { "epoch": 0.3900941176470588, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0551, "step": 16579 }, { "epoch": 0.3901176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8578, "step": 16580 }, { "epoch": 0.3901411764705882, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.7092, "step": 16581 }, { "epoch": 0.3901647058823529, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1883, "step": 16582 }, { "epoch": 0.39018823529411767, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.009, "step": 16583 }, { "epoch": 0.39021176470588237, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9759, "step": 16584 }, { "epoch": 0.39023529411764707, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3439, "step": 16585 }, { "epoch": 0.39025882352941177, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.6767, "step": 16586 }, { "epoch": 0.39028235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1006, "step": 16587 }, { "epoch": 0.39030588235294117, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 0.9166, "step": 16588 }, { "epoch": 0.39032941176470587, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.893, "step": 16589 }, { "epoch": 0.39035294117647057, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0232, "step": 16590 }, { "epoch": 0.39037647058823527, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0377, "step": 16591 }, { "epoch": 0.3904, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7344, "step": 16592 }, { "epoch": 0.3904235294117647, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2685, "step": 16593 }, { "epoch": 0.3904470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1446, "step": 16594 }, { "epoch": 0.3904705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0096, "step": 16595 }, { "epoch": 0.3904941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0459, "step": 16596 }, { "epoch": 0.3905176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9786, "step": 16597 }, { "epoch": 0.3905411764705882, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2139, "step": 16598 }, { "epoch": 0.3905647058823529, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 1.1048, "step": 16599 }, { "epoch": 0.3905882352941176, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1369, "step": 16600 }, { "epoch": 0.3906117647058824, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.8104, "step": 16601 }, { "epoch": 0.3906352941176471, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.0845, "step": 16602 }, { "epoch": 0.3906588235294118, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2043, "step": 16603 }, { "epoch": 0.3906823529411765, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.7813, "step": 16604 }, { "epoch": 0.3907058823529412, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0814, "step": 16605 }, { "epoch": 0.3907294117647059, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0277, "step": 16606 }, { "epoch": 0.3907529411764706, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0756, "step": 16607 }, { "epoch": 0.3907764705882353, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9918, "step": 16608 }, { "epoch": 0.3908, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0501, "step": 16609 }, { "epoch": 0.3908235294117647, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1114, "step": 16610 }, { "epoch": 0.39084705882352944, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.8973, "step": 16611 }, { "epoch": 0.39087058823529414, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9492, "step": 16612 }, { "epoch": 0.39089411764705884, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1192, "step": 16613 }, { "epoch": 0.39091764705882354, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.99, "step": 16614 }, { "epoch": 0.39094117647058824, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.281, "step": 16615 }, { "epoch": 0.39096470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9091, "step": 16616 }, { "epoch": 0.39098823529411764, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.823, "step": 16617 }, { "epoch": 0.39101176470588234, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9175, "step": 16618 }, { "epoch": 0.39103529411764704, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1988, "step": 16619 }, { "epoch": 0.3910588235294118, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.0226, "step": 16620 }, { "epoch": 0.3910823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0192, "step": 16621 }, { "epoch": 0.3911058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.853, "step": 16622 }, { "epoch": 0.3911294117647059, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.3041, "step": 16623 }, { "epoch": 0.3911529411764706, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 0.9703, "step": 16624 }, { "epoch": 0.3911764705882353, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.0062, "step": 16625 }, { "epoch": 0.3912, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9695, "step": 16626 }, { "epoch": 0.3912235294117647, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.5148, "step": 16627 }, { "epoch": 0.3912470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0842, "step": 16628 }, { "epoch": 0.3912705882352941, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8902, "step": 16629 }, { "epoch": 0.39129411764705885, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1575, "step": 16630 }, { "epoch": 0.39131764705882355, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0306, "step": 16631 }, { "epoch": 0.39134117647058825, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7129, "step": 16632 }, { "epoch": 0.39136470588235295, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1904, "step": 16633 }, { "epoch": 0.39138823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.006, "step": 16634 }, { "epoch": 0.39141176470588235, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0695, "step": 16635 }, { "epoch": 0.39143529411764705, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9108, "step": 16636 }, { "epoch": 0.39145882352941175, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9691, "step": 16637 }, { "epoch": 0.39148235294117645, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9046, "step": 16638 }, { "epoch": 0.3915058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1064, "step": 16639 }, { "epoch": 0.3915294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9354, "step": 16640 }, { "epoch": 0.3915529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0733, "step": 16641 }, { "epoch": 0.3915764705882353, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.1897, "step": 16642 }, { "epoch": 0.3916, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 1.0767, "step": 16643 }, { "epoch": 0.3916235294117647, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9396, "step": 16644 }, { "epoch": 0.3916470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1594, "step": 16645 }, { "epoch": 0.3916705882352941, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1038, "step": 16646 }, { "epoch": 0.3916941176470588, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2195, "step": 16647 }, { "epoch": 0.3917176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.982, "step": 16648 }, { "epoch": 0.39174117647058826, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9938, "step": 16649 }, { "epoch": 0.39176470588235296, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0676, "step": 16650 }, { "epoch": 0.39178823529411766, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0486, "step": 16651 }, { "epoch": 0.39181176470588236, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0961, "step": 16652 }, { "epoch": 0.39183529411764706, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2341, "step": 16653 }, { "epoch": 0.39185882352941176, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 1.1667, "step": 16654 }, { "epoch": 0.39188235294117646, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0628, "step": 16655 }, { "epoch": 0.39190588235294116, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2285, "step": 16656 }, { "epoch": 0.39192941176470586, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9547, "step": 16657 }, { "epoch": 0.3919529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.7865, "step": 16658 }, { "epoch": 0.3919764705882353, "grad_norm": 0.138671875, "learning_rate": 0.02, "loss": 1.2568, "step": 16659 }, { "epoch": 0.392, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.103, "step": 16660 }, { "epoch": 0.3920235294117647, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9301, "step": 16661 }, { "epoch": 0.3920470588235294, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.0869, "step": 16662 }, { "epoch": 0.3920705882352941, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.4, "step": 16663 }, { "epoch": 0.3920941176470588, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1005, "step": 16664 }, { "epoch": 0.3921176470588235, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0942, "step": 16665 }, { "epoch": 0.3921411764705882, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0142, "step": 16666 }, { "epoch": 0.3921647058823529, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7945, "step": 16667 }, { "epoch": 0.39218823529411767, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1641, "step": 16668 }, { "epoch": 0.39221176470588237, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1234, "step": 16669 }, { "epoch": 0.39223529411764707, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9367, "step": 16670 }, { "epoch": 0.39225882352941177, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.6296, "step": 16671 }, { "epoch": 0.39228235294117647, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4395, "step": 16672 }, { "epoch": 0.39230588235294117, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0976, "step": 16673 }, { "epoch": 0.39232941176470587, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0391, "step": 16674 }, { "epoch": 0.39235294117647057, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0199, "step": 16675 }, { "epoch": 0.39237647058823527, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1776, "step": 16676 }, { "epoch": 0.3924, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.4876, "step": 16677 }, { "epoch": 0.3924235294117647, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8537, "step": 16678 }, { "epoch": 0.3924470588235294, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1253, "step": 16679 }, { "epoch": 0.3924705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0204, "step": 16680 }, { "epoch": 0.3924941176470588, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9015, "step": 16681 }, { "epoch": 0.3925176470588235, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.2291, "step": 16682 }, { "epoch": 0.3925411764705882, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8513, "step": 16683 }, { "epoch": 0.3925647058823529, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9811, "step": 16684 }, { "epoch": 0.3925882352941176, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.166, "step": 16685 }, { "epoch": 0.3926117647058823, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7847, "step": 16686 }, { "epoch": 0.3926352941176471, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.6571, "step": 16687 }, { "epoch": 0.3926588235294118, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.7295, "step": 16688 }, { "epoch": 0.3926823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9324, "step": 16689 }, { "epoch": 0.3927058823529412, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.3082, "step": 16690 }, { "epoch": 0.3927294117647059, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0486, "step": 16691 }, { "epoch": 0.3927529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1183, "step": 16692 }, { "epoch": 0.3927764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1243, "step": 16693 }, { "epoch": 0.3928, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.527, "step": 16694 }, { "epoch": 0.3928235294117647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.224, "step": 16695 }, { "epoch": 0.39284705882352944, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9716, "step": 16696 }, { "epoch": 0.39287058823529414, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8304, "step": 16697 }, { "epoch": 0.39289411764705884, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8584, "step": 16698 }, { "epoch": 0.39291764705882354, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1492, "step": 16699 }, { "epoch": 0.39294117647058824, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 0.8667, "step": 16700 }, { "epoch": 0.39296470588235294, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0958, "step": 16701 }, { "epoch": 0.39298823529411764, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8867, "step": 16702 }, { "epoch": 0.39301176470588234, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.634, "step": 16703 }, { "epoch": 0.39303529411764704, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8051, "step": 16704 }, { "epoch": 0.39305882352941174, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7941, "step": 16705 }, { "epoch": 0.3930823529411765, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.4186, "step": 16706 }, { "epoch": 0.3931058823529412, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.5096, "step": 16707 }, { "epoch": 0.3931294117647059, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.77, "step": 16708 }, { "epoch": 0.3931529411764706, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0488, "step": 16709 }, { "epoch": 0.3931764705882353, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1777, "step": 16710 }, { "epoch": 0.3932, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8805, "step": 16711 }, { "epoch": 0.3932235294117647, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2199, "step": 16712 }, { "epoch": 0.3932470588235294, "grad_norm": 0.228515625, "learning_rate": 0.02, "loss": 1.2898, "step": 16713 }, { "epoch": 0.3932705882352941, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9307, "step": 16714 }, { "epoch": 0.39329411764705885, "grad_norm": 0.2158203125, "learning_rate": 0.02, "loss": 0.7767, "step": 16715 }, { "epoch": 0.39331764705882355, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1083, "step": 16716 }, { "epoch": 0.39334117647058825, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9768, "step": 16717 }, { "epoch": 0.39336470588235295, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0393, "step": 16718 }, { "epoch": 0.39338823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1266, "step": 16719 }, { "epoch": 0.39341176470588235, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0249, "step": 16720 }, { "epoch": 0.39343529411764705, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.017, "step": 16721 }, { "epoch": 0.39345882352941175, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0347, "step": 16722 }, { "epoch": 0.39348235294117645, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.705, "step": 16723 }, { "epoch": 0.39350588235294115, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1235, "step": 16724 }, { "epoch": 0.3935294117647059, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9472, "step": 16725 }, { "epoch": 0.3935529411764706, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8554, "step": 16726 }, { "epoch": 0.3935764705882353, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9294, "step": 16727 }, { "epoch": 0.3936, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7454, "step": 16728 }, { "epoch": 0.3936235294117647, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.2199, "step": 16729 }, { "epoch": 0.3936470588235294, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8766, "step": 16730 }, { "epoch": 0.3936705882352941, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0408, "step": 16731 }, { "epoch": 0.3936941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9915, "step": 16732 }, { "epoch": 0.3937176470588235, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9206, "step": 16733 }, { "epoch": 0.39374117647058826, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9435, "step": 16734 }, { "epoch": 0.39376470588235296, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9441, "step": 16735 }, { "epoch": 0.39378823529411766, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 0.8968, "step": 16736 }, { "epoch": 0.39381176470588236, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0101, "step": 16737 }, { "epoch": 0.39383529411764706, "grad_norm": 0.22265625, "learning_rate": 0.02, "loss": 0.6803, "step": 16738 }, { "epoch": 0.39385882352941176, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3639, "step": 16739 }, { "epoch": 0.39388235294117646, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9405, "step": 16740 }, { "epoch": 0.39390588235294116, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8635, "step": 16741 }, { "epoch": 0.39392941176470586, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7259, "step": 16742 }, { "epoch": 0.39395294117647056, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9405, "step": 16743 }, { "epoch": 0.3939764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1932, "step": 16744 }, { "epoch": 0.394, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9656, "step": 16745 }, { "epoch": 0.3940235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8751, "step": 16746 }, { "epoch": 0.3940470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1783, "step": 16747 }, { "epoch": 0.3940705882352941, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0744, "step": 16748 }, { "epoch": 0.3940941176470588, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0903, "step": 16749 }, { "epoch": 0.3941176470588235, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1715, "step": 16750 }, { "epoch": 0.3941411764705882, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1476, "step": 16751 }, { "epoch": 0.3941647058823529, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.6656, "step": 16752 }, { "epoch": 0.3941882352941177, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.7119, "step": 16753 }, { "epoch": 0.3942117647058824, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1135, "step": 16754 }, { "epoch": 0.3942352941176471, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1022, "step": 16755 }, { "epoch": 0.3942588235294118, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.868, "step": 16756 }, { "epoch": 0.3942823529411765, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1696, "step": 16757 }, { "epoch": 0.3943058823529412, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1084, "step": 16758 }, { "epoch": 0.3943294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.955, "step": 16759 }, { "epoch": 0.3943529411764706, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.4611, "step": 16760 }, { "epoch": 0.3943764705882353, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1038, "step": 16761 }, { "epoch": 0.3944, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3647, "step": 16762 }, { "epoch": 0.39442352941176473, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 1.1023, "step": 16763 }, { "epoch": 0.39444705882352943, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 1.0746, "step": 16764 }, { "epoch": 0.39447058823529413, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9292, "step": 16765 }, { "epoch": 0.39449411764705883, "grad_norm": 0.1455078125, "learning_rate": 0.02, "loss": 1.1776, "step": 16766 }, { "epoch": 0.39451764705882353, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0943, "step": 16767 }, { "epoch": 0.39454117647058823, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9235, "step": 16768 }, { "epoch": 0.39456470588235293, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.071, "step": 16769 }, { "epoch": 0.39458823529411763, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.4844, "step": 16770 }, { "epoch": 0.39461176470588233, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.0423, "step": 16771 }, { "epoch": 0.3946352941176471, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.098, "step": 16772 }, { "epoch": 0.3946588235294118, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.0868, "step": 16773 }, { "epoch": 0.3946823529411765, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1293, "step": 16774 }, { "epoch": 0.3947058823529412, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6995, "step": 16775 }, { "epoch": 0.3947294117647059, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9458, "step": 16776 }, { "epoch": 0.3947529411764706, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0999, "step": 16777 }, { "epoch": 0.3947764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1976, "step": 16778 }, { "epoch": 0.3948, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0302, "step": 16779 }, { "epoch": 0.3948235294117647, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.1042, "step": 16780 }, { "epoch": 0.3948470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.2562, "step": 16781 }, { "epoch": 0.39487058823529414, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.015, "step": 16782 }, { "epoch": 0.39489411764705884, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0887, "step": 16783 }, { "epoch": 0.39491764705882354, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9233, "step": 16784 }, { "epoch": 0.39494117647058824, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1612, "step": 16785 }, { "epoch": 0.39496470588235294, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1164, "step": 16786 }, { "epoch": 0.39498823529411764, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9279, "step": 16787 }, { "epoch": 0.39501176470588234, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8926, "step": 16788 }, { "epoch": 0.39503529411764704, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9748, "step": 16789 }, { "epoch": 0.39505882352941174, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9327, "step": 16790 }, { "epoch": 0.3950823529411765, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2552, "step": 16791 }, { "epoch": 0.3951058823529412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2138, "step": 16792 }, { "epoch": 0.3951294117647059, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.302, "step": 16793 }, { "epoch": 0.3951529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0611, "step": 16794 }, { "epoch": 0.3951764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2136, "step": 16795 }, { "epoch": 0.3952, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8991, "step": 16796 }, { "epoch": 0.3952235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7539, "step": 16797 }, { "epoch": 0.3952470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0778, "step": 16798 }, { "epoch": 0.3952705882352941, "grad_norm": 0.2109375, "learning_rate": 0.02, "loss": 0.7775, "step": 16799 }, { "epoch": 0.3952941176470588, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9496, "step": 16800 }, { "epoch": 0.39531764705882355, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.5422, "step": 16801 }, { "epoch": 0.39534117647058825, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.2315, "step": 16802 }, { "epoch": 0.39536470588235295, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8291, "step": 16803 }, { "epoch": 0.39538823529411765, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9561, "step": 16804 }, { "epoch": 0.39541176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.014, "step": 16805 }, { "epoch": 0.39543529411764705, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2596, "step": 16806 }, { "epoch": 0.39545882352941175, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1063, "step": 16807 }, { "epoch": 0.39548235294117645, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9664, "step": 16808 }, { "epoch": 0.39550588235294115, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9774, "step": 16809 }, { "epoch": 0.3955294117647059, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.6421, "step": 16810 }, { "epoch": 0.3955529411764706, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0201, "step": 16811 }, { "epoch": 0.3955764705882353, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.5294, "step": 16812 }, { "epoch": 0.3956, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.7685, "step": 16813 }, { "epoch": 0.3956235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8767, "step": 16814 }, { "epoch": 0.3956470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9875, "step": 16815 }, { "epoch": 0.3956705882352941, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7414, "step": 16816 }, { "epoch": 0.3956941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.82, "step": 16817 }, { "epoch": 0.3957176470588235, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7797, "step": 16818 }, { "epoch": 0.39574117647058826, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.2012, "step": 16819 }, { "epoch": 0.39576470588235296, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.1966, "step": 16820 }, { "epoch": 0.39578823529411766, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8167, "step": 16821 }, { "epoch": 0.39581176470588236, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.2303, "step": 16822 }, { "epoch": 0.39583529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.05, "step": 16823 }, { "epoch": 0.39585882352941176, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0204, "step": 16824 }, { "epoch": 0.39588235294117646, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1193, "step": 16825 }, { "epoch": 0.39590588235294116, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0955, "step": 16826 }, { "epoch": 0.39592941176470586, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8946, "step": 16827 }, { "epoch": 0.39595294117647056, "grad_norm": 0.1474609375, "learning_rate": 0.02, "loss": 1.2159, "step": 16828 }, { "epoch": 0.3959764705882353, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0261, "step": 16829 }, { "epoch": 0.396, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8921, "step": 16830 }, { "epoch": 0.3960235294117647, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9309, "step": 16831 }, { "epoch": 0.3960470588235294, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.677, "step": 16832 }, { "epoch": 0.3960705882352941, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1003, "step": 16833 }, { "epoch": 0.3960941176470588, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0115, "step": 16834 }, { "epoch": 0.3961176470588235, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2238, "step": 16835 }, { "epoch": 0.3961411764705882, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0055, "step": 16836 }, { "epoch": 0.3961647058823529, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.9479, "step": 16837 }, { "epoch": 0.3961882352941177, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.4974, "step": 16838 }, { "epoch": 0.3962117647058824, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.5954, "step": 16839 }, { "epoch": 0.3962352941176471, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.7177, "step": 16840 }, { "epoch": 0.3962588235294118, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1869, "step": 16841 }, { "epoch": 0.3962823529411765, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0413, "step": 16842 }, { "epoch": 0.3963058823529412, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.3459, "step": 16843 }, { "epoch": 0.3963294117647059, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0246, "step": 16844 }, { "epoch": 0.3963529411764706, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9716, "step": 16845 }, { "epoch": 0.3963764705882353, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.6374, "step": 16846 }, { "epoch": 0.3964, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0904, "step": 16847 }, { "epoch": 0.39642352941176473, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2224, "step": 16848 }, { "epoch": 0.39644705882352943, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9348, "step": 16849 }, { "epoch": 0.39647058823529413, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9858, "step": 16850 }, { "epoch": 0.39649411764705883, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8684, "step": 16851 }, { "epoch": 0.39651764705882353, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9163, "step": 16852 }, { "epoch": 0.39654117647058823, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8105, "step": 16853 }, { "epoch": 0.39656470588235293, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8241, "step": 16854 }, { "epoch": 0.39658823529411763, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.7869, "step": 16855 }, { "epoch": 0.39661176470588233, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1588, "step": 16856 }, { "epoch": 0.3966352941176471, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1928, "step": 16857 }, { "epoch": 0.3966588235294118, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9702, "step": 16858 }, { "epoch": 0.3966823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8629, "step": 16859 }, { "epoch": 0.3967058823529412, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0186, "step": 16860 }, { "epoch": 0.3967294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8385, "step": 16861 }, { "epoch": 0.3967529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0891, "step": 16862 }, { "epoch": 0.3967764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.091, "step": 16863 }, { "epoch": 0.3968, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0762, "step": 16864 }, { "epoch": 0.3968235294117647, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9347, "step": 16865 }, { "epoch": 0.3968470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7403, "step": 16866 }, { "epoch": 0.39687058823529414, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.3184, "step": 16867 }, { "epoch": 0.39689411764705884, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1344, "step": 16868 }, { "epoch": 0.39691764705882354, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1133, "step": 16869 }, { "epoch": 0.39694117647058824, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.5864, "step": 16870 }, { "epoch": 0.39696470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1511, "step": 16871 }, { "epoch": 0.39698823529411764, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0113, "step": 16872 }, { "epoch": 0.39701176470588234, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9237, "step": 16873 }, { "epoch": 0.39703529411764704, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.7704, "step": 16874 }, { "epoch": 0.39705882352941174, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9191, "step": 16875 }, { "epoch": 0.3970823529411765, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0857, "step": 16876 }, { "epoch": 0.3971058823529412, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9927, "step": 16877 }, { "epoch": 0.3971294117647059, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0796, "step": 16878 }, { "epoch": 0.3971529411764706, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.7357, "step": 16879 }, { "epoch": 0.3971764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.039, "step": 16880 }, { "epoch": 0.3972, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.0116, "step": 16881 }, { "epoch": 0.3972235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9867, "step": 16882 }, { "epoch": 0.3972470588235294, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.978, "step": 16883 }, { "epoch": 0.3972705882352941, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.013, "step": 16884 }, { "epoch": 0.3972941176470588, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.94, "step": 16885 }, { "epoch": 0.39731764705882355, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.9594, "step": 16886 }, { "epoch": 0.39734117647058825, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9086, "step": 16887 }, { "epoch": 0.39736470588235295, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7454, "step": 16888 }, { "epoch": 0.39738823529411765, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 0.9628, "step": 16889 }, { "epoch": 0.39741176470588235, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.6127, "step": 16890 }, { "epoch": 0.39743529411764705, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8859, "step": 16891 }, { "epoch": 0.39745882352941175, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9278, "step": 16892 }, { "epoch": 0.39748235294117645, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.3749, "step": 16893 }, { "epoch": 0.39750588235294115, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0885, "step": 16894 }, { "epoch": 0.3975294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8703, "step": 16895 }, { "epoch": 0.3975529411764706, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1552, "step": 16896 }, { "epoch": 0.3975764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.089, "step": 16897 }, { "epoch": 0.3976, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7545, "step": 16898 }, { "epoch": 0.3976235294117647, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0465, "step": 16899 }, { "epoch": 0.3976470588235294, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8555, "step": 16900 }, { "epoch": 0.3976705882352941, "grad_norm": 0.2001953125, "learning_rate": 0.02, "loss": 0.9321, "step": 16901 }, { "epoch": 0.3976941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.5453, "step": 16902 }, { "epoch": 0.3977176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.8213, "step": 16903 }, { "epoch": 0.3977411764705882, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0776, "step": 16904 }, { "epoch": 0.39776470588235296, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9464, "step": 16905 }, { "epoch": 0.39778823529411766, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8502, "step": 16906 }, { "epoch": 0.39781176470588236, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9234, "step": 16907 }, { "epoch": 0.39783529411764706, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2406, "step": 16908 }, { "epoch": 0.39785882352941176, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8349, "step": 16909 }, { "epoch": 0.39788235294117646, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9417, "step": 16910 }, { "epoch": 0.39790588235294116, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.1352, "step": 16911 }, { "epoch": 0.39792941176470586, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0122, "step": 16912 }, { "epoch": 0.39795294117647056, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.5282, "step": 16913 }, { "epoch": 0.3979764705882353, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.04, "step": 16914 }, { "epoch": 0.398, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.9014, "step": 16915 }, { "epoch": 0.3980235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0466, "step": 16916 }, { "epoch": 0.3980470588235294, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.3018, "step": 16917 }, { "epoch": 0.3980705882352941, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0412, "step": 16918 }, { "epoch": 0.3980941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9696, "step": 16919 }, { "epoch": 0.3981176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0017, "step": 16920 }, { "epoch": 0.3981411764705882, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9038, "step": 16921 }, { "epoch": 0.3981647058823529, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0813, "step": 16922 }, { "epoch": 0.3981882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.642, "step": 16923 }, { "epoch": 0.3982117647058824, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0506, "step": 16924 }, { "epoch": 0.3982352941176471, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2286, "step": 16925 }, { "epoch": 0.3982588235294118, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.2895, "step": 16926 }, { "epoch": 0.3982823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9942, "step": 16927 }, { "epoch": 0.3983058823529412, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9733, "step": 16928 }, { "epoch": 0.3983294117647059, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.7704, "step": 16929 }, { "epoch": 0.3983529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1057, "step": 16930 }, { "epoch": 0.3983764705882353, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9586, "step": 16931 }, { "epoch": 0.3984, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.653, "step": 16932 }, { "epoch": 0.39842352941176473, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.0739, "step": 16933 }, { "epoch": 0.39844705882352943, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0122, "step": 16934 }, { "epoch": 0.39847058823529413, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1603, "step": 16935 }, { "epoch": 0.39849411764705883, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8871, "step": 16936 }, { "epoch": 0.39851764705882353, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0518, "step": 16937 }, { "epoch": 0.39854117647058823, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8928, "step": 16938 }, { "epoch": 0.39856470588235293, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0635, "step": 16939 }, { "epoch": 0.39858823529411763, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0414, "step": 16940 }, { "epoch": 0.39861176470588233, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8889, "step": 16941 }, { "epoch": 0.39863529411764703, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.954, "step": 16942 }, { "epoch": 0.3986588235294118, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1089, "step": 16943 }, { "epoch": 0.3986823529411765, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.746, "step": 16944 }, { "epoch": 0.3987058823529412, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1447, "step": 16945 }, { "epoch": 0.3987294117647059, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.1678, "step": 16946 }, { "epoch": 0.3987529411764706, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9376, "step": 16947 }, { "epoch": 0.3987764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.713, "step": 16948 }, { "epoch": 0.3988, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.671, "step": 16949 }, { "epoch": 0.3988235294117647, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7236, "step": 16950 }, { "epoch": 0.3988470588235294, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.159, "step": 16951 }, { "epoch": 0.39887058823529414, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.7944, "step": 16952 }, { "epoch": 0.39889411764705884, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.065, "step": 16953 }, { "epoch": 0.39891764705882354, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7546, "step": 16954 }, { "epoch": 0.39894117647058824, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1727, "step": 16955 }, { "epoch": 0.39896470588235294, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9706, "step": 16956 }, { "epoch": 0.39898823529411764, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.8956, "step": 16957 }, { "epoch": 0.39901176470588234, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.78, "step": 16958 }, { "epoch": 0.39903529411764704, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2203, "step": 16959 }, { "epoch": 0.39905882352941174, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0078, "step": 16960 }, { "epoch": 0.39908235294117644, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.0711, "step": 16961 }, { "epoch": 0.3991058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8457, "step": 16962 }, { "epoch": 0.3991294117647059, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 1.0285, "step": 16963 }, { "epoch": 0.3991529411764706, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8361, "step": 16964 }, { "epoch": 0.3991764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9356, "step": 16965 }, { "epoch": 0.3992, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.0173, "step": 16966 }, { "epoch": 0.3992235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.6848, "step": 16967 }, { "epoch": 0.3992470588235294, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.6542, "step": 16968 }, { "epoch": 0.3992705882352941, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8394, "step": 16969 }, { "epoch": 0.3992941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.153, "step": 16970 }, { "epoch": 0.39931764705882355, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0465, "step": 16971 }, { "epoch": 0.39934117647058825, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.0377, "step": 16972 }, { "epoch": 0.39936470588235295, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9592, "step": 16973 }, { "epoch": 0.39938823529411766, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9292, "step": 16974 }, { "epoch": 0.39941176470588236, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9771, "step": 16975 }, { "epoch": 0.39943529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9662, "step": 16976 }, { "epoch": 0.39945882352941176, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1901, "step": 16977 }, { "epoch": 0.39948235294117646, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0199, "step": 16978 }, { "epoch": 0.39950588235294116, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.8206, "step": 16979 }, { "epoch": 0.39952941176470586, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8663, "step": 16980 }, { "epoch": 0.3995529411764706, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.3564, "step": 16981 }, { "epoch": 0.3995764705882353, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1081, "step": 16982 }, { "epoch": 0.3996, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.6861, "step": 16983 }, { "epoch": 0.3996235294117647, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9104, "step": 16984 }, { "epoch": 0.3996470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.6132, "step": 16985 }, { "epoch": 0.3996705882352941, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.8048, "step": 16986 }, { "epoch": 0.3996941176470588, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.9868, "step": 16987 }, { "epoch": 0.3997176470588235, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8991, "step": 16988 }, { "epoch": 0.3997411764705882, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9899, "step": 16989 }, { "epoch": 0.39976470588235297, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.5737, "step": 16990 }, { "epoch": 0.39978823529411767, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9269, "step": 16991 }, { "epoch": 0.39981176470588237, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.1338, "step": 16992 }, { "epoch": 0.39983529411764707, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9804, "step": 16993 }, { "epoch": 0.39985882352941177, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0818, "step": 16994 }, { "epoch": 0.39988235294117647, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0744, "step": 16995 }, { "epoch": 0.39990588235294117, "grad_norm": 0.21875, "learning_rate": 0.02, "loss": 0.5892, "step": 16996 }, { "epoch": 0.39992941176470587, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.2538, "step": 16997 }, { "epoch": 0.39995294117647057, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0205, "step": 16998 }, { "epoch": 0.39997647058823527, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.7813, "step": 16999 }, { "epoch": 0.4, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.0842, "step": 17000 }, { "epoch": 0.4, "eval_loss": 2.186739206314087, "eval_runtime": 685.2807, "eval_samples_per_second": 12.404, "eval_steps_per_second": 3.101, "step": 17000 }, { "epoch": 0.4000235294117647, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9425, "step": 17001 }, { "epoch": 0.4000470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1127, "step": 17002 }, { "epoch": 0.4000705882352941, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.881, "step": 17003 }, { "epoch": 0.4000941176470588, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0563, "step": 17004 }, { "epoch": 0.4001176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9175, "step": 17005 }, { "epoch": 0.4001411764705882, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0026, "step": 17006 }, { "epoch": 0.4001647058823529, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0117, "step": 17007 }, { "epoch": 0.4001882352941176, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1724, "step": 17008 }, { "epoch": 0.4002117647058824, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7454, "step": 17009 }, { "epoch": 0.4002352941176471, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1664, "step": 17010 }, { "epoch": 0.4002588235294118, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0794, "step": 17011 }, { "epoch": 0.4002823529411765, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8012, "step": 17012 }, { "epoch": 0.4003058823529412, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7609, "step": 17013 }, { "epoch": 0.4003294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.9115, "step": 17014 }, { "epoch": 0.4003529411764706, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 1.1214, "step": 17015 }, { "epoch": 0.4003764705882353, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.3172, "step": 17016 }, { "epoch": 0.4004, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.1545, "step": 17017 }, { "epoch": 0.4004235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8438, "step": 17018 }, { "epoch": 0.40044705882352943, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 0.9248, "step": 17019 }, { "epoch": 0.40047058823529413, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0116, "step": 17020 }, { "epoch": 0.40049411764705883, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.2066, "step": 17021 }, { "epoch": 0.40051764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1136, "step": 17022 }, { "epoch": 0.40054117647058823, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.8762, "step": 17023 }, { "epoch": 0.40056470588235293, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0464, "step": 17024 }, { "epoch": 0.40058823529411763, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0352, "step": 17025 }, { "epoch": 0.40061176470588233, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.3244, "step": 17026 }, { "epoch": 0.40063529411764703, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.3058, "step": 17027 }, { "epoch": 0.4006588235294118, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9756, "step": 17028 }, { "epoch": 0.4006823529411765, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0493, "step": 17029 }, { "epoch": 0.4007058823529412, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1088, "step": 17030 }, { "epoch": 0.4007294117647059, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7538, "step": 17031 }, { "epoch": 0.4007529411764706, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.7748, "step": 17032 }, { "epoch": 0.4007764705882353, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.8317, "step": 17033 }, { "epoch": 0.4008, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2267, "step": 17034 }, { "epoch": 0.4008235294117647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2295, "step": 17035 }, { "epoch": 0.4008470588235294, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9492, "step": 17036 }, { "epoch": 0.40087058823529415, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.0189, "step": 17037 }, { "epoch": 0.40089411764705885, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.6118, "step": 17038 }, { "epoch": 0.40091764705882355, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1571, "step": 17039 }, { "epoch": 0.40094117647058825, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.7591, "step": 17040 }, { "epoch": 0.40096470588235295, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1753, "step": 17041 }, { "epoch": 0.40098823529411765, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.211, "step": 17042 }, { "epoch": 0.40101176470588235, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2165, "step": 17043 }, { "epoch": 0.40103529411764705, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.9459, "step": 17044 }, { "epoch": 0.40105882352941175, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.1515, "step": 17045 }, { "epoch": 0.40108235294117645, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.3869, "step": 17046 }, { "epoch": 0.4011058823529412, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.84, "step": 17047 }, { "epoch": 0.4011294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8394, "step": 17048 }, { "epoch": 0.4011529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0313, "step": 17049 }, { "epoch": 0.4011764705882353, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2015, "step": 17050 }, { "epoch": 0.4012, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.7755, "step": 17051 }, { "epoch": 0.4012235294117647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.272, "step": 17052 }, { "epoch": 0.4012470588235294, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.8693, "step": 17053 }, { "epoch": 0.4012705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.8343, "step": 17054 }, { "epoch": 0.4012941176470588, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1274, "step": 17055 }, { "epoch": 0.40131764705882356, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.162, "step": 17056 }, { "epoch": 0.40134117647058826, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9518, "step": 17057 }, { "epoch": 0.40136470588235296, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.4068, "step": 17058 }, { "epoch": 0.40138823529411766, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.7975, "step": 17059 }, { "epoch": 0.40141176470588236, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.7518, "step": 17060 }, { "epoch": 0.40143529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.1391, "step": 17061 }, { "epoch": 0.40145882352941176, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1011, "step": 17062 }, { "epoch": 0.40148235294117646, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9322, "step": 17063 }, { "epoch": 0.40150588235294116, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1685, "step": 17064 }, { "epoch": 0.40152941176470586, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.0015, "step": 17065 }, { "epoch": 0.4015529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1257, "step": 17066 }, { "epoch": 0.4015764705882353, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.0115, "step": 17067 }, { "epoch": 0.4016, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0303, "step": 17068 }, { "epoch": 0.4016235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7906, "step": 17069 }, { "epoch": 0.4016470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.9961, "step": 17070 }, { "epoch": 0.4016705882352941, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3695, "step": 17071 }, { "epoch": 0.4016941176470588, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0638, "step": 17072 }, { "epoch": 0.4017176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0031, "step": 17073 }, { "epoch": 0.4017411764705882, "grad_norm": 0.1435546875, "learning_rate": 0.02, "loss": 1.3632, "step": 17074 }, { "epoch": 0.40176470588235297, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.1893, "step": 17075 }, { "epoch": 0.40178823529411767, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.8428, "step": 17076 }, { "epoch": 0.40181176470588237, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1203, "step": 17077 }, { "epoch": 0.40183529411764707, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.3239, "step": 17078 }, { "epoch": 0.40185882352941177, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0866, "step": 17079 }, { "epoch": 0.40188235294117647, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0628, "step": 17080 }, { "epoch": 0.40190588235294117, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0908, "step": 17081 }, { "epoch": 0.40192941176470587, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9108, "step": 17082 }, { "epoch": 0.40195294117647057, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.5492, "step": 17083 }, { "epoch": 0.40197647058823527, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0518, "step": 17084 }, { "epoch": 0.402, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1543, "step": 17085 }, { "epoch": 0.4020235294117647, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0257, "step": 17086 }, { "epoch": 0.4020470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0821, "step": 17087 }, { "epoch": 0.4020705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 1.0106, "step": 17088 }, { "epoch": 0.4020941176470588, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1693, "step": 17089 }, { "epoch": 0.4021176470588235, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8198, "step": 17090 }, { "epoch": 0.4021411764705882, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8855, "step": 17091 }, { "epoch": 0.4021647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1338, "step": 17092 }, { "epoch": 0.4021882352941176, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8018, "step": 17093 }, { "epoch": 0.4022117647058824, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0909, "step": 17094 }, { "epoch": 0.4022352941176471, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1388, "step": 17095 }, { "epoch": 0.4022588235294118, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8887, "step": 17096 }, { "epoch": 0.4022823529411765, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1531, "step": 17097 }, { "epoch": 0.4023058823529412, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.8436, "step": 17098 }, { "epoch": 0.4023294117647059, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1123, "step": 17099 }, { "epoch": 0.4023529411764706, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2892, "step": 17100 }, { "epoch": 0.4023764705882353, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1654, "step": 17101 }, { "epoch": 0.4024, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 1.1655, "step": 17102 }, { "epoch": 0.4024235294117647, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.1253, "step": 17103 }, { "epoch": 0.40244705882352944, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9301, "step": 17104 }, { "epoch": 0.40247058823529414, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.251, "step": 17105 }, { "epoch": 0.40249411764705884, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1, "step": 17106 }, { "epoch": 0.40251764705882354, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.2532, "step": 17107 }, { "epoch": 0.40254117647058824, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9884, "step": 17108 }, { "epoch": 0.40256470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9268, "step": 17109 }, { "epoch": 0.40258823529411764, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.7137, "step": 17110 }, { "epoch": 0.40261176470588234, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7353, "step": 17111 }, { "epoch": 0.40263529411764704, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.8077, "step": 17112 }, { "epoch": 0.4026588235294118, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8965, "step": 17113 }, { "epoch": 0.4026823529411765, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.0482, "step": 17114 }, { "epoch": 0.4027058823529412, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8993, "step": 17115 }, { "epoch": 0.4027294117647059, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.8083, "step": 17116 }, { "epoch": 0.4027529411764706, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.3182, "step": 17117 }, { "epoch": 0.4027764705882353, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1977, "step": 17118 }, { "epoch": 0.4028, "grad_norm": 0.2080078125, "learning_rate": 0.02, "loss": 0.6051, "step": 17119 }, { "epoch": 0.4028235294117647, "grad_norm": 0.140625, "learning_rate": 0.02, "loss": 1.3925, "step": 17120 }, { "epoch": 0.4028470588235294, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9893, "step": 17121 }, { "epoch": 0.4028705882352941, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7159, "step": 17122 }, { "epoch": 0.40289411764705885, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3556, "step": 17123 }, { "epoch": 0.40291764705882355, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.8188, "step": 17124 }, { "epoch": 0.40294117647058825, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0958, "step": 17125 }, { "epoch": 0.40296470588235295, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0515, "step": 17126 }, { "epoch": 0.40298823529411765, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.8715, "step": 17127 }, { "epoch": 0.40301176470588235, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.8511, "step": 17128 }, { "epoch": 0.40303529411764705, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8132, "step": 17129 }, { "epoch": 0.40305882352941175, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2194, "step": 17130 }, { "epoch": 0.40308235294117645, "grad_norm": 0.216796875, "learning_rate": 0.02, "loss": 0.6812, "step": 17131 }, { "epoch": 0.4031058823529412, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0492, "step": 17132 }, { "epoch": 0.4031294117647059, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9542, "step": 17133 }, { "epoch": 0.4031529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8249, "step": 17134 }, { "epoch": 0.4031764705882353, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1489, "step": 17135 }, { "epoch": 0.4032, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9845, "step": 17136 }, { "epoch": 0.4032235294117647, "grad_norm": 0.14453125, "learning_rate": 0.02, "loss": 1.3071, "step": 17137 }, { "epoch": 0.4032470588235294, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0848, "step": 17138 }, { "epoch": 0.4032705882352941, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.081, "step": 17139 }, { "epoch": 0.4032941176470588, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.6974, "step": 17140 }, { "epoch": 0.4033176470588235, "grad_norm": 0.142578125, "learning_rate": 0.02, "loss": 1.3637, "step": 17141 }, { "epoch": 0.40334117647058826, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9164, "step": 17142 }, { "epoch": 0.40336470588235296, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2162, "step": 17143 }, { "epoch": 0.40338823529411766, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.994, "step": 17144 }, { "epoch": 0.40341176470588236, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0412, "step": 17145 }, { "epoch": 0.40343529411764706, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.2081, "step": 17146 }, { "epoch": 0.40345882352941176, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1625, "step": 17147 }, { "epoch": 0.40348235294117646, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7394, "step": 17148 }, { "epoch": 0.40350588235294116, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9139, "step": 17149 }, { "epoch": 0.40352941176470586, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9313, "step": 17150 }, { "epoch": 0.4035529411764706, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0672, "step": 17151 }, { "epoch": 0.4035764705882353, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.3656, "step": 17152 }, { "epoch": 0.4036, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1254, "step": 17153 }, { "epoch": 0.4036235294117647, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1152, "step": 17154 }, { "epoch": 0.4036470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9297, "step": 17155 }, { "epoch": 0.4036705882352941, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9743, "step": 17156 }, { "epoch": 0.4036941176470588, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.204, "step": 17157 }, { "epoch": 0.4037176470588235, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.7067, "step": 17158 }, { "epoch": 0.4037411764705882, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.054, "step": 17159 }, { "epoch": 0.4037647058823529, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.8672, "step": 17160 }, { "epoch": 0.40378823529411767, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.8327, "step": 17161 }, { "epoch": 0.40381176470588237, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9894, "step": 17162 }, { "epoch": 0.40383529411764707, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2237, "step": 17163 }, { "epoch": 0.40385882352941177, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.1976, "step": 17164 }, { "epoch": 0.40388235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.162, "step": 17165 }, { "epoch": 0.40390588235294117, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9842, "step": 17166 }, { "epoch": 0.40392941176470587, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.1607, "step": 17167 }, { "epoch": 0.40395294117647057, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9762, "step": 17168 }, { "epoch": 0.40397647058823527, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0473, "step": 17169 }, { "epoch": 0.404, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0917, "step": 17170 }, { "epoch": 0.4040235294117647, "grad_norm": 0.19140625, "learning_rate": 0.02, "loss": 0.7753, "step": 17171 }, { "epoch": 0.4040470588235294, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9192, "step": 17172 }, { "epoch": 0.4040705882352941, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.1711, "step": 17173 }, { "epoch": 0.4040941176470588, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9857, "step": 17174 }, { "epoch": 0.4041176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9291, "step": 17175 }, { "epoch": 0.4041411764705882, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1163, "step": 17176 }, { "epoch": 0.4041647058823529, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.947, "step": 17177 }, { "epoch": 0.4041882352941176, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 1.072, "step": 17178 }, { "epoch": 0.4042117647058823, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.3276, "step": 17179 }, { "epoch": 0.4042352941176471, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 1.1129, "step": 17180 }, { "epoch": 0.4042588235294118, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0923, "step": 17181 }, { "epoch": 0.4042823529411765, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8461, "step": 17182 }, { "epoch": 0.4043058823529412, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8515, "step": 17183 }, { "epoch": 0.4043294117647059, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7813, "step": 17184 }, { "epoch": 0.4043529411764706, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0486, "step": 17185 }, { "epoch": 0.4043764705882353, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0588, "step": 17186 }, { "epoch": 0.4044, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.902, "step": 17187 }, { "epoch": 0.4044235294117647, "grad_norm": 0.201171875, "learning_rate": 0.02, "loss": 0.7027, "step": 17188 }, { "epoch": 0.40444705882352944, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.2354, "step": 17189 }, { "epoch": 0.40447058823529414, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9165, "step": 17190 }, { "epoch": 0.40449411764705884, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9932, "step": 17191 }, { "epoch": 0.40451764705882354, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.2293, "step": 17192 }, { "epoch": 0.40454117647058824, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9673, "step": 17193 }, { "epoch": 0.40456470588235294, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8289, "step": 17194 }, { "epoch": 0.40458823529411764, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0277, "step": 17195 }, { "epoch": 0.40461176470588234, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8185, "step": 17196 }, { "epoch": 0.40463529411764704, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 1.142, "step": 17197 }, { "epoch": 0.40465882352941174, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.272, "step": 17198 }, { "epoch": 0.4046823529411765, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.107, "step": 17199 }, { "epoch": 0.4047058823529412, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.7105, "step": 17200 }, { "epoch": 0.4047294117647059, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.9502, "step": 17201 }, { "epoch": 0.4047529411764706, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.1318, "step": 17202 }, { "epoch": 0.4047764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9866, "step": 17203 }, { "epoch": 0.4048, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.1249, "step": 17204 }, { "epoch": 0.4048235294117647, "grad_norm": 0.1865234375, "learning_rate": 0.02, "loss": 0.9172, "step": 17205 }, { "epoch": 0.4048470588235294, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.171, "step": 17206 }, { "epoch": 0.4048705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.0481, "step": 17207 }, { "epoch": 0.40489411764705885, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1763, "step": 17208 }, { "epoch": 0.40491764705882355, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7625, "step": 17209 }, { "epoch": 0.40494117647058825, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.978, "step": 17210 }, { "epoch": 0.40496470588235295, "grad_norm": 0.1953125, "learning_rate": 0.02, "loss": 0.8986, "step": 17211 }, { "epoch": 0.40498823529411765, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.3412, "step": 17212 }, { "epoch": 0.40501176470588235, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9691, "step": 17213 }, { "epoch": 0.40503529411764705, "grad_norm": 0.1552734375, "learning_rate": 0.02, "loss": 1.1834, "step": 17214 }, { "epoch": 0.40505882352941175, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9825, "step": 17215 }, { "epoch": 0.40508235294117645, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.2318, "step": 17216 }, { "epoch": 0.40510588235294115, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.9551, "step": 17217 }, { "epoch": 0.4051294117647059, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1259, "step": 17218 }, { "epoch": 0.4051529411764706, "grad_norm": 0.2099609375, "learning_rate": 0.02, "loss": 0.5993, "step": 17219 }, { "epoch": 0.4051764705882353, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8359, "step": 17220 }, { "epoch": 0.4052, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.976, "step": 17221 }, { "epoch": 0.4052235294117647, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.1752, "step": 17222 }, { "epoch": 0.4052470588235294, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7416, "step": 17223 }, { "epoch": 0.4052705882352941, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.1178, "step": 17224 }, { "epoch": 0.4052941176470588, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.3074, "step": 17225 }, { "epoch": 0.4053176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9688, "step": 17226 }, { "epoch": 0.40534117647058826, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0882, "step": 17227 }, { "epoch": 0.40536470588235296, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.8716, "step": 17228 }, { "epoch": 0.40538823529411766, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9068, "step": 17229 }, { "epoch": 0.40541176470588236, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0332, "step": 17230 }, { "epoch": 0.40543529411764706, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.6996, "step": 17231 }, { "epoch": 0.40545882352941176, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.3087, "step": 17232 }, { "epoch": 0.40548235294117646, "grad_norm": 0.203125, "learning_rate": 0.02, "loss": 0.6656, "step": 17233 }, { "epoch": 0.40550588235294116, "grad_norm": 0.15234375, "learning_rate": 0.02, "loss": 1.2658, "step": 17234 }, { "epoch": 0.40552941176470586, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9038, "step": 17235 }, { "epoch": 0.40555294117647056, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9417, "step": 17236 }, { "epoch": 0.4055764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0006, "step": 17237 }, { "epoch": 0.4056, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.695, "step": 17238 }, { "epoch": 0.4056235294117647, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.7048, "step": 17239 }, { "epoch": 0.4056470588235294, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.2478, "step": 17240 }, { "epoch": 0.4056705882352941, "grad_norm": 0.208984375, "learning_rate": 0.02, "loss": 0.7238, "step": 17241 }, { "epoch": 0.4056941176470588, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8606, "step": 17242 }, { "epoch": 0.4057176470588235, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.1693, "step": 17243 }, { "epoch": 0.4057411764705882, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.154, "step": 17244 }, { "epoch": 0.4057647058823529, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.018, "step": 17245 }, { "epoch": 0.40578823529411767, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.7783, "step": 17246 }, { "epoch": 0.40581176470588237, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.3528, "step": 17247 }, { "epoch": 0.40583529411764707, "grad_norm": 0.205078125, "learning_rate": 0.02, "loss": 0.5329, "step": 17248 }, { "epoch": 0.40585882352941177, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0538, "step": 17249 }, { "epoch": 0.40588235294117647, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.1143, "step": 17250 }, { "epoch": 0.40590588235294117, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9592, "step": 17251 }, { "epoch": 0.40592941176470587, "grad_norm": 0.23046875, "learning_rate": 0.02, "loss": 0.5554, "step": 17252 }, { "epoch": 0.40595294117647057, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0203, "step": 17253 }, { "epoch": 0.40597647058823527, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1132, "step": 17254 }, { "epoch": 0.406, "grad_norm": 0.2216796875, "learning_rate": 0.02, "loss": 0.6278, "step": 17255 }, { "epoch": 0.4060235294117647, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9721, "step": 17256 }, { "epoch": 0.4060470588235294, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9506, "step": 17257 }, { "epoch": 0.4060705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8321, "step": 17258 }, { "epoch": 0.4060941176470588, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.8733, "step": 17259 }, { "epoch": 0.4061176470588235, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.896, "step": 17260 }, { "epoch": 0.4061411764705882, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9384, "step": 17261 }, { "epoch": 0.4061647058823529, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.0154, "step": 17262 }, { "epoch": 0.4061882352941176, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8331, "step": 17263 }, { "epoch": 0.40621176470588233, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.0836, "step": 17264 }, { "epoch": 0.4062352941176471, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 0.9461, "step": 17265 }, { "epoch": 0.4062588235294118, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8016, "step": 17266 }, { "epoch": 0.4062823529411765, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.108, "step": 17267 }, { "epoch": 0.4063058823529412, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.7708, "step": 17268 }, { "epoch": 0.4063294117647059, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.9839, "step": 17269 }, { "epoch": 0.4063529411764706, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.5937, "step": 17270 }, { "epoch": 0.4063764705882353, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.8992, "step": 17271 }, { "epoch": 0.4064, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.7944, "step": 17272 }, { "epoch": 0.4064235294117647, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.0185, "step": 17273 }, { "epoch": 0.40644705882352944, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1351, "step": 17274 }, { "epoch": 0.40647058823529414, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.7251, "step": 17275 }, { "epoch": 0.40649411764705884, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.9398, "step": 17276 }, { "epoch": 0.40651764705882354, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.2471, "step": 17277 }, { "epoch": 0.40654117647058824, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 0.8147, "step": 17278 }, { "epoch": 0.40656470588235294, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.3041, "step": 17279 }, { "epoch": 0.40658823529411764, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 0.9734, "step": 17280 }, { "epoch": 0.40661176470588234, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8927, "step": 17281 }, { "epoch": 0.40663529411764704, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0506, "step": 17282 }, { "epoch": 0.40665882352941174, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8944, "step": 17283 }, { "epoch": 0.4066823529411765, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 1.0478, "step": 17284 }, { "epoch": 0.4067058823529412, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0063, "step": 17285 }, { "epoch": 0.4067294117647059, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9928, "step": 17286 }, { "epoch": 0.4067529411764706, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0183, "step": 17287 }, { "epoch": 0.4067764705882353, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 0.9407, "step": 17288 }, { "epoch": 0.4068, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1332, "step": 17289 }, { "epoch": 0.4068235294117647, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.198, "step": 17290 }, { "epoch": 0.4068470588235294, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.1922, "step": 17291 }, { "epoch": 0.4068705882352941, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.2188, "step": 17292 }, { "epoch": 0.40689411764705885, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.9074, "step": 17293 }, { "epoch": 0.40691764705882355, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.278, "step": 17294 }, { "epoch": 0.40694117647058825, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1944, "step": 17295 }, { "epoch": 0.40696470588235295, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.7776, "step": 17296 }, { "epoch": 0.40698823529411765, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9612, "step": 17297 }, { "epoch": 0.40701176470588235, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.0174, "step": 17298 }, { "epoch": 0.40703529411764705, "grad_norm": 0.1962890625, "learning_rate": 0.02, "loss": 0.6054, "step": 17299 }, { "epoch": 0.40705882352941175, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9858, "step": 17300 }, { "epoch": 0.40708235294117645, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.1682, "step": 17301 }, { "epoch": 0.40710588235294115, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.9298, "step": 17302 }, { "epoch": 0.4071294117647059, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.1009, "step": 17303 }, { "epoch": 0.4071529411764706, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1047, "step": 17304 }, { "epoch": 0.4071764705882353, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 1.198, "step": 17305 }, { "epoch": 0.4072, "grad_norm": 0.20703125, "learning_rate": 0.02, "loss": 0.7679, "step": 17306 }, { "epoch": 0.4072235294117647, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8667, "step": 17307 }, { "epoch": 0.4072470588235294, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.855, "step": 17308 }, { "epoch": 0.4072705882352941, "grad_norm": 0.1513671875, "learning_rate": 0.02, "loss": 1.0924, "step": 17309 }, { "epoch": 0.4072941176470588, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.995, "step": 17310 }, { "epoch": 0.4073176470588235, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8187, "step": 17311 }, { "epoch": 0.40734117647058826, "grad_norm": 0.2041015625, "learning_rate": 0.02, "loss": 0.5567, "step": 17312 }, { "epoch": 0.40736470588235296, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.113, "step": 17313 }, { "epoch": 0.40738823529411766, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.1602, "step": 17314 }, { "epoch": 0.40741176470588236, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1762, "step": 17315 }, { "epoch": 0.40743529411764706, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0398, "step": 17316 }, { "epoch": 0.40745882352941176, "grad_norm": 0.21484375, "learning_rate": 0.02, "loss": 0.7318, "step": 17317 }, { "epoch": 0.40748235294117646, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.0366, "step": 17318 }, { "epoch": 0.40750588235294116, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 1.039, "step": 17319 }, { "epoch": 0.40752941176470586, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7627, "step": 17320 }, { "epoch": 0.40755294117647056, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.905, "step": 17321 }, { "epoch": 0.4075764705882353, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7726, "step": 17322 }, { "epoch": 0.4076, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.2421, "step": 17323 }, { "epoch": 0.4076235294117647, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 0.9984, "step": 17324 }, { "epoch": 0.4076470588235294, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6548, "step": 17325 }, { "epoch": 0.4076705882352941, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 1.0465, "step": 17326 }, { "epoch": 0.4076941176470588, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.0409, "step": 17327 }, { "epoch": 0.4077176470588235, "grad_norm": 0.1923828125, "learning_rate": 0.02, "loss": 0.8835, "step": 17328 }, { "epoch": 0.4077411764705882, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9497, "step": 17329 }, { "epoch": 0.4077647058823529, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.1592, "step": 17330 }, { "epoch": 0.4077882352941177, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.924, "step": 17331 }, { "epoch": 0.4078117647058824, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8257, "step": 17332 }, { "epoch": 0.4078352941176471, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0285, "step": 17333 }, { "epoch": 0.4078588235294118, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9621, "step": 17334 }, { "epoch": 0.4078823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.0896, "step": 17335 }, { "epoch": 0.4079058823529412, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.6147, "step": 17336 }, { "epoch": 0.4079294117647059, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0192, "step": 17337 }, { "epoch": 0.4079529411764706, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9187, "step": 17338 }, { "epoch": 0.4079764705882353, "grad_norm": 0.2138671875, "learning_rate": 0.02, "loss": 0.5403, "step": 17339 }, { "epoch": 0.408, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.1591, "step": 17340 }, { "epoch": 0.40802352941176473, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.194, "step": 17341 }, { "epoch": 0.40804705882352943, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.0852, "step": 17342 }, { "epoch": 0.40807058823529413, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9111, "step": 17343 }, { "epoch": 0.40809411764705883, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.8542, "step": 17344 }, { "epoch": 0.40811764705882353, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0045, "step": 17345 }, { "epoch": 0.40814117647058823, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9902, "step": 17346 }, { "epoch": 0.40816470588235293, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2307, "step": 17347 }, { "epoch": 0.40818823529411763, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0349, "step": 17348 }, { "epoch": 0.40821176470588233, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0671, "step": 17349 }, { "epoch": 0.4082352941176471, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2955, "step": 17350 }, { "epoch": 0.4082588235294118, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0509, "step": 17351 }, { "epoch": 0.4082823529411765, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0023, "step": 17352 }, { "epoch": 0.4083058823529412, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.2119, "step": 17353 }, { "epoch": 0.4083294117647059, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1903, "step": 17354 }, { "epoch": 0.4083529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.073, "step": 17355 }, { "epoch": 0.4083764705882353, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.2084, "step": 17356 }, { "epoch": 0.4084, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.236, "step": 17357 }, { "epoch": 0.4084235294117647, "grad_norm": 0.2177734375, "learning_rate": 0.02, "loss": 0.618, "step": 17358 }, { "epoch": 0.4084470588235294, "grad_norm": 0.150390625, "learning_rate": 0.02, "loss": 1.2224, "step": 17359 }, { "epoch": 0.40847058823529414, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.1705, "step": 17360 }, { "epoch": 0.40849411764705884, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.7593, "step": 17361 }, { "epoch": 0.40851764705882354, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8611, "step": 17362 }, { "epoch": 0.40854117647058824, "grad_norm": 0.17578125, "learning_rate": 0.02, "loss": 1.1258, "step": 17363 }, { "epoch": 0.40856470588235294, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0144, "step": 17364 }, { "epoch": 0.40858823529411764, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1432, "step": 17365 }, { "epoch": 0.40861176470588234, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8927, "step": 17366 }, { "epoch": 0.40863529411764704, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9715, "step": 17367 }, { "epoch": 0.40865882352941174, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.8232, "step": 17368 }, { "epoch": 0.4086823529411765, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 1.1441, "step": 17369 }, { "epoch": 0.4087058823529412, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 1.0752, "step": 17370 }, { "epoch": 0.4087294117647059, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0091, "step": 17371 }, { "epoch": 0.4087529411764706, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.9849, "step": 17372 }, { "epoch": 0.4087764705882353, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.8529, "step": 17373 }, { "epoch": 0.4088, "grad_norm": 0.181640625, "learning_rate": 0.02, "loss": 0.8532, "step": 17374 }, { "epoch": 0.4088235294117647, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.0749, "step": 17375 }, { "epoch": 0.4088470588235294, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.9581, "step": 17376 }, { "epoch": 0.4088705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.7233, "step": 17377 }, { "epoch": 0.4088941176470588, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0892, "step": 17378 }, { "epoch": 0.40891764705882355, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.85, "step": 17379 }, { "epoch": 0.40894117647058825, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 0.9249, "step": 17380 }, { "epoch": 0.40896470588235295, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.6787, "step": 17381 }, { "epoch": 0.40898823529411765, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 1.2041, "step": 17382 }, { "epoch": 0.40901176470588235, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9958, "step": 17383 }, { "epoch": 0.40903529411764705, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1861, "step": 17384 }, { "epoch": 0.40905882352941175, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.3528, "step": 17385 }, { "epoch": 0.40908235294117645, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.7648, "step": 17386 }, { "epoch": 0.40910588235294115, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0604, "step": 17387 }, { "epoch": 0.4091294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.1201, "step": 17388 }, { "epoch": 0.4091529411764706, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.7446, "step": 17389 }, { "epoch": 0.4091764705882353, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 1.0475, "step": 17390 }, { "epoch": 0.4092, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.042, "step": 17391 }, { "epoch": 0.4092235294117647, "grad_norm": 0.185546875, "learning_rate": 0.02, "loss": 0.9977, "step": 17392 }, { "epoch": 0.4092470588235294, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.8851, "step": 17393 }, { "epoch": 0.4092705882352941, "grad_norm": 0.19921875, "learning_rate": 0.02, "loss": 0.4205, "step": 17394 }, { "epoch": 0.4092941176470588, "grad_norm": 0.154296875, "learning_rate": 0.02, "loss": 1.1432, "step": 17395 }, { "epoch": 0.4093176470588235, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9345, "step": 17396 }, { "epoch": 0.4093411764705882, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.5996, "step": 17397 }, { "epoch": 0.40936470588235296, "grad_norm": 0.1884765625, "learning_rate": 0.02, "loss": 0.9157, "step": 17398 }, { "epoch": 0.40938823529411766, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 1.1186, "step": 17399 }, { "epoch": 0.40941176470588236, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.8571, "step": 17400 }, { "epoch": 0.40943529411764706, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.0684, "step": 17401 }, { "epoch": 0.40945882352941176, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 1.0205, "step": 17402 }, { "epoch": 0.40948235294117646, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8055, "step": 17403 }, { "epoch": 0.40950588235294116, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9619, "step": 17404 }, { "epoch": 0.40952941176470586, "grad_norm": 0.166015625, "learning_rate": 0.02, "loss": 0.9278, "step": 17405 }, { "epoch": 0.40955294117647056, "grad_norm": 0.1572265625, "learning_rate": 0.02, "loss": 1.0689, "step": 17406 }, { "epoch": 0.4095764705882353, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.1644, "step": 17407 }, { "epoch": 0.4096, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.0105, "step": 17408 }, { "epoch": 0.4096235294117647, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.05, "step": 17409 }, { "epoch": 0.4096470588235294, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0545, "step": 17410 }, { "epoch": 0.4096705882352941, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 1.0305, "step": 17411 }, { "epoch": 0.4096941176470588, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.9338, "step": 17412 }, { "epoch": 0.4097176470588235, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9499, "step": 17413 }, { "epoch": 0.4097411764705882, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8427, "step": 17414 }, { "epoch": 0.4097647058823529, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.8781, "step": 17415 }, { "epoch": 0.4097882352941176, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9169, "step": 17416 }, { "epoch": 0.4098117647058824, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 0.8384, "step": 17417 }, { "epoch": 0.4098352941176471, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.0911, "step": 17418 }, { "epoch": 0.4098588235294118, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 0.9272, "step": 17419 }, { "epoch": 0.4098823529411765, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.2464, "step": 17420 }, { "epoch": 0.4099058823529412, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.0563, "step": 17421 }, { "epoch": 0.4099294117647059, "grad_norm": 0.1748046875, "learning_rate": 0.02, "loss": 1.0287, "step": 17422 }, { "epoch": 0.4099529411764706, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 1.0661, "step": 17423 }, { "epoch": 0.4099764705882353, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 1.3528, "step": 17424 }, { "epoch": 0.41, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.8789, "step": 17425 }, { "epoch": 0.41002352941176473, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.999, "step": 17426 }, { "epoch": 0.41004705882352943, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.147, "step": 17427 }, { "epoch": 0.41007058823529413, "grad_norm": 0.1484375, "learning_rate": 0.02, "loss": 1.3393, "step": 17428 }, { "epoch": 0.41009411764705883, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 1.0963, "step": 17429 }, { "epoch": 0.41011764705882353, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 1.3362, "step": 17430 }, { "epoch": 0.41014117647058823, "grad_norm": 0.1650390625, "learning_rate": 0.02, "loss": 0.907, "step": 17431 }, { "epoch": 0.41016470588235293, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 1.1074, "step": 17432 }, { "epoch": 0.41018823529411763, "grad_norm": 0.1689453125, "learning_rate": 0.02, "loss": 0.8849, "step": 17433 }, { "epoch": 0.41021176470588233, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9648, "step": 17434 }, { "epoch": 0.41023529411764703, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.8564, "step": 17435 }, { "epoch": 0.4102588235294118, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 0.9583, "step": 17436 }, { "epoch": 0.4102823529411765, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.9131, "step": 17437 }, { "epoch": 0.4103058823529412, "grad_norm": 0.1630859375, "learning_rate": 0.02, "loss": 1.1528, "step": 17438 }, { "epoch": 0.4103294117647059, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9926, "step": 17439 }, { "epoch": 0.4103529411764706, "grad_norm": 0.1533203125, "learning_rate": 0.02, "loss": 1.1352, "step": 17440 }, { "epoch": 0.4103764705882353, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9453, "step": 17441 }, { "epoch": 0.4104, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.7089, "step": 17442 }, { "epoch": 0.4104235294117647, "grad_norm": 0.1806640625, "learning_rate": 0.02, "loss": 0.9972, "step": 17443 }, { "epoch": 0.4104470588235294, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 1.1402, "step": 17444 }, { "epoch": 0.41047058823529414, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.8717, "step": 17445 }, { "epoch": 0.41049411764705884, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2409, "step": 17446 }, { "epoch": 0.41051764705882354, "grad_norm": 0.212890625, "learning_rate": 0.02, "loss": 0.6196, "step": 17447 }, { "epoch": 0.41054117647058824, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 1.0623, "step": 17448 }, { "epoch": 0.41056470588235294, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.7673, "step": 17449 }, { "epoch": 0.41058823529411764, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8843, "step": 17450 }, { "epoch": 0.41061176470588234, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.1235, "step": 17451 }, { "epoch": 0.41063529411764704, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.7907, "step": 17452 }, { "epoch": 0.41065882352941174, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.9433, "step": 17453 }, { "epoch": 0.41068235294117644, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.8361, "step": 17454 }, { "epoch": 0.4107058823529412, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 1.002, "step": 17455 }, { "epoch": 0.4107294117647059, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.9969, "step": 17456 }, { "epoch": 0.4107529411764706, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.9169, "step": 17457 }, { "epoch": 0.4107764705882353, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7629, "step": 17458 }, { "epoch": 0.4108, "grad_norm": 0.1708984375, "learning_rate": 0.02, "loss": 0.9237, "step": 17459 }, { "epoch": 0.4108235294117647, "grad_norm": 0.1845703125, "learning_rate": 0.02, "loss": 0.825, "step": 17460 }, { "epoch": 0.4108470588235294, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0602, "step": 17461 }, { "epoch": 0.4108705882352941, "grad_norm": 0.171875, "learning_rate": 0.02, "loss": 0.9619, "step": 17462 }, { "epoch": 0.4108941176470588, "grad_norm": 0.162109375, "learning_rate": 0.02, "loss": 1.0673, "step": 17463 }, { "epoch": 0.41091764705882355, "grad_norm": 0.1669921875, "learning_rate": 0.02, "loss": 1.2337, "step": 17464 }, { "epoch": 0.41094117647058825, "grad_norm": 0.197265625, "learning_rate": 0.02, "loss": 0.7072, "step": 17465 }, { "epoch": 0.41096470588235295, "grad_norm": 0.1728515625, "learning_rate": 0.02, "loss": 0.8795, "step": 17466 }, { "epoch": 0.41098823529411765, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 0.9933, "step": 17467 }, { "epoch": 0.41101176470588235, "grad_norm": 0.1640625, "learning_rate": 0.02, "loss": 1.151, "step": 17468 }, { "epoch": 0.41103529411764705, "grad_norm": 0.1796875, "learning_rate": 0.02, "loss": 0.7845, "step": 17469 }, { "epoch": 0.41105882352941175, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 1.0023, "step": 17470 }, { "epoch": 0.41108235294117645, "grad_norm": 0.1591796875, "learning_rate": 0.02, "loss": 0.9398, "step": 17471 }, { "epoch": 0.41110588235294115, "grad_norm": 0.1416015625, "learning_rate": 0.02, "loss": 1.4449, "step": 17472 }, { "epoch": 0.4111294117647059, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.8314, "step": 17473 }, { "epoch": 0.4111529411764706, "grad_norm": 0.173828125, "learning_rate": 0.02, "loss": 0.7942, "step": 17474 }, { "epoch": 0.4111764705882353, "grad_norm": 0.16015625, "learning_rate": 0.02, "loss": 1.2878, "step": 17475 }, { "epoch": 0.4112, "grad_norm": 0.1982421875, "learning_rate": 0.02, "loss": 0.9063, "step": 17476 }, { "epoch": 0.4112235294117647, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.0267, "step": 17477 }, { "epoch": 0.4112470588235294, "grad_norm": 0.177734375, "learning_rate": 0.02, "loss": 0.9543, "step": 17478 }, { "epoch": 0.4112705882352941, "grad_norm": 0.1875, "learning_rate": 0.02, "loss": 1.1084, "step": 17479 }, { "epoch": 0.4112941176470588, "grad_norm": 0.1767578125, "learning_rate": 0.02, "loss": 0.986, "step": 17480 }, { "epoch": 0.4113176470588235, "grad_norm": 0.18359375, "learning_rate": 0.02, "loss": 0.9048, "step": 17481 }, { "epoch": 0.4113411764705882, "grad_norm": 0.189453125, "learning_rate": 0.02, "loss": 0.8176, "step": 17482 }, { "epoch": 0.41136470588235297, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.2305, "step": 17483 }, { "epoch": 0.41138823529411767, "grad_norm": 0.1904296875, "learning_rate": 0.02, "loss": 0.7281, "step": 17484 }, { "epoch": 0.41141176470588237, "grad_norm": 0.15625, "learning_rate": 0.02, "loss": 1.0059, "step": 17485 }, { "epoch": 0.41143529411764707, "grad_norm": 0.1826171875, "learning_rate": 0.02, "loss": 0.9923, "step": 17486 }, { "epoch": 0.41145882352941177, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0094, "step": 17487 }, { "epoch": 0.41148235294117647, "grad_norm": 0.169921875, "learning_rate": 0.02, "loss": 1.0283, "step": 17488 }, { "epoch": 0.41150588235294117, "grad_norm": 0.2060546875, "learning_rate": 0.02, "loss": 0.7556, "step": 17489 }, { "epoch": 0.41152941176470587, "grad_norm": 0.158203125, "learning_rate": 0.02, "loss": 1.0252, "step": 17490 }, { "epoch": 0.41155294117647057, "grad_norm": 0.1494140625, "learning_rate": 0.02, "loss": 1.1781, "step": 17491 }, { "epoch": 0.4115764705882353, "grad_norm": 0.232421875, "learning_rate": 0.02, "loss": 1.0702, "step": 17492 }, { "epoch": 0.4116, "grad_norm": 0.146484375, "learning_rate": 0.02, "loss": 1.4155, "step": 17493 }, { "epoch": 0.4116235294117647, "grad_norm": 0.193359375, "learning_rate": 0.02, "loss": 0.7684, "step": 17494 }, { "epoch": 0.4116470588235294, "grad_norm": 0.2021484375, "learning_rate": 0.02, "loss": 0.7149, "step": 17495 }, { "epoch": 0.4116705882352941, "grad_norm": 0.1943359375, "learning_rate": 0.02, "loss": 0.6803, "step": 17496 }, { "epoch": 0.4116941176470588, "grad_norm": 0.2119140625, "learning_rate": 0.02, "loss": 0.6649, "step": 17497 }, { "epoch": 0.4117176470588235, "grad_norm": 0.16796875, "learning_rate": 0.02, "loss": 1.0341, "step": 17498 }, { "epoch": 0.4117411764705882, "grad_norm": 0.1611328125, "learning_rate": 0.02, "loss": 1.1641, "step": 17499 }, { "epoch": 0.4117647058823529, "grad_norm": 0.1787109375, "learning_rate": 0.02, "loss": 0.8341, "step": 17500 } ], "logging_steps": 1, "max_steps": 42500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0620032579532251e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }