{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8003001125422033, "eval_steps": 500, "global_step": 19200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.1682297528239755e-05, "grad_norm": 64.0, "learning_rate": 1.388888888888889e-07, "loss": 8.6274, "step": 1 }, { "epoch": 8.336459505647951e-05, "grad_norm": 167.0, "learning_rate": 2.777777777777778e-07, "loss": 13.5009, "step": 2 }, { "epoch": 0.00012504689258471927, "grad_norm": 210.0, "learning_rate": 4.1666666666666667e-07, "loss": 15.8188, "step": 3 }, { "epoch": 0.00016672919011295902, "grad_norm": 80.5, "learning_rate": 5.555555555555556e-07, "loss": 13.8132, "step": 4 }, { "epoch": 0.00020841148764119878, "grad_norm": 90.0, "learning_rate": 6.944444444444445e-07, "loss": 18.0005, "step": 5 }, { "epoch": 0.00025009378516943853, "grad_norm": 66.0, "learning_rate": 8.333333333333333e-07, "loss": 13.815, "step": 6 }, { "epoch": 0.0002917760826976783, "grad_norm": 73.5, "learning_rate": 9.722222222222222e-07, "loss": 12.3135, "step": 7 }, { "epoch": 0.00033345838022591804, "grad_norm": 138.0, "learning_rate": 1.1111111111111112e-06, "loss": 16.5008, "step": 8 }, { "epoch": 0.0003751406777541578, "grad_norm": 308.0, "learning_rate": 1.25e-06, "loss": 34.756, "step": 9 }, { "epoch": 0.00041682297528239755, "grad_norm": 80.0, "learning_rate": 1.388888888888889e-06, "loss": 13.4411, "step": 10 }, { "epoch": 0.0004585052728106373, "grad_norm": 156.0, "learning_rate": 1.5277777777777778e-06, "loss": 24.751, "step": 11 }, { "epoch": 0.0005001875703388771, "grad_norm": 81.5, "learning_rate": 1.6666666666666667e-06, "loss": 17.0024, "step": 12 }, { "epoch": 0.0005418698678671169, "grad_norm": 139.0, "learning_rate": 1.8055555555555555e-06, "loss": 28.7525, "step": 13 }, { "epoch": 0.0005835521653953566, "grad_norm": 78.0, "learning_rate": 1.9444444444444444e-06, "loss": 13.9386, "step": 14 }, { "epoch": 0.0006252344629235964, "grad_norm": 59.5, "learning_rate": 2.0833333333333334e-06, "loss": 10.6903, "step": 15 }, { "epoch": 0.0006669167604518361, "grad_norm": 88.0, "learning_rate": 2.2222222222222225e-06, "loss": 17.1271, "step": 16 }, { "epoch": 0.0007085990579800759, "grad_norm": 124.0, "learning_rate": 2.361111111111111e-06, "loss": 20.5015, "step": 17 }, { "epoch": 0.0007502813555083156, "grad_norm": 72.0, "learning_rate": 2.5e-06, "loss": 13.4388, "step": 18 }, { "epoch": 0.0007919636530365554, "grad_norm": 83.5, "learning_rate": 2.638888888888889e-06, "loss": 16.3777, "step": 19 }, { "epoch": 0.0008336459505647951, "grad_norm": 164.0, "learning_rate": 2.777777777777778e-06, "loss": 19.3761, "step": 20 }, { "epoch": 0.0008753282480930349, "grad_norm": 52.25, "learning_rate": 2.916666666666667e-06, "loss": 10.1263, "step": 21 }, { "epoch": 0.0009170105456212746, "grad_norm": 103.0, "learning_rate": 3.0555555555555556e-06, "loss": 11.8139, "step": 22 }, { "epoch": 0.0009586928431495144, "grad_norm": 132.0, "learning_rate": 3.1944444444444443e-06, "loss": 24.6261, "step": 23 }, { "epoch": 0.0010003751406777541, "grad_norm": 98.0, "learning_rate": 3.3333333333333333e-06, "loss": 16.5008, "step": 24 }, { "epoch": 0.001042057438205994, "grad_norm": 144.0, "learning_rate": 3.4722222222222224e-06, "loss": 21.126, "step": 25 }, { "epoch": 0.0010837397357342337, "grad_norm": 123.5, "learning_rate": 3.611111111111111e-06, "loss": 21.0022, "step": 26 }, { "epoch": 0.0011254220332624733, "grad_norm": 75.0, "learning_rate": 3.75e-06, "loss": 13.439, "step": 27 }, { "epoch": 0.0011671043307907132, "grad_norm": 117.5, "learning_rate": 3.888888888888889e-06, "loss": 19.8756, "step": 28 }, { "epoch": 0.001208786628318953, "grad_norm": 224.0, "learning_rate": 4.027777777777779e-06, "loss": 24.5091, "step": 29 }, { "epoch": 0.0012504689258471928, "grad_norm": 59.75, "learning_rate": 4.166666666666667e-06, "loss": 11.6892, "step": 30 }, { "epoch": 0.0012921512233754324, "grad_norm": 160.0, "learning_rate": 4.305555555555556e-06, "loss": 15.5017, "step": 31 }, { "epoch": 0.0013338335209036722, "grad_norm": 70.0, "learning_rate": 4.444444444444445e-06, "loss": 10.565, "step": 32 }, { "epoch": 0.001375515818431912, "grad_norm": 102.5, "learning_rate": 4.583333333333333e-06, "loss": 19.2535, "step": 33 }, { "epoch": 0.0014171981159601518, "grad_norm": 150.0, "learning_rate": 4.722222222222222e-06, "loss": 20.6273, "step": 34 }, { "epoch": 0.0014588804134883914, "grad_norm": 104.0, "learning_rate": 4.861111111111111e-06, "loss": 7.721, "step": 35 }, { "epoch": 0.0015005627110166312, "grad_norm": 308.0, "learning_rate": 5e-06, "loss": 19.5012, "step": 36 }, { "epoch": 0.001542245008544871, "grad_norm": 112.0, "learning_rate": 5.138888888888889e-06, "loss": 17.5013, "step": 37 }, { "epoch": 0.0015839273060731108, "grad_norm": 211.0, "learning_rate": 5.277777777777778e-06, "loss": 19.0028, "step": 38 }, { "epoch": 0.0016256096036013504, "grad_norm": 111.0, "learning_rate": 5.416666666666667e-06, "loss": 20.0022, "step": 39 }, { "epoch": 0.0016672919011295902, "grad_norm": 374.0, "learning_rate": 5.555555555555556e-06, "loss": 36.5015, "step": 40 }, { "epoch": 0.00170897419865783, "grad_norm": 93.0, "learning_rate": 5.694444444444445e-06, "loss": 15.6892, "step": 41 }, { "epoch": 0.0017506564961860698, "grad_norm": 239.0, "learning_rate": 5.833333333333334e-06, "loss": 17.8794, "step": 42 }, { "epoch": 0.0017923387937143094, "grad_norm": 200.0, "learning_rate": 5.972222222222223e-06, "loss": 15.065, "step": 43 }, { "epoch": 0.0018340210912425492, "grad_norm": 231.0, "learning_rate": 6.111111111111111e-06, "loss": 13.1886, "step": 44 }, { "epoch": 0.001875703388770789, "grad_norm": 91.0, "learning_rate": 6.25e-06, "loss": 17.001, "step": 45 }, { "epoch": 0.0019173856862990289, "grad_norm": 129.0, "learning_rate": 6.3888888888888885e-06, "loss": 16.6274, "step": 46 }, { "epoch": 0.0019590679838272685, "grad_norm": 134.0, "learning_rate": 6.5277777777777784e-06, "loss": 24.1271, "step": 47 }, { "epoch": 0.0020007502813555083, "grad_norm": 176.0, "learning_rate": 6.666666666666667e-06, "loss": 21.5012, "step": 48 }, { "epoch": 0.002042432578883748, "grad_norm": 71.5, "learning_rate": 6.805555555555556e-06, "loss": 11.0633, "step": 49 }, { "epoch": 0.002084114876411988, "grad_norm": 90.5, "learning_rate": 6.944444444444445e-06, "loss": 14.4389, "step": 50 }, { "epoch": 0.0021257971739402277, "grad_norm": 128.0, "learning_rate": 7.083333333333334e-06, "loss": 11.4404, "step": 51 }, { "epoch": 0.0021674794714684675, "grad_norm": 48.0, "learning_rate": 7.222222222222222e-06, "loss": 8.3156, "step": 52 }, { "epoch": 0.0022091617689967073, "grad_norm": 100.0, "learning_rate": 7.361111111111112e-06, "loss": 15.564, "step": 53 }, { "epoch": 0.0022508440665249467, "grad_norm": 123.5, "learning_rate": 7.5e-06, "loss": 10.6887, "step": 54 }, { "epoch": 0.0022925263640531865, "grad_norm": 242.0, "learning_rate": 7.63888888888889e-06, "loss": 18.8776, "step": 55 }, { "epoch": 0.0023342086615814263, "grad_norm": 98.0, "learning_rate": 7.777777777777777e-06, "loss": 16.2522, "step": 56 }, { "epoch": 0.002375890959109666, "grad_norm": 234.0, "learning_rate": 7.916666666666667e-06, "loss": 18.0009, "step": 57 }, { "epoch": 0.002417573256637906, "grad_norm": 76.5, "learning_rate": 8.055555555555557e-06, "loss": 12.1889, "step": 58 }, { "epoch": 0.0024592555541661457, "grad_norm": 149.0, "learning_rate": 8.194444444444445e-06, "loss": 17.1265, "step": 59 }, { "epoch": 0.0025009378516943855, "grad_norm": 61.75, "learning_rate": 8.333333333333334e-06, "loss": 8.689, "step": 60 }, { "epoch": 0.0025426201492226254, "grad_norm": 228.0, "learning_rate": 8.472222222222223e-06, "loss": 16.1302, "step": 61 }, { "epoch": 0.0025843024467508647, "grad_norm": 74.5, "learning_rate": 8.611111111111112e-06, "loss": 12.0014, "step": 62 }, { "epoch": 0.0026259847442791045, "grad_norm": 146.0, "learning_rate": 8.75e-06, "loss": 19.7507, "step": 63 }, { "epoch": 0.0026676670418073443, "grad_norm": 296.0, "learning_rate": 8.88888888888889e-06, "loss": 21.3808, "step": 64 }, { "epoch": 0.002709349339335584, "grad_norm": 172.0, "learning_rate": 9.027777777777777e-06, "loss": 14.8174, "step": 65 }, { "epoch": 0.002751031636863824, "grad_norm": 119.0, "learning_rate": 9.166666666666666e-06, "loss": 5.5344, "step": 66 }, { "epoch": 0.0027927139343920638, "grad_norm": 114.0, "learning_rate": 9.305555555555555e-06, "loss": 16.6262, "step": 67 }, { "epoch": 0.0028343962319203036, "grad_norm": 80.5, "learning_rate": 9.444444444444445e-06, "loss": 10.0658, "step": 68 }, { "epoch": 0.0028760785294485434, "grad_norm": 190.0, "learning_rate": 9.583333333333334e-06, "loss": 12.2508, "step": 69 }, { "epoch": 0.0029177608269767828, "grad_norm": 88.5, "learning_rate": 9.722222222222223e-06, "loss": 13.44, "step": 70 }, { "epoch": 0.0029594431245050226, "grad_norm": 210.0, "learning_rate": 9.861111111111112e-06, "loss": 6.9399, "step": 71 }, { "epoch": 0.0030011254220332624, "grad_norm": 127.0, "learning_rate": 1e-05, "loss": 17.0012, "step": 72 }, { "epoch": 0.003042807719561502, "grad_norm": 322.0, "learning_rate": 1.013888888888889e-05, "loss": 10.1906, "step": 73 }, { "epoch": 0.003084490017089742, "grad_norm": 233.0, "learning_rate": 1.0277777777777777e-05, "loss": 27.6306, "step": 74 }, { "epoch": 0.003126172314617982, "grad_norm": 158.0, "learning_rate": 1.0416666666666668e-05, "loss": 24.5016, "step": 75 }, { "epoch": 0.0031678546121462216, "grad_norm": 99.0, "learning_rate": 1.0555555555555555e-05, "loss": 12.003, "step": 76 }, { "epoch": 0.0032095369096744614, "grad_norm": 181.0, "learning_rate": 1.0694444444444444e-05, "loss": 15.8187, "step": 77 }, { "epoch": 0.003251219207202701, "grad_norm": 144.0, "learning_rate": 1.0833333333333334e-05, "loss": 10.1267, "step": 78 }, { "epoch": 0.0032929015047309406, "grad_norm": 138.0, "learning_rate": 1.0972222222222223e-05, "loss": 19.5013, "step": 79 }, { "epoch": 0.0033345838022591804, "grad_norm": 282.0, "learning_rate": 1.1111111111111112e-05, "loss": 29.2517, "step": 80 }, { "epoch": 0.0033762660997874202, "grad_norm": 68.5, "learning_rate": 1.125e-05, "loss": 9.6898, "step": 81 }, { "epoch": 0.00341794839731566, "grad_norm": 132.0, "learning_rate": 1.138888888888889e-05, "loss": 16.1259, "step": 82 }, { "epoch": 0.0034596306948439, "grad_norm": 202.0, "learning_rate": 1.1527777777777779e-05, "loss": 17.5011, "step": 83 }, { "epoch": 0.0035013129923721397, "grad_norm": 87.5, "learning_rate": 1.1666666666666668e-05, "loss": 9.8779, "step": 84 }, { "epoch": 0.0035429952899003795, "grad_norm": 115.0, "learning_rate": 1.1805555555555555e-05, "loss": 16.1263, "step": 85 }, { "epoch": 0.003584677587428619, "grad_norm": 180.0, "learning_rate": 1.1944444444444446e-05, "loss": 7.9394, "step": 86 }, { "epoch": 0.0036263598849568587, "grad_norm": 79.5, "learning_rate": 1.2083333333333333e-05, "loss": 10.2515, "step": 87 }, { "epoch": 0.0036680421824850985, "grad_norm": 410.0, "learning_rate": 1.2222222222222222e-05, "loss": 27.6259, "step": 88 }, { "epoch": 0.0037097244800133383, "grad_norm": 241.0, "learning_rate": 1.2361111111111112e-05, "loss": 21.6272, "step": 89 }, { "epoch": 0.003751406777541578, "grad_norm": 131.0, "learning_rate": 1.25e-05, "loss": 13.9388, "step": 90 }, { "epoch": 0.003793089075069818, "grad_norm": 163.0, "learning_rate": 1.263888888888889e-05, "loss": 19.3761, "step": 91 }, { "epoch": 0.0038347713725980577, "grad_norm": 146.0, "learning_rate": 1.2777777777777777e-05, "loss": 17.3761, "step": 92 }, { "epoch": 0.0038764536701262975, "grad_norm": 136.0, "learning_rate": 1.2916666666666668e-05, "loss": 15.5679, "step": 93 }, { "epoch": 0.003918135967654537, "grad_norm": 164.0, "learning_rate": 1.3055555555555557e-05, "loss": 15.5639, "step": 94 }, { "epoch": 0.003959818265182777, "grad_norm": 360.0, "learning_rate": 1.3194444444444446e-05, "loss": 27.88, "step": 95 }, { "epoch": 0.0040015005627110165, "grad_norm": 320.0, "learning_rate": 1.3333333333333333e-05, "loss": 28.1302, "step": 96 }, { "epoch": 0.004043182860239257, "grad_norm": 119.5, "learning_rate": 1.3472222222222222e-05, "loss": 14.3772, "step": 97 }, { "epoch": 0.004084865157767496, "grad_norm": 101.5, "learning_rate": 1.3611111111111111e-05, "loss": 12.0642, "step": 98 }, { "epoch": 0.0041265474552957355, "grad_norm": 128.0, "learning_rate": 1.3750000000000002e-05, "loss": 15.5011, "step": 99 }, { "epoch": 0.004168229752823976, "grad_norm": 131.0, "learning_rate": 1.388888888888889e-05, "loss": 13.4389, "step": 100 }, { "epoch": 0.004209912050352215, "grad_norm": 129.0, "learning_rate": 1.4027777777777779e-05, "loss": 11.8762, "step": 101 }, { "epoch": 0.004251594347880455, "grad_norm": 191.0, "learning_rate": 1.4166666666666668e-05, "loss": 16.3772, "step": 102 }, { "epoch": 0.004293276645408695, "grad_norm": 126.5, "learning_rate": 1.4305555555555555e-05, "loss": 14.3142, "step": 103 }, { "epoch": 0.004334958942936935, "grad_norm": 119.0, "learning_rate": 1.4444444444444444e-05, "loss": 13.128, "step": 104 }, { "epoch": 0.004376641240465174, "grad_norm": 178.0, "learning_rate": 1.4583333333333335e-05, "loss": 14.1896, "step": 105 }, { "epoch": 0.004418323537993415, "grad_norm": 224.0, "learning_rate": 1.4722222222222224e-05, "loss": 10.8161, "step": 106 }, { "epoch": 0.004460005835521654, "grad_norm": 255.0, "learning_rate": 1.4861111111111111e-05, "loss": 18.3769, "step": 107 }, { "epoch": 0.004501688133049893, "grad_norm": 149.0, "learning_rate": 1.5e-05, "loss": 12.3134, "step": 108 }, { "epoch": 0.004543370430578134, "grad_norm": 133.0, "learning_rate": 1.5138888888888888e-05, "loss": 13.3171, "step": 109 }, { "epoch": 0.004585052728106373, "grad_norm": 202.0, "learning_rate": 1.527777777777778e-05, "loss": 13.5007, "step": 110 }, { "epoch": 0.004626735025634613, "grad_norm": 194.0, "learning_rate": 1.5416666666666668e-05, "loss": 13.0014, "step": 111 }, { "epoch": 0.004668417323162853, "grad_norm": 121.5, "learning_rate": 1.5555555555555555e-05, "loss": 13.1264, "step": 112 }, { "epoch": 0.004710099620691093, "grad_norm": 468.0, "learning_rate": 1.5694444444444446e-05, "loss": 15.3138, "step": 113 }, { "epoch": 0.004751781918219332, "grad_norm": 256.0, "learning_rate": 1.5833333333333333e-05, "loss": 23.1267, "step": 114 }, { "epoch": 0.004793464215747572, "grad_norm": 206.0, "learning_rate": 1.597222222222222e-05, "loss": 12.6886, "step": 115 }, { "epoch": 0.004835146513275812, "grad_norm": 171.0, "learning_rate": 1.6111111111111115e-05, "loss": 15.1892, "step": 116 }, { "epoch": 0.004876828810804051, "grad_norm": 112.5, "learning_rate": 1.6250000000000002e-05, "loss": 12.3759, "step": 117 }, { "epoch": 0.0049185111083322915, "grad_norm": 252.0, "learning_rate": 1.638888888888889e-05, "loss": 14.3134, "step": 118 }, { "epoch": 0.004960193405860531, "grad_norm": 304.0, "learning_rate": 1.6527777777777777e-05, "loss": 26.2532, "step": 119 }, { "epoch": 0.005001875703388771, "grad_norm": 372.0, "learning_rate": 1.6666666666666667e-05, "loss": 20.7521, "step": 120 }, { "epoch": 0.0050435580009170105, "grad_norm": 211.0, "learning_rate": 1.6805555555555558e-05, "loss": 20.627, "step": 121 }, { "epoch": 0.005085240298445251, "grad_norm": 264.0, "learning_rate": 1.6944444444444446e-05, "loss": 15.0682, "step": 122 }, { "epoch": 0.00512692259597349, "grad_norm": 125.0, "learning_rate": 1.7083333333333333e-05, "loss": 9.6259, "step": 123 }, { "epoch": 0.0051686048935017295, "grad_norm": 159.0, "learning_rate": 1.7222222222222224e-05, "loss": 16.6276, "step": 124 }, { "epoch": 0.00521028719102997, "grad_norm": 143.0, "learning_rate": 1.736111111111111e-05, "loss": 11.5641, "step": 125 }, { "epoch": 0.005251969488558209, "grad_norm": 85.5, "learning_rate": 1.75e-05, "loss": 8.751, "step": 126 }, { "epoch": 0.005293651786086449, "grad_norm": 150.0, "learning_rate": 1.763888888888889e-05, "loss": 14.3139, "step": 127 }, { "epoch": 0.005335334083614689, "grad_norm": 188.0, "learning_rate": 1.777777777777778e-05, "loss": 15.94, "step": 128 }, { "epoch": 0.005377016381142929, "grad_norm": 219.0, "learning_rate": 1.7916666666666667e-05, "loss": 16.376, "step": 129 }, { "epoch": 0.005418698678671168, "grad_norm": 119.5, "learning_rate": 1.8055555555555555e-05, "loss": 11.8145, "step": 130 }, { "epoch": 0.005460380976199408, "grad_norm": 254.0, "learning_rate": 1.8194444444444445e-05, "loss": 14.3138, "step": 131 }, { "epoch": 0.005502063273727648, "grad_norm": 156.0, "learning_rate": 1.8333333333333333e-05, "loss": 9.3155, "step": 132 }, { "epoch": 0.005543745571255887, "grad_norm": 147.0, "learning_rate": 1.8472222222222224e-05, "loss": 12.8133, "step": 133 }, { "epoch": 0.0055854278687841276, "grad_norm": 160.0, "learning_rate": 1.861111111111111e-05, "loss": 14.8763, "step": 134 }, { "epoch": 0.005627110166312367, "grad_norm": 258.0, "learning_rate": 1.8750000000000002e-05, "loss": 23.6275, "step": 135 }, { "epoch": 0.005668792463840607, "grad_norm": 241.0, "learning_rate": 1.888888888888889e-05, "loss": 19.0009, "step": 136 }, { "epoch": 0.0057104747613688465, "grad_norm": 218.0, "learning_rate": 1.9027777777777776e-05, "loss": 18.6265, "step": 137 }, { "epoch": 0.005752157058897087, "grad_norm": 165.0, "learning_rate": 1.9166666666666667e-05, "loss": 13.8765, "step": 138 }, { "epoch": 0.005793839356425326, "grad_norm": 284.0, "learning_rate": 1.9305555555555558e-05, "loss": 10.3143, "step": 139 }, { "epoch": 0.0058355216539535655, "grad_norm": 224.0, "learning_rate": 1.9444444444444445e-05, "loss": 13.3759, "step": 140 }, { "epoch": 0.005877203951481806, "grad_norm": 226.0, "learning_rate": 1.9583333333333333e-05, "loss": 17.6269, "step": 141 }, { "epoch": 0.005918886249010045, "grad_norm": 284.0, "learning_rate": 1.9722222222222224e-05, "loss": 11.1262, "step": 142 }, { "epoch": 0.005960568546538285, "grad_norm": 344.0, "learning_rate": 1.986111111111111e-05, "loss": 25.2527, "step": 143 }, { "epoch": 0.006002250844066525, "grad_norm": 171.0, "learning_rate": 2e-05, "loss": 15.3764, "step": 144 }, { "epoch": 0.006043933141594765, "grad_norm": 280.0, "learning_rate": 2.013888888888889e-05, "loss": 19.8826, "step": 145 }, { "epoch": 0.006085615439123004, "grad_norm": 229.0, "learning_rate": 2.027777777777778e-05, "loss": 18.378, "step": 146 }, { "epoch": 0.006127297736651245, "grad_norm": 108.5, "learning_rate": 2.0416666666666667e-05, "loss": 9.4384, "step": 147 }, { "epoch": 0.006168980034179484, "grad_norm": 166.0, "learning_rate": 2.0555555555555555e-05, "loss": 14.127, "step": 148 }, { "epoch": 0.006210662331707723, "grad_norm": 177.0, "learning_rate": 2.0694444444444445e-05, "loss": 13.5015, "step": 149 }, { "epoch": 0.006252344629235964, "grad_norm": 200.0, "learning_rate": 2.0833333333333336e-05, "loss": 14.3765, "step": 150 }, { "epoch": 0.006294026926764203, "grad_norm": 412.0, "learning_rate": 2.0972222222222223e-05, "loss": 22.5064, "step": 151 }, { "epoch": 0.006335709224292443, "grad_norm": 193.0, "learning_rate": 2.111111111111111e-05, "loss": 11.6886, "step": 152 }, { "epoch": 0.006377391521820683, "grad_norm": 141.0, "learning_rate": 2.125e-05, "loss": 11.3758, "step": 153 }, { "epoch": 0.006419073819348923, "grad_norm": 270.0, "learning_rate": 2.138888888888889e-05, "loss": 18.2512, "step": 154 }, { "epoch": 0.006460756116877162, "grad_norm": 122.0, "learning_rate": 2.152777777777778e-05, "loss": 8.1271, "step": 155 }, { "epoch": 0.006502438414405402, "grad_norm": 288.0, "learning_rate": 2.1666666666666667e-05, "loss": 20.5031, "step": 156 }, { "epoch": 0.006544120711933642, "grad_norm": 418.0, "learning_rate": 2.1805555555555558e-05, "loss": 19.7526, "step": 157 }, { "epoch": 0.006585803009461881, "grad_norm": 194.0, "learning_rate": 2.1944444444444445e-05, "loss": 14.2517, "step": 158 }, { "epoch": 0.0066274853069901215, "grad_norm": 328.0, "learning_rate": 2.2083333333333333e-05, "loss": 20.2507, "step": 159 }, { "epoch": 0.006669167604518361, "grad_norm": 191.0, "learning_rate": 2.2222222222222223e-05, "loss": 12.5019, "step": 160 }, { "epoch": 0.006710849902046601, "grad_norm": 344.0, "learning_rate": 2.2361111111111114e-05, "loss": 22.0053, "step": 161 }, { "epoch": 0.0067525321995748405, "grad_norm": 147.0, "learning_rate": 2.25e-05, "loss": 10.689, "step": 162 }, { "epoch": 0.006794214497103081, "grad_norm": 171.0, "learning_rate": 2.263888888888889e-05, "loss": 12.5013, "step": 163 }, { "epoch": 0.00683589679463132, "grad_norm": 364.0, "learning_rate": 2.277777777777778e-05, "loss": 24.3815, "step": 164 }, { "epoch": 0.0068775790921595595, "grad_norm": 205.0, "learning_rate": 2.2916666666666667e-05, "loss": 11.7527, "step": 165 }, { "epoch": 0.0069192613896878, "grad_norm": 232.0, "learning_rate": 2.3055555555555558e-05, "loss": 12.6897, "step": 166 }, { "epoch": 0.006960943687216039, "grad_norm": 204.0, "learning_rate": 2.3194444444444445e-05, "loss": 14.5019, "step": 167 }, { "epoch": 0.007002625984744279, "grad_norm": 268.0, "learning_rate": 2.3333333333333336e-05, "loss": 13.0013, "step": 168 }, { "epoch": 0.007044308282272519, "grad_norm": 241.0, "learning_rate": 2.3472222222222223e-05, "loss": 11.8761, "step": 169 }, { "epoch": 0.007085990579800759, "grad_norm": 268.0, "learning_rate": 2.361111111111111e-05, "loss": 18.501, "step": 170 }, { "epoch": 0.007127672877328998, "grad_norm": 209.0, "learning_rate": 2.375e-05, "loss": 9.0014, "step": 171 }, { "epoch": 0.007169355174857238, "grad_norm": 164.0, "learning_rate": 2.3888888888888892e-05, "loss": 12.1882, "step": 172 }, { "epoch": 0.007211037472385478, "grad_norm": 332.0, "learning_rate": 2.402777777777778e-05, "loss": 17.751, "step": 173 }, { "epoch": 0.007252719769913717, "grad_norm": 288.0, "learning_rate": 2.4166666666666667e-05, "loss": 19.001, "step": 174 }, { "epoch": 0.007294402067441958, "grad_norm": 226.0, "learning_rate": 2.4305555555555558e-05, "loss": 14.191, "step": 175 }, { "epoch": 0.007336084364970197, "grad_norm": 154.0, "learning_rate": 2.4444444444444445e-05, "loss": 10.6287, "step": 176 }, { "epoch": 0.007377766662498437, "grad_norm": 162.0, "learning_rate": 2.4583333333333332e-05, "loss": 11.9385, "step": 177 }, { "epoch": 0.007419448960026677, "grad_norm": 158.0, "learning_rate": 2.4722222222222223e-05, "loss": 11.0025, "step": 178 }, { "epoch": 0.007461131257554917, "grad_norm": 191.0, "learning_rate": 2.4861111111111114e-05, "loss": 12.1264, "step": 179 }, { "epoch": 0.007502813555083156, "grad_norm": 170.0, "learning_rate": 2.5e-05, "loss": 10.8136, "step": 180 }, { "epoch": 0.007544495852611396, "grad_norm": 322.0, "learning_rate": 2.513888888888889e-05, "loss": 15.3767, "step": 181 }, { "epoch": 0.007586178150139636, "grad_norm": 262.0, "learning_rate": 2.527777777777778e-05, "loss": 16.6263, "step": 182 }, { "epoch": 0.007627860447667875, "grad_norm": 141.0, "learning_rate": 2.5416666666666667e-05, "loss": 10.4384, "step": 183 }, { "epoch": 0.007669542745196115, "grad_norm": 223.0, "learning_rate": 2.5555555555555554e-05, "loss": 14.3758, "step": 184 }, { "epoch": 0.007711225042724355, "grad_norm": 284.0, "learning_rate": 2.5694444444444445e-05, "loss": 19.001, "step": 185 }, { "epoch": 0.007752907340252595, "grad_norm": 436.0, "learning_rate": 2.5833333333333336e-05, "loss": 25.5035, "step": 186 }, { "epoch": 0.007794589637780834, "grad_norm": 218.0, "learning_rate": 2.5972222222222226e-05, "loss": 14.6892, "step": 187 }, { "epoch": 0.007836271935309074, "grad_norm": 209.0, "learning_rate": 2.6111111111111114e-05, "loss": 15.0012, "step": 188 }, { "epoch": 0.007877954232837313, "grad_norm": 264.0, "learning_rate": 2.625e-05, "loss": 18.1277, "step": 189 }, { "epoch": 0.007919636530365554, "grad_norm": 148.0, "learning_rate": 2.6388888888888892e-05, "loss": 9.752, "step": 190 }, { "epoch": 0.007961318827893794, "grad_norm": 202.0, "learning_rate": 2.652777777777778e-05, "loss": 11.5638, "step": 191 }, { "epoch": 0.008003001125422033, "grad_norm": 274.0, "learning_rate": 2.6666666666666667e-05, "loss": 14.876, "step": 192 }, { "epoch": 0.008044683422950272, "grad_norm": 246.0, "learning_rate": 2.6805555555555557e-05, "loss": 14.4382, "step": 193 }, { "epoch": 0.008086365720478514, "grad_norm": 197.0, "learning_rate": 2.6944444444444445e-05, "loss": 10.6888, "step": 194 }, { "epoch": 0.008128048018006753, "grad_norm": 396.0, "learning_rate": 2.7083333333333332e-05, "loss": 18.1294, "step": 195 }, { "epoch": 0.008169730315534992, "grad_norm": 274.0, "learning_rate": 2.7222222222222223e-05, "loss": 17.1265, "step": 196 }, { "epoch": 0.008211412613063232, "grad_norm": 155.0, "learning_rate": 2.7361111111111114e-05, "loss": 10.6907, "step": 197 }, { "epoch": 0.008253094910591471, "grad_norm": 206.0, "learning_rate": 2.7500000000000004e-05, "loss": 13.6887, "step": 198 }, { "epoch": 0.008294777208119712, "grad_norm": 246.0, "learning_rate": 2.7638888888888892e-05, "loss": 12.2511, "step": 199 }, { "epoch": 0.008336459505647952, "grad_norm": 300.0, "learning_rate": 2.777777777777778e-05, "loss": 14.0012, "step": 200 }, { "epoch": 0.008378141803176191, "grad_norm": 288.0, "learning_rate": 2.791666666666667e-05, "loss": 14.815, "step": 201 }, { "epoch": 0.00841982410070443, "grad_norm": 314.0, "learning_rate": 2.8055555555555557e-05, "loss": 12.8143, "step": 202 }, { "epoch": 0.008461506398232671, "grad_norm": 524.0, "learning_rate": 2.8194444444444445e-05, "loss": 28.1259, "step": 203 }, { "epoch": 0.00850318869576091, "grad_norm": 142.0, "learning_rate": 2.8333333333333335e-05, "loss": 10.0023, "step": 204 }, { "epoch": 0.00854487099328915, "grad_norm": 238.0, "learning_rate": 2.8472222222222223e-05, "loss": 13.3758, "step": 205 }, { "epoch": 0.00858655329081739, "grad_norm": 322.0, "learning_rate": 2.861111111111111e-05, "loss": 18.6291, "step": 206 }, { "epoch": 0.008628235588345629, "grad_norm": 320.0, "learning_rate": 2.8749999999999997e-05, "loss": 8.503, "step": 207 }, { "epoch": 0.00866991788587387, "grad_norm": 310.0, "learning_rate": 2.8888888888888888e-05, "loss": 18.8761, "step": 208 }, { "epoch": 0.00871160018340211, "grad_norm": 165.0, "learning_rate": 2.9027777777777782e-05, "loss": 10.1892, "step": 209 }, { "epoch": 0.008753282480930349, "grad_norm": 286.0, "learning_rate": 2.916666666666667e-05, "loss": 15.6889, "step": 210 }, { "epoch": 0.008794964778458588, "grad_norm": 141.0, "learning_rate": 2.9305555555555557e-05, "loss": 9.4395, "step": 211 }, { "epoch": 0.00883664707598683, "grad_norm": 336.0, "learning_rate": 2.9444444444444448e-05, "loss": 16.3779, "step": 212 }, { "epoch": 0.008878329373515069, "grad_norm": 452.0, "learning_rate": 2.9583333333333335e-05, "loss": 25.7515, "step": 213 }, { "epoch": 0.008920011671043308, "grad_norm": 91.0, "learning_rate": 2.9722222222222223e-05, "loss": 5.0645, "step": 214 }, { "epoch": 0.008961693968571547, "grad_norm": 222.0, "learning_rate": 2.9861111111111113e-05, "loss": 9.3767, "step": 215 }, { "epoch": 0.009003376266099787, "grad_norm": 256.0, "learning_rate": 3e-05, "loss": 14.0634, "step": 216 }, { "epoch": 0.009045058563628028, "grad_norm": 184.0, "learning_rate": 3.0138888888888888e-05, "loss": 11.6898, "step": 217 }, { "epoch": 0.009086740861156267, "grad_norm": 474.0, "learning_rate": 3.0277777777777776e-05, "loss": 23.3761, "step": 218 }, { "epoch": 0.009128423158684507, "grad_norm": 160.0, "learning_rate": 3.0416666666666666e-05, "loss": 9.815, "step": 219 }, { "epoch": 0.009170105456212746, "grad_norm": 540.0, "learning_rate": 3.055555555555556e-05, "loss": 25.0059, "step": 220 }, { "epoch": 0.009211787753740985, "grad_norm": 556.0, "learning_rate": 3.069444444444445e-05, "loss": 22.2527, "step": 221 }, { "epoch": 0.009253470051269226, "grad_norm": 211.0, "learning_rate": 3.0833333333333335e-05, "loss": 11.7514, "step": 222 }, { "epoch": 0.009295152348797466, "grad_norm": 344.0, "learning_rate": 3.0972222222222226e-05, "loss": 17.5021, "step": 223 }, { "epoch": 0.009336834646325705, "grad_norm": 185.0, "learning_rate": 3.111111111111111e-05, "loss": 11.5019, "step": 224 }, { "epoch": 0.009378516943853945, "grad_norm": 500.0, "learning_rate": 3.125e-05, "loss": 26.1286, "step": 225 }, { "epoch": 0.009420199241382186, "grad_norm": 166.0, "learning_rate": 3.138888888888889e-05, "loss": 9.8135, "step": 226 }, { "epoch": 0.009461881538910425, "grad_norm": 165.0, "learning_rate": 3.1527777777777775e-05, "loss": 9.7557, "step": 227 }, { "epoch": 0.009503563836438664, "grad_norm": 860.0, "learning_rate": 3.1666666666666666e-05, "loss": 35.0053, "step": 228 }, { "epoch": 0.009545246133966904, "grad_norm": 292.0, "learning_rate": 3.180555555555556e-05, "loss": 15.8763, "step": 229 }, { "epoch": 0.009586928431495143, "grad_norm": 532.0, "learning_rate": 3.194444444444444e-05, "loss": 24.7514, "step": 230 }, { "epoch": 0.009628610729023384, "grad_norm": 237.0, "learning_rate": 3.208333333333334e-05, "loss": 13.6887, "step": 231 }, { "epoch": 0.009670293026551624, "grad_norm": 255.0, "learning_rate": 3.222222222222223e-05, "loss": 13.8142, "step": 232 }, { "epoch": 0.009711975324079863, "grad_norm": 322.0, "learning_rate": 3.236111111111111e-05, "loss": 16.5009, "step": 233 }, { "epoch": 0.009753657621608102, "grad_norm": 556.0, "learning_rate": 3.2500000000000004e-05, "loss": 29.3764, "step": 234 }, { "epoch": 0.009795339919136344, "grad_norm": 432.0, "learning_rate": 3.263888888888889e-05, "loss": 18.1268, "step": 235 }, { "epoch": 0.009837022216664583, "grad_norm": 384.0, "learning_rate": 3.277777777777778e-05, "loss": 19.0027, "step": 236 }, { "epoch": 0.009878704514192822, "grad_norm": 300.0, "learning_rate": 3.291666666666667e-05, "loss": 14.8811, "step": 237 }, { "epoch": 0.009920386811721062, "grad_norm": 360.0, "learning_rate": 3.3055555555555553e-05, "loss": 18.002, "step": 238 }, { "epoch": 0.009962069109249301, "grad_norm": 207.0, "learning_rate": 3.3194444444444444e-05, "loss": 11.5636, "step": 239 }, { "epoch": 0.010003751406777542, "grad_norm": 384.0, "learning_rate": 3.3333333333333335e-05, "loss": 18.3788, "step": 240 }, { "epoch": 0.010045433704305782, "grad_norm": 446.0, "learning_rate": 3.347222222222222e-05, "loss": 20.752, "step": 241 }, { "epoch": 0.010087116001834021, "grad_norm": 236.0, "learning_rate": 3.3611111111111116e-05, "loss": 9.6283, "step": 242 }, { "epoch": 0.01012879829936226, "grad_norm": 372.0, "learning_rate": 3.375000000000001e-05, "loss": 18.3779, "step": 243 }, { "epoch": 0.010170480596890501, "grad_norm": 201.0, "learning_rate": 3.388888888888889e-05, "loss": 10.3767, "step": 244 }, { "epoch": 0.01021216289441874, "grad_norm": 380.0, "learning_rate": 3.402777777777778e-05, "loss": 14.7528, "step": 245 }, { "epoch": 0.01025384519194698, "grad_norm": 230.0, "learning_rate": 3.4166666666666666e-05, "loss": 11.8766, "step": 246 }, { "epoch": 0.01029552748947522, "grad_norm": 170.0, "learning_rate": 3.430555555555556e-05, "loss": 8.3163, "step": 247 }, { "epoch": 0.010337209787003459, "grad_norm": 308.0, "learning_rate": 3.444444444444445e-05, "loss": 13.6312, "step": 248 }, { "epoch": 0.0103788920845317, "grad_norm": 636.0, "learning_rate": 3.458333333333333e-05, "loss": 26.6308, "step": 249 }, { "epoch": 0.01042057438205994, "grad_norm": 237.0, "learning_rate": 3.472222222222222e-05, "loss": 12.1259, "step": 250 }, { "epoch": 0.010462256679588179, "grad_norm": 446.0, "learning_rate": 3.486111111111111e-05, "loss": 19.002, "step": 251 }, { "epoch": 0.010503938977116418, "grad_norm": 231.0, "learning_rate": 3.5e-05, "loss": 11.6901, "step": 252 }, { "epoch": 0.01054562127464466, "grad_norm": 498.0, "learning_rate": 3.513888888888889e-05, "loss": 22.6262, "step": 253 }, { "epoch": 0.010587303572172899, "grad_norm": 294.0, "learning_rate": 3.527777777777778e-05, "loss": 13.3141, "step": 254 }, { "epoch": 0.010628985869701138, "grad_norm": 207.0, "learning_rate": 3.541666666666667e-05, "loss": 11.0634, "step": 255 }, { "epoch": 0.010670668167229377, "grad_norm": 432.0, "learning_rate": 3.555555555555556e-05, "loss": 15.3772, "step": 256 }, { "epoch": 0.010712350464757617, "grad_norm": 112.5, "learning_rate": 3.5694444444444444e-05, "loss": 6.4409, "step": 257 }, { "epoch": 0.010754032762285858, "grad_norm": 232.0, "learning_rate": 3.5833333333333335e-05, "loss": 11.4391, "step": 258 }, { "epoch": 0.010795715059814097, "grad_norm": 284.0, "learning_rate": 3.5972222222222225e-05, "loss": 14.0653, "step": 259 }, { "epoch": 0.010837397357342337, "grad_norm": 191.0, "learning_rate": 3.611111111111111e-05, "loss": 10.8762, "step": 260 }, { "epoch": 0.010879079654870576, "grad_norm": 302.0, "learning_rate": 3.625e-05, "loss": 13.3143, "step": 261 }, { "epoch": 0.010920761952398815, "grad_norm": 237.0, "learning_rate": 3.638888888888889e-05, "loss": 11.692, "step": 262 }, { "epoch": 0.010962444249927056, "grad_norm": 708.0, "learning_rate": 3.6527777777777775e-05, "loss": 28.6281, "step": 263 }, { "epoch": 0.011004126547455296, "grad_norm": 171.0, "learning_rate": 3.6666666666666666e-05, "loss": 6.9384, "step": 264 }, { "epoch": 0.011045808844983535, "grad_norm": 376.0, "learning_rate": 3.6805555555555556e-05, "loss": 17.251, "step": 265 }, { "epoch": 0.011087491142511775, "grad_norm": 308.0, "learning_rate": 3.694444444444445e-05, "loss": 14.2521, "step": 266 }, { "epoch": 0.011129173440040016, "grad_norm": 294.0, "learning_rate": 3.708333333333334e-05, "loss": 12.5647, "step": 267 }, { "epoch": 0.011170855737568255, "grad_norm": 233.0, "learning_rate": 3.722222222222222e-05, "loss": 11.6916, "step": 268 }, { "epoch": 0.011212538035096494, "grad_norm": 249.0, "learning_rate": 3.736111111111111e-05, "loss": 11.7572, "step": 269 }, { "epoch": 0.011254220332624734, "grad_norm": 314.0, "learning_rate": 3.7500000000000003e-05, "loss": 15.6262, "step": 270 }, { "epoch": 0.011295902630152973, "grad_norm": 378.0, "learning_rate": 3.763888888888889e-05, "loss": 14.7516, "step": 271 }, { "epoch": 0.011337584927681214, "grad_norm": 304.0, "learning_rate": 3.777777777777778e-05, "loss": 13.9387, "step": 272 }, { "epoch": 0.011379267225209454, "grad_norm": 190.0, "learning_rate": 3.791666666666667e-05, "loss": 10.4393, "step": 273 }, { "epoch": 0.011420949522737693, "grad_norm": 243.0, "learning_rate": 3.805555555555555e-05, "loss": 12.7512, "step": 274 }, { "epoch": 0.011462631820265932, "grad_norm": 340.0, "learning_rate": 3.8194444444444444e-05, "loss": 16.002, "step": 275 }, { "epoch": 0.011504314117794174, "grad_norm": 426.0, "learning_rate": 3.8333333333333334e-05, "loss": 18.7518, "step": 276 }, { "epoch": 0.011545996415322413, "grad_norm": 316.0, "learning_rate": 3.8472222222222225e-05, "loss": 14.1893, "step": 277 }, { "epoch": 0.011587678712850652, "grad_norm": 135.0, "learning_rate": 3.8611111111111116e-05, "loss": 7.7848, "step": 278 }, { "epoch": 0.011629361010378892, "grad_norm": 218.0, "learning_rate": 3.875e-05, "loss": 10.94, "step": 279 }, { "epoch": 0.011671043307907131, "grad_norm": 412.0, "learning_rate": 3.888888888888889e-05, "loss": 18.8758, "step": 280 }, { "epoch": 0.011712725605435372, "grad_norm": 129.0, "learning_rate": 3.902777777777778e-05, "loss": 7.0642, "step": 281 }, { "epoch": 0.011754407902963612, "grad_norm": 228.0, "learning_rate": 3.9166666666666665e-05, "loss": 10.0638, "step": 282 }, { "epoch": 0.011796090200491851, "grad_norm": 192.0, "learning_rate": 3.9305555555555556e-05, "loss": 10.2527, "step": 283 }, { "epoch": 0.01183777249802009, "grad_norm": 344.0, "learning_rate": 3.944444444444445e-05, "loss": 15.5049, "step": 284 }, { "epoch": 0.011879454795548331, "grad_norm": 239.0, "learning_rate": 3.958333333333333e-05, "loss": 11.6889, "step": 285 }, { "epoch": 0.01192113709307657, "grad_norm": 402.0, "learning_rate": 3.972222222222222e-05, "loss": 15.0027, "step": 286 }, { "epoch": 0.01196281939060481, "grad_norm": 426.0, "learning_rate": 3.986111111111111e-05, "loss": 19.376, "step": 287 }, { "epoch": 0.01200450168813305, "grad_norm": 254.0, "learning_rate": 4e-05, "loss": 11.9404, "step": 288 }, { "epoch": 0.012046183985661289, "grad_norm": 402.0, "learning_rate": 4.0138888888888894e-05, "loss": 17.3774, "step": 289 }, { "epoch": 0.01208786628318953, "grad_norm": 434.0, "learning_rate": 4.027777777777778e-05, "loss": 16.6274, "step": 290 }, { "epoch": 0.01212954858071777, "grad_norm": 418.0, "learning_rate": 4.041666666666667e-05, "loss": 16.1263, "step": 291 }, { "epoch": 0.012171230878246009, "grad_norm": 255.0, "learning_rate": 4.055555555555556e-05, "loss": 9.3773, "step": 292 }, { "epoch": 0.012212913175774248, "grad_norm": 298.0, "learning_rate": 4.0694444444444444e-05, "loss": 11.8143, "step": 293 }, { "epoch": 0.01225459547330249, "grad_norm": 396.0, "learning_rate": 4.0833333333333334e-05, "loss": 17.127, "step": 294 }, { "epoch": 0.012296277770830729, "grad_norm": 270.0, "learning_rate": 4.0972222222222225e-05, "loss": 12.6888, "step": 295 }, { "epoch": 0.012337960068358968, "grad_norm": 255.0, "learning_rate": 4.111111111111111e-05, "loss": 11.1893, "step": 296 }, { "epoch": 0.012379642365887207, "grad_norm": 1040.0, "learning_rate": 4.125e-05, "loss": 36.7561, "step": 297 }, { "epoch": 0.012421324663415447, "grad_norm": 402.0, "learning_rate": 4.138888888888889e-05, "loss": 13.6943, "step": 298 }, { "epoch": 0.012463006960943688, "grad_norm": 253.0, "learning_rate": 4.152777777777778e-05, "loss": 12.1269, "step": 299 }, { "epoch": 0.012504689258471927, "grad_norm": 600.0, "learning_rate": 4.166666666666667e-05, "loss": 20.3756, "step": 300 }, { "epoch": 0.012546371556000167, "grad_norm": 360.0, "learning_rate": 4.1805555555555556e-05, "loss": 15.6265, "step": 301 }, { "epoch": 0.012588053853528406, "grad_norm": 334.0, "learning_rate": 4.194444444444445e-05, "loss": 14.1899, "step": 302 }, { "epoch": 0.012629736151056645, "grad_norm": 207.0, "learning_rate": 4.208333333333334e-05, "loss": 10.3152, "step": 303 }, { "epoch": 0.012671418448584887, "grad_norm": 376.0, "learning_rate": 4.222222222222222e-05, "loss": 15.0636, "step": 304 }, { "epoch": 0.012713100746113126, "grad_norm": 282.0, "learning_rate": 4.236111111111111e-05, "loss": 12.8765, "step": 305 }, { "epoch": 0.012754783043641365, "grad_norm": 478.0, "learning_rate": 4.25e-05, "loss": 18.6258, "step": 306 }, { "epoch": 0.012796465341169605, "grad_norm": 312.0, "learning_rate": 4.263888888888889e-05, "loss": 14.5663, "step": 307 }, { "epoch": 0.012838147638697846, "grad_norm": 326.0, "learning_rate": 4.277777777777778e-05, "loss": 15.3764, "step": 308 }, { "epoch": 0.012879829936226085, "grad_norm": 251.0, "learning_rate": 4.291666666666667e-05, "loss": 11.5634, "step": 309 }, { "epoch": 0.012921512233754325, "grad_norm": 292.0, "learning_rate": 4.305555555555556e-05, "loss": 12.6264, "step": 310 }, { "epoch": 0.012963194531282564, "grad_norm": 296.0, "learning_rate": 4.319444444444445e-05, "loss": 13.1255, "step": 311 }, { "epoch": 0.013004876828810803, "grad_norm": 676.0, "learning_rate": 4.3333333333333334e-05, "loss": 21.8778, "step": 312 }, { "epoch": 0.013046559126339044, "grad_norm": 488.0, "learning_rate": 4.3472222222222225e-05, "loss": 17.877, "step": 313 }, { "epoch": 0.013088241423867284, "grad_norm": 344.0, "learning_rate": 4.3611111111111116e-05, "loss": 14.0636, "step": 314 }, { "epoch": 0.013129923721395523, "grad_norm": 466.0, "learning_rate": 4.375e-05, "loss": 17.6263, "step": 315 }, { "epoch": 0.013171606018923762, "grad_norm": 366.0, "learning_rate": 4.388888888888889e-05, "loss": 15.6883, "step": 316 }, { "epoch": 0.013213288316452004, "grad_norm": 348.0, "learning_rate": 4.402777777777778e-05, "loss": 16.2511, "step": 317 }, { "epoch": 0.013254970613980243, "grad_norm": 394.0, "learning_rate": 4.4166666666666665e-05, "loss": 16.7506, "step": 318 }, { "epoch": 0.013296652911508482, "grad_norm": 668.0, "learning_rate": 4.4305555555555556e-05, "loss": 25.0008, "step": 319 }, { "epoch": 0.013338335209036722, "grad_norm": 238.0, "learning_rate": 4.4444444444444447e-05, "loss": 10.9398, "step": 320 }, { "epoch": 0.013380017506564961, "grad_norm": 384.0, "learning_rate": 4.458333333333334e-05, "loss": 16.6257, "step": 321 }, { "epoch": 0.013421699804093202, "grad_norm": 280.0, "learning_rate": 4.472222222222223e-05, "loss": 12.0635, "step": 322 }, { "epoch": 0.013463382101621442, "grad_norm": 223.0, "learning_rate": 4.486111111111111e-05, "loss": 10.5013, "step": 323 }, { "epoch": 0.013505064399149681, "grad_norm": 326.0, "learning_rate": 4.5e-05, "loss": 14.002, "step": 324 }, { "epoch": 0.01354674669667792, "grad_norm": 800.0, "learning_rate": 4.5138888888888894e-05, "loss": 30.1258, "step": 325 }, { "epoch": 0.013588428994206161, "grad_norm": 214.0, "learning_rate": 4.527777777777778e-05, "loss": 10.1891, "step": 326 }, { "epoch": 0.0136301112917344, "grad_norm": 354.0, "learning_rate": 4.541666666666667e-05, "loss": 14.9387, "step": 327 }, { "epoch": 0.01367179358926264, "grad_norm": 608.0, "learning_rate": 4.555555555555556e-05, "loss": 21.5015, "step": 328 }, { "epoch": 0.01371347588679088, "grad_norm": 596.0, "learning_rate": 4.569444444444444e-05, "loss": 20.8817, "step": 329 }, { "epoch": 0.013755158184319119, "grad_norm": 394.0, "learning_rate": 4.5833333333333334e-05, "loss": 15.1888, "step": 330 }, { "epoch": 0.01379684048184736, "grad_norm": 572.0, "learning_rate": 4.5972222222222225e-05, "loss": 19.8788, "step": 331 }, { "epoch": 0.0138385227793756, "grad_norm": 520.0, "learning_rate": 4.6111111111111115e-05, "loss": 19.8771, "step": 332 }, { "epoch": 0.013880205076903839, "grad_norm": 247.0, "learning_rate": 4.6250000000000006e-05, "loss": 11.5039, "step": 333 }, { "epoch": 0.013921887374432078, "grad_norm": 262.0, "learning_rate": 4.638888888888889e-05, "loss": 12.0652, "step": 334 }, { "epoch": 0.01396356967196032, "grad_norm": 300.0, "learning_rate": 4.652777777777778e-05, "loss": 11.1885, "step": 335 }, { "epoch": 0.014005251969488559, "grad_norm": 254.0, "learning_rate": 4.666666666666667e-05, "loss": 12.0649, "step": 336 }, { "epoch": 0.014046934267016798, "grad_norm": 322.0, "learning_rate": 4.6805555555555556e-05, "loss": 14.5639, "step": 337 }, { "epoch": 0.014088616564545037, "grad_norm": 180.0, "learning_rate": 4.6944444444444446e-05, "loss": 7.6892, "step": 338 }, { "epoch": 0.014130298862073277, "grad_norm": 332.0, "learning_rate": 4.708333333333334e-05, "loss": 14.7508, "step": 339 }, { "epoch": 0.014171981159601518, "grad_norm": 322.0, "learning_rate": 4.722222222222222e-05, "loss": 12.5636, "step": 340 }, { "epoch": 0.014213663457129757, "grad_norm": 406.0, "learning_rate": 4.736111111111111e-05, "loss": 15.1924, "step": 341 }, { "epoch": 0.014255345754657997, "grad_norm": 512.0, "learning_rate": 4.75e-05, "loss": 17.131, "step": 342 }, { "epoch": 0.014297028052186236, "grad_norm": 260.0, "learning_rate": 4.7638888888888887e-05, "loss": 10.5006, "step": 343 }, { "epoch": 0.014338710349714475, "grad_norm": 278.0, "learning_rate": 4.7777777777777784e-05, "loss": 12.0663, "step": 344 }, { "epoch": 0.014380392647242717, "grad_norm": 808.0, "learning_rate": 4.791666666666667e-05, "loss": 29.2507, "step": 345 }, { "epoch": 0.014422074944770956, "grad_norm": 232.0, "learning_rate": 4.805555555555556e-05, "loss": 8.5638, "step": 346 }, { "epoch": 0.014463757242299195, "grad_norm": 364.0, "learning_rate": 4.819444444444445e-05, "loss": 11.3759, "step": 347 }, { "epoch": 0.014505439539827435, "grad_norm": 572.0, "learning_rate": 4.8333333333333334e-05, "loss": 18.3781, "step": 348 }, { "epoch": 0.014547121837355676, "grad_norm": 420.0, "learning_rate": 4.8472222222222224e-05, "loss": 16.1259, "step": 349 }, { "epoch": 0.014588804134883915, "grad_norm": 406.0, "learning_rate": 4.8611111111111115e-05, "loss": 15.001, "step": 350 }, { "epoch": 0.014630486432412155, "grad_norm": 684.0, "learning_rate": 4.875e-05, "loss": 23.7515, "step": 351 }, { "epoch": 0.014672168729940394, "grad_norm": 438.0, "learning_rate": 4.888888888888889e-05, "loss": 15.2517, "step": 352 }, { "epoch": 0.014713851027468633, "grad_norm": 246.0, "learning_rate": 4.902777777777778e-05, "loss": 9.5636, "step": 353 }, { "epoch": 0.014755533324996874, "grad_norm": 332.0, "learning_rate": 4.9166666666666665e-05, "loss": 12.7509, "step": 354 }, { "epoch": 0.014797215622525114, "grad_norm": 202.0, "learning_rate": 4.930555555555556e-05, "loss": 10.5659, "step": 355 }, { "epoch": 0.014838897920053353, "grad_norm": 197.0, "learning_rate": 4.9444444444444446e-05, "loss": 9.3782, "step": 356 }, { "epoch": 0.014880580217581593, "grad_norm": 306.0, "learning_rate": 4.958333333333334e-05, "loss": 13.3147, "step": 357 }, { "epoch": 0.014922262515109834, "grad_norm": 217.0, "learning_rate": 4.972222222222223e-05, "loss": 10.6889, "step": 358 }, { "epoch": 0.014963944812638073, "grad_norm": 304.0, "learning_rate": 4.986111111111111e-05, "loss": 13.4383, "step": 359 }, { "epoch": 0.015005627110166312, "grad_norm": 442.0, "learning_rate": 5e-05, "loss": 16.7517, "step": 360 }, { "epoch": 0.015047309407694552, "grad_norm": 354.0, "learning_rate": 5.013888888888889e-05, "loss": 14.8143, "step": 361 }, { "epoch": 0.015088991705222791, "grad_norm": 408.0, "learning_rate": 5.027777777777778e-05, "loss": 16.6269, "step": 362 }, { "epoch": 0.015130674002751032, "grad_norm": 1184.0, "learning_rate": 5.041666666666667e-05, "loss": 33.0067, "step": 363 }, { "epoch": 0.015172356300279272, "grad_norm": 374.0, "learning_rate": 5.055555555555556e-05, "loss": 15.6888, "step": 364 }, { "epoch": 0.015214038597807511, "grad_norm": 181.0, "learning_rate": 5.069444444444444e-05, "loss": 9.0024, "step": 365 }, { "epoch": 0.01525572089533575, "grad_norm": 456.0, "learning_rate": 5.0833333333333333e-05, "loss": 18.7506, "step": 366 }, { "epoch": 0.015297403192863991, "grad_norm": 326.0, "learning_rate": 5.0972222222222224e-05, "loss": 13.8777, "step": 367 }, { "epoch": 0.01533908549039223, "grad_norm": 330.0, "learning_rate": 5.111111111111111e-05, "loss": 13.3765, "step": 368 }, { "epoch": 0.01538076778792047, "grad_norm": 328.0, "learning_rate": 5.125e-05, "loss": 12.3162, "step": 369 }, { "epoch": 0.01542245008544871, "grad_norm": 398.0, "learning_rate": 5.138888888888889e-05, "loss": 16.1265, "step": 370 }, { "epoch": 0.015464132382976949, "grad_norm": 1352.0, "learning_rate": 5.1527777777777774e-05, "loss": 36.7557, "step": 371 }, { "epoch": 0.01550581468050519, "grad_norm": 274.0, "learning_rate": 5.166666666666667e-05, "loss": 12.0641, "step": 372 }, { "epoch": 0.01554749697803343, "grad_norm": 424.0, "learning_rate": 5.180555555555556e-05, "loss": 18.3762, "step": 373 }, { "epoch": 0.015589179275561669, "grad_norm": 472.0, "learning_rate": 5.194444444444445e-05, "loss": 18.3777, "step": 374 }, { "epoch": 0.015630861573089908, "grad_norm": 326.0, "learning_rate": 5.208333333333334e-05, "loss": 14.8144, "step": 375 }, { "epoch": 0.015672543870618148, "grad_norm": 249.0, "learning_rate": 5.222222222222223e-05, "loss": 11.6277, "step": 376 }, { "epoch": 0.015714226168146387, "grad_norm": 243.0, "learning_rate": 5.236111111111112e-05, "loss": 10.3148, "step": 377 }, { "epoch": 0.015755908465674626, "grad_norm": 768.0, "learning_rate": 5.25e-05, "loss": 23.6287, "step": 378 }, { "epoch": 0.01579759076320287, "grad_norm": 412.0, "learning_rate": 5.263888888888889e-05, "loss": 17.2519, "step": 379 }, { "epoch": 0.01583927306073111, "grad_norm": 208.0, "learning_rate": 5.2777777777777784e-05, "loss": 9.6261, "step": 380 }, { "epoch": 0.015880955358259348, "grad_norm": 248.0, "learning_rate": 5.291666666666667e-05, "loss": 11.1901, "step": 381 }, { "epoch": 0.015922637655787587, "grad_norm": 398.0, "learning_rate": 5.305555555555556e-05, "loss": 14.9392, "step": 382 }, { "epoch": 0.015964319953315827, "grad_norm": 255.0, "learning_rate": 5.319444444444445e-05, "loss": 9.5632, "step": 383 }, { "epoch": 0.016006002250844066, "grad_norm": 209.0, "learning_rate": 5.333333333333333e-05, "loss": 11.1273, "step": 384 }, { "epoch": 0.016047684548372305, "grad_norm": 386.0, "learning_rate": 5.3472222222222224e-05, "loss": 15.003, "step": 385 }, { "epoch": 0.016089366845900545, "grad_norm": 276.0, "learning_rate": 5.3611111111111115e-05, "loss": 10.4406, "step": 386 }, { "epoch": 0.016131049143428784, "grad_norm": 191.0, "learning_rate": 5.375e-05, "loss": 8.0641, "step": 387 }, { "epoch": 0.016172731440957027, "grad_norm": 528.0, "learning_rate": 5.388888888888889e-05, "loss": 18.3764, "step": 388 }, { "epoch": 0.016214413738485266, "grad_norm": 976.0, "learning_rate": 5.402777777777778e-05, "loss": 26.1317, "step": 389 }, { "epoch": 0.016256096036013506, "grad_norm": 480.0, "learning_rate": 5.4166666666666664e-05, "loss": 16.7522, "step": 390 }, { "epoch": 0.016297778333541745, "grad_norm": 181.0, "learning_rate": 5.4305555555555555e-05, "loss": 9.6897, "step": 391 }, { "epoch": 0.016339460631069985, "grad_norm": 580.0, "learning_rate": 5.4444444444444446e-05, "loss": 22.5013, "step": 392 }, { "epoch": 0.016381142928598224, "grad_norm": 334.0, "learning_rate": 5.458333333333333e-05, "loss": 13.6272, "step": 393 }, { "epoch": 0.016422825226126463, "grad_norm": 382.0, "learning_rate": 5.472222222222223e-05, "loss": 15.5636, "step": 394 }, { "epoch": 0.016464507523654703, "grad_norm": 700.0, "learning_rate": 5.486111111111112e-05, "loss": 24.6256, "step": 395 }, { "epoch": 0.016506189821182942, "grad_norm": 484.0, "learning_rate": 5.500000000000001e-05, "loss": 18.3771, "step": 396 }, { "epoch": 0.016547872118711185, "grad_norm": 249.0, "learning_rate": 5.513888888888889e-05, "loss": 10.1887, "step": 397 }, { "epoch": 0.016589554416239424, "grad_norm": 408.0, "learning_rate": 5.5277777777777783e-05, "loss": 14.9418, "step": 398 }, { "epoch": 0.016631236713767664, "grad_norm": 136.0, "learning_rate": 5.5416666666666674e-05, "loss": 8.0015, "step": 399 }, { "epoch": 0.016672919011295903, "grad_norm": 224.0, "learning_rate": 5.555555555555556e-05, "loss": 9.9386, "step": 400 }, { "epoch": 0.016714601308824142, "grad_norm": 364.0, "learning_rate": 5.569444444444445e-05, "loss": 14.8761, "step": 401 }, { "epoch": 0.016756283606352382, "grad_norm": 382.0, "learning_rate": 5.583333333333334e-05, "loss": 15.0014, "step": 402 }, { "epoch": 0.01679796590388062, "grad_norm": 696.0, "learning_rate": 5.5972222222222224e-05, "loss": 23.2518, "step": 403 }, { "epoch": 0.01683964820140886, "grad_norm": 474.0, "learning_rate": 5.6111111111111114e-05, "loss": 16.376, "step": 404 }, { "epoch": 0.0168813304989371, "grad_norm": 364.0, "learning_rate": 5.6250000000000005e-05, "loss": 14.9388, "step": 405 }, { "epoch": 0.016923012796465343, "grad_norm": 864.0, "learning_rate": 5.638888888888889e-05, "loss": 26.252, "step": 406 }, { "epoch": 0.016964695093993582, "grad_norm": 388.0, "learning_rate": 5.652777777777778e-05, "loss": 15.0022, "step": 407 }, { "epoch": 0.01700637739152182, "grad_norm": 418.0, "learning_rate": 5.666666666666667e-05, "loss": 16.3761, "step": 408 }, { "epoch": 0.01704805968905006, "grad_norm": 350.0, "learning_rate": 5.6805555555555555e-05, "loss": 15.2511, "step": 409 }, { "epoch": 0.0170897419865783, "grad_norm": 374.0, "learning_rate": 5.6944444444444445e-05, "loss": 14.127, "step": 410 }, { "epoch": 0.01713142428410654, "grad_norm": 186.0, "learning_rate": 5.7083333333333336e-05, "loss": 9.1889, "step": 411 }, { "epoch": 0.01717310658163478, "grad_norm": 370.0, "learning_rate": 5.722222222222222e-05, "loss": 14.1263, "step": 412 }, { "epoch": 0.01721478887916302, "grad_norm": 828.0, "learning_rate": 5.736111111111111e-05, "loss": 25.2509, "step": 413 }, { "epoch": 0.017256471176691258, "grad_norm": 608.0, "learning_rate": 5.7499999999999995e-05, "loss": 19.7516, "step": 414 }, { "epoch": 0.0172981534742195, "grad_norm": 368.0, "learning_rate": 5.7638888888888886e-05, "loss": 15.1261, "step": 415 }, { "epoch": 0.01733983577174774, "grad_norm": 248.0, "learning_rate": 5.7777777777777776e-05, "loss": 11.0052, "step": 416 }, { "epoch": 0.01738151806927598, "grad_norm": 288.0, "learning_rate": 5.7916666666666674e-05, "loss": 11.7511, "step": 417 }, { "epoch": 0.01742320036680422, "grad_norm": 468.0, "learning_rate": 5.8055555555555565e-05, "loss": 17.5006, "step": 418 }, { "epoch": 0.017464882664332458, "grad_norm": 350.0, "learning_rate": 5.819444444444445e-05, "loss": 13.4405, "step": 419 }, { "epoch": 0.017506564961860697, "grad_norm": 404.0, "learning_rate": 5.833333333333334e-05, "loss": 15.4385, "step": 420 }, { "epoch": 0.017548247259388937, "grad_norm": 960.0, "learning_rate": 5.847222222222223e-05, "loss": 26.632, "step": 421 }, { "epoch": 0.017589929556917176, "grad_norm": 468.0, "learning_rate": 5.8611111111111114e-05, "loss": 17.751, "step": 422 }, { "epoch": 0.017631611854445416, "grad_norm": 696.0, "learning_rate": 5.8750000000000005e-05, "loss": 25.752, "step": 423 }, { "epoch": 0.01767329415197366, "grad_norm": 276.0, "learning_rate": 5.8888888888888896e-05, "loss": 11.564, "step": 424 }, { "epoch": 0.017714976449501898, "grad_norm": 504.0, "learning_rate": 5.902777777777778e-05, "loss": 15.8804, "step": 425 }, { "epoch": 0.017756658747030137, "grad_norm": 464.0, "learning_rate": 5.916666666666667e-05, "loss": 14.4402, "step": 426 }, { "epoch": 0.017798341044558377, "grad_norm": 684.0, "learning_rate": 5.930555555555556e-05, "loss": 22.1258, "step": 427 }, { "epoch": 0.017840023342086616, "grad_norm": 462.0, "learning_rate": 5.9444444444444445e-05, "loss": 16.3761, "step": 428 }, { "epoch": 0.017881705639614855, "grad_norm": 320.0, "learning_rate": 5.9583333333333336e-05, "loss": 12.7511, "step": 429 }, { "epoch": 0.017923387937143095, "grad_norm": 462.0, "learning_rate": 5.972222222222223e-05, "loss": 15.4393, "step": 430 }, { "epoch": 0.017965070234671334, "grad_norm": 326.0, "learning_rate": 5.986111111111111e-05, "loss": 13.0014, "step": 431 }, { "epoch": 0.018006752532199573, "grad_norm": 278.0, "learning_rate": 6e-05, "loss": 12.4392, "step": 432 }, { "epoch": 0.018048434829727816, "grad_norm": 404.0, "learning_rate": 6.013888888888889e-05, "loss": 13.3139, "step": 433 }, { "epoch": 0.018090117127256056, "grad_norm": 584.0, "learning_rate": 6.0277777777777776e-05, "loss": 19.6259, "step": 434 }, { "epoch": 0.018131799424784295, "grad_norm": 328.0, "learning_rate": 6.041666666666667e-05, "loss": 12.6272, "step": 435 }, { "epoch": 0.018173481722312534, "grad_norm": 142.0, "learning_rate": 6.055555555555555e-05, "loss": 8.3776, "step": 436 }, { "epoch": 0.018215164019840774, "grad_norm": 600.0, "learning_rate": 6.069444444444444e-05, "loss": 20.0029, "step": 437 }, { "epoch": 0.018256846317369013, "grad_norm": 560.0, "learning_rate": 6.083333333333333e-05, "loss": 19.001, "step": 438 }, { "epoch": 0.018298528614897253, "grad_norm": 494.0, "learning_rate": 6.097222222222223e-05, "loss": 15.2508, "step": 439 }, { "epoch": 0.018340210912425492, "grad_norm": 147.0, "learning_rate": 6.111111111111112e-05, "loss": 7.6587, "step": 440 }, { "epoch": 0.01838189320995373, "grad_norm": 332.0, "learning_rate": 6.125000000000001e-05, "loss": 13.1881, "step": 441 }, { "epoch": 0.01842357550748197, "grad_norm": 652.0, "learning_rate": 6.13888888888889e-05, "loss": 19.3818, "step": 442 }, { "epoch": 0.018465257805010214, "grad_norm": 740.0, "learning_rate": 6.152777777777778e-05, "loss": 24.0007, "step": 443 }, { "epoch": 0.018506940102538453, "grad_norm": 404.0, "learning_rate": 6.166666666666667e-05, "loss": 14.9387, "step": 444 }, { "epoch": 0.018548622400066692, "grad_norm": 334.0, "learning_rate": 6.180555555555556e-05, "loss": 12.4396, "step": 445 }, { "epoch": 0.01859030469759493, "grad_norm": 412.0, "learning_rate": 6.194444444444445e-05, "loss": 15.4396, "step": 446 }, { "epoch": 0.01863198699512317, "grad_norm": 410.0, "learning_rate": 6.208333333333334e-05, "loss": 14.2508, "step": 447 }, { "epoch": 0.01867366929265141, "grad_norm": 336.0, "learning_rate": 6.222222222222222e-05, "loss": 12.5053, "step": 448 }, { "epoch": 0.01871535159017965, "grad_norm": 580.0, "learning_rate": 6.236111111111111e-05, "loss": 18.5006, "step": 449 }, { "epoch": 0.01875703388770789, "grad_norm": 980.0, "learning_rate": 6.25e-05, "loss": 25.8824, "step": 450 }, { "epoch": 0.01879871618523613, "grad_norm": 478.0, "learning_rate": 6.263888888888889e-05, "loss": 15.3763, "step": 451 }, { "epoch": 0.01884039848276437, "grad_norm": 696.0, "learning_rate": 6.277777777777778e-05, "loss": 20.8807, "step": 452 }, { "epoch": 0.01888208078029261, "grad_norm": 512.0, "learning_rate": 6.291666666666667e-05, "loss": 17.7519, "step": 453 }, { "epoch": 0.01892376307782085, "grad_norm": 185.0, "learning_rate": 6.305555555555555e-05, "loss": 9.4392, "step": 454 }, { "epoch": 0.01896544537534909, "grad_norm": 102.0, "learning_rate": 6.319444444444444e-05, "loss": 6.7823, "step": 455 }, { "epoch": 0.01900712767287733, "grad_norm": 828.0, "learning_rate": 6.333333333333333e-05, "loss": 19.1357, "step": 456 }, { "epoch": 0.01904880997040557, "grad_norm": 342.0, "learning_rate": 6.347222222222222e-05, "loss": 13.9397, "step": 457 }, { "epoch": 0.019090492267933808, "grad_norm": 197.0, "learning_rate": 6.361111111111111e-05, "loss": 8.8762, "step": 458 }, { "epoch": 0.019132174565462047, "grad_norm": 1256.0, "learning_rate": 6.375e-05, "loss": 29.8807, "step": 459 }, { "epoch": 0.019173856862990286, "grad_norm": 660.0, "learning_rate": 6.388888888888888e-05, "loss": 23.3768, "step": 460 }, { "epoch": 0.01921553916051853, "grad_norm": 249.0, "learning_rate": 6.402777777777777e-05, "loss": 11.0638, "step": 461 }, { "epoch": 0.01925722145804677, "grad_norm": 290.0, "learning_rate": 6.416666666666668e-05, "loss": 10.9386, "step": 462 }, { "epoch": 0.019298903755575008, "grad_norm": 596.0, "learning_rate": 6.430555555555557e-05, "loss": 22.1296, "step": 463 }, { "epoch": 0.019340586053103247, "grad_norm": 472.0, "learning_rate": 6.444444444444446e-05, "loss": 17.0024, "step": 464 }, { "epoch": 0.019382268350631487, "grad_norm": 133.0, "learning_rate": 6.458333333333334e-05, "loss": 8.4396, "step": 465 }, { "epoch": 0.019423950648159726, "grad_norm": 191.0, "learning_rate": 6.472222222222223e-05, "loss": 9.6265, "step": 466 }, { "epoch": 0.019465632945687966, "grad_norm": 712.0, "learning_rate": 6.486111111111112e-05, "loss": 20.7546, "step": 467 }, { "epoch": 0.019507315243216205, "grad_norm": 470.0, "learning_rate": 6.500000000000001e-05, "loss": 16.1264, "step": 468 }, { "epoch": 0.019548997540744444, "grad_norm": 278.0, "learning_rate": 6.51388888888889e-05, "loss": 10.4403, "step": 469 }, { "epoch": 0.019590679838272687, "grad_norm": 740.0, "learning_rate": 6.527777777777778e-05, "loss": 24.2509, "step": 470 }, { "epoch": 0.019632362135800926, "grad_norm": 350.0, "learning_rate": 6.541666666666667e-05, "loss": 15.0019, "step": 471 }, { "epoch": 0.019674044433329166, "grad_norm": 149.0, "learning_rate": 6.555555555555556e-05, "loss": 8.3143, "step": 472 }, { "epoch": 0.019715726730857405, "grad_norm": 382.0, "learning_rate": 6.569444444444445e-05, "loss": 15.5007, "step": 473 }, { "epoch": 0.019757409028385645, "grad_norm": 344.0, "learning_rate": 6.583333333333334e-05, "loss": 14.6895, "step": 474 }, { "epoch": 0.019799091325913884, "grad_norm": 712.0, "learning_rate": 6.597222222222223e-05, "loss": 20.3764, "step": 475 }, { "epoch": 0.019840773623442123, "grad_norm": 221.0, "learning_rate": 6.611111111111111e-05, "loss": 10.3132, "step": 476 }, { "epoch": 0.019882455920970363, "grad_norm": 175.0, "learning_rate": 6.625e-05, "loss": 8.8771, "step": 477 }, { "epoch": 0.019924138218498602, "grad_norm": 544.0, "learning_rate": 6.638888888888889e-05, "loss": 20.1266, "step": 478 }, { "epoch": 0.019965820516026845, "grad_norm": 358.0, "learning_rate": 6.652777777777778e-05, "loss": 9.9387, "step": 479 }, { "epoch": 0.020007502813555084, "grad_norm": 478.0, "learning_rate": 6.666666666666667e-05, "loss": 16.5014, "step": 480 }, { "epoch": 0.020049185111083324, "grad_norm": 292.0, "learning_rate": 6.680555555555556e-05, "loss": 11.8145, "step": 481 }, { "epoch": 0.020090867408611563, "grad_norm": 414.0, "learning_rate": 6.694444444444444e-05, "loss": 14.6884, "step": 482 }, { "epoch": 0.020132549706139802, "grad_norm": 648.0, "learning_rate": 6.708333333333333e-05, "loss": 20.0029, "step": 483 }, { "epoch": 0.020174232003668042, "grad_norm": 392.0, "learning_rate": 6.722222222222223e-05, "loss": 14.0636, "step": 484 }, { "epoch": 0.02021591430119628, "grad_norm": 868.0, "learning_rate": 6.736111111111112e-05, "loss": 23.007, "step": 485 }, { "epoch": 0.02025759659872452, "grad_norm": 360.0, "learning_rate": 6.750000000000001e-05, "loss": 14.0636, "step": 486 }, { "epoch": 0.02029927889625276, "grad_norm": 242.0, "learning_rate": 6.763888888888889e-05, "loss": 10.6888, "step": 487 }, { "epoch": 0.020340961193781003, "grad_norm": 113.5, "learning_rate": 6.777777777777778e-05, "loss": 7.0635, "step": 488 }, { "epoch": 0.020382643491309242, "grad_norm": 490.0, "learning_rate": 6.791666666666667e-05, "loss": 17.3784, "step": 489 }, { "epoch": 0.02042432578883748, "grad_norm": 338.0, "learning_rate": 6.805555555555556e-05, "loss": 12.8758, "step": 490 }, { "epoch": 0.02046600808636572, "grad_norm": 466.0, "learning_rate": 6.819444444444445e-05, "loss": 17.0006, "step": 491 }, { "epoch": 0.02050769038389396, "grad_norm": 159.0, "learning_rate": 6.833333333333333e-05, "loss": 8.9394, "step": 492 }, { "epoch": 0.0205493726814222, "grad_norm": 173.0, "learning_rate": 6.847222222222222e-05, "loss": 8.3766, "step": 493 }, { "epoch": 0.02059105497895044, "grad_norm": 308.0, "learning_rate": 6.861111111111111e-05, "loss": 12.3132, "step": 494 }, { "epoch": 0.02063273727647868, "grad_norm": 524.0, "learning_rate": 6.875e-05, "loss": 18.5009, "step": 495 }, { "epoch": 0.020674419574006918, "grad_norm": 235.0, "learning_rate": 6.88888888888889e-05, "loss": 10.19, "step": 496 }, { "epoch": 0.02071610187153516, "grad_norm": 338.0, "learning_rate": 6.902777777777779e-05, "loss": 14.3773, "step": 497 }, { "epoch": 0.0207577841690634, "grad_norm": 1320.0, "learning_rate": 6.916666666666666e-05, "loss": 40.5038, "step": 498 }, { "epoch": 0.02079946646659164, "grad_norm": 334.0, "learning_rate": 6.930555555555555e-05, "loss": 13.8758, "step": 499 }, { "epoch": 0.02084114876411988, "grad_norm": 340.0, "learning_rate": 6.944444444444444e-05, "loss": 14.0018, "step": 500 }, { "epoch": 0.020882831061648118, "grad_norm": 358.0, "learning_rate": 6.958333333333334e-05, "loss": 14.1279, "step": 501 }, { "epoch": 0.020924513359176358, "grad_norm": 292.0, "learning_rate": 6.972222222222223e-05, "loss": 12.2565, "step": 502 }, { "epoch": 0.020966195656704597, "grad_norm": 376.0, "learning_rate": 6.986111111111112e-05, "loss": 13.0012, "step": 503 }, { "epoch": 0.021007877954232836, "grad_norm": 117.0, "learning_rate": 7e-05, "loss": 7.4702, "step": 504 }, { "epoch": 0.021049560251761076, "grad_norm": 756.0, "learning_rate": 7.013888888888888e-05, "loss": 22.8783, "step": 505 }, { "epoch": 0.02109124254928932, "grad_norm": 219.0, "learning_rate": 7.027777777777778e-05, "loss": 10.1882, "step": 506 }, { "epoch": 0.021132924846817558, "grad_norm": 512.0, "learning_rate": 7.041666666666668e-05, "loss": 15.3778, "step": 507 }, { "epoch": 0.021174607144345797, "grad_norm": 124.0, "learning_rate": 7.055555555555556e-05, "loss": 8.0642, "step": 508 }, { "epoch": 0.021216289441874037, "grad_norm": 500.0, "learning_rate": 7.069444444444445e-05, "loss": 17.0018, "step": 509 }, { "epoch": 0.021257971739402276, "grad_norm": 302.0, "learning_rate": 7.083333333333334e-05, "loss": 12.7509, "step": 510 }, { "epoch": 0.021299654036930515, "grad_norm": 488.0, "learning_rate": 7.097222222222223e-05, "loss": 17.5034, "step": 511 }, { "epoch": 0.021341336334458755, "grad_norm": 520.0, "learning_rate": 7.111111111111112e-05, "loss": 19.6272, "step": 512 }, { "epoch": 0.021383018631986994, "grad_norm": 286.0, "learning_rate": 7.125000000000001e-05, "loss": 12.3759, "step": 513 }, { "epoch": 0.021424700929515234, "grad_norm": 612.0, "learning_rate": 7.138888888888889e-05, "loss": 20.626, "step": 514 }, { "epoch": 0.021466383227043473, "grad_norm": 322.0, "learning_rate": 7.152777777777778e-05, "loss": 13.0032, "step": 515 }, { "epoch": 0.021508065524571716, "grad_norm": 470.0, "learning_rate": 7.166666666666667e-05, "loss": 16.8779, "step": 516 }, { "epoch": 0.021549747822099955, "grad_norm": 512.0, "learning_rate": 7.180555555555556e-05, "loss": 17.0019, "step": 517 }, { "epoch": 0.021591430119628195, "grad_norm": 348.0, "learning_rate": 7.194444444444445e-05, "loss": 15.0633, "step": 518 }, { "epoch": 0.021633112417156434, "grad_norm": 1064.0, "learning_rate": 7.208333333333334e-05, "loss": 31.3757, "step": 519 }, { "epoch": 0.021674794714684673, "grad_norm": 1128.0, "learning_rate": 7.222222222222222e-05, "loss": 28.7583, "step": 520 }, { "epoch": 0.021716477012212913, "grad_norm": 225.0, "learning_rate": 7.236111111111111e-05, "loss": 11.3761, "step": 521 }, { "epoch": 0.021758159309741152, "grad_norm": 151.0, "learning_rate": 7.25e-05, "loss": 8.4412, "step": 522 }, { "epoch": 0.02179984160726939, "grad_norm": 464.0, "learning_rate": 7.263888888888889e-05, "loss": 15.1891, "step": 523 }, { "epoch": 0.02184152390479763, "grad_norm": 174.0, "learning_rate": 7.277777777777778e-05, "loss": 8.9412, "step": 524 }, { "epoch": 0.021883206202325874, "grad_norm": 700.0, "learning_rate": 7.291666666666667e-05, "loss": 22.7512, "step": 525 }, { "epoch": 0.021924888499854113, "grad_norm": 354.0, "learning_rate": 7.305555555555555e-05, "loss": 13.1892, "step": 526 }, { "epoch": 0.021966570797382352, "grad_norm": 280.0, "learning_rate": 7.319444444444444e-05, "loss": 11.1885, "step": 527 }, { "epoch": 0.02200825309491059, "grad_norm": 300.0, "learning_rate": 7.333333333333333e-05, "loss": 13.3133, "step": 528 }, { "epoch": 0.02204993539243883, "grad_norm": 201.0, "learning_rate": 7.347222222222224e-05, "loss": 9.5011, "step": 529 }, { "epoch": 0.02209161768996707, "grad_norm": 448.0, "learning_rate": 7.361111111111111e-05, "loss": 17.5013, "step": 530 }, { "epoch": 0.02213329998749531, "grad_norm": 446.0, "learning_rate": 7.375e-05, "loss": 15.813, "step": 531 }, { "epoch": 0.02217498228502355, "grad_norm": 712.0, "learning_rate": 7.38888888888889e-05, "loss": 20.7578, "step": 532 }, { "epoch": 0.02221666458255179, "grad_norm": 328.0, "learning_rate": 7.402777777777779e-05, "loss": 11.9392, "step": 533 }, { "epoch": 0.02225834688008003, "grad_norm": 227.0, "learning_rate": 7.416666666666668e-05, "loss": 10.8758, "step": 534 }, { "epoch": 0.02230002917760827, "grad_norm": 240.0, "learning_rate": 7.430555555555557e-05, "loss": 11.8762, "step": 535 }, { "epoch": 0.02234171147513651, "grad_norm": 576.0, "learning_rate": 7.444444444444444e-05, "loss": 18.0022, "step": 536 }, { "epoch": 0.02238339377266475, "grad_norm": 696.0, "learning_rate": 7.458333333333333e-05, "loss": 20.1263, "step": 537 }, { "epoch": 0.02242507607019299, "grad_norm": 386.0, "learning_rate": 7.472222222222223e-05, "loss": 13.814, "step": 538 }, { "epoch": 0.02246675836772123, "grad_norm": 334.0, "learning_rate": 7.486111111111112e-05, "loss": 10.8776, "step": 539 }, { "epoch": 0.022508440665249468, "grad_norm": 266.0, "learning_rate": 7.500000000000001e-05, "loss": 10.6257, "step": 540 }, { "epoch": 0.022550122962777707, "grad_norm": 358.0, "learning_rate": 7.51388888888889e-05, "loss": 12.0647, "step": 541 }, { "epoch": 0.022591805260305946, "grad_norm": 346.0, "learning_rate": 7.527777777777777e-05, "loss": 13.6896, "step": 542 }, { "epoch": 0.02263348755783419, "grad_norm": 372.0, "learning_rate": 7.541666666666667e-05, "loss": 14.8763, "step": 543 }, { "epoch": 0.02267516985536243, "grad_norm": 380.0, "learning_rate": 7.555555555555556e-05, "loss": 13.5009, "step": 544 }, { "epoch": 0.022716852152890668, "grad_norm": 302.0, "learning_rate": 7.569444444444445e-05, "loss": 9.688, "step": 545 }, { "epoch": 0.022758534450418907, "grad_norm": 382.0, "learning_rate": 7.583333333333334e-05, "loss": 14.3131, "step": 546 }, { "epoch": 0.022800216747947147, "grad_norm": 436.0, "learning_rate": 7.597222222222223e-05, "loss": 12.6943, "step": 547 }, { "epoch": 0.022841899045475386, "grad_norm": 1160.0, "learning_rate": 7.61111111111111e-05, "loss": 30.0016, "step": 548 }, { "epoch": 0.022883581343003626, "grad_norm": 528.0, "learning_rate": 7.625e-05, "loss": 16.0058, "step": 549 }, { "epoch": 0.022925263640531865, "grad_norm": 436.0, "learning_rate": 7.638888888888889e-05, "loss": 15.0634, "step": 550 }, { "epoch": 0.022966945938060104, "grad_norm": 404.0, "learning_rate": 7.652777777777778e-05, "loss": 15.3143, "step": 551 }, { "epoch": 0.023008628235588347, "grad_norm": 260.0, "learning_rate": 7.666666666666667e-05, "loss": 11.2511, "step": 552 }, { "epoch": 0.023050310533116587, "grad_norm": 264.0, "learning_rate": 7.680555555555556e-05, "loss": 10.8138, "step": 553 }, { "epoch": 0.023091992830644826, "grad_norm": 137.0, "learning_rate": 7.694444444444445e-05, "loss": 8.3136, "step": 554 }, { "epoch": 0.023133675128173065, "grad_norm": 510.0, "learning_rate": 7.708333333333334e-05, "loss": 15.4389, "step": 555 }, { "epoch": 0.023175357425701305, "grad_norm": 548.0, "learning_rate": 7.722222222222223e-05, "loss": 14.8196, "step": 556 }, { "epoch": 0.023217039723229544, "grad_norm": 516.0, "learning_rate": 7.736111111111112e-05, "loss": 16.5012, "step": 557 }, { "epoch": 0.023258722020757783, "grad_norm": 936.0, "learning_rate": 7.75e-05, "loss": 23.0024, "step": 558 }, { "epoch": 0.023300404318286023, "grad_norm": 330.0, "learning_rate": 7.763888888888889e-05, "loss": 12.9384, "step": 559 }, { "epoch": 0.023342086615814262, "grad_norm": 394.0, "learning_rate": 7.777777777777778e-05, "loss": 14.0006, "step": 560 }, { "epoch": 0.023383768913342505, "grad_norm": 288.0, "learning_rate": 7.791666666666667e-05, "loss": 12.3769, "step": 561 }, { "epoch": 0.023425451210870744, "grad_norm": 724.0, "learning_rate": 7.805555555555556e-05, "loss": 18.5063, "step": 562 }, { "epoch": 0.023467133508398984, "grad_norm": 358.0, "learning_rate": 7.819444444444445e-05, "loss": 13.7513, "step": 563 }, { "epoch": 0.023508815805927223, "grad_norm": 270.0, "learning_rate": 7.833333333333333e-05, "loss": 11.6893, "step": 564 }, { "epoch": 0.023550498103455463, "grad_norm": 340.0, "learning_rate": 7.847222222222222e-05, "loss": 13.6261, "step": 565 }, { "epoch": 0.023592180400983702, "grad_norm": 340.0, "learning_rate": 7.861111111111111e-05, "loss": 14.1261, "step": 566 }, { "epoch": 0.02363386269851194, "grad_norm": 438.0, "learning_rate": 7.875e-05, "loss": 15.3754, "step": 567 }, { "epoch": 0.02367554499604018, "grad_norm": 848.0, "learning_rate": 7.88888888888889e-05, "loss": 24.3771, "step": 568 }, { "epoch": 0.02371722729356842, "grad_norm": 516.0, "learning_rate": 7.902777777777778e-05, "loss": 16.0022, "step": 569 }, { "epoch": 0.023758909591096663, "grad_norm": 164.0, "learning_rate": 7.916666666666666e-05, "loss": 7.5028, "step": 570 }, { "epoch": 0.023800591888624902, "grad_norm": 75.0, "learning_rate": 7.930555555555555e-05, "loss": 6.4694, "step": 571 }, { "epoch": 0.02384227418615314, "grad_norm": 428.0, "learning_rate": 7.944444444444444e-05, "loss": 14.6284, "step": 572 }, { "epoch": 0.02388395648368138, "grad_norm": 864.0, "learning_rate": 7.958333333333333e-05, "loss": 21.631, "step": 573 }, { "epoch": 0.02392563878120962, "grad_norm": 1056.0, "learning_rate": 7.972222222222223e-05, "loss": 27.8822, "step": 574 }, { "epoch": 0.02396732107873786, "grad_norm": 396.0, "learning_rate": 7.986111111111112e-05, "loss": 12.0009, "step": 575 }, { "epoch": 0.0240090033762661, "grad_norm": 1048.0, "learning_rate": 8e-05, "loss": 26.0028, "step": 576 }, { "epoch": 0.02405068567379434, "grad_norm": 220.0, "learning_rate": 8.01388888888889e-05, "loss": 10.6882, "step": 577 }, { "epoch": 0.024092367971322578, "grad_norm": 268.0, "learning_rate": 8.027777777777779e-05, "loss": 11.3773, "step": 578 }, { "epoch": 0.02413405026885082, "grad_norm": 454.0, "learning_rate": 8.041666666666668e-05, "loss": 15.4401, "step": 579 }, { "epoch": 0.02417573256637906, "grad_norm": 1088.0, "learning_rate": 8.055555555555556e-05, "loss": 28.5076, "step": 580 }, { "epoch": 0.0242174148639073, "grad_norm": 243.0, "learning_rate": 8.069444444444445e-05, "loss": 9.7525, "step": 581 }, { "epoch": 0.02425909716143554, "grad_norm": 276.0, "learning_rate": 8.083333333333334e-05, "loss": 12.1272, "step": 582 }, { "epoch": 0.024300779458963778, "grad_norm": 382.0, "learning_rate": 8.097222222222223e-05, "loss": 13.3832, "step": 583 }, { "epoch": 0.024342461756492018, "grad_norm": 564.0, "learning_rate": 8.111111111111112e-05, "loss": 18.0014, "step": 584 }, { "epoch": 0.024384144054020257, "grad_norm": 284.0, "learning_rate": 8.125000000000001e-05, "loss": 12.5022, "step": 585 }, { "epoch": 0.024425826351548496, "grad_norm": 568.0, "learning_rate": 8.138888888888889e-05, "loss": 19.3811, "step": 586 }, { "epoch": 0.024467508649076736, "grad_norm": 438.0, "learning_rate": 8.152777777777778e-05, "loss": 16.8755, "step": 587 }, { "epoch": 0.02450919094660498, "grad_norm": 184.0, "learning_rate": 8.166666666666667e-05, "loss": 9.876, "step": 588 }, { "epoch": 0.024550873244133218, "grad_norm": 568.0, "learning_rate": 8.180555555555556e-05, "loss": 13.319, "step": 589 }, { "epoch": 0.024592555541661457, "grad_norm": 536.0, "learning_rate": 8.194444444444445e-05, "loss": 17.3758, "step": 590 }, { "epoch": 0.024634237839189697, "grad_norm": 394.0, "learning_rate": 8.208333333333334e-05, "loss": 15.001, "step": 591 }, { "epoch": 0.024675920136717936, "grad_norm": 544.0, "learning_rate": 8.222222222222222e-05, "loss": 19.1259, "step": 592 }, { "epoch": 0.024717602434246175, "grad_norm": 306.0, "learning_rate": 8.236111111111111e-05, "loss": 13.9392, "step": 593 }, { "epoch": 0.024759284731774415, "grad_norm": 240.0, "learning_rate": 8.25e-05, "loss": 9.9397, "step": 594 }, { "epoch": 0.024800967029302654, "grad_norm": 352.0, "learning_rate": 8.263888888888889e-05, "loss": 13.5015, "step": 595 }, { "epoch": 0.024842649326830894, "grad_norm": 384.0, "learning_rate": 8.277777777777778e-05, "loss": 14.7507, "step": 596 }, { "epoch": 0.024884331624359133, "grad_norm": 402.0, "learning_rate": 8.291666666666667e-05, "loss": 13.8771, "step": 597 }, { "epoch": 0.024926013921887376, "grad_norm": 328.0, "learning_rate": 8.305555555555556e-05, "loss": 14.4387, "step": 598 }, { "epoch": 0.024967696219415615, "grad_norm": 560.0, "learning_rate": 8.319444444444445e-05, "loss": 18.8759, "step": 599 }, { "epoch": 0.025009378516943855, "grad_norm": 552.0, "learning_rate": 8.333333333333334e-05, "loss": 20.0009, "step": 600 }, { "epoch": 0.025051060814472094, "grad_norm": 406.0, "learning_rate": 8.347222222222223e-05, "loss": 14.5008, "step": 601 }, { "epoch": 0.025092743112000333, "grad_norm": 290.0, "learning_rate": 8.361111111111111e-05, "loss": 8.6279, "step": 602 }, { "epoch": 0.025134425409528573, "grad_norm": 134.0, "learning_rate": 8.375e-05, "loss": 8.5026, "step": 603 }, { "epoch": 0.025176107707056812, "grad_norm": 158.0, "learning_rate": 8.38888888888889e-05, "loss": 9.3763, "step": 604 }, { "epoch": 0.02521779000458505, "grad_norm": 672.0, "learning_rate": 8.402777777777778e-05, "loss": 18.752, "step": 605 }, { "epoch": 0.02525947230211329, "grad_norm": 284.0, "learning_rate": 8.416666666666668e-05, "loss": 12.0025, "step": 606 }, { "epoch": 0.025301154599641534, "grad_norm": 210.0, "learning_rate": 8.430555555555557e-05, "loss": 10.6885, "step": 607 }, { "epoch": 0.025342836897169773, "grad_norm": 564.0, "learning_rate": 8.444444444444444e-05, "loss": 17.126, "step": 608 }, { "epoch": 0.025384519194698012, "grad_norm": 660.0, "learning_rate": 8.458333333333333e-05, "loss": 18.5065, "step": 609 }, { "epoch": 0.025426201492226252, "grad_norm": 334.0, "learning_rate": 8.472222222222222e-05, "loss": 12.9397, "step": 610 }, { "epoch": 0.02546788378975449, "grad_norm": 544.0, "learning_rate": 8.486111111111112e-05, "loss": 17.3766, "step": 611 }, { "epoch": 0.02550956608728273, "grad_norm": 141.0, "learning_rate": 8.5e-05, "loss": 8.2517, "step": 612 }, { "epoch": 0.02555124838481097, "grad_norm": 510.0, "learning_rate": 8.51388888888889e-05, "loss": 17.1257, "step": 613 }, { "epoch": 0.02559293068233921, "grad_norm": 314.0, "learning_rate": 8.527777777777777e-05, "loss": 13.3133, "step": 614 }, { "epoch": 0.02563461297986745, "grad_norm": 360.0, "learning_rate": 8.541666666666666e-05, "loss": 12.0648, "step": 615 }, { "epoch": 0.02567629527739569, "grad_norm": 600.0, "learning_rate": 8.555555555555556e-05, "loss": 19.5007, "step": 616 }, { "epoch": 0.02571797757492393, "grad_norm": 248.0, "learning_rate": 8.569444444444445e-05, "loss": 11.0019, "step": 617 }, { "epoch": 0.02575965987245217, "grad_norm": 386.0, "learning_rate": 8.583333333333334e-05, "loss": 14.063, "step": 618 }, { "epoch": 0.02580134216998041, "grad_norm": 156.0, "learning_rate": 8.597222222222223e-05, "loss": 9.3151, "step": 619 }, { "epoch": 0.02584302446750865, "grad_norm": 124.0, "learning_rate": 8.611111111111112e-05, "loss": 7.9702, "step": 620 }, { "epoch": 0.02588470676503689, "grad_norm": 478.0, "learning_rate": 8.625000000000001e-05, "loss": 16.7516, "step": 621 }, { "epoch": 0.025926389062565128, "grad_norm": 532.0, "learning_rate": 8.63888888888889e-05, "loss": 16.8758, "step": 622 }, { "epoch": 0.025968071360093367, "grad_norm": 370.0, "learning_rate": 8.652777777777779e-05, "loss": 14.7508, "step": 623 }, { "epoch": 0.026009753657621607, "grad_norm": 220.0, "learning_rate": 8.666666666666667e-05, "loss": 10.0032, "step": 624 }, { "epoch": 0.02605143595514985, "grad_norm": 440.0, "learning_rate": 8.680555555555556e-05, "loss": 16.7514, "step": 625 }, { "epoch": 0.02609311825267809, "grad_norm": 302.0, "learning_rate": 8.694444444444445e-05, "loss": 12.5652, "step": 626 }, { "epoch": 0.026134800550206328, "grad_norm": 210.0, "learning_rate": 8.708333333333334e-05, "loss": 7.1896, "step": 627 }, { "epoch": 0.026176482847734567, "grad_norm": 612.0, "learning_rate": 8.722222222222223e-05, "loss": 20.1299, "step": 628 }, { "epoch": 0.026218165145262807, "grad_norm": 430.0, "learning_rate": 8.736111111111112e-05, "loss": 14.5636, "step": 629 }, { "epoch": 0.026259847442791046, "grad_norm": 274.0, "learning_rate": 8.75e-05, "loss": 11.439, "step": 630 }, { "epoch": 0.026301529740319286, "grad_norm": 504.0, "learning_rate": 8.763888888888889e-05, "loss": 18.1258, "step": 631 }, { "epoch": 0.026343212037847525, "grad_norm": 292.0, "learning_rate": 8.777777777777778e-05, "loss": 11.5013, "step": 632 }, { "epoch": 0.026384894335375764, "grad_norm": 344.0, "learning_rate": 8.791666666666667e-05, "loss": 12.6887, "step": 633 }, { "epoch": 0.026426576632904007, "grad_norm": 106.0, "learning_rate": 8.805555555555556e-05, "loss": 6.5018, "step": 634 }, { "epoch": 0.026468258930432247, "grad_norm": 516.0, "learning_rate": 8.819444444444445e-05, "loss": 18.3762, "step": 635 }, { "epoch": 0.026509941227960486, "grad_norm": 968.0, "learning_rate": 8.833333333333333e-05, "loss": 28.1274, "step": 636 }, { "epoch": 0.026551623525488725, "grad_norm": 448.0, "learning_rate": 8.847222222222222e-05, "loss": 14.5021, "step": 637 }, { "epoch": 0.026593305823016965, "grad_norm": 145.0, "learning_rate": 8.861111111111111e-05, "loss": 8.8142, "step": 638 }, { "epoch": 0.026634988120545204, "grad_norm": 179.0, "learning_rate": 8.875e-05, "loss": 8.5013, "step": 639 }, { "epoch": 0.026676670418073443, "grad_norm": 680.0, "learning_rate": 8.888888888888889e-05, "loss": 20.6267, "step": 640 }, { "epoch": 0.026718352715601683, "grad_norm": 446.0, "learning_rate": 8.902777777777777e-05, "loss": 16.5019, "step": 641 }, { "epoch": 0.026760035013129922, "grad_norm": 406.0, "learning_rate": 8.916666666666667e-05, "loss": 14.3757, "step": 642 }, { "epoch": 0.026801717310658165, "grad_norm": 452.0, "learning_rate": 8.930555555555557e-05, "loss": 16.0006, "step": 643 }, { "epoch": 0.026843399608186404, "grad_norm": 466.0, "learning_rate": 8.944444444444446e-05, "loss": 15.8758, "step": 644 }, { "epoch": 0.026885081905714644, "grad_norm": 215.0, "learning_rate": 8.958333333333335e-05, "loss": 10.189, "step": 645 }, { "epoch": 0.026926764203242883, "grad_norm": 1072.0, "learning_rate": 8.972222222222222e-05, "loss": 29.2557, "step": 646 }, { "epoch": 0.026968446500771123, "grad_norm": 426.0, "learning_rate": 8.986111111111111e-05, "loss": 14.1885, "step": 647 }, { "epoch": 0.027010128798299362, "grad_norm": 556.0, "learning_rate": 9e-05, "loss": 17.7512, "step": 648 }, { "epoch": 0.0270518110958276, "grad_norm": 72.5, "learning_rate": 9.01388888888889e-05, "loss": 5.222, "step": 649 }, { "epoch": 0.02709349339335584, "grad_norm": 446.0, "learning_rate": 9.027777777777779e-05, "loss": 15.8765, "step": 650 }, { "epoch": 0.02713517569088408, "grad_norm": 302.0, "learning_rate": 9.041666666666668e-05, "loss": 11.1886, "step": 651 }, { "epoch": 0.027176857988412323, "grad_norm": 318.0, "learning_rate": 9.055555555555556e-05, "loss": 13.1883, "step": 652 }, { "epoch": 0.027218540285940562, "grad_norm": 704.0, "learning_rate": 9.069444444444445e-05, "loss": 18.5043, "step": 653 }, { "epoch": 0.0272602225834688, "grad_norm": 266.0, "learning_rate": 9.083333333333334e-05, "loss": 11.8152, "step": 654 }, { "epoch": 0.02730190488099704, "grad_norm": 298.0, "learning_rate": 9.097222222222223e-05, "loss": 12.5008, "step": 655 }, { "epoch": 0.02734358717852528, "grad_norm": 229.0, "learning_rate": 9.111111111111112e-05, "loss": 10.1888, "step": 656 }, { "epoch": 0.02738526947605352, "grad_norm": 128.0, "learning_rate": 9.125e-05, "loss": 7.972, "step": 657 }, { "epoch": 0.02742695177358176, "grad_norm": 362.0, "learning_rate": 9.138888888888889e-05, "loss": 14.0667, "step": 658 }, { "epoch": 0.02746863407111, "grad_norm": 406.0, "learning_rate": 9.152777777777778e-05, "loss": 14.0646, "step": 659 }, { "epoch": 0.027510316368638238, "grad_norm": 676.0, "learning_rate": 9.166666666666667e-05, "loss": 21.8777, "step": 660 }, { "epoch": 0.02755199866616648, "grad_norm": 420.0, "learning_rate": 9.180555555555556e-05, "loss": 14.6275, "step": 661 }, { "epoch": 0.02759368096369472, "grad_norm": 109.0, "learning_rate": 9.194444444444445e-05, "loss": 8.6901, "step": 662 }, { "epoch": 0.02763536326122296, "grad_norm": 428.0, "learning_rate": 9.208333333333333e-05, "loss": 15.0668, "step": 663 }, { "epoch": 0.0276770455587512, "grad_norm": 596.0, "learning_rate": 9.222222222222223e-05, "loss": 20.5007, "step": 664 }, { "epoch": 0.02771872785627944, "grad_norm": 326.0, "learning_rate": 9.236111111111112e-05, "loss": 13.3777, "step": 665 }, { "epoch": 0.027760410153807678, "grad_norm": 384.0, "learning_rate": 9.250000000000001e-05, "loss": 14.9382, "step": 666 }, { "epoch": 0.027802092451335917, "grad_norm": 656.0, "learning_rate": 9.26388888888889e-05, "loss": 20.501, "step": 667 }, { "epoch": 0.027843774748864156, "grad_norm": 328.0, "learning_rate": 9.277777777777778e-05, "loss": 13.8136, "step": 668 }, { "epoch": 0.027885457046392396, "grad_norm": 540.0, "learning_rate": 9.291666666666667e-05, "loss": 19.2518, "step": 669 }, { "epoch": 0.02792713934392064, "grad_norm": 412.0, "learning_rate": 9.305555555555556e-05, "loss": 14.9384, "step": 670 }, { "epoch": 0.027968821641448878, "grad_norm": 338.0, "learning_rate": 9.319444444444445e-05, "loss": 11.7509, "step": 671 }, { "epoch": 0.028010503938977117, "grad_norm": 568.0, "learning_rate": 9.333333333333334e-05, "loss": 15.8841, "step": 672 }, { "epoch": 0.028052186236505357, "grad_norm": 272.0, "learning_rate": 9.347222222222223e-05, "loss": 11.6261, "step": 673 }, { "epoch": 0.028093868534033596, "grad_norm": 304.0, "learning_rate": 9.361111111111111e-05, "loss": 12.6911, "step": 674 }, { "epoch": 0.028135550831561836, "grad_norm": 322.0, "learning_rate": 9.375e-05, "loss": 13.5632, "step": 675 }, { "epoch": 0.028177233129090075, "grad_norm": 378.0, "learning_rate": 9.388888888888889e-05, "loss": 14.5025, "step": 676 }, { "epoch": 0.028218915426618314, "grad_norm": 510.0, "learning_rate": 9.402777777777778e-05, "loss": 17.7532, "step": 677 }, { "epoch": 0.028260597724146554, "grad_norm": 482.0, "learning_rate": 9.416666666666667e-05, "loss": 17.2516, "step": 678 }, { "epoch": 0.028302280021674793, "grad_norm": 800.0, "learning_rate": 9.430555555555555e-05, "loss": 24.6265, "step": 679 }, { "epoch": 0.028343962319203036, "grad_norm": 728.0, "learning_rate": 9.444444444444444e-05, "loss": 22.7545, "step": 680 }, { "epoch": 0.028385644616731275, "grad_norm": 616.0, "learning_rate": 9.458333333333333e-05, "loss": 19.5023, "step": 681 }, { "epoch": 0.028427326914259515, "grad_norm": 404.0, "learning_rate": 9.472222222222222e-05, "loss": 13.2513, "step": 682 }, { "epoch": 0.028469009211787754, "grad_norm": 672.0, "learning_rate": 9.486111111111111e-05, "loss": 21.5008, "step": 683 }, { "epoch": 0.028510691509315993, "grad_norm": 127.5, "learning_rate": 9.5e-05, "loss": 7.2831, "step": 684 }, { "epoch": 0.028552373806844233, "grad_norm": 1192.0, "learning_rate": 9.513888888888888e-05, "loss": 32.0055, "step": 685 }, { "epoch": 0.028594056104372472, "grad_norm": 195.0, "learning_rate": 9.527777777777777e-05, "loss": 8.6261, "step": 686 }, { "epoch": 0.02863573840190071, "grad_norm": 306.0, "learning_rate": 9.541666666666668e-05, "loss": 13.0637, "step": 687 }, { "epoch": 0.02867742069942895, "grad_norm": 576.0, "learning_rate": 9.555555555555557e-05, "loss": 16.6327, "step": 688 }, { "epoch": 0.028719102996957194, "grad_norm": 330.0, "learning_rate": 9.569444444444446e-05, "loss": 13.4384, "step": 689 }, { "epoch": 0.028760785294485433, "grad_norm": 724.0, "learning_rate": 9.583333333333334e-05, "loss": 22.8765, "step": 690 }, { "epoch": 0.028802467592013672, "grad_norm": 324.0, "learning_rate": 9.597222222222223e-05, "loss": 14.4389, "step": 691 }, { "epoch": 0.028844149889541912, "grad_norm": 688.0, "learning_rate": 9.611111111111112e-05, "loss": 22.2514, "step": 692 }, { "epoch": 0.02888583218707015, "grad_norm": 498.0, "learning_rate": 9.625000000000001e-05, "loss": 15.8134, "step": 693 }, { "epoch": 0.02892751448459839, "grad_norm": 448.0, "learning_rate": 9.63888888888889e-05, "loss": 15.8765, "step": 694 }, { "epoch": 0.02896919678212663, "grad_norm": 390.0, "learning_rate": 9.652777777777779e-05, "loss": 14.3755, "step": 695 }, { "epoch": 0.02901087907965487, "grad_norm": 410.0, "learning_rate": 9.666666666666667e-05, "loss": 15.3755, "step": 696 }, { "epoch": 0.02905256137718311, "grad_norm": 388.0, "learning_rate": 9.680555555555556e-05, "loss": 15.0006, "step": 697 }, { "epoch": 0.02909424367471135, "grad_norm": 470.0, "learning_rate": 9.694444444444445e-05, "loss": 16.7509, "step": 698 }, { "epoch": 0.02913592597223959, "grad_norm": 484.0, "learning_rate": 9.708333333333334e-05, "loss": 17.5025, "step": 699 }, { "epoch": 0.02917760826976783, "grad_norm": 388.0, "learning_rate": 9.722222222222223e-05, "loss": 14.5013, "step": 700 }, { "epoch": 0.02921929056729607, "grad_norm": 412.0, "learning_rate": 9.736111111111111e-05, "loss": 13.3132, "step": 701 }, { "epoch": 0.02926097286482431, "grad_norm": 466.0, "learning_rate": 9.75e-05, "loss": 17.1264, "step": 702 }, { "epoch": 0.02930265516235255, "grad_norm": 1272.0, "learning_rate": 9.763888888888889e-05, "loss": 29.1321, "step": 703 }, { "epoch": 0.029344337459880788, "grad_norm": 404.0, "learning_rate": 9.777777777777778e-05, "loss": 13.6267, "step": 704 }, { "epoch": 0.029386019757409027, "grad_norm": 239.0, "learning_rate": 9.791666666666667e-05, "loss": 11.2509, "step": 705 }, { "epoch": 0.029427702054937267, "grad_norm": 1056.0, "learning_rate": 9.805555555555556e-05, "loss": 27.3768, "step": 706 }, { "epoch": 0.02946938435246551, "grad_norm": 274.0, "learning_rate": 9.819444444444444e-05, "loss": 12.0005, "step": 707 }, { "epoch": 0.02951106664999375, "grad_norm": 410.0, "learning_rate": 9.833333333333333e-05, "loss": 14.5634, "step": 708 }, { "epoch": 0.029552748947521988, "grad_norm": 233.0, "learning_rate": 9.847222222222223e-05, "loss": 10.626, "step": 709 }, { "epoch": 0.029594431245050228, "grad_norm": 816.0, "learning_rate": 9.861111111111112e-05, "loss": 20.3805, "step": 710 }, { "epoch": 0.029636113542578467, "grad_norm": 306.0, "learning_rate": 9.875000000000002e-05, "loss": 12.1894, "step": 711 }, { "epoch": 0.029677795840106706, "grad_norm": 668.0, "learning_rate": 9.888888888888889e-05, "loss": 20.1256, "step": 712 }, { "epoch": 0.029719478137634946, "grad_norm": 396.0, "learning_rate": 9.902777777777778e-05, "loss": 14.7511, "step": 713 }, { "epoch": 0.029761160435163185, "grad_norm": 226.0, "learning_rate": 9.916666666666667e-05, "loss": 11.0036, "step": 714 }, { "epoch": 0.029802842732691424, "grad_norm": 416.0, "learning_rate": 9.930555555555556e-05, "loss": 15.0012, "step": 715 }, { "epoch": 0.029844525030219667, "grad_norm": 231.0, "learning_rate": 9.944444444444446e-05, "loss": 11.2522, "step": 716 }, { "epoch": 0.029886207327747907, "grad_norm": 486.0, "learning_rate": 9.958333333333335e-05, "loss": 16.876, "step": 717 }, { "epoch": 0.029927889625276146, "grad_norm": 394.0, "learning_rate": 9.972222222222222e-05, "loss": 15.6258, "step": 718 }, { "epoch": 0.029969571922804385, "grad_norm": 174.0, "learning_rate": 9.986111111111111e-05, "loss": 9.5011, "step": 719 }, { "epoch": 0.030011254220332625, "grad_norm": 294.0, "learning_rate": 0.0001, "loss": 12.4381, "step": 720 }, { "epoch": 0.030052936517860864, "grad_norm": 199.0, "learning_rate": 9.999999954437278e-05, "loss": 9.1268, "step": 721 }, { "epoch": 0.030094618815389104, "grad_norm": 516.0, "learning_rate": 9.999999817749113e-05, "loss": 18.5007, "step": 722 }, { "epoch": 0.030136301112917343, "grad_norm": 354.0, "learning_rate": 9.999999589935508e-05, "loss": 13.5634, "step": 723 }, { "epoch": 0.030177983410445582, "grad_norm": 384.0, "learning_rate": 9.999999270996466e-05, "loss": 14.0636, "step": 724 }, { "epoch": 0.030219665707973825, "grad_norm": 480.0, "learning_rate": 9.999998860931994e-05, "loss": 14.0019, "step": 725 }, { "epoch": 0.030261348005502064, "grad_norm": 340.0, "learning_rate": 9.999998359742098e-05, "loss": 12.1883, "step": 726 }, { "epoch": 0.030303030303030304, "grad_norm": 390.0, "learning_rate": 9.999997767426788e-05, "loss": 13.6886, "step": 727 }, { "epoch": 0.030344712600558543, "grad_norm": 282.0, "learning_rate": 9.999997083986076e-05, "loss": 11.3756, "step": 728 }, { "epoch": 0.030386394898086783, "grad_norm": 390.0, "learning_rate": 9.999996309419972e-05, "loss": 13.8757, "step": 729 }, { "epoch": 0.030428077195615022, "grad_norm": 692.0, "learning_rate": 9.999995443728493e-05, "loss": 19.8759, "step": 730 }, { "epoch": 0.03046975949314326, "grad_norm": 229.0, "learning_rate": 9.999994486911652e-05, "loss": 10.7511, "step": 731 }, { "epoch": 0.0305114417906715, "grad_norm": 580.0, "learning_rate": 9.999993438969468e-05, "loss": 19.0005, "step": 732 }, { "epoch": 0.03055312408819974, "grad_norm": 512.0, "learning_rate": 9.99999229990196e-05, "loss": 18.2511, "step": 733 }, { "epoch": 0.030594806385727983, "grad_norm": 175.0, "learning_rate": 9.999991069709147e-05, "loss": 9.4384, "step": 734 }, { "epoch": 0.030636488683256222, "grad_norm": 374.0, "learning_rate": 9.999989748391054e-05, "loss": 14.0011, "step": 735 }, { "epoch": 0.03067817098078446, "grad_norm": 448.0, "learning_rate": 9.999988335947703e-05, "loss": 15.0638, "step": 736 }, { "epoch": 0.0307198532783127, "grad_norm": 322.0, "learning_rate": 9.999986832379122e-05, "loss": 12.3139, "step": 737 }, { "epoch": 0.03076153557584094, "grad_norm": 330.0, "learning_rate": 9.999985237685336e-05, "loss": 12.3776, "step": 738 }, { "epoch": 0.03080321787336918, "grad_norm": 268.0, "learning_rate": 9.999983551866377e-05, "loss": 11.5637, "step": 739 }, { "epoch": 0.03084490017089742, "grad_norm": 362.0, "learning_rate": 9.999981774922271e-05, "loss": 12.9388, "step": 740 }, { "epoch": 0.03088658246842566, "grad_norm": 556.0, "learning_rate": 9.999979906853056e-05, "loss": 18.7512, "step": 741 }, { "epoch": 0.030928264765953898, "grad_norm": 556.0, "learning_rate": 9.999977947658763e-05, "loss": 16.1272, "step": 742 }, { "epoch": 0.03096994706348214, "grad_norm": 440.0, "learning_rate": 9.999975897339427e-05, "loss": 15.2576, "step": 743 }, { "epoch": 0.03101162936101038, "grad_norm": 458.0, "learning_rate": 9.999973755895087e-05, "loss": 11.632, "step": 744 }, { "epoch": 0.03105331165853862, "grad_norm": 356.0, "learning_rate": 9.999971523325781e-05, "loss": 13.0006, "step": 745 }, { "epoch": 0.03109499395606686, "grad_norm": 450.0, "learning_rate": 9.99996919963155e-05, "loss": 16.6265, "step": 746 }, { "epoch": 0.0311366762535951, "grad_norm": 864.0, "learning_rate": 9.999966784812437e-05, "loss": 24.3754, "step": 747 }, { "epoch": 0.031178358551123338, "grad_norm": 724.0, "learning_rate": 9.999964278868486e-05, "loss": 22.5017, "step": 748 }, { "epoch": 0.031220040848651577, "grad_norm": 692.0, "learning_rate": 9.999961681799741e-05, "loss": 23.2506, "step": 749 }, { "epoch": 0.031261723146179816, "grad_norm": 732.0, "learning_rate": 9.999958993606251e-05, "loss": 21.8768, "step": 750 }, { "epoch": 0.031303405443708056, "grad_norm": 508.0, "learning_rate": 9.999956214288064e-05, "loss": 15.5026, "step": 751 }, { "epoch": 0.031345087741236295, "grad_norm": 326.0, "learning_rate": 9.999953343845232e-05, "loss": 12.3133, "step": 752 }, { "epoch": 0.031386770038764535, "grad_norm": 210.0, "learning_rate": 9.999950382277806e-05, "loss": 9.6261, "step": 753 }, { "epoch": 0.031428452336292774, "grad_norm": 368.0, "learning_rate": 9.99994732958584e-05, "loss": 13.0008, "step": 754 }, { "epoch": 0.03147013463382101, "grad_norm": 684.0, "learning_rate": 9.999944185769391e-05, "loss": 20.5006, "step": 755 }, { "epoch": 0.03151181693134925, "grad_norm": 262.0, "learning_rate": 9.999940950828514e-05, "loss": 11.6257, "step": 756 }, { "epoch": 0.0315534992288775, "grad_norm": 396.0, "learning_rate": 9.999937624763272e-05, "loss": 15.3765, "step": 757 }, { "epoch": 0.03159518152640574, "grad_norm": 458.0, "learning_rate": 9.99993420757372e-05, "loss": 13.7553, "step": 758 }, { "epoch": 0.03163686382393398, "grad_norm": 208.0, "learning_rate": 9.999930699259925e-05, "loss": 8.6297, "step": 759 }, { "epoch": 0.03167854612146222, "grad_norm": 180.0, "learning_rate": 9.999927099821949e-05, "loss": 10.0635, "step": 760 }, { "epoch": 0.03172022841899046, "grad_norm": 434.0, "learning_rate": 9.999923409259857e-05, "loss": 16.0009, "step": 761 }, { "epoch": 0.031761910716518696, "grad_norm": 336.0, "learning_rate": 9.999919627573716e-05, "loss": 12.0648, "step": 762 }, { "epoch": 0.031803593014046935, "grad_norm": 318.0, "learning_rate": 9.999915754763598e-05, "loss": 13.3756, "step": 763 }, { "epoch": 0.031845275311575175, "grad_norm": 304.0, "learning_rate": 9.99991179082957e-05, "loss": 13.8175, "step": 764 }, { "epoch": 0.031886957609103414, "grad_norm": 398.0, "learning_rate": 9.999907735771706e-05, "loss": 13.5052, "step": 765 }, { "epoch": 0.03192863990663165, "grad_norm": 676.0, "learning_rate": 9.999903589590081e-05, "loss": 19.379, "step": 766 }, { "epoch": 0.03197032220415989, "grad_norm": 468.0, "learning_rate": 9.999899352284768e-05, "loss": 14.1886, "step": 767 }, { "epoch": 0.03201200450168813, "grad_norm": 792.0, "learning_rate": 9.999895023855845e-05, "loss": 23.0009, "step": 768 }, { "epoch": 0.03205368679921637, "grad_norm": 640.0, "learning_rate": 9.999890604303392e-05, "loss": 21.3758, "step": 769 }, { "epoch": 0.03209536909674461, "grad_norm": 418.0, "learning_rate": 9.999886093627491e-05, "loss": 14.8757, "step": 770 }, { "epoch": 0.03213705139427285, "grad_norm": 580.0, "learning_rate": 9.999881491828219e-05, "loss": 15.4394, "step": 771 }, { "epoch": 0.03217873369180109, "grad_norm": 161.0, "learning_rate": 9.999876798905664e-05, "loss": 9.1889, "step": 772 }, { "epoch": 0.03222041598932933, "grad_norm": 780.0, "learning_rate": 9.99987201485991e-05, "loss": 23.7507, "step": 773 }, { "epoch": 0.03226209828685757, "grad_norm": 290.0, "learning_rate": 9.999867139691045e-05, "loss": 12.0658, "step": 774 }, { "epoch": 0.032303780584385815, "grad_norm": 296.0, "learning_rate": 9.999862173399159e-05, "loss": 12.8139, "step": 775 }, { "epoch": 0.032345462881914054, "grad_norm": 370.0, "learning_rate": 9.99985711598434e-05, "loss": 14.1255, "step": 776 }, { "epoch": 0.032387145179442293, "grad_norm": 350.0, "learning_rate": 9.99985196744668e-05, "loss": 13.7512, "step": 777 }, { "epoch": 0.03242882747697053, "grad_norm": 502.0, "learning_rate": 9.999846727786275e-05, "loss": 17.6266, "step": 778 }, { "epoch": 0.03247050977449877, "grad_norm": 584.0, "learning_rate": 9.99984139700322e-05, "loss": 21.5016, "step": 779 }, { "epoch": 0.03251219207202701, "grad_norm": 414.0, "learning_rate": 9.99983597509761e-05, "loss": 15.2512, "step": 780 }, { "epoch": 0.03255387436955525, "grad_norm": 460.0, "learning_rate": 9.999830462069548e-05, "loss": 14.7552, "step": 781 }, { "epoch": 0.03259555666708349, "grad_norm": 454.0, "learning_rate": 9.999824857919132e-05, "loss": 15.6891, "step": 782 }, { "epoch": 0.03263723896461173, "grad_norm": 588.0, "learning_rate": 9.999819162646462e-05, "loss": 21.0011, "step": 783 }, { "epoch": 0.03267892126213997, "grad_norm": 368.0, "learning_rate": 9.999813376251644e-05, "loss": 14.9381, "step": 784 }, { "epoch": 0.03272060355966821, "grad_norm": 364.0, "learning_rate": 9.999807498734785e-05, "loss": 13.8758, "step": 785 }, { "epoch": 0.03276228585719645, "grad_norm": 438.0, "learning_rate": 9.99980153009599e-05, "loss": 12.8765, "step": 786 }, { "epoch": 0.03280396815472469, "grad_norm": 672.0, "learning_rate": 9.999795470335367e-05, "loss": 20.0038, "step": 787 }, { "epoch": 0.03284565045225293, "grad_norm": 244.0, "learning_rate": 9.999789319453029e-05, "loss": 11.1256, "step": 788 }, { "epoch": 0.032887332749781166, "grad_norm": 364.0, "learning_rate": 9.999783077449087e-05, "loss": 14.876, "step": 789 }, { "epoch": 0.032929015047309405, "grad_norm": 382.0, "learning_rate": 9.999776744323654e-05, "loss": 14.8761, "step": 790 }, { "epoch": 0.032970697344837645, "grad_norm": 932.0, "learning_rate": 9.999770320076845e-05, "loss": 26.1268, "step": 791 }, { "epoch": 0.033012379642365884, "grad_norm": 388.0, "learning_rate": 9.999763804708779e-05, "loss": 14.0632, "step": 792 }, { "epoch": 0.033054061939894124, "grad_norm": 492.0, "learning_rate": 9.999757198219575e-05, "loss": 16.8761, "step": 793 }, { "epoch": 0.03309574423742237, "grad_norm": 2256.0, "learning_rate": 9.99975050060935e-05, "loss": 50.0052, "step": 794 }, { "epoch": 0.03313742653495061, "grad_norm": 231.0, "learning_rate": 9.999743711878229e-05, "loss": 12.314, "step": 795 }, { "epoch": 0.03317910883247885, "grad_norm": 940.0, "learning_rate": 9.999736832026337e-05, "loss": 25.5047, "step": 796 }, { "epoch": 0.03322079113000709, "grad_norm": 450.0, "learning_rate": 9.999729861053795e-05, "loss": 15.751, "step": 797 }, { "epoch": 0.03326247342753533, "grad_norm": 167.0, "learning_rate": 9.999722798960733e-05, "loss": 9.3765, "step": 798 }, { "epoch": 0.03330415572506357, "grad_norm": 336.0, "learning_rate": 9.999715645747279e-05, "loss": 12.0636, "step": 799 }, { "epoch": 0.033345838022591806, "grad_norm": 608.0, "learning_rate": 9.999708401413564e-05, "loss": 18.0014, "step": 800 }, { "epoch": 0.033387520320120045, "grad_norm": 388.0, "learning_rate": 9.999701065959719e-05, "loss": 13.5008, "step": 801 }, { "epoch": 0.033429202617648285, "grad_norm": 572.0, "learning_rate": 9.999693639385878e-05, "loss": 19.8758, "step": 802 }, { "epoch": 0.033470884915176524, "grad_norm": 217.0, "learning_rate": 9.999686121692179e-05, "loss": 10.6258, "step": 803 }, { "epoch": 0.033512567212704764, "grad_norm": 548.0, "learning_rate": 9.999678512878754e-05, "loss": 16.501, "step": 804 }, { "epoch": 0.033554249510233, "grad_norm": 414.0, "learning_rate": 9.999670812945745e-05, "loss": 15.752, "step": 805 }, { "epoch": 0.03359593180776124, "grad_norm": 360.0, "learning_rate": 9.999663021893293e-05, "loss": 13.6257, "step": 806 }, { "epoch": 0.03363761410528948, "grad_norm": 332.0, "learning_rate": 9.999655139721537e-05, "loss": 13.1887, "step": 807 }, { "epoch": 0.03367929640281772, "grad_norm": 1600.0, "learning_rate": 9.999647166430623e-05, "loss": 39.0029, "step": 808 }, { "epoch": 0.03372097870034596, "grad_norm": 512.0, "learning_rate": 9.999639102020695e-05, "loss": 15.8765, "step": 809 }, { "epoch": 0.0337626609978742, "grad_norm": 284.0, "learning_rate": 9.9996309464919e-05, "loss": 12.001, "step": 810 }, { "epoch": 0.03380434329540244, "grad_norm": 133.0, "learning_rate": 9.999622699844388e-05, "loss": 8.4394, "step": 811 }, { "epoch": 0.033846025592930686, "grad_norm": 238.0, "learning_rate": 9.99961436207831e-05, "loss": 10.3755, "step": 812 }, { "epoch": 0.033887707890458925, "grad_norm": 968.0, "learning_rate": 9.999605933193814e-05, "loss": 23.7567, "step": 813 }, { "epoch": 0.033929390187987164, "grad_norm": 386.0, "learning_rate": 9.999597413191055e-05, "loss": 13.3758, "step": 814 }, { "epoch": 0.033971072485515404, "grad_norm": 226.0, "learning_rate": 9.999588802070193e-05, "loss": 11.0006, "step": 815 }, { "epoch": 0.03401275478304364, "grad_norm": 179.0, "learning_rate": 9.999580099831379e-05, "loss": 9.4383, "step": 816 }, { "epoch": 0.03405443708057188, "grad_norm": 768.0, "learning_rate": 9.999571306474773e-05, "loss": 22.6259, "step": 817 }, { "epoch": 0.03409611937810012, "grad_norm": 320.0, "learning_rate": 9.999562422000538e-05, "loss": 11.1896, "step": 818 }, { "epoch": 0.03413780167562836, "grad_norm": 422.0, "learning_rate": 9.999553446408834e-05, "loss": 12.8142, "step": 819 }, { "epoch": 0.0341794839731566, "grad_norm": 244.0, "learning_rate": 9.999544379699824e-05, "loss": 10.6274, "step": 820 }, { "epoch": 0.03422116627068484, "grad_norm": 454.0, "learning_rate": 9.999535221873673e-05, "loss": 15.3753, "step": 821 }, { "epoch": 0.03426284856821308, "grad_norm": 290.0, "learning_rate": 9.999525972930551e-05, "loss": 12.6904, "step": 822 }, { "epoch": 0.03430453086574132, "grad_norm": 1480.0, "learning_rate": 9.999516632870621e-05, "loss": 35.5058, "step": 823 }, { "epoch": 0.03434621316326956, "grad_norm": 628.0, "learning_rate": 9.999507201694058e-05, "loss": 19.2508, "step": 824 }, { "epoch": 0.0343878954607978, "grad_norm": 740.0, "learning_rate": 9.999497679401033e-05, "loss": 24.376, "step": 825 }, { "epoch": 0.03442957775832604, "grad_norm": 314.0, "learning_rate": 9.99948806599172e-05, "loss": 11.5631, "step": 826 }, { "epoch": 0.034471260055854276, "grad_norm": 656.0, "learning_rate": 9.999478361466292e-05, "loss": 18.0008, "step": 827 }, { "epoch": 0.034512942353382516, "grad_norm": 127.5, "learning_rate": 9.999468565824927e-05, "loss": 8.1255, "step": 828 }, { "epoch": 0.034554624650910755, "grad_norm": 632.0, "learning_rate": 9.999458679067804e-05, "loss": 18.2527, "step": 829 }, { "epoch": 0.034596306948439, "grad_norm": 217.0, "learning_rate": 9.999448701195102e-05, "loss": 10.8755, "step": 830 }, { "epoch": 0.03463798924596724, "grad_norm": 404.0, "learning_rate": 9.999438632207003e-05, "loss": 16.1255, "step": 831 }, { "epoch": 0.03467967154349548, "grad_norm": 386.0, "learning_rate": 9.999428472103694e-05, "loss": 12.2511, "step": 832 }, { "epoch": 0.03472135384102372, "grad_norm": 324.0, "learning_rate": 9.999418220885355e-05, "loss": 9.1261, "step": 833 }, { "epoch": 0.03476303613855196, "grad_norm": 67.0, "learning_rate": 9.999407878552175e-05, "loss": 7.3448, "step": 834 }, { "epoch": 0.0348047184360802, "grad_norm": 844.0, "learning_rate": 9.999397445104343e-05, "loss": 23.6261, "step": 835 }, { "epoch": 0.03484640073360844, "grad_norm": 470.0, "learning_rate": 9.999386920542049e-05, "loss": 16.2512, "step": 836 }, { "epoch": 0.03488808303113668, "grad_norm": 1020.0, "learning_rate": 9.999376304865484e-05, "loss": 25.506, "step": 837 }, { "epoch": 0.034929765328664916, "grad_norm": 302.0, "learning_rate": 9.999365598074843e-05, "loss": 12.1897, "step": 838 }, { "epoch": 0.034971447626193156, "grad_norm": 736.0, "learning_rate": 9.99935480017032e-05, "loss": 23.255, "step": 839 }, { "epoch": 0.035013129923721395, "grad_norm": 458.0, "learning_rate": 9.99934391115211e-05, "loss": 11.6306, "step": 840 }, { "epoch": 0.035054812221249634, "grad_norm": 732.0, "learning_rate": 9.999332931020415e-05, "loss": 17.2571, "step": 841 }, { "epoch": 0.035096494518777874, "grad_norm": 960.0, "learning_rate": 9.999321859775435e-05, "loss": 23.5005, "step": 842 }, { "epoch": 0.03513817681630611, "grad_norm": 868.0, "learning_rate": 9.999310697417368e-05, "loss": 25.7506, "step": 843 }, { "epoch": 0.03517985911383435, "grad_norm": 880.0, "learning_rate": 9.999299443946422e-05, "loss": 25.5009, "step": 844 }, { "epoch": 0.03522154141136259, "grad_norm": 280.0, "learning_rate": 9.999288099362799e-05, "loss": 10.6885, "step": 845 }, { "epoch": 0.03526322370889083, "grad_norm": 135.0, "learning_rate": 9.999276663666705e-05, "loss": 6.9086, "step": 846 }, { "epoch": 0.03530490600641907, "grad_norm": 358.0, "learning_rate": 9.999265136858352e-05, "loss": 13.5631, "step": 847 }, { "epoch": 0.03534658830394732, "grad_norm": 528.0, "learning_rate": 9.999253518937945e-05, "loss": 18.6265, "step": 848 }, { "epoch": 0.035388270601475556, "grad_norm": 332.0, "learning_rate": 9.999241809905702e-05, "loss": 13.7504, "step": 849 }, { "epoch": 0.035429952899003796, "grad_norm": 314.0, "learning_rate": 9.999230009761832e-05, "loss": 10.4389, "step": 850 }, { "epoch": 0.035471635196532035, "grad_norm": 484.0, "learning_rate": 9.99921811850655e-05, "loss": 17.3757, "step": 851 }, { "epoch": 0.035513317494060274, "grad_norm": 478.0, "learning_rate": 9.999206136140076e-05, "loss": 17.0011, "step": 852 }, { "epoch": 0.035554999791588514, "grad_norm": 164.0, "learning_rate": 9.999194062662627e-05, "loss": 9.0004, "step": 853 }, { "epoch": 0.03559668208911675, "grad_norm": 960.0, "learning_rate": 9.999181898074421e-05, "loss": 25.5015, "step": 854 }, { "epoch": 0.03563836438664499, "grad_norm": 338.0, "learning_rate": 9.999169642375681e-05, "loss": 12.2506, "step": 855 }, { "epoch": 0.03568004668417323, "grad_norm": 1672.0, "learning_rate": 9.99915729556663e-05, "loss": 42.0003, "step": 856 }, { "epoch": 0.03572172898170147, "grad_norm": 250.0, "learning_rate": 9.999144857647495e-05, "loss": 12.3758, "step": 857 }, { "epoch": 0.03576341127922971, "grad_norm": 462.0, "learning_rate": 9.999132328618501e-05, "loss": 16.6257, "step": 858 }, { "epoch": 0.03580509357675795, "grad_norm": 430.0, "learning_rate": 9.999119708479876e-05, "loss": 16.5003, "step": 859 }, { "epoch": 0.03584677587428619, "grad_norm": 560.0, "learning_rate": 9.99910699723185e-05, "loss": 18.1279, "step": 860 }, { "epoch": 0.03588845817181443, "grad_norm": 1224.0, "learning_rate": 9.999094194874656e-05, "loss": 31.6274, "step": 861 }, { "epoch": 0.03593014046934267, "grad_norm": 235.0, "learning_rate": 9.999081301408526e-05, "loss": 11.6892, "step": 862 }, { "epoch": 0.03597182276687091, "grad_norm": 350.0, "learning_rate": 9.999068316833695e-05, "loss": 12.7529, "step": 863 }, { "epoch": 0.03601350506439915, "grad_norm": 338.0, "learning_rate": 9.999055241150401e-05, "loss": 13.1898, "step": 864 }, { "epoch": 0.036055187361927386, "grad_norm": 446.0, "learning_rate": 9.999042074358882e-05, "loss": 15.376, "step": 865 }, { "epoch": 0.03609686965945563, "grad_norm": 167.0, "learning_rate": 9.999028816459377e-05, "loss": 8.4379, "step": 866 }, { "epoch": 0.03613855195698387, "grad_norm": 288.0, "learning_rate": 9.999015467452128e-05, "loss": 12.8773, "step": 867 }, { "epoch": 0.03618023425451211, "grad_norm": 454.0, "learning_rate": 9.999002027337377e-05, "loss": 18.2532, "step": 868 }, { "epoch": 0.03622191655204035, "grad_norm": 157.0, "learning_rate": 9.998988496115372e-05, "loss": 8.3767, "step": 869 }, { "epoch": 0.03626359884956859, "grad_norm": 334.0, "learning_rate": 9.998974873786357e-05, "loss": 13.4382, "step": 870 }, { "epoch": 0.03630528114709683, "grad_norm": 444.0, "learning_rate": 9.998961160350582e-05, "loss": 14.9379, "step": 871 }, { "epoch": 0.03634696344462507, "grad_norm": 2040.0, "learning_rate": 9.998947355808295e-05, "loss": 46.0064, "step": 872 }, { "epoch": 0.03638864574215331, "grad_norm": 176.0, "learning_rate": 9.99893346015975e-05, "loss": 8.5009, "step": 873 }, { "epoch": 0.03643032803968155, "grad_norm": 75.5, "learning_rate": 9.998919473405197e-05, "loss": 6.3773, "step": 874 }, { "epoch": 0.03647201033720979, "grad_norm": 588.0, "learning_rate": 9.998905395544895e-05, "loss": 18.1263, "step": 875 }, { "epoch": 0.036513692634738026, "grad_norm": 314.0, "learning_rate": 9.998891226579096e-05, "loss": 11.1282, "step": 876 }, { "epoch": 0.036555374932266266, "grad_norm": 304.0, "learning_rate": 9.998876966508063e-05, "loss": 13.6259, "step": 877 }, { "epoch": 0.036597057229794505, "grad_norm": 420.0, "learning_rate": 9.998862615332052e-05, "loss": 14.501, "step": 878 }, { "epoch": 0.036638739527322745, "grad_norm": 418.0, "learning_rate": 9.998848173051327e-05, "loss": 13.814, "step": 879 }, { "epoch": 0.036680421824850984, "grad_norm": 640.0, "learning_rate": 9.99883363966615e-05, "loss": 19.0008, "step": 880 }, { "epoch": 0.03672210412237922, "grad_norm": 262.0, "learning_rate": 9.998819015176786e-05, "loss": 10.1266, "step": 881 }, { "epoch": 0.03676378641990746, "grad_norm": 1448.0, "learning_rate": 9.998804299583502e-05, "loss": 33.7555, "step": 882 }, { "epoch": 0.0368054687174357, "grad_norm": 580.0, "learning_rate": 9.998789492886565e-05, "loss": 17.5041, "step": 883 }, { "epoch": 0.03684715101496394, "grad_norm": 416.0, "learning_rate": 9.998774595086247e-05, "loss": 14.2525, "step": 884 }, { "epoch": 0.03688883331249219, "grad_norm": 312.0, "learning_rate": 9.998759606182817e-05, "loss": 12.0008, "step": 885 }, { "epoch": 0.03693051561002043, "grad_norm": 358.0, "learning_rate": 9.99874452617655e-05, "loss": 13.6904, "step": 886 }, { "epoch": 0.036972197907548666, "grad_norm": 980.0, "learning_rate": 9.998729355067719e-05, "loss": 22.7557, "step": 887 }, { "epoch": 0.037013880205076906, "grad_norm": 488.0, "learning_rate": 9.998714092856605e-05, "loss": 17.1271, "step": 888 }, { "epoch": 0.037055562502605145, "grad_norm": 294.0, "learning_rate": 9.998698739543481e-05, "loss": 13.0632, "step": 889 }, { "epoch": 0.037097244800133385, "grad_norm": 107.5, "learning_rate": 9.998683295128627e-05, "loss": 8.5631, "step": 890 }, { "epoch": 0.037138927097661624, "grad_norm": 225.0, "learning_rate": 9.99866775961233e-05, "loss": 11.3136, "step": 891 }, { "epoch": 0.03718060939518986, "grad_norm": 596.0, "learning_rate": 9.998652132994865e-05, "loss": 18.0006, "step": 892 }, { "epoch": 0.0372222916927181, "grad_norm": 386.0, "learning_rate": 9.998636415276525e-05, "loss": 8.5636, "step": 893 }, { "epoch": 0.03726397399024634, "grad_norm": 180.0, "learning_rate": 9.99862060645759e-05, "loss": 9.9392, "step": 894 }, { "epoch": 0.03730565628777458, "grad_norm": 378.0, "learning_rate": 9.998604706538352e-05, "loss": 14.5011, "step": 895 }, { "epoch": 0.03734733858530282, "grad_norm": 426.0, "learning_rate": 9.9985887155191e-05, "loss": 14.7508, "step": 896 }, { "epoch": 0.03738902088283106, "grad_norm": 472.0, "learning_rate": 9.998572633400123e-05, "loss": 17.3772, "step": 897 }, { "epoch": 0.0374307031803593, "grad_norm": 221.0, "learning_rate": 9.998556460181718e-05, "loss": 10.1887, "step": 898 }, { "epoch": 0.03747238547788754, "grad_norm": 596.0, "learning_rate": 9.998540195864177e-05, "loss": 16.5007, "step": 899 }, { "epoch": 0.03751406777541578, "grad_norm": 454.0, "learning_rate": 9.998523840447795e-05, "loss": 13.7517, "step": 900 }, { "epoch": 0.03755575007294402, "grad_norm": 852.0, "learning_rate": 9.998507393932875e-05, "loss": 23.0018, "step": 901 }, { "epoch": 0.03759743237047226, "grad_norm": 472.0, "learning_rate": 9.998490856319713e-05, "loss": 17.0005, "step": 902 }, { "epoch": 0.0376391146680005, "grad_norm": 338.0, "learning_rate": 9.998474227608612e-05, "loss": 14.3758, "step": 903 }, { "epoch": 0.03768079696552874, "grad_norm": 292.0, "learning_rate": 9.998457507799874e-05, "loss": 12.1884, "step": 904 }, { "epoch": 0.03772247926305698, "grad_norm": 356.0, "learning_rate": 9.998440696893805e-05, "loss": 12.7504, "step": 905 }, { "epoch": 0.03776416156058522, "grad_norm": 245.0, "learning_rate": 9.99842379489071e-05, "loss": 10.5017, "step": 906 }, { "epoch": 0.03780584385811346, "grad_norm": 324.0, "learning_rate": 9.998406801790898e-05, "loss": 13.6258, "step": 907 }, { "epoch": 0.0378475261556417, "grad_norm": 212.0, "learning_rate": 9.998389717594677e-05, "loss": 10.8761, "step": 908 }, { "epoch": 0.03788920845316994, "grad_norm": 258.0, "learning_rate": 9.998372542302361e-05, "loss": 9.5011, "step": 909 }, { "epoch": 0.03793089075069818, "grad_norm": 436.0, "learning_rate": 9.998355275914262e-05, "loss": 15.6256, "step": 910 }, { "epoch": 0.03797257304822642, "grad_norm": 372.0, "learning_rate": 9.998337918430694e-05, "loss": 13.8766, "step": 911 }, { "epoch": 0.03801425534575466, "grad_norm": 764.0, "learning_rate": 9.998320469851971e-05, "loss": 21.5054, "step": 912 }, { "epoch": 0.0380559376432829, "grad_norm": 227.0, "learning_rate": 9.998302930178418e-05, "loss": 11.1265, "step": 913 }, { "epoch": 0.03809761994081114, "grad_norm": 330.0, "learning_rate": 9.998285299410348e-05, "loss": 13.001, "step": 914 }, { "epoch": 0.038139302238339376, "grad_norm": 828.0, "learning_rate": 9.998267577548085e-05, "loss": 23.254, "step": 915 }, { "epoch": 0.038180984535867615, "grad_norm": 616.0, "learning_rate": 9.998249764591951e-05, "loss": 19.2538, "step": 916 }, { "epoch": 0.038222666833395855, "grad_norm": 462.0, "learning_rate": 9.998231860542273e-05, "loss": 14.6256, "step": 917 }, { "epoch": 0.038264349130924094, "grad_norm": 348.0, "learning_rate": 9.998213865399376e-05, "loss": 12.5006, "step": 918 }, { "epoch": 0.03830603142845233, "grad_norm": 832.0, "learning_rate": 9.998195779163586e-05, "loss": 21.6261, "step": 919 }, { "epoch": 0.03834771372598057, "grad_norm": 572.0, "learning_rate": 9.998177601835235e-05, "loss": 15.0014, "step": 920 }, { "epoch": 0.03838939602350882, "grad_norm": 306.0, "learning_rate": 9.998159333414652e-05, "loss": 12.6893, "step": 921 }, { "epoch": 0.03843107832103706, "grad_norm": 470.0, "learning_rate": 9.998140973902173e-05, "loss": 16.1254, "step": 922 }, { "epoch": 0.0384727606185653, "grad_norm": 318.0, "learning_rate": 9.99812252329813e-05, "loss": 10.3154, "step": 923 }, { "epoch": 0.03851444291609354, "grad_norm": 628.0, "learning_rate": 9.998103981602862e-05, "loss": 20.3757, "step": 924 }, { "epoch": 0.03855612521362178, "grad_norm": 452.0, "learning_rate": 9.998085348816704e-05, "loss": 11.814, "step": 925 }, { "epoch": 0.038597807511150016, "grad_norm": 454.0, "learning_rate": 9.998066624939997e-05, "loss": 15.0011, "step": 926 }, { "epoch": 0.038639489808678255, "grad_norm": 1208.0, "learning_rate": 9.998047809973081e-05, "loss": 26.2559, "step": 927 }, { "epoch": 0.038681172106206495, "grad_norm": 632.0, "learning_rate": 9.998028903916302e-05, "loss": 18.8786, "step": 928 }, { "epoch": 0.038722854403734734, "grad_norm": 203.0, "learning_rate": 9.998009906770002e-05, "loss": 10.1881, "step": 929 }, { "epoch": 0.038764536701262974, "grad_norm": 564.0, "learning_rate": 9.997990818534527e-05, "loss": 17.2507, "step": 930 }, { "epoch": 0.03880621899879121, "grad_norm": 270.0, "learning_rate": 9.997971639210227e-05, "loss": 12.9397, "step": 931 }, { "epoch": 0.03884790129631945, "grad_norm": 416.0, "learning_rate": 9.997952368797448e-05, "loss": 12.5006, "step": 932 }, { "epoch": 0.03888958359384769, "grad_norm": 608.0, "learning_rate": 9.997933007296545e-05, "loss": 18.2514, "step": 933 }, { "epoch": 0.03893126589137593, "grad_norm": 338.0, "learning_rate": 9.99791355470787e-05, "loss": 14.3764, "step": 934 }, { "epoch": 0.03897294818890417, "grad_norm": 456.0, "learning_rate": 9.997894011031774e-05, "loss": 16.7517, "step": 935 }, { "epoch": 0.03901463048643241, "grad_norm": 318.0, "learning_rate": 9.997874376268619e-05, "loss": 13.5633, "step": 936 }, { "epoch": 0.03905631278396065, "grad_norm": 316.0, "learning_rate": 9.997854650418758e-05, "loss": 13.4384, "step": 937 }, { "epoch": 0.03909799508148889, "grad_norm": 229.0, "learning_rate": 9.997834833482553e-05, "loss": 10.5628, "step": 938 }, { "epoch": 0.039139677379017135, "grad_norm": 480.0, "learning_rate": 9.997814925460364e-05, "loss": 17.1255, "step": 939 }, { "epoch": 0.039181359676545374, "grad_norm": 202.0, "learning_rate": 9.997794926352555e-05, "loss": 9.8754, "step": 940 }, { "epoch": 0.039223041974073614, "grad_norm": 904.0, "learning_rate": 9.997774836159488e-05, "loss": 27.376, "step": 941 }, { "epoch": 0.03926472427160185, "grad_norm": 298.0, "learning_rate": 9.997754654881533e-05, "loss": 12.3758, "step": 942 }, { "epoch": 0.03930640656913009, "grad_norm": 900.0, "learning_rate": 9.997734382519055e-05, "loss": 24.3757, "step": 943 }, { "epoch": 0.03934808886665833, "grad_norm": 796.0, "learning_rate": 9.997714019072425e-05, "loss": 19.506, "step": 944 }, { "epoch": 0.03938977116418657, "grad_norm": 123.0, "learning_rate": 9.997693564542012e-05, "loss": 7.0963, "step": 945 }, { "epoch": 0.03943145346171481, "grad_norm": 192.0, "learning_rate": 9.99767301892819e-05, "loss": 10.3131, "step": 946 }, { "epoch": 0.03947313575924305, "grad_norm": 478.0, "learning_rate": 9.997652382231334e-05, "loss": 15.9384, "step": 947 }, { "epoch": 0.03951481805677129, "grad_norm": 482.0, "learning_rate": 9.997631654451821e-05, "loss": 14.8132, "step": 948 }, { "epoch": 0.03955650035429953, "grad_norm": 470.0, "learning_rate": 9.997610835590027e-05, "loss": 16.5008, "step": 949 }, { "epoch": 0.03959818265182777, "grad_norm": 320.0, "learning_rate": 9.997589925646331e-05, "loss": 14.2507, "step": 950 }, { "epoch": 0.03963986494935601, "grad_norm": 620.0, "learning_rate": 9.997568924621116e-05, "loss": 18.7543, "step": 951 }, { "epoch": 0.03968154724688425, "grad_norm": 422.0, "learning_rate": 9.997547832514762e-05, "loss": 14.4384, "step": 952 }, { "epoch": 0.039723229544412486, "grad_norm": 344.0, "learning_rate": 9.997526649327656e-05, "loss": 14.1881, "step": 953 }, { "epoch": 0.039764911841940725, "grad_norm": 233.0, "learning_rate": 9.997505375060183e-05, "loss": 10.3145, "step": 954 }, { "epoch": 0.039806594139468965, "grad_norm": 418.0, "learning_rate": 9.997484009712732e-05, "loss": 15.2524, "step": 955 }, { "epoch": 0.039848276436997204, "grad_norm": 420.0, "learning_rate": 9.99746255328569e-05, "loss": 13.8129, "step": 956 }, { "epoch": 0.039889958734525444, "grad_norm": 316.0, "learning_rate": 9.997441005779451e-05, "loss": 12.9382, "step": 957 }, { "epoch": 0.03993164103205369, "grad_norm": 648.0, "learning_rate": 9.997419367194404e-05, "loss": 20.5028, "step": 958 }, { "epoch": 0.03997332332958193, "grad_norm": 744.0, "learning_rate": 9.997397637530946e-05, "loss": 20.7527, "step": 959 }, { "epoch": 0.04001500562711017, "grad_norm": 344.0, "learning_rate": 9.997375816789474e-05, "loss": 12.3763, "step": 960 }, { "epoch": 0.04005668792463841, "grad_norm": 172.0, "learning_rate": 9.997353904970381e-05, "loss": 10.2515, "step": 961 }, { "epoch": 0.04009837022216665, "grad_norm": 1048.0, "learning_rate": 9.997331902074072e-05, "loss": 29.6254, "step": 962 }, { "epoch": 0.04014005251969489, "grad_norm": 272.0, "learning_rate": 9.997309808100946e-05, "loss": 11.1908, "step": 963 }, { "epoch": 0.040181734817223126, "grad_norm": 692.0, "learning_rate": 9.997287623051403e-05, "loss": 20.3759, "step": 964 }, { "epoch": 0.040223417114751366, "grad_norm": 556.0, "learning_rate": 9.99726534692585e-05, "loss": 18.3762, "step": 965 }, { "epoch": 0.040265099412279605, "grad_norm": 364.0, "learning_rate": 9.997242979724693e-05, "loss": 13.938, "step": 966 }, { "epoch": 0.040306781709807844, "grad_norm": 432.0, "learning_rate": 9.997220521448338e-05, "loss": 15.8128, "step": 967 }, { "epoch": 0.040348464007336084, "grad_norm": 180.0, "learning_rate": 9.997197972097196e-05, "loss": 9.7509, "step": 968 }, { "epoch": 0.04039014630486432, "grad_norm": 245.0, "learning_rate": 9.997175331671678e-05, "loss": 11.3753, "step": 969 }, { "epoch": 0.04043182860239256, "grad_norm": 500.0, "learning_rate": 9.997152600172195e-05, "loss": 16.5009, "step": 970 }, { "epoch": 0.0404735108999208, "grad_norm": 398.0, "learning_rate": 9.997129777599163e-05, "loss": 15.626, "step": 971 }, { "epoch": 0.04051519319744904, "grad_norm": 828.0, "learning_rate": 9.997106863952997e-05, "loss": 21.0057, "step": 972 }, { "epoch": 0.04055687549497728, "grad_norm": 1296.0, "learning_rate": 9.997083859234115e-05, "loss": 29.1304, "step": 973 }, { "epoch": 0.04059855779250552, "grad_norm": 197.0, "learning_rate": 9.997060763442937e-05, "loss": 10.252, "step": 974 }, { "epoch": 0.04064024009003376, "grad_norm": 402.0, "learning_rate": 9.99703757657988e-05, "loss": 15.0011, "step": 975 }, { "epoch": 0.040681922387562006, "grad_norm": 470.0, "learning_rate": 9.997014298645371e-05, "loss": 15.0666, "step": 976 }, { "epoch": 0.040723604685090245, "grad_norm": 258.0, "learning_rate": 9.996990929639834e-05, "loss": 12.3758, "step": 977 }, { "epoch": 0.040765286982618484, "grad_norm": 230.0, "learning_rate": 9.996967469563692e-05, "loss": 11.6254, "step": 978 }, { "epoch": 0.040806969280146724, "grad_norm": 400.0, "learning_rate": 9.996943918417376e-05, "loss": 12.2514, "step": 979 }, { "epoch": 0.04084865157767496, "grad_norm": 420.0, "learning_rate": 9.996920276201312e-05, "loss": 16.0019, "step": 980 }, { "epoch": 0.0408903338752032, "grad_norm": 392.0, "learning_rate": 9.996896542915932e-05, "loss": 14.8757, "step": 981 }, { "epoch": 0.04093201617273144, "grad_norm": 462.0, "learning_rate": 9.996872718561671e-05, "loss": 16.2516, "step": 982 }, { "epoch": 0.04097369847025968, "grad_norm": 336.0, "learning_rate": 9.996848803138961e-05, "loss": 12.8767, "step": 983 }, { "epoch": 0.04101538076778792, "grad_norm": 408.0, "learning_rate": 9.996824796648236e-05, "loss": 15.1884, "step": 984 }, { "epoch": 0.04105706306531616, "grad_norm": 528.0, "learning_rate": 9.996800699089937e-05, "loss": 16.7518, "step": 985 }, { "epoch": 0.0410987453628444, "grad_norm": 462.0, "learning_rate": 9.9967765104645e-05, "loss": 16.2506, "step": 986 }, { "epoch": 0.04114042766037264, "grad_norm": 436.0, "learning_rate": 9.99675223077237e-05, "loss": 15.877, "step": 987 }, { "epoch": 0.04118210995790088, "grad_norm": 872.0, "learning_rate": 9.996727860013985e-05, "loss": 21.7509, "step": 988 }, { "epoch": 0.04122379225542912, "grad_norm": 876.0, "learning_rate": 9.996703398189792e-05, "loss": 25.1263, "step": 989 }, { "epoch": 0.04126547455295736, "grad_norm": 237.0, "learning_rate": 9.996678845300236e-05, "loss": 10.1264, "step": 990 }, { "epoch": 0.041307156850485596, "grad_norm": 1288.0, "learning_rate": 9.996654201345765e-05, "loss": 29.6307, "step": 991 }, { "epoch": 0.041348839148013836, "grad_norm": 596.0, "learning_rate": 9.996629466326826e-05, "loss": 17.6274, "step": 992 }, { "epoch": 0.041390521445542075, "grad_norm": 142.0, "learning_rate": 9.996604640243872e-05, "loss": 9.5631, "step": 993 }, { "epoch": 0.04143220374307032, "grad_norm": 608.0, "learning_rate": 9.996579723097356e-05, "loss": 17.6262, "step": 994 }, { "epoch": 0.04147388604059856, "grad_norm": 380.0, "learning_rate": 9.99655471488773e-05, "loss": 14.5007, "step": 995 }, { "epoch": 0.0415155683381268, "grad_norm": 482.0, "learning_rate": 9.996529615615451e-05, "loss": 16.8762, "step": 996 }, { "epoch": 0.04155725063565504, "grad_norm": 668.0, "learning_rate": 9.996504425280977e-05, "loss": 21.626, "step": 997 }, { "epoch": 0.04159893293318328, "grad_norm": 238.0, "learning_rate": 9.996479143884765e-05, "loss": 11.188, "step": 998 }, { "epoch": 0.04164061523071152, "grad_norm": 226.0, "learning_rate": 9.996453771427276e-05, "loss": 10.8774, "step": 999 }, { "epoch": 0.04168229752823976, "grad_norm": 145.0, "learning_rate": 9.996428307908976e-05, "loss": 8.2511, "step": 1000 }, { "epoch": 0.041723979825768, "grad_norm": 584.0, "learning_rate": 9.996402753330325e-05, "loss": 18.5011, "step": 1001 }, { "epoch": 0.041765662123296236, "grad_norm": 211.0, "learning_rate": 9.996377107691792e-05, "loss": 10.1261, "step": 1002 }, { "epoch": 0.041807344420824476, "grad_norm": 420.0, "learning_rate": 9.996351370993842e-05, "loss": 15.4381, "step": 1003 }, { "epoch": 0.041849026718352715, "grad_norm": 330.0, "learning_rate": 9.996325543236943e-05, "loss": 13.4379, "step": 1004 }, { "epoch": 0.041890709015880954, "grad_norm": 424.0, "learning_rate": 9.996299624421569e-05, "loss": 16.6264, "step": 1005 }, { "epoch": 0.041932391313409194, "grad_norm": 174.0, "learning_rate": 9.996273614548191e-05, "loss": 9.6882, "step": 1006 }, { "epoch": 0.04197407361093743, "grad_norm": 330.0, "learning_rate": 9.996247513617281e-05, "loss": 12.565, "step": 1007 }, { "epoch": 0.04201575590846567, "grad_norm": 506.0, "learning_rate": 9.996221321629319e-05, "loss": 15.6882, "step": 1008 }, { "epoch": 0.04205743820599391, "grad_norm": 340.0, "learning_rate": 9.996195038584779e-05, "loss": 13.9389, "step": 1009 }, { "epoch": 0.04209912050352215, "grad_norm": 612.0, "learning_rate": 9.99616866448414e-05, "loss": 17.7544, "step": 1010 }, { "epoch": 0.04214080280105039, "grad_norm": 320.0, "learning_rate": 9.996142199327885e-05, "loss": 12.5006, "step": 1011 }, { "epoch": 0.04218248509857864, "grad_norm": 286.0, "learning_rate": 9.996115643116494e-05, "loss": 11.6882, "step": 1012 }, { "epoch": 0.042224167396106876, "grad_norm": 346.0, "learning_rate": 9.996088995850453e-05, "loss": 13.4386, "step": 1013 }, { "epoch": 0.042265849693635116, "grad_norm": 796.0, "learning_rate": 9.996062257530243e-05, "loss": 26.0005, "step": 1014 }, { "epoch": 0.042307531991163355, "grad_norm": 720.0, "learning_rate": 9.996035428156358e-05, "loss": 20.3755, "step": 1015 }, { "epoch": 0.042349214288691595, "grad_norm": 668.0, "learning_rate": 9.996008507729284e-05, "loss": 21.8756, "step": 1016 }, { "epoch": 0.042390896586219834, "grad_norm": 326.0, "learning_rate": 9.995981496249511e-05, "loss": 13.6882, "step": 1017 }, { "epoch": 0.04243257888374807, "grad_norm": 520.0, "learning_rate": 9.99595439371753e-05, "loss": 16.0022, "step": 1018 }, { "epoch": 0.04247426118127631, "grad_norm": 432.0, "learning_rate": 9.995927200133839e-05, "loss": 15.1301, "step": 1019 }, { "epoch": 0.04251594347880455, "grad_norm": 784.0, "learning_rate": 9.99589991549893e-05, "loss": 22.3757, "step": 1020 }, { "epoch": 0.04255762577633279, "grad_norm": 800.0, "learning_rate": 9.995872539813302e-05, "loss": 24.2507, "step": 1021 }, { "epoch": 0.04259930807386103, "grad_norm": 556.0, "learning_rate": 9.995845073077452e-05, "loss": 18.0006, "step": 1022 }, { "epoch": 0.04264099037138927, "grad_norm": 274.0, "learning_rate": 9.995817515291884e-05, "loss": 12.1267, "step": 1023 }, { "epoch": 0.04268267266891751, "grad_norm": 992.0, "learning_rate": 9.995789866457099e-05, "loss": 24.8808, "step": 1024 }, { "epoch": 0.04272435496644575, "grad_norm": 312.0, "learning_rate": 9.995762126573598e-05, "loss": 12.813, "step": 1025 }, { "epoch": 0.04276603726397399, "grad_norm": 318.0, "learning_rate": 9.99573429564189e-05, "loss": 13.9378, "step": 1026 }, { "epoch": 0.04280771956150223, "grad_norm": 176.0, "learning_rate": 9.99570637366248e-05, "loss": 9.3765, "step": 1027 }, { "epoch": 0.04284940185903047, "grad_norm": 346.0, "learning_rate": 9.995678360635879e-05, "loss": 13.1267, "step": 1028 }, { "epoch": 0.042891084156558706, "grad_norm": 636.0, "learning_rate": 9.995650256562596e-05, "loss": 19.3753, "step": 1029 }, { "epoch": 0.042932766454086946, "grad_norm": 286.0, "learning_rate": 9.995622061443143e-05, "loss": 12.6259, "step": 1030 }, { "epoch": 0.04297444875161519, "grad_norm": 266.0, "learning_rate": 9.995593775278034e-05, "loss": 11.1882, "step": 1031 }, { "epoch": 0.04301613104914343, "grad_norm": 430.0, "learning_rate": 9.995565398067785e-05, "loss": 15.751, "step": 1032 }, { "epoch": 0.04305781334667167, "grad_norm": 242.0, "learning_rate": 9.995536929812915e-05, "loss": 10.0629, "step": 1033 }, { "epoch": 0.04309949564419991, "grad_norm": 328.0, "learning_rate": 9.995508370513939e-05, "loss": 8.881, "step": 1034 }, { "epoch": 0.04314117794172815, "grad_norm": 167.0, "learning_rate": 9.995479720171381e-05, "loss": 8.6886, "step": 1035 }, { "epoch": 0.04318286023925639, "grad_norm": 366.0, "learning_rate": 9.995450978785762e-05, "loss": 15.44, "step": 1036 }, { "epoch": 0.04322454253678463, "grad_norm": 716.0, "learning_rate": 9.995422146357605e-05, "loss": 20.3754, "step": 1037 }, { "epoch": 0.04326622483431287, "grad_norm": 728.0, "learning_rate": 9.995393222887435e-05, "loss": 20.2509, "step": 1038 }, { "epoch": 0.04330790713184111, "grad_norm": 334.0, "learning_rate": 9.995364208375781e-05, "loss": 13.6878, "step": 1039 }, { "epoch": 0.043349589429369347, "grad_norm": 552.0, "learning_rate": 9.99533510282317e-05, "loss": 18.0021, "step": 1040 }, { "epoch": 0.043391271726897586, "grad_norm": 206.0, "learning_rate": 9.995305906230134e-05, "loss": 10.8758, "step": 1041 }, { "epoch": 0.043432954024425825, "grad_norm": 201.0, "learning_rate": 9.995276618597203e-05, "loss": 10.5634, "step": 1042 }, { "epoch": 0.043474636321954065, "grad_norm": 324.0, "learning_rate": 9.995247239924915e-05, "loss": 13.0005, "step": 1043 }, { "epoch": 0.043516318619482304, "grad_norm": 211.0, "learning_rate": 9.9952177702138e-05, "loss": 9.8137, "step": 1044 }, { "epoch": 0.04355800091701054, "grad_norm": 304.0, "learning_rate": 9.995188209464398e-05, "loss": 12.313, "step": 1045 }, { "epoch": 0.04359968321453878, "grad_norm": 346.0, "learning_rate": 9.995158557677249e-05, "loss": 13.7512, "step": 1046 }, { "epoch": 0.04364136551206702, "grad_norm": 164.0, "learning_rate": 9.99512881485289e-05, "loss": 9.1256, "step": 1047 }, { "epoch": 0.04368304780959526, "grad_norm": 924.0, "learning_rate": 9.995098980991866e-05, "loss": 26.126, "step": 1048 }, { "epoch": 0.04372473010712351, "grad_norm": 218.0, "learning_rate": 9.995069056094719e-05, "loss": 10.0647, "step": 1049 }, { "epoch": 0.04376641240465175, "grad_norm": 600.0, "learning_rate": 9.995039040161997e-05, "loss": 19.7514, "step": 1050 }, { "epoch": 0.04380809470217999, "grad_norm": 256.0, "learning_rate": 9.995008933194243e-05, "loss": 11.4387, "step": 1051 }, { "epoch": 0.043849776999708226, "grad_norm": 286.0, "learning_rate": 9.994978735192009e-05, "loss": 12.1268, "step": 1052 }, { "epoch": 0.043891459297236465, "grad_norm": 96.0, "learning_rate": 9.994948446155842e-05, "loss": 6.6886, "step": 1053 }, { "epoch": 0.043933141594764705, "grad_norm": 472.0, "learning_rate": 9.994918066086298e-05, "loss": 15.628, "step": 1054 }, { "epoch": 0.043974823892292944, "grad_norm": 219.0, "learning_rate": 9.994887594983929e-05, "loss": 9.6261, "step": 1055 }, { "epoch": 0.04401650618982118, "grad_norm": 1144.0, "learning_rate": 9.99485703284929e-05, "loss": 31.3754, "step": 1056 }, { "epoch": 0.04405818848734942, "grad_norm": 520.0, "learning_rate": 9.994826379682938e-05, "loss": 14.6323, "step": 1057 }, { "epoch": 0.04409987078487766, "grad_norm": 498.0, "learning_rate": 9.99479563548543e-05, "loss": 17.5026, "step": 1058 }, { "epoch": 0.0441415530824059, "grad_norm": 264.0, "learning_rate": 9.99476480025733e-05, "loss": 13.2519, "step": 1059 }, { "epoch": 0.04418323537993414, "grad_norm": 382.0, "learning_rate": 9.994733873999199e-05, "loss": 15.3142, "step": 1060 }, { "epoch": 0.04422491767746238, "grad_norm": 326.0, "learning_rate": 9.994702856711597e-05, "loss": 13.5013, "step": 1061 }, { "epoch": 0.04426659997499062, "grad_norm": 516.0, "learning_rate": 9.994671748395095e-05, "loss": 17.626, "step": 1062 }, { "epoch": 0.04430828227251886, "grad_norm": 584.0, "learning_rate": 9.994640549050257e-05, "loss": 17.501, "step": 1063 }, { "epoch": 0.0443499645700471, "grad_norm": 916.0, "learning_rate": 9.994609258677648e-05, "loss": 24.5013, "step": 1064 }, { "epoch": 0.04439164686757534, "grad_norm": 512.0, "learning_rate": 9.994577877277845e-05, "loss": 17.3754, "step": 1065 }, { "epoch": 0.04443332916510358, "grad_norm": 380.0, "learning_rate": 9.994546404851415e-05, "loss": 14.7506, "step": 1066 }, { "epoch": 0.044475011462631824, "grad_norm": 564.0, "learning_rate": 9.994514841398934e-05, "loss": 18.001, "step": 1067 }, { "epoch": 0.04451669376016006, "grad_norm": 1040.0, "learning_rate": 9.994483186920978e-05, "loss": 25.8794, "step": 1068 }, { "epoch": 0.0445583760576883, "grad_norm": 302.0, "learning_rate": 9.994451441418122e-05, "loss": 12.3757, "step": 1069 }, { "epoch": 0.04460005835521654, "grad_norm": 512.0, "learning_rate": 9.994419604890944e-05, "loss": 17.5005, "step": 1070 }, { "epoch": 0.04464174065274478, "grad_norm": 360.0, "learning_rate": 9.994387677340026e-05, "loss": 13.8136, "step": 1071 }, { "epoch": 0.04468342295027302, "grad_norm": 149.0, "learning_rate": 9.99435565876595e-05, "loss": 9.1259, "step": 1072 }, { "epoch": 0.04472510524780126, "grad_norm": 502.0, "learning_rate": 9.994323549169297e-05, "loss": 19.6265, "step": 1073 }, { "epoch": 0.0447667875453295, "grad_norm": 334.0, "learning_rate": 9.994291348550656e-05, "loss": 13.5646, "step": 1074 }, { "epoch": 0.04480846984285774, "grad_norm": 278.0, "learning_rate": 9.99425905691061e-05, "loss": 12.063, "step": 1075 }, { "epoch": 0.04485015214038598, "grad_norm": 700.0, "learning_rate": 9.994226674249749e-05, "loss": 18.8761, "step": 1076 }, { "epoch": 0.04489183443791422, "grad_norm": 334.0, "learning_rate": 9.994194200568665e-05, "loss": 13.876, "step": 1077 }, { "epoch": 0.04493351673544246, "grad_norm": 1048.0, "learning_rate": 9.994161635867949e-05, "loss": 31.6254, "step": 1078 }, { "epoch": 0.044975199032970696, "grad_norm": 668.0, "learning_rate": 9.994128980148192e-05, "loss": 19.7511, "step": 1079 }, { "epoch": 0.045016881330498935, "grad_norm": 494.0, "learning_rate": 9.994096233409992e-05, "loss": 15.5044, "step": 1080 }, { "epoch": 0.045058563628027175, "grad_norm": 512.0, "learning_rate": 9.994063395653945e-05, "loss": 17.63, "step": 1081 }, { "epoch": 0.045100245925555414, "grad_norm": 118.0, "learning_rate": 9.994030466880648e-05, "loss": 8.3133, "step": 1082 }, { "epoch": 0.045141928223083654, "grad_norm": 408.0, "learning_rate": 9.993997447090704e-05, "loss": 14.8131, "step": 1083 }, { "epoch": 0.04518361052061189, "grad_norm": 464.0, "learning_rate": 9.993964336284712e-05, "loss": 17.2505, "step": 1084 }, { "epoch": 0.04522529281814014, "grad_norm": 392.0, "learning_rate": 9.993931134463277e-05, "loss": 13.7515, "step": 1085 }, { "epoch": 0.04526697511566838, "grad_norm": 540.0, "learning_rate": 9.993897841627005e-05, "loss": 18.5006, "step": 1086 }, { "epoch": 0.04530865741319662, "grad_norm": 330.0, "learning_rate": 9.9938644577765e-05, "loss": 14.0632, "step": 1087 }, { "epoch": 0.04535033971072486, "grad_norm": 366.0, "learning_rate": 9.99383098291237e-05, "loss": 13.8146, "step": 1088 }, { "epoch": 0.0453920220082531, "grad_norm": 422.0, "learning_rate": 9.993797417035231e-05, "loss": 15.0007, "step": 1089 }, { "epoch": 0.045433704305781336, "grad_norm": 432.0, "learning_rate": 9.993763760145689e-05, "loss": 16.0007, "step": 1090 }, { "epoch": 0.045475386603309575, "grad_norm": 382.0, "learning_rate": 9.99373001224436e-05, "loss": 14.1257, "step": 1091 }, { "epoch": 0.045517068900837815, "grad_norm": 608.0, "learning_rate": 9.993696173331857e-05, "loss": 18.8757, "step": 1092 }, { "epoch": 0.045558751198366054, "grad_norm": 860.0, "learning_rate": 9.9936622434088e-05, "loss": 24.8768, "step": 1093 }, { "epoch": 0.045600433495894294, "grad_norm": 247.0, "learning_rate": 9.993628222475802e-05, "loss": 11.7519, "step": 1094 }, { "epoch": 0.04564211579342253, "grad_norm": 256.0, "learning_rate": 9.993594110533488e-05, "loss": 10.8759, "step": 1095 }, { "epoch": 0.04568379809095077, "grad_norm": 296.0, "learning_rate": 9.993559907582478e-05, "loss": 13.376, "step": 1096 }, { "epoch": 0.04572548038847901, "grad_norm": 304.0, "learning_rate": 9.993525613623395e-05, "loss": 12.6255, "step": 1097 }, { "epoch": 0.04576716268600725, "grad_norm": 440.0, "learning_rate": 9.993491228656866e-05, "loss": 16.0007, "step": 1098 }, { "epoch": 0.04580884498353549, "grad_norm": 370.0, "learning_rate": 9.993456752683515e-05, "loss": 11.4382, "step": 1099 }, { "epoch": 0.04585052728106373, "grad_norm": 205.0, "learning_rate": 9.99342218570397e-05, "loss": 10.4382, "step": 1100 }, { "epoch": 0.04589220957859197, "grad_norm": 253.0, "learning_rate": 9.993387527718865e-05, "loss": 10.8135, "step": 1101 }, { "epoch": 0.04593389187612021, "grad_norm": 404.0, "learning_rate": 9.993352778728827e-05, "loss": 12.6269, "step": 1102 }, { "epoch": 0.045975574173648455, "grad_norm": 294.0, "learning_rate": 9.993317938734492e-05, "loss": 11.8139, "step": 1103 }, { "epoch": 0.046017256471176694, "grad_norm": 193.0, "learning_rate": 9.993283007736495e-05, "loss": 10.188, "step": 1104 }, { "epoch": 0.046058938768704934, "grad_norm": 672.0, "learning_rate": 9.993247985735472e-05, "loss": 18.5038, "step": 1105 }, { "epoch": 0.04610062106623317, "grad_norm": 378.0, "learning_rate": 9.99321287273206e-05, "loss": 13.4378, "step": 1106 }, { "epoch": 0.04614230336376141, "grad_norm": 86.5, "learning_rate": 9.993177668726901e-05, "loss": 5.8762, "step": 1107 }, { "epoch": 0.04618398566128965, "grad_norm": 688.0, "learning_rate": 9.993142373720634e-05, "loss": 23.2511, "step": 1108 }, { "epoch": 0.04622566795881789, "grad_norm": 242.0, "learning_rate": 9.993106987713906e-05, "loss": 12.001, "step": 1109 }, { "epoch": 0.04626735025634613, "grad_norm": 420.0, "learning_rate": 9.993071510707359e-05, "loss": 15.3141, "step": 1110 }, { "epoch": 0.04630903255387437, "grad_norm": 704.0, "learning_rate": 9.99303594270164e-05, "loss": 19.7575, "step": 1111 }, { "epoch": 0.04635071485140261, "grad_norm": 218.0, "learning_rate": 9.9930002836974e-05, "loss": 11.5632, "step": 1112 }, { "epoch": 0.04639239714893085, "grad_norm": 242.0, "learning_rate": 9.992964533695285e-05, "loss": 11.5004, "step": 1113 }, { "epoch": 0.04643407944645909, "grad_norm": 69.0, "learning_rate": 9.992928692695947e-05, "loss": 6.8757, "step": 1114 }, { "epoch": 0.04647576174398733, "grad_norm": 644.0, "learning_rate": 9.992892760700042e-05, "loss": 19.0007, "step": 1115 }, { "epoch": 0.04651744404151557, "grad_norm": 1200.0, "learning_rate": 9.992856737708223e-05, "loss": 28.0055, "step": 1116 }, { "epoch": 0.046559126339043806, "grad_norm": 156.0, "learning_rate": 9.992820623721147e-05, "loss": 6.9381, "step": 1117 }, { "epoch": 0.046600808636572046, "grad_norm": 252.0, "learning_rate": 9.992784418739472e-05, "loss": 12.001, "step": 1118 }, { "epoch": 0.046642490934100285, "grad_norm": 196.0, "learning_rate": 9.992748122763856e-05, "loss": 10.2519, "step": 1119 }, { "epoch": 0.046684173231628524, "grad_norm": 272.0, "learning_rate": 9.992711735794965e-05, "loss": 11.7509, "step": 1120 }, { "epoch": 0.046725855529156764, "grad_norm": 322.0, "learning_rate": 9.992675257833456e-05, "loss": 12.6257, "step": 1121 }, { "epoch": 0.04676753782668501, "grad_norm": 584.0, "learning_rate": 9.992638688879999e-05, "loss": 17.1255, "step": 1122 }, { "epoch": 0.04680922012421325, "grad_norm": 262.0, "learning_rate": 9.992602028935259e-05, "loss": 12.4391, "step": 1123 }, { "epoch": 0.04685090242174149, "grad_norm": 169.0, "learning_rate": 9.992565277999903e-05, "loss": 10.3175, "step": 1124 }, { "epoch": 0.04689258471926973, "grad_norm": 182.0, "learning_rate": 9.992528436074601e-05, "loss": 10.4383, "step": 1125 }, { "epoch": 0.04693426701679797, "grad_norm": 464.0, "learning_rate": 9.992491503160027e-05, "loss": 15.3138, "step": 1126 }, { "epoch": 0.04697594931432621, "grad_norm": 326.0, "learning_rate": 9.992454479256852e-05, "loss": 14.3129, "step": 1127 }, { "epoch": 0.047017631611854446, "grad_norm": 386.0, "learning_rate": 9.992417364365749e-05, "loss": 13.1259, "step": 1128 }, { "epoch": 0.047059313909382686, "grad_norm": 656.0, "learning_rate": 9.992380158487398e-05, "loss": 20.8753, "step": 1129 }, { "epoch": 0.047100996206910925, "grad_norm": 380.0, "learning_rate": 9.992342861622475e-05, "loss": 13.5008, "step": 1130 }, { "epoch": 0.047142678504439164, "grad_norm": 224.0, "learning_rate": 9.992305473771661e-05, "loss": 11.0634, "step": 1131 }, { "epoch": 0.047184360801967404, "grad_norm": 280.0, "learning_rate": 9.992267994935635e-05, "loss": 10.5632, "step": 1132 }, { "epoch": 0.04722604309949564, "grad_norm": 508.0, "learning_rate": 9.992230425115083e-05, "loss": 16.3773, "step": 1133 }, { "epoch": 0.04726772539702388, "grad_norm": 324.0, "learning_rate": 9.992192764310685e-05, "loss": 11.0645, "step": 1134 }, { "epoch": 0.04730940769455212, "grad_norm": 624.0, "learning_rate": 9.992155012523135e-05, "loss": 19.1277, "step": 1135 }, { "epoch": 0.04735108999208036, "grad_norm": 324.0, "learning_rate": 9.992117169753115e-05, "loss": 12.6254, "step": 1136 }, { "epoch": 0.0473927722896086, "grad_norm": 188.0, "learning_rate": 9.992079236001317e-05, "loss": 10.6258, "step": 1137 }, { "epoch": 0.04743445458713684, "grad_norm": 342.0, "learning_rate": 9.99204121126843e-05, "loss": 13.8762, "step": 1138 }, { "epoch": 0.04747613688466508, "grad_norm": 174.0, "learning_rate": 9.992003095555151e-05, "loss": 9.7513, "step": 1139 }, { "epoch": 0.047517819182193326, "grad_norm": 480.0, "learning_rate": 9.991964888862171e-05, "loss": 15.5637, "step": 1140 }, { "epoch": 0.047559501479721565, "grad_norm": 604.0, "learning_rate": 9.991926591190188e-05, "loss": 20.8756, "step": 1141 }, { "epoch": 0.047601183777249804, "grad_norm": 117.0, "learning_rate": 9.991888202539901e-05, "loss": 8.4385, "step": 1142 }, { "epoch": 0.047642866074778044, "grad_norm": 452.0, "learning_rate": 9.991849722912006e-05, "loss": 16.2507, "step": 1143 }, { "epoch": 0.04768454837230628, "grad_norm": 584.0, "learning_rate": 9.991811152307208e-05, "loss": 18.0005, "step": 1144 }, { "epoch": 0.04772623066983452, "grad_norm": 474.0, "learning_rate": 9.991772490726209e-05, "loss": 14.7535, "step": 1145 }, { "epoch": 0.04776791296736276, "grad_norm": 328.0, "learning_rate": 9.991733738169712e-05, "loss": 13.0013, "step": 1146 }, { "epoch": 0.047809595264891, "grad_norm": 212.0, "learning_rate": 9.991694894638426e-05, "loss": 10.4385, "step": 1147 }, { "epoch": 0.04785127756241924, "grad_norm": 704.0, "learning_rate": 9.991655960133058e-05, "loss": 20.256, "step": 1148 }, { "epoch": 0.04789295985994748, "grad_norm": 852.0, "learning_rate": 9.991616934654316e-05, "loss": 25.5003, "step": 1149 }, { "epoch": 0.04793464215747572, "grad_norm": 640.0, "learning_rate": 9.99157781820291e-05, "loss": 18.6252, "step": 1150 }, { "epoch": 0.04797632445500396, "grad_norm": 282.0, "learning_rate": 9.991538610779558e-05, "loss": 12.7514, "step": 1151 }, { "epoch": 0.0480180067525322, "grad_norm": 456.0, "learning_rate": 9.991499312384971e-05, "loss": 14.0643, "step": 1152 }, { "epoch": 0.04805968905006044, "grad_norm": 394.0, "learning_rate": 9.991459923019866e-05, "loss": 14.9397, "step": 1153 }, { "epoch": 0.04810137134758868, "grad_norm": 434.0, "learning_rate": 9.99142044268496e-05, "loss": 16.1256, "step": 1154 }, { "epoch": 0.048143053645116916, "grad_norm": 330.0, "learning_rate": 9.991380871380974e-05, "loss": 14.1879, "step": 1155 }, { "epoch": 0.048184735942645156, "grad_norm": 294.0, "learning_rate": 9.991341209108627e-05, "loss": 13.1882, "step": 1156 }, { "epoch": 0.048226418240173395, "grad_norm": 448.0, "learning_rate": 9.991301455868645e-05, "loss": 14.6878, "step": 1157 }, { "epoch": 0.04826810053770164, "grad_norm": 460.0, "learning_rate": 9.991261611661751e-05, "loss": 16.0016, "step": 1158 }, { "epoch": 0.04830978283522988, "grad_norm": 628.0, "learning_rate": 9.991221676488671e-05, "loss": 22.1255, "step": 1159 }, { "epoch": 0.04835146513275812, "grad_norm": 254.0, "learning_rate": 9.991181650350133e-05, "loss": 11.1258, "step": 1160 }, { "epoch": 0.04839314743028636, "grad_norm": 308.0, "learning_rate": 9.991141533246865e-05, "loss": 12.6885, "step": 1161 }, { "epoch": 0.0484348297278146, "grad_norm": 560.0, "learning_rate": 9.991101325179601e-05, "loss": 17.8754, "step": 1162 }, { "epoch": 0.04847651202534284, "grad_norm": 592.0, "learning_rate": 9.991061026149071e-05, "loss": 18.8755, "step": 1163 }, { "epoch": 0.04851819432287108, "grad_norm": 520.0, "learning_rate": 9.991020636156012e-05, "loss": 17.2506, "step": 1164 }, { "epoch": 0.04855987662039932, "grad_norm": 424.0, "learning_rate": 9.990980155201157e-05, "loss": 14.814, "step": 1165 }, { "epoch": 0.048601558917927556, "grad_norm": 298.0, "learning_rate": 9.990939583285248e-05, "loss": 11.6264, "step": 1166 }, { "epoch": 0.048643241215455796, "grad_norm": 189.0, "learning_rate": 9.99089892040902e-05, "loss": 9.3136, "step": 1167 }, { "epoch": 0.048684923512984035, "grad_norm": 440.0, "learning_rate": 9.990858166573217e-05, "loss": 15.0629, "step": 1168 }, { "epoch": 0.048726605810512275, "grad_norm": 268.0, "learning_rate": 9.990817321778581e-05, "loss": 11.5635, "step": 1169 }, { "epoch": 0.048768288108040514, "grad_norm": 438.0, "learning_rate": 9.990776386025857e-05, "loss": 15.5634, "step": 1170 }, { "epoch": 0.04880997040556875, "grad_norm": 652.0, "learning_rate": 9.990735359315788e-05, "loss": 19.1261, "step": 1171 }, { "epoch": 0.04885165270309699, "grad_norm": 128.0, "learning_rate": 9.990694241649126e-05, "loss": 6.3761, "step": 1172 }, { "epoch": 0.04889333500062523, "grad_norm": 474.0, "learning_rate": 9.990653033026618e-05, "loss": 18.0012, "step": 1173 }, { "epoch": 0.04893501729815347, "grad_norm": 148.0, "learning_rate": 9.990611733449016e-05, "loss": 9.0009, "step": 1174 }, { "epoch": 0.04897669959568171, "grad_norm": 245.0, "learning_rate": 9.99057034291707e-05, "loss": 12.0005, "step": 1175 }, { "epoch": 0.04901838189320996, "grad_norm": 290.0, "learning_rate": 9.990528861431539e-05, "loss": 12.7505, "step": 1176 }, { "epoch": 0.049060064190738197, "grad_norm": 1408.0, "learning_rate": 9.990487288993176e-05, "loss": 30.005, "step": 1177 }, { "epoch": 0.049101746488266436, "grad_norm": 274.0, "learning_rate": 9.990445625602738e-05, "loss": 12.3761, "step": 1178 }, { "epoch": 0.049143428785794675, "grad_norm": 216.0, "learning_rate": 9.990403871260984e-05, "loss": 11.2507, "step": 1179 }, { "epoch": 0.049185111083322915, "grad_norm": 732.0, "learning_rate": 9.99036202596868e-05, "loss": 22.2509, "step": 1180 }, { "epoch": 0.049226793380851154, "grad_norm": 266.0, "learning_rate": 9.990320089726583e-05, "loss": 11.6264, "step": 1181 }, { "epoch": 0.04926847567837939, "grad_norm": 804.0, "learning_rate": 9.990278062535459e-05, "loss": 26.0014, "step": 1182 }, { "epoch": 0.04931015797590763, "grad_norm": 290.0, "learning_rate": 9.990235944396075e-05, "loss": 12.8753, "step": 1183 }, { "epoch": 0.04935184027343587, "grad_norm": 544.0, "learning_rate": 9.990193735309199e-05, "loss": 19.501, "step": 1184 }, { "epoch": 0.04939352257096411, "grad_norm": 464.0, "learning_rate": 9.990151435275599e-05, "loss": 19.2507, "step": 1185 }, { "epoch": 0.04943520486849235, "grad_norm": 300.0, "learning_rate": 9.990109044296044e-05, "loss": 13.564, "step": 1186 }, { "epoch": 0.04947688716602059, "grad_norm": 278.0, "learning_rate": 9.99006656237131e-05, "loss": 12.4456, "step": 1187 }, { "epoch": 0.04951856946354883, "grad_norm": 322.0, "learning_rate": 9.990023989502171e-05, "loss": 13.8758, "step": 1188 }, { "epoch": 0.04956025176107707, "grad_norm": 179.0, "learning_rate": 9.9899813256894e-05, "loss": 8.7505, "step": 1189 }, { "epoch": 0.04960193405860531, "grad_norm": 244.0, "learning_rate": 9.989938570933777e-05, "loss": 12.2513, "step": 1190 }, { "epoch": 0.04964361635613355, "grad_norm": 444.0, "learning_rate": 9.98989572523608e-05, "loss": 15.7528, "step": 1191 }, { "epoch": 0.04968529865366179, "grad_norm": 466.0, "learning_rate": 9.989852788597092e-05, "loss": 16.8761, "step": 1192 }, { "epoch": 0.04972698095119003, "grad_norm": 292.0, "learning_rate": 9.989809761017593e-05, "loss": 13.6262, "step": 1193 }, { "epoch": 0.049768663248718266, "grad_norm": 744.0, "learning_rate": 9.989766642498369e-05, "loss": 18.3815, "step": 1194 }, { "epoch": 0.04981034554624651, "grad_norm": 308.0, "learning_rate": 9.989723433040203e-05, "loss": 12.7516, "step": 1195 }, { "epoch": 0.04985202784377475, "grad_norm": 428.0, "learning_rate": 9.989680132643886e-05, "loss": 15.7512, "step": 1196 }, { "epoch": 0.04989371014130299, "grad_norm": 80.0, "learning_rate": 9.989636741310207e-05, "loss": 7.8128, "step": 1197 }, { "epoch": 0.04993539243883123, "grad_norm": 720.0, "learning_rate": 9.989593259039954e-05, "loss": 21.626, "step": 1198 }, { "epoch": 0.04997707473635947, "grad_norm": 241.0, "learning_rate": 9.989549685833919e-05, "loss": 11.313, "step": 1199 }, { "epoch": 0.05001875703388771, "grad_norm": 282.0, "learning_rate": 9.989506021692901e-05, "loss": 12.5004, "step": 1200 }, { "epoch": 0.05006043933141595, "grad_norm": 253.0, "learning_rate": 9.989462266617691e-05, "loss": 12.44, "step": 1201 }, { "epoch": 0.05010212162894419, "grad_norm": 300.0, "learning_rate": 9.98941842060909e-05, "loss": 12.7508, "step": 1202 }, { "epoch": 0.05014380392647243, "grad_norm": 664.0, "learning_rate": 9.989374483667892e-05, "loss": 16.6261, "step": 1203 }, { "epoch": 0.05018548622400067, "grad_norm": 194.0, "learning_rate": 9.989330455794904e-05, "loss": 9.8132, "step": 1204 }, { "epoch": 0.050227168521528906, "grad_norm": 502.0, "learning_rate": 9.989286336990926e-05, "loss": 16.1262, "step": 1205 }, { "epoch": 0.050268850819057145, "grad_norm": 296.0, "learning_rate": 9.98924212725676e-05, "loss": 12.4394, "step": 1206 }, { "epoch": 0.050310533116585385, "grad_norm": 422.0, "learning_rate": 9.989197826593212e-05, "loss": 15.1254, "step": 1207 }, { "epoch": 0.050352215414113624, "grad_norm": 592.0, "learning_rate": 9.989153435001093e-05, "loss": 20.7513, "step": 1208 }, { "epoch": 0.050393897711641863, "grad_norm": 468.0, "learning_rate": 9.989108952481209e-05, "loss": 15.8133, "step": 1209 }, { "epoch": 0.0504355800091701, "grad_norm": 396.0, "learning_rate": 9.989064379034372e-05, "loss": 14.6257, "step": 1210 }, { "epoch": 0.05047726230669834, "grad_norm": 584.0, "learning_rate": 9.989019714661394e-05, "loss": 18.1259, "step": 1211 }, { "epoch": 0.05051894460422658, "grad_norm": 418.0, "learning_rate": 9.988974959363089e-05, "loss": 14.7511, "step": 1212 }, { "epoch": 0.05056062690175483, "grad_norm": 199.0, "learning_rate": 9.988930113140271e-05, "loss": 10.6883, "step": 1213 }, { "epoch": 0.05060230919928307, "grad_norm": 1272.0, "learning_rate": 9.98888517599376e-05, "loss": 30.0004, "step": 1214 }, { "epoch": 0.05064399149681131, "grad_norm": 360.0, "learning_rate": 9.988840147924372e-05, "loss": 13.8138, "step": 1215 }, { "epoch": 0.050685673794339546, "grad_norm": 179.0, "learning_rate": 9.988795028932931e-05, "loss": 9.7507, "step": 1216 }, { "epoch": 0.050727356091867785, "grad_norm": 356.0, "learning_rate": 9.988749819020258e-05, "loss": 12.6266, "step": 1217 }, { "epoch": 0.050769038389396025, "grad_norm": 420.0, "learning_rate": 9.988704518187177e-05, "loss": 12.8165, "step": 1218 }, { "epoch": 0.050810720686924264, "grad_norm": 1232.0, "learning_rate": 9.988659126434511e-05, "loss": 26.1308, "step": 1219 }, { "epoch": 0.050852402984452504, "grad_norm": 254.0, "learning_rate": 9.988613643763091e-05, "loss": 11.3135, "step": 1220 }, { "epoch": 0.05089408528198074, "grad_norm": 244.0, "learning_rate": 9.988568070173745e-05, "loss": 11.4395, "step": 1221 }, { "epoch": 0.05093576757950898, "grad_norm": 344.0, "learning_rate": 9.988522405667302e-05, "loss": 13.5014, "step": 1222 }, { "epoch": 0.05097744987703722, "grad_norm": 192.0, "learning_rate": 9.988476650244597e-05, "loss": 9.8755, "step": 1223 }, { "epoch": 0.05101913217456546, "grad_norm": 516.0, "learning_rate": 9.988430803906461e-05, "loss": 17.7504, "step": 1224 }, { "epoch": 0.0510608144720937, "grad_norm": 188.0, "learning_rate": 9.98838486665373e-05, "loss": 10.8758, "step": 1225 }, { "epoch": 0.05110249676962194, "grad_norm": 188.0, "learning_rate": 9.988338838487243e-05, "loss": 8.501, "step": 1226 }, { "epoch": 0.05114417906715018, "grad_norm": 612.0, "learning_rate": 9.98829271940784e-05, "loss": 17.6262, "step": 1227 }, { "epoch": 0.05118586136467842, "grad_norm": 226.0, "learning_rate": 9.988246509416356e-05, "loss": 11.3139, "step": 1228 }, { "epoch": 0.05122754366220666, "grad_norm": 258.0, "learning_rate": 9.988200208513637e-05, "loss": 11.0628, "step": 1229 }, { "epoch": 0.0512692259597349, "grad_norm": 276.0, "learning_rate": 9.988153816700528e-05, "loss": 11.501, "step": 1230 }, { "epoch": 0.051310908257263144, "grad_norm": 418.0, "learning_rate": 9.988107333977871e-05, "loss": 15.9382, "step": 1231 }, { "epoch": 0.05135259055479138, "grad_norm": 724.0, "learning_rate": 9.988060760346515e-05, "loss": 22.0066, "step": 1232 }, { "epoch": 0.05139427285231962, "grad_norm": 536.0, "learning_rate": 9.988014095807311e-05, "loss": 19.1256, "step": 1233 }, { "epoch": 0.05143595514984786, "grad_norm": 210.0, "learning_rate": 9.987967340361107e-05, "loss": 10.5012, "step": 1234 }, { "epoch": 0.0514776374473761, "grad_norm": 266.0, "learning_rate": 9.987920494008753e-05, "loss": 11.7513, "step": 1235 }, { "epoch": 0.05151931974490434, "grad_norm": 290.0, "learning_rate": 9.987873556751107e-05, "loss": 11.7513, "step": 1236 }, { "epoch": 0.05156100204243258, "grad_norm": 584.0, "learning_rate": 9.987826528589021e-05, "loss": 18.2521, "step": 1237 }, { "epoch": 0.05160268433996082, "grad_norm": 322.0, "learning_rate": 9.987779409523357e-05, "loss": 12.4382, "step": 1238 }, { "epoch": 0.05164436663748906, "grad_norm": 588.0, "learning_rate": 9.987732199554968e-05, "loss": 15.7524, "step": 1239 }, { "epoch": 0.0516860489350173, "grad_norm": 232.0, "learning_rate": 9.987684898684718e-05, "loss": 10.9381, "step": 1240 }, { "epoch": 0.05172773123254554, "grad_norm": 384.0, "learning_rate": 9.987637506913468e-05, "loss": 15.4382, "step": 1241 }, { "epoch": 0.05176941353007378, "grad_norm": 131.0, "learning_rate": 9.98759002424208e-05, "loss": 7.6882, "step": 1242 }, { "epoch": 0.051811095827602016, "grad_norm": 92.0, "learning_rate": 9.987542450671422e-05, "loss": 7.0948, "step": 1243 }, { "epoch": 0.051852778125130256, "grad_norm": 326.0, "learning_rate": 9.987494786202361e-05, "loss": 13.563, "step": 1244 }, { "epoch": 0.051894460422658495, "grad_norm": 354.0, "learning_rate": 9.987447030835763e-05, "loss": 12.5009, "step": 1245 }, { "epoch": 0.051936142720186734, "grad_norm": 516.0, "learning_rate": 9.987399184572501e-05, "loss": 17.5004, "step": 1246 }, { "epoch": 0.051977825017714974, "grad_norm": 137.0, "learning_rate": 9.987351247413446e-05, "loss": 9.313, "step": 1247 }, { "epoch": 0.05201950731524321, "grad_norm": 193.0, "learning_rate": 9.987303219359471e-05, "loss": 9.7518, "step": 1248 }, { "epoch": 0.05206118961277146, "grad_norm": 366.0, "learning_rate": 9.987255100411455e-05, "loss": 14.6255, "step": 1249 }, { "epoch": 0.0521028719102997, "grad_norm": 306.0, "learning_rate": 9.987206890570269e-05, "loss": 12.4382, "step": 1250 }, { "epoch": 0.05214455420782794, "grad_norm": 1168.0, "learning_rate": 9.987158589836796e-05, "loss": 25.6307, "step": 1251 }, { "epoch": 0.05218623650535618, "grad_norm": 484.0, "learning_rate": 9.987110198211915e-05, "loss": 15.1878, "step": 1252 }, { "epoch": 0.05222791880288442, "grad_norm": 191.0, "learning_rate": 9.987061715696509e-05, "loss": 8.9379, "step": 1253 }, { "epoch": 0.052269601100412656, "grad_norm": 580.0, "learning_rate": 9.987013142291457e-05, "loss": 17.3766, "step": 1254 }, { "epoch": 0.052311283397940896, "grad_norm": 290.0, "learning_rate": 9.986964477997651e-05, "loss": 12.3755, "step": 1255 }, { "epoch": 0.052352965695469135, "grad_norm": 944.0, "learning_rate": 9.986915722815973e-05, "loss": 22.6303, "step": 1256 }, { "epoch": 0.052394647992997374, "grad_norm": 93.5, "learning_rate": 9.986866876747314e-05, "loss": 8.1888, "step": 1257 }, { "epoch": 0.052436330290525614, "grad_norm": 260.0, "learning_rate": 9.986817939792565e-05, "loss": 11.1881, "step": 1258 }, { "epoch": 0.05247801258805385, "grad_norm": 252.0, "learning_rate": 9.986768911952613e-05, "loss": 11.1259, "step": 1259 }, { "epoch": 0.05251969488558209, "grad_norm": 1088.0, "learning_rate": 9.986719793228357e-05, "loss": 25.3838, "step": 1260 }, { "epoch": 0.05256137718311033, "grad_norm": 656.0, "learning_rate": 9.98667058362069e-05, "loss": 19.0048, "step": 1261 }, { "epoch": 0.05260305948063857, "grad_norm": 186.0, "learning_rate": 9.986621283130508e-05, "loss": 9.2518, "step": 1262 }, { "epoch": 0.05264474177816681, "grad_norm": 414.0, "learning_rate": 9.986571891758712e-05, "loss": 15.6259, "step": 1263 }, { "epoch": 0.05268642407569505, "grad_norm": 113.5, "learning_rate": 9.9865224095062e-05, "loss": 6.9709, "step": 1264 }, { "epoch": 0.05272810637322329, "grad_norm": 1320.0, "learning_rate": 9.986472836373875e-05, "loss": 26.506, "step": 1265 }, { "epoch": 0.05276978867075153, "grad_norm": 284.0, "learning_rate": 9.986423172362638e-05, "loss": 12.5661, "step": 1266 }, { "epoch": 0.05281147096827977, "grad_norm": 396.0, "learning_rate": 9.986373417473396e-05, "loss": 15.1884, "step": 1267 }, { "epoch": 0.052853153265808014, "grad_norm": 768.0, "learning_rate": 9.986323571707058e-05, "loss": 21.6278, "step": 1268 }, { "epoch": 0.052894835563336254, "grad_norm": 366.0, "learning_rate": 9.98627363506453e-05, "loss": 13.9383, "step": 1269 }, { "epoch": 0.05293651786086449, "grad_norm": 520.0, "learning_rate": 9.986223607546721e-05, "loss": 15.8154, "step": 1270 }, { "epoch": 0.05297820015839273, "grad_norm": 108.0, "learning_rate": 9.986173489154544e-05, "loss": 7.7824, "step": 1271 }, { "epoch": 0.05301988245592097, "grad_norm": 310.0, "learning_rate": 9.986123279888913e-05, "loss": 12.3759, "step": 1272 }, { "epoch": 0.05306156475344921, "grad_norm": 219.0, "learning_rate": 9.986072979750743e-05, "loss": 9.2502, "step": 1273 }, { "epoch": 0.05310324705097745, "grad_norm": 948.0, "learning_rate": 9.98602258874095e-05, "loss": 24.5049, "step": 1274 }, { "epoch": 0.05314492934850569, "grad_norm": 812.0, "learning_rate": 9.985972106860453e-05, "loss": 26.2508, "step": 1275 }, { "epoch": 0.05318661164603393, "grad_norm": 284.0, "learning_rate": 9.985921534110171e-05, "loss": 11.7515, "step": 1276 }, { "epoch": 0.05322829394356217, "grad_norm": 472.0, "learning_rate": 9.985870870491027e-05, "loss": 15.6879, "step": 1277 }, { "epoch": 0.05326997624109041, "grad_norm": 470.0, "learning_rate": 9.985820116003944e-05, "loss": 15.6256, "step": 1278 }, { "epoch": 0.05331165853861865, "grad_norm": 270.0, "learning_rate": 9.985769270649845e-05, "loss": 12.2512, "step": 1279 }, { "epoch": 0.05335334083614689, "grad_norm": 338.0, "learning_rate": 9.985718334429662e-05, "loss": 14.1888, "step": 1280 }, { "epoch": 0.053395023133675126, "grad_norm": 260.0, "learning_rate": 9.985667307344316e-05, "loss": 12.0053, "step": 1281 }, { "epoch": 0.053436705431203366, "grad_norm": 314.0, "learning_rate": 9.985616189394743e-05, "loss": 13.5635, "step": 1282 }, { "epoch": 0.053478387728731605, "grad_norm": 640.0, "learning_rate": 9.985564980581872e-05, "loss": 18.6267, "step": 1283 }, { "epoch": 0.053520070026259844, "grad_norm": 224.0, "learning_rate": 9.985513680906635e-05, "loss": 10.8759, "step": 1284 }, { "epoch": 0.053561752323788084, "grad_norm": 212.0, "learning_rate": 9.985462290369971e-05, "loss": 10.5637, "step": 1285 }, { "epoch": 0.05360343462131633, "grad_norm": 576.0, "learning_rate": 9.985410808972812e-05, "loss": 17.7537, "step": 1286 }, { "epoch": 0.05364511691884457, "grad_norm": 560.0, "learning_rate": 9.985359236716101e-05, "loss": 18.8755, "step": 1287 }, { "epoch": 0.05368679921637281, "grad_norm": 458.0, "learning_rate": 9.985307573600772e-05, "loss": 16.8777, "step": 1288 }, { "epoch": 0.05372848151390105, "grad_norm": 776.0, "learning_rate": 9.985255819627774e-05, "loss": 20.1259, "step": 1289 }, { "epoch": 0.05377016381142929, "grad_norm": 199.0, "learning_rate": 9.985203974798043e-05, "loss": 10.063, "step": 1290 }, { "epoch": 0.05381184610895753, "grad_norm": 404.0, "learning_rate": 9.985152039112528e-05, "loss": 15.4384, "step": 1291 }, { "epoch": 0.053853528406485766, "grad_norm": 169.0, "learning_rate": 9.985100012572176e-05, "loss": 9.7507, "step": 1292 }, { "epoch": 0.053895210704014006, "grad_norm": 716.0, "learning_rate": 9.985047895177932e-05, "loss": 18.2515, "step": 1293 }, { "epoch": 0.053936893001542245, "grad_norm": 248.0, "learning_rate": 9.984995686930748e-05, "loss": 11.6255, "step": 1294 }, { "epoch": 0.053978575299070485, "grad_norm": 266.0, "learning_rate": 9.984943387831573e-05, "loss": 10.8133, "step": 1295 }, { "epoch": 0.054020257596598724, "grad_norm": 174.0, "learning_rate": 9.984890997881365e-05, "loss": 7.8449, "step": 1296 }, { "epoch": 0.05406193989412696, "grad_norm": 416.0, "learning_rate": 9.984838517081076e-05, "loss": 14.4381, "step": 1297 }, { "epoch": 0.0541036221916552, "grad_norm": 183.0, "learning_rate": 9.98478594543166e-05, "loss": 9.9388, "step": 1298 }, { "epoch": 0.05414530448918344, "grad_norm": 376.0, "learning_rate": 9.98473328293408e-05, "loss": 14.0633, "step": 1299 }, { "epoch": 0.05418698678671168, "grad_norm": 150.0, "learning_rate": 9.984680529589294e-05, "loss": 9.6888, "step": 1300 }, { "epoch": 0.05422866908423992, "grad_norm": 362.0, "learning_rate": 9.98462768539826e-05, "loss": 13.001, "step": 1301 }, { "epoch": 0.05427035138176816, "grad_norm": 280.0, "learning_rate": 9.984574750361946e-05, "loss": 12.9386, "step": 1302 }, { "epoch": 0.0543120336792964, "grad_norm": 217.0, "learning_rate": 9.984521724481315e-05, "loss": 10.688, "step": 1303 }, { "epoch": 0.054353715976824646, "grad_norm": 350.0, "learning_rate": 9.984468607757331e-05, "loss": 14.9387, "step": 1304 }, { "epoch": 0.054395398274352885, "grad_norm": 294.0, "learning_rate": 9.984415400190966e-05, "loss": 13.1253, "step": 1305 }, { "epoch": 0.054437080571881125, "grad_norm": 434.0, "learning_rate": 9.984362101783187e-05, "loss": 15.5629, "step": 1306 }, { "epoch": 0.054478762869409364, "grad_norm": 186.0, "learning_rate": 9.984308712534967e-05, "loss": 10.5029, "step": 1307 }, { "epoch": 0.0545204451669376, "grad_norm": 308.0, "learning_rate": 9.984255232447278e-05, "loss": 13.3135, "step": 1308 }, { "epoch": 0.05456212746446584, "grad_norm": 253.0, "learning_rate": 9.984201661521094e-05, "loss": 11.5005, "step": 1309 }, { "epoch": 0.05460380976199408, "grad_norm": 552.0, "learning_rate": 9.984147999757394e-05, "loss": 17.2505, "step": 1310 }, { "epoch": 0.05464549205952232, "grad_norm": 556.0, "learning_rate": 9.984094247157152e-05, "loss": 15.314, "step": 1311 }, { "epoch": 0.05468717435705056, "grad_norm": 294.0, "learning_rate": 9.984040403721351e-05, "loss": 12.3131, "step": 1312 }, { "epoch": 0.0547288566545788, "grad_norm": 488.0, "learning_rate": 9.983986469450972e-05, "loss": 17.2505, "step": 1313 }, { "epoch": 0.05477053895210704, "grad_norm": 470.0, "learning_rate": 9.983932444346996e-05, "loss": 16.1256, "step": 1314 }, { "epoch": 0.05481222124963528, "grad_norm": 648.0, "learning_rate": 9.98387832841041e-05, "loss": 19.5039, "step": 1315 }, { "epoch": 0.05485390354716352, "grad_norm": 1152.0, "learning_rate": 9.983824121642197e-05, "loss": 31.3756, "step": 1316 }, { "epoch": 0.05489558584469176, "grad_norm": 298.0, "learning_rate": 9.983769824043349e-05, "loss": 13.127, "step": 1317 }, { "epoch": 0.05493726814222, "grad_norm": 540.0, "learning_rate": 9.983715435614854e-05, "loss": 18.7507, "step": 1318 }, { "epoch": 0.054978950439748236, "grad_norm": 482.0, "learning_rate": 9.9836609563577e-05, "loss": 15.9387, "step": 1319 }, { "epoch": 0.055020632737276476, "grad_norm": 660.0, "learning_rate": 9.983606386272884e-05, "loss": 20.1254, "step": 1320 }, { "epoch": 0.055062315034804715, "grad_norm": 510.0, "learning_rate": 9.9835517253614e-05, "loss": 17.2505, "step": 1321 }, { "epoch": 0.05510399733233296, "grad_norm": 310.0, "learning_rate": 9.983496973624242e-05, "loss": 12.063, "step": 1322 }, { "epoch": 0.0551456796298612, "grad_norm": 576.0, "learning_rate": 9.98344213106241e-05, "loss": 18.8756, "step": 1323 }, { "epoch": 0.05518736192738944, "grad_norm": 344.0, "learning_rate": 9.983387197676903e-05, "loss": 13.8759, "step": 1324 }, { "epoch": 0.05522904422491768, "grad_norm": 241.0, "learning_rate": 9.983332173468722e-05, "loss": 12.688, "step": 1325 }, { "epoch": 0.05527072652244592, "grad_norm": 432.0, "learning_rate": 9.983277058438869e-05, "loss": 15.6887, "step": 1326 }, { "epoch": 0.05531240881997416, "grad_norm": 588.0, "learning_rate": 9.98322185258835e-05, "loss": 19.3754, "step": 1327 }, { "epoch": 0.0553540911175024, "grad_norm": 428.0, "learning_rate": 9.98316655591817e-05, "loss": 13.8135, "step": 1328 }, { "epoch": 0.05539577341503064, "grad_norm": 466.0, "learning_rate": 9.983111168429338e-05, "loss": 14.5014, "step": 1329 }, { "epoch": 0.05543745571255888, "grad_norm": 492.0, "learning_rate": 9.98305569012286e-05, "loss": 16.8759, "step": 1330 }, { "epoch": 0.055479138010087116, "grad_norm": 780.0, "learning_rate": 9.983000120999753e-05, "loss": 22.7507, "step": 1331 }, { "epoch": 0.055520820307615355, "grad_norm": 556.0, "learning_rate": 9.982944461061023e-05, "loss": 19.0004, "step": 1332 }, { "epoch": 0.055562502605143595, "grad_norm": 768.0, "learning_rate": 9.982888710307692e-05, "loss": 21.5016, "step": 1333 }, { "epoch": 0.055604184902671834, "grad_norm": 446.0, "learning_rate": 9.982832868740767e-05, "loss": 14.8775, "step": 1334 }, { "epoch": 0.05564586720020007, "grad_norm": 516.0, "learning_rate": 9.982776936361275e-05, "loss": 19.8765, "step": 1335 }, { "epoch": 0.05568754949772831, "grad_norm": 628.0, "learning_rate": 9.982720913170229e-05, "loss": 19.1256, "step": 1336 }, { "epoch": 0.05572923179525655, "grad_norm": 648.0, "learning_rate": 9.982664799168653e-05, "loss": 20.6265, "step": 1337 }, { "epoch": 0.05577091409278479, "grad_norm": 748.0, "learning_rate": 9.982608594357568e-05, "loss": 21.7509, "step": 1338 }, { "epoch": 0.05581259639031303, "grad_norm": 288.0, "learning_rate": 9.982552298737999e-05, "loss": 12.1882, "step": 1339 }, { "epoch": 0.05585427868784128, "grad_norm": 196.0, "learning_rate": 9.982495912310972e-05, "loss": 6.8752, "step": 1340 }, { "epoch": 0.05589596098536952, "grad_norm": 241.0, "learning_rate": 9.982439435077515e-05, "loss": 11.3753, "step": 1341 }, { "epoch": 0.055937643282897756, "grad_norm": 358.0, "learning_rate": 9.982382867038657e-05, "loss": 14.6255, "step": 1342 }, { "epoch": 0.055979325580425995, "grad_norm": 418.0, "learning_rate": 9.982326208195428e-05, "loss": 14.9385, "step": 1343 }, { "epoch": 0.056021007877954235, "grad_norm": 183.0, "learning_rate": 9.982269458548863e-05, "loss": 10.3776, "step": 1344 }, { "epoch": 0.056062690175482474, "grad_norm": 528.0, "learning_rate": 9.982212618099993e-05, "loss": 16.6259, "step": 1345 }, { "epoch": 0.056104372473010714, "grad_norm": 1120.0, "learning_rate": 9.982155686849858e-05, "loss": 27.8762, "step": 1346 }, { "epoch": 0.05614605477053895, "grad_norm": 234.0, "learning_rate": 9.982098664799492e-05, "loss": 12.1881, "step": 1347 }, { "epoch": 0.05618773706806719, "grad_norm": 372.0, "learning_rate": 9.982041551949936e-05, "loss": 14.2512, "step": 1348 }, { "epoch": 0.05622941936559543, "grad_norm": 628.0, "learning_rate": 9.981984348302231e-05, "loss": 18.6258, "step": 1349 }, { "epoch": 0.05627110166312367, "grad_norm": 394.0, "learning_rate": 9.981927053857417e-05, "loss": 13.2505, "step": 1350 }, { "epoch": 0.05631278396065191, "grad_norm": 624.0, "learning_rate": 9.981869668616541e-05, "loss": 18.7505, "step": 1351 }, { "epoch": 0.05635446625818015, "grad_norm": 444.0, "learning_rate": 9.981812192580649e-05, "loss": 16.0006, "step": 1352 }, { "epoch": 0.05639614855570839, "grad_norm": 1288.0, "learning_rate": 9.981754625750788e-05, "loss": 29.0066, "step": 1353 }, { "epoch": 0.05643783085323663, "grad_norm": 408.0, "learning_rate": 9.981696968128005e-05, "loss": 14.5633, "step": 1354 }, { "epoch": 0.05647951315076487, "grad_norm": 692.0, "learning_rate": 9.981639219713353e-05, "loss": 23.2515, "step": 1355 }, { "epoch": 0.05652119544829311, "grad_norm": 432.0, "learning_rate": 9.981581380507885e-05, "loss": 16.3755, "step": 1356 }, { "epoch": 0.05656287774582135, "grad_norm": 250.0, "learning_rate": 9.981523450512653e-05, "loss": 12.6883, "step": 1357 }, { "epoch": 0.056604560043349586, "grad_norm": 462.0, "learning_rate": 9.981465429728713e-05, "loss": 12.8755, "step": 1358 }, { "epoch": 0.05664624234087783, "grad_norm": 1384.0, "learning_rate": 9.981407318157125e-05, "loss": 31.2508, "step": 1359 }, { "epoch": 0.05668792463840607, "grad_norm": 398.0, "learning_rate": 9.981349115798946e-05, "loss": 14.313, "step": 1360 }, { "epoch": 0.05672960693593431, "grad_norm": 298.0, "learning_rate": 9.981290822655237e-05, "loss": 10.6882, "step": 1361 }, { "epoch": 0.05677128923346255, "grad_norm": 374.0, "learning_rate": 9.981232438727061e-05, "loss": 14.0673, "step": 1362 }, { "epoch": 0.05681297153099079, "grad_norm": 716.0, "learning_rate": 9.981173964015481e-05, "loss": 21.2512, "step": 1363 }, { "epoch": 0.05685465382851903, "grad_norm": 1528.0, "learning_rate": 9.981115398521563e-05, "loss": 37.0009, "step": 1364 }, { "epoch": 0.05689633612604727, "grad_norm": 356.0, "learning_rate": 9.981056742246375e-05, "loss": 12.6262, "step": 1365 }, { "epoch": 0.05693801842357551, "grad_norm": 422.0, "learning_rate": 9.980997995190987e-05, "loss": 15.9392, "step": 1366 }, { "epoch": 0.05697970072110375, "grad_norm": 284.0, "learning_rate": 9.980939157356468e-05, "loss": 13.0629, "step": 1367 }, { "epoch": 0.05702138301863199, "grad_norm": 262.0, "learning_rate": 9.98088022874389e-05, "loss": 11.688, "step": 1368 }, { "epoch": 0.057063065316160226, "grad_norm": 1024.0, "learning_rate": 9.980821209354328e-05, "loss": 25.381, "step": 1369 }, { "epoch": 0.057104747613688465, "grad_norm": 664.0, "learning_rate": 9.980762099188856e-05, "loss": 19.8758, "step": 1370 }, { "epoch": 0.057146429911216705, "grad_norm": 222.0, "learning_rate": 9.980702898248553e-05, "loss": 11.1256, "step": 1371 }, { "epoch": 0.057188112208744944, "grad_norm": 616.0, "learning_rate": 9.980643606534499e-05, "loss": 20.5012, "step": 1372 }, { "epoch": 0.057229794506273184, "grad_norm": 386.0, "learning_rate": 9.98058422404777e-05, "loss": 14.6899, "step": 1373 }, { "epoch": 0.05727147680380142, "grad_norm": 362.0, "learning_rate": 9.980524750789455e-05, "loss": 12.3131, "step": 1374 }, { "epoch": 0.05731315910132966, "grad_norm": 202.0, "learning_rate": 9.980465186760633e-05, "loss": 10.0632, "step": 1375 }, { "epoch": 0.0573548413988579, "grad_norm": 205.0, "learning_rate": 9.980405531962392e-05, "loss": 10.7513, "step": 1376 }, { "epoch": 0.05739652369638615, "grad_norm": 181.0, "learning_rate": 9.980345786395815e-05, "loss": 9.7512, "step": 1377 }, { "epoch": 0.05743820599391439, "grad_norm": 580.0, "learning_rate": 9.980285950061996e-05, "loss": 20.8755, "step": 1378 }, { "epoch": 0.05747988829144263, "grad_norm": 174.0, "learning_rate": 9.980226022962022e-05, "loss": 9.0007, "step": 1379 }, { "epoch": 0.057521570588970866, "grad_norm": 153.0, "learning_rate": 9.98016600509699e-05, "loss": 9.0632, "step": 1380 }, { "epoch": 0.057563252886499106, "grad_norm": 140.0, "learning_rate": 9.980105896467987e-05, "loss": 9.9385, "step": 1381 }, { "epoch": 0.057604935184027345, "grad_norm": 568.0, "learning_rate": 9.980045697076113e-05, "loss": 16.3799, "step": 1382 }, { "epoch": 0.057646617481555584, "grad_norm": 290.0, "learning_rate": 9.979985406922466e-05, "loss": 12.4381, "step": 1383 }, { "epoch": 0.057688299779083824, "grad_norm": 342.0, "learning_rate": 9.979925026008139e-05, "loss": 13.4389, "step": 1384 }, { "epoch": 0.05772998207661206, "grad_norm": 282.0, "learning_rate": 9.97986455433424e-05, "loss": 11.6254, "step": 1385 }, { "epoch": 0.0577716643741403, "grad_norm": 274.0, "learning_rate": 9.979803991901865e-05, "loss": 12.2506, "step": 1386 }, { "epoch": 0.05781334667166854, "grad_norm": 342.0, "learning_rate": 9.979743338712121e-05, "loss": 12.7504, "step": 1387 }, { "epoch": 0.05785502896919678, "grad_norm": 234.0, "learning_rate": 9.979682594766113e-05, "loss": 11.4398, "step": 1388 }, { "epoch": 0.05789671126672502, "grad_norm": 224.0, "learning_rate": 9.979621760064947e-05, "loss": 9.8147, "step": 1389 }, { "epoch": 0.05793839356425326, "grad_norm": 156.0, "learning_rate": 9.979560834609732e-05, "loss": 8.3139, "step": 1390 }, { "epoch": 0.0579800758617815, "grad_norm": 92.5, "learning_rate": 9.97949981840158e-05, "loss": 6.9071, "step": 1391 }, { "epoch": 0.05802175815930974, "grad_norm": 252.0, "learning_rate": 9.979438711441602e-05, "loss": 11.8754, "step": 1392 }, { "epoch": 0.05806344045683798, "grad_norm": 540.0, "learning_rate": 9.97937751373091e-05, "loss": 16.8754, "step": 1393 }, { "epoch": 0.05810512275436622, "grad_norm": 432.0, "learning_rate": 9.979316225270621e-05, "loss": 14.9388, "step": 1394 }, { "epoch": 0.058146805051894464, "grad_norm": 290.0, "learning_rate": 9.979254846061852e-05, "loss": 13.0632, "step": 1395 }, { "epoch": 0.0581884873494227, "grad_norm": 344.0, "learning_rate": 9.979193376105723e-05, "loss": 13.7507, "step": 1396 }, { "epoch": 0.05823016964695094, "grad_norm": 352.0, "learning_rate": 9.97913181540335e-05, "loss": 14.6261, "step": 1397 }, { "epoch": 0.05827185194447918, "grad_norm": 272.0, "learning_rate": 9.979070163955859e-05, "loss": 12.0635, "step": 1398 }, { "epoch": 0.05831353424200742, "grad_norm": 249.0, "learning_rate": 9.979008421764371e-05, "loss": 10.8805, "step": 1399 }, { "epoch": 0.05835521653953566, "grad_norm": 358.0, "learning_rate": 9.978946588830014e-05, "loss": 14.314, "step": 1400 }, { "epoch": 0.0583968988370639, "grad_norm": 167.0, "learning_rate": 9.978884665153913e-05, "loss": 10.0004, "step": 1401 }, { "epoch": 0.05843858113459214, "grad_norm": 584.0, "learning_rate": 9.978822650737197e-05, "loss": 18.3755, "step": 1402 }, { "epoch": 0.05848026343212038, "grad_norm": 218.0, "learning_rate": 9.978760545580996e-05, "loss": 11.0009, "step": 1403 }, { "epoch": 0.05852194572964862, "grad_norm": 486.0, "learning_rate": 9.978698349686444e-05, "loss": 16.1288, "step": 1404 }, { "epoch": 0.05856362802717686, "grad_norm": 588.0, "learning_rate": 9.978636063054669e-05, "loss": 17.2509, "step": 1405 }, { "epoch": 0.0586053103247051, "grad_norm": 258.0, "learning_rate": 9.978573685686813e-05, "loss": 12.376, "step": 1406 }, { "epoch": 0.058646992622233336, "grad_norm": 394.0, "learning_rate": 9.978511217584008e-05, "loss": 14.6255, "step": 1407 }, { "epoch": 0.058688674919761576, "grad_norm": 219.0, "learning_rate": 9.978448658747395e-05, "loss": 10.0006, "step": 1408 }, { "epoch": 0.058730357217289815, "grad_norm": 708.0, "learning_rate": 9.978386009178112e-05, "loss": 20.5003, "step": 1409 }, { "epoch": 0.058772039514818054, "grad_norm": 676.0, "learning_rate": 9.978323268877304e-05, "loss": 19.753, "step": 1410 }, { "epoch": 0.058813721812346294, "grad_norm": 462.0, "learning_rate": 9.97826043784611e-05, "loss": 15.1304, "step": 1411 }, { "epoch": 0.05885540410987453, "grad_norm": 132.0, "learning_rate": 9.97819751608568e-05, "loss": 10.2525, "step": 1412 }, { "epoch": 0.05889708640740278, "grad_norm": 292.0, "learning_rate": 9.978134503597157e-05, "loss": 12.3142, "step": 1413 }, { "epoch": 0.05893876870493102, "grad_norm": 288.0, "learning_rate": 9.97807140038169e-05, "loss": 13.563, "step": 1414 }, { "epoch": 0.05898045100245926, "grad_norm": 596.0, "learning_rate": 9.978008206440431e-05, "loss": 18.8767, "step": 1415 }, { "epoch": 0.0590221332999875, "grad_norm": 298.0, "learning_rate": 9.97794492177453e-05, "loss": 13.5005, "step": 1416 }, { "epoch": 0.05906381559751574, "grad_norm": 272.0, "learning_rate": 9.977881546385141e-05, "loss": 11.6277, "step": 1417 }, { "epoch": 0.059105497895043976, "grad_norm": 155.0, "learning_rate": 9.97781808027342e-05, "loss": 9.565, "step": 1418 }, { "epoch": 0.059147180192572216, "grad_norm": 720.0, "learning_rate": 9.977754523440521e-05, "loss": 20.5043, "step": 1419 }, { "epoch": 0.059188862490100455, "grad_norm": 226.0, "learning_rate": 9.977690875887604e-05, "loss": 11.6274, "step": 1420 }, { "epoch": 0.059230544787628694, "grad_norm": 154.0, "learning_rate": 9.977627137615831e-05, "loss": 7.751, "step": 1421 }, { "epoch": 0.059272227085156934, "grad_norm": 624.0, "learning_rate": 9.977563308626359e-05, "loss": 16.751, "step": 1422 }, { "epoch": 0.05931390938268517, "grad_norm": 229.0, "learning_rate": 9.977499388920355e-05, "loss": 10.3139, "step": 1423 }, { "epoch": 0.05935559168021341, "grad_norm": 612.0, "learning_rate": 9.977435378498983e-05, "loss": 20.2508, "step": 1424 }, { "epoch": 0.05939727397774165, "grad_norm": 470.0, "learning_rate": 9.977371277363408e-05, "loss": 16.7513, "step": 1425 }, { "epoch": 0.05943895627526989, "grad_norm": 306.0, "learning_rate": 9.977307085514802e-05, "loss": 13.6894, "step": 1426 }, { "epoch": 0.05948063857279813, "grad_norm": 376.0, "learning_rate": 9.977242802954329e-05, "loss": 14.4389, "step": 1427 }, { "epoch": 0.05952232087032637, "grad_norm": 334.0, "learning_rate": 9.977178429683167e-05, "loss": 13.1902, "step": 1428 }, { "epoch": 0.05956400316785461, "grad_norm": 362.0, "learning_rate": 9.977113965702485e-05, "loss": 14.4385, "step": 1429 }, { "epoch": 0.05960568546538285, "grad_norm": 426.0, "learning_rate": 9.97704941101346e-05, "loss": 16.0014, "step": 1430 }, { "epoch": 0.05964736776291109, "grad_norm": 864.0, "learning_rate": 9.976984765617268e-05, "loss": 24.1261, "step": 1431 }, { "epoch": 0.059689050060439335, "grad_norm": 342.0, "learning_rate": 9.976920029515087e-05, "loss": 12.1881, "step": 1432 }, { "epoch": 0.059730732357967574, "grad_norm": 422.0, "learning_rate": 9.976855202708096e-05, "loss": 13.7511, "step": 1433 }, { "epoch": 0.05977241465549581, "grad_norm": 564.0, "learning_rate": 9.976790285197476e-05, "loss": 18.001, "step": 1434 }, { "epoch": 0.05981409695302405, "grad_norm": 440.0, "learning_rate": 9.976725276984413e-05, "loss": 16.6264, "step": 1435 }, { "epoch": 0.05985577925055229, "grad_norm": 224.0, "learning_rate": 9.976660178070088e-05, "loss": 11.3136, "step": 1436 }, { "epoch": 0.05989746154808053, "grad_norm": 382.0, "learning_rate": 9.976594988455691e-05, "loss": 14.0631, "step": 1437 }, { "epoch": 0.05993914384560877, "grad_norm": 438.0, "learning_rate": 9.976529708142408e-05, "loss": 15.1256, "step": 1438 }, { "epoch": 0.05998082614313701, "grad_norm": 178.0, "learning_rate": 9.976464337131429e-05, "loss": 9.1257, "step": 1439 }, { "epoch": 0.06002250844066525, "grad_norm": 344.0, "learning_rate": 9.976398875423947e-05, "loss": 11.8756, "step": 1440 }, { "epoch": 0.06006419073819349, "grad_norm": 576.0, "learning_rate": 9.976333323021152e-05, "loss": 18.8762, "step": 1441 }, { "epoch": 0.06010587303572173, "grad_norm": 91.0, "learning_rate": 9.976267679924242e-05, "loss": 8.251, "step": 1442 }, { "epoch": 0.06014755533324997, "grad_norm": 488.0, "learning_rate": 9.976201946134411e-05, "loss": 16.8755, "step": 1443 }, { "epoch": 0.06018923763077821, "grad_norm": 464.0, "learning_rate": 9.976136121652857e-05, "loss": 15.6877, "step": 1444 }, { "epoch": 0.060230919928306446, "grad_norm": 252.0, "learning_rate": 9.976070206480783e-05, "loss": 11.6878, "step": 1445 }, { "epoch": 0.060272602225834686, "grad_norm": 237.0, "learning_rate": 9.976004200619385e-05, "loss": 11.1882, "step": 1446 }, { "epoch": 0.060314284523362925, "grad_norm": 388.0, "learning_rate": 9.97593810406987e-05, "loss": 14.3755, "step": 1447 }, { "epoch": 0.060355966820891165, "grad_norm": 496.0, "learning_rate": 9.975871916833441e-05, "loss": 16.3756, "step": 1448 }, { "epoch": 0.060397649118419404, "grad_norm": 452.0, "learning_rate": 9.975805638911304e-05, "loss": 14.1254, "step": 1449 }, { "epoch": 0.06043933141594765, "grad_norm": 376.0, "learning_rate": 9.975739270304669e-05, "loss": 14.5013, "step": 1450 }, { "epoch": 0.06048101371347589, "grad_norm": 302.0, "learning_rate": 9.975672811014742e-05, "loss": 12.8128, "step": 1451 }, { "epoch": 0.06052269601100413, "grad_norm": 298.0, "learning_rate": 9.975606261042738e-05, "loss": 11.0629, "step": 1452 }, { "epoch": 0.06056437830853237, "grad_norm": 466.0, "learning_rate": 9.975539620389869e-05, "loss": 15.6929, "step": 1453 }, { "epoch": 0.06060606060606061, "grad_norm": 744.0, "learning_rate": 9.975472889057346e-05, "loss": 21.2514, "step": 1454 }, { "epoch": 0.06064774290358885, "grad_norm": 580.0, "learning_rate": 9.97540606704639e-05, "loss": 17.3775, "step": 1455 }, { "epoch": 0.060689425201117086, "grad_norm": 254.0, "learning_rate": 9.975339154358216e-05, "loss": 12.3759, "step": 1456 }, { "epoch": 0.060731107498645326, "grad_norm": 428.0, "learning_rate": 9.975272150994045e-05, "loss": 15.0669, "step": 1457 }, { "epoch": 0.060772789796173565, "grad_norm": 378.0, "learning_rate": 9.975205056955096e-05, "loss": 14.7504, "step": 1458 }, { "epoch": 0.060814472093701805, "grad_norm": 239.0, "learning_rate": 9.975137872242595e-05, "loss": 12.1882, "step": 1459 }, { "epoch": 0.060856154391230044, "grad_norm": 354.0, "learning_rate": 9.975070596857764e-05, "loss": 14.7503, "step": 1460 }, { "epoch": 0.06089783668875828, "grad_norm": 876.0, "learning_rate": 9.975003230801829e-05, "loss": 22.6258, "step": 1461 }, { "epoch": 0.06093951898628652, "grad_norm": 484.0, "learning_rate": 9.974935774076019e-05, "loss": 15.8127, "step": 1462 }, { "epoch": 0.06098120128381476, "grad_norm": 241.0, "learning_rate": 9.974868226681562e-05, "loss": 11.5675, "step": 1463 }, { "epoch": 0.061022883581343, "grad_norm": 596.0, "learning_rate": 9.97480058861969e-05, "loss": 17.0006, "step": 1464 }, { "epoch": 0.06106456587887124, "grad_norm": 402.0, "learning_rate": 9.974732859891637e-05, "loss": 14.188, "step": 1465 }, { "epoch": 0.06110624817639948, "grad_norm": 520.0, "learning_rate": 9.974665040498636e-05, "loss": 16.6257, "step": 1466 }, { "epoch": 0.06114793047392772, "grad_norm": 316.0, "learning_rate": 9.974597130441921e-05, "loss": 12.8756, "step": 1467 }, { "epoch": 0.061189612771455966, "grad_norm": 244.0, "learning_rate": 9.974529129722733e-05, "loss": 11.5004, "step": 1468 }, { "epoch": 0.061231295068984205, "grad_norm": 344.0, "learning_rate": 9.974461038342311e-05, "loss": 12.8128, "step": 1469 }, { "epoch": 0.061272977366512445, "grad_norm": 1040.0, "learning_rate": 9.974392856301893e-05, "loss": 33.2519, "step": 1470 }, { "epoch": 0.061314659664040684, "grad_norm": 560.0, "learning_rate": 9.974324583602726e-05, "loss": 16.8756, "step": 1471 }, { "epoch": 0.06135634196156892, "grad_norm": 388.0, "learning_rate": 9.97425622024605e-05, "loss": 14.3755, "step": 1472 }, { "epoch": 0.06139802425909716, "grad_norm": 412.0, "learning_rate": 9.974187766233112e-05, "loss": 14.3756, "step": 1473 }, { "epoch": 0.0614397065566254, "grad_norm": 368.0, "learning_rate": 9.974119221565162e-05, "loss": 14.5007, "step": 1474 }, { "epoch": 0.06148138885415364, "grad_norm": 162.0, "learning_rate": 9.974050586243448e-05, "loss": 9.4381, "step": 1475 }, { "epoch": 0.06152307115168188, "grad_norm": 104.5, "learning_rate": 9.97398186026922e-05, "loss": 8.9378, "step": 1476 }, { "epoch": 0.06156475344921012, "grad_norm": 486.0, "learning_rate": 9.97391304364373e-05, "loss": 16.5004, "step": 1477 }, { "epoch": 0.06160643574673836, "grad_norm": 129.0, "learning_rate": 9.973844136368234e-05, "loss": 8.9384, "step": 1478 }, { "epoch": 0.0616481180442666, "grad_norm": 76.0, "learning_rate": 9.973775138443987e-05, "loss": 7.3758, "step": 1479 }, { "epoch": 0.06168980034179484, "grad_norm": 191.0, "learning_rate": 9.973706049872247e-05, "loss": 9.0627, "step": 1480 }, { "epoch": 0.06173148263932308, "grad_norm": 548.0, "learning_rate": 9.973636870654272e-05, "loss": 18.5009, "step": 1481 }, { "epoch": 0.06177316493685132, "grad_norm": 512.0, "learning_rate": 9.973567600791324e-05, "loss": 17.3755, "step": 1482 }, { "epoch": 0.06181484723437956, "grad_norm": 362.0, "learning_rate": 9.973498240284664e-05, "loss": 14.1879, "step": 1483 }, { "epoch": 0.061856529531907796, "grad_norm": 252.0, "learning_rate": 9.973428789135559e-05, "loss": 10.5009, "step": 1484 }, { "epoch": 0.061898211829436035, "grad_norm": 244.0, "learning_rate": 9.973359247345272e-05, "loss": 11.5636, "step": 1485 }, { "epoch": 0.06193989412696428, "grad_norm": 596.0, "learning_rate": 9.973289614915071e-05, "loss": 15.8757, "step": 1486 }, { "epoch": 0.06198157642449252, "grad_norm": 508.0, "learning_rate": 9.973219891846225e-05, "loss": 16.5009, "step": 1487 }, { "epoch": 0.06202325872202076, "grad_norm": 436.0, "learning_rate": 9.973150078140006e-05, "loss": 12.5664, "step": 1488 }, { "epoch": 0.062064941019549, "grad_norm": 382.0, "learning_rate": 9.973080173797684e-05, "loss": 15.4381, "step": 1489 }, { "epoch": 0.06210662331707724, "grad_norm": 304.0, "learning_rate": 9.973010178820534e-05, "loss": 13.2509, "step": 1490 }, { "epoch": 0.06214830561460548, "grad_norm": 1020.0, "learning_rate": 9.972940093209833e-05, "loss": 24.7517, "step": 1491 }, { "epoch": 0.06218998791213372, "grad_norm": 356.0, "learning_rate": 9.972869916966858e-05, "loss": 14.5631, "step": 1492 }, { "epoch": 0.06223167020966196, "grad_norm": 1056.0, "learning_rate": 9.972799650092887e-05, "loss": 27.1265, "step": 1493 }, { "epoch": 0.0622733525071902, "grad_norm": 580.0, "learning_rate": 9.9727292925892e-05, "loss": 18.5005, "step": 1494 }, { "epoch": 0.062315034804718436, "grad_norm": 498.0, "learning_rate": 9.972658844457081e-05, "loss": 14.692, "step": 1495 }, { "epoch": 0.062356717102246675, "grad_norm": 422.0, "learning_rate": 9.972588305697812e-05, "loss": 17.0014, "step": 1496 }, { "epoch": 0.062398399399774915, "grad_norm": 186.0, "learning_rate": 9.97251767631268e-05, "loss": 10.8755, "step": 1497 }, { "epoch": 0.062440081697303154, "grad_norm": 322.0, "learning_rate": 9.972446956302974e-05, "loss": 13.6254, "step": 1498 }, { "epoch": 0.062481763994831394, "grad_norm": 348.0, "learning_rate": 9.97237614566998e-05, "loss": 12.314, "step": 1499 }, { "epoch": 0.06252344629235963, "grad_norm": 374.0, "learning_rate": 9.972305244414987e-05, "loss": 14.0008, "step": 1500 }, { "epoch": 0.06256512858988787, "grad_norm": 187.0, "learning_rate": 9.972234252539291e-05, "loss": 9.4381, "step": 1501 }, { "epoch": 0.06260681088741611, "grad_norm": 1096.0, "learning_rate": 9.972163170044185e-05, "loss": 25.0055, "step": 1502 }, { "epoch": 0.06264849318494435, "grad_norm": 346.0, "learning_rate": 9.972091996930964e-05, "loss": 13.626, "step": 1503 }, { "epoch": 0.06269017548247259, "grad_norm": 266.0, "learning_rate": 9.972020733200924e-05, "loss": 12.6255, "step": 1504 }, { "epoch": 0.06273185778000083, "grad_norm": 336.0, "learning_rate": 9.971949378855365e-05, "loss": 13.1878, "step": 1505 }, { "epoch": 0.06277354007752907, "grad_norm": 724.0, "learning_rate": 9.971877933895587e-05, "loss": 19.2505, "step": 1506 }, { "epoch": 0.06281522237505731, "grad_norm": 162.0, "learning_rate": 9.971806398322892e-05, "loss": 9.0632, "step": 1507 }, { "epoch": 0.06285690467258555, "grad_norm": 201.0, "learning_rate": 9.971734772138586e-05, "loss": 10.6889, "step": 1508 }, { "epoch": 0.06289858697011379, "grad_norm": 354.0, "learning_rate": 9.97166305534397e-05, "loss": 14.7507, "step": 1509 }, { "epoch": 0.06294026926764203, "grad_norm": 202.0, "learning_rate": 9.971591247940355e-05, "loss": 10.0009, "step": 1510 }, { "epoch": 0.06298195156517027, "grad_norm": 868.0, "learning_rate": 9.971519349929047e-05, "loss": 25.0061, "step": 1511 }, { "epoch": 0.0630236338626985, "grad_norm": 274.0, "learning_rate": 9.971447361311359e-05, "loss": 11.1257, "step": 1512 }, { "epoch": 0.06306531616022674, "grad_norm": 398.0, "learning_rate": 9.971375282088599e-05, "loss": 16.0005, "step": 1513 }, { "epoch": 0.063106998457755, "grad_norm": 444.0, "learning_rate": 9.971303112262086e-05, "loss": 15.5009, "step": 1514 }, { "epoch": 0.06314868075528324, "grad_norm": 320.0, "learning_rate": 9.971230851833131e-05, "loss": 11.3757, "step": 1515 }, { "epoch": 0.06319036305281148, "grad_norm": 350.0, "learning_rate": 9.971158500803052e-05, "loss": 10.3756, "step": 1516 }, { "epoch": 0.06323204535033972, "grad_norm": 378.0, "learning_rate": 9.971086059173169e-05, "loss": 14.7504, "step": 1517 }, { "epoch": 0.06327372764786796, "grad_norm": 760.0, "learning_rate": 9.971013526944802e-05, "loss": 20.7504, "step": 1518 }, { "epoch": 0.0633154099453962, "grad_norm": 197.0, "learning_rate": 9.97094090411927e-05, "loss": 9.6261, "step": 1519 }, { "epoch": 0.06335709224292443, "grad_norm": 238.0, "learning_rate": 9.970868190697899e-05, "loss": 11.0631, "step": 1520 }, { "epoch": 0.06339877454045267, "grad_norm": 276.0, "learning_rate": 9.970795386682017e-05, "loss": 12.126, "step": 1521 }, { "epoch": 0.06344045683798091, "grad_norm": 362.0, "learning_rate": 9.970722492072945e-05, "loss": 14.188, "step": 1522 }, { "epoch": 0.06348213913550915, "grad_norm": 183.0, "learning_rate": 9.970649506872015e-05, "loss": 9.9394, "step": 1523 }, { "epoch": 0.06352382143303739, "grad_norm": 105.5, "learning_rate": 9.970576431080556e-05, "loss": 7.8439, "step": 1524 }, { "epoch": 0.06356550373056563, "grad_norm": 262.0, "learning_rate": 9.9705032646999e-05, "loss": 9.6273, "step": 1525 }, { "epoch": 0.06360718602809387, "grad_norm": 516.0, "learning_rate": 9.970430007731382e-05, "loss": 18.0004, "step": 1526 }, { "epoch": 0.06364886832562211, "grad_norm": 848.0, "learning_rate": 9.970356660176337e-05, "loss": 22.7504, "step": 1527 }, { "epoch": 0.06369055062315035, "grad_norm": 448.0, "learning_rate": 9.970283222036099e-05, "loss": 14.7505, "step": 1528 }, { "epoch": 0.06373223292067859, "grad_norm": 278.0, "learning_rate": 9.970209693312007e-05, "loss": 11.4392, "step": 1529 }, { "epoch": 0.06377391521820683, "grad_norm": 280.0, "learning_rate": 9.970136074005403e-05, "loss": 11.6261, "step": 1530 }, { "epoch": 0.06381559751573507, "grad_norm": 1328.0, "learning_rate": 9.970062364117628e-05, "loss": 31.0055, "step": 1531 }, { "epoch": 0.0638572798132633, "grad_norm": 382.0, "learning_rate": 9.969988563650026e-05, "loss": 16.0036, "step": 1532 }, { "epoch": 0.06389896211079155, "grad_norm": 322.0, "learning_rate": 9.96991467260394e-05, "loss": 13.5006, "step": 1533 }, { "epoch": 0.06394064440831979, "grad_norm": 556.0, "learning_rate": 9.969840690980718e-05, "loss": 17.126, "step": 1534 }, { "epoch": 0.06398232670584802, "grad_norm": 191.0, "learning_rate": 9.969766618781709e-05, "loss": 10.9389, "step": 1535 }, { "epoch": 0.06402400900337626, "grad_norm": 360.0, "learning_rate": 9.969692456008262e-05, "loss": 13.4379, "step": 1536 }, { "epoch": 0.0640656913009045, "grad_norm": 201.0, "learning_rate": 9.969618202661728e-05, "loss": 10.5005, "step": 1537 }, { "epoch": 0.06410737359843274, "grad_norm": 384.0, "learning_rate": 9.969543858743461e-05, "loss": 15.1257, "step": 1538 }, { "epoch": 0.06414905589596098, "grad_norm": 512.0, "learning_rate": 9.969469424254819e-05, "loss": 17.0003, "step": 1539 }, { "epoch": 0.06419073819348922, "grad_norm": 233.0, "learning_rate": 9.969394899197152e-05, "loss": 10.7532, "step": 1540 }, { "epoch": 0.06423242049101746, "grad_norm": 230.0, "learning_rate": 9.969320283571824e-05, "loss": 10.0629, "step": 1541 }, { "epoch": 0.0642741027885457, "grad_norm": 408.0, "learning_rate": 9.969245577380191e-05, "loss": 13.7505, "step": 1542 }, { "epoch": 0.06431578508607394, "grad_norm": 253.0, "learning_rate": 9.969170780623617e-05, "loss": 11.688, "step": 1543 }, { "epoch": 0.06435746738360218, "grad_norm": 420.0, "learning_rate": 9.969095893303464e-05, "loss": 15.1889, "step": 1544 }, { "epoch": 0.06439914968113042, "grad_norm": 1008.0, "learning_rate": 9.969020915421098e-05, "loss": 25.1308, "step": 1545 }, { "epoch": 0.06444083197865866, "grad_norm": 173.0, "learning_rate": 9.968945846977884e-05, "loss": 9.7509, "step": 1546 }, { "epoch": 0.0644825142761869, "grad_norm": 231.0, "learning_rate": 9.968870687975192e-05, "loss": 11.3136, "step": 1547 }, { "epoch": 0.06452419657371514, "grad_norm": 342.0, "learning_rate": 9.96879543841439e-05, "loss": 13.9379, "step": 1548 }, { "epoch": 0.06456587887124338, "grad_norm": 254.0, "learning_rate": 9.968720098296849e-05, "loss": 11.5628, "step": 1549 }, { "epoch": 0.06460756116877163, "grad_norm": 416.0, "learning_rate": 9.968644667623943e-05, "loss": 15.3754, "step": 1550 }, { "epoch": 0.06464924346629987, "grad_norm": 568.0, "learning_rate": 9.968569146397049e-05, "loss": 17.7504, "step": 1551 }, { "epoch": 0.06469092576382811, "grad_norm": 584.0, "learning_rate": 9.968493534617541e-05, "loss": 18.876, "step": 1552 }, { "epoch": 0.06473260806135635, "grad_norm": 300.0, "learning_rate": 9.968417832286795e-05, "loss": 11.5637, "step": 1553 }, { "epoch": 0.06477429035888459, "grad_norm": 1136.0, "learning_rate": 9.968342039406194e-05, "loss": 24.5064, "step": 1554 }, { "epoch": 0.06481597265641283, "grad_norm": 648.0, "learning_rate": 9.968266155977118e-05, "loss": 20.5006, "step": 1555 }, { "epoch": 0.06485765495394107, "grad_norm": 159.0, "learning_rate": 9.968190182000952e-05, "loss": 9.8145, "step": 1556 }, { "epoch": 0.0648993372514693, "grad_norm": 512.0, "learning_rate": 9.968114117479077e-05, "loss": 16.5004, "step": 1557 }, { "epoch": 0.06494101954899754, "grad_norm": 416.0, "learning_rate": 9.968037962412881e-05, "loss": 15.5627, "step": 1558 }, { "epoch": 0.06498270184652578, "grad_norm": 720.0, "learning_rate": 9.967961716803755e-05, "loss": 23.0007, "step": 1559 }, { "epoch": 0.06502438414405402, "grad_norm": 712.0, "learning_rate": 9.967885380653082e-05, "loss": 19.5065, "step": 1560 }, { "epoch": 0.06506606644158226, "grad_norm": 1144.0, "learning_rate": 9.967808953962259e-05, "loss": 26.6256, "step": 1561 }, { "epoch": 0.0651077487391105, "grad_norm": 452.0, "learning_rate": 9.967732436732677e-05, "loss": 16.8752, "step": 1562 }, { "epoch": 0.06514943103663874, "grad_norm": 384.0, "learning_rate": 9.96765582896573e-05, "loss": 12.2507, "step": 1563 }, { "epoch": 0.06519111333416698, "grad_norm": 272.0, "learning_rate": 9.967579130662814e-05, "loss": 12.1261, "step": 1564 }, { "epoch": 0.06523279563169522, "grad_norm": 494.0, "learning_rate": 9.967502341825328e-05, "loss": 17.5004, "step": 1565 }, { "epoch": 0.06527447792922346, "grad_norm": 140.0, "learning_rate": 9.967425462454669e-05, "loss": 10.2512, "step": 1566 }, { "epoch": 0.0653161602267517, "grad_norm": 412.0, "learning_rate": 9.967348492552242e-05, "loss": 13.5049, "step": 1567 }, { "epoch": 0.06535784252427994, "grad_norm": 396.0, "learning_rate": 9.967271432119447e-05, "loss": 15.3134, "step": 1568 }, { "epoch": 0.06539952482180818, "grad_norm": 130.0, "learning_rate": 9.96719428115769e-05, "loss": 9.1256, "step": 1569 }, { "epoch": 0.06544120711933642, "grad_norm": 372.0, "learning_rate": 9.967117039668376e-05, "loss": 15.063, "step": 1570 }, { "epoch": 0.06548288941686466, "grad_norm": 258.0, "learning_rate": 9.967039707652911e-05, "loss": 10.0032, "step": 1571 }, { "epoch": 0.0655245717143929, "grad_norm": 154.0, "learning_rate": 9.966962285112709e-05, "loss": 8.9394, "step": 1572 }, { "epoch": 0.06556625401192114, "grad_norm": 222.0, "learning_rate": 9.966884772049178e-05, "loss": 11.5012, "step": 1573 }, { "epoch": 0.06560793630944937, "grad_norm": 428.0, "learning_rate": 9.966807168463729e-05, "loss": 14.6881, "step": 1574 }, { "epoch": 0.06564961860697761, "grad_norm": 510.0, "learning_rate": 9.96672947435778e-05, "loss": 16.3755, "step": 1575 }, { "epoch": 0.06569130090450585, "grad_norm": 1728.0, "learning_rate": 9.966651689732746e-05, "loss": 35.7554, "step": 1576 }, { "epoch": 0.06573298320203409, "grad_norm": 1064.0, "learning_rate": 9.966573814590043e-05, "loss": 23.7557, "step": 1577 }, { "epoch": 0.06577466549956233, "grad_norm": 380.0, "learning_rate": 9.966495848931092e-05, "loss": 13.7512, "step": 1578 }, { "epoch": 0.06581634779709057, "grad_norm": 310.0, "learning_rate": 9.966417792757315e-05, "loss": 13.3772, "step": 1579 }, { "epoch": 0.06585803009461881, "grad_norm": 364.0, "learning_rate": 9.96633964607013e-05, "loss": 15.0635, "step": 1580 }, { "epoch": 0.06589971239214705, "grad_norm": 1168.0, "learning_rate": 9.966261408870965e-05, "loss": 26.5017, "step": 1581 }, { "epoch": 0.06594139468967529, "grad_norm": 278.0, "learning_rate": 9.966183081161244e-05, "loss": 12.0635, "step": 1582 }, { "epoch": 0.06598307698720353, "grad_norm": 81.5, "learning_rate": 9.966104662942398e-05, "loss": 8.3758, "step": 1583 }, { "epoch": 0.06602475928473177, "grad_norm": 133.0, "learning_rate": 9.966026154215851e-05, "loss": 8.3757, "step": 1584 }, { "epoch": 0.06606644158226001, "grad_norm": 378.0, "learning_rate": 9.965947554983038e-05, "loss": 15.6881, "step": 1585 }, { "epoch": 0.06610812387978825, "grad_norm": 468.0, "learning_rate": 9.96586886524539e-05, "loss": 15.4385, "step": 1586 }, { "epoch": 0.0661498061773165, "grad_norm": 141.0, "learning_rate": 9.96579008500434e-05, "loss": 9.2503, "step": 1587 }, { "epoch": 0.06619148847484474, "grad_norm": 370.0, "learning_rate": 9.965711214261327e-05, "loss": 15.7513, "step": 1588 }, { "epoch": 0.06623317077237298, "grad_norm": 620.0, "learning_rate": 9.965632253017784e-05, "loss": 19.8755, "step": 1589 }, { "epoch": 0.06627485306990122, "grad_norm": 540.0, "learning_rate": 9.965553201275153e-05, "loss": 16.7516, "step": 1590 }, { "epoch": 0.06631653536742946, "grad_norm": 912.0, "learning_rate": 9.965474059034874e-05, "loss": 23.3762, "step": 1591 }, { "epoch": 0.0663582176649577, "grad_norm": 500.0, "learning_rate": 9.965394826298391e-05, "loss": 17.6254, "step": 1592 }, { "epoch": 0.06639989996248594, "grad_norm": 912.0, "learning_rate": 9.965315503067145e-05, "loss": 23.8754, "step": 1593 }, { "epoch": 0.06644158226001418, "grad_norm": 320.0, "learning_rate": 9.965236089342582e-05, "loss": 12.0006, "step": 1594 }, { "epoch": 0.06648326455754242, "grad_norm": 167.0, "learning_rate": 9.965156585126153e-05, "loss": 10.0635, "step": 1595 }, { "epoch": 0.06652494685507065, "grad_norm": 688.0, "learning_rate": 9.965076990419305e-05, "loss": 18.1265, "step": 1596 }, { "epoch": 0.0665666291525989, "grad_norm": 221.0, "learning_rate": 9.964997305223485e-05, "loss": 10.8764, "step": 1597 }, { "epoch": 0.06660831145012713, "grad_norm": 444.0, "learning_rate": 9.964917529540149e-05, "loss": 16.0013, "step": 1598 }, { "epoch": 0.06664999374765537, "grad_norm": 296.0, "learning_rate": 9.964837663370752e-05, "loss": 13.3129, "step": 1599 }, { "epoch": 0.06669167604518361, "grad_norm": 284.0, "learning_rate": 9.964757706716748e-05, "loss": 12.6255, "step": 1600 }, { "epoch": 0.06673335834271185, "grad_norm": 278.0, "learning_rate": 9.964677659579592e-05, "loss": 12.8756, "step": 1601 }, { "epoch": 0.06677504064024009, "grad_norm": 129.0, "learning_rate": 9.964597521960746e-05, "loss": 7.5956, "step": 1602 }, { "epoch": 0.06681672293776833, "grad_norm": 468.0, "learning_rate": 9.964517293861669e-05, "loss": 15.7503, "step": 1603 }, { "epoch": 0.06685840523529657, "grad_norm": 330.0, "learning_rate": 9.964436975283823e-05, "loss": 12.938, "step": 1604 }, { "epoch": 0.06690008753282481, "grad_norm": 237.0, "learning_rate": 9.964356566228674e-05, "loss": 11.188, "step": 1605 }, { "epoch": 0.06694176983035305, "grad_norm": 584.0, "learning_rate": 9.964276066697687e-05, "loss": 18.5005, "step": 1606 }, { "epoch": 0.06698345212788129, "grad_norm": 294.0, "learning_rate": 9.964195476692327e-05, "loss": 14.0012, "step": 1607 }, { "epoch": 0.06702513442540953, "grad_norm": 394.0, "learning_rate": 9.964114796214062e-05, "loss": 15.2507, "step": 1608 }, { "epoch": 0.06706681672293777, "grad_norm": 356.0, "learning_rate": 9.964034025264365e-05, "loss": 12.6256, "step": 1609 }, { "epoch": 0.067108499020466, "grad_norm": 206.0, "learning_rate": 9.963953163844708e-05, "loss": 11.5008, "step": 1610 }, { "epoch": 0.06715018131799425, "grad_norm": 160.0, "learning_rate": 9.963872211956562e-05, "loss": 9.8757, "step": 1611 }, { "epoch": 0.06719186361552248, "grad_norm": 648.0, "learning_rate": 9.963791169601406e-05, "loss": 20.3759, "step": 1612 }, { "epoch": 0.06723354591305072, "grad_norm": 205.0, "learning_rate": 9.963710036780716e-05, "loss": 10.2506, "step": 1613 }, { "epoch": 0.06727522821057896, "grad_norm": 304.0, "learning_rate": 9.963628813495969e-05, "loss": 13.1256, "step": 1614 }, { "epoch": 0.0673169105081072, "grad_norm": 632.0, "learning_rate": 9.963547499748646e-05, "loss": 14.8801, "step": 1615 }, { "epoch": 0.06735859280563544, "grad_norm": 95.0, "learning_rate": 9.963466095540228e-05, "loss": 6.4381, "step": 1616 }, { "epoch": 0.06740027510316368, "grad_norm": 111.5, "learning_rate": 9.963384600872202e-05, "loss": 9.3138, "step": 1617 }, { "epoch": 0.06744195740069192, "grad_norm": 396.0, "learning_rate": 9.96330301574605e-05, "loss": 13.6889, "step": 1618 }, { "epoch": 0.06748363969822016, "grad_norm": 173.0, "learning_rate": 9.96322134016326e-05, "loss": 8.6882, "step": 1619 }, { "epoch": 0.0675253219957484, "grad_norm": 410.0, "learning_rate": 9.963139574125321e-05, "loss": 15.0004, "step": 1620 }, { "epoch": 0.06756700429327664, "grad_norm": 378.0, "learning_rate": 9.963057717633721e-05, "loss": 14.8127, "step": 1621 }, { "epoch": 0.06760868659080488, "grad_norm": 348.0, "learning_rate": 9.962975770689955e-05, "loss": 12.5629, "step": 1622 }, { "epoch": 0.06765036888833313, "grad_norm": 290.0, "learning_rate": 9.962893733295515e-05, "loss": 13.1266, "step": 1623 }, { "epoch": 0.06769205118586137, "grad_norm": 217.0, "learning_rate": 9.962811605451896e-05, "loss": 10.8129, "step": 1624 }, { "epoch": 0.06773373348338961, "grad_norm": 262.0, "learning_rate": 9.962729387160595e-05, "loss": 12.5004, "step": 1625 }, { "epoch": 0.06777541578091785, "grad_norm": 620.0, "learning_rate": 9.962647078423111e-05, "loss": 19.7505, "step": 1626 }, { "epoch": 0.06781709807844609, "grad_norm": 113.0, "learning_rate": 9.962564679240942e-05, "loss": 8.8767, "step": 1627 }, { "epoch": 0.06785878037597433, "grad_norm": 195.0, "learning_rate": 9.962482189615592e-05, "loss": 10.627, "step": 1628 }, { "epoch": 0.06790046267350257, "grad_norm": 390.0, "learning_rate": 9.962399609548563e-05, "loss": 14.2504, "step": 1629 }, { "epoch": 0.06794214497103081, "grad_norm": 260.0, "learning_rate": 9.96231693904136e-05, "loss": 12.0004, "step": 1630 }, { "epoch": 0.06798382726855905, "grad_norm": 205.0, "learning_rate": 9.962234178095493e-05, "loss": 11.3129, "step": 1631 }, { "epoch": 0.06802550956608729, "grad_norm": 232.0, "learning_rate": 9.962151326712466e-05, "loss": 7.6884, "step": 1632 }, { "epoch": 0.06806719186361553, "grad_norm": 540.0, "learning_rate": 9.96206838489379e-05, "loss": 18.6263, "step": 1633 }, { "epoch": 0.06810887416114376, "grad_norm": 316.0, "learning_rate": 9.961985352640977e-05, "loss": 12.9379, "step": 1634 }, { "epoch": 0.068150556458672, "grad_norm": 122.5, "learning_rate": 9.961902229955541e-05, "loss": 8.8759, "step": 1635 }, { "epoch": 0.06819223875620024, "grad_norm": 368.0, "learning_rate": 9.961819016838997e-05, "loss": 14.1884, "step": 1636 }, { "epoch": 0.06823392105372848, "grad_norm": 352.0, "learning_rate": 9.96173571329286e-05, "loss": 14.3132, "step": 1637 }, { "epoch": 0.06827560335125672, "grad_norm": 328.0, "learning_rate": 9.961652319318649e-05, "loss": 14.0012, "step": 1638 }, { "epoch": 0.06831728564878496, "grad_norm": 390.0, "learning_rate": 9.961568834917885e-05, "loss": 14.2508, "step": 1639 }, { "epoch": 0.0683589679463132, "grad_norm": 258.0, "learning_rate": 9.961485260092088e-05, "loss": 11.9381, "step": 1640 }, { "epoch": 0.06840065024384144, "grad_norm": 976.0, "learning_rate": 9.961401594842783e-05, "loss": 23.0051, "step": 1641 }, { "epoch": 0.06844233254136968, "grad_norm": 245.0, "learning_rate": 9.961317839171492e-05, "loss": 11.6878, "step": 1642 }, { "epoch": 0.06848401483889792, "grad_norm": 346.0, "learning_rate": 9.961233993079743e-05, "loss": 14.1262, "step": 1643 }, { "epoch": 0.06852569713642616, "grad_norm": 1056.0, "learning_rate": 9.961150056569064e-05, "loss": 28.2503, "step": 1644 }, { "epoch": 0.0685673794339544, "grad_norm": 736.0, "learning_rate": 9.961066029640984e-05, "loss": 20.7503, "step": 1645 }, { "epoch": 0.06860906173148264, "grad_norm": 366.0, "learning_rate": 9.960981912297037e-05, "loss": 13.3131, "step": 1646 }, { "epoch": 0.06865074402901088, "grad_norm": 284.0, "learning_rate": 9.960897704538755e-05, "loss": 12.6257, "step": 1647 }, { "epoch": 0.06869242632653912, "grad_norm": 248.0, "learning_rate": 9.960813406367669e-05, "loss": 12.1887, "step": 1648 }, { "epoch": 0.06873410862406736, "grad_norm": 696.0, "learning_rate": 9.960729017785319e-05, "loss": 17.5005, "step": 1649 }, { "epoch": 0.0687757909215956, "grad_norm": 221.0, "learning_rate": 9.960644538793245e-05, "loss": 11.1259, "step": 1650 }, { "epoch": 0.06881747321912383, "grad_norm": 216.0, "learning_rate": 9.96055996939298e-05, "loss": 11.752, "step": 1651 }, { "epoch": 0.06885915551665207, "grad_norm": 792.0, "learning_rate": 9.960475309586073e-05, "loss": 21.6256, "step": 1652 }, { "epoch": 0.06890083781418031, "grad_norm": 556.0, "learning_rate": 9.96039055937406e-05, "loss": 17.8784, "step": 1653 }, { "epoch": 0.06894252011170855, "grad_norm": 246.0, "learning_rate": 9.96030571875849e-05, "loss": 10.0008, "step": 1654 }, { "epoch": 0.06898420240923679, "grad_norm": 504.0, "learning_rate": 9.960220787740908e-05, "loss": 17.1253, "step": 1655 }, { "epoch": 0.06902588470676503, "grad_norm": 494.0, "learning_rate": 9.960135766322862e-05, "loss": 18.5007, "step": 1656 }, { "epoch": 0.06906756700429327, "grad_norm": 362.0, "learning_rate": 9.960050654505901e-05, "loss": 13.8761, "step": 1657 }, { "epoch": 0.06910924930182151, "grad_norm": 616.0, "learning_rate": 9.959965452291576e-05, "loss": 20.8755, "step": 1658 }, { "epoch": 0.06915093159934976, "grad_norm": 362.0, "learning_rate": 9.95988015968144e-05, "loss": 14.1884, "step": 1659 }, { "epoch": 0.069192613896878, "grad_norm": 176.0, "learning_rate": 9.959794776677049e-05, "loss": 9.5014, "step": 1660 }, { "epoch": 0.06923429619440624, "grad_norm": 800.0, "learning_rate": 9.959709303279958e-05, "loss": 23.5003, "step": 1661 }, { "epoch": 0.06927597849193448, "grad_norm": 424.0, "learning_rate": 9.959623739491724e-05, "loss": 16.0031, "step": 1662 }, { "epoch": 0.06931766078946272, "grad_norm": 338.0, "learning_rate": 9.959538085313909e-05, "loss": 13.2538, "step": 1663 }, { "epoch": 0.06935934308699096, "grad_norm": 532.0, "learning_rate": 9.95945234074807e-05, "loss": 16.2518, "step": 1664 }, { "epoch": 0.0694010253845192, "grad_norm": 344.0, "learning_rate": 9.959366505795771e-05, "loss": 13.9383, "step": 1665 }, { "epoch": 0.06944270768204744, "grad_norm": 348.0, "learning_rate": 9.959280580458578e-05, "loss": 12.7512, "step": 1666 }, { "epoch": 0.06948438997957568, "grad_norm": 472.0, "learning_rate": 9.959194564738058e-05, "loss": 15.8144, "step": 1667 }, { "epoch": 0.06952607227710392, "grad_norm": 314.0, "learning_rate": 9.959108458635775e-05, "loss": 12.4418, "step": 1668 }, { "epoch": 0.06956775457463216, "grad_norm": 472.0, "learning_rate": 9.959022262153301e-05, "loss": 17.0005, "step": 1669 }, { "epoch": 0.0696094368721604, "grad_norm": 506.0, "learning_rate": 9.958935975292206e-05, "loss": 15.8135, "step": 1670 }, { "epoch": 0.06965111916968864, "grad_norm": 1328.0, "learning_rate": 9.958849598054062e-05, "loss": 29.5081, "step": 1671 }, { "epoch": 0.06969280146721687, "grad_norm": 1048.0, "learning_rate": 9.958763130440444e-05, "loss": 28.2502, "step": 1672 }, { "epoch": 0.06973448376474511, "grad_norm": 244.0, "learning_rate": 9.958676572452928e-05, "loss": 11.8133, "step": 1673 }, { "epoch": 0.06977616606227335, "grad_norm": 644.0, "learning_rate": 9.958589924093091e-05, "loss": 19.1267, "step": 1674 }, { "epoch": 0.0698178483598016, "grad_norm": 242.0, "learning_rate": 9.958503185362513e-05, "loss": 13.3767, "step": 1675 }, { "epoch": 0.06985953065732983, "grad_norm": 378.0, "learning_rate": 9.958416356262773e-05, "loss": 13.6256, "step": 1676 }, { "epoch": 0.06990121295485807, "grad_norm": 372.0, "learning_rate": 9.958329436795454e-05, "loss": 13.9384, "step": 1677 }, { "epoch": 0.06994289525238631, "grad_norm": 197.0, "learning_rate": 9.958242426962144e-05, "loss": 8.8758, "step": 1678 }, { "epoch": 0.06998457754991455, "grad_norm": 296.0, "learning_rate": 9.958155326764424e-05, "loss": 12.3755, "step": 1679 }, { "epoch": 0.07002625984744279, "grad_norm": 340.0, "learning_rate": 9.958068136203883e-05, "loss": 13.6897, "step": 1680 }, { "epoch": 0.07006794214497103, "grad_norm": 572.0, "learning_rate": 9.95798085528211e-05, "loss": 18.8752, "step": 1681 }, { "epoch": 0.07010962444249927, "grad_norm": 272.0, "learning_rate": 9.957893484000696e-05, "loss": 12.8759, "step": 1682 }, { "epoch": 0.07015130674002751, "grad_norm": 476.0, "learning_rate": 9.957806022361234e-05, "loss": 14.1257, "step": 1683 }, { "epoch": 0.07019298903755575, "grad_norm": 812.0, "learning_rate": 9.957718470365315e-05, "loss": 21.251, "step": 1684 }, { "epoch": 0.07023467133508399, "grad_norm": 270.0, "learning_rate": 9.957630828014539e-05, "loss": 12.5016, "step": 1685 }, { "epoch": 0.07027635363261223, "grad_norm": 284.0, "learning_rate": 9.9575430953105e-05, "loss": 12.3757, "step": 1686 }, { "epoch": 0.07031803593014047, "grad_norm": 528.0, "learning_rate": 9.957455272254797e-05, "loss": 18.6259, "step": 1687 }, { "epoch": 0.0703597182276687, "grad_norm": 152.0, "learning_rate": 9.957367358849033e-05, "loss": 9.1882, "step": 1688 }, { "epoch": 0.07040140052519694, "grad_norm": 396.0, "learning_rate": 9.957279355094809e-05, "loss": 14.5005, "step": 1689 }, { "epoch": 0.07044308282272518, "grad_norm": 304.0, "learning_rate": 9.957191260993727e-05, "loss": 13.0007, "step": 1690 }, { "epoch": 0.07048476512025342, "grad_norm": 680.0, "learning_rate": 9.957103076547395e-05, "loss": 17.1256, "step": 1691 }, { "epoch": 0.07052644741778166, "grad_norm": 196.0, "learning_rate": 9.957014801757419e-05, "loss": 11.3131, "step": 1692 }, { "epoch": 0.0705681297153099, "grad_norm": 382.0, "learning_rate": 9.956926436625409e-05, "loss": 14.5007, "step": 1693 }, { "epoch": 0.07060981201283814, "grad_norm": 816.0, "learning_rate": 9.956837981152975e-05, "loss": 23.8755, "step": 1694 }, { "epoch": 0.07065149431036638, "grad_norm": 296.0, "learning_rate": 9.956749435341728e-05, "loss": 12.6883, "step": 1695 }, { "epoch": 0.07069317660789463, "grad_norm": 488.0, "learning_rate": 9.956660799193283e-05, "loss": 19.6255, "step": 1696 }, { "epoch": 0.07073485890542287, "grad_norm": 192.0, "learning_rate": 9.956572072709254e-05, "loss": 11.1254, "step": 1697 }, { "epoch": 0.07077654120295111, "grad_norm": 210.0, "learning_rate": 9.95648325589126e-05, "loss": 12.064, "step": 1698 }, { "epoch": 0.07081822350047935, "grad_norm": 288.0, "learning_rate": 9.956394348740918e-05, "loss": 12.8755, "step": 1699 }, { "epoch": 0.07085990579800759, "grad_norm": 420.0, "learning_rate": 9.95630535125985e-05, "loss": 15.1883, "step": 1700 }, { "epoch": 0.07090158809553583, "grad_norm": 348.0, "learning_rate": 9.956216263449676e-05, "loss": 12.1254, "step": 1701 }, { "epoch": 0.07094327039306407, "grad_norm": 274.0, "learning_rate": 9.956127085312021e-05, "loss": 11.6259, "step": 1702 }, { "epoch": 0.07098495269059231, "grad_norm": 464.0, "learning_rate": 9.95603781684851e-05, "loss": 16.5009, "step": 1703 }, { "epoch": 0.07102663498812055, "grad_norm": 544.0, "learning_rate": 9.955948458060768e-05, "loss": 17.0007, "step": 1704 }, { "epoch": 0.07106831728564879, "grad_norm": 76.5, "learning_rate": 9.955859008950428e-05, "loss": 7.1879, "step": 1705 }, { "epoch": 0.07110999958317703, "grad_norm": 384.0, "learning_rate": 9.955769469519117e-05, "loss": 15.5629, "step": 1706 }, { "epoch": 0.07115168188070527, "grad_norm": 76.0, "learning_rate": 9.955679839768467e-05, "loss": 7.1888, "step": 1707 }, { "epoch": 0.0711933641782335, "grad_norm": 968.0, "learning_rate": 9.955590119700112e-05, "loss": 28.3755, "step": 1708 }, { "epoch": 0.07123504647576175, "grad_norm": 712.0, "learning_rate": 9.955500309315688e-05, "loss": 21.3757, "step": 1709 }, { "epoch": 0.07127672877328999, "grad_norm": 170.0, "learning_rate": 9.95541040861683e-05, "loss": 10.1886, "step": 1710 }, { "epoch": 0.07131841107081822, "grad_norm": 370.0, "learning_rate": 9.955320417605177e-05, "loss": 15.4381, "step": 1711 }, { "epoch": 0.07136009336834646, "grad_norm": 688.0, "learning_rate": 9.955230336282371e-05, "loss": 19.5004, "step": 1712 }, { "epoch": 0.0714017756658747, "grad_norm": 434.0, "learning_rate": 9.955140164650049e-05, "loss": 14.8132, "step": 1713 }, { "epoch": 0.07144345796340294, "grad_norm": 286.0, "learning_rate": 9.955049902709861e-05, "loss": 13.688, "step": 1714 }, { "epoch": 0.07148514026093118, "grad_norm": 392.0, "learning_rate": 9.954959550463447e-05, "loss": 11.5632, "step": 1715 }, { "epoch": 0.07152682255845942, "grad_norm": 352.0, "learning_rate": 9.954869107912457e-05, "loss": 12.8133, "step": 1716 }, { "epoch": 0.07156850485598766, "grad_norm": 169.0, "learning_rate": 9.954778575058537e-05, "loss": 8.6254, "step": 1717 }, { "epoch": 0.0716101871535159, "grad_norm": 354.0, "learning_rate": 9.954687951903337e-05, "loss": 14.0003, "step": 1718 }, { "epoch": 0.07165186945104414, "grad_norm": 1200.0, "learning_rate": 9.954597238448509e-05, "loss": 30.1255, "step": 1719 }, { "epoch": 0.07169355174857238, "grad_norm": 232.0, "learning_rate": 9.954506434695707e-05, "loss": 10.8756, "step": 1720 }, { "epoch": 0.07173523404610062, "grad_norm": 520.0, "learning_rate": 9.954415540646586e-05, "loss": 16.1256, "step": 1721 }, { "epoch": 0.07177691634362886, "grad_norm": 528.0, "learning_rate": 9.9543245563028e-05, "loss": 16.0012, "step": 1722 }, { "epoch": 0.0718185986411571, "grad_norm": 508.0, "learning_rate": 9.95423348166601e-05, "loss": 16.6255, "step": 1723 }, { "epoch": 0.07186028093868534, "grad_norm": 516.0, "learning_rate": 9.954142316737877e-05, "loss": 14.6906, "step": 1724 }, { "epoch": 0.07190196323621358, "grad_norm": 624.0, "learning_rate": 9.954051061520058e-05, "loss": 18.7504, "step": 1725 }, { "epoch": 0.07194364553374182, "grad_norm": 139.0, "learning_rate": 9.95395971601422e-05, "loss": 9.564, "step": 1726 }, { "epoch": 0.07198532783127005, "grad_norm": 96.0, "learning_rate": 9.953868280222026e-05, "loss": 7.4696, "step": 1727 }, { "epoch": 0.0720270101287983, "grad_norm": 510.0, "learning_rate": 9.953776754145144e-05, "loss": 17.3758, "step": 1728 }, { "epoch": 0.07206869242632653, "grad_norm": 472.0, "learning_rate": 9.953685137785238e-05, "loss": 17.1258, "step": 1729 }, { "epoch": 0.07211037472385477, "grad_norm": 254.0, "learning_rate": 9.953593431143982e-05, "loss": 10.8759, "step": 1730 }, { "epoch": 0.07215205702138301, "grad_norm": 688.0, "learning_rate": 9.953501634223047e-05, "loss": 20.3755, "step": 1731 }, { "epoch": 0.07219373931891127, "grad_norm": 676.0, "learning_rate": 9.953409747024105e-05, "loss": 21.8753, "step": 1732 }, { "epoch": 0.0722354216164395, "grad_norm": 392.0, "learning_rate": 9.953317769548829e-05, "loss": 15.438, "step": 1733 }, { "epoch": 0.07227710391396774, "grad_norm": 194.0, "learning_rate": 9.953225701798899e-05, "loss": 11.9384, "step": 1734 }, { "epoch": 0.07231878621149598, "grad_norm": 286.0, "learning_rate": 9.953133543775989e-05, "loss": 12.7506, "step": 1735 }, { "epoch": 0.07236046850902422, "grad_norm": 648.0, "learning_rate": 9.95304129548178e-05, "loss": 20.0026, "step": 1736 }, { "epoch": 0.07240215080655246, "grad_norm": 752.0, "learning_rate": 9.952948956917956e-05, "loss": 22.1262, "step": 1737 }, { "epoch": 0.0724438331040807, "grad_norm": 544.0, "learning_rate": 9.952856528086197e-05, "loss": 17.7514, "step": 1738 }, { "epoch": 0.07248551540160894, "grad_norm": 256.0, "learning_rate": 9.952764008988187e-05, "loss": 11.5643, "step": 1739 }, { "epoch": 0.07252719769913718, "grad_norm": 210.0, "learning_rate": 9.952671399625613e-05, "loss": 10.3756, "step": 1740 }, { "epoch": 0.07256887999666542, "grad_norm": 660.0, "learning_rate": 9.952578700000163e-05, "loss": 19.6281, "step": 1741 }, { "epoch": 0.07261056229419366, "grad_norm": 484.0, "learning_rate": 9.952485910113529e-05, "loss": 16.7509, "step": 1742 }, { "epoch": 0.0726522445917219, "grad_norm": 340.0, "learning_rate": 9.952393029967397e-05, "loss": 13.8756, "step": 1743 }, { "epoch": 0.07269392688925014, "grad_norm": 528.0, "learning_rate": 9.952300059563464e-05, "loss": 17.6255, "step": 1744 }, { "epoch": 0.07273560918677838, "grad_norm": 392.0, "learning_rate": 9.952206998903422e-05, "loss": 16.3761, "step": 1745 }, { "epoch": 0.07277729148430662, "grad_norm": 296.0, "learning_rate": 9.952113847988969e-05, "loss": 11.7507, "step": 1746 }, { "epoch": 0.07281897378183486, "grad_norm": 236.0, "learning_rate": 9.952020606821799e-05, "loss": 10.6256, "step": 1747 }, { "epoch": 0.0728606560793631, "grad_norm": 408.0, "learning_rate": 9.951927275403616e-05, "loss": 12.313, "step": 1748 }, { "epoch": 0.07290233837689133, "grad_norm": 600.0, "learning_rate": 9.95183385373612e-05, "loss": 18.2505, "step": 1749 }, { "epoch": 0.07294402067441957, "grad_norm": 290.0, "learning_rate": 9.951740341821008e-05, "loss": 11.5637, "step": 1750 }, { "epoch": 0.07298570297194781, "grad_norm": 158.0, "learning_rate": 9.951646739659993e-05, "loss": 8.6276, "step": 1751 }, { "epoch": 0.07302738526947605, "grad_norm": 286.0, "learning_rate": 9.951553047254774e-05, "loss": 12.376, "step": 1752 }, { "epoch": 0.07306906756700429, "grad_norm": 150.0, "learning_rate": 9.951459264607062e-05, "loss": 8.0632, "step": 1753 }, { "epoch": 0.07311074986453253, "grad_norm": 199.0, "learning_rate": 9.951365391718565e-05, "loss": 11.5004, "step": 1754 }, { "epoch": 0.07315243216206077, "grad_norm": 278.0, "learning_rate": 9.951271428590995e-05, "loss": 13.0632, "step": 1755 }, { "epoch": 0.07319411445958901, "grad_norm": 62.5, "learning_rate": 9.951177375226064e-05, "loss": 7.5637, "step": 1756 }, { "epoch": 0.07323579675711725, "grad_norm": 460.0, "learning_rate": 9.951083231625485e-05, "loss": 16.6268, "step": 1757 }, { "epoch": 0.07327747905464549, "grad_norm": 96.5, "learning_rate": 9.950988997790974e-05, "loss": 6.0321, "step": 1758 }, { "epoch": 0.07331916135217373, "grad_norm": 632.0, "learning_rate": 9.950894673724249e-05, "loss": 19.5011, "step": 1759 }, { "epoch": 0.07336084364970197, "grad_norm": 284.0, "learning_rate": 9.950800259427031e-05, "loss": 12.5005, "step": 1760 }, { "epoch": 0.07340252594723021, "grad_norm": 386.0, "learning_rate": 9.950705754901038e-05, "loss": 14.2509, "step": 1761 }, { "epoch": 0.07344420824475845, "grad_norm": 390.0, "learning_rate": 9.950611160147991e-05, "loss": 14.4384, "step": 1762 }, { "epoch": 0.07348589054228669, "grad_norm": 112.0, "learning_rate": 9.950516475169618e-05, "loss": 8.9396, "step": 1763 }, { "epoch": 0.07352757283981493, "grad_norm": 260.0, "learning_rate": 9.950421699967642e-05, "loss": 12.7507, "step": 1764 }, { "epoch": 0.07356925513734316, "grad_norm": 225.0, "learning_rate": 9.950326834543792e-05, "loss": 10.9381, "step": 1765 }, { "epoch": 0.0736109374348714, "grad_norm": 442.0, "learning_rate": 9.950231878899796e-05, "loss": 16.626, "step": 1766 }, { "epoch": 0.07365261973239964, "grad_norm": 494.0, "learning_rate": 9.950136833037385e-05, "loss": 15.3763, "step": 1767 }, { "epoch": 0.07369430202992788, "grad_norm": 374.0, "learning_rate": 9.950041696958289e-05, "loss": 15.3129, "step": 1768 }, { "epoch": 0.07373598432745614, "grad_norm": 236.0, "learning_rate": 9.949946470664245e-05, "loss": 9.2506, "step": 1769 }, { "epoch": 0.07377766662498438, "grad_norm": 231.0, "learning_rate": 9.949851154156986e-05, "loss": 10.1886, "step": 1770 }, { "epoch": 0.07381934892251261, "grad_norm": 245.0, "learning_rate": 9.949755747438252e-05, "loss": 12.5004, "step": 1771 }, { "epoch": 0.07386103122004085, "grad_norm": 560.0, "learning_rate": 9.949660250509779e-05, "loss": 18.001, "step": 1772 }, { "epoch": 0.0739027135175691, "grad_norm": 404.0, "learning_rate": 9.949564663373307e-05, "loss": 15.126, "step": 1773 }, { "epoch": 0.07394439581509733, "grad_norm": 482.0, "learning_rate": 9.949468986030582e-05, "loss": 15.3759, "step": 1774 }, { "epoch": 0.07398607811262557, "grad_norm": 384.0, "learning_rate": 9.949373218483344e-05, "loss": 13.3145, "step": 1775 }, { "epoch": 0.07402776041015381, "grad_norm": 368.0, "learning_rate": 9.949277360733341e-05, "loss": 14.2505, "step": 1776 }, { "epoch": 0.07406944270768205, "grad_norm": 366.0, "learning_rate": 9.949181412782318e-05, "loss": 13.6881, "step": 1777 }, { "epoch": 0.07411112500521029, "grad_norm": 334.0, "learning_rate": 9.949085374632026e-05, "loss": 13.189, "step": 1778 }, { "epoch": 0.07415280730273853, "grad_norm": 520.0, "learning_rate": 9.948989246284211e-05, "loss": 17.751, "step": 1779 }, { "epoch": 0.07419448960026677, "grad_norm": 460.0, "learning_rate": 9.948893027740629e-05, "loss": 15.5628, "step": 1780 }, { "epoch": 0.07423617189779501, "grad_norm": 498.0, "learning_rate": 9.948796719003033e-05, "loss": 17.1265, "step": 1781 }, { "epoch": 0.07427785419532325, "grad_norm": 155.0, "learning_rate": 9.948700320073177e-05, "loss": 10.8134, "step": 1782 }, { "epoch": 0.07431953649285149, "grad_norm": 99.5, "learning_rate": 9.948603830952816e-05, "loss": 7.1576, "step": 1783 }, { "epoch": 0.07436121879037973, "grad_norm": 310.0, "learning_rate": 9.948507251643712e-05, "loss": 13.4383, "step": 1784 }, { "epoch": 0.07440290108790797, "grad_norm": 564.0, "learning_rate": 9.948410582147627e-05, "loss": 19.8755, "step": 1785 }, { "epoch": 0.0744445833854362, "grad_norm": 470.0, "learning_rate": 9.948313822466317e-05, "loss": 16.3754, "step": 1786 }, { "epoch": 0.07448626568296444, "grad_norm": 568.0, "learning_rate": 9.948216972601549e-05, "loss": 18.0007, "step": 1787 }, { "epoch": 0.07452794798049268, "grad_norm": 95.5, "learning_rate": 9.948120032555088e-05, "loss": 7.1879, "step": 1788 }, { "epoch": 0.07456963027802092, "grad_norm": 125.5, "learning_rate": 9.948023002328699e-05, "loss": 9.5632, "step": 1789 }, { "epoch": 0.07461131257554916, "grad_norm": 800.0, "learning_rate": 9.947925881924151e-05, "loss": 22.2535, "step": 1790 }, { "epoch": 0.0746529948730774, "grad_norm": 474.0, "learning_rate": 9.947828671343217e-05, "loss": 16.0006, "step": 1791 }, { "epoch": 0.07469467717060564, "grad_norm": 260.0, "learning_rate": 9.947731370587665e-05, "loss": 10.1254, "step": 1792 }, { "epoch": 0.07473635946813388, "grad_norm": 760.0, "learning_rate": 9.94763397965927e-05, "loss": 18.3799, "step": 1793 }, { "epoch": 0.07477804176566212, "grad_norm": 520.0, "learning_rate": 9.947536498559805e-05, "loss": 17.7508, "step": 1794 }, { "epoch": 0.07481972406319036, "grad_norm": 458.0, "learning_rate": 9.94743892729105e-05, "loss": 17.5002, "step": 1795 }, { "epoch": 0.0748614063607186, "grad_norm": 314.0, "learning_rate": 9.94734126585478e-05, "loss": 13.754, "step": 1796 }, { "epoch": 0.07490308865824684, "grad_norm": 428.0, "learning_rate": 9.947243514252776e-05, "loss": 17.0005, "step": 1797 }, { "epoch": 0.07494477095577508, "grad_norm": 584.0, "learning_rate": 9.947145672486822e-05, "loss": 19.1256, "step": 1798 }, { "epoch": 0.07498645325330332, "grad_norm": 932.0, "learning_rate": 9.947047740558697e-05, "loss": 24.6258, "step": 1799 }, { "epoch": 0.07502813555083156, "grad_norm": 252.0, "learning_rate": 9.946949718470188e-05, "loss": 12.1256, "step": 1800 }, { "epoch": 0.0750698178483598, "grad_norm": 312.0, "learning_rate": 9.946851606223081e-05, "loss": 10.5005, "step": 1801 }, { "epoch": 0.07511150014588804, "grad_norm": 732.0, "learning_rate": 9.946753403819164e-05, "loss": 23.0007, "step": 1802 }, { "epoch": 0.07515318244341627, "grad_norm": 392.0, "learning_rate": 9.946655111260228e-05, "loss": 13.8143, "step": 1803 }, { "epoch": 0.07519486474094451, "grad_norm": 528.0, "learning_rate": 9.946556728548065e-05, "loss": 16.8795, "step": 1804 }, { "epoch": 0.07523654703847277, "grad_norm": 1800.0, "learning_rate": 9.946458255684464e-05, "loss": 37.7548, "step": 1805 }, { "epoch": 0.075278229336001, "grad_norm": 416.0, "learning_rate": 9.946359692671222e-05, "loss": 15.9383, "step": 1806 }, { "epoch": 0.07531991163352925, "grad_norm": 486.0, "learning_rate": 9.946261039510136e-05, "loss": 17.3755, "step": 1807 }, { "epoch": 0.07536159393105749, "grad_norm": 253.0, "learning_rate": 9.946162296203005e-05, "loss": 12.2506, "step": 1808 }, { "epoch": 0.07540327622858572, "grad_norm": 244.0, "learning_rate": 9.946063462751626e-05, "loss": 8.6878, "step": 1809 }, { "epoch": 0.07544495852611396, "grad_norm": 223.0, "learning_rate": 9.945964539157801e-05, "loss": 11.2503, "step": 1810 }, { "epoch": 0.0754866408236422, "grad_norm": 274.0, "learning_rate": 9.945865525423334e-05, "loss": 12.5006, "step": 1811 }, { "epoch": 0.07552832312117044, "grad_norm": 258.0, "learning_rate": 9.945766421550028e-05, "loss": 11.4382, "step": 1812 }, { "epoch": 0.07557000541869868, "grad_norm": 268.0, "learning_rate": 9.94566722753969e-05, "loss": 12.7507, "step": 1813 }, { "epoch": 0.07561168771622692, "grad_norm": 704.0, "learning_rate": 9.945567943394127e-05, "loss": 21.7505, "step": 1814 }, { "epoch": 0.07565337001375516, "grad_norm": 416.0, "learning_rate": 9.945468569115151e-05, "loss": 15.3128, "step": 1815 }, { "epoch": 0.0756950523112834, "grad_norm": 668.0, "learning_rate": 9.94536910470457e-05, "loss": 18.5006, "step": 1816 }, { "epoch": 0.07573673460881164, "grad_norm": 1104.0, "learning_rate": 9.945269550164199e-05, "loss": 28.6256, "step": 1817 }, { "epoch": 0.07577841690633988, "grad_norm": 187.0, "learning_rate": 9.94516990549585e-05, "loss": 10.1924, "step": 1818 }, { "epoch": 0.07582009920386812, "grad_norm": 326.0, "learning_rate": 9.945070170701342e-05, "loss": 13.751, "step": 1819 }, { "epoch": 0.07586178150139636, "grad_norm": 304.0, "learning_rate": 9.944970345782491e-05, "loss": 12.6279, "step": 1820 }, { "epoch": 0.0759034637989246, "grad_norm": 476.0, "learning_rate": 9.944870430741115e-05, "loss": 17.0016, "step": 1821 }, { "epoch": 0.07594514609645284, "grad_norm": 322.0, "learning_rate": 9.944770425579037e-05, "loss": 14.0634, "step": 1822 }, { "epoch": 0.07598682839398108, "grad_norm": 396.0, "learning_rate": 9.94467033029808e-05, "loss": 15.3132, "step": 1823 }, { "epoch": 0.07602851069150932, "grad_norm": 115.5, "learning_rate": 9.944570144900067e-05, "loss": 7.9384, "step": 1824 }, { "epoch": 0.07607019298903755, "grad_norm": 600.0, "learning_rate": 9.944469869386824e-05, "loss": 18.3754, "step": 1825 }, { "epoch": 0.0761118752865658, "grad_norm": 266.0, "learning_rate": 9.944369503760179e-05, "loss": 10.938, "step": 1826 }, { "epoch": 0.07615355758409403, "grad_norm": 364.0, "learning_rate": 9.94426904802196e-05, "loss": 13.5632, "step": 1827 }, { "epoch": 0.07619523988162227, "grad_norm": 452.0, "learning_rate": 9.944168502173999e-05, "loss": 15.7513, "step": 1828 }, { "epoch": 0.07623692217915051, "grad_norm": 272.0, "learning_rate": 9.94406786621813e-05, "loss": 12.3129, "step": 1829 }, { "epoch": 0.07627860447667875, "grad_norm": 81.0, "learning_rate": 9.943967140156182e-05, "loss": 6.9693, "step": 1830 }, { "epoch": 0.07632028677420699, "grad_norm": 1392.0, "learning_rate": 9.943866323989996e-05, "loss": 37.2507, "step": 1831 }, { "epoch": 0.07636196907173523, "grad_norm": 482.0, "learning_rate": 9.943765417721407e-05, "loss": 17.3759, "step": 1832 }, { "epoch": 0.07640365136926347, "grad_norm": 199.0, "learning_rate": 9.943664421352255e-05, "loss": 10.2516, "step": 1833 }, { "epoch": 0.07644533366679171, "grad_norm": 446.0, "learning_rate": 9.943563334884379e-05, "loss": 14.5009, "step": 1834 }, { "epoch": 0.07648701596431995, "grad_norm": 280.0, "learning_rate": 9.943462158319622e-05, "loss": 12.9389, "step": 1835 }, { "epoch": 0.07652869826184819, "grad_norm": 390.0, "learning_rate": 9.94336089165983e-05, "loss": 14.2504, "step": 1836 }, { "epoch": 0.07657038055937643, "grad_norm": 71.5, "learning_rate": 9.943259534906846e-05, "loss": 5.6878, "step": 1837 }, { "epoch": 0.07661206285690467, "grad_norm": 230.0, "learning_rate": 9.943158088062518e-05, "loss": 12.3135, "step": 1838 }, { "epoch": 0.0766537451544329, "grad_norm": 185.0, "learning_rate": 9.943056551128694e-05, "loss": 10.1254, "step": 1839 }, { "epoch": 0.07669542745196115, "grad_norm": 84.5, "learning_rate": 9.942954924107227e-05, "loss": 7.6259, "step": 1840 }, { "epoch": 0.07673710974948939, "grad_norm": 304.0, "learning_rate": 9.942853206999967e-05, "loss": 12.8754, "step": 1841 }, { "epoch": 0.07677879204701764, "grad_norm": 412.0, "learning_rate": 9.94275139980877e-05, "loss": 14.0675, "step": 1842 }, { "epoch": 0.07682047434454588, "grad_norm": 133.0, "learning_rate": 9.942649502535489e-05, "loss": 6.5323, "step": 1843 }, { "epoch": 0.07686215664207412, "grad_norm": 456.0, "learning_rate": 9.942547515181982e-05, "loss": 17.1265, "step": 1844 }, { "epoch": 0.07690383893960236, "grad_norm": 632.0, "learning_rate": 9.942445437750108e-05, "loss": 20.1258, "step": 1845 }, { "epoch": 0.0769455212371306, "grad_norm": 260.0, "learning_rate": 9.942343270241725e-05, "loss": 11.0005, "step": 1846 }, { "epoch": 0.07698720353465884, "grad_norm": 66.5, "learning_rate": 9.9422410126587e-05, "loss": 5.6883, "step": 1847 }, { "epoch": 0.07702888583218707, "grad_norm": 402.0, "learning_rate": 9.942138665002892e-05, "loss": 14.5014, "step": 1848 }, { "epoch": 0.07707056812971531, "grad_norm": 211.0, "learning_rate": 9.94203622727617e-05, "loss": 10.8754, "step": 1849 }, { "epoch": 0.07711225042724355, "grad_norm": 512.0, "learning_rate": 9.941933699480397e-05, "loss": 17.5016, "step": 1850 }, { "epoch": 0.07715393272477179, "grad_norm": 248.0, "learning_rate": 9.941831081617445e-05, "loss": 9.5005, "step": 1851 }, { "epoch": 0.07719561502230003, "grad_norm": 255.0, "learning_rate": 9.941728373689182e-05, "loss": 10.376, "step": 1852 }, { "epoch": 0.07723729731982827, "grad_norm": 121.0, "learning_rate": 9.941625575697481e-05, "loss": 6.0022, "step": 1853 }, { "epoch": 0.07727897961735651, "grad_norm": 532.0, "learning_rate": 9.941522687644216e-05, "loss": 15.2555, "step": 1854 }, { "epoch": 0.07732066191488475, "grad_norm": 500.0, "learning_rate": 9.94141970953126e-05, "loss": 16.8766, "step": 1855 }, { "epoch": 0.07736234421241299, "grad_norm": 528.0, "learning_rate": 9.941316641360492e-05, "loss": 17.6259, "step": 1856 }, { "epoch": 0.07740402650994123, "grad_norm": 74.5, "learning_rate": 9.941213483133788e-05, "loss": 8.2506, "step": 1857 }, { "epoch": 0.07744570880746947, "grad_norm": 253.0, "learning_rate": 9.941110234853033e-05, "loss": 11.9383, "step": 1858 }, { "epoch": 0.07748739110499771, "grad_norm": 374.0, "learning_rate": 9.941006896520102e-05, "loss": 13.5631, "step": 1859 }, { "epoch": 0.07752907340252595, "grad_norm": 536.0, "learning_rate": 9.940903468136884e-05, "loss": 18.7504, "step": 1860 }, { "epoch": 0.07757075570005419, "grad_norm": 396.0, "learning_rate": 9.940799949705259e-05, "loss": 14.438, "step": 1861 }, { "epoch": 0.07761243799758243, "grad_norm": 220.0, "learning_rate": 9.940696341227119e-05, "loss": 11.0017, "step": 1862 }, { "epoch": 0.07765412029511067, "grad_norm": 788.0, "learning_rate": 9.940592642704348e-05, "loss": 20.1262, "step": 1863 }, { "epoch": 0.0776958025926389, "grad_norm": 316.0, "learning_rate": 9.940488854138839e-05, "loss": 11.9385, "step": 1864 }, { "epoch": 0.07773748489016714, "grad_norm": 147.0, "learning_rate": 9.94038497553248e-05, "loss": 11.0005, "step": 1865 }, { "epoch": 0.07777916718769538, "grad_norm": 176.0, "learning_rate": 9.940281006887168e-05, "loss": 9.3141, "step": 1866 }, { "epoch": 0.07782084948522362, "grad_norm": 135.0, "learning_rate": 9.940176948204795e-05, "loss": 9.3759, "step": 1867 }, { "epoch": 0.07786253178275186, "grad_norm": 336.0, "learning_rate": 9.940072799487259e-05, "loss": 14.0007, "step": 1868 }, { "epoch": 0.0779042140802801, "grad_norm": 360.0, "learning_rate": 9.939968560736458e-05, "loss": 13.2508, "step": 1869 }, { "epoch": 0.07794589637780834, "grad_norm": 266.0, "learning_rate": 9.939864231954292e-05, "loss": 11.5639, "step": 1870 }, { "epoch": 0.07798757867533658, "grad_norm": 780.0, "learning_rate": 9.93975981314266e-05, "loss": 19.7529, "step": 1871 }, { "epoch": 0.07802926097286482, "grad_norm": 378.0, "learning_rate": 9.939655304303468e-05, "loss": 14.754, "step": 1872 }, { "epoch": 0.07807094327039306, "grad_norm": 516.0, "learning_rate": 9.93955070543862e-05, "loss": 16.7504, "step": 1873 }, { "epoch": 0.0781126255679213, "grad_norm": 384.0, "learning_rate": 9.93944601655002e-05, "loss": 14.3132, "step": 1874 }, { "epoch": 0.07815430786544954, "grad_norm": 266.0, "learning_rate": 9.93934123763958e-05, "loss": 11.7512, "step": 1875 }, { "epoch": 0.07819599016297778, "grad_norm": 280.0, "learning_rate": 9.939236368709207e-05, "loss": 13.251, "step": 1876 }, { "epoch": 0.07823767246050602, "grad_norm": 380.0, "learning_rate": 9.939131409760811e-05, "loss": 14.2507, "step": 1877 }, { "epoch": 0.07827935475803427, "grad_norm": 312.0, "learning_rate": 9.939026360796309e-05, "loss": 12.6889, "step": 1878 }, { "epoch": 0.07832103705556251, "grad_norm": 276.0, "learning_rate": 9.938921221817612e-05, "loss": 13.1285, "step": 1879 }, { "epoch": 0.07836271935309075, "grad_norm": 312.0, "learning_rate": 9.938815992826638e-05, "loss": 13.0001, "step": 1880 }, { "epoch": 0.07840440165061899, "grad_norm": 472.0, "learning_rate": 9.938710673825302e-05, "loss": 17.7505, "step": 1881 }, { "epoch": 0.07844608394814723, "grad_norm": 896.0, "learning_rate": 9.938605264815529e-05, "loss": 25.2529, "step": 1882 }, { "epoch": 0.07848776624567547, "grad_norm": 258.0, "learning_rate": 9.938499765799233e-05, "loss": 12.5028, "step": 1883 }, { "epoch": 0.0785294485432037, "grad_norm": 568.0, "learning_rate": 9.938394176778343e-05, "loss": 18.3758, "step": 1884 }, { "epoch": 0.07857113084073195, "grad_norm": 214.0, "learning_rate": 9.938288497754779e-05, "loss": 11.8134, "step": 1885 }, { "epoch": 0.07861281313826018, "grad_norm": 434.0, "learning_rate": 9.938182728730469e-05, "loss": 10.0634, "step": 1886 }, { "epoch": 0.07865449543578842, "grad_norm": 684.0, "learning_rate": 9.938076869707343e-05, "loss": 21.876, "step": 1887 }, { "epoch": 0.07869617773331666, "grad_norm": 376.0, "learning_rate": 9.937970920687324e-05, "loss": 11.9406, "step": 1888 }, { "epoch": 0.0787378600308449, "grad_norm": 1120.0, "learning_rate": 9.937864881672347e-05, "loss": 26.2564, "step": 1889 }, { "epoch": 0.07877954232837314, "grad_norm": 272.0, "learning_rate": 9.937758752664347e-05, "loss": 13.5628, "step": 1890 }, { "epoch": 0.07882122462590138, "grad_norm": 540.0, "learning_rate": 9.937652533665253e-05, "loss": 17.8768, "step": 1891 }, { "epoch": 0.07886290692342962, "grad_norm": 540.0, "learning_rate": 9.937546224677005e-05, "loss": 16.5004, "step": 1892 }, { "epoch": 0.07890458922095786, "grad_norm": 1384.0, "learning_rate": 9.937439825701538e-05, "loss": 26.3812, "step": 1893 }, { "epoch": 0.0789462715184861, "grad_norm": 139.0, "learning_rate": 9.937333336740791e-05, "loss": 10.3761, "step": 1894 }, { "epoch": 0.07898795381601434, "grad_norm": 326.0, "learning_rate": 9.937226757796706e-05, "loss": 11.938, "step": 1895 }, { "epoch": 0.07902963611354258, "grad_norm": 368.0, "learning_rate": 9.937120088871226e-05, "loss": 14.1254, "step": 1896 }, { "epoch": 0.07907131841107082, "grad_norm": 426.0, "learning_rate": 9.937013329966293e-05, "loss": 15.188, "step": 1897 }, { "epoch": 0.07911300070859906, "grad_norm": 334.0, "learning_rate": 9.936906481083854e-05, "loss": 13.1257, "step": 1898 }, { "epoch": 0.0791546830061273, "grad_norm": 438.0, "learning_rate": 9.936799542225856e-05, "loss": 16.3755, "step": 1899 }, { "epoch": 0.07919636530365554, "grad_norm": 482.0, "learning_rate": 9.936692513394247e-05, "loss": 16.5003, "step": 1900 }, { "epoch": 0.07923804760118378, "grad_norm": 178.0, "learning_rate": 9.936585394590982e-05, "loss": 7.0017, "step": 1901 }, { "epoch": 0.07927972989871201, "grad_norm": 476.0, "learning_rate": 9.936478185818008e-05, "loss": 16.7506, "step": 1902 }, { "epoch": 0.07932141219624025, "grad_norm": 150.0, "learning_rate": 9.936370887077281e-05, "loss": 9.4395, "step": 1903 }, { "epoch": 0.0793630944937685, "grad_norm": 187.0, "learning_rate": 9.936263498370756e-05, "loss": 10.5004, "step": 1904 }, { "epoch": 0.07940477679129673, "grad_norm": 548.0, "learning_rate": 9.936156019700391e-05, "loss": 17.2506, "step": 1905 }, { "epoch": 0.07944645908882497, "grad_norm": 326.0, "learning_rate": 9.936048451068144e-05, "loss": 14.8775, "step": 1906 }, { "epoch": 0.07948814138635321, "grad_norm": 724.0, "learning_rate": 9.935940792475975e-05, "loss": 19.6262, "step": 1907 }, { "epoch": 0.07952982368388145, "grad_norm": 298.0, "learning_rate": 9.935833043925848e-05, "loss": 12.5636, "step": 1908 }, { "epoch": 0.07957150598140969, "grad_norm": 228.0, "learning_rate": 9.935725205419726e-05, "loss": 10.0006, "step": 1909 }, { "epoch": 0.07961318827893793, "grad_norm": 824.0, "learning_rate": 9.935617276959574e-05, "loss": 21.5066, "step": 1910 }, { "epoch": 0.07965487057646617, "grad_norm": 1680.0, "learning_rate": 9.935509258547358e-05, "loss": 32.0062, "step": 1911 }, { "epoch": 0.07969655287399441, "grad_norm": 364.0, "learning_rate": 9.935401150185048e-05, "loss": 16.1274, "step": 1912 }, { "epoch": 0.07973823517152265, "grad_norm": 1248.0, "learning_rate": 9.935292951874613e-05, "loss": 32.001, "step": 1913 }, { "epoch": 0.07977991746905089, "grad_norm": 312.0, "learning_rate": 9.935184663618026e-05, "loss": 13.8754, "step": 1914 }, { "epoch": 0.07982159976657914, "grad_norm": 213.0, "learning_rate": 9.935076285417262e-05, "loss": 10.5628, "step": 1915 }, { "epoch": 0.07986328206410738, "grad_norm": 548.0, "learning_rate": 9.934967817274294e-05, "loss": 17.7504, "step": 1916 }, { "epoch": 0.07990496436163562, "grad_norm": 242.0, "learning_rate": 9.934859259191099e-05, "loss": 11.7502, "step": 1917 }, { "epoch": 0.07994664665916386, "grad_norm": 190.0, "learning_rate": 9.934750611169656e-05, "loss": 9.938, "step": 1918 }, { "epoch": 0.0799883289566921, "grad_norm": 824.0, "learning_rate": 9.934641873211945e-05, "loss": 20.6296, "step": 1919 }, { "epoch": 0.08003001125422034, "grad_norm": 324.0, "learning_rate": 9.934533045319949e-05, "loss": 13.3755, "step": 1920 }, { "epoch": 0.08007169355174858, "grad_norm": 356.0, "learning_rate": 9.934424127495649e-05, "loss": 14.1258, "step": 1921 }, { "epoch": 0.08011337584927682, "grad_norm": 310.0, "learning_rate": 9.93431511974103e-05, "loss": 10.2508, "step": 1922 }, { "epoch": 0.08015505814680506, "grad_norm": 122.0, "learning_rate": 9.934206022058083e-05, "loss": 9.2523, "step": 1923 }, { "epoch": 0.0801967404443333, "grad_norm": 380.0, "learning_rate": 9.934096834448792e-05, "loss": 15.0634, "step": 1924 }, { "epoch": 0.08023842274186153, "grad_norm": 1064.0, "learning_rate": 9.933987556915148e-05, "loss": 28.3763, "step": 1925 }, { "epoch": 0.08028010503938977, "grad_norm": 344.0, "learning_rate": 9.933878189459142e-05, "loss": 13.4378, "step": 1926 }, { "epoch": 0.08032178733691801, "grad_norm": 430.0, "learning_rate": 9.933768732082768e-05, "loss": 16.2506, "step": 1927 }, { "epoch": 0.08036346963444625, "grad_norm": 856.0, "learning_rate": 9.93365918478802e-05, "loss": 21.2554, "step": 1928 }, { "epoch": 0.08040515193197449, "grad_norm": 113.0, "learning_rate": 9.933549547576898e-05, "loss": 8.3756, "step": 1929 }, { "epoch": 0.08044683422950273, "grad_norm": 147.0, "learning_rate": 9.933439820451395e-05, "loss": 11.3758, "step": 1930 }, { "epoch": 0.08048851652703097, "grad_norm": 137.0, "learning_rate": 9.933330003413516e-05, "loss": 10.2506, "step": 1931 }, { "epoch": 0.08053019882455921, "grad_norm": 302.0, "learning_rate": 9.933220096465258e-05, "loss": 11.877, "step": 1932 }, { "epoch": 0.08057188112208745, "grad_norm": 326.0, "learning_rate": 9.933110099608627e-05, "loss": 12.7503, "step": 1933 }, { "epoch": 0.08061356341961569, "grad_norm": 189.0, "learning_rate": 9.933000012845625e-05, "loss": 10.9394, "step": 1934 }, { "epoch": 0.08065524571714393, "grad_norm": 472.0, "learning_rate": 9.932889836178261e-05, "loss": 16.0003, "step": 1935 }, { "epoch": 0.08069692801467217, "grad_norm": 784.0, "learning_rate": 9.932779569608542e-05, "loss": 23.126, "step": 1936 }, { "epoch": 0.0807386103122004, "grad_norm": 1256.0, "learning_rate": 9.932669213138475e-05, "loss": 28.131, "step": 1937 }, { "epoch": 0.08078029260972865, "grad_norm": 572.0, "learning_rate": 9.932558766770076e-05, "loss": 16.6261, "step": 1938 }, { "epoch": 0.08082197490725689, "grad_norm": 240.0, "learning_rate": 9.932448230505355e-05, "loss": 11.8779, "step": 1939 }, { "epoch": 0.08086365720478512, "grad_norm": 244.0, "learning_rate": 9.932337604346327e-05, "loss": 10.3132, "step": 1940 }, { "epoch": 0.08090533950231336, "grad_norm": 848.0, "learning_rate": 9.932226888295008e-05, "loss": 23.0043, "step": 1941 }, { "epoch": 0.0809470217998416, "grad_norm": 184.0, "learning_rate": 9.932116082353417e-05, "loss": 9.0652, "step": 1942 }, { "epoch": 0.08098870409736984, "grad_norm": 502.0, "learning_rate": 9.932005186523572e-05, "loss": 15.6272, "step": 1943 }, { "epoch": 0.08103038639489808, "grad_norm": 432.0, "learning_rate": 9.931894200807494e-05, "loss": 16.5011, "step": 1944 }, { "epoch": 0.08107206869242632, "grad_norm": 592.0, "learning_rate": 9.931783125207208e-05, "loss": 17.7541, "step": 1945 }, { "epoch": 0.08111375098995456, "grad_norm": 932.0, "learning_rate": 9.931671959724736e-05, "loss": 23.5049, "step": 1946 }, { "epoch": 0.0811554332874828, "grad_norm": 374.0, "learning_rate": 9.931560704362105e-05, "loss": 13.3141, "step": 1947 }, { "epoch": 0.08119711558501104, "grad_norm": 552.0, "learning_rate": 9.931449359121343e-05, "loss": 16.1286, "step": 1948 }, { "epoch": 0.08123879788253928, "grad_norm": 284.0, "learning_rate": 9.931337924004477e-05, "loss": 12.7516, "step": 1949 }, { "epoch": 0.08128048018006752, "grad_norm": 652.0, "learning_rate": 9.93122639901354e-05, "loss": 20.1254, "step": 1950 }, { "epoch": 0.08132216247759577, "grad_norm": 408.0, "learning_rate": 9.931114784150564e-05, "loss": 13.8762, "step": 1951 }, { "epoch": 0.08136384477512401, "grad_norm": 130.0, "learning_rate": 9.931003079417584e-05, "loss": 9.3761, "step": 1952 }, { "epoch": 0.08140552707265225, "grad_norm": 392.0, "learning_rate": 9.930891284816635e-05, "loss": 14.5633, "step": 1953 }, { "epoch": 0.08144720937018049, "grad_norm": 302.0, "learning_rate": 9.930779400349754e-05, "loss": 12.5011, "step": 1954 }, { "epoch": 0.08148889166770873, "grad_norm": 296.0, "learning_rate": 9.930667426018981e-05, "loss": 13.3131, "step": 1955 }, { "epoch": 0.08153057396523697, "grad_norm": 284.0, "learning_rate": 9.930555361826356e-05, "loss": 11.8756, "step": 1956 }, { "epoch": 0.08157225626276521, "grad_norm": 752.0, "learning_rate": 9.930443207773923e-05, "loss": 21.3758, "step": 1957 }, { "epoch": 0.08161393856029345, "grad_norm": 876.0, "learning_rate": 9.930330963863725e-05, "loss": 23.8756, "step": 1958 }, { "epoch": 0.08165562085782169, "grad_norm": 360.0, "learning_rate": 9.930218630097807e-05, "loss": 14.9419, "step": 1959 }, { "epoch": 0.08169730315534993, "grad_norm": 140.0, "learning_rate": 9.930106206478216e-05, "loss": 7.8441, "step": 1960 }, { "epoch": 0.08173898545287817, "grad_norm": 636.0, "learning_rate": 9.929993693007003e-05, "loss": 16.8819, "step": 1961 }, { "epoch": 0.0817806677504064, "grad_norm": 1208.0, "learning_rate": 9.929881089686216e-05, "loss": 27.5055, "step": 1962 }, { "epoch": 0.08182235004793464, "grad_norm": 210.0, "learning_rate": 9.929768396517908e-05, "loss": 12.0633, "step": 1963 }, { "epoch": 0.08186403234546288, "grad_norm": 179.0, "learning_rate": 9.929655613504136e-05, "loss": 10.3131, "step": 1964 }, { "epoch": 0.08190571464299112, "grad_norm": 612.0, "learning_rate": 9.929542740646951e-05, "loss": 19.2502, "step": 1965 }, { "epoch": 0.08194739694051936, "grad_norm": 392.0, "learning_rate": 9.929429777948412e-05, "loss": 14.3129, "step": 1966 }, { "epoch": 0.0819890792380476, "grad_norm": 720.0, "learning_rate": 9.929316725410577e-05, "loss": 18.8763, "step": 1967 }, { "epoch": 0.08203076153557584, "grad_norm": 402.0, "learning_rate": 9.929203583035509e-05, "loss": 14.9439, "step": 1968 }, { "epoch": 0.08207244383310408, "grad_norm": 1384.0, "learning_rate": 9.929090350825268e-05, "loss": 33.0003, "step": 1969 }, { "epoch": 0.08211412613063232, "grad_norm": 270.0, "learning_rate": 9.928977028781916e-05, "loss": 12.7514, "step": 1970 }, { "epoch": 0.08215580842816056, "grad_norm": 446.0, "learning_rate": 9.928863616907521e-05, "loss": 16.0016, "step": 1971 }, { "epoch": 0.0821974907256888, "grad_norm": 440.0, "learning_rate": 9.92875011520415e-05, "loss": 16.2505, "step": 1972 }, { "epoch": 0.08223917302321704, "grad_norm": 208.0, "learning_rate": 9.928636523673869e-05, "loss": 9.0629, "step": 1973 }, { "epoch": 0.08228085532074528, "grad_norm": 322.0, "learning_rate": 9.92852284231875e-05, "loss": 13.5629, "step": 1974 }, { "epoch": 0.08232253761827352, "grad_norm": 404.0, "learning_rate": 9.928409071140865e-05, "loss": 15.5636, "step": 1975 }, { "epoch": 0.08236421991580176, "grad_norm": 378.0, "learning_rate": 9.928295210142289e-05, "loss": 14.8787, "step": 1976 }, { "epoch": 0.08240590221333, "grad_norm": 231.0, "learning_rate": 9.928181259325093e-05, "loss": 11.3139, "step": 1977 }, { "epoch": 0.08244758451085824, "grad_norm": 180.0, "learning_rate": 9.928067218691356e-05, "loss": 9.8756, "step": 1978 }, { "epoch": 0.08248926680838647, "grad_norm": 240.0, "learning_rate": 9.927953088243158e-05, "loss": 10.692, "step": 1979 }, { "epoch": 0.08253094910591471, "grad_norm": 272.0, "learning_rate": 9.927838867982576e-05, "loss": 12.6878, "step": 1980 }, { "epoch": 0.08257263140344295, "grad_norm": 386.0, "learning_rate": 9.927724557911694e-05, "loss": 14.3761, "step": 1981 }, { "epoch": 0.08261431370097119, "grad_norm": 65.0, "learning_rate": 9.927610158032594e-05, "loss": 6.1569, "step": 1982 }, { "epoch": 0.08265599599849943, "grad_norm": 456.0, "learning_rate": 9.927495668347362e-05, "loss": 14.6284, "step": 1983 }, { "epoch": 0.08269767829602767, "grad_norm": 2176.0, "learning_rate": 9.927381088858083e-05, "loss": 43.2514, "step": 1984 }, { "epoch": 0.08273936059355591, "grad_norm": 394.0, "learning_rate": 9.927266419566847e-05, "loss": 15.0631, "step": 1985 }, { "epoch": 0.08278104289108415, "grad_norm": 768.0, "learning_rate": 9.927151660475745e-05, "loss": 19.628, "step": 1986 }, { "epoch": 0.08282272518861239, "grad_norm": 294.0, "learning_rate": 9.927036811586864e-05, "loss": 13.4393, "step": 1987 }, { "epoch": 0.08286440748614064, "grad_norm": 241.0, "learning_rate": 9.9269218729023e-05, "loss": 11.126, "step": 1988 }, { "epoch": 0.08290608978366888, "grad_norm": 392.0, "learning_rate": 9.926806844424148e-05, "loss": 15.1296, "step": 1989 }, { "epoch": 0.08294777208119712, "grad_norm": 378.0, "learning_rate": 9.926691726154505e-05, "loss": 15.0007, "step": 1990 }, { "epoch": 0.08298945437872536, "grad_norm": 236.0, "learning_rate": 9.926576518095466e-05, "loss": 11.3131, "step": 1991 }, { "epoch": 0.0830311366762536, "grad_norm": 205.0, "learning_rate": 9.926461220249133e-05, "loss": 11.188, "step": 1992 }, { "epoch": 0.08307281897378184, "grad_norm": 156.0, "learning_rate": 9.926345832617607e-05, "loss": 10.1884, "step": 1993 }, { "epoch": 0.08311450127131008, "grad_norm": 664.0, "learning_rate": 9.926230355202992e-05, "loss": 20.3756, "step": 1994 }, { "epoch": 0.08315618356883832, "grad_norm": 1064.0, "learning_rate": 9.92611478800739e-05, "loss": 21.6311, "step": 1995 }, { "epoch": 0.08319786586636656, "grad_norm": 632.0, "learning_rate": 9.925999131032909e-05, "loss": 19.6288, "step": 1996 }, { "epoch": 0.0832395481638948, "grad_norm": 312.0, "learning_rate": 9.925883384281658e-05, "loss": 13.1888, "step": 1997 }, { "epoch": 0.08328123046142304, "grad_norm": 290.0, "learning_rate": 9.925767547755743e-05, "loss": 12.063, "step": 1998 }, { "epoch": 0.08332291275895128, "grad_norm": 490.0, "learning_rate": 9.925651621457278e-05, "loss": 16.8756, "step": 1999 }, { "epoch": 0.08336459505647952, "grad_norm": 318.0, "learning_rate": 9.925535605388375e-05, "loss": 12.4379, "step": 2000 }, { "epoch": 0.08340627735400775, "grad_norm": 740.0, "learning_rate": 9.925419499551149e-05, "loss": 17.8796, "step": 2001 }, { "epoch": 0.083447959651536, "grad_norm": 187.0, "learning_rate": 9.925303303947715e-05, "loss": 11.0634, "step": 2002 }, { "epoch": 0.08348964194906423, "grad_norm": 406.0, "learning_rate": 9.92518701858019e-05, "loss": 14.0631, "step": 2003 }, { "epoch": 0.08353132424659247, "grad_norm": 486.0, "learning_rate": 9.925070643450696e-05, "loss": 16.8761, "step": 2004 }, { "epoch": 0.08357300654412071, "grad_norm": 75.5, "learning_rate": 9.924954178561351e-05, "loss": 5.9386, "step": 2005 }, { "epoch": 0.08361468884164895, "grad_norm": 314.0, "learning_rate": 9.924837623914278e-05, "loss": 11.3755, "step": 2006 }, { "epoch": 0.08365637113917719, "grad_norm": 664.0, "learning_rate": 9.924720979511605e-05, "loss": 21.5007, "step": 2007 }, { "epoch": 0.08369805343670543, "grad_norm": 132.0, "learning_rate": 9.924604245355454e-05, "loss": 8.6881, "step": 2008 }, { "epoch": 0.08373973573423367, "grad_norm": 266.0, "learning_rate": 9.924487421447952e-05, "loss": 11.7506, "step": 2009 }, { "epoch": 0.08378141803176191, "grad_norm": 1016.0, "learning_rate": 9.92437050779123e-05, "loss": 27.5006, "step": 2010 }, { "epoch": 0.08382310032929015, "grad_norm": 316.0, "learning_rate": 9.924253504387419e-05, "loss": 13.0004, "step": 2011 }, { "epoch": 0.08386478262681839, "grad_norm": 340.0, "learning_rate": 9.92413641123865e-05, "loss": 13.752, "step": 2012 }, { "epoch": 0.08390646492434663, "grad_norm": 468.0, "learning_rate": 9.924019228347059e-05, "loss": 16.2505, "step": 2013 }, { "epoch": 0.08394814722187487, "grad_norm": 356.0, "learning_rate": 9.92390195571478e-05, "loss": 12.8128, "step": 2014 }, { "epoch": 0.0839898295194031, "grad_norm": 284.0, "learning_rate": 9.92378459334395e-05, "loss": 13.2515, "step": 2015 }, { "epoch": 0.08403151181693135, "grad_norm": 396.0, "learning_rate": 9.923667141236709e-05, "loss": 14.5011, "step": 2016 }, { "epoch": 0.08407319411445958, "grad_norm": 217.0, "learning_rate": 9.923549599395197e-05, "loss": 11.8758, "step": 2017 }, { "epoch": 0.08411487641198782, "grad_norm": 296.0, "learning_rate": 9.923431967821559e-05, "loss": 9.0012, "step": 2018 }, { "epoch": 0.08415655870951606, "grad_norm": 1152.0, "learning_rate": 9.923314246517933e-05, "loss": 31.5007, "step": 2019 }, { "epoch": 0.0841982410070443, "grad_norm": 490.0, "learning_rate": 9.92319643548647e-05, "loss": 18.1256, "step": 2020 }, { "epoch": 0.08423992330457254, "grad_norm": 235.0, "learning_rate": 9.923078534729314e-05, "loss": 11.5006, "step": 2021 }, { "epoch": 0.08428160560210078, "grad_norm": 532.0, "learning_rate": 9.922960544248614e-05, "loss": 16.2513, "step": 2022 }, { "epoch": 0.08432328789962902, "grad_norm": 156.0, "learning_rate": 9.922842464046523e-05, "loss": 10.1258, "step": 2023 }, { "epoch": 0.08436497019715727, "grad_norm": 1408.0, "learning_rate": 9.922724294125189e-05, "loss": 29.1299, "step": 2024 }, { "epoch": 0.08440665249468551, "grad_norm": 1176.0, "learning_rate": 9.922606034486768e-05, "loss": 27.1271, "step": 2025 }, { "epoch": 0.08444833479221375, "grad_norm": 173.0, "learning_rate": 9.922487685133415e-05, "loss": 10.2506, "step": 2026 }, { "epoch": 0.08449001708974199, "grad_norm": 280.0, "learning_rate": 9.922369246067288e-05, "loss": 13.0628, "step": 2027 }, { "epoch": 0.08453169938727023, "grad_norm": 356.0, "learning_rate": 9.922250717290545e-05, "loss": 14.6902, "step": 2028 }, { "epoch": 0.08457338168479847, "grad_norm": 322.0, "learning_rate": 9.922132098805343e-05, "loss": 12.6881, "step": 2029 }, { "epoch": 0.08461506398232671, "grad_norm": 256.0, "learning_rate": 9.922013390613849e-05, "loss": 12.8761, "step": 2030 }, { "epoch": 0.08465674627985495, "grad_norm": 205.0, "learning_rate": 9.921894592718224e-05, "loss": 12.1255, "step": 2031 }, { "epoch": 0.08469842857738319, "grad_norm": 210.0, "learning_rate": 9.921775705120632e-05, "loss": 11.6258, "step": 2032 }, { "epoch": 0.08474011087491143, "grad_norm": 173.0, "learning_rate": 9.921656727823241e-05, "loss": 9.1884, "step": 2033 }, { "epoch": 0.08478179317243967, "grad_norm": 612.0, "learning_rate": 9.921537660828219e-05, "loss": 17.5003, "step": 2034 }, { "epoch": 0.08482347546996791, "grad_norm": 676.0, "learning_rate": 9.921418504137738e-05, "loss": 20.6255, "step": 2035 }, { "epoch": 0.08486515776749615, "grad_norm": 540.0, "learning_rate": 9.921299257753966e-05, "loss": 18.6257, "step": 2036 }, { "epoch": 0.08490684006502439, "grad_norm": 112.0, "learning_rate": 9.92117992167908e-05, "loss": 4.938, "step": 2037 }, { "epoch": 0.08494852236255263, "grad_norm": 374.0, "learning_rate": 9.921060495915251e-05, "loss": 13.5635, "step": 2038 }, { "epoch": 0.08499020466008086, "grad_norm": 516.0, "learning_rate": 9.920940980464658e-05, "loss": 17.5018, "step": 2039 }, { "epoch": 0.0850318869576091, "grad_norm": 988.0, "learning_rate": 9.920821375329478e-05, "loss": 25.2507, "step": 2040 }, { "epoch": 0.08507356925513734, "grad_norm": 251.0, "learning_rate": 9.920701680511894e-05, "loss": 11.0628, "step": 2041 }, { "epoch": 0.08511525155266558, "grad_norm": 470.0, "learning_rate": 9.920581896014084e-05, "loss": 16.8752, "step": 2042 }, { "epoch": 0.08515693385019382, "grad_norm": 201.0, "learning_rate": 9.920462021838233e-05, "loss": 11.8764, "step": 2043 }, { "epoch": 0.08519861614772206, "grad_norm": 177.0, "learning_rate": 9.920342057986522e-05, "loss": 10.1256, "step": 2044 }, { "epoch": 0.0852402984452503, "grad_norm": 310.0, "learning_rate": 9.920222004461144e-05, "loss": 13.6255, "step": 2045 }, { "epoch": 0.08528198074277854, "grad_norm": 302.0, "learning_rate": 9.92010186126428e-05, "loss": 14.1883, "step": 2046 }, { "epoch": 0.08532366304030678, "grad_norm": 252.0, "learning_rate": 9.919981628398126e-05, "loss": 11.8756, "step": 2047 }, { "epoch": 0.08536534533783502, "grad_norm": 290.0, "learning_rate": 9.919861305864865e-05, "loss": 11.626, "step": 2048 }, { "epoch": 0.08540702763536326, "grad_norm": 716.0, "learning_rate": 9.9197408936667e-05, "loss": 20.6276, "step": 2049 }, { "epoch": 0.0854487099328915, "grad_norm": 332.0, "learning_rate": 9.919620391805818e-05, "loss": 13.9381, "step": 2050 }, { "epoch": 0.08549039223041974, "grad_norm": 225.0, "learning_rate": 9.919499800284418e-05, "loss": 12.0006, "step": 2051 }, { "epoch": 0.08553207452794798, "grad_norm": 88.0, "learning_rate": 9.919379119104697e-05, "loss": 9.0007, "step": 2052 }, { "epoch": 0.08557375682547622, "grad_norm": 372.0, "learning_rate": 9.919258348268857e-05, "loss": 14.5665, "step": 2053 }, { "epoch": 0.08561543912300446, "grad_norm": 462.0, "learning_rate": 9.919137487779095e-05, "loss": 15.8143, "step": 2054 }, { "epoch": 0.0856571214205327, "grad_norm": 83.0, "learning_rate": 9.919016537637616e-05, "loss": 9.7511, "step": 2055 }, { "epoch": 0.08569880371806093, "grad_norm": 1144.0, "learning_rate": 9.918895497846623e-05, "loss": 29.8756, "step": 2056 }, { "epoch": 0.08574048601558917, "grad_norm": 177.0, "learning_rate": 9.918774368408324e-05, "loss": 10.5628, "step": 2057 }, { "epoch": 0.08578216831311741, "grad_norm": 316.0, "learning_rate": 9.918653149324926e-05, "loss": 14.9379, "step": 2058 }, { "epoch": 0.08582385061064565, "grad_norm": 616.0, "learning_rate": 9.918531840598637e-05, "loss": 19.8755, "step": 2059 }, { "epoch": 0.08586553290817389, "grad_norm": 270.0, "learning_rate": 9.918410442231668e-05, "loss": 12.7516, "step": 2060 }, { "epoch": 0.08590721520570214, "grad_norm": 484.0, "learning_rate": 9.918288954226233e-05, "loss": 16.3755, "step": 2061 }, { "epoch": 0.08594889750323038, "grad_norm": 191.0, "learning_rate": 9.918167376584544e-05, "loss": 11.0006, "step": 2062 }, { "epoch": 0.08599057980075862, "grad_norm": 354.0, "learning_rate": 9.91804570930882e-05, "loss": 14.0004, "step": 2063 }, { "epoch": 0.08603226209828686, "grad_norm": 484.0, "learning_rate": 9.917923952401275e-05, "loss": 15.3142, "step": 2064 }, { "epoch": 0.0860739443958151, "grad_norm": 1080.0, "learning_rate": 9.917802105864129e-05, "loss": 25.2507, "step": 2065 }, { "epoch": 0.08611562669334334, "grad_norm": 187.0, "learning_rate": 9.917680169699603e-05, "loss": 11.5006, "step": 2066 }, { "epoch": 0.08615730899087158, "grad_norm": 200.0, "learning_rate": 9.91755814390992e-05, "loss": 12.0007, "step": 2067 }, { "epoch": 0.08619899128839982, "grad_norm": 1176.0, "learning_rate": 9.917436028497305e-05, "loss": 34.5008, "step": 2068 }, { "epoch": 0.08624067358592806, "grad_norm": 430.0, "learning_rate": 9.917313823463978e-05, "loss": 15.5634, "step": 2069 }, { "epoch": 0.0862823558834563, "grad_norm": 496.0, "learning_rate": 9.917191528812173e-05, "loss": 16.0008, "step": 2070 }, { "epoch": 0.08632403818098454, "grad_norm": 144.0, "learning_rate": 9.917069144544116e-05, "loss": 8.8131, "step": 2071 }, { "epoch": 0.08636572047851278, "grad_norm": 772.0, "learning_rate": 9.916946670662036e-05, "loss": 21.2512, "step": 2072 }, { "epoch": 0.08640740277604102, "grad_norm": 266.0, "learning_rate": 9.916824107168166e-05, "loss": 13.0011, "step": 2073 }, { "epoch": 0.08644908507356926, "grad_norm": 225.0, "learning_rate": 9.916701454064741e-05, "loss": 11.8137, "step": 2074 }, { "epoch": 0.0864907673710975, "grad_norm": 396.0, "learning_rate": 9.916578711353996e-05, "loss": 13.8764, "step": 2075 }, { "epoch": 0.08653244966862574, "grad_norm": 272.0, "learning_rate": 9.916455879038167e-05, "loss": 13.0629, "step": 2076 }, { "epoch": 0.08657413196615397, "grad_norm": 173.0, "learning_rate": 9.916332957119492e-05, "loss": 9.5005, "step": 2077 }, { "epoch": 0.08661581426368221, "grad_norm": 56.25, "learning_rate": 9.916209945600215e-05, "loss": 8.4404, "step": 2078 }, { "epoch": 0.08665749656121045, "grad_norm": 470.0, "learning_rate": 9.916086844482573e-05, "loss": 15.065, "step": 2079 }, { "epoch": 0.08669917885873869, "grad_norm": 270.0, "learning_rate": 9.915963653768812e-05, "loss": 13.1255, "step": 2080 }, { "epoch": 0.08674086115626693, "grad_norm": 420.0, "learning_rate": 9.915840373461178e-05, "loss": 16.0004, "step": 2081 }, { "epoch": 0.08678254345379517, "grad_norm": 1696.0, "learning_rate": 9.915717003561917e-05, "loss": 38.2506, "step": 2082 }, { "epoch": 0.08682422575132341, "grad_norm": 400.0, "learning_rate": 9.915593544073276e-05, "loss": 12.942, "step": 2083 }, { "epoch": 0.08686590804885165, "grad_norm": 171.0, "learning_rate": 9.915469994997509e-05, "loss": 11.0636, "step": 2084 }, { "epoch": 0.08690759034637989, "grad_norm": 422.0, "learning_rate": 9.915346356336862e-05, "loss": 15.8767, "step": 2085 }, { "epoch": 0.08694927264390813, "grad_norm": 290.0, "learning_rate": 9.915222628093593e-05, "loss": 13.0629, "step": 2086 }, { "epoch": 0.08699095494143637, "grad_norm": 352.0, "learning_rate": 9.915098810269954e-05, "loss": 13.4382, "step": 2087 }, { "epoch": 0.08703263723896461, "grad_norm": 644.0, "learning_rate": 9.914974902868204e-05, "loss": 19.7543, "step": 2088 }, { "epoch": 0.08707431953649285, "grad_norm": 182.0, "learning_rate": 9.9148509058906e-05, "loss": 10.5639, "step": 2089 }, { "epoch": 0.08711600183402109, "grad_norm": 708.0, "learning_rate": 9.914726819339401e-05, "loss": 18.5045, "step": 2090 }, { "epoch": 0.08715768413154933, "grad_norm": 137.0, "learning_rate": 9.91460264321687e-05, "loss": 9.6257, "step": 2091 }, { "epoch": 0.08719936642907757, "grad_norm": 130.0, "learning_rate": 9.914478377525269e-05, "loss": 9.7529, "step": 2092 }, { "epoch": 0.0872410487266058, "grad_norm": 768.0, "learning_rate": 9.914354022266862e-05, "loss": 23.2519, "step": 2093 }, { "epoch": 0.08728273102413404, "grad_norm": 332.0, "learning_rate": 9.91422957744392e-05, "loss": 13.1265, "step": 2094 }, { "epoch": 0.08732441332166228, "grad_norm": 159.0, "learning_rate": 9.914105043058705e-05, "loss": 9.0007, "step": 2095 }, { "epoch": 0.08736609561919052, "grad_norm": 50.5, "learning_rate": 9.913980419113491e-05, "loss": 7.6597, "step": 2096 }, { "epoch": 0.08740777791671878, "grad_norm": 728.0, "learning_rate": 9.913855705610548e-05, "loss": 20.5006, "step": 2097 }, { "epoch": 0.08744946021424702, "grad_norm": 234.0, "learning_rate": 9.913730902552148e-05, "loss": 11.3135, "step": 2098 }, { "epoch": 0.08749114251177526, "grad_norm": 904.0, "learning_rate": 9.913606009940566e-05, "loss": 22.7588, "step": 2099 }, { "epoch": 0.0875328248093035, "grad_norm": 245.0, "learning_rate": 9.913481027778077e-05, "loss": 10.9382, "step": 2100 }, { "epoch": 0.08757450710683173, "grad_norm": 236.0, "learning_rate": 9.913355956066961e-05, "loss": 11.8133, "step": 2101 }, { "epoch": 0.08761618940435997, "grad_norm": 536.0, "learning_rate": 9.913230794809497e-05, "loss": 18.252, "step": 2102 }, { "epoch": 0.08765787170188821, "grad_norm": 1016.0, "learning_rate": 9.913105544007966e-05, "loss": 23.2512, "step": 2103 }, { "epoch": 0.08769955399941645, "grad_norm": 414.0, "learning_rate": 9.91298020366465e-05, "loss": 13.6897, "step": 2104 }, { "epoch": 0.08774123629694469, "grad_norm": 556.0, "learning_rate": 9.912854773781832e-05, "loss": 16.411, "step": 2105 }, { "epoch": 0.08778291859447293, "grad_norm": 250.0, "learning_rate": 9.912729254361801e-05, "loss": 11.1912, "step": 2106 }, { "epoch": 0.08782460089200117, "grad_norm": 336.0, "learning_rate": 9.912603645406844e-05, "loss": 13.6297, "step": 2107 }, { "epoch": 0.08786628318952941, "grad_norm": 306.0, "learning_rate": 9.912477946919247e-05, "loss": 11.6901, "step": 2108 }, { "epoch": 0.08790796548705765, "grad_norm": 390.0, "learning_rate": 9.912352158901306e-05, "loss": 15.6901, "step": 2109 }, { "epoch": 0.08794964778458589, "grad_norm": 227.0, "learning_rate": 9.91222628135531e-05, "loss": 10.9441, "step": 2110 }, { "epoch": 0.08799133008211413, "grad_norm": 540.0, "learning_rate": 9.912100314283552e-05, "loss": 15.84, "step": 2111 }, { "epoch": 0.08803301237964237, "grad_norm": 364.0, "learning_rate": 9.911974257688332e-05, "loss": 13.2522, "step": 2112 }, { "epoch": 0.0880746946771706, "grad_norm": 422.0, "learning_rate": 9.911848111571944e-05, "loss": 16.128, "step": 2113 }, { "epoch": 0.08811637697469885, "grad_norm": 322.0, "learning_rate": 9.91172187593669e-05, "loss": 14.1253, "step": 2114 }, { "epoch": 0.08815805927222709, "grad_norm": 560.0, "learning_rate": 9.911595550784867e-05, "loss": 18.5096, "step": 2115 }, { "epoch": 0.08819974156975532, "grad_norm": 488.0, "learning_rate": 9.911469136118778e-05, "loss": 15.4378, "step": 2116 }, { "epoch": 0.08824142386728356, "grad_norm": 520.0, "learning_rate": 9.91134263194073e-05, "loss": 14.688, "step": 2117 }, { "epoch": 0.0882831061648118, "grad_norm": 544.0, "learning_rate": 9.911216038253026e-05, "loss": 17.5086, "step": 2118 }, { "epoch": 0.08832478846234004, "grad_norm": 346.0, "learning_rate": 9.911089355057974e-05, "loss": 15.188, "step": 2119 }, { "epoch": 0.08836647075986828, "grad_norm": 488.0, "learning_rate": 9.910962582357882e-05, "loss": 17.2516, "step": 2120 }, { "epoch": 0.08840815305739652, "grad_norm": 322.0, "learning_rate": 9.910835720155062e-05, "loss": 13.5635, "step": 2121 }, { "epoch": 0.08844983535492476, "grad_norm": 157.0, "learning_rate": 9.910708768451824e-05, "loss": 10.1267, "step": 2122 }, { "epoch": 0.088491517652453, "grad_norm": 142.0, "learning_rate": 9.910581727250483e-05, "loss": 9.0009, "step": 2123 }, { "epoch": 0.08853319994998124, "grad_norm": 228.0, "learning_rate": 9.910454596553353e-05, "loss": 11.5638, "step": 2124 }, { "epoch": 0.08857488224750948, "grad_norm": 414.0, "learning_rate": 9.910327376362753e-05, "loss": 15.0011, "step": 2125 }, { "epoch": 0.08861656454503772, "grad_norm": 356.0, "learning_rate": 9.910200066681002e-05, "loss": 12.1889, "step": 2126 }, { "epoch": 0.08865824684256596, "grad_norm": 872.0, "learning_rate": 9.910072667510417e-05, "loss": 24.0008, "step": 2127 }, { "epoch": 0.0886999291400942, "grad_norm": 524.0, "learning_rate": 9.909945178853324e-05, "loss": 15.502, "step": 2128 }, { "epoch": 0.08874161143762244, "grad_norm": 316.0, "learning_rate": 9.909817600712041e-05, "loss": 12.5007, "step": 2129 }, { "epoch": 0.08878329373515068, "grad_norm": 237.0, "learning_rate": 9.909689933088899e-05, "loss": 11.5632, "step": 2130 }, { "epoch": 0.08882497603267892, "grad_norm": 596.0, "learning_rate": 9.909562175986221e-05, "loss": 16.7517, "step": 2131 }, { "epoch": 0.08886665833020715, "grad_norm": 165.0, "learning_rate": 9.909434329406338e-05, "loss": 10.5044, "step": 2132 }, { "epoch": 0.08890834062773541, "grad_norm": 536.0, "learning_rate": 9.909306393351576e-05, "loss": 17.7505, "step": 2133 }, { "epoch": 0.08895002292526365, "grad_norm": 200.0, "learning_rate": 9.90917836782427e-05, "loss": 11.1258, "step": 2134 }, { "epoch": 0.08899170522279189, "grad_norm": 86.0, "learning_rate": 9.909050252826752e-05, "loss": 9.0007, "step": 2135 }, { "epoch": 0.08903338752032013, "grad_norm": 928.0, "learning_rate": 9.90892204836136e-05, "loss": 22.6315, "step": 2136 }, { "epoch": 0.08907506981784837, "grad_norm": 189.0, "learning_rate": 9.908793754430426e-05, "loss": 10.6254, "step": 2137 }, { "epoch": 0.0891167521153766, "grad_norm": 250.0, "learning_rate": 9.908665371036289e-05, "loss": 12.2512, "step": 2138 }, { "epoch": 0.08915843441290484, "grad_norm": 225.0, "learning_rate": 9.908536898181292e-05, "loss": 11.8756, "step": 2139 }, { "epoch": 0.08920011671043308, "grad_norm": 608.0, "learning_rate": 9.908408335867774e-05, "loss": 19.7506, "step": 2140 }, { "epoch": 0.08924179900796132, "grad_norm": 264.0, "learning_rate": 9.908279684098076e-05, "loss": 12.377, "step": 2141 }, { "epoch": 0.08928348130548956, "grad_norm": 110.5, "learning_rate": 9.908150942874548e-05, "loss": 5.8132, "step": 2142 }, { "epoch": 0.0893251636030178, "grad_norm": 748.0, "learning_rate": 9.908022112199531e-05, "loss": 20.5048, "step": 2143 }, { "epoch": 0.08936684590054604, "grad_norm": 672.0, "learning_rate": 9.907893192075377e-05, "loss": 19.5004, "step": 2144 }, { "epoch": 0.08940852819807428, "grad_norm": 384.0, "learning_rate": 9.907764182504434e-05, "loss": 13.938, "step": 2145 }, { "epoch": 0.08945021049560252, "grad_norm": 444.0, "learning_rate": 9.90763508348905e-05, "loss": 15.7504, "step": 2146 }, { "epoch": 0.08949189279313076, "grad_norm": 218.0, "learning_rate": 9.907505895031584e-05, "loss": 11.063, "step": 2147 }, { "epoch": 0.089533575090659, "grad_norm": 304.0, "learning_rate": 9.907376617134388e-05, "loss": 12.8754, "step": 2148 }, { "epoch": 0.08957525738818724, "grad_norm": 600.0, "learning_rate": 9.907247249799815e-05, "loss": 18.8758, "step": 2149 }, { "epoch": 0.08961693968571548, "grad_norm": 292.0, "learning_rate": 9.907117793030227e-05, "loss": 13.3753, "step": 2150 }, { "epoch": 0.08965862198324372, "grad_norm": 266.0, "learning_rate": 9.906988246827982e-05, "loss": 13.6879, "step": 2151 }, { "epoch": 0.08970030428077196, "grad_norm": 210.0, "learning_rate": 9.906858611195439e-05, "loss": 11.8142, "step": 2152 }, { "epoch": 0.0897419865783002, "grad_norm": 241.0, "learning_rate": 9.906728886134963e-05, "loss": 11.5629, "step": 2153 }, { "epoch": 0.08978366887582843, "grad_norm": 133.0, "learning_rate": 9.906599071648917e-05, "loss": 8.8131, "step": 2154 }, { "epoch": 0.08982535117335667, "grad_norm": 392.0, "learning_rate": 9.906469167739667e-05, "loss": 15.1257, "step": 2155 }, { "epoch": 0.08986703347088491, "grad_norm": 494.0, "learning_rate": 9.906339174409582e-05, "loss": 16.7502, "step": 2156 }, { "epoch": 0.08990871576841315, "grad_norm": 255.0, "learning_rate": 9.90620909166103e-05, "loss": 12.9379, "step": 2157 }, { "epoch": 0.08995039806594139, "grad_norm": 624.0, "learning_rate": 9.90607891949638e-05, "loss": 18.6255, "step": 2158 }, { "epoch": 0.08999208036346963, "grad_norm": 408.0, "learning_rate": 9.905948657918008e-05, "loss": 14.5671, "step": 2159 }, { "epoch": 0.09003376266099787, "grad_norm": 318.0, "learning_rate": 9.905818306928286e-05, "loss": 9.6878, "step": 2160 }, { "epoch": 0.09007544495852611, "grad_norm": 338.0, "learning_rate": 9.90568786652959e-05, "loss": 13.7514, "step": 2161 }, { "epoch": 0.09011712725605435, "grad_norm": 123.5, "learning_rate": 9.905557336724296e-05, "loss": 9.5005, "step": 2162 }, { "epoch": 0.09015880955358259, "grad_norm": 456.0, "learning_rate": 9.905426717514785e-05, "loss": 15.3788, "step": 2163 }, { "epoch": 0.09020049185111083, "grad_norm": 494.0, "learning_rate": 9.905296008903437e-05, "loss": 16.7504, "step": 2164 }, { "epoch": 0.09024217414863907, "grad_norm": 528.0, "learning_rate": 9.905165210892633e-05, "loss": 18.1254, "step": 2165 }, { "epoch": 0.09028385644616731, "grad_norm": 123.5, "learning_rate": 9.905034323484756e-05, "loss": 9.9381, "step": 2166 }, { "epoch": 0.09032553874369555, "grad_norm": 346.0, "learning_rate": 9.904903346682196e-05, "loss": 15.0005, "step": 2167 }, { "epoch": 0.09036722104122379, "grad_norm": 564.0, "learning_rate": 9.904772280487336e-05, "loss": 18.6262, "step": 2168 }, { "epoch": 0.09040890333875203, "grad_norm": 616.0, "learning_rate": 9.904641124902565e-05, "loss": 19.7507, "step": 2169 }, { "epoch": 0.09045058563628028, "grad_norm": 193.0, "learning_rate": 9.904509879930275e-05, "loss": 8.8137, "step": 2170 }, { "epoch": 0.09049226793380852, "grad_norm": 304.0, "learning_rate": 9.904378545572857e-05, "loss": 13.0005, "step": 2171 }, { "epoch": 0.09053395023133676, "grad_norm": 540.0, "learning_rate": 9.904247121832703e-05, "loss": 17.2509, "step": 2172 }, { "epoch": 0.090575632528865, "grad_norm": 384.0, "learning_rate": 9.904115608712213e-05, "loss": 15.0629, "step": 2173 }, { "epoch": 0.09061731482639324, "grad_norm": 540.0, "learning_rate": 9.903984006213778e-05, "loss": 17.8754, "step": 2174 }, { "epoch": 0.09065899712392148, "grad_norm": 210.0, "learning_rate": 9.903852314339801e-05, "loss": 11.0014, "step": 2175 }, { "epoch": 0.09070067942144971, "grad_norm": 844.0, "learning_rate": 9.903720533092679e-05, "loss": 19.7558, "step": 2176 }, { "epoch": 0.09074236171897795, "grad_norm": 470.0, "learning_rate": 9.903588662474814e-05, "loss": 15.9379, "step": 2177 }, { "epoch": 0.0907840440165062, "grad_norm": 1020.0, "learning_rate": 9.903456702488611e-05, "loss": 24.7523, "step": 2178 }, { "epoch": 0.09082572631403443, "grad_norm": 272.0, "learning_rate": 9.903324653136477e-05, "loss": 11.688, "step": 2179 }, { "epoch": 0.09086740861156267, "grad_norm": 404.0, "learning_rate": 9.903192514420814e-05, "loss": 14.7516, "step": 2180 }, { "epoch": 0.09090909090909091, "grad_norm": 348.0, "learning_rate": 9.90306028634403e-05, "loss": 14.0629, "step": 2181 }, { "epoch": 0.09095077320661915, "grad_norm": 302.0, "learning_rate": 9.90292796890854e-05, "loss": 11.2513, "step": 2182 }, { "epoch": 0.09099245550414739, "grad_norm": 442.0, "learning_rate": 9.902795562116751e-05, "loss": 16.2506, "step": 2183 }, { "epoch": 0.09103413780167563, "grad_norm": 221.0, "learning_rate": 9.902663065971078e-05, "loss": 11.8129, "step": 2184 }, { "epoch": 0.09107582009920387, "grad_norm": 306.0, "learning_rate": 9.902530480473936e-05, "loss": 13.2504, "step": 2185 }, { "epoch": 0.09111750239673211, "grad_norm": 560.0, "learning_rate": 9.902397805627741e-05, "loss": 18.3752, "step": 2186 }, { "epoch": 0.09115918469426035, "grad_norm": 211.0, "learning_rate": 9.902265041434909e-05, "loss": 11.1255, "step": 2187 }, { "epoch": 0.09120086699178859, "grad_norm": 247.0, "learning_rate": 9.902132187897863e-05, "loss": 11.0629, "step": 2188 }, { "epoch": 0.09124254928931683, "grad_norm": 186.0, "learning_rate": 9.901999245019022e-05, "loss": 11.0641, "step": 2189 }, { "epoch": 0.09128423158684507, "grad_norm": 528.0, "learning_rate": 9.90186621280081e-05, "loss": 18.5007, "step": 2190 }, { "epoch": 0.0913259138843733, "grad_norm": 632.0, "learning_rate": 9.901733091245651e-05, "loss": 18.7502, "step": 2191 }, { "epoch": 0.09136759618190154, "grad_norm": 388.0, "learning_rate": 9.901599880355972e-05, "loss": 16.5006, "step": 2192 }, { "epoch": 0.09140927847942978, "grad_norm": 1248.0, "learning_rate": 9.9014665801342e-05, "loss": 25.7547, "step": 2193 }, { "epoch": 0.09145096077695802, "grad_norm": 394.0, "learning_rate": 9.901333190582762e-05, "loss": 14.8753, "step": 2194 }, { "epoch": 0.09149264307448626, "grad_norm": 728.0, "learning_rate": 9.901199711704093e-05, "loss": 19.8762, "step": 2195 }, { "epoch": 0.0915343253720145, "grad_norm": 476.0, "learning_rate": 9.901066143500626e-05, "loss": 17.0025, "step": 2196 }, { "epoch": 0.09157600766954274, "grad_norm": 193.0, "learning_rate": 9.900932485974791e-05, "loss": 10.8755, "step": 2197 }, { "epoch": 0.09161768996707098, "grad_norm": 354.0, "learning_rate": 9.900798739129027e-05, "loss": 14.3754, "step": 2198 }, { "epoch": 0.09165937226459922, "grad_norm": 350.0, "learning_rate": 9.90066490296577e-05, "loss": 13.8157, "step": 2199 }, { "epoch": 0.09170105456212746, "grad_norm": 312.0, "learning_rate": 9.900530977487463e-05, "loss": 13.8757, "step": 2200 }, { "epoch": 0.0917427368596557, "grad_norm": 704.0, "learning_rate": 9.90039696269654e-05, "loss": 21.5003, "step": 2201 }, { "epoch": 0.09178441915718394, "grad_norm": 448.0, "learning_rate": 9.90026285859545e-05, "loss": 14.0634, "step": 2202 }, { "epoch": 0.09182610145471218, "grad_norm": 456.0, "learning_rate": 9.900128665186634e-05, "loss": 16.8755, "step": 2203 }, { "epoch": 0.09186778375224042, "grad_norm": 576.0, "learning_rate": 9.89999438247254e-05, "loss": 16.379, "step": 2204 }, { "epoch": 0.09190946604976866, "grad_norm": 294.0, "learning_rate": 9.899860010455611e-05, "loss": 13.8754, "step": 2205 }, { "epoch": 0.09195114834729691, "grad_norm": 237.0, "learning_rate": 9.899725549138297e-05, "loss": 12.8141, "step": 2206 }, { "epoch": 0.09199283064482515, "grad_norm": 640.0, "learning_rate": 9.899590998523053e-05, "loss": 18.6254, "step": 2207 }, { "epoch": 0.09203451294235339, "grad_norm": 632.0, "learning_rate": 9.899456358612327e-05, "loss": 20.7513, "step": 2208 }, { "epoch": 0.09207619523988163, "grad_norm": 380.0, "learning_rate": 9.899321629408573e-05, "loss": 14.1252, "step": 2209 }, { "epoch": 0.09211787753740987, "grad_norm": 424.0, "learning_rate": 9.899186810914249e-05, "loss": 14.313, "step": 2210 }, { "epoch": 0.0921595598349381, "grad_norm": 300.0, "learning_rate": 9.899051903131809e-05, "loss": 14.1254, "step": 2211 }, { "epoch": 0.09220124213246635, "grad_norm": 952.0, "learning_rate": 9.898916906063714e-05, "loss": 26.6256, "step": 2212 }, { "epoch": 0.09224292442999459, "grad_norm": 386.0, "learning_rate": 9.898781819712422e-05, "loss": 15.1881, "step": 2213 }, { "epoch": 0.09228460672752282, "grad_norm": 256.0, "learning_rate": 9.898646644080398e-05, "loss": 12.0627, "step": 2214 }, { "epoch": 0.09232628902505106, "grad_norm": 237.0, "learning_rate": 9.898511379170104e-05, "loss": 11.3133, "step": 2215 }, { "epoch": 0.0923679713225793, "grad_norm": 280.0, "learning_rate": 9.898376024984003e-05, "loss": 12.4379, "step": 2216 }, { "epoch": 0.09240965362010754, "grad_norm": 450.0, "learning_rate": 9.898240581524567e-05, "loss": 12.8757, "step": 2217 }, { "epoch": 0.09245133591763578, "grad_norm": 262.0, "learning_rate": 9.898105048794262e-05, "loss": 12.8131, "step": 2218 }, { "epoch": 0.09249301821516402, "grad_norm": 454.0, "learning_rate": 9.897969426795555e-05, "loss": 15.6879, "step": 2219 }, { "epoch": 0.09253470051269226, "grad_norm": 444.0, "learning_rate": 9.897833715530922e-05, "loss": 14.8755, "step": 2220 }, { "epoch": 0.0925763828102205, "grad_norm": 672.0, "learning_rate": 9.897697915002835e-05, "loss": 20.877, "step": 2221 }, { "epoch": 0.09261806510774874, "grad_norm": 126.5, "learning_rate": 9.897562025213766e-05, "loss": 9.6258, "step": 2222 }, { "epoch": 0.09265974740527698, "grad_norm": 544.0, "learning_rate": 9.897426046166198e-05, "loss": 18.0003, "step": 2223 }, { "epoch": 0.09270142970280522, "grad_norm": 272.0, "learning_rate": 9.897289977862604e-05, "loss": 11.6259, "step": 2224 }, { "epoch": 0.09274311200033346, "grad_norm": 332.0, "learning_rate": 9.897153820305464e-05, "loss": 13.7506, "step": 2225 }, { "epoch": 0.0927847942978617, "grad_norm": 344.0, "learning_rate": 9.897017573497263e-05, "loss": 13.7507, "step": 2226 }, { "epoch": 0.09282647659538994, "grad_norm": 608.0, "learning_rate": 9.896881237440481e-05, "loss": 17.1259, "step": 2227 }, { "epoch": 0.09286815889291818, "grad_norm": 916.0, "learning_rate": 9.896744812137604e-05, "loss": 21.754, "step": 2228 }, { "epoch": 0.09290984119044642, "grad_norm": 214.0, "learning_rate": 9.896608297591119e-05, "loss": 10.5629, "step": 2229 }, { "epoch": 0.09295152348797465, "grad_norm": 1240.0, "learning_rate": 9.896471693803511e-05, "loss": 32.0004, "step": 2230 }, { "epoch": 0.0929932057855029, "grad_norm": 181.0, "learning_rate": 9.896335000777273e-05, "loss": 9.938, "step": 2231 }, { "epoch": 0.09303488808303113, "grad_norm": 372.0, "learning_rate": 9.896198218514896e-05, "loss": 10.6894, "step": 2232 }, { "epoch": 0.09307657038055937, "grad_norm": 100.0, "learning_rate": 9.89606134701887e-05, "loss": 10.002, "step": 2233 }, { "epoch": 0.09311825267808761, "grad_norm": 640.0, "learning_rate": 9.895924386291693e-05, "loss": 21.2506, "step": 2234 }, { "epoch": 0.09315993497561585, "grad_norm": 1256.0, "learning_rate": 9.895787336335858e-05, "loss": 27.7564, "step": 2235 }, { "epoch": 0.09320161727314409, "grad_norm": 380.0, "learning_rate": 9.895650197153864e-05, "loss": 14.3754, "step": 2236 }, { "epoch": 0.09324329957067233, "grad_norm": 314.0, "learning_rate": 9.895512968748211e-05, "loss": 14.1253, "step": 2237 }, { "epoch": 0.09328498186820057, "grad_norm": 245.0, "learning_rate": 9.8953756511214e-05, "loss": 12.688, "step": 2238 }, { "epoch": 0.09332666416572881, "grad_norm": 548.0, "learning_rate": 9.895238244275933e-05, "loss": 15.1878, "step": 2239 }, { "epoch": 0.09336834646325705, "grad_norm": 338.0, "learning_rate": 9.895100748214314e-05, "loss": 11.8756, "step": 2240 }, { "epoch": 0.09341002876078529, "grad_norm": 404.0, "learning_rate": 9.89496316293905e-05, "loss": 14.5627, "step": 2241 }, { "epoch": 0.09345171105831353, "grad_norm": 168.0, "learning_rate": 9.894825488452648e-05, "loss": 10.4379, "step": 2242 }, { "epoch": 0.09349339335584178, "grad_norm": 904.0, "learning_rate": 9.894687724757616e-05, "loss": 23.8755, "step": 2243 }, { "epoch": 0.09353507565337002, "grad_norm": 151.0, "learning_rate": 9.894549871856466e-05, "loss": 9.5629, "step": 2244 }, { "epoch": 0.09357675795089826, "grad_norm": 164.0, "learning_rate": 9.89441192975171e-05, "loss": 10.5005, "step": 2245 }, { "epoch": 0.0936184402484265, "grad_norm": 820.0, "learning_rate": 9.894273898445863e-05, "loss": 22.2504, "step": 2246 }, { "epoch": 0.09366012254595474, "grad_norm": 189.0, "learning_rate": 9.894135777941439e-05, "loss": 8.9381, "step": 2247 }, { "epoch": 0.09370180484348298, "grad_norm": 356.0, "learning_rate": 9.893997568240956e-05, "loss": 15.0005, "step": 2248 }, { "epoch": 0.09374348714101122, "grad_norm": 124.5, "learning_rate": 9.893859269346933e-05, "loss": 8.6884, "step": 2249 }, { "epoch": 0.09378516943853946, "grad_norm": 1176.0, "learning_rate": 9.89372088126189e-05, "loss": 27.256, "step": 2250 }, { "epoch": 0.0938268517360677, "grad_norm": 470.0, "learning_rate": 9.893582403988349e-05, "loss": 16.1254, "step": 2251 }, { "epoch": 0.09386853403359594, "grad_norm": 440.0, "learning_rate": 9.893443837528835e-05, "loss": 15.3127, "step": 2252 }, { "epoch": 0.09391021633112417, "grad_norm": 356.0, "learning_rate": 9.893305181885873e-05, "loss": 14.0019, "step": 2253 }, { "epoch": 0.09395189862865241, "grad_norm": 748.0, "learning_rate": 9.89316643706199e-05, "loss": 21.6261, "step": 2254 }, { "epoch": 0.09399358092618065, "grad_norm": 498.0, "learning_rate": 9.893027603059712e-05, "loss": 20.0011, "step": 2255 }, { "epoch": 0.09403526322370889, "grad_norm": 608.0, "learning_rate": 9.892888679881573e-05, "loss": 18.126, "step": 2256 }, { "epoch": 0.09407694552123713, "grad_norm": 402.0, "learning_rate": 9.892749667530105e-05, "loss": 14.7507, "step": 2257 }, { "epoch": 0.09411862781876537, "grad_norm": 250.0, "learning_rate": 9.892610566007838e-05, "loss": 12.9379, "step": 2258 }, { "epoch": 0.09416031011629361, "grad_norm": 252.0, "learning_rate": 9.89247137531731e-05, "loss": 12.2505, "step": 2259 }, { "epoch": 0.09420199241382185, "grad_norm": 306.0, "learning_rate": 9.892332095461056e-05, "loss": 13.3129, "step": 2260 }, { "epoch": 0.09424367471135009, "grad_norm": 1504.0, "learning_rate": 9.892192726441615e-05, "loss": 28.7547, "step": 2261 }, { "epoch": 0.09428535700887833, "grad_norm": 324.0, "learning_rate": 9.89205326826153e-05, "loss": 13.3753, "step": 2262 }, { "epoch": 0.09432703930640657, "grad_norm": 424.0, "learning_rate": 9.891913720923337e-05, "loss": 15.5636, "step": 2263 }, { "epoch": 0.09436872160393481, "grad_norm": 432.0, "learning_rate": 9.891774084429584e-05, "loss": 12.505, "step": 2264 }, { "epoch": 0.09441040390146305, "grad_norm": 141.0, "learning_rate": 9.891634358782814e-05, "loss": 10.3755, "step": 2265 }, { "epoch": 0.09445208619899129, "grad_norm": 516.0, "learning_rate": 9.891494543985573e-05, "loss": 18.0012, "step": 2266 }, { "epoch": 0.09449376849651953, "grad_norm": 548.0, "learning_rate": 9.89135464004041e-05, "loss": 18.0003, "step": 2267 }, { "epoch": 0.09453545079404777, "grad_norm": 176.0, "learning_rate": 9.891214646949874e-05, "loss": 10.0004, "step": 2268 }, { "epoch": 0.094577133091576, "grad_norm": 122.5, "learning_rate": 9.891074564716516e-05, "loss": 7.9704, "step": 2269 }, { "epoch": 0.09461881538910424, "grad_norm": 254.0, "learning_rate": 9.89093439334289e-05, "loss": 11.3764, "step": 2270 }, { "epoch": 0.09466049768663248, "grad_norm": 528.0, "learning_rate": 9.890794132831551e-05, "loss": 16.2513, "step": 2271 }, { "epoch": 0.09470217998416072, "grad_norm": 504.0, "learning_rate": 9.890653783185055e-05, "loss": 17.8755, "step": 2272 }, { "epoch": 0.09474386228168896, "grad_norm": 732.0, "learning_rate": 9.890513344405961e-05, "loss": 20.1284, "step": 2273 }, { "epoch": 0.0947855445792172, "grad_norm": 260.0, "learning_rate": 9.890372816496825e-05, "loss": 12.3756, "step": 2274 }, { "epoch": 0.09482722687674544, "grad_norm": 740.0, "learning_rate": 9.89023219946021e-05, "loss": 19.8799, "step": 2275 }, { "epoch": 0.09486890917427368, "grad_norm": 548.0, "learning_rate": 9.890091493298681e-05, "loss": 17.0005, "step": 2276 }, { "epoch": 0.09491059147180192, "grad_norm": 486.0, "learning_rate": 9.8899506980148e-05, "loss": 14.813, "step": 2277 }, { "epoch": 0.09495227376933016, "grad_norm": 68.5, "learning_rate": 9.889809813611133e-05, "loss": 7.9382, "step": 2278 }, { "epoch": 0.09499395606685841, "grad_norm": 298.0, "learning_rate": 9.889668840090247e-05, "loss": 11.5002, "step": 2279 }, { "epoch": 0.09503563836438665, "grad_norm": 268.0, "learning_rate": 9.889527777454715e-05, "loss": 12.4383, "step": 2280 }, { "epoch": 0.09507732066191489, "grad_norm": 384.0, "learning_rate": 9.889386625707102e-05, "loss": 14.1893, "step": 2281 }, { "epoch": 0.09511900295944313, "grad_norm": 312.0, "learning_rate": 9.889245384849985e-05, "loss": 13.8765, "step": 2282 }, { "epoch": 0.09516068525697137, "grad_norm": 400.0, "learning_rate": 9.889104054885938e-05, "loss": 15.3758, "step": 2283 }, { "epoch": 0.09520236755449961, "grad_norm": 358.0, "learning_rate": 9.888962635817535e-05, "loss": 14.001, "step": 2284 }, { "epoch": 0.09524404985202785, "grad_norm": 320.0, "learning_rate": 9.888821127647354e-05, "loss": 12.0628, "step": 2285 }, { "epoch": 0.09528573214955609, "grad_norm": 592.0, "learning_rate": 9.888679530377973e-05, "loss": 17.6254, "step": 2286 }, { "epoch": 0.09532741444708433, "grad_norm": 336.0, "learning_rate": 9.888537844011974e-05, "loss": 13.938, "step": 2287 }, { "epoch": 0.09536909674461257, "grad_norm": 278.0, "learning_rate": 9.88839606855194e-05, "loss": 14.0012, "step": 2288 }, { "epoch": 0.0954107790421408, "grad_norm": 436.0, "learning_rate": 9.888254204000451e-05, "loss": 15.4378, "step": 2289 }, { "epoch": 0.09545246133966905, "grad_norm": 103.0, "learning_rate": 9.888112250360098e-05, "loss": 8.1892, "step": 2290 }, { "epoch": 0.09549414363719728, "grad_norm": 344.0, "learning_rate": 9.887970207633464e-05, "loss": 14.0636, "step": 2291 }, { "epoch": 0.09553582593472552, "grad_norm": 1288.0, "learning_rate": 9.887828075823139e-05, "loss": 33.5003, "step": 2292 }, { "epoch": 0.09557750823225376, "grad_norm": 892.0, "learning_rate": 9.887685854931714e-05, "loss": 23.8754, "step": 2293 }, { "epoch": 0.095619190529782, "grad_norm": 232.0, "learning_rate": 9.887543544961779e-05, "loss": 11.6883, "step": 2294 }, { "epoch": 0.09566087282731024, "grad_norm": 684.0, "learning_rate": 9.887401145915931e-05, "loss": 22.6255, "step": 2295 }, { "epoch": 0.09570255512483848, "grad_norm": 394.0, "learning_rate": 9.887258657796762e-05, "loss": 14.1878, "step": 2296 }, { "epoch": 0.09574423742236672, "grad_norm": 430.0, "learning_rate": 9.88711608060687e-05, "loss": 14.8131, "step": 2297 }, { "epoch": 0.09578591971989496, "grad_norm": 448.0, "learning_rate": 9.886973414348855e-05, "loss": 17.1254, "step": 2298 }, { "epoch": 0.0958276020174232, "grad_norm": 246.0, "learning_rate": 9.886830659025315e-05, "loss": 12.5632, "step": 2299 }, { "epoch": 0.09586928431495144, "grad_norm": 175.0, "learning_rate": 9.88668781463885e-05, "loss": 10.3755, "step": 2300 }, { "epoch": 0.09591096661247968, "grad_norm": 576.0, "learning_rate": 9.886544881192069e-05, "loss": 18.2502, "step": 2301 }, { "epoch": 0.09595264891000792, "grad_norm": 159.0, "learning_rate": 9.886401858687573e-05, "loss": 6.9068, "step": 2302 }, { "epoch": 0.09599433120753616, "grad_norm": 332.0, "learning_rate": 9.886258747127969e-05, "loss": 14.0009, "step": 2303 }, { "epoch": 0.0960360135050644, "grad_norm": 478.0, "learning_rate": 9.886115546515865e-05, "loss": 16.2502, "step": 2304 }, { "epoch": 0.09607769580259264, "grad_norm": 151.0, "learning_rate": 9.885972256853873e-05, "loss": 8.6256, "step": 2305 }, { "epoch": 0.09611937810012088, "grad_norm": 1440.0, "learning_rate": 9.8858288781446e-05, "loss": 36.7506, "step": 2306 }, { "epoch": 0.09616106039764911, "grad_norm": 872.0, "learning_rate": 9.885685410390665e-05, "loss": 22.5049, "step": 2307 }, { "epoch": 0.09620274269517735, "grad_norm": 182.0, "learning_rate": 9.885541853594677e-05, "loss": 9.8137, "step": 2308 }, { "epoch": 0.0962444249927056, "grad_norm": 620.0, "learning_rate": 9.885398207759257e-05, "loss": 18.126, "step": 2309 }, { "epoch": 0.09628610729023383, "grad_norm": 207.0, "learning_rate": 9.885254472887021e-05, "loss": 11.813, "step": 2310 }, { "epoch": 0.09632778958776207, "grad_norm": 348.0, "learning_rate": 9.885110648980588e-05, "loss": 15.3754, "step": 2311 }, { "epoch": 0.09636947188529031, "grad_norm": 390.0, "learning_rate": 9.884966736042581e-05, "loss": 15.9385, "step": 2312 }, { "epoch": 0.09641115418281855, "grad_norm": 278.0, "learning_rate": 9.884822734075619e-05, "loss": 12.938, "step": 2313 }, { "epoch": 0.09645283648034679, "grad_norm": 528.0, "learning_rate": 9.88467864308233e-05, "loss": 17.6289, "step": 2314 }, { "epoch": 0.09649451877787503, "grad_norm": 604.0, "learning_rate": 9.884534463065341e-05, "loss": 17.7503, "step": 2315 }, { "epoch": 0.09653620107540328, "grad_norm": 588.0, "learning_rate": 9.884390194027276e-05, "loss": 19.5022, "step": 2316 }, { "epoch": 0.09657788337293152, "grad_norm": 175.0, "learning_rate": 9.884245835970767e-05, "loss": 10.6255, "step": 2317 }, { "epoch": 0.09661956567045976, "grad_norm": 436.0, "learning_rate": 9.884101388898443e-05, "loss": 14.9384, "step": 2318 }, { "epoch": 0.096661247967988, "grad_norm": 147.0, "learning_rate": 9.883956852812938e-05, "loss": 9.5628, "step": 2319 }, { "epoch": 0.09670293026551624, "grad_norm": 86.0, "learning_rate": 9.883812227716885e-05, "loss": 5.5944, "step": 2320 }, { "epoch": 0.09674461256304448, "grad_norm": 270.0, "learning_rate": 9.883667513612923e-05, "loss": 12.8755, "step": 2321 }, { "epoch": 0.09678629486057272, "grad_norm": 700.0, "learning_rate": 9.883522710503686e-05, "loss": 20.5009, "step": 2322 }, { "epoch": 0.09682797715810096, "grad_norm": 230.0, "learning_rate": 9.883377818391812e-05, "loss": 12.313, "step": 2323 }, { "epoch": 0.0968696594556292, "grad_norm": 169.0, "learning_rate": 9.883232837279946e-05, "loss": 10.3753, "step": 2324 }, { "epoch": 0.09691134175315744, "grad_norm": 494.0, "learning_rate": 9.883087767170727e-05, "loss": 16.6253, "step": 2325 }, { "epoch": 0.09695302405068568, "grad_norm": 692.0, "learning_rate": 9.8829426080668e-05, "loss": 20.8755, "step": 2326 }, { "epoch": 0.09699470634821392, "grad_norm": 412.0, "learning_rate": 9.882797359970812e-05, "loss": 13.1264, "step": 2327 }, { "epoch": 0.09703638864574216, "grad_norm": 342.0, "learning_rate": 9.882652022885406e-05, "loss": 13.8133, "step": 2328 }, { "epoch": 0.0970780709432704, "grad_norm": 276.0, "learning_rate": 9.882506596813235e-05, "loss": 12.6266, "step": 2329 }, { "epoch": 0.09711975324079863, "grad_norm": 724.0, "learning_rate": 9.882361081756948e-05, "loss": 21.3754, "step": 2330 }, { "epoch": 0.09716143553832687, "grad_norm": 428.0, "learning_rate": 9.882215477719197e-05, "loss": 13.8754, "step": 2331 }, { "epoch": 0.09720311783585511, "grad_norm": 268.0, "learning_rate": 9.882069784702635e-05, "loss": 11.8131, "step": 2332 }, { "epoch": 0.09724480013338335, "grad_norm": 362.0, "learning_rate": 9.881924002709918e-05, "loss": 13.9407, "step": 2333 }, { "epoch": 0.09728648243091159, "grad_norm": 142.0, "learning_rate": 9.881778131743702e-05, "loss": 8.5633, "step": 2334 }, { "epoch": 0.09732816472843983, "grad_norm": 250.0, "learning_rate": 9.881632171806648e-05, "loss": 11.6255, "step": 2335 }, { "epoch": 0.09736984702596807, "grad_norm": 215.0, "learning_rate": 9.881486122901414e-05, "loss": 9.6268, "step": 2336 }, { "epoch": 0.09741152932349631, "grad_norm": 236.0, "learning_rate": 9.88133998503066e-05, "loss": 11.2504, "step": 2337 }, { "epoch": 0.09745321162102455, "grad_norm": 130.0, "learning_rate": 9.881193758197052e-05, "loss": 9.6879, "step": 2338 }, { "epoch": 0.09749489391855279, "grad_norm": 304.0, "learning_rate": 9.881047442403255e-05, "loss": 10.0665, "step": 2339 }, { "epoch": 0.09753657621608103, "grad_norm": 348.0, "learning_rate": 9.880901037651935e-05, "loss": 13.8755, "step": 2340 }, { "epoch": 0.09757825851360927, "grad_norm": 500.0, "learning_rate": 9.88075454394576e-05, "loss": 16.3759, "step": 2341 }, { "epoch": 0.0976199408111375, "grad_norm": 1520.0, "learning_rate": 9.880607961287401e-05, "loss": 29.1306, "step": 2342 }, { "epoch": 0.09766162310866575, "grad_norm": 350.0, "learning_rate": 9.880461289679528e-05, "loss": 13.1883, "step": 2343 }, { "epoch": 0.09770330540619399, "grad_norm": 262.0, "learning_rate": 9.880314529124816e-05, "loss": 13.0004, "step": 2344 }, { "epoch": 0.09774498770372222, "grad_norm": 366.0, "learning_rate": 9.880167679625937e-05, "loss": 13.6878, "step": 2345 }, { "epoch": 0.09778667000125046, "grad_norm": 284.0, "learning_rate": 9.880020741185569e-05, "loss": 11.6255, "step": 2346 }, { "epoch": 0.0978283522987787, "grad_norm": 356.0, "learning_rate": 9.87987371380639e-05, "loss": 13.0003, "step": 2347 }, { "epoch": 0.09787003459630694, "grad_norm": 290.0, "learning_rate": 9.879726597491079e-05, "loss": 13.2505, "step": 2348 }, { "epoch": 0.09791171689383518, "grad_norm": 230.0, "learning_rate": 9.879579392242318e-05, "loss": 11.1255, "step": 2349 }, { "epoch": 0.09795339919136342, "grad_norm": 344.0, "learning_rate": 9.879432098062789e-05, "loss": 14.6879, "step": 2350 }, { "epoch": 0.09799508148889166, "grad_norm": 131.0, "learning_rate": 9.879284714955179e-05, "loss": 8.5011, "step": 2351 }, { "epoch": 0.09803676378641991, "grad_norm": 568.0, "learning_rate": 9.87913724292217e-05, "loss": 18.626, "step": 2352 }, { "epoch": 0.09807844608394815, "grad_norm": 368.0, "learning_rate": 9.87898968196645e-05, "loss": 14.0632, "step": 2353 }, { "epoch": 0.09812012838147639, "grad_norm": 450.0, "learning_rate": 9.878842032090713e-05, "loss": 16.2504, "step": 2354 }, { "epoch": 0.09816181067900463, "grad_norm": 221.0, "learning_rate": 9.878694293297645e-05, "loss": 11.1878, "step": 2355 }, { "epoch": 0.09820349297653287, "grad_norm": 178.0, "learning_rate": 9.878546465589942e-05, "loss": 9.5628, "step": 2356 }, { "epoch": 0.09824517527406111, "grad_norm": 153.0, "learning_rate": 9.878398548970295e-05, "loss": 10.1877, "step": 2357 }, { "epoch": 0.09828685757158935, "grad_norm": 404.0, "learning_rate": 9.878250543441401e-05, "loss": 16.0008, "step": 2358 }, { "epoch": 0.09832853986911759, "grad_norm": 888.0, "learning_rate": 9.878102449005959e-05, "loss": 26.0008, "step": 2359 }, { "epoch": 0.09837022216664583, "grad_norm": 159.0, "learning_rate": 9.877954265666667e-05, "loss": 10.5633, "step": 2360 }, { "epoch": 0.09841190446417407, "grad_norm": 203.0, "learning_rate": 9.877805993426225e-05, "loss": 10.813, "step": 2361 }, { "epoch": 0.09845358676170231, "grad_norm": 524.0, "learning_rate": 9.877657632287335e-05, "loss": 17.0005, "step": 2362 }, { "epoch": 0.09849526905923055, "grad_norm": 232.0, "learning_rate": 9.877509182252703e-05, "loss": 12.063, "step": 2363 }, { "epoch": 0.09853695135675879, "grad_norm": 700.0, "learning_rate": 9.877360643325033e-05, "loss": 21.6256, "step": 2364 }, { "epoch": 0.09857863365428703, "grad_norm": 532.0, "learning_rate": 9.877212015507031e-05, "loss": 17.5014, "step": 2365 }, { "epoch": 0.09862031595181527, "grad_norm": 304.0, "learning_rate": 9.877063298801407e-05, "loss": 13.0003, "step": 2366 }, { "epoch": 0.0986619982493435, "grad_norm": 318.0, "learning_rate": 9.876914493210874e-05, "loss": 14.0007, "step": 2367 }, { "epoch": 0.09870368054687174, "grad_norm": 326.0, "learning_rate": 9.87676559873814e-05, "loss": 13.3133, "step": 2368 }, { "epoch": 0.09874536284439998, "grad_norm": 71.0, "learning_rate": 9.876616615385918e-05, "loss": 7.8128, "step": 2369 }, { "epoch": 0.09878704514192822, "grad_norm": 1012.0, "learning_rate": 9.876467543156928e-05, "loss": 23.8805, "step": 2370 }, { "epoch": 0.09882872743945646, "grad_norm": 416.0, "learning_rate": 9.876318382053884e-05, "loss": 15.3769, "step": 2371 }, { "epoch": 0.0988704097369847, "grad_norm": 454.0, "learning_rate": 9.876169132079503e-05, "loss": 16.6257, "step": 2372 }, { "epoch": 0.09891209203451294, "grad_norm": 141.0, "learning_rate": 9.876019793236509e-05, "loss": 8.5005, "step": 2373 }, { "epoch": 0.09895377433204118, "grad_norm": 189.0, "learning_rate": 9.875870365527618e-05, "loss": 11.563, "step": 2374 }, { "epoch": 0.09899545662956942, "grad_norm": 556.0, "learning_rate": 9.875720848955559e-05, "loss": 15.7545, "step": 2375 }, { "epoch": 0.09903713892709766, "grad_norm": 183.0, "learning_rate": 9.875571243523055e-05, "loss": 11.5631, "step": 2376 }, { "epoch": 0.0990788212246259, "grad_norm": 205.0, "learning_rate": 9.875421549232831e-05, "loss": 9.001, "step": 2377 }, { "epoch": 0.09912050352215414, "grad_norm": 1416.0, "learning_rate": 9.875271766087617e-05, "loss": 33.0048, "step": 2378 }, { "epoch": 0.09916218581968238, "grad_norm": 233.0, "learning_rate": 9.875121894090142e-05, "loss": 11.6255, "step": 2379 }, { "epoch": 0.09920386811721062, "grad_norm": 186.0, "learning_rate": 9.874971933243139e-05, "loss": 11.1253, "step": 2380 }, { "epoch": 0.09924555041473886, "grad_norm": 138.0, "learning_rate": 9.874821883549338e-05, "loss": 9.3762, "step": 2381 }, { "epoch": 0.0992872327122671, "grad_norm": 624.0, "learning_rate": 9.874671745011477e-05, "loss": 21.0012, "step": 2382 }, { "epoch": 0.09932891500979533, "grad_norm": 300.0, "learning_rate": 9.874521517632289e-05, "loss": 12.9384, "step": 2383 }, { "epoch": 0.09937059730732357, "grad_norm": 296.0, "learning_rate": 9.874371201414517e-05, "loss": 12.8754, "step": 2384 }, { "epoch": 0.09941227960485181, "grad_norm": 544.0, "learning_rate": 9.874220796360894e-05, "loss": 15.4383, "step": 2385 }, { "epoch": 0.09945396190238005, "grad_norm": 192.0, "learning_rate": 9.874070302474165e-05, "loss": 10.3131, "step": 2386 }, { "epoch": 0.09949564419990829, "grad_norm": 372.0, "learning_rate": 9.873919719757072e-05, "loss": 11.6286, "step": 2387 }, { "epoch": 0.09953732649743653, "grad_norm": 147.0, "learning_rate": 9.873769048212359e-05, "loss": 9.1883, "step": 2388 }, { "epoch": 0.09957900879496479, "grad_norm": 253.0, "learning_rate": 9.873618287842773e-05, "loss": 10.8138, "step": 2389 }, { "epoch": 0.09962069109249302, "grad_norm": 408.0, "learning_rate": 9.87346743865106e-05, "loss": 16.3755, "step": 2390 }, { "epoch": 0.09966237339002126, "grad_norm": 576.0, "learning_rate": 9.873316500639972e-05, "loss": 19.2504, "step": 2391 }, { "epoch": 0.0997040556875495, "grad_norm": 692.0, "learning_rate": 9.873165473812258e-05, "loss": 20.6255, "step": 2392 }, { "epoch": 0.09974573798507774, "grad_norm": 187.0, "learning_rate": 9.873014358170669e-05, "loss": 8.188, "step": 2393 }, { "epoch": 0.09978742028260598, "grad_norm": 115.5, "learning_rate": 9.872863153717961e-05, "loss": 9.0632, "step": 2394 }, { "epoch": 0.09982910258013422, "grad_norm": 472.0, "learning_rate": 9.872711860456891e-05, "loss": 15.8161, "step": 2395 }, { "epoch": 0.09987078487766246, "grad_norm": 288.0, "learning_rate": 9.872560478390214e-05, "loss": 10.378, "step": 2396 }, { "epoch": 0.0999124671751907, "grad_norm": 223.0, "learning_rate": 9.872409007520691e-05, "loss": 12.1888, "step": 2397 }, { "epoch": 0.09995414947271894, "grad_norm": 194.0, "learning_rate": 9.87225744785108e-05, "loss": 10.2507, "step": 2398 }, { "epoch": 0.09999583177024718, "grad_norm": 440.0, "learning_rate": 9.872105799384144e-05, "loss": 15.0627, "step": 2399 }, { "epoch": 0.10003751406777542, "grad_norm": 278.0, "learning_rate": 9.871954062122648e-05, "loss": 13.1254, "step": 2400 }, { "epoch": 0.10007919636530366, "grad_norm": 81.5, "learning_rate": 9.871802236069356e-05, "loss": 8.188, "step": 2401 }, { "epoch": 0.1001208786628319, "grad_norm": 252.0, "learning_rate": 9.871650321227038e-05, "loss": 12.1254, "step": 2402 }, { "epoch": 0.10016256096036014, "grad_norm": 592.0, "learning_rate": 9.871498317598457e-05, "loss": 18.2504, "step": 2403 }, { "epoch": 0.10020424325788838, "grad_norm": 616.0, "learning_rate": 9.871346225186389e-05, "loss": 20.1257, "step": 2404 }, { "epoch": 0.10024592555541662, "grad_norm": 696.0, "learning_rate": 9.871194043993603e-05, "loss": 21.1252, "step": 2405 }, { "epoch": 0.10028760785294485, "grad_norm": 724.0, "learning_rate": 9.871041774022873e-05, "loss": 21.0003, "step": 2406 }, { "epoch": 0.1003292901504731, "grad_norm": 238.0, "learning_rate": 9.870889415276975e-05, "loss": 11.8753, "step": 2407 }, { "epoch": 0.10037097244800133, "grad_norm": 326.0, "learning_rate": 9.870736967758684e-05, "loss": 13.5634, "step": 2408 }, { "epoch": 0.10041265474552957, "grad_norm": 324.0, "learning_rate": 9.87058443147078e-05, "loss": 13.6879, "step": 2409 }, { "epoch": 0.10045433704305781, "grad_norm": 1216.0, "learning_rate": 9.870431806416043e-05, "loss": 28.1303, "step": 2410 }, { "epoch": 0.10049601934058605, "grad_norm": 288.0, "learning_rate": 9.870279092597252e-05, "loss": 13.3756, "step": 2411 }, { "epoch": 0.10053770163811429, "grad_norm": 222.0, "learning_rate": 9.870126290017194e-05, "loss": 11.8757, "step": 2412 }, { "epoch": 0.10057938393564253, "grad_norm": 852.0, "learning_rate": 9.869973398678651e-05, "loss": 22.2509, "step": 2413 }, { "epoch": 0.10062106623317077, "grad_norm": 288.0, "learning_rate": 9.869820418584412e-05, "loss": 11.4428, "step": 2414 }, { "epoch": 0.10066274853069901, "grad_norm": 260.0, "learning_rate": 9.869667349737261e-05, "loss": 12.9383, "step": 2415 }, { "epoch": 0.10070443082822725, "grad_norm": 532.0, "learning_rate": 9.869514192139993e-05, "loss": 16.2543, "step": 2416 }, { "epoch": 0.10074611312575549, "grad_norm": 446.0, "learning_rate": 9.869360945795395e-05, "loss": 15.2502, "step": 2417 }, { "epoch": 0.10078779542328373, "grad_norm": 63.25, "learning_rate": 9.869207610706261e-05, "loss": 7.3134, "step": 2418 }, { "epoch": 0.10082947772081197, "grad_norm": 580.0, "learning_rate": 9.869054186875387e-05, "loss": 18.3759, "step": 2419 }, { "epoch": 0.1008711600183402, "grad_norm": 241.0, "learning_rate": 9.868900674305567e-05, "loss": 13.5007, "step": 2420 }, { "epoch": 0.10091284231586845, "grad_norm": 244.0, "learning_rate": 9.868747072999602e-05, "loss": 11.9383, "step": 2421 }, { "epoch": 0.10095452461339668, "grad_norm": 270.0, "learning_rate": 9.86859338296029e-05, "loss": 12.5628, "step": 2422 }, { "epoch": 0.10099620691092492, "grad_norm": 258.0, "learning_rate": 9.868439604190429e-05, "loss": 12.3131, "step": 2423 }, { "epoch": 0.10103788920845316, "grad_norm": 768.0, "learning_rate": 9.868285736692824e-05, "loss": 21.0011, "step": 2424 }, { "epoch": 0.10107957150598142, "grad_norm": 322.0, "learning_rate": 9.86813178047028e-05, "loss": 10.1889, "step": 2425 }, { "epoch": 0.10112125380350966, "grad_norm": 442.0, "learning_rate": 9.867977735525602e-05, "loss": 15.5013, "step": 2426 }, { "epoch": 0.1011629361010379, "grad_norm": 338.0, "learning_rate": 9.8678236018616e-05, "loss": 11.439, "step": 2427 }, { "epoch": 0.10120461839856613, "grad_norm": 264.0, "learning_rate": 9.867669379481078e-05, "loss": 12.8757, "step": 2428 }, { "epoch": 0.10124630069609437, "grad_norm": 462.0, "learning_rate": 9.86751506838685e-05, "loss": 14.6888, "step": 2429 }, { "epoch": 0.10128798299362261, "grad_norm": 528.0, "learning_rate": 9.867360668581726e-05, "loss": 17.6259, "step": 2430 }, { "epoch": 0.10132966529115085, "grad_norm": 1192.0, "learning_rate": 9.867206180068525e-05, "loss": 31.0002, "step": 2431 }, { "epoch": 0.10137134758867909, "grad_norm": 470.0, "learning_rate": 9.867051602850057e-05, "loss": 16.2511, "step": 2432 }, { "epoch": 0.10141302988620733, "grad_norm": 414.0, "learning_rate": 9.866896936929142e-05, "loss": 14.938, "step": 2433 }, { "epoch": 0.10145471218373557, "grad_norm": 223.0, "learning_rate": 9.866742182308599e-05, "loss": 12.0629, "step": 2434 }, { "epoch": 0.10149639448126381, "grad_norm": 322.0, "learning_rate": 9.866587338991248e-05, "loss": 12.8135, "step": 2435 }, { "epoch": 0.10153807677879205, "grad_norm": 344.0, "learning_rate": 9.86643240697991e-05, "loss": 14.0003, "step": 2436 }, { "epoch": 0.10157975907632029, "grad_norm": 222.0, "learning_rate": 9.86627738627741e-05, "loss": 13.1256, "step": 2437 }, { "epoch": 0.10162144137384853, "grad_norm": 123.5, "learning_rate": 9.866122276886571e-05, "loss": 9.6257, "step": 2438 }, { "epoch": 0.10166312367137677, "grad_norm": 386.0, "learning_rate": 9.865967078810223e-05, "loss": 14.0639, "step": 2439 }, { "epoch": 0.10170480596890501, "grad_norm": 286.0, "learning_rate": 9.865811792051191e-05, "loss": 11.8759, "step": 2440 }, { "epoch": 0.10174648826643325, "grad_norm": 580.0, "learning_rate": 9.86565641661231e-05, "loss": 18.5021, "step": 2441 }, { "epoch": 0.10178817056396149, "grad_norm": 474.0, "learning_rate": 9.865500952496407e-05, "loss": 17.2511, "step": 2442 }, { "epoch": 0.10182985286148973, "grad_norm": 286.0, "learning_rate": 9.865345399706319e-05, "loss": 13.314, "step": 2443 }, { "epoch": 0.10187153515901796, "grad_norm": 1104.0, "learning_rate": 9.865189758244877e-05, "loss": 27.3753, "step": 2444 }, { "epoch": 0.1019132174565462, "grad_norm": 450.0, "learning_rate": 9.865034028114922e-05, "loss": 15.5012, "step": 2445 }, { "epoch": 0.10195489975407444, "grad_norm": 400.0, "learning_rate": 9.864878209319288e-05, "loss": 15.0633, "step": 2446 }, { "epoch": 0.10199658205160268, "grad_norm": 800.0, "learning_rate": 9.864722301860817e-05, "loss": 23.1258, "step": 2447 }, { "epoch": 0.10203826434913092, "grad_norm": 218.0, "learning_rate": 9.864566305742352e-05, "loss": 12.3133, "step": 2448 }, { "epoch": 0.10207994664665916, "grad_norm": 520.0, "learning_rate": 9.864410220966731e-05, "loss": 18.8754, "step": 2449 }, { "epoch": 0.1021216289441874, "grad_norm": 245.0, "learning_rate": 9.864254047536806e-05, "loss": 12.0002, "step": 2450 }, { "epoch": 0.10216331124171564, "grad_norm": 348.0, "learning_rate": 9.864097785455416e-05, "loss": 14.5635, "step": 2451 }, { "epoch": 0.10220499353924388, "grad_norm": 860.0, "learning_rate": 9.863941434725413e-05, "loss": 21.7561, "step": 2452 }, { "epoch": 0.10224667583677212, "grad_norm": 430.0, "learning_rate": 9.863784995349646e-05, "loss": 16.2516, "step": 2453 }, { "epoch": 0.10228835813430036, "grad_norm": 229.0, "learning_rate": 9.863628467330965e-05, "loss": 11.3132, "step": 2454 }, { "epoch": 0.1023300404318286, "grad_norm": 74.0, "learning_rate": 9.863471850672224e-05, "loss": 6.9069, "step": 2455 }, { "epoch": 0.10237172272935684, "grad_norm": 218.0, "learning_rate": 9.863315145376276e-05, "loss": 11.0628, "step": 2456 }, { "epoch": 0.10241340502688508, "grad_norm": 204.0, "learning_rate": 9.863158351445979e-05, "loss": 10.0011, "step": 2457 }, { "epoch": 0.10245508732441332, "grad_norm": 250.0, "learning_rate": 9.863001468884188e-05, "loss": 11.8131, "step": 2458 }, { "epoch": 0.10249676962194156, "grad_norm": 213.0, "learning_rate": 9.862844497693764e-05, "loss": 12.0009, "step": 2459 }, { "epoch": 0.1025384519194698, "grad_norm": 808.0, "learning_rate": 9.862687437877567e-05, "loss": 21.6254, "step": 2460 }, { "epoch": 0.10258013421699803, "grad_norm": 440.0, "learning_rate": 9.862530289438461e-05, "loss": 15.8142, "step": 2461 }, { "epoch": 0.10262181651452629, "grad_norm": 2736.0, "learning_rate": 9.862373052379308e-05, "loss": 51.5051, "step": 2462 }, { "epoch": 0.10266349881205453, "grad_norm": 396.0, "learning_rate": 9.862215726702974e-05, "loss": 14.6881, "step": 2463 }, { "epoch": 0.10270518110958277, "grad_norm": 125.5, "learning_rate": 9.862058312412326e-05, "loss": 9.3754, "step": 2464 }, { "epoch": 0.102746863407111, "grad_norm": 326.0, "learning_rate": 9.861900809510236e-05, "loss": 13.5632, "step": 2465 }, { "epoch": 0.10278854570463924, "grad_norm": 284.0, "learning_rate": 9.861743217999571e-05, "loss": 11.063, "step": 2466 }, { "epoch": 0.10283022800216748, "grad_norm": 147.0, "learning_rate": 9.861585537883205e-05, "loss": 10.0631, "step": 2467 }, { "epoch": 0.10287191029969572, "grad_norm": 224.0, "learning_rate": 9.861427769164008e-05, "loss": 11.3136, "step": 2468 }, { "epoch": 0.10291359259722396, "grad_norm": 664.0, "learning_rate": 9.861269911844861e-05, "loss": 20.5003, "step": 2469 }, { "epoch": 0.1029552748947522, "grad_norm": 312.0, "learning_rate": 9.86111196592864e-05, "loss": 13.3127, "step": 2470 }, { "epoch": 0.10299695719228044, "grad_norm": 324.0, "learning_rate": 9.860953931418218e-05, "loss": 13.1252, "step": 2471 }, { "epoch": 0.10303863948980868, "grad_norm": 416.0, "learning_rate": 9.86079580831648e-05, "loss": 15.8134, "step": 2472 }, { "epoch": 0.10308032178733692, "grad_norm": 612.0, "learning_rate": 9.86063759662631e-05, "loss": 19.6267, "step": 2473 }, { "epoch": 0.10312200408486516, "grad_norm": 668.0, "learning_rate": 9.860479296350586e-05, "loss": 21.1253, "step": 2474 }, { "epoch": 0.1031636863823934, "grad_norm": 244.0, "learning_rate": 9.860320907492196e-05, "loss": 11.9379, "step": 2475 }, { "epoch": 0.10320536867992164, "grad_norm": 220.0, "learning_rate": 9.860162430054025e-05, "loss": 10.0006, "step": 2476 }, { "epoch": 0.10324705097744988, "grad_norm": 1360.0, "learning_rate": 9.860003864038962e-05, "loss": 34.5018, "step": 2477 }, { "epoch": 0.10328873327497812, "grad_norm": 732.0, "learning_rate": 9.859845209449898e-05, "loss": 19.7504, "step": 2478 }, { "epoch": 0.10333041557250636, "grad_norm": 488.0, "learning_rate": 9.859686466289723e-05, "loss": 18.1254, "step": 2479 }, { "epoch": 0.1033720978700346, "grad_norm": 512.0, "learning_rate": 9.859527634561332e-05, "loss": 16.6257, "step": 2480 }, { "epoch": 0.10341378016756284, "grad_norm": 248.0, "learning_rate": 9.859368714267617e-05, "loss": 11.2505, "step": 2481 }, { "epoch": 0.10345546246509107, "grad_norm": 260.0, "learning_rate": 9.859209705411477e-05, "loss": 11.9381, "step": 2482 }, { "epoch": 0.10349714476261931, "grad_norm": 936.0, "learning_rate": 9.859050607995808e-05, "loss": 23.0061, "step": 2483 }, { "epoch": 0.10353882706014755, "grad_norm": 61.5, "learning_rate": 9.85889142202351e-05, "loss": 7.3142, "step": 2484 }, { "epoch": 0.10358050935767579, "grad_norm": 394.0, "learning_rate": 9.858732147497486e-05, "loss": 14.2505, "step": 2485 }, { "epoch": 0.10362219165520403, "grad_norm": 344.0, "learning_rate": 9.858572784420637e-05, "loss": 13.5635, "step": 2486 }, { "epoch": 0.10366387395273227, "grad_norm": 400.0, "learning_rate": 9.858413332795866e-05, "loss": 15.8128, "step": 2487 }, { "epoch": 0.10370555625026051, "grad_norm": 1184.0, "learning_rate": 9.858253792626083e-05, "loss": 25.5054, "step": 2488 }, { "epoch": 0.10374723854778875, "grad_norm": 520.0, "learning_rate": 9.858094163914191e-05, "loss": 16.0003, "step": 2489 }, { "epoch": 0.10378892084531699, "grad_norm": 402.0, "learning_rate": 9.857934446663103e-05, "loss": 14.4382, "step": 2490 }, { "epoch": 0.10383060314284523, "grad_norm": 282.0, "learning_rate": 9.857774640875727e-05, "loss": 13.0629, "step": 2491 }, { "epoch": 0.10387228544037347, "grad_norm": 386.0, "learning_rate": 9.85761474655498e-05, "loss": 14.0009, "step": 2492 }, { "epoch": 0.10391396773790171, "grad_norm": 348.0, "learning_rate": 9.857454763703771e-05, "loss": 14.6257, "step": 2493 }, { "epoch": 0.10395565003542995, "grad_norm": 169.0, "learning_rate": 9.857294692325017e-05, "loss": 10.2504, "step": 2494 }, { "epoch": 0.10399733233295819, "grad_norm": 410.0, "learning_rate": 9.857134532421637e-05, "loss": 17.5003, "step": 2495 }, { "epoch": 0.10403901463048643, "grad_norm": 884.0, "learning_rate": 9.85697428399655e-05, "loss": 20.1301, "step": 2496 }, { "epoch": 0.10408069692801467, "grad_norm": 360.0, "learning_rate": 9.856813947052673e-05, "loss": 14.0005, "step": 2497 }, { "epoch": 0.10412237922554292, "grad_norm": 278.0, "learning_rate": 9.856653521592932e-05, "loss": 12.8754, "step": 2498 }, { "epoch": 0.10416406152307116, "grad_norm": 332.0, "learning_rate": 9.85649300762025e-05, "loss": 13.5631, "step": 2499 }, { "epoch": 0.1042057438205994, "grad_norm": 544.0, "learning_rate": 9.856332405137552e-05, "loss": 18.5003, "step": 2500 }, { "epoch": 0.10424742611812764, "grad_norm": 145.0, "learning_rate": 9.856171714147764e-05, "loss": 9.4378, "step": 2501 }, { "epoch": 0.10428910841565588, "grad_norm": 125.0, "learning_rate": 9.856010934653815e-05, "loss": 8.129, "step": 2502 }, { "epoch": 0.10433079071318412, "grad_norm": 210.0, "learning_rate": 9.855850066658636e-05, "loss": 12.4378, "step": 2503 }, { "epoch": 0.10437247301071235, "grad_norm": 468.0, "learning_rate": 9.855689110165158e-05, "loss": 15.5627, "step": 2504 }, { "epoch": 0.1044141553082406, "grad_norm": 494.0, "learning_rate": 9.855528065176316e-05, "loss": 17.0004, "step": 2505 }, { "epoch": 0.10445583760576883, "grad_norm": 143.0, "learning_rate": 9.855366931695043e-05, "loss": 10.3763, "step": 2506 }, { "epoch": 0.10449751990329707, "grad_norm": 212.0, "learning_rate": 9.855205709724277e-05, "loss": 10.5051, "step": 2507 }, { "epoch": 0.10453920220082531, "grad_norm": 398.0, "learning_rate": 9.855044399266957e-05, "loss": 16.0005, "step": 2508 }, { "epoch": 0.10458088449835355, "grad_norm": 354.0, "learning_rate": 9.85488300032602e-05, "loss": 14.0639, "step": 2509 }, { "epoch": 0.10462256679588179, "grad_norm": 932.0, "learning_rate": 9.85472151290441e-05, "loss": 27.2504, "step": 2510 }, { "epoch": 0.10466424909341003, "grad_norm": 1472.0, "learning_rate": 9.85455993700507e-05, "loss": 28.507, "step": 2511 }, { "epoch": 0.10470593139093827, "grad_norm": 406.0, "learning_rate": 9.854398272630945e-05, "loss": 13.4392, "step": 2512 }, { "epoch": 0.10474761368846651, "grad_norm": 235.0, "learning_rate": 9.854236519784978e-05, "loss": 12.6255, "step": 2513 }, { "epoch": 0.10478929598599475, "grad_norm": 308.0, "learning_rate": 9.854074678470122e-05, "loss": 13.252, "step": 2514 }, { "epoch": 0.10483097828352299, "grad_norm": 956.0, "learning_rate": 9.853912748689322e-05, "loss": 23.126, "step": 2515 }, { "epoch": 0.10487266058105123, "grad_norm": 716.0, "learning_rate": 9.853750730445533e-05, "loss": 21.2508, "step": 2516 }, { "epoch": 0.10491434287857947, "grad_norm": 244.0, "learning_rate": 9.853588623741705e-05, "loss": 12.2504, "step": 2517 }, { "epoch": 0.1049560251761077, "grad_norm": 454.0, "learning_rate": 9.853426428580795e-05, "loss": 16.5002, "step": 2518 }, { "epoch": 0.10499770747363595, "grad_norm": 808.0, "learning_rate": 9.853264144965755e-05, "loss": 20.0048, "step": 2519 }, { "epoch": 0.10503938977116418, "grad_norm": 218.0, "learning_rate": 9.853101772899547e-05, "loss": 11.5003, "step": 2520 }, { "epoch": 0.10508107206869242, "grad_norm": 398.0, "learning_rate": 9.852939312385128e-05, "loss": 15.8132, "step": 2521 }, { "epoch": 0.10512275436622066, "grad_norm": 540.0, "learning_rate": 9.852776763425458e-05, "loss": 16.507, "step": 2522 }, { "epoch": 0.1051644366637489, "grad_norm": 1004.0, "learning_rate": 9.852614126023503e-05, "loss": 24.381, "step": 2523 }, { "epoch": 0.10520611896127714, "grad_norm": 330.0, "learning_rate": 9.852451400182223e-05, "loss": 11.6254, "step": 2524 }, { "epoch": 0.10524780125880538, "grad_norm": 370.0, "learning_rate": 9.852288585904586e-05, "loss": 14.7507, "step": 2525 }, { "epoch": 0.10528948355633362, "grad_norm": 298.0, "learning_rate": 9.852125683193559e-05, "loss": 13.8133, "step": 2526 }, { "epoch": 0.10533116585386186, "grad_norm": 338.0, "learning_rate": 9.851962692052111e-05, "loss": 13.8133, "step": 2527 }, { "epoch": 0.1053728481513901, "grad_norm": 354.0, "learning_rate": 9.851799612483211e-05, "loss": 14.3132, "step": 2528 }, { "epoch": 0.10541453044891834, "grad_norm": 712.0, "learning_rate": 9.851636444489832e-05, "loss": 20.3758, "step": 2529 }, { "epoch": 0.10545621274644658, "grad_norm": 346.0, "learning_rate": 9.851473188074949e-05, "loss": 15.3128, "step": 2530 }, { "epoch": 0.10549789504397482, "grad_norm": 253.0, "learning_rate": 9.851309843241536e-05, "loss": 12.5629, "step": 2531 }, { "epoch": 0.10553957734150306, "grad_norm": 171.0, "learning_rate": 9.851146409992572e-05, "loss": 9.6254, "step": 2532 }, { "epoch": 0.1055812596390313, "grad_norm": 828.0, "learning_rate": 9.850982888331032e-05, "loss": 23.3752, "step": 2533 }, { "epoch": 0.10562294193655954, "grad_norm": 296.0, "learning_rate": 9.850819278259899e-05, "loss": 8.5013, "step": 2534 }, { "epoch": 0.10566462423408779, "grad_norm": 266.0, "learning_rate": 9.850655579782155e-05, "loss": 12.6879, "step": 2535 }, { "epoch": 0.10570630653161603, "grad_norm": 498.0, "learning_rate": 9.850491792900782e-05, "loss": 16.3755, "step": 2536 }, { "epoch": 0.10574798882914427, "grad_norm": 404.0, "learning_rate": 9.850327917618766e-05, "loss": 15.1888, "step": 2537 }, { "epoch": 0.10578967112667251, "grad_norm": 352.0, "learning_rate": 9.850163953939091e-05, "loss": 15.314, "step": 2538 }, { "epoch": 0.10583135342420075, "grad_norm": 868.0, "learning_rate": 9.849999901864749e-05, "loss": 21.2546, "step": 2539 }, { "epoch": 0.10587303572172899, "grad_norm": 171.0, "learning_rate": 9.84983576139873e-05, "loss": 10.2516, "step": 2540 }, { "epoch": 0.10591471801925723, "grad_norm": 616.0, "learning_rate": 9.84967153254402e-05, "loss": 20.2508, "step": 2541 }, { "epoch": 0.10595640031678547, "grad_norm": 240.0, "learning_rate": 9.849507215303618e-05, "loss": 13.0629, "step": 2542 }, { "epoch": 0.1059980826143137, "grad_norm": 219.0, "learning_rate": 9.849342809680515e-05, "loss": 11.438, "step": 2543 }, { "epoch": 0.10603976491184194, "grad_norm": 256.0, "learning_rate": 9.849178315677709e-05, "loss": 13.1887, "step": 2544 }, { "epoch": 0.10608144720937018, "grad_norm": 572.0, "learning_rate": 9.8490137332982e-05, "loss": 16.2504, "step": 2545 }, { "epoch": 0.10612312950689842, "grad_norm": 282.0, "learning_rate": 9.848849062544984e-05, "loss": 12.8755, "step": 2546 }, { "epoch": 0.10616481180442666, "grad_norm": 328.0, "learning_rate": 9.848684303421063e-05, "loss": 13.6877, "step": 2547 }, { "epoch": 0.1062064941019549, "grad_norm": 206.0, "learning_rate": 9.84851945592944e-05, "loss": 9.5629, "step": 2548 }, { "epoch": 0.10624817639948314, "grad_norm": 356.0, "learning_rate": 9.84835452007312e-05, "loss": 14.0006, "step": 2549 }, { "epoch": 0.10628985869701138, "grad_norm": 312.0, "learning_rate": 9.848189495855108e-05, "loss": 13.1257, "step": 2550 }, { "epoch": 0.10633154099453962, "grad_norm": 448.0, "learning_rate": 9.848024383278413e-05, "loss": 17.2504, "step": 2551 }, { "epoch": 0.10637322329206786, "grad_norm": 294.0, "learning_rate": 9.847859182346042e-05, "loss": 12.4384, "step": 2552 }, { "epoch": 0.1064149055895961, "grad_norm": 209.0, "learning_rate": 9.847693893061007e-05, "loss": 10.6256, "step": 2553 }, { "epoch": 0.10645658788712434, "grad_norm": 202.0, "learning_rate": 9.847528515426321e-05, "loss": 9.1263, "step": 2554 }, { "epoch": 0.10649827018465258, "grad_norm": 552.0, "learning_rate": 9.847363049444997e-05, "loss": 18.5013, "step": 2555 }, { "epoch": 0.10653995248218082, "grad_norm": 79.0, "learning_rate": 9.847197495120053e-05, "loss": 8.1877, "step": 2556 }, { "epoch": 0.10658163477970906, "grad_norm": 314.0, "learning_rate": 9.847031852454502e-05, "loss": 13.0006, "step": 2557 }, { "epoch": 0.1066233170772373, "grad_norm": 300.0, "learning_rate": 9.846866121451366e-05, "loss": 13.0628, "step": 2558 }, { "epoch": 0.10666499937476553, "grad_norm": 358.0, "learning_rate": 9.846700302113665e-05, "loss": 14.4378, "step": 2559 }, { "epoch": 0.10670668167229377, "grad_norm": 67.5, "learning_rate": 9.846534394444421e-05, "loss": 9.1258, "step": 2560 }, { "epoch": 0.10674836396982201, "grad_norm": 532.0, "learning_rate": 9.846368398446657e-05, "loss": 16.8753, "step": 2561 }, { "epoch": 0.10679004626735025, "grad_norm": 780.0, "learning_rate": 9.846202314123399e-05, "loss": 21.754, "step": 2562 }, { "epoch": 0.10683172856487849, "grad_norm": 392.0, "learning_rate": 9.846036141477673e-05, "loss": 14.563, "step": 2563 }, { "epoch": 0.10687341086240673, "grad_norm": 408.0, "learning_rate": 9.845869880512508e-05, "loss": 14.8133, "step": 2564 }, { "epoch": 0.10691509315993497, "grad_norm": 660.0, "learning_rate": 9.845703531230935e-05, "loss": 20.5003, "step": 2565 }, { "epoch": 0.10695677545746321, "grad_norm": 472.0, "learning_rate": 9.845537093635985e-05, "loss": 15.6307, "step": 2566 }, { "epoch": 0.10699845775499145, "grad_norm": 344.0, "learning_rate": 9.845370567730691e-05, "loss": 14.7507, "step": 2567 }, { "epoch": 0.10704014005251969, "grad_norm": 143.0, "learning_rate": 9.845203953518089e-05, "loss": 9.1257, "step": 2568 }, { "epoch": 0.10708182235004793, "grad_norm": 338.0, "learning_rate": 9.845037251001213e-05, "loss": 15.2511, "step": 2569 }, { "epoch": 0.10712350464757617, "grad_norm": 402.0, "learning_rate": 9.844870460183104e-05, "loss": 15.0008, "step": 2570 }, { "epoch": 0.10716518694510442, "grad_norm": 228.0, "learning_rate": 9.8447035810668e-05, "loss": 11.5633, "step": 2571 }, { "epoch": 0.10720686924263266, "grad_norm": 410.0, "learning_rate": 9.844536613655346e-05, "loss": 13.4383, "step": 2572 }, { "epoch": 0.1072485515401609, "grad_norm": 498.0, "learning_rate": 9.84436955795178e-05, "loss": 17.2503, "step": 2573 }, { "epoch": 0.10729023383768914, "grad_norm": 324.0, "learning_rate": 9.84420241395915e-05, "loss": 14.3128, "step": 2574 }, { "epoch": 0.10733191613521738, "grad_norm": 140.0, "learning_rate": 9.844035181680499e-05, "loss": 8.688, "step": 2575 }, { "epoch": 0.10737359843274562, "grad_norm": 1544.0, "learning_rate": 9.843867861118878e-05, "loss": 31.8788, "step": 2576 }, { "epoch": 0.10741528073027386, "grad_norm": 636.0, "learning_rate": 9.843700452277333e-05, "loss": 15.63, "step": 2577 }, { "epoch": 0.1074569630278021, "grad_norm": 1200.0, "learning_rate": 9.843532955158921e-05, "loss": 28.1272, "step": 2578 }, { "epoch": 0.10749864532533034, "grad_norm": 952.0, "learning_rate": 9.843365369766688e-05, "loss": 22.2505, "step": 2579 }, { "epoch": 0.10754032762285858, "grad_norm": 304.0, "learning_rate": 9.843197696103694e-05, "loss": 13.688, "step": 2580 }, { "epoch": 0.10758200992038681, "grad_norm": 460.0, "learning_rate": 9.843029934172989e-05, "loss": 15.7502, "step": 2581 }, { "epoch": 0.10762369221791505, "grad_norm": 134.0, "learning_rate": 9.842862083977636e-05, "loss": 9.188, "step": 2582 }, { "epoch": 0.1076653745154433, "grad_norm": 84.5, "learning_rate": 9.842694145520691e-05, "loss": 9.7506, "step": 2583 }, { "epoch": 0.10770705681297153, "grad_norm": 408.0, "learning_rate": 9.842526118805214e-05, "loss": 15.0628, "step": 2584 }, { "epoch": 0.10774873911049977, "grad_norm": 191.0, "learning_rate": 9.842358003834269e-05, "loss": 11.1877, "step": 2585 }, { "epoch": 0.10779042140802801, "grad_norm": 438.0, "learning_rate": 9.84218980061092e-05, "loss": 14.4418, "step": 2586 }, { "epoch": 0.10783210370555625, "grad_norm": 442.0, "learning_rate": 9.842021509138232e-05, "loss": 16.3759, "step": 2587 }, { "epoch": 0.10787378600308449, "grad_norm": 532.0, "learning_rate": 9.841853129419271e-05, "loss": 16.8753, "step": 2588 }, { "epoch": 0.10791546830061273, "grad_norm": 304.0, "learning_rate": 9.841684661457109e-05, "loss": 12.3131, "step": 2589 }, { "epoch": 0.10795715059814097, "grad_norm": 372.0, "learning_rate": 9.841516105254813e-05, "loss": 14.1253, "step": 2590 }, { "epoch": 0.10799883289566921, "grad_norm": 780.0, "learning_rate": 9.841347460815456e-05, "loss": 18.6334, "step": 2591 }, { "epoch": 0.10804051519319745, "grad_norm": 620.0, "learning_rate": 9.841178728142113e-05, "loss": 18.6255, "step": 2592 }, { "epoch": 0.10808219749072569, "grad_norm": 298.0, "learning_rate": 9.841009907237857e-05, "loss": 11.7508, "step": 2593 }, { "epoch": 0.10812387978825393, "grad_norm": 464.0, "learning_rate": 9.840840998105764e-05, "loss": 16.3752, "step": 2594 }, { "epoch": 0.10816556208578217, "grad_norm": 292.0, "learning_rate": 9.840672000748916e-05, "loss": 12.376, "step": 2595 }, { "epoch": 0.1082072443833104, "grad_norm": 440.0, "learning_rate": 9.840502915170392e-05, "loss": 16.2506, "step": 2596 }, { "epoch": 0.10824892668083864, "grad_norm": 280.0, "learning_rate": 9.840333741373271e-05, "loss": 11.5025, "step": 2597 }, { "epoch": 0.10829060897836688, "grad_norm": 460.0, "learning_rate": 9.840164479360639e-05, "loss": 14.5637, "step": 2598 }, { "epoch": 0.10833229127589512, "grad_norm": 170.0, "learning_rate": 9.839995129135579e-05, "loss": 10.5002, "step": 2599 }, { "epoch": 0.10837397357342336, "grad_norm": 436.0, "learning_rate": 9.839825690701179e-05, "loss": 16.1257, "step": 2600 }, { "epoch": 0.1084156558709516, "grad_norm": 81.5, "learning_rate": 9.839656164060525e-05, "loss": 7.5942, "step": 2601 }, { "epoch": 0.10845733816847984, "grad_norm": 174.0, "learning_rate": 9.839486549216708e-05, "loss": 8.5626, "step": 2602 }, { "epoch": 0.10849902046600808, "grad_norm": 254.0, "learning_rate": 9.839316846172819e-05, "loss": 11.8128, "step": 2603 }, { "epoch": 0.10854070276353632, "grad_norm": 294.0, "learning_rate": 9.839147054931951e-05, "loss": 12.1253, "step": 2604 }, { "epoch": 0.10858238506106456, "grad_norm": 146.0, "learning_rate": 9.838977175497198e-05, "loss": 10.1254, "step": 2605 }, { "epoch": 0.1086240673585928, "grad_norm": 134.0, "learning_rate": 9.838807207871656e-05, "loss": 8.6884, "step": 2606 }, { "epoch": 0.10866574965612105, "grad_norm": 354.0, "learning_rate": 9.838637152058425e-05, "loss": 14.3754, "step": 2607 }, { "epoch": 0.10870743195364929, "grad_norm": 704.0, "learning_rate": 9.838467008060602e-05, "loss": 22.1254, "step": 2608 }, { "epoch": 0.10874911425117753, "grad_norm": 382.0, "learning_rate": 9.838296775881287e-05, "loss": 14.4396, "step": 2609 }, { "epoch": 0.10879079654870577, "grad_norm": 368.0, "learning_rate": 9.838126455523584e-05, "loss": 14.5003, "step": 2610 }, { "epoch": 0.10883247884623401, "grad_norm": 458.0, "learning_rate": 9.837956046990597e-05, "loss": 14.57, "step": 2611 }, { "epoch": 0.10887416114376225, "grad_norm": 95.5, "learning_rate": 9.837785550285432e-05, "loss": 9.7508, "step": 2612 }, { "epoch": 0.10891584344129049, "grad_norm": 478.0, "learning_rate": 9.837614965411195e-05, "loss": 18.0011, "step": 2613 }, { "epoch": 0.10895752573881873, "grad_norm": 502.0, "learning_rate": 9.837444292370996e-05, "loss": 16.6257, "step": 2614 }, { "epoch": 0.10899920803634697, "grad_norm": 1232.0, "learning_rate": 9.837273531167946e-05, "loss": 26.0054, "step": 2615 }, { "epoch": 0.1090408903338752, "grad_norm": 648.0, "learning_rate": 9.837102681805157e-05, "loss": 18.0008, "step": 2616 }, { "epoch": 0.10908257263140345, "grad_norm": 348.0, "learning_rate": 9.836931744285741e-05, "loss": 13.8129, "step": 2617 }, { "epoch": 0.10912425492893169, "grad_norm": 342.0, "learning_rate": 9.836760718612815e-05, "loss": 13.938, "step": 2618 }, { "epoch": 0.10916593722645992, "grad_norm": 576.0, "learning_rate": 9.836589604789495e-05, "loss": 19.0003, "step": 2619 }, { "epoch": 0.10920761952398816, "grad_norm": 486.0, "learning_rate": 9.8364184028189e-05, "loss": 16.7505, "step": 2620 }, { "epoch": 0.1092493018215164, "grad_norm": 101.5, "learning_rate": 9.836247112704152e-05, "loss": 7.2192, "step": 2621 }, { "epoch": 0.10929098411904464, "grad_norm": 1432.0, "learning_rate": 9.83607573444837e-05, "loss": 30.3808, "step": 2622 }, { "epoch": 0.10933266641657288, "grad_norm": 228.0, "learning_rate": 9.835904268054678e-05, "loss": 10.6255, "step": 2623 }, { "epoch": 0.10937434871410112, "grad_norm": 844.0, "learning_rate": 9.835732713526203e-05, "loss": 20.8805, "step": 2624 }, { "epoch": 0.10941603101162936, "grad_norm": 194.0, "learning_rate": 9.835561070866069e-05, "loss": 9.1881, "step": 2625 }, { "epoch": 0.1094577133091576, "grad_norm": 424.0, "learning_rate": 9.835389340077403e-05, "loss": 15.2508, "step": 2626 }, { "epoch": 0.10949939560668584, "grad_norm": 264.0, "learning_rate": 9.83521752116334e-05, "loss": 12.3753, "step": 2627 }, { "epoch": 0.10954107790421408, "grad_norm": 568.0, "learning_rate": 9.835045614127008e-05, "loss": 16.6252, "step": 2628 }, { "epoch": 0.10958276020174232, "grad_norm": 136.0, "learning_rate": 9.834873618971539e-05, "loss": 8.938, "step": 2629 }, { "epoch": 0.10962444249927056, "grad_norm": 240.0, "learning_rate": 9.83470153570007e-05, "loss": 11.0003, "step": 2630 }, { "epoch": 0.1096661247967988, "grad_norm": 215.0, "learning_rate": 9.834529364315736e-05, "loss": 10.7518, "step": 2631 }, { "epoch": 0.10970780709432704, "grad_norm": 464.0, "learning_rate": 9.834357104821676e-05, "loss": 16.7505, "step": 2632 }, { "epoch": 0.10974948939185528, "grad_norm": 512.0, "learning_rate": 9.834184757221028e-05, "loss": 18.752, "step": 2633 }, { "epoch": 0.10979117168938352, "grad_norm": 764.0, "learning_rate": 9.834012321516935e-05, "loss": 22.2504, "step": 2634 }, { "epoch": 0.10983285398691175, "grad_norm": 251.0, "learning_rate": 9.833839797712537e-05, "loss": 12.6892, "step": 2635 }, { "epoch": 0.10987453628444, "grad_norm": 244.0, "learning_rate": 9.83366718581098e-05, "loss": 12.5631, "step": 2636 }, { "epoch": 0.10991621858196823, "grad_norm": 568.0, "learning_rate": 9.833494485815409e-05, "loss": 18.1254, "step": 2637 }, { "epoch": 0.10995790087949647, "grad_norm": 338.0, "learning_rate": 9.833321697728971e-05, "loss": 14.4379, "step": 2638 }, { "epoch": 0.10999958317702471, "grad_norm": 206.0, "learning_rate": 9.833148821554818e-05, "loss": 10.876, "step": 2639 }, { "epoch": 0.11004126547455295, "grad_norm": 67.5, "learning_rate": 9.832975857296096e-05, "loss": 8.5022, "step": 2640 }, { "epoch": 0.11008294777208119, "grad_norm": 1768.0, "learning_rate": 9.832802804955963e-05, "loss": 37.0005, "step": 2641 }, { "epoch": 0.11012463006960943, "grad_norm": 380.0, "learning_rate": 9.832629664537568e-05, "loss": 14.8763, "step": 2642 }, { "epoch": 0.11016631236713767, "grad_norm": 536.0, "learning_rate": 9.832456436044068e-05, "loss": 14.8129, "step": 2643 }, { "epoch": 0.11020799466466592, "grad_norm": 250.0, "learning_rate": 9.832283119478623e-05, "loss": 11.626, "step": 2644 }, { "epoch": 0.11024967696219416, "grad_norm": 836.0, "learning_rate": 9.832109714844387e-05, "loss": 24.2511, "step": 2645 }, { "epoch": 0.1102913592597224, "grad_norm": 592.0, "learning_rate": 9.831936222144523e-05, "loss": 19.751, "step": 2646 }, { "epoch": 0.11033304155725064, "grad_norm": 452.0, "learning_rate": 9.831762641382192e-05, "loss": 16.0007, "step": 2647 }, { "epoch": 0.11037472385477888, "grad_norm": 234.0, "learning_rate": 9.831588972560559e-05, "loss": 11.9392, "step": 2648 }, { "epoch": 0.11041640615230712, "grad_norm": 230.0, "learning_rate": 9.831415215682786e-05, "loss": 10.5004, "step": 2649 }, { "epoch": 0.11045808844983536, "grad_norm": 268.0, "learning_rate": 9.831241370752045e-05, "loss": 13.4392, "step": 2650 }, { "epoch": 0.1104997707473636, "grad_norm": 127.5, "learning_rate": 9.831067437771498e-05, "loss": 7.9698, "step": 2651 }, { "epoch": 0.11054145304489184, "grad_norm": 468.0, "learning_rate": 9.83089341674432e-05, "loss": 17.7502, "step": 2652 }, { "epoch": 0.11058313534242008, "grad_norm": 199.0, "learning_rate": 9.830719307673679e-05, "loss": 12.0629, "step": 2653 }, { "epoch": 0.11062481763994832, "grad_norm": 524.0, "learning_rate": 9.830545110562752e-05, "loss": 19.2503, "step": 2654 }, { "epoch": 0.11066649993747656, "grad_norm": 644.0, "learning_rate": 9.83037082541471e-05, "loss": 18.5008, "step": 2655 }, { "epoch": 0.1107081822350048, "grad_norm": 438.0, "learning_rate": 9.830196452232732e-05, "loss": 15.6891, "step": 2656 }, { "epoch": 0.11074986453253303, "grad_norm": 544.0, "learning_rate": 9.830021991019993e-05, "loss": 15.8752, "step": 2657 }, { "epoch": 0.11079154683006127, "grad_norm": 972.0, "learning_rate": 9.829847441779675e-05, "loss": 22.6305, "step": 2658 }, { "epoch": 0.11083322912758951, "grad_norm": 266.0, "learning_rate": 9.82967280451496e-05, "loss": 12.8763, "step": 2659 }, { "epoch": 0.11087491142511775, "grad_norm": 268.0, "learning_rate": 9.82949807922903e-05, "loss": 13.0002, "step": 2660 }, { "epoch": 0.11091659372264599, "grad_norm": 101.0, "learning_rate": 9.829323265925066e-05, "loss": 8.8759, "step": 2661 }, { "epoch": 0.11095827602017423, "grad_norm": 184.0, "learning_rate": 9.829148364606258e-05, "loss": 9.938, "step": 2662 }, { "epoch": 0.11099995831770247, "grad_norm": 1280.0, "learning_rate": 9.828973375275793e-05, "loss": 27.8796, "step": 2663 }, { "epoch": 0.11104164061523071, "grad_norm": 142.0, "learning_rate": 9.828798297936859e-05, "loss": 8.5636, "step": 2664 }, { "epoch": 0.11108332291275895, "grad_norm": 268.0, "learning_rate": 9.828623132592647e-05, "loss": 12.5005, "step": 2665 }, { "epoch": 0.11112500521028719, "grad_norm": 382.0, "learning_rate": 9.828447879246349e-05, "loss": 15.3753, "step": 2666 }, { "epoch": 0.11116668750781543, "grad_norm": 420.0, "learning_rate": 9.828272537901162e-05, "loss": 16.5003, "step": 2667 }, { "epoch": 0.11120836980534367, "grad_norm": 468.0, "learning_rate": 9.828097108560279e-05, "loss": 16.5014, "step": 2668 }, { "epoch": 0.11125005210287191, "grad_norm": 956.0, "learning_rate": 9.827921591226897e-05, "loss": 24.3754, "step": 2669 }, { "epoch": 0.11129173440040015, "grad_norm": 378.0, "learning_rate": 9.827745985904216e-05, "loss": 14.6892, "step": 2670 }, { "epoch": 0.11133341669792839, "grad_norm": 410.0, "learning_rate": 9.827570292595434e-05, "loss": 14.6256, "step": 2671 }, { "epoch": 0.11137509899545663, "grad_norm": 296.0, "learning_rate": 9.827394511303755e-05, "loss": 12.1253, "step": 2672 }, { "epoch": 0.11141678129298486, "grad_norm": 952.0, "learning_rate": 9.827218642032384e-05, "loss": 23.8808, "step": 2673 }, { "epoch": 0.1114584635905131, "grad_norm": 378.0, "learning_rate": 9.827042684784524e-05, "loss": 14.1255, "step": 2674 }, { "epoch": 0.11150014588804134, "grad_norm": 326.0, "learning_rate": 9.826866639563384e-05, "loss": 12.8128, "step": 2675 }, { "epoch": 0.11154182818556958, "grad_norm": 506.0, "learning_rate": 9.826690506372169e-05, "loss": 16.3759, "step": 2676 }, { "epoch": 0.11158351048309782, "grad_norm": 438.0, "learning_rate": 9.826514285214092e-05, "loss": 14.3759, "step": 2677 }, { "epoch": 0.11162519278062606, "grad_norm": 183.0, "learning_rate": 9.826337976092364e-05, "loss": 10.877, "step": 2678 }, { "epoch": 0.1116668750781543, "grad_norm": 95.0, "learning_rate": 9.826161579010197e-05, "loss": 7.6886, "step": 2679 }, { "epoch": 0.11170855737568255, "grad_norm": 324.0, "learning_rate": 9.825985093970807e-05, "loss": 13.6262, "step": 2680 }, { "epoch": 0.1117502396732108, "grad_norm": 376.0, "learning_rate": 9.825808520977411e-05, "loss": 14.3753, "step": 2681 }, { "epoch": 0.11179192197073903, "grad_norm": 374.0, "learning_rate": 9.825631860033225e-05, "loss": 13.2507, "step": 2682 }, { "epoch": 0.11183360426826727, "grad_norm": 193.0, "learning_rate": 9.825455111141471e-05, "loss": 10.0629, "step": 2683 }, { "epoch": 0.11187528656579551, "grad_norm": 324.0, "learning_rate": 9.825278274305369e-05, "loss": 13.7503, "step": 2684 }, { "epoch": 0.11191696886332375, "grad_norm": 192.0, "learning_rate": 9.825101349528144e-05, "loss": 10.4385, "step": 2685 }, { "epoch": 0.11195865116085199, "grad_norm": 294.0, "learning_rate": 9.824924336813015e-05, "loss": 12.688, "step": 2686 }, { "epoch": 0.11200033345838023, "grad_norm": 133.0, "learning_rate": 9.824747236163213e-05, "loss": 8.2505, "step": 2687 }, { "epoch": 0.11204201575590847, "grad_norm": 454.0, "learning_rate": 9.824570047581965e-05, "loss": 15.5002, "step": 2688 }, { "epoch": 0.11208369805343671, "grad_norm": 218.0, "learning_rate": 9.824392771072499e-05, "loss": 11.1883, "step": 2689 }, { "epoch": 0.11212538035096495, "grad_norm": 1208.0, "learning_rate": 9.824215406638046e-05, "loss": 26.5058, "step": 2690 }, { "epoch": 0.11216706264849319, "grad_norm": 246.0, "learning_rate": 9.82403795428184e-05, "loss": 11.9377, "step": 2691 }, { "epoch": 0.11220874494602143, "grad_norm": 404.0, "learning_rate": 9.823860414007113e-05, "loss": 15.0006, "step": 2692 }, { "epoch": 0.11225042724354967, "grad_norm": 428.0, "learning_rate": 9.823682785817103e-05, "loss": 16.2503, "step": 2693 }, { "epoch": 0.1122921095410779, "grad_norm": 396.0, "learning_rate": 9.823505069715047e-05, "loss": 14.7504, "step": 2694 }, { "epoch": 0.11233379183860615, "grad_norm": 402.0, "learning_rate": 9.823327265704181e-05, "loss": 14.5011, "step": 2695 }, { "epoch": 0.11237547413613438, "grad_norm": 354.0, "learning_rate": 9.823149373787746e-05, "loss": 12.6252, "step": 2696 }, { "epoch": 0.11241715643366262, "grad_norm": 536.0, "learning_rate": 9.822971393968988e-05, "loss": 13.1278, "step": 2697 }, { "epoch": 0.11245883873119086, "grad_norm": 306.0, "learning_rate": 9.822793326251147e-05, "loss": 11.4379, "step": 2698 }, { "epoch": 0.1125005210287191, "grad_norm": 398.0, "learning_rate": 9.822615170637471e-05, "loss": 16.1252, "step": 2699 }, { "epoch": 0.11254220332624734, "grad_norm": 340.0, "learning_rate": 9.822436927131204e-05, "loss": 13.4377, "step": 2700 }, { "epoch": 0.11258388562377558, "grad_norm": 488.0, "learning_rate": 9.822258595735596e-05, "loss": 17.1256, "step": 2701 }, { "epoch": 0.11262556792130382, "grad_norm": 416.0, "learning_rate": 9.822080176453897e-05, "loss": 14.8131, "step": 2702 }, { "epoch": 0.11266725021883206, "grad_norm": 406.0, "learning_rate": 9.82190166928936e-05, "loss": 13.9378, "step": 2703 }, { "epoch": 0.1127089325163603, "grad_norm": 258.0, "learning_rate": 9.821723074245235e-05, "loss": 11.6877, "step": 2704 }, { "epoch": 0.11275061481388854, "grad_norm": 356.0, "learning_rate": 9.821544391324779e-05, "loss": 14.5628, "step": 2705 }, { "epoch": 0.11279229711141678, "grad_norm": 209.0, "learning_rate": 9.82136562053125e-05, "loss": 9.8754, "step": 2706 }, { "epoch": 0.11283397940894502, "grad_norm": 382.0, "learning_rate": 9.821186761867905e-05, "loss": 14.688, "step": 2707 }, { "epoch": 0.11287566170647326, "grad_norm": 186.0, "learning_rate": 9.821007815338002e-05, "loss": 12.2505, "step": 2708 }, { "epoch": 0.1129173440040015, "grad_norm": 1448.0, "learning_rate": 9.820828780944805e-05, "loss": 29.5045, "step": 2709 }, { "epoch": 0.11295902630152974, "grad_norm": 235.0, "learning_rate": 9.820649658691573e-05, "loss": 11.7502, "step": 2710 }, { "epoch": 0.11300070859905798, "grad_norm": 420.0, "learning_rate": 9.820470448581574e-05, "loss": 16.1261, "step": 2711 }, { "epoch": 0.11304239089658621, "grad_norm": 588.0, "learning_rate": 9.820291150618073e-05, "loss": 19.1254, "step": 2712 }, { "epoch": 0.11308407319411445, "grad_norm": 253.0, "learning_rate": 9.820111764804338e-05, "loss": 10.8754, "step": 2713 }, { "epoch": 0.1131257554916427, "grad_norm": 186.0, "learning_rate": 9.81993229114364e-05, "loss": 9.5004, "step": 2714 }, { "epoch": 0.11316743778917093, "grad_norm": 163.0, "learning_rate": 9.819752729639247e-05, "loss": 12.2507, "step": 2715 }, { "epoch": 0.11320912008669917, "grad_norm": 310.0, "learning_rate": 9.819573080294431e-05, "loss": 13.1879, "step": 2716 }, { "epoch": 0.11325080238422743, "grad_norm": 1704.0, "learning_rate": 9.81939334311247e-05, "loss": 43.2532, "step": 2717 }, { "epoch": 0.11329248468175566, "grad_norm": 442.0, "learning_rate": 9.819213518096637e-05, "loss": 16.0031, "step": 2718 }, { "epoch": 0.1133341669792839, "grad_norm": 217.0, "learning_rate": 9.819033605250209e-05, "loss": 12.3129, "step": 2719 }, { "epoch": 0.11337584927681214, "grad_norm": 256.0, "learning_rate": 9.818853604576465e-05, "loss": 12.5003, "step": 2720 }, { "epoch": 0.11341753157434038, "grad_norm": 528.0, "learning_rate": 9.818673516078689e-05, "loss": 16.7507, "step": 2721 }, { "epoch": 0.11345921387186862, "grad_norm": 466.0, "learning_rate": 9.818493339760158e-05, "loss": 15.8763, "step": 2722 }, { "epoch": 0.11350089616939686, "grad_norm": 684.0, "learning_rate": 9.818313075624159e-05, "loss": 19.2509, "step": 2723 }, { "epoch": 0.1135425784669251, "grad_norm": 239.0, "learning_rate": 9.818132723673977e-05, "loss": 10.8754, "step": 2724 }, { "epoch": 0.11358426076445334, "grad_norm": 408.0, "learning_rate": 9.817952283912896e-05, "loss": 16.2509, "step": 2725 }, { "epoch": 0.11362594306198158, "grad_norm": 556.0, "learning_rate": 9.81777175634421e-05, "loss": 17.7515, "step": 2726 }, { "epoch": 0.11366762535950982, "grad_norm": 161.0, "learning_rate": 9.817591140971204e-05, "loss": 9.9377, "step": 2727 }, { "epoch": 0.11370930765703806, "grad_norm": 246.0, "learning_rate": 9.817410437797172e-05, "loss": 11.9378, "step": 2728 }, { "epoch": 0.1137509899545663, "grad_norm": 400.0, "learning_rate": 9.817229646825407e-05, "loss": 14.9381, "step": 2729 }, { "epoch": 0.11379267225209454, "grad_norm": 302.0, "learning_rate": 9.817048768059207e-05, "loss": 8.4393, "step": 2730 }, { "epoch": 0.11383435454962278, "grad_norm": 336.0, "learning_rate": 9.816867801501863e-05, "loss": 12.691, "step": 2731 }, { "epoch": 0.11387603684715102, "grad_norm": 724.0, "learning_rate": 9.816686747156676e-05, "loss": 17.63, "step": 2732 }, { "epoch": 0.11391771914467926, "grad_norm": 282.0, "learning_rate": 9.816505605026944e-05, "loss": 13.1881, "step": 2733 }, { "epoch": 0.1139594014422075, "grad_norm": 402.0, "learning_rate": 9.816324375115973e-05, "loss": 16.3755, "step": 2734 }, { "epoch": 0.11400108373973573, "grad_norm": 247.0, "learning_rate": 9.816143057427061e-05, "loss": 11.3756, "step": 2735 }, { "epoch": 0.11404276603726397, "grad_norm": 288.0, "learning_rate": 9.815961651963513e-05, "loss": 12.8129, "step": 2736 }, { "epoch": 0.11408444833479221, "grad_norm": 272.0, "learning_rate": 9.815780158728638e-05, "loss": 12.2503, "step": 2737 }, { "epoch": 0.11412613063232045, "grad_norm": 274.0, "learning_rate": 9.815598577725741e-05, "loss": 13.9383, "step": 2738 }, { "epoch": 0.11416781292984869, "grad_norm": 215.0, "learning_rate": 9.815416908958132e-05, "loss": 11.1254, "step": 2739 }, { "epoch": 0.11420949522737693, "grad_norm": 298.0, "learning_rate": 9.815235152429125e-05, "loss": 14.3132, "step": 2740 }, { "epoch": 0.11425117752490517, "grad_norm": 134.0, "learning_rate": 9.815053308142029e-05, "loss": 9.6259, "step": 2741 }, { "epoch": 0.11429285982243341, "grad_norm": 398.0, "learning_rate": 9.814871376100158e-05, "loss": 14.5627, "step": 2742 }, { "epoch": 0.11433454211996165, "grad_norm": 304.0, "learning_rate": 9.814689356306828e-05, "loss": 12.1253, "step": 2743 }, { "epoch": 0.11437622441748989, "grad_norm": 1088.0, "learning_rate": 9.814507248765359e-05, "loss": 31.7522, "step": 2744 }, { "epoch": 0.11441790671501813, "grad_norm": 438.0, "learning_rate": 9.814325053479067e-05, "loss": 16.2503, "step": 2745 }, { "epoch": 0.11445958901254637, "grad_norm": 80.0, "learning_rate": 9.814142770451274e-05, "loss": 8.3753, "step": 2746 }, { "epoch": 0.1145012713100746, "grad_norm": 572.0, "learning_rate": 9.8139603996853e-05, "loss": 18.2509, "step": 2747 }, { "epoch": 0.11454295360760285, "grad_norm": 540.0, "learning_rate": 9.813777941184472e-05, "loss": 17.6256, "step": 2748 }, { "epoch": 0.11458463590513109, "grad_norm": 248.0, "learning_rate": 9.813595394952114e-05, "loss": 12.0005, "step": 2749 }, { "epoch": 0.11462631820265932, "grad_norm": 312.0, "learning_rate": 9.813412760991552e-05, "loss": 12.689, "step": 2750 }, { "epoch": 0.11466800050018756, "grad_norm": 502.0, "learning_rate": 9.813230039306114e-05, "loss": 17.0008, "step": 2751 }, { "epoch": 0.1147096827977158, "grad_norm": 274.0, "learning_rate": 9.813047229899132e-05, "loss": 12.2503, "step": 2752 }, { "epoch": 0.11475136509524406, "grad_norm": 191.0, "learning_rate": 9.81286433277394e-05, "loss": 10.0627, "step": 2753 }, { "epoch": 0.1147930473927723, "grad_norm": 292.0, "learning_rate": 9.812681347933863e-05, "loss": 13.0637, "step": 2754 }, { "epoch": 0.11483472969030054, "grad_norm": 448.0, "learning_rate": 9.812498275382245e-05, "loss": 15.3128, "step": 2755 }, { "epoch": 0.11487641198782877, "grad_norm": 180.0, "learning_rate": 9.812315115122417e-05, "loss": 9.8756, "step": 2756 }, { "epoch": 0.11491809428535701, "grad_norm": 416.0, "learning_rate": 9.81213186715772e-05, "loss": 15.8758, "step": 2757 }, { "epoch": 0.11495977658288525, "grad_norm": 360.0, "learning_rate": 9.811948531491491e-05, "loss": 14.5002, "step": 2758 }, { "epoch": 0.11500145888041349, "grad_norm": 220.0, "learning_rate": 9.811765108127073e-05, "loss": 10.8754, "step": 2759 }, { "epoch": 0.11504314117794173, "grad_norm": 290.0, "learning_rate": 9.81158159706781e-05, "loss": 12.8127, "step": 2760 }, { "epoch": 0.11508482347546997, "grad_norm": 330.0, "learning_rate": 9.811397998317045e-05, "loss": 13.8755, "step": 2761 }, { "epoch": 0.11512650577299821, "grad_norm": 728.0, "learning_rate": 9.811214311878124e-05, "loss": 21.2503, "step": 2762 }, { "epoch": 0.11516818807052645, "grad_norm": 404.0, "learning_rate": 9.811030537754395e-05, "loss": 16.3757, "step": 2763 }, { "epoch": 0.11520987036805469, "grad_norm": 167.0, "learning_rate": 9.810846675949208e-05, "loss": 9.5628, "step": 2764 }, { "epoch": 0.11525155266558293, "grad_norm": 120.5, "learning_rate": 9.810662726465913e-05, "loss": 9.5011, "step": 2765 }, { "epoch": 0.11529323496311117, "grad_norm": 272.0, "learning_rate": 9.810478689307863e-05, "loss": 12.2503, "step": 2766 }, { "epoch": 0.11533491726063941, "grad_norm": 568.0, "learning_rate": 9.810294564478411e-05, "loss": 19.1257, "step": 2767 }, { "epoch": 0.11537659955816765, "grad_norm": 158.0, "learning_rate": 9.810110351980914e-05, "loss": 8.7506, "step": 2768 }, { "epoch": 0.11541828185569589, "grad_norm": 233.0, "learning_rate": 9.809926051818727e-05, "loss": 12.3129, "step": 2769 }, { "epoch": 0.11545996415322413, "grad_norm": 316.0, "learning_rate": 9.809741663995213e-05, "loss": 14.1256, "step": 2770 }, { "epoch": 0.11550164645075237, "grad_norm": 302.0, "learning_rate": 9.809557188513731e-05, "loss": 13.1886, "step": 2771 }, { "epoch": 0.1155433287482806, "grad_norm": 320.0, "learning_rate": 9.80937262537764e-05, "loss": 13.6256, "step": 2772 }, { "epoch": 0.11558501104580884, "grad_norm": 348.0, "learning_rate": 9.809187974590307e-05, "loss": 11.5632, "step": 2773 }, { "epoch": 0.11562669334333708, "grad_norm": 498.0, "learning_rate": 9.809003236155097e-05, "loss": 16.6253, "step": 2774 }, { "epoch": 0.11566837564086532, "grad_norm": 406.0, "learning_rate": 9.808818410075374e-05, "loss": 15.8752, "step": 2775 }, { "epoch": 0.11571005793839356, "grad_norm": 178.0, "learning_rate": 9.80863349635451e-05, "loss": 9.501, "step": 2776 }, { "epoch": 0.1157517402359218, "grad_norm": 284.0, "learning_rate": 9.808448494995875e-05, "loss": 13.5633, "step": 2777 }, { "epoch": 0.11579342253345004, "grad_norm": 476.0, "learning_rate": 9.808263406002837e-05, "loss": 15.5635, "step": 2778 }, { "epoch": 0.11583510483097828, "grad_norm": 520.0, "learning_rate": 9.808078229378771e-05, "loss": 16.5003, "step": 2779 }, { "epoch": 0.11587678712850652, "grad_norm": 145.0, "learning_rate": 9.807892965127055e-05, "loss": 9.8131, "step": 2780 }, { "epoch": 0.11591846942603476, "grad_norm": 988.0, "learning_rate": 9.807707613251062e-05, "loss": 21.38, "step": 2781 }, { "epoch": 0.115960151723563, "grad_norm": 450.0, "learning_rate": 9.80752217375417e-05, "loss": 15.1264, "step": 2782 }, { "epoch": 0.11600183402109124, "grad_norm": 178.0, "learning_rate": 9.80733664663976e-05, "loss": 10.6878, "step": 2783 }, { "epoch": 0.11604351631861948, "grad_norm": 322.0, "learning_rate": 9.807151031911214e-05, "loss": 13.8137, "step": 2784 }, { "epoch": 0.11608519861614772, "grad_norm": 360.0, "learning_rate": 9.806965329571912e-05, "loss": 14.6255, "step": 2785 }, { "epoch": 0.11612688091367596, "grad_norm": 258.0, "learning_rate": 9.806779539625241e-05, "loss": 11.631, "step": 2786 }, { "epoch": 0.1161685632112042, "grad_norm": 536.0, "learning_rate": 9.806593662074586e-05, "loss": 19.1257, "step": 2787 }, { "epoch": 0.11621024550873243, "grad_norm": 211.0, "learning_rate": 9.806407696923336e-05, "loss": 8.5003, "step": 2788 }, { "epoch": 0.11625192780626067, "grad_norm": 892.0, "learning_rate": 9.806221644174877e-05, "loss": 23.2503, "step": 2789 }, { "epoch": 0.11629361010378893, "grad_norm": 256.0, "learning_rate": 9.806035503832603e-05, "loss": 12.0627, "step": 2790 }, { "epoch": 0.11633529240131717, "grad_norm": 408.0, "learning_rate": 9.805849275899905e-05, "loss": 14.8754, "step": 2791 }, { "epoch": 0.1163769746988454, "grad_norm": 177.0, "learning_rate": 9.805662960380178e-05, "loss": 10.8129, "step": 2792 }, { "epoch": 0.11641865699637365, "grad_norm": 772.0, "learning_rate": 9.805476557276816e-05, "loss": 22.6252, "step": 2793 }, { "epoch": 0.11646033929390188, "grad_norm": 260.0, "learning_rate": 9.805290066593218e-05, "loss": 11.813, "step": 2794 }, { "epoch": 0.11650202159143012, "grad_norm": 268.0, "learning_rate": 9.805103488332782e-05, "loss": 12.5005, "step": 2795 }, { "epoch": 0.11654370388895836, "grad_norm": 788.0, "learning_rate": 9.804916822498908e-05, "loss": 24.1253, "step": 2796 }, { "epoch": 0.1165853861864866, "grad_norm": 328.0, "learning_rate": 9.804730069094998e-05, "loss": 13.8752, "step": 2797 }, { "epoch": 0.11662706848401484, "grad_norm": 516.0, "learning_rate": 9.804543228124456e-05, "loss": 18.0009, "step": 2798 }, { "epoch": 0.11666875078154308, "grad_norm": 222.0, "learning_rate": 9.804356299590688e-05, "loss": 11.6268, "step": 2799 }, { "epoch": 0.11671043307907132, "grad_norm": 432.0, "learning_rate": 9.804169283497099e-05, "loss": 15.6881, "step": 2800 }, { "epoch": 0.11675211537659956, "grad_norm": 1224.0, "learning_rate": 9.803982179847099e-05, "loss": 24.8795, "step": 2801 }, { "epoch": 0.1167937976741278, "grad_norm": 500.0, "learning_rate": 9.803794988644097e-05, "loss": 16.5002, "step": 2802 }, { "epoch": 0.11683547997165604, "grad_norm": 177.0, "learning_rate": 9.803607709891504e-05, "loss": 12.0633, "step": 2803 }, { "epoch": 0.11687716226918428, "grad_norm": 179.0, "learning_rate": 9.803420343592736e-05, "loss": 7.6278, "step": 2804 }, { "epoch": 0.11691884456671252, "grad_norm": 408.0, "learning_rate": 9.803232889751203e-05, "loss": 14.2504, "step": 2805 }, { "epoch": 0.11696052686424076, "grad_norm": 314.0, "learning_rate": 9.803045348370327e-05, "loss": 13.2505, "step": 2806 }, { "epoch": 0.117002209161769, "grad_norm": 304.0, "learning_rate": 9.802857719453523e-05, "loss": 12.3754, "step": 2807 }, { "epoch": 0.11704389145929724, "grad_norm": 972.0, "learning_rate": 9.802670003004208e-05, "loss": 24.8803, "step": 2808 }, { "epoch": 0.11708557375682548, "grad_norm": 612.0, "learning_rate": 9.802482199025808e-05, "loss": 18.8756, "step": 2809 }, { "epoch": 0.11712725605435372, "grad_norm": 159.0, "learning_rate": 9.802294307521744e-05, "loss": 9.2504, "step": 2810 }, { "epoch": 0.11716893835188195, "grad_norm": 356.0, "learning_rate": 9.80210632849544e-05, "loss": 12.6896, "step": 2811 }, { "epoch": 0.1172106206494102, "grad_norm": 492.0, "learning_rate": 9.80191826195032e-05, "loss": 16.8753, "step": 2812 }, { "epoch": 0.11725230294693843, "grad_norm": 632.0, "learning_rate": 9.801730107889815e-05, "loss": 19.3753, "step": 2813 }, { "epoch": 0.11729398524446667, "grad_norm": 266.0, "learning_rate": 9.801541866317352e-05, "loss": 13.4378, "step": 2814 }, { "epoch": 0.11733566754199491, "grad_norm": 330.0, "learning_rate": 9.801353537236361e-05, "loss": 13.8755, "step": 2815 }, { "epoch": 0.11737734983952315, "grad_norm": 250.0, "learning_rate": 9.801165120650278e-05, "loss": 11.3133, "step": 2816 }, { "epoch": 0.11741903213705139, "grad_norm": 264.0, "learning_rate": 9.800976616562533e-05, "loss": 11.2503, "step": 2817 }, { "epoch": 0.11746071443457963, "grad_norm": 270.0, "learning_rate": 9.800788024976564e-05, "loss": 12.3753, "step": 2818 }, { "epoch": 0.11750239673210787, "grad_norm": 350.0, "learning_rate": 9.800599345895805e-05, "loss": 12.5007, "step": 2819 }, { "epoch": 0.11754407902963611, "grad_norm": 612.0, "learning_rate": 9.800410579323698e-05, "loss": 19.3791, "step": 2820 }, { "epoch": 0.11758576132716435, "grad_norm": 239.0, "learning_rate": 9.800221725263683e-05, "loss": 11.3758, "step": 2821 }, { "epoch": 0.11762744362469259, "grad_norm": 155.0, "learning_rate": 9.8000327837192e-05, "loss": 4.1879, "step": 2822 }, { "epoch": 0.11766912592222083, "grad_norm": 229.0, "learning_rate": 9.799843754693693e-05, "loss": 12.252, "step": 2823 }, { "epoch": 0.11771080821974907, "grad_norm": 940.0, "learning_rate": 9.799654638190607e-05, "loss": 21.8805, "step": 2824 }, { "epoch": 0.1177524905172773, "grad_norm": 384.0, "learning_rate": 9.799465434213391e-05, "loss": 14.6892, "step": 2825 }, { "epoch": 0.11779417281480556, "grad_norm": 528.0, "learning_rate": 9.79927614276549e-05, "loss": 16.8769, "step": 2826 }, { "epoch": 0.1178358551123338, "grad_norm": 394.0, "learning_rate": 9.799086763850355e-05, "loss": 11.6263, "step": 2827 }, { "epoch": 0.11787753740986204, "grad_norm": 398.0, "learning_rate": 9.798897297471439e-05, "loss": 13.876, "step": 2828 }, { "epoch": 0.11791921970739028, "grad_norm": 346.0, "learning_rate": 9.798707743632194e-05, "loss": 14.4385, "step": 2829 }, { "epoch": 0.11796090200491852, "grad_norm": 644.0, "learning_rate": 9.798518102336073e-05, "loss": 19.3764, "step": 2830 }, { "epoch": 0.11800258430244676, "grad_norm": 292.0, "learning_rate": 9.798328373586534e-05, "loss": 13.1252, "step": 2831 }, { "epoch": 0.118044266599975, "grad_norm": 224.0, "learning_rate": 9.798138557387034e-05, "loss": 11.4379, "step": 2832 }, { "epoch": 0.11808594889750323, "grad_norm": 560.0, "learning_rate": 9.797948653741034e-05, "loss": 18.6252, "step": 2833 }, { "epoch": 0.11812763119503147, "grad_norm": 660.0, "learning_rate": 9.797758662651992e-05, "loss": 17.3758, "step": 2834 }, { "epoch": 0.11816931349255971, "grad_norm": 386.0, "learning_rate": 9.797568584123375e-05, "loss": 14.6261, "step": 2835 }, { "epoch": 0.11821099579008795, "grad_norm": 368.0, "learning_rate": 9.797378418158643e-05, "loss": 14.0012, "step": 2836 }, { "epoch": 0.11825267808761619, "grad_norm": 620.0, "learning_rate": 9.797188164761264e-05, "loss": 19.2508, "step": 2837 }, { "epoch": 0.11829436038514443, "grad_norm": 209.0, "learning_rate": 9.796997823934704e-05, "loss": 11.751, "step": 2838 }, { "epoch": 0.11833604268267267, "grad_norm": 121.5, "learning_rate": 9.796807395682434e-05, "loss": 8.1882, "step": 2839 }, { "epoch": 0.11837772498020091, "grad_norm": 266.0, "learning_rate": 9.796616880007922e-05, "loss": 11.6254, "step": 2840 }, { "epoch": 0.11841940727772915, "grad_norm": 756.0, "learning_rate": 9.796426276914643e-05, "loss": 21.7506, "step": 2841 }, { "epoch": 0.11846108957525739, "grad_norm": 324.0, "learning_rate": 9.796235586406068e-05, "loss": 13.6254, "step": 2842 }, { "epoch": 0.11850277187278563, "grad_norm": 1480.0, "learning_rate": 9.796044808485677e-05, "loss": 27.3802, "step": 2843 }, { "epoch": 0.11854445417031387, "grad_norm": 138.0, "learning_rate": 9.79585394315694e-05, "loss": 7.094, "step": 2844 }, { "epoch": 0.11858613646784211, "grad_norm": 876.0, "learning_rate": 9.79566299042334e-05, "loss": 26.0013, "step": 2845 }, { "epoch": 0.11862781876537035, "grad_norm": 141.0, "learning_rate": 9.795471950288355e-05, "loss": 8.8755, "step": 2846 }, { "epoch": 0.11866950106289859, "grad_norm": 247.0, "learning_rate": 9.795280822755471e-05, "loss": 12.5628, "step": 2847 }, { "epoch": 0.11871118336042683, "grad_norm": 278.0, "learning_rate": 9.795089607828167e-05, "loss": 12.7503, "step": 2848 }, { "epoch": 0.11875286565795506, "grad_norm": 444.0, "learning_rate": 9.794898305509927e-05, "loss": 14.688, "step": 2849 }, { "epoch": 0.1187945479554833, "grad_norm": 320.0, "learning_rate": 9.794706915804243e-05, "loss": 13.5012, "step": 2850 }, { "epoch": 0.11883623025301154, "grad_norm": 100.0, "learning_rate": 9.794515438714598e-05, "loss": 8.4384, "step": 2851 }, { "epoch": 0.11887791255053978, "grad_norm": 366.0, "learning_rate": 9.794323874244485e-05, "loss": 14.5634, "step": 2852 }, { "epoch": 0.11891959484806802, "grad_norm": 438.0, "learning_rate": 9.794132222397392e-05, "loss": 16.376, "step": 2853 }, { "epoch": 0.11896127714559626, "grad_norm": 224.0, "learning_rate": 9.793940483176815e-05, "loss": 12.0628, "step": 2854 }, { "epoch": 0.1190029594431245, "grad_norm": 238.0, "learning_rate": 9.793748656586245e-05, "loss": 7.6889, "step": 2855 }, { "epoch": 0.11904464174065274, "grad_norm": 464.0, "learning_rate": 9.793556742629183e-05, "loss": 15.8777, "step": 2856 }, { "epoch": 0.11908632403818098, "grad_norm": 454.0, "learning_rate": 9.793364741309122e-05, "loss": 17.1257, "step": 2857 }, { "epoch": 0.11912800633570922, "grad_norm": 171.0, "learning_rate": 9.793172652629564e-05, "loss": 9.6894, "step": 2858 }, { "epoch": 0.11916968863323746, "grad_norm": 322.0, "learning_rate": 9.792980476594009e-05, "loss": 13.1255, "step": 2859 }, { "epoch": 0.1192113709307657, "grad_norm": 334.0, "learning_rate": 9.792788213205959e-05, "loss": 13.939, "step": 2860 }, { "epoch": 0.11925305322829394, "grad_norm": 217.0, "learning_rate": 9.792595862468919e-05, "loss": 10.6882, "step": 2861 }, { "epoch": 0.11929473552582218, "grad_norm": 424.0, "learning_rate": 9.792403424386392e-05, "loss": 15.8134, "step": 2862 }, { "epoch": 0.11933641782335043, "grad_norm": 162.0, "learning_rate": 9.792210898961889e-05, "loss": 10.0641, "step": 2863 }, { "epoch": 0.11937810012087867, "grad_norm": 132.0, "learning_rate": 9.792018286198917e-05, "loss": 7.1573, "step": 2864 }, { "epoch": 0.11941978241840691, "grad_norm": 360.0, "learning_rate": 9.791825586100985e-05, "loss": 14.6253, "step": 2865 }, { "epoch": 0.11946146471593515, "grad_norm": 71.0, "learning_rate": 9.791632798671606e-05, "loss": 7.7504, "step": 2866 }, { "epoch": 0.11950314701346339, "grad_norm": 282.0, "learning_rate": 9.791439923914295e-05, "loss": 13.4378, "step": 2867 }, { "epoch": 0.11954482931099163, "grad_norm": 640.0, "learning_rate": 9.791246961832565e-05, "loss": 17.1303, "step": 2868 }, { "epoch": 0.11958651160851987, "grad_norm": 225.0, "learning_rate": 9.791053912429935e-05, "loss": 11.1883, "step": 2869 }, { "epoch": 0.1196281939060481, "grad_norm": 466.0, "learning_rate": 9.790860775709923e-05, "loss": 16.0005, "step": 2870 }, { "epoch": 0.11966987620357634, "grad_norm": 1216.0, "learning_rate": 9.790667551676046e-05, "loss": 26.6323, "step": 2871 }, { "epoch": 0.11971155850110458, "grad_norm": 1128.0, "learning_rate": 9.790474240331828e-05, "loss": 24.0044, "step": 2872 }, { "epoch": 0.11975324079863282, "grad_norm": 231.0, "learning_rate": 9.790280841680793e-05, "loss": 11.689, "step": 2873 }, { "epoch": 0.11979492309616106, "grad_norm": 141.0, "learning_rate": 9.790087355726463e-05, "loss": 8.4377, "step": 2874 }, { "epoch": 0.1198366053936893, "grad_norm": 101.5, "learning_rate": 9.789893782472367e-05, "loss": 8.6877, "step": 2875 }, { "epoch": 0.11987828769121754, "grad_norm": 760.0, "learning_rate": 9.789700121922031e-05, "loss": 26.2503, "step": 2876 }, { "epoch": 0.11991996998874578, "grad_norm": 388.0, "learning_rate": 9.789506374078985e-05, "loss": 13.6257, "step": 2877 }, { "epoch": 0.11996165228627402, "grad_norm": 139.0, "learning_rate": 9.78931253894676e-05, "loss": 9.2502, "step": 2878 }, { "epoch": 0.12000333458380226, "grad_norm": 1416.0, "learning_rate": 9.789118616528889e-05, "loss": 29.0035, "step": 2879 }, { "epoch": 0.1200450168813305, "grad_norm": 644.0, "learning_rate": 9.788924606828905e-05, "loss": 18.3757, "step": 2880 }, { "epoch": 0.12008669917885874, "grad_norm": 486.0, "learning_rate": 9.788730509850346e-05, "loss": 16.2525, "step": 2881 }, { "epoch": 0.12012838147638698, "grad_norm": 588.0, "learning_rate": 9.788536325596749e-05, "loss": 19.1254, "step": 2882 }, { "epoch": 0.12017006377391522, "grad_norm": 229.0, "learning_rate": 9.78834205407165e-05, "loss": 12.0627, "step": 2883 }, { "epoch": 0.12021174607144346, "grad_norm": 101.0, "learning_rate": 9.788147695278596e-05, "loss": 9.0627, "step": 2884 }, { "epoch": 0.1202534283689717, "grad_norm": 300.0, "learning_rate": 9.787953249221123e-05, "loss": 13.6257, "step": 2885 }, { "epoch": 0.12029511066649994, "grad_norm": 696.0, "learning_rate": 9.787758715902775e-05, "loss": 20.5006, "step": 2886 }, { "epoch": 0.12033679296402817, "grad_norm": 330.0, "learning_rate": 9.787564095327102e-05, "loss": 14.3754, "step": 2887 }, { "epoch": 0.12037847526155641, "grad_norm": 776.0, "learning_rate": 9.787369387497647e-05, "loss": 21.1254, "step": 2888 }, { "epoch": 0.12042015755908465, "grad_norm": 146.0, "learning_rate": 9.78717459241796e-05, "loss": 9.8137, "step": 2889 }, { "epoch": 0.12046183985661289, "grad_norm": 300.0, "learning_rate": 9.786979710091593e-05, "loss": 13.1877, "step": 2890 }, { "epoch": 0.12050352215414113, "grad_norm": 400.0, "learning_rate": 9.786784740522095e-05, "loss": 14.2506, "step": 2891 }, { "epoch": 0.12054520445166937, "grad_norm": 302.0, "learning_rate": 9.78658968371302e-05, "loss": 11.5021, "step": 2892 }, { "epoch": 0.12058688674919761, "grad_norm": 286.0, "learning_rate": 9.786394539667922e-05, "loss": 14.1257, "step": 2893 }, { "epoch": 0.12062856904672585, "grad_norm": 388.0, "learning_rate": 9.786199308390358e-05, "loss": 15.5635, "step": 2894 }, { "epoch": 0.12067025134425409, "grad_norm": 77.5, "learning_rate": 9.786003989883889e-05, "loss": 7.8127, "step": 2895 }, { "epoch": 0.12071193364178233, "grad_norm": 390.0, "learning_rate": 9.785808584152071e-05, "loss": 14.3128, "step": 2896 }, { "epoch": 0.12075361593931057, "grad_norm": 238.0, "learning_rate": 9.785613091198467e-05, "loss": 11.1878, "step": 2897 }, { "epoch": 0.12079529823683881, "grad_norm": 158.0, "learning_rate": 9.78541751102664e-05, "loss": 9.3128, "step": 2898 }, { "epoch": 0.12083698053436706, "grad_norm": 244.0, "learning_rate": 9.785221843640153e-05, "loss": 11.688, "step": 2899 }, { "epoch": 0.1208786628318953, "grad_norm": 384.0, "learning_rate": 9.785026089042575e-05, "loss": 14.4378, "step": 2900 }, { "epoch": 0.12092034512942354, "grad_norm": 460.0, "learning_rate": 9.784830247237469e-05, "loss": 17.6269, "step": 2901 }, { "epoch": 0.12096202742695178, "grad_norm": 197.0, "learning_rate": 9.784634318228409e-05, "loss": 10.8754, "step": 2902 }, { "epoch": 0.12100370972448002, "grad_norm": 458.0, "learning_rate": 9.784438302018963e-05, "loss": 16.126, "step": 2903 }, { "epoch": 0.12104539202200826, "grad_norm": 336.0, "learning_rate": 9.784242198612705e-05, "loss": 10.7514, "step": 2904 }, { "epoch": 0.1210870743195365, "grad_norm": 294.0, "learning_rate": 9.784046008013208e-05, "loss": 13.3752, "step": 2905 }, { "epoch": 0.12112875661706474, "grad_norm": 344.0, "learning_rate": 9.783849730224048e-05, "loss": 12.7521, "step": 2906 }, { "epoch": 0.12117043891459298, "grad_norm": 540.0, "learning_rate": 9.783653365248802e-05, "loss": 18.0005, "step": 2907 }, { "epoch": 0.12121212121212122, "grad_norm": 318.0, "learning_rate": 9.783456913091048e-05, "loss": 13.6878, "step": 2908 }, { "epoch": 0.12125380350964945, "grad_norm": 64.5, "learning_rate": 9.783260373754368e-05, "loss": 6.8445, "step": 2909 }, { "epoch": 0.1212954858071777, "grad_norm": 256.0, "learning_rate": 9.783063747242343e-05, "loss": 13.0042, "step": 2910 }, { "epoch": 0.12133716810470593, "grad_norm": 510.0, "learning_rate": 9.782867033558556e-05, "loss": 19.6255, "step": 2911 }, { "epoch": 0.12137885040223417, "grad_norm": 402.0, "learning_rate": 9.782670232706592e-05, "loss": 15.0003, "step": 2912 }, { "epoch": 0.12142053269976241, "grad_norm": 468.0, "learning_rate": 9.78247334469004e-05, "loss": 15.6297, "step": 2913 }, { "epoch": 0.12146221499729065, "grad_norm": 384.0, "learning_rate": 9.782276369512487e-05, "loss": 14.5024, "step": 2914 }, { "epoch": 0.12150389729481889, "grad_norm": 322.0, "learning_rate": 9.782079307177521e-05, "loss": 13.3758, "step": 2915 }, { "epoch": 0.12154557959234713, "grad_norm": 410.0, "learning_rate": 9.781882157688735e-05, "loss": 15.6264, "step": 2916 }, { "epoch": 0.12158726188987537, "grad_norm": 256.0, "learning_rate": 9.781684921049722e-05, "loss": 11.688, "step": 2917 }, { "epoch": 0.12162894418740361, "grad_norm": 237.0, "learning_rate": 9.781487597264079e-05, "loss": 12.6261, "step": 2918 }, { "epoch": 0.12167062648493185, "grad_norm": 326.0, "learning_rate": 9.7812901863354e-05, "loss": 13.688, "step": 2919 }, { "epoch": 0.12171230878246009, "grad_norm": 532.0, "learning_rate": 9.781092688267281e-05, "loss": 18.3755, "step": 2920 }, { "epoch": 0.12175399107998833, "grad_norm": 410.0, "learning_rate": 9.780895103063323e-05, "loss": 14.6254, "step": 2921 }, { "epoch": 0.12179567337751657, "grad_norm": 255.0, "learning_rate": 9.780697430727129e-05, "loss": 9.1882, "step": 2922 }, { "epoch": 0.1218373556750448, "grad_norm": 139.0, "learning_rate": 9.7804996712623e-05, "loss": 9.4379, "step": 2923 }, { "epoch": 0.12187903797257305, "grad_norm": 260.0, "learning_rate": 9.78030182467244e-05, "loss": 9.5637, "step": 2924 }, { "epoch": 0.12192072027010128, "grad_norm": 748.0, "learning_rate": 9.780103890961154e-05, "loss": 21.5004, "step": 2925 }, { "epoch": 0.12196240256762952, "grad_norm": 302.0, "learning_rate": 9.779905870132051e-05, "loss": 12.6254, "step": 2926 }, { "epoch": 0.12200408486515776, "grad_norm": 556.0, "learning_rate": 9.779707762188739e-05, "loss": 17.377, "step": 2927 }, { "epoch": 0.122045767162686, "grad_norm": 676.0, "learning_rate": 9.779509567134828e-05, "loss": 20.2515, "step": 2928 }, { "epoch": 0.12208744946021424, "grad_norm": 166.0, "learning_rate": 9.779311284973931e-05, "loss": 9.1257, "step": 2929 }, { "epoch": 0.12212913175774248, "grad_norm": 320.0, "learning_rate": 9.779112915709662e-05, "loss": 13.0011, "step": 2930 }, { "epoch": 0.12217081405527072, "grad_norm": 414.0, "learning_rate": 9.778914459345636e-05, "loss": 15.6269, "step": 2931 }, { "epoch": 0.12221249635279896, "grad_norm": 118.0, "learning_rate": 9.77871591588547e-05, "loss": 9.001, "step": 2932 }, { "epoch": 0.1222541786503272, "grad_norm": 408.0, "learning_rate": 9.778517285332783e-05, "loss": 14.5631, "step": 2933 }, { "epoch": 0.12229586094785544, "grad_norm": 374.0, "learning_rate": 9.778318567691191e-05, "loss": 13.7518, "step": 2934 }, { "epoch": 0.12233754324538368, "grad_norm": 784.0, "learning_rate": 9.778119762964322e-05, "loss": 21.7509, "step": 2935 }, { "epoch": 0.12237922554291193, "grad_norm": 366.0, "learning_rate": 9.777920871155795e-05, "loss": 15.3161, "step": 2936 }, { "epoch": 0.12242090784044017, "grad_norm": 580.0, "learning_rate": 9.777721892269236e-05, "loss": 16.6297, "step": 2937 }, { "epoch": 0.12246259013796841, "grad_norm": 364.0, "learning_rate": 9.777522826308272e-05, "loss": 14.0628, "step": 2938 }, { "epoch": 0.12250427243549665, "grad_norm": 227.0, "learning_rate": 9.777323673276528e-05, "loss": 10.9378, "step": 2939 }, { "epoch": 0.12254595473302489, "grad_norm": 304.0, "learning_rate": 9.777124433177639e-05, "loss": 12.6877, "step": 2940 }, { "epoch": 0.12258763703055313, "grad_norm": 496.0, "learning_rate": 9.776925106015231e-05, "loss": 16.1255, "step": 2941 }, { "epoch": 0.12262931932808137, "grad_norm": 812.0, "learning_rate": 9.776725691792941e-05, "loss": 21.7554, "step": 2942 }, { "epoch": 0.12267100162560961, "grad_norm": 328.0, "learning_rate": 9.776526190514399e-05, "loss": 13.9379, "step": 2943 }, { "epoch": 0.12271268392313785, "grad_norm": 278.0, "learning_rate": 9.776326602183246e-05, "loss": 13.5014, "step": 2944 }, { "epoch": 0.12275436622066609, "grad_norm": 696.0, "learning_rate": 9.776126926803115e-05, "loss": 21.6267, "step": 2945 }, { "epoch": 0.12279604851819433, "grad_norm": 290.0, "learning_rate": 9.775927164377645e-05, "loss": 11.8753, "step": 2946 }, { "epoch": 0.12283773081572257, "grad_norm": 386.0, "learning_rate": 9.775727314910481e-05, "loss": 15.4378, "step": 2947 }, { "epoch": 0.1228794131132508, "grad_norm": 246.0, "learning_rate": 9.775527378405261e-05, "loss": 12.0004, "step": 2948 }, { "epoch": 0.12292109541077904, "grad_norm": 620.0, "learning_rate": 9.775327354865633e-05, "loss": 20.251, "step": 2949 }, { "epoch": 0.12296277770830728, "grad_norm": 302.0, "learning_rate": 9.775127244295237e-05, "loss": 13.8129, "step": 2950 }, { "epoch": 0.12300446000583552, "grad_norm": 452.0, "learning_rate": 9.774927046697725e-05, "loss": 15.6254, "step": 2951 }, { "epoch": 0.12304614230336376, "grad_norm": 528.0, "learning_rate": 9.774726762076742e-05, "loss": 16.8762, "step": 2952 }, { "epoch": 0.123087824600892, "grad_norm": 556.0, "learning_rate": 9.774526390435943e-05, "loss": 18.2513, "step": 2953 }, { "epoch": 0.12312950689842024, "grad_norm": 1016.0, "learning_rate": 9.774325931778974e-05, "loss": 27.2503, "step": 2954 }, { "epoch": 0.12317118919594848, "grad_norm": 131.0, "learning_rate": 9.774125386109492e-05, "loss": 9.6255, "step": 2955 }, { "epoch": 0.12321287149347672, "grad_norm": 720.0, "learning_rate": 9.773924753431152e-05, "loss": 19.877, "step": 2956 }, { "epoch": 0.12325455379100496, "grad_norm": 199.0, "learning_rate": 9.773724033747608e-05, "loss": 8.8756, "step": 2957 }, { "epoch": 0.1232962360885332, "grad_norm": 482.0, "learning_rate": 9.77352322706252e-05, "loss": 16.2505, "step": 2958 }, { "epoch": 0.12333791838606144, "grad_norm": 868.0, "learning_rate": 9.773322333379548e-05, "loss": 22.5047, "step": 2959 }, { "epoch": 0.12337960068358968, "grad_norm": 380.0, "learning_rate": 9.773121352702353e-05, "loss": 15.4393, "step": 2960 }, { "epoch": 0.12342128298111792, "grad_norm": 840.0, "learning_rate": 9.772920285034596e-05, "loss": 25.502, "step": 2961 }, { "epoch": 0.12346296527864616, "grad_norm": 134.0, "learning_rate": 9.772719130379944e-05, "loss": 9.7506, "step": 2962 }, { "epoch": 0.1235046475761744, "grad_norm": 596.0, "learning_rate": 9.772517888742063e-05, "loss": 17.3794, "step": 2963 }, { "epoch": 0.12354632987370263, "grad_norm": 1032.0, "learning_rate": 9.772316560124618e-05, "loss": 25.3765, "step": 2964 }, { "epoch": 0.12358801217123087, "grad_norm": 318.0, "learning_rate": 9.772115144531281e-05, "loss": 13.2527, "step": 2965 }, { "epoch": 0.12362969446875911, "grad_norm": 524.0, "learning_rate": 9.771913641965722e-05, "loss": 16.2509, "step": 2966 }, { "epoch": 0.12367137676628735, "grad_norm": 266.0, "learning_rate": 9.771712052431614e-05, "loss": 13.1268, "step": 2967 }, { "epoch": 0.12371305906381559, "grad_norm": 300.0, "learning_rate": 9.771510375932628e-05, "loss": 13.5639, "step": 2968 }, { "epoch": 0.12375474136134383, "grad_norm": 212.0, "learning_rate": 9.771308612472444e-05, "loss": 10.3755, "step": 2969 }, { "epoch": 0.12379642365887207, "grad_norm": 404.0, "learning_rate": 9.771106762054736e-05, "loss": 15.6258, "step": 2970 }, { "epoch": 0.12383810595640031, "grad_norm": 812.0, "learning_rate": 9.770904824683185e-05, "loss": 21.2505, "step": 2971 }, { "epoch": 0.12387978825392856, "grad_norm": 247.0, "learning_rate": 9.770702800361469e-05, "loss": 10.6881, "step": 2972 }, { "epoch": 0.1239214705514568, "grad_norm": 676.0, "learning_rate": 9.77050068909327e-05, "loss": 22.0019, "step": 2973 }, { "epoch": 0.12396315284898504, "grad_norm": 400.0, "learning_rate": 9.770298490882273e-05, "loss": 16.2503, "step": 2974 }, { "epoch": 0.12400483514651328, "grad_norm": 54.75, "learning_rate": 9.770096205732164e-05, "loss": 7.6884, "step": 2975 }, { "epoch": 0.12404651744404152, "grad_norm": 418.0, "learning_rate": 9.769893833646627e-05, "loss": 16.6252, "step": 2976 }, { "epoch": 0.12408819974156976, "grad_norm": 1384.0, "learning_rate": 9.769691374629352e-05, "loss": 31.0061, "step": 2977 }, { "epoch": 0.124129882039098, "grad_norm": 141.0, "learning_rate": 9.769488828684029e-05, "loss": 9.0008, "step": 2978 }, { "epoch": 0.12417156433662624, "grad_norm": 300.0, "learning_rate": 9.769286195814346e-05, "loss": 12.5637, "step": 2979 }, { "epoch": 0.12421324663415448, "grad_norm": 170.0, "learning_rate": 9.769083476024e-05, "loss": 8.8128, "step": 2980 }, { "epoch": 0.12425492893168272, "grad_norm": 139.0, "learning_rate": 9.768880669316685e-05, "loss": 10.1254, "step": 2981 }, { "epoch": 0.12429661122921096, "grad_norm": 552.0, "learning_rate": 9.768677775696095e-05, "loss": 18.7506, "step": 2982 }, { "epoch": 0.1243382935267392, "grad_norm": 52.0, "learning_rate": 9.768474795165932e-05, "loss": 7.3762, "step": 2983 }, { "epoch": 0.12437997582426744, "grad_norm": 188.0, "learning_rate": 9.76827172772989e-05, "loss": 11.1887, "step": 2984 }, { "epoch": 0.12442165812179568, "grad_norm": 452.0, "learning_rate": 9.768068573391674e-05, "loss": 15.189, "step": 2985 }, { "epoch": 0.12446334041932391, "grad_norm": 360.0, "learning_rate": 9.767865332154984e-05, "loss": 13.8755, "step": 2986 }, { "epoch": 0.12450502271685215, "grad_norm": 225.0, "learning_rate": 9.767662004023525e-05, "loss": 12.251, "step": 2987 }, { "epoch": 0.1245467050143804, "grad_norm": 804.0, "learning_rate": 9.767458589001002e-05, "loss": 23.5019, "step": 2988 }, { "epoch": 0.12458838731190863, "grad_norm": 616.0, "learning_rate": 9.767255087091125e-05, "loss": 17.6255, "step": 2989 }, { "epoch": 0.12463006960943687, "grad_norm": 256.0, "learning_rate": 9.767051498297599e-05, "loss": 13.2521, "step": 2990 }, { "epoch": 0.12467175190696511, "grad_norm": 226.0, "learning_rate": 9.766847822624138e-05, "loss": 12.0002, "step": 2991 }, { "epoch": 0.12471343420449335, "grad_norm": 454.0, "learning_rate": 9.76664406007445e-05, "loss": 15.2546, "step": 2992 }, { "epoch": 0.12475511650202159, "grad_norm": 152.0, "learning_rate": 9.766440210652254e-05, "loss": 8.8755, "step": 2993 }, { "epoch": 0.12479679879954983, "grad_norm": 238.0, "learning_rate": 9.76623627436126e-05, "loss": 12.813, "step": 2994 }, { "epoch": 0.12483848109707807, "grad_norm": 420.0, "learning_rate": 9.766032251205186e-05, "loss": 15.1883, "step": 2995 }, { "epoch": 0.12488016339460631, "grad_norm": 238.0, "learning_rate": 9.765828141187753e-05, "loss": 10.6879, "step": 2996 }, { "epoch": 0.12492184569213455, "grad_norm": 360.0, "learning_rate": 9.765623944312679e-05, "loss": 14.7503, "step": 2997 }, { "epoch": 0.12496352798966279, "grad_norm": 724.0, "learning_rate": 9.765419660583683e-05, "loss": 21.6258, "step": 2998 }, { "epoch": 0.12500521028719103, "grad_norm": 340.0, "learning_rate": 9.765215290004494e-05, "loss": 14.438, "step": 2999 }, { "epoch": 0.12504689258471927, "grad_norm": 296.0, "learning_rate": 9.765010832578831e-05, "loss": 12.6253, "step": 3000 }, { "epoch": 0.1250885748822475, "grad_norm": 194.0, "learning_rate": 9.764806288310424e-05, "loss": 10.0005, "step": 3001 }, { "epoch": 0.12513025717977574, "grad_norm": 78.0, "learning_rate": 9.764601657202998e-05, "loss": 9.7519, "step": 3002 }, { "epoch": 0.12517193947730398, "grad_norm": 290.0, "learning_rate": 9.764396939260285e-05, "loss": 13.1254, "step": 3003 }, { "epoch": 0.12521362177483222, "grad_norm": 392.0, "learning_rate": 9.764192134486014e-05, "loss": 14.1878, "step": 3004 }, { "epoch": 0.12525530407236046, "grad_norm": 201.0, "learning_rate": 9.763987242883919e-05, "loss": 12.1882, "step": 3005 }, { "epoch": 0.1252969863698887, "grad_norm": 276.0, "learning_rate": 9.763782264457734e-05, "loss": 12.6879, "step": 3006 }, { "epoch": 0.12533866866741694, "grad_norm": 796.0, "learning_rate": 9.763577199211193e-05, "loss": 22.8754, "step": 3007 }, { "epoch": 0.12538035096494518, "grad_norm": 182.0, "learning_rate": 9.763372047148036e-05, "loss": 10.6256, "step": 3008 }, { "epoch": 0.12542203326247342, "grad_norm": 222.0, "learning_rate": 9.763166808271999e-05, "loss": 11.9381, "step": 3009 }, { "epoch": 0.12546371556000166, "grad_norm": 508.0, "learning_rate": 9.762961482586826e-05, "loss": 17.0008, "step": 3010 }, { "epoch": 0.1255053978575299, "grad_norm": 304.0, "learning_rate": 9.762756070096257e-05, "loss": 11.7506, "step": 3011 }, { "epoch": 0.12554708015505814, "grad_norm": 119.5, "learning_rate": 9.762550570804035e-05, "loss": 8.2518, "step": 3012 }, { "epoch": 0.12558876245258638, "grad_norm": 344.0, "learning_rate": 9.762344984713904e-05, "loss": 14.1255, "step": 3013 }, { "epoch": 0.12563044475011462, "grad_norm": 442.0, "learning_rate": 9.762139311829617e-05, "loss": 16.626, "step": 3014 }, { "epoch": 0.12567212704764286, "grad_norm": 149.0, "learning_rate": 9.761933552154916e-05, "loss": 11.1274, "step": 3015 }, { "epoch": 0.1257138093451711, "grad_norm": 290.0, "learning_rate": 9.761727705693552e-05, "loss": 12.2502, "step": 3016 }, { "epoch": 0.12575549164269934, "grad_norm": 372.0, "learning_rate": 9.76152177244928e-05, "loss": 15.2505, "step": 3017 }, { "epoch": 0.12579717394022757, "grad_norm": 608.0, "learning_rate": 9.76131575242585e-05, "loss": 19.7511, "step": 3018 }, { "epoch": 0.12583885623775581, "grad_norm": 266.0, "learning_rate": 9.761109645627019e-05, "loss": 11.8753, "step": 3019 }, { "epoch": 0.12588053853528405, "grad_norm": 588.0, "learning_rate": 9.760903452056542e-05, "loss": 18.6257, "step": 3020 }, { "epoch": 0.1259222208328123, "grad_norm": 268.0, "learning_rate": 9.760697171718176e-05, "loss": 11.8754, "step": 3021 }, { "epoch": 0.12596390313034053, "grad_norm": 1368.0, "learning_rate": 9.76049080461568e-05, "loss": 32.0006, "step": 3022 }, { "epoch": 0.12600558542786877, "grad_norm": 504.0, "learning_rate": 9.76028435075282e-05, "loss": 16.8771, "step": 3023 }, { "epoch": 0.126047267725397, "grad_norm": 230.0, "learning_rate": 9.760077810133353e-05, "loss": 11.7504, "step": 3024 }, { "epoch": 0.12608895002292525, "grad_norm": 346.0, "learning_rate": 9.759871182761044e-05, "loss": 14.3129, "step": 3025 }, { "epoch": 0.1261306323204535, "grad_norm": 58.25, "learning_rate": 9.759664468639664e-05, "loss": 6.5346, "step": 3026 }, { "epoch": 0.12617231461798176, "grad_norm": 198.0, "learning_rate": 9.759457667772973e-05, "loss": 11.0628, "step": 3027 }, { "epoch": 0.12621399691551, "grad_norm": 452.0, "learning_rate": 9.759250780164745e-05, "loss": 15.5005, "step": 3028 }, { "epoch": 0.12625567921303824, "grad_norm": 266.0, "learning_rate": 9.759043805818748e-05, "loss": 10.8753, "step": 3029 }, { "epoch": 0.12629736151056647, "grad_norm": 168.0, "learning_rate": 9.758836744738757e-05, "loss": 10.8757, "step": 3030 }, { "epoch": 0.12633904380809471, "grad_norm": 648.0, "learning_rate": 9.758629596928543e-05, "loss": 18.1309, "step": 3031 }, { "epoch": 0.12638072610562295, "grad_norm": 884.0, "learning_rate": 9.758422362391881e-05, "loss": 24.3772, "step": 3032 }, { "epoch": 0.1264224084031512, "grad_norm": 482.0, "learning_rate": 9.75821504113255e-05, "loss": 15.3131, "step": 3033 }, { "epoch": 0.12646409070067943, "grad_norm": 240.0, "learning_rate": 9.758007633154328e-05, "loss": 12.1255, "step": 3034 }, { "epoch": 0.12650577299820767, "grad_norm": 256.0, "learning_rate": 9.757800138460994e-05, "loss": 14.1892, "step": 3035 }, { "epoch": 0.1265474552957359, "grad_norm": 346.0, "learning_rate": 9.75759255705633e-05, "loss": 14.7508, "step": 3036 }, { "epoch": 0.12658913759326415, "grad_norm": 360.0, "learning_rate": 9.757384888944119e-05, "loss": 14.7515, "step": 3037 }, { "epoch": 0.1266308198907924, "grad_norm": 458.0, "learning_rate": 9.757177134128147e-05, "loss": 15.1885, "step": 3038 }, { "epoch": 0.12667250218832063, "grad_norm": 312.0, "learning_rate": 9.756969292612199e-05, "loss": 10.0641, "step": 3039 }, { "epoch": 0.12671418448584887, "grad_norm": 430.0, "learning_rate": 9.756761364400063e-05, "loss": 17.3773, "step": 3040 }, { "epoch": 0.1267558667833771, "grad_norm": 524.0, "learning_rate": 9.75655334949553e-05, "loss": 14.5006, "step": 3041 }, { "epoch": 0.12679754908090535, "grad_norm": 103.0, "learning_rate": 9.756345247902388e-05, "loss": 6.2203, "step": 3042 }, { "epoch": 0.1268392313784336, "grad_norm": 276.0, "learning_rate": 9.756137059624432e-05, "loss": 12.5009, "step": 3043 }, { "epoch": 0.12688091367596183, "grad_norm": 248.0, "learning_rate": 9.755928784665459e-05, "loss": 12.5634, "step": 3044 }, { "epoch": 0.12692259597349007, "grad_norm": 306.0, "learning_rate": 9.755720423029258e-05, "loss": 13.0005, "step": 3045 }, { "epoch": 0.1269642782710183, "grad_norm": 97.0, "learning_rate": 9.755511974719631e-05, "loss": 8.6257, "step": 3046 }, { "epoch": 0.12700596056854654, "grad_norm": 174.0, "learning_rate": 9.755303439740378e-05, "loss": 10.6256, "step": 3047 }, { "epoch": 0.12704764286607478, "grad_norm": 472.0, "learning_rate": 9.755094818095296e-05, "loss": 17.2508, "step": 3048 }, { "epoch": 0.12708932516360302, "grad_norm": 266.0, "learning_rate": 9.754886109788188e-05, "loss": 10.6879, "step": 3049 }, { "epoch": 0.12713100746113126, "grad_norm": 632.0, "learning_rate": 9.75467731482286e-05, "loss": 18.3757, "step": 3050 }, { "epoch": 0.1271726897586595, "grad_norm": 380.0, "learning_rate": 9.754468433203115e-05, "loss": 16.0008, "step": 3051 }, { "epoch": 0.12721437205618774, "grad_norm": 454.0, "learning_rate": 9.754259464932762e-05, "loss": 15.753, "step": 3052 }, { "epoch": 0.12725605435371598, "grad_norm": 1040.0, "learning_rate": 9.754050410015607e-05, "loss": 25.1263, "step": 3053 }, { "epoch": 0.12729773665124422, "grad_norm": 466.0, "learning_rate": 9.753841268455462e-05, "loss": 16.8794, "step": 3054 }, { "epoch": 0.12733941894877246, "grad_norm": 656.0, "learning_rate": 9.753632040256137e-05, "loss": 19.0004, "step": 3055 }, { "epoch": 0.1273811012463007, "grad_norm": 370.0, "learning_rate": 9.753422725421446e-05, "loss": 14.4385, "step": 3056 }, { "epoch": 0.12742278354382894, "grad_norm": 186.0, "learning_rate": 9.753213323955204e-05, "loss": 11.0003, "step": 3057 }, { "epoch": 0.12746446584135718, "grad_norm": 220.0, "learning_rate": 9.753003835861228e-05, "loss": 11.9391, "step": 3058 }, { "epoch": 0.12750614813888542, "grad_norm": 628.0, "learning_rate": 9.752794261143334e-05, "loss": 19.1301, "step": 3059 }, { "epoch": 0.12754783043641366, "grad_norm": 132.0, "learning_rate": 9.752584599805344e-05, "loss": 9.3765, "step": 3060 }, { "epoch": 0.1275895127339419, "grad_norm": 940.0, "learning_rate": 9.752374851851079e-05, "loss": 25.1254, "step": 3061 }, { "epoch": 0.12763119503147013, "grad_norm": 308.0, "learning_rate": 9.752165017284357e-05, "loss": 12.0662, "step": 3062 }, { "epoch": 0.12767287732899837, "grad_norm": 115.0, "learning_rate": 9.751955096109006e-05, "loss": 8.8761, "step": 3063 }, { "epoch": 0.1277145596265266, "grad_norm": 444.0, "learning_rate": 9.751745088328855e-05, "loss": 16.1255, "step": 3064 }, { "epoch": 0.12775624192405485, "grad_norm": 378.0, "learning_rate": 9.751534993947725e-05, "loss": 14.1879, "step": 3065 }, { "epoch": 0.1277979242215831, "grad_norm": 640.0, "learning_rate": 9.751324812969448e-05, "loss": 20.5004, "step": 3066 }, { "epoch": 0.12783960651911133, "grad_norm": 236.0, "learning_rate": 9.751114545397856e-05, "loss": 10.3129, "step": 3067 }, { "epoch": 0.12788128881663957, "grad_norm": 314.0, "learning_rate": 9.750904191236779e-05, "loss": 10.8131, "step": 3068 }, { "epoch": 0.1279229711141678, "grad_norm": 164.0, "learning_rate": 9.750693750490052e-05, "loss": 10.6255, "step": 3069 }, { "epoch": 0.12796465341169605, "grad_norm": 382.0, "learning_rate": 9.750483223161509e-05, "loss": 15.5629, "step": 3070 }, { "epoch": 0.1280063357092243, "grad_norm": 476.0, "learning_rate": 9.750272609254987e-05, "loss": 15.3173, "step": 3071 }, { "epoch": 0.12804801800675253, "grad_norm": 552.0, "learning_rate": 9.750061908774325e-05, "loss": 17.5017, "step": 3072 }, { "epoch": 0.12808970030428077, "grad_norm": 494.0, "learning_rate": 9.749851121723363e-05, "loss": 15.6255, "step": 3073 }, { "epoch": 0.128131382601809, "grad_norm": 418.0, "learning_rate": 9.749640248105943e-05, "loss": 13.3136, "step": 3074 }, { "epoch": 0.12817306489933725, "grad_norm": 227.0, "learning_rate": 9.749429287925909e-05, "loss": 10.5004, "step": 3075 }, { "epoch": 0.1282147471968655, "grad_norm": 73.5, "learning_rate": 9.749218241187103e-05, "loss": 8.3754, "step": 3076 }, { "epoch": 0.12825642949439373, "grad_norm": 544.0, "learning_rate": 9.749007107893373e-05, "loss": 16.2508, "step": 3077 }, { "epoch": 0.12829811179192196, "grad_norm": 398.0, "learning_rate": 9.748795888048567e-05, "loss": 15.0006, "step": 3078 }, { "epoch": 0.1283397940894502, "grad_norm": 524.0, "learning_rate": 9.748584581656535e-05, "loss": 16.1252, "step": 3079 }, { "epoch": 0.12838147638697844, "grad_norm": 338.0, "learning_rate": 9.748373188721128e-05, "loss": 12.8781, "step": 3080 }, { "epoch": 0.12842315868450668, "grad_norm": 179.0, "learning_rate": 9.748161709246198e-05, "loss": 11.2504, "step": 3081 }, { "epoch": 0.12846484098203492, "grad_norm": 660.0, "learning_rate": 9.747950143235598e-05, "loss": 18.3788, "step": 3082 }, { "epoch": 0.12850652327956316, "grad_norm": 454.0, "learning_rate": 9.747738490693185e-05, "loss": 16.3754, "step": 3083 }, { "epoch": 0.1285482055770914, "grad_norm": 676.0, "learning_rate": 9.747526751622819e-05, "loss": 19.6277, "step": 3084 }, { "epoch": 0.12858988787461964, "grad_norm": 462.0, "learning_rate": 9.747314926028354e-05, "loss": 16.3755, "step": 3085 }, { "epoch": 0.12863157017214788, "grad_norm": 280.0, "learning_rate": 9.747103013913654e-05, "loss": 12.3759, "step": 3086 }, { "epoch": 0.12867325246967612, "grad_norm": 191.0, "learning_rate": 9.74689101528258e-05, "loss": 9.0635, "step": 3087 }, { "epoch": 0.12871493476720436, "grad_norm": 92.5, "learning_rate": 9.746678930138996e-05, "loss": 9.3755, "step": 3088 }, { "epoch": 0.1287566170647326, "grad_norm": 350.0, "learning_rate": 9.746466758486768e-05, "loss": 14.189, "step": 3089 }, { "epoch": 0.12879829936226084, "grad_norm": 628.0, "learning_rate": 9.74625450032976e-05, "loss": 20.1252, "step": 3090 }, { "epoch": 0.12883998165978908, "grad_norm": 109.5, "learning_rate": 9.746042155671844e-05, "loss": 9.4378, "step": 3091 }, { "epoch": 0.12888166395731732, "grad_norm": 128.0, "learning_rate": 9.745829724516888e-05, "loss": 7.0319, "step": 3092 }, { "epoch": 0.12892334625484556, "grad_norm": 1024.0, "learning_rate": 9.745617206868764e-05, "loss": 26.1252, "step": 3093 }, { "epoch": 0.1289650285523738, "grad_norm": 100.0, "learning_rate": 9.745404602731345e-05, "loss": 8.0632, "step": 3094 }, { "epoch": 0.12900671084990203, "grad_norm": 158.0, "learning_rate": 9.745191912108504e-05, "loss": 9.3755, "step": 3095 }, { "epoch": 0.12904839314743027, "grad_norm": 167.0, "learning_rate": 9.744979135004122e-05, "loss": 9.7505, "step": 3096 }, { "epoch": 0.1290900754449585, "grad_norm": 784.0, "learning_rate": 9.744766271422072e-05, "loss": 22.7503, "step": 3097 }, { "epoch": 0.12913175774248675, "grad_norm": 356.0, "learning_rate": 9.744553321366238e-05, "loss": 15.0003, "step": 3098 }, { "epoch": 0.129173440040015, "grad_norm": 314.0, "learning_rate": 9.744340284840497e-05, "loss": 13.3752, "step": 3099 }, { "epoch": 0.12921512233754326, "grad_norm": 288.0, "learning_rate": 9.744127161848732e-05, "loss": 12.6286, "step": 3100 }, { "epoch": 0.1292568046350715, "grad_norm": 239.0, "learning_rate": 9.74391395239483e-05, "loss": 11.1879, "step": 3101 }, { "epoch": 0.12929848693259974, "grad_norm": 412.0, "learning_rate": 9.743700656482675e-05, "loss": 15.4378, "step": 3102 }, { "epoch": 0.12934016923012798, "grad_norm": 500.0, "learning_rate": 9.743487274116154e-05, "loss": 17.6253, "step": 3103 }, { "epoch": 0.12938185152765622, "grad_norm": 1208.0, "learning_rate": 9.743273805299155e-05, "loss": 30.8753, "step": 3104 }, { "epoch": 0.12942353382518446, "grad_norm": 780.0, "learning_rate": 9.743060250035571e-05, "loss": 20.8793, "step": 3105 }, { "epoch": 0.1294652161227127, "grad_norm": 908.0, "learning_rate": 9.742846608329295e-05, "loss": 22.6261, "step": 3106 }, { "epoch": 0.12950689842024093, "grad_norm": 101.0, "learning_rate": 9.742632880184214e-05, "loss": 9.0633, "step": 3107 }, { "epoch": 0.12954858071776917, "grad_norm": 132.0, "learning_rate": 9.742419065604231e-05, "loss": 10.6268, "step": 3108 }, { "epoch": 0.1295902630152974, "grad_norm": 312.0, "learning_rate": 9.74220516459324e-05, "loss": 13.3753, "step": 3109 }, { "epoch": 0.12963194531282565, "grad_norm": 520.0, "learning_rate": 9.741991177155138e-05, "loss": 20.0004, "step": 3110 }, { "epoch": 0.1296736276103539, "grad_norm": 264.0, "learning_rate": 9.741777103293825e-05, "loss": 13.8129, "step": 3111 }, { "epoch": 0.12971530990788213, "grad_norm": 540.0, "learning_rate": 9.741562943013204e-05, "loss": 18.2549, "step": 3112 }, { "epoch": 0.12975699220541037, "grad_norm": 438.0, "learning_rate": 9.741348696317177e-05, "loss": 17.0007, "step": 3113 }, { "epoch": 0.1297986745029386, "grad_norm": 1104.0, "learning_rate": 9.74113436320965e-05, "loss": 25.8801, "step": 3114 }, { "epoch": 0.12984035680046685, "grad_norm": 668.0, "learning_rate": 9.740919943694527e-05, "loss": 19.2503, "step": 3115 }, { "epoch": 0.1298820390979951, "grad_norm": 348.0, "learning_rate": 9.740705437775719e-05, "loss": 12.5633, "step": 3116 }, { "epoch": 0.12992372139552333, "grad_norm": 266.0, "learning_rate": 9.740490845457133e-05, "loss": 11.8754, "step": 3117 }, { "epoch": 0.12996540369305157, "grad_norm": 50.5, "learning_rate": 9.740276166742679e-05, "loss": 7.3755, "step": 3118 }, { "epoch": 0.1300070859905798, "grad_norm": 458.0, "learning_rate": 9.740061401636272e-05, "loss": 16.1269, "step": 3119 }, { "epoch": 0.13004876828810805, "grad_norm": 175.0, "learning_rate": 9.739846550141826e-05, "loss": 11.063, "step": 3120 }, { "epoch": 0.13009045058563629, "grad_norm": 370.0, "learning_rate": 9.739631612263255e-05, "loss": 14.0638, "step": 3121 }, { "epoch": 0.13013213288316453, "grad_norm": 504.0, "learning_rate": 9.739416588004478e-05, "loss": 14.8158, "step": 3122 }, { "epoch": 0.13017381518069276, "grad_norm": 215.0, "learning_rate": 9.73920147736941e-05, "loss": 11.2503, "step": 3123 }, { "epoch": 0.130215497478221, "grad_norm": 524.0, "learning_rate": 9.738986280361978e-05, "loss": 17.5023, "step": 3124 }, { "epoch": 0.13025717977574924, "grad_norm": 330.0, "learning_rate": 9.738770996986099e-05, "loss": 12.563, "step": 3125 }, { "epoch": 0.13029886207327748, "grad_norm": 147.0, "learning_rate": 9.738555627245697e-05, "loss": 8.7504, "step": 3126 }, { "epoch": 0.13034054437080572, "grad_norm": 496.0, "learning_rate": 9.7383401711447e-05, "loss": 18.7523, "step": 3127 }, { "epoch": 0.13038222666833396, "grad_norm": 354.0, "learning_rate": 9.738124628687031e-05, "loss": 14.4378, "step": 3128 }, { "epoch": 0.1304239089658622, "grad_norm": 544.0, "learning_rate": 9.73790899987662e-05, "loss": 18.2503, "step": 3129 }, { "epoch": 0.13046559126339044, "grad_norm": 180.0, "learning_rate": 9.737693284717398e-05, "loss": 10.6254, "step": 3130 }, { "epoch": 0.13050727356091868, "grad_norm": 532.0, "learning_rate": 9.737477483213295e-05, "loss": 17.2503, "step": 3131 }, { "epoch": 0.13054895585844692, "grad_norm": 154.0, "learning_rate": 9.737261595368243e-05, "loss": 9.8761, "step": 3132 }, { "epoch": 0.13059063815597516, "grad_norm": 234.0, "learning_rate": 9.737045621186181e-05, "loss": 12.8757, "step": 3133 }, { "epoch": 0.1306323204535034, "grad_norm": 1496.0, "learning_rate": 9.73682956067104e-05, "loss": 38.2542, "step": 3134 }, { "epoch": 0.13067400275103164, "grad_norm": 157.0, "learning_rate": 9.736613413826758e-05, "loss": 9.7504, "step": 3135 }, { "epoch": 0.13071568504855988, "grad_norm": 384.0, "learning_rate": 9.736397180657279e-05, "loss": 14.2505, "step": 3136 }, { "epoch": 0.13075736734608812, "grad_norm": 140.0, "learning_rate": 9.73618086116654e-05, "loss": 10.1253, "step": 3137 }, { "epoch": 0.13079904964361636, "grad_norm": 478.0, "learning_rate": 9.735964455358484e-05, "loss": 16.7524, "step": 3138 }, { "epoch": 0.1308407319411446, "grad_norm": 71.5, "learning_rate": 9.735747963237055e-05, "loss": 7.0326, "step": 3139 }, { "epoch": 0.13088241423867283, "grad_norm": 904.0, "learning_rate": 9.7355313848062e-05, "loss": 22.1253, "step": 3140 }, { "epoch": 0.13092409653620107, "grad_norm": 370.0, "learning_rate": 9.735314720069864e-05, "loss": 12.6881, "step": 3141 }, { "epoch": 0.1309657788337293, "grad_norm": 356.0, "learning_rate": 9.735097969031998e-05, "loss": 13.3773, "step": 3142 }, { "epoch": 0.13100746113125755, "grad_norm": 171.0, "learning_rate": 9.73488113169655e-05, "loss": 10.7515, "step": 3143 }, { "epoch": 0.1310491434287858, "grad_norm": 197.0, "learning_rate": 9.734664208067475e-05, "loss": 11.7518, "step": 3144 }, { "epoch": 0.13109082572631403, "grad_norm": 352.0, "learning_rate": 9.734447198148721e-05, "loss": 14.5636, "step": 3145 }, { "epoch": 0.13113250802384227, "grad_norm": 556.0, "learning_rate": 9.73423010194425e-05, "loss": 16.0004, "step": 3146 }, { "epoch": 0.1311741903213705, "grad_norm": 314.0, "learning_rate": 9.734012919458014e-05, "loss": 14.1881, "step": 3147 }, { "epoch": 0.13121587261889875, "grad_norm": 109.0, "learning_rate": 9.733795650693971e-05, "loss": 8.5005, "step": 3148 }, { "epoch": 0.131257554916427, "grad_norm": 75.5, "learning_rate": 9.733578295656083e-05, "loss": 8.6885, "step": 3149 }, { "epoch": 0.13129923721395523, "grad_norm": 338.0, "learning_rate": 9.733360854348311e-05, "loss": 14.3752, "step": 3150 }, { "epoch": 0.13134091951148347, "grad_norm": 544.0, "learning_rate": 9.733143326774618e-05, "loss": 17.3793, "step": 3151 }, { "epoch": 0.1313826018090117, "grad_norm": 532.0, "learning_rate": 9.732925712938966e-05, "loss": 17.5006, "step": 3152 }, { "epoch": 0.13142428410653995, "grad_norm": 344.0, "learning_rate": 9.732708012845323e-05, "loss": 14.6904, "step": 3153 }, { "epoch": 0.13146596640406819, "grad_norm": 1240.0, "learning_rate": 9.732490226497656e-05, "loss": 24.1311, "step": 3154 }, { "epoch": 0.13150764870159642, "grad_norm": 468.0, "learning_rate": 9.732272353899936e-05, "loss": 15.6263, "step": 3155 }, { "epoch": 0.13154933099912466, "grad_norm": 256.0, "learning_rate": 9.732054395056131e-05, "loss": 10.0628, "step": 3156 }, { "epoch": 0.1315910132966529, "grad_norm": 200.0, "learning_rate": 9.731836349970213e-05, "loss": 11.2507, "step": 3157 }, { "epoch": 0.13163269559418114, "grad_norm": 184.0, "learning_rate": 9.731618218646161e-05, "loss": 10.5002, "step": 3158 }, { "epoch": 0.13167437789170938, "grad_norm": 260.0, "learning_rate": 9.731400001087945e-05, "loss": 13.2513, "step": 3159 }, { "epoch": 0.13171606018923762, "grad_norm": 404.0, "learning_rate": 9.731181697299544e-05, "loss": 16.0011, "step": 3160 }, { "epoch": 0.13175774248676586, "grad_norm": 182.0, "learning_rate": 9.730963307284936e-05, "loss": 10.9391, "step": 3161 }, { "epoch": 0.1317994247842941, "grad_norm": 340.0, "learning_rate": 9.730744831048103e-05, "loss": 14.3134, "step": 3162 }, { "epoch": 0.13184110708182234, "grad_norm": 236.0, "learning_rate": 9.730526268593025e-05, "loss": 12.5006, "step": 3163 }, { "epoch": 0.13188278937935058, "grad_norm": 173.0, "learning_rate": 9.730307619923686e-05, "loss": 10.1257, "step": 3164 }, { "epoch": 0.13192447167687882, "grad_norm": 294.0, "learning_rate": 9.730088885044071e-05, "loss": 12.6881, "step": 3165 }, { "epoch": 0.13196615397440706, "grad_norm": 272.0, "learning_rate": 9.729870063958165e-05, "loss": 13.0007, "step": 3166 }, { "epoch": 0.1320078362719353, "grad_norm": 197.0, "learning_rate": 9.729651156669959e-05, "loss": 11.188, "step": 3167 }, { "epoch": 0.13204951856946354, "grad_norm": 149.0, "learning_rate": 9.729432163183439e-05, "loss": 8.8757, "step": 3168 }, { "epoch": 0.13209120086699178, "grad_norm": 218.0, "learning_rate": 9.729213083502599e-05, "loss": 10.6257, "step": 3169 }, { "epoch": 0.13213288316452002, "grad_norm": 644.0, "learning_rate": 9.728993917631431e-05, "loss": 20.8781, "step": 3170 }, { "epoch": 0.13217456546204825, "grad_norm": 484.0, "learning_rate": 9.728774665573928e-05, "loss": 17.1262, "step": 3171 }, { "epoch": 0.1322162477595765, "grad_norm": 164.0, "learning_rate": 9.728555327334087e-05, "loss": 8.9377, "step": 3172 }, { "epoch": 0.13225793005710476, "grad_norm": 532.0, "learning_rate": 9.728335902915906e-05, "loss": 17.8757, "step": 3173 }, { "epoch": 0.132299612354633, "grad_norm": 536.0, "learning_rate": 9.728116392323383e-05, "loss": 16.8752, "step": 3174 }, { "epoch": 0.13234129465216124, "grad_norm": 242.0, "learning_rate": 9.727896795560518e-05, "loss": 10.5629, "step": 3175 }, { "epoch": 0.13238297694968948, "grad_norm": 808.0, "learning_rate": 9.727677112631318e-05, "loss": 22.7521, "step": 3176 }, { "epoch": 0.13242465924721772, "grad_norm": 246.0, "learning_rate": 9.727457343539779e-05, "loss": 10.3751, "step": 3177 }, { "epoch": 0.13246634154474596, "grad_norm": 500.0, "learning_rate": 9.727237488289911e-05, "loss": 15.4379, "step": 3178 }, { "epoch": 0.1325080238422742, "grad_norm": 178.0, "learning_rate": 9.727017546885721e-05, "loss": 10.0661, "step": 3179 }, { "epoch": 0.13254970613980244, "grad_norm": 256.0, "learning_rate": 9.726797519331217e-05, "loss": 13.0628, "step": 3180 }, { "epoch": 0.13259138843733068, "grad_norm": 476.0, "learning_rate": 9.726577405630408e-05, "loss": 16.1252, "step": 3181 }, { "epoch": 0.13263307073485892, "grad_norm": 206.0, "learning_rate": 9.726357205787304e-05, "loss": 11.5629, "step": 3182 }, { "epoch": 0.13267475303238715, "grad_norm": 316.0, "learning_rate": 9.726136919805924e-05, "loss": 13.0006, "step": 3183 }, { "epoch": 0.1327164353299154, "grad_norm": 234.0, "learning_rate": 9.725916547690277e-05, "loss": 10.5003, "step": 3184 }, { "epoch": 0.13275811762744363, "grad_norm": 416.0, "learning_rate": 9.725696089444383e-05, "loss": 13.5005, "step": 3185 }, { "epoch": 0.13279979992497187, "grad_norm": 113.0, "learning_rate": 9.725475545072255e-05, "loss": 8.1253, "step": 3186 }, { "epoch": 0.1328414822225001, "grad_norm": 310.0, "learning_rate": 9.72525491457792e-05, "loss": 12.8757, "step": 3187 }, { "epoch": 0.13288316452002835, "grad_norm": 264.0, "learning_rate": 9.725034197965391e-05, "loss": 12.3756, "step": 3188 }, { "epoch": 0.1329248468175566, "grad_norm": 1112.0, "learning_rate": 9.724813395238697e-05, "loss": 24.3755, "step": 3189 }, { "epoch": 0.13296652911508483, "grad_norm": 306.0, "learning_rate": 9.724592506401857e-05, "loss": 12.9388, "step": 3190 }, { "epoch": 0.13300821141261307, "grad_norm": 442.0, "learning_rate": 9.724371531458902e-05, "loss": 14.5634, "step": 3191 }, { "epoch": 0.1330498937101413, "grad_norm": 344.0, "learning_rate": 9.724150470413855e-05, "loss": 14.2504, "step": 3192 }, { "epoch": 0.13309157600766955, "grad_norm": 472.0, "learning_rate": 9.723929323270745e-05, "loss": 17.0041, "step": 3193 }, { "epoch": 0.1331332583051978, "grad_norm": 400.0, "learning_rate": 9.723708090033605e-05, "loss": 14.4377, "step": 3194 }, { "epoch": 0.13317494060272603, "grad_norm": 358.0, "learning_rate": 9.723486770706466e-05, "loss": 13.4395, "step": 3195 }, { "epoch": 0.13321662290025427, "grad_norm": 904.0, "learning_rate": 9.723265365293361e-05, "loss": 24.6255, "step": 3196 }, { "epoch": 0.1332583051977825, "grad_norm": 418.0, "learning_rate": 9.723043873798326e-05, "loss": 16.6254, "step": 3197 }, { "epoch": 0.13329998749531075, "grad_norm": 496.0, "learning_rate": 9.722822296225395e-05, "loss": 18.13, "step": 3198 }, { "epoch": 0.13334166979283898, "grad_norm": 364.0, "learning_rate": 9.722600632578611e-05, "loss": 13.2502, "step": 3199 }, { "epoch": 0.13338335209036722, "grad_norm": 334.0, "learning_rate": 9.722378882862009e-05, "loss": 14.3131, "step": 3200 }, { "epoch": 0.13342503438789546, "grad_norm": 390.0, "learning_rate": 9.722157047079634e-05, "loss": 14.5628, "step": 3201 }, { "epoch": 0.1334667166854237, "grad_norm": 552.0, "learning_rate": 9.721935125235528e-05, "loss": 17.2501, "step": 3202 }, { "epoch": 0.13350839898295194, "grad_norm": 544.0, "learning_rate": 9.721713117333734e-05, "loss": 17.6253, "step": 3203 }, { "epoch": 0.13355008128048018, "grad_norm": 1024.0, "learning_rate": 9.7214910233783e-05, "loss": 26.2506, "step": 3204 }, { "epoch": 0.13359176357800842, "grad_norm": 258.0, "learning_rate": 9.721268843373273e-05, "loss": 12.3146, "step": 3205 }, { "epoch": 0.13363344587553666, "grad_norm": 134.0, "learning_rate": 9.721046577322701e-05, "loss": 8.4385, "step": 3206 }, { "epoch": 0.1336751281730649, "grad_norm": 143.0, "learning_rate": 9.720824225230639e-05, "loss": 7.8753, "step": 3207 }, { "epoch": 0.13371681047059314, "grad_norm": 1064.0, "learning_rate": 9.720601787101133e-05, "loss": 27.7503, "step": 3208 }, { "epoch": 0.13375849276812138, "grad_norm": 366.0, "learning_rate": 9.720379262938241e-05, "loss": 13.5644, "step": 3209 }, { "epoch": 0.13380017506564962, "grad_norm": 510.0, "learning_rate": 9.720156652746019e-05, "loss": 17.0004, "step": 3210 }, { "epoch": 0.13384185736317786, "grad_norm": 588.0, "learning_rate": 9.719933956528523e-05, "loss": 18.3758, "step": 3211 }, { "epoch": 0.1338835396607061, "grad_norm": 432.0, "learning_rate": 9.719711174289812e-05, "loss": 15.8754, "step": 3212 }, { "epoch": 0.13392522195823434, "grad_norm": 480.0, "learning_rate": 9.719488306033944e-05, "loss": 14.7502, "step": 3213 }, { "epoch": 0.13396690425576258, "grad_norm": 486.0, "learning_rate": 9.719265351764984e-05, "loss": 17.1259, "step": 3214 }, { "epoch": 0.13400858655329081, "grad_norm": 374.0, "learning_rate": 9.719042311486995e-05, "loss": 13.8754, "step": 3215 }, { "epoch": 0.13405026885081905, "grad_norm": 348.0, "learning_rate": 9.71881918520404e-05, "loss": 14.2502, "step": 3216 }, { "epoch": 0.1340919511483473, "grad_norm": 510.0, "learning_rate": 9.718595972920186e-05, "loss": 15.8127, "step": 3217 }, { "epoch": 0.13413363344587553, "grad_norm": 236.0, "learning_rate": 9.718372674639502e-05, "loss": 12.0634, "step": 3218 }, { "epoch": 0.13417531574340377, "grad_norm": 1368.0, "learning_rate": 9.718149290366056e-05, "loss": 35.2538, "step": 3219 }, { "epoch": 0.134216998040932, "grad_norm": 198.0, "learning_rate": 9.717925820103922e-05, "loss": 12.0011, "step": 3220 }, { "epoch": 0.13425868033846025, "grad_norm": 300.0, "learning_rate": 9.717702263857169e-05, "loss": 13.4377, "step": 3221 }, { "epoch": 0.1343003626359885, "grad_norm": 548.0, "learning_rate": 9.717478621629876e-05, "loss": 18.1262, "step": 3222 }, { "epoch": 0.13434204493351673, "grad_norm": 248.0, "learning_rate": 9.717254893426115e-05, "loss": 10.9381, "step": 3223 }, { "epoch": 0.13438372723104497, "grad_norm": 380.0, "learning_rate": 9.717031079249965e-05, "loss": 15.1878, "step": 3224 }, { "epoch": 0.1344254095285732, "grad_norm": 115.5, "learning_rate": 9.716807179105505e-05, "loss": 7.9067, "step": 3225 }, { "epoch": 0.13446709182610145, "grad_norm": 183.0, "learning_rate": 9.716583192996815e-05, "loss": 8.3143, "step": 3226 }, { "epoch": 0.1345087741236297, "grad_norm": 392.0, "learning_rate": 9.716359120927978e-05, "loss": 14.8131, "step": 3227 }, { "epoch": 0.13455045642115793, "grad_norm": 284.0, "learning_rate": 9.716134962903076e-05, "loss": 9.3762, "step": 3228 }, { "epoch": 0.13459213871868617, "grad_norm": 612.0, "learning_rate": 9.715910718926198e-05, "loss": 20.5003, "step": 3229 }, { "epoch": 0.1346338210162144, "grad_norm": 812.0, "learning_rate": 9.715686389001426e-05, "loss": 20.131, "step": 3230 }, { "epoch": 0.13467550331374264, "grad_norm": 312.0, "learning_rate": 9.715461973132854e-05, "loss": 11.6256, "step": 3231 }, { "epoch": 0.13471718561127088, "grad_norm": 71.0, "learning_rate": 9.715237471324566e-05, "loss": 7.72, "step": 3232 }, { "epoch": 0.13475886790879912, "grad_norm": 286.0, "learning_rate": 9.715012883580657e-05, "loss": 13.0629, "step": 3233 }, { "epoch": 0.13480055020632736, "grad_norm": 272.0, "learning_rate": 9.714788209905222e-05, "loss": 12.1263, "step": 3234 }, { "epoch": 0.1348422325038556, "grad_norm": 169.0, "learning_rate": 9.71456345030235e-05, "loss": 7.9067, "step": 3235 }, { "epoch": 0.13488391480138384, "grad_norm": 235.0, "learning_rate": 9.714338604776143e-05, "loss": 12.1885, "step": 3236 }, { "epoch": 0.13492559709891208, "grad_norm": 560.0, "learning_rate": 9.714113673330697e-05, "loss": 18.2546, "step": 3237 }, { "epoch": 0.13496727939644032, "grad_norm": 51.0, "learning_rate": 9.713888655970108e-05, "loss": 7.6564, "step": 3238 }, { "epoch": 0.13500896169396856, "grad_norm": 216.0, "learning_rate": 9.713663552698482e-05, "loss": 10.938, "step": 3239 }, { "epoch": 0.1350506439914968, "grad_norm": 108.5, "learning_rate": 9.713438363519918e-05, "loss": 9.4379, "step": 3240 }, { "epoch": 0.13509232628902504, "grad_norm": 290.0, "learning_rate": 9.713213088438522e-05, "loss": 13.8127, "step": 3241 }, { "epoch": 0.13513400858655328, "grad_norm": 486.0, "learning_rate": 9.712987727458399e-05, "loss": 15.6257, "step": 3242 }, { "epoch": 0.13517569088408152, "grad_norm": 390.0, "learning_rate": 9.712762280583656e-05, "loss": 14.6881, "step": 3243 }, { "epoch": 0.13521737318160976, "grad_norm": 135.0, "learning_rate": 9.712536747818402e-05, "loss": 10.0628, "step": 3244 }, { "epoch": 0.13525905547913802, "grad_norm": 1104.0, "learning_rate": 9.712311129166749e-05, "loss": 27.7506, "step": 3245 }, { "epoch": 0.13530073777666626, "grad_norm": 388.0, "learning_rate": 9.712085424632806e-05, "loss": 16.7528, "step": 3246 }, { "epoch": 0.1353424200741945, "grad_norm": 324.0, "learning_rate": 9.711859634220689e-05, "loss": 13.3756, "step": 3247 }, { "epoch": 0.13538410237172274, "grad_norm": 350.0, "learning_rate": 9.711633757934509e-05, "loss": 14.8158, "step": 3248 }, { "epoch": 0.13542578466925098, "grad_norm": 318.0, "learning_rate": 9.711407795778388e-05, "loss": 13.9386, "step": 3249 }, { "epoch": 0.13546746696677922, "grad_norm": 165.0, "learning_rate": 9.711181747756441e-05, "loss": 9.3754, "step": 3250 }, { "epoch": 0.13550914926430746, "grad_norm": 470.0, "learning_rate": 9.710955613872788e-05, "loss": 16.6252, "step": 3251 }, { "epoch": 0.1355508315618357, "grad_norm": 338.0, "learning_rate": 9.710729394131552e-05, "loss": 13.8767, "step": 3252 }, { "epoch": 0.13559251385936394, "grad_norm": 70.0, "learning_rate": 9.710503088536854e-05, "loss": 7.6253, "step": 3253 }, { "epoch": 0.13563419615689218, "grad_norm": 231.0, "learning_rate": 9.710276697092818e-05, "loss": 12.1268, "step": 3254 }, { "epoch": 0.13567587845442042, "grad_norm": 292.0, "learning_rate": 9.710050219803572e-05, "loss": 13.1252, "step": 3255 }, { "epoch": 0.13571756075194866, "grad_norm": 330.0, "learning_rate": 9.709823656673243e-05, "loss": 13.5004, "step": 3256 }, { "epoch": 0.1357592430494769, "grad_norm": 214.0, "learning_rate": 9.709597007705959e-05, "loss": 11.2504, "step": 3257 }, { "epoch": 0.13580092534700514, "grad_norm": 374.0, "learning_rate": 9.709370272905851e-05, "loss": 15.5006, "step": 3258 }, { "epoch": 0.13584260764453338, "grad_norm": 306.0, "learning_rate": 9.709143452277053e-05, "loss": 13.0015, "step": 3259 }, { "epoch": 0.13588428994206161, "grad_norm": 362.0, "learning_rate": 9.708916545823696e-05, "loss": 13.5002, "step": 3260 }, { "epoch": 0.13592597223958985, "grad_norm": 330.0, "learning_rate": 9.708689553549919e-05, "loss": 14.1256, "step": 3261 }, { "epoch": 0.1359676545371181, "grad_norm": 510.0, "learning_rate": 9.708462475459857e-05, "loss": 17.5002, "step": 3262 }, { "epoch": 0.13600933683464633, "grad_norm": 248.0, "learning_rate": 9.708235311557646e-05, "loss": 11.9383, "step": 3263 }, { "epoch": 0.13605101913217457, "grad_norm": 89.5, "learning_rate": 9.708008061847428e-05, "loss": 8.2505, "step": 3264 }, { "epoch": 0.1360927014297028, "grad_norm": 524.0, "learning_rate": 9.707780726333348e-05, "loss": 14.9402, "step": 3265 }, { "epoch": 0.13613438372723105, "grad_norm": 135.0, "learning_rate": 9.707553305019546e-05, "loss": 8.9379, "step": 3266 }, { "epoch": 0.1361760660247593, "grad_norm": 205.0, "learning_rate": 9.707325797910165e-05, "loss": 10.9381, "step": 3267 }, { "epoch": 0.13621774832228753, "grad_norm": 884.0, "learning_rate": 9.707098205009355e-05, "loss": 21.5035, "step": 3268 }, { "epoch": 0.13625943061981577, "grad_norm": 324.0, "learning_rate": 9.706870526321262e-05, "loss": 13.066, "step": 3269 }, { "epoch": 0.136301112917344, "grad_norm": 564.0, "learning_rate": 9.706642761850035e-05, "loss": 17.8767, "step": 3270 }, { "epoch": 0.13634279521487225, "grad_norm": 332.0, "learning_rate": 9.706414911599828e-05, "loss": 12.7502, "step": 3271 }, { "epoch": 0.1363844775124005, "grad_norm": 588.0, "learning_rate": 9.70618697557479e-05, "loss": 18.6259, "step": 3272 }, { "epoch": 0.13642615980992873, "grad_norm": 458.0, "learning_rate": 9.705958953779077e-05, "loss": 17.7503, "step": 3273 }, { "epoch": 0.13646784210745697, "grad_norm": 408.0, "learning_rate": 9.705730846216844e-05, "loss": 15.0017, "step": 3274 }, { "epoch": 0.1365095244049852, "grad_norm": 368.0, "learning_rate": 9.705502652892249e-05, "loss": 10.4383, "step": 3275 }, { "epoch": 0.13655120670251344, "grad_norm": 506.0, "learning_rate": 9.70527437380945e-05, "loss": 17.3759, "step": 3276 }, { "epoch": 0.13659288900004168, "grad_norm": 372.0, "learning_rate": 9.705046008972607e-05, "loss": 13.6263, "step": 3277 }, { "epoch": 0.13663457129756992, "grad_norm": 132.0, "learning_rate": 9.704817558385885e-05, "loss": 7.7822, "step": 3278 }, { "epoch": 0.13667625359509816, "grad_norm": 304.0, "learning_rate": 9.704589022053443e-05, "loss": 12.4379, "step": 3279 }, { "epoch": 0.1367179358926264, "grad_norm": 239.0, "learning_rate": 9.704360399979451e-05, "loss": 12.1253, "step": 3280 }, { "epoch": 0.13675961819015464, "grad_norm": 450.0, "learning_rate": 9.70413169216807e-05, "loss": 16.1253, "step": 3281 }, { "epoch": 0.13680130048768288, "grad_norm": 1040.0, "learning_rate": 9.703902898623474e-05, "loss": 27.2502, "step": 3282 }, { "epoch": 0.13684298278521112, "grad_norm": 332.0, "learning_rate": 9.703674019349829e-05, "loss": 12.7505, "step": 3283 }, { "epoch": 0.13688466508273936, "grad_norm": 246.0, "learning_rate": 9.703445054351307e-05, "loss": 12.4379, "step": 3284 }, { "epoch": 0.1369263473802676, "grad_norm": 772.0, "learning_rate": 9.703216003632081e-05, "loss": 23.0007, "step": 3285 }, { "epoch": 0.13696802967779584, "grad_norm": 280.0, "learning_rate": 9.702986867196328e-05, "loss": 13.4379, "step": 3286 }, { "epoch": 0.13700971197532408, "grad_norm": 131.0, "learning_rate": 9.702757645048219e-05, "loss": 9.6884, "step": 3287 }, { "epoch": 0.13705139427285232, "grad_norm": 552.0, "learning_rate": 9.702528337191937e-05, "loss": 17.6255, "step": 3288 }, { "epoch": 0.13709307657038056, "grad_norm": 448.0, "learning_rate": 9.702298943631656e-05, "loss": 14.0004, "step": 3289 }, { "epoch": 0.1371347588679088, "grad_norm": 446.0, "learning_rate": 9.702069464371561e-05, "loss": 16.5006, "step": 3290 }, { "epoch": 0.13717644116543704, "grad_norm": 1136.0, "learning_rate": 9.701839899415834e-05, "loss": 25.7551, "step": 3291 }, { "epoch": 0.13721812346296527, "grad_norm": 434.0, "learning_rate": 9.701610248768656e-05, "loss": 18.0004, "step": 3292 }, { "epoch": 0.13725980576049351, "grad_norm": 352.0, "learning_rate": 9.701380512434213e-05, "loss": 14.1255, "step": 3293 }, { "epoch": 0.13730148805802175, "grad_norm": 648.0, "learning_rate": 9.701150690416694e-05, "loss": 20.7504, "step": 3294 }, { "epoch": 0.13734317035555, "grad_norm": 239.0, "learning_rate": 9.700920782720285e-05, "loss": 12.0005, "step": 3295 }, { "epoch": 0.13738485265307823, "grad_norm": 274.0, "learning_rate": 9.700690789349178e-05, "loss": 13.5627, "step": 3296 }, { "epoch": 0.13742653495060647, "grad_norm": 220.0, "learning_rate": 9.700460710307565e-05, "loss": 12.313, "step": 3297 }, { "epoch": 0.1374682172481347, "grad_norm": 390.0, "learning_rate": 9.700230545599638e-05, "loss": 15.563, "step": 3298 }, { "epoch": 0.13750989954566295, "grad_norm": 408.0, "learning_rate": 9.70000029522959e-05, "loss": 14.6881, "step": 3299 }, { "epoch": 0.1375515818431912, "grad_norm": 166.0, "learning_rate": 9.699769959201623e-05, "loss": 8.8133, "step": 3300 }, { "epoch": 0.13759326414071943, "grad_norm": 338.0, "learning_rate": 9.699539537519928e-05, "loss": 13.8759, "step": 3301 }, { "epoch": 0.13763494643824767, "grad_norm": 506.0, "learning_rate": 9.69930903018871e-05, "loss": 17.0004, "step": 3302 }, { "epoch": 0.1376766287357759, "grad_norm": 704.0, "learning_rate": 9.699078437212166e-05, "loss": 17.0049, "step": 3303 }, { "epoch": 0.13771831103330415, "grad_norm": 243.0, "learning_rate": 9.698847758594502e-05, "loss": 10.8752, "step": 3304 }, { "epoch": 0.1377599933308324, "grad_norm": 40.75, "learning_rate": 9.698616994339919e-05, "loss": 7.0002, "step": 3305 }, { "epoch": 0.13780167562836063, "grad_norm": 418.0, "learning_rate": 9.698386144452624e-05, "loss": 15.8128, "step": 3306 }, { "epoch": 0.13784335792588887, "grad_norm": 784.0, "learning_rate": 9.698155208936825e-05, "loss": 22.0003, "step": 3307 }, { "epoch": 0.1378850402234171, "grad_norm": 162.0, "learning_rate": 9.697924187796732e-05, "loss": 9.7504, "step": 3308 }, { "epoch": 0.13792672252094534, "grad_norm": 592.0, "learning_rate": 9.697693081036551e-05, "loss": 18.8756, "step": 3309 }, { "epoch": 0.13796840481847358, "grad_norm": 250.0, "learning_rate": 9.697461888660498e-05, "loss": 12.4381, "step": 3310 }, { "epoch": 0.13801008711600182, "grad_norm": 340.0, "learning_rate": 9.697230610672785e-05, "loss": 15.3127, "step": 3311 }, { "epoch": 0.13805176941353006, "grad_norm": 424.0, "learning_rate": 9.696999247077627e-05, "loss": 15.1878, "step": 3312 }, { "epoch": 0.1380934517110583, "grad_norm": 340.0, "learning_rate": 9.69676779787924e-05, "loss": 11.7501, "step": 3313 }, { "epoch": 0.13813513400858654, "grad_norm": 171.0, "learning_rate": 9.696536263081843e-05, "loss": 11.3754, "step": 3314 }, { "epoch": 0.13817681630611478, "grad_norm": 444.0, "learning_rate": 9.696304642689657e-05, "loss": 16.0012, "step": 3315 }, { "epoch": 0.13821849860364302, "grad_norm": 472.0, "learning_rate": 9.696072936706901e-05, "loss": 15.6893, "step": 3316 }, { "epoch": 0.13826018090117126, "grad_norm": 536.0, "learning_rate": 9.6958411451378e-05, "loss": 16.2558, "step": 3317 }, { "epoch": 0.13830186319869953, "grad_norm": 716.0, "learning_rate": 9.695609267986576e-05, "loss": 20.5004, "step": 3318 }, { "epoch": 0.13834354549622777, "grad_norm": 116.5, "learning_rate": 9.695377305257457e-05, "loss": 9.8757, "step": 3319 }, { "epoch": 0.138385227793756, "grad_norm": 516.0, "learning_rate": 9.69514525695467e-05, "loss": 18.253, "step": 3320 }, { "epoch": 0.13842691009128424, "grad_norm": 1448.0, "learning_rate": 9.694913123082443e-05, "loss": 30.2545, "step": 3321 }, { "epoch": 0.13846859238881248, "grad_norm": 294.0, "learning_rate": 9.694680903645009e-05, "loss": 12.6253, "step": 3322 }, { "epoch": 0.13851027468634072, "grad_norm": 199.0, "learning_rate": 9.694448598646597e-05, "loss": 9.9378, "step": 3323 }, { "epoch": 0.13855195698386896, "grad_norm": 228.0, "learning_rate": 9.694216208091443e-05, "loss": 11.3752, "step": 3324 }, { "epoch": 0.1385936392813972, "grad_norm": 186.0, "learning_rate": 9.693983731983782e-05, "loss": 11.0039, "step": 3325 }, { "epoch": 0.13863532157892544, "grad_norm": 418.0, "learning_rate": 9.693751170327849e-05, "loss": 15.2502, "step": 3326 }, { "epoch": 0.13867700387645368, "grad_norm": 185.0, "learning_rate": 9.693518523127888e-05, "loss": 11.6256, "step": 3327 }, { "epoch": 0.13871868617398192, "grad_norm": 241.0, "learning_rate": 9.693285790388133e-05, "loss": 11.0632, "step": 3328 }, { "epoch": 0.13876036847151016, "grad_norm": 416.0, "learning_rate": 9.693052972112829e-05, "loss": 14.3754, "step": 3329 }, { "epoch": 0.1388020507690384, "grad_norm": 228.0, "learning_rate": 9.692820068306216e-05, "loss": 11.8128, "step": 3330 }, { "epoch": 0.13884373306656664, "grad_norm": 404.0, "learning_rate": 9.692587078972541e-05, "loss": 15.3128, "step": 3331 }, { "epoch": 0.13888541536409488, "grad_norm": 390.0, "learning_rate": 9.69235400411605e-05, "loss": 12.7502, "step": 3332 }, { "epoch": 0.13892709766162312, "grad_norm": 169.0, "learning_rate": 9.692120843740993e-05, "loss": 9.4382, "step": 3333 }, { "epoch": 0.13896877995915136, "grad_norm": 440.0, "learning_rate": 9.691887597851616e-05, "loss": 16.0004, "step": 3334 }, { "epoch": 0.1390104622566796, "grad_norm": 1512.0, "learning_rate": 9.691654266452171e-05, "loss": 34.5015, "step": 3335 }, { "epoch": 0.13905214455420783, "grad_norm": 268.0, "learning_rate": 9.691420849546909e-05, "loss": 12.5639, "step": 3336 }, { "epoch": 0.13909382685173607, "grad_norm": 191.0, "learning_rate": 9.691187347140087e-05, "loss": 9.3752, "step": 3337 }, { "epoch": 0.1391355091492643, "grad_norm": 174.0, "learning_rate": 9.690953759235959e-05, "loss": 9.3759, "step": 3338 }, { "epoch": 0.13917719144679255, "grad_norm": 251.0, "learning_rate": 9.690720085838781e-05, "loss": 12.0003, "step": 3339 }, { "epoch": 0.1392188737443208, "grad_norm": 628.0, "learning_rate": 9.690486326952815e-05, "loss": 18.7509, "step": 3340 }, { "epoch": 0.13926055604184903, "grad_norm": 184.0, "learning_rate": 9.690252482582318e-05, "loss": 8.9377, "step": 3341 }, { "epoch": 0.13930223833937727, "grad_norm": 344.0, "learning_rate": 9.690018552731554e-05, "loss": 13.5632, "step": 3342 }, { "epoch": 0.1393439206369055, "grad_norm": 93.5, "learning_rate": 9.689784537404784e-05, "loss": 5.3442, "step": 3343 }, { "epoch": 0.13938560293443375, "grad_norm": 350.0, "learning_rate": 9.689550436606276e-05, "loss": 15.063, "step": 3344 }, { "epoch": 0.139427285231962, "grad_norm": 182.0, "learning_rate": 9.689316250340294e-05, "loss": 10.1254, "step": 3345 }, { "epoch": 0.13946896752949023, "grad_norm": 680.0, "learning_rate": 9.689081978611108e-05, "loss": 18.1317, "step": 3346 }, { "epoch": 0.13951064982701847, "grad_norm": 255.0, "learning_rate": 9.688847621422986e-05, "loss": 12.4381, "step": 3347 }, { "epoch": 0.1395523321245467, "grad_norm": 462.0, "learning_rate": 9.6886131787802e-05, "loss": 16.1254, "step": 3348 }, { "epoch": 0.13959401442207495, "grad_norm": 684.0, "learning_rate": 9.688378650687024e-05, "loss": 20.0004, "step": 3349 }, { "epoch": 0.1396356967196032, "grad_norm": 232.0, "learning_rate": 9.688144037147729e-05, "loss": 12.3129, "step": 3350 }, { "epoch": 0.13967737901713143, "grad_norm": 306.0, "learning_rate": 9.687909338166593e-05, "loss": 14.0004, "step": 3351 }, { "epoch": 0.13971906131465966, "grad_norm": 468.0, "learning_rate": 9.687674553747895e-05, "loss": 16.3763, "step": 3352 }, { "epoch": 0.1397607436121879, "grad_norm": 444.0, "learning_rate": 9.68743968389591e-05, "loss": 14.4381, "step": 3353 }, { "epoch": 0.13980242590971614, "grad_norm": 466.0, "learning_rate": 9.68720472861492e-05, "loss": 16.3752, "step": 3354 }, { "epoch": 0.13984410820724438, "grad_norm": 306.0, "learning_rate": 9.68696968790921e-05, "loss": 13.5006, "step": 3355 }, { "epoch": 0.13988579050477262, "grad_norm": 1112.0, "learning_rate": 9.68673456178306e-05, "loss": 24.8806, "step": 3356 }, { "epoch": 0.13992747280230086, "grad_norm": 138.0, "learning_rate": 9.686499350240757e-05, "loss": 10.5633, "step": 3357 }, { "epoch": 0.1399691550998291, "grad_norm": 336.0, "learning_rate": 9.686264053286586e-05, "loss": 14.1251, "step": 3358 }, { "epoch": 0.14001083739735734, "grad_norm": 302.0, "learning_rate": 9.686028670924839e-05, "loss": 13.2504, "step": 3359 }, { "epoch": 0.14005251969488558, "grad_norm": 260.0, "learning_rate": 9.685793203159803e-05, "loss": 12.8128, "step": 3360 }, { "epoch": 0.14009420199241382, "grad_norm": 318.0, "learning_rate": 9.68555764999577e-05, "loss": 13.3127, "step": 3361 }, { "epoch": 0.14013588428994206, "grad_norm": 158.0, "learning_rate": 9.68532201143703e-05, "loss": 9.8752, "step": 3362 }, { "epoch": 0.1401775665874703, "grad_norm": 712.0, "learning_rate": 9.685086287487883e-05, "loss": 21.5003, "step": 3363 }, { "epoch": 0.14021924888499854, "grad_norm": 236.0, "learning_rate": 9.684850478152622e-05, "loss": 12.7502, "step": 3364 }, { "epoch": 0.14026093118252678, "grad_norm": 252.0, "learning_rate": 9.684614583435546e-05, "loss": 11.4378, "step": 3365 }, { "epoch": 0.14030261348005502, "grad_norm": 116.0, "learning_rate": 9.684378603340952e-05, "loss": 8.688, "step": 3366 }, { "epoch": 0.14034429577758326, "grad_norm": 692.0, "learning_rate": 9.684142537873142e-05, "loss": 21.3755, "step": 3367 }, { "epoch": 0.1403859780751115, "grad_norm": 120.5, "learning_rate": 9.68390638703642e-05, "loss": 9.1257, "step": 3368 }, { "epoch": 0.14042766037263973, "grad_norm": 280.0, "learning_rate": 9.683670150835087e-05, "loss": 13.7503, "step": 3369 }, { "epoch": 0.14046934267016797, "grad_norm": 214.0, "learning_rate": 9.68343382927345e-05, "loss": 12.1878, "step": 3370 }, { "epoch": 0.1405110249676962, "grad_norm": 468.0, "learning_rate": 9.683197422355816e-05, "loss": 16.6254, "step": 3371 }, { "epoch": 0.14055270726522445, "grad_norm": 368.0, "learning_rate": 9.682960930086493e-05, "loss": 11.5031, "step": 3372 }, { "epoch": 0.1405943895627527, "grad_norm": 220.0, "learning_rate": 9.682724352469792e-05, "loss": 9.1265, "step": 3373 }, { "epoch": 0.14063607186028093, "grad_norm": 234.0, "learning_rate": 9.682487689510023e-05, "loss": 11.3755, "step": 3374 }, { "epoch": 0.14067775415780917, "grad_norm": 127.0, "learning_rate": 9.6822509412115e-05, "loss": 9.6252, "step": 3375 }, { "epoch": 0.1407194364553374, "grad_norm": 478.0, "learning_rate": 9.682014107578538e-05, "loss": 15.1878, "step": 3376 }, { "epoch": 0.14076111875286565, "grad_norm": 207.0, "learning_rate": 9.681777188615454e-05, "loss": 11.0004, "step": 3377 }, { "epoch": 0.1408028010503939, "grad_norm": 176.0, "learning_rate": 9.681540184326565e-05, "loss": 10.8753, "step": 3378 }, { "epoch": 0.14084448334792213, "grad_norm": 402.0, "learning_rate": 9.68130309471619e-05, "loss": 14.8753, "step": 3379 }, { "epoch": 0.14088616564545037, "grad_norm": 608.0, "learning_rate": 9.681065919788652e-05, "loss": 19.0011, "step": 3380 }, { "epoch": 0.1409278479429786, "grad_norm": 241.0, "learning_rate": 9.680828659548271e-05, "loss": 11.3131, "step": 3381 }, { "epoch": 0.14096953024050685, "grad_norm": 247.0, "learning_rate": 9.680591313999372e-05, "loss": 12.5016, "step": 3382 }, { "epoch": 0.14101121253803509, "grad_norm": 524.0, "learning_rate": 9.680353883146281e-05, "loss": 16.8759, "step": 3383 }, { "epoch": 0.14105289483556332, "grad_norm": 516.0, "learning_rate": 9.680116366993323e-05, "loss": 19.0005, "step": 3384 }, { "epoch": 0.14109457713309156, "grad_norm": 198.0, "learning_rate": 9.679878765544831e-05, "loss": 10.6879, "step": 3385 }, { "epoch": 0.1411362594306198, "grad_norm": 1608.0, "learning_rate": 9.679641078805133e-05, "loss": 34.0063, "step": 3386 }, { "epoch": 0.14117794172814804, "grad_norm": 408.0, "learning_rate": 9.67940330677856e-05, "loss": 15.2508, "step": 3387 }, { "epoch": 0.14121962402567628, "grad_norm": 480.0, "learning_rate": 9.679165449469449e-05, "loss": 16.8754, "step": 3388 }, { "epoch": 0.14126130632320452, "grad_norm": 226.0, "learning_rate": 9.678927506882129e-05, "loss": 11.6271, "step": 3389 }, { "epoch": 0.14130298862073276, "grad_norm": 390.0, "learning_rate": 9.678689479020942e-05, "loss": 14.3133, "step": 3390 }, { "epoch": 0.14134467091826103, "grad_norm": 532.0, "learning_rate": 9.678451365890222e-05, "loss": 18.0003, "step": 3391 }, { "epoch": 0.14138635321578927, "grad_norm": 193.0, "learning_rate": 9.678213167494312e-05, "loss": 10.9392, "step": 3392 }, { "epoch": 0.1414280355133175, "grad_norm": 160.0, "learning_rate": 9.677974883837551e-05, "loss": 10.1253, "step": 3393 }, { "epoch": 0.14146971781084575, "grad_norm": 532.0, "learning_rate": 9.677736514924283e-05, "loss": 18.1264, "step": 3394 }, { "epoch": 0.14151140010837399, "grad_norm": 147.0, "learning_rate": 9.677498060758852e-05, "loss": 11.438, "step": 3395 }, { "epoch": 0.14155308240590223, "grad_norm": 462.0, "learning_rate": 9.677259521345602e-05, "loss": 16.1258, "step": 3396 }, { "epoch": 0.14159476470343046, "grad_norm": 237.0, "learning_rate": 9.677020896688885e-05, "loss": 11.9379, "step": 3397 }, { "epoch": 0.1416364470009587, "grad_norm": 348.0, "learning_rate": 9.676782186793043e-05, "loss": 13.001, "step": 3398 }, { "epoch": 0.14167812929848694, "grad_norm": 136.0, "learning_rate": 9.676543391662434e-05, "loss": 10.9381, "step": 3399 }, { "epoch": 0.14171981159601518, "grad_norm": 792.0, "learning_rate": 9.676304511301404e-05, "loss": 23.5004, "step": 3400 }, { "epoch": 0.14176149389354342, "grad_norm": 342.0, "learning_rate": 9.67606554571431e-05, "loss": 12.7513, "step": 3401 }, { "epoch": 0.14180317619107166, "grad_norm": 1144.0, "learning_rate": 9.675826494905507e-05, "loss": 27.6256, "step": 3402 }, { "epoch": 0.1418448584885999, "grad_norm": 620.0, "learning_rate": 9.67558735887935e-05, "loss": 19.0001, "step": 3403 }, { "epoch": 0.14188654078612814, "grad_norm": 127.0, "learning_rate": 9.675348137640198e-05, "loss": 10.1254, "step": 3404 }, { "epoch": 0.14192822308365638, "grad_norm": 992.0, "learning_rate": 9.675108831192415e-05, "loss": 27.3754, "step": 3405 }, { "epoch": 0.14196990538118462, "grad_norm": 348.0, "learning_rate": 9.674869439540355e-05, "loss": 13.5628, "step": 3406 }, { "epoch": 0.14201158767871286, "grad_norm": 214.0, "learning_rate": 9.674629962688384e-05, "loss": 12.0637, "step": 3407 }, { "epoch": 0.1420532699762411, "grad_norm": 688.0, "learning_rate": 9.674390400640868e-05, "loss": 21.1254, "step": 3408 }, { "epoch": 0.14209495227376934, "grad_norm": 588.0, "learning_rate": 9.674150753402173e-05, "loss": 18.3766, "step": 3409 }, { "epoch": 0.14213663457129758, "grad_norm": 676.0, "learning_rate": 9.673911020976664e-05, "loss": 20.5003, "step": 3410 }, { "epoch": 0.14217831686882582, "grad_norm": 828.0, "learning_rate": 9.673671203368713e-05, "loss": 21.6259, "step": 3411 }, { "epoch": 0.14221999916635406, "grad_norm": 442.0, "learning_rate": 9.673431300582688e-05, "loss": 15.6254, "step": 3412 }, { "epoch": 0.1422616814638823, "grad_norm": 444.0, "learning_rate": 9.673191312622964e-05, "loss": 14.6884, "step": 3413 }, { "epoch": 0.14230336376141053, "grad_norm": 278.0, "learning_rate": 9.672951239493913e-05, "loss": 13.5634, "step": 3414 }, { "epoch": 0.14234504605893877, "grad_norm": 668.0, "learning_rate": 9.67271108119991e-05, "loss": 17.6288, "step": 3415 }, { "epoch": 0.142386728356467, "grad_norm": 218.0, "learning_rate": 9.672470837745334e-05, "loss": 10.2505, "step": 3416 }, { "epoch": 0.14242841065399525, "grad_norm": 320.0, "learning_rate": 9.67223050913456e-05, "loss": 13.5631, "step": 3417 }, { "epoch": 0.1424700929515235, "grad_norm": 334.0, "learning_rate": 9.671990095371972e-05, "loss": 13.063, "step": 3418 }, { "epoch": 0.14251177524905173, "grad_norm": 740.0, "learning_rate": 9.67174959646195e-05, "loss": 22.1256, "step": 3419 }, { "epoch": 0.14255345754657997, "grad_norm": 394.0, "learning_rate": 9.671509012408877e-05, "loss": 15.8753, "step": 3420 }, { "epoch": 0.1425951398441082, "grad_norm": 334.0, "learning_rate": 9.671268343217137e-05, "loss": 14.1253, "step": 3421 }, { "epoch": 0.14263682214163645, "grad_norm": 334.0, "learning_rate": 9.671027588891118e-05, "loss": 12.0005, "step": 3422 }, { "epoch": 0.1426785044391647, "grad_norm": 472.0, "learning_rate": 9.670786749435204e-05, "loss": 14.1883, "step": 3423 }, { "epoch": 0.14272018673669293, "grad_norm": 560.0, "learning_rate": 9.670545824853789e-05, "loss": 18.7502, "step": 3424 }, { "epoch": 0.14276186903422117, "grad_norm": 422.0, "learning_rate": 9.670304815151262e-05, "loss": 15.8127, "step": 3425 }, { "epoch": 0.1428035513317494, "grad_norm": 193.0, "learning_rate": 9.670063720332012e-05, "loss": 11.0005, "step": 3426 }, { "epoch": 0.14284523362927765, "grad_norm": 372.0, "learning_rate": 9.669822540400438e-05, "loss": 14.1879, "step": 3427 }, { "epoch": 0.14288691592680589, "grad_norm": 448.0, "learning_rate": 9.669581275360935e-05, "loss": 15.1258, "step": 3428 }, { "epoch": 0.14292859822433412, "grad_norm": 444.0, "learning_rate": 9.669339925217897e-05, "loss": 15.5006, "step": 3429 }, { "epoch": 0.14297028052186236, "grad_norm": 338.0, "learning_rate": 9.669098489975725e-05, "loss": 12.7502, "step": 3430 }, { "epoch": 0.1430119628193906, "grad_norm": 171.0, "learning_rate": 9.668856969638817e-05, "loss": 9.5004, "step": 3431 }, { "epoch": 0.14305364511691884, "grad_norm": 404.0, "learning_rate": 9.66861536421158e-05, "loss": 12.3147, "step": 3432 }, { "epoch": 0.14309532741444708, "grad_norm": 288.0, "learning_rate": 9.66837367369841e-05, "loss": 12.1255, "step": 3433 }, { "epoch": 0.14313700971197532, "grad_norm": 492.0, "learning_rate": 9.668131898103716e-05, "loss": 16.7505, "step": 3434 }, { "epoch": 0.14317869200950356, "grad_norm": 246.0, "learning_rate": 9.667890037431906e-05, "loss": 12.1255, "step": 3435 }, { "epoch": 0.1432203743070318, "grad_norm": 600.0, "learning_rate": 9.667648091687384e-05, "loss": 19.0003, "step": 3436 }, { "epoch": 0.14326205660456004, "grad_norm": 366.0, "learning_rate": 9.667406060874559e-05, "loss": 13.0636, "step": 3437 }, { "epoch": 0.14330373890208828, "grad_norm": 406.0, "learning_rate": 9.667163944997848e-05, "loss": 14.3135, "step": 3438 }, { "epoch": 0.14334542119961652, "grad_norm": 122.0, "learning_rate": 9.666921744061658e-05, "loss": 8.1253, "step": 3439 }, { "epoch": 0.14338710349714476, "grad_norm": 110.5, "learning_rate": 9.666679458070405e-05, "loss": 8.9378, "step": 3440 }, { "epoch": 0.143428785794673, "grad_norm": 348.0, "learning_rate": 9.666437087028505e-05, "loss": 15.063, "step": 3441 }, { "epoch": 0.14347046809220124, "grad_norm": 494.0, "learning_rate": 9.666194630940375e-05, "loss": 16.3753, "step": 3442 }, { "epoch": 0.14351215038972948, "grad_norm": 516.0, "learning_rate": 9.665952089810432e-05, "loss": 16.1266, "step": 3443 }, { "epoch": 0.14355383268725772, "grad_norm": 360.0, "learning_rate": 9.6657094636431e-05, "loss": 14.5628, "step": 3444 }, { "epoch": 0.14359551498478595, "grad_norm": 342.0, "learning_rate": 9.665466752442797e-05, "loss": 11.7505, "step": 3445 }, { "epoch": 0.1436371972823142, "grad_norm": 472.0, "learning_rate": 9.66522395621395e-05, "loss": 17.0003, "step": 3446 }, { "epoch": 0.14367887957984243, "grad_norm": 290.0, "learning_rate": 9.664981074960981e-05, "loss": 13.8765, "step": 3447 }, { "epoch": 0.14372056187737067, "grad_norm": 205.0, "learning_rate": 9.66473810868832e-05, "loss": 11.6255, "step": 3448 }, { "epoch": 0.1437622441748989, "grad_norm": 165.0, "learning_rate": 9.66449505740039e-05, "loss": 10.7506, "step": 3449 }, { "epoch": 0.14380392647242715, "grad_norm": 500.0, "learning_rate": 9.664251921101625e-05, "loss": 17.1255, "step": 3450 }, { "epoch": 0.1438456087699554, "grad_norm": 976.0, "learning_rate": 9.664008699796455e-05, "loss": 26.0003, "step": 3451 }, { "epoch": 0.14388729106748363, "grad_norm": 512.0, "learning_rate": 9.663765393489311e-05, "loss": 16.5003, "step": 3452 }, { "epoch": 0.14392897336501187, "grad_norm": 424.0, "learning_rate": 9.66352200218463e-05, "loss": 16.2506, "step": 3453 }, { "epoch": 0.1439706556625401, "grad_norm": 600.0, "learning_rate": 9.663278525886845e-05, "loss": 16.3762, "step": 3454 }, { "epoch": 0.14401233796006835, "grad_norm": 358.0, "learning_rate": 9.663034964600396e-05, "loss": 13.5004, "step": 3455 }, { "epoch": 0.1440540202575966, "grad_norm": 320.0, "learning_rate": 9.66279131832972e-05, "loss": 14.1879, "step": 3456 }, { "epoch": 0.14409570255512483, "grad_norm": 213.0, "learning_rate": 9.66254758707926e-05, "loss": 11.5628, "step": 3457 }, { "epoch": 0.14413738485265307, "grad_norm": 1168.0, "learning_rate": 9.662303770853456e-05, "loss": 27.0004, "step": 3458 }, { "epoch": 0.1441790671501813, "grad_norm": 191.0, "learning_rate": 9.66205986965675e-05, "loss": 11.9382, "step": 3459 }, { "epoch": 0.14422074944770955, "grad_norm": 374.0, "learning_rate": 9.66181588349359e-05, "loss": 12.8754, "step": 3460 }, { "epoch": 0.14426243174523778, "grad_norm": 476.0, "learning_rate": 9.661571812368421e-05, "loss": 15.7505, "step": 3461 }, { "epoch": 0.14430411404276602, "grad_norm": 400.0, "learning_rate": 9.661327656285694e-05, "loss": 16.126, "step": 3462 }, { "epoch": 0.14434579634029426, "grad_norm": 406.0, "learning_rate": 9.661083415249856e-05, "loss": 17.0004, "step": 3463 }, { "epoch": 0.14438747863782253, "grad_norm": 448.0, "learning_rate": 9.66083908926536e-05, "loss": 17.2507, "step": 3464 }, { "epoch": 0.14442916093535077, "grad_norm": 255.0, "learning_rate": 9.660594678336654e-05, "loss": 12.4378, "step": 3465 }, { "epoch": 0.144470843232879, "grad_norm": 636.0, "learning_rate": 9.660350182468198e-05, "loss": 21.6254, "step": 3466 }, { "epoch": 0.14451252553040725, "grad_norm": 154.0, "learning_rate": 9.660105601664448e-05, "loss": 8.6879, "step": 3467 }, { "epoch": 0.1445542078279355, "grad_norm": 354.0, "learning_rate": 9.659860935929859e-05, "loss": 13.8756, "step": 3468 }, { "epoch": 0.14459589012546373, "grad_norm": 250.0, "learning_rate": 9.659616185268889e-05, "loss": 12.2521, "step": 3469 }, { "epoch": 0.14463757242299197, "grad_norm": 240.0, "learning_rate": 9.659371349686001e-05, "loss": 12.2523, "step": 3470 }, { "epoch": 0.1446792547205202, "grad_norm": 532.0, "learning_rate": 9.659126429185659e-05, "loss": 18.1252, "step": 3471 }, { "epoch": 0.14472093701804845, "grad_norm": 466.0, "learning_rate": 9.658881423772322e-05, "loss": 15.9387, "step": 3472 }, { "epoch": 0.14476261931557668, "grad_norm": 334.0, "learning_rate": 9.658636333450457e-05, "loss": 13.7506, "step": 3473 }, { "epoch": 0.14480430161310492, "grad_norm": 294.0, "learning_rate": 9.658391158224532e-05, "loss": 13.2504, "step": 3474 }, { "epoch": 0.14484598391063316, "grad_norm": 372.0, "learning_rate": 9.658145898099015e-05, "loss": 15.1878, "step": 3475 }, { "epoch": 0.1448876662081614, "grad_norm": 704.0, "learning_rate": 9.657900553078376e-05, "loss": 18.0053, "step": 3476 }, { "epoch": 0.14492934850568964, "grad_norm": 484.0, "learning_rate": 9.657655123167084e-05, "loss": 17.7542, "step": 3477 }, { "epoch": 0.14497103080321788, "grad_norm": 58.25, "learning_rate": 9.657409608369616e-05, "loss": 9.0628, "step": 3478 }, { "epoch": 0.14501271310074612, "grad_norm": 354.0, "learning_rate": 9.657164008690443e-05, "loss": 14.942, "step": 3479 }, { "epoch": 0.14505439539827436, "grad_norm": 217.0, "learning_rate": 9.656918324134044e-05, "loss": 13.0631, "step": 3480 }, { "epoch": 0.1450960776958026, "grad_norm": 110.5, "learning_rate": 9.656672554704892e-05, "loss": 7.8756, "step": 3481 }, { "epoch": 0.14513775999333084, "grad_norm": 212.0, "learning_rate": 9.656426700407473e-05, "loss": 11.6882, "step": 3482 }, { "epoch": 0.14517944229085908, "grad_norm": 127.5, "learning_rate": 9.656180761246262e-05, "loss": 10.3129, "step": 3483 }, { "epoch": 0.14522112458838732, "grad_norm": 536.0, "learning_rate": 9.655934737225743e-05, "loss": 15.1264, "step": 3484 }, { "epoch": 0.14526280688591556, "grad_norm": 244.0, "learning_rate": 9.655688628350401e-05, "loss": 11.5004, "step": 3485 }, { "epoch": 0.1453044891834438, "grad_norm": 338.0, "learning_rate": 9.655442434624721e-05, "loss": 14.1877, "step": 3486 }, { "epoch": 0.14534617148097204, "grad_norm": 171.0, "learning_rate": 9.655196156053187e-05, "loss": 9.3134, "step": 3487 }, { "epoch": 0.14538785377850028, "grad_norm": 161.0, "learning_rate": 9.654949792640294e-05, "loss": 10.2509, "step": 3488 }, { "epoch": 0.14542953607602851, "grad_norm": 240.0, "learning_rate": 9.654703344390525e-05, "loss": 11.8129, "step": 3489 }, { "epoch": 0.14547121837355675, "grad_norm": 162.0, "learning_rate": 9.654456811308375e-05, "loss": 9.5003, "step": 3490 }, { "epoch": 0.145512900671085, "grad_norm": 1504.0, "learning_rate": 9.654210193398335e-05, "loss": 28.1371, "step": 3491 }, { "epoch": 0.14555458296861323, "grad_norm": 336.0, "learning_rate": 9.653963490664902e-05, "loss": 13.1259, "step": 3492 }, { "epoch": 0.14559626526614147, "grad_norm": 708.0, "learning_rate": 9.653716703112572e-05, "loss": 17.8784, "step": 3493 }, { "epoch": 0.1456379475636697, "grad_norm": 238.0, "learning_rate": 9.65346983074584e-05, "loss": 11.8127, "step": 3494 }, { "epoch": 0.14567962986119795, "grad_norm": 302.0, "learning_rate": 9.653222873569209e-05, "loss": 12.8755, "step": 3495 }, { "epoch": 0.1457213121587262, "grad_norm": 177.0, "learning_rate": 9.652975831587176e-05, "loss": 9.0012, "step": 3496 }, { "epoch": 0.14576299445625443, "grad_norm": 352.0, "learning_rate": 9.652728704804249e-05, "loss": 14.8137, "step": 3497 }, { "epoch": 0.14580467675378267, "grad_norm": 220.0, "learning_rate": 9.652481493224926e-05, "loss": 10.438, "step": 3498 }, { "epoch": 0.1458463590513109, "grad_norm": 356.0, "learning_rate": 9.652234196853714e-05, "loss": 14.5629, "step": 3499 }, { "epoch": 0.14588804134883915, "grad_norm": 147.0, "learning_rate": 9.651986815695122e-05, "loss": 7.5002, "step": 3500 }, { "epoch": 0.1459297236463674, "grad_norm": 306.0, "learning_rate": 9.651739349753657e-05, "loss": 13.9382, "step": 3501 }, { "epoch": 0.14597140594389563, "grad_norm": 498.0, "learning_rate": 9.651491799033829e-05, "loss": 17.0002, "step": 3502 }, { "epoch": 0.14601308824142387, "grad_norm": 1272.0, "learning_rate": 9.651244163540152e-05, "loss": 29.3794, "step": 3503 }, { "epoch": 0.1460547705389521, "grad_norm": 306.0, "learning_rate": 9.650996443277136e-05, "loss": 13.2504, "step": 3504 }, { "epoch": 0.14609645283648034, "grad_norm": 496.0, "learning_rate": 9.650748638249296e-05, "loss": 16.5005, "step": 3505 }, { "epoch": 0.14613813513400858, "grad_norm": 572.0, "learning_rate": 9.65050074846115e-05, "loss": 19.0003, "step": 3506 }, { "epoch": 0.14617981743153682, "grad_norm": 187.0, "learning_rate": 9.650252773917214e-05, "loss": 10.7506, "step": 3507 }, { "epoch": 0.14622149972906506, "grad_norm": 468.0, "learning_rate": 9.65000471462201e-05, "loss": 17.377, "step": 3508 }, { "epoch": 0.1462631820265933, "grad_norm": 316.0, "learning_rate": 9.649756570580057e-05, "loss": 13.6878, "step": 3509 }, { "epoch": 0.14630486432412154, "grad_norm": 135.0, "learning_rate": 9.649508341795877e-05, "loss": 9.3756, "step": 3510 }, { "epoch": 0.14634654662164978, "grad_norm": 173.0, "learning_rate": 9.649260028273995e-05, "loss": 11.0629, "step": 3511 }, { "epoch": 0.14638822891917802, "grad_norm": 612.0, "learning_rate": 9.649011630018936e-05, "loss": 18.2506, "step": 3512 }, { "epoch": 0.14642991121670626, "grad_norm": 262.0, "learning_rate": 9.648763147035229e-05, "loss": 13.3761, "step": 3513 }, { "epoch": 0.1464715935142345, "grad_norm": 83.0, "learning_rate": 9.648514579327399e-05, "loss": 9.063, "step": 3514 }, { "epoch": 0.14651327581176274, "grad_norm": 492.0, "learning_rate": 9.648265926899979e-05, "loss": 18.2503, "step": 3515 }, { "epoch": 0.14655495810929098, "grad_norm": 239.0, "learning_rate": 9.648017189757499e-05, "loss": 11.9378, "step": 3516 }, { "epoch": 0.14659664040681922, "grad_norm": 532.0, "learning_rate": 9.647768367904494e-05, "loss": 18.3751, "step": 3517 }, { "epoch": 0.14663832270434746, "grad_norm": 1672.0, "learning_rate": 9.647519461345498e-05, "loss": 42.2518, "step": 3518 }, { "epoch": 0.1466800050018757, "grad_norm": 564.0, "learning_rate": 9.647270470085046e-05, "loss": 17.8798, "step": 3519 }, { "epoch": 0.14672168729940394, "grad_norm": 466.0, "learning_rate": 9.647021394127678e-05, "loss": 16.2508, "step": 3520 }, { "epoch": 0.14676336959693217, "grad_norm": 203.0, "learning_rate": 9.646772233477934e-05, "loss": 11.6877, "step": 3521 }, { "epoch": 0.14680505189446041, "grad_norm": 201.0, "learning_rate": 9.646522988140352e-05, "loss": 11.6266, "step": 3522 }, { "epoch": 0.14684673419198865, "grad_norm": 364.0, "learning_rate": 9.646273658119476e-05, "loss": 14.0003, "step": 3523 }, { "epoch": 0.1468884164895169, "grad_norm": 204.0, "learning_rate": 9.646024243419848e-05, "loss": 10.8755, "step": 3524 }, { "epoch": 0.14693009878704513, "grad_norm": 760.0, "learning_rate": 9.64577474404602e-05, "loss": 21.5007, "step": 3525 }, { "epoch": 0.14697178108457337, "grad_norm": 266.0, "learning_rate": 9.645525160002533e-05, "loss": 12.4379, "step": 3526 }, { "epoch": 0.1470134633821016, "grad_norm": 366.0, "learning_rate": 9.645275491293937e-05, "loss": 14.5009, "step": 3527 }, { "epoch": 0.14705514567962985, "grad_norm": 980.0, "learning_rate": 9.645025737924782e-05, "loss": 24.1278, "step": 3528 }, { "epoch": 0.1470968279771581, "grad_norm": 608.0, "learning_rate": 9.644775899899623e-05, "loss": 19.3754, "step": 3529 }, { "epoch": 0.14713851027468633, "grad_norm": 756.0, "learning_rate": 9.64452597722301e-05, "loss": 24.5005, "step": 3530 }, { "epoch": 0.14718019257221457, "grad_norm": 1504.0, "learning_rate": 9.644275969899498e-05, "loss": 31.6298, "step": 3531 }, { "epoch": 0.1472218748697428, "grad_norm": 372.0, "learning_rate": 9.644025877933645e-05, "loss": 14.5629, "step": 3532 }, { "epoch": 0.14726355716727105, "grad_norm": 458.0, "learning_rate": 9.643775701330007e-05, "loss": 16.0006, "step": 3533 }, { "epoch": 0.1473052394647993, "grad_norm": 270.0, "learning_rate": 9.643525440093147e-05, "loss": 13.0003, "step": 3534 }, { "epoch": 0.14734692176232753, "grad_norm": 203.0, "learning_rate": 9.64327509422762e-05, "loss": 11.0628, "step": 3535 }, { "epoch": 0.14738860405985577, "grad_norm": 179.0, "learning_rate": 9.643024663737994e-05, "loss": 11.6881, "step": 3536 }, { "epoch": 0.14743028635738403, "grad_norm": 390.0, "learning_rate": 9.642774148628832e-05, "loss": 16.1252, "step": 3537 }, { "epoch": 0.14747196865491227, "grad_norm": 201.0, "learning_rate": 9.642523548904699e-05, "loss": 11.6254, "step": 3538 }, { "epoch": 0.1475136509524405, "grad_norm": 217.0, "learning_rate": 9.642272864570162e-05, "loss": 10.626, "step": 3539 }, { "epoch": 0.14755533324996875, "grad_norm": 362.0, "learning_rate": 9.64202209562979e-05, "loss": 13.9383, "step": 3540 }, { "epoch": 0.147597015547497, "grad_norm": 376.0, "learning_rate": 9.64177124208815e-05, "loss": 14.6879, "step": 3541 }, { "epoch": 0.14763869784502523, "grad_norm": 432.0, "learning_rate": 9.641520303949822e-05, "loss": 16.1261, "step": 3542 }, { "epoch": 0.14768038014255347, "grad_norm": 752.0, "learning_rate": 9.641269281219372e-05, "loss": 23.2504, "step": 3543 }, { "epoch": 0.1477220624400817, "grad_norm": 178.0, "learning_rate": 9.641018173901378e-05, "loss": 10.7513, "step": 3544 }, { "epoch": 0.14776374473760995, "grad_norm": 430.0, "learning_rate": 9.640766982000415e-05, "loss": 14.3127, "step": 3545 }, { "epoch": 0.1478054270351382, "grad_norm": 532.0, "learning_rate": 9.640515705521063e-05, "loss": 17.0004, "step": 3546 }, { "epoch": 0.14784710933266643, "grad_norm": 350.0, "learning_rate": 9.640264344467898e-05, "loss": 13.9421, "step": 3547 }, { "epoch": 0.14788879163019467, "grad_norm": 71.5, "learning_rate": 9.640012898845505e-05, "loss": 7.3762, "step": 3548 }, { "epoch": 0.1479304739277229, "grad_norm": 302.0, "learning_rate": 9.639761368658467e-05, "loss": 13.7502, "step": 3549 }, { "epoch": 0.14797215622525114, "grad_norm": 310.0, "learning_rate": 9.639509753911363e-05, "loss": 13.3131, "step": 3550 }, { "epoch": 0.14801383852277938, "grad_norm": 160.0, "learning_rate": 9.639258054608783e-05, "loss": 10.5628, "step": 3551 }, { "epoch": 0.14805552082030762, "grad_norm": 173.0, "learning_rate": 9.639006270755313e-05, "loss": 8.8757, "step": 3552 }, { "epoch": 0.14809720311783586, "grad_norm": 416.0, "learning_rate": 9.638754402355542e-05, "loss": 15.438, "step": 3553 }, { "epoch": 0.1481388854153641, "grad_norm": 358.0, "learning_rate": 9.63850244941406e-05, "loss": 14.6254, "step": 3554 }, { "epoch": 0.14818056771289234, "grad_norm": 532.0, "learning_rate": 9.638250411935459e-05, "loss": 17.3776, "step": 3555 }, { "epoch": 0.14822225001042058, "grad_norm": 160.0, "learning_rate": 9.637998289924333e-05, "loss": 10.4384, "step": 3556 }, { "epoch": 0.14826393230794882, "grad_norm": 286.0, "learning_rate": 9.637746083385276e-05, "loss": 13.6272, "step": 3557 }, { "epoch": 0.14830561460547706, "grad_norm": 78.0, "learning_rate": 9.637493792322885e-05, "loss": 6.8442, "step": 3558 }, { "epoch": 0.1483472969030053, "grad_norm": 106.5, "learning_rate": 9.637241416741758e-05, "loss": 9.1882, "step": 3559 }, { "epoch": 0.14838897920053354, "grad_norm": 422.0, "learning_rate": 9.636988956646495e-05, "loss": 15.5002, "step": 3560 }, { "epoch": 0.14843066149806178, "grad_norm": 328.0, "learning_rate": 9.636736412041696e-05, "loss": 14.2518, "step": 3561 }, { "epoch": 0.14847234379559002, "grad_norm": 98.0, "learning_rate": 9.636483782931965e-05, "loss": 9.0628, "step": 3562 }, { "epoch": 0.14851402609311826, "grad_norm": 414.0, "learning_rate": 9.636231069321905e-05, "loss": 16.0003, "step": 3563 }, { "epoch": 0.1485557083906465, "grad_norm": 378.0, "learning_rate": 9.635978271216122e-05, "loss": 14.5004, "step": 3564 }, { "epoch": 0.14859739068817474, "grad_norm": 504.0, "learning_rate": 9.635725388619223e-05, "loss": 15.9379, "step": 3565 }, { "epoch": 0.14863907298570297, "grad_norm": 344.0, "learning_rate": 9.635472421535818e-05, "loss": 14.8755, "step": 3566 }, { "epoch": 0.14868075528323121, "grad_norm": 302.0, "learning_rate": 9.635219369970518e-05, "loss": 13.1879, "step": 3567 }, { "epoch": 0.14872243758075945, "grad_norm": 148.0, "learning_rate": 9.634966233927931e-05, "loss": 8.9383, "step": 3568 }, { "epoch": 0.1487641198782877, "grad_norm": 712.0, "learning_rate": 9.634713013412675e-05, "loss": 21.1258, "step": 3569 }, { "epoch": 0.14880580217581593, "grad_norm": 238.0, "learning_rate": 9.634459708429361e-05, "loss": 7.0004, "step": 3570 }, { "epoch": 0.14884748447334417, "grad_norm": 298.0, "learning_rate": 9.634206318982609e-05, "loss": 13.9382, "step": 3571 }, { "epoch": 0.1488891667708724, "grad_norm": 88.0, "learning_rate": 9.633952845077034e-05, "loss": 9.3128, "step": 3572 }, { "epoch": 0.14893084906840065, "grad_norm": 214.0, "learning_rate": 9.633699286717259e-05, "loss": 11.6254, "step": 3573 }, { "epoch": 0.1489725313659289, "grad_norm": 336.0, "learning_rate": 9.633445643907901e-05, "loss": 13.4394, "step": 3574 }, { "epoch": 0.14901421366345713, "grad_norm": 424.0, "learning_rate": 9.633191916653585e-05, "loss": 15.7502, "step": 3575 }, { "epoch": 0.14905589596098537, "grad_norm": 242.0, "learning_rate": 9.632938104958936e-05, "loss": 11.8756, "step": 3576 }, { "epoch": 0.1490975782585136, "grad_norm": 632.0, "learning_rate": 9.632684208828579e-05, "loss": 18.6252, "step": 3577 }, { "epoch": 0.14913926055604185, "grad_norm": 234.0, "learning_rate": 9.63243022826714e-05, "loss": 12.2504, "step": 3578 }, { "epoch": 0.1491809428535701, "grad_norm": 326.0, "learning_rate": 9.63217616327925e-05, "loss": 12.6881, "step": 3579 }, { "epoch": 0.14922262515109833, "grad_norm": 136.0, "learning_rate": 9.631922013869537e-05, "loss": 10.6882, "step": 3580 }, { "epoch": 0.14926430744862657, "grad_norm": 1624.0, "learning_rate": 9.631667780042634e-05, "loss": 33.2548, "step": 3581 }, { "epoch": 0.1493059897461548, "grad_norm": 700.0, "learning_rate": 9.631413461803176e-05, "loss": 22.751, "step": 3582 }, { "epoch": 0.14934767204368304, "grad_norm": 140.0, "learning_rate": 9.631159059155797e-05, "loss": 10.0631, "step": 3583 }, { "epoch": 0.14938935434121128, "grad_norm": 402.0, "learning_rate": 9.630904572105131e-05, "loss": 15.688, "step": 3584 }, { "epoch": 0.14943103663873952, "grad_norm": 692.0, "learning_rate": 9.63065000065582e-05, "loss": 21.2517, "step": 3585 }, { "epoch": 0.14947271893626776, "grad_norm": 372.0, "learning_rate": 9.630395344812499e-05, "loss": 13.313, "step": 3586 }, { "epoch": 0.149514401233796, "grad_norm": 440.0, "learning_rate": 9.630140604579814e-05, "loss": 16.5006, "step": 3587 }, { "epoch": 0.14955608353132424, "grad_norm": 744.0, "learning_rate": 9.629885779962405e-05, "loss": 21.7502, "step": 3588 }, { "epoch": 0.14959776582885248, "grad_norm": 44.25, "learning_rate": 9.629630870964917e-05, "loss": 7.4383, "step": 3589 }, { "epoch": 0.14963944812638072, "grad_norm": 414.0, "learning_rate": 9.629375877591992e-05, "loss": 15.129, "step": 3590 }, { "epoch": 0.14968113042390896, "grad_norm": 92.0, "learning_rate": 9.629120799848286e-05, "loss": 9.2509, "step": 3591 }, { "epoch": 0.1497228127214372, "grad_norm": 104.0, "learning_rate": 9.62886563773844e-05, "loss": 9.7504, "step": 3592 }, { "epoch": 0.14976449501896544, "grad_norm": 764.0, "learning_rate": 9.628610391267105e-05, "loss": 21.7507, "step": 3593 }, { "epoch": 0.14980617731649368, "grad_norm": 1600.0, "learning_rate": 9.628355060438937e-05, "loss": 32.503, "step": 3594 }, { "epoch": 0.14984785961402192, "grad_norm": 624.0, "learning_rate": 9.628099645258587e-05, "loss": 18.6259, "step": 3595 }, { "epoch": 0.14988954191155016, "grad_norm": 235.0, "learning_rate": 9.62784414573071e-05, "loss": 11.063, "step": 3596 }, { "epoch": 0.1499312242090784, "grad_norm": 162.0, "learning_rate": 9.627588561859961e-05, "loss": 12.3134, "step": 3597 }, { "epoch": 0.14997290650660663, "grad_norm": 884.0, "learning_rate": 9.627332893651002e-05, "loss": 23.3771, "step": 3598 }, { "epoch": 0.15001458880413487, "grad_norm": 458.0, "learning_rate": 9.62707714110849e-05, "loss": 15.1252, "step": 3599 }, { "epoch": 0.1500562711016631, "grad_norm": 448.0, "learning_rate": 9.626821304237086e-05, "loss": 13.6877, "step": 3600 }, { "epoch": 0.15009795339919135, "grad_norm": 1248.0, "learning_rate": 9.626565383041452e-05, "loss": 28.6299, "step": 3601 }, { "epoch": 0.1501396356967196, "grad_norm": 370.0, "learning_rate": 9.626309377526254e-05, "loss": 14.3752, "step": 3602 }, { "epoch": 0.15018131799424783, "grad_norm": 221.0, "learning_rate": 9.626053287696157e-05, "loss": 11.6255, "step": 3603 }, { "epoch": 0.15022300029177607, "grad_norm": 672.0, "learning_rate": 9.625797113555828e-05, "loss": 18.5008, "step": 3604 }, { "epoch": 0.1502646825893043, "grad_norm": 478.0, "learning_rate": 9.625540855109936e-05, "loss": 16.127, "step": 3605 }, { "epoch": 0.15030636488683255, "grad_norm": 249.0, "learning_rate": 9.62528451236315e-05, "loss": 11.9377, "step": 3606 }, { "epoch": 0.1503480471843608, "grad_norm": 478.0, "learning_rate": 9.625028085320145e-05, "loss": 16.6253, "step": 3607 }, { "epoch": 0.15038972948188903, "grad_norm": 239.0, "learning_rate": 9.624771573985592e-05, "loss": 11.6877, "step": 3608 }, { "epoch": 0.15043141177941727, "grad_norm": 164.0, "learning_rate": 9.624514978364165e-05, "loss": 9.3132, "step": 3609 }, { "epoch": 0.15047309407694553, "grad_norm": 584.0, "learning_rate": 9.624258298460545e-05, "loss": 19.6256, "step": 3610 }, { "epoch": 0.15051477637447377, "grad_norm": 163.0, "learning_rate": 9.624001534279405e-05, "loss": 8.6889, "step": 3611 }, { "epoch": 0.150556458672002, "grad_norm": 480.0, "learning_rate": 9.623744685825426e-05, "loss": 17.2511, "step": 3612 }, { "epoch": 0.15059814096953025, "grad_norm": 318.0, "learning_rate": 9.62348775310329e-05, "loss": 11.6879, "step": 3613 }, { "epoch": 0.1506398232670585, "grad_norm": 744.0, "learning_rate": 9.623230736117682e-05, "loss": 20.7504, "step": 3614 }, { "epoch": 0.15068150556458673, "grad_norm": 668.0, "learning_rate": 9.62297363487328e-05, "loss": 20.7505, "step": 3615 }, { "epoch": 0.15072318786211497, "grad_norm": 470.0, "learning_rate": 9.622716449374775e-05, "loss": 14.9378, "step": 3616 }, { "epoch": 0.1507648701596432, "grad_norm": 316.0, "learning_rate": 9.622459179626852e-05, "loss": 14.1879, "step": 3617 }, { "epoch": 0.15080655245717145, "grad_norm": 1184.0, "learning_rate": 9.622201825634198e-05, "loss": 28.6253, "step": 3618 }, { "epoch": 0.1508482347546997, "grad_norm": 147.0, "learning_rate": 9.62194438740151e-05, "loss": 10.5629, "step": 3619 }, { "epoch": 0.15088991705222793, "grad_norm": 177.0, "learning_rate": 9.62168686493347e-05, "loss": 9.6894, "step": 3620 }, { "epoch": 0.15093159934975617, "grad_norm": 460.0, "learning_rate": 9.621429258234779e-05, "loss": 16.7503, "step": 3621 }, { "epoch": 0.1509732816472844, "grad_norm": 256.0, "learning_rate": 9.62117156731013e-05, "loss": 12.6254, "step": 3622 }, { "epoch": 0.15101496394481265, "grad_norm": 298.0, "learning_rate": 9.620913792164219e-05, "loss": 10.8131, "step": 3623 }, { "epoch": 0.1510566462423409, "grad_norm": 121.5, "learning_rate": 9.620655932801743e-05, "loss": 9.7502, "step": 3624 }, { "epoch": 0.15109832853986913, "grad_norm": 74.5, "learning_rate": 9.620397989227403e-05, "loss": 8.2504, "step": 3625 }, { "epoch": 0.15114001083739736, "grad_norm": 760.0, "learning_rate": 9.620139961445899e-05, "loss": 21.7516, "step": 3626 }, { "epoch": 0.1511816931349256, "grad_norm": 484.0, "learning_rate": 9.619881849461936e-05, "loss": 18.2503, "step": 3627 }, { "epoch": 0.15122337543245384, "grad_norm": 2080.0, "learning_rate": 9.619623653280215e-05, "loss": 46.0002, "step": 3628 }, { "epoch": 0.15126505772998208, "grad_norm": 478.0, "learning_rate": 9.619365372905442e-05, "loss": 16.7538, "step": 3629 }, { "epoch": 0.15130674002751032, "grad_norm": 296.0, "learning_rate": 9.619107008342325e-05, "loss": 13.2526, "step": 3630 }, { "epoch": 0.15134842232503856, "grad_norm": 672.0, "learning_rate": 9.618848559595572e-05, "loss": 19.7502, "step": 3631 }, { "epoch": 0.1513901046225668, "grad_norm": 340.0, "learning_rate": 9.618590026669896e-05, "loss": 13.0628, "step": 3632 }, { "epoch": 0.15143178692009504, "grad_norm": 264.0, "learning_rate": 9.618331409570005e-05, "loss": 11.6882, "step": 3633 }, { "epoch": 0.15147346921762328, "grad_norm": 264.0, "learning_rate": 9.618072708300617e-05, "loss": 12.2503, "step": 3634 }, { "epoch": 0.15151515151515152, "grad_norm": 400.0, "learning_rate": 9.61781392286644e-05, "loss": 14.7502, "step": 3635 }, { "epoch": 0.15155683381267976, "grad_norm": 144.0, "learning_rate": 9.617555053272197e-05, "loss": 9.1884, "step": 3636 }, { "epoch": 0.151598516110208, "grad_norm": 496.0, "learning_rate": 9.617296099522602e-05, "loss": 19.2503, "step": 3637 }, { "epoch": 0.15164019840773624, "grad_norm": 213.0, "learning_rate": 9.617037061622375e-05, "loss": 12.0627, "step": 3638 }, { "epoch": 0.15168188070526448, "grad_norm": 380.0, "learning_rate": 9.616777939576241e-05, "loss": 16.7507, "step": 3639 }, { "epoch": 0.15172356300279272, "grad_norm": 1464.0, "learning_rate": 9.616518733388916e-05, "loss": 30.5043, "step": 3640 }, { "epoch": 0.15176524530032096, "grad_norm": 93.5, "learning_rate": 9.616259443065129e-05, "loss": 7.5637, "step": 3641 }, { "epoch": 0.1518069275978492, "grad_norm": 356.0, "learning_rate": 9.616000068609602e-05, "loss": 13.3128, "step": 3642 }, { "epoch": 0.15184860989537743, "grad_norm": 344.0, "learning_rate": 9.615740610027066e-05, "loss": 14.8129, "step": 3643 }, { "epoch": 0.15189029219290567, "grad_norm": 336.0, "learning_rate": 9.615481067322247e-05, "loss": 13.3128, "step": 3644 }, { "epoch": 0.1519319744904339, "grad_norm": 544.0, "learning_rate": 9.615221440499876e-05, "loss": 17.8753, "step": 3645 }, { "epoch": 0.15197365678796215, "grad_norm": 458.0, "learning_rate": 9.614961729564683e-05, "loss": 14.6255, "step": 3646 }, { "epoch": 0.1520153390854904, "grad_norm": 368.0, "learning_rate": 9.614701934521404e-05, "loss": 14.8133, "step": 3647 }, { "epoch": 0.15205702138301863, "grad_norm": 253.0, "learning_rate": 9.614442055374773e-05, "loss": 12.6878, "step": 3648 }, { "epoch": 0.15209870368054687, "grad_norm": 144.0, "learning_rate": 9.614182092129526e-05, "loss": 7.344, "step": 3649 }, { "epoch": 0.1521403859780751, "grad_norm": 158.0, "learning_rate": 9.6139220447904e-05, "loss": 9.7504, "step": 3650 }, { "epoch": 0.15218206827560335, "grad_norm": 450.0, "learning_rate": 9.613661913362135e-05, "loss": 16.3766, "step": 3651 }, { "epoch": 0.1522237505731316, "grad_norm": 700.0, "learning_rate": 9.613401697849473e-05, "loss": 20.5035, "step": 3652 }, { "epoch": 0.15226543287065983, "grad_norm": 212.0, "learning_rate": 9.613141398257155e-05, "loss": 10.6254, "step": 3653 }, { "epoch": 0.15230711516818807, "grad_norm": 268.0, "learning_rate": 9.612881014589924e-05, "loss": 8.8133, "step": 3654 }, { "epoch": 0.1523487974657163, "grad_norm": 218.0, "learning_rate": 9.612620546852529e-05, "loss": 11.2505, "step": 3655 }, { "epoch": 0.15239047976324455, "grad_norm": 310.0, "learning_rate": 9.612359995049715e-05, "loss": 13.4392, "step": 3656 }, { "epoch": 0.15243216206077279, "grad_norm": 255.0, "learning_rate": 9.612099359186229e-05, "loss": 10.2523, "step": 3657 }, { "epoch": 0.15247384435830103, "grad_norm": 61.75, "learning_rate": 9.611838639266823e-05, "loss": 8.1884, "step": 3658 }, { "epoch": 0.15251552665582926, "grad_norm": 1400.0, "learning_rate": 9.611577835296251e-05, "loss": 33.5023, "step": 3659 }, { "epoch": 0.1525572089533575, "grad_norm": 225.0, "learning_rate": 9.61131694727926e-05, "loss": 12.1256, "step": 3660 }, { "epoch": 0.15259889125088574, "grad_norm": 486.0, "learning_rate": 9.61105597522061e-05, "loss": 17.3751, "step": 3661 }, { "epoch": 0.15264057354841398, "grad_norm": 1440.0, "learning_rate": 9.610794919125056e-05, "loss": 33.5002, "step": 3662 }, { "epoch": 0.15268225584594222, "grad_norm": 450.0, "learning_rate": 9.610533778997357e-05, "loss": 16.8753, "step": 3663 }, { "epoch": 0.15272393814347046, "grad_norm": 560.0, "learning_rate": 9.610272554842268e-05, "loss": 17.0005, "step": 3664 }, { "epoch": 0.1527656204409987, "grad_norm": 211.0, "learning_rate": 9.610011246664553e-05, "loss": 11.5629, "step": 3665 }, { "epoch": 0.15280730273852694, "grad_norm": 201.0, "learning_rate": 9.609749854468973e-05, "loss": 12.5004, "step": 3666 }, { "epoch": 0.15284898503605518, "grad_norm": 1472.0, "learning_rate": 9.609488378260295e-05, "loss": 35.5047, "step": 3667 }, { "epoch": 0.15289066733358342, "grad_norm": 422.0, "learning_rate": 9.609226818043279e-05, "loss": 15.8755, "step": 3668 }, { "epoch": 0.15293234963111166, "grad_norm": 436.0, "learning_rate": 9.608965173822697e-05, "loss": 16.3754, "step": 3669 }, { "epoch": 0.1529740319286399, "grad_norm": 288.0, "learning_rate": 9.608703445603315e-05, "loss": 13.2502, "step": 3670 }, { "epoch": 0.15301571422616814, "grad_norm": 174.0, "learning_rate": 9.608441633389905e-05, "loss": 9.3752, "step": 3671 }, { "epoch": 0.15305739652369638, "grad_norm": 382.0, "learning_rate": 9.608179737187234e-05, "loss": 15.0628, "step": 3672 }, { "epoch": 0.15309907882122462, "grad_norm": 450.0, "learning_rate": 9.60791775700008e-05, "loss": 17.7506, "step": 3673 }, { "epoch": 0.15314076111875286, "grad_norm": 732.0, "learning_rate": 9.607655692833217e-05, "loss": 20.7513, "step": 3674 }, { "epoch": 0.1531824434162811, "grad_norm": 452.0, "learning_rate": 9.607393544691418e-05, "loss": 16.5006, "step": 3675 }, { "epoch": 0.15322412571380933, "grad_norm": 1440.0, "learning_rate": 9.607131312579463e-05, "loss": 27.6306, "step": 3676 }, { "epoch": 0.15326580801133757, "grad_norm": 217.0, "learning_rate": 9.606868996502132e-05, "loss": 11.8128, "step": 3677 }, { "epoch": 0.1533074903088658, "grad_norm": 468.0, "learning_rate": 9.606606596464203e-05, "loss": 15.7505, "step": 3678 }, { "epoch": 0.15334917260639405, "grad_norm": 121.0, "learning_rate": 9.606344112470461e-05, "loss": 9.4398, "step": 3679 }, { "epoch": 0.1533908549039223, "grad_norm": 346.0, "learning_rate": 9.606081544525689e-05, "loss": 14.0633, "step": 3680 }, { "epoch": 0.15343253720145053, "grad_norm": 452.0, "learning_rate": 9.60581889263467e-05, "loss": 16.8754, "step": 3681 }, { "epoch": 0.15347421949897877, "grad_norm": 364.0, "learning_rate": 9.605556156802196e-05, "loss": 13.4383, "step": 3682 }, { "epoch": 0.15351590179650704, "grad_norm": 318.0, "learning_rate": 9.60529333703305e-05, "loss": 14.626, "step": 3683 }, { "epoch": 0.15355758409403528, "grad_norm": 446.0, "learning_rate": 9.605030433332023e-05, "loss": 15.6879, "step": 3684 }, { "epoch": 0.15359926639156352, "grad_norm": 314.0, "learning_rate": 9.60476744570391e-05, "loss": 12.2505, "step": 3685 }, { "epoch": 0.15364094868909176, "grad_norm": 688.0, "learning_rate": 9.6045043741535e-05, "loss": 19.1303, "step": 3686 }, { "epoch": 0.15368263098662, "grad_norm": 174.0, "learning_rate": 9.60424121868559e-05, "loss": 10.6262, "step": 3687 }, { "epoch": 0.15372431328414823, "grad_norm": 178.0, "learning_rate": 9.603977979304975e-05, "loss": 11.0629, "step": 3688 }, { "epoch": 0.15376599558167647, "grad_norm": 294.0, "learning_rate": 9.603714656016452e-05, "loss": 11.0044, "step": 3689 }, { "epoch": 0.1538076778792047, "grad_norm": 406.0, "learning_rate": 9.603451248824819e-05, "loss": 17.0018, "step": 3690 }, { "epoch": 0.15384936017673295, "grad_norm": 580.0, "learning_rate": 9.603187757734882e-05, "loss": 18.1257, "step": 3691 }, { "epoch": 0.1538910424742612, "grad_norm": 402.0, "learning_rate": 9.602924182751436e-05, "loss": 13.5003, "step": 3692 }, { "epoch": 0.15393272477178943, "grad_norm": 1112.0, "learning_rate": 9.602660523879291e-05, "loss": 19.8801, "step": 3693 }, { "epoch": 0.15397440706931767, "grad_norm": 103.5, "learning_rate": 9.602396781123248e-05, "loss": 9.6889, "step": 3694 }, { "epoch": 0.1540160893668459, "grad_norm": 720.0, "learning_rate": 9.602132954488115e-05, "loss": 21.2503, "step": 3695 }, { "epoch": 0.15405777166437415, "grad_norm": 524.0, "learning_rate": 9.601869043978702e-05, "loss": 17.8763, "step": 3696 }, { "epoch": 0.1540994539619024, "grad_norm": 446.0, "learning_rate": 9.601605049599815e-05, "loss": 15.6895, "step": 3697 }, { "epoch": 0.15414113625943063, "grad_norm": 458.0, "learning_rate": 9.601340971356268e-05, "loss": 17.127, "step": 3698 }, { "epoch": 0.15418281855695887, "grad_norm": 288.0, "learning_rate": 9.601076809252873e-05, "loss": 13.0002, "step": 3699 }, { "epoch": 0.1542245008544871, "grad_norm": 197.0, "learning_rate": 9.600812563294447e-05, "loss": 7.7504, "step": 3700 }, { "epoch": 0.15426618315201535, "grad_norm": 328.0, "learning_rate": 9.600548233485802e-05, "loss": 13.5007, "step": 3701 }, { "epoch": 0.15430786544954359, "grad_norm": 107.0, "learning_rate": 9.600283819831756e-05, "loss": 8.5014, "step": 3702 }, { "epoch": 0.15434954774707182, "grad_norm": 924.0, "learning_rate": 9.60001932233713e-05, "loss": 25.5017, "step": 3703 }, { "epoch": 0.15439123004460006, "grad_norm": 152.0, "learning_rate": 9.599754741006744e-05, "loss": 9.3753, "step": 3704 }, { "epoch": 0.1544329123421283, "grad_norm": 528.0, "learning_rate": 9.599490075845418e-05, "loss": 17.7504, "step": 3705 }, { "epoch": 0.15447459463965654, "grad_norm": 128.0, "learning_rate": 9.599225326857979e-05, "loss": 9.4378, "step": 3706 }, { "epoch": 0.15451627693718478, "grad_norm": 171.0, "learning_rate": 9.59896049404925e-05, "loss": 9.6877, "step": 3707 }, { "epoch": 0.15455795923471302, "grad_norm": 117.0, "learning_rate": 9.598695577424057e-05, "loss": 8.3129, "step": 3708 }, { "epoch": 0.15459964153224126, "grad_norm": 462.0, "learning_rate": 9.598430576987228e-05, "loss": 14.2526, "step": 3709 }, { "epoch": 0.1546413238297695, "grad_norm": 390.0, "learning_rate": 9.598165492743593e-05, "loss": 14.0629, "step": 3710 }, { "epoch": 0.15468300612729774, "grad_norm": 600.0, "learning_rate": 9.597900324697986e-05, "loss": 19.3753, "step": 3711 }, { "epoch": 0.15472468842482598, "grad_norm": 60.25, "learning_rate": 9.597635072855237e-05, "loss": 6.7505, "step": 3712 }, { "epoch": 0.15476637072235422, "grad_norm": 69.0, "learning_rate": 9.59736973722018e-05, "loss": 8.8773, "step": 3713 }, { "epoch": 0.15480805301988246, "grad_norm": 254.0, "learning_rate": 9.597104317797651e-05, "loss": 13.1255, "step": 3714 }, { "epoch": 0.1548497353174107, "grad_norm": 422.0, "learning_rate": 9.596838814592488e-05, "loss": 15.002, "step": 3715 }, { "epoch": 0.15489141761493894, "grad_norm": 494.0, "learning_rate": 9.59657322760953e-05, "loss": 14.3774, "step": 3716 }, { "epoch": 0.15493309991246718, "grad_norm": 268.0, "learning_rate": 9.596307556853616e-05, "loss": 11.0074, "step": 3717 }, { "epoch": 0.15497478220999542, "grad_norm": 356.0, "learning_rate": 9.596041802329589e-05, "loss": 14.0646, "step": 3718 }, { "epoch": 0.15501646450752365, "grad_norm": 1656.0, "learning_rate": 9.595775964042294e-05, "loss": 41.2515, "step": 3719 }, { "epoch": 0.1550581468050519, "grad_norm": 464.0, "learning_rate": 9.595510041996572e-05, "loss": 16.627, "step": 3720 }, { "epoch": 0.15509982910258013, "grad_norm": 604.0, "learning_rate": 9.595244036197272e-05, "loss": 18.752, "step": 3721 }, { "epoch": 0.15514151140010837, "grad_norm": 904.0, "learning_rate": 9.594977946649242e-05, "loss": 21.5101, "step": 3722 }, { "epoch": 0.1551831936976366, "grad_norm": 540.0, "learning_rate": 9.59471177335733e-05, "loss": 17.2588, "step": 3723 }, { "epoch": 0.15522487599516485, "grad_norm": 816.0, "learning_rate": 9.594445516326389e-05, "loss": 21.7506, "step": 3724 }, { "epoch": 0.1552665582926931, "grad_norm": 688.0, "learning_rate": 9.594179175561271e-05, "loss": 21.0006, "step": 3725 }, { "epoch": 0.15530824059022133, "grad_norm": 466.0, "learning_rate": 9.593912751066829e-05, "loss": 15.5711, "step": 3726 }, { "epoch": 0.15534992288774957, "grad_norm": 434.0, "learning_rate": 9.593646242847919e-05, "loss": 14.5666, "step": 3727 }, { "epoch": 0.1553916051852778, "grad_norm": 688.0, "learning_rate": 9.593379650909398e-05, "loss": 21.0002, "step": 3728 }, { "epoch": 0.15543328748280605, "grad_norm": 298.0, "learning_rate": 9.593112975256126e-05, "loss": 11.9384, "step": 3729 }, { "epoch": 0.1554749697803343, "grad_norm": 212.0, "learning_rate": 9.592846215892964e-05, "loss": 11.3758, "step": 3730 }, { "epoch": 0.15551665207786253, "grad_norm": 188.0, "learning_rate": 9.592579372824768e-05, "loss": 11.5015, "step": 3731 }, { "epoch": 0.15555833437539077, "grad_norm": 166.0, "learning_rate": 9.592312446056408e-05, "loss": 10.6878, "step": 3732 }, { "epoch": 0.155600016672919, "grad_norm": 440.0, "learning_rate": 9.592045435592745e-05, "loss": 15.8132, "step": 3733 }, { "epoch": 0.15564169897044725, "grad_norm": 175.0, "learning_rate": 9.591778341438646e-05, "loss": 10.7517, "step": 3734 }, { "epoch": 0.15568338126797548, "grad_norm": 402.0, "learning_rate": 9.59151116359898e-05, "loss": 15.6254, "step": 3735 }, { "epoch": 0.15572506356550372, "grad_norm": 128.0, "learning_rate": 9.591243902078615e-05, "loss": 9.7508, "step": 3736 }, { "epoch": 0.15576674586303196, "grad_norm": 294.0, "learning_rate": 9.590976556882423e-05, "loss": 11.9378, "step": 3737 }, { "epoch": 0.1558084281605602, "grad_norm": 454.0, "learning_rate": 9.590709128015276e-05, "loss": 15.7506, "step": 3738 }, { "epoch": 0.15585011045808844, "grad_norm": 158.0, "learning_rate": 9.590441615482047e-05, "loss": 10.8758, "step": 3739 }, { "epoch": 0.15589179275561668, "grad_norm": 328.0, "learning_rate": 9.590174019287611e-05, "loss": 14.1252, "step": 3740 }, { "epoch": 0.15593347505314492, "grad_norm": 272.0, "learning_rate": 9.589906339436847e-05, "loss": 12.0003, "step": 3741 }, { "epoch": 0.15597515735067316, "grad_norm": 404.0, "learning_rate": 9.589638575934632e-05, "loss": 15.3755, "step": 3742 }, { "epoch": 0.1560168396482014, "grad_norm": 1824.0, "learning_rate": 9.589370728785847e-05, "loss": 38.7508, "step": 3743 }, { "epoch": 0.15605852194572964, "grad_norm": 540.0, "learning_rate": 9.589102797995373e-05, "loss": 18.7514, "step": 3744 }, { "epoch": 0.15610020424325788, "grad_norm": 608.0, "learning_rate": 9.588834783568094e-05, "loss": 19.0011, "step": 3745 }, { "epoch": 0.15614188654078612, "grad_norm": 193.0, "learning_rate": 9.588566685508892e-05, "loss": 11.3762, "step": 3746 }, { "epoch": 0.15618356883831436, "grad_norm": 282.0, "learning_rate": 9.588298503822655e-05, "loss": 12.3754, "step": 3747 }, { "epoch": 0.1562252511358426, "grad_norm": 880.0, "learning_rate": 9.588030238514272e-05, "loss": 20.0018, "step": 3748 }, { "epoch": 0.15626693343337084, "grad_norm": 724.0, "learning_rate": 9.587761889588628e-05, "loss": 22.1259, "step": 3749 }, { "epoch": 0.15630861573089908, "grad_norm": 588.0, "learning_rate": 9.587493457050619e-05, "loss": 18.0005, "step": 3750 }, { "epoch": 0.15635029802842731, "grad_norm": 506.0, "learning_rate": 9.587224940905133e-05, "loss": 16.6264, "step": 3751 }, { "epoch": 0.15639198032595555, "grad_norm": 382.0, "learning_rate": 9.586956341157067e-05, "loss": 14.1258, "step": 3752 }, { "epoch": 0.1564336626234838, "grad_norm": 164.0, "learning_rate": 9.586687657811314e-05, "loss": 9.9381, "step": 3753 }, { "epoch": 0.15647534492101203, "grad_norm": 211.0, "learning_rate": 9.586418890872769e-05, "loss": 10.8128, "step": 3754 }, { "epoch": 0.15651702721854027, "grad_norm": 56.0, "learning_rate": 9.586150040346333e-05, "loss": 8.1884, "step": 3755 }, { "epoch": 0.15655870951606854, "grad_norm": 532.0, "learning_rate": 9.585881106236907e-05, "loss": 18.3753, "step": 3756 }, { "epoch": 0.15660039181359678, "grad_norm": 174.0, "learning_rate": 9.58561208854939e-05, "loss": 9.8756, "step": 3757 }, { "epoch": 0.15664207411112502, "grad_norm": 163.0, "learning_rate": 9.585342987288686e-05, "loss": 10.0629, "step": 3758 }, { "epoch": 0.15668375640865326, "grad_norm": 92.5, "learning_rate": 9.585073802459699e-05, "loss": 8.188, "step": 3759 }, { "epoch": 0.1567254387061815, "grad_norm": 209.0, "learning_rate": 9.584804534067335e-05, "loss": 10.5003, "step": 3760 }, { "epoch": 0.15676712100370974, "grad_norm": 384.0, "learning_rate": 9.5845351821165e-05, "loss": 14.5002, "step": 3761 }, { "epoch": 0.15680880330123798, "grad_norm": 280.0, "learning_rate": 9.584265746612106e-05, "loss": 13.3753, "step": 3762 }, { "epoch": 0.15685048559876621, "grad_norm": 490.0, "learning_rate": 9.58399622755906e-05, "loss": 17.8752, "step": 3763 }, { "epoch": 0.15689216789629445, "grad_norm": 604.0, "learning_rate": 9.583726624962278e-05, "loss": 18.8758, "step": 3764 }, { "epoch": 0.1569338501938227, "grad_norm": 290.0, "learning_rate": 9.583456938826671e-05, "loss": 13.0002, "step": 3765 }, { "epoch": 0.15697553249135093, "grad_norm": 178.0, "learning_rate": 9.583187169157153e-05, "loss": 11.626, "step": 3766 }, { "epoch": 0.15701721478887917, "grad_norm": 221.0, "learning_rate": 9.582917315958642e-05, "loss": 10.8753, "step": 3767 }, { "epoch": 0.1570588970864074, "grad_norm": 612.0, "learning_rate": 9.582647379236058e-05, "loss": 18.1262, "step": 3768 }, { "epoch": 0.15710057938393565, "grad_norm": 328.0, "learning_rate": 9.582377358994317e-05, "loss": 13.5007, "step": 3769 }, { "epoch": 0.1571422616814639, "grad_norm": 424.0, "learning_rate": 9.582107255238342e-05, "loss": 14.628, "step": 3770 }, { "epoch": 0.15718394397899213, "grad_norm": 402.0, "learning_rate": 9.581837067973056e-05, "loss": 13.3753, "step": 3771 }, { "epoch": 0.15722562627652037, "grad_norm": 154.0, "learning_rate": 9.581566797203384e-05, "loss": 8.2503, "step": 3772 }, { "epoch": 0.1572673085740486, "grad_norm": 228.0, "learning_rate": 9.581296442934248e-05, "loss": 10.688, "step": 3773 }, { "epoch": 0.15730899087157685, "grad_norm": 876.0, "learning_rate": 9.581026005170577e-05, "loss": 22.5002, "step": 3774 }, { "epoch": 0.1573506731691051, "grad_norm": 450.0, "learning_rate": 9.580755483917303e-05, "loss": 15.938, "step": 3775 }, { "epoch": 0.15739235546663333, "grad_norm": 368.0, "learning_rate": 9.580484879179352e-05, "loss": 13.7503, "step": 3776 }, { "epoch": 0.15743403776416157, "grad_norm": 438.0, "learning_rate": 9.580214190961659e-05, "loss": 13.3797, "step": 3777 }, { "epoch": 0.1574757200616898, "grad_norm": 322.0, "learning_rate": 9.579943419269155e-05, "loss": 13.9379, "step": 3778 }, { "epoch": 0.15751740235921805, "grad_norm": 652.0, "learning_rate": 9.579672564106776e-05, "loss": 19.8752, "step": 3779 }, { "epoch": 0.15755908465674628, "grad_norm": 124.5, "learning_rate": 9.579401625479456e-05, "loss": 10.5007, "step": 3780 }, { "epoch": 0.15760076695427452, "grad_norm": 225.0, "learning_rate": 9.579130603392137e-05, "loss": 12.1253, "step": 3781 }, { "epoch": 0.15764244925180276, "grad_norm": 314.0, "learning_rate": 9.578859497849755e-05, "loss": 12.0649, "step": 3782 }, { "epoch": 0.157684131549331, "grad_norm": 248.0, "learning_rate": 9.578588308857253e-05, "loss": 11.7503, "step": 3783 }, { "epoch": 0.15772581384685924, "grad_norm": 776.0, "learning_rate": 9.578317036419573e-05, "loss": 22.5006, "step": 3784 }, { "epoch": 0.15776749614438748, "grad_norm": 167.0, "learning_rate": 9.578045680541657e-05, "loss": 9.7504, "step": 3785 }, { "epoch": 0.15780917844191572, "grad_norm": 426.0, "learning_rate": 9.577774241228454e-05, "loss": 17.2526, "step": 3786 }, { "epoch": 0.15785086073944396, "grad_norm": 162.0, "learning_rate": 9.577502718484908e-05, "loss": 10.3762, "step": 3787 }, { "epoch": 0.1578925430369722, "grad_norm": 125.0, "learning_rate": 9.577231112315967e-05, "loss": 8.2505, "step": 3788 }, { "epoch": 0.15793422533450044, "grad_norm": 233.0, "learning_rate": 9.576959422726586e-05, "loss": 11.0003, "step": 3789 }, { "epoch": 0.15797590763202868, "grad_norm": 382.0, "learning_rate": 9.576687649721711e-05, "loss": 14.2508, "step": 3790 }, { "epoch": 0.15801758992955692, "grad_norm": 86.5, "learning_rate": 9.576415793306298e-05, "loss": 7.5947, "step": 3791 }, { "epoch": 0.15805927222708516, "grad_norm": 1592.0, "learning_rate": 9.5761438534853e-05, "loss": 33.5067, "step": 3792 }, { "epoch": 0.1581009545246134, "grad_norm": 1408.0, "learning_rate": 9.575871830263675e-05, "loss": 27.5033, "step": 3793 }, { "epoch": 0.15814263682214164, "grad_norm": 520.0, "learning_rate": 9.57559972364638e-05, "loss": 17.2502, "step": 3794 }, { "epoch": 0.15818431911966988, "grad_norm": 896.0, "learning_rate": 9.575327533638371e-05, "loss": 22.3804, "step": 3795 }, { "epoch": 0.15822600141719811, "grad_norm": 442.0, "learning_rate": 9.575055260244615e-05, "loss": 16.2503, "step": 3796 }, { "epoch": 0.15826768371472635, "grad_norm": 640.0, "learning_rate": 9.57478290347007e-05, "loss": 20.8753, "step": 3797 }, { "epoch": 0.1583093660122546, "grad_norm": 508.0, "learning_rate": 9.574510463319699e-05, "loss": 16.1298, "step": 3798 }, { "epoch": 0.15835104830978283, "grad_norm": 342.0, "learning_rate": 9.57423793979847e-05, "loss": 14.3754, "step": 3799 }, { "epoch": 0.15839273060731107, "grad_norm": 864.0, "learning_rate": 9.573965332911349e-05, "loss": 21.0053, "step": 3800 }, { "epoch": 0.1584344129048393, "grad_norm": 382.0, "learning_rate": 9.573692642663303e-05, "loss": 15.1878, "step": 3801 }, { "epoch": 0.15847609520236755, "grad_norm": 780.0, "learning_rate": 9.573419869059302e-05, "loss": 20.0007, "step": 3802 }, { "epoch": 0.1585177774998958, "grad_norm": 652.0, "learning_rate": 9.573147012104319e-05, "loss": 17.2541, "step": 3803 }, { "epoch": 0.15855945979742403, "grad_norm": 147.0, "learning_rate": 9.572874071803324e-05, "loss": 8.7503, "step": 3804 }, { "epoch": 0.15860114209495227, "grad_norm": 744.0, "learning_rate": 9.572601048161294e-05, "loss": 21.5007, "step": 3805 }, { "epoch": 0.1586428243924805, "grad_norm": 175.0, "learning_rate": 9.572327941183206e-05, "loss": 11.1259, "step": 3806 }, { "epoch": 0.15868450669000875, "grad_norm": 398.0, "learning_rate": 9.572054750874033e-05, "loss": 14.5003, "step": 3807 }, { "epoch": 0.158726188987537, "grad_norm": 488.0, "learning_rate": 9.571781477238757e-05, "loss": 16.1256, "step": 3808 }, { "epoch": 0.15876787128506523, "grad_norm": 402.0, "learning_rate": 9.571508120282357e-05, "loss": 13.131, "step": 3809 }, { "epoch": 0.15880955358259347, "grad_norm": 466.0, "learning_rate": 9.571234680009817e-05, "loss": 16.3752, "step": 3810 }, { "epoch": 0.1588512358801217, "grad_norm": 596.0, "learning_rate": 9.570961156426118e-05, "loss": 18.6262, "step": 3811 }, { "epoch": 0.15889291817764994, "grad_norm": 153.0, "learning_rate": 9.570687549536245e-05, "loss": 9.7511, "step": 3812 }, { "epoch": 0.15893460047517818, "grad_norm": 470.0, "learning_rate": 9.570413859345189e-05, "loss": 17.1252, "step": 3813 }, { "epoch": 0.15897628277270642, "grad_norm": 510.0, "learning_rate": 9.570140085857933e-05, "loss": 16.7503, "step": 3814 }, { "epoch": 0.15901796507023466, "grad_norm": 156.0, "learning_rate": 9.569866229079468e-05, "loss": 10.5003, "step": 3815 }, { "epoch": 0.1590596473677629, "grad_norm": 580.0, "learning_rate": 9.569592289014786e-05, "loss": 18.3801, "step": 3816 }, { "epoch": 0.15910132966529114, "grad_norm": 960.0, "learning_rate": 9.569318265668879e-05, "loss": 25.8755, "step": 3817 }, { "epoch": 0.15914301196281938, "grad_norm": 294.0, "learning_rate": 9.56904415904674e-05, "loss": 13.814, "step": 3818 }, { "epoch": 0.15918469426034762, "grad_norm": 584.0, "learning_rate": 9.568769969153366e-05, "loss": 17.5024, "step": 3819 }, { "epoch": 0.15922637655787586, "grad_norm": 544.0, "learning_rate": 9.568495695993754e-05, "loss": 17.2503, "step": 3820 }, { "epoch": 0.1592680588554041, "grad_norm": 240.0, "learning_rate": 9.568221339572901e-05, "loss": 12.188, "step": 3821 }, { "epoch": 0.15930974115293234, "grad_norm": 229.0, "learning_rate": 9.56794689989581e-05, "loss": 11.0628, "step": 3822 }, { "epoch": 0.15935142345046058, "grad_norm": 139.0, "learning_rate": 9.56767237696748e-05, "loss": 9.3754, "step": 3823 }, { "epoch": 0.15939310574798882, "grad_norm": 326.0, "learning_rate": 9.567397770792916e-05, "loss": 13.0628, "step": 3824 }, { "epoch": 0.15943478804551706, "grad_norm": 320.0, "learning_rate": 9.567123081377123e-05, "loss": 13.6878, "step": 3825 }, { "epoch": 0.1594764703430453, "grad_norm": 106.0, "learning_rate": 9.566848308725106e-05, "loss": 10.1263, "step": 3826 }, { "epoch": 0.15951815264057354, "grad_norm": 167.0, "learning_rate": 9.566573452841872e-05, "loss": 11.0007, "step": 3827 }, { "epoch": 0.15955983493810177, "grad_norm": 207.0, "learning_rate": 9.566298513732433e-05, "loss": 11.3129, "step": 3828 }, { "epoch": 0.15960151723563004, "grad_norm": 227.0, "learning_rate": 9.566023491401798e-05, "loss": 11.7502, "step": 3829 }, { "epoch": 0.15964319953315828, "grad_norm": 129.0, "learning_rate": 9.565748385854981e-05, "loss": 8.8148, "step": 3830 }, { "epoch": 0.15968488183068652, "grad_norm": 362.0, "learning_rate": 9.56547319709699e-05, "loss": 15.5648, "step": 3831 }, { "epoch": 0.15972656412821476, "grad_norm": 194.0, "learning_rate": 9.565197925132849e-05, "loss": 11.5639, "step": 3832 }, { "epoch": 0.159768246425743, "grad_norm": 338.0, "learning_rate": 9.564922569967568e-05, "loss": 14.0006, "step": 3833 }, { "epoch": 0.15980992872327124, "grad_norm": 191.0, "learning_rate": 9.564647131606168e-05, "loss": 9.3754, "step": 3834 }, { "epoch": 0.15985161102079948, "grad_norm": 648.0, "learning_rate": 9.56437161005367e-05, "loss": 18.3795, "step": 3835 }, { "epoch": 0.15989329331832772, "grad_norm": 276.0, "learning_rate": 9.564096005315094e-05, "loss": 13.938, "step": 3836 }, { "epoch": 0.15993497561585596, "grad_norm": 231.0, "learning_rate": 9.563820317395462e-05, "loss": 11.4378, "step": 3837 }, { "epoch": 0.1599766579133842, "grad_norm": 446.0, "learning_rate": 9.5635445462998e-05, "loss": 14.6879, "step": 3838 }, { "epoch": 0.16001834021091244, "grad_norm": 556.0, "learning_rate": 9.563268692033136e-05, "loss": 19.1252, "step": 3839 }, { "epoch": 0.16006002250844067, "grad_norm": 442.0, "learning_rate": 9.562992754600493e-05, "loss": 15.8753, "step": 3840 }, { "epoch": 0.16010170480596891, "grad_norm": 1048.0, "learning_rate": 9.562716734006902e-05, "loss": 21.2508, "step": 3841 }, { "epoch": 0.16014338710349715, "grad_norm": 173.0, "learning_rate": 9.562440630257392e-05, "loss": 10.6879, "step": 3842 }, { "epoch": 0.1601850694010254, "grad_norm": 792.0, "learning_rate": 9.562164443356998e-05, "loss": 21.8753, "step": 3843 }, { "epoch": 0.16022675169855363, "grad_norm": 536.0, "learning_rate": 9.561888173310754e-05, "loss": 17.7503, "step": 3844 }, { "epoch": 0.16026843399608187, "grad_norm": 1296.0, "learning_rate": 9.56161182012369e-05, "loss": 27.7554, "step": 3845 }, { "epoch": 0.1603101162936101, "grad_norm": 292.0, "learning_rate": 9.561335383800846e-05, "loss": 12.0006, "step": 3846 }, { "epoch": 0.16035179859113835, "grad_norm": 438.0, "learning_rate": 9.56105886434726e-05, "loss": 15.8755, "step": 3847 }, { "epoch": 0.1603934808886666, "grad_norm": 362.0, "learning_rate": 9.560782261767974e-05, "loss": 11.1883, "step": 3848 }, { "epoch": 0.16043516318619483, "grad_norm": 99.5, "learning_rate": 9.560505576068022e-05, "loss": 8.3774, "step": 3849 }, { "epoch": 0.16047684548372307, "grad_norm": 276.0, "learning_rate": 9.560228807252453e-05, "loss": 12.313, "step": 3850 }, { "epoch": 0.1605185277812513, "grad_norm": 176.0, "learning_rate": 9.55995195532631e-05, "loss": 9.8128, "step": 3851 }, { "epoch": 0.16056021007877955, "grad_norm": 468.0, "learning_rate": 9.559675020294637e-05, "loss": 16.7509, "step": 3852 }, { "epoch": 0.1606018923763078, "grad_norm": 804.0, "learning_rate": 9.559398002162482e-05, "loss": 23.7505, "step": 3853 }, { "epoch": 0.16064357467383603, "grad_norm": 394.0, "learning_rate": 9.559120900934893e-05, "loss": 15.2516, "step": 3854 }, { "epoch": 0.16068525697136427, "grad_norm": 55.0, "learning_rate": 9.558843716616923e-05, "loss": 7.4067, "step": 3855 }, { "epoch": 0.1607269392688925, "grad_norm": 808.0, "learning_rate": 9.55856644921362e-05, "loss": 23.3759, "step": 3856 }, { "epoch": 0.16076862156642074, "grad_norm": 776.0, "learning_rate": 9.558289098730037e-05, "loss": 21.1255, "step": 3857 }, { "epoch": 0.16081030386394898, "grad_norm": 230.0, "learning_rate": 9.558011665171234e-05, "loss": 11.564, "step": 3858 }, { "epoch": 0.16085198616147722, "grad_norm": 239.0, "learning_rate": 9.557734148542262e-05, "loss": 11.0004, "step": 3859 }, { "epoch": 0.16089366845900546, "grad_norm": 392.0, "learning_rate": 9.557456548848181e-05, "loss": 14.7505, "step": 3860 }, { "epoch": 0.1609353507565337, "grad_norm": 121.0, "learning_rate": 9.557178866094049e-05, "loss": 9.4381, "step": 3861 }, { "epoch": 0.16097703305406194, "grad_norm": 420.0, "learning_rate": 9.556901100284929e-05, "loss": 15.7507, "step": 3862 }, { "epoch": 0.16101871535159018, "grad_norm": 1304.0, "learning_rate": 9.55662325142588e-05, "loss": 30.88, "step": 3863 }, { "epoch": 0.16106039764911842, "grad_norm": 240.0, "learning_rate": 9.55634531952197e-05, "loss": 12.0629, "step": 3864 }, { "epoch": 0.16110207994664666, "grad_norm": 160.0, "learning_rate": 9.55606730457826e-05, "loss": 10.6256, "step": 3865 }, { "epoch": 0.1611437622441749, "grad_norm": 474.0, "learning_rate": 9.555789206599821e-05, "loss": 16.2516, "step": 3866 }, { "epoch": 0.16118544454170314, "grad_norm": 446.0, "learning_rate": 9.555511025591716e-05, "loss": 15.0007, "step": 3867 }, { "epoch": 0.16122712683923138, "grad_norm": 536.0, "learning_rate": 9.555232761559022e-05, "loss": 18.8762, "step": 3868 }, { "epoch": 0.16126880913675962, "grad_norm": 306.0, "learning_rate": 9.554954414506805e-05, "loss": 14.5034, "step": 3869 }, { "epoch": 0.16131049143428786, "grad_norm": 89.5, "learning_rate": 9.554675984440138e-05, "loss": 8.188, "step": 3870 }, { "epoch": 0.1613521737318161, "grad_norm": 372.0, "learning_rate": 9.5543974713641e-05, "loss": 13.8754, "step": 3871 }, { "epoch": 0.16139385602934433, "grad_norm": 246.0, "learning_rate": 9.554118875283762e-05, "loss": 10.6254, "step": 3872 }, { "epoch": 0.16143553832687257, "grad_norm": 408.0, "learning_rate": 9.553840196204203e-05, "loss": 14.8762, "step": 3873 }, { "epoch": 0.1614772206244008, "grad_norm": 756.0, "learning_rate": 9.553561434130501e-05, "loss": 24.0003, "step": 3874 }, { "epoch": 0.16151890292192905, "grad_norm": 604.0, "learning_rate": 9.55328258906774e-05, "loss": 21.3753, "step": 3875 }, { "epoch": 0.1615605852194573, "grad_norm": 450.0, "learning_rate": 9.553003661020998e-05, "loss": 15.1877, "step": 3876 }, { "epoch": 0.16160226751698553, "grad_norm": 540.0, "learning_rate": 9.552724649995361e-05, "loss": 17.3752, "step": 3877 }, { "epoch": 0.16164394981451377, "grad_norm": 1600.0, "learning_rate": 9.552445555995913e-05, "loss": 38.2503, "step": 3878 }, { "epoch": 0.161685632112042, "grad_norm": 364.0, "learning_rate": 9.552166379027739e-05, "loss": 11.8767, "step": 3879 }, { "epoch": 0.16172731440957025, "grad_norm": 224.0, "learning_rate": 9.55188711909593e-05, "loss": 11.6255, "step": 3880 }, { "epoch": 0.1617689967070985, "grad_norm": 125.5, "learning_rate": 9.551607776205576e-05, "loss": 9.6878, "step": 3881 }, { "epoch": 0.16181067900462673, "grad_norm": 180.0, "learning_rate": 9.551328350361763e-05, "loss": 12.0628, "step": 3882 }, { "epoch": 0.16185236130215497, "grad_norm": 544.0, "learning_rate": 9.551048841569588e-05, "loss": 17.1254, "step": 3883 }, { "epoch": 0.1618940435996832, "grad_norm": 366.0, "learning_rate": 9.550769249834144e-05, "loss": 13.063, "step": 3884 }, { "epoch": 0.16193572589721145, "grad_norm": 292.0, "learning_rate": 9.550489575160527e-05, "loss": 12.6251, "step": 3885 }, { "epoch": 0.16197740819473969, "grad_norm": 592.0, "learning_rate": 9.550209817553832e-05, "loss": 19.5002, "step": 3886 }, { "epoch": 0.16201909049226793, "grad_norm": 482.0, "learning_rate": 9.549929977019161e-05, "loss": 17.3755, "step": 3887 }, { "epoch": 0.16206077278979616, "grad_norm": 442.0, "learning_rate": 9.549650053561612e-05, "loss": 16.1253, "step": 3888 }, { "epoch": 0.1621024550873244, "grad_norm": 652.0, "learning_rate": 9.549370047186286e-05, "loss": 21.3758, "step": 3889 }, { "epoch": 0.16214413738485264, "grad_norm": 652.0, "learning_rate": 9.549089957898287e-05, "loss": 19.6252, "step": 3890 }, { "epoch": 0.16218581968238088, "grad_norm": 392.0, "learning_rate": 9.548809785702719e-05, "loss": 14.3127, "step": 3891 }, { "epoch": 0.16222750197990912, "grad_norm": 126.0, "learning_rate": 9.548529530604691e-05, "loss": 7.9705, "step": 3892 }, { "epoch": 0.16226918427743736, "grad_norm": 137.0, "learning_rate": 9.548249192609306e-05, "loss": 9.6877, "step": 3893 }, { "epoch": 0.1623108665749656, "grad_norm": 442.0, "learning_rate": 9.547968771721678e-05, "loss": 14.8752, "step": 3894 }, { "epoch": 0.16235254887249384, "grad_norm": 246.0, "learning_rate": 9.547688267946915e-05, "loss": 13.0628, "step": 3895 }, { "epoch": 0.16239423117002208, "grad_norm": 568.0, "learning_rate": 9.547407681290128e-05, "loss": 17.0048, "step": 3896 }, { "epoch": 0.16243591346755032, "grad_norm": 868.0, "learning_rate": 9.547127011756434e-05, "loss": 25.0003, "step": 3897 }, { "epoch": 0.16247759576507856, "grad_norm": 520.0, "learning_rate": 9.546846259350945e-05, "loss": 18.6263, "step": 3898 }, { "epoch": 0.1625192780626068, "grad_norm": 364.0, "learning_rate": 9.546565424078781e-05, "loss": 14.3757, "step": 3899 }, { "epoch": 0.16256096036013504, "grad_norm": 976.0, "learning_rate": 9.546284505945057e-05, "loss": 23.1255, "step": 3900 }, { "epoch": 0.16260264265766328, "grad_norm": 161.0, "learning_rate": 9.546003504954895e-05, "loss": 9.3755, "step": 3901 }, { "epoch": 0.16264432495519154, "grad_norm": 716.0, "learning_rate": 9.545722421113416e-05, "loss": 20.0004, "step": 3902 }, { "epoch": 0.16268600725271978, "grad_norm": 191.0, "learning_rate": 9.545441254425742e-05, "loss": 9.2505, "step": 3903 }, { "epoch": 0.16272768955024802, "grad_norm": 316.0, "learning_rate": 9.545160004896998e-05, "loss": 14.5632, "step": 3904 }, { "epoch": 0.16276937184777626, "grad_norm": 462.0, "learning_rate": 9.54487867253231e-05, "loss": 14.8754, "step": 3905 }, { "epoch": 0.1628110541453045, "grad_norm": 442.0, "learning_rate": 9.544597257336802e-05, "loss": 16.5006, "step": 3906 }, { "epoch": 0.16285273644283274, "grad_norm": 384.0, "learning_rate": 9.544315759315607e-05, "loss": 15.6282, "step": 3907 }, { "epoch": 0.16289441874036098, "grad_norm": 768.0, "learning_rate": 9.544034178473855e-05, "loss": 21.6252, "step": 3908 }, { "epoch": 0.16293610103788922, "grad_norm": 48.25, "learning_rate": 9.543752514816675e-05, "loss": 7.9378, "step": 3909 }, { "epoch": 0.16297778333541746, "grad_norm": 512.0, "learning_rate": 9.543470768349203e-05, "loss": 18.1258, "step": 3910 }, { "epoch": 0.1630194656329457, "grad_norm": 276.0, "learning_rate": 9.543188939076572e-05, "loss": 12.063, "step": 3911 }, { "epoch": 0.16306114793047394, "grad_norm": 350.0, "learning_rate": 9.542907027003923e-05, "loss": 15.5002, "step": 3912 }, { "epoch": 0.16310283022800218, "grad_norm": 232.0, "learning_rate": 9.542625032136385e-05, "loss": 13.0019, "step": 3913 }, { "epoch": 0.16314451252553042, "grad_norm": 218.0, "learning_rate": 9.542342954479106e-05, "loss": 11.1254, "step": 3914 }, { "epoch": 0.16318619482305866, "grad_norm": 424.0, "learning_rate": 9.542060794037222e-05, "loss": 15.7507, "step": 3915 }, { "epoch": 0.1632278771205869, "grad_norm": 928.0, "learning_rate": 9.541778550815876e-05, "loss": 24.0002, "step": 3916 }, { "epoch": 0.16326955941811513, "grad_norm": 464.0, "learning_rate": 9.541496224820214e-05, "loss": 16.6252, "step": 3917 }, { "epoch": 0.16331124171564337, "grad_norm": 322.0, "learning_rate": 9.541213816055381e-05, "loss": 13.188, "step": 3918 }, { "epoch": 0.1633529240131716, "grad_norm": 576.0, "learning_rate": 9.540931324526521e-05, "loss": 19.5008, "step": 3919 }, { "epoch": 0.16339460631069985, "grad_norm": 600.0, "learning_rate": 9.540648750238785e-05, "loss": 21.5002, "step": 3920 }, { "epoch": 0.1634362886082281, "grad_norm": 434.0, "learning_rate": 9.540366093197323e-05, "loss": 15.7516, "step": 3921 }, { "epoch": 0.16347797090575633, "grad_norm": 356.0, "learning_rate": 9.540083353407287e-05, "loss": 15.0628, "step": 3922 }, { "epoch": 0.16351965320328457, "grad_norm": 378.0, "learning_rate": 9.539800530873828e-05, "loss": 15.6877, "step": 3923 }, { "epoch": 0.1635613355008128, "grad_norm": 388.0, "learning_rate": 9.5395176256021e-05, "loss": 14.3127, "step": 3924 }, { "epoch": 0.16360301779834105, "grad_norm": 892.0, "learning_rate": 9.53923463759726e-05, "loss": 23.3759, "step": 3925 }, { "epoch": 0.1636447000958693, "grad_norm": 1448.0, "learning_rate": 9.538951566864468e-05, "loss": 34.0003, "step": 3926 }, { "epoch": 0.16368638239339753, "grad_norm": 888.0, "learning_rate": 9.53866841340888e-05, "loss": 23.6262, "step": 3927 }, { "epoch": 0.16372806469092577, "grad_norm": 872.0, "learning_rate": 9.538385177235658e-05, "loss": 24.2515, "step": 3928 }, { "epoch": 0.163769746988454, "grad_norm": 362.0, "learning_rate": 9.538101858349962e-05, "loss": 13.4386, "step": 3929 }, { "epoch": 0.16381142928598225, "grad_norm": 322.0, "learning_rate": 9.537818456756957e-05, "loss": 14.0628, "step": 3930 }, { "epoch": 0.16385311158351049, "grad_norm": 488.0, "learning_rate": 9.537534972461808e-05, "loss": 13.1288, "step": 3931 }, { "epoch": 0.16389479388103873, "grad_norm": 147.0, "learning_rate": 9.537251405469681e-05, "loss": 11.5636, "step": 3932 }, { "epoch": 0.16393647617856696, "grad_norm": 209.0, "learning_rate": 9.536967755785744e-05, "loss": 12.0008, "step": 3933 }, { "epoch": 0.1639781584760952, "grad_norm": 143.0, "learning_rate": 9.536684023415167e-05, "loss": 10.8132, "step": 3934 }, { "epoch": 0.16401984077362344, "grad_norm": 456.0, "learning_rate": 9.536400208363122e-05, "loss": 19.0004, "step": 3935 }, { "epoch": 0.16406152307115168, "grad_norm": 80.5, "learning_rate": 9.536116310634779e-05, "loss": 7.6255, "step": 3936 }, { "epoch": 0.16410320536867992, "grad_norm": 494.0, "learning_rate": 9.535832330235314e-05, "loss": 16.7512, "step": 3937 }, { "epoch": 0.16414488766620816, "grad_norm": 416.0, "learning_rate": 9.535548267169903e-05, "loss": 14.8132, "step": 3938 }, { "epoch": 0.1641865699637364, "grad_norm": 864.0, "learning_rate": 9.535264121443722e-05, "loss": 21.2553, "step": 3939 }, { "epoch": 0.16422825226126464, "grad_norm": 476.0, "learning_rate": 9.534979893061951e-05, "loss": 16.5003, "step": 3940 }, { "epoch": 0.16426993455879288, "grad_norm": 460.0, "learning_rate": 9.534695582029767e-05, "loss": 16.6254, "step": 3941 }, { "epoch": 0.16431161685632112, "grad_norm": 452.0, "learning_rate": 9.534411188352352e-05, "loss": 15.6877, "step": 3942 }, { "epoch": 0.16435329915384936, "grad_norm": 253.0, "learning_rate": 9.534126712034895e-05, "loss": 11.8752, "step": 3943 }, { "epoch": 0.1643949814513776, "grad_norm": 308.0, "learning_rate": 9.533842153082572e-05, "loss": 13.9379, "step": 3944 }, { "epoch": 0.16443666374890584, "grad_norm": 556.0, "learning_rate": 9.533557511500574e-05, "loss": 18.0002, "step": 3945 }, { "epoch": 0.16447834604643408, "grad_norm": 322.0, "learning_rate": 9.53327278729409e-05, "loss": 12.6255, "step": 3946 }, { "epoch": 0.16452002834396232, "grad_norm": 472.0, "learning_rate": 9.532987980468305e-05, "loss": 15.8761, "step": 3947 }, { "epoch": 0.16456171064149056, "grad_norm": 474.0, "learning_rate": 9.532703091028412e-05, "loss": 16.376, "step": 3948 }, { "epoch": 0.1646033929390188, "grad_norm": 70.5, "learning_rate": 9.532418118979605e-05, "loss": 8.6267, "step": 3949 }, { "epoch": 0.16464507523654703, "grad_norm": 266.0, "learning_rate": 9.532133064327073e-05, "loss": 13.188, "step": 3950 }, { "epoch": 0.16468675753407527, "grad_norm": 304.0, "learning_rate": 9.531847927076015e-05, "loss": 11.938, "step": 3951 }, { "epoch": 0.1647284398316035, "grad_norm": 95.5, "learning_rate": 9.531562707231625e-05, "loss": 8.3131, "step": 3952 }, { "epoch": 0.16477012212913175, "grad_norm": 129.0, "learning_rate": 9.531277404799101e-05, "loss": 8.1259, "step": 3953 }, { "epoch": 0.16481180442666, "grad_norm": 326.0, "learning_rate": 9.530992019783647e-05, "loss": 13.7508, "step": 3954 }, { "epoch": 0.16485348672418823, "grad_norm": 171.0, "learning_rate": 9.530706552190461e-05, "loss": 9.7502, "step": 3955 }, { "epoch": 0.16489516902171647, "grad_norm": 200.0, "learning_rate": 9.530421002024744e-05, "loss": 10.9377, "step": 3956 }, { "epoch": 0.1649368513192447, "grad_norm": 804.0, "learning_rate": 9.530135369291702e-05, "loss": 20.7503, "step": 3957 }, { "epoch": 0.16497853361677295, "grad_norm": 544.0, "learning_rate": 9.529849653996543e-05, "loss": 19.1258, "step": 3958 }, { "epoch": 0.1650202159143012, "grad_norm": 284.0, "learning_rate": 9.52956385614447e-05, "loss": 12.5006, "step": 3959 }, { "epoch": 0.16506189821182943, "grad_norm": 476.0, "learning_rate": 9.529277975740694e-05, "loss": 15.9378, "step": 3960 }, { "epoch": 0.16510358050935767, "grad_norm": 158.0, "learning_rate": 9.528992012790425e-05, "loss": 11.3753, "step": 3961 }, { "epoch": 0.1651452628068859, "grad_norm": 916.0, "learning_rate": 9.528705967298876e-05, "loss": 26.1254, "step": 3962 }, { "epoch": 0.16518694510441415, "grad_norm": 540.0, "learning_rate": 9.528419839271257e-05, "loss": 17.7509, "step": 3963 }, { "epoch": 0.16522862740194239, "grad_norm": 272.0, "learning_rate": 9.528133628712785e-05, "loss": 12.6255, "step": 3964 }, { "epoch": 0.16527030969947062, "grad_norm": 216.0, "learning_rate": 9.527847335628675e-05, "loss": 11.0003, "step": 3965 }, { "epoch": 0.16531199199699886, "grad_norm": 368.0, "learning_rate": 9.527560960024146e-05, "loss": 15.6252, "step": 3966 }, { "epoch": 0.1653536742945271, "grad_norm": 952.0, "learning_rate": 9.527274501904416e-05, "loss": 27.0007, "step": 3967 }, { "epoch": 0.16539535659205534, "grad_norm": 1432.0, "learning_rate": 9.526987961274707e-05, "loss": 29.6299, "step": 3968 }, { "epoch": 0.16543703888958358, "grad_norm": 448.0, "learning_rate": 9.52670133814024e-05, "loss": 16.7504, "step": 3969 }, { "epoch": 0.16547872118711182, "grad_norm": 184.0, "learning_rate": 9.526414632506239e-05, "loss": 11.6259, "step": 3970 }, { "epoch": 0.16552040348464006, "grad_norm": 1320.0, "learning_rate": 9.52612784437793e-05, "loss": 26.3807, "step": 3971 }, { "epoch": 0.1655620857821683, "grad_norm": 214.0, "learning_rate": 9.52584097376054e-05, "loss": 9.4378, "step": 3972 }, { "epoch": 0.16560376807969654, "grad_norm": 262.0, "learning_rate": 9.525554020659295e-05, "loss": 12.1253, "step": 3973 }, { "epoch": 0.16564545037722478, "grad_norm": 298.0, "learning_rate": 9.525266985079426e-05, "loss": 12.8127, "step": 3974 }, { "epoch": 0.16568713267475305, "grad_norm": 240.0, "learning_rate": 9.524979867026168e-05, "loss": 11.1254, "step": 3975 }, { "epoch": 0.16572881497228129, "grad_norm": 274.0, "learning_rate": 9.524692666504746e-05, "loss": 13.063, "step": 3976 }, { "epoch": 0.16577049726980952, "grad_norm": 80.5, "learning_rate": 9.5244053835204e-05, "loss": 8.0637, "step": 3977 }, { "epoch": 0.16581217956733776, "grad_norm": 292.0, "learning_rate": 9.524118018078366e-05, "loss": 13.7503, "step": 3978 }, { "epoch": 0.165853861864866, "grad_norm": 732.0, "learning_rate": 9.523830570183876e-05, "loss": 21.2501, "step": 3979 }, { "epoch": 0.16589554416239424, "grad_norm": 161.0, "learning_rate": 9.523543039842174e-05, "loss": 10.3754, "step": 3980 }, { "epoch": 0.16593722645992248, "grad_norm": 684.0, "learning_rate": 9.5232554270585e-05, "loss": 20.2503, "step": 3981 }, { "epoch": 0.16597890875745072, "grad_norm": 1352.0, "learning_rate": 9.522967731838093e-05, "loss": 28.7553, "step": 3982 }, { "epoch": 0.16602059105497896, "grad_norm": 175.0, "learning_rate": 9.522679954186197e-05, "loss": 10.938, "step": 3983 }, { "epoch": 0.1660622733525072, "grad_norm": 328.0, "learning_rate": 9.522392094108056e-05, "loss": 10.7503, "step": 3984 }, { "epoch": 0.16610395565003544, "grad_norm": 62.0, "learning_rate": 9.522104151608922e-05, "loss": 7.5632, "step": 3985 }, { "epoch": 0.16614563794756368, "grad_norm": 380.0, "learning_rate": 9.521816126694035e-05, "loss": 14.8132, "step": 3986 }, { "epoch": 0.16618732024509192, "grad_norm": 274.0, "learning_rate": 9.521528019368648e-05, "loss": 12.1879, "step": 3987 }, { "epoch": 0.16622900254262016, "grad_norm": 160.0, "learning_rate": 9.521239829638013e-05, "loss": 10.5632, "step": 3988 }, { "epoch": 0.1662706848401484, "grad_norm": 61.0, "learning_rate": 9.52095155750738e-05, "loss": 7.7815, "step": 3989 }, { "epoch": 0.16631236713767664, "grad_norm": 354.0, "learning_rate": 9.520663202982004e-05, "loss": 14.6252, "step": 3990 }, { "epoch": 0.16635404943520488, "grad_norm": 200.0, "learning_rate": 9.520374766067137e-05, "loss": 11.0027, "step": 3991 }, { "epoch": 0.16639573173273312, "grad_norm": 268.0, "learning_rate": 9.520086246768041e-05, "loss": 13.1255, "step": 3992 }, { "epoch": 0.16643741403026135, "grad_norm": 172.0, "learning_rate": 9.519797645089971e-05, "loss": 10.688, "step": 3993 }, { "epoch": 0.1664790963277896, "grad_norm": 246.0, "learning_rate": 9.51950896103819e-05, "loss": 12.2503, "step": 3994 }, { "epoch": 0.16652077862531783, "grad_norm": 536.0, "learning_rate": 9.519220194617955e-05, "loss": 17.8755, "step": 3995 }, { "epoch": 0.16656246092284607, "grad_norm": 398.0, "learning_rate": 9.51893134583453e-05, "loss": 14.6258, "step": 3996 }, { "epoch": 0.1666041432203743, "grad_norm": 512.0, "learning_rate": 9.518642414693182e-05, "loss": 17.5003, "step": 3997 }, { "epoch": 0.16664582551790255, "grad_norm": 296.0, "learning_rate": 9.518353401199173e-05, "loss": 13.5631, "step": 3998 }, { "epoch": 0.1666875078154308, "grad_norm": 192.0, "learning_rate": 9.518064305357773e-05, "loss": 9.1879, "step": 3999 }, { "epoch": 0.16672919011295903, "grad_norm": 118.0, "learning_rate": 9.517775127174252e-05, "loss": 9.5011, "step": 4000 }, { "epoch": 0.16677087241048727, "grad_norm": 114.0, "learning_rate": 9.517485866653874e-05, "loss": 7.8127, "step": 4001 }, { "epoch": 0.1668125547080155, "grad_norm": 520.0, "learning_rate": 9.517196523801919e-05, "loss": 16.5003, "step": 4002 }, { "epoch": 0.16685423700554375, "grad_norm": 360.0, "learning_rate": 9.516907098623654e-05, "loss": 15.0645, "step": 4003 }, { "epoch": 0.166895919303072, "grad_norm": 620.0, "learning_rate": 9.516617591124357e-05, "loss": 22.3752, "step": 4004 }, { "epoch": 0.16693760160060023, "grad_norm": 362.0, "learning_rate": 9.516328001309303e-05, "loss": 14.5003, "step": 4005 }, { "epoch": 0.16697928389812847, "grad_norm": 120.5, "learning_rate": 9.516038329183771e-05, "loss": 11.3762, "step": 4006 }, { "epoch": 0.1670209661956567, "grad_norm": 195.0, "learning_rate": 9.515748574753038e-05, "loss": 11.7513, "step": 4007 }, { "epoch": 0.16706264849318495, "grad_norm": 564.0, "learning_rate": 9.515458738022389e-05, "loss": 19.3751, "step": 4008 }, { "epoch": 0.16710433079071318, "grad_norm": 210.0, "learning_rate": 9.515168818997102e-05, "loss": 11.3127, "step": 4009 }, { "epoch": 0.16714601308824142, "grad_norm": 410.0, "learning_rate": 9.514878817682462e-05, "loss": 14.4381, "step": 4010 }, { "epoch": 0.16718769538576966, "grad_norm": 428.0, "learning_rate": 9.514588734083756e-05, "loss": 15.1251, "step": 4011 }, { "epoch": 0.1672293776832979, "grad_norm": 1384.0, "learning_rate": 9.514298568206268e-05, "loss": 32.252, "step": 4012 }, { "epoch": 0.16727105998082614, "grad_norm": 298.0, "learning_rate": 9.514008320055289e-05, "loss": 11.438, "step": 4013 }, { "epoch": 0.16731274227835438, "grad_norm": 205.0, "learning_rate": 9.513717989636107e-05, "loss": 10.6878, "step": 4014 }, { "epoch": 0.16735442457588262, "grad_norm": 238.0, "learning_rate": 9.513427576954015e-05, "loss": 11.1877, "step": 4015 }, { "epoch": 0.16739610687341086, "grad_norm": 284.0, "learning_rate": 9.513137082014305e-05, "loss": 12.2504, "step": 4016 }, { "epoch": 0.1674377891709391, "grad_norm": 324.0, "learning_rate": 9.512846504822268e-05, "loss": 11.1876, "step": 4017 }, { "epoch": 0.16747947146846734, "grad_norm": 290.0, "learning_rate": 9.512555845383207e-05, "loss": 12.6265, "step": 4018 }, { "epoch": 0.16752115376599558, "grad_norm": 201.0, "learning_rate": 9.512265103702411e-05, "loss": 11.1263, "step": 4019 }, { "epoch": 0.16756283606352382, "grad_norm": 576.0, "learning_rate": 9.511974279785185e-05, "loss": 18.6254, "step": 4020 }, { "epoch": 0.16760451836105206, "grad_norm": 600.0, "learning_rate": 9.511683373636828e-05, "loss": 18.1254, "step": 4021 }, { "epoch": 0.1676462006585803, "grad_norm": 350.0, "learning_rate": 9.511392385262641e-05, "loss": 15.1257, "step": 4022 }, { "epoch": 0.16768788295610854, "grad_norm": 242.0, "learning_rate": 9.511101314667925e-05, "loss": 10.6253, "step": 4023 }, { "epoch": 0.16772956525363678, "grad_norm": 494.0, "learning_rate": 9.51081016185799e-05, "loss": 18.0007, "step": 4024 }, { "epoch": 0.16777124755116501, "grad_norm": 61.75, "learning_rate": 9.510518926838137e-05, "loss": 7.719, "step": 4025 }, { "epoch": 0.16781292984869325, "grad_norm": 336.0, "learning_rate": 9.510227609613678e-05, "loss": 13.439, "step": 4026 }, { "epoch": 0.1678546121462215, "grad_norm": 92.0, "learning_rate": 9.509936210189918e-05, "loss": 7.1254, "step": 4027 }, { "epoch": 0.16789629444374973, "grad_norm": 322.0, "learning_rate": 9.509644728572172e-05, "loss": 14.626, "step": 4028 }, { "epoch": 0.16793797674127797, "grad_norm": 972.0, "learning_rate": 9.50935316476575e-05, "loss": 23.3804, "step": 4029 }, { "epoch": 0.1679796590388062, "grad_norm": 272.0, "learning_rate": 9.509061518775967e-05, "loss": 11.5628, "step": 4030 }, { "epoch": 0.16802134133633445, "grad_norm": 242.0, "learning_rate": 9.508769790608136e-05, "loss": 13.0003, "step": 4031 }, { "epoch": 0.1680630236338627, "grad_norm": 488.0, "learning_rate": 9.508477980267577e-05, "loss": 15.0636, "step": 4032 }, { "epoch": 0.16810470593139093, "grad_norm": 304.0, "learning_rate": 9.508186087759606e-05, "loss": 14.0634, "step": 4033 }, { "epoch": 0.16814638822891917, "grad_norm": 498.0, "learning_rate": 9.507894113089544e-05, "loss": 17.6254, "step": 4034 }, { "epoch": 0.1681880705264474, "grad_norm": 326.0, "learning_rate": 9.50760205626271e-05, "loss": 10.315, "step": 4035 }, { "epoch": 0.16822975282397565, "grad_norm": 161.0, "learning_rate": 9.50730991728443e-05, "loss": 11.2505, "step": 4036 }, { "epoch": 0.1682714351215039, "grad_norm": 217.0, "learning_rate": 9.507017696160025e-05, "loss": 11.0015, "step": 4037 }, { "epoch": 0.16831311741903213, "grad_norm": 136.0, "learning_rate": 9.506725392894824e-05, "loss": 8.1256, "step": 4038 }, { "epoch": 0.16835479971656037, "grad_norm": 241.0, "learning_rate": 9.506433007494151e-05, "loss": 12.1878, "step": 4039 }, { "epoch": 0.1683964820140886, "grad_norm": 600.0, "learning_rate": 9.506140539963337e-05, "loss": 16.8793, "step": 4040 }, { "epoch": 0.16843816431161684, "grad_norm": 282.0, "learning_rate": 9.505847990307713e-05, "loss": 13.1881, "step": 4041 }, { "epoch": 0.16847984660914508, "grad_norm": 506.0, "learning_rate": 9.505555358532608e-05, "loss": 17.0004, "step": 4042 }, { "epoch": 0.16852152890667332, "grad_norm": 536.0, "learning_rate": 9.505262644643357e-05, "loss": 17.2502, "step": 4043 }, { "epoch": 0.16856321120420156, "grad_norm": 382.0, "learning_rate": 9.504969848645293e-05, "loss": 15.6253, "step": 4044 }, { "epoch": 0.1686048935017298, "grad_norm": 249.0, "learning_rate": 9.504676970543755e-05, "loss": 12.5014, "step": 4045 }, { "epoch": 0.16864657579925804, "grad_norm": 212.0, "learning_rate": 9.504384010344081e-05, "loss": 11.0002, "step": 4046 }, { "epoch": 0.16868825809678628, "grad_norm": 1016.0, "learning_rate": 9.504090968051605e-05, "loss": 21.6295, "step": 4047 }, { "epoch": 0.16872994039431455, "grad_norm": 230.0, "learning_rate": 9.503797843671673e-05, "loss": 11.6254, "step": 4048 }, { "epoch": 0.1687716226918428, "grad_norm": 792.0, "learning_rate": 9.503504637209625e-05, "loss": 18.8777, "step": 4049 }, { "epoch": 0.16881330498937103, "grad_norm": 644.0, "learning_rate": 9.503211348670806e-05, "loss": 18.8757, "step": 4050 }, { "epoch": 0.16885498728689927, "grad_norm": 366.0, "learning_rate": 9.502917978060562e-05, "loss": 14.2502, "step": 4051 }, { "epoch": 0.1688966695844275, "grad_norm": 236.0, "learning_rate": 9.502624525384235e-05, "loss": 11.1891, "step": 4052 }, { "epoch": 0.16893835188195575, "grad_norm": 1888.0, "learning_rate": 9.502330990647177e-05, "loss": 38.5032, "step": 4053 }, { "epoch": 0.16898003417948398, "grad_norm": 362.0, "learning_rate": 9.502037373854737e-05, "loss": 15.6877, "step": 4054 }, { "epoch": 0.16902171647701222, "grad_norm": 396.0, "learning_rate": 9.501743675012268e-05, "loss": 14.5008, "step": 4055 }, { "epoch": 0.16906339877454046, "grad_norm": 1104.0, "learning_rate": 9.50144989412512e-05, "loss": 23.5057, "step": 4056 }, { "epoch": 0.1691050810720687, "grad_norm": 362.0, "learning_rate": 9.501156031198647e-05, "loss": 15.5009, "step": 4057 }, { "epoch": 0.16914676336959694, "grad_norm": 217.0, "learning_rate": 9.500862086238206e-05, "loss": 11.8755, "step": 4058 }, { "epoch": 0.16918844566712518, "grad_norm": 262.0, "learning_rate": 9.500568059249155e-05, "loss": 12.439, "step": 4059 }, { "epoch": 0.16923012796465342, "grad_norm": 484.0, "learning_rate": 9.50027395023685e-05, "loss": 15.2504, "step": 4060 }, { "epoch": 0.16927181026218166, "grad_norm": 150.0, "learning_rate": 9.499979759206655e-05, "loss": 10.188, "step": 4061 }, { "epoch": 0.1693134925597099, "grad_norm": 564.0, "learning_rate": 9.499685486163928e-05, "loss": 19.1252, "step": 4062 }, { "epoch": 0.16935517485723814, "grad_norm": 136.0, "learning_rate": 9.499391131114032e-05, "loss": 11.0003, "step": 4063 }, { "epoch": 0.16939685715476638, "grad_norm": 442.0, "learning_rate": 9.499096694062337e-05, "loss": 14.7508, "step": 4064 }, { "epoch": 0.16943853945229462, "grad_norm": 73.0, "learning_rate": 9.498802175014203e-05, "loss": 7.4377, "step": 4065 }, { "epoch": 0.16948022174982286, "grad_norm": 243.0, "learning_rate": 9.498507573975e-05, "loss": 10.1878, "step": 4066 }, { "epoch": 0.1695219040473511, "grad_norm": 219.0, "learning_rate": 9.498212890950097e-05, "loss": 10.7508, "step": 4067 }, { "epoch": 0.16956358634487934, "grad_norm": 161.0, "learning_rate": 9.497918125944864e-05, "loss": 10.0627, "step": 4068 }, { "epoch": 0.16960526864240758, "grad_norm": 524.0, "learning_rate": 9.497623278964675e-05, "loss": 19.2504, "step": 4069 }, { "epoch": 0.16964695093993581, "grad_norm": 197.0, "learning_rate": 9.497328350014904e-05, "loss": 10.8752, "step": 4070 }, { "epoch": 0.16968863323746405, "grad_norm": 306.0, "learning_rate": 9.497033339100922e-05, "loss": 13.5635, "step": 4071 }, { "epoch": 0.1697303155349923, "grad_norm": 294.0, "learning_rate": 9.49673824622811e-05, "loss": 12.1878, "step": 4072 }, { "epoch": 0.16977199783252053, "grad_norm": 179.0, "learning_rate": 9.496443071401844e-05, "loss": 10.8759, "step": 4073 }, { "epoch": 0.16981368013004877, "grad_norm": 380.0, "learning_rate": 9.496147814627503e-05, "loss": 15.4381, "step": 4074 }, { "epoch": 0.169855362427577, "grad_norm": 382.0, "learning_rate": 9.49585247591047e-05, "loss": 13.6879, "step": 4075 }, { "epoch": 0.16989704472510525, "grad_norm": 268.0, "learning_rate": 9.495557055256125e-05, "loss": 13.5628, "step": 4076 }, { "epoch": 0.1699387270226335, "grad_norm": 620.0, "learning_rate": 9.495261552669853e-05, "loss": 19.5002, "step": 4077 }, { "epoch": 0.16998040932016173, "grad_norm": 161.0, "learning_rate": 9.494965968157044e-05, "loss": 9.6882, "step": 4078 }, { "epoch": 0.17002209161768997, "grad_norm": 222.0, "learning_rate": 9.494670301723077e-05, "loss": 11.6253, "step": 4079 }, { "epoch": 0.1700637739152182, "grad_norm": 119.5, "learning_rate": 9.494374553373348e-05, "loss": 9.3127, "step": 4080 }, { "epoch": 0.17010545621274645, "grad_norm": 446.0, "learning_rate": 9.494078723113242e-05, "loss": 17.2508, "step": 4081 }, { "epoch": 0.1701471385102747, "grad_norm": 268.0, "learning_rate": 9.493782810948152e-05, "loss": 11.9379, "step": 4082 }, { "epoch": 0.17018882080780293, "grad_norm": 388.0, "learning_rate": 9.493486816883472e-05, "loss": 14.064, "step": 4083 }, { "epoch": 0.17023050310533117, "grad_norm": 348.0, "learning_rate": 9.493190740924596e-05, "loss": 14.8752, "step": 4084 }, { "epoch": 0.1702721854028594, "grad_norm": 324.0, "learning_rate": 9.492894583076918e-05, "loss": 13.1261, "step": 4085 }, { "epoch": 0.17031386770038764, "grad_norm": 139.0, "learning_rate": 9.49259834334584e-05, "loss": 9.1881, "step": 4086 }, { "epoch": 0.17035554999791588, "grad_norm": 140.0, "learning_rate": 9.492302021736759e-05, "loss": 9.813, "step": 4087 }, { "epoch": 0.17039723229544412, "grad_norm": 245.0, "learning_rate": 9.492005618255072e-05, "loss": 10.0629, "step": 4088 }, { "epoch": 0.17043891459297236, "grad_norm": 330.0, "learning_rate": 9.491709132906185e-05, "loss": 12.3135, "step": 4089 }, { "epoch": 0.1704805968905006, "grad_norm": 470.0, "learning_rate": 9.491412565695501e-05, "loss": 17.6268, "step": 4090 }, { "epoch": 0.17052227918802884, "grad_norm": 354.0, "learning_rate": 9.491115916628424e-05, "loss": 13.8754, "step": 4091 }, { "epoch": 0.17056396148555708, "grad_norm": 198.0, "learning_rate": 9.49081918571036e-05, "loss": 11.7504, "step": 4092 }, { "epoch": 0.17060564378308532, "grad_norm": 336.0, "learning_rate": 9.490522372946718e-05, "loss": 13.5628, "step": 4093 }, { "epoch": 0.17064732608061356, "grad_norm": 298.0, "learning_rate": 9.490225478342906e-05, "loss": 11.6876, "step": 4094 }, { "epoch": 0.1706890083781418, "grad_norm": 210.0, "learning_rate": 9.489928501904339e-05, "loss": 11.1255, "step": 4095 }, { "epoch": 0.17073069067567004, "grad_norm": 394.0, "learning_rate": 9.489631443636424e-05, "loss": 12.2508, "step": 4096 }, { "epoch": 0.17077237297319828, "grad_norm": 564.0, "learning_rate": 9.48933430354458e-05, "loss": 17.6252, "step": 4097 }, { "epoch": 0.17081405527072652, "grad_norm": 328.0, "learning_rate": 9.489037081634217e-05, "loss": 13.7503, "step": 4098 }, { "epoch": 0.17085573756825476, "grad_norm": 612.0, "learning_rate": 9.488739777910756e-05, "loss": 20.8756, "step": 4099 }, { "epoch": 0.170897419865783, "grad_norm": 446.0, "learning_rate": 9.488442392379613e-05, "loss": 15.9395, "step": 4100 }, { "epoch": 0.17093910216331124, "grad_norm": 66.5, "learning_rate": 9.48814492504621e-05, "loss": 5.7502, "step": 4101 }, { "epoch": 0.17098078446083947, "grad_norm": 320.0, "learning_rate": 9.487847375915966e-05, "loss": 13.8128, "step": 4102 }, { "epoch": 0.1710224667583677, "grad_norm": 190.0, "learning_rate": 9.487549744994306e-05, "loss": 10.6879, "step": 4103 }, { "epoch": 0.17106414905589595, "grad_norm": 406.0, "learning_rate": 9.487252032286655e-05, "loss": 15.7503, "step": 4104 }, { "epoch": 0.1711058313534242, "grad_norm": 314.0, "learning_rate": 9.486954237798435e-05, "loss": 13.1256, "step": 4105 }, { "epoch": 0.17114751365095243, "grad_norm": 624.0, "learning_rate": 9.486656361535077e-05, "loss": 18.8753, "step": 4106 }, { "epoch": 0.17118919594848067, "grad_norm": 372.0, "learning_rate": 9.486358403502008e-05, "loss": 14.563, "step": 4107 }, { "epoch": 0.1712308782460089, "grad_norm": 644.0, "learning_rate": 9.486060363704658e-05, "loss": 18.7554, "step": 4108 }, { "epoch": 0.17127256054353715, "grad_norm": 616.0, "learning_rate": 9.485762242148461e-05, "loss": 18.7511, "step": 4109 }, { "epoch": 0.1713142428410654, "grad_norm": 284.0, "learning_rate": 9.48546403883885e-05, "loss": 12.6877, "step": 4110 }, { "epoch": 0.17135592513859363, "grad_norm": 110.5, "learning_rate": 9.485165753781257e-05, "loss": 6.9071, "step": 4111 }, { "epoch": 0.17139760743612187, "grad_norm": 628.0, "learning_rate": 9.48486738698112e-05, "loss": 19.1253, "step": 4112 }, { "epoch": 0.1714392897336501, "grad_norm": 107.0, "learning_rate": 9.484568938443878e-05, "loss": 6.2825, "step": 4113 }, { "epoch": 0.17148097203117835, "grad_norm": 612.0, "learning_rate": 9.484270408174966e-05, "loss": 19.6274, "step": 4114 }, { "epoch": 0.1715226543287066, "grad_norm": 430.0, "learning_rate": 9.483971796179831e-05, "loss": 17.1258, "step": 4115 }, { "epoch": 0.17156433662623483, "grad_norm": 249.0, "learning_rate": 9.483673102463911e-05, "loss": 11.2503, "step": 4116 }, { "epoch": 0.17160601892376307, "grad_norm": 328.0, "learning_rate": 9.48337432703265e-05, "loss": 12.6877, "step": 4117 }, { "epoch": 0.1716477012212913, "grad_norm": 234.0, "learning_rate": 9.483075469891495e-05, "loss": 11.6257, "step": 4118 }, { "epoch": 0.17168938351881954, "grad_norm": 260.0, "learning_rate": 9.48277653104589e-05, "loss": 13.1878, "step": 4119 }, { "epoch": 0.17173106581634778, "grad_norm": 1080.0, "learning_rate": 9.482477510501286e-05, "loss": 29.0003, "step": 4120 }, { "epoch": 0.17177274811387605, "grad_norm": 57.75, "learning_rate": 9.482178408263132e-05, "loss": 8.0009, "step": 4121 }, { "epoch": 0.1718144304114043, "grad_norm": 516.0, "learning_rate": 9.481879224336877e-05, "loss": 16.2503, "step": 4122 }, { "epoch": 0.17185611270893253, "grad_norm": 1232.0, "learning_rate": 9.481579958727975e-05, "loss": 31.0011, "step": 4123 }, { "epoch": 0.17189779500646077, "grad_norm": 133.0, "learning_rate": 9.481280611441883e-05, "loss": 9.5632, "step": 4124 }, { "epoch": 0.171939477303989, "grad_norm": 342.0, "learning_rate": 9.480981182484053e-05, "loss": 14.4381, "step": 4125 }, { "epoch": 0.17198115960151725, "grad_norm": 430.0, "learning_rate": 9.480681671859941e-05, "loss": 15.9377, "step": 4126 }, { "epoch": 0.1720228418990455, "grad_norm": 258.0, "learning_rate": 9.48038207957501e-05, "loss": 12.0005, "step": 4127 }, { "epoch": 0.17206452419657373, "grad_norm": 292.0, "learning_rate": 9.480082405634717e-05, "loss": 13.2504, "step": 4128 }, { "epoch": 0.17210620649410197, "grad_norm": 95.5, "learning_rate": 9.479782650044524e-05, "loss": 9.6879, "step": 4129 }, { "epoch": 0.1721478887916302, "grad_norm": 179.0, "learning_rate": 9.479482812809897e-05, "loss": 9.1878, "step": 4130 }, { "epoch": 0.17218957108915844, "grad_norm": 422.0, "learning_rate": 9.479182893936296e-05, "loss": 16.3754, "step": 4131 }, { "epoch": 0.17223125338668668, "grad_norm": 364.0, "learning_rate": 9.478882893429188e-05, "loss": 15.1879, "step": 4132 }, { "epoch": 0.17227293568421492, "grad_norm": 474.0, "learning_rate": 9.478582811294044e-05, "loss": 16.2506, "step": 4133 }, { "epoch": 0.17231461798174316, "grad_norm": 952.0, "learning_rate": 9.47828264753633e-05, "loss": 25.2506, "step": 4134 }, { "epoch": 0.1723563002792714, "grad_norm": 976.0, "learning_rate": 9.477982402161517e-05, "loss": 23.2545, "step": 4135 }, { "epoch": 0.17239798257679964, "grad_norm": 426.0, "learning_rate": 9.477682075175076e-05, "loss": 16.1267, "step": 4136 }, { "epoch": 0.17243966487432788, "grad_norm": 628.0, "learning_rate": 9.477381666582483e-05, "loss": 19.5003, "step": 4137 }, { "epoch": 0.17248134717185612, "grad_norm": 414.0, "learning_rate": 9.477081176389212e-05, "loss": 15.3128, "step": 4138 }, { "epoch": 0.17252302946938436, "grad_norm": 454.0, "learning_rate": 9.476780604600739e-05, "loss": 16.8757, "step": 4139 }, { "epoch": 0.1725647117669126, "grad_norm": 438.0, "learning_rate": 9.476479951222542e-05, "loss": 16.5002, "step": 4140 }, { "epoch": 0.17260639406444084, "grad_norm": 312.0, "learning_rate": 9.476179216260099e-05, "loss": 12.6253, "step": 4141 }, { "epoch": 0.17264807636196908, "grad_norm": 262.0, "learning_rate": 9.475878399718894e-05, "loss": 13.6253, "step": 4142 }, { "epoch": 0.17268975865949732, "grad_norm": 446.0, "learning_rate": 9.475577501604408e-05, "loss": 16.876, "step": 4143 }, { "epoch": 0.17273144095702556, "grad_norm": 125.0, "learning_rate": 9.475276521922124e-05, "loss": 8.2508, "step": 4144 }, { "epoch": 0.1727731232545538, "grad_norm": 129.0, "learning_rate": 9.474975460677528e-05, "loss": 9.4378, "step": 4145 }, { "epoch": 0.17281480555208203, "grad_norm": 756.0, "learning_rate": 9.474674317876108e-05, "loss": 21.6256, "step": 4146 }, { "epoch": 0.17285648784961027, "grad_norm": 312.0, "learning_rate": 9.474373093523349e-05, "loss": 11.5008, "step": 4147 }, { "epoch": 0.1728981701471385, "grad_norm": 508.0, "learning_rate": 9.474071787624745e-05, "loss": 17.1253, "step": 4148 }, { "epoch": 0.17293985244466675, "grad_norm": 312.0, "learning_rate": 9.473770400185787e-05, "loss": 14.1252, "step": 4149 }, { "epoch": 0.172981534742195, "grad_norm": 119.0, "learning_rate": 9.473468931211964e-05, "loss": 9.0017, "step": 4150 }, { "epoch": 0.17302321703972323, "grad_norm": 190.0, "learning_rate": 9.473167380708773e-05, "loss": 10.1877, "step": 4151 }, { "epoch": 0.17306489933725147, "grad_norm": 568.0, "learning_rate": 9.47286574868171e-05, "loss": 17.5017, "step": 4152 }, { "epoch": 0.1731065816347797, "grad_norm": 1304.0, "learning_rate": 9.472564035136271e-05, "loss": 26.7543, "step": 4153 }, { "epoch": 0.17314826393230795, "grad_norm": 316.0, "learning_rate": 9.472262240077956e-05, "loss": 14.189, "step": 4154 }, { "epoch": 0.1731899462298362, "grad_norm": 688.0, "learning_rate": 9.471960363512264e-05, "loss": 20.6252, "step": 4155 }, { "epoch": 0.17323162852736443, "grad_norm": 394.0, "learning_rate": 9.471658405444697e-05, "loss": 14.504, "step": 4156 }, { "epoch": 0.17327331082489267, "grad_norm": 300.0, "learning_rate": 9.471356365880759e-05, "loss": 13.4377, "step": 4157 }, { "epoch": 0.1733149931224209, "grad_norm": 71.0, "learning_rate": 9.471054244825955e-05, "loss": 7.5005, "step": 4158 }, { "epoch": 0.17335667541994915, "grad_norm": 892.0, "learning_rate": 9.47075204228579e-05, "loss": 20.3754, "step": 4159 }, { "epoch": 0.17339835771747739, "grad_norm": 69.5, "learning_rate": 9.470449758265771e-05, "loss": 8.6253, "step": 4160 }, { "epoch": 0.17344004001500563, "grad_norm": 584.0, "learning_rate": 9.470147392771412e-05, "loss": 18.8759, "step": 4161 }, { "epoch": 0.17348172231253386, "grad_norm": 724.0, "learning_rate": 9.469844945808216e-05, "loss": 21.0019, "step": 4162 }, { "epoch": 0.1735234046100621, "grad_norm": 255.0, "learning_rate": 9.4695424173817e-05, "loss": 11.5628, "step": 4163 }, { "epoch": 0.17356508690759034, "grad_norm": 94.0, "learning_rate": 9.469239807497379e-05, "loss": 6.9079, "step": 4164 }, { "epoch": 0.17360676920511858, "grad_norm": 239.0, "learning_rate": 9.468937116160763e-05, "loss": 10.8129, "step": 4165 }, { "epoch": 0.17364845150264682, "grad_norm": 884.0, "learning_rate": 9.468634343377371e-05, "loss": 21.2548, "step": 4166 }, { "epoch": 0.17369013380017506, "grad_norm": 560.0, "learning_rate": 9.468331489152724e-05, "loss": 18.1253, "step": 4167 }, { "epoch": 0.1737318160977033, "grad_norm": 524.0, "learning_rate": 9.468028553492338e-05, "loss": 19.0005, "step": 4168 }, { "epoch": 0.17377349839523154, "grad_norm": 166.0, "learning_rate": 9.467725536401734e-05, "loss": 11.3128, "step": 4169 }, { "epoch": 0.17381518069275978, "grad_norm": 318.0, "learning_rate": 9.467422437886436e-05, "loss": 12.5019, "step": 4170 }, { "epoch": 0.17385686299028802, "grad_norm": 344.0, "learning_rate": 9.467119257951969e-05, "loss": 13.8752, "step": 4171 }, { "epoch": 0.17389854528781626, "grad_norm": 928.0, "learning_rate": 9.466815996603855e-05, "loss": 24.2507, "step": 4172 }, { "epoch": 0.1739402275853445, "grad_norm": 225.0, "learning_rate": 9.466512653847623e-05, "loss": 8.563, "step": 4173 }, { "epoch": 0.17398190988287274, "grad_norm": 796.0, "learning_rate": 9.466209229688801e-05, "loss": 22.7512, "step": 4174 }, { "epoch": 0.17402359218040098, "grad_norm": 284.0, "learning_rate": 9.46590572413292e-05, "loss": 13.2505, "step": 4175 }, { "epoch": 0.17406527447792922, "grad_norm": 552.0, "learning_rate": 9.465602137185511e-05, "loss": 17.6258, "step": 4176 }, { "epoch": 0.17410695677545746, "grad_norm": 490.0, "learning_rate": 9.465298468852107e-05, "loss": 16.8753, "step": 4177 }, { "epoch": 0.1741486390729857, "grad_norm": 214.0, "learning_rate": 9.464994719138241e-05, "loss": 10.6253, "step": 4178 }, { "epoch": 0.17419032137051393, "grad_norm": 145.0, "learning_rate": 9.464690888049451e-05, "loss": 9.7507, "step": 4179 }, { "epoch": 0.17423200366804217, "grad_norm": 330.0, "learning_rate": 9.464386975591273e-05, "loss": 13.8753, "step": 4180 }, { "epoch": 0.1742736859655704, "grad_norm": 350.0, "learning_rate": 9.464082981769245e-05, "loss": 13.5005, "step": 4181 }, { "epoch": 0.17431536826309865, "grad_norm": 140.0, "learning_rate": 9.46377890658891e-05, "loss": 9.688, "step": 4182 }, { "epoch": 0.1743570505606269, "grad_norm": 354.0, "learning_rate": 9.463474750055808e-05, "loss": 13.6885, "step": 4183 }, { "epoch": 0.17439873285815513, "grad_norm": 1864.0, "learning_rate": 9.46317051217548e-05, "loss": 39.5006, "step": 4184 }, { "epoch": 0.17444041515568337, "grad_norm": 1256.0, "learning_rate": 9.462866192953475e-05, "loss": 26.8756, "step": 4185 }, { "epoch": 0.1744820974532116, "grad_norm": 276.0, "learning_rate": 9.462561792395338e-05, "loss": 12.7504, "step": 4186 }, { "epoch": 0.17452377975073985, "grad_norm": 219.0, "learning_rate": 9.462257310506615e-05, "loss": 12.1878, "step": 4187 }, { "epoch": 0.1745654620482681, "grad_norm": 332.0, "learning_rate": 9.461952747292857e-05, "loss": 14.5003, "step": 4188 }, { "epoch": 0.17460714434579633, "grad_norm": 205.0, "learning_rate": 9.461648102759614e-05, "loss": 10.1253, "step": 4189 }, { "epoch": 0.17464882664332457, "grad_norm": 452.0, "learning_rate": 9.461343376912438e-05, "loss": 15.0637, "step": 4190 }, { "epoch": 0.1746905089408528, "grad_norm": 496.0, "learning_rate": 9.461038569756883e-05, "loss": 16.6252, "step": 4191 }, { "epoch": 0.17473219123838105, "grad_norm": 188.0, "learning_rate": 9.460733681298504e-05, "loss": 10.0627, "step": 4192 }, { "epoch": 0.1747738735359093, "grad_norm": 231.0, "learning_rate": 9.460428711542859e-05, "loss": 10.8126, "step": 4193 }, { "epoch": 0.17481555583343755, "grad_norm": 79.5, "learning_rate": 9.460123660495504e-05, "loss": 8.9384, "step": 4194 }, { "epoch": 0.1748572381309658, "grad_norm": 520.0, "learning_rate": 9.459818528161998e-05, "loss": 17.1253, "step": 4195 }, { "epoch": 0.17489892042849403, "grad_norm": 135.0, "learning_rate": 9.459513314547904e-05, "loss": 10.4378, "step": 4196 }, { "epoch": 0.17494060272602227, "grad_norm": 254.0, "learning_rate": 9.459208019658785e-05, "loss": 11.8753, "step": 4197 }, { "epoch": 0.1749822850235505, "grad_norm": 548.0, "learning_rate": 9.458902643500203e-05, "loss": 15.8763, "step": 4198 }, { "epoch": 0.17502396732107875, "grad_norm": 424.0, "learning_rate": 9.458597186077724e-05, "loss": 16.7507, "step": 4199 }, { "epoch": 0.175065649618607, "grad_norm": 560.0, "learning_rate": 9.458291647396918e-05, "loss": 18.1252, "step": 4200 }, { "epoch": 0.17510733191613523, "grad_norm": 192.0, "learning_rate": 9.457986027463348e-05, "loss": 10.1879, "step": 4201 }, { "epoch": 0.17514901421366347, "grad_norm": 652.0, "learning_rate": 9.457680326282588e-05, "loss": 17.5016, "step": 4202 }, { "epoch": 0.1751906965111917, "grad_norm": 448.0, "learning_rate": 9.457374543860208e-05, "loss": 14.3128, "step": 4203 }, { "epoch": 0.17523237880871995, "grad_norm": 115.0, "learning_rate": 9.457068680201783e-05, "loss": 9.2504, "step": 4204 }, { "epoch": 0.17527406110624819, "grad_norm": 350.0, "learning_rate": 9.456762735312884e-05, "loss": 14.8751, "step": 4205 }, { "epoch": 0.17531574340377643, "grad_norm": 506.0, "learning_rate": 9.456456709199089e-05, "loss": 7.5972, "step": 4206 }, { "epoch": 0.17535742570130466, "grad_norm": 1720.0, "learning_rate": 9.456150601865975e-05, "loss": 37.7555, "step": 4207 }, { "epoch": 0.1753991079988329, "grad_norm": 46.75, "learning_rate": 9.45584441331912e-05, "loss": 6.6878, "step": 4208 }, { "epoch": 0.17544079029636114, "grad_norm": 1056.0, "learning_rate": 9.455538143564105e-05, "loss": 24.2547, "step": 4209 }, { "epoch": 0.17548247259388938, "grad_norm": 446.0, "learning_rate": 9.455231792606514e-05, "loss": 14.6268, "step": 4210 }, { "epoch": 0.17552415489141762, "grad_norm": 366.0, "learning_rate": 9.454925360451925e-05, "loss": 14.5004, "step": 4211 }, { "epoch": 0.17556583718894586, "grad_norm": 848.0, "learning_rate": 9.454618847105927e-05, "loss": 24.6251, "step": 4212 }, { "epoch": 0.1756075194864741, "grad_norm": 316.0, "learning_rate": 9.454312252574105e-05, "loss": 13.8127, "step": 4213 }, { "epoch": 0.17564920178400234, "grad_norm": 226.0, "learning_rate": 9.454005576862049e-05, "loss": 11.3752, "step": 4214 }, { "epoch": 0.17569088408153058, "grad_norm": 234.0, "learning_rate": 9.453698819975344e-05, "loss": 10.938, "step": 4215 }, { "epoch": 0.17573256637905882, "grad_norm": 436.0, "learning_rate": 9.453391981919581e-05, "loss": 17.8753, "step": 4216 }, { "epoch": 0.17577424867658706, "grad_norm": 548.0, "learning_rate": 9.453085062700356e-05, "loss": 16.8774, "step": 4217 }, { "epoch": 0.1758159309741153, "grad_norm": 388.0, "learning_rate": 9.45277806232326e-05, "loss": 15.3128, "step": 4218 }, { "epoch": 0.17585761327164354, "grad_norm": 764.0, "learning_rate": 9.452470980793888e-05, "loss": 22.0004, "step": 4219 }, { "epoch": 0.17589929556917178, "grad_norm": 544.0, "learning_rate": 9.452163818117838e-05, "loss": 16.7505, "step": 4220 }, { "epoch": 0.17594097786670002, "grad_norm": 342.0, "learning_rate": 9.451856574300705e-05, "loss": 14.3763, "step": 4221 }, { "epoch": 0.17598266016422826, "grad_norm": 256.0, "learning_rate": 9.451549249348093e-05, "loss": 12.5627, "step": 4222 }, { "epoch": 0.1760243424617565, "grad_norm": 206.0, "learning_rate": 9.451241843265602e-05, "loss": 11.1878, "step": 4223 }, { "epoch": 0.17606602475928473, "grad_norm": 1440.0, "learning_rate": 9.450934356058829e-05, "loss": 34.7502, "step": 4224 }, { "epoch": 0.17610770705681297, "grad_norm": 164.0, "learning_rate": 9.450626787733386e-05, "loss": 9.7502, "step": 4225 }, { "epoch": 0.1761493893543412, "grad_norm": 229.0, "learning_rate": 9.450319138294873e-05, "loss": 11.8753, "step": 4226 }, { "epoch": 0.17619107165186945, "grad_norm": 61.0, "learning_rate": 9.450011407748898e-05, "loss": 8.3755, "step": 4227 }, { "epoch": 0.1762327539493977, "grad_norm": 376.0, "learning_rate": 9.449703596101071e-05, "loss": 14.0034, "step": 4228 }, { "epoch": 0.17627443624692593, "grad_norm": 588.0, "learning_rate": 9.449395703357002e-05, "loss": 17.5015, "step": 4229 }, { "epoch": 0.17631611854445417, "grad_norm": 346.0, "learning_rate": 9.4490877295223e-05, "loss": 14.6254, "step": 4230 }, { "epoch": 0.1763578008419824, "grad_norm": 392.0, "learning_rate": 9.44877967460258e-05, "loss": 15.3753, "step": 4231 }, { "epoch": 0.17639948313951065, "grad_norm": 254.0, "learning_rate": 9.448471538603454e-05, "loss": 12.7502, "step": 4232 }, { "epoch": 0.1764411654370389, "grad_norm": 350.0, "learning_rate": 9.44816332153054e-05, "loss": 13.5006, "step": 4233 }, { "epoch": 0.17648284773456713, "grad_norm": 440.0, "learning_rate": 9.447855023389455e-05, "loss": 14.939, "step": 4234 }, { "epoch": 0.17652453003209537, "grad_norm": 126.0, "learning_rate": 9.447546644185818e-05, "loss": 10.0012, "step": 4235 }, { "epoch": 0.1765662123296236, "grad_norm": 378.0, "learning_rate": 9.447238183925248e-05, "loss": 16.0002, "step": 4236 }, { "epoch": 0.17660789462715185, "grad_norm": 161.0, "learning_rate": 9.446929642613367e-05, "loss": 10.2503, "step": 4237 }, { "epoch": 0.17664957692468009, "grad_norm": 208.0, "learning_rate": 9.4466210202558e-05, "loss": 11.5005, "step": 4238 }, { "epoch": 0.17669125922220832, "grad_norm": 510.0, "learning_rate": 9.446312316858168e-05, "loss": 17.2502, "step": 4239 }, { "epoch": 0.17673294151973656, "grad_norm": 161.0, "learning_rate": 9.4460035324261e-05, "loss": 10.0001, "step": 4240 }, { "epoch": 0.1767746238172648, "grad_norm": 620.0, "learning_rate": 9.445694666965222e-05, "loss": 19.5008, "step": 4241 }, { "epoch": 0.17681630611479304, "grad_norm": 239.0, "learning_rate": 9.445385720481166e-05, "loss": 12.8758, "step": 4242 }, { "epoch": 0.17685798841232128, "grad_norm": 360.0, "learning_rate": 9.44507669297956e-05, "loss": 13.3752, "step": 4243 }, { "epoch": 0.17689967070984952, "grad_norm": 528.0, "learning_rate": 9.444767584466036e-05, "loss": 19.0001, "step": 4244 }, { "epoch": 0.17694135300737776, "grad_norm": 274.0, "learning_rate": 9.444458394946229e-05, "loss": 11.4412, "step": 4245 }, { "epoch": 0.176983035304906, "grad_norm": 450.0, "learning_rate": 9.444149124425771e-05, "loss": 16.0008, "step": 4246 }, { "epoch": 0.17702471760243424, "grad_norm": 262.0, "learning_rate": 9.443839772910304e-05, "loss": 12.4379, "step": 4247 }, { "epoch": 0.17706639989996248, "grad_norm": 376.0, "learning_rate": 9.44353034040546e-05, "loss": 14.0628, "step": 4248 }, { "epoch": 0.17710808219749072, "grad_norm": 494.0, "learning_rate": 9.443220826916883e-05, "loss": 16.5004, "step": 4249 }, { "epoch": 0.17714976449501896, "grad_norm": 312.0, "learning_rate": 9.442911232450212e-05, "loss": 12.0002, "step": 4250 }, { "epoch": 0.1771914467925472, "grad_norm": 2112.0, "learning_rate": 9.442601557011088e-05, "loss": 42.2502, "step": 4251 }, { "epoch": 0.17723312909007544, "grad_norm": 125.5, "learning_rate": 9.442291800605156e-05, "loss": 9.001, "step": 4252 }, { "epoch": 0.17727481138760368, "grad_norm": 532.0, "learning_rate": 9.441981963238062e-05, "loss": 16.8755, "step": 4253 }, { "epoch": 0.17731649368513192, "grad_norm": 115.5, "learning_rate": 9.441672044915453e-05, "loss": 10.0635, "step": 4254 }, { "epoch": 0.17735817598266015, "grad_norm": 488.0, "learning_rate": 9.441362045642977e-05, "loss": 18.0003, "step": 4255 }, { "epoch": 0.1773998582801884, "grad_norm": 652.0, "learning_rate": 9.441051965426283e-05, "loss": 20.6263, "step": 4256 }, { "epoch": 0.17744154057771663, "grad_norm": 253.0, "learning_rate": 9.440741804271022e-05, "loss": 12.5627, "step": 4257 }, { "epoch": 0.17748322287524487, "grad_norm": 544.0, "learning_rate": 9.440431562182849e-05, "loss": 18.7504, "step": 4258 }, { "epoch": 0.1775249051727731, "grad_norm": 223.0, "learning_rate": 9.440121239167416e-05, "loss": 11.6879, "step": 4259 }, { "epoch": 0.17756658747030135, "grad_norm": 324.0, "learning_rate": 9.439810835230379e-05, "loss": 13.5039, "step": 4260 }, { "epoch": 0.1776082697678296, "grad_norm": 163.0, "learning_rate": 9.439500350377395e-05, "loss": 9.3128, "step": 4261 }, { "epoch": 0.17764995206535783, "grad_norm": 304.0, "learning_rate": 9.439189784614122e-05, "loss": 11.5652, "step": 4262 }, { "epoch": 0.17769163436288607, "grad_norm": 282.0, "learning_rate": 9.438879137946222e-05, "loss": 13.3135, "step": 4263 }, { "epoch": 0.1777333166604143, "grad_norm": 596.0, "learning_rate": 9.438568410379356e-05, "loss": 19.0004, "step": 4264 }, { "epoch": 0.17777499895794255, "grad_norm": 540.0, "learning_rate": 9.438257601919188e-05, "loss": 20.0006, "step": 4265 }, { "epoch": 0.17781668125547082, "grad_norm": 262.0, "learning_rate": 9.43794671257138e-05, "loss": 13.1911, "step": 4266 }, { "epoch": 0.17785836355299905, "grad_norm": 260.0, "learning_rate": 9.437635742341598e-05, "loss": 12.0015, "step": 4267 }, { "epoch": 0.1779000458505273, "grad_norm": 224.0, "learning_rate": 9.437324691235512e-05, "loss": 10.6271, "step": 4268 }, { "epoch": 0.17794172814805553, "grad_norm": 260.0, "learning_rate": 9.437013559258789e-05, "loss": 11.8129, "step": 4269 }, { "epoch": 0.17798341044558377, "grad_norm": 568.0, "learning_rate": 9.436702346417101e-05, "loss": 18.7513, "step": 4270 }, { "epoch": 0.178025092743112, "grad_norm": 1012.0, "learning_rate": 9.436391052716119e-05, "loss": 28.626, "step": 4271 }, { "epoch": 0.17806677504064025, "grad_norm": 386.0, "learning_rate": 9.436079678161514e-05, "loss": 12.2516, "step": 4272 }, { "epoch": 0.1781084573381685, "grad_norm": 274.0, "learning_rate": 9.435768222758965e-05, "loss": 12.188, "step": 4273 }, { "epoch": 0.17815013963569673, "grad_norm": 203.0, "learning_rate": 9.435456686514145e-05, "loss": 10.5653, "step": 4274 }, { "epoch": 0.17819182193322497, "grad_norm": 520.0, "learning_rate": 9.435145069432735e-05, "loss": 15.4378, "step": 4275 }, { "epoch": 0.1782335042307532, "grad_norm": 466.0, "learning_rate": 9.434833371520411e-05, "loss": 15.6271, "step": 4276 }, { "epoch": 0.17827518652828145, "grad_norm": 328.0, "learning_rate": 9.434521592782856e-05, "loss": 13.5628, "step": 4277 }, { "epoch": 0.1783168688258097, "grad_norm": 284.0, "learning_rate": 9.43420973322575e-05, "loss": 14.0034, "step": 4278 }, { "epoch": 0.17835855112333793, "grad_norm": 262.0, "learning_rate": 9.43389779285478e-05, "loss": 13.0005, "step": 4279 }, { "epoch": 0.17840023342086617, "grad_norm": 326.0, "learning_rate": 9.433585771675629e-05, "loss": 13.4378, "step": 4280 }, { "epoch": 0.1784419157183944, "grad_norm": 304.0, "learning_rate": 9.433273669693981e-05, "loss": 13.3129, "step": 4281 }, { "epoch": 0.17848359801592265, "grad_norm": 108.5, "learning_rate": 9.432961486915528e-05, "loss": 10.7506, "step": 4282 }, { "epoch": 0.17852528031345088, "grad_norm": 556.0, "learning_rate": 9.432649223345959e-05, "loss": 18.2505, "step": 4283 }, { "epoch": 0.17856696261097912, "grad_norm": 316.0, "learning_rate": 9.432336878990965e-05, "loss": 13.6883, "step": 4284 }, { "epoch": 0.17860864490850736, "grad_norm": 167.0, "learning_rate": 9.432024453856236e-05, "loss": 10.8757, "step": 4285 }, { "epoch": 0.1786503272060356, "grad_norm": 452.0, "learning_rate": 9.43171194794747e-05, "loss": 16.3753, "step": 4286 }, { "epoch": 0.17869200950356384, "grad_norm": 239.0, "learning_rate": 9.431399361270359e-05, "loss": 12.0003, "step": 4287 }, { "epoch": 0.17873369180109208, "grad_norm": 440.0, "learning_rate": 9.431086693830602e-05, "loss": 15.4379, "step": 4288 }, { "epoch": 0.17877537409862032, "grad_norm": 1240.0, "learning_rate": 9.430773945633896e-05, "loss": 27.8794, "step": 4289 }, { "epoch": 0.17881705639614856, "grad_norm": 508.0, "learning_rate": 9.430461116685943e-05, "loss": 15.3754, "step": 4290 }, { "epoch": 0.1788587386936768, "grad_norm": 212.0, "learning_rate": 9.43014820699244e-05, "loss": 11.7505, "step": 4291 }, { "epoch": 0.17890042099120504, "grad_norm": 143.0, "learning_rate": 9.429835216559096e-05, "loss": 10.0006, "step": 4292 }, { "epoch": 0.17894210328873328, "grad_norm": 564.0, "learning_rate": 9.42952214539161e-05, "loss": 19.2503, "step": 4293 }, { "epoch": 0.17898378558626152, "grad_norm": 296.0, "learning_rate": 9.429208993495689e-05, "loss": 13.6882, "step": 4294 }, { "epoch": 0.17902546788378976, "grad_norm": 2016.0, "learning_rate": 9.428895760877041e-05, "loss": 40.5043, "step": 4295 }, { "epoch": 0.179067150181318, "grad_norm": 524.0, "learning_rate": 9.428582447541376e-05, "loss": 17.1253, "step": 4296 }, { "epoch": 0.17910883247884624, "grad_norm": 490.0, "learning_rate": 9.428269053494403e-05, "loss": 17.6254, "step": 4297 }, { "epoch": 0.17915051477637448, "grad_norm": 175.0, "learning_rate": 9.427955578741832e-05, "loss": 9.1258, "step": 4298 }, { "epoch": 0.17919219707390271, "grad_norm": 420.0, "learning_rate": 9.427642023289377e-05, "loss": 16.2503, "step": 4299 }, { "epoch": 0.17923387937143095, "grad_norm": 576.0, "learning_rate": 9.427328387142755e-05, "loss": 16.7508, "step": 4300 }, { "epoch": 0.1792755616689592, "grad_norm": 85.5, "learning_rate": 9.427014670307679e-05, "loss": 6.0009, "step": 4301 }, { "epoch": 0.17931724396648743, "grad_norm": 1128.0, "learning_rate": 9.426700872789869e-05, "loss": 26.1293, "step": 4302 }, { "epoch": 0.17935892626401567, "grad_norm": 276.0, "learning_rate": 9.42638699459504e-05, "loss": 12.9379, "step": 4303 }, { "epoch": 0.1794006085615439, "grad_norm": 390.0, "learning_rate": 9.426073035728917e-05, "loss": 15.0627, "step": 4304 }, { "epoch": 0.17944229085907215, "grad_norm": 148.0, "learning_rate": 9.425758996197221e-05, "loss": 9.938, "step": 4305 }, { "epoch": 0.1794839731566004, "grad_norm": 218.0, "learning_rate": 9.425444876005671e-05, "loss": 12.0636, "step": 4306 }, { "epoch": 0.17952565545412863, "grad_norm": 103.5, "learning_rate": 9.42513067516e-05, "loss": 9.2507, "step": 4307 }, { "epoch": 0.17956733775165687, "grad_norm": 392.0, "learning_rate": 9.424816393665925e-05, "loss": 14.0627, "step": 4308 }, { "epoch": 0.1796090200491851, "grad_norm": 97.0, "learning_rate": 9.424502031529181e-05, "loss": 8.5628, "step": 4309 }, { "epoch": 0.17965070234671335, "grad_norm": 496.0, "learning_rate": 9.424187588755493e-05, "loss": 17.6257, "step": 4310 }, { "epoch": 0.1796923846442416, "grad_norm": 229.0, "learning_rate": 9.423873065350595e-05, "loss": 11.1877, "step": 4311 }, { "epoch": 0.17973406694176983, "grad_norm": 532.0, "learning_rate": 9.423558461320216e-05, "loss": 17.5007, "step": 4312 }, { "epoch": 0.17977574923929807, "grad_norm": 233.0, "learning_rate": 9.423243776670093e-05, "loss": 12.7503, "step": 4313 }, { "epoch": 0.1798174315368263, "grad_norm": 306.0, "learning_rate": 9.422929011405959e-05, "loss": 13.1253, "step": 4314 }, { "epoch": 0.17985911383435454, "grad_norm": 174.0, "learning_rate": 9.422614165533552e-05, "loss": 10.3149, "step": 4315 }, { "epoch": 0.17990079613188278, "grad_norm": 150.0, "learning_rate": 9.422299239058607e-05, "loss": 10.6889, "step": 4316 }, { "epoch": 0.17994247842941102, "grad_norm": 378.0, "learning_rate": 9.421984231986868e-05, "loss": 14.1257, "step": 4317 }, { "epoch": 0.17998416072693926, "grad_norm": 223.0, "learning_rate": 9.421669144324072e-05, "loss": 11.0634, "step": 4318 }, { "epoch": 0.1800258430244675, "grad_norm": 680.0, "learning_rate": 9.421353976075965e-05, "loss": 22.5026, "step": 4319 }, { "epoch": 0.18006752532199574, "grad_norm": 568.0, "learning_rate": 9.421038727248288e-05, "loss": 18.3755, "step": 4320 }, { "epoch": 0.18010920761952398, "grad_norm": 732.0, "learning_rate": 9.42072339784679e-05, "loss": 20.1255, "step": 4321 }, { "epoch": 0.18015088991705222, "grad_norm": 498.0, "learning_rate": 9.420407987877213e-05, "loss": 16.7523, "step": 4322 }, { "epoch": 0.18019257221458046, "grad_norm": 390.0, "learning_rate": 9.420092497345308e-05, "loss": 15.3126, "step": 4323 }, { "epoch": 0.1802342545121087, "grad_norm": 130.0, "learning_rate": 9.419776926256827e-05, "loss": 9.0639, "step": 4324 }, { "epoch": 0.18027593680963694, "grad_norm": 50.5, "learning_rate": 9.419461274617518e-05, "loss": 8.3143, "step": 4325 }, { "epoch": 0.18031761910716518, "grad_norm": 2096.0, "learning_rate": 9.419145542433134e-05, "loss": 43.5079, "step": 4326 }, { "epoch": 0.18035930140469342, "grad_norm": 420.0, "learning_rate": 9.41882972970943e-05, "loss": 14.5628, "step": 4327 }, { "epoch": 0.18040098370222166, "grad_norm": 117.0, "learning_rate": 9.418513836452166e-05, "loss": 9.4377, "step": 4328 }, { "epoch": 0.1804426659997499, "grad_norm": 486.0, "learning_rate": 9.418197862667091e-05, "loss": 17.2504, "step": 4329 }, { "epoch": 0.18048434829727814, "grad_norm": 840.0, "learning_rate": 9.417881808359969e-05, "loss": 22.6256, "step": 4330 }, { "epoch": 0.18052603059480637, "grad_norm": 268.0, "learning_rate": 9.417565673536558e-05, "loss": 12.4394, "step": 4331 }, { "epoch": 0.18056771289233461, "grad_norm": 492.0, "learning_rate": 9.417249458202622e-05, "loss": 16.2503, "step": 4332 }, { "epoch": 0.18060939518986285, "grad_norm": 298.0, "learning_rate": 9.41693316236392e-05, "loss": 12.0633, "step": 4333 }, { "epoch": 0.1806510774873911, "grad_norm": 478.0, "learning_rate": 9.416616786026222e-05, "loss": 16.8757, "step": 4334 }, { "epoch": 0.18069275978491933, "grad_norm": 596.0, "learning_rate": 9.416300329195289e-05, "loss": 18.1296, "step": 4335 }, { "epoch": 0.18073444208244757, "grad_norm": 183.0, "learning_rate": 9.41598379187689e-05, "loss": 5.0948, "step": 4336 }, { "epoch": 0.1807761243799758, "grad_norm": 280.0, "learning_rate": 9.415667174076796e-05, "loss": 12.3129, "step": 4337 }, { "epoch": 0.18081780667750405, "grad_norm": 378.0, "learning_rate": 9.415350475800776e-05, "loss": 14.8757, "step": 4338 }, { "epoch": 0.18085948897503232, "grad_norm": 458.0, "learning_rate": 9.4150336970546e-05, "loss": 18.5006, "step": 4339 }, { "epoch": 0.18090117127256056, "grad_norm": 472.0, "learning_rate": 9.414716837844044e-05, "loss": 14.0639, "step": 4340 }, { "epoch": 0.1809428535700888, "grad_norm": 171.0, "learning_rate": 9.414399898174881e-05, "loss": 10.8128, "step": 4341 }, { "epoch": 0.18098453586761704, "grad_norm": 600.0, "learning_rate": 9.41408287805289e-05, "loss": 19.2503, "step": 4342 }, { "epoch": 0.18102621816514528, "grad_norm": 584.0, "learning_rate": 9.413765777483845e-05, "loss": 17.5005, "step": 4343 }, { "epoch": 0.18106790046267351, "grad_norm": 61.75, "learning_rate": 9.413448596473527e-05, "loss": 6.5639, "step": 4344 }, { "epoch": 0.18110958276020175, "grad_norm": 462.0, "learning_rate": 9.413131335027716e-05, "loss": 16.0001, "step": 4345 }, { "epoch": 0.18115126505773, "grad_norm": 352.0, "learning_rate": 9.412813993152195e-05, "loss": 13.6884, "step": 4346 }, { "epoch": 0.18119294735525823, "grad_norm": 552.0, "learning_rate": 9.412496570852748e-05, "loss": 17.8755, "step": 4347 }, { "epoch": 0.18123462965278647, "grad_norm": 1632.0, "learning_rate": 9.412179068135158e-05, "loss": 32.2576, "step": 4348 }, { "epoch": 0.1812763119503147, "grad_norm": 356.0, "learning_rate": 9.411861485005213e-05, "loss": 12.8752, "step": 4349 }, { "epoch": 0.18131799424784295, "grad_norm": 276.0, "learning_rate": 9.411543821468702e-05, "loss": 11.7505, "step": 4350 }, { "epoch": 0.1813596765453712, "grad_norm": 202.0, "learning_rate": 9.411226077531413e-05, "loss": 11.813, "step": 4351 }, { "epoch": 0.18140135884289943, "grad_norm": 290.0, "learning_rate": 9.410908253199136e-05, "loss": 13.5024, "step": 4352 }, { "epoch": 0.18144304114042767, "grad_norm": 512.0, "learning_rate": 9.410590348477665e-05, "loss": 18.6252, "step": 4353 }, { "epoch": 0.1814847234379559, "grad_norm": 410.0, "learning_rate": 9.410272363372795e-05, "loss": 15.0674, "step": 4354 }, { "epoch": 0.18152640573548415, "grad_norm": 260.0, "learning_rate": 9.409954297890318e-05, "loss": 13.2502, "step": 4355 }, { "epoch": 0.1815680880330124, "grad_norm": 386.0, "learning_rate": 9.409636152036032e-05, "loss": 14.5004, "step": 4356 }, { "epoch": 0.18160977033054063, "grad_norm": 1088.0, "learning_rate": 9.409317925815737e-05, "loss": 27.3753, "step": 4357 }, { "epoch": 0.18165145262806887, "grad_norm": 262.0, "learning_rate": 9.408999619235231e-05, "loss": 11.8127, "step": 4358 }, { "epoch": 0.1816931349255971, "grad_norm": 170.0, "learning_rate": 9.408681232300315e-05, "loss": 10.1878, "step": 4359 }, { "epoch": 0.18173481722312534, "grad_norm": 314.0, "learning_rate": 9.408362765016791e-05, "loss": 14.0629, "step": 4360 }, { "epoch": 0.18177649952065358, "grad_norm": 227.0, "learning_rate": 9.408044217390468e-05, "loss": 11.3752, "step": 4361 }, { "epoch": 0.18181818181818182, "grad_norm": 212.0, "learning_rate": 9.407725589427144e-05, "loss": 11.438, "step": 4362 }, { "epoch": 0.18185986411571006, "grad_norm": 92.0, "learning_rate": 9.407406881132633e-05, "loss": 8.6876, "step": 4363 }, { "epoch": 0.1819015464132383, "grad_norm": 298.0, "learning_rate": 9.407088092512737e-05, "loss": 13.4379, "step": 4364 }, { "epoch": 0.18194322871076654, "grad_norm": 334.0, "learning_rate": 9.406769223573272e-05, "loss": 14.3129, "step": 4365 }, { "epoch": 0.18198491100829478, "grad_norm": 1336.0, "learning_rate": 9.406450274320045e-05, "loss": 27.7543, "step": 4366 }, { "epoch": 0.18202659330582302, "grad_norm": 478.0, "learning_rate": 9.406131244758871e-05, "loss": 17.1253, "step": 4367 }, { "epoch": 0.18206827560335126, "grad_norm": 260.0, "learning_rate": 9.405812134895564e-05, "loss": 11.8128, "step": 4368 }, { "epoch": 0.1821099579008795, "grad_norm": 262.0, "learning_rate": 9.405492944735941e-05, "loss": 9.1258, "step": 4369 }, { "epoch": 0.18215164019840774, "grad_norm": 187.0, "learning_rate": 9.405173674285817e-05, "loss": 9.6253, "step": 4370 }, { "epoch": 0.18219332249593598, "grad_norm": 316.0, "learning_rate": 9.404854323551011e-05, "loss": 12.6278, "step": 4371 }, { "epoch": 0.18223500479346422, "grad_norm": 55.5, "learning_rate": 9.404534892537344e-05, "loss": 8.2507, "step": 4372 }, { "epoch": 0.18227668709099246, "grad_norm": 260.0, "learning_rate": 9.404215381250638e-05, "loss": 11.8132, "step": 4373 }, { "epoch": 0.1823183693885207, "grad_norm": 936.0, "learning_rate": 9.403895789696717e-05, "loss": 20.1295, "step": 4374 }, { "epoch": 0.18236005168604894, "grad_norm": 111.5, "learning_rate": 9.403576117881403e-05, "loss": 8.8753, "step": 4375 }, { "epoch": 0.18240173398357717, "grad_norm": 320.0, "learning_rate": 9.403256365810524e-05, "loss": 13.5007, "step": 4376 }, { "epoch": 0.1824434162811054, "grad_norm": 110.0, "learning_rate": 9.402936533489906e-05, "loss": 6.4693, "step": 4377 }, { "epoch": 0.18248509857863365, "grad_norm": 49.25, "learning_rate": 9.40261662092538e-05, "loss": 7.5626, "step": 4378 }, { "epoch": 0.1825267808761619, "grad_norm": 532.0, "learning_rate": 9.402296628122774e-05, "loss": 17.1259, "step": 4379 }, { "epoch": 0.18256846317369013, "grad_norm": 130.0, "learning_rate": 9.401976555087921e-05, "loss": 7.1581, "step": 4380 }, { "epoch": 0.18261014547121837, "grad_norm": 220.0, "learning_rate": 9.401656401826656e-05, "loss": 11.0005, "step": 4381 }, { "epoch": 0.1826518277687466, "grad_norm": 120.5, "learning_rate": 9.40133616834481e-05, "loss": 8.063, "step": 4382 }, { "epoch": 0.18269351006627485, "grad_norm": 446.0, "learning_rate": 9.401015854648223e-05, "loss": 14.8132, "step": 4383 }, { "epoch": 0.1827351923638031, "grad_norm": 262.0, "learning_rate": 9.400695460742732e-05, "loss": 11.4392, "step": 4384 }, { "epoch": 0.18277687466133133, "grad_norm": 292.0, "learning_rate": 9.400374986634175e-05, "loss": 13.0005, "step": 4385 }, { "epoch": 0.18281855695885957, "grad_norm": 227.0, "learning_rate": 9.400054432328394e-05, "loss": 11.4378, "step": 4386 }, { "epoch": 0.1828602392563878, "grad_norm": 338.0, "learning_rate": 9.399733797831229e-05, "loss": 14.5628, "step": 4387 }, { "epoch": 0.18290192155391605, "grad_norm": 436.0, "learning_rate": 9.399413083148525e-05, "loss": 16.2504, "step": 4388 }, { "epoch": 0.1829436038514443, "grad_norm": 133.0, "learning_rate": 9.399092288286128e-05, "loss": 6.9696, "step": 4389 }, { "epoch": 0.18298528614897253, "grad_norm": 704.0, "learning_rate": 9.398771413249883e-05, "loss": 21.0006, "step": 4390 }, { "epoch": 0.18302696844650077, "grad_norm": 498.0, "learning_rate": 9.398450458045638e-05, "loss": 18.1252, "step": 4391 }, { "epoch": 0.183068650744029, "grad_norm": 137.0, "learning_rate": 9.398129422679243e-05, "loss": 10.313, "step": 4392 }, { "epoch": 0.18311033304155724, "grad_norm": 320.0, "learning_rate": 9.397808307156551e-05, "loss": 14.3752, "step": 4393 }, { "epoch": 0.18315201533908548, "grad_norm": 368.0, "learning_rate": 9.39748711148341e-05, "loss": 15.5627, "step": 4394 }, { "epoch": 0.18319369763661372, "grad_norm": 274.0, "learning_rate": 9.397165835665676e-05, "loss": 11.8752, "step": 4395 }, { "epoch": 0.18323537993414196, "grad_norm": 660.0, "learning_rate": 9.396844479709205e-05, "loss": 19.6252, "step": 4396 }, { "epoch": 0.1832770622316702, "grad_norm": 362.0, "learning_rate": 9.396523043619852e-05, "loss": 14.1255, "step": 4397 }, { "epoch": 0.18331874452919844, "grad_norm": 115.5, "learning_rate": 9.396201527403477e-05, "loss": 9.9385, "step": 4398 }, { "epoch": 0.18336042682672668, "grad_norm": 235.0, "learning_rate": 9.395879931065939e-05, "loss": 11.5631, "step": 4399 }, { "epoch": 0.18340210912425492, "grad_norm": 450.0, "learning_rate": 9.3955582546131e-05, "loss": 16.5007, "step": 4400 }, { "epoch": 0.18344379142178316, "grad_norm": 91.5, "learning_rate": 9.39523649805082e-05, "loss": 8.6879, "step": 4401 }, { "epoch": 0.1834854737193114, "grad_norm": 262.0, "learning_rate": 9.394914661384966e-05, "loss": 12.1879, "step": 4402 }, { "epoch": 0.18352715601683964, "grad_norm": 274.0, "learning_rate": 9.394592744621401e-05, "loss": 12.6253, "step": 4403 }, { "epoch": 0.18356883831436788, "grad_norm": 249.0, "learning_rate": 9.394270747765994e-05, "loss": 12.3133, "step": 4404 }, { "epoch": 0.18361052061189612, "grad_norm": 350.0, "learning_rate": 9.393948670824612e-05, "loss": 14.4378, "step": 4405 }, { "epoch": 0.18365220290942436, "grad_norm": 314.0, "learning_rate": 9.393626513803124e-05, "loss": 13.5626, "step": 4406 }, { "epoch": 0.1836938852069526, "grad_norm": 306.0, "learning_rate": 9.393304276707406e-05, "loss": 13.6253, "step": 4407 }, { "epoch": 0.18373556750448083, "grad_norm": 340.0, "learning_rate": 9.392981959543326e-05, "loss": 14.5002, "step": 4408 }, { "epoch": 0.18377724980200907, "grad_norm": 264.0, "learning_rate": 9.392659562316759e-05, "loss": 12.2503, "step": 4409 }, { "epoch": 0.1838189320995373, "grad_norm": 364.0, "learning_rate": 9.392337085033582e-05, "loss": 14.563, "step": 4410 }, { "epoch": 0.18386061439706555, "grad_norm": 390.0, "learning_rate": 9.392014527699671e-05, "loss": 14.3137, "step": 4411 }, { "epoch": 0.18390229669459382, "grad_norm": 540.0, "learning_rate": 9.391691890320906e-05, "loss": 17.2509, "step": 4412 }, { "epoch": 0.18394397899212206, "grad_norm": 398.0, "learning_rate": 9.391369172903167e-05, "loss": 14.9378, "step": 4413 }, { "epoch": 0.1839856612896503, "grad_norm": 1472.0, "learning_rate": 9.391046375452335e-05, "loss": 37.0002, "step": 4414 }, { "epoch": 0.18402734358717854, "grad_norm": 258.0, "learning_rate": 9.390723497974292e-05, "loss": 9.3129, "step": 4415 }, { "epoch": 0.18406902588470678, "grad_norm": 76.5, "learning_rate": 9.390400540474924e-05, "loss": 8.3756, "step": 4416 }, { "epoch": 0.18411070818223502, "grad_norm": 102.5, "learning_rate": 9.390077502960117e-05, "loss": 8.8127, "step": 4417 }, { "epoch": 0.18415239047976326, "grad_norm": 240.0, "learning_rate": 9.389754385435757e-05, "loss": 12.1882, "step": 4418 }, { "epoch": 0.1841940727772915, "grad_norm": 210.0, "learning_rate": 9.389431187907734e-05, "loss": 10.3757, "step": 4419 }, { "epoch": 0.18423575507481973, "grad_norm": 320.0, "learning_rate": 9.389107910381937e-05, "loss": 13.5634, "step": 4420 }, { "epoch": 0.18427743737234797, "grad_norm": 676.0, "learning_rate": 9.38878455286426e-05, "loss": 20.3753, "step": 4421 }, { "epoch": 0.1843191196698762, "grad_norm": 226.0, "learning_rate": 9.388461115360595e-05, "loss": 11.8764, "step": 4422 }, { "epoch": 0.18436080196740445, "grad_norm": 189.0, "learning_rate": 9.388137597876836e-05, "loss": 10.9379, "step": 4423 }, { "epoch": 0.1844024842649327, "grad_norm": 596.0, "learning_rate": 9.38781400041888e-05, "loss": 18.7506, "step": 4424 }, { "epoch": 0.18444416656246093, "grad_norm": 217.0, "learning_rate": 9.387490322992624e-05, "loss": 12.1878, "step": 4425 }, { "epoch": 0.18448584885998917, "grad_norm": 177.0, "learning_rate": 9.387166565603967e-05, "loss": 10.8133, "step": 4426 }, { "epoch": 0.1845275311575174, "grad_norm": 336.0, "learning_rate": 9.386842728258811e-05, "loss": 11.7505, "step": 4427 }, { "epoch": 0.18456921345504565, "grad_norm": 1128.0, "learning_rate": 9.386518810963057e-05, "loss": 27.6252, "step": 4428 }, { "epoch": 0.1846108957525739, "grad_norm": 362.0, "learning_rate": 9.386194813722607e-05, "loss": 14.7537, "step": 4429 }, { "epoch": 0.18465257805010213, "grad_norm": 286.0, "learning_rate": 9.385870736543368e-05, "loss": 12.5002, "step": 4430 }, { "epoch": 0.18469426034763037, "grad_norm": 247.0, "learning_rate": 9.385546579431244e-05, "loss": 11.1253, "step": 4431 }, { "epoch": 0.1847359426451586, "grad_norm": 608.0, "learning_rate": 9.385222342392146e-05, "loss": 19.0003, "step": 4432 }, { "epoch": 0.18477762494268685, "grad_norm": 476.0, "learning_rate": 9.384898025431981e-05, "loss": 15.1885, "step": 4433 }, { "epoch": 0.18481930724021509, "grad_norm": 490.0, "learning_rate": 9.38457362855666e-05, "loss": 18.3753, "step": 4434 }, { "epoch": 0.18486098953774333, "grad_norm": 804.0, "learning_rate": 9.384249151772095e-05, "loss": 24.126, "step": 4435 }, { "epoch": 0.18490267183527156, "grad_norm": 388.0, "learning_rate": 9.383924595084202e-05, "loss": 12.2504, "step": 4436 }, { "epoch": 0.1849443541327998, "grad_norm": 205.0, "learning_rate": 9.383599958498892e-05, "loss": 8.6888, "step": 4437 }, { "epoch": 0.18498603643032804, "grad_norm": 274.0, "learning_rate": 9.383275242022082e-05, "loss": 12.7503, "step": 4438 }, { "epoch": 0.18502771872785628, "grad_norm": 524.0, "learning_rate": 9.382950445659695e-05, "loss": 15.753, "step": 4439 }, { "epoch": 0.18506940102538452, "grad_norm": 274.0, "learning_rate": 9.382625569417646e-05, "loss": 13.7508, "step": 4440 }, { "epoch": 0.18511108332291276, "grad_norm": 448.0, "learning_rate": 9.382300613301857e-05, "loss": 15.5003, "step": 4441 }, { "epoch": 0.185152765620441, "grad_norm": 193.0, "learning_rate": 9.38197557731825e-05, "loss": 12.0004, "step": 4442 }, { "epoch": 0.18519444791796924, "grad_norm": 924.0, "learning_rate": 9.38165046147275e-05, "loss": 21.5103, "step": 4443 }, { "epoch": 0.18523613021549748, "grad_norm": 456.0, "learning_rate": 9.381325265771279e-05, "loss": 15.1263, "step": 4444 }, { "epoch": 0.18527781251302572, "grad_norm": 484.0, "learning_rate": 9.380999990219768e-05, "loss": 16.7514, "step": 4445 }, { "epoch": 0.18531949481055396, "grad_norm": 364.0, "learning_rate": 9.380674634824143e-05, "loss": 13.8752, "step": 4446 }, { "epoch": 0.1853611771080822, "grad_norm": 298.0, "learning_rate": 9.380349199590335e-05, "loss": 12.063, "step": 4447 }, { "epoch": 0.18540285940561044, "grad_norm": 478.0, "learning_rate": 9.380023684524274e-05, "loss": 16.1282, "step": 4448 }, { "epoch": 0.18544454170313868, "grad_norm": 195.0, "learning_rate": 9.37969808963189e-05, "loss": 10.9378, "step": 4449 }, { "epoch": 0.18548622400066692, "grad_norm": 532.0, "learning_rate": 9.379372414919121e-05, "loss": 17.7506, "step": 4450 }, { "epoch": 0.18552790629819516, "grad_norm": 374.0, "learning_rate": 9.3790466603919e-05, "loss": 15.0643, "step": 4451 }, { "epoch": 0.1855695885957234, "grad_norm": 362.0, "learning_rate": 9.378720826056167e-05, "loss": 14.1254, "step": 4452 }, { "epoch": 0.18561127089325163, "grad_norm": 1528.0, "learning_rate": 9.378394911917856e-05, "loss": 34.2511, "step": 4453 }, { "epoch": 0.18565295319077987, "grad_norm": 316.0, "learning_rate": 9.378068917982909e-05, "loss": 13.2504, "step": 4454 }, { "epoch": 0.1856946354883081, "grad_norm": 608.0, "learning_rate": 9.377742844257269e-05, "loss": 17.3755, "step": 4455 }, { "epoch": 0.18573631778583635, "grad_norm": 490.0, "learning_rate": 9.377416690746876e-05, "loss": 17.6252, "step": 4456 }, { "epoch": 0.1857780000833646, "grad_norm": 266.0, "learning_rate": 9.377090457457676e-05, "loss": 12.3753, "step": 4457 }, { "epoch": 0.18581968238089283, "grad_norm": 648.0, "learning_rate": 9.376764144395614e-05, "loss": 18.502, "step": 4458 }, { "epoch": 0.18586136467842107, "grad_norm": 510.0, "learning_rate": 9.376437751566636e-05, "loss": 18.8753, "step": 4459 }, { "epoch": 0.1859030469759493, "grad_norm": 584.0, "learning_rate": 9.376111278976692e-05, "loss": 19.2536, "step": 4460 }, { "epoch": 0.18594472927347755, "grad_norm": 176.0, "learning_rate": 9.375784726631732e-05, "loss": 6.5318, "step": 4461 }, { "epoch": 0.1859864115710058, "grad_norm": 42.75, "learning_rate": 9.375458094537706e-05, "loss": 7.7504, "step": 4462 }, { "epoch": 0.18602809386853403, "grad_norm": 334.0, "learning_rate": 9.375131382700566e-05, "loss": 14.2511, "step": 4463 }, { "epoch": 0.18606977616606227, "grad_norm": 242.0, "learning_rate": 9.374804591126272e-05, "loss": 12.5012, "step": 4464 }, { "epoch": 0.1861114584635905, "grad_norm": 632.0, "learning_rate": 9.374477719820773e-05, "loss": 18.6252, "step": 4465 }, { "epoch": 0.18615314076111875, "grad_norm": 157.0, "learning_rate": 9.374150768790031e-05, "loss": 10.063, "step": 4466 }, { "epoch": 0.18619482305864699, "grad_norm": 1128.0, "learning_rate": 9.373823738040002e-05, "loss": 25.5045, "step": 4467 }, { "epoch": 0.18623650535617522, "grad_norm": 960.0, "learning_rate": 9.373496627576648e-05, "loss": 21.5021, "step": 4468 }, { "epoch": 0.18627818765370346, "grad_norm": 398.0, "learning_rate": 9.373169437405928e-05, "loss": 14.7503, "step": 4469 }, { "epoch": 0.1863198699512317, "grad_norm": 724.0, "learning_rate": 9.372842167533809e-05, "loss": 22.2503, "step": 4470 }, { "epoch": 0.18636155224875994, "grad_norm": 246.0, "learning_rate": 9.372514817966251e-05, "loss": 10.6878, "step": 4471 }, { "epoch": 0.18640323454628818, "grad_norm": 247.0, "learning_rate": 9.372187388709224e-05, "loss": 13.8754, "step": 4472 }, { "epoch": 0.18644491684381642, "grad_norm": 204.0, "learning_rate": 9.371859879768692e-05, "loss": 12.2509, "step": 4473 }, { "epoch": 0.18648659914134466, "grad_norm": 600.0, "learning_rate": 9.371532291150627e-05, "loss": 20.1254, "step": 4474 }, { "epoch": 0.1865282814388729, "grad_norm": 720.0, "learning_rate": 9.371204622860998e-05, "loss": 20.6273, "step": 4475 }, { "epoch": 0.18656996373640114, "grad_norm": 410.0, "learning_rate": 9.370876874905776e-05, "loss": 16.1253, "step": 4476 }, { "epoch": 0.18661164603392938, "grad_norm": 372.0, "learning_rate": 9.370549047290936e-05, "loss": 12.1883, "step": 4477 }, { "epoch": 0.18665332833145762, "grad_norm": 304.0, "learning_rate": 9.37022114002245e-05, "loss": 13.8128, "step": 4478 }, { "epoch": 0.18669501062898586, "grad_norm": 241.0, "learning_rate": 9.369893153106298e-05, "loss": 12.5003, "step": 4479 }, { "epoch": 0.1867366929265141, "grad_norm": 322.0, "learning_rate": 9.369565086548453e-05, "loss": 14.3763, "step": 4480 }, { "epoch": 0.18677837522404234, "grad_norm": 1736.0, "learning_rate": 9.369236940354898e-05, "loss": 44.7506, "step": 4481 }, { "epoch": 0.18682005752157058, "grad_norm": 75.0, "learning_rate": 9.368908714531611e-05, "loss": 8.1889, "step": 4482 }, { "epoch": 0.18686173981909882, "grad_norm": 56.5, "learning_rate": 9.368580409084576e-05, "loss": 8.501, "step": 4483 }, { "epoch": 0.18690342211662705, "grad_norm": 278.0, "learning_rate": 9.368252024019775e-05, "loss": 13.5628, "step": 4484 }, { "epoch": 0.18694510441415532, "grad_norm": 143.0, "learning_rate": 9.367923559343191e-05, "loss": 9.5005, "step": 4485 }, { "epoch": 0.18698678671168356, "grad_norm": 520.0, "learning_rate": 9.367595015060815e-05, "loss": 17.0002, "step": 4486 }, { "epoch": 0.1870284690092118, "grad_norm": 368.0, "learning_rate": 9.367266391178631e-05, "loss": 12.4401, "step": 4487 }, { "epoch": 0.18707015130674004, "grad_norm": 394.0, "learning_rate": 9.366937687702628e-05, "loss": 14.9381, "step": 4488 }, { "epoch": 0.18711183360426828, "grad_norm": 229.0, "learning_rate": 9.3666089046388e-05, "loss": 10.1876, "step": 4489 }, { "epoch": 0.18715351590179652, "grad_norm": 296.0, "learning_rate": 9.366280041993136e-05, "loss": 13.1879, "step": 4490 }, { "epoch": 0.18719519819932476, "grad_norm": 432.0, "learning_rate": 9.365951099771631e-05, "loss": 14.4411, "step": 4491 }, { "epoch": 0.187236880496853, "grad_norm": 222.0, "learning_rate": 9.36562207798028e-05, "loss": 10.5002, "step": 4492 }, { "epoch": 0.18727856279438124, "grad_norm": 1016.0, "learning_rate": 9.365292976625078e-05, "loss": 24.8753, "step": 4493 }, { "epoch": 0.18732024509190948, "grad_norm": 306.0, "learning_rate": 9.364963795712025e-05, "loss": 13.8762, "step": 4494 }, { "epoch": 0.18736192738943772, "grad_norm": 478.0, "learning_rate": 9.36463453524712e-05, "loss": 17.3762, "step": 4495 }, { "epoch": 0.18740360968696596, "grad_norm": 608.0, "learning_rate": 9.364305195236362e-05, "loss": 18.3755, "step": 4496 }, { "epoch": 0.1874452919844942, "grad_norm": 516.0, "learning_rate": 9.363975775685753e-05, "loss": 18.7506, "step": 4497 }, { "epoch": 0.18748697428202243, "grad_norm": 442.0, "learning_rate": 9.3636462766013e-05, "loss": 14.6251, "step": 4498 }, { "epoch": 0.18752865657955067, "grad_norm": 354.0, "learning_rate": 9.363316697989007e-05, "loss": 11.313, "step": 4499 }, { "epoch": 0.1875703388770789, "grad_norm": 146.0, "learning_rate": 9.362987039854878e-05, "loss": 8.6257, "step": 4500 }, { "epoch": 0.18761202117460715, "grad_norm": 408.0, "learning_rate": 9.362657302204925e-05, "loss": 15.5628, "step": 4501 }, { "epoch": 0.1876537034721354, "grad_norm": 580.0, "learning_rate": 9.362327485045153e-05, "loss": 18.2504, "step": 4502 }, { "epoch": 0.18769538576966363, "grad_norm": 332.0, "learning_rate": 9.361997588381577e-05, "loss": 14.063, "step": 4503 }, { "epoch": 0.18773706806719187, "grad_norm": 207.0, "learning_rate": 9.361667612220207e-05, "loss": 10.5628, "step": 4504 }, { "epoch": 0.1877787503647201, "grad_norm": 588.0, "learning_rate": 9.361337556567058e-05, "loss": 18.0013, "step": 4505 }, { "epoch": 0.18782043266224835, "grad_norm": 436.0, "learning_rate": 9.361007421428144e-05, "loss": 15.6879, "step": 4506 }, { "epoch": 0.1878621149597766, "grad_norm": 290.0, "learning_rate": 9.360677206809482e-05, "loss": 13.3127, "step": 4507 }, { "epoch": 0.18790379725730483, "grad_norm": 165.0, "learning_rate": 9.360346912717093e-05, "loss": 9.9379, "step": 4508 }, { "epoch": 0.18794547955483307, "grad_norm": 684.0, "learning_rate": 9.360016539156993e-05, "loss": 21.0007, "step": 4509 }, { "epoch": 0.1879871618523613, "grad_norm": 378.0, "learning_rate": 9.359686086135204e-05, "loss": 13.6903, "step": 4510 }, { "epoch": 0.18802884414988955, "grad_norm": 83.0, "learning_rate": 9.359355553657751e-05, "loss": 6.0002, "step": 4511 }, { "epoch": 0.18807052644741779, "grad_norm": 91.0, "learning_rate": 9.359024941730654e-05, "loss": 7.8753, "step": 4512 }, { "epoch": 0.18811220874494602, "grad_norm": 336.0, "learning_rate": 9.358694250359943e-05, "loss": 13.3753, "step": 4513 }, { "epoch": 0.18815389104247426, "grad_norm": 168.0, "learning_rate": 9.358363479551639e-05, "loss": 11.0002, "step": 4514 }, { "epoch": 0.1881955733400025, "grad_norm": 191.0, "learning_rate": 9.358032629311776e-05, "loss": 11.0632, "step": 4515 }, { "epoch": 0.18823725563753074, "grad_norm": 91.0, "learning_rate": 9.357701699646382e-05, "loss": 8.1252, "step": 4516 }, { "epoch": 0.18827893793505898, "grad_norm": 916.0, "learning_rate": 9.357370690561486e-05, "loss": 23.1254, "step": 4517 }, { "epoch": 0.18832062023258722, "grad_norm": 1032.0, "learning_rate": 9.357039602063122e-05, "loss": 24.6294, "step": 4518 }, { "epoch": 0.18836230253011546, "grad_norm": 344.0, "learning_rate": 9.356708434157327e-05, "loss": 14.3756, "step": 4519 }, { "epoch": 0.1884039848276437, "grad_norm": 424.0, "learning_rate": 9.356377186850131e-05, "loss": 15.313, "step": 4520 }, { "epoch": 0.18844566712517194, "grad_norm": 576.0, "learning_rate": 9.356045860147577e-05, "loss": 16.1254, "step": 4521 }, { "epoch": 0.18848734942270018, "grad_norm": 360.0, "learning_rate": 9.355714454055699e-05, "loss": 12.5004, "step": 4522 }, { "epoch": 0.18852903172022842, "grad_norm": 792.0, "learning_rate": 9.355382968580537e-05, "loss": 21.0003, "step": 4523 }, { "epoch": 0.18857071401775666, "grad_norm": 370.0, "learning_rate": 9.355051403728137e-05, "loss": 14.1878, "step": 4524 }, { "epoch": 0.1886123963152849, "grad_norm": 516.0, "learning_rate": 9.354719759504535e-05, "loss": 16.8755, "step": 4525 }, { "epoch": 0.18865407861281314, "grad_norm": 298.0, "learning_rate": 9.354388035915782e-05, "loss": 13.5005, "step": 4526 }, { "epoch": 0.18869576091034138, "grad_norm": 1080.0, "learning_rate": 9.354056232967919e-05, "loss": 27.6299, "step": 4527 }, { "epoch": 0.18873744320786962, "grad_norm": 1160.0, "learning_rate": 9.353724350666994e-05, "loss": 32.7503, "step": 4528 }, { "epoch": 0.18877912550539785, "grad_norm": 105.5, "learning_rate": 9.353392389019056e-05, "loss": 8.5627, "step": 4529 }, { "epoch": 0.1888208078029261, "grad_norm": 143.0, "learning_rate": 9.353060348030156e-05, "loss": 11.063, "step": 4530 }, { "epoch": 0.18886249010045433, "grad_norm": 272.0, "learning_rate": 9.352728227706346e-05, "loss": 13.252, "step": 4531 }, { "epoch": 0.18890417239798257, "grad_norm": 150.0, "learning_rate": 9.352396028053676e-05, "loss": 11.0009, "step": 4532 }, { "epoch": 0.1889458546955108, "grad_norm": 1032.0, "learning_rate": 9.352063749078203e-05, "loss": 25.3809, "step": 4533 }, { "epoch": 0.18898753699303905, "grad_norm": 620.0, "learning_rate": 9.351731390785981e-05, "loss": 19.1253, "step": 4534 }, { "epoch": 0.1890292192905673, "grad_norm": 656.0, "learning_rate": 9.351398953183069e-05, "loss": 19.8756, "step": 4535 }, { "epoch": 0.18907090158809553, "grad_norm": 202.0, "learning_rate": 9.351066436275524e-05, "loss": 11.6879, "step": 4536 }, { "epoch": 0.18911258388562377, "grad_norm": 1152.0, "learning_rate": 9.350733840069406e-05, "loss": 30.5003, "step": 4537 }, { "epoch": 0.189154266183152, "grad_norm": 486.0, "learning_rate": 9.350401164570779e-05, "loss": 16.7518, "step": 4538 }, { "epoch": 0.18919594848068025, "grad_norm": 268.0, "learning_rate": 9.350068409785704e-05, "loss": 11.5017, "step": 4539 }, { "epoch": 0.1892376307782085, "grad_norm": 125.5, "learning_rate": 9.349735575720247e-05, "loss": 10.8771, "step": 4540 }, { "epoch": 0.18927931307573673, "grad_norm": 386.0, "learning_rate": 9.349402662380472e-05, "loss": 14.8127, "step": 4541 }, { "epoch": 0.18932099537326497, "grad_norm": 736.0, "learning_rate": 9.349069669772448e-05, "loss": 19.0011, "step": 4542 }, { "epoch": 0.1893626776707932, "grad_norm": 106.0, "learning_rate": 9.348736597902243e-05, "loss": 9.2508, "step": 4543 }, { "epoch": 0.18940435996832145, "grad_norm": 396.0, "learning_rate": 9.348403446775927e-05, "loss": 14.9383, "step": 4544 }, { "epoch": 0.18944604226584968, "grad_norm": 338.0, "learning_rate": 9.348070216399572e-05, "loss": 14.1252, "step": 4545 }, { "epoch": 0.18948772456337792, "grad_norm": 340.0, "learning_rate": 9.347736906779252e-05, "loss": 15.1252, "step": 4546 }, { "epoch": 0.18952940686090616, "grad_norm": 384.0, "learning_rate": 9.347403517921041e-05, "loss": 16.8769, "step": 4547 }, { "epoch": 0.1895710891584344, "grad_norm": 246.0, "learning_rate": 9.347070049831015e-05, "loss": 13.8148, "step": 4548 }, { "epoch": 0.18961277145596264, "grad_norm": 140.0, "learning_rate": 9.346736502515252e-05, "loss": 9.1255, "step": 4549 }, { "epoch": 0.18965445375349088, "grad_norm": 300.0, "learning_rate": 9.346402875979829e-05, "loss": 11.8754, "step": 4550 }, { "epoch": 0.18969613605101912, "grad_norm": 760.0, "learning_rate": 9.346069170230828e-05, "loss": 24.0006, "step": 4551 }, { "epoch": 0.18973781834854736, "grad_norm": 300.0, "learning_rate": 9.34573538527433e-05, "loss": 13.1255, "step": 4552 }, { "epoch": 0.1897795006460756, "grad_norm": 386.0, "learning_rate": 9.34540152111642e-05, "loss": 15.5005, "step": 4553 }, { "epoch": 0.18982118294360384, "grad_norm": 274.0, "learning_rate": 9.345067577763182e-05, "loss": 11.3752, "step": 4554 }, { "epoch": 0.18986286524113208, "grad_norm": 348.0, "learning_rate": 9.344733555220701e-05, "loss": 13.4377, "step": 4555 }, { "epoch": 0.18990454753866032, "grad_norm": 426.0, "learning_rate": 9.344399453495066e-05, "loss": 15.2548, "step": 4556 }, { "epoch": 0.18994622983618856, "grad_norm": 482.0, "learning_rate": 9.344065272592363e-05, "loss": 15.4408, "step": 4557 }, { "epoch": 0.18998791213371682, "grad_norm": 216.0, "learning_rate": 9.343731012518686e-05, "loss": 11.1259, "step": 4558 }, { "epoch": 0.19002959443124506, "grad_norm": 280.0, "learning_rate": 9.343396673280126e-05, "loss": 13.0006, "step": 4559 }, { "epoch": 0.1900712767287733, "grad_norm": 560.0, "learning_rate": 9.343062254882775e-05, "loss": 16.2503, "step": 4560 }, { "epoch": 0.19011295902630154, "grad_norm": 284.0, "learning_rate": 9.34272775733273e-05, "loss": 12.7502, "step": 4561 }, { "epoch": 0.19015464132382978, "grad_norm": 408.0, "learning_rate": 9.342393180636086e-05, "loss": 14.5003, "step": 4562 }, { "epoch": 0.19019632362135802, "grad_norm": 322.0, "learning_rate": 9.342058524798942e-05, "loss": 14.2509, "step": 4563 }, { "epoch": 0.19023800591888626, "grad_norm": 636.0, "learning_rate": 9.341723789827393e-05, "loss": 19.8752, "step": 4564 }, { "epoch": 0.1902796882164145, "grad_norm": 260.0, "learning_rate": 9.341388975727545e-05, "loss": 12.6253, "step": 4565 }, { "epoch": 0.19032137051394274, "grad_norm": 107.0, "learning_rate": 9.341054082505496e-05, "loss": 9.8755, "step": 4566 }, { "epoch": 0.19036305281147098, "grad_norm": 414.0, "learning_rate": 9.340719110167352e-05, "loss": 15.063, "step": 4567 }, { "epoch": 0.19040473510899922, "grad_norm": 390.0, "learning_rate": 9.340384058719216e-05, "loss": 15.563, "step": 4568 }, { "epoch": 0.19044641740652746, "grad_norm": 178.0, "learning_rate": 9.340048928167196e-05, "loss": 11.0013, "step": 4569 }, { "epoch": 0.1904880997040557, "grad_norm": 312.0, "learning_rate": 9.339713718517399e-05, "loss": 13.7504, "step": 4570 }, { "epoch": 0.19052978200158394, "grad_norm": 149.0, "learning_rate": 9.339378429775934e-05, "loss": 9.6878, "step": 4571 }, { "epoch": 0.19057146429911218, "grad_norm": 524.0, "learning_rate": 9.339043061948911e-05, "loss": 15.1931, "step": 4572 }, { "epoch": 0.19061314659664041, "grad_norm": 108.5, "learning_rate": 9.338707615042445e-05, "loss": 8.3133, "step": 4573 }, { "epoch": 0.19065482889416865, "grad_norm": 346.0, "learning_rate": 9.338372089062646e-05, "loss": 15.0642, "step": 4574 }, { "epoch": 0.1906965111916969, "grad_norm": 454.0, "learning_rate": 9.338036484015631e-05, "loss": 16.2515, "step": 4575 }, { "epoch": 0.19073819348922513, "grad_norm": 230.0, "learning_rate": 9.337700799907517e-05, "loss": 11.8127, "step": 4576 }, { "epoch": 0.19077987578675337, "grad_norm": 223.0, "learning_rate": 9.337365036744419e-05, "loss": 13.6263, "step": 4577 }, { "epoch": 0.1908215580842816, "grad_norm": 152.0, "learning_rate": 9.337029194532459e-05, "loss": 9.2502, "step": 4578 }, { "epoch": 0.19086324038180985, "grad_norm": 776.0, "learning_rate": 9.336693273277757e-05, "loss": 19.5002, "step": 4579 }, { "epoch": 0.1909049226793381, "grad_norm": 201.0, "learning_rate": 9.336357272986434e-05, "loss": 11.3133, "step": 4580 }, { "epoch": 0.19094660497686633, "grad_norm": 644.0, "learning_rate": 9.336021193664617e-05, "loss": 19.2502, "step": 4581 }, { "epoch": 0.19098828727439457, "grad_norm": 428.0, "learning_rate": 9.335685035318426e-05, "loss": 14.8762, "step": 4582 }, { "epoch": 0.1910299695719228, "grad_norm": 572.0, "learning_rate": 9.335348797953993e-05, "loss": 18.7503, "step": 4583 }, { "epoch": 0.19107165186945105, "grad_norm": 1096.0, "learning_rate": 9.335012481577442e-05, "loss": 29.8754, "step": 4584 }, { "epoch": 0.1911133341669793, "grad_norm": 724.0, "learning_rate": 9.334676086194904e-05, "loss": 21.1256, "step": 4585 }, { "epoch": 0.19115501646450753, "grad_norm": 460.0, "learning_rate": 9.33433961181251e-05, "loss": 13.7506, "step": 4586 }, { "epoch": 0.19119669876203577, "grad_norm": 184.0, "learning_rate": 9.334003058436391e-05, "loss": 11.8754, "step": 4587 }, { "epoch": 0.191238381059564, "grad_norm": 596.0, "learning_rate": 9.333666426072682e-05, "loss": 17.8785, "step": 4588 }, { "epoch": 0.19128006335709224, "grad_norm": 628.0, "learning_rate": 9.333329714727517e-05, "loss": 20.8753, "step": 4589 }, { "epoch": 0.19132174565462048, "grad_norm": 207.0, "learning_rate": 9.332992924407034e-05, "loss": 11.2507, "step": 4590 }, { "epoch": 0.19136342795214872, "grad_norm": 764.0, "learning_rate": 9.332656055117371e-05, "loss": 21.8755, "step": 4591 }, { "epoch": 0.19140511024967696, "grad_norm": 484.0, "learning_rate": 9.332319106864664e-05, "loss": 16.629, "step": 4592 }, { "epoch": 0.1914467925472052, "grad_norm": 368.0, "learning_rate": 9.331982079655059e-05, "loss": 15.1882, "step": 4593 }, { "epoch": 0.19148847484473344, "grad_norm": 556.0, "learning_rate": 9.331644973494695e-05, "loss": 20.2512, "step": 4594 }, { "epoch": 0.19153015714226168, "grad_norm": 342.0, "learning_rate": 9.331307788389719e-05, "loss": 15.6887, "step": 4595 }, { "epoch": 0.19157183943978992, "grad_norm": 68.5, "learning_rate": 9.33097052434627e-05, "loss": 9.5007, "step": 4596 }, { "epoch": 0.19161352173731816, "grad_norm": 262.0, "learning_rate": 9.330633181370503e-05, "loss": 12.0003, "step": 4597 }, { "epoch": 0.1916552040348464, "grad_norm": 496.0, "learning_rate": 9.330295759468559e-05, "loss": 19.7509, "step": 4598 }, { "epoch": 0.19169688633237464, "grad_norm": 652.0, "learning_rate": 9.329958258646592e-05, "loss": 19.3753, "step": 4599 }, { "epoch": 0.19173856862990288, "grad_norm": 197.0, "learning_rate": 9.32962067891075e-05, "loss": 12.0629, "step": 4600 }, { "epoch": 0.19178025092743112, "grad_norm": 225.0, "learning_rate": 9.329283020267188e-05, "loss": 9.8758, "step": 4601 }, { "epoch": 0.19182193322495936, "grad_norm": 418.0, "learning_rate": 9.328945282722057e-05, "loss": 15.6877, "step": 4602 }, { "epoch": 0.1918636155224876, "grad_norm": 616.0, "learning_rate": 9.328607466281516e-05, "loss": 19.1255, "step": 4603 }, { "epoch": 0.19190529782001584, "grad_norm": 458.0, "learning_rate": 9.328269570951718e-05, "loss": 14.1887, "step": 4604 }, { "epoch": 0.19194698011754407, "grad_norm": 520.0, "learning_rate": 9.327931596738824e-05, "loss": 17.1254, "step": 4605 }, { "epoch": 0.19198866241507231, "grad_norm": 290.0, "learning_rate": 9.327593543648991e-05, "loss": 12.4376, "step": 4606 }, { "epoch": 0.19203034471260055, "grad_norm": 150.0, "learning_rate": 9.327255411688383e-05, "loss": 10.0012, "step": 4607 }, { "epoch": 0.1920720270101288, "grad_norm": 104.0, "learning_rate": 9.32691720086316e-05, "loss": 8.1879, "step": 4608 }, { "epoch": 0.19211370930765703, "grad_norm": 266.0, "learning_rate": 9.326578911179488e-05, "loss": 12.5632, "step": 4609 }, { "epoch": 0.19215539160518527, "grad_norm": 568.0, "learning_rate": 9.326240542643529e-05, "loss": 18.8752, "step": 4610 }, { "epoch": 0.1921970739027135, "grad_norm": 676.0, "learning_rate": 9.325902095261454e-05, "loss": 20.1252, "step": 4611 }, { "epoch": 0.19223875620024175, "grad_norm": 308.0, "learning_rate": 9.32556356903943e-05, "loss": 13.6255, "step": 4612 }, { "epoch": 0.19228043849777, "grad_norm": 296.0, "learning_rate": 9.325224963983625e-05, "loss": 12.3757, "step": 4613 }, { "epoch": 0.19232212079529823, "grad_norm": 216.0, "learning_rate": 9.324886280100211e-05, "loss": 11.5627, "step": 4614 }, { "epoch": 0.19236380309282647, "grad_norm": 580.0, "learning_rate": 9.32454751739536e-05, "loss": 18.2506, "step": 4615 }, { "epoch": 0.1924054853903547, "grad_norm": 804.0, "learning_rate": 9.324208675875248e-05, "loss": 23.3753, "step": 4616 }, { "epoch": 0.19244716768788295, "grad_norm": 114.5, "learning_rate": 9.323869755546047e-05, "loss": 9.6881, "step": 4617 }, { "epoch": 0.1924888499854112, "grad_norm": 302.0, "learning_rate": 9.323530756413938e-05, "loss": 14.0009, "step": 4618 }, { "epoch": 0.19253053228293943, "grad_norm": 856.0, "learning_rate": 9.323191678485096e-05, "loss": 21.1259, "step": 4619 }, { "epoch": 0.19257221458046767, "grad_norm": 134.0, "learning_rate": 9.322852521765701e-05, "loss": 8.6257, "step": 4620 }, { "epoch": 0.1926138968779959, "grad_norm": 308.0, "learning_rate": 9.322513286261937e-05, "loss": 13.1254, "step": 4621 }, { "epoch": 0.19265557917552414, "grad_norm": 860.0, "learning_rate": 9.322173971979984e-05, "loss": 21.0053, "step": 4622 }, { "epoch": 0.19269726147305238, "grad_norm": 416.0, "learning_rate": 9.321834578926026e-05, "loss": 15.938, "step": 4623 }, { "epoch": 0.19273894377058062, "grad_norm": 478.0, "learning_rate": 9.32149510710625e-05, "loss": 15.563, "step": 4624 }, { "epoch": 0.19278062606810886, "grad_norm": 143.0, "learning_rate": 9.321155556526842e-05, "loss": 10.1289, "step": 4625 }, { "epoch": 0.1928223083656371, "grad_norm": 2224.0, "learning_rate": 9.32081592719399e-05, "loss": 43.2504, "step": 4626 }, { "epoch": 0.19286399066316534, "grad_norm": 430.0, "learning_rate": 9.320476219113883e-05, "loss": 13.6881, "step": 4627 }, { "epoch": 0.19290567296069358, "grad_norm": 408.0, "learning_rate": 9.320136432292714e-05, "loss": 14.0628, "step": 4628 }, { "epoch": 0.19294735525822182, "grad_norm": 272.0, "learning_rate": 9.319796566736676e-05, "loss": 13.6881, "step": 4629 }, { "epoch": 0.19298903755575006, "grad_norm": 292.0, "learning_rate": 9.319456622451963e-05, "loss": 11.8769, "step": 4630 }, { "epoch": 0.19303071985327833, "grad_norm": 452.0, "learning_rate": 9.319116599444769e-05, "loss": 17.3764, "step": 4631 }, { "epoch": 0.19307240215080657, "grad_norm": 540.0, "learning_rate": 9.318776497721288e-05, "loss": 18.6266, "step": 4632 }, { "epoch": 0.1931140844483348, "grad_norm": 74.0, "learning_rate": 9.318436317287726e-05, "loss": 8.0626, "step": 4633 }, { "epoch": 0.19315576674586304, "grad_norm": 266.0, "learning_rate": 9.318096058150278e-05, "loss": 11.8173, "step": 4634 }, { "epoch": 0.19319744904339128, "grad_norm": 596.0, "learning_rate": 9.317755720315145e-05, "loss": 19.6255, "step": 4635 }, { "epoch": 0.19323913134091952, "grad_norm": 233.0, "learning_rate": 9.317415303788532e-05, "loss": 12.0003, "step": 4636 }, { "epoch": 0.19328081363844776, "grad_norm": 272.0, "learning_rate": 9.31707480857664e-05, "loss": 12.6255, "step": 4637 }, { "epoch": 0.193322495935976, "grad_norm": 318.0, "learning_rate": 9.316734234685678e-05, "loss": 13.5013, "step": 4638 }, { "epoch": 0.19336417823350424, "grad_norm": 342.0, "learning_rate": 9.31639358212185e-05, "loss": 10.6254, "step": 4639 }, { "epoch": 0.19340586053103248, "grad_norm": 264.0, "learning_rate": 9.316052850891367e-05, "loss": 11.3763, "step": 4640 }, { "epoch": 0.19344754282856072, "grad_norm": 576.0, "learning_rate": 9.315712041000437e-05, "loss": 18.6285, "step": 4641 }, { "epoch": 0.19348922512608896, "grad_norm": 348.0, "learning_rate": 9.315371152455272e-05, "loss": 14.1253, "step": 4642 }, { "epoch": 0.1935309074236172, "grad_norm": 308.0, "learning_rate": 9.315030185262086e-05, "loss": 13.1882, "step": 4643 }, { "epoch": 0.19357258972114544, "grad_norm": 612.0, "learning_rate": 9.31468913942709e-05, "loss": 17.6254, "step": 4644 }, { "epoch": 0.19361427201867368, "grad_norm": 195.0, "learning_rate": 9.314348014956502e-05, "loss": 10.6882, "step": 4645 }, { "epoch": 0.19365595431620192, "grad_norm": 1200.0, "learning_rate": 9.314006811856537e-05, "loss": 30.5014, "step": 4646 }, { "epoch": 0.19369763661373016, "grad_norm": 346.0, "learning_rate": 9.313665530133418e-05, "loss": 13.0633, "step": 4647 }, { "epoch": 0.1937393189112584, "grad_norm": 936.0, "learning_rate": 9.313324169793359e-05, "loss": 24.8752, "step": 4648 }, { "epoch": 0.19378100120878664, "grad_norm": 236.0, "learning_rate": 9.312982730842585e-05, "loss": 12.8752, "step": 4649 }, { "epoch": 0.19382268350631487, "grad_norm": 278.0, "learning_rate": 9.312641213287319e-05, "loss": 12.6253, "step": 4650 }, { "epoch": 0.19386436580384311, "grad_norm": 402.0, "learning_rate": 9.312299617133782e-05, "loss": 15.2503, "step": 4651 }, { "epoch": 0.19390604810137135, "grad_norm": 169.0, "learning_rate": 9.311957942388203e-05, "loss": 11.6254, "step": 4652 }, { "epoch": 0.1939477303988996, "grad_norm": 182.0, "learning_rate": 9.311616189056808e-05, "loss": 10.1256, "step": 4653 }, { "epoch": 0.19398941269642783, "grad_norm": 428.0, "learning_rate": 9.311274357145824e-05, "loss": 15.9378, "step": 4654 }, { "epoch": 0.19403109499395607, "grad_norm": 580.0, "learning_rate": 9.310932446661484e-05, "loss": 17.1296, "step": 4655 }, { "epoch": 0.1940727772914843, "grad_norm": 374.0, "learning_rate": 9.310590457610015e-05, "loss": 14.0631, "step": 4656 }, { "epoch": 0.19411445958901255, "grad_norm": 504.0, "learning_rate": 9.310248389997654e-05, "loss": 17.1253, "step": 4657 }, { "epoch": 0.1941561418865408, "grad_norm": 724.0, "learning_rate": 9.309906243830633e-05, "loss": 20.3787, "step": 4658 }, { "epoch": 0.19419782418406903, "grad_norm": 239.0, "learning_rate": 9.309564019115188e-05, "loss": 11.3129, "step": 4659 }, { "epoch": 0.19423950648159727, "grad_norm": 512.0, "learning_rate": 9.309221715857557e-05, "loss": 17.6257, "step": 4660 }, { "epoch": 0.1942811887791255, "grad_norm": 62.75, "learning_rate": 9.308879334063976e-05, "loss": 9.0009, "step": 4661 }, { "epoch": 0.19432287107665375, "grad_norm": 342.0, "learning_rate": 9.308536873740688e-05, "loss": 14.3752, "step": 4662 }, { "epoch": 0.194364553374182, "grad_norm": 294.0, "learning_rate": 9.308194334893933e-05, "loss": 12.0003, "step": 4663 }, { "epoch": 0.19440623567171023, "grad_norm": 227.0, "learning_rate": 9.307851717529954e-05, "loss": 11.8752, "step": 4664 }, { "epoch": 0.19444791796923847, "grad_norm": 440.0, "learning_rate": 9.307509021654993e-05, "loss": 15.7502, "step": 4665 }, { "epoch": 0.1944896002667667, "grad_norm": 608.0, "learning_rate": 9.3071662472753e-05, "loss": 17.8758, "step": 4666 }, { "epoch": 0.19453128256429494, "grad_norm": 470.0, "learning_rate": 9.306823394397118e-05, "loss": 17.6253, "step": 4667 }, { "epoch": 0.19457296486182318, "grad_norm": 544.0, "learning_rate": 9.306480463026699e-05, "loss": 16.2502, "step": 4668 }, { "epoch": 0.19461464715935142, "grad_norm": 362.0, "learning_rate": 9.306137453170289e-05, "loss": 13.5632, "step": 4669 }, { "epoch": 0.19465632945687966, "grad_norm": 54.75, "learning_rate": 9.305794364834143e-05, "loss": 6.5321, "step": 4670 }, { "epoch": 0.1946980117544079, "grad_norm": 152.0, "learning_rate": 9.305451198024513e-05, "loss": 10.0036, "step": 4671 }, { "epoch": 0.19473969405193614, "grad_norm": 330.0, "learning_rate": 9.305107952747654e-05, "loss": 14.1251, "step": 4672 }, { "epoch": 0.19478137634946438, "grad_norm": 276.0, "learning_rate": 9.304764629009817e-05, "loss": 13.8132, "step": 4673 }, { "epoch": 0.19482305864699262, "grad_norm": 508.0, "learning_rate": 9.304421226817264e-05, "loss": 18.5005, "step": 4674 }, { "epoch": 0.19486474094452086, "grad_norm": 326.0, "learning_rate": 9.304077746176253e-05, "loss": 13.3131, "step": 4675 }, { "epoch": 0.1949064232420491, "grad_norm": 326.0, "learning_rate": 9.303734187093043e-05, "loss": 13.7502, "step": 4676 }, { "epoch": 0.19494810553957734, "grad_norm": 249.0, "learning_rate": 9.303390549573894e-05, "loss": 9.002, "step": 4677 }, { "epoch": 0.19498978783710558, "grad_norm": 470.0, "learning_rate": 9.30304683362507e-05, "loss": 16.2557, "step": 4678 }, { "epoch": 0.19503147013463382, "grad_norm": 286.0, "learning_rate": 9.302703039252835e-05, "loss": 13.0628, "step": 4679 }, { "epoch": 0.19507315243216206, "grad_norm": 284.0, "learning_rate": 9.302359166463458e-05, "loss": 10.6257, "step": 4680 }, { "epoch": 0.1951148347296903, "grad_norm": 170.0, "learning_rate": 9.302015215263202e-05, "loss": 10.8755, "step": 4681 }, { "epoch": 0.19515651702721853, "grad_norm": 76.5, "learning_rate": 9.301671185658336e-05, "loss": 7.501, "step": 4682 }, { "epoch": 0.19519819932474677, "grad_norm": 516.0, "learning_rate": 9.301327077655131e-05, "loss": 18.3753, "step": 4683 }, { "epoch": 0.195239881622275, "grad_norm": 159.0, "learning_rate": 9.300982891259858e-05, "loss": 10.5627, "step": 4684 }, { "epoch": 0.19528156391980325, "grad_norm": 226.0, "learning_rate": 9.30063862647879e-05, "loss": 11.438, "step": 4685 }, { "epoch": 0.1953232462173315, "grad_norm": 144.0, "learning_rate": 9.300294283318203e-05, "loss": 9.8127, "step": 4686 }, { "epoch": 0.19536492851485973, "grad_norm": 716.0, "learning_rate": 9.299949861784369e-05, "loss": 18.7502, "step": 4687 }, { "epoch": 0.19540661081238797, "grad_norm": 324.0, "learning_rate": 9.299605361883568e-05, "loss": 12.8752, "step": 4688 }, { "epoch": 0.1954482931099162, "grad_norm": 298.0, "learning_rate": 9.299260783622076e-05, "loss": 11.4378, "step": 4689 }, { "epoch": 0.19548997540744445, "grad_norm": 276.0, "learning_rate": 9.298916127006176e-05, "loss": 12.7504, "step": 4690 }, { "epoch": 0.1955316577049727, "grad_norm": 532.0, "learning_rate": 9.298571392042148e-05, "loss": 17.2505, "step": 4691 }, { "epoch": 0.19557334000250093, "grad_norm": 241.0, "learning_rate": 9.298226578736275e-05, "loss": 12.6257, "step": 4692 }, { "epoch": 0.19561502230002917, "grad_norm": 326.0, "learning_rate": 9.297881687094841e-05, "loss": 13.6877, "step": 4693 }, { "epoch": 0.1956567045975574, "grad_norm": 272.0, "learning_rate": 9.297536717124131e-05, "loss": 13.0013, "step": 4694 }, { "epoch": 0.19569838689508565, "grad_norm": 226.0, "learning_rate": 9.297191668830433e-05, "loss": 11.6253, "step": 4695 }, { "epoch": 0.19574006919261389, "grad_norm": 68.0, "learning_rate": 9.296846542220034e-05, "loss": 6.8127, "step": 4696 }, { "epoch": 0.19578175149014213, "grad_norm": 272.0, "learning_rate": 9.296501337299228e-05, "loss": 12.5007, "step": 4697 }, { "epoch": 0.19582343378767036, "grad_norm": 173.0, "learning_rate": 9.296156054074303e-05, "loss": 11.3128, "step": 4698 }, { "epoch": 0.1958651160851986, "grad_norm": 620.0, "learning_rate": 9.295810692551552e-05, "loss": 18.7505, "step": 4699 }, { "epoch": 0.19590679838272684, "grad_norm": 135.0, "learning_rate": 9.29546525273727e-05, "loss": 9.1888, "step": 4700 }, { "epoch": 0.19594848068025508, "grad_norm": 656.0, "learning_rate": 9.295119734637752e-05, "loss": 19.1255, "step": 4701 }, { "epoch": 0.19599016297778332, "grad_norm": 416.0, "learning_rate": 9.294774138259296e-05, "loss": 15.6879, "step": 4702 }, { "epoch": 0.19603184527531156, "grad_norm": 652.0, "learning_rate": 9.2944284636082e-05, "loss": 20.3751, "step": 4703 }, { "epoch": 0.19607352757283983, "grad_norm": 264.0, "learning_rate": 9.294082710690764e-05, "loss": 13.1878, "step": 4704 }, { "epoch": 0.19611520987036807, "grad_norm": 1984.0, "learning_rate": 9.293736879513288e-05, "loss": 42.0016, "step": 4705 }, { "epoch": 0.1961568921678963, "grad_norm": 380.0, "learning_rate": 9.293390970082079e-05, "loss": 15.6254, "step": 4706 }, { "epoch": 0.19619857446542455, "grad_norm": 392.0, "learning_rate": 9.293044982403436e-05, "loss": 15.8752, "step": 4707 }, { "epoch": 0.19624025676295279, "grad_norm": 900.0, "learning_rate": 9.292698916483668e-05, "loss": 22.5002, "step": 4708 }, { "epoch": 0.19628193906048103, "grad_norm": 348.0, "learning_rate": 9.29235277232908e-05, "loss": 14.2504, "step": 4709 }, { "epoch": 0.19632362135800926, "grad_norm": 496.0, "learning_rate": 9.292006549945984e-05, "loss": 16.7503, "step": 4710 }, { "epoch": 0.1963653036555375, "grad_norm": 436.0, "learning_rate": 9.291660249340687e-05, "loss": 15.5629, "step": 4711 }, { "epoch": 0.19640698595306574, "grad_norm": 189.0, "learning_rate": 9.2913138705195e-05, "loss": 10.5009, "step": 4712 }, { "epoch": 0.19644866825059398, "grad_norm": 334.0, "learning_rate": 9.290967413488739e-05, "loss": 11.563, "step": 4713 }, { "epoch": 0.19649035054812222, "grad_norm": 222.0, "learning_rate": 9.290620878254713e-05, "loss": 12.0002, "step": 4714 }, { "epoch": 0.19653203284565046, "grad_norm": 241.0, "learning_rate": 9.290274264823742e-05, "loss": 11.1882, "step": 4715 }, { "epoch": 0.1965737151431787, "grad_norm": 504.0, "learning_rate": 9.289927573202141e-05, "loss": 16.8752, "step": 4716 }, { "epoch": 0.19661539744070694, "grad_norm": 148.0, "learning_rate": 9.289580803396229e-05, "loss": 9.6259, "step": 4717 }, { "epoch": 0.19665707973823518, "grad_norm": 608.0, "learning_rate": 9.289233955412327e-05, "loss": 19.7503, "step": 4718 }, { "epoch": 0.19669876203576342, "grad_norm": 276.0, "learning_rate": 9.288887029256755e-05, "loss": 12.6878, "step": 4719 }, { "epoch": 0.19674044433329166, "grad_norm": 752.0, "learning_rate": 9.288540024935837e-05, "loss": 20.5003, "step": 4720 }, { "epoch": 0.1967821266308199, "grad_norm": 278.0, "learning_rate": 9.288192942455896e-05, "loss": 11.2502, "step": 4721 }, { "epoch": 0.19682380892834814, "grad_norm": 282.0, "learning_rate": 9.287845781823257e-05, "loss": 13.251, "step": 4722 }, { "epoch": 0.19686549122587638, "grad_norm": 418.0, "learning_rate": 9.287498543044248e-05, "loss": 13.8754, "step": 4723 }, { "epoch": 0.19690717352340462, "grad_norm": 167.0, "learning_rate": 9.287151226125198e-05, "loss": 10.0007, "step": 4724 }, { "epoch": 0.19694885582093286, "grad_norm": 306.0, "learning_rate": 9.286803831072436e-05, "loss": 14.6257, "step": 4725 }, { "epoch": 0.1969905381184611, "grad_norm": 426.0, "learning_rate": 9.286456357892295e-05, "loss": 14.1878, "step": 4726 }, { "epoch": 0.19703222041598933, "grad_norm": 178.0, "learning_rate": 9.286108806591105e-05, "loss": 12.1256, "step": 4727 }, { "epoch": 0.19707390271351757, "grad_norm": 268.0, "learning_rate": 9.285761177175201e-05, "loss": 13.2505, "step": 4728 }, { "epoch": 0.1971155850110458, "grad_norm": 71.5, "learning_rate": 9.28541346965092e-05, "loss": 7.7505, "step": 4729 }, { "epoch": 0.19715726730857405, "grad_norm": 486.0, "learning_rate": 9.285065684024599e-05, "loss": 17.126, "step": 4730 }, { "epoch": 0.1971989496061023, "grad_norm": 556.0, "learning_rate": 9.284717820302573e-05, "loss": 18.5008, "step": 4731 }, { "epoch": 0.19724063190363053, "grad_norm": 121.0, "learning_rate": 9.284369878491186e-05, "loss": 10.0007, "step": 4732 }, { "epoch": 0.19728231420115877, "grad_norm": 53.5, "learning_rate": 9.284021858596779e-05, "loss": 7.9691, "step": 4733 }, { "epoch": 0.197323996498687, "grad_norm": 210.0, "learning_rate": 9.28367376062569e-05, "loss": 11.5629, "step": 4734 }, { "epoch": 0.19736567879621525, "grad_norm": 171.0, "learning_rate": 9.283325584584268e-05, "loss": 10.8754, "step": 4735 }, { "epoch": 0.1974073610937435, "grad_norm": 704.0, "learning_rate": 9.282977330478859e-05, "loss": 20.3761, "step": 4736 }, { "epoch": 0.19744904339127173, "grad_norm": 161.0, "learning_rate": 9.282628998315806e-05, "loss": 10.0627, "step": 4737 }, { "epoch": 0.19749072568879997, "grad_norm": 732.0, "learning_rate": 9.282280588101459e-05, "loss": 19.3756, "step": 4738 }, { "epoch": 0.1975324079863282, "grad_norm": 440.0, "learning_rate": 9.281932099842167e-05, "loss": 15.1253, "step": 4739 }, { "epoch": 0.19757409028385645, "grad_norm": 348.0, "learning_rate": 9.281583533544285e-05, "loss": 14.5636, "step": 4740 }, { "epoch": 0.19761577258138469, "grad_norm": 752.0, "learning_rate": 9.281234889214162e-05, "loss": 22.3754, "step": 4741 }, { "epoch": 0.19765745487891292, "grad_norm": 276.0, "learning_rate": 9.280886166858154e-05, "loss": 13.0627, "step": 4742 }, { "epoch": 0.19769913717644116, "grad_norm": 520.0, "learning_rate": 9.280537366482614e-05, "loss": 17.2502, "step": 4743 }, { "epoch": 0.1977408194739694, "grad_norm": 270.0, "learning_rate": 9.280188488093901e-05, "loss": 11.9377, "step": 4744 }, { "epoch": 0.19778250177149764, "grad_norm": 1152.0, "learning_rate": 9.279839531698374e-05, "loss": 28.7529, "step": 4745 }, { "epoch": 0.19782418406902588, "grad_norm": 97.5, "learning_rate": 9.279490497302389e-05, "loss": 5.7816, "step": 4746 }, { "epoch": 0.19786586636655412, "grad_norm": 174.0, "learning_rate": 9.279141384912312e-05, "loss": 11.3755, "step": 4747 }, { "epoch": 0.19790754866408236, "grad_norm": 101.5, "learning_rate": 9.278792194534502e-05, "loss": 9.8754, "step": 4748 }, { "epoch": 0.1979492309616106, "grad_norm": 460.0, "learning_rate": 9.278442926175326e-05, "loss": 16.2505, "step": 4749 }, { "epoch": 0.19799091325913884, "grad_norm": 196.0, "learning_rate": 9.278093579841146e-05, "loss": 10.6877, "step": 4750 }, { "epoch": 0.19803259555666708, "grad_norm": 1456.0, "learning_rate": 9.277744155538333e-05, "loss": 33.0008, "step": 4751 }, { "epoch": 0.19807427785419532, "grad_norm": 115.0, "learning_rate": 9.27739465327325e-05, "loss": 9.1258, "step": 4752 }, { "epoch": 0.19811596015172356, "grad_norm": 91.5, "learning_rate": 9.277045073052272e-05, "loss": 8.188, "step": 4753 }, { "epoch": 0.1981576424492518, "grad_norm": 528.0, "learning_rate": 9.276695414881768e-05, "loss": 17.2504, "step": 4754 }, { "epoch": 0.19819932474678004, "grad_norm": 322.0, "learning_rate": 9.27634567876811e-05, "loss": 13.1877, "step": 4755 }, { "epoch": 0.19824100704430828, "grad_norm": 282.0, "learning_rate": 9.275995864717672e-05, "loss": 12.9386, "step": 4756 }, { "epoch": 0.19828268934183652, "grad_norm": 450.0, "learning_rate": 9.275645972736829e-05, "loss": 16.3752, "step": 4757 }, { "epoch": 0.19832437163936475, "grad_norm": 496.0, "learning_rate": 9.27529600283196e-05, "loss": 17.2502, "step": 4758 }, { "epoch": 0.198366053936893, "grad_norm": 556.0, "learning_rate": 9.274945955009442e-05, "loss": 16.6254, "step": 4759 }, { "epoch": 0.19840773623442123, "grad_norm": 342.0, "learning_rate": 9.274595829275653e-05, "loss": 14.6877, "step": 4760 }, { "epoch": 0.19844941853194947, "grad_norm": 452.0, "learning_rate": 9.274245625636978e-05, "loss": 16.5005, "step": 4761 }, { "epoch": 0.1984911008294777, "grad_norm": 426.0, "learning_rate": 9.273895344099794e-05, "loss": 15.5007, "step": 4762 }, { "epoch": 0.19853278312700595, "grad_norm": 612.0, "learning_rate": 9.273544984670489e-05, "loss": 20.6252, "step": 4763 }, { "epoch": 0.1985744654245342, "grad_norm": 916.0, "learning_rate": 9.273194547355449e-05, "loss": 19.8801, "step": 4764 }, { "epoch": 0.19861614772206243, "grad_norm": 436.0, "learning_rate": 9.272844032161056e-05, "loss": 16.3754, "step": 4765 }, { "epoch": 0.19865783001959067, "grad_norm": 376.0, "learning_rate": 9.272493439093704e-05, "loss": 15.3753, "step": 4766 }, { "epoch": 0.1986995123171189, "grad_norm": 536.0, "learning_rate": 9.272142768159777e-05, "loss": 18.252, "step": 4767 }, { "epoch": 0.19874119461464715, "grad_norm": 410.0, "learning_rate": 9.271792019365672e-05, "loss": 14.8755, "step": 4768 }, { "epoch": 0.1987828769121754, "grad_norm": 208.0, "learning_rate": 9.271441192717776e-05, "loss": 11.5005, "step": 4769 }, { "epoch": 0.19882455920970363, "grad_norm": 210.0, "learning_rate": 9.271090288222486e-05, "loss": 11.9378, "step": 4770 }, { "epoch": 0.19886624150723187, "grad_norm": 560.0, "learning_rate": 9.270739305886195e-05, "loss": 17.2503, "step": 4771 }, { "epoch": 0.1989079238047601, "grad_norm": 312.0, "learning_rate": 9.270388245715303e-05, "loss": 13.7508, "step": 4772 }, { "epoch": 0.19894960610228835, "grad_norm": 1184.0, "learning_rate": 9.270037107716206e-05, "loss": 29.7503, "step": 4773 }, { "epoch": 0.19899128839981658, "grad_norm": 143.0, "learning_rate": 9.269685891895302e-05, "loss": 9.0003, "step": 4774 }, { "epoch": 0.19903297069734482, "grad_norm": 692.0, "learning_rate": 9.269334598258994e-05, "loss": 22.3753, "step": 4775 }, { "epoch": 0.19907465299487306, "grad_norm": 664.0, "learning_rate": 9.268983226813686e-05, "loss": 20.0003, "step": 4776 }, { "epoch": 0.19911633529240133, "grad_norm": 428.0, "learning_rate": 9.268631777565777e-05, "loss": 16.0003, "step": 4777 }, { "epoch": 0.19915801758992957, "grad_norm": 584.0, "learning_rate": 9.268280250521677e-05, "loss": 15.6257, "step": 4778 }, { "epoch": 0.1991996998874578, "grad_norm": 420.0, "learning_rate": 9.267928645687788e-05, "loss": 14.9385, "step": 4779 }, { "epoch": 0.19924138218498605, "grad_norm": 540.0, "learning_rate": 9.267576963070524e-05, "loss": 18.3755, "step": 4780 }, { "epoch": 0.1992830644825143, "grad_norm": 362.0, "learning_rate": 9.267225202676289e-05, "loss": 13.2503, "step": 4781 }, { "epoch": 0.19932474678004253, "grad_norm": 616.0, "learning_rate": 9.266873364511494e-05, "loss": 18.7512, "step": 4782 }, { "epoch": 0.19936642907757077, "grad_norm": 430.0, "learning_rate": 9.266521448582557e-05, "loss": 14.5626, "step": 4783 }, { "epoch": 0.199408111375099, "grad_norm": 232.0, "learning_rate": 9.266169454895886e-05, "loss": 11.9378, "step": 4784 }, { "epoch": 0.19944979367262725, "grad_norm": 286.0, "learning_rate": 9.265817383457898e-05, "loss": 11.8129, "step": 4785 }, { "epoch": 0.19949147597015549, "grad_norm": 187.0, "learning_rate": 9.265465234275009e-05, "loss": 10.9384, "step": 4786 }, { "epoch": 0.19953315826768372, "grad_norm": 1960.0, "learning_rate": 9.265113007353639e-05, "loss": 40.5013, "step": 4787 }, { "epoch": 0.19957484056521196, "grad_norm": 226.0, "learning_rate": 9.264760702700204e-05, "loss": 9.3128, "step": 4788 }, { "epoch": 0.1996165228627402, "grad_norm": 108.5, "learning_rate": 9.264408320321128e-05, "loss": 10.7504, "step": 4789 }, { "epoch": 0.19965820516026844, "grad_norm": 482.0, "learning_rate": 9.26405586022283e-05, "loss": 17.0003, "step": 4790 }, { "epoch": 0.19969988745779668, "grad_norm": 624.0, "learning_rate": 9.263703322411736e-05, "loss": 19.0001, "step": 4791 }, { "epoch": 0.19974156975532492, "grad_norm": 426.0, "learning_rate": 9.263350706894272e-05, "loss": 16.1257, "step": 4792 }, { "epoch": 0.19978325205285316, "grad_norm": 316.0, "learning_rate": 9.262998013676862e-05, "loss": 13.5632, "step": 4793 }, { "epoch": 0.1998249343503814, "grad_norm": 141.0, "learning_rate": 9.262645242765935e-05, "loss": 9.4377, "step": 4794 }, { "epoch": 0.19986661664790964, "grad_norm": 362.0, "learning_rate": 9.26229239416792e-05, "loss": 13.0004, "step": 4795 }, { "epoch": 0.19990829894543788, "grad_norm": 716.0, "learning_rate": 9.261939467889246e-05, "loss": 19.0051, "step": 4796 }, { "epoch": 0.19994998124296612, "grad_norm": 270.0, "learning_rate": 9.261586463936349e-05, "loss": 12.563, "step": 4797 }, { "epoch": 0.19999166354049436, "grad_norm": 490.0, "learning_rate": 9.261233382315659e-05, "loss": 13.3759, "step": 4798 }, { "epoch": 0.2000333458380226, "grad_norm": 65.0, "learning_rate": 9.260880223033613e-05, "loss": 9.3131, "step": 4799 }, { "epoch": 0.20007502813555084, "grad_norm": 194.0, "learning_rate": 9.260526986096647e-05, "loss": 11.4391, "step": 4800 }, { "epoch": 0.20011671043307908, "grad_norm": 440.0, "learning_rate": 9.260173671511199e-05, "loss": 15.188, "step": 4801 }, { "epoch": 0.20015839273060732, "grad_norm": 380.0, "learning_rate": 9.259820279283706e-05, "loss": 15.0007, "step": 4802 }, { "epoch": 0.20020007502813555, "grad_norm": 592.0, "learning_rate": 9.259466809420611e-05, "loss": 17.3756, "step": 4803 }, { "epoch": 0.2002417573256638, "grad_norm": 272.0, "learning_rate": 9.259113261928356e-05, "loss": 14.0007, "step": 4804 }, { "epoch": 0.20028343962319203, "grad_norm": 155.0, "learning_rate": 9.258759636813383e-05, "loss": 9.3755, "step": 4805 }, { "epoch": 0.20032512192072027, "grad_norm": 490.0, "learning_rate": 9.258405934082137e-05, "loss": 16.6253, "step": 4806 }, { "epoch": 0.2003668042182485, "grad_norm": 584.0, "learning_rate": 9.258052153741065e-05, "loss": 16.7554, "step": 4807 }, { "epoch": 0.20040848651577675, "grad_norm": 148.0, "learning_rate": 9.257698295796615e-05, "loss": 9.5001, "step": 4808 }, { "epoch": 0.200450168813305, "grad_norm": 840.0, "learning_rate": 9.257344360255235e-05, "loss": 24.7502, "step": 4809 }, { "epoch": 0.20049185111083323, "grad_norm": 442.0, "learning_rate": 9.256990347123378e-05, "loss": 16.0006, "step": 4810 }, { "epoch": 0.20053353340836147, "grad_norm": 396.0, "learning_rate": 9.256636256407492e-05, "loss": 14.9378, "step": 4811 }, { "epoch": 0.2005752157058897, "grad_norm": 84.5, "learning_rate": 9.256282088114034e-05, "loss": 9.3135, "step": 4812 }, { "epoch": 0.20061689800341795, "grad_norm": 668.0, "learning_rate": 9.255927842249455e-05, "loss": 19.2502, "step": 4813 }, { "epoch": 0.2006585803009462, "grad_norm": 464.0, "learning_rate": 9.255573518820216e-05, "loss": 16.0008, "step": 4814 }, { "epoch": 0.20070026259847443, "grad_norm": 348.0, "learning_rate": 9.255219117832769e-05, "loss": 13.8751, "step": 4815 }, { "epoch": 0.20074194489600267, "grad_norm": 172.0, "learning_rate": 9.254864639293577e-05, "loss": 7.0009, "step": 4816 }, { "epoch": 0.2007836271935309, "grad_norm": 182.0, "learning_rate": 9.2545100832091e-05, "loss": 11.5629, "step": 4817 }, { "epoch": 0.20082530949105915, "grad_norm": 420.0, "learning_rate": 9.254155449585797e-05, "loss": 16.5008, "step": 4818 }, { "epoch": 0.20086699178858738, "grad_norm": 520.0, "learning_rate": 9.253800738430136e-05, "loss": 14.3788, "step": 4819 }, { "epoch": 0.20090867408611562, "grad_norm": 344.0, "learning_rate": 9.253445949748577e-05, "loss": 13.3129, "step": 4820 }, { "epoch": 0.20095035638364386, "grad_norm": 75.5, "learning_rate": 9.253091083547589e-05, "loss": 7.8754, "step": 4821 }, { "epoch": 0.2009920386811721, "grad_norm": 250.0, "learning_rate": 9.252736139833638e-05, "loss": 12.0003, "step": 4822 }, { "epoch": 0.20103372097870034, "grad_norm": 390.0, "learning_rate": 9.252381118613192e-05, "loss": 15.0628, "step": 4823 }, { "epoch": 0.20107540327622858, "grad_norm": 79.0, "learning_rate": 9.252026019892724e-05, "loss": 8.5628, "step": 4824 }, { "epoch": 0.20111708557375682, "grad_norm": 376.0, "learning_rate": 9.251670843678705e-05, "loss": 15.5007, "step": 4825 }, { "epoch": 0.20115876787128506, "grad_norm": 362.0, "learning_rate": 9.251315589977607e-05, "loss": 13.9382, "step": 4826 }, { "epoch": 0.2012004501688133, "grad_norm": 177.0, "learning_rate": 9.250960258795904e-05, "loss": 12.1261, "step": 4827 }, { "epoch": 0.20124213246634154, "grad_norm": 296.0, "learning_rate": 9.250604850140074e-05, "loss": 13.5631, "step": 4828 }, { "epoch": 0.20128381476386978, "grad_norm": 207.0, "learning_rate": 9.250249364016592e-05, "loss": 11.5002, "step": 4829 }, { "epoch": 0.20132549706139802, "grad_norm": 104.0, "learning_rate": 9.24989380043194e-05, "loss": 9.5627, "step": 4830 }, { "epoch": 0.20136717935892626, "grad_norm": 90.0, "learning_rate": 9.249538159392595e-05, "loss": 9.6257, "step": 4831 }, { "epoch": 0.2014088616564545, "grad_norm": 251.0, "learning_rate": 9.24918244090504e-05, "loss": 10.1877, "step": 4832 }, { "epoch": 0.20145054395398274, "grad_norm": 156.0, "learning_rate": 9.248826644975756e-05, "loss": 11.1879, "step": 4833 }, { "epoch": 0.20149222625151098, "grad_norm": 478.0, "learning_rate": 9.248470771611232e-05, "loss": 14.7505, "step": 4834 }, { "epoch": 0.20153390854903921, "grad_norm": 320.0, "learning_rate": 9.24811482081795e-05, "loss": 16.2504, "step": 4835 }, { "epoch": 0.20157559084656745, "grad_norm": 422.0, "learning_rate": 9.247758792602398e-05, "loss": 14.5003, "step": 4836 }, { "epoch": 0.2016172731440957, "grad_norm": 544.0, "learning_rate": 9.247402686971065e-05, "loss": 17.6254, "step": 4837 }, { "epoch": 0.20165895544162393, "grad_norm": 45.5, "learning_rate": 9.247046503930442e-05, "loss": 7.9379, "step": 4838 }, { "epoch": 0.20170063773915217, "grad_norm": 268.0, "learning_rate": 9.246690243487018e-05, "loss": 11.4386, "step": 4839 }, { "epoch": 0.2017423200366804, "grad_norm": 572.0, "learning_rate": 9.246333905647288e-05, "loss": 18.0019, "step": 4840 }, { "epoch": 0.20178400233420865, "grad_norm": 82.5, "learning_rate": 9.245977490417745e-05, "loss": 9.3142, "step": 4841 }, { "epoch": 0.2018256846317369, "grad_norm": 470.0, "learning_rate": 9.245620997804886e-05, "loss": 15.7522, "step": 4842 }, { "epoch": 0.20186736692926513, "grad_norm": 836.0, "learning_rate": 9.245264427815207e-05, "loss": 20.5004, "step": 4843 }, { "epoch": 0.20190904922679337, "grad_norm": 588.0, "learning_rate": 9.244907780455208e-05, "loss": 18.1274, "step": 4844 }, { "epoch": 0.2019507315243216, "grad_norm": 292.0, "learning_rate": 9.244551055731386e-05, "loss": 12.9382, "step": 4845 }, { "epoch": 0.20199241382184985, "grad_norm": 484.0, "learning_rate": 9.244194253650245e-05, "loss": 18.0003, "step": 4846 }, { "epoch": 0.2020340961193781, "grad_norm": 632.0, "learning_rate": 9.243837374218287e-05, "loss": 20.5003, "step": 4847 }, { "epoch": 0.20207577841690633, "grad_norm": 107.5, "learning_rate": 9.243480417442016e-05, "loss": 9.1252, "step": 4848 }, { "epoch": 0.20211746071443457, "grad_norm": 884.0, "learning_rate": 9.243123383327938e-05, "loss": 26.1253, "step": 4849 }, { "epoch": 0.20215914301196283, "grad_norm": 210.0, "learning_rate": 9.24276627188256e-05, "loss": 12.3757, "step": 4850 }, { "epoch": 0.20220082530949107, "grad_norm": 338.0, "learning_rate": 9.24240908311239e-05, "loss": 13.8128, "step": 4851 }, { "epoch": 0.2022425076070193, "grad_norm": 328.0, "learning_rate": 9.242051817023938e-05, "loss": 14.1255, "step": 4852 }, { "epoch": 0.20228418990454755, "grad_norm": 262.0, "learning_rate": 9.241694473623715e-05, "loss": 11.4379, "step": 4853 }, { "epoch": 0.2023258722020758, "grad_norm": 434.0, "learning_rate": 9.241337052918233e-05, "loss": 15.3128, "step": 4854 }, { "epoch": 0.20236755449960403, "grad_norm": 502.0, "learning_rate": 9.240979554914006e-05, "loss": 15.8772, "step": 4855 }, { "epoch": 0.20240923679713227, "grad_norm": 232.0, "learning_rate": 9.240621979617552e-05, "loss": 11.7501, "step": 4856 }, { "epoch": 0.2024509190946605, "grad_norm": 362.0, "learning_rate": 9.240264327035385e-05, "loss": 12.8128, "step": 4857 }, { "epoch": 0.20249260139218875, "grad_norm": 312.0, "learning_rate": 9.239906597174024e-05, "loss": 13.1878, "step": 4858 }, { "epoch": 0.202534283689717, "grad_norm": 1004.0, "learning_rate": 9.239548790039989e-05, "loss": 25.7505, "step": 4859 }, { "epoch": 0.20257596598724523, "grad_norm": 780.0, "learning_rate": 9.2391909056398e-05, "loss": 22.376, "step": 4860 }, { "epoch": 0.20261764828477347, "grad_norm": 1360.0, "learning_rate": 9.238832943979983e-05, "loss": 28.38, "step": 4861 }, { "epoch": 0.2026593305823017, "grad_norm": 356.0, "learning_rate": 9.238474905067059e-05, "loss": 13.8131, "step": 4862 }, { "epoch": 0.20270101287982994, "grad_norm": 314.0, "learning_rate": 9.238116788907552e-05, "loss": 14.2506, "step": 4863 }, { "epoch": 0.20274269517735818, "grad_norm": 109.5, "learning_rate": 9.237758595507991e-05, "loss": 9.438, "step": 4864 }, { "epoch": 0.20278437747488642, "grad_norm": 628.0, "learning_rate": 9.237400324874901e-05, "loss": 21.0002, "step": 4865 }, { "epoch": 0.20282605977241466, "grad_norm": 178.0, "learning_rate": 9.237041977014818e-05, "loss": 9.3127, "step": 4866 }, { "epoch": 0.2028677420699429, "grad_norm": 168.0, "learning_rate": 9.236683551934267e-05, "loss": 6.5003, "step": 4867 }, { "epoch": 0.20290942436747114, "grad_norm": 544.0, "learning_rate": 9.236325049639782e-05, "loss": 16.7507, "step": 4868 }, { "epoch": 0.20295110666499938, "grad_norm": 980.0, "learning_rate": 9.235966470137895e-05, "loss": 25.8754, "step": 4869 }, { "epoch": 0.20299278896252762, "grad_norm": 157.0, "learning_rate": 9.235607813435145e-05, "loss": 10.6878, "step": 4870 }, { "epoch": 0.20303447126005586, "grad_norm": 492.0, "learning_rate": 9.235249079538068e-05, "loss": 17.7511, "step": 4871 }, { "epoch": 0.2030761535575841, "grad_norm": 220.0, "learning_rate": 9.234890268453199e-05, "loss": 11.5628, "step": 4872 }, { "epoch": 0.20311783585511234, "grad_norm": 464.0, "learning_rate": 9.23453138018708e-05, "loss": 16.3753, "step": 4873 }, { "epoch": 0.20315951815264058, "grad_norm": 69.0, "learning_rate": 9.234172414746248e-05, "loss": 6.5629, "step": 4874 }, { "epoch": 0.20320120045016882, "grad_norm": 340.0, "learning_rate": 9.23381337213725e-05, "loss": 12.3756, "step": 4875 }, { "epoch": 0.20324288274769706, "grad_norm": 296.0, "learning_rate": 9.233454252366626e-05, "loss": 13.4385, "step": 4876 }, { "epoch": 0.2032845650452253, "grad_norm": 182.0, "learning_rate": 9.233095055440925e-05, "loss": 10.7503, "step": 4877 }, { "epoch": 0.20332624734275354, "grad_norm": 434.0, "learning_rate": 9.232735781366689e-05, "loss": 15.3752, "step": 4878 }, { "epoch": 0.20336792964028177, "grad_norm": 1040.0, "learning_rate": 9.232376430150468e-05, "loss": 24.505, "step": 4879 }, { "epoch": 0.20340961193781001, "grad_norm": 246.0, "learning_rate": 9.23201700179881e-05, "loss": 11.1252, "step": 4880 }, { "epoch": 0.20345129423533825, "grad_norm": 179.0, "learning_rate": 9.231657496318268e-05, "loss": 11.0004, "step": 4881 }, { "epoch": 0.2034929765328665, "grad_norm": 228.0, "learning_rate": 9.23129791371539e-05, "loss": 12.1253, "step": 4882 }, { "epoch": 0.20353465883039473, "grad_norm": 284.0, "learning_rate": 9.230938253996735e-05, "loss": 12.8129, "step": 4883 }, { "epoch": 0.20357634112792297, "grad_norm": 193.0, "learning_rate": 9.230578517168854e-05, "loss": 11.1267, "step": 4884 }, { "epoch": 0.2036180234254512, "grad_norm": 115.5, "learning_rate": 9.230218703238303e-05, "loss": 9.9381, "step": 4885 }, { "epoch": 0.20365970572297945, "grad_norm": 348.0, "learning_rate": 9.22985881221164e-05, "loss": 14.6878, "step": 4886 }, { "epoch": 0.2037013880205077, "grad_norm": 336.0, "learning_rate": 9.229498844095427e-05, "loss": 14.3128, "step": 4887 }, { "epoch": 0.20374307031803593, "grad_norm": 188.0, "learning_rate": 9.22913879889622e-05, "loss": 11.1918, "step": 4888 }, { "epoch": 0.20378475261556417, "grad_norm": 788.0, "learning_rate": 9.228778676620585e-05, "loss": 22.1297, "step": 4889 }, { "epoch": 0.2038264349130924, "grad_norm": 520.0, "learning_rate": 9.228418477275081e-05, "loss": 15.1884, "step": 4890 }, { "epoch": 0.20386811721062065, "grad_norm": 868.0, "learning_rate": 9.228058200866276e-05, "loss": 17.7578, "step": 4891 }, { "epoch": 0.2039097995081489, "grad_norm": 458.0, "learning_rate": 9.227697847400734e-05, "loss": 16.503, "step": 4892 }, { "epoch": 0.20395148180567713, "grad_norm": 396.0, "learning_rate": 9.227337416885024e-05, "loss": 14.8135, "step": 4893 }, { "epoch": 0.20399316410320537, "grad_norm": 272.0, "learning_rate": 9.226976909325715e-05, "loss": 12.1252, "step": 4894 }, { "epoch": 0.2040348464007336, "grad_norm": 568.0, "learning_rate": 9.226616324729376e-05, "loss": 18.2551, "step": 4895 }, { "epoch": 0.20407652869826184, "grad_norm": 134.0, "learning_rate": 9.226255663102578e-05, "loss": 9.1878, "step": 4896 }, { "epoch": 0.20411821099579008, "grad_norm": 161.0, "learning_rate": 9.225894924451898e-05, "loss": 12.1253, "step": 4897 }, { "epoch": 0.20415989329331832, "grad_norm": 486.0, "learning_rate": 9.225534108783906e-05, "loss": 18.1253, "step": 4898 }, { "epoch": 0.20420157559084656, "grad_norm": 198.0, "learning_rate": 9.22517321610518e-05, "loss": 11.7505, "step": 4899 }, { "epoch": 0.2042432578883748, "grad_norm": 386.0, "learning_rate": 9.224812246422297e-05, "loss": 13.441, "step": 4900 }, { "epoch": 0.20428494018590304, "grad_norm": 358.0, "learning_rate": 9.224451199741837e-05, "loss": 14.3127, "step": 4901 }, { "epoch": 0.20432662248343128, "grad_norm": 270.0, "learning_rate": 9.224090076070378e-05, "loss": 12.3762, "step": 4902 }, { "epoch": 0.20436830478095952, "grad_norm": 544.0, "learning_rate": 9.223728875414503e-05, "loss": 15.6923, "step": 4903 }, { "epoch": 0.20440998707848776, "grad_norm": 356.0, "learning_rate": 9.223367597780792e-05, "loss": 12.2534, "step": 4904 }, { "epoch": 0.204451669376016, "grad_norm": 452.0, "learning_rate": 9.223006243175833e-05, "loss": 15.6878, "step": 4905 }, { "epoch": 0.20449335167354424, "grad_norm": 532.0, "learning_rate": 9.222644811606211e-05, "loss": 19.0001, "step": 4906 }, { "epoch": 0.20453503397107248, "grad_norm": 350.0, "learning_rate": 9.222283303078511e-05, "loss": 14.6879, "step": 4907 }, { "epoch": 0.20457671626860072, "grad_norm": 508.0, "learning_rate": 9.221921717599326e-05, "loss": 15.501, "step": 4908 }, { "epoch": 0.20461839856612896, "grad_norm": 286.0, "learning_rate": 9.22156005517524e-05, "loss": 13.0629, "step": 4909 }, { "epoch": 0.2046600808636572, "grad_norm": 270.0, "learning_rate": 9.221198315812849e-05, "loss": 13.0002, "step": 4910 }, { "epoch": 0.20470176316118543, "grad_norm": 320.0, "learning_rate": 9.220836499518743e-05, "loss": 13.1878, "step": 4911 }, { "epoch": 0.20474344545871367, "grad_norm": 392.0, "learning_rate": 9.220474606299516e-05, "loss": 15.3753, "step": 4912 }, { "epoch": 0.2047851277562419, "grad_norm": 418.0, "learning_rate": 9.220112636161767e-05, "loss": 15.0634, "step": 4913 }, { "epoch": 0.20482681005377015, "grad_norm": 556.0, "learning_rate": 9.219750589112089e-05, "loss": 16.5005, "step": 4914 }, { "epoch": 0.2048684923512984, "grad_norm": 290.0, "learning_rate": 9.219388465157082e-05, "loss": 12.7503, "step": 4915 }, { "epoch": 0.20491017464882663, "grad_norm": 180.0, "learning_rate": 9.219026264303347e-05, "loss": 11.1266, "step": 4916 }, { "epoch": 0.20495185694635487, "grad_norm": 160.0, "learning_rate": 9.218663986557483e-05, "loss": 11.001, "step": 4917 }, { "epoch": 0.2049935392438831, "grad_norm": 348.0, "learning_rate": 9.218301631926092e-05, "loss": 11.8756, "step": 4918 }, { "epoch": 0.20503522154141135, "grad_norm": 490.0, "learning_rate": 9.21793920041578e-05, "loss": 16.5003, "step": 4919 }, { "epoch": 0.2050769038389396, "grad_norm": 330.0, "learning_rate": 9.217576692033153e-05, "loss": 12.0627, "step": 4920 }, { "epoch": 0.20511858613646783, "grad_norm": 170.0, "learning_rate": 9.217214106784816e-05, "loss": 11.2507, "step": 4921 }, { "epoch": 0.20516026843399607, "grad_norm": 376.0, "learning_rate": 9.216851444677377e-05, "loss": 14.5633, "step": 4922 }, { "epoch": 0.20520195073152434, "grad_norm": 608.0, "learning_rate": 9.216488705717445e-05, "loss": 19.5005, "step": 4923 }, { "epoch": 0.20524363302905257, "grad_norm": 358.0, "learning_rate": 9.216125889911633e-05, "loss": 13.5637, "step": 4924 }, { "epoch": 0.20528531532658081, "grad_norm": 153.0, "learning_rate": 9.215762997266552e-05, "loss": 10.8132, "step": 4925 }, { "epoch": 0.20532699762410905, "grad_norm": 348.0, "learning_rate": 9.215400027788817e-05, "loss": 14.0001, "step": 4926 }, { "epoch": 0.2053686799216373, "grad_norm": 163.0, "learning_rate": 9.215036981485042e-05, "loss": 10.1256, "step": 4927 }, { "epoch": 0.20541036221916553, "grad_norm": 336.0, "learning_rate": 9.214673858361844e-05, "loss": 13.6278, "step": 4928 }, { "epoch": 0.20545204451669377, "grad_norm": 608.0, "learning_rate": 9.21431065842584e-05, "loss": 19.0005, "step": 4929 }, { "epoch": 0.205493726814222, "grad_norm": 724.0, "learning_rate": 9.213947381683651e-05, "loss": 20.2502, "step": 4930 }, { "epoch": 0.20553540911175025, "grad_norm": 278.0, "learning_rate": 9.213584028141897e-05, "loss": 9.3126, "step": 4931 }, { "epoch": 0.2055770914092785, "grad_norm": 142.0, "learning_rate": 9.213220597807199e-05, "loss": 9.9379, "step": 4932 }, { "epoch": 0.20561877370680673, "grad_norm": 199.0, "learning_rate": 9.212857090686182e-05, "loss": 9.3754, "step": 4933 }, { "epoch": 0.20566045600433497, "grad_norm": 494.0, "learning_rate": 9.212493506785472e-05, "loss": 17.0002, "step": 4934 }, { "epoch": 0.2057021383018632, "grad_norm": 268.0, "learning_rate": 9.212129846111693e-05, "loss": 11.4381, "step": 4935 }, { "epoch": 0.20574382059939145, "grad_norm": 320.0, "learning_rate": 9.211766108671471e-05, "loss": 13.5013, "step": 4936 }, { "epoch": 0.2057855028969197, "grad_norm": 452.0, "learning_rate": 9.211402294471441e-05, "loss": 12.8761, "step": 4937 }, { "epoch": 0.20582718519444793, "grad_norm": 464.0, "learning_rate": 9.211038403518229e-05, "loss": 16.2504, "step": 4938 }, { "epoch": 0.20586886749197617, "grad_norm": 108.0, "learning_rate": 9.210674435818468e-05, "loss": 9.7505, "step": 4939 }, { "epoch": 0.2059105497895044, "grad_norm": 206.0, "learning_rate": 9.210310391378793e-05, "loss": 10.5003, "step": 4940 }, { "epoch": 0.20595223208703264, "grad_norm": 696.0, "learning_rate": 9.209946270205836e-05, "loss": 17.3773, "step": 4941 }, { "epoch": 0.20599391438456088, "grad_norm": 211.0, "learning_rate": 9.209582072306235e-05, "loss": 11.8753, "step": 4942 }, { "epoch": 0.20603559668208912, "grad_norm": 280.0, "learning_rate": 9.209217797686626e-05, "loss": 12.6253, "step": 4943 }, { "epoch": 0.20607727897961736, "grad_norm": 488.0, "learning_rate": 9.208853446353651e-05, "loss": 15.5036, "step": 4944 }, { "epoch": 0.2061189612771456, "grad_norm": 422.0, "learning_rate": 9.208489018313948e-05, "loss": 15.0008, "step": 4945 }, { "epoch": 0.20616064357467384, "grad_norm": 338.0, "learning_rate": 9.208124513574158e-05, "loss": 14.7502, "step": 4946 }, { "epoch": 0.20620232587220208, "grad_norm": 264.0, "learning_rate": 9.207759932140925e-05, "loss": 11.6258, "step": 4947 }, { "epoch": 0.20624400816973032, "grad_norm": 1136.0, "learning_rate": 9.207395274020896e-05, "loss": 33.5002, "step": 4948 }, { "epoch": 0.20628569046725856, "grad_norm": 83.0, "learning_rate": 9.20703053922071e-05, "loss": 9.3135, "step": 4949 }, { "epoch": 0.2063273727647868, "grad_norm": 328.0, "learning_rate": 9.206665727747024e-05, "loss": 13.1265, "step": 4950 }, { "epoch": 0.20636905506231504, "grad_norm": 157.0, "learning_rate": 9.206300839606478e-05, "loss": 9.563, "step": 4951 }, { "epoch": 0.20641073735984328, "grad_norm": 464.0, "learning_rate": 9.205935874805728e-05, "loss": 15.1261, "step": 4952 }, { "epoch": 0.20645241965737152, "grad_norm": 46.0, "learning_rate": 9.205570833351422e-05, "loss": 6.5011, "step": 4953 }, { "epoch": 0.20649410195489976, "grad_norm": 360.0, "learning_rate": 9.205205715250216e-05, "loss": 13.7504, "step": 4954 }, { "epoch": 0.206535784252428, "grad_norm": 294.0, "learning_rate": 9.204840520508762e-05, "loss": 13.8756, "step": 4955 }, { "epoch": 0.20657746654995623, "grad_norm": 486.0, "learning_rate": 9.204475249133715e-05, "loss": 16.6252, "step": 4956 }, { "epoch": 0.20661914884748447, "grad_norm": 57.5, "learning_rate": 9.204109901131734e-05, "loss": 9.0629, "step": 4957 }, { "epoch": 0.2066608311450127, "grad_norm": 956.0, "learning_rate": 9.203744476509478e-05, "loss": 26.5005, "step": 4958 }, { "epoch": 0.20670251344254095, "grad_norm": 412.0, "learning_rate": 9.203378975273603e-05, "loss": 15.5003, "step": 4959 }, { "epoch": 0.2067441957400692, "grad_norm": 396.0, "learning_rate": 9.203013397430775e-05, "loss": 15.3133, "step": 4960 }, { "epoch": 0.20678587803759743, "grad_norm": 1280.0, "learning_rate": 9.202647742987655e-05, "loss": 26.8798, "step": 4961 }, { "epoch": 0.20682756033512567, "grad_norm": 308.0, "learning_rate": 9.202282011950904e-05, "loss": 13.1275, "step": 4962 }, { "epoch": 0.2068692426326539, "grad_norm": 332.0, "learning_rate": 9.201916204327194e-05, "loss": 14.3755, "step": 4963 }, { "epoch": 0.20691092493018215, "grad_norm": 90.0, "learning_rate": 9.201550320123187e-05, "loss": 7.219, "step": 4964 }, { "epoch": 0.2069526072277104, "grad_norm": 153.0, "learning_rate": 9.201184359345552e-05, "loss": 8.8755, "step": 4965 }, { "epoch": 0.20699428952523863, "grad_norm": 258.0, "learning_rate": 9.200818322000958e-05, "loss": 11.4379, "step": 4966 }, { "epoch": 0.20703597182276687, "grad_norm": 116.0, "learning_rate": 9.200452208096079e-05, "loss": 9.8128, "step": 4967 }, { "epoch": 0.2070776541202951, "grad_norm": 157.0, "learning_rate": 9.200086017637583e-05, "loss": 9.5002, "step": 4968 }, { "epoch": 0.20711933641782335, "grad_norm": 708.0, "learning_rate": 9.19971975063215e-05, "loss": 19.1253, "step": 4969 }, { "epoch": 0.20716101871535159, "grad_norm": 438.0, "learning_rate": 9.199353407086449e-05, "loss": 15.7502, "step": 4970 }, { "epoch": 0.20720270101287983, "grad_norm": 244.0, "learning_rate": 9.19898698700716e-05, "loss": 12.0007, "step": 4971 }, { "epoch": 0.20724438331040806, "grad_norm": 398.0, "learning_rate": 9.198620490400962e-05, "loss": 15.8752, "step": 4972 }, { "epoch": 0.2072860656079363, "grad_norm": 356.0, "learning_rate": 9.198253917274532e-05, "loss": 14.6265, "step": 4973 }, { "epoch": 0.20732774790546454, "grad_norm": 131.0, "learning_rate": 9.197887267634551e-05, "loss": 8.0642, "step": 4974 }, { "epoch": 0.20736943020299278, "grad_norm": 258.0, "learning_rate": 9.197520541487703e-05, "loss": 12.5006, "step": 4975 }, { "epoch": 0.20741111250052102, "grad_norm": 584.0, "learning_rate": 9.197153738840669e-05, "loss": 19.8752, "step": 4976 }, { "epoch": 0.20745279479804926, "grad_norm": 221.0, "learning_rate": 9.196786859700137e-05, "loss": 8.3127, "step": 4977 }, { "epoch": 0.2074944770955775, "grad_norm": 266.0, "learning_rate": 9.19641990407279e-05, "loss": 11.8128, "step": 4978 }, { "epoch": 0.20753615939310574, "grad_norm": 358.0, "learning_rate": 9.196052871965319e-05, "loss": 14.2508, "step": 4979 }, { "epoch": 0.20757784169063398, "grad_norm": 580.0, "learning_rate": 9.195685763384412e-05, "loss": 16.1293, "step": 4980 }, { "epoch": 0.20761952398816222, "grad_norm": 274.0, "learning_rate": 9.19531857833676e-05, "loss": 14.0013, "step": 4981 }, { "epoch": 0.20766120628569046, "grad_norm": 636.0, "learning_rate": 9.194951316829053e-05, "loss": 19.1255, "step": 4982 }, { "epoch": 0.2077028885832187, "grad_norm": 148.0, "learning_rate": 9.194583978867988e-05, "loss": 10.1255, "step": 4983 }, { "epoch": 0.20774457088074694, "grad_norm": 392.0, "learning_rate": 9.194216564460255e-05, "loss": 15.2503, "step": 4984 }, { "epoch": 0.20778625317827518, "grad_norm": 161.0, "learning_rate": 9.193849073612555e-05, "loss": 10.2513, "step": 4985 }, { "epoch": 0.20782793547580342, "grad_norm": 71.0, "learning_rate": 9.193481506331582e-05, "loss": 8.1883, "step": 4986 }, { "epoch": 0.20786961777333166, "grad_norm": 1496.0, "learning_rate": 9.193113862624037e-05, "loss": 35.2504, "step": 4987 }, { "epoch": 0.2079113000708599, "grad_norm": 470.0, "learning_rate": 9.192746142496619e-05, "loss": 15.0003, "step": 4988 }, { "epoch": 0.20795298236838813, "grad_norm": 362.0, "learning_rate": 9.19237834595603e-05, "loss": 14.939, "step": 4989 }, { "epoch": 0.20799466466591637, "grad_norm": 470.0, "learning_rate": 9.192010473008974e-05, "loss": 16.7504, "step": 4990 }, { "epoch": 0.2080363469634446, "grad_norm": 296.0, "learning_rate": 9.191642523662156e-05, "loss": 13.5013, "step": 4991 }, { "epoch": 0.20807802926097285, "grad_norm": 131.0, "learning_rate": 9.19127449792228e-05, "loss": 10.4379, "step": 4992 }, { "epoch": 0.2081197115585011, "grad_norm": 173.0, "learning_rate": 9.190906395796054e-05, "loss": 10.3772, "step": 4993 }, { "epoch": 0.20816139385602933, "grad_norm": 290.0, "learning_rate": 9.190538217290187e-05, "loss": 11.5003, "step": 4994 }, { "epoch": 0.20820307615355757, "grad_norm": 318.0, "learning_rate": 9.190169962411389e-05, "loss": 13.3135, "step": 4995 }, { "epoch": 0.20824475845108584, "grad_norm": 516.0, "learning_rate": 9.189801631166371e-05, "loss": 17.6252, "step": 4996 }, { "epoch": 0.20828644074861408, "grad_norm": 288.0, "learning_rate": 9.189433223561848e-05, "loss": 14.1254, "step": 4997 }, { "epoch": 0.20832812304614232, "grad_norm": 107.5, "learning_rate": 9.189064739604532e-05, "loss": 8.8129, "step": 4998 }, { "epoch": 0.20836980534367056, "grad_norm": 233.0, "learning_rate": 9.188696179301137e-05, "loss": 9.6882, "step": 4999 }, { "epoch": 0.2084114876411988, "grad_norm": 536.0, "learning_rate": 9.188327542658384e-05, "loss": 18.0007, "step": 5000 }, { "epoch": 0.20845316993872703, "grad_norm": 320.0, "learning_rate": 9.18795882968299e-05, "loss": 14.0004, "step": 5001 }, { "epoch": 0.20849485223625527, "grad_norm": 692.0, "learning_rate": 9.187590040381676e-05, "loss": 19.6257, "step": 5002 }, { "epoch": 0.2085365345337835, "grad_norm": 352.0, "learning_rate": 9.18722117476116e-05, "loss": 13.0003, "step": 5003 }, { "epoch": 0.20857821683131175, "grad_norm": 312.0, "learning_rate": 9.186852232828164e-05, "loss": 12.8752, "step": 5004 }, { "epoch": 0.20861989912884, "grad_norm": 652.0, "learning_rate": 9.186483214589418e-05, "loss": 17.2553, "step": 5005 }, { "epoch": 0.20866158142636823, "grad_norm": 760.0, "learning_rate": 9.186114120051643e-05, "loss": 19.8755, "step": 5006 }, { "epoch": 0.20870326372389647, "grad_norm": 280.0, "learning_rate": 9.185744949221566e-05, "loss": 11.813, "step": 5007 }, { "epoch": 0.2087449460214247, "grad_norm": 772.0, "learning_rate": 9.185375702105916e-05, "loss": 20.3766, "step": 5008 }, { "epoch": 0.20878662831895295, "grad_norm": 123.0, "learning_rate": 9.185006378711423e-05, "loss": 8.1255, "step": 5009 }, { "epoch": 0.2088283106164812, "grad_norm": 320.0, "learning_rate": 9.184636979044816e-05, "loss": 14.3137, "step": 5010 }, { "epoch": 0.20886999291400943, "grad_norm": 322.0, "learning_rate": 9.184267503112829e-05, "loss": 13.3754, "step": 5011 }, { "epoch": 0.20891167521153767, "grad_norm": 960.0, "learning_rate": 9.183897950922194e-05, "loss": 25.7502, "step": 5012 }, { "epoch": 0.2089533575090659, "grad_norm": 498.0, "learning_rate": 9.183528322479648e-05, "loss": 17.3757, "step": 5013 }, { "epoch": 0.20899503980659415, "grad_norm": 212.0, "learning_rate": 9.18315861779193e-05, "loss": 11.3755, "step": 5014 }, { "epoch": 0.20903672210412239, "grad_norm": 78.5, "learning_rate": 9.182788836865772e-05, "loss": 9.0008, "step": 5015 }, { "epoch": 0.20907840440165062, "grad_norm": 185.0, "learning_rate": 9.182418979707917e-05, "loss": 11.1259, "step": 5016 }, { "epoch": 0.20912008669917886, "grad_norm": 376.0, "learning_rate": 9.182049046325103e-05, "loss": 12.7501, "step": 5017 }, { "epoch": 0.2091617689967071, "grad_norm": 358.0, "learning_rate": 9.181679036724076e-05, "loss": 12.6894, "step": 5018 }, { "epoch": 0.20920345129423534, "grad_norm": 276.0, "learning_rate": 9.181308950911576e-05, "loss": 12.5628, "step": 5019 }, { "epoch": 0.20924513359176358, "grad_norm": 224.0, "learning_rate": 9.180938788894351e-05, "loss": 11.7504, "step": 5020 }, { "epoch": 0.20928681588929182, "grad_norm": 73.0, "learning_rate": 9.180568550679143e-05, "loss": 8.6877, "step": 5021 }, { "epoch": 0.20932849818682006, "grad_norm": 388.0, "learning_rate": 9.180198236272704e-05, "loss": 14.8757, "step": 5022 }, { "epoch": 0.2093701804843483, "grad_norm": 132.0, "learning_rate": 9.179827845681782e-05, "loss": 8.7506, "step": 5023 }, { "epoch": 0.20941186278187654, "grad_norm": 644.0, "learning_rate": 9.179457378913124e-05, "loss": 19.6255, "step": 5024 }, { "epoch": 0.20945354507940478, "grad_norm": 410.0, "learning_rate": 9.179086835973484e-05, "loss": 15.8751, "step": 5025 }, { "epoch": 0.20949522737693302, "grad_norm": 354.0, "learning_rate": 9.178716216869616e-05, "loss": 14.6879, "step": 5026 }, { "epoch": 0.20953690967446126, "grad_norm": 412.0, "learning_rate": 9.178345521608276e-05, "loss": 15.6252, "step": 5027 }, { "epoch": 0.2095785919719895, "grad_norm": 202.0, "learning_rate": 9.177974750196216e-05, "loss": 12.1266, "step": 5028 }, { "epoch": 0.20962027426951774, "grad_norm": 612.0, "learning_rate": 9.177603902640195e-05, "loss": 20.5002, "step": 5029 }, { "epoch": 0.20966195656704598, "grad_norm": 138.0, "learning_rate": 9.177232978946973e-05, "loss": 10.3755, "step": 5030 }, { "epoch": 0.20970363886457422, "grad_norm": 160.0, "learning_rate": 9.176861979123307e-05, "loss": 11.5628, "step": 5031 }, { "epoch": 0.20974532116210245, "grad_norm": 552.0, "learning_rate": 9.176490903175965e-05, "loss": 18.2504, "step": 5032 }, { "epoch": 0.2097870034596307, "grad_norm": 290.0, "learning_rate": 9.176119751111703e-05, "loss": 13.5626, "step": 5033 }, { "epoch": 0.20982868575715893, "grad_norm": 374.0, "learning_rate": 9.175748522937287e-05, "loss": 12.8133, "step": 5034 }, { "epoch": 0.20987036805468717, "grad_norm": 1440.0, "learning_rate": 9.175377218659485e-05, "loss": 31.1303, "step": 5035 }, { "epoch": 0.2099120503522154, "grad_norm": 376.0, "learning_rate": 9.175005838285062e-05, "loss": 15.1881, "step": 5036 }, { "epoch": 0.20995373264974365, "grad_norm": 408.0, "learning_rate": 9.174634381820786e-05, "loss": 15.563, "step": 5037 }, { "epoch": 0.2099954149472719, "grad_norm": 410.0, "learning_rate": 9.17426284927343e-05, "loss": 13.9408, "step": 5038 }, { "epoch": 0.21003709724480013, "grad_norm": 1456.0, "learning_rate": 9.17389124064976e-05, "loss": 30.6258, "step": 5039 }, { "epoch": 0.21007877954232837, "grad_norm": 360.0, "learning_rate": 9.173519555956553e-05, "loss": 15.4387, "step": 5040 }, { "epoch": 0.2101204618398566, "grad_norm": 238.0, "learning_rate": 9.173147795200583e-05, "loss": 12.3128, "step": 5041 }, { "epoch": 0.21016214413738485, "grad_norm": 400.0, "learning_rate": 9.172775958388623e-05, "loss": 12.3752, "step": 5042 }, { "epoch": 0.2102038264349131, "grad_norm": 442.0, "learning_rate": 9.17240404552745e-05, "loss": 15.5052, "step": 5043 }, { "epoch": 0.21024550873244133, "grad_norm": 330.0, "learning_rate": 9.172032056623843e-05, "loss": 13.6253, "step": 5044 }, { "epoch": 0.21028719102996957, "grad_norm": 324.0, "learning_rate": 9.171659991684583e-05, "loss": 13.6265, "step": 5045 }, { "epoch": 0.2103288733274978, "grad_norm": 213.0, "learning_rate": 9.171287850716448e-05, "loss": 11.1254, "step": 5046 }, { "epoch": 0.21037055562502605, "grad_norm": 288.0, "learning_rate": 9.17091563372622e-05, "loss": 12.5627, "step": 5047 }, { "epoch": 0.21041223792255428, "grad_norm": 209.0, "learning_rate": 9.170543340720687e-05, "loss": 11.376, "step": 5048 }, { "epoch": 0.21045392022008252, "grad_norm": 160.0, "learning_rate": 9.170170971706631e-05, "loss": 9.6878, "step": 5049 }, { "epoch": 0.21049560251761076, "grad_norm": 728.0, "learning_rate": 9.169798526690838e-05, "loss": 21.2505, "step": 5050 }, { "epoch": 0.210537284815139, "grad_norm": 240.0, "learning_rate": 9.169426005680097e-05, "loss": 12.1883, "step": 5051 }, { "epoch": 0.21057896711266724, "grad_norm": 462.0, "learning_rate": 9.169053408681197e-05, "loss": 16.3753, "step": 5052 }, { "epoch": 0.21062064941019548, "grad_norm": 284.0, "learning_rate": 9.168680735700928e-05, "loss": 10.3136, "step": 5053 }, { "epoch": 0.21066233170772372, "grad_norm": 290.0, "learning_rate": 9.168307986746083e-05, "loss": 12.6267, "step": 5054 }, { "epoch": 0.21070401400525196, "grad_norm": 576.0, "learning_rate": 9.167935161823456e-05, "loss": 16.7504, "step": 5055 }, { "epoch": 0.2107456963027802, "grad_norm": 196.0, "learning_rate": 9.16756226093984e-05, "loss": 12.0007, "step": 5056 }, { "epoch": 0.21078737860030844, "grad_norm": 98.5, "learning_rate": 9.16718928410203e-05, "loss": 9.5629, "step": 5057 }, { "epoch": 0.21082906089783668, "grad_norm": 370.0, "learning_rate": 9.166816231316825e-05, "loss": 15.6253, "step": 5058 }, { "epoch": 0.21087074319536492, "grad_norm": 209.0, "learning_rate": 9.166443102591028e-05, "loss": 10.6876, "step": 5059 }, { "epoch": 0.21091242549289316, "grad_norm": 127.5, "learning_rate": 9.166069897931433e-05, "loss": 8.6898, "step": 5060 }, { "epoch": 0.2109541077904214, "grad_norm": 640.0, "learning_rate": 9.165696617344846e-05, "loss": 18.8753, "step": 5061 }, { "epoch": 0.21099579008794964, "grad_norm": 536.0, "learning_rate": 9.165323260838066e-05, "loss": 18.1252, "step": 5062 }, { "epoch": 0.21103747238547788, "grad_norm": 258.0, "learning_rate": 9.164949828417902e-05, "loss": 12.5627, "step": 5063 }, { "epoch": 0.21107915468300611, "grad_norm": 74.5, "learning_rate": 9.164576320091156e-05, "loss": 8.6888, "step": 5064 }, { "epoch": 0.21112083698053435, "grad_norm": 155.0, "learning_rate": 9.164202735864638e-05, "loss": 10.1877, "step": 5065 }, { "epoch": 0.2111625192780626, "grad_norm": 344.0, "learning_rate": 9.163829075745155e-05, "loss": 13.7502, "step": 5066 }, { "epoch": 0.21120420157559083, "grad_norm": 446.0, "learning_rate": 9.163455339739517e-05, "loss": 16.0001, "step": 5067 }, { "epoch": 0.21124588387311907, "grad_norm": 354.0, "learning_rate": 9.163081527854537e-05, "loss": 15.3127, "step": 5068 }, { "epoch": 0.21128756617064734, "grad_norm": 274.0, "learning_rate": 9.162707640097026e-05, "loss": 11.8752, "step": 5069 }, { "epoch": 0.21132924846817558, "grad_norm": 648.0, "learning_rate": 9.162333676473798e-05, "loss": 20.0021, "step": 5070 }, { "epoch": 0.21137093076570382, "grad_norm": 200.0, "learning_rate": 9.16195963699167e-05, "loss": 10.938, "step": 5071 }, { "epoch": 0.21141261306323206, "grad_norm": 1264.0, "learning_rate": 9.161585521657458e-05, "loss": 25.8814, "step": 5072 }, { "epoch": 0.2114542953607603, "grad_norm": 88.0, "learning_rate": 9.161211330477981e-05, "loss": 6.3135, "step": 5073 }, { "epoch": 0.21149597765828854, "grad_norm": 406.0, "learning_rate": 9.160837063460057e-05, "loss": 14.6255, "step": 5074 }, { "epoch": 0.21153765995581678, "grad_norm": 221.0, "learning_rate": 9.160462720610509e-05, "loss": 11.0627, "step": 5075 }, { "epoch": 0.21157934225334502, "grad_norm": 79.5, "learning_rate": 9.160088301936159e-05, "loss": 7.4065, "step": 5076 }, { "epoch": 0.21162102455087325, "grad_norm": 322.0, "learning_rate": 9.159713807443829e-05, "loss": 14.4379, "step": 5077 }, { "epoch": 0.2116627068484015, "grad_norm": 944.0, "learning_rate": 9.159339237140346e-05, "loss": 26.5002, "step": 5078 }, { "epoch": 0.21170438914592973, "grad_norm": 227.0, "learning_rate": 9.158964591032537e-05, "loss": 11.0627, "step": 5079 }, { "epoch": 0.21174607144345797, "grad_norm": 376.0, "learning_rate": 9.158589869127229e-05, "loss": 14.4378, "step": 5080 }, { "epoch": 0.2117877537409862, "grad_norm": 61.5, "learning_rate": 9.158215071431251e-05, "loss": 8.563, "step": 5081 }, { "epoch": 0.21182943603851445, "grad_norm": 328.0, "learning_rate": 9.157840197951433e-05, "loss": 13.8759, "step": 5082 }, { "epoch": 0.2118711183360427, "grad_norm": 254.0, "learning_rate": 9.15746524869461e-05, "loss": 8.4379, "step": 5083 }, { "epoch": 0.21191280063357093, "grad_norm": 174.0, "learning_rate": 9.157090223667614e-05, "loss": 10.6259, "step": 5084 }, { "epoch": 0.21195448293109917, "grad_norm": 548.0, "learning_rate": 9.156715122877279e-05, "loss": 18.7513, "step": 5085 }, { "epoch": 0.2119961652286274, "grad_norm": 460.0, "learning_rate": 9.156339946330441e-05, "loss": 14.7502, "step": 5086 }, { "epoch": 0.21203784752615565, "grad_norm": 656.0, "learning_rate": 9.15596469403394e-05, "loss": 19.6252, "step": 5087 }, { "epoch": 0.2120795298236839, "grad_norm": 298.0, "learning_rate": 9.155589365994612e-05, "loss": 13.0626, "step": 5088 }, { "epoch": 0.21212121212121213, "grad_norm": 189.0, "learning_rate": 9.1552139622193e-05, "loss": 10.2504, "step": 5089 }, { "epoch": 0.21216289441874037, "grad_norm": 432.0, "learning_rate": 9.154838482714844e-05, "loss": 16.1255, "step": 5090 }, { "epoch": 0.2122045767162686, "grad_norm": 568.0, "learning_rate": 9.154462927488089e-05, "loss": 17.1253, "step": 5091 }, { "epoch": 0.21224625901379685, "grad_norm": 168.0, "learning_rate": 9.154087296545877e-05, "loss": 10.6879, "step": 5092 }, { "epoch": 0.21228794131132508, "grad_norm": 282.0, "learning_rate": 9.153711589895057e-05, "loss": 12.0626, "step": 5093 }, { "epoch": 0.21232962360885332, "grad_norm": 245.0, "learning_rate": 9.153335807542472e-05, "loss": 11.0631, "step": 5094 }, { "epoch": 0.21237130590638156, "grad_norm": 592.0, "learning_rate": 9.152959949494975e-05, "loss": 18.3752, "step": 5095 }, { "epoch": 0.2124129882039098, "grad_norm": 600.0, "learning_rate": 9.152584015759413e-05, "loss": 16.7503, "step": 5096 }, { "epoch": 0.21245467050143804, "grad_norm": 684.0, "learning_rate": 9.152208006342641e-05, "loss": 21.5003, "step": 5097 }, { "epoch": 0.21249635279896628, "grad_norm": 166.0, "learning_rate": 9.151831921251508e-05, "loss": 10.4385, "step": 5098 }, { "epoch": 0.21253803509649452, "grad_norm": 1136.0, "learning_rate": 9.151455760492868e-05, "loss": 25.3804, "step": 5099 }, { "epoch": 0.21257971739402276, "grad_norm": 374.0, "learning_rate": 9.151079524073581e-05, "loss": 15.2503, "step": 5100 }, { "epoch": 0.212621399691551, "grad_norm": 438.0, "learning_rate": 9.1507032120005e-05, "loss": 14.9406, "step": 5101 }, { "epoch": 0.21266308198907924, "grad_norm": 462.0, "learning_rate": 9.150326824280483e-05, "loss": 15.5003, "step": 5102 }, { "epoch": 0.21270476428660748, "grad_norm": 1464.0, "learning_rate": 9.149950360920394e-05, "loss": 31.0053, "step": 5103 }, { "epoch": 0.21274644658413572, "grad_norm": 412.0, "learning_rate": 9.149573821927091e-05, "loss": 16.2533, "step": 5104 }, { "epoch": 0.21278812888166396, "grad_norm": 724.0, "learning_rate": 9.149197207307435e-05, "loss": 24.6257, "step": 5105 }, { "epoch": 0.2128298111791922, "grad_norm": 234.0, "learning_rate": 9.148820517068292e-05, "loss": 10.8139, "step": 5106 }, { "epoch": 0.21287149347672044, "grad_norm": 100.0, "learning_rate": 9.148443751216527e-05, "loss": 8.8755, "step": 5107 }, { "epoch": 0.21291317577424868, "grad_norm": 580.0, "learning_rate": 9.148066909759006e-05, "loss": 19.1255, "step": 5108 }, { "epoch": 0.21295485807177691, "grad_norm": 572.0, "learning_rate": 9.1476899927026e-05, "loss": 17.2519, "step": 5109 }, { "epoch": 0.21299654036930515, "grad_norm": 258.0, "learning_rate": 9.147313000054171e-05, "loss": 11.7503, "step": 5110 }, { "epoch": 0.2130382226668334, "grad_norm": 390.0, "learning_rate": 9.146935931820598e-05, "loss": 14.2503, "step": 5111 }, { "epoch": 0.21307990496436163, "grad_norm": 652.0, "learning_rate": 9.146558788008747e-05, "loss": 19.6254, "step": 5112 }, { "epoch": 0.21312158726188987, "grad_norm": 98.0, "learning_rate": 9.146181568625496e-05, "loss": 10.4384, "step": 5113 }, { "epoch": 0.2131632695594181, "grad_norm": 1592.0, "learning_rate": 9.145804273677719e-05, "loss": 33.7549, "step": 5114 }, { "epoch": 0.21320495185694635, "grad_norm": 156.0, "learning_rate": 9.145426903172288e-05, "loss": 10.8128, "step": 5115 }, { "epoch": 0.2132466341544746, "grad_norm": 314.0, "learning_rate": 9.145049457116085e-05, "loss": 14.5628, "step": 5116 }, { "epoch": 0.21328831645200283, "grad_norm": 516.0, "learning_rate": 9.144671935515988e-05, "loss": 14.816, "step": 5117 }, { "epoch": 0.21332999874953107, "grad_norm": 81.0, "learning_rate": 9.144294338378875e-05, "loss": 7.5631, "step": 5118 }, { "epoch": 0.2133716810470593, "grad_norm": 264.0, "learning_rate": 9.143916665711632e-05, "loss": 11.5009, "step": 5119 }, { "epoch": 0.21341336334458755, "grad_norm": 388.0, "learning_rate": 9.143538917521139e-05, "loss": 15.7523, "step": 5120 }, { "epoch": 0.2134550456421158, "grad_norm": 110.5, "learning_rate": 9.143161093814283e-05, "loss": 9.6883, "step": 5121 }, { "epoch": 0.21349672793964403, "grad_norm": 172.0, "learning_rate": 9.142783194597946e-05, "loss": 11.0006, "step": 5122 }, { "epoch": 0.21353841023717227, "grad_norm": 191.0, "learning_rate": 9.14240521987902e-05, "loss": 10.6883, "step": 5123 }, { "epoch": 0.2135800925347005, "grad_norm": 300.0, "learning_rate": 9.142027169664389e-05, "loss": 13.6254, "step": 5124 }, { "epoch": 0.21362177483222874, "grad_norm": 348.0, "learning_rate": 9.141649043960948e-05, "loss": 14.0005, "step": 5125 }, { "epoch": 0.21366345712975698, "grad_norm": 227.0, "learning_rate": 9.141270842775581e-05, "loss": 11.3145, "step": 5126 }, { "epoch": 0.21370513942728522, "grad_norm": 286.0, "learning_rate": 9.140892566115187e-05, "loss": 12.6252, "step": 5127 }, { "epoch": 0.21374682172481346, "grad_norm": 237.0, "learning_rate": 9.140514213986659e-05, "loss": 10.8764, "step": 5128 }, { "epoch": 0.2137885040223417, "grad_norm": 250.0, "learning_rate": 9.140135786396893e-05, "loss": 12.8129, "step": 5129 }, { "epoch": 0.21383018631986994, "grad_norm": 320.0, "learning_rate": 9.139757283352784e-05, "loss": 13.0003, "step": 5130 }, { "epoch": 0.21387186861739818, "grad_norm": 214.0, "learning_rate": 9.13937870486123e-05, "loss": 11.0627, "step": 5131 }, { "epoch": 0.21391355091492642, "grad_norm": 352.0, "learning_rate": 9.139000050929132e-05, "loss": 14.9379, "step": 5132 }, { "epoch": 0.21395523321245466, "grad_norm": 262.0, "learning_rate": 9.13862132156339e-05, "loss": 13.0627, "step": 5133 }, { "epoch": 0.2139969155099829, "grad_norm": 506.0, "learning_rate": 9.138242516770909e-05, "loss": 16.6253, "step": 5134 }, { "epoch": 0.21403859780751114, "grad_norm": 138.0, "learning_rate": 9.13786363655859e-05, "loss": 9.5636, "step": 5135 }, { "epoch": 0.21408028010503938, "grad_norm": 348.0, "learning_rate": 9.13748468093334e-05, "loss": 13.9379, "step": 5136 }, { "epoch": 0.21412196240256762, "grad_norm": 90.0, "learning_rate": 9.137105649902061e-05, "loss": 9.8132, "step": 5137 }, { "epoch": 0.21416364470009586, "grad_norm": 179.0, "learning_rate": 9.136726543471667e-05, "loss": 6.0631, "step": 5138 }, { "epoch": 0.2142053269976241, "grad_norm": 215.0, "learning_rate": 9.136347361649063e-05, "loss": 11.8128, "step": 5139 }, { "epoch": 0.21424700929515234, "grad_norm": 438.0, "learning_rate": 9.135968104441161e-05, "loss": 16.7506, "step": 5140 }, { "epoch": 0.2142886915926806, "grad_norm": 196.0, "learning_rate": 9.135588771854874e-05, "loss": 8.8763, "step": 5141 }, { "epoch": 0.21433037389020884, "grad_norm": 344.0, "learning_rate": 9.135209363897116e-05, "loss": 14.3752, "step": 5142 }, { "epoch": 0.21437205618773708, "grad_norm": 151.0, "learning_rate": 9.134829880574799e-05, "loss": 10.1252, "step": 5143 }, { "epoch": 0.21441373848526532, "grad_norm": 588.0, "learning_rate": 9.13445032189484e-05, "loss": 18.8752, "step": 5144 }, { "epoch": 0.21445542078279356, "grad_norm": 185.0, "learning_rate": 9.134070687864157e-05, "loss": 10.4378, "step": 5145 }, { "epoch": 0.2144971030803218, "grad_norm": 234.0, "learning_rate": 9.133690978489669e-05, "loss": 11.5003, "step": 5146 }, { "epoch": 0.21453878537785004, "grad_norm": 236.0, "learning_rate": 9.133311193778295e-05, "loss": 11.1878, "step": 5147 }, { "epoch": 0.21458046767537828, "grad_norm": 414.0, "learning_rate": 9.132931333736958e-05, "loss": 12.8135, "step": 5148 }, { "epoch": 0.21462214997290652, "grad_norm": 454.0, "learning_rate": 9.132551398372582e-05, "loss": 16.0005, "step": 5149 }, { "epoch": 0.21466383227043476, "grad_norm": 284.0, "learning_rate": 9.132171387692088e-05, "loss": 11.5627, "step": 5150 }, { "epoch": 0.214705514567963, "grad_norm": 158.0, "learning_rate": 9.131791301702404e-05, "loss": 6.4071, "step": 5151 }, { "epoch": 0.21474719686549124, "grad_norm": 248.0, "learning_rate": 9.131411140410457e-05, "loss": 12.188, "step": 5152 }, { "epoch": 0.21478887916301947, "grad_norm": 596.0, "learning_rate": 9.131030903823176e-05, "loss": 18.1263, "step": 5153 }, { "epoch": 0.21483056146054771, "grad_norm": 212.0, "learning_rate": 9.130650591947489e-05, "loss": 10.0627, "step": 5154 }, { "epoch": 0.21487224375807595, "grad_norm": 322.0, "learning_rate": 9.130270204790329e-05, "loss": 13.6886, "step": 5155 }, { "epoch": 0.2149139260556042, "grad_norm": 258.0, "learning_rate": 9.129889742358628e-05, "loss": 12.2505, "step": 5156 }, { "epoch": 0.21495560835313243, "grad_norm": 216.0, "learning_rate": 9.129509204659319e-05, "loss": 9.4378, "step": 5157 }, { "epoch": 0.21499729065066067, "grad_norm": 384.0, "learning_rate": 9.129128591699339e-05, "loss": 12.6262, "step": 5158 }, { "epoch": 0.2150389729481889, "grad_norm": 464.0, "learning_rate": 9.128747903485622e-05, "loss": 16.2509, "step": 5159 }, { "epoch": 0.21508065524571715, "grad_norm": 636.0, "learning_rate": 9.12836714002511e-05, "loss": 19.8777, "step": 5160 }, { "epoch": 0.2151223375432454, "grad_norm": 52.75, "learning_rate": 9.12798630132474e-05, "loss": 8.8133, "step": 5161 }, { "epoch": 0.21516401984077363, "grad_norm": 816.0, "learning_rate": 9.127605387391452e-05, "loss": 19.2549, "step": 5162 }, { "epoch": 0.21520570213830187, "grad_norm": 318.0, "learning_rate": 9.12722439823219e-05, "loss": 13.4379, "step": 5163 }, { "epoch": 0.2152473844358301, "grad_norm": 238.0, "learning_rate": 9.126843333853898e-05, "loss": 11.8752, "step": 5164 }, { "epoch": 0.21528906673335835, "grad_norm": 576.0, "learning_rate": 9.126462194263518e-05, "loss": 18.8766, "step": 5165 }, { "epoch": 0.2153307490308866, "grad_norm": 93.0, "learning_rate": 9.126080979468e-05, "loss": 7.6257, "step": 5166 }, { "epoch": 0.21537243132841483, "grad_norm": 424.0, "learning_rate": 9.12569968947429e-05, "loss": 15.9381, "step": 5167 }, { "epoch": 0.21541411362594307, "grad_norm": 394.0, "learning_rate": 9.125318324289335e-05, "loss": 14.1253, "step": 5168 }, { "epoch": 0.2154557959234713, "grad_norm": 468.0, "learning_rate": 9.12493688392009e-05, "loss": 14.7531, "step": 5169 }, { "epoch": 0.21549747822099954, "grad_norm": 884.0, "learning_rate": 9.124555368373502e-05, "loss": 23.2502, "step": 5170 }, { "epoch": 0.21553916051852778, "grad_norm": 396.0, "learning_rate": 9.124173777656527e-05, "loss": 14.9377, "step": 5171 }, { "epoch": 0.21558084281605602, "grad_norm": 230.0, "learning_rate": 9.123792111776119e-05, "loss": 11.0003, "step": 5172 }, { "epoch": 0.21562252511358426, "grad_norm": 292.0, "learning_rate": 9.123410370739231e-05, "loss": 13.3128, "step": 5173 }, { "epoch": 0.2156642074111125, "grad_norm": 100.5, "learning_rate": 9.123028554552825e-05, "loss": 10.3754, "step": 5174 }, { "epoch": 0.21570588970864074, "grad_norm": 145.0, "learning_rate": 9.12264666322386e-05, "loss": 11.1883, "step": 5175 }, { "epoch": 0.21574757200616898, "grad_norm": 209.0, "learning_rate": 9.12226469675929e-05, "loss": 10.3135, "step": 5176 }, { "epoch": 0.21578925430369722, "grad_norm": 219.0, "learning_rate": 9.121882655166082e-05, "loss": 10.6883, "step": 5177 }, { "epoch": 0.21583093660122546, "grad_norm": 292.0, "learning_rate": 9.121500538451196e-05, "loss": 12.3798, "step": 5178 }, { "epoch": 0.2158726188987537, "grad_norm": 147.0, "learning_rate": 9.121118346621598e-05, "loss": 10.0006, "step": 5179 }, { "epoch": 0.21591430119628194, "grad_norm": 348.0, "learning_rate": 9.12073607968425e-05, "loss": 13.8753, "step": 5180 }, { "epoch": 0.21595598349381018, "grad_norm": 816.0, "learning_rate": 9.120353737646123e-05, "loss": 22.8752, "step": 5181 }, { "epoch": 0.21599766579133842, "grad_norm": 260.0, "learning_rate": 9.119971320514183e-05, "loss": 11.8753, "step": 5182 }, { "epoch": 0.21603934808886666, "grad_norm": 237.0, "learning_rate": 9.119588828295398e-05, "loss": 13.0628, "step": 5183 }, { "epoch": 0.2160810303863949, "grad_norm": 286.0, "learning_rate": 9.119206260996743e-05, "loss": 12.4377, "step": 5184 }, { "epoch": 0.21612271268392313, "grad_norm": 736.0, "learning_rate": 9.118823618625188e-05, "loss": 22.3753, "step": 5185 }, { "epoch": 0.21616439498145137, "grad_norm": 216.0, "learning_rate": 9.118440901187706e-05, "loss": 11.8127, "step": 5186 }, { "epoch": 0.2162060772789796, "grad_norm": 227.0, "learning_rate": 9.118058108691274e-05, "loss": 12.4377, "step": 5187 }, { "epoch": 0.21624775957650785, "grad_norm": 548.0, "learning_rate": 9.117675241142866e-05, "loss": 18.5003, "step": 5188 }, { "epoch": 0.2162894418740361, "grad_norm": 362.0, "learning_rate": 9.117292298549462e-05, "loss": 14.3752, "step": 5189 }, { "epoch": 0.21633112417156433, "grad_norm": 402.0, "learning_rate": 9.11690928091804e-05, "loss": 15.0629, "step": 5190 }, { "epoch": 0.21637280646909257, "grad_norm": 816.0, "learning_rate": 9.116526188255583e-05, "loss": 21.6254, "step": 5191 }, { "epoch": 0.2164144887666208, "grad_norm": 544.0, "learning_rate": 9.11614302056907e-05, "loss": 16.0035, "step": 5192 }, { "epoch": 0.21645617106414905, "grad_norm": 380.0, "learning_rate": 9.115759777865483e-05, "loss": 14.2505, "step": 5193 }, { "epoch": 0.2164978533616773, "grad_norm": 676.0, "learning_rate": 9.11537646015181e-05, "loss": 18.88, "step": 5194 }, { "epoch": 0.21653953565920553, "grad_norm": 364.0, "learning_rate": 9.114993067435036e-05, "loss": 14.0627, "step": 5195 }, { "epoch": 0.21658121795673377, "grad_norm": 468.0, "learning_rate": 9.114609599722148e-05, "loss": 15.2502, "step": 5196 }, { "epoch": 0.216622900254262, "grad_norm": 211.0, "learning_rate": 9.114226057020134e-05, "loss": 10.5633, "step": 5197 }, { "epoch": 0.21666458255179025, "grad_norm": 604.0, "learning_rate": 9.113842439335986e-05, "loss": 17.2506, "step": 5198 }, { "epoch": 0.2167062648493185, "grad_norm": 480.0, "learning_rate": 9.113458746676694e-05, "loss": 17.6253, "step": 5199 }, { "epoch": 0.21674794714684673, "grad_norm": 700.0, "learning_rate": 9.11307497904925e-05, "loss": 17.6263, "step": 5200 }, { "epoch": 0.21678962944437496, "grad_norm": 140.0, "learning_rate": 9.11269113646065e-05, "loss": 11.0022, "step": 5201 }, { "epoch": 0.2168313117419032, "grad_norm": 354.0, "learning_rate": 9.11230721891789e-05, "loss": 14.0627, "step": 5202 }, { "epoch": 0.21687299403943144, "grad_norm": 340.0, "learning_rate": 9.111923226427965e-05, "loss": 13.6254, "step": 5203 }, { "epoch": 0.21691467633695968, "grad_norm": 1552.0, "learning_rate": 9.111539158997873e-05, "loss": 36.5002, "step": 5204 }, { "epoch": 0.21695635863448792, "grad_norm": 272.0, "learning_rate": 9.111155016634616e-05, "loss": 12.1256, "step": 5205 }, { "epoch": 0.21699804093201616, "grad_norm": 1016.0, "learning_rate": 9.110770799345194e-05, "loss": 29.0003, "step": 5206 }, { "epoch": 0.2170397232295444, "grad_norm": 73.0, "learning_rate": 9.11038650713661e-05, "loss": 9.5629, "step": 5207 }, { "epoch": 0.21708140552707264, "grad_norm": 366.0, "learning_rate": 9.110002140015866e-05, "loss": 13.7503, "step": 5208 }, { "epoch": 0.21712308782460088, "grad_norm": 1776.0, "learning_rate": 9.109617697989967e-05, "loss": 37.2537, "step": 5209 }, { "epoch": 0.21716477012212912, "grad_norm": 316.0, "learning_rate": 9.109233181065923e-05, "loss": 14.752, "step": 5210 }, { "epoch": 0.21720645241965736, "grad_norm": 656.0, "learning_rate": 9.108848589250737e-05, "loss": 18.8752, "step": 5211 }, { "epoch": 0.2172481347171856, "grad_norm": 344.0, "learning_rate": 9.108463922551423e-05, "loss": 14.8753, "step": 5212 }, { "epoch": 0.21728981701471384, "grad_norm": 292.0, "learning_rate": 9.108079180974989e-05, "loss": 13.188, "step": 5213 }, { "epoch": 0.2173314993122421, "grad_norm": 258.0, "learning_rate": 9.107694364528448e-05, "loss": 12.0001, "step": 5214 }, { "epoch": 0.21737318160977034, "grad_norm": 412.0, "learning_rate": 9.10730947321881e-05, "loss": 15.6877, "step": 5215 }, { "epoch": 0.21741486390729858, "grad_norm": 219.0, "learning_rate": 9.106924507053094e-05, "loss": 11.3764, "step": 5216 }, { "epoch": 0.21745654620482682, "grad_norm": 444.0, "learning_rate": 9.106539466038313e-05, "loss": 15.5001, "step": 5217 }, { "epoch": 0.21749822850235506, "grad_norm": 484.0, "learning_rate": 9.106154350181486e-05, "loss": 16.1254, "step": 5218 }, { "epoch": 0.2175399107998833, "grad_norm": 85.5, "learning_rate": 9.105769159489632e-05, "loss": 7.9692, "step": 5219 }, { "epoch": 0.21758159309741154, "grad_norm": 334.0, "learning_rate": 9.105383893969771e-05, "loss": 12.9379, "step": 5220 }, { "epoch": 0.21762327539493978, "grad_norm": 292.0, "learning_rate": 9.104998553628923e-05, "loss": 13.9378, "step": 5221 }, { "epoch": 0.21766495769246802, "grad_norm": 220.0, "learning_rate": 9.104613138474114e-05, "loss": 12.0627, "step": 5222 }, { "epoch": 0.21770663998999626, "grad_norm": 242.0, "learning_rate": 9.104227648512364e-05, "loss": 11.7502, "step": 5223 }, { "epoch": 0.2177483222875245, "grad_norm": 124.5, "learning_rate": 9.1038420837507e-05, "loss": 9.1251, "step": 5224 }, { "epoch": 0.21779000458505274, "grad_norm": 500.0, "learning_rate": 9.103456444196152e-05, "loss": 17.1253, "step": 5225 }, { "epoch": 0.21783168688258098, "grad_norm": 232.0, "learning_rate": 9.103070729855745e-05, "loss": 10.0002, "step": 5226 }, { "epoch": 0.21787336918010922, "grad_norm": 1176.0, "learning_rate": 9.10268494073651e-05, "loss": 27.1303, "step": 5227 }, { "epoch": 0.21791505147763746, "grad_norm": 312.0, "learning_rate": 9.102299076845477e-05, "loss": 12.2506, "step": 5228 }, { "epoch": 0.2179567337751657, "grad_norm": 506.0, "learning_rate": 9.101913138189682e-05, "loss": 18.1253, "step": 5229 }, { "epoch": 0.21799841607269393, "grad_norm": 253.0, "learning_rate": 9.101527124776152e-05, "loss": 13.2505, "step": 5230 }, { "epoch": 0.21804009837022217, "grad_norm": 264.0, "learning_rate": 9.101141036611929e-05, "loss": 12.8762, "step": 5231 }, { "epoch": 0.2180817806677504, "grad_norm": 352.0, "learning_rate": 9.100754873704044e-05, "loss": 15.1255, "step": 5232 }, { "epoch": 0.21812346296527865, "grad_norm": 464.0, "learning_rate": 9.10036863605954e-05, "loss": 15.0002, "step": 5233 }, { "epoch": 0.2181651452628069, "grad_norm": 498.0, "learning_rate": 9.099982323685451e-05, "loss": 16.2507, "step": 5234 }, { "epoch": 0.21820682756033513, "grad_norm": 548.0, "learning_rate": 9.099595936588822e-05, "loss": 18.1256, "step": 5235 }, { "epoch": 0.21824850985786337, "grad_norm": 426.0, "learning_rate": 9.099209474776694e-05, "loss": 15.9377, "step": 5236 }, { "epoch": 0.2182901921553916, "grad_norm": 424.0, "learning_rate": 9.098822938256106e-05, "loss": 18.7505, "step": 5237 }, { "epoch": 0.21833187445291985, "grad_norm": 464.0, "learning_rate": 9.09843632703411e-05, "loss": 15.3762, "step": 5238 }, { "epoch": 0.2183735567504481, "grad_norm": 472.0, "learning_rate": 9.098049641117745e-05, "loss": 16.5027, "step": 5239 }, { "epoch": 0.21841523904797633, "grad_norm": 1112.0, "learning_rate": 9.097662880514062e-05, "loss": 25.7502, "step": 5240 }, { "epoch": 0.21845692134550457, "grad_norm": 548.0, "learning_rate": 9.097276045230111e-05, "loss": 16.7528, "step": 5241 }, { "epoch": 0.2184986036430328, "grad_norm": 304.0, "learning_rate": 9.096889135272939e-05, "loss": 14.1254, "step": 5242 }, { "epoch": 0.21854028594056105, "grad_norm": 300.0, "learning_rate": 9.0965021506496e-05, "loss": 12.8132, "step": 5243 }, { "epoch": 0.21858196823808929, "grad_norm": 392.0, "learning_rate": 9.096115091367145e-05, "loss": 15.3136, "step": 5244 }, { "epoch": 0.21862365053561753, "grad_norm": 408.0, "learning_rate": 9.095727957432627e-05, "loss": 16.6282, "step": 5245 }, { "epoch": 0.21866533283314576, "grad_norm": 370.0, "learning_rate": 9.095340748853104e-05, "loss": 16.2501, "step": 5246 }, { "epoch": 0.218707015130674, "grad_norm": 1280.0, "learning_rate": 9.094953465635635e-05, "loss": 32.0005, "step": 5247 }, { "epoch": 0.21874869742820224, "grad_norm": 94.5, "learning_rate": 9.094566107787275e-05, "loss": 6.7206, "step": 5248 }, { "epoch": 0.21879037972573048, "grad_norm": 326.0, "learning_rate": 9.094178675315081e-05, "loss": 14.0629, "step": 5249 }, { "epoch": 0.21883206202325872, "grad_norm": 434.0, "learning_rate": 9.09379116822612e-05, "loss": 16.0001, "step": 5250 }, { "epoch": 0.21887374432078696, "grad_norm": 308.0, "learning_rate": 9.093403586527452e-05, "loss": 13.0003, "step": 5251 }, { "epoch": 0.2189154266183152, "grad_norm": 262.0, "learning_rate": 9.093015930226139e-05, "loss": 12.7503, "step": 5252 }, { "epoch": 0.21895710891584344, "grad_norm": 93.0, "learning_rate": 9.092628199329248e-05, "loss": 7.8753, "step": 5253 }, { "epoch": 0.21899879121337168, "grad_norm": 308.0, "learning_rate": 9.092240393843842e-05, "loss": 13.5628, "step": 5254 }, { "epoch": 0.21904047351089992, "grad_norm": 260.0, "learning_rate": 9.091852513776995e-05, "loss": 12.5011, "step": 5255 }, { "epoch": 0.21908215580842816, "grad_norm": 520.0, "learning_rate": 9.091464559135772e-05, "loss": 18.7502, "step": 5256 }, { "epoch": 0.2191238381059564, "grad_norm": 506.0, "learning_rate": 9.091076529927242e-05, "loss": 17.5005, "step": 5257 }, { "epoch": 0.21916552040348464, "grad_norm": 280.0, "learning_rate": 9.090688426158481e-05, "loss": 13.5626, "step": 5258 }, { "epoch": 0.21920720270101288, "grad_norm": 372.0, "learning_rate": 9.090300247836561e-05, "loss": 15.5002, "step": 5259 }, { "epoch": 0.21924888499854112, "grad_norm": 452.0, "learning_rate": 9.089911994968554e-05, "loss": 12.8129, "step": 5260 }, { "epoch": 0.21929056729606936, "grad_norm": 420.0, "learning_rate": 9.089523667561539e-05, "loss": 15.0627, "step": 5261 }, { "epoch": 0.2193322495935976, "grad_norm": 59.75, "learning_rate": 9.089135265622591e-05, "loss": 7.0627, "step": 5262 }, { "epoch": 0.21937393189112583, "grad_norm": 532.0, "learning_rate": 9.088746789158791e-05, "loss": 16.3776, "step": 5263 }, { "epoch": 0.21941561418865407, "grad_norm": 452.0, "learning_rate": 9.088358238177216e-05, "loss": 16.5002, "step": 5264 }, { "epoch": 0.2194572964861823, "grad_norm": 104.5, "learning_rate": 9.087969612684952e-05, "loss": 8.4377, "step": 5265 }, { "epoch": 0.21949897878371055, "grad_norm": 240.0, "learning_rate": 9.087580912689077e-05, "loss": 11.5627, "step": 5266 }, { "epoch": 0.2195406610812388, "grad_norm": 308.0, "learning_rate": 9.087192138196678e-05, "loss": 12.8141, "step": 5267 }, { "epoch": 0.21958234337876703, "grad_norm": 608.0, "learning_rate": 9.086803289214838e-05, "loss": 21.3761, "step": 5268 }, { "epoch": 0.21962402567629527, "grad_norm": 548.0, "learning_rate": 9.086414365750647e-05, "loss": 17.8761, "step": 5269 }, { "epoch": 0.2196657079738235, "grad_norm": 191.0, "learning_rate": 9.08602536781119e-05, "loss": 10.8137, "step": 5270 }, { "epoch": 0.21970739027135175, "grad_norm": 112.0, "learning_rate": 9.085636295403559e-05, "loss": 9.3131, "step": 5271 }, { "epoch": 0.21974907256888, "grad_norm": 191.0, "learning_rate": 9.085247148534843e-05, "loss": 10.5627, "step": 5272 }, { "epoch": 0.21979075486640823, "grad_norm": 316.0, "learning_rate": 9.084857927212135e-05, "loss": 12.5027, "step": 5273 }, { "epoch": 0.21983243716393647, "grad_norm": 300.0, "learning_rate": 9.08446863144253e-05, "loss": 12.9377, "step": 5274 }, { "epoch": 0.2198741194614647, "grad_norm": 498.0, "learning_rate": 9.08407926123312e-05, "loss": 15.6271, "step": 5275 }, { "epoch": 0.21991580175899295, "grad_norm": 268.0, "learning_rate": 9.083689816591004e-05, "loss": 12.3751, "step": 5276 }, { "epoch": 0.21995748405652119, "grad_norm": 496.0, "learning_rate": 9.083300297523279e-05, "loss": 16.8752, "step": 5277 }, { "epoch": 0.21999916635404942, "grad_norm": 322.0, "learning_rate": 9.082910704037042e-05, "loss": 14.5627, "step": 5278 }, { "epoch": 0.22004084865157766, "grad_norm": 1192.0, "learning_rate": 9.082521036139395e-05, "loss": 33.5011, "step": 5279 }, { "epoch": 0.2200825309491059, "grad_norm": 322.0, "learning_rate": 9.082131293837441e-05, "loss": 13.6878, "step": 5280 }, { "epoch": 0.22012421324663414, "grad_norm": 104.0, "learning_rate": 9.081741477138282e-05, "loss": 9.1883, "step": 5281 }, { "epoch": 0.22016589554416238, "grad_norm": 472.0, "learning_rate": 9.08135158604902e-05, "loss": 13.4376, "step": 5282 }, { "epoch": 0.22020757784169062, "grad_norm": 760.0, "learning_rate": 9.080961620576765e-05, "loss": 20.8752, "step": 5283 }, { "epoch": 0.22024926013921886, "grad_norm": 356.0, "learning_rate": 9.08057158072862e-05, "loss": 14.3751, "step": 5284 }, { "epoch": 0.2202909424367471, "grad_norm": 204.0, "learning_rate": 9.0801814665117e-05, "loss": 11.7514, "step": 5285 }, { "epoch": 0.22033262473427534, "grad_norm": 376.0, "learning_rate": 9.079791277933106e-05, "loss": 14.7503, "step": 5286 }, { "epoch": 0.2203743070318036, "grad_norm": 294.0, "learning_rate": 9.079401014999956e-05, "loss": 12.7502, "step": 5287 }, { "epoch": 0.22041598932933185, "grad_norm": 350.0, "learning_rate": 9.079010677719359e-05, "loss": 13.2509, "step": 5288 }, { "epoch": 0.22045767162686009, "grad_norm": 126.5, "learning_rate": 9.078620266098432e-05, "loss": 9.3759, "step": 5289 }, { "epoch": 0.22049935392438832, "grad_norm": 158.0, "learning_rate": 9.078229780144289e-05, "loss": 10.1259, "step": 5290 }, { "epoch": 0.22054103622191656, "grad_norm": 304.0, "learning_rate": 9.077839219864044e-05, "loss": 13.5005, "step": 5291 }, { "epoch": 0.2205827185194448, "grad_norm": 140.0, "learning_rate": 9.077448585264819e-05, "loss": 10.0627, "step": 5292 }, { "epoch": 0.22062440081697304, "grad_norm": 406.0, "learning_rate": 9.077057876353731e-05, "loss": 16.3759, "step": 5293 }, { "epoch": 0.22066608311450128, "grad_norm": 163.0, "learning_rate": 9.076667093137901e-05, "loss": 7.5003, "step": 5294 }, { "epoch": 0.22070776541202952, "grad_norm": 358.0, "learning_rate": 9.076276235624452e-05, "loss": 14.7505, "step": 5295 }, { "epoch": 0.22074944770955776, "grad_norm": 270.0, "learning_rate": 9.075885303820506e-05, "loss": 12.3127, "step": 5296 }, { "epoch": 0.220791130007086, "grad_norm": 426.0, "learning_rate": 9.075494297733189e-05, "loss": 15.1253, "step": 5297 }, { "epoch": 0.22083281230461424, "grad_norm": 127.0, "learning_rate": 9.075103217369626e-05, "loss": 10.1255, "step": 5298 }, { "epoch": 0.22087449460214248, "grad_norm": 182.0, "learning_rate": 9.074712062736945e-05, "loss": 9.2516, "step": 5299 }, { "epoch": 0.22091617689967072, "grad_norm": 308.0, "learning_rate": 9.074320833842276e-05, "loss": 13.751, "step": 5300 }, { "epoch": 0.22095785919719896, "grad_norm": 352.0, "learning_rate": 9.073929530692747e-05, "loss": 14.0002, "step": 5301 }, { "epoch": 0.2209995414947272, "grad_norm": 132.0, "learning_rate": 9.07353815329549e-05, "loss": 10.563, "step": 5302 }, { "epoch": 0.22104122379225544, "grad_norm": 227.0, "learning_rate": 9.073146701657642e-05, "loss": 12.2502, "step": 5303 }, { "epoch": 0.22108290608978368, "grad_norm": 274.0, "learning_rate": 9.072755175786332e-05, "loss": 12.6882, "step": 5304 }, { "epoch": 0.22112458838731192, "grad_norm": 156.0, "learning_rate": 9.072363575688696e-05, "loss": 8.8131, "step": 5305 }, { "epoch": 0.22116627068484015, "grad_norm": 388.0, "learning_rate": 9.071971901371873e-05, "loss": 15.4379, "step": 5306 }, { "epoch": 0.2212079529823684, "grad_norm": 616.0, "learning_rate": 9.071580152843001e-05, "loss": 20.3754, "step": 5307 }, { "epoch": 0.22124963527989663, "grad_norm": 660.0, "learning_rate": 9.071188330109219e-05, "loss": 19.0005, "step": 5308 }, { "epoch": 0.22129131757742487, "grad_norm": 330.0, "learning_rate": 9.070796433177669e-05, "loss": 13.6252, "step": 5309 }, { "epoch": 0.2213329998749531, "grad_norm": 848.0, "learning_rate": 9.070404462055491e-05, "loss": 20.0032, "step": 5310 }, { "epoch": 0.22137468217248135, "grad_norm": 868.0, "learning_rate": 9.070012416749831e-05, "loss": 23.7504, "step": 5311 }, { "epoch": 0.2214163644700096, "grad_norm": 120.0, "learning_rate": 9.069620297267835e-05, "loss": 8.5009, "step": 5312 }, { "epoch": 0.22145804676753783, "grad_norm": 688.0, "learning_rate": 9.069228103616646e-05, "loss": 17.3773, "step": 5313 }, { "epoch": 0.22149972906506607, "grad_norm": 1272.0, "learning_rate": 9.068835835803415e-05, "loss": 30.5004, "step": 5314 }, { "epoch": 0.2215414113625943, "grad_norm": 272.0, "learning_rate": 9.068443493835289e-05, "loss": 12.8143, "step": 5315 }, { "epoch": 0.22158309366012255, "grad_norm": 298.0, "learning_rate": 9.068051077719417e-05, "loss": 12.3127, "step": 5316 }, { "epoch": 0.2216247759576508, "grad_norm": 78.0, "learning_rate": 9.067658587462956e-05, "loss": 8.6894, "step": 5317 }, { "epoch": 0.22166645825517903, "grad_norm": 454.0, "learning_rate": 9.067266023073055e-05, "loss": 16.5002, "step": 5318 }, { "epoch": 0.22170814055270727, "grad_norm": 664.0, "learning_rate": 9.06687338455687e-05, "loss": 21.3754, "step": 5319 }, { "epoch": 0.2217498228502355, "grad_norm": 262.0, "learning_rate": 9.066480671921556e-05, "loss": 12.8129, "step": 5320 }, { "epoch": 0.22179150514776375, "grad_norm": 604.0, "learning_rate": 9.06608788517427e-05, "loss": 18.6264, "step": 5321 }, { "epoch": 0.22183318744529198, "grad_norm": 304.0, "learning_rate": 9.065695024322174e-05, "loss": 12.2504, "step": 5322 }, { "epoch": 0.22187486974282022, "grad_norm": 268.0, "learning_rate": 9.065302089372422e-05, "loss": 12.5002, "step": 5323 }, { "epoch": 0.22191655204034846, "grad_norm": 258.0, "learning_rate": 9.064909080332182e-05, "loss": 11.9379, "step": 5324 }, { "epoch": 0.2219582343378767, "grad_norm": 478.0, "learning_rate": 9.064515997208611e-05, "loss": 16.7505, "step": 5325 }, { "epoch": 0.22199991663540494, "grad_norm": 414.0, "learning_rate": 9.064122840008875e-05, "loss": 15.2512, "step": 5326 }, { "epoch": 0.22204159893293318, "grad_norm": 612.0, "learning_rate": 9.06372960874014e-05, "loss": 20.7506, "step": 5327 }, { "epoch": 0.22208328123046142, "grad_norm": 384.0, "learning_rate": 9.063336303409573e-05, "loss": 14.7554, "step": 5328 }, { "epoch": 0.22212496352798966, "grad_norm": 171.0, "learning_rate": 9.062942924024341e-05, "loss": 9.0629, "step": 5329 }, { "epoch": 0.2221666458255179, "grad_norm": 161.0, "learning_rate": 9.062549470591612e-05, "loss": 9.0633, "step": 5330 }, { "epoch": 0.22220832812304614, "grad_norm": 145.0, "learning_rate": 9.062155943118559e-05, "loss": 8.4379, "step": 5331 }, { "epoch": 0.22225001042057438, "grad_norm": 752.0, "learning_rate": 9.061762341612354e-05, "loss": 21.2535, "step": 5332 }, { "epoch": 0.22229169271810262, "grad_norm": 288.0, "learning_rate": 9.061368666080167e-05, "loss": 13.0628, "step": 5333 }, { "epoch": 0.22233337501563086, "grad_norm": 1584.0, "learning_rate": 9.060974916529179e-05, "loss": 35.5004, "step": 5334 }, { "epoch": 0.2223750573131591, "grad_norm": 150.0, "learning_rate": 9.06058109296656e-05, "loss": 10.5007, "step": 5335 }, { "epoch": 0.22241673961068734, "grad_norm": 254.0, "learning_rate": 9.060187195399492e-05, "loss": 11.8126, "step": 5336 }, { "epoch": 0.22245842190821558, "grad_norm": 708.0, "learning_rate": 9.059793223835151e-05, "loss": 20.6257, "step": 5337 }, { "epoch": 0.22250010420574381, "grad_norm": 466.0, "learning_rate": 9.059399178280718e-05, "loss": 16.5003, "step": 5338 }, { "epoch": 0.22254178650327205, "grad_norm": 250.0, "learning_rate": 9.059005058743376e-05, "loss": 10.8753, "step": 5339 }, { "epoch": 0.2225834688008003, "grad_norm": 410.0, "learning_rate": 9.058610865230306e-05, "loss": 15.6877, "step": 5340 }, { "epoch": 0.22262515109832853, "grad_norm": 260.0, "learning_rate": 9.058216597748692e-05, "loss": 11.7503, "step": 5341 }, { "epoch": 0.22266683339585677, "grad_norm": 183.0, "learning_rate": 9.05782225630572e-05, "loss": 11.5631, "step": 5342 }, { "epoch": 0.222708515693385, "grad_norm": 336.0, "learning_rate": 9.057427840908577e-05, "loss": 13.7504, "step": 5343 }, { "epoch": 0.22275019799091325, "grad_norm": 462.0, "learning_rate": 9.057033351564453e-05, "loss": 16.2502, "step": 5344 }, { "epoch": 0.2227918802884415, "grad_norm": 460.0, "learning_rate": 9.056638788280534e-05, "loss": 15.6876, "step": 5345 }, { "epoch": 0.22283356258596973, "grad_norm": 356.0, "learning_rate": 9.056244151064015e-05, "loss": 14.3752, "step": 5346 }, { "epoch": 0.22287524488349797, "grad_norm": 458.0, "learning_rate": 9.055849439922085e-05, "loss": 18.1257, "step": 5347 }, { "epoch": 0.2229169271810262, "grad_norm": 520.0, "learning_rate": 9.055454654861939e-05, "loss": 17.0008, "step": 5348 }, { "epoch": 0.22295860947855445, "grad_norm": 314.0, "learning_rate": 9.055059795890772e-05, "loss": 14.3754, "step": 5349 }, { "epoch": 0.2230002917760827, "grad_norm": 572.0, "learning_rate": 9.05466486301578e-05, "loss": 15.9399, "step": 5350 }, { "epoch": 0.22304197407361093, "grad_norm": 75.0, "learning_rate": 9.054269856244162e-05, "loss": 10.3756, "step": 5351 }, { "epoch": 0.22308365637113917, "grad_norm": 274.0, "learning_rate": 9.053874775583115e-05, "loss": 9.1254, "step": 5352 }, { "epoch": 0.2231253386686674, "grad_norm": 96.0, "learning_rate": 9.053479621039839e-05, "loss": 7.969, "step": 5353 }, { "epoch": 0.22316702096619564, "grad_norm": 844.0, "learning_rate": 9.05308439262154e-05, "loss": 18.8799, "step": 5354 }, { "epoch": 0.22320870326372388, "grad_norm": 484.0, "learning_rate": 9.052689090335416e-05, "loss": 16.3754, "step": 5355 }, { "epoch": 0.22325038556125212, "grad_norm": 804.0, "learning_rate": 9.052293714188675e-05, "loss": 27.7504, "step": 5356 }, { "epoch": 0.22329206785878036, "grad_norm": 318.0, "learning_rate": 9.051898264188521e-05, "loss": 13.8757, "step": 5357 }, { "epoch": 0.2233337501563086, "grad_norm": 936.0, "learning_rate": 9.051502740342161e-05, "loss": 28.7509, "step": 5358 }, { "epoch": 0.22337543245383684, "grad_norm": 532.0, "learning_rate": 9.051107142656804e-05, "loss": 16.6257, "step": 5359 }, { "epoch": 0.2234171147513651, "grad_norm": 398.0, "learning_rate": 9.050711471139658e-05, "loss": 15.5628, "step": 5360 }, { "epoch": 0.22345879704889335, "grad_norm": 260.0, "learning_rate": 9.050315725797938e-05, "loss": 11.5631, "step": 5361 }, { "epoch": 0.2235004793464216, "grad_norm": 330.0, "learning_rate": 9.049919906638855e-05, "loss": 13.8752, "step": 5362 }, { "epoch": 0.22354216164394983, "grad_norm": 332.0, "learning_rate": 9.049524013669622e-05, "loss": 12.6882, "step": 5363 }, { "epoch": 0.22358384394147807, "grad_norm": 444.0, "learning_rate": 9.049128046897453e-05, "loss": 15.9396, "step": 5364 }, { "epoch": 0.2236255262390063, "grad_norm": 316.0, "learning_rate": 9.048732006329565e-05, "loss": 12.3169, "step": 5365 }, { "epoch": 0.22366720853653455, "grad_norm": 390.0, "learning_rate": 9.048335891973179e-05, "loss": 14.563, "step": 5366 }, { "epoch": 0.22370889083406278, "grad_norm": 130.0, "learning_rate": 9.047939703835511e-05, "loss": 9.063, "step": 5367 }, { "epoch": 0.22375057313159102, "grad_norm": 227.0, "learning_rate": 9.047543441923782e-05, "loss": 8.1885, "step": 5368 }, { "epoch": 0.22379225542911926, "grad_norm": 340.0, "learning_rate": 9.047147106245216e-05, "loss": 11.7506, "step": 5369 }, { "epoch": 0.2238339377266475, "grad_norm": 228.0, "learning_rate": 9.046750696807033e-05, "loss": 12.1253, "step": 5370 }, { "epoch": 0.22387562002417574, "grad_norm": 272.0, "learning_rate": 9.046354213616459e-05, "loss": 12.8127, "step": 5371 }, { "epoch": 0.22391730232170398, "grad_norm": 376.0, "learning_rate": 9.045957656680722e-05, "loss": 14.8755, "step": 5372 }, { "epoch": 0.22395898461923222, "grad_norm": 308.0, "learning_rate": 9.045561026007048e-05, "loss": 12.5011, "step": 5373 }, { "epoch": 0.22400066691676046, "grad_norm": 235.0, "learning_rate": 9.045164321602664e-05, "loss": 11.5633, "step": 5374 }, { "epoch": 0.2240423492142887, "grad_norm": 314.0, "learning_rate": 9.0447675434748e-05, "loss": 11.1876, "step": 5375 }, { "epoch": 0.22408403151181694, "grad_norm": 254.0, "learning_rate": 9.04437069163069e-05, "loss": 11.6878, "step": 5376 }, { "epoch": 0.22412571380934518, "grad_norm": 568.0, "learning_rate": 9.043973766077565e-05, "loss": 19.3752, "step": 5377 }, { "epoch": 0.22416739610687342, "grad_norm": 296.0, "learning_rate": 9.043576766822659e-05, "loss": 10.5004, "step": 5378 }, { "epoch": 0.22420907840440166, "grad_norm": 274.0, "learning_rate": 9.043179693873208e-05, "loss": 11.6253, "step": 5379 }, { "epoch": 0.2242507607019299, "grad_norm": 700.0, "learning_rate": 9.042782547236446e-05, "loss": 20.2504, "step": 5380 }, { "epoch": 0.22429244299945814, "grad_norm": 121.0, "learning_rate": 9.042385326919616e-05, "loss": 9.0005, "step": 5381 }, { "epoch": 0.22433412529698638, "grad_norm": 560.0, "learning_rate": 9.041988032929952e-05, "loss": 18.2502, "step": 5382 }, { "epoch": 0.22437580759451461, "grad_norm": 149.0, "learning_rate": 9.0415906652747e-05, "loss": 10.1896, "step": 5383 }, { "epoch": 0.22441748989204285, "grad_norm": 668.0, "learning_rate": 9.041193223961096e-05, "loss": 20.0002, "step": 5384 }, { "epoch": 0.2244591721895711, "grad_norm": 612.0, "learning_rate": 9.040795708996389e-05, "loss": 16.6254, "step": 5385 }, { "epoch": 0.22450085448709933, "grad_norm": 172.0, "learning_rate": 9.04039812038782e-05, "loss": 11.688, "step": 5386 }, { "epoch": 0.22454253678462757, "grad_norm": 438.0, "learning_rate": 9.040000458142639e-05, "loss": 15.6879, "step": 5387 }, { "epoch": 0.2245842190821558, "grad_norm": 560.0, "learning_rate": 9.03960272226809e-05, "loss": 18.1253, "step": 5388 }, { "epoch": 0.22462590137968405, "grad_norm": 1136.0, "learning_rate": 9.039204912771422e-05, "loss": 24.0009, "step": 5389 }, { "epoch": 0.2246675836772123, "grad_norm": 171.0, "learning_rate": 9.038807029659885e-05, "loss": 10.6885, "step": 5390 }, { "epoch": 0.22470926597474053, "grad_norm": 496.0, "learning_rate": 9.038409072940734e-05, "loss": 17.2501, "step": 5391 }, { "epoch": 0.22475094827226877, "grad_norm": 243.0, "learning_rate": 9.038011042621219e-05, "loss": 12.8151, "step": 5392 }, { "epoch": 0.224792630569797, "grad_norm": 724.0, "learning_rate": 9.037612938708593e-05, "loss": 25.2503, "step": 5393 }, { "epoch": 0.22483431286732525, "grad_norm": 258.0, "learning_rate": 9.037214761210113e-05, "loss": 12.8754, "step": 5394 }, { "epoch": 0.2248759951648535, "grad_norm": 402.0, "learning_rate": 9.036816510133035e-05, "loss": 15.7503, "step": 5395 }, { "epoch": 0.22491767746238173, "grad_norm": 170.0, "learning_rate": 9.036418185484618e-05, "loss": 10.6257, "step": 5396 }, { "epoch": 0.22495935975990997, "grad_norm": 512.0, "learning_rate": 9.036019787272121e-05, "loss": 18.6253, "step": 5397 }, { "epoch": 0.2250010420574382, "grad_norm": 648.0, "learning_rate": 9.035621315502805e-05, "loss": 19.0031, "step": 5398 }, { "epoch": 0.22504272435496644, "grad_norm": 928.0, "learning_rate": 9.035222770183934e-05, "loss": 25.1253, "step": 5399 }, { "epoch": 0.22508440665249468, "grad_norm": 548.0, "learning_rate": 9.034824151322768e-05, "loss": 18.5002, "step": 5400 }, { "epoch": 0.22512608895002292, "grad_norm": 1320.0, "learning_rate": 9.034425458926574e-05, "loss": 31.0044, "step": 5401 }, { "epoch": 0.22516777124755116, "grad_norm": 125.0, "learning_rate": 9.03402669300262e-05, "loss": 9.8139, "step": 5402 }, { "epoch": 0.2252094535450794, "grad_norm": 374.0, "learning_rate": 9.033627853558168e-05, "loss": 14.9384, "step": 5403 }, { "epoch": 0.22525113584260764, "grad_norm": 476.0, "learning_rate": 9.033228940600493e-05, "loss": 15.8134, "step": 5404 }, { "epoch": 0.22529281814013588, "grad_norm": 302.0, "learning_rate": 9.032829954136862e-05, "loss": 14.0627, "step": 5405 }, { "epoch": 0.22533450043766412, "grad_norm": 560.0, "learning_rate": 9.032430894174545e-05, "loss": 17.2506, "step": 5406 }, { "epoch": 0.22537618273519236, "grad_norm": 326.0, "learning_rate": 9.032031760720818e-05, "loss": 14.3128, "step": 5407 }, { "epoch": 0.2254178650327206, "grad_norm": 624.0, "learning_rate": 9.031632553782956e-05, "loss": 20.5003, "step": 5408 }, { "epoch": 0.22545954733024884, "grad_norm": 1272.0, "learning_rate": 9.031233273368231e-05, "loss": 32.5002, "step": 5409 }, { "epoch": 0.22550122962777708, "grad_norm": 364.0, "learning_rate": 9.030833919483923e-05, "loss": 16.1252, "step": 5410 }, { "epoch": 0.22554291192530532, "grad_norm": 380.0, "learning_rate": 9.030434492137307e-05, "loss": 15.5631, "step": 5411 }, { "epoch": 0.22558459422283356, "grad_norm": 134.0, "learning_rate": 9.030034991335666e-05, "loss": 9.3752, "step": 5412 }, { "epoch": 0.2256262765203618, "grad_norm": 272.0, "learning_rate": 9.02963541708628e-05, "loss": 12.5628, "step": 5413 }, { "epoch": 0.22566795881789004, "grad_norm": 434.0, "learning_rate": 9.029235769396429e-05, "loss": 16.3756, "step": 5414 }, { "epoch": 0.22570964111541827, "grad_norm": 272.0, "learning_rate": 9.0288360482734e-05, "loss": 12.9381, "step": 5415 }, { "epoch": 0.22575132341294651, "grad_norm": 68.5, "learning_rate": 9.028436253724475e-05, "loss": 7.0627, "step": 5416 }, { "epoch": 0.22579300571047475, "grad_norm": 247.0, "learning_rate": 9.028036385756944e-05, "loss": 12.1899, "step": 5417 }, { "epoch": 0.225834688008003, "grad_norm": 688.0, "learning_rate": 9.027636444378089e-05, "loss": 18.7536, "step": 5418 }, { "epoch": 0.22587637030553123, "grad_norm": 840.0, "learning_rate": 9.027236429595205e-05, "loss": 25.1256, "step": 5419 }, { "epoch": 0.22591805260305947, "grad_norm": 900.0, "learning_rate": 9.02683634141558e-05, "loss": 24.5003, "step": 5420 }, { "epoch": 0.2259597349005877, "grad_norm": 390.0, "learning_rate": 9.026436179846502e-05, "loss": 12.6254, "step": 5421 }, { "epoch": 0.22600141719811595, "grad_norm": 356.0, "learning_rate": 9.02603594489527e-05, "loss": 14.9383, "step": 5422 }, { "epoch": 0.2260430994956442, "grad_norm": 163.0, "learning_rate": 9.025635636569174e-05, "loss": 10.0629, "step": 5423 }, { "epoch": 0.22608478179317243, "grad_norm": 348.0, "learning_rate": 9.025235254875513e-05, "loss": 14.8129, "step": 5424 }, { "epoch": 0.22612646409070067, "grad_norm": 147.0, "learning_rate": 9.02483479982158e-05, "loss": 9.6253, "step": 5425 }, { "epoch": 0.2261681463882289, "grad_norm": 308.0, "learning_rate": 9.024434271414677e-05, "loss": 11.5007, "step": 5426 }, { "epoch": 0.22620982868575715, "grad_norm": 290.0, "learning_rate": 9.024033669662101e-05, "loss": 12.6256, "step": 5427 }, { "epoch": 0.2262515109832854, "grad_norm": 296.0, "learning_rate": 9.023632994571153e-05, "loss": 14.1882, "step": 5428 }, { "epoch": 0.22629319328081363, "grad_norm": 164.0, "learning_rate": 9.023232246149139e-05, "loss": 10.5002, "step": 5429 }, { "epoch": 0.22633487557834187, "grad_norm": 326.0, "learning_rate": 9.022831424403359e-05, "loss": 12.6877, "step": 5430 }, { "epoch": 0.2263765578758701, "grad_norm": 260.0, "learning_rate": 9.02243052934112e-05, "loss": 11.8754, "step": 5431 }, { "epoch": 0.22641824017339834, "grad_norm": 446.0, "learning_rate": 9.022029560969727e-05, "loss": 15.8132, "step": 5432 }, { "epoch": 0.2264599224709266, "grad_norm": 418.0, "learning_rate": 9.021628519296488e-05, "loss": 14.8763, "step": 5433 }, { "epoch": 0.22650160476845485, "grad_norm": 552.0, "learning_rate": 9.021227404328712e-05, "loss": 16.1255, "step": 5434 }, { "epoch": 0.2265432870659831, "grad_norm": 141.0, "learning_rate": 9.02082621607371e-05, "loss": 10.0004, "step": 5435 }, { "epoch": 0.22658496936351133, "grad_norm": 612.0, "learning_rate": 9.020424954538793e-05, "loss": 16.6275, "step": 5436 }, { "epoch": 0.22662665166103957, "grad_norm": 288.0, "learning_rate": 9.020023619731275e-05, "loss": 13.6258, "step": 5437 }, { "epoch": 0.2266683339585678, "grad_norm": 276.0, "learning_rate": 9.019622211658469e-05, "loss": 12.8763, "step": 5438 }, { "epoch": 0.22671001625609605, "grad_norm": 476.0, "learning_rate": 9.019220730327693e-05, "loss": 16.1252, "step": 5439 }, { "epoch": 0.2267516985536243, "grad_norm": 506.0, "learning_rate": 9.018819175746261e-05, "loss": 16.1252, "step": 5440 }, { "epoch": 0.22679338085115253, "grad_norm": 876.0, "learning_rate": 9.018417547921492e-05, "loss": 22.0041, "step": 5441 }, { "epoch": 0.22683506314868077, "grad_norm": 296.0, "learning_rate": 9.018015846860707e-05, "loss": 13.3127, "step": 5442 }, { "epoch": 0.226876745446209, "grad_norm": 1048.0, "learning_rate": 9.017614072571228e-05, "loss": 25.8755, "step": 5443 }, { "epoch": 0.22691842774373724, "grad_norm": 374.0, "learning_rate": 9.017212225060374e-05, "loss": 15.5002, "step": 5444 }, { "epoch": 0.22696011004126548, "grad_norm": 358.0, "learning_rate": 9.01681030433547e-05, "loss": 11.0002, "step": 5445 }, { "epoch": 0.22700179233879372, "grad_norm": 468.0, "learning_rate": 9.016408310403843e-05, "loss": 16.2506, "step": 5446 }, { "epoch": 0.22704347463632196, "grad_norm": 154.0, "learning_rate": 9.016006243272818e-05, "loss": 10.938, "step": 5447 }, { "epoch": 0.2270851569338502, "grad_norm": 232.0, "learning_rate": 9.015604102949722e-05, "loss": 11.6881, "step": 5448 }, { "epoch": 0.22712683923137844, "grad_norm": 384.0, "learning_rate": 9.015201889441887e-05, "loss": 15.2506, "step": 5449 }, { "epoch": 0.22716852152890668, "grad_norm": 492.0, "learning_rate": 9.014799602756639e-05, "loss": 17.8753, "step": 5450 }, { "epoch": 0.22721020382643492, "grad_norm": 402.0, "learning_rate": 9.014397242901311e-05, "loss": 16.1251, "step": 5451 }, { "epoch": 0.22725188612396316, "grad_norm": 220.0, "learning_rate": 9.013994809883239e-05, "loss": 10.8752, "step": 5452 }, { "epoch": 0.2272935684214914, "grad_norm": 456.0, "learning_rate": 9.013592303709754e-05, "loss": 15.6878, "step": 5453 }, { "epoch": 0.22733525071901964, "grad_norm": 326.0, "learning_rate": 9.013189724388193e-05, "loss": 14.0627, "step": 5454 }, { "epoch": 0.22737693301654788, "grad_norm": 448.0, "learning_rate": 9.012787071925893e-05, "loss": 15.6252, "step": 5455 }, { "epoch": 0.22741861531407612, "grad_norm": 576.0, "learning_rate": 9.012384346330193e-05, "loss": 19.2505, "step": 5456 }, { "epoch": 0.22746029761160436, "grad_norm": 282.0, "learning_rate": 9.011981547608432e-05, "loss": 13.8781, "step": 5457 }, { "epoch": 0.2275019799091326, "grad_norm": 215.0, "learning_rate": 9.011578675767951e-05, "loss": 11.2502, "step": 5458 }, { "epoch": 0.22754366220666083, "grad_norm": 478.0, "learning_rate": 9.011175730816093e-05, "loss": 16.629, "step": 5459 }, { "epoch": 0.22758534450418907, "grad_norm": 160.0, "learning_rate": 9.010772712760201e-05, "loss": 10.1299, "step": 5460 }, { "epoch": 0.2276270268017173, "grad_norm": 312.0, "learning_rate": 9.010369621607619e-05, "loss": 12.9379, "step": 5461 }, { "epoch": 0.22766870909924555, "grad_norm": 131.0, "learning_rate": 9.009966457365695e-05, "loss": 10.1269, "step": 5462 }, { "epoch": 0.2277103913967738, "grad_norm": 286.0, "learning_rate": 9.009563220041777e-05, "loss": 14.0005, "step": 5463 }, { "epoch": 0.22775207369430203, "grad_norm": 282.0, "learning_rate": 9.009159909643215e-05, "loss": 12.5629, "step": 5464 }, { "epoch": 0.22779375599183027, "grad_norm": 490.0, "learning_rate": 9.008756526177355e-05, "loss": 15.5002, "step": 5465 }, { "epoch": 0.2278354382893585, "grad_norm": 396.0, "learning_rate": 9.008353069651551e-05, "loss": 16.1281, "step": 5466 }, { "epoch": 0.22787712058688675, "grad_norm": 364.0, "learning_rate": 9.007949540073159e-05, "loss": 15.0628, "step": 5467 }, { "epoch": 0.227918802884415, "grad_norm": 474.0, "learning_rate": 9.007545937449529e-05, "loss": 16.6252, "step": 5468 }, { "epoch": 0.22796048518194323, "grad_norm": 628.0, "learning_rate": 9.00714226178802e-05, "loss": 20.2507, "step": 5469 }, { "epoch": 0.22800216747947147, "grad_norm": 436.0, "learning_rate": 9.006738513095987e-05, "loss": 16.3767, "step": 5470 }, { "epoch": 0.2280438497769997, "grad_norm": 280.0, "learning_rate": 9.006334691380788e-05, "loss": 13.8129, "step": 5471 }, { "epoch": 0.22808553207452795, "grad_norm": 592.0, "learning_rate": 9.005930796649784e-05, "loss": 18.6259, "step": 5472 }, { "epoch": 0.2281272143720562, "grad_norm": 322.0, "learning_rate": 9.005526828910337e-05, "loss": 13.2503, "step": 5473 }, { "epoch": 0.22816889666958443, "grad_norm": 270.0, "learning_rate": 9.005122788169806e-05, "loss": 12.1883, "step": 5474 }, { "epoch": 0.22821057896711266, "grad_norm": 344.0, "learning_rate": 9.004718674435559e-05, "loss": 12.5001, "step": 5475 }, { "epoch": 0.2282522612646409, "grad_norm": 348.0, "learning_rate": 9.004314487714956e-05, "loss": 10.6301, "step": 5476 }, { "epoch": 0.22829394356216914, "grad_norm": 392.0, "learning_rate": 9.003910228015369e-05, "loss": 14.7503, "step": 5477 }, { "epoch": 0.22833562585969738, "grad_norm": 249.0, "learning_rate": 9.00350589534416e-05, "loss": 11.4377, "step": 5478 }, { "epoch": 0.22837730815722562, "grad_norm": 139.0, "learning_rate": 9.0031014897087e-05, "loss": 9.9383, "step": 5479 }, { "epoch": 0.22841899045475386, "grad_norm": 428.0, "learning_rate": 9.002697011116364e-05, "loss": 16.6281, "step": 5480 }, { "epoch": 0.2284606727522821, "grad_norm": 234.0, "learning_rate": 9.002292459574517e-05, "loss": 11.5627, "step": 5481 }, { "epoch": 0.22850235504981034, "grad_norm": 334.0, "learning_rate": 9.001887835090535e-05, "loss": 13.8132, "step": 5482 }, { "epoch": 0.22854403734733858, "grad_norm": 241.0, "learning_rate": 9.001483137671791e-05, "loss": 10.3138, "step": 5483 }, { "epoch": 0.22858571964486682, "grad_norm": 102.5, "learning_rate": 9.001078367325662e-05, "loss": 9.7506, "step": 5484 }, { "epoch": 0.22862740194239506, "grad_norm": 326.0, "learning_rate": 9.000673524059525e-05, "loss": 14.3126, "step": 5485 }, { "epoch": 0.2286690842399233, "grad_norm": 332.0, "learning_rate": 9.000268607880757e-05, "loss": 11.8757, "step": 5486 }, { "epoch": 0.22871076653745154, "grad_norm": 280.0, "learning_rate": 8.99986361879674e-05, "loss": 13.0668, "step": 5487 }, { "epoch": 0.22875244883497978, "grad_norm": 272.0, "learning_rate": 8.999458556814853e-05, "loss": 12.7508, "step": 5488 }, { "epoch": 0.22879413113250802, "grad_norm": 1200.0, "learning_rate": 8.999053421942478e-05, "loss": 27.0032, "step": 5489 }, { "epoch": 0.22883581343003626, "grad_norm": 101.5, "learning_rate": 8.998648214187e-05, "loss": 6.7502, "step": 5490 }, { "epoch": 0.2288774957275645, "grad_norm": 302.0, "learning_rate": 8.998242933555802e-05, "loss": 12.6256, "step": 5491 }, { "epoch": 0.22891917802509273, "grad_norm": 217.0, "learning_rate": 8.997837580056275e-05, "loss": 11.2503, "step": 5492 }, { "epoch": 0.22896086032262097, "grad_norm": 360.0, "learning_rate": 8.997432153695799e-05, "loss": 15.1877, "step": 5493 }, { "epoch": 0.2290025426201492, "grad_norm": 696.0, "learning_rate": 8.99702665448177e-05, "loss": 20.1274, "step": 5494 }, { "epoch": 0.22904422491767745, "grad_norm": 270.0, "learning_rate": 8.996621082421575e-05, "loss": 13.0629, "step": 5495 }, { "epoch": 0.2290859072152057, "grad_norm": 560.0, "learning_rate": 8.996215437522607e-05, "loss": 15.6254, "step": 5496 }, { "epoch": 0.22912758951273393, "grad_norm": 51.25, "learning_rate": 8.995809719792254e-05, "loss": 7.9696, "step": 5497 }, { "epoch": 0.22916927181026217, "grad_norm": 148.0, "learning_rate": 8.995403929237918e-05, "loss": 11.1257, "step": 5498 }, { "epoch": 0.2292109541077904, "grad_norm": 908.0, "learning_rate": 8.994998065866989e-05, "loss": 24.0041, "step": 5499 }, { "epoch": 0.22925263640531865, "grad_norm": 1408.0, "learning_rate": 8.994592129686865e-05, "loss": 29.3797, "step": 5500 }, { "epoch": 0.2292943187028469, "grad_norm": 1016.0, "learning_rate": 8.994186120704947e-05, "loss": 24.6257, "step": 5501 }, { "epoch": 0.22933600100037513, "grad_norm": 310.0, "learning_rate": 8.993780038928629e-05, "loss": 13.5631, "step": 5502 }, { "epoch": 0.22937768329790337, "grad_norm": 384.0, "learning_rate": 8.993373884365319e-05, "loss": 15.563, "step": 5503 }, { "epoch": 0.2294193655954316, "grad_norm": 636.0, "learning_rate": 8.992967657022413e-05, "loss": 18.7515, "step": 5504 }, { "epoch": 0.22946104789295985, "grad_norm": 346.0, "learning_rate": 8.992561356907318e-05, "loss": 14.3752, "step": 5505 }, { "epoch": 0.2295027301904881, "grad_norm": 232.0, "learning_rate": 8.992154984027438e-05, "loss": 12.1886, "step": 5506 }, { "epoch": 0.22954441248801635, "grad_norm": 77.5, "learning_rate": 8.991748538390179e-05, "loss": 8.4388, "step": 5507 }, { "epoch": 0.2295860947855446, "grad_norm": 596.0, "learning_rate": 8.991342020002948e-05, "loss": 18.8753, "step": 5508 }, { "epoch": 0.22962777708307283, "grad_norm": 432.0, "learning_rate": 8.990935428873154e-05, "loss": 16.0004, "step": 5509 }, { "epoch": 0.22966945938060107, "grad_norm": 150.0, "learning_rate": 8.990528765008209e-05, "loss": 9.5003, "step": 5510 }, { "epoch": 0.2297111416781293, "grad_norm": 352.0, "learning_rate": 8.990122028415521e-05, "loss": 14.0626, "step": 5511 }, { "epoch": 0.22975282397565755, "grad_norm": 524.0, "learning_rate": 8.989715219102505e-05, "loss": 15.0002, "step": 5512 }, { "epoch": 0.2297945062731858, "grad_norm": 242.0, "learning_rate": 8.989308337076576e-05, "loss": 12.0014, "step": 5513 }, { "epoch": 0.22983618857071403, "grad_norm": 528.0, "learning_rate": 8.988901382345149e-05, "loss": 18.3757, "step": 5514 }, { "epoch": 0.22987787086824227, "grad_norm": 474.0, "learning_rate": 8.988494354915639e-05, "loss": 16.6252, "step": 5515 }, { "epoch": 0.2299195531657705, "grad_norm": 172.0, "learning_rate": 8.988087254795465e-05, "loss": 11.3756, "step": 5516 }, { "epoch": 0.22996123546329875, "grad_norm": 146.0, "learning_rate": 8.987680081992049e-05, "loss": 6.4386, "step": 5517 }, { "epoch": 0.23000291776082699, "grad_norm": 976.0, "learning_rate": 8.987272836512808e-05, "loss": 23.1305, "step": 5518 }, { "epoch": 0.23004460005835523, "grad_norm": 616.0, "learning_rate": 8.986865518365165e-05, "loss": 19.1254, "step": 5519 }, { "epoch": 0.23008628235588346, "grad_norm": 70.0, "learning_rate": 8.986458127556545e-05, "loss": 8.688, "step": 5520 }, { "epoch": 0.2301279646534117, "grad_norm": 384.0, "learning_rate": 8.986050664094373e-05, "loss": 14.876, "step": 5521 }, { "epoch": 0.23016964695093994, "grad_norm": 544.0, "learning_rate": 8.98564312798607e-05, "loss": 17.2504, "step": 5522 }, { "epoch": 0.23021132924846818, "grad_norm": 416.0, "learning_rate": 8.98523551923907e-05, "loss": 15.3761, "step": 5523 }, { "epoch": 0.23025301154599642, "grad_norm": 1096.0, "learning_rate": 8.984827837860799e-05, "loss": 22.6301, "step": 5524 }, { "epoch": 0.23029469384352466, "grad_norm": 342.0, "learning_rate": 8.984420083858684e-05, "loss": 14.6254, "step": 5525 }, { "epoch": 0.2303363761410529, "grad_norm": 133.0, "learning_rate": 8.984012257240162e-05, "loss": 9.2515, "step": 5526 }, { "epoch": 0.23037805843858114, "grad_norm": 960.0, "learning_rate": 8.983604358012663e-05, "loss": 24.5009, "step": 5527 }, { "epoch": 0.23041974073610938, "grad_norm": 736.0, "learning_rate": 8.983196386183621e-05, "loss": 19.3758, "step": 5528 }, { "epoch": 0.23046142303363762, "grad_norm": 496.0, "learning_rate": 8.98278834176047e-05, "loss": 15.6881, "step": 5529 }, { "epoch": 0.23050310533116586, "grad_norm": 892.0, "learning_rate": 8.982380224750649e-05, "loss": 23.5013, "step": 5530 }, { "epoch": 0.2305447876286941, "grad_norm": 182.0, "learning_rate": 8.981972035161594e-05, "loss": 10.3128, "step": 5531 }, { "epoch": 0.23058646992622234, "grad_norm": 330.0, "learning_rate": 8.981563773000745e-05, "loss": 11.5005, "step": 5532 }, { "epoch": 0.23062815222375058, "grad_norm": 450.0, "learning_rate": 8.981155438275544e-05, "loss": 16.6252, "step": 5533 }, { "epoch": 0.23066983452127882, "grad_norm": 154.0, "learning_rate": 8.980747030993431e-05, "loss": 9.3177, "step": 5534 }, { "epoch": 0.23071151681880706, "grad_norm": 384.0, "learning_rate": 8.980338551161849e-05, "loss": 16.1252, "step": 5535 }, { "epoch": 0.2307531991163353, "grad_norm": 222.0, "learning_rate": 8.979929998788245e-05, "loss": 7.6252, "step": 5536 }, { "epoch": 0.23079488141386353, "grad_norm": 588.0, "learning_rate": 8.979521373880061e-05, "loss": 18.251, "step": 5537 }, { "epoch": 0.23083656371139177, "grad_norm": 452.0, "learning_rate": 8.97911267644475e-05, "loss": 15.7515, "step": 5538 }, { "epoch": 0.23087824600892, "grad_norm": 237.0, "learning_rate": 8.978703906489756e-05, "loss": 11.563, "step": 5539 }, { "epoch": 0.23091992830644825, "grad_norm": 448.0, "learning_rate": 8.97829506402253e-05, "loss": 16.6257, "step": 5540 }, { "epoch": 0.2309616106039765, "grad_norm": 124.5, "learning_rate": 8.977886149050523e-05, "loss": 9.6252, "step": 5541 }, { "epoch": 0.23100329290150473, "grad_norm": 228.0, "learning_rate": 8.977477161581189e-05, "loss": 12.3138, "step": 5542 }, { "epoch": 0.23104497519903297, "grad_norm": 350.0, "learning_rate": 8.977068101621979e-05, "loss": 13.6876, "step": 5543 }, { "epoch": 0.2310866574965612, "grad_norm": 580.0, "learning_rate": 8.976658969180352e-05, "loss": 17.2504, "step": 5544 }, { "epoch": 0.23112833979408945, "grad_norm": 390.0, "learning_rate": 8.97624976426376e-05, "loss": 15.252, "step": 5545 }, { "epoch": 0.2311700220916177, "grad_norm": 264.0, "learning_rate": 8.975840486879663e-05, "loss": 13.3756, "step": 5546 }, { "epoch": 0.23121170438914593, "grad_norm": 498.0, "learning_rate": 8.975431137035522e-05, "loss": 15.9376, "step": 5547 }, { "epoch": 0.23125338668667417, "grad_norm": 398.0, "learning_rate": 8.975021714738793e-05, "loss": 17.2508, "step": 5548 }, { "epoch": 0.2312950689842024, "grad_norm": 185.0, "learning_rate": 8.974612219996943e-05, "loss": 12.0003, "step": 5549 }, { "epoch": 0.23133675128173065, "grad_norm": 180.0, "learning_rate": 8.974202652817432e-05, "loss": 9.751, "step": 5550 }, { "epoch": 0.23137843357925889, "grad_norm": 344.0, "learning_rate": 8.973793013207725e-05, "loss": 15.0003, "step": 5551 }, { "epoch": 0.23142011587678712, "grad_norm": 202.0, "learning_rate": 8.973383301175287e-05, "loss": 10.8752, "step": 5552 }, { "epoch": 0.23146179817431536, "grad_norm": 100.0, "learning_rate": 8.972973516727585e-05, "loss": 8.5006, "step": 5553 }, { "epoch": 0.2315034804718436, "grad_norm": 336.0, "learning_rate": 8.972563659872088e-05, "loss": 13.8754, "step": 5554 }, { "epoch": 0.23154516276937184, "grad_norm": 350.0, "learning_rate": 8.972153730616266e-05, "loss": 14.6256, "step": 5555 }, { "epoch": 0.23158684506690008, "grad_norm": 240.0, "learning_rate": 8.97174372896759e-05, "loss": 12.7503, "step": 5556 }, { "epoch": 0.23162852736442832, "grad_norm": 296.0, "learning_rate": 8.971333654933532e-05, "loss": 13.1252, "step": 5557 }, { "epoch": 0.23167020966195656, "grad_norm": 264.0, "learning_rate": 8.970923508521565e-05, "loss": 11.6255, "step": 5558 }, { "epoch": 0.2317118919594848, "grad_norm": 235.0, "learning_rate": 8.970513289739165e-05, "loss": 6.4075, "step": 5559 }, { "epoch": 0.23175357425701304, "grad_norm": 237.0, "learning_rate": 8.970102998593808e-05, "loss": 10.6881, "step": 5560 }, { "epoch": 0.23179525655454128, "grad_norm": 138.0, "learning_rate": 8.96969263509297e-05, "loss": 9.1253, "step": 5561 }, { "epoch": 0.23183693885206952, "grad_norm": 71.0, "learning_rate": 8.969282199244134e-05, "loss": 6.8768, "step": 5562 }, { "epoch": 0.23187862114959776, "grad_norm": 800.0, "learning_rate": 8.968871691054776e-05, "loss": 20.0067, "step": 5563 }, { "epoch": 0.231920303447126, "grad_norm": 219.0, "learning_rate": 8.968461110532378e-05, "loss": 11.7504, "step": 5564 }, { "epoch": 0.23196198574465424, "grad_norm": 243.0, "learning_rate": 8.968050457684425e-05, "loss": 11.5626, "step": 5565 }, { "epoch": 0.23200366804218248, "grad_norm": 572.0, "learning_rate": 8.9676397325184e-05, "loss": 16.7503, "step": 5566 }, { "epoch": 0.23204535033971072, "grad_norm": 272.0, "learning_rate": 8.96722893504179e-05, "loss": 13.7505, "step": 5567 }, { "epoch": 0.23208703263723895, "grad_norm": 53.5, "learning_rate": 8.966818065262079e-05, "loss": 7.0317, "step": 5568 }, { "epoch": 0.2321287149347672, "grad_norm": 346.0, "learning_rate": 8.966407123186757e-05, "loss": 13.2508, "step": 5569 }, { "epoch": 0.23217039723229543, "grad_norm": 48.5, "learning_rate": 8.965996108823313e-05, "loss": 5.9067, "step": 5570 }, { "epoch": 0.23221207952982367, "grad_norm": 58.75, "learning_rate": 8.965585022179238e-05, "loss": 7.594, "step": 5571 }, { "epoch": 0.2322537618273519, "grad_norm": 105.5, "learning_rate": 8.965173863262024e-05, "loss": 8.1877, "step": 5572 }, { "epoch": 0.23229544412488015, "grad_norm": 204.0, "learning_rate": 8.964762632079165e-05, "loss": 10.9378, "step": 5573 }, { "epoch": 0.2323371264224084, "grad_norm": 438.0, "learning_rate": 8.964351328638153e-05, "loss": 14.5628, "step": 5574 }, { "epoch": 0.23237880871993663, "grad_norm": 732.0, "learning_rate": 8.963939952946488e-05, "loss": 20.6261, "step": 5575 }, { "epoch": 0.23242049101746487, "grad_norm": 358.0, "learning_rate": 8.963528505011664e-05, "loss": 14.3754, "step": 5576 }, { "epoch": 0.2324621733149931, "grad_norm": 1032.0, "learning_rate": 8.963116984841182e-05, "loss": 21.0003, "step": 5577 }, { "epoch": 0.23250385561252135, "grad_norm": 183.0, "learning_rate": 8.96270539244254e-05, "loss": 11.6257, "step": 5578 }, { "epoch": 0.23254553791004962, "grad_norm": 282.0, "learning_rate": 8.962293727823243e-05, "loss": 14.6268, "step": 5579 }, { "epoch": 0.23258722020757785, "grad_norm": 125.0, "learning_rate": 8.96188199099079e-05, "loss": 8.7515, "step": 5580 }, { "epoch": 0.2326289025051061, "grad_norm": 88.0, "learning_rate": 8.961470181952685e-05, "loss": 6.9381, "step": 5581 }, { "epoch": 0.23267058480263433, "grad_norm": 368.0, "learning_rate": 8.961058300716435e-05, "loss": 14.1253, "step": 5582 }, { "epoch": 0.23271226710016257, "grad_norm": 528.0, "learning_rate": 8.960646347289545e-05, "loss": 14.3755, "step": 5583 }, { "epoch": 0.2327539493976908, "grad_norm": 255.0, "learning_rate": 8.960234321679526e-05, "loss": 12.0633, "step": 5584 }, { "epoch": 0.23279563169521905, "grad_norm": 528.0, "learning_rate": 8.959822223893882e-05, "loss": 18.0004, "step": 5585 }, { "epoch": 0.2328373139927473, "grad_norm": 247.0, "learning_rate": 8.959410053940128e-05, "loss": 12.1877, "step": 5586 }, { "epoch": 0.23287899629027553, "grad_norm": 584.0, "learning_rate": 8.958997811825775e-05, "loss": 18.3752, "step": 5587 }, { "epoch": 0.23292067858780377, "grad_norm": 304.0, "learning_rate": 8.958585497558334e-05, "loss": 12.4376, "step": 5588 }, { "epoch": 0.232962360885332, "grad_norm": 171.0, "learning_rate": 8.958173111145322e-05, "loss": 8.6298, "step": 5589 }, { "epoch": 0.23300404318286025, "grad_norm": 472.0, "learning_rate": 8.957760652594252e-05, "loss": 15.938, "step": 5590 }, { "epoch": 0.2330457254803885, "grad_norm": 480.0, "learning_rate": 8.957348121912645e-05, "loss": 15.938, "step": 5591 }, { "epoch": 0.23308740777791673, "grad_norm": 792.0, "learning_rate": 8.956935519108016e-05, "loss": 19.8779, "step": 5592 }, { "epoch": 0.23312909007544497, "grad_norm": 218.0, "learning_rate": 8.956522844187884e-05, "loss": 11.5003, "step": 5593 }, { "epoch": 0.2331707723729732, "grad_norm": 63.75, "learning_rate": 8.956110097159776e-05, "loss": 7.6266, "step": 5594 }, { "epoch": 0.23321245467050145, "grad_norm": 664.0, "learning_rate": 8.955697278031208e-05, "loss": 21.0012, "step": 5595 }, { "epoch": 0.23325413696802968, "grad_norm": 256.0, "learning_rate": 8.955284386809706e-05, "loss": 12.8752, "step": 5596 }, { "epoch": 0.23329581926555792, "grad_norm": 640.0, "learning_rate": 8.954871423502795e-05, "loss": 21.1252, "step": 5597 }, { "epoch": 0.23333750156308616, "grad_norm": 209.0, "learning_rate": 8.954458388118001e-05, "loss": 12.3134, "step": 5598 }, { "epoch": 0.2333791838606144, "grad_norm": 96.0, "learning_rate": 8.954045280662851e-05, "loss": 7.2818, "step": 5599 }, { "epoch": 0.23342086615814264, "grad_norm": 131.0, "learning_rate": 8.953632101144876e-05, "loss": 11.001, "step": 5600 }, { "epoch": 0.23346254845567088, "grad_norm": 470.0, "learning_rate": 8.953218849571605e-05, "loss": 17.1257, "step": 5601 }, { "epoch": 0.23350423075319912, "grad_norm": 1048.0, "learning_rate": 8.95280552595057e-05, "loss": 28.3778, "step": 5602 }, { "epoch": 0.23354591305072736, "grad_norm": 240.0, "learning_rate": 8.952392130289301e-05, "loss": 12.0635, "step": 5603 }, { "epoch": 0.2335875953482556, "grad_norm": 600.0, "learning_rate": 8.951978662595338e-05, "loss": 19.7504, "step": 5604 }, { "epoch": 0.23362927764578384, "grad_norm": 326.0, "learning_rate": 8.95156512287621e-05, "loss": 12.7503, "step": 5605 }, { "epoch": 0.23367095994331208, "grad_norm": 90.5, "learning_rate": 8.95115151113946e-05, "loss": 8.5627, "step": 5606 }, { "epoch": 0.23371264224084032, "grad_norm": 460.0, "learning_rate": 8.950737827392622e-05, "loss": 17.2503, "step": 5607 }, { "epoch": 0.23375432453836856, "grad_norm": 260.0, "learning_rate": 8.950324071643234e-05, "loss": 11.5627, "step": 5608 }, { "epoch": 0.2337960068358968, "grad_norm": 252.0, "learning_rate": 8.949910243898841e-05, "loss": 11.8759, "step": 5609 }, { "epoch": 0.23383768913342504, "grad_norm": 245.0, "learning_rate": 8.949496344166983e-05, "loss": 12.8128, "step": 5610 }, { "epoch": 0.23387937143095328, "grad_norm": 1376.0, "learning_rate": 8.949082372455201e-05, "loss": 33.2504, "step": 5611 }, { "epoch": 0.23392105372848152, "grad_norm": 320.0, "learning_rate": 8.948668328771046e-05, "loss": 14.1259, "step": 5612 }, { "epoch": 0.23396273602600975, "grad_norm": 924.0, "learning_rate": 8.948254213122058e-05, "loss": 25.5002, "step": 5613 }, { "epoch": 0.234004418323538, "grad_norm": 91.0, "learning_rate": 8.947840025515787e-05, "loss": 7.5637, "step": 5614 }, { "epoch": 0.23404610062106623, "grad_norm": 256.0, "learning_rate": 8.947425765959783e-05, "loss": 11.9377, "step": 5615 }, { "epoch": 0.23408778291859447, "grad_norm": 556.0, "learning_rate": 8.947011434461592e-05, "loss": 18.0004, "step": 5616 }, { "epoch": 0.2341294652161227, "grad_norm": 211.0, "learning_rate": 8.946597031028767e-05, "loss": 8.5627, "step": 5617 }, { "epoch": 0.23417114751365095, "grad_norm": 344.0, "learning_rate": 8.94618255566886e-05, "loss": 14.0626, "step": 5618 }, { "epoch": 0.2342128298111792, "grad_norm": 384.0, "learning_rate": 8.945768008389428e-05, "loss": 13.8144, "step": 5619 }, { "epoch": 0.23425451210870743, "grad_norm": 132.0, "learning_rate": 8.945353389198023e-05, "loss": 9.2504, "step": 5620 }, { "epoch": 0.23429619440623567, "grad_norm": 205.0, "learning_rate": 8.9449386981022e-05, "loss": 11.0626, "step": 5621 }, { "epoch": 0.2343378767037639, "grad_norm": 166.0, "learning_rate": 8.944523935109523e-05, "loss": 10.7502, "step": 5622 }, { "epoch": 0.23437955900129215, "grad_norm": 153.0, "learning_rate": 8.944109100227544e-05, "loss": 10.2502, "step": 5623 }, { "epoch": 0.2344212412988204, "grad_norm": 668.0, "learning_rate": 8.943694193463827e-05, "loss": 20.7503, "step": 5624 }, { "epoch": 0.23446292359634863, "grad_norm": 179.0, "learning_rate": 8.943279214825935e-05, "loss": 9.688, "step": 5625 }, { "epoch": 0.23450460589387687, "grad_norm": 272.0, "learning_rate": 8.942864164321427e-05, "loss": 8.7504, "step": 5626 }, { "epoch": 0.2345462881914051, "grad_norm": 131.0, "learning_rate": 8.94244904195787e-05, "loss": 9.6279, "step": 5627 }, { "epoch": 0.23458797048893335, "grad_norm": 258.0, "learning_rate": 8.94203384774283e-05, "loss": 13.0004, "step": 5628 }, { "epoch": 0.23462965278646158, "grad_norm": 692.0, "learning_rate": 8.941618581683872e-05, "loss": 21.3754, "step": 5629 }, { "epoch": 0.23467133508398982, "grad_norm": 416.0, "learning_rate": 8.941203243788567e-05, "loss": 15.1254, "step": 5630 }, { "epoch": 0.23471301738151806, "grad_norm": 376.0, "learning_rate": 8.940787834064484e-05, "loss": 14.5006, "step": 5631 }, { "epoch": 0.2347546996790463, "grad_norm": 520.0, "learning_rate": 8.94037235251919e-05, "loss": 19.3755, "step": 5632 }, { "epoch": 0.23479638197657454, "grad_norm": 243.0, "learning_rate": 8.939956799160262e-05, "loss": 11.1265, "step": 5633 }, { "epoch": 0.23483806427410278, "grad_norm": 560.0, "learning_rate": 8.939541173995271e-05, "loss": 17.0002, "step": 5634 }, { "epoch": 0.23487974657163102, "grad_norm": 452.0, "learning_rate": 8.939125477031792e-05, "loss": 17.3754, "step": 5635 }, { "epoch": 0.23492142886915926, "grad_norm": 101.0, "learning_rate": 8.938709708277402e-05, "loss": 8.2504, "step": 5636 }, { "epoch": 0.2349631111666875, "grad_norm": 147.0, "learning_rate": 8.938293867739678e-05, "loss": 10.1913, "step": 5637 }, { "epoch": 0.23500479346421574, "grad_norm": 358.0, "learning_rate": 8.937877955426199e-05, "loss": 15.4377, "step": 5638 }, { "epoch": 0.23504647576174398, "grad_norm": 112.0, "learning_rate": 8.937461971344542e-05, "loss": 6.4066, "step": 5639 }, { "epoch": 0.23508815805927222, "grad_norm": 604.0, "learning_rate": 8.937045915502294e-05, "loss": 17.7503, "step": 5640 }, { "epoch": 0.23512984035680046, "grad_norm": 224.0, "learning_rate": 8.936629787907034e-05, "loss": 10.8753, "step": 5641 }, { "epoch": 0.2351715226543287, "grad_norm": 328.0, "learning_rate": 8.936213588566347e-05, "loss": 14.1259, "step": 5642 }, { "epoch": 0.23521320495185694, "grad_norm": 123.5, "learning_rate": 8.935797317487816e-05, "loss": 10.3754, "step": 5643 }, { "epoch": 0.23525488724938518, "grad_norm": 298.0, "learning_rate": 8.93538097467903e-05, "loss": 12.8753, "step": 5644 }, { "epoch": 0.23529656954691341, "grad_norm": 396.0, "learning_rate": 8.934964560147579e-05, "loss": 14.688, "step": 5645 }, { "epoch": 0.23533825184444165, "grad_norm": 856.0, "learning_rate": 8.934548073901048e-05, "loss": 23.1258, "step": 5646 }, { "epoch": 0.2353799341419699, "grad_norm": 532.0, "learning_rate": 8.934131515947028e-05, "loss": 17.753, "step": 5647 }, { "epoch": 0.23542161643949813, "grad_norm": 584.0, "learning_rate": 8.933714886293114e-05, "loss": 17.6255, "step": 5648 }, { "epoch": 0.23546329873702637, "grad_norm": 156.0, "learning_rate": 8.933298184946895e-05, "loss": 8.9387, "step": 5649 }, { "epoch": 0.2355049810345546, "grad_norm": 700.0, "learning_rate": 8.932881411915968e-05, "loss": 17.3803, "step": 5650 }, { "epoch": 0.23554666333208285, "grad_norm": 744.0, "learning_rate": 8.932464567207928e-05, "loss": 21.3788, "step": 5651 }, { "epoch": 0.23558834562961112, "grad_norm": 59.0, "learning_rate": 8.932047650830373e-05, "loss": 8.188, "step": 5652 }, { "epoch": 0.23563002792713936, "grad_norm": 306.0, "learning_rate": 8.9316306627909e-05, "loss": 12.9393, "step": 5653 }, { "epoch": 0.2356717102246676, "grad_norm": 520.0, "learning_rate": 8.931213603097109e-05, "loss": 18.2503, "step": 5654 }, { "epoch": 0.23571339252219584, "grad_norm": 205.0, "learning_rate": 8.930796471756602e-05, "loss": 12.0009, "step": 5655 }, { "epoch": 0.23575507481972408, "grad_norm": 370.0, "learning_rate": 8.930379268776979e-05, "loss": 14.8135, "step": 5656 }, { "epoch": 0.23579675711725231, "grad_norm": 712.0, "learning_rate": 8.929961994165845e-05, "loss": 22.5003, "step": 5657 }, { "epoch": 0.23583843941478055, "grad_norm": 266.0, "learning_rate": 8.929544647930805e-05, "loss": 12.5629, "step": 5658 }, { "epoch": 0.2358801217123088, "grad_norm": 460.0, "learning_rate": 8.929127230079466e-05, "loss": 16.7506, "step": 5659 }, { "epoch": 0.23592180400983703, "grad_norm": 304.0, "learning_rate": 8.928709740619434e-05, "loss": 12.2502, "step": 5660 }, { "epoch": 0.23596348630736527, "grad_norm": 278.0, "learning_rate": 8.928292179558317e-05, "loss": 11.4378, "step": 5661 }, { "epoch": 0.2360051686048935, "grad_norm": 290.0, "learning_rate": 8.927874546903727e-05, "loss": 13.8756, "step": 5662 }, { "epoch": 0.23604685090242175, "grad_norm": 238.0, "learning_rate": 8.927456842663275e-05, "loss": 12.6879, "step": 5663 }, { "epoch": 0.23608853319995, "grad_norm": 1288.0, "learning_rate": 8.927039066844573e-05, "loss": 29.1293, "step": 5664 }, { "epoch": 0.23613021549747823, "grad_norm": 200.0, "learning_rate": 8.926621219455237e-05, "loss": 11.6253, "step": 5665 }, { "epoch": 0.23617189779500647, "grad_norm": 428.0, "learning_rate": 8.926203300502879e-05, "loss": 15.8127, "step": 5666 }, { "epoch": 0.2362135800925347, "grad_norm": 348.0, "learning_rate": 8.925785309995118e-05, "loss": 13.8126, "step": 5667 }, { "epoch": 0.23625526239006295, "grad_norm": 524.0, "learning_rate": 8.925367247939572e-05, "loss": 15.2503, "step": 5668 }, { "epoch": 0.2362969446875912, "grad_norm": 500.0, "learning_rate": 8.924949114343857e-05, "loss": 17.2506, "step": 5669 }, { "epoch": 0.23633862698511943, "grad_norm": 552.0, "learning_rate": 8.924530909215597e-05, "loss": 15.8171, "step": 5670 }, { "epoch": 0.23638030928264767, "grad_norm": 141.0, "learning_rate": 8.924112632562414e-05, "loss": 10.8754, "step": 5671 }, { "epoch": 0.2364219915801759, "grad_norm": 1360.0, "learning_rate": 8.923694284391928e-05, "loss": 33.2516, "step": 5672 }, { "epoch": 0.23646367387770414, "grad_norm": 294.0, "learning_rate": 8.923275864711766e-05, "loss": 13.876, "step": 5673 }, { "epoch": 0.23650535617523238, "grad_norm": 412.0, "learning_rate": 8.922857373529554e-05, "loss": 16.1255, "step": 5674 }, { "epoch": 0.23654703847276062, "grad_norm": 59.5, "learning_rate": 8.922438810852917e-05, "loss": 7.5635, "step": 5675 }, { "epoch": 0.23658872077028886, "grad_norm": 644.0, "learning_rate": 8.922020176689485e-05, "loss": 18.631, "step": 5676 }, { "epoch": 0.2366304030678171, "grad_norm": 700.0, "learning_rate": 8.921601471046888e-05, "loss": 21.2504, "step": 5677 }, { "epoch": 0.23667208536534534, "grad_norm": 360.0, "learning_rate": 8.921182693932754e-05, "loss": 14.188, "step": 5678 }, { "epoch": 0.23671376766287358, "grad_norm": 212.0, "learning_rate": 8.920763845354721e-05, "loss": 8.5655, "step": 5679 }, { "epoch": 0.23675544996040182, "grad_norm": 544.0, "learning_rate": 8.920344925320416e-05, "loss": 17.8759, "step": 5680 }, { "epoch": 0.23679713225793006, "grad_norm": 398.0, "learning_rate": 8.919925933837476e-05, "loss": 12.6878, "step": 5681 }, { "epoch": 0.2368388145554583, "grad_norm": 330.0, "learning_rate": 8.919506870913539e-05, "loss": 14.3751, "step": 5682 }, { "epoch": 0.23688049685298654, "grad_norm": 218.0, "learning_rate": 8.919087736556242e-05, "loss": 11.1879, "step": 5683 }, { "epoch": 0.23692217915051478, "grad_norm": 486.0, "learning_rate": 8.918668530773222e-05, "loss": 17.6261, "step": 5684 }, { "epoch": 0.23696386144804302, "grad_norm": 306.0, "learning_rate": 8.918249253572121e-05, "loss": 12.8126, "step": 5685 }, { "epoch": 0.23700554374557126, "grad_norm": 225.0, "learning_rate": 8.917829904960579e-05, "loss": 11.0031, "step": 5686 }, { "epoch": 0.2370472260430995, "grad_norm": 552.0, "learning_rate": 8.917410484946237e-05, "loss": 16.876, "step": 5687 }, { "epoch": 0.23708890834062774, "grad_norm": 520.0, "learning_rate": 8.916990993536745e-05, "loss": 18.8752, "step": 5688 }, { "epoch": 0.23713059063815597, "grad_norm": 358.0, "learning_rate": 8.916571430739743e-05, "loss": 15.1262, "step": 5689 }, { "epoch": 0.23717227293568421, "grad_norm": 466.0, "learning_rate": 8.91615179656288e-05, "loss": 16.0004, "step": 5690 }, { "epoch": 0.23721395523321245, "grad_norm": 1088.0, "learning_rate": 8.9157320910138e-05, "loss": 34.0003, "step": 5691 }, { "epoch": 0.2372556375307407, "grad_norm": 270.0, "learning_rate": 8.915312314100156e-05, "loss": 12.7543, "step": 5692 }, { "epoch": 0.23729731982826893, "grad_norm": 400.0, "learning_rate": 8.9148924658296e-05, "loss": 16.7502, "step": 5693 }, { "epoch": 0.23733900212579717, "grad_norm": 416.0, "learning_rate": 8.914472546209778e-05, "loss": 16.7504, "step": 5694 }, { "epoch": 0.2373806844233254, "grad_norm": 400.0, "learning_rate": 8.91405255524835e-05, "loss": 15.0002, "step": 5695 }, { "epoch": 0.23742236672085365, "grad_norm": 366.0, "learning_rate": 8.913632492952963e-05, "loss": 15.0627, "step": 5696 }, { "epoch": 0.2374640490183819, "grad_norm": 175.0, "learning_rate": 8.913212359331278e-05, "loss": 11.4379, "step": 5697 }, { "epoch": 0.23750573131591013, "grad_norm": 139.0, "learning_rate": 8.91279215439095e-05, "loss": 10.813, "step": 5698 }, { "epoch": 0.23754741361343837, "grad_norm": 466.0, "learning_rate": 8.912371878139638e-05, "loss": 16.3752, "step": 5699 }, { "epoch": 0.2375890959109666, "grad_norm": 282.0, "learning_rate": 8.911951530585e-05, "loss": 12.6251, "step": 5700 }, { "epoch": 0.23763077820849485, "grad_norm": 227.0, "learning_rate": 8.911531111734702e-05, "loss": 7.2817, "step": 5701 }, { "epoch": 0.2376724605060231, "grad_norm": 744.0, "learning_rate": 8.9111106215964e-05, "loss": 22.6264, "step": 5702 }, { "epoch": 0.23771414280355133, "grad_norm": 496.0, "learning_rate": 8.910690060177757e-05, "loss": 18.2502, "step": 5703 }, { "epoch": 0.23775582510107957, "grad_norm": 1208.0, "learning_rate": 8.910269427486443e-05, "loss": 25.5045, "step": 5704 }, { "epoch": 0.2377975073986078, "grad_norm": 242.0, "learning_rate": 8.909848723530122e-05, "loss": 13.3754, "step": 5705 }, { "epoch": 0.23783918969613604, "grad_norm": 330.0, "learning_rate": 8.90942794831646e-05, "loss": 13.8127, "step": 5706 }, { "epoch": 0.23788087199366428, "grad_norm": 141.0, "learning_rate": 8.909007101853127e-05, "loss": 9.6878, "step": 5707 }, { "epoch": 0.23792255429119252, "grad_norm": 266.0, "learning_rate": 8.908586184147791e-05, "loss": 12.8133, "step": 5708 }, { "epoch": 0.23796423658872076, "grad_norm": 464.0, "learning_rate": 8.908165195208127e-05, "loss": 17.3756, "step": 5709 }, { "epoch": 0.238005918886249, "grad_norm": 255.0, "learning_rate": 8.907744135041805e-05, "loss": 12.7506, "step": 5710 }, { "epoch": 0.23804760118377724, "grad_norm": 356.0, "learning_rate": 8.907323003656498e-05, "loss": 12.1275, "step": 5711 }, { "epoch": 0.23808928348130548, "grad_norm": 178.0, "learning_rate": 8.906901801059884e-05, "loss": 10.3133, "step": 5712 }, { "epoch": 0.23813096577883372, "grad_norm": 278.0, "learning_rate": 8.906480527259638e-05, "loss": 13.0639, "step": 5713 }, { "epoch": 0.23817264807636196, "grad_norm": 736.0, "learning_rate": 8.906059182263435e-05, "loss": 17.6298, "step": 5714 }, { "epoch": 0.2382143303738902, "grad_norm": 408.0, "learning_rate": 8.905637766078959e-05, "loss": 15.6252, "step": 5715 }, { "epoch": 0.23825601267141844, "grad_norm": 300.0, "learning_rate": 8.905216278713887e-05, "loss": 12.8751, "step": 5716 }, { "epoch": 0.23829769496894668, "grad_norm": 284.0, "learning_rate": 8.904794720175902e-05, "loss": 14.2503, "step": 5717 }, { "epoch": 0.23833937726647492, "grad_norm": 540.0, "learning_rate": 8.904373090472686e-05, "loss": 17.3757, "step": 5718 }, { "epoch": 0.23838105956400316, "grad_norm": 444.0, "learning_rate": 8.903951389611925e-05, "loss": 14.0043, "step": 5719 }, { "epoch": 0.2384227418615314, "grad_norm": 118.0, "learning_rate": 8.903529617601303e-05, "loss": 7.2503, "step": 5720 }, { "epoch": 0.23846442415905963, "grad_norm": 516.0, "learning_rate": 8.903107774448507e-05, "loss": 16.501, "step": 5721 }, { "epoch": 0.23850610645658787, "grad_norm": 220.0, "learning_rate": 8.902685860161224e-05, "loss": 10.8127, "step": 5722 }, { "epoch": 0.2385477887541161, "grad_norm": 632.0, "learning_rate": 8.902263874747146e-05, "loss": 20.6257, "step": 5723 }, { "epoch": 0.23858947105164435, "grad_norm": 462.0, "learning_rate": 8.901841818213963e-05, "loss": 16.5009, "step": 5724 }, { "epoch": 0.23863115334917262, "grad_norm": 186.0, "learning_rate": 8.901419690569365e-05, "loss": 11.3129, "step": 5725 }, { "epoch": 0.23867283564670086, "grad_norm": 318.0, "learning_rate": 8.900997491821048e-05, "loss": 14.1878, "step": 5726 }, { "epoch": 0.2387145179442291, "grad_norm": 326.0, "learning_rate": 8.900575221976706e-05, "loss": 14.8757, "step": 5727 }, { "epoch": 0.23875620024175734, "grad_norm": 258.0, "learning_rate": 8.900152881044033e-05, "loss": 11.3753, "step": 5728 }, { "epoch": 0.23879788253928558, "grad_norm": 520.0, "learning_rate": 8.899730469030729e-05, "loss": 17.6252, "step": 5729 }, { "epoch": 0.23883956483681382, "grad_norm": 276.0, "learning_rate": 8.89930798594449e-05, "loss": 13.2504, "step": 5730 }, { "epoch": 0.23888124713434206, "grad_norm": 175.0, "learning_rate": 8.898885431793016e-05, "loss": 11.9382, "step": 5731 }, { "epoch": 0.2389229294318703, "grad_norm": 97.0, "learning_rate": 8.898462806584009e-05, "loss": 8.8757, "step": 5732 }, { "epoch": 0.23896461172939854, "grad_norm": 992.0, "learning_rate": 8.898040110325172e-05, "loss": 26.8769, "step": 5733 }, { "epoch": 0.23900629402692677, "grad_norm": 164.0, "learning_rate": 8.897617343024209e-05, "loss": 11.3755, "step": 5734 }, { "epoch": 0.239047976324455, "grad_norm": 1184.0, "learning_rate": 8.897194504688821e-05, "loss": 26.8798, "step": 5735 }, { "epoch": 0.23908965862198325, "grad_norm": 318.0, "learning_rate": 8.89677159532672e-05, "loss": 13.0627, "step": 5736 }, { "epoch": 0.2391313409195115, "grad_norm": 160.0, "learning_rate": 8.896348614945611e-05, "loss": 8.313, "step": 5737 }, { "epoch": 0.23917302321703973, "grad_norm": 350.0, "learning_rate": 8.8959255635532e-05, "loss": 14.0627, "step": 5738 }, { "epoch": 0.23921470551456797, "grad_norm": 1248.0, "learning_rate": 8.895502441157203e-05, "loss": 24.7544, "step": 5739 }, { "epoch": 0.2392563878120962, "grad_norm": 250.0, "learning_rate": 8.895079247765325e-05, "loss": 9.8764, "step": 5740 }, { "epoch": 0.23929807010962445, "grad_norm": 416.0, "learning_rate": 8.894655983385283e-05, "loss": 15.1256, "step": 5741 }, { "epoch": 0.2393397524071527, "grad_norm": 138.0, "learning_rate": 8.894232648024791e-05, "loss": 10.3142, "step": 5742 }, { "epoch": 0.23938143470468093, "grad_norm": 384.0, "learning_rate": 8.893809241691561e-05, "loss": 12.7518, "step": 5743 }, { "epoch": 0.23942311700220917, "grad_norm": 170.0, "learning_rate": 8.893385764393314e-05, "loss": 10.2501, "step": 5744 }, { "epoch": 0.2394647992997374, "grad_norm": 1072.0, "learning_rate": 8.892962216137766e-05, "loss": 25.3804, "step": 5745 }, { "epoch": 0.23950648159726565, "grad_norm": 478.0, "learning_rate": 8.892538596932634e-05, "loss": 14.5006, "step": 5746 }, { "epoch": 0.2395481638947939, "grad_norm": 174.0, "learning_rate": 8.892114906785642e-05, "loss": 11.5003, "step": 5747 }, { "epoch": 0.23958984619232213, "grad_norm": 225.0, "learning_rate": 8.89169114570451e-05, "loss": 12.8127, "step": 5748 }, { "epoch": 0.23963152848985037, "grad_norm": 468.0, "learning_rate": 8.891267313696963e-05, "loss": 17.3775, "step": 5749 }, { "epoch": 0.2396732107873786, "grad_norm": 174.0, "learning_rate": 8.890843410770722e-05, "loss": 9.0009, "step": 5750 }, { "epoch": 0.23971489308490684, "grad_norm": 660.0, "learning_rate": 8.890419436933514e-05, "loss": 21.5002, "step": 5751 }, { "epoch": 0.23975657538243508, "grad_norm": 288.0, "learning_rate": 8.889995392193067e-05, "loss": 13.5628, "step": 5752 }, { "epoch": 0.23979825767996332, "grad_norm": 83.0, "learning_rate": 8.889571276557109e-05, "loss": 9.1882, "step": 5753 }, { "epoch": 0.23983993997749156, "grad_norm": 244.0, "learning_rate": 8.889147090033369e-05, "loss": 11.6877, "step": 5754 }, { "epoch": 0.2398816222750198, "grad_norm": 466.0, "learning_rate": 8.888722832629577e-05, "loss": 15.5008, "step": 5755 }, { "epoch": 0.23992330457254804, "grad_norm": 580.0, "learning_rate": 8.888298504353468e-05, "loss": 18.0005, "step": 5756 }, { "epoch": 0.23996498687007628, "grad_norm": 448.0, "learning_rate": 8.887874105212773e-05, "loss": 16.7507, "step": 5757 }, { "epoch": 0.24000666916760452, "grad_norm": 620.0, "learning_rate": 8.887449635215225e-05, "loss": 19.2505, "step": 5758 }, { "epoch": 0.24004835146513276, "grad_norm": 484.0, "learning_rate": 8.887025094368567e-05, "loss": 15.938, "step": 5759 }, { "epoch": 0.240090033762661, "grad_norm": 256.0, "learning_rate": 8.886600482680527e-05, "loss": 13.6257, "step": 5760 }, { "epoch": 0.24013171606018924, "grad_norm": 264.0, "learning_rate": 8.886175800158851e-05, "loss": 11.5643, "step": 5761 }, { "epoch": 0.24017339835771748, "grad_norm": 474.0, "learning_rate": 8.885751046811275e-05, "loss": 17.001, "step": 5762 }, { "epoch": 0.24021508065524572, "grad_norm": 408.0, "learning_rate": 8.88532622264554e-05, "loss": 15.8127, "step": 5763 }, { "epoch": 0.24025676295277396, "grad_norm": 262.0, "learning_rate": 8.884901327669393e-05, "loss": 12.6878, "step": 5764 }, { "epoch": 0.2402984452503022, "grad_norm": 380.0, "learning_rate": 8.884476361890572e-05, "loss": 14.7505, "step": 5765 }, { "epoch": 0.24034012754783043, "grad_norm": 254.0, "learning_rate": 8.884051325316825e-05, "loss": 13.0005, "step": 5766 }, { "epoch": 0.24038180984535867, "grad_norm": 268.0, "learning_rate": 8.883626217955898e-05, "loss": 11.3128, "step": 5767 }, { "epoch": 0.2404234921428869, "grad_norm": 272.0, "learning_rate": 8.883201039815538e-05, "loss": 14.2509, "step": 5768 }, { "epoch": 0.24046517444041515, "grad_norm": 410.0, "learning_rate": 8.882775790903494e-05, "loss": 13.7543, "step": 5769 }, { "epoch": 0.2405068567379434, "grad_norm": 580.0, "learning_rate": 8.882350471227516e-05, "loss": 17.3758, "step": 5770 }, { "epoch": 0.24054853903547163, "grad_norm": 268.0, "learning_rate": 8.881925080795357e-05, "loss": 12.7502, "step": 5771 }, { "epoch": 0.24059022133299987, "grad_norm": 268.0, "learning_rate": 8.881499619614769e-05, "loss": 12.1879, "step": 5772 }, { "epoch": 0.2406319036305281, "grad_norm": 328.0, "learning_rate": 8.881074087693506e-05, "loss": 13.6259, "step": 5773 }, { "epoch": 0.24067358592805635, "grad_norm": 244.0, "learning_rate": 8.880648485039322e-05, "loss": 12.564, "step": 5774 }, { "epoch": 0.2407152682255846, "grad_norm": 928.0, "learning_rate": 8.880222811659977e-05, "loss": 25.6252, "step": 5775 }, { "epoch": 0.24075695052311283, "grad_norm": 400.0, "learning_rate": 8.879797067563225e-05, "loss": 15.6884, "step": 5776 }, { "epoch": 0.24079863282064107, "grad_norm": 382.0, "learning_rate": 8.879371252756827e-05, "loss": 14.8127, "step": 5777 }, { "epoch": 0.2408403151181693, "grad_norm": 612.0, "learning_rate": 8.878945367248546e-05, "loss": 19.7532, "step": 5778 }, { "epoch": 0.24088199741569755, "grad_norm": 43.25, "learning_rate": 8.878519411046137e-05, "loss": 6.7216, "step": 5779 }, { "epoch": 0.24092367971322579, "grad_norm": 450.0, "learning_rate": 8.87809338415737e-05, "loss": 17.1254, "step": 5780 }, { "epoch": 0.24096536201075403, "grad_norm": 152.0, "learning_rate": 8.877667286590007e-05, "loss": 10.3765, "step": 5781 }, { "epoch": 0.24100704430828226, "grad_norm": 172.0, "learning_rate": 8.877241118351814e-05, "loss": 9.8127, "step": 5782 }, { "epoch": 0.2410487266058105, "grad_norm": 288.0, "learning_rate": 8.876814879450557e-05, "loss": 13.2521, "step": 5783 }, { "epoch": 0.24109040890333874, "grad_norm": 458.0, "learning_rate": 8.876388569894004e-05, "loss": 16.2504, "step": 5784 }, { "epoch": 0.24113209120086698, "grad_norm": 262.0, "learning_rate": 8.875962189689926e-05, "loss": 13.3151, "step": 5785 }, { "epoch": 0.24117377349839522, "grad_norm": 208.0, "learning_rate": 8.875535738846092e-05, "loss": 11.7512, "step": 5786 }, { "epoch": 0.24121545579592346, "grad_norm": 124.5, "learning_rate": 8.875109217370276e-05, "loss": 9.2504, "step": 5787 }, { "epoch": 0.2412571380934517, "grad_norm": 1112.0, "learning_rate": 8.87468262527025e-05, "loss": 27.5006, "step": 5788 }, { "epoch": 0.24129882039097994, "grad_norm": 122.5, "learning_rate": 8.874255962553788e-05, "loss": 9.7506, "step": 5789 }, { "epoch": 0.24134050268850818, "grad_norm": 268.0, "learning_rate": 8.873829229228669e-05, "loss": 12.501, "step": 5790 }, { "epoch": 0.24138218498603642, "grad_norm": 442.0, "learning_rate": 8.873402425302668e-05, "loss": 17.1258, "step": 5791 }, { "epoch": 0.24142386728356466, "grad_norm": 240.0, "learning_rate": 8.872975550783564e-05, "loss": 12.1255, "step": 5792 }, { "epoch": 0.2414655495810929, "grad_norm": 116.0, "learning_rate": 8.872548605679136e-05, "loss": 8.6259, "step": 5793 }, { "epoch": 0.24150723187862114, "grad_norm": 672.0, "learning_rate": 8.872121589997167e-05, "loss": 18.0061, "step": 5794 }, { "epoch": 0.24154891417614938, "grad_norm": 980.0, "learning_rate": 8.871694503745437e-05, "loss": 28.5011, "step": 5795 }, { "epoch": 0.24159059647367762, "grad_norm": 211.0, "learning_rate": 8.871267346931732e-05, "loss": 11.3758, "step": 5796 }, { "epoch": 0.24163227877120586, "grad_norm": 340.0, "learning_rate": 8.870840119563836e-05, "loss": 14.8758, "step": 5797 }, { "epoch": 0.24167396106873412, "grad_norm": 588.0, "learning_rate": 8.870412821649535e-05, "loss": 17.0003, "step": 5798 }, { "epoch": 0.24171564336626236, "grad_norm": 1336.0, "learning_rate": 8.869985453196617e-05, "loss": 27.2572, "step": 5799 }, { "epoch": 0.2417573256637906, "grad_norm": 266.0, "learning_rate": 8.86955801421287e-05, "loss": 12.3757, "step": 5800 }, { "epoch": 0.24179900796131884, "grad_norm": 412.0, "learning_rate": 8.869130504706085e-05, "loss": 15.6253, "step": 5801 }, { "epoch": 0.24184069025884708, "grad_norm": 171.0, "learning_rate": 8.868702924684052e-05, "loss": 9.5003, "step": 5802 }, { "epoch": 0.24188237255637532, "grad_norm": 228.0, "learning_rate": 8.868275274154567e-05, "loss": 11.627, "step": 5803 }, { "epoch": 0.24192405485390356, "grad_norm": 229.0, "learning_rate": 8.867847553125419e-05, "loss": 11.813, "step": 5804 }, { "epoch": 0.2419657371514318, "grad_norm": 940.0, "learning_rate": 8.867419761604408e-05, "loss": 21.7575, "step": 5805 }, { "epoch": 0.24200741944896004, "grad_norm": 272.0, "learning_rate": 8.866991899599328e-05, "loss": 12.2504, "step": 5806 }, { "epoch": 0.24204910174648828, "grad_norm": 260.0, "learning_rate": 8.866563967117977e-05, "loss": 11.1881, "step": 5807 }, { "epoch": 0.24209078404401652, "grad_norm": 548.0, "learning_rate": 8.866135964168154e-05, "loss": 17.2515, "step": 5808 }, { "epoch": 0.24213246634154476, "grad_norm": 374.0, "learning_rate": 8.86570789075766e-05, "loss": 14.188, "step": 5809 }, { "epoch": 0.242174148639073, "grad_norm": 340.0, "learning_rate": 8.865279746894298e-05, "loss": 14.8775, "step": 5810 }, { "epoch": 0.24221583093660123, "grad_norm": 576.0, "learning_rate": 8.86485153258587e-05, "loss": 17.5042, "step": 5811 }, { "epoch": 0.24225751323412947, "grad_norm": 80.5, "learning_rate": 8.864423247840176e-05, "loss": 7.5317, "step": 5812 }, { "epoch": 0.2422991955316577, "grad_norm": 428.0, "learning_rate": 8.863994892665029e-05, "loss": 15.1267, "step": 5813 }, { "epoch": 0.24234087782918595, "grad_norm": 338.0, "learning_rate": 8.86356646706823e-05, "loss": 12.9382, "step": 5814 }, { "epoch": 0.2423825601267142, "grad_norm": 464.0, "learning_rate": 8.863137971057589e-05, "loss": 17.5004, "step": 5815 }, { "epoch": 0.24242424242424243, "grad_norm": 91.5, "learning_rate": 8.862709404640916e-05, "loss": 7.9378, "step": 5816 }, { "epoch": 0.24246592472177067, "grad_norm": 308.0, "learning_rate": 8.862280767826023e-05, "loss": 10.6287, "step": 5817 }, { "epoch": 0.2425076070192989, "grad_norm": 206.0, "learning_rate": 8.861852060620719e-05, "loss": 12.0007, "step": 5818 }, { "epoch": 0.24254928931682715, "grad_norm": 388.0, "learning_rate": 8.861423283032817e-05, "loss": 14.5005, "step": 5819 }, { "epoch": 0.2425909716143554, "grad_norm": 358.0, "learning_rate": 8.860994435070133e-05, "loss": 15.8131, "step": 5820 }, { "epoch": 0.24263265391188363, "grad_norm": 300.0, "learning_rate": 8.860565516740485e-05, "loss": 12.1251, "step": 5821 }, { "epoch": 0.24267433620941187, "grad_norm": 290.0, "learning_rate": 8.860136528051685e-05, "loss": 8.6884, "step": 5822 }, { "epoch": 0.2427160185069401, "grad_norm": 684.0, "learning_rate": 8.859707469011556e-05, "loss": 20.6253, "step": 5823 }, { "epoch": 0.24275770080446835, "grad_norm": 228.0, "learning_rate": 8.859278339627916e-05, "loss": 9.9385, "step": 5824 }, { "epoch": 0.24279938310199659, "grad_norm": 494.0, "learning_rate": 8.858849139908585e-05, "loss": 16.1262, "step": 5825 }, { "epoch": 0.24284106539952482, "grad_norm": 272.0, "learning_rate": 8.858419869861385e-05, "loss": 13.0651, "step": 5826 }, { "epoch": 0.24288274769705306, "grad_norm": 412.0, "learning_rate": 8.85799052949414e-05, "loss": 15.4382, "step": 5827 }, { "epoch": 0.2429244299945813, "grad_norm": 125.5, "learning_rate": 8.857561118814676e-05, "loss": 9.3759, "step": 5828 }, { "epoch": 0.24296611229210954, "grad_norm": 298.0, "learning_rate": 8.857131637830818e-05, "loss": 13.1257, "step": 5829 }, { "epoch": 0.24300779458963778, "grad_norm": 294.0, "learning_rate": 8.856702086550395e-05, "loss": 11.8753, "step": 5830 }, { "epoch": 0.24304947688716602, "grad_norm": 1048.0, "learning_rate": 8.856272464981232e-05, "loss": 26.8756, "step": 5831 }, { "epoch": 0.24309115918469426, "grad_norm": 160.0, "learning_rate": 8.855842773131162e-05, "loss": 9.6283, "step": 5832 }, { "epoch": 0.2431328414822225, "grad_norm": 704.0, "learning_rate": 8.855413011008016e-05, "loss": 20.2506, "step": 5833 }, { "epoch": 0.24317452377975074, "grad_norm": 57.75, "learning_rate": 8.854983178619624e-05, "loss": 8.4385, "step": 5834 }, { "epoch": 0.24321620607727898, "grad_norm": 394.0, "learning_rate": 8.854553275973822e-05, "loss": 14.6259, "step": 5835 }, { "epoch": 0.24325788837480722, "grad_norm": 504.0, "learning_rate": 8.854123303078445e-05, "loss": 18.0026, "step": 5836 }, { "epoch": 0.24329957067233546, "grad_norm": 356.0, "learning_rate": 8.853693259941328e-05, "loss": 14.0631, "step": 5837 }, { "epoch": 0.2433412529698637, "grad_norm": 364.0, "learning_rate": 8.85326314657031e-05, "loss": 12.6877, "step": 5838 }, { "epoch": 0.24338293526739194, "grad_norm": 426.0, "learning_rate": 8.852832962973227e-05, "loss": 15.5003, "step": 5839 }, { "epoch": 0.24342461756492018, "grad_norm": 195.0, "learning_rate": 8.852402709157923e-05, "loss": 11.3753, "step": 5840 }, { "epoch": 0.24346629986244842, "grad_norm": 470.0, "learning_rate": 8.851972385132237e-05, "loss": 16.3753, "step": 5841 }, { "epoch": 0.24350798215997665, "grad_norm": 454.0, "learning_rate": 8.851541990904013e-05, "loss": 15.5637, "step": 5842 }, { "epoch": 0.2435496644575049, "grad_norm": 302.0, "learning_rate": 8.851111526481094e-05, "loss": 13.1884, "step": 5843 }, { "epoch": 0.24359134675503313, "grad_norm": 768.0, "learning_rate": 8.850680991871326e-05, "loss": 20.7507, "step": 5844 }, { "epoch": 0.24363302905256137, "grad_norm": 306.0, "learning_rate": 8.850250387082554e-05, "loss": 12.6881, "step": 5845 }, { "epoch": 0.2436747113500896, "grad_norm": 442.0, "learning_rate": 8.849819712122626e-05, "loss": 15.8127, "step": 5846 }, { "epoch": 0.24371639364761785, "grad_norm": 512.0, "learning_rate": 8.849388966999395e-05, "loss": 15.7502, "step": 5847 }, { "epoch": 0.2437580759451461, "grad_norm": 424.0, "learning_rate": 8.848958151720705e-05, "loss": 16.5006, "step": 5848 }, { "epoch": 0.24379975824267433, "grad_norm": 632.0, "learning_rate": 8.848527266294415e-05, "loss": 19.1256, "step": 5849 }, { "epoch": 0.24384144054020257, "grad_norm": 96.0, "learning_rate": 8.848096310728371e-05, "loss": 6.1261, "step": 5850 }, { "epoch": 0.2438831228377308, "grad_norm": 524.0, "learning_rate": 8.84766528503043e-05, "loss": 14.9391, "step": 5851 }, { "epoch": 0.24392480513525905, "grad_norm": 210.0, "learning_rate": 8.847234189208448e-05, "loss": 11.6253, "step": 5852 }, { "epoch": 0.2439664874327873, "grad_norm": 175.0, "learning_rate": 8.846803023270282e-05, "loss": 11.0628, "step": 5853 }, { "epoch": 0.24400816973031553, "grad_norm": 227.0, "learning_rate": 8.84637178722379e-05, "loss": 11.5631, "step": 5854 }, { "epoch": 0.24404985202784377, "grad_norm": 158.0, "learning_rate": 8.84594048107683e-05, "loss": 10.8752, "step": 5855 }, { "epoch": 0.244091534325372, "grad_norm": 139.0, "learning_rate": 8.845509104837262e-05, "loss": 8.6877, "step": 5856 }, { "epoch": 0.24413321662290025, "grad_norm": 346.0, "learning_rate": 8.845077658512953e-05, "loss": 14.3129, "step": 5857 }, { "epoch": 0.24417489892042848, "grad_norm": 552.0, "learning_rate": 8.844646142111758e-05, "loss": 18.0003, "step": 5858 }, { "epoch": 0.24421658121795672, "grad_norm": 326.0, "learning_rate": 8.844214555641548e-05, "loss": 14.3752, "step": 5859 }, { "epoch": 0.24425826351548496, "grad_norm": 268.0, "learning_rate": 8.843782899110186e-05, "loss": 12.3752, "step": 5860 }, { "epoch": 0.2442999458130132, "grad_norm": 1280.0, "learning_rate": 8.843351172525539e-05, "loss": 25.8814, "step": 5861 }, { "epoch": 0.24434162811054144, "grad_norm": 480.0, "learning_rate": 8.842919375895477e-05, "loss": 17.7503, "step": 5862 }, { "epoch": 0.24438331040806968, "grad_norm": 154.0, "learning_rate": 8.842487509227868e-05, "loss": 9.0003, "step": 5863 }, { "epoch": 0.24442499270559792, "grad_norm": 498.0, "learning_rate": 8.84205557253058e-05, "loss": 16.1253, "step": 5864 }, { "epoch": 0.24446667500312616, "grad_norm": 245.0, "learning_rate": 8.841623565811492e-05, "loss": 13.6265, "step": 5865 }, { "epoch": 0.2445083573006544, "grad_norm": 376.0, "learning_rate": 8.84119148907847e-05, "loss": 14.8769, "step": 5866 }, { "epoch": 0.24455003959818264, "grad_norm": 151.0, "learning_rate": 8.840759342339396e-05, "loss": 10.1255, "step": 5867 }, { "epoch": 0.24459172189571088, "grad_norm": 286.0, "learning_rate": 8.84032712560214e-05, "loss": 12.1878, "step": 5868 }, { "epoch": 0.24463340419323912, "grad_norm": 336.0, "learning_rate": 8.839894838874582e-05, "loss": 12.8128, "step": 5869 }, { "epoch": 0.24467508649076736, "grad_norm": 1064.0, "learning_rate": 8.839462482164598e-05, "loss": 28.7502, "step": 5870 }, { "epoch": 0.24471676878829562, "grad_norm": 266.0, "learning_rate": 8.83903005548007e-05, "loss": 12.188, "step": 5871 }, { "epoch": 0.24475845108582386, "grad_norm": 812.0, "learning_rate": 8.83859755882888e-05, "loss": 21.6254, "step": 5872 }, { "epoch": 0.2448001333833521, "grad_norm": 334.0, "learning_rate": 8.838164992218907e-05, "loss": 13.0628, "step": 5873 }, { "epoch": 0.24484181568088034, "grad_norm": 564.0, "learning_rate": 8.837732355658037e-05, "loss": 18.8754, "step": 5874 }, { "epoch": 0.24488349797840858, "grad_norm": 270.0, "learning_rate": 8.837299649154153e-05, "loss": 12.8129, "step": 5875 }, { "epoch": 0.24492518027593682, "grad_norm": 378.0, "learning_rate": 8.836866872715143e-05, "loss": 15.0003, "step": 5876 }, { "epoch": 0.24496686257346506, "grad_norm": 552.0, "learning_rate": 8.836434026348896e-05, "loss": 17.0006, "step": 5877 }, { "epoch": 0.2450085448709933, "grad_norm": 71.5, "learning_rate": 8.836001110063296e-05, "loss": 8.3133, "step": 5878 }, { "epoch": 0.24505022716852154, "grad_norm": 193.0, "learning_rate": 8.835568123866235e-05, "loss": 11.0006, "step": 5879 }, { "epoch": 0.24509190946604978, "grad_norm": 382.0, "learning_rate": 8.835135067765606e-05, "loss": 15.8756, "step": 5880 }, { "epoch": 0.24513359176357802, "grad_norm": 1840.0, "learning_rate": 8.834701941769298e-05, "loss": 35.0091, "step": 5881 }, { "epoch": 0.24517527406110626, "grad_norm": 130.0, "learning_rate": 8.834268745885208e-05, "loss": 8.6254, "step": 5882 }, { "epoch": 0.2452169563586345, "grad_norm": 620.0, "learning_rate": 8.833835480121229e-05, "loss": 20.8756, "step": 5883 }, { "epoch": 0.24525863865616274, "grad_norm": 596.0, "learning_rate": 8.833402144485259e-05, "loss": 18.1252, "step": 5884 }, { "epoch": 0.24530032095369098, "grad_norm": 380.0, "learning_rate": 8.832968738985194e-05, "loss": 14.3753, "step": 5885 }, { "epoch": 0.24534200325121922, "grad_norm": 66.5, "learning_rate": 8.832535263628933e-05, "loss": 7.6565, "step": 5886 }, { "epoch": 0.24538368554874745, "grad_norm": 300.0, "learning_rate": 8.832101718424377e-05, "loss": 12.6876, "step": 5887 }, { "epoch": 0.2454253678462757, "grad_norm": 232.0, "learning_rate": 8.831668103379427e-05, "loss": 11.8131, "step": 5888 }, { "epoch": 0.24546705014380393, "grad_norm": 139.0, "learning_rate": 8.831234418501986e-05, "loss": 9.188, "step": 5889 }, { "epoch": 0.24550873244133217, "grad_norm": 438.0, "learning_rate": 8.830800663799957e-05, "loss": 17.5007, "step": 5890 }, { "epoch": 0.2455504147388604, "grad_norm": 78.5, "learning_rate": 8.830366839281245e-05, "loss": 8.3755, "step": 5891 }, { "epoch": 0.24559209703638865, "grad_norm": 398.0, "learning_rate": 8.82993294495376e-05, "loss": 15.1255, "step": 5892 }, { "epoch": 0.2456337793339169, "grad_norm": 84.5, "learning_rate": 8.829498980825406e-05, "loss": 9.0008, "step": 5893 }, { "epoch": 0.24567546163144513, "grad_norm": 288.0, "learning_rate": 8.829064946904092e-05, "loss": 12.2516, "step": 5894 }, { "epoch": 0.24571714392897337, "grad_norm": 848.0, "learning_rate": 8.828630843197729e-05, "loss": 22.5032, "step": 5895 }, { "epoch": 0.2457588262265016, "grad_norm": 492.0, "learning_rate": 8.82819666971423e-05, "loss": 13.5629, "step": 5896 }, { "epoch": 0.24580050852402985, "grad_norm": 194.0, "learning_rate": 8.827762426461508e-05, "loss": 10.8128, "step": 5897 }, { "epoch": 0.2458421908215581, "grad_norm": 322.0, "learning_rate": 8.827328113447475e-05, "loss": 12.7506, "step": 5898 }, { "epoch": 0.24588387311908633, "grad_norm": 900.0, "learning_rate": 8.826893730680047e-05, "loss": 20.3791, "step": 5899 }, { "epoch": 0.24592555541661457, "grad_norm": 1528.0, "learning_rate": 8.826459278167141e-05, "loss": 30.0056, "step": 5900 }, { "epoch": 0.2459672377141428, "grad_norm": 115.5, "learning_rate": 8.826024755916675e-05, "loss": 8.3757, "step": 5901 }, { "epoch": 0.24600892001167105, "grad_norm": 410.0, "learning_rate": 8.82559016393657e-05, "loss": 16.2502, "step": 5902 }, { "epoch": 0.24605060230919928, "grad_norm": 117.5, "learning_rate": 8.825155502234742e-05, "loss": 10.0006, "step": 5903 }, { "epoch": 0.24609228460672752, "grad_norm": 1208.0, "learning_rate": 8.824720770819117e-05, "loss": 26.7549, "step": 5904 }, { "epoch": 0.24613396690425576, "grad_norm": 262.0, "learning_rate": 8.824285969697615e-05, "loss": 12.6253, "step": 5905 }, { "epoch": 0.246175649201784, "grad_norm": 408.0, "learning_rate": 8.823851098878165e-05, "loss": 14.3755, "step": 5906 }, { "epoch": 0.24621733149931224, "grad_norm": 180.0, "learning_rate": 8.823416158368684e-05, "loss": 10.8753, "step": 5907 }, { "epoch": 0.24625901379684048, "grad_norm": 600.0, "learning_rate": 8.822981148177107e-05, "loss": 19.6275, "step": 5908 }, { "epoch": 0.24630069609436872, "grad_norm": 91.0, "learning_rate": 8.822546068311361e-05, "loss": 10.0631, "step": 5909 }, { "epoch": 0.24634237839189696, "grad_norm": 248.0, "learning_rate": 8.822110918779372e-05, "loss": 12.7502, "step": 5910 }, { "epoch": 0.2463840606894252, "grad_norm": 242.0, "learning_rate": 8.821675699589072e-05, "loss": 11.3753, "step": 5911 }, { "epoch": 0.24642574298695344, "grad_norm": 1200.0, "learning_rate": 8.821240410748393e-05, "loss": 26.2544, "step": 5912 }, { "epoch": 0.24646742528448168, "grad_norm": 124.0, "learning_rate": 8.820805052265269e-05, "loss": 10.1879, "step": 5913 }, { "epoch": 0.24650910758200992, "grad_norm": 201.0, "learning_rate": 8.820369624147632e-05, "loss": 11.3132, "step": 5914 }, { "epoch": 0.24655078987953816, "grad_norm": 334.0, "learning_rate": 8.81993412640342e-05, "loss": 12.9402, "step": 5915 }, { "epoch": 0.2465924721770664, "grad_norm": 144.0, "learning_rate": 8.81949855904057e-05, "loss": 9.0005, "step": 5916 }, { "epoch": 0.24663415447459464, "grad_norm": 274.0, "learning_rate": 8.81906292206702e-05, "loss": 12.3753, "step": 5917 }, { "epoch": 0.24667583677212288, "grad_norm": 444.0, "learning_rate": 8.818627215490709e-05, "loss": 17.1256, "step": 5918 }, { "epoch": 0.24671751906965111, "grad_norm": 290.0, "learning_rate": 8.818191439319578e-05, "loss": 12.4377, "step": 5919 }, { "epoch": 0.24675920136717935, "grad_norm": 122.5, "learning_rate": 8.817755593561569e-05, "loss": 9.5628, "step": 5920 }, { "epoch": 0.2468008836647076, "grad_norm": 408.0, "learning_rate": 8.817319678224626e-05, "loss": 16.1254, "step": 5921 }, { "epoch": 0.24684256596223583, "grad_norm": 237.0, "learning_rate": 8.816883693316692e-05, "loss": 11.8143, "step": 5922 }, { "epoch": 0.24688424825976407, "grad_norm": 648.0, "learning_rate": 8.816447638845716e-05, "loss": 20.5111, "step": 5923 }, { "epoch": 0.2469259305572923, "grad_norm": 211.0, "learning_rate": 8.81601151481964e-05, "loss": 10.938, "step": 5924 }, { "epoch": 0.24696761285482055, "grad_norm": 250.0, "learning_rate": 8.815575321246416e-05, "loss": 12.5012, "step": 5925 }, { "epoch": 0.2470092951523488, "grad_norm": 81.5, "learning_rate": 8.815139058133994e-05, "loss": 8.0628, "step": 5926 }, { "epoch": 0.24705097744987703, "grad_norm": 868.0, "learning_rate": 8.814702725490323e-05, "loss": 25.8753, "step": 5927 }, { "epoch": 0.24709265974740527, "grad_norm": 412.0, "learning_rate": 8.814266323323356e-05, "loss": 15.2501, "step": 5928 }, { "epoch": 0.2471343420449335, "grad_norm": 76.5, "learning_rate": 8.813829851641049e-05, "loss": 8.0002, "step": 5929 }, { "epoch": 0.24717602434246175, "grad_norm": 692.0, "learning_rate": 8.813393310451353e-05, "loss": 20.2526, "step": 5930 }, { "epoch": 0.24721770663999, "grad_norm": 362.0, "learning_rate": 8.812956699762224e-05, "loss": 15.3153, "step": 5931 }, { "epoch": 0.24725938893751823, "grad_norm": 520.0, "learning_rate": 8.812520019581622e-05, "loss": 17.1252, "step": 5932 }, { "epoch": 0.24730107123504647, "grad_norm": 246.0, "learning_rate": 8.812083269917506e-05, "loss": 10.5004, "step": 5933 }, { "epoch": 0.2473427535325747, "grad_norm": 110.0, "learning_rate": 8.811646450777832e-05, "loss": 7.1257, "step": 5934 }, { "epoch": 0.24738443583010294, "grad_norm": 338.0, "learning_rate": 8.811209562170562e-05, "loss": 11.8131, "step": 5935 }, { "epoch": 0.24742611812763118, "grad_norm": 568.0, "learning_rate": 8.81077260410366e-05, "loss": 17.7507, "step": 5936 }, { "epoch": 0.24746780042515942, "grad_norm": 294.0, "learning_rate": 8.810335576585091e-05, "loss": 11.9405, "step": 5937 }, { "epoch": 0.24750948272268766, "grad_norm": 580.0, "learning_rate": 8.809898479622816e-05, "loss": 17.3752, "step": 5938 }, { "epoch": 0.2475511650202159, "grad_norm": 286.0, "learning_rate": 8.809461313224804e-05, "loss": 13.2509, "step": 5939 }, { "epoch": 0.24759284731774414, "grad_norm": 588.0, "learning_rate": 8.80902407739902e-05, "loss": 19.6253, "step": 5940 }, { "epoch": 0.24763452961527238, "grad_norm": 282.0, "learning_rate": 8.808586772153435e-05, "loss": 12.6878, "step": 5941 }, { "epoch": 0.24767621191280062, "grad_norm": 608.0, "learning_rate": 8.808149397496019e-05, "loss": 16.8759, "step": 5942 }, { "epoch": 0.24771789421032886, "grad_norm": 276.0, "learning_rate": 8.80771195343474e-05, "loss": 11.8752, "step": 5943 }, { "epoch": 0.24775957650785713, "grad_norm": 1680.0, "learning_rate": 8.807274439977575e-05, "loss": 32.7515, "step": 5944 }, { "epoch": 0.24780125880538537, "grad_norm": 508.0, "learning_rate": 8.806836857132495e-05, "loss": 16.8753, "step": 5945 }, { "epoch": 0.2478429411029136, "grad_norm": 868.0, "learning_rate": 8.806399204907472e-05, "loss": 19.1285, "step": 5946 }, { "epoch": 0.24788462340044184, "grad_norm": 332.0, "learning_rate": 8.805961483310488e-05, "loss": 14.5633, "step": 5947 }, { "epoch": 0.24792630569797008, "grad_norm": 272.0, "learning_rate": 8.805523692349518e-05, "loss": 12.8759, "step": 5948 }, { "epoch": 0.24796798799549832, "grad_norm": 328.0, "learning_rate": 8.805085832032543e-05, "loss": 14.0002, "step": 5949 }, { "epoch": 0.24800967029302656, "grad_norm": 356.0, "learning_rate": 8.804647902367537e-05, "loss": 14.1252, "step": 5950 }, { "epoch": 0.2480513525905548, "grad_norm": 182.0, "learning_rate": 8.804209903362488e-05, "loss": 10.5645, "step": 5951 }, { "epoch": 0.24809303488808304, "grad_norm": 488.0, "learning_rate": 8.803771835025374e-05, "loss": 16.6256, "step": 5952 }, { "epoch": 0.24813471718561128, "grad_norm": 374.0, "learning_rate": 8.803333697364182e-05, "loss": 14.5016, "step": 5953 }, { "epoch": 0.24817639948313952, "grad_norm": 352.0, "learning_rate": 8.802895490386895e-05, "loss": 14.938, "step": 5954 }, { "epoch": 0.24821808178066776, "grad_norm": 520.0, "learning_rate": 8.802457214101501e-05, "loss": 16.7503, "step": 5955 }, { "epoch": 0.248259764078196, "grad_norm": 332.0, "learning_rate": 8.802018868515986e-05, "loss": 14.2511, "step": 5956 }, { "epoch": 0.24830144637572424, "grad_norm": 123.0, "learning_rate": 8.80158045363834e-05, "loss": 11.1254, "step": 5957 }, { "epoch": 0.24834312867325248, "grad_norm": 256.0, "learning_rate": 8.801141969476552e-05, "loss": 12.0627, "step": 5958 }, { "epoch": 0.24838481097078072, "grad_norm": 708.0, "learning_rate": 8.800703416038615e-05, "loss": 19.5036, "step": 5959 }, { "epoch": 0.24842649326830896, "grad_norm": 213.0, "learning_rate": 8.80026479333252e-05, "loss": 12.3772, "step": 5960 }, { "epoch": 0.2484681755658372, "grad_norm": 229.0, "learning_rate": 8.799826101366262e-05, "loss": 10.6257, "step": 5961 }, { "epoch": 0.24850985786336544, "grad_norm": 223.0, "learning_rate": 8.799387340147837e-05, "loss": 12.1254, "step": 5962 }, { "epoch": 0.24855154016089367, "grad_norm": 182.0, "learning_rate": 8.79894850968524e-05, "loss": 10.6888, "step": 5963 }, { "epoch": 0.24859322245842191, "grad_norm": 648.0, "learning_rate": 8.798509609986468e-05, "loss": 19.0009, "step": 5964 }, { "epoch": 0.24863490475595015, "grad_norm": 70.5, "learning_rate": 8.798070641059522e-05, "loss": 7.7199, "step": 5965 }, { "epoch": 0.2486765870534784, "grad_norm": 382.0, "learning_rate": 8.797631602912401e-05, "loss": 15.7507, "step": 5966 }, { "epoch": 0.24871826935100663, "grad_norm": 88.5, "learning_rate": 8.797192495553109e-05, "loss": 7.8447, "step": 5967 }, { "epoch": 0.24875995164853487, "grad_norm": 406.0, "learning_rate": 8.796753318989643e-05, "loss": 15.0627, "step": 5968 }, { "epoch": 0.2488016339460631, "grad_norm": 142.0, "learning_rate": 8.796314073230015e-05, "loss": 6.5317, "step": 5969 }, { "epoch": 0.24884331624359135, "grad_norm": 382.0, "learning_rate": 8.795874758282223e-05, "loss": 13.3754, "step": 5970 }, { "epoch": 0.2488849985411196, "grad_norm": 484.0, "learning_rate": 8.795435374154278e-05, "loss": 16.2503, "step": 5971 }, { "epoch": 0.24892668083864783, "grad_norm": 408.0, "learning_rate": 8.794995920854184e-05, "loss": 14.3141, "step": 5972 }, { "epoch": 0.24896836313617607, "grad_norm": 720.0, "learning_rate": 8.794556398389955e-05, "loss": 21.6255, "step": 5973 }, { "epoch": 0.2490100454337043, "grad_norm": 141.0, "learning_rate": 8.794116806769597e-05, "loss": 11.1886, "step": 5974 }, { "epoch": 0.24905172773123255, "grad_norm": 163.0, "learning_rate": 8.793677146001125e-05, "loss": 10.3127, "step": 5975 }, { "epoch": 0.2490934100287608, "grad_norm": 324.0, "learning_rate": 8.793237416092551e-05, "loss": 14.3753, "step": 5976 }, { "epoch": 0.24913509232628903, "grad_norm": 202.0, "learning_rate": 8.792797617051885e-05, "loss": 9.6878, "step": 5977 }, { "epoch": 0.24917677462381727, "grad_norm": 422.0, "learning_rate": 8.792357748887148e-05, "loss": 14.501, "step": 5978 }, { "epoch": 0.2492184569213455, "grad_norm": 235.0, "learning_rate": 8.791917811606353e-05, "loss": 10.2504, "step": 5979 }, { "epoch": 0.24926013921887374, "grad_norm": 484.0, "learning_rate": 8.79147780521752e-05, "loss": 17.1262, "step": 5980 }, { "epoch": 0.24930182151640198, "grad_norm": 284.0, "learning_rate": 8.791037729728668e-05, "loss": 13.2502, "step": 5981 }, { "epoch": 0.24934350381393022, "grad_norm": 696.0, "learning_rate": 8.790597585147818e-05, "loss": 16.1277, "step": 5982 }, { "epoch": 0.24938518611145846, "grad_norm": 290.0, "learning_rate": 8.790157371482987e-05, "loss": 13.9389, "step": 5983 }, { "epoch": 0.2494268684089867, "grad_norm": 200.0, "learning_rate": 8.789717088742204e-05, "loss": 9.0628, "step": 5984 }, { "epoch": 0.24946855070651494, "grad_norm": 628.0, "learning_rate": 8.789276736933491e-05, "loss": 19.8756, "step": 5985 }, { "epoch": 0.24951023300404318, "grad_norm": 944.0, "learning_rate": 8.788836316064873e-05, "loss": 23.3808, "step": 5986 }, { "epoch": 0.24955191530157142, "grad_norm": 348.0, "learning_rate": 8.788395826144376e-05, "loss": 13.8771, "step": 5987 }, { "epoch": 0.24959359759909966, "grad_norm": 119.5, "learning_rate": 8.787955267180028e-05, "loss": 8.501, "step": 5988 }, { "epoch": 0.2496352798966279, "grad_norm": 592.0, "learning_rate": 8.78751463917986e-05, "loss": 17.3758, "step": 5989 }, { "epoch": 0.24967696219415614, "grad_norm": 430.0, "learning_rate": 8.7870739421519e-05, "loss": 17.1252, "step": 5990 }, { "epoch": 0.24971864449168438, "grad_norm": 688.0, "learning_rate": 8.786633176104182e-05, "loss": 23.0005, "step": 5991 }, { "epoch": 0.24976032678921262, "grad_norm": 300.0, "learning_rate": 8.78619234104474e-05, "loss": 11.2506, "step": 5992 }, { "epoch": 0.24980200908674086, "grad_norm": 544.0, "learning_rate": 8.785751436981604e-05, "loss": 16.7512, "step": 5993 }, { "epoch": 0.2498436913842691, "grad_norm": 386.0, "learning_rate": 8.785310463922814e-05, "loss": 13.8751, "step": 5994 }, { "epoch": 0.24988537368179733, "grad_norm": 278.0, "learning_rate": 8.784869421876402e-05, "loss": 12.5001, "step": 5995 }, { "epoch": 0.24992705597932557, "grad_norm": 896.0, "learning_rate": 8.784428310850412e-05, "loss": 21.5055, "step": 5996 }, { "epoch": 0.2499687382768538, "grad_norm": 352.0, "learning_rate": 8.783987130852878e-05, "loss": 14.9377, "step": 5997 }, { "epoch": 0.25001042057438205, "grad_norm": 154.0, "learning_rate": 8.783545881891843e-05, "loss": 9.8753, "step": 5998 }, { "epoch": 0.2500521028719103, "grad_norm": 236.0, "learning_rate": 8.783104563975351e-05, "loss": 12.4379, "step": 5999 }, { "epoch": 0.25009378516943853, "grad_norm": 153.0, "learning_rate": 8.782663177111438e-05, "loss": 10.9382, "step": 6000 }, { "epoch": 0.2501354674669668, "grad_norm": 340.0, "learning_rate": 8.782221721308157e-05, "loss": 13.5006, "step": 6001 }, { "epoch": 0.250177149764495, "grad_norm": 131.0, "learning_rate": 8.781780196573545e-05, "loss": 10.4378, "step": 6002 }, { "epoch": 0.2502188320620233, "grad_norm": 84.5, "learning_rate": 8.781338602915656e-05, "loss": 7.8446, "step": 6003 }, { "epoch": 0.2502605143595515, "grad_norm": 556.0, "learning_rate": 8.780896940342535e-05, "loss": 17.7502, "step": 6004 }, { "epoch": 0.25030219665707976, "grad_norm": 508.0, "learning_rate": 8.780455208862232e-05, "loss": 18.0004, "step": 6005 }, { "epoch": 0.25034387895460797, "grad_norm": 584.0, "learning_rate": 8.780013408482796e-05, "loss": 18.1254, "step": 6006 }, { "epoch": 0.25038556125213624, "grad_norm": 450.0, "learning_rate": 8.779571539212283e-05, "loss": 15.5628, "step": 6007 }, { "epoch": 0.25042724354966445, "grad_norm": 83.0, "learning_rate": 8.77912960105874e-05, "loss": 7.2504, "step": 6008 }, { "epoch": 0.2504689258471927, "grad_norm": 302.0, "learning_rate": 8.778687594030226e-05, "loss": 13.1878, "step": 6009 }, { "epoch": 0.2505106081447209, "grad_norm": 256.0, "learning_rate": 8.778245518134794e-05, "loss": 12.7502, "step": 6010 }, { "epoch": 0.2505522904422492, "grad_norm": 454.0, "learning_rate": 8.777803373380503e-05, "loss": 16.2502, "step": 6011 }, { "epoch": 0.2505939727397774, "grad_norm": 1384.0, "learning_rate": 8.77736115977541e-05, "loss": 27.2544, "step": 6012 }, { "epoch": 0.25063565503730567, "grad_norm": 672.0, "learning_rate": 8.776918877327574e-05, "loss": 20.1253, "step": 6013 }, { "epoch": 0.2506773373348339, "grad_norm": 1008.0, "learning_rate": 8.776476526045057e-05, "loss": 23.5072, "step": 6014 }, { "epoch": 0.25071901963236215, "grad_norm": 270.0, "learning_rate": 8.77603410593592e-05, "loss": 12.3754, "step": 6015 }, { "epoch": 0.25076070192989036, "grad_norm": 374.0, "learning_rate": 8.775591617008225e-05, "loss": 13.3752, "step": 6016 }, { "epoch": 0.25080238422741863, "grad_norm": 161.0, "learning_rate": 8.775149059270038e-05, "loss": 10.2503, "step": 6017 }, { "epoch": 0.25084406652494684, "grad_norm": 326.0, "learning_rate": 8.774706432729425e-05, "loss": 14.2502, "step": 6018 }, { "epoch": 0.2508857488224751, "grad_norm": 108.0, "learning_rate": 8.774263737394453e-05, "loss": 10.7509, "step": 6019 }, { "epoch": 0.2509274311200033, "grad_norm": 226.0, "learning_rate": 8.773820973273188e-05, "loss": 11.8754, "step": 6020 }, { "epoch": 0.2509691134175316, "grad_norm": 310.0, "learning_rate": 8.7733781403737e-05, "loss": 12.3152, "step": 6021 }, { "epoch": 0.2510107957150598, "grad_norm": 334.0, "learning_rate": 8.772935238704062e-05, "loss": 13.7502, "step": 6022 }, { "epoch": 0.25105247801258807, "grad_norm": 166.0, "learning_rate": 8.772492268272343e-05, "loss": 9.8128, "step": 6023 }, { "epoch": 0.2510941603101163, "grad_norm": 138.0, "learning_rate": 8.772049229086619e-05, "loss": 9.4377, "step": 6024 }, { "epoch": 0.25113584260764454, "grad_norm": 272.0, "learning_rate": 8.771606121154962e-05, "loss": 12.5628, "step": 6025 }, { "epoch": 0.25117752490517276, "grad_norm": 104.0, "learning_rate": 8.771162944485449e-05, "loss": 8.7504, "step": 6026 }, { "epoch": 0.251219207202701, "grad_norm": 410.0, "learning_rate": 8.770719699086156e-05, "loss": 15.313, "step": 6027 }, { "epoch": 0.25126088950022923, "grad_norm": 430.0, "learning_rate": 8.770276384965163e-05, "loss": 16.1257, "step": 6028 }, { "epoch": 0.2513025717977575, "grad_norm": 620.0, "learning_rate": 8.769833002130548e-05, "loss": 19.1253, "step": 6029 }, { "epoch": 0.2513442540952857, "grad_norm": 470.0, "learning_rate": 8.76938955059039e-05, "loss": 17.1259, "step": 6030 }, { "epoch": 0.251385936392814, "grad_norm": 354.0, "learning_rate": 8.768946030352774e-05, "loss": 13.0004, "step": 6031 }, { "epoch": 0.2514276186903422, "grad_norm": 190.0, "learning_rate": 8.768502441425782e-05, "loss": 10.6266, "step": 6032 }, { "epoch": 0.25146930098787046, "grad_norm": 229.0, "learning_rate": 8.768058783817499e-05, "loss": 10.4379, "step": 6033 }, { "epoch": 0.25151098328539867, "grad_norm": 380.0, "learning_rate": 8.767615057536009e-05, "loss": 14.6284, "step": 6034 }, { "epoch": 0.25155266558292694, "grad_norm": 732.0, "learning_rate": 8.767171262589403e-05, "loss": 19.1255, "step": 6035 }, { "epoch": 0.25159434788045515, "grad_norm": 456.0, "learning_rate": 8.766727398985763e-05, "loss": 17.3753, "step": 6036 }, { "epoch": 0.2516360301779834, "grad_norm": 764.0, "learning_rate": 8.766283466733183e-05, "loss": 20.8792, "step": 6037 }, { "epoch": 0.25167771247551163, "grad_norm": 628.0, "learning_rate": 8.765839465839751e-05, "loss": 19.1253, "step": 6038 }, { "epoch": 0.2517193947730399, "grad_norm": 318.0, "learning_rate": 8.765395396313563e-05, "loss": 13.0003, "step": 6039 }, { "epoch": 0.2517610770705681, "grad_norm": 624.0, "learning_rate": 8.764951258162707e-05, "loss": 19.7503, "step": 6040 }, { "epoch": 0.2518027593680964, "grad_norm": 264.0, "learning_rate": 8.764507051395282e-05, "loss": 12.6254, "step": 6041 }, { "epoch": 0.2518444416656246, "grad_norm": 316.0, "learning_rate": 8.764062776019381e-05, "loss": 13.6878, "step": 6042 }, { "epoch": 0.25188612396315285, "grad_norm": 310.0, "learning_rate": 8.763618432043104e-05, "loss": 12.6878, "step": 6043 }, { "epoch": 0.25192780626068106, "grad_norm": 568.0, "learning_rate": 8.763174019474544e-05, "loss": 18.3768, "step": 6044 }, { "epoch": 0.25196948855820933, "grad_norm": 282.0, "learning_rate": 8.762729538321804e-05, "loss": 13.5006, "step": 6045 }, { "epoch": 0.25201117085573754, "grad_norm": 185.0, "learning_rate": 8.762284988592984e-05, "loss": 8.2507, "step": 6046 }, { "epoch": 0.2520528531532658, "grad_norm": 338.0, "learning_rate": 8.761840370296189e-05, "loss": 12.8129, "step": 6047 }, { "epoch": 0.252094535450794, "grad_norm": 172.0, "learning_rate": 8.761395683439515e-05, "loss": 10.8752, "step": 6048 }, { "epoch": 0.2521362177483223, "grad_norm": 157.0, "learning_rate": 8.760950928031073e-05, "loss": 10.063, "step": 6049 }, { "epoch": 0.2521779000458505, "grad_norm": 412.0, "learning_rate": 8.760506104078968e-05, "loss": 14.7506, "step": 6050 }, { "epoch": 0.25221958234337877, "grad_norm": 284.0, "learning_rate": 8.760061211591301e-05, "loss": 12.1253, "step": 6051 }, { "epoch": 0.252261264640907, "grad_norm": 340.0, "learning_rate": 8.759616250576188e-05, "loss": 14.6878, "step": 6052 }, { "epoch": 0.25230294693843525, "grad_norm": 956.0, "learning_rate": 8.759171221041736e-05, "loss": 20.8797, "step": 6053 }, { "epoch": 0.2523446292359635, "grad_norm": 254.0, "learning_rate": 8.758726122996053e-05, "loss": 10.6254, "step": 6054 }, { "epoch": 0.2523863115334917, "grad_norm": 720.0, "learning_rate": 8.758280956447252e-05, "loss": 18.6254, "step": 6055 }, { "epoch": 0.25242799383102, "grad_norm": 78.0, "learning_rate": 8.757835721403448e-05, "loss": 9.1881, "step": 6056 }, { "epoch": 0.2524696761285482, "grad_norm": 238.0, "learning_rate": 8.757390417872755e-05, "loss": 8.3132, "step": 6057 }, { "epoch": 0.25251135842607647, "grad_norm": 588.0, "learning_rate": 8.756945045863288e-05, "loss": 19.1256, "step": 6058 }, { "epoch": 0.2525530407236047, "grad_norm": 596.0, "learning_rate": 8.756499605383162e-05, "loss": 20.7502, "step": 6059 }, { "epoch": 0.25259472302113295, "grad_norm": 304.0, "learning_rate": 8.756054096440498e-05, "loss": 13.688, "step": 6060 }, { "epoch": 0.25263640531866116, "grad_norm": 800.0, "learning_rate": 8.755608519043416e-05, "loss": 22.0002, "step": 6061 }, { "epoch": 0.25267808761618943, "grad_norm": 130.0, "learning_rate": 8.755162873200033e-05, "loss": 9.8754, "step": 6062 }, { "epoch": 0.25271976991371764, "grad_norm": 306.0, "learning_rate": 8.754717158918476e-05, "loss": 13.2503, "step": 6063 }, { "epoch": 0.2527614522112459, "grad_norm": 416.0, "learning_rate": 8.754271376206864e-05, "loss": 15.0009, "step": 6064 }, { "epoch": 0.2528031345087741, "grad_norm": 230.0, "learning_rate": 8.753825525073323e-05, "loss": 10.4378, "step": 6065 }, { "epoch": 0.2528448168063024, "grad_norm": 374.0, "learning_rate": 8.75337960552598e-05, "loss": 14.0626, "step": 6066 }, { "epoch": 0.2528864991038306, "grad_norm": 201.0, "learning_rate": 8.752933617572958e-05, "loss": 11.3755, "step": 6067 }, { "epoch": 0.25292818140135886, "grad_norm": 328.0, "learning_rate": 8.752487561222389e-05, "loss": 13.1253, "step": 6068 }, { "epoch": 0.2529698636988871, "grad_norm": 264.0, "learning_rate": 8.752041436482402e-05, "loss": 12.6877, "step": 6069 }, { "epoch": 0.25301154599641534, "grad_norm": 458.0, "learning_rate": 8.751595243361126e-05, "loss": 15.6251, "step": 6070 }, { "epoch": 0.25305322829394356, "grad_norm": 684.0, "learning_rate": 8.751148981866692e-05, "loss": 19.7512, "step": 6071 }, { "epoch": 0.2530949105914718, "grad_norm": 360.0, "learning_rate": 8.750702652007237e-05, "loss": 13.3131, "step": 6072 }, { "epoch": 0.25313659288900003, "grad_norm": 252.0, "learning_rate": 8.750256253790892e-05, "loss": 11.8752, "step": 6073 }, { "epoch": 0.2531782751865283, "grad_norm": 332.0, "learning_rate": 8.749809787225794e-05, "loss": 12.9383, "step": 6074 }, { "epoch": 0.2532199574840565, "grad_norm": 312.0, "learning_rate": 8.749363252320079e-05, "loss": 13.3765, "step": 6075 }, { "epoch": 0.2532616397815848, "grad_norm": 230.0, "learning_rate": 8.748916649081888e-05, "loss": 10.3752, "step": 6076 }, { "epoch": 0.253303322079113, "grad_norm": 61.5, "learning_rate": 8.748469977519358e-05, "loss": 6.7816, "step": 6077 }, { "epoch": 0.25334500437664126, "grad_norm": 153.0, "learning_rate": 8.748023237640628e-05, "loss": 10.0012, "step": 6078 }, { "epoch": 0.25338668667416947, "grad_norm": 52.25, "learning_rate": 8.747576429453844e-05, "loss": 7.7204, "step": 6079 }, { "epoch": 0.25342836897169774, "grad_norm": 616.0, "learning_rate": 8.747129552967144e-05, "loss": 19.1254, "step": 6080 }, { "epoch": 0.25347005126922595, "grad_norm": 247.0, "learning_rate": 8.746682608188678e-05, "loss": 12.1261, "step": 6081 }, { "epoch": 0.2535117335667542, "grad_norm": 276.0, "learning_rate": 8.746235595126588e-05, "loss": 12.1251, "step": 6082 }, { "epoch": 0.2535534158642824, "grad_norm": 197.0, "learning_rate": 8.745788513789022e-05, "loss": 11.1884, "step": 6083 }, { "epoch": 0.2535950981618107, "grad_norm": 462.0, "learning_rate": 8.745341364184127e-05, "loss": 16.6264, "step": 6084 }, { "epoch": 0.2536367804593389, "grad_norm": 290.0, "learning_rate": 8.744894146320052e-05, "loss": 13.8753, "step": 6085 }, { "epoch": 0.2536784627568672, "grad_norm": 348.0, "learning_rate": 8.744446860204951e-05, "loss": 15.5642, "step": 6086 }, { "epoch": 0.2537201450543954, "grad_norm": 290.0, "learning_rate": 8.743999505846973e-05, "loss": 14.1257, "step": 6087 }, { "epoch": 0.25376182735192365, "grad_norm": 168.0, "learning_rate": 8.743552083254272e-05, "loss": 10.3129, "step": 6088 }, { "epoch": 0.25380350964945186, "grad_norm": 1012.0, "learning_rate": 8.743104592435001e-05, "loss": 23.0055, "step": 6089 }, { "epoch": 0.25384519194698013, "grad_norm": 476.0, "learning_rate": 8.742657033397316e-05, "loss": 16.1252, "step": 6090 }, { "epoch": 0.25388687424450834, "grad_norm": 243.0, "learning_rate": 8.742209406149376e-05, "loss": 11.1878, "step": 6091 }, { "epoch": 0.2539285565420366, "grad_norm": 1288.0, "learning_rate": 8.741761710699336e-05, "loss": 25.6296, "step": 6092 }, { "epoch": 0.2539702388395648, "grad_norm": 376.0, "learning_rate": 8.741313947055358e-05, "loss": 14.938, "step": 6093 }, { "epoch": 0.2540119211370931, "grad_norm": 504.0, "learning_rate": 8.7408661152256e-05, "loss": 16.1253, "step": 6094 }, { "epoch": 0.2540536034346213, "grad_norm": 676.0, "learning_rate": 8.740418215218227e-05, "loss": 21.8751, "step": 6095 }, { "epoch": 0.25409528573214957, "grad_norm": 102.0, "learning_rate": 8.739970247041399e-05, "loss": 9.0004, "step": 6096 }, { "epoch": 0.2541369680296778, "grad_norm": 628.0, "learning_rate": 8.73952221070328e-05, "loss": 20.5006, "step": 6097 }, { "epoch": 0.25417865032720605, "grad_norm": 294.0, "learning_rate": 8.739074106212036e-05, "loss": 12.6258, "step": 6098 }, { "epoch": 0.25422033262473426, "grad_norm": 616.0, "learning_rate": 8.738625933575837e-05, "loss": 20.2503, "step": 6099 }, { "epoch": 0.2542620149222625, "grad_norm": 384.0, "learning_rate": 8.738177692802847e-05, "loss": 15.7503, "step": 6100 }, { "epoch": 0.25430369721979074, "grad_norm": 1048.0, "learning_rate": 8.737729383901237e-05, "loss": 27.3753, "step": 6101 }, { "epoch": 0.254345379517319, "grad_norm": 198.0, "learning_rate": 8.737281006879177e-05, "loss": 11.2512, "step": 6102 }, { "epoch": 0.2543870618148472, "grad_norm": 520.0, "learning_rate": 8.736832561744839e-05, "loss": 16.8753, "step": 6103 }, { "epoch": 0.2544287441123755, "grad_norm": 328.0, "learning_rate": 8.736384048506396e-05, "loss": 13.0628, "step": 6104 }, { "epoch": 0.2544704264099037, "grad_norm": 1232.0, "learning_rate": 8.735935467172022e-05, "loss": 28.8752, "step": 6105 }, { "epoch": 0.25451210870743196, "grad_norm": 346.0, "learning_rate": 8.735486817749892e-05, "loss": 15.0627, "step": 6106 }, { "epoch": 0.2545537910049602, "grad_norm": 276.0, "learning_rate": 8.735038100248184e-05, "loss": 12.8754, "step": 6107 }, { "epoch": 0.25459547330248844, "grad_norm": 472.0, "learning_rate": 8.734589314675074e-05, "loss": 17.5006, "step": 6108 }, { "epoch": 0.25463715560001665, "grad_norm": 436.0, "learning_rate": 8.734140461038743e-05, "loss": 15.6877, "step": 6109 }, { "epoch": 0.2546788378975449, "grad_norm": 462.0, "learning_rate": 8.73369153934737e-05, "loss": 17.0015, "step": 6110 }, { "epoch": 0.25472052019507313, "grad_norm": 236.0, "learning_rate": 8.733242549609139e-05, "loss": 10.9378, "step": 6111 }, { "epoch": 0.2547622024926014, "grad_norm": 117.0, "learning_rate": 8.73279349183223e-05, "loss": 11.3755, "step": 6112 }, { "epoch": 0.2548038847901296, "grad_norm": 239.0, "learning_rate": 8.732344366024827e-05, "loss": 11.9381, "step": 6113 }, { "epoch": 0.2548455670876579, "grad_norm": 270.0, "learning_rate": 8.731895172195119e-05, "loss": 11.3753, "step": 6114 }, { "epoch": 0.2548872493851861, "grad_norm": 372.0, "learning_rate": 8.731445910351288e-05, "loss": 14.6252, "step": 6115 }, { "epoch": 0.25492893168271435, "grad_norm": 684.0, "learning_rate": 8.730996580501525e-05, "loss": 20.3758, "step": 6116 }, { "epoch": 0.25497061398024257, "grad_norm": 264.0, "learning_rate": 8.730547182654018e-05, "loss": 12.0626, "step": 6117 }, { "epoch": 0.25501229627777083, "grad_norm": 732.0, "learning_rate": 8.730097716816958e-05, "loss": 19.6257, "step": 6118 }, { "epoch": 0.25505397857529905, "grad_norm": 240.0, "learning_rate": 8.729648182998535e-05, "loss": 11.3754, "step": 6119 }, { "epoch": 0.2550956608728273, "grad_norm": 916.0, "learning_rate": 8.729198581206943e-05, "loss": 21.7542, "step": 6120 }, { "epoch": 0.2551373431703555, "grad_norm": 500.0, "learning_rate": 8.728748911450375e-05, "loss": 16.1252, "step": 6121 }, { "epoch": 0.2551790254678838, "grad_norm": 450.0, "learning_rate": 8.72829917373703e-05, "loss": 15.1253, "step": 6122 }, { "epoch": 0.255220707765412, "grad_norm": 334.0, "learning_rate": 8.727849368075098e-05, "loss": 12.6258, "step": 6123 }, { "epoch": 0.25526239006294027, "grad_norm": 103.0, "learning_rate": 8.727399494472782e-05, "loss": 5.9702, "step": 6124 }, { "epoch": 0.2553040723604685, "grad_norm": 400.0, "learning_rate": 8.72694955293828e-05, "loss": 15.6252, "step": 6125 }, { "epoch": 0.25534575465799675, "grad_norm": 498.0, "learning_rate": 8.726499543479791e-05, "loss": 15.5006, "step": 6126 }, { "epoch": 0.255387436955525, "grad_norm": 1024.0, "learning_rate": 8.726049466105517e-05, "loss": 25.7506, "step": 6127 }, { "epoch": 0.2554291192530532, "grad_norm": 408.0, "learning_rate": 8.725599320823659e-05, "loss": 15.9382, "step": 6128 }, { "epoch": 0.2554708015505815, "grad_norm": 390.0, "learning_rate": 8.725149107642426e-05, "loss": 14.7503, "step": 6129 }, { "epoch": 0.2555124838481097, "grad_norm": 238.0, "learning_rate": 8.724698826570018e-05, "loss": 12.8128, "step": 6130 }, { "epoch": 0.255554166145638, "grad_norm": 225.0, "learning_rate": 8.724248477614643e-05, "loss": 11.8133, "step": 6131 }, { "epoch": 0.2555958484431662, "grad_norm": 676.0, "learning_rate": 8.723798060784509e-05, "loss": 20.2501, "step": 6132 }, { "epoch": 0.25563753074069445, "grad_norm": 470.0, "learning_rate": 8.723347576087824e-05, "loss": 14.4379, "step": 6133 }, { "epoch": 0.25567921303822266, "grad_norm": 380.0, "learning_rate": 8.7228970235328e-05, "loss": 14.4386, "step": 6134 }, { "epoch": 0.25572089533575093, "grad_norm": 478.0, "learning_rate": 8.722446403127647e-05, "loss": 15.5003, "step": 6135 }, { "epoch": 0.25576257763327914, "grad_norm": 572.0, "learning_rate": 8.721995714880578e-05, "loss": 16.6252, "step": 6136 }, { "epoch": 0.2558042599308074, "grad_norm": 604.0, "learning_rate": 8.721544958799808e-05, "loss": 19.001, "step": 6137 }, { "epoch": 0.2558459422283356, "grad_norm": 40.5, "learning_rate": 8.721094134893549e-05, "loss": 7.0631, "step": 6138 }, { "epoch": 0.2558876245258639, "grad_norm": 222.0, "learning_rate": 8.72064324317002e-05, "loss": 12.3751, "step": 6139 }, { "epoch": 0.2559293068233921, "grad_norm": 57.5, "learning_rate": 8.720192283637436e-05, "loss": 5.7818, "step": 6140 }, { "epoch": 0.25597098912092037, "grad_norm": 260.0, "learning_rate": 8.719741256304018e-05, "loss": 11.8128, "step": 6141 }, { "epoch": 0.2560126714184486, "grad_norm": 510.0, "learning_rate": 8.719290161177987e-05, "loss": 16.5018, "step": 6142 }, { "epoch": 0.25605435371597685, "grad_norm": 568.0, "learning_rate": 8.71883899826756e-05, "loss": 18.6253, "step": 6143 }, { "epoch": 0.25609603601350506, "grad_norm": 354.0, "learning_rate": 8.718387767580964e-05, "loss": 14.3127, "step": 6144 }, { "epoch": 0.2561377183110333, "grad_norm": 194.0, "learning_rate": 8.71793646912642e-05, "loss": 12.063, "step": 6145 }, { "epoch": 0.25617940060856154, "grad_norm": 173.0, "learning_rate": 8.717485102912155e-05, "loss": 8.2505, "step": 6146 }, { "epoch": 0.2562210829060898, "grad_norm": 155.0, "learning_rate": 8.717033668946393e-05, "loss": 9.6879, "step": 6147 }, { "epoch": 0.256262765203618, "grad_norm": 492.0, "learning_rate": 8.716582167237361e-05, "loss": 16.5006, "step": 6148 }, { "epoch": 0.2563044475011463, "grad_norm": 560.0, "learning_rate": 8.716130597793293e-05, "loss": 17.2505, "step": 6149 }, { "epoch": 0.2563461297986745, "grad_norm": 952.0, "learning_rate": 8.715678960622412e-05, "loss": 21.3753, "step": 6150 }, { "epoch": 0.25638781209620276, "grad_norm": 536.0, "learning_rate": 8.715227255732952e-05, "loss": 16.5006, "step": 6151 }, { "epoch": 0.256429494393731, "grad_norm": 442.0, "learning_rate": 8.714775483133146e-05, "loss": 14.8135, "step": 6152 }, { "epoch": 0.25647117669125924, "grad_norm": 52.5, "learning_rate": 8.714323642831227e-05, "loss": 8.2506, "step": 6153 }, { "epoch": 0.25651285898878745, "grad_norm": 212.0, "learning_rate": 8.71387173483543e-05, "loss": 10.8753, "step": 6154 }, { "epoch": 0.2565545412863157, "grad_norm": 229.0, "learning_rate": 8.713419759153993e-05, "loss": 11.7503, "step": 6155 }, { "epoch": 0.25659622358384393, "grad_norm": 221.0, "learning_rate": 8.712967715795148e-05, "loss": 9.5628, "step": 6156 }, { "epoch": 0.2566379058813722, "grad_norm": 270.0, "learning_rate": 8.712515604767138e-05, "loss": 12.6252, "step": 6157 }, { "epoch": 0.2566795881789004, "grad_norm": 382.0, "learning_rate": 8.712063426078203e-05, "loss": 15.5636, "step": 6158 }, { "epoch": 0.2567212704764287, "grad_norm": 238.0, "learning_rate": 8.711611179736581e-05, "loss": 11.5628, "step": 6159 }, { "epoch": 0.2567629527739569, "grad_norm": 2368.0, "learning_rate": 8.711158865750515e-05, "loss": 40.7576, "step": 6160 }, { "epoch": 0.25680463507148515, "grad_norm": 840.0, "learning_rate": 8.710706484128251e-05, "loss": 24.7504, "step": 6161 }, { "epoch": 0.25684631736901337, "grad_norm": 728.0, "learning_rate": 8.710254034878031e-05, "loss": 19.8788, "step": 6162 }, { "epoch": 0.25688799966654163, "grad_norm": 504.0, "learning_rate": 8.7098015180081e-05, "loss": 17.0006, "step": 6163 }, { "epoch": 0.25692968196406984, "grad_norm": 230.0, "learning_rate": 8.70934893352671e-05, "loss": 8.6253, "step": 6164 }, { "epoch": 0.2569713642615981, "grad_norm": 136.0, "learning_rate": 8.708896281442105e-05, "loss": 9.3751, "step": 6165 }, { "epoch": 0.2570130465591263, "grad_norm": 1560.0, "learning_rate": 8.708443561762535e-05, "loss": 30.3793, "step": 6166 }, { "epoch": 0.2570547288566546, "grad_norm": 424.0, "learning_rate": 8.707990774496256e-05, "loss": 15.938, "step": 6167 }, { "epoch": 0.2570964111541828, "grad_norm": 668.0, "learning_rate": 8.707537919651512e-05, "loss": 20.3788, "step": 6168 }, { "epoch": 0.25713809345171107, "grad_norm": 140.0, "learning_rate": 8.707084997236561e-05, "loss": 10.0628, "step": 6169 }, { "epoch": 0.2571797757492393, "grad_norm": 282.0, "learning_rate": 8.706632007259658e-05, "loss": 12.6885, "step": 6170 }, { "epoch": 0.25722145804676755, "grad_norm": 140.0, "learning_rate": 8.706178949729057e-05, "loss": 10.188, "step": 6171 }, { "epoch": 0.25726314034429576, "grad_norm": 300.0, "learning_rate": 8.705725824653015e-05, "loss": 13.3128, "step": 6172 }, { "epoch": 0.257304822641824, "grad_norm": 107.5, "learning_rate": 8.705272632039792e-05, "loss": 9.4381, "step": 6173 }, { "epoch": 0.25734650493935224, "grad_norm": 1368.0, "learning_rate": 8.704819371897646e-05, "loss": 27.5027, "step": 6174 }, { "epoch": 0.2573881872368805, "grad_norm": 1688.0, "learning_rate": 8.704366044234838e-05, "loss": 39.2504, "step": 6175 }, { "epoch": 0.2574298695344087, "grad_norm": 151.0, "learning_rate": 8.70391264905963e-05, "loss": 9.3753, "step": 6176 }, { "epoch": 0.257471551831937, "grad_norm": 956.0, "learning_rate": 8.703459186380283e-05, "loss": 24.5004, "step": 6177 }, { "epoch": 0.2575132341294652, "grad_norm": 536.0, "learning_rate": 8.703005656205067e-05, "loss": 17.2503, "step": 6178 }, { "epoch": 0.25755491642699346, "grad_norm": 492.0, "learning_rate": 8.702552058542241e-05, "loss": 14.7502, "step": 6179 }, { "epoch": 0.2575965987245217, "grad_norm": 560.0, "learning_rate": 8.702098393400078e-05, "loss": 18.5003, "step": 6180 }, { "epoch": 0.25763828102204994, "grad_norm": 186.0, "learning_rate": 8.701644660786841e-05, "loss": 10.7507, "step": 6181 }, { "epoch": 0.25767996331957815, "grad_norm": 416.0, "learning_rate": 8.701190860710803e-05, "loss": 15.0006, "step": 6182 }, { "epoch": 0.2577216456171064, "grad_norm": 308.0, "learning_rate": 8.700736993180233e-05, "loss": 12.438, "step": 6183 }, { "epoch": 0.25776332791463463, "grad_norm": 186.0, "learning_rate": 8.700283058203402e-05, "loss": 9.8127, "step": 6184 }, { "epoch": 0.2578050102121629, "grad_norm": 796.0, "learning_rate": 8.699829055788584e-05, "loss": 20.7505, "step": 6185 }, { "epoch": 0.2578466925096911, "grad_norm": 616.0, "learning_rate": 8.699374985944053e-05, "loss": 18.5013, "step": 6186 }, { "epoch": 0.2578883748072194, "grad_norm": 222.0, "learning_rate": 8.698920848678085e-05, "loss": 10.0631, "step": 6187 }, { "epoch": 0.2579300571047476, "grad_norm": 366.0, "learning_rate": 8.698466643998954e-05, "loss": 13.1904, "step": 6188 }, { "epoch": 0.25797173940227586, "grad_norm": 241.0, "learning_rate": 8.698012371914942e-05, "loss": 10.7509, "step": 6189 }, { "epoch": 0.25801342169980407, "grad_norm": 310.0, "learning_rate": 8.697558032434327e-05, "loss": 13.1878, "step": 6190 }, { "epoch": 0.25805510399733234, "grad_norm": 189.0, "learning_rate": 8.697103625565387e-05, "loss": 10.4378, "step": 6191 }, { "epoch": 0.25809678629486055, "grad_norm": 206.0, "learning_rate": 8.696649151316405e-05, "loss": 11.8136, "step": 6192 }, { "epoch": 0.2581384685923888, "grad_norm": 776.0, "learning_rate": 8.696194609695665e-05, "loss": 21.8753, "step": 6193 }, { "epoch": 0.258180150889917, "grad_norm": 446.0, "learning_rate": 8.69574000071145e-05, "loss": 15.6261, "step": 6194 }, { "epoch": 0.2582218331874453, "grad_norm": 438.0, "learning_rate": 8.695285324372047e-05, "loss": 15.4381, "step": 6195 }, { "epoch": 0.2582635154849735, "grad_norm": 145.0, "learning_rate": 8.694830580685737e-05, "loss": 9.3755, "step": 6196 }, { "epoch": 0.25830519778250177, "grad_norm": 632.0, "learning_rate": 8.694375769660816e-05, "loss": 17.7504, "step": 6197 }, { "epoch": 0.25834688008003, "grad_norm": 402.0, "learning_rate": 8.693920891305565e-05, "loss": 15.3126, "step": 6198 }, { "epoch": 0.25838856237755825, "grad_norm": 490.0, "learning_rate": 8.693465945628281e-05, "loss": 17.0004, "step": 6199 }, { "epoch": 0.2584302446750865, "grad_norm": 832.0, "learning_rate": 8.69301093263725e-05, "loss": 19.8801, "step": 6200 }, { "epoch": 0.25847192697261473, "grad_norm": 732.0, "learning_rate": 8.692555852340767e-05, "loss": 22.2501, "step": 6201 }, { "epoch": 0.258513609270143, "grad_norm": 211.0, "learning_rate": 8.692100704747127e-05, "loss": 11.0005, "step": 6202 }, { "epoch": 0.2585552915676712, "grad_norm": 102.5, "learning_rate": 8.691645489864624e-05, "loss": 9.063, "step": 6203 }, { "epoch": 0.2585969738651995, "grad_norm": 564.0, "learning_rate": 8.691190207701551e-05, "loss": 16.0003, "step": 6204 }, { "epoch": 0.2586386561627277, "grad_norm": 272.0, "learning_rate": 8.690734858266212e-05, "loss": 13.6257, "step": 6205 }, { "epoch": 0.25868033846025595, "grad_norm": 76.5, "learning_rate": 8.690279441566902e-05, "loss": 8.6252, "step": 6206 }, { "epoch": 0.25872202075778417, "grad_norm": 764.0, "learning_rate": 8.689823957611922e-05, "loss": 20.6257, "step": 6207 }, { "epoch": 0.25876370305531243, "grad_norm": 238.0, "learning_rate": 8.689368406409573e-05, "loss": 10.6257, "step": 6208 }, { "epoch": 0.25880538535284064, "grad_norm": 288.0, "learning_rate": 8.688912787968155e-05, "loss": 14.2506, "step": 6209 }, { "epoch": 0.2588470676503689, "grad_norm": 149.0, "learning_rate": 8.688457102295976e-05, "loss": 10.6252, "step": 6210 }, { "epoch": 0.2588887499478971, "grad_norm": 410.0, "learning_rate": 8.688001349401338e-05, "loss": 16.0004, "step": 6211 }, { "epoch": 0.2589304322454254, "grad_norm": 310.0, "learning_rate": 8.687545529292548e-05, "loss": 14.6255, "step": 6212 }, { "epoch": 0.2589721145429536, "grad_norm": 406.0, "learning_rate": 8.687089641977915e-05, "loss": 16.8752, "step": 6213 }, { "epoch": 0.25901379684048187, "grad_norm": 516.0, "learning_rate": 8.686633687465745e-05, "loss": 16.3754, "step": 6214 }, { "epoch": 0.2590554791380101, "grad_norm": 580.0, "learning_rate": 8.686177665764348e-05, "loss": 18.1252, "step": 6215 }, { "epoch": 0.25909716143553835, "grad_norm": 191.0, "learning_rate": 8.685721576882037e-05, "loss": 10.0014, "step": 6216 }, { "epoch": 0.25913884373306656, "grad_norm": 592.0, "learning_rate": 8.685265420827122e-05, "loss": 20.7502, "step": 6217 }, { "epoch": 0.2591805260305948, "grad_norm": 502.0, "learning_rate": 8.684809197607917e-05, "loss": 16.7538, "step": 6218 }, { "epoch": 0.25922220832812304, "grad_norm": 564.0, "learning_rate": 8.68435290723274e-05, "loss": 18.6254, "step": 6219 }, { "epoch": 0.2592638906256513, "grad_norm": 157.0, "learning_rate": 8.683896549709903e-05, "loss": 10.2503, "step": 6220 }, { "epoch": 0.2593055729231795, "grad_norm": 404.0, "learning_rate": 8.683440125047721e-05, "loss": 15.5628, "step": 6221 }, { "epoch": 0.2593472552207078, "grad_norm": 71.5, "learning_rate": 8.68298363325452e-05, "loss": 8.8132, "step": 6222 }, { "epoch": 0.259388937518236, "grad_norm": 364.0, "learning_rate": 8.682527074338613e-05, "loss": 15.1884, "step": 6223 }, { "epoch": 0.25943061981576426, "grad_norm": 120.0, "learning_rate": 8.682070448308324e-05, "loss": 10.0633, "step": 6224 }, { "epoch": 0.2594723021132925, "grad_norm": 150.0, "learning_rate": 8.681613755171975e-05, "loss": 8.6876, "step": 6225 }, { "epoch": 0.25951398441082074, "grad_norm": 280.0, "learning_rate": 8.681156994937886e-05, "loss": 12.8759, "step": 6226 }, { "epoch": 0.25955566670834895, "grad_norm": 102.5, "learning_rate": 8.680700167614387e-05, "loss": 6.501, "step": 6227 }, { "epoch": 0.2595973490058772, "grad_norm": 119.5, "learning_rate": 8.6802432732098e-05, "loss": 10.5012, "step": 6228 }, { "epoch": 0.25963903130340543, "grad_norm": 304.0, "learning_rate": 8.679786311732452e-05, "loss": 13.3753, "step": 6229 }, { "epoch": 0.2596807136009337, "grad_norm": 86.0, "learning_rate": 8.679329283190672e-05, "loss": 9.0002, "step": 6230 }, { "epoch": 0.2597223958984619, "grad_norm": 156.0, "learning_rate": 8.678872187592789e-05, "loss": 7.8128, "step": 6231 }, { "epoch": 0.2597640781959902, "grad_norm": 620.0, "learning_rate": 8.678415024947133e-05, "loss": 18.6258, "step": 6232 }, { "epoch": 0.2598057604935184, "grad_norm": 406.0, "learning_rate": 8.677957795262038e-05, "loss": 15.0003, "step": 6233 }, { "epoch": 0.25984744279104666, "grad_norm": 232.0, "learning_rate": 8.677500498545834e-05, "loss": 11.1881, "step": 6234 }, { "epoch": 0.25988912508857487, "grad_norm": 260.0, "learning_rate": 8.677043134806859e-05, "loss": 10.0635, "step": 6235 }, { "epoch": 0.25993080738610314, "grad_norm": 308.0, "learning_rate": 8.676585704053445e-05, "loss": 11.063, "step": 6236 }, { "epoch": 0.25997248968363135, "grad_norm": 200.0, "learning_rate": 8.676128206293931e-05, "loss": 10.6881, "step": 6237 }, { "epoch": 0.2600141719811596, "grad_norm": 181.0, "learning_rate": 8.675670641536653e-05, "loss": 10.3755, "step": 6238 }, { "epoch": 0.2600558542786878, "grad_norm": 212.0, "learning_rate": 8.675213009789953e-05, "loss": 11.5004, "step": 6239 }, { "epoch": 0.2600975365762161, "grad_norm": 272.0, "learning_rate": 8.674755311062168e-05, "loss": 12.5627, "step": 6240 }, { "epoch": 0.2601392188737443, "grad_norm": 580.0, "learning_rate": 8.674297545361643e-05, "loss": 18.0002, "step": 6241 }, { "epoch": 0.26018090117127257, "grad_norm": 330.0, "learning_rate": 8.673839712696716e-05, "loss": 12.2507, "step": 6242 }, { "epoch": 0.2602225834688008, "grad_norm": 220.0, "learning_rate": 8.673381813075737e-05, "loss": 13.2532, "step": 6243 }, { "epoch": 0.26026426576632905, "grad_norm": 144.0, "learning_rate": 8.672923846507049e-05, "loss": 5.0002, "step": 6244 }, { "epoch": 0.26030594806385726, "grad_norm": 624.0, "learning_rate": 8.672465812998995e-05, "loss": 19.5002, "step": 6245 }, { "epoch": 0.26034763036138553, "grad_norm": 1184.0, "learning_rate": 8.672007712559927e-05, "loss": 26.7558, "step": 6246 }, { "epoch": 0.26038931265891374, "grad_norm": 330.0, "learning_rate": 8.671549545198192e-05, "loss": 13.8127, "step": 6247 }, { "epoch": 0.260430994956442, "grad_norm": 82.5, "learning_rate": 8.671091310922141e-05, "loss": 7.2504, "step": 6248 }, { "epoch": 0.2604726772539702, "grad_norm": 224.0, "learning_rate": 8.670633009740124e-05, "loss": 11.6876, "step": 6249 }, { "epoch": 0.2605143595514985, "grad_norm": 548.0, "learning_rate": 8.670174641660495e-05, "loss": 14.7545, "step": 6250 }, { "epoch": 0.2605560418490267, "grad_norm": 888.0, "learning_rate": 8.66971620669161e-05, "loss": 24.7546, "step": 6251 }, { "epoch": 0.26059772414655497, "grad_norm": 386.0, "learning_rate": 8.669257704841818e-05, "loss": 15.6288, "step": 6252 }, { "epoch": 0.2606394064440832, "grad_norm": 760.0, "learning_rate": 8.66879913611948e-05, "loss": 22.7502, "step": 6253 }, { "epoch": 0.26068108874161144, "grad_norm": 260.0, "learning_rate": 8.668340500532952e-05, "loss": 13.063, "step": 6254 }, { "epoch": 0.26072277103913966, "grad_norm": 174.0, "learning_rate": 8.667881798090591e-05, "loss": 10.7504, "step": 6255 }, { "epoch": 0.2607644533366679, "grad_norm": 482.0, "learning_rate": 8.667423028800761e-05, "loss": 16.6256, "step": 6256 }, { "epoch": 0.26080613563419613, "grad_norm": 308.0, "learning_rate": 8.666964192671821e-05, "loss": 14.3758, "step": 6257 }, { "epoch": 0.2608478179317244, "grad_norm": 512.0, "learning_rate": 8.66650528971213e-05, "loss": 17.1263, "step": 6258 }, { "epoch": 0.2608895002292526, "grad_norm": 688.0, "learning_rate": 8.666046319930057e-05, "loss": 20.8755, "step": 6259 }, { "epoch": 0.2609311825267809, "grad_norm": 1008.0, "learning_rate": 8.665587283333965e-05, "loss": 24.6251, "step": 6260 }, { "epoch": 0.2609728648243091, "grad_norm": 137.0, "learning_rate": 8.665128179932218e-05, "loss": 10.1878, "step": 6261 }, { "epoch": 0.26101454712183736, "grad_norm": 296.0, "learning_rate": 8.664669009733184e-05, "loss": 13.0627, "step": 6262 }, { "epoch": 0.26105622941936557, "grad_norm": 564.0, "learning_rate": 8.664209772745233e-05, "loss": 18.8751, "step": 6263 }, { "epoch": 0.26109791171689384, "grad_norm": 336.0, "learning_rate": 8.663750468976733e-05, "loss": 12.8133, "step": 6264 }, { "epoch": 0.26113959401442205, "grad_norm": 410.0, "learning_rate": 8.663291098436057e-05, "loss": 14.4378, "step": 6265 }, { "epoch": 0.2611812763119503, "grad_norm": 324.0, "learning_rate": 8.662831661131574e-05, "loss": 13.6253, "step": 6266 }, { "epoch": 0.26122295860947853, "grad_norm": 229.0, "learning_rate": 8.662372157071659e-05, "loss": 11.3752, "step": 6267 }, { "epoch": 0.2612646409070068, "grad_norm": 222.0, "learning_rate": 8.661912586264686e-05, "loss": 9.9379, "step": 6268 }, { "epoch": 0.261306323204535, "grad_norm": 394.0, "learning_rate": 8.661452948719032e-05, "loss": 13.377, "step": 6269 }, { "epoch": 0.2613480055020633, "grad_norm": 310.0, "learning_rate": 8.660993244443072e-05, "loss": 14.2504, "step": 6270 }, { "epoch": 0.2613896877995915, "grad_norm": 302.0, "learning_rate": 8.660533473445187e-05, "loss": 14.063, "step": 6271 }, { "epoch": 0.26143137009711975, "grad_norm": 262.0, "learning_rate": 8.660073635733752e-05, "loss": 14.0022, "step": 6272 }, { "epoch": 0.261473052394648, "grad_norm": 516.0, "learning_rate": 8.659613731317152e-05, "loss": 17.3752, "step": 6273 }, { "epoch": 0.26151473469217623, "grad_norm": 235.0, "learning_rate": 8.659153760203766e-05, "loss": 11.3133, "step": 6274 }, { "epoch": 0.2615564169897045, "grad_norm": 272.0, "learning_rate": 8.658693722401979e-05, "loss": 13.5628, "step": 6275 }, { "epoch": 0.2615980992872327, "grad_norm": 384.0, "learning_rate": 8.658233617920172e-05, "loss": 15.0627, "step": 6276 }, { "epoch": 0.261639781584761, "grad_norm": 101.5, "learning_rate": 8.657773446766734e-05, "loss": 8.8755, "step": 6277 }, { "epoch": 0.2616814638822892, "grad_norm": 306.0, "learning_rate": 8.65731320895005e-05, "loss": 13.5004, "step": 6278 }, { "epoch": 0.26172314617981746, "grad_norm": 404.0, "learning_rate": 8.656852904478507e-05, "loss": 16.6252, "step": 6279 }, { "epoch": 0.26176482847734567, "grad_norm": 386.0, "learning_rate": 8.656392533360495e-05, "loss": 14.9377, "step": 6280 }, { "epoch": 0.26180651077487394, "grad_norm": 216.0, "learning_rate": 8.655932095604406e-05, "loss": 10.0627, "step": 6281 }, { "epoch": 0.26184819307240215, "grad_norm": 424.0, "learning_rate": 8.655471591218632e-05, "loss": 14.3128, "step": 6282 }, { "epoch": 0.2618898753699304, "grad_norm": 50.75, "learning_rate": 8.655011020211561e-05, "loss": 6.7202, "step": 6283 }, { "epoch": 0.2619315576674586, "grad_norm": 372.0, "learning_rate": 8.65455038259159e-05, "loss": 15.9377, "step": 6284 }, { "epoch": 0.2619732399649869, "grad_norm": 107.0, "learning_rate": 8.654089678367113e-05, "loss": 6.8128, "step": 6285 }, { "epoch": 0.2620149222625151, "grad_norm": 328.0, "learning_rate": 8.653628907546528e-05, "loss": 13.5627, "step": 6286 }, { "epoch": 0.26205660456004337, "grad_norm": 360.0, "learning_rate": 8.653168070138232e-05, "loss": 14.5628, "step": 6287 }, { "epoch": 0.2620982868575716, "grad_norm": 239.0, "learning_rate": 8.652707166150624e-05, "loss": 10.5007, "step": 6288 }, { "epoch": 0.26213996915509985, "grad_norm": 214.0, "learning_rate": 8.652246195592104e-05, "loss": 11.6877, "step": 6289 }, { "epoch": 0.26218165145262806, "grad_norm": 1280.0, "learning_rate": 8.651785158471072e-05, "loss": 24.7557, "step": 6290 }, { "epoch": 0.26222333375015633, "grad_norm": 300.0, "learning_rate": 8.651324054795931e-05, "loss": 12.4377, "step": 6291 }, { "epoch": 0.26226501604768454, "grad_norm": 532.0, "learning_rate": 8.650862884575085e-05, "loss": 18.1254, "step": 6292 }, { "epoch": 0.2623066983452128, "grad_norm": 330.0, "learning_rate": 8.65040164781694e-05, "loss": 13.8127, "step": 6293 }, { "epoch": 0.262348380642741, "grad_norm": 464.0, "learning_rate": 8.6499403445299e-05, "loss": 15.3128, "step": 6294 }, { "epoch": 0.2623900629402693, "grad_norm": 255.0, "learning_rate": 8.649478974722374e-05, "loss": 10.938, "step": 6295 }, { "epoch": 0.2624317452377975, "grad_norm": 924.0, "learning_rate": 8.649017538402769e-05, "loss": 24.8758, "step": 6296 }, { "epoch": 0.26247342753532577, "grad_norm": 308.0, "learning_rate": 8.648556035579495e-05, "loss": 12.6877, "step": 6297 }, { "epoch": 0.262515109832854, "grad_norm": 282.0, "learning_rate": 8.648094466260964e-05, "loss": 11.8753, "step": 6298 }, { "epoch": 0.26255679213038224, "grad_norm": 320.0, "learning_rate": 8.647632830455588e-05, "loss": 13.3752, "step": 6299 }, { "epoch": 0.26259847442791046, "grad_norm": 340.0, "learning_rate": 8.647171128171778e-05, "loss": 13.3129, "step": 6300 }, { "epoch": 0.2626401567254387, "grad_norm": 404.0, "learning_rate": 8.646709359417951e-05, "loss": 14.4386, "step": 6301 }, { "epoch": 0.26268183902296693, "grad_norm": 141.0, "learning_rate": 8.646247524202524e-05, "loss": 10.1883, "step": 6302 }, { "epoch": 0.2627235213204952, "grad_norm": 336.0, "learning_rate": 8.645785622533911e-05, "loss": 14.1254, "step": 6303 }, { "epoch": 0.2627652036180234, "grad_norm": 232.0, "learning_rate": 8.645323654420532e-05, "loss": 12.1254, "step": 6304 }, { "epoch": 0.2628068859155517, "grad_norm": 394.0, "learning_rate": 8.644861619870805e-05, "loss": 14.1877, "step": 6305 }, { "epoch": 0.2628485682130799, "grad_norm": 266.0, "learning_rate": 8.644399518893152e-05, "loss": 10.8135, "step": 6306 }, { "epoch": 0.26289025051060816, "grad_norm": 227.0, "learning_rate": 8.643937351495992e-05, "loss": 12.7502, "step": 6307 }, { "epoch": 0.26293193280813637, "grad_norm": 330.0, "learning_rate": 8.643475117687753e-05, "loss": 12.8757, "step": 6308 }, { "epoch": 0.26297361510566464, "grad_norm": 127.0, "learning_rate": 8.643012817476855e-05, "loss": 9.5001, "step": 6309 }, { "epoch": 0.26301529740319285, "grad_norm": 728.0, "learning_rate": 8.642550450871727e-05, "loss": 22.0016, "step": 6310 }, { "epoch": 0.2630569797007211, "grad_norm": 1004.0, "learning_rate": 8.642088017880792e-05, "loss": 23.5002, "step": 6311 }, { "epoch": 0.26309866199824933, "grad_norm": 836.0, "learning_rate": 8.641625518512479e-05, "loss": 24.3783, "step": 6312 }, { "epoch": 0.2631403442957776, "grad_norm": 528.0, "learning_rate": 8.641162952775219e-05, "loss": 17.7502, "step": 6313 }, { "epoch": 0.2631820265933058, "grad_norm": 194.0, "learning_rate": 8.64070032067744e-05, "loss": 8.6259, "step": 6314 }, { "epoch": 0.2632237088908341, "grad_norm": 528.0, "learning_rate": 8.640237622227576e-05, "loss": 17.6253, "step": 6315 }, { "epoch": 0.2632653911883623, "grad_norm": 169.0, "learning_rate": 8.639774857434057e-05, "loss": 10.6876, "step": 6316 }, { "epoch": 0.26330707348589055, "grad_norm": 352.0, "learning_rate": 8.639312026305318e-05, "loss": 12.0015, "step": 6317 }, { "epoch": 0.26334875578341876, "grad_norm": 1064.0, "learning_rate": 8.638849128849795e-05, "loss": 21.8803, "step": 6318 }, { "epoch": 0.26339043808094703, "grad_norm": 384.0, "learning_rate": 8.638386165075922e-05, "loss": 15.6882, "step": 6319 }, { "epoch": 0.26343212037847524, "grad_norm": 180.0, "learning_rate": 8.637923134992139e-05, "loss": 11.1252, "step": 6320 }, { "epoch": 0.2634738026760035, "grad_norm": 440.0, "learning_rate": 8.637460038606885e-05, "loss": 15.4385, "step": 6321 }, { "epoch": 0.2635154849735317, "grad_norm": 940.0, "learning_rate": 8.636996875928598e-05, "loss": 24.5007, "step": 6322 }, { "epoch": 0.26355716727106, "grad_norm": 426.0, "learning_rate": 8.63653364696572e-05, "loss": 12.9378, "step": 6323 }, { "epoch": 0.2635988495685882, "grad_norm": 298.0, "learning_rate": 8.636070351726692e-05, "loss": 14.3752, "step": 6324 }, { "epoch": 0.26364053186611647, "grad_norm": 247.0, "learning_rate": 8.635606990219963e-05, "loss": 12.9377, "step": 6325 }, { "epoch": 0.2636822141636447, "grad_norm": 494.0, "learning_rate": 8.635143562453971e-05, "loss": 17.7552, "step": 6326 }, { "epoch": 0.26372389646117295, "grad_norm": 564.0, "learning_rate": 8.634680068437166e-05, "loss": 17.1257, "step": 6327 }, { "epoch": 0.26376557875870116, "grad_norm": 360.0, "learning_rate": 8.634216508177992e-05, "loss": 14.2513, "step": 6328 }, { "epoch": 0.2638072610562294, "grad_norm": 640.0, "learning_rate": 8.633752881684902e-05, "loss": 20.6253, "step": 6329 }, { "epoch": 0.26384894335375764, "grad_norm": 264.0, "learning_rate": 8.633289188966343e-05, "loss": 12.0006, "step": 6330 }, { "epoch": 0.2638906256512859, "grad_norm": 148.0, "learning_rate": 8.632825430030764e-05, "loss": 9.1251, "step": 6331 }, { "epoch": 0.2639323079488141, "grad_norm": 69.0, "learning_rate": 8.632361604886621e-05, "loss": 9.0634, "step": 6332 }, { "epoch": 0.2639739902463424, "grad_norm": 888.0, "learning_rate": 8.631897713542364e-05, "loss": 23.0047, "step": 6333 }, { "epoch": 0.2640156725438706, "grad_norm": 239.0, "learning_rate": 8.631433756006448e-05, "loss": 12.251, "step": 6334 }, { "epoch": 0.26405735484139886, "grad_norm": 227.0, "learning_rate": 8.630969732287332e-05, "loss": 11.3127, "step": 6335 }, { "epoch": 0.2640990371389271, "grad_norm": 249.0, "learning_rate": 8.630505642393468e-05, "loss": 10.8755, "step": 6336 }, { "epoch": 0.26414071943645534, "grad_norm": 264.0, "learning_rate": 8.630041486333318e-05, "loss": 12.3127, "step": 6337 }, { "epoch": 0.26418240173398355, "grad_norm": 149.0, "learning_rate": 8.629577264115338e-05, "loss": 10.2509, "step": 6338 }, { "epoch": 0.2642240840315118, "grad_norm": 196.0, "learning_rate": 8.629112975747993e-05, "loss": 9.6252, "step": 6339 }, { "epoch": 0.26426576632904003, "grad_norm": 480.0, "learning_rate": 8.628648621239739e-05, "loss": 16.6266, "step": 6340 }, { "epoch": 0.2643074486265683, "grad_norm": 880.0, "learning_rate": 8.628184200599043e-05, "loss": 24.2508, "step": 6341 }, { "epoch": 0.2643491309240965, "grad_norm": 1472.0, "learning_rate": 8.627719713834368e-05, "loss": 26.8791, "step": 6342 }, { "epoch": 0.2643908132216248, "grad_norm": 342.0, "learning_rate": 8.627255160954178e-05, "loss": 11.9377, "step": 6343 }, { "epoch": 0.264432495519153, "grad_norm": 324.0, "learning_rate": 8.626790541966942e-05, "loss": 13.8752, "step": 6344 }, { "epoch": 0.26447417781668126, "grad_norm": 716.0, "learning_rate": 8.626325856881126e-05, "loss": 21.1258, "step": 6345 }, { "epoch": 0.2645158601142095, "grad_norm": 474.0, "learning_rate": 8.625861105705199e-05, "loss": 15.0664, "step": 6346 }, { "epoch": 0.26455754241173773, "grad_norm": 112.0, "learning_rate": 8.625396288447631e-05, "loss": 9.1256, "step": 6347 }, { "epoch": 0.264599224709266, "grad_norm": 296.0, "learning_rate": 8.624931405116896e-05, "loss": 13.0627, "step": 6348 }, { "epoch": 0.2646409070067942, "grad_norm": 516.0, "learning_rate": 8.624466455721462e-05, "loss": 17.2505, "step": 6349 }, { "epoch": 0.2646825893043225, "grad_norm": 402.0, "learning_rate": 8.624001440269807e-05, "loss": 16.0003, "step": 6350 }, { "epoch": 0.2647242716018507, "grad_norm": 536.0, "learning_rate": 8.623536358770402e-05, "loss": 15.8755, "step": 6351 }, { "epoch": 0.26476595389937896, "grad_norm": 486.0, "learning_rate": 8.623071211231725e-05, "loss": 16.5013, "step": 6352 }, { "epoch": 0.26480763619690717, "grad_norm": 476.0, "learning_rate": 8.622605997662257e-05, "loss": 15.8751, "step": 6353 }, { "epoch": 0.26484931849443544, "grad_norm": 924.0, "learning_rate": 8.622140718070471e-05, "loss": 23.0042, "step": 6354 }, { "epoch": 0.26489100079196365, "grad_norm": 282.0, "learning_rate": 8.621675372464848e-05, "loss": 14.5014, "step": 6355 }, { "epoch": 0.2649326830894919, "grad_norm": 398.0, "learning_rate": 8.62120996085387e-05, "loss": 15.3772, "step": 6356 }, { "epoch": 0.2649743653870201, "grad_norm": 420.0, "learning_rate": 8.62074448324602e-05, "loss": 15.2503, "step": 6357 }, { "epoch": 0.2650160476845484, "grad_norm": 724.0, "learning_rate": 8.62027893964978e-05, "loss": 19.3756, "step": 6358 }, { "epoch": 0.2650577299820766, "grad_norm": 326.0, "learning_rate": 8.619813330073634e-05, "loss": 12.8128, "step": 6359 }, { "epoch": 0.2650994122796049, "grad_norm": 174.0, "learning_rate": 8.61934765452607e-05, "loss": 9.439, "step": 6360 }, { "epoch": 0.2651410945771331, "grad_norm": 446.0, "learning_rate": 8.618881913015574e-05, "loss": 15.0009, "step": 6361 }, { "epoch": 0.26518277687466135, "grad_norm": 592.0, "learning_rate": 8.618416105550633e-05, "loss": 18.8752, "step": 6362 }, { "epoch": 0.26522445917218956, "grad_norm": 398.0, "learning_rate": 8.617950232139737e-05, "loss": 14.3752, "step": 6363 }, { "epoch": 0.26526614146971783, "grad_norm": 284.0, "learning_rate": 8.617484292791377e-05, "loss": 14.0011, "step": 6364 }, { "epoch": 0.26530782376724604, "grad_norm": 374.0, "learning_rate": 8.617018287514044e-05, "loss": 13.8127, "step": 6365 }, { "epoch": 0.2653495060647743, "grad_norm": 184.0, "learning_rate": 8.616552216316234e-05, "loss": 9.7503, "step": 6366 }, { "epoch": 0.2653911883623025, "grad_norm": 320.0, "learning_rate": 8.616086079206437e-05, "loss": 12.3133, "step": 6367 }, { "epoch": 0.2654328706598308, "grad_norm": 105.0, "learning_rate": 8.615619876193151e-05, "loss": 7.8754, "step": 6368 }, { "epoch": 0.265474552957359, "grad_norm": 71.5, "learning_rate": 8.61515360728487e-05, "loss": 7.2817, "step": 6369 }, { "epoch": 0.26551623525488727, "grad_norm": 1096.0, "learning_rate": 8.614687272490096e-05, "loss": 25.2511, "step": 6370 }, { "epoch": 0.2655579175524155, "grad_norm": 492.0, "learning_rate": 8.614220871817324e-05, "loss": 17.5012, "step": 6371 }, { "epoch": 0.26559959984994375, "grad_norm": 588.0, "learning_rate": 8.613754405275057e-05, "loss": 19.8758, "step": 6372 }, { "epoch": 0.26564128214747196, "grad_norm": 540.0, "learning_rate": 8.613287872871793e-05, "loss": 18.0001, "step": 6373 }, { "epoch": 0.2656829644450002, "grad_norm": 446.0, "learning_rate": 8.612821274616038e-05, "loss": 15.1264, "step": 6374 }, { "epoch": 0.26572464674252844, "grad_norm": 256.0, "learning_rate": 8.612354610516295e-05, "loss": 12.7504, "step": 6375 }, { "epoch": 0.2657663290400567, "grad_norm": 195.0, "learning_rate": 8.611887880581069e-05, "loss": 10.4378, "step": 6376 }, { "epoch": 0.2658080113375849, "grad_norm": 432.0, "learning_rate": 8.611421084818865e-05, "loss": 14.1266, "step": 6377 }, { "epoch": 0.2658496936351132, "grad_norm": 296.0, "learning_rate": 8.61095422323819e-05, "loss": 13.8753, "step": 6378 }, { "epoch": 0.2658913759326414, "grad_norm": 908.0, "learning_rate": 8.610487295847555e-05, "loss": 24.8752, "step": 6379 }, { "epoch": 0.26593305823016966, "grad_norm": 250.0, "learning_rate": 8.610020302655468e-05, "loss": 12.0005, "step": 6380 }, { "epoch": 0.2659747405276979, "grad_norm": 208.0, "learning_rate": 8.609553243670441e-05, "loss": 12.1267, "step": 6381 }, { "epoch": 0.26601642282522614, "grad_norm": 1592.0, "learning_rate": 8.609086118900986e-05, "loss": 36.7513, "step": 6382 }, { "epoch": 0.26605810512275435, "grad_norm": 221.0, "learning_rate": 8.608618928355616e-05, "loss": 11.1253, "step": 6383 }, { "epoch": 0.2660997874202826, "grad_norm": 584.0, "learning_rate": 8.608151672042845e-05, "loss": 18.2518, "step": 6384 }, { "epoch": 0.26614146971781083, "grad_norm": 235.0, "learning_rate": 8.60768434997119e-05, "loss": 11.5006, "step": 6385 }, { "epoch": 0.2661831520153391, "grad_norm": 536.0, "learning_rate": 8.607216962149167e-05, "loss": 19.7503, "step": 6386 }, { "epoch": 0.2662248343128673, "grad_norm": 1392.0, "learning_rate": 8.606749508585294e-05, "loss": 28.6316, "step": 6387 }, { "epoch": 0.2662665166103956, "grad_norm": 880.0, "learning_rate": 8.606281989288093e-05, "loss": 22.2505, "step": 6388 }, { "epoch": 0.2663081989079238, "grad_norm": 72.5, "learning_rate": 8.605814404266081e-05, "loss": 7.6884, "step": 6389 }, { "epoch": 0.26634988120545205, "grad_norm": 512.0, "learning_rate": 8.605346753527784e-05, "loss": 18.5002, "step": 6390 }, { "epoch": 0.26639156350298027, "grad_norm": 312.0, "learning_rate": 8.604879037081719e-05, "loss": 14.5627, "step": 6391 }, { "epoch": 0.26643324580050853, "grad_norm": 236.0, "learning_rate": 8.604411254936415e-05, "loss": 12.0002, "step": 6392 }, { "epoch": 0.26647492809803675, "grad_norm": 548.0, "learning_rate": 8.603943407100395e-05, "loss": 17.2502, "step": 6393 }, { "epoch": 0.266516610395565, "grad_norm": 348.0, "learning_rate": 8.603475493582187e-05, "loss": 14.9378, "step": 6394 }, { "epoch": 0.2665582926930932, "grad_norm": 65.0, "learning_rate": 8.603007514390319e-05, "loss": 6.6881, "step": 6395 }, { "epoch": 0.2665999749906215, "grad_norm": 490.0, "learning_rate": 8.602539469533318e-05, "loss": 18.1252, "step": 6396 }, { "epoch": 0.2666416572881497, "grad_norm": 132.0, "learning_rate": 8.602071359019717e-05, "loss": 8.563, "step": 6397 }, { "epoch": 0.26668333958567797, "grad_norm": 245.0, "learning_rate": 8.601603182858045e-05, "loss": 12.0636, "step": 6398 }, { "epoch": 0.2667250218832062, "grad_norm": 400.0, "learning_rate": 8.601134941056834e-05, "loss": 16.3752, "step": 6399 }, { "epoch": 0.26676670418073445, "grad_norm": 572.0, "learning_rate": 8.60066663362462e-05, "loss": 18.2502, "step": 6400 }, { "epoch": 0.26680838647826266, "grad_norm": 432.0, "learning_rate": 8.600198260569937e-05, "loss": 14.1258, "step": 6401 }, { "epoch": 0.2668500687757909, "grad_norm": 640.0, "learning_rate": 8.599729821901321e-05, "loss": 16.8753, "step": 6402 }, { "epoch": 0.26689175107331914, "grad_norm": 284.0, "learning_rate": 8.59926131762731e-05, "loss": 12.8127, "step": 6403 }, { "epoch": 0.2669334333708474, "grad_norm": 188.0, "learning_rate": 8.598792747756441e-05, "loss": 10.9379, "step": 6404 }, { "epoch": 0.2669751156683756, "grad_norm": 266.0, "learning_rate": 8.598324112297256e-05, "loss": 13.188, "step": 6405 }, { "epoch": 0.2670167979659039, "grad_norm": 398.0, "learning_rate": 8.597855411258293e-05, "loss": 14.3128, "step": 6406 }, { "epoch": 0.2670584802634321, "grad_norm": 220.0, "learning_rate": 8.597386644648097e-05, "loss": 10.1881, "step": 6407 }, { "epoch": 0.26710016256096036, "grad_norm": 171.0, "learning_rate": 8.59691781247521e-05, "loss": 9.563, "step": 6408 }, { "epoch": 0.2671418448584886, "grad_norm": 318.0, "learning_rate": 8.596448914748176e-05, "loss": 13.2502, "step": 6409 }, { "epoch": 0.26718352715601684, "grad_norm": 366.0, "learning_rate": 8.595979951475541e-05, "loss": 13.3127, "step": 6410 }, { "epoch": 0.26722520945354505, "grad_norm": 358.0, "learning_rate": 8.595510922665852e-05, "loss": 14.5629, "step": 6411 }, { "epoch": 0.2672668917510733, "grad_norm": 328.0, "learning_rate": 8.595041828327657e-05, "loss": 13.8754, "step": 6412 }, { "epoch": 0.26730857404860153, "grad_norm": 416.0, "learning_rate": 8.594572668469508e-05, "loss": 14.5003, "step": 6413 }, { "epoch": 0.2673502563461298, "grad_norm": 254.0, "learning_rate": 8.594103443099951e-05, "loss": 12.3751, "step": 6414 }, { "epoch": 0.267391938643658, "grad_norm": 272.0, "learning_rate": 8.593634152227541e-05, "loss": 12.1884, "step": 6415 }, { "epoch": 0.2674336209411863, "grad_norm": 242.0, "learning_rate": 8.593164795860829e-05, "loss": 12.5008, "step": 6416 }, { "epoch": 0.2674753032387145, "grad_norm": 243.0, "learning_rate": 8.592695374008368e-05, "loss": 12.0629, "step": 6417 }, { "epoch": 0.26751698553624276, "grad_norm": 564.0, "learning_rate": 8.592225886678716e-05, "loss": 19.8752, "step": 6418 }, { "epoch": 0.267558667833771, "grad_norm": 378.0, "learning_rate": 8.591756333880429e-05, "loss": 12.6252, "step": 6419 }, { "epoch": 0.26760035013129924, "grad_norm": 332.0, "learning_rate": 8.591286715622062e-05, "loss": 12.2502, "step": 6420 }, { "epoch": 0.2676420324288275, "grad_norm": 548.0, "learning_rate": 8.590817031912178e-05, "loss": 18.2512, "step": 6421 }, { "epoch": 0.2676837147263557, "grad_norm": 432.0, "learning_rate": 8.590347282759334e-05, "loss": 14.4378, "step": 6422 }, { "epoch": 0.267725397023884, "grad_norm": 422.0, "learning_rate": 8.589877468172092e-05, "loss": 15.7503, "step": 6423 }, { "epoch": 0.2677670793214122, "grad_norm": 428.0, "learning_rate": 8.589407588159016e-05, "loss": 15.1253, "step": 6424 }, { "epoch": 0.26780876161894046, "grad_norm": 223.0, "learning_rate": 8.588937642728664e-05, "loss": 11.8752, "step": 6425 }, { "epoch": 0.2678504439164687, "grad_norm": 228.0, "learning_rate": 8.588467631889609e-05, "loss": 11.1252, "step": 6426 }, { "epoch": 0.26789212621399694, "grad_norm": 474.0, "learning_rate": 8.58799755565041e-05, "loss": 17.2515, "step": 6427 }, { "epoch": 0.26793380851152515, "grad_norm": 352.0, "learning_rate": 8.587527414019641e-05, "loss": 14.0633, "step": 6428 }, { "epoch": 0.2679754908090534, "grad_norm": 444.0, "learning_rate": 8.587057207005862e-05, "loss": 17.5002, "step": 6429 }, { "epoch": 0.26801717310658163, "grad_norm": 360.0, "learning_rate": 8.586586934617649e-05, "loss": 16.1259, "step": 6430 }, { "epoch": 0.2680588554041099, "grad_norm": 608.0, "learning_rate": 8.586116596863571e-05, "loss": 17.7547, "step": 6431 }, { "epoch": 0.2681005377016381, "grad_norm": 292.0, "learning_rate": 8.585646193752199e-05, "loss": 12.0627, "step": 6432 }, { "epoch": 0.2681422199991664, "grad_norm": 1360.0, "learning_rate": 8.585175725292107e-05, "loss": 34.2516, "step": 6433 }, { "epoch": 0.2681839022966946, "grad_norm": 620.0, "learning_rate": 8.584705191491869e-05, "loss": 18.1256, "step": 6434 }, { "epoch": 0.26822558459422285, "grad_norm": 148.0, "learning_rate": 8.58423459236006e-05, "loss": 8.5003, "step": 6435 }, { "epoch": 0.26826726689175107, "grad_norm": 314.0, "learning_rate": 8.58376392790526e-05, "loss": 13.314, "step": 6436 }, { "epoch": 0.26830894918927933, "grad_norm": 1012.0, "learning_rate": 8.583293198136041e-05, "loss": 25.128, "step": 6437 }, { "epoch": 0.26835063148680754, "grad_norm": 364.0, "learning_rate": 8.582822403060986e-05, "loss": 15.1879, "step": 6438 }, { "epoch": 0.2683923137843358, "grad_norm": 234.0, "learning_rate": 8.582351542688675e-05, "loss": 11.4382, "step": 6439 }, { "epoch": 0.268433996081864, "grad_norm": 308.0, "learning_rate": 8.581880617027691e-05, "loss": 12.7513, "step": 6440 }, { "epoch": 0.2684756783793923, "grad_norm": 300.0, "learning_rate": 8.581409626086614e-05, "loss": 13.6882, "step": 6441 }, { "epoch": 0.2685173606769205, "grad_norm": 378.0, "learning_rate": 8.580938569874027e-05, "loss": 15.9386, "step": 6442 }, { "epoch": 0.26855904297444877, "grad_norm": 784.0, "learning_rate": 8.580467448398516e-05, "loss": 22.1252, "step": 6443 }, { "epoch": 0.268600725271977, "grad_norm": 255.0, "learning_rate": 8.579996261668672e-05, "loss": 12.1878, "step": 6444 }, { "epoch": 0.26864240756950525, "grad_norm": 576.0, "learning_rate": 8.579525009693074e-05, "loss": 18.0007, "step": 6445 }, { "epoch": 0.26868408986703346, "grad_norm": 432.0, "learning_rate": 8.579053692480318e-05, "loss": 16.8754, "step": 6446 }, { "epoch": 0.2687257721645617, "grad_norm": 119.0, "learning_rate": 8.57858231003899e-05, "loss": 8.3759, "step": 6447 }, { "epoch": 0.26876745446208994, "grad_norm": 464.0, "learning_rate": 8.57811086237768e-05, "loss": 17.3756, "step": 6448 }, { "epoch": 0.2688091367596182, "grad_norm": 238.0, "learning_rate": 8.577639349504983e-05, "loss": 11.8753, "step": 6449 }, { "epoch": 0.2688508190571464, "grad_norm": 193.0, "learning_rate": 8.577167771429492e-05, "loss": 11.0636, "step": 6450 }, { "epoch": 0.2688925013546747, "grad_norm": 362.0, "learning_rate": 8.5766961281598e-05, "loss": 13.9378, "step": 6451 }, { "epoch": 0.2689341836522029, "grad_norm": 330.0, "learning_rate": 8.576224419704504e-05, "loss": 13.6257, "step": 6452 }, { "epoch": 0.26897586594973116, "grad_norm": 87.0, "learning_rate": 8.575752646072201e-05, "loss": 8.3755, "step": 6453 }, { "epoch": 0.2690175482472594, "grad_norm": 520.0, "learning_rate": 8.575280807271488e-05, "loss": 18.1252, "step": 6454 }, { "epoch": 0.26905923054478764, "grad_norm": 1536.0, "learning_rate": 8.574808903310964e-05, "loss": 34.7503, "step": 6455 }, { "epoch": 0.26910091284231585, "grad_norm": 736.0, "learning_rate": 8.57433693419923e-05, "loss": 21.3766, "step": 6456 }, { "epoch": 0.2691425951398441, "grad_norm": 46.25, "learning_rate": 8.573864899944891e-05, "loss": 7.5007, "step": 6457 }, { "epoch": 0.26918427743737233, "grad_norm": 270.0, "learning_rate": 8.573392800556545e-05, "loss": 11.8754, "step": 6458 }, { "epoch": 0.2692259597349006, "grad_norm": 143.0, "learning_rate": 8.572920636042798e-05, "loss": 7.2195, "step": 6459 }, { "epoch": 0.2692676420324288, "grad_norm": 1032.0, "learning_rate": 8.572448406412255e-05, "loss": 26.6281, "step": 6460 }, { "epoch": 0.2693093243299571, "grad_norm": 86.0, "learning_rate": 8.571976111673523e-05, "loss": 8.3129, "step": 6461 }, { "epoch": 0.2693510066274853, "grad_norm": 158.0, "learning_rate": 8.571503751835209e-05, "loss": 10.8128, "step": 6462 }, { "epoch": 0.26939268892501356, "grad_norm": 482.0, "learning_rate": 8.571031326905923e-05, "loss": 17.8752, "step": 6463 }, { "epoch": 0.26943437122254177, "grad_norm": 506.0, "learning_rate": 8.570558836894274e-05, "loss": 16.3753, "step": 6464 }, { "epoch": 0.26947605352007004, "grad_norm": 79.5, "learning_rate": 8.570086281808871e-05, "loss": 8.7502, "step": 6465 }, { "epoch": 0.26951773581759825, "grad_norm": 67.5, "learning_rate": 8.569613661658331e-05, "loss": 8.6878, "step": 6466 }, { "epoch": 0.2695594181151265, "grad_norm": 482.0, "learning_rate": 8.569140976451265e-05, "loss": 19.251, "step": 6467 }, { "epoch": 0.2696011004126547, "grad_norm": 406.0, "learning_rate": 8.568668226196286e-05, "loss": 16.5003, "step": 6468 }, { "epoch": 0.269642782710183, "grad_norm": 444.0, "learning_rate": 8.568195410902014e-05, "loss": 17.0006, "step": 6469 }, { "epoch": 0.2696844650077112, "grad_norm": 134.0, "learning_rate": 8.567722530577062e-05, "loss": 5.5316, "step": 6470 }, { "epoch": 0.26972614730523947, "grad_norm": 668.0, "learning_rate": 8.56724958523005e-05, "loss": 21.0009, "step": 6471 }, { "epoch": 0.2697678296027677, "grad_norm": 556.0, "learning_rate": 8.5667765748696e-05, "loss": 15.0653, "step": 6472 }, { "epoch": 0.26980951190029595, "grad_norm": 888.0, "learning_rate": 8.566303499504329e-05, "loss": 24.2505, "step": 6473 }, { "epoch": 0.26985119419782416, "grad_norm": 282.0, "learning_rate": 8.56583035914286e-05, "loss": 11.7504, "step": 6474 }, { "epoch": 0.26989287649535243, "grad_norm": 255.0, "learning_rate": 8.565357153793815e-05, "loss": 12.1876, "step": 6475 }, { "epoch": 0.26993455879288064, "grad_norm": 166.0, "learning_rate": 8.564883883465822e-05, "loss": 10.5629, "step": 6476 }, { "epoch": 0.2699762410904089, "grad_norm": 211.0, "learning_rate": 8.564410548167503e-05, "loss": 11.7509, "step": 6477 }, { "epoch": 0.2700179233879371, "grad_norm": 272.0, "learning_rate": 8.563937147907483e-05, "loss": 12.0642, "step": 6478 }, { "epoch": 0.2700596056854654, "grad_norm": 410.0, "learning_rate": 8.563463682694395e-05, "loss": 13.0629, "step": 6479 }, { "epoch": 0.2701012879829936, "grad_norm": 318.0, "learning_rate": 8.562990152536864e-05, "loss": 13.8752, "step": 6480 }, { "epoch": 0.27014297028052187, "grad_norm": 272.0, "learning_rate": 8.56251655744352e-05, "loss": 12.5006, "step": 6481 }, { "epoch": 0.2701846525780501, "grad_norm": 348.0, "learning_rate": 8.562042897422997e-05, "loss": 14.9379, "step": 6482 }, { "epoch": 0.27022633487557834, "grad_norm": 352.0, "learning_rate": 8.561569172483926e-05, "loss": 12.9378, "step": 6483 }, { "epoch": 0.27026801717310656, "grad_norm": 458.0, "learning_rate": 8.561095382634941e-05, "loss": 14.8756, "step": 6484 }, { "epoch": 0.2703096994706348, "grad_norm": 680.0, "learning_rate": 8.560621527884674e-05, "loss": 16.8797, "step": 6485 }, { "epoch": 0.27035138176816303, "grad_norm": 416.0, "learning_rate": 8.560147608241767e-05, "loss": 16.376, "step": 6486 }, { "epoch": 0.2703930640656913, "grad_norm": 159.0, "learning_rate": 8.55967362371485e-05, "loss": 9.9385, "step": 6487 }, { "epoch": 0.2704347463632195, "grad_norm": 290.0, "learning_rate": 8.559199574312569e-05, "loss": 12.6256, "step": 6488 }, { "epoch": 0.2704764286607478, "grad_norm": 187.0, "learning_rate": 8.558725460043557e-05, "loss": 6.8446, "step": 6489 }, { "epoch": 0.27051811095827605, "grad_norm": 153.0, "learning_rate": 8.558251280916458e-05, "loss": 10.5007, "step": 6490 }, { "epoch": 0.27055979325580426, "grad_norm": 382.0, "learning_rate": 8.557777036939916e-05, "loss": 15.4377, "step": 6491 }, { "epoch": 0.2706014755533325, "grad_norm": 44.5, "learning_rate": 8.557302728122569e-05, "loss": 7.4385, "step": 6492 }, { "epoch": 0.27064315785086074, "grad_norm": 298.0, "learning_rate": 8.556828354473064e-05, "loss": 12.3755, "step": 6493 }, { "epoch": 0.270684840148389, "grad_norm": 1448.0, "learning_rate": 8.556353916000048e-05, "loss": 35.5017, "step": 6494 }, { "epoch": 0.2707265224459172, "grad_norm": 382.0, "learning_rate": 8.555879412712164e-05, "loss": 14.6254, "step": 6495 }, { "epoch": 0.2707682047434455, "grad_norm": 392.0, "learning_rate": 8.555404844618065e-05, "loss": 13.6254, "step": 6496 }, { "epoch": 0.2708098870409737, "grad_norm": 245.0, "learning_rate": 8.554930211726395e-05, "loss": 11.5627, "step": 6497 }, { "epoch": 0.27085156933850196, "grad_norm": 692.0, "learning_rate": 8.554455514045808e-05, "loss": 21.5004, "step": 6498 }, { "epoch": 0.2708932516360302, "grad_norm": 478.0, "learning_rate": 8.553980751584953e-05, "loss": 17.1258, "step": 6499 }, { "epoch": 0.27093493393355844, "grad_norm": 221.0, "learning_rate": 8.553505924352481e-05, "loss": 10.6253, "step": 6500 }, { "epoch": 0.27097661623108665, "grad_norm": 748.0, "learning_rate": 8.55303103235705e-05, "loss": 21.2513, "step": 6501 }, { "epoch": 0.2710182985286149, "grad_norm": 384.0, "learning_rate": 8.552556075607315e-05, "loss": 14.3131, "step": 6502 }, { "epoch": 0.27105998082614313, "grad_norm": 328.0, "learning_rate": 8.552081054111927e-05, "loss": 12.6253, "step": 6503 }, { "epoch": 0.2711016631236714, "grad_norm": 684.0, "learning_rate": 8.551605967879547e-05, "loss": 19.7502, "step": 6504 }, { "epoch": 0.2711433454211996, "grad_norm": 512.0, "learning_rate": 8.551130816918836e-05, "loss": 18.2503, "step": 6505 }, { "epoch": 0.2711850277187279, "grad_norm": 156.0, "learning_rate": 8.550655601238447e-05, "loss": 9.5004, "step": 6506 }, { "epoch": 0.2712267100162561, "grad_norm": 111.0, "learning_rate": 8.550180320847046e-05, "loss": 9.5015, "step": 6507 }, { "epoch": 0.27126839231378436, "grad_norm": 620.0, "learning_rate": 8.549704975753292e-05, "loss": 24.8763, "step": 6508 }, { "epoch": 0.27131007461131257, "grad_norm": 159.0, "learning_rate": 8.549229565965851e-05, "loss": 11.1255, "step": 6509 }, { "epoch": 0.27135175690884084, "grad_norm": 158.0, "learning_rate": 8.548754091493387e-05, "loss": 9.9378, "step": 6510 }, { "epoch": 0.27139343920636905, "grad_norm": 402.0, "learning_rate": 8.548278552344563e-05, "loss": 14.4378, "step": 6511 }, { "epoch": 0.2714351215038973, "grad_norm": 624.0, "learning_rate": 8.547802948528048e-05, "loss": 17.7501, "step": 6512 }, { "epoch": 0.2714768038014255, "grad_norm": 344.0, "learning_rate": 8.547327280052509e-05, "loss": 15.4378, "step": 6513 }, { "epoch": 0.2715184860989538, "grad_norm": 276.0, "learning_rate": 8.546851546926615e-05, "loss": 10.9383, "step": 6514 }, { "epoch": 0.271560168396482, "grad_norm": 342.0, "learning_rate": 8.546375749159039e-05, "loss": 14.1881, "step": 6515 }, { "epoch": 0.27160185069401027, "grad_norm": 494.0, "learning_rate": 8.545899886758448e-05, "loss": 16.6252, "step": 6516 }, { "epoch": 0.2716435329915385, "grad_norm": 298.0, "learning_rate": 8.545423959733519e-05, "loss": 13.1252, "step": 6517 }, { "epoch": 0.27168521528906675, "grad_norm": 161.0, "learning_rate": 8.54494796809292e-05, "loss": 10.3128, "step": 6518 }, { "epoch": 0.27172689758659496, "grad_norm": 600.0, "learning_rate": 8.544471911845332e-05, "loss": 21.8753, "step": 6519 }, { "epoch": 0.27176857988412323, "grad_norm": 412.0, "learning_rate": 8.543995790999428e-05, "loss": 14.8755, "step": 6520 }, { "epoch": 0.27181026218165144, "grad_norm": 424.0, "learning_rate": 8.543519605563887e-05, "loss": 15.0001, "step": 6521 }, { "epoch": 0.2718519444791797, "grad_norm": 69.0, "learning_rate": 8.543043355547387e-05, "loss": 8.1256, "step": 6522 }, { "epoch": 0.2718936267767079, "grad_norm": 84.5, "learning_rate": 8.542567040958604e-05, "loss": 6.4377, "step": 6523 }, { "epoch": 0.2719353090742362, "grad_norm": 556.0, "learning_rate": 8.542090661806226e-05, "loss": 18.2505, "step": 6524 }, { "epoch": 0.2719769913717644, "grad_norm": 286.0, "learning_rate": 8.54161421809893e-05, "loss": 12.3753, "step": 6525 }, { "epoch": 0.27201867366929267, "grad_norm": 442.0, "learning_rate": 8.5411377098454e-05, "loss": 15.5628, "step": 6526 }, { "epoch": 0.2720603559668209, "grad_norm": 75.0, "learning_rate": 8.540661137054321e-05, "loss": 6.8752, "step": 6527 }, { "epoch": 0.27210203826434914, "grad_norm": 588.0, "learning_rate": 8.540184499734379e-05, "loss": 19.7503, "step": 6528 }, { "epoch": 0.27214372056187736, "grad_norm": 652.0, "learning_rate": 8.53970779789426e-05, "loss": 21.2509, "step": 6529 }, { "epoch": 0.2721854028594056, "grad_norm": 142.0, "learning_rate": 8.539231031542651e-05, "loss": 9.3755, "step": 6530 }, { "epoch": 0.27222708515693383, "grad_norm": 217.0, "learning_rate": 8.538754200688244e-05, "loss": 11.8755, "step": 6531 }, { "epoch": 0.2722687674544621, "grad_norm": 716.0, "learning_rate": 8.538277305339726e-05, "loss": 21.5003, "step": 6532 }, { "epoch": 0.2723104497519903, "grad_norm": 93.5, "learning_rate": 8.53780034550579e-05, "loss": 8.0003, "step": 6533 }, { "epoch": 0.2723521320495186, "grad_norm": 368.0, "learning_rate": 8.537323321195131e-05, "loss": 15.0658, "step": 6534 }, { "epoch": 0.2723938143470468, "grad_norm": 796.0, "learning_rate": 8.536846232416438e-05, "loss": 20.6291, "step": 6535 }, { "epoch": 0.27243549664457506, "grad_norm": 228.0, "learning_rate": 8.53636907917841e-05, "loss": 11.7503, "step": 6536 }, { "epoch": 0.27247717894210327, "grad_norm": 572.0, "learning_rate": 8.535891861489741e-05, "loss": 18.6251, "step": 6537 }, { "epoch": 0.27251886123963154, "grad_norm": 416.0, "learning_rate": 8.53541457935913e-05, "loss": 14.1253, "step": 6538 }, { "epoch": 0.27256054353715975, "grad_norm": 296.0, "learning_rate": 8.534937232795273e-05, "loss": 13.3752, "step": 6539 }, { "epoch": 0.272602225834688, "grad_norm": 452.0, "learning_rate": 8.534459821806871e-05, "loss": 11.44, "step": 6540 }, { "epoch": 0.27264390813221623, "grad_norm": 62.75, "learning_rate": 8.533982346402625e-05, "loss": 7.6254, "step": 6541 }, { "epoch": 0.2726855904297445, "grad_norm": 532.0, "learning_rate": 8.533504806591237e-05, "loss": 17.3757, "step": 6542 }, { "epoch": 0.2727272727272727, "grad_norm": 113.5, "learning_rate": 8.533027202381412e-05, "loss": 9.0633, "step": 6543 }, { "epoch": 0.272768955024801, "grad_norm": 398.0, "learning_rate": 8.53254953378185e-05, "loss": 15.8755, "step": 6544 }, { "epoch": 0.2728106373223292, "grad_norm": 310.0, "learning_rate": 8.53207180080126e-05, "loss": 13.0003, "step": 6545 }, { "epoch": 0.27285231961985745, "grad_norm": 496.0, "learning_rate": 8.531594003448349e-05, "loss": 16.8753, "step": 6546 }, { "epoch": 0.27289400191738566, "grad_norm": 206.0, "learning_rate": 8.531116141731823e-05, "loss": 11.5007, "step": 6547 }, { "epoch": 0.27293568421491393, "grad_norm": 648.0, "learning_rate": 8.530638215660391e-05, "loss": 18.5053, "step": 6548 }, { "epoch": 0.27297736651244214, "grad_norm": 250.0, "learning_rate": 8.530160225242767e-05, "loss": 12.5627, "step": 6549 }, { "epoch": 0.2730190488099704, "grad_norm": 214.0, "learning_rate": 8.529682170487656e-05, "loss": 10.5004, "step": 6550 }, { "epoch": 0.2730607311074986, "grad_norm": 254.0, "learning_rate": 8.529204051403776e-05, "loss": 11.0627, "step": 6551 }, { "epoch": 0.2731024134050269, "grad_norm": 482.0, "learning_rate": 8.528725867999839e-05, "loss": 17.3786, "step": 6552 }, { "epoch": 0.2731440957025551, "grad_norm": 466.0, "learning_rate": 8.528247620284559e-05, "loss": 16.6252, "step": 6553 }, { "epoch": 0.27318577800008337, "grad_norm": 185.0, "learning_rate": 8.527769308266654e-05, "loss": 10.8145, "step": 6554 }, { "epoch": 0.2732274602976116, "grad_norm": 244.0, "learning_rate": 8.527290931954839e-05, "loss": 13.0634, "step": 6555 }, { "epoch": 0.27326914259513985, "grad_norm": 167.0, "learning_rate": 8.526812491357834e-05, "loss": 10.3132, "step": 6556 }, { "epoch": 0.27331082489266806, "grad_norm": 165.0, "learning_rate": 8.526333986484358e-05, "loss": 9.9379, "step": 6557 }, { "epoch": 0.2733525071901963, "grad_norm": 362.0, "learning_rate": 8.525855417343132e-05, "loss": 13.6259, "step": 6558 }, { "epoch": 0.27339418948772454, "grad_norm": 588.0, "learning_rate": 8.525376783942879e-05, "loss": 18.5004, "step": 6559 }, { "epoch": 0.2734358717852528, "grad_norm": 189.0, "learning_rate": 8.524898086292321e-05, "loss": 9.8763, "step": 6560 }, { "epoch": 0.273477554082781, "grad_norm": 616.0, "learning_rate": 8.524419324400181e-05, "loss": 20.3755, "step": 6561 }, { "epoch": 0.2735192363803093, "grad_norm": 237.0, "learning_rate": 8.523940498275187e-05, "loss": 11.5626, "step": 6562 }, { "epoch": 0.27356091867783755, "grad_norm": 258.0, "learning_rate": 8.523461607926064e-05, "loss": 12.6256, "step": 6563 }, { "epoch": 0.27360260097536576, "grad_norm": 376.0, "learning_rate": 8.522982653361541e-05, "loss": 13.5002, "step": 6564 }, { "epoch": 0.27364428327289403, "grad_norm": 528.0, "learning_rate": 8.522503634590347e-05, "loss": 18.5008, "step": 6565 }, { "epoch": 0.27368596557042224, "grad_norm": 520.0, "learning_rate": 8.522024551621211e-05, "loss": 16.7503, "step": 6566 }, { "epoch": 0.2737276478679505, "grad_norm": 640.0, "learning_rate": 8.521545404462865e-05, "loss": 20.3751, "step": 6567 }, { "epoch": 0.2737693301654787, "grad_norm": 63.0, "learning_rate": 8.52106619312404e-05, "loss": 7.0943, "step": 6568 }, { "epoch": 0.273811012463007, "grad_norm": 153.0, "learning_rate": 8.520586917613473e-05, "loss": 11.063, "step": 6569 }, { "epoch": 0.2738526947605352, "grad_norm": 840.0, "learning_rate": 8.520107577939896e-05, "loss": 22.0054, "step": 6570 }, { "epoch": 0.27389437705806347, "grad_norm": 242.0, "learning_rate": 8.519628174112047e-05, "loss": 11.4378, "step": 6571 }, { "epoch": 0.2739360593555917, "grad_norm": 448.0, "learning_rate": 8.51914870613866e-05, "loss": 16.3754, "step": 6572 }, { "epoch": 0.27397774165311994, "grad_norm": 516.0, "learning_rate": 8.518669174028477e-05, "loss": 17.1252, "step": 6573 }, { "epoch": 0.27401942395064816, "grad_norm": 454.0, "learning_rate": 8.518189577790237e-05, "loss": 13.6903, "step": 6574 }, { "epoch": 0.2740611062481764, "grad_norm": 284.0, "learning_rate": 8.517709917432677e-05, "loss": 13.1885, "step": 6575 }, { "epoch": 0.27410278854570463, "grad_norm": 360.0, "learning_rate": 8.517230192964542e-05, "loss": 13.1877, "step": 6576 }, { "epoch": 0.2741444708432329, "grad_norm": 268.0, "learning_rate": 8.516750404394576e-05, "loss": 11.7508, "step": 6577 }, { "epoch": 0.2741861531407611, "grad_norm": 320.0, "learning_rate": 8.51627055173152e-05, "loss": 14.3752, "step": 6578 }, { "epoch": 0.2742278354382894, "grad_norm": 358.0, "learning_rate": 8.515790634984122e-05, "loss": 15.2504, "step": 6579 }, { "epoch": 0.2742695177358176, "grad_norm": 153.0, "learning_rate": 8.515310654161128e-05, "loss": 11.4387, "step": 6580 }, { "epoch": 0.27431120003334586, "grad_norm": 430.0, "learning_rate": 8.514830609271285e-05, "loss": 14.8128, "step": 6581 }, { "epoch": 0.27435288233087407, "grad_norm": 274.0, "learning_rate": 8.51435050032334e-05, "loss": 12.7507, "step": 6582 }, { "epoch": 0.27439456462840234, "grad_norm": 256.0, "learning_rate": 8.513870327326048e-05, "loss": 12.8128, "step": 6583 }, { "epoch": 0.27443624692593055, "grad_norm": 126.0, "learning_rate": 8.513390090288156e-05, "loss": 8.3133, "step": 6584 }, { "epoch": 0.2744779292234588, "grad_norm": 732.0, "learning_rate": 8.512909789218418e-05, "loss": 19.2545, "step": 6585 }, { "epoch": 0.27451961152098703, "grad_norm": 490.0, "learning_rate": 8.512429424125588e-05, "loss": 17.0004, "step": 6586 }, { "epoch": 0.2745612938185153, "grad_norm": 368.0, "learning_rate": 8.511948995018418e-05, "loss": 14.3751, "step": 6587 }, { "epoch": 0.2746029761160435, "grad_norm": 386.0, "learning_rate": 8.511468501905667e-05, "loss": 14.3153, "step": 6588 }, { "epoch": 0.2746446584135718, "grad_norm": 352.0, "learning_rate": 8.510987944796092e-05, "loss": 14.0627, "step": 6589 }, { "epoch": 0.2746863407111, "grad_norm": 676.0, "learning_rate": 8.510507323698448e-05, "loss": 20.7507, "step": 6590 }, { "epoch": 0.27472802300862825, "grad_norm": 102.5, "learning_rate": 8.510026638621497e-05, "loss": 8.5005, "step": 6591 }, { "epoch": 0.27476970530615646, "grad_norm": 500.0, "learning_rate": 8.509545889574e-05, "loss": 17.1253, "step": 6592 }, { "epoch": 0.27481138760368473, "grad_norm": 384.0, "learning_rate": 8.509065076564717e-05, "loss": 12.0005, "step": 6593 }, { "epoch": 0.27485306990121294, "grad_norm": 392.0, "learning_rate": 8.50858419960241e-05, "loss": 15.1884, "step": 6594 }, { "epoch": 0.2748947521987412, "grad_norm": 1144.0, "learning_rate": 8.508103258695845e-05, "loss": 24.1315, "step": 6595 }, { "epoch": 0.2749364344962694, "grad_norm": 382.0, "learning_rate": 8.507622253853789e-05, "loss": 14.938, "step": 6596 }, { "epoch": 0.2749781167937977, "grad_norm": 464.0, "learning_rate": 8.507141185085004e-05, "loss": 15.5649, "step": 6597 }, { "epoch": 0.2750197990913259, "grad_norm": 294.0, "learning_rate": 8.50666005239826e-05, "loss": 12.1881, "step": 6598 }, { "epoch": 0.27506148138885417, "grad_norm": 212.0, "learning_rate": 8.506178855802325e-05, "loss": 12.0627, "step": 6599 }, { "epoch": 0.2751031636863824, "grad_norm": 382.0, "learning_rate": 8.505697595305971e-05, "loss": 15.0627, "step": 6600 }, { "epoch": 0.27514484598391065, "grad_norm": 736.0, "learning_rate": 8.505216270917964e-05, "loss": 20.2532, "step": 6601 }, { "epoch": 0.27518652828143886, "grad_norm": 384.0, "learning_rate": 8.50473488264708e-05, "loss": 14.0044, "step": 6602 }, { "epoch": 0.2752282105789671, "grad_norm": 232.0, "learning_rate": 8.504253430502094e-05, "loss": 11.1254, "step": 6603 }, { "epoch": 0.27526989287649534, "grad_norm": 560.0, "learning_rate": 8.503771914491776e-05, "loss": 19.8753, "step": 6604 }, { "epoch": 0.2753115751740236, "grad_norm": 424.0, "learning_rate": 8.503290334624905e-05, "loss": 15.5628, "step": 6605 }, { "epoch": 0.2753532574715518, "grad_norm": 640.0, "learning_rate": 8.502808690910255e-05, "loss": 19.0004, "step": 6606 }, { "epoch": 0.2753949397690801, "grad_norm": 86.5, "learning_rate": 8.502326983356607e-05, "loss": 7.469, "step": 6607 }, { "epoch": 0.2754366220666083, "grad_norm": 179.0, "learning_rate": 8.501845211972739e-05, "loss": 6.8754, "step": 6608 }, { "epoch": 0.27547830436413656, "grad_norm": 416.0, "learning_rate": 8.501363376767431e-05, "loss": 16.6254, "step": 6609 }, { "epoch": 0.2755199866616648, "grad_norm": 470.0, "learning_rate": 8.500881477749463e-05, "loss": 15.6881, "step": 6610 }, { "epoch": 0.27556166895919304, "grad_norm": 143.0, "learning_rate": 8.50039951492762e-05, "loss": 8.0628, "step": 6611 }, { "epoch": 0.27560335125672125, "grad_norm": 624.0, "learning_rate": 8.499917488310687e-05, "loss": 19.5004, "step": 6612 }, { "epoch": 0.2756450335542495, "grad_norm": 378.0, "learning_rate": 8.499435397907445e-05, "loss": 14.2504, "step": 6613 }, { "epoch": 0.27568671585177773, "grad_norm": 458.0, "learning_rate": 8.498953243726682e-05, "loss": 15.8128, "step": 6614 }, { "epoch": 0.275728398149306, "grad_norm": 99.0, "learning_rate": 8.498471025777188e-05, "loss": 8.8752, "step": 6615 }, { "epoch": 0.2757700804468342, "grad_norm": 612.0, "learning_rate": 8.497988744067746e-05, "loss": 15.8201, "step": 6616 }, { "epoch": 0.2758117627443625, "grad_norm": 324.0, "learning_rate": 8.49750639860715e-05, "loss": 14.1257, "step": 6617 }, { "epoch": 0.2758534450418907, "grad_norm": 170.0, "learning_rate": 8.497023989404191e-05, "loss": 10.0003, "step": 6618 }, { "epoch": 0.27589512733941896, "grad_norm": 127.0, "learning_rate": 8.496541516467657e-05, "loss": 8.6889, "step": 6619 }, { "epoch": 0.27593680963694717, "grad_norm": 200.0, "learning_rate": 8.496058979806346e-05, "loss": 11.7502, "step": 6620 }, { "epoch": 0.27597849193447543, "grad_norm": 380.0, "learning_rate": 8.495576379429047e-05, "loss": 16.2503, "step": 6621 }, { "epoch": 0.27602017423200365, "grad_norm": 272.0, "learning_rate": 8.495093715344559e-05, "loss": 12.3753, "step": 6622 }, { "epoch": 0.2760618565295319, "grad_norm": 157.0, "learning_rate": 8.494610987561678e-05, "loss": 9.6254, "step": 6623 }, { "epoch": 0.2761035388270601, "grad_norm": 256.0, "learning_rate": 8.494128196089201e-05, "loss": 12.0006, "step": 6624 }, { "epoch": 0.2761452211245884, "grad_norm": 736.0, "learning_rate": 8.493645340935928e-05, "loss": 21.0005, "step": 6625 }, { "epoch": 0.2761869034221166, "grad_norm": 226.0, "learning_rate": 8.493162422110658e-05, "loss": 10.8753, "step": 6626 }, { "epoch": 0.27622858571964487, "grad_norm": 480.0, "learning_rate": 8.492679439622193e-05, "loss": 18.2503, "step": 6627 }, { "epoch": 0.2762702680171731, "grad_norm": 390.0, "learning_rate": 8.492196393479336e-05, "loss": 15.1255, "step": 6628 }, { "epoch": 0.27631195031470135, "grad_norm": 232.0, "learning_rate": 8.49171328369089e-05, "loss": 11.188, "step": 6629 }, { "epoch": 0.27635363261222956, "grad_norm": 548.0, "learning_rate": 8.491230110265658e-05, "loss": 17.3772, "step": 6630 }, { "epoch": 0.2763953149097578, "grad_norm": 380.0, "learning_rate": 8.490746873212448e-05, "loss": 15.6253, "step": 6631 }, { "epoch": 0.27643699720728604, "grad_norm": 282.0, "learning_rate": 8.490263572540066e-05, "loss": 12.8751, "step": 6632 }, { "epoch": 0.2764786795048143, "grad_norm": 324.0, "learning_rate": 8.489780208257321e-05, "loss": 14.0634, "step": 6633 }, { "epoch": 0.2765203618023425, "grad_norm": 348.0, "learning_rate": 8.489296780373022e-05, "loss": 14.1257, "step": 6634 }, { "epoch": 0.2765620440998708, "grad_norm": 436.0, "learning_rate": 8.488813288895978e-05, "loss": 16.1253, "step": 6635 }, { "epoch": 0.27660372639739905, "grad_norm": 434.0, "learning_rate": 8.488329733835003e-05, "loss": 16.2503, "step": 6636 }, { "epoch": 0.27664540869492726, "grad_norm": 396.0, "learning_rate": 8.48784611519891e-05, "loss": 14.5003, "step": 6637 }, { "epoch": 0.27668709099245553, "grad_norm": 101.0, "learning_rate": 8.487362432996511e-05, "loss": 9.3755, "step": 6638 }, { "epoch": 0.27672877328998374, "grad_norm": 436.0, "learning_rate": 8.486878687236622e-05, "loss": 14.3787, "step": 6639 }, { "epoch": 0.276770455587512, "grad_norm": 166.0, "learning_rate": 8.48639487792806e-05, "loss": 9.3137, "step": 6640 }, { "epoch": 0.2768121378850402, "grad_norm": 67.0, "learning_rate": 8.48591100507964e-05, "loss": 9.7508, "step": 6641 }, { "epoch": 0.2768538201825685, "grad_norm": 268.0, "learning_rate": 8.485427068700185e-05, "loss": 12.2525, "step": 6642 }, { "epoch": 0.2768955024800967, "grad_norm": 396.0, "learning_rate": 8.484943068798511e-05, "loss": 15.5626, "step": 6643 }, { "epoch": 0.27693718477762497, "grad_norm": 300.0, "learning_rate": 8.484459005383441e-05, "loss": 14.1255, "step": 6644 }, { "epoch": 0.2769788670751532, "grad_norm": 124.5, "learning_rate": 8.483974878463794e-05, "loss": 10.8127, "step": 6645 }, { "epoch": 0.27702054937268145, "grad_norm": 464.0, "learning_rate": 8.483490688048399e-05, "loss": 18.2509, "step": 6646 }, { "epoch": 0.27706223167020966, "grad_norm": 87.0, "learning_rate": 8.483006434146075e-05, "loss": 9.6896, "step": 6647 }, { "epoch": 0.2771039139677379, "grad_norm": 146.0, "learning_rate": 8.482522116765648e-05, "loss": 10.6255, "step": 6648 }, { "epoch": 0.27714559626526614, "grad_norm": 560.0, "learning_rate": 8.482037735915948e-05, "loss": 19.5023, "step": 6649 }, { "epoch": 0.2771872785627944, "grad_norm": 48.0, "learning_rate": 8.481553291605801e-05, "loss": 7.469, "step": 6650 }, { "epoch": 0.2772289608603226, "grad_norm": 148.0, "learning_rate": 8.481068783844038e-05, "loss": 9.9377, "step": 6651 }, { "epoch": 0.2772706431578509, "grad_norm": 408.0, "learning_rate": 8.480584212639483e-05, "loss": 16.6259, "step": 6652 }, { "epoch": 0.2773123254553791, "grad_norm": 1144.0, "learning_rate": 8.480099578000976e-05, "loss": 30.7512, "step": 6653 }, { "epoch": 0.27735400775290736, "grad_norm": 268.0, "learning_rate": 8.479614879937344e-05, "loss": 12.0627, "step": 6654 }, { "epoch": 0.2773956900504356, "grad_norm": 326.0, "learning_rate": 8.479130118457421e-05, "loss": 13.8127, "step": 6655 }, { "epoch": 0.27743737234796384, "grad_norm": 189.0, "learning_rate": 8.478645293570045e-05, "loss": 11.0004, "step": 6656 }, { "epoch": 0.27747905464549205, "grad_norm": 548.0, "learning_rate": 8.478160405284046e-05, "loss": 18.256, "step": 6657 }, { "epoch": 0.2775207369430203, "grad_norm": 121.5, "learning_rate": 8.477675453608268e-05, "loss": 9.3753, "step": 6658 }, { "epoch": 0.27756241924054853, "grad_norm": 102.0, "learning_rate": 8.477190438551546e-05, "loss": 7.7814, "step": 6659 }, { "epoch": 0.2776041015380768, "grad_norm": 338.0, "learning_rate": 8.476705360122717e-05, "loss": 14.4377, "step": 6660 }, { "epoch": 0.277645783835605, "grad_norm": 812.0, "learning_rate": 8.476220218330626e-05, "loss": 23.5003, "step": 6661 }, { "epoch": 0.2776874661331333, "grad_norm": 704.0, "learning_rate": 8.475735013184114e-05, "loss": 21.2504, "step": 6662 }, { "epoch": 0.2777291484306615, "grad_norm": 230.0, "learning_rate": 8.475249744692021e-05, "loss": 10.3765, "step": 6663 }, { "epoch": 0.27777083072818975, "grad_norm": 398.0, "learning_rate": 8.474764412863194e-05, "loss": 14.4379, "step": 6664 }, { "epoch": 0.27781251302571797, "grad_norm": 380.0, "learning_rate": 8.474279017706475e-05, "loss": 15.7505, "step": 6665 }, { "epoch": 0.27785419532324623, "grad_norm": 456.0, "learning_rate": 8.473793559230714e-05, "loss": 16.7518, "step": 6666 }, { "epoch": 0.27789587762077445, "grad_norm": 540.0, "learning_rate": 8.473308037444758e-05, "loss": 15.7502, "step": 6667 }, { "epoch": 0.2779375599183027, "grad_norm": 520.0, "learning_rate": 8.472822452357454e-05, "loss": 16.3753, "step": 6668 }, { "epoch": 0.2779792422158309, "grad_norm": 268.0, "learning_rate": 8.472336803977652e-05, "loss": 11.8141, "step": 6669 }, { "epoch": 0.2780209245133592, "grad_norm": 207.0, "learning_rate": 8.471851092314204e-05, "loss": 13.1256, "step": 6670 }, { "epoch": 0.2780626068108874, "grad_norm": 342.0, "learning_rate": 8.471365317375961e-05, "loss": 13.2501, "step": 6671 }, { "epoch": 0.27810428910841567, "grad_norm": 238.0, "learning_rate": 8.470879479171778e-05, "loss": 11.7501, "step": 6672 }, { "epoch": 0.2781459714059439, "grad_norm": 350.0, "learning_rate": 8.470393577710507e-05, "loss": 15.0627, "step": 6673 }, { "epoch": 0.27818765370347215, "grad_norm": 304.0, "learning_rate": 8.469907613001006e-05, "loss": 12.6255, "step": 6674 }, { "epoch": 0.27822933600100036, "grad_norm": 227.0, "learning_rate": 8.469421585052131e-05, "loss": 8.6879, "step": 6675 }, { "epoch": 0.2782710182985286, "grad_norm": 288.0, "learning_rate": 8.468935493872738e-05, "loss": 13.7512, "step": 6676 }, { "epoch": 0.27831270059605684, "grad_norm": 1272.0, "learning_rate": 8.468449339471689e-05, "loss": 30.0007, "step": 6677 }, { "epoch": 0.2783543828935851, "grad_norm": 245.0, "learning_rate": 8.467963121857843e-05, "loss": 11.6877, "step": 6678 }, { "epoch": 0.2783960651911133, "grad_norm": 132.0, "learning_rate": 8.467476841040061e-05, "loss": 7.0326, "step": 6679 }, { "epoch": 0.2784377474886416, "grad_norm": 158.0, "learning_rate": 8.466990497027204e-05, "loss": 7.7194, "step": 6680 }, { "epoch": 0.2784794297861698, "grad_norm": 406.0, "learning_rate": 8.46650408982814e-05, "loss": 15.3752, "step": 6681 }, { "epoch": 0.27852111208369806, "grad_norm": 366.0, "learning_rate": 8.466017619451729e-05, "loss": 14.5017, "step": 6682 }, { "epoch": 0.2785627943812263, "grad_norm": 376.0, "learning_rate": 8.465531085906842e-05, "loss": 16.0002, "step": 6683 }, { "epoch": 0.27860447667875454, "grad_norm": 184.0, "learning_rate": 8.46504448920234e-05, "loss": 11.0631, "step": 6684 }, { "epoch": 0.27864615897628275, "grad_norm": 324.0, "learning_rate": 8.464557829347097e-05, "loss": 13.8757, "step": 6685 }, { "epoch": 0.278687841273811, "grad_norm": 214.0, "learning_rate": 8.46407110634998e-05, "loss": 11.5003, "step": 6686 }, { "epoch": 0.27872952357133923, "grad_norm": 96.0, "learning_rate": 8.46358432021986e-05, "loss": 10.3755, "step": 6687 }, { "epoch": 0.2787712058688675, "grad_norm": 254.0, "learning_rate": 8.463097470965607e-05, "loss": 12.0628, "step": 6688 }, { "epoch": 0.2788128881663957, "grad_norm": 374.0, "learning_rate": 8.462610558596094e-05, "loss": 14.2504, "step": 6689 }, { "epoch": 0.278854570463924, "grad_norm": 120.0, "learning_rate": 8.462123583120198e-05, "loss": 9.3127, "step": 6690 }, { "epoch": 0.2788962527614522, "grad_norm": 482.0, "learning_rate": 8.461636544546792e-05, "loss": 16.3798, "step": 6691 }, { "epoch": 0.27893793505898046, "grad_norm": 772.0, "learning_rate": 8.461149442884752e-05, "loss": 23.2535, "step": 6692 }, { "epoch": 0.27897961735650867, "grad_norm": 532.0, "learning_rate": 8.460662278142957e-05, "loss": 15.8128, "step": 6693 }, { "epoch": 0.27902129965403694, "grad_norm": 342.0, "learning_rate": 8.460175050330284e-05, "loss": 12.1877, "step": 6694 }, { "epoch": 0.27906298195156515, "grad_norm": 466.0, "learning_rate": 8.459687759455615e-05, "loss": 17.3755, "step": 6695 }, { "epoch": 0.2791046642490934, "grad_norm": 242.0, "learning_rate": 8.459200405527827e-05, "loss": 11.3752, "step": 6696 }, { "epoch": 0.2791463465466216, "grad_norm": 213.0, "learning_rate": 8.458712988555807e-05, "loss": 10.7502, "step": 6697 }, { "epoch": 0.2791880288441499, "grad_norm": 1144.0, "learning_rate": 8.458225508548434e-05, "loss": 22.3802, "step": 6698 }, { "epoch": 0.2792297111416781, "grad_norm": 288.0, "learning_rate": 8.457737965514596e-05, "loss": 13.2503, "step": 6699 }, { "epoch": 0.2792713934392064, "grad_norm": 552.0, "learning_rate": 8.457250359463176e-05, "loss": 17.5017, "step": 6700 }, { "epoch": 0.2793130757367346, "grad_norm": 382.0, "learning_rate": 8.456762690403059e-05, "loss": 15.6877, "step": 6701 }, { "epoch": 0.27935475803426285, "grad_norm": 112.5, "learning_rate": 8.456274958343137e-05, "loss": 10.2504, "step": 6702 }, { "epoch": 0.27939644033179106, "grad_norm": 1576.0, "learning_rate": 8.455787163292297e-05, "loss": 30.5052, "step": 6703 }, { "epoch": 0.27943812262931933, "grad_norm": 255.0, "learning_rate": 8.45529930525943e-05, "loss": 12.1252, "step": 6704 }, { "epoch": 0.27947980492684754, "grad_norm": 308.0, "learning_rate": 8.454811384253425e-05, "loss": 15.3127, "step": 6705 }, { "epoch": 0.2795214872243758, "grad_norm": 350.0, "learning_rate": 8.454323400283177e-05, "loss": 14.8134, "step": 6706 }, { "epoch": 0.279563169521904, "grad_norm": 135.0, "learning_rate": 8.453835353357578e-05, "loss": 8.5003, "step": 6707 }, { "epoch": 0.2796048518194323, "grad_norm": 1168.0, "learning_rate": 8.453347243485522e-05, "loss": 26.1253, "step": 6708 }, { "epoch": 0.27964653411696055, "grad_norm": 492.0, "learning_rate": 8.452859070675908e-05, "loss": 14.0033, "step": 6709 }, { "epoch": 0.27968821641448877, "grad_norm": 640.0, "learning_rate": 8.452370834937628e-05, "loss": 19.5003, "step": 6710 }, { "epoch": 0.27972989871201703, "grad_norm": 227.0, "learning_rate": 8.451882536279586e-05, "loss": 12.1879, "step": 6711 }, { "epoch": 0.27977158100954524, "grad_norm": 97.5, "learning_rate": 8.451394174710677e-05, "loss": 8.3753, "step": 6712 }, { "epoch": 0.2798132633070735, "grad_norm": 91.5, "learning_rate": 8.450905750239803e-05, "loss": 8.3754, "step": 6713 }, { "epoch": 0.2798549456046017, "grad_norm": 360.0, "learning_rate": 8.450417262875865e-05, "loss": 14.7503, "step": 6714 }, { "epoch": 0.27989662790213, "grad_norm": 812.0, "learning_rate": 8.449928712627766e-05, "loss": 20.8751, "step": 6715 }, { "epoch": 0.2799383101996582, "grad_norm": 462.0, "learning_rate": 8.44944009950441e-05, "loss": 16.1282, "step": 6716 }, { "epoch": 0.27997999249718647, "grad_norm": 548.0, "learning_rate": 8.448951423514702e-05, "loss": 19.5007, "step": 6717 }, { "epoch": 0.2800216747947147, "grad_norm": 172.0, "learning_rate": 8.448462684667549e-05, "loss": 11.5007, "step": 6718 }, { "epoch": 0.28006335709224295, "grad_norm": 434.0, "learning_rate": 8.447973882971856e-05, "loss": 14.9376, "step": 6719 }, { "epoch": 0.28010503938977116, "grad_norm": 250.0, "learning_rate": 8.447485018436534e-05, "loss": 12.4388, "step": 6720 }, { "epoch": 0.2801467216872994, "grad_norm": 308.0, "learning_rate": 8.446996091070491e-05, "loss": 14.1254, "step": 6721 }, { "epoch": 0.28018840398482764, "grad_norm": 78.0, "learning_rate": 8.44650710088264e-05, "loss": 7.2195, "step": 6722 }, { "epoch": 0.2802300862823559, "grad_norm": 153.0, "learning_rate": 8.446018047881889e-05, "loss": 10.3776, "step": 6723 }, { "epoch": 0.2802717685798841, "grad_norm": 138.0, "learning_rate": 8.445528932077154e-05, "loss": 11.5627, "step": 6724 }, { "epoch": 0.2803134508774124, "grad_norm": 350.0, "learning_rate": 8.445039753477347e-05, "loss": 15.5008, "step": 6725 }, { "epoch": 0.2803551331749406, "grad_norm": 988.0, "learning_rate": 8.444550512091384e-05, "loss": 22.5055, "step": 6726 }, { "epoch": 0.28039681547246886, "grad_norm": 436.0, "learning_rate": 8.444061207928186e-05, "loss": 15.0628, "step": 6727 }, { "epoch": 0.2804384977699971, "grad_norm": 1216.0, "learning_rate": 8.443571840996665e-05, "loss": 29.3757, "step": 6728 }, { "epoch": 0.28048018006752534, "grad_norm": 504.0, "learning_rate": 8.443082411305741e-05, "loss": 13.9377, "step": 6729 }, { "epoch": 0.28052186236505355, "grad_norm": 912.0, "learning_rate": 8.442592918864334e-05, "loss": 25.2503, "step": 6730 }, { "epoch": 0.2805635446625818, "grad_norm": 243.0, "learning_rate": 8.442103363681367e-05, "loss": 13.1254, "step": 6731 }, { "epoch": 0.28060522696011003, "grad_norm": 420.0, "learning_rate": 8.441613745765759e-05, "loss": 17.3753, "step": 6732 }, { "epoch": 0.2806469092576383, "grad_norm": 58.25, "learning_rate": 8.441124065126434e-05, "loss": 8.3127, "step": 6733 }, { "epoch": 0.2806885915551665, "grad_norm": 370.0, "learning_rate": 8.440634321772321e-05, "loss": 14.5627, "step": 6734 }, { "epoch": 0.2807302738526948, "grad_norm": 195.0, "learning_rate": 8.440144515712338e-05, "loss": 9.4377, "step": 6735 }, { "epoch": 0.280771956150223, "grad_norm": 358.0, "learning_rate": 8.439654646955419e-05, "loss": 13.8127, "step": 6736 }, { "epoch": 0.28081363844775126, "grad_norm": 318.0, "learning_rate": 8.439164715510488e-05, "loss": 14.7503, "step": 6737 }, { "epoch": 0.28085532074527947, "grad_norm": 278.0, "learning_rate": 8.438674721386473e-05, "loss": 13.2502, "step": 6738 }, { "epoch": 0.28089700304280774, "grad_norm": 436.0, "learning_rate": 8.438184664592308e-05, "loss": 15.1265, "step": 6739 }, { "epoch": 0.28093868534033595, "grad_norm": 253.0, "learning_rate": 8.437694545136922e-05, "loss": 11.8131, "step": 6740 }, { "epoch": 0.2809803676378642, "grad_norm": 298.0, "learning_rate": 8.437204363029248e-05, "loss": 11.6261, "step": 6741 }, { "epoch": 0.2810220499353924, "grad_norm": 324.0, "learning_rate": 8.436714118278217e-05, "loss": 13.6251, "step": 6742 }, { "epoch": 0.2810637322329207, "grad_norm": 296.0, "learning_rate": 8.436223810892768e-05, "loss": 13.3752, "step": 6743 }, { "epoch": 0.2811054145304489, "grad_norm": 75.5, "learning_rate": 8.435733440881835e-05, "loss": 9.4386, "step": 6744 }, { "epoch": 0.28114709682797717, "grad_norm": 420.0, "learning_rate": 8.435243008254355e-05, "loss": 15.1882, "step": 6745 }, { "epoch": 0.2811887791255054, "grad_norm": 314.0, "learning_rate": 8.434752513019266e-05, "loss": 13.0003, "step": 6746 }, { "epoch": 0.28123046142303365, "grad_norm": 98.0, "learning_rate": 8.434261955185508e-05, "loss": 8.0628, "step": 6747 }, { "epoch": 0.28127214372056186, "grad_norm": 143.0, "learning_rate": 8.43377133476202e-05, "loss": 8.8755, "step": 6748 }, { "epoch": 0.28131382601809013, "grad_norm": 208.0, "learning_rate": 8.433280651757745e-05, "loss": 11.7502, "step": 6749 }, { "epoch": 0.28135550831561834, "grad_norm": 600.0, "learning_rate": 8.432789906181627e-05, "loss": 19.5003, "step": 6750 }, { "epoch": 0.2813971906131466, "grad_norm": 948.0, "learning_rate": 8.432299098042605e-05, "loss": 21.7553, "step": 6751 }, { "epoch": 0.2814388729106748, "grad_norm": 374.0, "learning_rate": 8.431808227349629e-05, "loss": 14.8129, "step": 6752 }, { "epoch": 0.2814805552082031, "grad_norm": 232.0, "learning_rate": 8.431317294111643e-05, "loss": 12.2505, "step": 6753 }, { "epoch": 0.2815222375057313, "grad_norm": 312.0, "learning_rate": 8.430826298337595e-05, "loss": 14.0003, "step": 6754 }, { "epoch": 0.28156391980325957, "grad_norm": 636.0, "learning_rate": 8.430335240036434e-05, "loss": 17.1292, "step": 6755 }, { "epoch": 0.2816056021007878, "grad_norm": 408.0, "learning_rate": 8.429844119217108e-05, "loss": 16.2512, "step": 6756 }, { "epoch": 0.28164728439831604, "grad_norm": 388.0, "learning_rate": 8.429352935888568e-05, "loss": 14.5009, "step": 6757 }, { "epoch": 0.28168896669584426, "grad_norm": 1112.0, "learning_rate": 8.428861690059767e-05, "loss": 23.7559, "step": 6758 }, { "epoch": 0.2817306489933725, "grad_norm": 89.5, "learning_rate": 8.428370381739657e-05, "loss": 8.5005, "step": 6759 }, { "epoch": 0.28177233129090073, "grad_norm": 552.0, "learning_rate": 8.427879010937191e-05, "loss": 16.3751, "step": 6760 }, { "epoch": 0.281814013588429, "grad_norm": 676.0, "learning_rate": 8.427387577661328e-05, "loss": 18.6253, "step": 6761 }, { "epoch": 0.2818556958859572, "grad_norm": 508.0, "learning_rate": 8.426896081921022e-05, "loss": 17.7505, "step": 6762 }, { "epoch": 0.2818973781834855, "grad_norm": 600.0, "learning_rate": 8.42640452372523e-05, "loss": 18.1257, "step": 6763 }, { "epoch": 0.2819390604810137, "grad_norm": 358.0, "learning_rate": 8.42591290308291e-05, "loss": 15.0003, "step": 6764 }, { "epoch": 0.28198074277854196, "grad_norm": 250.0, "learning_rate": 8.425421220003025e-05, "loss": 11.2506, "step": 6765 }, { "epoch": 0.28202242507607017, "grad_norm": 187.0, "learning_rate": 8.424929474494534e-05, "loss": 12.1887, "step": 6766 }, { "epoch": 0.28206410737359844, "grad_norm": 664.0, "learning_rate": 8.424437666566399e-05, "loss": 18.0052, "step": 6767 }, { "epoch": 0.28210578967112665, "grad_norm": 174.0, "learning_rate": 8.423945796227584e-05, "loss": 10.8134, "step": 6768 }, { "epoch": 0.2821474719686549, "grad_norm": 868.0, "learning_rate": 8.423453863487052e-05, "loss": 25.5002, "step": 6769 }, { "epoch": 0.28218915426618313, "grad_norm": 205.0, "learning_rate": 8.422961868353772e-05, "loss": 10.0628, "step": 6770 }, { "epoch": 0.2822308365637114, "grad_norm": 808.0, "learning_rate": 8.422469810836704e-05, "loss": 21.7505, "step": 6771 }, { "epoch": 0.2822725188612396, "grad_norm": 171.0, "learning_rate": 8.421977690944822e-05, "loss": 10.5005, "step": 6772 }, { "epoch": 0.2823142011587679, "grad_norm": 960.0, "learning_rate": 8.421485508687093e-05, "loss": 30.7504, "step": 6773 }, { "epoch": 0.2823558834562961, "grad_norm": 93.0, "learning_rate": 8.420993264072488e-05, "loss": 9.3133, "step": 6774 }, { "epoch": 0.28239756575382435, "grad_norm": 540.0, "learning_rate": 8.420500957109974e-05, "loss": 17.8754, "step": 6775 }, { "epoch": 0.28243924805135256, "grad_norm": 183.0, "learning_rate": 8.420008587808528e-05, "loss": 11.6881, "step": 6776 }, { "epoch": 0.28248093034888083, "grad_norm": 1024.0, "learning_rate": 8.419516156177123e-05, "loss": 23.8791, "step": 6777 }, { "epoch": 0.28252261264640904, "grad_norm": 260.0, "learning_rate": 8.419023662224731e-05, "loss": 11.938, "step": 6778 }, { "epoch": 0.2825642949439373, "grad_norm": 352.0, "learning_rate": 8.41853110596033e-05, "loss": 14.1252, "step": 6779 }, { "epoch": 0.2826059772414655, "grad_norm": 636.0, "learning_rate": 8.418038487392895e-05, "loss": 18.1301, "step": 6780 }, { "epoch": 0.2826476595389938, "grad_norm": 652.0, "learning_rate": 8.417545806531406e-05, "loss": 19.3752, "step": 6781 }, { "epoch": 0.28268934183652206, "grad_norm": 408.0, "learning_rate": 8.417053063384841e-05, "loss": 14.8136, "step": 6782 }, { "epoch": 0.28273102413405027, "grad_norm": 288.0, "learning_rate": 8.416560257962181e-05, "loss": 10.9435, "step": 6783 }, { "epoch": 0.28277270643157854, "grad_norm": 223.0, "learning_rate": 8.416067390272406e-05, "loss": 9.313, "step": 6784 }, { "epoch": 0.28281438872910675, "grad_norm": 860.0, "learning_rate": 8.415574460324501e-05, "loss": 28.3752, "step": 6785 }, { "epoch": 0.282856071026635, "grad_norm": 1072.0, "learning_rate": 8.415081468127448e-05, "loss": 27.5009, "step": 6786 }, { "epoch": 0.2828977533241632, "grad_norm": 250.0, "learning_rate": 8.414588413690232e-05, "loss": 13.3753, "step": 6787 }, { "epoch": 0.2829394356216915, "grad_norm": 239.0, "learning_rate": 8.41409529702184e-05, "loss": 11.1255, "step": 6788 }, { "epoch": 0.2829811179192197, "grad_norm": 56.75, "learning_rate": 8.413602118131256e-05, "loss": 6.7502, "step": 6789 }, { "epoch": 0.28302280021674797, "grad_norm": 1352.0, "learning_rate": 8.413108877027471e-05, "loss": 30.13, "step": 6790 }, { "epoch": 0.2830644825142762, "grad_norm": 400.0, "learning_rate": 8.412615573719473e-05, "loss": 14.6877, "step": 6791 }, { "epoch": 0.28310616481180445, "grad_norm": 348.0, "learning_rate": 8.412122208216256e-05, "loss": 16.0003, "step": 6792 }, { "epoch": 0.28314784710933266, "grad_norm": 262.0, "learning_rate": 8.411628780526805e-05, "loss": 11.9378, "step": 6793 }, { "epoch": 0.28318952940686093, "grad_norm": 474.0, "learning_rate": 8.41113529066012e-05, "loss": 17.5007, "step": 6794 }, { "epoch": 0.28323121170438914, "grad_norm": 210.0, "learning_rate": 8.41064173862519e-05, "loss": 10.4379, "step": 6795 }, { "epoch": 0.2832728940019174, "grad_norm": 600.0, "learning_rate": 8.410148124431012e-05, "loss": 18.3776, "step": 6796 }, { "epoch": 0.2833145762994456, "grad_norm": 352.0, "learning_rate": 8.409654448086582e-05, "loss": 14.3127, "step": 6797 }, { "epoch": 0.2833562585969739, "grad_norm": 238.0, "learning_rate": 8.409160709600895e-05, "loss": 12.3128, "step": 6798 }, { "epoch": 0.2833979408945021, "grad_norm": 608.0, "learning_rate": 8.408666908982954e-05, "loss": 19.376, "step": 6799 }, { "epoch": 0.28343962319203037, "grad_norm": 282.0, "learning_rate": 8.408173046241755e-05, "loss": 13.5004, "step": 6800 }, { "epoch": 0.2834813054895586, "grad_norm": 204.0, "learning_rate": 8.407679121386298e-05, "loss": 12.0009, "step": 6801 }, { "epoch": 0.28352298778708684, "grad_norm": 169.0, "learning_rate": 8.407185134425588e-05, "loss": 9.938, "step": 6802 }, { "epoch": 0.28356467008461506, "grad_norm": 300.0, "learning_rate": 8.406691085368628e-05, "loss": 14.5004, "step": 6803 }, { "epoch": 0.2836063523821433, "grad_norm": 302.0, "learning_rate": 8.406196974224418e-05, "loss": 13.1252, "step": 6804 }, { "epoch": 0.28364803467967153, "grad_norm": 180.0, "learning_rate": 8.405702801001966e-05, "loss": 11.8752, "step": 6805 }, { "epoch": 0.2836897169771998, "grad_norm": 66.5, "learning_rate": 8.405208565710279e-05, "loss": 8.438, "step": 6806 }, { "epoch": 0.283731399274728, "grad_norm": 420.0, "learning_rate": 8.404714268358365e-05, "loss": 16.2504, "step": 6807 }, { "epoch": 0.2837730815722563, "grad_norm": 392.0, "learning_rate": 8.404219908955229e-05, "loss": 15.688, "step": 6808 }, { "epoch": 0.2838147638697845, "grad_norm": 324.0, "learning_rate": 8.403725487509883e-05, "loss": 13.7502, "step": 6809 }, { "epoch": 0.28385644616731276, "grad_norm": 532.0, "learning_rate": 8.40323100403134e-05, "loss": 18.5007, "step": 6810 }, { "epoch": 0.28389812846484097, "grad_norm": 804.0, "learning_rate": 8.402736458528607e-05, "loss": 20.1254, "step": 6811 }, { "epoch": 0.28393981076236924, "grad_norm": 692.0, "learning_rate": 8.402241851010701e-05, "loss": 20.6256, "step": 6812 }, { "epoch": 0.28398149305989745, "grad_norm": 255.0, "learning_rate": 8.401747181486637e-05, "loss": 13.1261, "step": 6813 }, { "epoch": 0.2840231753574257, "grad_norm": 492.0, "learning_rate": 8.401252449965426e-05, "loss": 17.1255, "step": 6814 }, { "epoch": 0.28406485765495393, "grad_norm": 356.0, "learning_rate": 8.400757656456089e-05, "loss": 14.192, "step": 6815 }, { "epoch": 0.2841065399524822, "grad_norm": 229.0, "learning_rate": 8.400262800967641e-05, "loss": 12.1883, "step": 6816 }, { "epoch": 0.2841482222500104, "grad_norm": 792.0, "learning_rate": 8.399767883509102e-05, "loss": 22.8762, "step": 6817 }, { "epoch": 0.2841899045475387, "grad_norm": 222.0, "learning_rate": 8.399272904089492e-05, "loss": 12.3752, "step": 6818 }, { "epoch": 0.2842315868450669, "grad_norm": 310.0, "learning_rate": 8.39877786271783e-05, "loss": 14.4377, "step": 6819 }, { "epoch": 0.28427326914259515, "grad_norm": 306.0, "learning_rate": 8.398282759403141e-05, "loss": 13.0002, "step": 6820 }, { "epoch": 0.28431495144012336, "grad_norm": 366.0, "learning_rate": 8.397787594154446e-05, "loss": 14.6252, "step": 6821 }, { "epoch": 0.28435663373765163, "grad_norm": 476.0, "learning_rate": 8.397292366980772e-05, "loss": 16.3752, "step": 6822 }, { "epoch": 0.28439831603517984, "grad_norm": 316.0, "learning_rate": 8.39679707789114e-05, "loss": 15.0002, "step": 6823 }, { "epoch": 0.2844399983327081, "grad_norm": 896.0, "learning_rate": 8.396301726894583e-05, "loss": 21.6329, "step": 6824 }, { "epoch": 0.2844816806302363, "grad_norm": 209.0, "learning_rate": 8.395806314000126e-05, "loss": 10.3755, "step": 6825 }, { "epoch": 0.2845233629277646, "grad_norm": 402.0, "learning_rate": 8.395310839216795e-05, "loss": 15.3128, "step": 6826 }, { "epoch": 0.2845650452252928, "grad_norm": 179.0, "learning_rate": 8.394815302553623e-05, "loss": 11.2502, "step": 6827 }, { "epoch": 0.28460672752282107, "grad_norm": 328.0, "learning_rate": 8.394319704019641e-05, "loss": 13.1878, "step": 6828 }, { "epoch": 0.2846484098203493, "grad_norm": 196.0, "learning_rate": 8.393824043623881e-05, "loss": 10.1258, "step": 6829 }, { "epoch": 0.28469009211787755, "grad_norm": 376.0, "learning_rate": 8.393328321375377e-05, "loss": 14.938, "step": 6830 }, { "epoch": 0.28473177441540576, "grad_norm": 440.0, "learning_rate": 8.392832537283162e-05, "loss": 15.127, "step": 6831 }, { "epoch": 0.284773456712934, "grad_norm": 160.0, "learning_rate": 8.392336691356275e-05, "loss": 8.0627, "step": 6832 }, { "epoch": 0.28481513901046224, "grad_norm": 432.0, "learning_rate": 8.391840783603749e-05, "loss": 14.6881, "step": 6833 }, { "epoch": 0.2848568213079905, "grad_norm": 318.0, "learning_rate": 8.391344814034622e-05, "loss": 11.2502, "step": 6834 }, { "epoch": 0.2848985036055187, "grad_norm": 396.0, "learning_rate": 8.390848782657938e-05, "loss": 15.1254, "step": 6835 }, { "epoch": 0.284940185903047, "grad_norm": 340.0, "learning_rate": 8.390352689482733e-05, "loss": 14.2512, "step": 6836 }, { "epoch": 0.2849818682005752, "grad_norm": 378.0, "learning_rate": 8.389856534518048e-05, "loss": 14.1252, "step": 6837 }, { "epoch": 0.28502355049810346, "grad_norm": 198.0, "learning_rate": 8.389360317772927e-05, "loss": 10.2503, "step": 6838 }, { "epoch": 0.2850652327956317, "grad_norm": 564.0, "learning_rate": 8.388864039256414e-05, "loss": 17.2504, "step": 6839 }, { "epoch": 0.28510691509315994, "grad_norm": 672.0, "learning_rate": 8.388367698977554e-05, "loss": 21.2502, "step": 6840 }, { "epoch": 0.28514859739068815, "grad_norm": 241.0, "learning_rate": 8.387871296945391e-05, "loss": 12.5631, "step": 6841 }, { "epoch": 0.2851902796882164, "grad_norm": 486.0, "learning_rate": 8.387374833168973e-05, "loss": 16.0007, "step": 6842 }, { "epoch": 0.28523196198574463, "grad_norm": 262.0, "learning_rate": 8.386878307657346e-05, "loss": 13.0628, "step": 6843 }, { "epoch": 0.2852736442832729, "grad_norm": 424.0, "learning_rate": 8.386381720419564e-05, "loss": 13.5039, "step": 6844 }, { "epoch": 0.2853153265808011, "grad_norm": 276.0, "learning_rate": 8.385885071464673e-05, "loss": 13.1253, "step": 6845 }, { "epoch": 0.2853570088783294, "grad_norm": 368.0, "learning_rate": 8.385388360801727e-05, "loss": 13.8752, "step": 6846 }, { "epoch": 0.2853986911758576, "grad_norm": 478.0, "learning_rate": 8.384891588439776e-05, "loss": 16.6253, "step": 6847 }, { "epoch": 0.28544037347338586, "grad_norm": 568.0, "learning_rate": 8.384394754387876e-05, "loss": 16.2557, "step": 6848 }, { "epoch": 0.28548205577091407, "grad_norm": 460.0, "learning_rate": 8.383897858655082e-05, "loss": 20.1254, "step": 6849 }, { "epoch": 0.28552373806844233, "grad_norm": 243.0, "learning_rate": 8.383400901250449e-05, "loss": 12.1258, "step": 6850 }, { "epoch": 0.28556542036597055, "grad_norm": 318.0, "learning_rate": 8.382903882183033e-05, "loss": 13.3754, "step": 6851 }, { "epoch": 0.2856071026634988, "grad_norm": 368.0, "learning_rate": 8.382406801461894e-05, "loss": 13.5629, "step": 6852 }, { "epoch": 0.285648784961027, "grad_norm": 1744.0, "learning_rate": 8.381909659096092e-05, "loss": 37.0002, "step": 6853 }, { "epoch": 0.2856904672585553, "grad_norm": 544.0, "learning_rate": 8.381412455094683e-05, "loss": 18.7501, "step": 6854 }, { "epoch": 0.28573214955608356, "grad_norm": 249.0, "learning_rate": 8.380915189466736e-05, "loss": 10.1251, "step": 6855 }, { "epoch": 0.28577383185361177, "grad_norm": 462.0, "learning_rate": 8.380417862221308e-05, "loss": 17.0001, "step": 6856 }, { "epoch": 0.28581551415114004, "grad_norm": 260.0, "learning_rate": 8.379920473367464e-05, "loss": 4.3446, "step": 6857 }, { "epoch": 0.28585719644866825, "grad_norm": 348.0, "learning_rate": 8.379423022914268e-05, "loss": 14.8753, "step": 6858 }, { "epoch": 0.2858988787461965, "grad_norm": 304.0, "learning_rate": 8.378925510870789e-05, "loss": 13.3753, "step": 6859 }, { "epoch": 0.28594056104372473, "grad_norm": 392.0, "learning_rate": 8.378427937246091e-05, "loss": 15.1881, "step": 6860 }, { "epoch": 0.285982243341253, "grad_norm": 428.0, "learning_rate": 8.377930302049246e-05, "loss": 13.1903, "step": 6861 }, { "epoch": 0.2860239256387812, "grad_norm": 328.0, "learning_rate": 8.377432605289321e-05, "loss": 13.8752, "step": 6862 }, { "epoch": 0.2860656079363095, "grad_norm": 298.0, "learning_rate": 8.376934846975386e-05, "loss": 12.4378, "step": 6863 }, { "epoch": 0.2861072902338377, "grad_norm": 482.0, "learning_rate": 8.376437027116515e-05, "loss": 12.5627, "step": 6864 }, { "epoch": 0.28614897253136595, "grad_norm": 133.0, "learning_rate": 8.375939145721778e-05, "loss": 10.1255, "step": 6865 }, { "epoch": 0.28619065482889416, "grad_norm": 366.0, "learning_rate": 8.375441202800252e-05, "loss": 13.7502, "step": 6866 }, { "epoch": 0.28623233712642243, "grad_norm": 640.0, "learning_rate": 8.37494319836101e-05, "loss": 21.6255, "step": 6867 }, { "epoch": 0.28627401942395064, "grad_norm": 364.0, "learning_rate": 8.374445132413127e-05, "loss": 15.3752, "step": 6868 }, { "epoch": 0.2863157017214789, "grad_norm": 210.0, "learning_rate": 8.373947004965684e-05, "loss": 12.1264, "step": 6869 }, { "epoch": 0.2863573840190071, "grad_norm": 648.0, "learning_rate": 8.373448816027756e-05, "loss": 20.3764, "step": 6870 }, { "epoch": 0.2863990663165354, "grad_norm": 1744.0, "learning_rate": 8.372950565608424e-05, "loss": 38.2533, "step": 6871 }, { "epoch": 0.2864407486140636, "grad_norm": 608.0, "learning_rate": 8.372452253716767e-05, "loss": 19.3753, "step": 6872 }, { "epoch": 0.28648243091159187, "grad_norm": 324.0, "learning_rate": 8.371953880361871e-05, "loss": 13.5629, "step": 6873 }, { "epoch": 0.2865241132091201, "grad_norm": 324.0, "learning_rate": 8.371455445552815e-05, "loss": 14.1923, "step": 6874 }, { "epoch": 0.28656579550664835, "grad_norm": 83.0, "learning_rate": 8.370956949298685e-05, "loss": 9.3128, "step": 6875 }, { "epoch": 0.28660747780417656, "grad_norm": 406.0, "learning_rate": 8.370458391608565e-05, "loss": 16.7507, "step": 6876 }, { "epoch": 0.2866491601017048, "grad_norm": 446.0, "learning_rate": 8.369959772491541e-05, "loss": 15.0005, "step": 6877 }, { "epoch": 0.28669084239923304, "grad_norm": 600.0, "learning_rate": 8.369461091956701e-05, "loss": 20.2509, "step": 6878 }, { "epoch": 0.2867325246967613, "grad_norm": 248.0, "learning_rate": 8.368962350013133e-05, "loss": 12.3752, "step": 6879 }, { "epoch": 0.2867742069942895, "grad_norm": 956.0, "learning_rate": 8.368463546669929e-05, "loss": 27.1257, "step": 6880 }, { "epoch": 0.2868158892918178, "grad_norm": 980.0, "learning_rate": 8.367964681936175e-05, "loss": 26.1252, "step": 6881 }, { "epoch": 0.286857571589346, "grad_norm": 278.0, "learning_rate": 8.367465755820969e-05, "loss": 12.6878, "step": 6882 }, { "epoch": 0.28689925388687426, "grad_norm": 784.0, "learning_rate": 8.366966768333398e-05, "loss": 25.7522, "step": 6883 }, { "epoch": 0.2869409361844025, "grad_norm": 286.0, "learning_rate": 8.36646771948256e-05, "loss": 14.1877, "step": 6884 }, { "epoch": 0.28698261848193074, "grad_norm": 92.0, "learning_rate": 8.36596860927755e-05, "loss": 8.7503, "step": 6885 }, { "epoch": 0.28702430077945895, "grad_norm": 76.0, "learning_rate": 8.365469437727461e-05, "loss": 4.9377, "step": 6886 }, { "epoch": 0.2870659830769872, "grad_norm": 204.0, "learning_rate": 8.364970204841394e-05, "loss": 11.5626, "step": 6887 }, { "epoch": 0.28710766537451543, "grad_norm": 270.0, "learning_rate": 8.364470910628446e-05, "loss": 12.5627, "step": 6888 }, { "epoch": 0.2871493476720437, "grad_norm": 588.0, "learning_rate": 8.363971555097717e-05, "loss": 18.376, "step": 6889 }, { "epoch": 0.2871910299695719, "grad_norm": 632.0, "learning_rate": 8.363472138258308e-05, "loss": 20.2506, "step": 6890 }, { "epoch": 0.2872327122671002, "grad_norm": 402.0, "learning_rate": 8.36297266011932e-05, "loss": 14.6252, "step": 6891 }, { "epoch": 0.2872743945646284, "grad_norm": 159.0, "learning_rate": 8.362473120689858e-05, "loss": 7.563, "step": 6892 }, { "epoch": 0.28731607686215666, "grad_norm": 368.0, "learning_rate": 8.361973519979023e-05, "loss": 12.9378, "step": 6893 }, { "epoch": 0.28735775915968487, "grad_norm": 572.0, "learning_rate": 8.361473857995925e-05, "loss": 20.1251, "step": 6894 }, { "epoch": 0.28739944145721313, "grad_norm": 198.0, "learning_rate": 8.360974134749665e-05, "loss": 12.3127, "step": 6895 }, { "epoch": 0.28744112375474135, "grad_norm": 498.0, "learning_rate": 8.360474350249355e-05, "loss": 17.8757, "step": 6896 }, { "epoch": 0.2874828060522696, "grad_norm": 384.0, "learning_rate": 8.3599745045041e-05, "loss": 14.3127, "step": 6897 }, { "epoch": 0.2875244883497978, "grad_norm": 286.0, "learning_rate": 8.359474597523011e-05, "loss": 12.1254, "step": 6898 }, { "epoch": 0.2875661706473261, "grad_norm": 318.0, "learning_rate": 8.358974629315201e-05, "loss": 11.1254, "step": 6899 }, { "epoch": 0.2876078529448543, "grad_norm": 636.0, "learning_rate": 8.358474599889778e-05, "loss": 18.7517, "step": 6900 }, { "epoch": 0.28764953524238257, "grad_norm": 276.0, "learning_rate": 8.35797450925586e-05, "loss": 13.5004, "step": 6901 }, { "epoch": 0.2876912175399108, "grad_norm": 65.5, "learning_rate": 8.357474357422557e-05, "loss": 8.4379, "step": 6902 }, { "epoch": 0.28773289983743905, "grad_norm": 165.0, "learning_rate": 8.356974144398986e-05, "loss": 10.5627, "step": 6903 }, { "epoch": 0.28777458213496726, "grad_norm": 239.0, "learning_rate": 8.356473870194262e-05, "loss": 12.5627, "step": 6904 }, { "epoch": 0.2878162644324955, "grad_norm": 636.0, "learning_rate": 8.355973534817506e-05, "loss": 16.3751, "step": 6905 }, { "epoch": 0.28785794673002374, "grad_norm": 616.0, "learning_rate": 8.355473138277832e-05, "loss": 18.251, "step": 6906 }, { "epoch": 0.287899629027552, "grad_norm": 231.0, "learning_rate": 8.354972680584364e-05, "loss": 11.4376, "step": 6907 }, { "epoch": 0.2879413113250802, "grad_norm": 304.0, "learning_rate": 8.354472161746221e-05, "loss": 11.8753, "step": 6908 }, { "epoch": 0.2879829936226085, "grad_norm": 211.0, "learning_rate": 8.353971581772524e-05, "loss": 11.3133, "step": 6909 }, { "epoch": 0.2880246759201367, "grad_norm": 476.0, "learning_rate": 8.353470940672397e-05, "loss": 15.9379, "step": 6910 }, { "epoch": 0.28806635821766496, "grad_norm": 484.0, "learning_rate": 8.352970238454966e-05, "loss": 14.6253, "step": 6911 }, { "epoch": 0.2881080405151932, "grad_norm": 540.0, "learning_rate": 8.352469475129355e-05, "loss": 17.2505, "step": 6912 }, { "epoch": 0.28814972281272144, "grad_norm": 302.0, "learning_rate": 8.351968650704687e-05, "loss": 13.3127, "step": 6913 }, { "epoch": 0.28819140511024965, "grad_norm": 438.0, "learning_rate": 8.351467765190096e-05, "loss": 16.6259, "step": 6914 }, { "epoch": 0.2882330874077779, "grad_norm": 528.0, "learning_rate": 8.350966818594706e-05, "loss": 17.3758, "step": 6915 }, { "epoch": 0.28827476970530613, "grad_norm": 149.0, "learning_rate": 8.350465810927648e-05, "loss": 11.0627, "step": 6916 }, { "epoch": 0.2883164520028344, "grad_norm": 836.0, "learning_rate": 8.349964742198054e-05, "loss": 23.2504, "step": 6917 }, { "epoch": 0.2883581343003626, "grad_norm": 368.0, "learning_rate": 8.349463612415056e-05, "loss": 13.0001, "step": 6918 }, { "epoch": 0.2883998165978909, "grad_norm": 438.0, "learning_rate": 8.348962421587785e-05, "loss": 14.4378, "step": 6919 }, { "epoch": 0.2884414988954191, "grad_norm": 418.0, "learning_rate": 8.348461169725376e-05, "loss": 15.3753, "step": 6920 }, { "epoch": 0.28848318119294736, "grad_norm": 392.0, "learning_rate": 8.347959856836967e-05, "loss": 14.6876, "step": 6921 }, { "epoch": 0.28852486349047557, "grad_norm": 494.0, "learning_rate": 8.34745848293169e-05, "loss": 14.8751, "step": 6922 }, { "epoch": 0.28856654578800384, "grad_norm": 178.0, "learning_rate": 8.346957048018686e-05, "loss": 11.1879, "step": 6923 }, { "epoch": 0.28860822808553205, "grad_norm": 296.0, "learning_rate": 8.346455552107093e-05, "loss": 13.1878, "step": 6924 }, { "epoch": 0.2886499103830603, "grad_norm": 532.0, "learning_rate": 8.345953995206051e-05, "loss": 16.2501, "step": 6925 }, { "epoch": 0.2886915926805885, "grad_norm": 390.0, "learning_rate": 8.3454523773247e-05, "loss": 15.4378, "step": 6926 }, { "epoch": 0.2887332749781168, "grad_norm": 153.0, "learning_rate": 8.344950698472183e-05, "loss": 8.7503, "step": 6927 }, { "epoch": 0.28877495727564506, "grad_norm": 304.0, "learning_rate": 8.344448958657641e-05, "loss": 13.1256, "step": 6928 }, { "epoch": 0.2888166395731733, "grad_norm": 320.0, "learning_rate": 8.34394715789022e-05, "loss": 13.2502, "step": 6929 }, { "epoch": 0.28885832187070154, "grad_norm": 232.0, "learning_rate": 8.343445296179065e-05, "loss": 10.9378, "step": 6930 }, { "epoch": 0.28890000416822975, "grad_norm": 430.0, "learning_rate": 8.342943373533324e-05, "loss": 16.6257, "step": 6931 }, { "epoch": 0.288941686465758, "grad_norm": 964.0, "learning_rate": 8.342441389962144e-05, "loss": 24.5004, "step": 6932 }, { "epoch": 0.28898336876328623, "grad_norm": 135.0, "learning_rate": 8.341939345474671e-05, "loss": 9.2501, "step": 6933 }, { "epoch": 0.2890250510608145, "grad_norm": 209.0, "learning_rate": 8.341437240080057e-05, "loss": 10.4386, "step": 6934 }, { "epoch": 0.2890667333583427, "grad_norm": 724.0, "learning_rate": 8.340935073787452e-05, "loss": 21.1252, "step": 6935 }, { "epoch": 0.289108415655871, "grad_norm": 1672.0, "learning_rate": 8.340432846606011e-05, "loss": 31.7542, "step": 6936 }, { "epoch": 0.2891500979533992, "grad_norm": 908.0, "learning_rate": 8.339930558544884e-05, "loss": 23.8751, "step": 6937 }, { "epoch": 0.28919178025092745, "grad_norm": 82.0, "learning_rate": 8.339428209613224e-05, "loss": 7.938, "step": 6938 }, { "epoch": 0.28923346254845567, "grad_norm": 508.0, "learning_rate": 8.338925799820191e-05, "loss": 18.1253, "step": 6939 }, { "epoch": 0.28927514484598393, "grad_norm": 176.0, "learning_rate": 8.338423329174938e-05, "loss": 11.5628, "step": 6940 }, { "epoch": 0.28931682714351215, "grad_norm": 95.5, "learning_rate": 8.337920797686624e-05, "loss": 6.9689, "step": 6941 }, { "epoch": 0.2893585094410404, "grad_norm": 97.0, "learning_rate": 8.337418205364407e-05, "loss": 9.438, "step": 6942 }, { "epoch": 0.2894001917385686, "grad_norm": 432.0, "learning_rate": 8.336915552217445e-05, "loss": 16.5004, "step": 6943 }, { "epoch": 0.2894418740360969, "grad_norm": 668.0, "learning_rate": 8.336412838254903e-05, "loss": 21.2502, "step": 6944 }, { "epoch": 0.2894835563336251, "grad_norm": 334.0, "learning_rate": 8.335910063485941e-05, "loss": 13.3127, "step": 6945 }, { "epoch": 0.28952523863115337, "grad_norm": 230.0, "learning_rate": 8.335407227919721e-05, "loss": 11.4388, "step": 6946 }, { "epoch": 0.2895669209286816, "grad_norm": 247.0, "learning_rate": 8.334904331565407e-05, "loss": 12.0003, "step": 6947 }, { "epoch": 0.28960860322620985, "grad_norm": 644.0, "learning_rate": 8.334401374432169e-05, "loss": 19.5003, "step": 6948 }, { "epoch": 0.28965028552373806, "grad_norm": 396.0, "learning_rate": 8.333898356529167e-05, "loss": 16.2504, "step": 6949 }, { "epoch": 0.2896919678212663, "grad_norm": 418.0, "learning_rate": 8.333395277865572e-05, "loss": 16.6253, "step": 6950 }, { "epoch": 0.28973365011879454, "grad_norm": 154.0, "learning_rate": 8.332892138450552e-05, "loss": 9.9385, "step": 6951 }, { "epoch": 0.2897753324163228, "grad_norm": 155.0, "learning_rate": 8.332388938293278e-05, "loss": 6.5627, "step": 6952 }, { "epoch": 0.289817014713851, "grad_norm": 240.0, "learning_rate": 8.33188567740292e-05, "loss": 10.5002, "step": 6953 }, { "epoch": 0.2898586970113793, "grad_norm": 324.0, "learning_rate": 8.331382355788649e-05, "loss": 13.5022, "step": 6954 }, { "epoch": 0.2899003793089075, "grad_norm": 224.0, "learning_rate": 8.330878973459637e-05, "loss": 11.8752, "step": 6955 }, { "epoch": 0.28994206160643576, "grad_norm": 374.0, "learning_rate": 8.330375530425063e-05, "loss": 13.7502, "step": 6956 }, { "epoch": 0.289983743903964, "grad_norm": 326.0, "learning_rate": 8.329872026694098e-05, "loss": 13.4377, "step": 6957 }, { "epoch": 0.29002542620149224, "grad_norm": 370.0, "learning_rate": 8.329368462275919e-05, "loss": 12.1885, "step": 6958 }, { "epoch": 0.29006710849902045, "grad_norm": 440.0, "learning_rate": 8.328864837179705e-05, "loss": 16.5003, "step": 6959 }, { "epoch": 0.2901087907965487, "grad_norm": 624.0, "learning_rate": 8.328361151414634e-05, "loss": 20.2504, "step": 6960 }, { "epoch": 0.29015047309407693, "grad_norm": 286.0, "learning_rate": 8.327857404989885e-05, "loss": 12.6877, "step": 6961 }, { "epoch": 0.2901921553916052, "grad_norm": 908.0, "learning_rate": 8.32735359791464e-05, "loss": 20.506, "step": 6962 }, { "epoch": 0.2902338376891334, "grad_norm": 103.0, "learning_rate": 8.326849730198081e-05, "loss": 8.6253, "step": 6963 }, { "epoch": 0.2902755199866617, "grad_norm": 272.0, "learning_rate": 8.326345801849389e-05, "loss": 13.2503, "step": 6964 }, { "epoch": 0.2903172022841899, "grad_norm": 242.0, "learning_rate": 8.32584181287775e-05, "loss": 12.3754, "step": 6965 }, { "epoch": 0.29035888458171816, "grad_norm": 470.0, "learning_rate": 8.325337763292348e-05, "loss": 16.7502, "step": 6966 }, { "epoch": 0.29040056687924637, "grad_norm": 720.0, "learning_rate": 8.324833653102371e-05, "loss": 21.7521, "step": 6967 }, { "epoch": 0.29044224917677464, "grad_norm": 386.0, "learning_rate": 8.324329482317004e-05, "loss": 14.2501, "step": 6968 }, { "epoch": 0.29048393147430285, "grad_norm": 188.0, "learning_rate": 8.323825250945439e-05, "loss": 11.1254, "step": 6969 }, { "epoch": 0.2905256137718311, "grad_norm": 640.0, "learning_rate": 8.323320958996862e-05, "loss": 19.5002, "step": 6970 }, { "epoch": 0.2905672960693593, "grad_norm": 492.0, "learning_rate": 8.322816606480469e-05, "loss": 17.5004, "step": 6971 }, { "epoch": 0.2906089783668876, "grad_norm": 223.0, "learning_rate": 8.322312193405443e-05, "loss": 11.7502, "step": 6972 }, { "epoch": 0.2906506606644158, "grad_norm": 358.0, "learning_rate": 8.321807719780987e-05, "loss": 15.0627, "step": 6973 }, { "epoch": 0.2906923429619441, "grad_norm": 268.0, "learning_rate": 8.321303185616288e-05, "loss": 11.4402, "step": 6974 }, { "epoch": 0.2907340252594723, "grad_norm": 856.0, "learning_rate": 8.320798590920545e-05, "loss": 24.0002, "step": 6975 }, { "epoch": 0.29077570755700055, "grad_norm": 410.0, "learning_rate": 8.320293935702952e-05, "loss": 16.0003, "step": 6976 }, { "epoch": 0.29081738985452876, "grad_norm": 560.0, "learning_rate": 8.319789219972707e-05, "loss": 18.7502, "step": 6977 }, { "epoch": 0.29085907215205703, "grad_norm": 205.0, "learning_rate": 8.319284443739009e-05, "loss": 9.8757, "step": 6978 }, { "epoch": 0.29090075444958524, "grad_norm": 424.0, "learning_rate": 8.318779607011058e-05, "loss": 15.8753, "step": 6979 }, { "epoch": 0.2909424367471135, "grad_norm": 224.0, "learning_rate": 8.318274709798053e-05, "loss": 11.3751, "step": 6980 }, { "epoch": 0.2909841190446417, "grad_norm": 484.0, "learning_rate": 8.317769752109197e-05, "loss": 16.8762, "step": 6981 }, { "epoch": 0.29102580134217, "grad_norm": 125.5, "learning_rate": 8.317264733953694e-05, "loss": 7.6257, "step": 6982 }, { "epoch": 0.2910674836396982, "grad_norm": 168.0, "learning_rate": 8.316759655340746e-05, "loss": 10.0002, "step": 6983 }, { "epoch": 0.29110916593722647, "grad_norm": 520.0, "learning_rate": 8.316254516279558e-05, "loss": 18.2504, "step": 6984 }, { "epoch": 0.2911508482347547, "grad_norm": 436.0, "learning_rate": 8.315749316779338e-05, "loss": 14.6878, "step": 6985 }, { "epoch": 0.29119253053228294, "grad_norm": 436.0, "learning_rate": 8.315244056849292e-05, "loss": 13.688, "step": 6986 }, { "epoch": 0.29123421282981116, "grad_norm": 193.0, "learning_rate": 8.314738736498629e-05, "loss": 11.1877, "step": 6987 }, { "epoch": 0.2912758951273394, "grad_norm": 229.0, "learning_rate": 8.314233355736556e-05, "loss": 11.2503, "step": 6988 }, { "epoch": 0.29131757742486764, "grad_norm": 190.0, "learning_rate": 8.31372791457229e-05, "loss": 11.3129, "step": 6989 }, { "epoch": 0.2913592597223959, "grad_norm": 412.0, "learning_rate": 8.313222413015036e-05, "loss": 15.3127, "step": 6990 }, { "epoch": 0.2914009420199241, "grad_norm": 640.0, "learning_rate": 8.312716851074009e-05, "loss": 20.3751, "step": 6991 }, { "epoch": 0.2914426243174524, "grad_norm": 179.0, "learning_rate": 8.312211228758425e-05, "loss": 9.1252, "step": 6992 }, { "epoch": 0.2914843066149806, "grad_norm": 178.0, "learning_rate": 8.311705546077497e-05, "loss": 10.3128, "step": 6993 }, { "epoch": 0.29152598891250886, "grad_norm": 78.5, "learning_rate": 8.31119980304044e-05, "loss": 7.5629, "step": 6994 }, { "epoch": 0.29156767121003707, "grad_norm": 123.0, "learning_rate": 8.310693999656473e-05, "loss": 9.188, "step": 6995 }, { "epoch": 0.29160935350756534, "grad_norm": 624.0, "learning_rate": 8.310188135934815e-05, "loss": 19.7546, "step": 6996 }, { "epoch": 0.29165103580509355, "grad_norm": 276.0, "learning_rate": 8.309682211884685e-05, "loss": 13.1256, "step": 6997 }, { "epoch": 0.2916927181026218, "grad_norm": 278.0, "learning_rate": 8.309176227515303e-05, "loss": 12.6878, "step": 6998 }, { "epoch": 0.29173440040015003, "grad_norm": 412.0, "learning_rate": 8.30867018283589e-05, "loss": 14.6255, "step": 6999 }, { "epoch": 0.2917760826976783, "grad_norm": 1004.0, "learning_rate": 8.308164077855667e-05, "loss": 22.7543, "step": 7000 }, { "epoch": 0.29181776499520656, "grad_norm": 396.0, "learning_rate": 8.307657912583862e-05, "loss": 15.0007, "step": 7001 }, { "epoch": 0.2918594472927348, "grad_norm": 248.0, "learning_rate": 8.307151687029697e-05, "loss": 12.1886, "step": 7002 }, { "epoch": 0.29190112959026304, "grad_norm": 840.0, "learning_rate": 8.3066454012024e-05, "loss": 21.5015, "step": 7003 }, { "epoch": 0.29194281188779125, "grad_norm": 260.0, "learning_rate": 8.306139055111197e-05, "loss": 11.6255, "step": 7004 }, { "epoch": 0.2919844941853195, "grad_norm": 436.0, "learning_rate": 8.305632648765314e-05, "loss": 16.1254, "step": 7005 }, { "epoch": 0.29202617648284773, "grad_norm": 235.0, "learning_rate": 8.305126182173984e-05, "loss": 11.7502, "step": 7006 }, { "epoch": 0.292067858780376, "grad_norm": 358.0, "learning_rate": 8.304619655346437e-05, "loss": 15.0627, "step": 7007 }, { "epoch": 0.2921095410779042, "grad_norm": 744.0, "learning_rate": 8.304113068291903e-05, "loss": 22.7501, "step": 7008 }, { "epoch": 0.2921512233754325, "grad_norm": 346.0, "learning_rate": 8.303606421019614e-05, "loss": 14.0009, "step": 7009 }, { "epoch": 0.2921929056729607, "grad_norm": 720.0, "learning_rate": 8.303099713538805e-05, "loss": 21.7502, "step": 7010 }, { "epoch": 0.29223458797048896, "grad_norm": 165.0, "learning_rate": 8.302592945858712e-05, "loss": 9.1252, "step": 7011 }, { "epoch": 0.29227627026801717, "grad_norm": 418.0, "learning_rate": 8.302086117988568e-05, "loss": 16.1252, "step": 7012 }, { "epoch": 0.29231795256554544, "grad_norm": 280.0, "learning_rate": 8.301579229937611e-05, "loss": 12.9382, "step": 7013 }, { "epoch": 0.29235963486307365, "grad_norm": 109.5, "learning_rate": 8.30107228171508e-05, "loss": 9.1251, "step": 7014 }, { "epoch": 0.2924013171606019, "grad_norm": 151.0, "learning_rate": 8.300565273330212e-05, "loss": 10.7507, "step": 7015 }, { "epoch": 0.2924429994581301, "grad_norm": 243.0, "learning_rate": 8.300058204792251e-05, "loss": 12.3753, "step": 7016 }, { "epoch": 0.2924846817556584, "grad_norm": 624.0, "learning_rate": 8.299551076110436e-05, "loss": 18.1287, "step": 7017 }, { "epoch": 0.2925263640531866, "grad_norm": 520.0, "learning_rate": 8.29904388729401e-05, "loss": 16.7502, "step": 7018 }, { "epoch": 0.29256804635071487, "grad_norm": 456.0, "learning_rate": 8.298536638352216e-05, "loss": 16.7502, "step": 7019 }, { "epoch": 0.2926097286482431, "grad_norm": 130.0, "learning_rate": 8.298029329294299e-05, "loss": 9.6876, "step": 7020 }, { "epoch": 0.29265141094577135, "grad_norm": 282.0, "learning_rate": 8.297521960129505e-05, "loss": 12.7502, "step": 7021 }, { "epoch": 0.29269309324329956, "grad_norm": 510.0, "learning_rate": 8.297014530867079e-05, "loss": 17.1257, "step": 7022 }, { "epoch": 0.29273477554082783, "grad_norm": 620.0, "learning_rate": 8.296507041516272e-05, "loss": 18.3756, "step": 7023 }, { "epoch": 0.29277645783835604, "grad_norm": 474.0, "learning_rate": 8.295999492086331e-05, "loss": 17.2502, "step": 7024 }, { "epoch": 0.2928181401358843, "grad_norm": 912.0, "learning_rate": 8.295491882586506e-05, "loss": 22.1255, "step": 7025 }, { "epoch": 0.2928598224334125, "grad_norm": 916.0, "learning_rate": 8.294984213026049e-05, "loss": 26.3751, "step": 7026 }, { "epoch": 0.2929015047309408, "grad_norm": 664.0, "learning_rate": 8.294476483414213e-05, "loss": 19.626, "step": 7027 }, { "epoch": 0.292943187028469, "grad_norm": 644.0, "learning_rate": 8.29396869376025e-05, "loss": 20.3752, "step": 7028 }, { "epoch": 0.29298486932599727, "grad_norm": 190.0, "learning_rate": 8.293460844073416e-05, "loss": 7.219, "step": 7029 }, { "epoch": 0.2930265516235255, "grad_norm": 716.0, "learning_rate": 8.292952934362966e-05, "loss": 22.2502, "step": 7030 }, { "epoch": 0.29306823392105374, "grad_norm": 348.0, "learning_rate": 8.292444964638155e-05, "loss": 14.5002, "step": 7031 }, { "epoch": 0.29310991621858196, "grad_norm": 338.0, "learning_rate": 8.291936934908242e-05, "loss": 14.0627, "step": 7032 }, { "epoch": 0.2931515985161102, "grad_norm": 131.0, "learning_rate": 8.291428845182487e-05, "loss": 9.8754, "step": 7033 }, { "epoch": 0.29319328081363843, "grad_norm": 256.0, "learning_rate": 8.29092069547015e-05, "loss": 12.3752, "step": 7034 }, { "epoch": 0.2932349631111667, "grad_norm": 452.0, "learning_rate": 8.29041248578049e-05, "loss": 16.7503, "step": 7035 }, { "epoch": 0.2932766454086949, "grad_norm": 676.0, "learning_rate": 8.289904216122771e-05, "loss": 19.2503, "step": 7036 }, { "epoch": 0.2933183277062232, "grad_norm": 294.0, "learning_rate": 8.289395886506255e-05, "loss": 12.6877, "step": 7037 }, { "epoch": 0.2933600100037514, "grad_norm": 157.0, "learning_rate": 8.288887496940208e-05, "loss": 11.3128, "step": 7038 }, { "epoch": 0.29340169230127966, "grad_norm": 684.0, "learning_rate": 8.288379047433894e-05, "loss": 16.2546, "step": 7039 }, { "epoch": 0.29344337459880787, "grad_norm": 342.0, "learning_rate": 8.28787053799658e-05, "loss": 14.6878, "step": 7040 }, { "epoch": 0.29348505689633614, "grad_norm": 223.0, "learning_rate": 8.287361968637532e-05, "loss": 11.3129, "step": 7041 }, { "epoch": 0.29352673919386435, "grad_norm": 60.25, "learning_rate": 8.286853339366022e-05, "loss": 8.5007, "step": 7042 }, { "epoch": 0.2935684214913926, "grad_norm": 568.0, "learning_rate": 8.286344650191318e-05, "loss": 17.7502, "step": 7043 }, { "epoch": 0.29361010378892083, "grad_norm": 400.0, "learning_rate": 8.285835901122689e-05, "loss": 14.4376, "step": 7044 }, { "epoch": 0.2936517860864491, "grad_norm": 76.0, "learning_rate": 8.285327092169411e-05, "loss": 9.5008, "step": 7045 }, { "epoch": 0.2936934683839773, "grad_norm": 400.0, "learning_rate": 8.284818223340755e-05, "loss": 13.3777, "step": 7046 }, { "epoch": 0.2937351506815056, "grad_norm": 227.0, "learning_rate": 8.284309294645996e-05, "loss": 12.1878, "step": 7047 }, { "epoch": 0.2937768329790338, "grad_norm": 504.0, "learning_rate": 8.283800306094407e-05, "loss": 17.1252, "step": 7048 }, { "epoch": 0.29381851527656205, "grad_norm": 1600.0, "learning_rate": 8.283291257695267e-05, "loss": 29.8802, "step": 7049 }, { "epoch": 0.29386019757409026, "grad_norm": 262.0, "learning_rate": 8.282782149457851e-05, "loss": 13.1879, "step": 7050 }, { "epoch": 0.29390187987161853, "grad_norm": 154.0, "learning_rate": 8.28227298139144e-05, "loss": 8.5636, "step": 7051 }, { "epoch": 0.29394356216914674, "grad_norm": 428.0, "learning_rate": 8.281763753505311e-05, "loss": 14.1888, "step": 7052 }, { "epoch": 0.293985244466675, "grad_norm": 420.0, "learning_rate": 8.281254465808749e-05, "loss": 12.8176, "step": 7053 }, { "epoch": 0.2940269267642032, "grad_norm": 258.0, "learning_rate": 8.28074511831103e-05, "loss": 13.1257, "step": 7054 }, { "epoch": 0.2940686090617315, "grad_norm": 528.0, "learning_rate": 8.280235711021442e-05, "loss": 18.2521, "step": 7055 }, { "epoch": 0.2941102913592597, "grad_norm": 392.0, "learning_rate": 8.279726243949268e-05, "loss": 13.8126, "step": 7056 }, { "epoch": 0.29415197365678797, "grad_norm": 536.0, "learning_rate": 8.27921671710379e-05, "loss": 16.8755, "step": 7057 }, { "epoch": 0.2941936559543162, "grad_norm": 155.0, "learning_rate": 8.278707130494297e-05, "loss": 10.7503, "step": 7058 }, { "epoch": 0.29423533825184445, "grad_norm": 1784.0, "learning_rate": 8.278197484130075e-05, "loss": 38.0002, "step": 7059 }, { "epoch": 0.29427702054937266, "grad_norm": 241.0, "learning_rate": 8.277687778020414e-05, "loss": 11.5631, "step": 7060 }, { "epoch": 0.2943187028469009, "grad_norm": 56.0, "learning_rate": 8.2771780121746e-05, "loss": 7.938, "step": 7061 }, { "epoch": 0.29436038514442914, "grad_norm": 452.0, "learning_rate": 8.276668186601928e-05, "loss": 16.6311, "step": 7062 }, { "epoch": 0.2944020674419574, "grad_norm": 1744.0, "learning_rate": 8.276158301311686e-05, "loss": 38.7501, "step": 7063 }, { "epoch": 0.2944437497394856, "grad_norm": 284.0, "learning_rate": 8.27564835631317e-05, "loss": 13.0004, "step": 7064 }, { "epoch": 0.2944854320370139, "grad_norm": 314.0, "learning_rate": 8.27513835161567e-05, "loss": 14.8133, "step": 7065 }, { "epoch": 0.2945271143345421, "grad_norm": 442.0, "learning_rate": 8.274628287228482e-05, "loss": 13.8756, "step": 7066 }, { "epoch": 0.29456879663207036, "grad_norm": 74.5, "learning_rate": 8.274118163160906e-05, "loss": 8.0626, "step": 7067 }, { "epoch": 0.2946104789295986, "grad_norm": 390.0, "learning_rate": 8.273607979422234e-05, "loss": 15.0626, "step": 7068 }, { "epoch": 0.29465216122712684, "grad_norm": 205.0, "learning_rate": 8.273097736021765e-05, "loss": 11.6252, "step": 7069 }, { "epoch": 0.29469384352465505, "grad_norm": 84.5, "learning_rate": 8.272587432968801e-05, "loss": 7.5944, "step": 7070 }, { "epoch": 0.2947355258221833, "grad_norm": 316.0, "learning_rate": 8.272077070272639e-05, "loss": 13.6877, "step": 7071 }, { "epoch": 0.29477720811971153, "grad_norm": 276.0, "learning_rate": 8.271566647942583e-05, "loss": 13.188, "step": 7072 }, { "epoch": 0.2948188904172398, "grad_norm": 324.0, "learning_rate": 8.271056165987934e-05, "loss": 14.3752, "step": 7073 }, { "epoch": 0.29486057271476807, "grad_norm": 134.0, "learning_rate": 8.270545624417997e-05, "loss": 9.2504, "step": 7074 }, { "epoch": 0.2949022550122963, "grad_norm": 536.0, "learning_rate": 8.270035023242075e-05, "loss": 18.3754, "step": 7075 }, { "epoch": 0.29494393730982454, "grad_norm": 430.0, "learning_rate": 8.269524362469474e-05, "loss": 17.7504, "step": 7076 }, { "epoch": 0.29498561960735276, "grad_norm": 119.0, "learning_rate": 8.269013642109501e-05, "loss": 8.0004, "step": 7077 }, { "epoch": 0.295027301904881, "grad_norm": 636.0, "learning_rate": 8.268502862171464e-05, "loss": 21.5006, "step": 7078 }, { "epoch": 0.29506898420240923, "grad_norm": 244.0, "learning_rate": 8.267992022664674e-05, "loss": 7.5016, "step": 7079 }, { "epoch": 0.2951106664999375, "grad_norm": 434.0, "learning_rate": 8.267481123598437e-05, "loss": 15.2521, "step": 7080 }, { "epoch": 0.2951523487974657, "grad_norm": 452.0, "learning_rate": 8.266970164982069e-05, "loss": 15.0012, "step": 7081 }, { "epoch": 0.295194031094994, "grad_norm": 680.0, "learning_rate": 8.266459146824876e-05, "loss": 19.7509, "step": 7082 }, { "epoch": 0.2952357133925222, "grad_norm": 62.0, "learning_rate": 8.265948069136178e-05, "loss": 7.8126, "step": 7083 }, { "epoch": 0.29527739569005046, "grad_norm": 1176.0, "learning_rate": 8.265436931925287e-05, "loss": 29.2502, "step": 7084 }, { "epoch": 0.29531907798757867, "grad_norm": 932.0, "learning_rate": 8.264925735201516e-05, "loss": 22.2503, "step": 7085 }, { "epoch": 0.29536076028510694, "grad_norm": 422.0, "learning_rate": 8.264414478974185e-05, "loss": 16.8752, "step": 7086 }, { "epoch": 0.29540244258263515, "grad_norm": 123.0, "learning_rate": 8.26390316325261e-05, "loss": 9.6877, "step": 7087 }, { "epoch": 0.2954441248801634, "grad_norm": 616.0, "learning_rate": 8.263391788046108e-05, "loss": 18.5012, "step": 7088 }, { "epoch": 0.29548580717769163, "grad_norm": 266.0, "learning_rate": 8.262880353364004e-05, "loss": 11.6253, "step": 7089 }, { "epoch": 0.2955274894752199, "grad_norm": 230.0, "learning_rate": 8.262368859215614e-05, "loss": 12.0002, "step": 7090 }, { "epoch": 0.2955691717727481, "grad_norm": 452.0, "learning_rate": 8.261857305610264e-05, "loss": 16.7505, "step": 7091 }, { "epoch": 0.2956108540702764, "grad_norm": 544.0, "learning_rate": 8.261345692557274e-05, "loss": 19.7504, "step": 7092 }, { "epoch": 0.2956525363678046, "grad_norm": 254.0, "learning_rate": 8.260834020065968e-05, "loss": 12.5003, "step": 7093 }, { "epoch": 0.29569421866533285, "grad_norm": 596.0, "learning_rate": 8.260322288145675e-05, "loss": 19.1253, "step": 7094 }, { "epoch": 0.29573590096286106, "grad_norm": 896.0, "learning_rate": 8.259810496805717e-05, "loss": 24.8753, "step": 7095 }, { "epoch": 0.29577758326038933, "grad_norm": 96.5, "learning_rate": 8.259298646055423e-05, "loss": 7.8447, "step": 7096 }, { "epoch": 0.29581926555791754, "grad_norm": 502.0, "learning_rate": 8.258786735904123e-05, "loss": 18.0002, "step": 7097 }, { "epoch": 0.2958609478554458, "grad_norm": 462.0, "learning_rate": 8.258274766361145e-05, "loss": 16.5003, "step": 7098 }, { "epoch": 0.295902630152974, "grad_norm": 748.0, "learning_rate": 8.25776273743582e-05, "loss": 21.6257, "step": 7099 }, { "epoch": 0.2959443124505023, "grad_norm": 772.0, "learning_rate": 8.25725064913748e-05, "loss": 19.8753, "step": 7100 }, { "epoch": 0.2959859947480305, "grad_norm": 227.0, "learning_rate": 8.256738501475459e-05, "loss": 11.1878, "step": 7101 }, { "epoch": 0.29602767704555877, "grad_norm": 98.0, "learning_rate": 8.256226294459088e-05, "loss": 9.1879, "step": 7102 }, { "epoch": 0.296069359343087, "grad_norm": 290.0, "learning_rate": 8.255714028097704e-05, "loss": 11.6262, "step": 7103 }, { "epoch": 0.29611104164061525, "grad_norm": 358.0, "learning_rate": 8.255201702400644e-05, "loss": 14.3757, "step": 7104 }, { "epoch": 0.29615272393814346, "grad_norm": 264.0, "learning_rate": 8.254689317377242e-05, "loss": 12.8752, "step": 7105 }, { "epoch": 0.2961944062356717, "grad_norm": 382.0, "learning_rate": 8.254176873036839e-05, "loss": 15.2502, "step": 7106 }, { "epoch": 0.29623608853319994, "grad_norm": 260.0, "learning_rate": 8.253664369388774e-05, "loss": 10.1252, "step": 7107 }, { "epoch": 0.2962777708307282, "grad_norm": 716.0, "learning_rate": 8.253151806442388e-05, "loss": 18.8755, "step": 7108 }, { "epoch": 0.2963194531282564, "grad_norm": 480.0, "learning_rate": 8.25263918420702e-05, "loss": 17.8752, "step": 7109 }, { "epoch": 0.2963611354257847, "grad_norm": 368.0, "learning_rate": 8.252126502692015e-05, "loss": 15.6257, "step": 7110 }, { "epoch": 0.2964028177233129, "grad_norm": 544.0, "learning_rate": 8.251613761906715e-05, "loss": 17.8753, "step": 7111 }, { "epoch": 0.29644450002084116, "grad_norm": 246.0, "learning_rate": 8.251100961860466e-05, "loss": 12.9377, "step": 7112 }, { "epoch": 0.2964861823183694, "grad_norm": 1040.0, "learning_rate": 8.250588102562614e-05, "loss": 24.2548, "step": 7113 }, { "epoch": 0.29652786461589764, "grad_norm": 708.0, "learning_rate": 8.250075184022503e-05, "loss": 20.7503, "step": 7114 }, { "epoch": 0.29656954691342585, "grad_norm": 278.0, "learning_rate": 8.249562206249486e-05, "loss": 12.3137, "step": 7115 }, { "epoch": 0.2966112292109541, "grad_norm": 406.0, "learning_rate": 8.249049169252908e-05, "loss": 14.8127, "step": 7116 }, { "epoch": 0.29665291150848233, "grad_norm": 430.0, "learning_rate": 8.24853607304212e-05, "loss": 14.8138, "step": 7117 }, { "epoch": 0.2966945938060106, "grad_norm": 452.0, "learning_rate": 8.248022917626474e-05, "loss": 15.5001, "step": 7118 }, { "epoch": 0.2967362761035388, "grad_norm": 1176.0, "learning_rate": 8.247509703015324e-05, "loss": 24.1292, "step": 7119 }, { "epoch": 0.2967779584010671, "grad_norm": 93.5, "learning_rate": 8.246996429218019e-05, "loss": 5.2502, "step": 7120 }, { "epoch": 0.2968196406985953, "grad_norm": 968.0, "learning_rate": 8.246483096243916e-05, "loss": 23.8794, "step": 7121 }, { "epoch": 0.29686132299612356, "grad_norm": 352.0, "learning_rate": 8.245969704102371e-05, "loss": 12.8132, "step": 7122 }, { "epoch": 0.29690300529365177, "grad_norm": 568.0, "learning_rate": 8.24545625280274e-05, "loss": 17.7502, "step": 7123 }, { "epoch": 0.29694468759118003, "grad_norm": 223.0, "learning_rate": 8.24494274235438e-05, "loss": 12.6253, "step": 7124 }, { "epoch": 0.29698636988870825, "grad_norm": 276.0, "learning_rate": 8.244429172766652e-05, "loss": 14.0007, "step": 7125 }, { "epoch": 0.2970280521862365, "grad_norm": 640.0, "learning_rate": 8.243915544048912e-05, "loss": 20.1257, "step": 7126 }, { "epoch": 0.2970697344837647, "grad_norm": 196.0, "learning_rate": 8.243401856210524e-05, "loss": 11.3752, "step": 7127 }, { "epoch": 0.297111416781293, "grad_norm": 328.0, "learning_rate": 8.242888109260852e-05, "loss": 13.4377, "step": 7128 }, { "epoch": 0.2971530990788212, "grad_norm": 732.0, "learning_rate": 8.242374303209253e-05, "loss": 20.8754, "step": 7129 }, { "epoch": 0.29719478137634947, "grad_norm": 508.0, "learning_rate": 8.241860438065095e-05, "loss": 17.1296, "step": 7130 }, { "epoch": 0.2972364636738777, "grad_norm": 238.0, "learning_rate": 8.241346513837744e-05, "loss": 11.3751, "step": 7131 }, { "epoch": 0.29727814597140595, "grad_norm": 68.0, "learning_rate": 8.240832530536564e-05, "loss": 9.1254, "step": 7132 }, { "epoch": 0.29731982826893416, "grad_norm": 186.0, "learning_rate": 8.240318488170924e-05, "loss": 10.6254, "step": 7133 }, { "epoch": 0.29736151056646243, "grad_norm": 404.0, "learning_rate": 8.239804386750192e-05, "loss": 14.0003, "step": 7134 }, { "epoch": 0.29740319286399064, "grad_norm": 860.0, "learning_rate": 8.239290226283737e-05, "loss": 21.8752, "step": 7135 }, { "epoch": 0.2974448751615189, "grad_norm": 116.0, "learning_rate": 8.23877600678093e-05, "loss": 9.3128, "step": 7136 }, { "epoch": 0.2974865574590471, "grad_norm": 150.0, "learning_rate": 8.238261728251143e-05, "loss": 9.3758, "step": 7137 }, { "epoch": 0.2975282397565754, "grad_norm": 462.0, "learning_rate": 8.237747390703749e-05, "loss": 15.8792, "step": 7138 }, { "epoch": 0.2975699220541036, "grad_norm": 390.0, "learning_rate": 8.23723299414812e-05, "loss": 14.9377, "step": 7139 }, { "epoch": 0.29761160435163186, "grad_norm": 160.0, "learning_rate": 8.236718538593633e-05, "loss": 11.0628, "step": 7140 }, { "epoch": 0.2976532866491601, "grad_norm": 386.0, "learning_rate": 8.236204024049665e-05, "loss": 14.8752, "step": 7141 }, { "epoch": 0.29769496894668834, "grad_norm": 250.0, "learning_rate": 8.23568945052559e-05, "loss": 10.1879, "step": 7142 }, { "epoch": 0.29773665124421655, "grad_norm": 360.0, "learning_rate": 8.235174818030787e-05, "loss": 12.5629, "step": 7143 }, { "epoch": 0.2977783335417448, "grad_norm": 676.0, "learning_rate": 8.234660126574635e-05, "loss": 18.127, "step": 7144 }, { "epoch": 0.29782001583927303, "grad_norm": 207.0, "learning_rate": 8.234145376166517e-05, "loss": 11.1258, "step": 7145 }, { "epoch": 0.2978616981368013, "grad_norm": 474.0, "learning_rate": 8.233630566815811e-05, "loss": 13.6254, "step": 7146 }, { "epoch": 0.29790338043432957, "grad_norm": 330.0, "learning_rate": 8.233115698531901e-05, "loss": 8.1879, "step": 7147 }, { "epoch": 0.2979450627318578, "grad_norm": 424.0, "learning_rate": 8.232600771324173e-05, "loss": 15.813, "step": 7148 }, { "epoch": 0.29798674502938605, "grad_norm": 155.0, "learning_rate": 8.232085785202006e-05, "loss": 10.2503, "step": 7149 }, { "epoch": 0.29802842732691426, "grad_norm": 2240.0, "learning_rate": 8.23157074017479e-05, "loss": 44.7513, "step": 7150 }, { "epoch": 0.2980701096244425, "grad_norm": 2272.0, "learning_rate": 8.23105563625191e-05, "loss": 42.5008, "step": 7151 }, { "epoch": 0.29811179192197074, "grad_norm": 80.0, "learning_rate": 8.230540473442754e-05, "loss": 7.6877, "step": 7152 }, { "epoch": 0.298153474219499, "grad_norm": 243.0, "learning_rate": 8.23002525175671e-05, "loss": 12.7513, "step": 7153 }, { "epoch": 0.2981951565170272, "grad_norm": 284.0, "learning_rate": 8.22950997120317e-05, "loss": 13.1877, "step": 7154 }, { "epoch": 0.2982368388145555, "grad_norm": 812.0, "learning_rate": 8.228994631791525e-05, "loss": 25.0001, "step": 7155 }, { "epoch": 0.2982785211120837, "grad_norm": 298.0, "learning_rate": 8.228479233531166e-05, "loss": 14.4377, "step": 7156 }, { "epoch": 0.29832020340961196, "grad_norm": 164.0, "learning_rate": 8.227963776431485e-05, "loss": 10.6259, "step": 7157 }, { "epoch": 0.2983618857071402, "grad_norm": 266.0, "learning_rate": 8.227448260501879e-05, "loss": 12.1884, "step": 7158 }, { "epoch": 0.29840356800466844, "grad_norm": 238.0, "learning_rate": 8.22693268575174e-05, "loss": 12.3754, "step": 7159 }, { "epoch": 0.29844525030219665, "grad_norm": 324.0, "learning_rate": 8.22641705219047e-05, "loss": 13.6255, "step": 7160 }, { "epoch": 0.2984869325997249, "grad_norm": 97.0, "learning_rate": 8.225901359827459e-05, "loss": 8.8756, "step": 7161 }, { "epoch": 0.29852861489725313, "grad_norm": 223.0, "learning_rate": 8.225385608672111e-05, "loss": 13.2516, "step": 7162 }, { "epoch": 0.2985702971947814, "grad_norm": 572.0, "learning_rate": 8.224869798733825e-05, "loss": 17.7512, "step": 7163 }, { "epoch": 0.2986119794923096, "grad_norm": 488.0, "learning_rate": 8.224353930021998e-05, "loss": 17.2502, "step": 7164 }, { "epoch": 0.2986536617898379, "grad_norm": 540.0, "learning_rate": 8.223838002546035e-05, "loss": 18.1256, "step": 7165 }, { "epoch": 0.2986953440873661, "grad_norm": 768.0, "learning_rate": 8.22332201631534e-05, "loss": 22.0002, "step": 7166 }, { "epoch": 0.29873702638489436, "grad_norm": 130.0, "learning_rate": 8.222805971339315e-05, "loss": 9.3751, "step": 7167 }, { "epoch": 0.29877870868242257, "grad_norm": 520.0, "learning_rate": 8.222289867627364e-05, "loss": 15.8149, "step": 7168 }, { "epoch": 0.29882039097995083, "grad_norm": 207.0, "learning_rate": 8.221773705188895e-05, "loss": 12.1253, "step": 7169 }, { "epoch": 0.29886207327747905, "grad_norm": 488.0, "learning_rate": 8.221257484033314e-05, "loss": 17.1252, "step": 7170 }, { "epoch": 0.2989037555750073, "grad_norm": 296.0, "learning_rate": 8.220741204170029e-05, "loss": 12.8752, "step": 7171 }, { "epoch": 0.2989454378725355, "grad_norm": 432.0, "learning_rate": 8.220224865608448e-05, "loss": 15.6252, "step": 7172 }, { "epoch": 0.2989871201700638, "grad_norm": 648.0, "learning_rate": 8.219708468357985e-05, "loss": 19.6253, "step": 7173 }, { "epoch": 0.299028802467592, "grad_norm": 217.0, "learning_rate": 8.219192012428049e-05, "loss": 11.5001, "step": 7174 }, { "epoch": 0.29907048476512027, "grad_norm": 52.25, "learning_rate": 8.218675497828054e-05, "loss": 8.376, "step": 7175 }, { "epoch": 0.2991121670626485, "grad_norm": 346.0, "learning_rate": 8.21815892456741e-05, "loss": 15.4388, "step": 7176 }, { "epoch": 0.29915384936017675, "grad_norm": 528.0, "learning_rate": 8.217642292655536e-05, "loss": 16.1264, "step": 7177 }, { "epoch": 0.29919553165770496, "grad_norm": 110.0, "learning_rate": 8.217125602101843e-05, "loss": 6.9377, "step": 7178 }, { "epoch": 0.2992372139552332, "grad_norm": 139.0, "learning_rate": 8.216608852915753e-05, "loss": 9.1877, "step": 7179 }, { "epoch": 0.29927889625276144, "grad_norm": 688.0, "learning_rate": 8.21609204510668e-05, "loss": 20.6252, "step": 7180 }, { "epoch": 0.2993205785502897, "grad_norm": 340.0, "learning_rate": 8.215575178684042e-05, "loss": 13.3754, "step": 7181 }, { "epoch": 0.2993622608478179, "grad_norm": 180.0, "learning_rate": 8.215058253657264e-05, "loss": 11.2502, "step": 7182 }, { "epoch": 0.2994039431453462, "grad_norm": 205.0, "learning_rate": 8.214541270035763e-05, "loss": 9.2509, "step": 7183 }, { "epoch": 0.2994456254428744, "grad_norm": 390.0, "learning_rate": 8.214024227828963e-05, "loss": 14.7502, "step": 7184 }, { "epoch": 0.29948730774040266, "grad_norm": 274.0, "learning_rate": 8.213507127046284e-05, "loss": 12.9394, "step": 7185 }, { "epoch": 0.2995289900379309, "grad_norm": 78.0, "learning_rate": 8.212989967697154e-05, "loss": 6.5627, "step": 7186 }, { "epoch": 0.29957067233545914, "grad_norm": 336.0, "learning_rate": 8.212472749790995e-05, "loss": 13.6877, "step": 7187 }, { "epoch": 0.29961235463298735, "grad_norm": 668.0, "learning_rate": 8.211955473337236e-05, "loss": 18.6291, "step": 7188 }, { "epoch": 0.2996540369305156, "grad_norm": 464.0, "learning_rate": 8.211438138345302e-05, "loss": 16.7503, "step": 7189 }, { "epoch": 0.29969571922804383, "grad_norm": 246.0, "learning_rate": 8.210920744824624e-05, "loss": 10.6882, "step": 7190 }, { "epoch": 0.2997374015255721, "grad_norm": 436.0, "learning_rate": 8.210403292784632e-05, "loss": 16.6256, "step": 7191 }, { "epoch": 0.2997790838231003, "grad_norm": 572.0, "learning_rate": 8.209885782234752e-05, "loss": 19.0011, "step": 7192 }, { "epoch": 0.2998207661206286, "grad_norm": 209.0, "learning_rate": 8.20936821318442e-05, "loss": 11.0628, "step": 7193 }, { "epoch": 0.2998624484181568, "grad_norm": 160.0, "learning_rate": 8.208850585643068e-05, "loss": 10.8132, "step": 7194 }, { "epoch": 0.29990413071568506, "grad_norm": 192.0, "learning_rate": 8.208332899620127e-05, "loss": 11.4382, "step": 7195 }, { "epoch": 0.29994581301321327, "grad_norm": 258.0, "learning_rate": 8.207815155125039e-05, "loss": 8.6878, "step": 7196 }, { "epoch": 0.29998749531074154, "grad_norm": 390.0, "learning_rate": 8.20729735216723e-05, "loss": 14.5627, "step": 7197 }, { "epoch": 0.30002917760826975, "grad_norm": 704.0, "learning_rate": 8.206779490756144e-05, "loss": 22.7502, "step": 7198 }, { "epoch": 0.300070859905798, "grad_norm": 169.0, "learning_rate": 8.206261570901216e-05, "loss": 10.064, "step": 7199 }, { "epoch": 0.3001125422033262, "grad_norm": 592.0, "learning_rate": 8.205743592611888e-05, "loss": 19.7502, "step": 7200 }, { "epoch": 0.3001542245008545, "grad_norm": 190.0, "learning_rate": 8.205225555897598e-05, "loss": 11.2518, "step": 7201 }, { "epoch": 0.3001959067983827, "grad_norm": 482.0, "learning_rate": 8.204707460767786e-05, "loss": 15.8132, "step": 7202 }, { "epoch": 0.300237589095911, "grad_norm": 388.0, "learning_rate": 8.204189307231899e-05, "loss": 13.4384, "step": 7203 }, { "epoch": 0.3002792713934392, "grad_norm": 440.0, "learning_rate": 8.203671095299375e-05, "loss": 15.376, "step": 7204 }, { "epoch": 0.30032095369096745, "grad_norm": 284.0, "learning_rate": 8.203152824979664e-05, "loss": 11.8752, "step": 7205 }, { "epoch": 0.30036263598849566, "grad_norm": 227.0, "learning_rate": 8.202634496282204e-05, "loss": 12.1261, "step": 7206 }, { "epoch": 0.30040431828602393, "grad_norm": 76.5, "learning_rate": 8.202116109216449e-05, "loss": 9.9381, "step": 7207 }, { "epoch": 0.30044600058355214, "grad_norm": 54.25, "learning_rate": 8.201597663791843e-05, "loss": 7.7189, "step": 7208 }, { "epoch": 0.3004876828810804, "grad_norm": 282.0, "learning_rate": 8.201079160017835e-05, "loss": 12.6879, "step": 7209 }, { "epoch": 0.3005293651786086, "grad_norm": 147.0, "learning_rate": 8.200560597903874e-05, "loss": 9.5626, "step": 7210 }, { "epoch": 0.3005710474761369, "grad_norm": 324.0, "learning_rate": 8.200041977459414e-05, "loss": 11.188, "step": 7211 }, { "epoch": 0.3006127297736651, "grad_norm": 260.0, "learning_rate": 8.199523298693904e-05, "loss": 11.9377, "step": 7212 }, { "epoch": 0.30065441207119337, "grad_norm": 368.0, "learning_rate": 8.1990045616168e-05, "loss": 15.1253, "step": 7213 }, { "epoch": 0.3006960943687216, "grad_norm": 198.0, "learning_rate": 8.198485766237549e-05, "loss": 10.3765, "step": 7214 }, { "epoch": 0.30073777666624985, "grad_norm": 240.0, "learning_rate": 8.197966912565615e-05, "loss": 12.0001, "step": 7215 }, { "epoch": 0.30077945896377806, "grad_norm": 520.0, "learning_rate": 8.197448000610448e-05, "loss": 16.8772, "step": 7216 }, { "epoch": 0.3008211412613063, "grad_norm": 596.0, "learning_rate": 8.19692903038151e-05, "loss": 18.5005, "step": 7217 }, { "epoch": 0.30086282355883454, "grad_norm": 225.0, "learning_rate": 8.196410001888256e-05, "loss": 10.6253, "step": 7218 }, { "epoch": 0.3009045058563628, "grad_norm": 268.0, "learning_rate": 8.195890915140144e-05, "loss": 11.6878, "step": 7219 }, { "epoch": 0.30094618815389107, "grad_norm": 59.5, "learning_rate": 8.195371770146637e-05, "loss": 8.2503, "step": 7220 }, { "epoch": 0.3009878704514193, "grad_norm": 454.0, "learning_rate": 8.194852566917195e-05, "loss": 16.0003, "step": 7221 }, { "epoch": 0.30102955274894755, "grad_norm": 304.0, "learning_rate": 8.194333305461283e-05, "loss": 10.001, "step": 7222 }, { "epoch": 0.30107123504647576, "grad_norm": 290.0, "learning_rate": 8.193813985788362e-05, "loss": 13.3132, "step": 7223 }, { "epoch": 0.301112917344004, "grad_norm": 472.0, "learning_rate": 8.193294607907897e-05, "loss": 17.2503, "step": 7224 }, { "epoch": 0.30115459964153224, "grad_norm": 644.0, "learning_rate": 8.192775171829356e-05, "loss": 16.7508, "step": 7225 }, { "epoch": 0.3011962819390605, "grad_norm": 189.0, "learning_rate": 8.192255677562203e-05, "loss": 5.5945, "step": 7226 }, { "epoch": 0.3012379642365887, "grad_norm": 414.0, "learning_rate": 8.191736125115908e-05, "loss": 14.1884, "step": 7227 }, { "epoch": 0.301279646534117, "grad_norm": 32.75, "learning_rate": 8.191216514499937e-05, "loss": 5.8439, "step": 7228 }, { "epoch": 0.3013213288316452, "grad_norm": 410.0, "learning_rate": 8.190696845723765e-05, "loss": 14.3758, "step": 7229 }, { "epoch": 0.30136301112917346, "grad_norm": 189.0, "learning_rate": 8.190177118796856e-05, "loss": 10.6254, "step": 7230 }, { "epoch": 0.3014046934267017, "grad_norm": 298.0, "learning_rate": 8.189657333728687e-05, "loss": 14.0003, "step": 7231 }, { "epoch": 0.30144637572422994, "grad_norm": 384.0, "learning_rate": 8.189137490528731e-05, "loss": 13.2502, "step": 7232 }, { "epoch": 0.30148805802175815, "grad_norm": 1328.0, "learning_rate": 8.18861758920646e-05, "loss": 26.7538, "step": 7233 }, { "epoch": 0.3015297403192864, "grad_norm": 229.0, "learning_rate": 8.188097629771351e-05, "loss": 11.6877, "step": 7234 }, { "epoch": 0.30157142261681463, "grad_norm": 231.0, "learning_rate": 8.18757761223288e-05, "loss": 10.8759, "step": 7235 }, { "epoch": 0.3016131049143429, "grad_norm": 498.0, "learning_rate": 8.187057536600522e-05, "loss": 16.3782, "step": 7236 }, { "epoch": 0.3016547872118711, "grad_norm": 620.0, "learning_rate": 8.18653740288376e-05, "loss": 20.0002, "step": 7237 }, { "epoch": 0.3016964695093994, "grad_norm": 222.0, "learning_rate": 8.18601721109207e-05, "loss": 11.4377, "step": 7238 }, { "epoch": 0.3017381518069276, "grad_norm": 400.0, "learning_rate": 8.185496961234933e-05, "loss": 15.7502, "step": 7239 }, { "epoch": 0.30177983410445586, "grad_norm": 516.0, "learning_rate": 8.184976653321831e-05, "loss": 17.2502, "step": 7240 }, { "epoch": 0.30182151640198407, "grad_norm": 748.0, "learning_rate": 8.184456287362248e-05, "loss": 22.1271, "step": 7241 }, { "epoch": 0.30186319869951234, "grad_norm": 64.0, "learning_rate": 8.183935863365665e-05, "loss": 7.7825, "step": 7242 }, { "epoch": 0.30190488099704055, "grad_norm": 107.5, "learning_rate": 8.183415381341569e-05, "loss": 8.126, "step": 7243 }, { "epoch": 0.3019465632945688, "grad_norm": 458.0, "learning_rate": 8.182894841299445e-05, "loss": 17.0002, "step": 7244 }, { "epoch": 0.301988245592097, "grad_norm": 342.0, "learning_rate": 8.182374243248781e-05, "loss": 14.0007, "step": 7245 }, { "epoch": 0.3020299278896253, "grad_norm": 155.0, "learning_rate": 8.181853587199062e-05, "loss": 12.0005, "step": 7246 }, { "epoch": 0.3020716101871535, "grad_norm": 382.0, "learning_rate": 8.18133287315978e-05, "loss": 15.4377, "step": 7247 }, { "epoch": 0.3021132924846818, "grad_norm": 428.0, "learning_rate": 8.180812101140423e-05, "loss": 17.2509, "step": 7248 }, { "epoch": 0.30215497478221, "grad_norm": 720.0, "learning_rate": 8.180291271150485e-05, "loss": 20.7509, "step": 7249 }, { "epoch": 0.30219665707973825, "grad_norm": 266.0, "learning_rate": 8.179770383199455e-05, "loss": 10.9381, "step": 7250 }, { "epoch": 0.30223833937726646, "grad_norm": 235.0, "learning_rate": 8.179249437296828e-05, "loss": 12.3753, "step": 7251 }, { "epoch": 0.30228002167479473, "grad_norm": 458.0, "learning_rate": 8.178728433452097e-05, "loss": 14.8752, "step": 7252 }, { "epoch": 0.30232170397232294, "grad_norm": 540.0, "learning_rate": 8.178207371674757e-05, "loss": 21.8755, "step": 7253 }, { "epoch": 0.3023633862698512, "grad_norm": 528.0, "learning_rate": 8.177686251974308e-05, "loss": 17.3755, "step": 7254 }, { "epoch": 0.3024050685673794, "grad_norm": 552.0, "learning_rate": 8.177165074360245e-05, "loss": 18.5006, "step": 7255 }, { "epoch": 0.3024467508649077, "grad_norm": 280.0, "learning_rate": 8.176643838842065e-05, "loss": 13.0628, "step": 7256 }, { "epoch": 0.3024884331624359, "grad_norm": 109.5, "learning_rate": 8.176122545429269e-05, "loss": 6.3134, "step": 7257 }, { "epoch": 0.30253011545996417, "grad_norm": 304.0, "learning_rate": 8.175601194131357e-05, "loss": 14.0629, "step": 7258 }, { "epoch": 0.3025717977574924, "grad_norm": 340.0, "learning_rate": 8.175079784957834e-05, "loss": 12.6281, "step": 7259 }, { "epoch": 0.30261348005502064, "grad_norm": 160.0, "learning_rate": 8.174558317918197e-05, "loss": 9.6877, "step": 7260 }, { "epoch": 0.30265516235254886, "grad_norm": 568.0, "learning_rate": 8.174036793021955e-05, "loss": 17.2502, "step": 7261 }, { "epoch": 0.3026968446500771, "grad_norm": 98.5, "learning_rate": 8.173515210278611e-05, "loss": 9.1881, "step": 7262 }, { "epoch": 0.30273852694760534, "grad_norm": 158.0, "learning_rate": 8.172993569697669e-05, "loss": 8.7505, "step": 7263 }, { "epoch": 0.3027802092451336, "grad_norm": 1472.0, "learning_rate": 8.172471871288638e-05, "loss": 31.2528, "step": 7264 }, { "epoch": 0.3028218915426618, "grad_norm": 544.0, "learning_rate": 8.171950115061025e-05, "loss": 18.3756, "step": 7265 }, { "epoch": 0.3028635738401901, "grad_norm": 338.0, "learning_rate": 8.171428301024341e-05, "loss": 15.1889, "step": 7266 }, { "epoch": 0.3029052561377183, "grad_norm": 183.0, "learning_rate": 8.170906429188094e-05, "loss": 11.6878, "step": 7267 }, { "epoch": 0.30294693843524656, "grad_norm": 516.0, "learning_rate": 8.170384499561796e-05, "loss": 16.7512, "step": 7268 }, { "epoch": 0.30298862073277477, "grad_norm": 336.0, "learning_rate": 8.169862512154959e-05, "loss": 14.8127, "step": 7269 }, { "epoch": 0.30303030303030304, "grad_norm": 494.0, "learning_rate": 8.169340466977095e-05, "loss": 17.5001, "step": 7270 }, { "epoch": 0.30307198532783125, "grad_norm": 344.0, "learning_rate": 8.168818364037722e-05, "loss": 14.2504, "step": 7271 }, { "epoch": 0.3031136676253595, "grad_norm": 157.0, "learning_rate": 8.168296203346351e-05, "loss": 11.126, "step": 7272 }, { "epoch": 0.30315534992288773, "grad_norm": 920.0, "learning_rate": 8.167773984912501e-05, "loss": 20.3788, "step": 7273 }, { "epoch": 0.303197032220416, "grad_norm": 233.0, "learning_rate": 8.167251708745689e-05, "loss": 11.1254, "step": 7274 }, { "epoch": 0.3032387145179442, "grad_norm": 205.0, "learning_rate": 8.166729374855435e-05, "loss": 11.7507, "step": 7275 }, { "epoch": 0.3032803968154725, "grad_norm": 306.0, "learning_rate": 8.166206983251254e-05, "loss": 12.8753, "step": 7276 }, { "epoch": 0.3033220791130007, "grad_norm": 146.0, "learning_rate": 8.165684533942672e-05, "loss": 10.5631, "step": 7277 }, { "epoch": 0.30336376141052895, "grad_norm": 304.0, "learning_rate": 8.165162026939208e-05, "loss": 12.0003, "step": 7278 }, { "epoch": 0.30340544370805717, "grad_norm": 478.0, "learning_rate": 8.164639462250385e-05, "loss": 14.8809, "step": 7279 }, { "epoch": 0.30344712600558543, "grad_norm": 270.0, "learning_rate": 8.164116839885725e-05, "loss": 13.0007, "step": 7280 }, { "epoch": 0.30348880830311364, "grad_norm": 138.0, "learning_rate": 8.163594159854757e-05, "loss": 9.6878, "step": 7281 }, { "epoch": 0.3035304906006419, "grad_norm": 548.0, "learning_rate": 8.163071422167004e-05, "loss": 17.2502, "step": 7282 }, { "epoch": 0.3035721728981701, "grad_norm": 158.0, "learning_rate": 8.162548626831993e-05, "loss": 8.5628, "step": 7283 }, { "epoch": 0.3036138551956984, "grad_norm": 696.0, "learning_rate": 8.162025773859252e-05, "loss": 20.7503, "step": 7284 }, { "epoch": 0.3036555374932266, "grad_norm": 362.0, "learning_rate": 8.161502863258312e-05, "loss": 16.2506, "step": 7285 }, { "epoch": 0.30369721979075487, "grad_norm": 552.0, "learning_rate": 8.160979895038702e-05, "loss": 16.5036, "step": 7286 }, { "epoch": 0.3037389020882831, "grad_norm": 318.0, "learning_rate": 8.160456869209952e-05, "loss": 14.0001, "step": 7287 }, { "epoch": 0.30378058438581135, "grad_norm": 140.0, "learning_rate": 8.159933785781595e-05, "loss": 8.5632, "step": 7288 }, { "epoch": 0.30382226668333956, "grad_norm": 239.0, "learning_rate": 8.159410644763164e-05, "loss": 12.0002, "step": 7289 }, { "epoch": 0.3038639489808678, "grad_norm": 580.0, "learning_rate": 8.158887446164192e-05, "loss": 17.1254, "step": 7290 }, { "epoch": 0.30390563127839604, "grad_norm": 235.0, "learning_rate": 8.158364189994218e-05, "loss": 9.8753, "step": 7291 }, { "epoch": 0.3039473135759243, "grad_norm": 111.0, "learning_rate": 8.157840876262776e-05, "loss": 8.6881, "step": 7292 }, { "epoch": 0.30398899587345257, "grad_norm": 258.0, "learning_rate": 8.157317504979405e-05, "loss": 11.8141, "step": 7293 }, { "epoch": 0.3040306781709808, "grad_norm": 434.0, "learning_rate": 8.15679407615364e-05, "loss": 16.2502, "step": 7294 }, { "epoch": 0.30407236046850905, "grad_norm": 564.0, "learning_rate": 8.156270589795023e-05, "loss": 17.8751, "step": 7295 }, { "epoch": 0.30411404276603726, "grad_norm": 158.0, "learning_rate": 8.155747045913094e-05, "loss": 9.8755, "step": 7296 }, { "epoch": 0.30415572506356553, "grad_norm": 348.0, "learning_rate": 8.155223444517398e-05, "loss": 14.0005, "step": 7297 }, { "epoch": 0.30419740736109374, "grad_norm": 260.0, "learning_rate": 8.15469978561747e-05, "loss": 13.3755, "step": 7298 }, { "epoch": 0.304239089658622, "grad_norm": 928.0, "learning_rate": 8.154176069222862e-05, "loss": 26.2503, "step": 7299 }, { "epoch": 0.3042807719561502, "grad_norm": 904.0, "learning_rate": 8.153652295343114e-05, "loss": 22.5046, "step": 7300 }, { "epoch": 0.3043224542536785, "grad_norm": 816.0, "learning_rate": 8.153128463987772e-05, "loss": 22.5002, "step": 7301 }, { "epoch": 0.3043641365512067, "grad_norm": 148.0, "learning_rate": 8.152604575166384e-05, "loss": 12.5632, "step": 7302 }, { "epoch": 0.30440581884873497, "grad_norm": 231.0, "learning_rate": 8.152080628888499e-05, "loss": 8.4379, "step": 7303 }, { "epoch": 0.3044475011462632, "grad_norm": 85.0, "learning_rate": 8.151556625163664e-05, "loss": 8.0627, "step": 7304 }, { "epoch": 0.30448918344379144, "grad_norm": 241.0, "learning_rate": 8.151032564001431e-05, "loss": 13.3763, "step": 7305 }, { "epoch": 0.30453086574131966, "grad_norm": 548.0, "learning_rate": 8.150508445411348e-05, "loss": 17.8752, "step": 7306 }, { "epoch": 0.3045725480388479, "grad_norm": 1792.0, "learning_rate": 8.14998426940297e-05, "loss": 38.7507, "step": 7307 }, { "epoch": 0.30461423033637613, "grad_norm": 167.0, "learning_rate": 8.149460035985847e-05, "loss": 11.063, "step": 7308 }, { "epoch": 0.3046559126339044, "grad_norm": 334.0, "learning_rate": 8.148935745169536e-05, "loss": 12.5009, "step": 7309 }, { "epoch": 0.3046975949314326, "grad_norm": 115.5, "learning_rate": 8.148411396963593e-05, "loss": 8.8754, "step": 7310 }, { "epoch": 0.3047392772289609, "grad_norm": 76.0, "learning_rate": 8.147886991377573e-05, "loss": 6.344, "step": 7311 }, { "epoch": 0.3047809595264891, "grad_norm": 227.0, "learning_rate": 8.147362528421033e-05, "loss": 11.8132, "step": 7312 }, { "epoch": 0.30482264182401736, "grad_norm": 444.0, "learning_rate": 8.14683800810353e-05, "loss": 16.8755, "step": 7313 }, { "epoch": 0.30486432412154557, "grad_norm": 253.0, "learning_rate": 8.146313430434627e-05, "loss": 12.6885, "step": 7314 }, { "epoch": 0.30490600641907384, "grad_norm": 848.0, "learning_rate": 8.14578879542388e-05, "loss": 23.8753, "step": 7315 }, { "epoch": 0.30494768871660205, "grad_norm": 484.0, "learning_rate": 8.145264103080855e-05, "loss": 18.3765, "step": 7316 }, { "epoch": 0.3049893710141303, "grad_norm": 147.0, "learning_rate": 8.14473935341511e-05, "loss": 11.1877, "step": 7317 }, { "epoch": 0.30503105331165853, "grad_norm": 688.0, "learning_rate": 8.144214546436212e-05, "loss": 19.2548, "step": 7318 }, { "epoch": 0.3050727356091868, "grad_norm": 181.0, "learning_rate": 8.143689682153728e-05, "loss": 10.0628, "step": 7319 }, { "epoch": 0.305114417906715, "grad_norm": 512.0, "learning_rate": 8.143164760577218e-05, "loss": 17.5009, "step": 7320 }, { "epoch": 0.3051561002042433, "grad_norm": 352.0, "learning_rate": 8.14263978171625e-05, "loss": 12.7507, "step": 7321 }, { "epoch": 0.3051977825017715, "grad_norm": 424.0, "learning_rate": 8.142114745580393e-05, "loss": 17.2507, "step": 7322 }, { "epoch": 0.30523946479929975, "grad_norm": 1696.0, "learning_rate": 8.141589652179218e-05, "loss": 31.5018, "step": 7323 }, { "epoch": 0.30528114709682797, "grad_norm": 1064.0, "learning_rate": 8.141064501522294e-05, "loss": 19.8811, "step": 7324 }, { "epoch": 0.30532282939435623, "grad_norm": 322.0, "learning_rate": 8.140539293619187e-05, "loss": 13.3763, "step": 7325 }, { "epoch": 0.30536451169188444, "grad_norm": 492.0, "learning_rate": 8.140014028479474e-05, "loss": 16.7505, "step": 7326 }, { "epoch": 0.3054061939894127, "grad_norm": 104.0, "learning_rate": 8.139488706112726e-05, "loss": 8.6264, "step": 7327 }, { "epoch": 0.3054478762869409, "grad_norm": 370.0, "learning_rate": 8.138963326528518e-05, "loss": 13.5627, "step": 7328 }, { "epoch": 0.3054895585844692, "grad_norm": 432.0, "learning_rate": 8.138437889736425e-05, "loss": 16.3752, "step": 7329 }, { "epoch": 0.3055312408819974, "grad_norm": 552.0, "learning_rate": 8.137912395746023e-05, "loss": 19.8753, "step": 7330 }, { "epoch": 0.30557292317952567, "grad_norm": 732.0, "learning_rate": 8.137386844566887e-05, "loss": 19.0033, "step": 7331 }, { "epoch": 0.3056146054770539, "grad_norm": 752.0, "learning_rate": 8.1368612362086e-05, "loss": 24.0004, "step": 7332 }, { "epoch": 0.30565628777458215, "grad_norm": 366.0, "learning_rate": 8.136335570680738e-05, "loss": 12.8752, "step": 7333 }, { "epoch": 0.30569797007211036, "grad_norm": 149.0, "learning_rate": 8.135809847992882e-05, "loss": 9.4379, "step": 7334 }, { "epoch": 0.3057396523696386, "grad_norm": 1004.0, "learning_rate": 8.135284068154612e-05, "loss": 26.3764, "step": 7335 }, { "epoch": 0.30578133466716684, "grad_norm": 478.0, "learning_rate": 8.134758231175512e-05, "loss": 17.5005, "step": 7336 }, { "epoch": 0.3058230169646951, "grad_norm": 328.0, "learning_rate": 8.134232337065163e-05, "loss": 14.7501, "step": 7337 }, { "epoch": 0.3058646992622233, "grad_norm": 214.0, "learning_rate": 8.133706385833156e-05, "loss": 11.4381, "step": 7338 }, { "epoch": 0.3059063815597516, "grad_norm": 868.0, "learning_rate": 8.133180377489068e-05, "loss": 22.0013, "step": 7339 }, { "epoch": 0.3059480638572798, "grad_norm": 892.0, "learning_rate": 8.132654312042491e-05, "loss": 19.631, "step": 7340 }, { "epoch": 0.30598974615480806, "grad_norm": 61.0, "learning_rate": 8.13212818950301e-05, "loss": 8.1881, "step": 7341 }, { "epoch": 0.3060314284523363, "grad_norm": 752.0, "learning_rate": 8.131602009880216e-05, "loss": 22.7507, "step": 7342 }, { "epoch": 0.30607311074986454, "grad_norm": 248.0, "learning_rate": 8.131075773183696e-05, "loss": 13.3752, "step": 7343 }, { "epoch": 0.30611479304739275, "grad_norm": 292.0, "learning_rate": 8.130549479423044e-05, "loss": 12.5626, "step": 7344 }, { "epoch": 0.306156475344921, "grad_norm": 203.0, "learning_rate": 8.130023128607849e-05, "loss": 9.6255, "step": 7345 }, { "epoch": 0.30619815764244923, "grad_norm": 358.0, "learning_rate": 8.129496720747703e-05, "loss": 13.1878, "step": 7346 }, { "epoch": 0.3062398399399775, "grad_norm": 55.75, "learning_rate": 8.128970255852205e-05, "loss": 6.7503, "step": 7347 }, { "epoch": 0.3062815222375057, "grad_norm": 213.0, "learning_rate": 8.128443733930942e-05, "loss": 10.502, "step": 7348 }, { "epoch": 0.306323204535034, "grad_norm": 135.0, "learning_rate": 8.127917154993518e-05, "loss": 6.6566, "step": 7349 }, { "epoch": 0.3063648868325622, "grad_norm": 162.0, "learning_rate": 8.127390519049525e-05, "loss": 11.0002, "step": 7350 }, { "epoch": 0.30640656913009046, "grad_norm": 133.0, "learning_rate": 8.126863826108562e-05, "loss": 9.0628, "step": 7351 }, { "epoch": 0.30644825142761867, "grad_norm": 238.0, "learning_rate": 8.126337076180227e-05, "loss": 10.5628, "step": 7352 }, { "epoch": 0.30648993372514693, "grad_norm": 888.0, "learning_rate": 8.12581026927412e-05, "loss": 24.2503, "step": 7353 }, { "epoch": 0.30653161602267515, "grad_norm": 135.0, "learning_rate": 8.125283405399847e-05, "loss": 9.6877, "step": 7354 }, { "epoch": 0.3065732983202034, "grad_norm": 188.0, "learning_rate": 8.124756484567005e-05, "loss": 10.813, "step": 7355 }, { "epoch": 0.3066149806177316, "grad_norm": 330.0, "learning_rate": 8.1242295067852e-05, "loss": 12.8131, "step": 7356 }, { "epoch": 0.3066566629152599, "grad_norm": 215.0, "learning_rate": 8.123702472064032e-05, "loss": 12.0004, "step": 7357 }, { "epoch": 0.3066983452127881, "grad_norm": 304.0, "learning_rate": 8.123175380413112e-05, "loss": 13.8757, "step": 7358 }, { "epoch": 0.30674002751031637, "grad_norm": 264.0, "learning_rate": 8.122648231842042e-05, "loss": 12.8127, "step": 7359 }, { "epoch": 0.3067817098078446, "grad_norm": 205.0, "learning_rate": 8.122121026360431e-05, "loss": 11.7509, "step": 7360 }, { "epoch": 0.30682339210537285, "grad_norm": 482.0, "learning_rate": 8.121593763977886e-05, "loss": 17.0054, "step": 7361 }, { "epoch": 0.30686507440290106, "grad_norm": 560.0, "learning_rate": 8.12106644470402e-05, "loss": 19.1255, "step": 7362 }, { "epoch": 0.30690675670042933, "grad_norm": 446.0, "learning_rate": 8.120539068548439e-05, "loss": 16.7504, "step": 7363 }, { "epoch": 0.30694843899795754, "grad_norm": 58.0, "learning_rate": 8.120011635520757e-05, "loss": 8.0003, "step": 7364 }, { "epoch": 0.3069901212954858, "grad_norm": 700.0, "learning_rate": 8.119484145630586e-05, "loss": 21.0009, "step": 7365 }, { "epoch": 0.3070318035930141, "grad_norm": 113.5, "learning_rate": 8.11895659888754e-05, "loss": 4.8128, "step": 7366 }, { "epoch": 0.3070734858905423, "grad_norm": 556.0, "learning_rate": 8.118428995301233e-05, "loss": 16.1252, "step": 7367 }, { "epoch": 0.30711516818807055, "grad_norm": 308.0, "learning_rate": 8.11790133488128e-05, "loss": 11.9384, "step": 7368 }, { "epoch": 0.30715685048559876, "grad_norm": 338.0, "learning_rate": 8.117373617637299e-05, "loss": 14.3136, "step": 7369 }, { "epoch": 0.30719853278312703, "grad_norm": 168.0, "learning_rate": 8.116845843578907e-05, "loss": 10.6252, "step": 7370 }, { "epoch": 0.30724021508065524, "grad_norm": 245.0, "learning_rate": 8.116318012715722e-05, "loss": 12.313, "step": 7371 }, { "epoch": 0.3072818973781835, "grad_norm": 332.0, "learning_rate": 8.115790125057365e-05, "loss": 13.4395, "step": 7372 }, { "epoch": 0.3073235796757117, "grad_norm": 404.0, "learning_rate": 8.115262180613456e-05, "loss": 14.5003, "step": 7373 }, { "epoch": 0.30736526197324, "grad_norm": 1032.0, "learning_rate": 8.11473417939362e-05, "loss": 22.8799, "step": 7374 }, { "epoch": 0.3074069442707682, "grad_norm": 992.0, "learning_rate": 8.114206121407473e-05, "loss": 27.6257, "step": 7375 }, { "epoch": 0.30744862656829647, "grad_norm": 552.0, "learning_rate": 8.113678006664647e-05, "loss": 19.7503, "step": 7376 }, { "epoch": 0.3074903088658247, "grad_norm": 169.0, "learning_rate": 8.11314983517476e-05, "loss": 10.6265, "step": 7377 }, { "epoch": 0.30753199116335295, "grad_norm": 105.5, "learning_rate": 8.112621606947441e-05, "loss": 8.3754, "step": 7378 }, { "epoch": 0.30757367346088116, "grad_norm": 428.0, "learning_rate": 8.112093321992318e-05, "loss": 16.6254, "step": 7379 }, { "epoch": 0.3076153557584094, "grad_norm": 192.0, "learning_rate": 8.111564980319018e-05, "loss": 9.8135, "step": 7380 }, { "epoch": 0.30765703805593764, "grad_norm": 370.0, "learning_rate": 8.111036581937169e-05, "loss": 10.8768, "step": 7381 }, { "epoch": 0.3076987203534659, "grad_norm": 227.0, "learning_rate": 8.110508126856403e-05, "loss": 11.4383, "step": 7382 }, { "epoch": 0.3077404026509941, "grad_norm": 512.0, "learning_rate": 8.109979615086349e-05, "loss": 17.2505, "step": 7383 }, { "epoch": 0.3077820849485224, "grad_norm": 217.0, "learning_rate": 8.109451046636642e-05, "loss": 11.8129, "step": 7384 }, { "epoch": 0.3078237672460506, "grad_norm": 348.0, "learning_rate": 8.108922421516913e-05, "loss": 13.8127, "step": 7385 }, { "epoch": 0.30786544954357886, "grad_norm": 322.0, "learning_rate": 8.108393739736798e-05, "loss": 14.1878, "step": 7386 }, { "epoch": 0.3079071318411071, "grad_norm": 368.0, "learning_rate": 8.10786500130593e-05, "loss": 12.7503, "step": 7387 }, { "epoch": 0.30794881413863534, "grad_norm": 564.0, "learning_rate": 8.107336206233946e-05, "loss": 18.6253, "step": 7388 }, { "epoch": 0.30799049643616355, "grad_norm": 366.0, "learning_rate": 8.106807354530483e-05, "loss": 14.0663, "step": 7389 }, { "epoch": 0.3080321787336918, "grad_norm": 560.0, "learning_rate": 8.106278446205183e-05, "loss": 17.3755, "step": 7390 }, { "epoch": 0.30807386103122003, "grad_norm": 368.0, "learning_rate": 8.10574948126768e-05, "loss": 14.3755, "step": 7391 }, { "epoch": 0.3081155433287483, "grad_norm": 632.0, "learning_rate": 8.105220459727618e-05, "loss": 16.7504, "step": 7392 }, { "epoch": 0.3081572256262765, "grad_norm": 155.0, "learning_rate": 8.104691381594638e-05, "loss": 11.2505, "step": 7393 }, { "epoch": 0.3081989079238048, "grad_norm": 382.0, "learning_rate": 8.104162246878382e-05, "loss": 14.8129, "step": 7394 }, { "epoch": 0.308240590221333, "grad_norm": 348.0, "learning_rate": 8.103633055588493e-05, "loss": 14.1877, "step": 7395 }, { "epoch": 0.30828227251886126, "grad_norm": 408.0, "learning_rate": 8.103103807734616e-05, "loss": 17.6253, "step": 7396 }, { "epoch": 0.30832395481638947, "grad_norm": 330.0, "learning_rate": 8.102574503326396e-05, "loss": 14.3128, "step": 7397 }, { "epoch": 0.30836563711391773, "grad_norm": 418.0, "learning_rate": 8.10204514237348e-05, "loss": 14.9384, "step": 7398 }, { "epoch": 0.30840731941144595, "grad_norm": 117.5, "learning_rate": 8.101515724885518e-05, "loss": 11.2513, "step": 7399 }, { "epoch": 0.3084490017089742, "grad_norm": 294.0, "learning_rate": 8.100986250872156e-05, "loss": 12.6253, "step": 7400 }, { "epoch": 0.3084906840065024, "grad_norm": 1096.0, "learning_rate": 8.100456720343042e-05, "loss": 26.2512, "step": 7401 }, { "epoch": 0.3085323663040307, "grad_norm": 232.0, "learning_rate": 8.099927133307832e-05, "loss": 13.3129, "step": 7402 }, { "epoch": 0.3085740486015589, "grad_norm": 91.5, "learning_rate": 8.099397489776172e-05, "loss": 8.9377, "step": 7403 }, { "epoch": 0.30861573089908717, "grad_norm": 108.0, "learning_rate": 8.09886778975772e-05, "loss": 9.6257, "step": 7404 }, { "epoch": 0.3086574131966154, "grad_norm": 338.0, "learning_rate": 8.098338033262127e-05, "loss": 14.5639, "step": 7405 }, { "epoch": 0.30869909549414365, "grad_norm": 107.5, "learning_rate": 8.097808220299048e-05, "loss": 6.469, "step": 7406 }, { "epoch": 0.30874077779167186, "grad_norm": 448.0, "learning_rate": 8.09727835087814e-05, "loss": 15.3751, "step": 7407 }, { "epoch": 0.30878246008920013, "grad_norm": 392.0, "learning_rate": 8.096748425009056e-05, "loss": 14.7503, "step": 7408 }, { "epoch": 0.30882414238672834, "grad_norm": 147.0, "learning_rate": 8.09621844270146e-05, "loss": 10.0021, "step": 7409 }, { "epoch": 0.3088658246842566, "grad_norm": 344.0, "learning_rate": 8.095688403965007e-05, "loss": 13.6252, "step": 7410 }, { "epoch": 0.3089075069817848, "grad_norm": 524.0, "learning_rate": 8.095158308809359e-05, "loss": 17.6254, "step": 7411 }, { "epoch": 0.3089491892793131, "grad_norm": 448.0, "learning_rate": 8.094628157244175e-05, "loss": 14.5642, "step": 7412 }, { "epoch": 0.3089908715768413, "grad_norm": 486.0, "learning_rate": 8.094097949279118e-05, "loss": 17.7515, "step": 7413 }, { "epoch": 0.30903255387436956, "grad_norm": 328.0, "learning_rate": 8.09356768492385e-05, "loss": 12.1887, "step": 7414 }, { "epoch": 0.3090742361718978, "grad_norm": 504.0, "learning_rate": 8.093037364188039e-05, "loss": 17.6251, "step": 7415 }, { "epoch": 0.30911591846942604, "grad_norm": 192.0, "learning_rate": 8.092506987081347e-05, "loss": 9.8758, "step": 7416 }, { "epoch": 0.30915760076695425, "grad_norm": 288.0, "learning_rate": 8.091976553613439e-05, "loss": 13.6262, "step": 7417 }, { "epoch": 0.3091992830644825, "grad_norm": 284.0, "learning_rate": 8.091446063793987e-05, "loss": 9.8753, "step": 7418 }, { "epoch": 0.30924096536201073, "grad_norm": 608.0, "learning_rate": 8.090915517632652e-05, "loss": 21.1253, "step": 7419 }, { "epoch": 0.309282647659539, "grad_norm": 83.0, "learning_rate": 8.09038491513911e-05, "loss": 6.7817, "step": 7420 }, { "epoch": 0.3093243299570672, "grad_norm": 936.0, "learning_rate": 8.089854256323028e-05, "loss": 20.5056, "step": 7421 }, { "epoch": 0.3093660122545955, "grad_norm": 544.0, "learning_rate": 8.089323541194075e-05, "loss": 16.8787, "step": 7422 }, { "epoch": 0.3094076945521237, "grad_norm": 118.5, "learning_rate": 8.08879276976193e-05, "loss": 9.0005, "step": 7423 }, { "epoch": 0.30944937684965196, "grad_norm": 108.5, "learning_rate": 8.088261942036262e-05, "loss": 8.7504, "step": 7424 }, { "epoch": 0.30949105914718017, "grad_norm": 226.0, "learning_rate": 8.087731058026747e-05, "loss": 11.9377, "step": 7425 }, { "epoch": 0.30953274144470844, "grad_norm": 346.0, "learning_rate": 8.087200117743057e-05, "loss": 14.1876, "step": 7426 }, { "epoch": 0.30957442374223665, "grad_norm": 155.0, "learning_rate": 8.08666912119487e-05, "loss": 9.5643, "step": 7427 }, { "epoch": 0.3096161060397649, "grad_norm": 172.0, "learning_rate": 8.086138068391866e-05, "loss": 10.6879, "step": 7428 }, { "epoch": 0.3096577883372931, "grad_norm": 282.0, "learning_rate": 8.085606959343723e-05, "loss": 12.8758, "step": 7429 }, { "epoch": 0.3096994706348214, "grad_norm": 148.0, "learning_rate": 8.085075794060118e-05, "loss": 10.5011, "step": 7430 }, { "epoch": 0.3097411529323496, "grad_norm": 440.0, "learning_rate": 8.084544572550731e-05, "loss": 16.0011, "step": 7431 }, { "epoch": 0.3097828352298779, "grad_norm": 308.0, "learning_rate": 8.084013294825248e-05, "loss": 12.6883, "step": 7432 }, { "epoch": 0.3098245175274061, "grad_norm": 470.0, "learning_rate": 8.083481960893348e-05, "loss": 17.3753, "step": 7433 }, { "epoch": 0.30986619982493435, "grad_norm": 234.0, "learning_rate": 8.082950570764714e-05, "loss": 11.6253, "step": 7434 }, { "epoch": 0.30990788212246256, "grad_norm": 366.0, "learning_rate": 8.082419124449035e-05, "loss": 14.0629, "step": 7435 }, { "epoch": 0.30994956441999083, "grad_norm": 296.0, "learning_rate": 8.081887621955992e-05, "loss": 13.5002, "step": 7436 }, { "epoch": 0.30999124671751904, "grad_norm": 320.0, "learning_rate": 8.081356063295273e-05, "loss": 13.5042, "step": 7437 }, { "epoch": 0.3100329290150473, "grad_norm": 510.0, "learning_rate": 8.080824448476567e-05, "loss": 15.1297, "step": 7438 }, { "epoch": 0.3100746113125756, "grad_norm": 368.0, "learning_rate": 8.080292777509563e-05, "loss": 15.8132, "step": 7439 }, { "epoch": 0.3101162936101038, "grad_norm": 203.0, "learning_rate": 8.079761050403949e-05, "loss": 12.1253, "step": 7440 }, { "epoch": 0.31015797590763206, "grad_norm": 190.0, "learning_rate": 8.079229267169415e-05, "loss": 9.7503, "step": 7441 }, { "epoch": 0.31019965820516027, "grad_norm": 119.5, "learning_rate": 8.078697427815656e-05, "loss": 8.4377, "step": 7442 }, { "epoch": 0.31024134050268853, "grad_norm": 856.0, "learning_rate": 8.078165532352362e-05, "loss": 18.38, "step": 7443 }, { "epoch": 0.31028302280021675, "grad_norm": 580.0, "learning_rate": 8.077633580789229e-05, "loss": 19.1255, "step": 7444 }, { "epoch": 0.310324705097745, "grad_norm": 426.0, "learning_rate": 8.07710157313595e-05, "loss": 16.1256, "step": 7445 }, { "epoch": 0.3103663873952732, "grad_norm": 556.0, "learning_rate": 8.076569509402222e-05, "loss": 17.6253, "step": 7446 }, { "epoch": 0.3104080696928015, "grad_norm": 460.0, "learning_rate": 8.076037389597742e-05, "loss": 15.0677, "step": 7447 }, { "epoch": 0.3104497519903297, "grad_norm": 398.0, "learning_rate": 8.075505213732206e-05, "loss": 14.3752, "step": 7448 }, { "epoch": 0.31049143428785797, "grad_norm": 468.0, "learning_rate": 8.074972981815316e-05, "loss": 15.3753, "step": 7449 }, { "epoch": 0.3105331165853862, "grad_norm": 318.0, "learning_rate": 8.074440693856768e-05, "loss": 12.8129, "step": 7450 }, { "epoch": 0.31057479888291445, "grad_norm": 350.0, "learning_rate": 8.073908349866268e-05, "loss": 14.5003, "step": 7451 }, { "epoch": 0.31061648118044266, "grad_norm": 608.0, "learning_rate": 8.073375949853515e-05, "loss": 15.6878, "step": 7452 }, { "epoch": 0.31065816347797093, "grad_norm": 183.0, "learning_rate": 8.072843493828213e-05, "loss": 11.8756, "step": 7453 }, { "epoch": 0.31069984577549914, "grad_norm": 180.0, "learning_rate": 8.072310981800065e-05, "loss": 10.0626, "step": 7454 }, { "epoch": 0.3107415280730274, "grad_norm": 384.0, "learning_rate": 8.071778413778775e-05, "loss": 12.7502, "step": 7455 }, { "epoch": 0.3107832103705556, "grad_norm": 205.0, "learning_rate": 8.071245789774053e-05, "loss": 10.0003, "step": 7456 }, { "epoch": 0.3108248926680839, "grad_norm": 154.0, "learning_rate": 8.070713109795603e-05, "loss": 11.8131, "step": 7457 }, { "epoch": 0.3108665749656121, "grad_norm": 364.0, "learning_rate": 8.070180373853134e-05, "loss": 14.8126, "step": 7458 }, { "epoch": 0.31090825726314036, "grad_norm": 772.0, "learning_rate": 8.069647581956355e-05, "loss": 22.0009, "step": 7459 }, { "epoch": 0.3109499395606686, "grad_norm": 456.0, "learning_rate": 8.069114734114976e-05, "loss": 17.1251, "step": 7460 }, { "epoch": 0.31099162185819684, "grad_norm": 712.0, "learning_rate": 8.068581830338708e-05, "loss": 21.3757, "step": 7461 }, { "epoch": 0.31103330415572505, "grad_norm": 296.0, "learning_rate": 8.068048870637265e-05, "loss": 12.6877, "step": 7462 }, { "epoch": 0.3110749864532533, "grad_norm": 684.0, "learning_rate": 8.067515855020357e-05, "loss": 19.6252, "step": 7463 }, { "epoch": 0.31111666875078153, "grad_norm": 388.0, "learning_rate": 8.066982783497702e-05, "loss": 15.5003, "step": 7464 }, { "epoch": 0.3111583510483098, "grad_norm": 462.0, "learning_rate": 8.066449656079015e-05, "loss": 16.1254, "step": 7465 }, { "epoch": 0.311200033345838, "grad_norm": 245.0, "learning_rate": 8.065916472774009e-05, "loss": 11.3127, "step": 7466 }, { "epoch": 0.3112417156433663, "grad_norm": 106.0, "learning_rate": 8.065383233592404e-05, "loss": 8.6894, "step": 7467 }, { "epoch": 0.3112833979408945, "grad_norm": 1176.0, "learning_rate": 8.064849938543915e-05, "loss": 25.38, "step": 7468 }, { "epoch": 0.31132508023842276, "grad_norm": 160.0, "learning_rate": 8.064316587638265e-05, "loss": 10.1878, "step": 7469 }, { "epoch": 0.31136676253595097, "grad_norm": 96.0, "learning_rate": 8.063783180885173e-05, "loss": 8.6879, "step": 7470 }, { "epoch": 0.31140844483347924, "grad_norm": 276.0, "learning_rate": 8.063249718294363e-05, "loss": 9.8768, "step": 7471 }, { "epoch": 0.31145012713100745, "grad_norm": 396.0, "learning_rate": 8.062716199875553e-05, "loss": 16.1264, "step": 7472 }, { "epoch": 0.3114918094285357, "grad_norm": 644.0, "learning_rate": 8.062182625638468e-05, "loss": 20.0004, "step": 7473 }, { "epoch": 0.3115334917260639, "grad_norm": 422.0, "learning_rate": 8.061648995592833e-05, "loss": 15.6878, "step": 7474 }, { "epoch": 0.3115751740235922, "grad_norm": 270.0, "learning_rate": 8.061115309748374e-05, "loss": 11.3127, "step": 7475 }, { "epoch": 0.3116168563211204, "grad_norm": 308.0, "learning_rate": 8.060581568114816e-05, "loss": 14.313, "step": 7476 }, { "epoch": 0.3116585386186487, "grad_norm": 302.0, "learning_rate": 8.060047770701889e-05, "loss": 12.0007, "step": 7477 }, { "epoch": 0.3117002209161769, "grad_norm": 326.0, "learning_rate": 8.059513917519316e-05, "loss": 13.3128, "step": 7478 }, { "epoch": 0.31174190321370515, "grad_norm": 189.0, "learning_rate": 8.058980008576833e-05, "loss": 12.1254, "step": 7479 }, { "epoch": 0.31178358551123336, "grad_norm": 342.0, "learning_rate": 8.058446043884168e-05, "loss": 14.5006, "step": 7480 }, { "epoch": 0.31182526780876163, "grad_norm": 43.5, "learning_rate": 8.057912023451051e-05, "loss": 7.5003, "step": 7481 }, { "epoch": 0.31186695010628984, "grad_norm": 101.0, "learning_rate": 8.057377947287217e-05, "loss": 8.8126, "step": 7482 }, { "epoch": 0.3119086324038181, "grad_norm": 284.0, "learning_rate": 8.056843815402399e-05, "loss": 11.1882, "step": 7483 }, { "epoch": 0.3119503147013463, "grad_norm": 204.0, "learning_rate": 8.056309627806329e-05, "loss": 11.0641, "step": 7484 }, { "epoch": 0.3119919969988746, "grad_norm": 330.0, "learning_rate": 8.055775384508746e-05, "loss": 14.501, "step": 7485 }, { "epoch": 0.3120336792964028, "grad_norm": 286.0, "learning_rate": 8.055241085519384e-05, "loss": 12.8752, "step": 7486 }, { "epoch": 0.31207536159393107, "grad_norm": 968.0, "learning_rate": 8.054706730847985e-05, "loss": 26.1251, "step": 7487 }, { "epoch": 0.3121170438914593, "grad_norm": 316.0, "learning_rate": 8.054172320504284e-05, "loss": 13.1255, "step": 7488 }, { "epoch": 0.31215872618898755, "grad_norm": 216.0, "learning_rate": 8.053637854498018e-05, "loss": 12.0628, "step": 7489 }, { "epoch": 0.31220040848651576, "grad_norm": 111.5, "learning_rate": 8.053103332838934e-05, "loss": 10.2504, "step": 7490 }, { "epoch": 0.312242090784044, "grad_norm": 480.0, "learning_rate": 8.05256875553677e-05, "loss": 17.1253, "step": 7491 }, { "epoch": 0.31228377308157224, "grad_norm": 448.0, "learning_rate": 8.052034122601269e-05, "loss": 16.0004, "step": 7492 }, { "epoch": 0.3123254553791005, "grad_norm": 592.0, "learning_rate": 8.051499434042176e-05, "loss": 18.8755, "step": 7493 }, { "epoch": 0.3123671376766287, "grad_norm": 336.0, "learning_rate": 8.050964689869234e-05, "loss": 14.3755, "step": 7494 }, { "epoch": 0.312408819974157, "grad_norm": 222.0, "learning_rate": 8.05042989009219e-05, "loss": 12.3754, "step": 7495 }, { "epoch": 0.3124505022716852, "grad_norm": 324.0, "learning_rate": 8.049895034720791e-05, "loss": 11.5004, "step": 7496 }, { "epoch": 0.31249218456921346, "grad_norm": 394.0, "learning_rate": 8.049360123764785e-05, "loss": 14.5001, "step": 7497 }, { "epoch": 0.31253386686674167, "grad_norm": 316.0, "learning_rate": 8.048825157233917e-05, "loss": 12.3128, "step": 7498 }, { "epoch": 0.31257554916426994, "grad_norm": 524.0, "learning_rate": 8.048290135137942e-05, "loss": 17.1253, "step": 7499 }, { "epoch": 0.31261723146179815, "grad_norm": 332.0, "learning_rate": 8.047755057486609e-05, "loss": 13.9381, "step": 7500 }, { "epoch": 0.3126589137593264, "grad_norm": 159.0, "learning_rate": 8.047219924289669e-05, "loss": 6.2816, "step": 7501 }, { "epoch": 0.31270059605685463, "grad_norm": 84.0, "learning_rate": 8.046684735556875e-05, "loss": 5.8441, "step": 7502 }, { "epoch": 0.3127422783543829, "grad_norm": 410.0, "learning_rate": 8.046149491297983e-05, "loss": 13.0631, "step": 7503 }, { "epoch": 0.3127839606519111, "grad_norm": 221.0, "learning_rate": 8.045614191522743e-05, "loss": 12.438, "step": 7504 }, { "epoch": 0.3128256429494394, "grad_norm": 406.0, "learning_rate": 8.045078836240916e-05, "loss": 14.8758, "step": 7505 }, { "epoch": 0.3128673252469676, "grad_norm": 266.0, "learning_rate": 8.044543425462257e-05, "loss": 12.3755, "step": 7506 }, { "epoch": 0.31290900754449585, "grad_norm": 189.0, "learning_rate": 8.044007959196523e-05, "loss": 12.0006, "step": 7507 }, { "epoch": 0.31295068984202407, "grad_norm": 82.0, "learning_rate": 8.043472437453474e-05, "loss": 9.126, "step": 7508 }, { "epoch": 0.31299237213955233, "grad_norm": 140.0, "learning_rate": 8.04293686024287e-05, "loss": 7.844, "step": 7509 }, { "epoch": 0.31303405443708054, "grad_norm": 116.0, "learning_rate": 8.042401227574473e-05, "loss": 8.1257, "step": 7510 }, { "epoch": 0.3130757367346088, "grad_norm": 172.0, "learning_rate": 8.04186553945804e-05, "loss": 10.5629, "step": 7511 }, { "epoch": 0.3131174190321371, "grad_norm": 296.0, "learning_rate": 8.04132979590334e-05, "loss": 13.5627, "step": 7512 }, { "epoch": 0.3131591013296653, "grad_norm": 247.0, "learning_rate": 8.040793996920133e-05, "loss": 12.9381, "step": 7513 }, { "epoch": 0.31320078362719356, "grad_norm": 182.0, "learning_rate": 8.040258142518187e-05, "loss": 10.1886, "step": 7514 }, { "epoch": 0.31324246592472177, "grad_norm": 328.0, "learning_rate": 8.039722232707266e-05, "loss": 15.3129, "step": 7515 }, { "epoch": 0.31328414822225004, "grad_norm": 296.0, "learning_rate": 8.039186267497136e-05, "loss": 14.0635, "step": 7516 }, { "epoch": 0.31332583051977825, "grad_norm": 354.0, "learning_rate": 8.038650246897567e-05, "loss": 13.5634, "step": 7517 }, { "epoch": 0.3133675128173065, "grad_norm": 468.0, "learning_rate": 8.038114170918329e-05, "loss": 17.1251, "step": 7518 }, { "epoch": 0.3134091951148347, "grad_norm": 237.0, "learning_rate": 8.037578039569192e-05, "loss": 11.5003, "step": 7519 }, { "epoch": 0.313450877412363, "grad_norm": 350.0, "learning_rate": 8.037041852859922e-05, "loss": 15.5629, "step": 7520 }, { "epoch": 0.3134925597098912, "grad_norm": 89.0, "learning_rate": 8.036505610800296e-05, "loss": 9.2506, "step": 7521 }, { "epoch": 0.3135342420074195, "grad_norm": 556.0, "learning_rate": 8.035969313400086e-05, "loss": 19.6255, "step": 7522 }, { "epoch": 0.3135759243049477, "grad_norm": 416.0, "learning_rate": 8.035432960669065e-05, "loss": 15.6252, "step": 7523 }, { "epoch": 0.31361760660247595, "grad_norm": 486.0, "learning_rate": 8.03489655261701e-05, "loss": 17.1252, "step": 7524 }, { "epoch": 0.31365928890000416, "grad_norm": 358.0, "learning_rate": 8.034360089253694e-05, "loss": 14.688, "step": 7525 }, { "epoch": 0.31370097119753243, "grad_norm": 560.0, "learning_rate": 8.033823570588897e-05, "loss": 16.8757, "step": 7526 }, { "epoch": 0.31374265349506064, "grad_norm": 290.0, "learning_rate": 8.033286996632396e-05, "loss": 12.1253, "step": 7527 }, { "epoch": 0.3137843357925889, "grad_norm": 368.0, "learning_rate": 8.03275036739397e-05, "loss": 14.0646, "step": 7528 }, { "epoch": 0.3138260180901171, "grad_norm": 652.0, "learning_rate": 8.032213682883401e-05, "loss": 19.5003, "step": 7529 }, { "epoch": 0.3138677003876454, "grad_norm": 344.0, "learning_rate": 8.031676943110467e-05, "loss": 14.4377, "step": 7530 }, { "epoch": 0.3139093826851736, "grad_norm": 320.0, "learning_rate": 8.031140148084953e-05, "loss": 14.0002, "step": 7531 }, { "epoch": 0.31395106498270187, "grad_norm": 410.0, "learning_rate": 8.03060329781664e-05, "loss": 15.5627, "step": 7532 }, { "epoch": 0.3139927472802301, "grad_norm": 254.0, "learning_rate": 8.030066392315312e-05, "loss": 10.6905, "step": 7533 }, { "epoch": 0.31403442957775834, "grad_norm": 324.0, "learning_rate": 8.029529431590754e-05, "loss": 13.0002, "step": 7534 }, { "epoch": 0.31407611187528656, "grad_norm": 430.0, "learning_rate": 8.028992415652755e-05, "loss": 17.2502, "step": 7535 }, { "epoch": 0.3141177941728148, "grad_norm": 406.0, "learning_rate": 8.0284553445111e-05, "loss": 17.7505, "step": 7536 }, { "epoch": 0.31415947647034304, "grad_norm": 640.0, "learning_rate": 8.027918218175579e-05, "loss": 19.7505, "step": 7537 }, { "epoch": 0.3142011587678713, "grad_norm": 484.0, "learning_rate": 8.027381036655977e-05, "loss": 15.9379, "step": 7538 }, { "epoch": 0.3142428410653995, "grad_norm": 548.0, "learning_rate": 8.026843799962088e-05, "loss": 16.6257, "step": 7539 }, { "epoch": 0.3142845233629278, "grad_norm": 310.0, "learning_rate": 8.026306508103702e-05, "loss": 13.7504, "step": 7540 }, { "epoch": 0.314326205660456, "grad_norm": 247.0, "learning_rate": 8.02576916109061e-05, "loss": 9.0627, "step": 7541 }, { "epoch": 0.31436788795798426, "grad_norm": 612.0, "learning_rate": 8.025231758932608e-05, "loss": 19.1253, "step": 7542 }, { "epoch": 0.31440957025551247, "grad_norm": 412.0, "learning_rate": 8.024694301639489e-05, "loss": 12.7515, "step": 7543 }, { "epoch": 0.31445125255304074, "grad_norm": 1048.0, "learning_rate": 8.024156789221046e-05, "loss": 23.5049, "step": 7544 }, { "epoch": 0.31449293485056895, "grad_norm": 400.0, "learning_rate": 8.023619221687079e-05, "loss": 15.0002, "step": 7545 }, { "epoch": 0.3145346171480972, "grad_norm": 828.0, "learning_rate": 8.023081599047384e-05, "loss": 22.3753, "step": 7546 }, { "epoch": 0.31457629944562543, "grad_norm": 496.0, "learning_rate": 8.022543921311756e-05, "loss": 18.0002, "step": 7547 }, { "epoch": 0.3146179817431537, "grad_norm": 300.0, "learning_rate": 8.022006188489998e-05, "loss": 13.5628, "step": 7548 }, { "epoch": 0.3146596640406819, "grad_norm": 223.0, "learning_rate": 8.021468400591909e-05, "loss": 12.188, "step": 7549 }, { "epoch": 0.3147013463382102, "grad_norm": 56.0, "learning_rate": 8.020930557627288e-05, "loss": 8.1253, "step": 7550 }, { "epoch": 0.3147430286357384, "grad_norm": 221.0, "learning_rate": 8.020392659605942e-05, "loss": 11.4377, "step": 7551 }, { "epoch": 0.31478471093326665, "grad_norm": 354.0, "learning_rate": 8.019854706537672e-05, "loss": 13.3753, "step": 7552 }, { "epoch": 0.31482639323079487, "grad_norm": 213.0, "learning_rate": 8.01931669843228e-05, "loss": 11.5002, "step": 7553 }, { "epoch": 0.31486807552832313, "grad_norm": 155.0, "learning_rate": 8.018778635299574e-05, "loss": 11.7502, "step": 7554 }, { "epoch": 0.31490975782585134, "grad_norm": 124.5, "learning_rate": 8.018240517149359e-05, "loss": 10.3756, "step": 7555 }, { "epoch": 0.3149514401233796, "grad_norm": 62.5, "learning_rate": 8.017702343991444e-05, "loss": 7.8138, "step": 7556 }, { "epoch": 0.3149931224209078, "grad_norm": 308.0, "learning_rate": 8.017164115835634e-05, "loss": 12.9381, "step": 7557 }, { "epoch": 0.3150348047184361, "grad_norm": 192.0, "learning_rate": 8.016625832691741e-05, "loss": 10.1881, "step": 7558 }, { "epoch": 0.3150764870159643, "grad_norm": 784.0, "learning_rate": 8.016087494569575e-05, "loss": 22.6253, "step": 7559 }, { "epoch": 0.31511816931349257, "grad_norm": 2064.0, "learning_rate": 8.015549101478945e-05, "loss": 44.5008, "step": 7560 }, { "epoch": 0.3151598516110208, "grad_norm": 402.0, "learning_rate": 8.015010653429667e-05, "loss": 14.8752, "step": 7561 }, { "epoch": 0.31520153390854905, "grad_norm": 197.0, "learning_rate": 8.014472150431552e-05, "loss": 10.5002, "step": 7562 }, { "epoch": 0.31524321620607726, "grad_norm": 396.0, "learning_rate": 8.013933592494412e-05, "loss": 12.2503, "step": 7563 }, { "epoch": 0.3152848985036055, "grad_norm": 796.0, "learning_rate": 8.013394979628066e-05, "loss": 22.7522, "step": 7564 }, { "epoch": 0.31532658080113374, "grad_norm": 392.0, "learning_rate": 8.01285631184233e-05, "loss": 16.3751, "step": 7565 }, { "epoch": 0.315368263098662, "grad_norm": 242.0, "learning_rate": 8.01231758914702e-05, "loss": 11.6882, "step": 7566 }, { "epoch": 0.3154099453961902, "grad_norm": 450.0, "learning_rate": 8.011778811551953e-05, "loss": 14.8754, "step": 7567 }, { "epoch": 0.3154516276937185, "grad_norm": 416.0, "learning_rate": 8.011239979066952e-05, "loss": 13.9394, "step": 7568 }, { "epoch": 0.3154933099912467, "grad_norm": 55.25, "learning_rate": 8.010701091701833e-05, "loss": 8.1253, "step": 7569 }, { "epoch": 0.31553499228877496, "grad_norm": 312.0, "learning_rate": 8.01016214946642e-05, "loss": 13.4379, "step": 7570 }, { "epoch": 0.3155766745863032, "grad_norm": 482.0, "learning_rate": 8.009623152370536e-05, "loss": 17.0004, "step": 7571 }, { "epoch": 0.31561835688383144, "grad_norm": 720.0, "learning_rate": 8.009084100424003e-05, "loss": 21.2504, "step": 7572 }, { "epoch": 0.31566003918135965, "grad_norm": 444.0, "learning_rate": 8.008544993636642e-05, "loss": 17.1253, "step": 7573 }, { "epoch": 0.3157017214788879, "grad_norm": 544.0, "learning_rate": 8.008005832018284e-05, "loss": 17.5002, "step": 7574 }, { "epoch": 0.31574340377641613, "grad_norm": 438.0, "learning_rate": 8.007466615578752e-05, "loss": 15.0628, "step": 7575 }, { "epoch": 0.3157850860739444, "grad_norm": 231.0, "learning_rate": 8.006927344327874e-05, "loss": 9.5005, "step": 7576 }, { "epoch": 0.3158267683714726, "grad_norm": 382.0, "learning_rate": 8.006388018275477e-05, "loss": 15.6255, "step": 7577 }, { "epoch": 0.3158684506690009, "grad_norm": 472.0, "learning_rate": 8.005848637431395e-05, "loss": 17.2503, "step": 7578 }, { "epoch": 0.3159101329665291, "grad_norm": 506.0, "learning_rate": 8.005309201805451e-05, "loss": 18.3753, "step": 7579 }, { "epoch": 0.31595181526405736, "grad_norm": 156.0, "learning_rate": 8.004769711407481e-05, "loss": 11.1253, "step": 7580 }, { "epoch": 0.31599349756158557, "grad_norm": 223.0, "learning_rate": 8.004230166247318e-05, "loss": 11.1876, "step": 7581 }, { "epoch": 0.31603517985911384, "grad_norm": 346.0, "learning_rate": 8.003690566334792e-05, "loss": 14.7504, "step": 7582 }, { "epoch": 0.31607686215664205, "grad_norm": 124.0, "learning_rate": 8.003150911679739e-05, "loss": 9.7503, "step": 7583 }, { "epoch": 0.3161185444541703, "grad_norm": 370.0, "learning_rate": 8.002611202291993e-05, "loss": 14.5003, "step": 7584 }, { "epoch": 0.3161602267516986, "grad_norm": 294.0, "learning_rate": 8.002071438181393e-05, "loss": 12.1252, "step": 7585 }, { "epoch": 0.3162019090492268, "grad_norm": 632.0, "learning_rate": 8.001531619357773e-05, "loss": 19.5026, "step": 7586 }, { "epoch": 0.31624359134675506, "grad_norm": 406.0, "learning_rate": 8.000991745830975e-05, "loss": 14.5627, "step": 7587 }, { "epoch": 0.31628527364428327, "grad_norm": 1408.0, "learning_rate": 8.000451817610835e-05, "loss": 33.0003, "step": 7588 }, { "epoch": 0.31632695594181154, "grad_norm": 472.0, "learning_rate": 7.999911834707193e-05, "loss": 15.8127, "step": 7589 }, { "epoch": 0.31636863823933975, "grad_norm": 928.0, "learning_rate": 7.999371797129893e-05, "loss": 21.8802, "step": 7590 }, { "epoch": 0.316410320536868, "grad_norm": 288.0, "learning_rate": 7.998831704888775e-05, "loss": 12.2503, "step": 7591 }, { "epoch": 0.31645200283439623, "grad_norm": 324.0, "learning_rate": 7.998291557993683e-05, "loss": 13.3752, "step": 7592 }, { "epoch": 0.3164936851319245, "grad_norm": 268.0, "learning_rate": 7.997751356454462e-05, "loss": 12.4377, "step": 7593 }, { "epoch": 0.3165353674294527, "grad_norm": 326.0, "learning_rate": 7.997211100280955e-05, "loss": 11.1255, "step": 7594 }, { "epoch": 0.316577049726981, "grad_norm": 324.0, "learning_rate": 7.99667078948301e-05, "loss": 14.2502, "step": 7595 }, { "epoch": 0.3166187320245092, "grad_norm": 326.0, "learning_rate": 7.996130424070475e-05, "loss": 13.1252, "step": 7596 }, { "epoch": 0.31666041432203745, "grad_norm": 199.0, "learning_rate": 7.995590004053196e-05, "loss": 10.8134, "step": 7597 }, { "epoch": 0.31670209661956567, "grad_norm": 170.0, "learning_rate": 7.995049529441023e-05, "loss": 11.0008, "step": 7598 }, { "epoch": 0.31674377891709393, "grad_norm": 266.0, "learning_rate": 7.994509000243809e-05, "loss": 11.7504, "step": 7599 }, { "epoch": 0.31678546121462214, "grad_norm": 253.0, "learning_rate": 7.993968416471399e-05, "loss": 11.6877, "step": 7600 }, { "epoch": 0.3168271435121504, "grad_norm": 290.0, "learning_rate": 7.99342777813365e-05, "loss": 13.6254, "step": 7601 }, { "epoch": 0.3168688258096786, "grad_norm": 219.0, "learning_rate": 7.992887085240414e-05, "loss": 10.6879, "step": 7602 }, { "epoch": 0.3169105081072069, "grad_norm": 330.0, "learning_rate": 7.992346337801546e-05, "loss": 13.3757, "step": 7603 }, { "epoch": 0.3169521904047351, "grad_norm": 312.0, "learning_rate": 7.991805535826901e-05, "loss": 14.3128, "step": 7604 }, { "epoch": 0.31699387270226337, "grad_norm": 564.0, "learning_rate": 7.991264679326333e-05, "loss": 17.7505, "step": 7605 }, { "epoch": 0.3170355549997916, "grad_norm": 156.0, "learning_rate": 7.990723768309702e-05, "loss": 10.4384, "step": 7606 }, { "epoch": 0.31707723729731985, "grad_norm": 328.0, "learning_rate": 7.990182802786864e-05, "loss": 13.8128, "step": 7607 }, { "epoch": 0.31711891959484806, "grad_norm": 584.0, "learning_rate": 7.989641782767679e-05, "loss": 18.3751, "step": 7608 }, { "epoch": 0.3171606018923763, "grad_norm": 524.0, "learning_rate": 7.989100708262008e-05, "loss": 15.6877, "step": 7609 }, { "epoch": 0.31720228418990454, "grad_norm": 97.0, "learning_rate": 7.988559579279708e-05, "loss": 8.9377, "step": 7610 }, { "epoch": 0.3172439664874328, "grad_norm": 208.0, "learning_rate": 7.988018395830647e-05, "loss": 11.1882, "step": 7611 }, { "epoch": 0.317285648784961, "grad_norm": 262.0, "learning_rate": 7.987477157924685e-05, "loss": 13.1253, "step": 7612 }, { "epoch": 0.3173273310824893, "grad_norm": 182.0, "learning_rate": 7.986935865571688e-05, "loss": 10.0629, "step": 7613 }, { "epoch": 0.3173690133800175, "grad_norm": 170.0, "learning_rate": 7.986394518781519e-05, "loss": 9.8753, "step": 7614 }, { "epoch": 0.31741069567754576, "grad_norm": 188.0, "learning_rate": 7.985853117564044e-05, "loss": 11.3756, "step": 7615 }, { "epoch": 0.317452377975074, "grad_norm": 268.0, "learning_rate": 7.985311661929131e-05, "loss": 10.7514, "step": 7616 }, { "epoch": 0.31749406027260224, "grad_norm": 380.0, "learning_rate": 7.984770151886647e-05, "loss": 13.0002, "step": 7617 }, { "epoch": 0.31753574257013045, "grad_norm": 165.0, "learning_rate": 7.984228587446463e-05, "loss": 5.2816, "step": 7618 }, { "epoch": 0.3175774248676587, "grad_norm": 988.0, "learning_rate": 7.983686968618449e-05, "loss": 21.0048, "step": 7619 }, { "epoch": 0.31761910716518693, "grad_norm": 67.0, "learning_rate": 7.983145295412474e-05, "loss": 6.8751, "step": 7620 }, { "epoch": 0.3176607894627152, "grad_norm": 458.0, "learning_rate": 7.982603567838412e-05, "loss": 17.3752, "step": 7621 }, { "epoch": 0.3177024717602434, "grad_norm": 280.0, "learning_rate": 7.982061785906134e-05, "loss": 12.1877, "step": 7622 }, { "epoch": 0.3177441540577717, "grad_norm": 306.0, "learning_rate": 7.981519949625515e-05, "loss": 10.6885, "step": 7623 }, { "epoch": 0.3177858363552999, "grad_norm": 430.0, "learning_rate": 7.980978059006431e-05, "loss": 14.7503, "step": 7624 }, { "epoch": 0.31782751865282816, "grad_norm": 324.0, "learning_rate": 7.980436114058758e-05, "loss": 12.6879, "step": 7625 }, { "epoch": 0.31786920095035637, "grad_norm": 268.0, "learning_rate": 7.979894114792372e-05, "loss": 11.5627, "step": 7626 }, { "epoch": 0.31791088324788463, "grad_norm": 1272.0, "learning_rate": 7.979352061217151e-05, "loss": 35.5001, "step": 7627 }, { "epoch": 0.31795256554541285, "grad_norm": 1120.0, "learning_rate": 7.978809953342973e-05, "loss": 29.5001, "step": 7628 }, { "epoch": 0.3179942478429411, "grad_norm": 352.0, "learning_rate": 7.978267791179722e-05, "loss": 14.3127, "step": 7629 }, { "epoch": 0.3180359301404693, "grad_norm": 520.0, "learning_rate": 7.977725574737273e-05, "loss": 17.2537, "step": 7630 }, { "epoch": 0.3180776124379976, "grad_norm": 704.0, "learning_rate": 7.977183304025512e-05, "loss": 21.1253, "step": 7631 }, { "epoch": 0.3181192947355258, "grad_norm": 255.0, "learning_rate": 7.976640979054322e-05, "loss": 12.4379, "step": 7632 }, { "epoch": 0.31816097703305407, "grad_norm": 688.0, "learning_rate": 7.976098599833586e-05, "loss": 19.8769, "step": 7633 }, { "epoch": 0.3182026593305823, "grad_norm": 121.0, "learning_rate": 7.975556166373188e-05, "loss": 9.5627, "step": 7634 }, { "epoch": 0.31824434162811055, "grad_norm": 350.0, "learning_rate": 7.975013678683014e-05, "loss": 13.9378, "step": 7635 }, { "epoch": 0.31828602392563876, "grad_norm": 430.0, "learning_rate": 7.974471136772953e-05, "loss": 15.5646, "step": 7636 }, { "epoch": 0.31832770622316703, "grad_norm": 516.0, "learning_rate": 7.973928540652891e-05, "loss": 16.5025, "step": 7637 }, { "epoch": 0.31836938852069524, "grad_norm": 171.0, "learning_rate": 7.973385890332717e-05, "loss": 8.9384, "step": 7638 }, { "epoch": 0.3184110708182235, "grad_norm": 494.0, "learning_rate": 7.972843185822322e-05, "loss": 18.0002, "step": 7639 }, { "epoch": 0.3184527531157517, "grad_norm": 366.0, "learning_rate": 7.972300427131596e-05, "loss": 14.0629, "step": 7640 }, { "epoch": 0.31849443541328, "grad_norm": 964.0, "learning_rate": 7.97175761427043e-05, "loss": 24.1295, "step": 7641 }, { "epoch": 0.3185361177108082, "grad_norm": 580.0, "learning_rate": 7.971214747248717e-05, "loss": 18.0004, "step": 7642 }, { "epoch": 0.31857780000833646, "grad_norm": 314.0, "learning_rate": 7.970671826076353e-05, "loss": 14.0628, "step": 7643 }, { "epoch": 0.3186194823058647, "grad_norm": 584.0, "learning_rate": 7.97012885076323e-05, "loss": 20.0002, "step": 7644 }, { "epoch": 0.31866116460339294, "grad_norm": 90.0, "learning_rate": 7.969585821319246e-05, "loss": 8.0004, "step": 7645 }, { "epoch": 0.31870284690092116, "grad_norm": 328.0, "learning_rate": 7.969042737754297e-05, "loss": 13.7503, "step": 7646 }, { "epoch": 0.3187445291984494, "grad_norm": 356.0, "learning_rate": 7.96849960007828e-05, "loss": 15.4378, "step": 7647 }, { "epoch": 0.31878621149597763, "grad_norm": 640.0, "learning_rate": 7.967956408301095e-05, "loss": 19.0002, "step": 7648 }, { "epoch": 0.3188278937935059, "grad_norm": 576.0, "learning_rate": 7.96741316243264e-05, "loss": 16.5005, "step": 7649 }, { "epoch": 0.3188695760910341, "grad_norm": 876.0, "learning_rate": 7.966869862482818e-05, "loss": 24.379, "step": 7650 }, { "epoch": 0.3189112583885624, "grad_norm": 430.0, "learning_rate": 7.966326508461528e-05, "loss": 15.1253, "step": 7651 }, { "epoch": 0.3189529406860906, "grad_norm": 296.0, "learning_rate": 7.965783100378674e-05, "loss": 10.9378, "step": 7652 }, { "epoch": 0.31899462298361886, "grad_norm": 162.0, "learning_rate": 7.96523963824416e-05, "loss": 10.1255, "step": 7653 }, { "epoch": 0.31903630528114707, "grad_norm": 143.0, "learning_rate": 7.96469612206789e-05, "loss": 8.7502, "step": 7654 }, { "epoch": 0.31907798757867534, "grad_norm": 450.0, "learning_rate": 7.964152551859772e-05, "loss": 15.3155, "step": 7655 }, { "epoch": 0.31911966987620355, "grad_norm": 52.75, "learning_rate": 7.963608927629708e-05, "loss": 7.7503, "step": 7656 }, { "epoch": 0.3191613521737318, "grad_norm": 504.0, "learning_rate": 7.963065249387609e-05, "loss": 18.0011, "step": 7657 }, { "epoch": 0.3192030344712601, "grad_norm": 712.0, "learning_rate": 7.962521517143384e-05, "loss": 21.3755, "step": 7658 }, { "epoch": 0.3192447167687883, "grad_norm": 446.0, "learning_rate": 7.96197773090694e-05, "loss": 16.8754, "step": 7659 }, { "epoch": 0.31928639906631656, "grad_norm": 302.0, "learning_rate": 7.96143389068819e-05, "loss": 13.6263, "step": 7660 }, { "epoch": 0.3193280813638448, "grad_norm": 320.0, "learning_rate": 7.960889996497043e-05, "loss": 14.4377, "step": 7661 }, { "epoch": 0.31936976366137304, "grad_norm": 684.0, "learning_rate": 7.960346048343414e-05, "loss": 19.0023, "step": 7662 }, { "epoch": 0.31941144595890125, "grad_norm": 217.0, "learning_rate": 7.959802046237215e-05, "loss": 12.0628, "step": 7663 }, { "epoch": 0.3194531282564295, "grad_norm": 510.0, "learning_rate": 7.959257990188363e-05, "loss": 17.1253, "step": 7664 }, { "epoch": 0.31949481055395773, "grad_norm": 149.0, "learning_rate": 7.958713880206768e-05, "loss": 9.8127, "step": 7665 }, { "epoch": 0.319536492851486, "grad_norm": 394.0, "learning_rate": 7.958169716302353e-05, "loss": 15.4378, "step": 7666 }, { "epoch": 0.3195781751490142, "grad_norm": 214.0, "learning_rate": 7.957625498485029e-05, "loss": 12.1879, "step": 7667 }, { "epoch": 0.3196198574465425, "grad_norm": 70.0, "learning_rate": 7.95708122676472e-05, "loss": 9.126, "step": 7668 }, { "epoch": 0.3196615397440707, "grad_norm": 348.0, "learning_rate": 7.956536901151343e-05, "loss": 14.3132, "step": 7669 }, { "epoch": 0.31970322204159896, "grad_norm": 304.0, "learning_rate": 7.955992521654818e-05, "loss": 13.6254, "step": 7670 }, { "epoch": 0.31974490433912717, "grad_norm": 812.0, "learning_rate": 7.955448088285067e-05, "loss": 21.2553, "step": 7671 }, { "epoch": 0.31978658663665543, "grad_norm": 1200.0, "learning_rate": 7.954903601052013e-05, "loss": 27.2555, "step": 7672 }, { "epoch": 0.31982826893418365, "grad_norm": 280.0, "learning_rate": 7.954359059965578e-05, "loss": 13.2504, "step": 7673 }, { "epoch": 0.3198699512317119, "grad_norm": 330.0, "learning_rate": 7.953814465035687e-05, "loss": 13.1879, "step": 7674 }, { "epoch": 0.3199116335292401, "grad_norm": 226.0, "learning_rate": 7.953269816272265e-05, "loss": 11.563, "step": 7675 }, { "epoch": 0.3199533158267684, "grad_norm": 2160.0, "learning_rate": 7.952725113685238e-05, "loss": 43.0003, "step": 7676 }, { "epoch": 0.3199949981242966, "grad_norm": 412.0, "learning_rate": 7.952180357284534e-05, "loss": 15.0014, "step": 7677 }, { "epoch": 0.32003668042182487, "grad_norm": 134.0, "learning_rate": 7.951635547080081e-05, "loss": 9.5627, "step": 7678 }, { "epoch": 0.3200783627193531, "grad_norm": 352.0, "learning_rate": 7.951090683081808e-05, "loss": 14.7506, "step": 7679 }, { "epoch": 0.32012004501688135, "grad_norm": 390.0, "learning_rate": 7.950545765299645e-05, "loss": 13.6906, "step": 7680 }, { "epoch": 0.32016172731440956, "grad_norm": 346.0, "learning_rate": 7.950000793743524e-05, "loss": 13.688, "step": 7681 }, { "epoch": 0.32020340961193783, "grad_norm": 211.0, "learning_rate": 7.949455768423378e-05, "loss": 11.0629, "step": 7682 }, { "epoch": 0.32024509190946604, "grad_norm": 892.0, "learning_rate": 7.948910689349136e-05, "loss": 23.6275, "step": 7683 }, { "epoch": 0.3202867742069943, "grad_norm": 410.0, "learning_rate": 7.948365556530737e-05, "loss": 14.6266, "step": 7684 }, { "epoch": 0.3203284565045225, "grad_norm": 246.0, "learning_rate": 7.947820369978112e-05, "loss": 9.813, "step": 7685 }, { "epoch": 0.3203701388020508, "grad_norm": 948.0, "learning_rate": 7.947275129701202e-05, "loss": 25.1256, "step": 7686 }, { "epoch": 0.320411821099579, "grad_norm": 424.0, "learning_rate": 7.946729835709938e-05, "loss": 14.5003, "step": 7687 }, { "epoch": 0.32045350339710726, "grad_norm": 226.0, "learning_rate": 7.946184488014263e-05, "loss": 10.2529, "step": 7688 }, { "epoch": 0.3204951856946355, "grad_norm": 139.0, "learning_rate": 7.945639086624115e-05, "loss": 10.1253, "step": 7689 }, { "epoch": 0.32053686799216374, "grad_norm": 474.0, "learning_rate": 7.945093631549431e-05, "loss": 17.5004, "step": 7690 }, { "epoch": 0.32057855028969195, "grad_norm": 195.0, "learning_rate": 7.944548122800155e-05, "loss": 9.813, "step": 7691 }, { "epoch": 0.3206202325872202, "grad_norm": 270.0, "learning_rate": 7.944002560386228e-05, "loss": 14.4378, "step": 7692 }, { "epoch": 0.32066191488474843, "grad_norm": 124.0, "learning_rate": 7.943456944317593e-05, "loss": 9.0005, "step": 7693 }, { "epoch": 0.3207035971822767, "grad_norm": 145.0, "learning_rate": 7.942911274604194e-05, "loss": 10.001, "step": 7694 }, { "epoch": 0.3207452794798049, "grad_norm": 460.0, "learning_rate": 7.942365551255978e-05, "loss": 17.0013, "step": 7695 }, { "epoch": 0.3207869617773332, "grad_norm": 364.0, "learning_rate": 7.941819774282884e-05, "loss": 13.6253, "step": 7696 }, { "epoch": 0.3208286440748614, "grad_norm": 228.0, "learning_rate": 7.941273943694867e-05, "loss": 12.5003, "step": 7697 }, { "epoch": 0.32087032637238966, "grad_norm": 151.0, "learning_rate": 7.940728059501869e-05, "loss": 9.5004, "step": 7698 }, { "epoch": 0.32091200866991787, "grad_norm": 440.0, "learning_rate": 7.940182121713843e-05, "loss": 15.1272, "step": 7699 }, { "epoch": 0.32095369096744614, "grad_norm": 354.0, "learning_rate": 7.939636130340736e-05, "loss": 15.4377, "step": 7700 }, { "epoch": 0.32099537326497435, "grad_norm": 1456.0, "learning_rate": 7.9390900853925e-05, "loss": 35.2512, "step": 7701 }, { "epoch": 0.3210370555625026, "grad_norm": 472.0, "learning_rate": 7.938543986879086e-05, "loss": 16.5002, "step": 7702 }, { "epoch": 0.3210787378600308, "grad_norm": 239.0, "learning_rate": 7.937997834810446e-05, "loss": 11.8754, "step": 7703 }, { "epoch": 0.3211204201575591, "grad_norm": 106.5, "learning_rate": 7.937451629196536e-05, "loss": 5.938, "step": 7704 }, { "epoch": 0.3211621024550873, "grad_norm": 107.5, "learning_rate": 7.936905370047308e-05, "loss": 8.5002, "step": 7705 }, { "epoch": 0.3212037847526156, "grad_norm": 181.0, "learning_rate": 7.93635905737272e-05, "loss": 11.3127, "step": 7706 }, { "epoch": 0.3212454670501438, "grad_norm": 572.0, "learning_rate": 7.935812691182727e-05, "loss": 19.2503, "step": 7707 }, { "epoch": 0.32128714934767205, "grad_norm": 548.0, "learning_rate": 7.935266271487287e-05, "loss": 18.6274, "step": 7708 }, { "epoch": 0.32132883164520026, "grad_norm": 428.0, "learning_rate": 7.93471979829636e-05, "loss": 16.6259, "step": 7709 }, { "epoch": 0.32137051394272853, "grad_norm": 308.0, "learning_rate": 7.934173271619902e-05, "loss": 13.8759, "step": 7710 }, { "epoch": 0.32141219624025674, "grad_norm": 416.0, "learning_rate": 7.933626691467877e-05, "loss": 16.3757, "step": 7711 }, { "epoch": 0.321453878537785, "grad_norm": 656.0, "learning_rate": 7.933080057850245e-05, "loss": 20.2504, "step": 7712 }, { "epoch": 0.3214955608353132, "grad_norm": 382.0, "learning_rate": 7.932533370776969e-05, "loss": 13.8767, "step": 7713 }, { "epoch": 0.3215372431328415, "grad_norm": 556.0, "learning_rate": 7.931986630258012e-05, "loss": 20.8754, "step": 7714 }, { "epoch": 0.3215789254303697, "grad_norm": 1048.0, "learning_rate": 7.931439836303338e-05, "loss": 24.2551, "step": 7715 }, { "epoch": 0.32162060772789797, "grad_norm": 288.0, "learning_rate": 7.930892988922911e-05, "loss": 12.8753, "step": 7716 }, { "epoch": 0.3216622900254262, "grad_norm": 418.0, "learning_rate": 7.930346088126701e-05, "loss": 15.1252, "step": 7717 }, { "epoch": 0.32170397232295445, "grad_norm": 444.0, "learning_rate": 7.929799133924673e-05, "loss": 16.7503, "step": 7718 }, { "epoch": 0.32174565462048266, "grad_norm": 306.0, "learning_rate": 7.929252126326795e-05, "loss": 11.5006, "step": 7719 }, { "epoch": 0.3217873369180109, "grad_norm": 428.0, "learning_rate": 7.928705065343039e-05, "loss": 13.6259, "step": 7720 }, { "epoch": 0.32182901921553914, "grad_norm": 125.0, "learning_rate": 7.928157950983372e-05, "loss": 7.563, "step": 7721 }, { "epoch": 0.3218707015130674, "grad_norm": 358.0, "learning_rate": 7.927610783257766e-05, "loss": 13.3127, "step": 7722 }, { "epoch": 0.3219123838105956, "grad_norm": 172.0, "learning_rate": 7.927063562176193e-05, "loss": 7.8754, "step": 7723 }, { "epoch": 0.3219540661081239, "grad_norm": 668.0, "learning_rate": 7.926516287748629e-05, "loss": 20.3752, "step": 7724 }, { "epoch": 0.3219957484056521, "grad_norm": 692.0, "learning_rate": 7.925968959985044e-05, "loss": 21.1281, "step": 7725 }, { "epoch": 0.32203743070318036, "grad_norm": 245.0, "learning_rate": 7.925421578895415e-05, "loss": 11.2506, "step": 7726 }, { "epoch": 0.3220791130007086, "grad_norm": 420.0, "learning_rate": 7.924874144489719e-05, "loss": 14.8754, "step": 7727 }, { "epoch": 0.32212079529823684, "grad_norm": 154.0, "learning_rate": 7.924326656777931e-05, "loss": 10.2502, "step": 7728 }, { "epoch": 0.32216247759576505, "grad_norm": 812.0, "learning_rate": 7.923779115770032e-05, "loss": 21.3778, "step": 7729 }, { "epoch": 0.3222041598932933, "grad_norm": 252.0, "learning_rate": 7.923231521475996e-05, "loss": 10.9378, "step": 7730 }, { "epoch": 0.3222458421908216, "grad_norm": 592.0, "learning_rate": 7.922683873905808e-05, "loss": 18.6252, "step": 7731 }, { "epoch": 0.3222875244883498, "grad_norm": 456.0, "learning_rate": 7.922136173069448e-05, "loss": 15.2504, "step": 7732 }, { "epoch": 0.32232920678587806, "grad_norm": 173.0, "learning_rate": 7.921588418976895e-05, "loss": 11.6258, "step": 7733 }, { "epoch": 0.3223708890834063, "grad_norm": 356.0, "learning_rate": 7.921040611638134e-05, "loss": 13.7503, "step": 7734 }, { "epoch": 0.32241257138093454, "grad_norm": 86.0, "learning_rate": 7.920492751063149e-05, "loss": 7.8755, "step": 7735 }, { "epoch": 0.32245425367846275, "grad_norm": 596.0, "learning_rate": 7.919944837261924e-05, "loss": 15.0661, "step": 7736 }, { "epoch": 0.322495935975991, "grad_norm": 179.0, "learning_rate": 7.919396870244444e-05, "loss": 10.438, "step": 7737 }, { "epoch": 0.32253761827351923, "grad_norm": 768.0, "learning_rate": 7.918848850020699e-05, "loss": 22.5006, "step": 7738 }, { "epoch": 0.3225793005710475, "grad_norm": 410.0, "learning_rate": 7.918300776600673e-05, "loss": 14.8133, "step": 7739 }, { "epoch": 0.3226209828685757, "grad_norm": 700.0, "learning_rate": 7.917752649994358e-05, "loss": 19.3752, "step": 7740 }, { "epoch": 0.322662665166104, "grad_norm": 181.0, "learning_rate": 7.917204470211741e-05, "loss": 11.0002, "step": 7741 }, { "epoch": 0.3227043474636322, "grad_norm": 376.0, "learning_rate": 7.916656237262814e-05, "loss": 14.3757, "step": 7742 }, { "epoch": 0.32274602976116046, "grad_norm": 424.0, "learning_rate": 7.916107951157566e-05, "loss": 14.3752, "step": 7743 }, { "epoch": 0.32278771205868867, "grad_norm": 268.0, "learning_rate": 7.915559611905994e-05, "loss": 9.2506, "step": 7744 }, { "epoch": 0.32282939435621694, "grad_norm": 588.0, "learning_rate": 7.915011219518089e-05, "loss": 16.0026, "step": 7745 }, { "epoch": 0.32287107665374515, "grad_norm": 788.0, "learning_rate": 7.914462774003846e-05, "loss": 23.376, "step": 7746 }, { "epoch": 0.3229127589512734, "grad_norm": 360.0, "learning_rate": 7.913914275373258e-05, "loss": 15.2504, "step": 7747 }, { "epoch": 0.3229544412488016, "grad_norm": 1256.0, "learning_rate": 7.913365723636326e-05, "loss": 26.7505, "step": 7748 }, { "epoch": 0.3229961235463299, "grad_norm": 528.0, "learning_rate": 7.912817118803044e-05, "loss": 14.3147, "step": 7749 }, { "epoch": 0.3230378058438581, "grad_norm": 420.0, "learning_rate": 7.912268460883412e-05, "loss": 16.2503, "step": 7750 }, { "epoch": 0.3230794881413864, "grad_norm": 302.0, "learning_rate": 7.911719749887428e-05, "loss": 7.5653, "step": 7751 }, { "epoch": 0.3231211704389146, "grad_norm": 310.0, "learning_rate": 7.911170985825094e-05, "loss": 13.2512, "step": 7752 }, { "epoch": 0.32316285273644285, "grad_norm": 624.0, "learning_rate": 7.91062216870641e-05, "loss": 17.3752, "step": 7753 }, { "epoch": 0.32320453503397106, "grad_norm": 1064.0, "learning_rate": 7.910073298541378e-05, "loss": 26.7502, "step": 7754 }, { "epoch": 0.32324621733149933, "grad_norm": 1416.0, "learning_rate": 7.909524375340003e-05, "loss": 30.2504, "step": 7755 }, { "epoch": 0.32328789962902754, "grad_norm": 442.0, "learning_rate": 7.908975399112286e-05, "loss": 15.9381, "step": 7756 }, { "epoch": 0.3233295819265558, "grad_norm": 270.0, "learning_rate": 7.908426369868236e-05, "loss": 12.5003, "step": 7757 }, { "epoch": 0.323371264224084, "grad_norm": 482.0, "learning_rate": 7.907877287617857e-05, "loss": 16.6275, "step": 7758 }, { "epoch": 0.3234129465216123, "grad_norm": 444.0, "learning_rate": 7.907328152371156e-05, "loss": 15.938, "step": 7759 }, { "epoch": 0.3234546288191405, "grad_norm": 250.0, "learning_rate": 7.906778964138142e-05, "loss": 11.4411, "step": 7760 }, { "epoch": 0.32349631111666877, "grad_norm": 191.0, "learning_rate": 7.906229722928822e-05, "loss": 9.1896, "step": 7761 }, { "epoch": 0.323537993414197, "grad_norm": 398.0, "learning_rate": 7.905680428753207e-05, "loss": 15.5002, "step": 7762 }, { "epoch": 0.32357967571172525, "grad_norm": 398.0, "learning_rate": 7.90513108162131e-05, "loss": 14.7502, "step": 7763 }, { "epoch": 0.32362135800925346, "grad_norm": 354.0, "learning_rate": 7.90458168154314e-05, "loss": 13.6252, "step": 7764 }, { "epoch": 0.3236630403067817, "grad_norm": 680.0, "learning_rate": 7.904032228528711e-05, "loss": 21.0001, "step": 7765 }, { "epoch": 0.32370472260430994, "grad_norm": 1080.0, "learning_rate": 7.903482722588038e-05, "loss": 30.1257, "step": 7766 }, { "epoch": 0.3237464049018382, "grad_norm": 174.0, "learning_rate": 7.902933163731133e-05, "loss": 10.2507, "step": 7767 }, { "epoch": 0.3237880871993664, "grad_norm": 768.0, "learning_rate": 7.902383551968013e-05, "loss": 21.7502, "step": 7768 }, { "epoch": 0.3238297694968947, "grad_norm": 216.0, "learning_rate": 7.901833887308698e-05, "loss": 10.9377, "step": 7769 }, { "epoch": 0.3238714517944229, "grad_norm": 63.0, "learning_rate": 7.901284169763201e-05, "loss": 7.6878, "step": 7770 }, { "epoch": 0.32391313409195116, "grad_norm": 434.0, "learning_rate": 7.900734399341543e-05, "loss": 15.1255, "step": 7771 }, { "epoch": 0.32395481638947937, "grad_norm": 800.0, "learning_rate": 7.900184576053742e-05, "loss": 20.7502, "step": 7772 }, { "epoch": 0.32399649868700764, "grad_norm": 458.0, "learning_rate": 7.89963469990982e-05, "loss": 16.5006, "step": 7773 }, { "epoch": 0.32403818098453585, "grad_norm": 720.0, "learning_rate": 7.899084770919798e-05, "loss": 19.2539, "step": 7774 }, { "epoch": 0.3240798632820641, "grad_norm": 310.0, "learning_rate": 7.8985347890937e-05, "loss": 12.3754, "step": 7775 }, { "epoch": 0.32412154557959233, "grad_norm": 326.0, "learning_rate": 7.897984754441546e-05, "loss": 11.5629, "step": 7776 }, { "epoch": 0.3241632278771206, "grad_norm": 235.0, "learning_rate": 7.897434666973364e-05, "loss": 10.8133, "step": 7777 }, { "epoch": 0.3242049101746488, "grad_norm": 680.0, "learning_rate": 7.896884526699177e-05, "loss": 16.8756, "step": 7778 }, { "epoch": 0.3242465924721771, "grad_norm": 400.0, "learning_rate": 7.896334333629014e-05, "loss": 14.2506, "step": 7779 }, { "epoch": 0.3242882747697053, "grad_norm": 1400.0, "learning_rate": 7.895784087772899e-05, "loss": 30.2541, "step": 7780 }, { "epoch": 0.32432995706723355, "grad_norm": 960.0, "learning_rate": 7.895233789140863e-05, "loss": 22.2555, "step": 7781 }, { "epoch": 0.32437163936476177, "grad_norm": 356.0, "learning_rate": 7.894683437742934e-05, "loss": 12.6885, "step": 7782 }, { "epoch": 0.32441332166229003, "grad_norm": 436.0, "learning_rate": 7.894133033589143e-05, "loss": 16.8753, "step": 7783 }, { "epoch": 0.32445500395981824, "grad_norm": 720.0, "learning_rate": 7.89358257668952e-05, "loss": 22.1252, "step": 7784 }, { "epoch": 0.3244966862573465, "grad_norm": 264.0, "learning_rate": 7.893032067054097e-05, "loss": 12.1882, "step": 7785 }, { "epoch": 0.3245383685548747, "grad_norm": 239.0, "learning_rate": 7.89248150469291e-05, "loss": 11.6888, "step": 7786 }, { "epoch": 0.324580050852403, "grad_norm": 236.0, "learning_rate": 7.891930889615988e-05, "loss": 12.8752, "step": 7787 }, { "epoch": 0.3246217331499312, "grad_norm": 346.0, "learning_rate": 7.891380221833368e-05, "loss": 11.3135, "step": 7788 }, { "epoch": 0.32466341544745947, "grad_norm": 404.0, "learning_rate": 7.89082950135509e-05, "loss": 14.7508, "step": 7789 }, { "epoch": 0.3247050977449877, "grad_norm": 338.0, "learning_rate": 7.890278728191187e-05, "loss": 14.5003, "step": 7790 }, { "epoch": 0.32474678004251595, "grad_norm": 322.0, "learning_rate": 7.889727902351697e-05, "loss": 9.8129, "step": 7791 }, { "epoch": 0.32478846234004416, "grad_norm": 270.0, "learning_rate": 7.88917702384666e-05, "loss": 12.9384, "step": 7792 }, { "epoch": 0.3248301446375724, "grad_norm": 1704.0, "learning_rate": 7.888626092686113e-05, "loss": 31.5048, "step": 7793 }, { "epoch": 0.32487182693510064, "grad_norm": 184.0, "learning_rate": 7.888075108880102e-05, "loss": 10.438, "step": 7794 }, { "epoch": 0.3249135092326289, "grad_norm": 424.0, "learning_rate": 7.887524072438664e-05, "loss": 16.2515, "step": 7795 }, { "epoch": 0.3249551915301571, "grad_norm": 364.0, "learning_rate": 7.886972983371844e-05, "loss": 15.1878, "step": 7796 }, { "epoch": 0.3249968738276854, "grad_norm": 276.0, "learning_rate": 7.886421841689686e-05, "loss": 13.3756, "step": 7797 }, { "epoch": 0.3250385561252136, "grad_norm": 338.0, "learning_rate": 7.885870647402232e-05, "loss": 14.6253, "step": 7798 }, { "epoch": 0.32508023842274186, "grad_norm": 368.0, "learning_rate": 7.88531940051953e-05, "loss": 12.0639, "step": 7799 }, { "epoch": 0.3251219207202701, "grad_norm": 262.0, "learning_rate": 7.884768101051625e-05, "loss": 11.8128, "step": 7800 }, { "epoch": 0.32516360301779834, "grad_norm": 332.0, "learning_rate": 7.884216749008566e-05, "loss": 13.0629, "step": 7801 }, { "epoch": 0.32520528531532655, "grad_norm": 672.0, "learning_rate": 7.883665344400401e-05, "loss": 21.3755, "step": 7802 }, { "epoch": 0.3252469676128548, "grad_norm": 366.0, "learning_rate": 7.883113887237179e-05, "loss": 14.9377, "step": 7803 }, { "epoch": 0.3252886499103831, "grad_norm": 536.0, "learning_rate": 7.882562377528951e-05, "loss": 19.1253, "step": 7804 }, { "epoch": 0.3253303322079113, "grad_norm": 166.0, "learning_rate": 7.882010815285766e-05, "loss": 10.0005, "step": 7805 }, { "epoch": 0.32537201450543957, "grad_norm": 462.0, "learning_rate": 7.88145920051768e-05, "loss": 15.3751, "step": 7806 }, { "epoch": 0.3254136968029678, "grad_norm": 83.0, "learning_rate": 7.880907533234743e-05, "loss": 5.9692, "step": 7807 }, { "epoch": 0.32545537910049605, "grad_norm": 564.0, "learning_rate": 7.880355813447012e-05, "loss": 19.1252, "step": 7808 }, { "epoch": 0.32549706139802426, "grad_norm": 748.0, "learning_rate": 7.879804041164538e-05, "loss": 22.3753, "step": 7809 }, { "epoch": 0.3255387436955525, "grad_norm": 356.0, "learning_rate": 7.879252216397382e-05, "loss": 13.9377, "step": 7810 }, { "epoch": 0.32558042599308074, "grad_norm": 91.5, "learning_rate": 7.878700339155597e-05, "loss": 9.7503, "step": 7811 }, { "epoch": 0.325622108290609, "grad_norm": 360.0, "learning_rate": 7.878148409449244e-05, "loss": 16.0003, "step": 7812 }, { "epoch": 0.3256637905881372, "grad_norm": 436.0, "learning_rate": 7.877596427288381e-05, "loss": 16.5007, "step": 7813 }, { "epoch": 0.3257054728856655, "grad_norm": 492.0, "learning_rate": 7.877044392683066e-05, "loss": 17.6253, "step": 7814 }, { "epoch": 0.3257471551831937, "grad_norm": 90.5, "learning_rate": 7.876492305643364e-05, "loss": 6.2504, "step": 7815 }, { "epoch": 0.32578883748072196, "grad_norm": 156.0, "learning_rate": 7.875940166179333e-05, "loss": 11.0632, "step": 7816 }, { "epoch": 0.32583051977825017, "grad_norm": 156.0, "learning_rate": 7.875387974301036e-05, "loss": 9.127, "step": 7817 }, { "epoch": 0.32587220207577844, "grad_norm": 75.0, "learning_rate": 7.87483573001854e-05, "loss": 8.5632, "step": 7818 }, { "epoch": 0.32591388437330665, "grad_norm": 212.0, "learning_rate": 7.874283433341907e-05, "loss": 9.1269, "step": 7819 }, { "epoch": 0.3259555666708349, "grad_norm": 226.0, "learning_rate": 7.873731084281202e-05, "loss": 10.7504, "step": 7820 }, { "epoch": 0.32599724896836313, "grad_norm": 1216.0, "learning_rate": 7.873178682846493e-05, "loss": 27.2509, "step": 7821 }, { "epoch": 0.3260389312658914, "grad_norm": 264.0, "learning_rate": 7.87262622904785e-05, "loss": 11.125, "step": 7822 }, { "epoch": 0.3260806135634196, "grad_norm": 117.5, "learning_rate": 7.872073722895337e-05, "loss": 10.2503, "step": 7823 }, { "epoch": 0.3261222958609479, "grad_norm": 382.0, "learning_rate": 7.871521164399025e-05, "loss": 14.5005, "step": 7824 }, { "epoch": 0.3261639781584761, "grad_norm": 384.0, "learning_rate": 7.870968553568986e-05, "loss": 15.7503, "step": 7825 }, { "epoch": 0.32620566045600435, "grad_norm": 664.0, "learning_rate": 7.870415890415291e-05, "loss": 21.3755, "step": 7826 }, { "epoch": 0.32624734275353257, "grad_norm": 197.0, "learning_rate": 7.869863174948009e-05, "loss": 12.2503, "step": 7827 }, { "epoch": 0.32628902505106083, "grad_norm": 224.0, "learning_rate": 7.869310407177217e-05, "loss": 10.0643, "step": 7828 }, { "epoch": 0.32633070734858904, "grad_norm": 580.0, "learning_rate": 7.868757587112989e-05, "loss": 16.3786, "step": 7829 }, { "epoch": 0.3263723896461173, "grad_norm": 320.0, "learning_rate": 7.868204714765399e-05, "loss": 13.8752, "step": 7830 }, { "epoch": 0.3264140719436455, "grad_norm": 264.0, "learning_rate": 7.867651790144523e-05, "loss": 12.251, "step": 7831 }, { "epoch": 0.3264557542411738, "grad_norm": 241.0, "learning_rate": 7.867098813260439e-05, "loss": 13.0008, "step": 7832 }, { "epoch": 0.326497436538702, "grad_norm": 1552.0, "learning_rate": 7.866545784123223e-05, "loss": 31.7545, "step": 7833 }, { "epoch": 0.32653911883623027, "grad_norm": 270.0, "learning_rate": 7.865992702742959e-05, "loss": 12.3128, "step": 7834 }, { "epoch": 0.3265808011337585, "grad_norm": 247.0, "learning_rate": 7.86543956912972e-05, "loss": 11.7511, "step": 7835 }, { "epoch": 0.32662248343128675, "grad_norm": 440.0, "learning_rate": 7.864886383293592e-05, "loss": 15.938, "step": 7836 }, { "epoch": 0.32666416572881496, "grad_norm": 532.0, "learning_rate": 7.864333145244656e-05, "loss": 16.8783, "step": 7837 }, { "epoch": 0.3267058480263432, "grad_norm": 286.0, "learning_rate": 7.863779854992993e-05, "loss": 12.188, "step": 7838 }, { "epoch": 0.32674753032387144, "grad_norm": 134.0, "learning_rate": 7.863226512548689e-05, "loss": 10.6254, "step": 7839 }, { "epoch": 0.3267892126213997, "grad_norm": 456.0, "learning_rate": 7.862673117921826e-05, "loss": 12.2545, "step": 7840 }, { "epoch": 0.3268308949189279, "grad_norm": 280.0, "learning_rate": 7.862119671122494e-05, "loss": 13.6252, "step": 7841 }, { "epoch": 0.3268725772164562, "grad_norm": 408.0, "learning_rate": 7.861566172160774e-05, "loss": 16.1252, "step": 7842 }, { "epoch": 0.3269142595139844, "grad_norm": 300.0, "learning_rate": 7.861012621046758e-05, "loss": 14.1258, "step": 7843 }, { "epoch": 0.32695594181151266, "grad_norm": 121.0, "learning_rate": 7.860459017790532e-05, "loss": 8.8128, "step": 7844 }, { "epoch": 0.3269976241090409, "grad_norm": 255.0, "learning_rate": 7.859905362402187e-05, "loss": 12.6878, "step": 7845 }, { "epoch": 0.32703930640656914, "grad_norm": 1440.0, "learning_rate": 7.859351654891814e-05, "loss": 26.754, "step": 7846 }, { "epoch": 0.32708098870409735, "grad_norm": 352.0, "learning_rate": 7.858797895269503e-05, "loss": 13.314, "step": 7847 }, { "epoch": 0.3271226710016256, "grad_norm": 296.0, "learning_rate": 7.858244083545346e-05, "loss": 12.9377, "step": 7848 }, { "epoch": 0.32716435329915383, "grad_norm": 1152.0, "learning_rate": 7.857690219729437e-05, "loss": 25.3793, "step": 7849 }, { "epoch": 0.3272060355966821, "grad_norm": 228.0, "learning_rate": 7.857136303831869e-05, "loss": 12.6255, "step": 7850 }, { "epoch": 0.3272477178942103, "grad_norm": 306.0, "learning_rate": 7.856582335862739e-05, "loss": 13.0002, "step": 7851 }, { "epoch": 0.3272894001917386, "grad_norm": 1024.0, "learning_rate": 7.856028315832142e-05, "loss": 26.6253, "step": 7852 }, { "epoch": 0.3273310824892668, "grad_norm": 502.0, "learning_rate": 7.855474243750176e-05, "loss": 19.0004, "step": 7853 }, { "epoch": 0.32737276478679506, "grad_norm": 310.0, "learning_rate": 7.854920119626938e-05, "loss": 12.5023, "step": 7854 }, { "epoch": 0.32741444708432327, "grad_norm": 157.0, "learning_rate": 7.854365943472529e-05, "loss": 6.5321, "step": 7855 }, { "epoch": 0.32745612938185154, "grad_norm": 233.0, "learning_rate": 7.853811715297044e-05, "loss": 11.1878, "step": 7856 }, { "epoch": 0.32749781167937975, "grad_norm": 624.0, "learning_rate": 7.85325743511059e-05, "loss": 19.7504, "step": 7857 }, { "epoch": 0.327539493976908, "grad_norm": 456.0, "learning_rate": 7.852703102923264e-05, "loss": 15.8127, "step": 7858 }, { "epoch": 0.3275811762744362, "grad_norm": 316.0, "learning_rate": 7.852148718745172e-05, "loss": 13.8756, "step": 7859 }, { "epoch": 0.3276228585719645, "grad_norm": 370.0, "learning_rate": 7.851594282586416e-05, "loss": 14.4381, "step": 7860 }, { "epoch": 0.3276645408694927, "grad_norm": 820.0, "learning_rate": 7.851039794457102e-05, "loss": 21.3753, "step": 7861 }, { "epoch": 0.32770622316702097, "grad_norm": 2176.0, "learning_rate": 7.850485254367335e-05, "loss": 37.7545, "step": 7862 }, { "epoch": 0.3277479054645492, "grad_norm": 984.0, "learning_rate": 7.849930662327218e-05, "loss": 23.6288, "step": 7863 }, { "epoch": 0.32778958776207745, "grad_norm": 74.5, "learning_rate": 7.849376018346865e-05, "loss": 8.3126, "step": 7864 }, { "epoch": 0.32783127005960566, "grad_norm": 412.0, "learning_rate": 7.84882132243638e-05, "loss": 14.5634, "step": 7865 }, { "epoch": 0.32787295235713393, "grad_norm": 744.0, "learning_rate": 7.848266574605873e-05, "loss": 21.7511, "step": 7866 }, { "epoch": 0.32791463465466214, "grad_norm": 231.0, "learning_rate": 7.847711774865455e-05, "loss": 11.2507, "step": 7867 }, { "epoch": 0.3279563169521904, "grad_norm": 1208.0, "learning_rate": 7.847156923225237e-05, "loss": 31.1251, "step": 7868 }, { "epoch": 0.3279979992497186, "grad_norm": 268.0, "learning_rate": 7.84660201969533e-05, "loss": 14.0665, "step": 7869 }, { "epoch": 0.3280396815472469, "grad_norm": 448.0, "learning_rate": 7.846047064285851e-05, "loss": 15.6881, "step": 7870 }, { "epoch": 0.3280813638447751, "grad_norm": 552.0, "learning_rate": 7.845492057006911e-05, "loss": 18.5007, "step": 7871 }, { "epoch": 0.32812304614230337, "grad_norm": 332.0, "learning_rate": 7.844936997868626e-05, "loss": 13.8752, "step": 7872 }, { "epoch": 0.3281647284398316, "grad_norm": 258.0, "learning_rate": 7.844381886881112e-05, "loss": 11.8128, "step": 7873 }, { "epoch": 0.32820641073735984, "grad_norm": 584.0, "learning_rate": 7.843826724054484e-05, "loss": 18.0009, "step": 7874 }, { "epoch": 0.32824809303488806, "grad_norm": 376.0, "learning_rate": 7.843271509398862e-05, "loss": 14.4377, "step": 7875 }, { "epoch": 0.3282897753324163, "grad_norm": 390.0, "learning_rate": 7.842716242924364e-05, "loss": 14.5002, "step": 7876 }, { "epoch": 0.3283314576299446, "grad_norm": 162.0, "learning_rate": 7.84216092464111e-05, "loss": 9.5004, "step": 7877 }, { "epoch": 0.3283731399274728, "grad_norm": 60.25, "learning_rate": 7.841605554559222e-05, "loss": 8.9382, "step": 7878 }, { "epoch": 0.32841482222500107, "grad_norm": 62.75, "learning_rate": 7.84105013268882e-05, "loss": 8.3127, "step": 7879 }, { "epoch": 0.3284565045225293, "grad_norm": 696.0, "learning_rate": 7.840494659040028e-05, "loss": 15.6885, "step": 7880 }, { "epoch": 0.32849818682005755, "grad_norm": 153.0, "learning_rate": 7.839939133622966e-05, "loss": 10.6253, "step": 7881 }, { "epoch": 0.32853986911758576, "grad_norm": 458.0, "learning_rate": 7.839383556447764e-05, "loss": 16.3754, "step": 7882 }, { "epoch": 0.328581551415114, "grad_norm": 708.0, "learning_rate": 7.838827927524542e-05, "loss": 21.0014, "step": 7883 }, { "epoch": 0.32862323371264224, "grad_norm": 238.0, "learning_rate": 7.838272246863431e-05, "loss": 11.4382, "step": 7884 }, { "epoch": 0.3286649160101705, "grad_norm": 712.0, "learning_rate": 7.837716514474556e-05, "loss": 20.2503, "step": 7885 }, { "epoch": 0.3287065983076987, "grad_norm": 508.0, "learning_rate": 7.837160730368045e-05, "loss": 13.8755, "step": 7886 }, { "epoch": 0.328748280605227, "grad_norm": 644.0, "learning_rate": 7.836604894554029e-05, "loss": 16.8799, "step": 7887 }, { "epoch": 0.3287899629027552, "grad_norm": 288.0, "learning_rate": 7.836049007042637e-05, "loss": 13.3762, "step": 7888 }, { "epoch": 0.32883164520028346, "grad_norm": 356.0, "learning_rate": 7.835493067843998e-05, "loss": 14.7509, "step": 7889 }, { "epoch": 0.3288733274978117, "grad_norm": 255.0, "learning_rate": 7.834937076968247e-05, "loss": 13.1254, "step": 7890 }, { "epoch": 0.32891500979533994, "grad_norm": 752.0, "learning_rate": 7.834381034425518e-05, "loss": 22.2501, "step": 7891 }, { "epoch": 0.32895669209286815, "grad_norm": 532.0, "learning_rate": 7.833824940225942e-05, "loss": 18.0005, "step": 7892 }, { "epoch": 0.3289983743903964, "grad_norm": 171.0, "learning_rate": 7.833268794379653e-05, "loss": 10.0627, "step": 7893 }, { "epoch": 0.32904005668792463, "grad_norm": 468.0, "learning_rate": 7.83271259689679e-05, "loss": 15.4379, "step": 7894 }, { "epoch": 0.3290817389854529, "grad_norm": 236.0, "learning_rate": 7.83215634778749e-05, "loss": 12.1287, "step": 7895 }, { "epoch": 0.3291234212829811, "grad_norm": 368.0, "learning_rate": 7.831600047061888e-05, "loss": 14.7503, "step": 7896 }, { "epoch": 0.3291651035805094, "grad_norm": 362.0, "learning_rate": 7.831043694730123e-05, "loss": 14.3752, "step": 7897 }, { "epoch": 0.3292067858780376, "grad_norm": 324.0, "learning_rate": 7.830487290802336e-05, "loss": 12.5627, "step": 7898 }, { "epoch": 0.32924846817556586, "grad_norm": 382.0, "learning_rate": 7.829930835288669e-05, "loss": 14.6263, "step": 7899 }, { "epoch": 0.32929015047309407, "grad_norm": 568.0, "learning_rate": 7.829374328199257e-05, "loss": 18.1257, "step": 7900 }, { "epoch": 0.32933183277062233, "grad_norm": 924.0, "learning_rate": 7.828817769544249e-05, "loss": 23.7508, "step": 7901 }, { "epoch": 0.32937351506815055, "grad_norm": 532.0, "learning_rate": 7.828261159333786e-05, "loss": 18.7512, "step": 7902 }, { "epoch": 0.3294151973656788, "grad_norm": 454.0, "learning_rate": 7.827704497578012e-05, "loss": 16.7502, "step": 7903 }, { "epoch": 0.329456879663207, "grad_norm": 99.0, "learning_rate": 7.827147784287072e-05, "loss": 9.0003, "step": 7904 }, { "epoch": 0.3294985619607353, "grad_norm": 149.0, "learning_rate": 7.826591019471114e-05, "loss": 8.4386, "step": 7905 }, { "epoch": 0.3295402442582635, "grad_norm": 241.0, "learning_rate": 7.826034203140283e-05, "loss": 11.1254, "step": 7906 }, { "epoch": 0.32958192655579177, "grad_norm": 47.25, "learning_rate": 7.825477335304728e-05, "loss": 7.0002, "step": 7907 }, { "epoch": 0.32962360885332, "grad_norm": 318.0, "learning_rate": 7.824920415974597e-05, "loss": 12.4381, "step": 7908 }, { "epoch": 0.32966529115084825, "grad_norm": 448.0, "learning_rate": 7.824363445160042e-05, "loss": 16.3756, "step": 7909 }, { "epoch": 0.32970697344837646, "grad_norm": 1216.0, "learning_rate": 7.823806422871212e-05, "loss": 27.2527, "step": 7910 }, { "epoch": 0.32974865574590473, "grad_norm": 306.0, "learning_rate": 7.823249349118258e-05, "loss": 14.438, "step": 7911 }, { "epoch": 0.32979033804343294, "grad_norm": 104.0, "learning_rate": 7.822692223911336e-05, "loss": 9.2503, "step": 7912 }, { "epoch": 0.3298320203409612, "grad_norm": 344.0, "learning_rate": 7.822135047260596e-05, "loss": 13.1252, "step": 7913 }, { "epoch": 0.3298737026384894, "grad_norm": 195.0, "learning_rate": 7.821577819176195e-05, "loss": 11.0006, "step": 7914 }, { "epoch": 0.3299153849360177, "grad_norm": 276.0, "learning_rate": 7.821020539668287e-05, "loss": 12.813, "step": 7915 }, { "epoch": 0.3299570672335459, "grad_norm": 350.0, "learning_rate": 7.820463208747031e-05, "loss": 15.2502, "step": 7916 }, { "epoch": 0.32999874953107416, "grad_norm": 474.0, "learning_rate": 7.819905826422582e-05, "loss": 16.2503, "step": 7917 }, { "epoch": 0.3300404318286024, "grad_norm": 165.0, "learning_rate": 7.819348392705097e-05, "loss": 10.4381, "step": 7918 }, { "epoch": 0.33008211412613064, "grad_norm": 450.0, "learning_rate": 7.818790907604738e-05, "loss": 15.2502, "step": 7919 }, { "epoch": 0.33012379642365886, "grad_norm": 460.0, "learning_rate": 7.818233371131666e-05, "loss": 15.6253, "step": 7920 }, { "epoch": 0.3301654787211871, "grad_norm": 624.0, "learning_rate": 7.81767578329604e-05, "loss": 19.1256, "step": 7921 }, { "epoch": 0.33020716101871533, "grad_norm": 116.5, "learning_rate": 7.817118144108023e-05, "loss": 8.563, "step": 7922 }, { "epoch": 0.3302488433162436, "grad_norm": 158.0, "learning_rate": 7.816560453577777e-05, "loss": 8.4381, "step": 7923 }, { "epoch": 0.3302905256137718, "grad_norm": 716.0, "learning_rate": 7.816002711715467e-05, "loss": 17.7552, "step": 7924 }, { "epoch": 0.3303322079113001, "grad_norm": 78.0, "learning_rate": 7.815444918531257e-05, "loss": 7.4689, "step": 7925 }, { "epoch": 0.3303738902088283, "grad_norm": 1328.0, "learning_rate": 7.814887074035314e-05, "loss": 31.7516, "step": 7926 }, { "epoch": 0.33041557250635656, "grad_norm": 232.0, "learning_rate": 7.814329178237804e-05, "loss": 11.9382, "step": 7927 }, { "epoch": 0.33045725480388477, "grad_norm": 536.0, "learning_rate": 7.813771231148895e-05, "loss": 17.0005, "step": 7928 }, { "epoch": 0.33049893710141304, "grad_norm": 171.0, "learning_rate": 7.813213232778755e-05, "loss": 10.7508, "step": 7929 }, { "epoch": 0.33054061939894125, "grad_norm": 548.0, "learning_rate": 7.812655183137556e-05, "loss": 16.1257, "step": 7930 }, { "epoch": 0.3305823016964695, "grad_norm": 516.0, "learning_rate": 7.812097082235465e-05, "loss": 15.7504, "step": 7931 }, { "epoch": 0.3306239839939977, "grad_norm": 246.0, "learning_rate": 7.811538930082655e-05, "loss": 12.313, "step": 7932 }, { "epoch": 0.330665666291526, "grad_norm": 54.75, "learning_rate": 7.810980726689299e-05, "loss": 7.6262, "step": 7933 }, { "epoch": 0.3307073485890542, "grad_norm": 330.0, "learning_rate": 7.810422472065571e-05, "loss": 12.9377, "step": 7934 }, { "epoch": 0.3307490308865825, "grad_norm": 286.0, "learning_rate": 7.809864166221641e-05, "loss": 12.1883, "step": 7935 }, { "epoch": 0.3307907131841107, "grad_norm": 644.0, "learning_rate": 7.809305809167688e-05, "loss": 19.8763, "step": 7936 }, { "epoch": 0.33083239548163895, "grad_norm": 346.0, "learning_rate": 7.808747400913889e-05, "loss": 14.9378, "step": 7937 }, { "epoch": 0.33087407777916716, "grad_norm": 756.0, "learning_rate": 7.808188941470419e-05, "loss": 22.6252, "step": 7938 }, { "epoch": 0.33091576007669543, "grad_norm": 588.0, "learning_rate": 7.807630430847454e-05, "loss": 16.5002, "step": 7939 }, { "epoch": 0.33095744237422364, "grad_norm": 600.0, "learning_rate": 7.807071869055176e-05, "loss": 17.3753, "step": 7940 }, { "epoch": 0.3309991246717519, "grad_norm": 164.0, "learning_rate": 7.806513256103765e-05, "loss": 10.6252, "step": 7941 }, { "epoch": 0.3310408069692801, "grad_norm": 512.0, "learning_rate": 7.805954592003401e-05, "loss": 17.0002, "step": 7942 }, { "epoch": 0.3310824892668084, "grad_norm": 264.0, "learning_rate": 7.805395876764264e-05, "loss": 13.5627, "step": 7943 }, { "epoch": 0.3311241715643366, "grad_norm": 528.0, "learning_rate": 7.804837110396538e-05, "loss": 15.8778, "step": 7944 }, { "epoch": 0.33116585386186487, "grad_norm": 78.0, "learning_rate": 7.804278292910407e-05, "loss": 8.8128, "step": 7945 }, { "epoch": 0.3312075361593931, "grad_norm": 150.0, "learning_rate": 7.803719424316058e-05, "loss": 10.1254, "step": 7946 }, { "epoch": 0.33124921845692135, "grad_norm": 418.0, "learning_rate": 7.80316050462367e-05, "loss": 15.5629, "step": 7947 }, { "epoch": 0.33129090075444956, "grad_norm": 296.0, "learning_rate": 7.802601533843434e-05, "loss": 12.6258, "step": 7948 }, { "epoch": 0.3313325830519778, "grad_norm": 390.0, "learning_rate": 7.802042511985536e-05, "loss": 15.3128, "step": 7949 }, { "epoch": 0.3313742653495061, "grad_norm": 302.0, "learning_rate": 7.801483439060167e-05, "loss": 12.0002, "step": 7950 }, { "epoch": 0.3314159476470343, "grad_norm": 560.0, "learning_rate": 7.80092431507751e-05, "loss": 19.0003, "step": 7951 }, { "epoch": 0.33145762994456257, "grad_norm": 314.0, "learning_rate": 7.80036514004776e-05, "loss": 12.5002, "step": 7952 }, { "epoch": 0.3314993122420908, "grad_norm": 175.0, "learning_rate": 7.799805913981107e-05, "loss": 6.9067, "step": 7953 }, { "epoch": 0.33154099453961905, "grad_norm": 442.0, "learning_rate": 7.799246636887743e-05, "loss": 15.7505, "step": 7954 }, { "epoch": 0.33158267683714726, "grad_norm": 456.0, "learning_rate": 7.798687308777861e-05, "loss": 15.8788, "step": 7955 }, { "epoch": 0.33162435913467553, "grad_norm": 404.0, "learning_rate": 7.798127929661654e-05, "loss": 15.8754, "step": 7956 }, { "epoch": 0.33166604143220374, "grad_norm": 195.0, "learning_rate": 7.797568499549316e-05, "loss": 12.6879, "step": 7957 }, { "epoch": 0.331707723729732, "grad_norm": 199.0, "learning_rate": 7.797009018451044e-05, "loss": 11.3127, "step": 7958 }, { "epoch": 0.3317494060272602, "grad_norm": 720.0, "learning_rate": 7.796449486377035e-05, "loss": 21.6282, "step": 7959 }, { "epoch": 0.3317910883247885, "grad_norm": 151.0, "learning_rate": 7.795889903337486e-05, "loss": 9.7502, "step": 7960 }, { "epoch": 0.3318327706223167, "grad_norm": 464.0, "learning_rate": 7.795330269342595e-05, "loss": 15.438, "step": 7961 }, { "epoch": 0.33187445291984496, "grad_norm": 488.0, "learning_rate": 7.794770584402562e-05, "loss": 16.1255, "step": 7962 }, { "epoch": 0.3319161352173732, "grad_norm": 300.0, "learning_rate": 7.794210848527585e-05, "loss": 11.0628, "step": 7963 }, { "epoch": 0.33195781751490144, "grad_norm": 376.0, "learning_rate": 7.793651061727869e-05, "loss": 14.1876, "step": 7964 }, { "epoch": 0.33199949981242965, "grad_norm": 130.0, "learning_rate": 7.793091224013615e-05, "loss": 9.1252, "step": 7965 }, { "epoch": 0.3320411821099579, "grad_norm": 852.0, "learning_rate": 7.792531335395025e-05, "loss": 24.7506, "step": 7966 }, { "epoch": 0.33208286440748613, "grad_norm": 228.0, "learning_rate": 7.791971395882302e-05, "loss": 11.8127, "step": 7967 }, { "epoch": 0.3321245467050144, "grad_norm": 62.25, "learning_rate": 7.791411405485656e-05, "loss": 7.1881, "step": 7968 }, { "epoch": 0.3321662290025426, "grad_norm": 366.0, "learning_rate": 7.790851364215286e-05, "loss": 14.0628, "step": 7969 }, { "epoch": 0.3322079113000709, "grad_norm": 444.0, "learning_rate": 7.790291272081402e-05, "loss": 15.6253, "step": 7970 }, { "epoch": 0.3322495935975991, "grad_norm": 158.0, "learning_rate": 7.789731129094214e-05, "loss": 7.6877, "step": 7971 }, { "epoch": 0.33229127589512736, "grad_norm": 336.0, "learning_rate": 7.789170935263928e-05, "loss": 14.2506, "step": 7972 }, { "epoch": 0.33233295819265557, "grad_norm": 135.0, "learning_rate": 7.788610690600753e-05, "loss": 9.3131, "step": 7973 }, { "epoch": 0.33237464049018384, "grad_norm": 376.0, "learning_rate": 7.788050395114902e-05, "loss": 13.2507, "step": 7974 }, { "epoch": 0.33241632278771205, "grad_norm": 169.0, "learning_rate": 7.787490048816584e-05, "loss": 8.4377, "step": 7975 }, { "epoch": 0.3324580050852403, "grad_norm": 84.5, "learning_rate": 7.786929651716013e-05, "loss": 7.4065, "step": 7976 }, { "epoch": 0.3324996873827685, "grad_norm": 208.0, "learning_rate": 7.7863692038234e-05, "loss": 11.8129, "step": 7977 }, { "epoch": 0.3325413696802968, "grad_norm": 74.0, "learning_rate": 7.785808705148963e-05, "loss": 6.9385, "step": 7978 }, { "epoch": 0.332583051977825, "grad_norm": 436.0, "learning_rate": 7.785248155702916e-05, "loss": 15.8128, "step": 7979 }, { "epoch": 0.3326247342753533, "grad_norm": 366.0, "learning_rate": 7.784687555495471e-05, "loss": 11.0675, "step": 7980 }, { "epoch": 0.3326664165728815, "grad_norm": 298.0, "learning_rate": 7.78412690453685e-05, "loss": 12.501, "step": 7981 }, { "epoch": 0.33270809887040975, "grad_norm": 164.0, "learning_rate": 7.783566202837269e-05, "loss": 9.2501, "step": 7982 }, { "epoch": 0.33274978116793796, "grad_norm": 314.0, "learning_rate": 7.783005450406946e-05, "loss": 13.2506, "step": 7983 }, { "epoch": 0.33279146346546623, "grad_norm": 376.0, "learning_rate": 7.782444647256102e-05, "loss": 15.2502, "step": 7984 }, { "epoch": 0.33283314576299444, "grad_norm": 536.0, "learning_rate": 7.781883793394957e-05, "loss": 16.8752, "step": 7985 }, { "epoch": 0.3328748280605227, "grad_norm": 888.0, "learning_rate": 7.781322888833734e-05, "loss": 22.003, "step": 7986 }, { "epoch": 0.3329165103580509, "grad_norm": 804.0, "learning_rate": 7.780761933582654e-05, "loss": 21.007, "step": 7987 }, { "epoch": 0.3329581926555792, "grad_norm": 1200.0, "learning_rate": 7.780200927651941e-05, "loss": 24.2549, "step": 7988 }, { "epoch": 0.3329998749531074, "grad_norm": 444.0, "learning_rate": 7.779639871051819e-05, "loss": 16.1259, "step": 7989 }, { "epoch": 0.33304155725063567, "grad_norm": 223.0, "learning_rate": 7.779078763792514e-05, "loss": 11.9378, "step": 7990 }, { "epoch": 0.3330832395481639, "grad_norm": 512.0, "learning_rate": 7.77851760588425e-05, "loss": 17.3754, "step": 7991 }, { "epoch": 0.33312492184569215, "grad_norm": 1312.0, "learning_rate": 7.777956397337259e-05, "loss": 28.2503, "step": 7992 }, { "epoch": 0.33316660414322036, "grad_norm": 350.0, "learning_rate": 7.777395138161763e-05, "loss": 13.9376, "step": 7993 }, { "epoch": 0.3332082864407486, "grad_norm": 211.0, "learning_rate": 7.776833828367995e-05, "loss": 10.1877, "step": 7994 }, { "epoch": 0.33324996873827684, "grad_norm": 233.0, "learning_rate": 7.776272467966185e-05, "loss": 12.8752, "step": 7995 }, { "epoch": 0.3332916510358051, "grad_norm": 440.0, "learning_rate": 7.775711056966561e-05, "loss": 12.8136, "step": 7996 }, { "epoch": 0.3333333333333333, "grad_norm": 1280.0, "learning_rate": 7.775149595379359e-05, "loss": 27.5039, "step": 7997 }, { "epoch": 0.3333750156308616, "grad_norm": 276.0, "learning_rate": 7.774588083214807e-05, "loss": 12.8752, "step": 7998 }, { "epoch": 0.3334166979283898, "grad_norm": 113.0, "learning_rate": 7.774026520483141e-05, "loss": 10.1878, "step": 7999 }, { "epoch": 0.33345838022591806, "grad_norm": 243.0, "learning_rate": 7.773464907194598e-05, "loss": 12.2503, "step": 8000 }, { "epoch": 0.3335000625234463, "grad_norm": 125.5, "learning_rate": 7.772903243359409e-05, "loss": 10.3129, "step": 8001 }, { "epoch": 0.33354174482097454, "grad_norm": 292.0, "learning_rate": 7.772341528987812e-05, "loss": 14.2501, "step": 8002 }, { "epoch": 0.33358342711850275, "grad_norm": 442.0, "learning_rate": 7.771779764090046e-05, "loss": 15.5021, "step": 8003 }, { "epoch": 0.333625109416031, "grad_norm": 584.0, "learning_rate": 7.771217948676346e-05, "loss": 17.5002, "step": 8004 }, { "epoch": 0.33366679171355923, "grad_norm": 196.0, "learning_rate": 7.770656082756953e-05, "loss": 11.3129, "step": 8005 }, { "epoch": 0.3337084740110875, "grad_norm": 322.0, "learning_rate": 7.770094166342107e-05, "loss": 13.438, "step": 8006 }, { "epoch": 0.3337501563086157, "grad_norm": 134.0, "learning_rate": 7.769532199442051e-05, "loss": 8.3756, "step": 8007 }, { "epoch": 0.333791838606144, "grad_norm": 232.0, "learning_rate": 7.768970182067023e-05, "loss": 11.9378, "step": 8008 }, { "epoch": 0.3338335209036722, "grad_norm": 117.0, "learning_rate": 7.768408114227268e-05, "loss": 10.1882, "step": 8009 }, { "epoch": 0.33387520320120045, "grad_norm": 242.0, "learning_rate": 7.767845995933029e-05, "loss": 10.0004, "step": 8010 }, { "epoch": 0.33391688549872867, "grad_norm": 326.0, "learning_rate": 7.767283827194551e-05, "loss": 13.4379, "step": 8011 }, { "epoch": 0.33395856779625693, "grad_norm": 276.0, "learning_rate": 7.76672160802208e-05, "loss": 12.4381, "step": 8012 }, { "epoch": 0.33400025009378514, "grad_norm": 274.0, "learning_rate": 7.766159338425863e-05, "loss": 10.6256, "step": 8013 }, { "epoch": 0.3340419323913134, "grad_norm": 155.0, "learning_rate": 7.765597018416144e-05, "loss": 7.5633, "step": 8014 }, { "epoch": 0.3340836146888416, "grad_norm": 450.0, "learning_rate": 7.765034648003175e-05, "loss": 12.6882, "step": 8015 }, { "epoch": 0.3341252969863699, "grad_norm": 298.0, "learning_rate": 7.764472227197205e-05, "loss": 13.3126, "step": 8016 }, { "epoch": 0.3341669792838981, "grad_norm": 424.0, "learning_rate": 7.763909756008483e-05, "loss": 15.6877, "step": 8017 }, { "epoch": 0.33420866158142637, "grad_norm": 380.0, "learning_rate": 7.76334723444726e-05, "loss": 16.0006, "step": 8018 }, { "epoch": 0.3342503438789546, "grad_norm": 346.0, "learning_rate": 7.762784662523787e-05, "loss": 13.6883, "step": 8019 }, { "epoch": 0.33429202617648285, "grad_norm": 732.0, "learning_rate": 7.76222204024832e-05, "loss": 20.6256, "step": 8020 }, { "epoch": 0.33433370847401106, "grad_norm": 82.0, "learning_rate": 7.761659367631111e-05, "loss": 6.3449, "step": 8021 }, { "epoch": 0.3343753907715393, "grad_norm": 544.0, "learning_rate": 7.761096644682414e-05, "loss": 17.6252, "step": 8022 }, { "epoch": 0.3344170730690676, "grad_norm": 314.0, "learning_rate": 7.760533871412485e-05, "loss": 12.2524, "step": 8023 }, { "epoch": 0.3344587553665958, "grad_norm": 392.0, "learning_rate": 7.759971047831583e-05, "loss": 14.9379, "step": 8024 }, { "epoch": 0.3345004376641241, "grad_norm": 464.0, "learning_rate": 7.759408173949963e-05, "loss": 15.0006, "step": 8025 }, { "epoch": 0.3345421199616523, "grad_norm": 258.0, "learning_rate": 7.758845249777885e-05, "loss": 12.3759, "step": 8026 }, { "epoch": 0.33458380225918055, "grad_norm": 219.0, "learning_rate": 7.758282275325606e-05, "loss": 11.8127, "step": 8027 }, { "epoch": 0.33462548455670876, "grad_norm": 286.0, "learning_rate": 7.757719250603387e-05, "loss": 13.6879, "step": 8028 }, { "epoch": 0.33466716685423703, "grad_norm": 386.0, "learning_rate": 7.75715617562149e-05, "loss": 14.7503, "step": 8029 }, { "epoch": 0.33470884915176524, "grad_norm": 237.0, "learning_rate": 7.756593050390179e-05, "loss": 12.9379, "step": 8030 }, { "epoch": 0.3347505314492935, "grad_norm": 516.0, "learning_rate": 7.756029874919713e-05, "loss": 17.6256, "step": 8031 }, { "epoch": 0.3347922137468217, "grad_norm": 368.0, "learning_rate": 7.755466649220359e-05, "loss": 14.2502, "step": 8032 }, { "epoch": 0.33483389604435, "grad_norm": 202.0, "learning_rate": 7.75490337330238e-05, "loss": 12.6254, "step": 8033 }, { "epoch": 0.3348755783418782, "grad_norm": 418.0, "learning_rate": 7.754340047176043e-05, "loss": 16.5001, "step": 8034 }, { "epoch": 0.33491726063940647, "grad_norm": 380.0, "learning_rate": 7.753776670851613e-05, "loss": 14.6878, "step": 8035 }, { "epoch": 0.3349589429369347, "grad_norm": 172.0, "learning_rate": 7.75321324433936e-05, "loss": 11.2505, "step": 8036 }, { "epoch": 0.33500062523446295, "grad_norm": 796.0, "learning_rate": 7.752649767649552e-05, "loss": 18.878, "step": 8037 }, { "epoch": 0.33504230753199116, "grad_norm": 1464.0, "learning_rate": 7.752086240792455e-05, "loss": 42.0007, "step": 8038 }, { "epoch": 0.3350839898295194, "grad_norm": 410.0, "learning_rate": 7.751522663778343e-05, "loss": 15.4377, "step": 8039 }, { "epoch": 0.33512567212704764, "grad_norm": 236.0, "learning_rate": 7.750959036617487e-05, "loss": 12.7503, "step": 8040 }, { "epoch": 0.3351673544245759, "grad_norm": 215.0, "learning_rate": 7.750395359320157e-05, "loss": 12.0035, "step": 8041 }, { "epoch": 0.3352090367221041, "grad_norm": 134.0, "learning_rate": 7.74983163189663e-05, "loss": 9.6878, "step": 8042 }, { "epoch": 0.3352507190196324, "grad_norm": 94.0, "learning_rate": 7.749267854357175e-05, "loss": 8.2506, "step": 8043 }, { "epoch": 0.3352924013171606, "grad_norm": 330.0, "learning_rate": 7.74870402671207e-05, "loss": 14.5004, "step": 8044 }, { "epoch": 0.33533408361468886, "grad_norm": 458.0, "learning_rate": 7.74814014897159e-05, "loss": 17.0002, "step": 8045 }, { "epoch": 0.33537576591221707, "grad_norm": 314.0, "learning_rate": 7.747576221146011e-05, "loss": 13.1253, "step": 8046 }, { "epoch": 0.33541744820974534, "grad_norm": 318.0, "learning_rate": 7.747012243245613e-05, "loss": 13.7503, "step": 8047 }, { "epoch": 0.33545913050727355, "grad_norm": 344.0, "learning_rate": 7.746448215280672e-05, "loss": 14.1252, "step": 8048 }, { "epoch": 0.3355008128048018, "grad_norm": 276.0, "learning_rate": 7.74588413726147e-05, "loss": 10.8753, "step": 8049 }, { "epoch": 0.33554249510233003, "grad_norm": 400.0, "learning_rate": 7.745320009198285e-05, "loss": 13.063, "step": 8050 }, { "epoch": 0.3355841773998583, "grad_norm": 190.0, "learning_rate": 7.744755831101398e-05, "loss": 8.7508, "step": 8051 }, { "epoch": 0.3356258596973865, "grad_norm": 143.0, "learning_rate": 7.744191602981093e-05, "loss": 7.6565, "step": 8052 }, { "epoch": 0.3356675419949148, "grad_norm": 444.0, "learning_rate": 7.743627324847653e-05, "loss": 15.6877, "step": 8053 }, { "epoch": 0.335709224292443, "grad_norm": 66.5, "learning_rate": 7.743062996711361e-05, "loss": 7.4379, "step": 8054 }, { "epoch": 0.33575090658997125, "grad_norm": 428.0, "learning_rate": 7.742498618582502e-05, "loss": 14.6906, "step": 8055 }, { "epoch": 0.33579258888749947, "grad_norm": 117.5, "learning_rate": 7.741934190471363e-05, "loss": 9.3145, "step": 8056 }, { "epoch": 0.33583427118502773, "grad_norm": 394.0, "learning_rate": 7.741369712388229e-05, "loss": 14.1885, "step": 8057 }, { "epoch": 0.33587595348255594, "grad_norm": 171.0, "learning_rate": 7.74080518434339e-05, "loss": 10.6253, "step": 8058 }, { "epoch": 0.3359176357800842, "grad_norm": 318.0, "learning_rate": 7.740240606347133e-05, "loss": 11.0627, "step": 8059 }, { "epoch": 0.3359593180776124, "grad_norm": 520.0, "learning_rate": 7.739675978409746e-05, "loss": 16.5005, "step": 8060 }, { "epoch": 0.3360010003751407, "grad_norm": 173.0, "learning_rate": 7.739111300541522e-05, "loss": 9.8127, "step": 8061 }, { "epoch": 0.3360426826726689, "grad_norm": 568.0, "learning_rate": 7.738546572752751e-05, "loss": 18.8759, "step": 8062 }, { "epoch": 0.33608436497019717, "grad_norm": 320.0, "learning_rate": 7.737981795053728e-05, "loss": 12.1266, "step": 8063 }, { "epoch": 0.3361260472677254, "grad_norm": 292.0, "learning_rate": 7.73741696745474e-05, "loss": 13.3127, "step": 8064 }, { "epoch": 0.33616772956525365, "grad_norm": 178.0, "learning_rate": 7.736852089966088e-05, "loss": 10.5011, "step": 8065 }, { "epoch": 0.33620941186278186, "grad_norm": 544.0, "learning_rate": 7.736287162598062e-05, "loss": 19.0002, "step": 8066 }, { "epoch": 0.3362510941603101, "grad_norm": 290.0, "learning_rate": 7.73572218536096e-05, "loss": 12.4384, "step": 8067 }, { "epoch": 0.33629277645783834, "grad_norm": 202.0, "learning_rate": 7.735157158265078e-05, "loss": 11.2501, "step": 8068 }, { "epoch": 0.3363344587553666, "grad_norm": 434.0, "learning_rate": 7.734592081320715e-05, "loss": 16.5009, "step": 8069 }, { "epoch": 0.3363761410528948, "grad_norm": 568.0, "learning_rate": 7.734026954538168e-05, "loss": 19.2503, "step": 8070 }, { "epoch": 0.3364178233504231, "grad_norm": 498.0, "learning_rate": 7.733461777927736e-05, "loss": 16.5002, "step": 8071 }, { "epoch": 0.3364595056479513, "grad_norm": 156.0, "learning_rate": 7.732896551499722e-05, "loss": 10.6255, "step": 8072 }, { "epoch": 0.33650118794547956, "grad_norm": 720.0, "learning_rate": 7.732331275264424e-05, "loss": 18.88, "step": 8073 }, { "epoch": 0.3365428702430078, "grad_norm": 150.0, "learning_rate": 7.731765949232148e-05, "loss": 8.3129, "step": 8074 }, { "epoch": 0.33658455254053604, "grad_norm": 564.0, "learning_rate": 7.731200573413193e-05, "loss": 18.0005, "step": 8075 }, { "epoch": 0.33662623483806425, "grad_norm": 288.0, "learning_rate": 7.730635147817867e-05, "loss": 10.3766, "step": 8076 }, { "epoch": 0.3366679171355925, "grad_norm": 168.0, "learning_rate": 7.730069672456473e-05, "loss": 10.1258, "step": 8077 }, { "epoch": 0.33670959943312073, "grad_norm": 418.0, "learning_rate": 7.729504147339314e-05, "loss": 15.1259, "step": 8078 }, { "epoch": 0.336751281730649, "grad_norm": 612.0, "learning_rate": 7.728938572476703e-05, "loss": 17.5006, "step": 8079 }, { "epoch": 0.3367929640281772, "grad_norm": 249.0, "learning_rate": 7.728372947878943e-05, "loss": 11.6251, "step": 8080 }, { "epoch": 0.3368346463257055, "grad_norm": 272.0, "learning_rate": 7.727807273556344e-05, "loss": 12.8752, "step": 8081 }, { "epoch": 0.3368763286232337, "grad_norm": 572.0, "learning_rate": 7.727241549519214e-05, "loss": 18.5044, "step": 8082 }, { "epoch": 0.33691801092076196, "grad_norm": 251.0, "learning_rate": 7.726675775777865e-05, "loss": 12.813, "step": 8083 }, { "epoch": 0.33695969321829017, "grad_norm": 536.0, "learning_rate": 7.72610995234261e-05, "loss": 17.3787, "step": 8084 }, { "epoch": 0.33700137551581844, "grad_norm": 318.0, "learning_rate": 7.725544079223757e-05, "loss": 13.126, "step": 8085 }, { "epoch": 0.33704305781334665, "grad_norm": 256.0, "learning_rate": 7.724978156431621e-05, "loss": 12.5003, "step": 8086 }, { "epoch": 0.3370847401108749, "grad_norm": 180.0, "learning_rate": 7.724412183976519e-05, "loss": 9.8756, "step": 8087 }, { "epoch": 0.3371264224084031, "grad_norm": 472.0, "learning_rate": 7.723846161868759e-05, "loss": 16.7506, "step": 8088 }, { "epoch": 0.3371681047059314, "grad_norm": 572.0, "learning_rate": 7.723280090118665e-05, "loss": 18.5003, "step": 8089 }, { "epoch": 0.3372097870034596, "grad_norm": 296.0, "learning_rate": 7.722713968736547e-05, "loss": 13.5636, "step": 8090 }, { "epoch": 0.33725146930098787, "grad_norm": 360.0, "learning_rate": 7.722147797732726e-05, "loss": 15.1256, "step": 8091 }, { "epoch": 0.3372931515985161, "grad_norm": 61.0, "learning_rate": 7.72158157711752e-05, "loss": 7.469, "step": 8092 }, { "epoch": 0.33733483389604435, "grad_norm": 292.0, "learning_rate": 7.721015306901246e-05, "loss": 13.938, "step": 8093 }, { "epoch": 0.33737651619357256, "grad_norm": 314.0, "learning_rate": 7.72044898709423e-05, "loss": 13.3751, "step": 8094 }, { "epoch": 0.33741819849110083, "grad_norm": 200.0, "learning_rate": 7.719882617706788e-05, "loss": 11.5627, "step": 8095 }, { "epoch": 0.3374598807886291, "grad_norm": 322.0, "learning_rate": 7.719316198749243e-05, "loss": 13.2507, "step": 8096 }, { "epoch": 0.3375015630861573, "grad_norm": 163.0, "learning_rate": 7.71874973023192e-05, "loss": 9.4382, "step": 8097 }, { "epoch": 0.3375432453836856, "grad_norm": 604.0, "learning_rate": 7.718183212165142e-05, "loss": 19.7505, "step": 8098 }, { "epoch": 0.3375849276812138, "grad_norm": 318.0, "learning_rate": 7.717616644559233e-05, "loss": 13.1878, "step": 8099 }, { "epoch": 0.33762660997874205, "grad_norm": 524.0, "learning_rate": 7.717050027424519e-05, "loss": 17.6253, "step": 8100 }, { "epoch": 0.33766829227627027, "grad_norm": 178.0, "learning_rate": 7.716483360771329e-05, "loss": 10.7508, "step": 8101 }, { "epoch": 0.33770997457379853, "grad_norm": 1328.0, "learning_rate": 7.715916644609986e-05, "loss": 30.256, "step": 8102 }, { "epoch": 0.33775165687132674, "grad_norm": 286.0, "learning_rate": 7.715349878950823e-05, "loss": 12.9377, "step": 8103 }, { "epoch": 0.337793339168855, "grad_norm": 560.0, "learning_rate": 7.714783063804166e-05, "loss": 17.7508, "step": 8104 }, { "epoch": 0.3378350214663832, "grad_norm": 1000.0, "learning_rate": 7.714216199180349e-05, "loss": 26.2502, "step": 8105 }, { "epoch": 0.3378767037639115, "grad_norm": 468.0, "learning_rate": 7.713649285089698e-05, "loss": 16.6266, "step": 8106 }, { "epoch": 0.3379183860614397, "grad_norm": 334.0, "learning_rate": 7.713082321542549e-05, "loss": 13.3132, "step": 8107 }, { "epoch": 0.33796006835896797, "grad_norm": 1752.0, "learning_rate": 7.712515308549233e-05, "loss": 33.5033, "step": 8108 }, { "epoch": 0.3380017506564962, "grad_norm": 454.0, "learning_rate": 7.711948246120086e-05, "loss": 12.314, "step": 8109 }, { "epoch": 0.33804343295402445, "grad_norm": 326.0, "learning_rate": 7.711381134265442e-05, "loss": 14.2503, "step": 8110 }, { "epoch": 0.33808511525155266, "grad_norm": 422.0, "learning_rate": 7.710813972995635e-05, "loss": 12.7509, "step": 8111 }, { "epoch": 0.3381267975490809, "grad_norm": 392.0, "learning_rate": 7.710246762321003e-05, "loss": 15.3752, "step": 8112 }, { "epoch": 0.33816847984660914, "grad_norm": 190.0, "learning_rate": 7.709679502251883e-05, "loss": 10.5628, "step": 8113 }, { "epoch": 0.3382101621441374, "grad_norm": 346.0, "learning_rate": 7.709112192798614e-05, "loss": 14.1887, "step": 8114 }, { "epoch": 0.3382518444416656, "grad_norm": 322.0, "learning_rate": 7.708544833971534e-05, "loss": 13.0627, "step": 8115 }, { "epoch": 0.3382935267391939, "grad_norm": 97.5, "learning_rate": 7.707977425780983e-05, "loss": 10.1891, "step": 8116 }, { "epoch": 0.3383352090367221, "grad_norm": 154.0, "learning_rate": 7.707409968237306e-05, "loss": 10.1254, "step": 8117 }, { "epoch": 0.33837689133425036, "grad_norm": 378.0, "learning_rate": 7.70684246135084e-05, "loss": 15.0002, "step": 8118 }, { "epoch": 0.3384185736317786, "grad_norm": 254.0, "learning_rate": 7.70627490513193e-05, "loss": 12.5004, "step": 8119 }, { "epoch": 0.33846025592930684, "grad_norm": 400.0, "learning_rate": 7.705707299590921e-05, "loss": 16.1254, "step": 8120 }, { "epoch": 0.33850193822683505, "grad_norm": 420.0, "learning_rate": 7.705139644738155e-05, "loss": 14.1251, "step": 8121 }, { "epoch": 0.3385436205243633, "grad_norm": 214.0, "learning_rate": 7.704571940583978e-05, "loss": 12.0003, "step": 8122 }, { "epoch": 0.33858530282189153, "grad_norm": 48.75, "learning_rate": 7.704004187138739e-05, "loss": 6.3129, "step": 8123 }, { "epoch": 0.3386269851194198, "grad_norm": 82.5, "learning_rate": 7.703436384412782e-05, "loss": 8.438, "step": 8124 }, { "epoch": 0.338668667416948, "grad_norm": 640.0, "learning_rate": 7.702868532416459e-05, "loss": 18.5014, "step": 8125 }, { "epoch": 0.3387103497144763, "grad_norm": 53.75, "learning_rate": 7.702300631160116e-05, "loss": 8.4383, "step": 8126 }, { "epoch": 0.3387520320120045, "grad_norm": 256.0, "learning_rate": 7.701732680654103e-05, "loss": 12.688, "step": 8127 }, { "epoch": 0.33879371430953276, "grad_norm": 316.0, "learning_rate": 7.701164680908772e-05, "loss": 12.6879, "step": 8128 }, { "epoch": 0.33883539660706097, "grad_norm": 253.0, "learning_rate": 7.700596631934477e-05, "loss": 10.1251, "step": 8129 }, { "epoch": 0.33887707890458924, "grad_norm": 588.0, "learning_rate": 7.700028533741566e-05, "loss": 18.502, "step": 8130 }, { "epoch": 0.33891876120211745, "grad_norm": 568.0, "learning_rate": 7.699460386340398e-05, "loss": 20.6253, "step": 8131 }, { "epoch": 0.3389604434996457, "grad_norm": 414.0, "learning_rate": 7.698892189741323e-05, "loss": 14.5004, "step": 8132 }, { "epoch": 0.3390021257971739, "grad_norm": 173.0, "learning_rate": 7.6983239439547e-05, "loss": 10.6879, "step": 8133 }, { "epoch": 0.3390438080947022, "grad_norm": 112.5, "learning_rate": 7.69775564899088e-05, "loss": 6.6878, "step": 8134 }, { "epoch": 0.3390854903922304, "grad_norm": 130.0, "learning_rate": 7.697187304860228e-05, "loss": 9.1891, "step": 8135 }, { "epoch": 0.33912717268975867, "grad_norm": 1000.0, "learning_rate": 7.696618911573096e-05, "loss": 23.1307, "step": 8136 }, { "epoch": 0.3391688549872869, "grad_norm": 584.0, "learning_rate": 7.696050469139846e-05, "loss": 19.5004, "step": 8137 }, { "epoch": 0.33921053728481515, "grad_norm": 332.0, "learning_rate": 7.695481977570836e-05, "loss": 13.2507, "step": 8138 }, { "epoch": 0.33925221958234336, "grad_norm": 1080.0, "learning_rate": 7.694913436876427e-05, "loss": 21.3814, "step": 8139 }, { "epoch": 0.33929390187987163, "grad_norm": 62.0, "learning_rate": 7.694344847066982e-05, "loss": 8.5631, "step": 8140 }, { "epoch": 0.33933558417739984, "grad_norm": 312.0, "learning_rate": 7.693776208152863e-05, "loss": 10.4403, "step": 8141 }, { "epoch": 0.3393772664749281, "grad_norm": 428.0, "learning_rate": 7.693207520144434e-05, "loss": 14.3751, "step": 8142 }, { "epoch": 0.3394189487724563, "grad_norm": 184.0, "learning_rate": 7.692638783052058e-05, "loss": 9.438, "step": 8143 }, { "epoch": 0.3394606310699846, "grad_norm": 48.25, "learning_rate": 7.6920699968861e-05, "loss": 6.719, "step": 8144 }, { "epoch": 0.3395023133675128, "grad_norm": 1128.0, "learning_rate": 7.69150116165693e-05, "loss": 27.1293, "step": 8145 }, { "epoch": 0.33954399566504107, "grad_norm": 344.0, "learning_rate": 7.690932277374911e-05, "loss": 13.3752, "step": 8146 }, { "epoch": 0.3395856779625693, "grad_norm": 278.0, "learning_rate": 7.690363344050413e-05, "loss": 12.3754, "step": 8147 }, { "epoch": 0.33962736026009754, "grad_norm": 748.0, "learning_rate": 7.689794361693804e-05, "loss": 24.0002, "step": 8148 }, { "epoch": 0.33966904255762576, "grad_norm": 180.0, "learning_rate": 7.689225330315454e-05, "loss": 9.3752, "step": 8149 }, { "epoch": 0.339710724855154, "grad_norm": 420.0, "learning_rate": 7.688656249925735e-05, "loss": 14.8753, "step": 8150 }, { "epoch": 0.33975240715268223, "grad_norm": 338.0, "learning_rate": 7.688087120535015e-05, "loss": 14.1879, "step": 8151 }, { "epoch": 0.3397940894502105, "grad_norm": 544.0, "learning_rate": 7.687517942153668e-05, "loss": 18.0001, "step": 8152 }, { "epoch": 0.3398357717477387, "grad_norm": 236.0, "learning_rate": 7.686948714792069e-05, "loss": 9.8756, "step": 8153 }, { "epoch": 0.339877454045267, "grad_norm": 456.0, "learning_rate": 7.686379438460592e-05, "loss": 16.0006, "step": 8154 }, { "epoch": 0.3399191363427952, "grad_norm": 240.0, "learning_rate": 7.685810113169609e-05, "loss": 8.0643, "step": 8155 }, { "epoch": 0.33996081864032346, "grad_norm": 364.0, "learning_rate": 7.6852407389295e-05, "loss": 15.188, "step": 8156 }, { "epoch": 0.34000250093785167, "grad_norm": 446.0, "learning_rate": 7.68467131575064e-05, "loss": 17.1258, "step": 8157 }, { "epoch": 0.34004418323537994, "grad_norm": 184.0, "learning_rate": 7.684101843643408e-05, "loss": 11.0633, "step": 8158 }, { "epoch": 0.34008586553290815, "grad_norm": 484.0, "learning_rate": 7.68353232261818e-05, "loss": 17.8773, "step": 8159 }, { "epoch": 0.3401275478304364, "grad_norm": 61.0, "learning_rate": 7.682962752685339e-05, "loss": 7.7191, "step": 8160 }, { "epoch": 0.34016923012796463, "grad_norm": 336.0, "learning_rate": 7.682393133855262e-05, "loss": 12.938, "step": 8161 }, { "epoch": 0.3402109124254929, "grad_norm": 118.5, "learning_rate": 7.681823466138331e-05, "loss": 9.188, "step": 8162 }, { "epoch": 0.3402525947230211, "grad_norm": 102.5, "learning_rate": 7.681253749544932e-05, "loss": 5.5004, "step": 8163 }, { "epoch": 0.3402942770205494, "grad_norm": 216.0, "learning_rate": 7.680683984085444e-05, "loss": 10.813, "step": 8164 }, { "epoch": 0.3403359593180776, "grad_norm": 516.0, "learning_rate": 7.680114169770252e-05, "loss": 16.8771, "step": 8165 }, { "epoch": 0.34037764161560585, "grad_norm": 356.0, "learning_rate": 7.679544306609743e-05, "loss": 14.5008, "step": 8166 }, { "epoch": 0.34041932391313406, "grad_norm": 408.0, "learning_rate": 7.678974394614298e-05, "loss": 14.6891, "step": 8167 }, { "epoch": 0.34046100621066233, "grad_norm": 117.0, "learning_rate": 7.678404433794308e-05, "loss": 9.3753, "step": 8168 }, { "epoch": 0.3405026885081906, "grad_norm": 213.0, "learning_rate": 7.677834424160162e-05, "loss": 11.5003, "step": 8169 }, { "epoch": 0.3405443708057188, "grad_norm": 498.0, "learning_rate": 7.677264365722243e-05, "loss": 17.2502, "step": 8170 }, { "epoch": 0.3405860531032471, "grad_norm": 270.0, "learning_rate": 7.676694258490945e-05, "loss": 7.7508, "step": 8171 }, { "epoch": 0.3406277354007753, "grad_norm": 240.0, "learning_rate": 7.676124102476656e-05, "loss": 11.8754, "step": 8172 }, { "epoch": 0.34066941769830356, "grad_norm": 344.0, "learning_rate": 7.675553897689766e-05, "loss": 13.8128, "step": 8173 }, { "epoch": 0.34071109999583177, "grad_norm": 131.0, "learning_rate": 7.674983644140672e-05, "loss": 8.3128, "step": 8174 }, { "epoch": 0.34075278229336003, "grad_norm": 187.0, "learning_rate": 7.674413341839761e-05, "loss": 9.5628, "step": 8175 }, { "epoch": 0.34079446459088825, "grad_norm": 390.0, "learning_rate": 7.67384299079743e-05, "loss": 15.7505, "step": 8176 }, { "epoch": 0.3408361468884165, "grad_norm": 170.0, "learning_rate": 7.673272591024074e-05, "loss": 10.7504, "step": 8177 }, { "epoch": 0.3408778291859447, "grad_norm": 184.0, "learning_rate": 7.672702142530088e-05, "loss": 10.9376, "step": 8178 }, { "epoch": 0.340919511483473, "grad_norm": 532.0, "learning_rate": 7.672131645325867e-05, "loss": 18.6255, "step": 8179 }, { "epoch": 0.3409611937810012, "grad_norm": 316.0, "learning_rate": 7.67156109942181e-05, "loss": 14.563, "step": 8180 }, { "epoch": 0.34100287607852947, "grad_norm": 234.0, "learning_rate": 7.670990504828314e-05, "loss": 11.5003, "step": 8181 }, { "epoch": 0.3410445583760577, "grad_norm": 352.0, "learning_rate": 7.67041986155578e-05, "loss": 14.813, "step": 8182 }, { "epoch": 0.34108624067358595, "grad_norm": 266.0, "learning_rate": 7.669849169614607e-05, "loss": 11.938, "step": 8183 }, { "epoch": 0.34112792297111416, "grad_norm": 668.0, "learning_rate": 7.669278429015195e-05, "loss": 20.2509, "step": 8184 }, { "epoch": 0.34116960526864243, "grad_norm": 600.0, "learning_rate": 7.668707639767948e-05, "loss": 19.1281, "step": 8185 }, { "epoch": 0.34121128756617064, "grad_norm": 209.0, "learning_rate": 7.668136801883266e-05, "loss": 10.8752, "step": 8186 }, { "epoch": 0.3412529698636989, "grad_norm": 656.0, "learning_rate": 7.667565915371556e-05, "loss": 16.8796, "step": 8187 }, { "epoch": 0.3412946521612271, "grad_norm": 768.0, "learning_rate": 7.666994980243217e-05, "loss": 23.5002, "step": 8188 }, { "epoch": 0.3413363344587554, "grad_norm": 692.0, "learning_rate": 7.66642399650866e-05, "loss": 15.5003, "step": 8189 }, { "epoch": 0.3413780167562836, "grad_norm": 252.0, "learning_rate": 7.665852964178289e-05, "loss": 12.6259, "step": 8190 }, { "epoch": 0.34141969905381186, "grad_norm": 250.0, "learning_rate": 7.66528188326251e-05, "loss": 12.5003, "step": 8191 }, { "epoch": 0.3414613813513401, "grad_norm": 191.0, "learning_rate": 7.664710753771734e-05, "loss": 9.8134, "step": 8192 }, { "epoch": 0.34150306364886834, "grad_norm": 207.0, "learning_rate": 7.664139575716365e-05, "loss": 11.0627, "step": 8193 }, { "epoch": 0.34154474594639656, "grad_norm": 616.0, "learning_rate": 7.663568349106817e-05, "loss": 15.3786, "step": 8194 }, { "epoch": 0.3415864282439248, "grad_norm": 460.0, "learning_rate": 7.6629970739535e-05, "loss": 13.8127, "step": 8195 }, { "epoch": 0.34162811054145303, "grad_norm": 182.0, "learning_rate": 7.662425750266824e-05, "loss": 10.3127, "step": 8196 }, { "epoch": 0.3416697928389813, "grad_norm": 199.0, "learning_rate": 7.661854378057203e-05, "loss": 9.8133, "step": 8197 }, { "epoch": 0.3417114751365095, "grad_norm": 564.0, "learning_rate": 7.66128295733505e-05, "loss": 19.8753, "step": 8198 }, { "epoch": 0.3417531574340378, "grad_norm": 316.0, "learning_rate": 7.66071148811078e-05, "loss": 15.3129, "step": 8199 }, { "epoch": 0.341794839731566, "grad_norm": 328.0, "learning_rate": 7.660139970394803e-05, "loss": 13.5628, "step": 8200 }, { "epoch": 0.34183652202909426, "grad_norm": 214.0, "learning_rate": 7.659568404197544e-05, "loss": 11.7512, "step": 8201 }, { "epoch": 0.34187820432662247, "grad_norm": 1176.0, "learning_rate": 7.658996789529411e-05, "loss": 26.6297, "step": 8202 }, { "epoch": 0.34191988662415074, "grad_norm": 192.0, "learning_rate": 7.658425126400827e-05, "loss": 8.7503, "step": 8203 }, { "epoch": 0.34196156892167895, "grad_norm": 528.0, "learning_rate": 7.657853414822208e-05, "loss": 17.5001, "step": 8204 }, { "epoch": 0.3420032512192072, "grad_norm": 197.0, "learning_rate": 7.657281654803977e-05, "loss": 11.1878, "step": 8205 }, { "epoch": 0.3420449335167354, "grad_norm": 552.0, "learning_rate": 7.656709846356548e-05, "loss": 17.7512, "step": 8206 }, { "epoch": 0.3420866158142637, "grad_norm": 356.0, "learning_rate": 7.656137989490349e-05, "loss": 13.6878, "step": 8207 }, { "epoch": 0.3421282981117919, "grad_norm": 69.0, "learning_rate": 7.655566084215797e-05, "loss": 5.6879, "step": 8208 }, { "epoch": 0.3421699804093202, "grad_norm": 446.0, "learning_rate": 7.65499413054332e-05, "loss": 14.7508, "step": 8209 }, { "epoch": 0.3422116627068484, "grad_norm": 138.0, "learning_rate": 7.654422128483338e-05, "loss": 9.9379, "step": 8210 }, { "epoch": 0.34225334500437665, "grad_norm": 608.0, "learning_rate": 7.653850078046278e-05, "loss": 17.3752, "step": 8211 }, { "epoch": 0.34229502730190486, "grad_norm": 524.0, "learning_rate": 7.653277979242564e-05, "loss": 17.3757, "step": 8212 }, { "epoch": 0.34233670959943313, "grad_norm": 336.0, "learning_rate": 7.652705832082624e-05, "loss": 14.0004, "step": 8213 }, { "epoch": 0.34237839189696134, "grad_norm": 456.0, "learning_rate": 7.652133636576884e-05, "loss": 14.5027, "step": 8214 }, { "epoch": 0.3424200741944896, "grad_norm": 292.0, "learning_rate": 7.651561392735773e-05, "loss": 12.938, "step": 8215 }, { "epoch": 0.3424617564920178, "grad_norm": 282.0, "learning_rate": 7.650989100569721e-05, "loss": 8.063, "step": 8216 }, { "epoch": 0.3425034387895461, "grad_norm": 920.0, "learning_rate": 7.650416760089156e-05, "loss": 27.2502, "step": 8217 }, { "epoch": 0.3425451210870743, "grad_norm": 215.0, "learning_rate": 7.64984437130451e-05, "loss": 11.1254, "step": 8218 }, { "epoch": 0.34258680338460257, "grad_norm": 724.0, "learning_rate": 7.649271934226216e-05, "loss": 22.2504, "step": 8219 }, { "epoch": 0.3426284856821308, "grad_norm": 211.0, "learning_rate": 7.648699448864707e-05, "loss": 11.1877, "step": 8220 }, { "epoch": 0.34267016797965905, "grad_norm": 1616.0, "learning_rate": 7.648126915230414e-05, "loss": 27.135, "step": 8221 }, { "epoch": 0.34271185027718726, "grad_norm": 104.5, "learning_rate": 7.647554333333773e-05, "loss": 7.594, "step": 8222 }, { "epoch": 0.3427535325747155, "grad_norm": 342.0, "learning_rate": 7.646981703185221e-05, "loss": 13.4379, "step": 8223 }, { "epoch": 0.34279521487224374, "grad_norm": 244.0, "learning_rate": 7.64640902479519e-05, "loss": 12.3757, "step": 8224 }, { "epoch": 0.342836897169772, "grad_norm": 300.0, "learning_rate": 7.64583629817412e-05, "loss": 11.2505, "step": 8225 }, { "epoch": 0.3428785794673002, "grad_norm": 390.0, "learning_rate": 7.64526352333245e-05, "loss": 15.1252, "step": 8226 }, { "epoch": 0.3429202617648285, "grad_norm": 520.0, "learning_rate": 7.644690700280617e-05, "loss": 18.5003, "step": 8227 }, { "epoch": 0.3429619440623567, "grad_norm": 636.0, "learning_rate": 7.644117829029061e-05, "loss": 19.8752, "step": 8228 }, { "epoch": 0.34300362635988496, "grad_norm": 136.0, "learning_rate": 7.643544909588222e-05, "loss": 10.6879, "step": 8229 }, { "epoch": 0.3430453086574132, "grad_norm": 452.0, "learning_rate": 7.642971941968543e-05, "loss": 15.7503, "step": 8230 }, { "epoch": 0.34308699095494144, "grad_norm": 402.0, "learning_rate": 7.642398926180465e-05, "loss": 15.4394, "step": 8231 }, { "epoch": 0.34312867325246965, "grad_norm": 264.0, "learning_rate": 7.641825862234432e-05, "loss": 12.3754, "step": 8232 }, { "epoch": 0.3431703555499979, "grad_norm": 452.0, "learning_rate": 7.64125275014089e-05, "loss": 17.0003, "step": 8233 }, { "epoch": 0.34321203784752613, "grad_norm": 116.0, "learning_rate": 7.64067958991028e-05, "loss": 10.0628, "step": 8234 }, { "epoch": 0.3432537201450544, "grad_norm": 252.0, "learning_rate": 7.640106381553051e-05, "loss": 12.3761, "step": 8235 }, { "epoch": 0.3432954024425826, "grad_norm": 724.0, "learning_rate": 7.63953312507965e-05, "loss": 21.3752, "step": 8236 }, { "epoch": 0.3433370847401109, "grad_norm": 324.0, "learning_rate": 7.638959820500521e-05, "loss": 12.3751, "step": 8237 }, { "epoch": 0.3433787670376391, "grad_norm": 348.0, "learning_rate": 7.638386467826118e-05, "loss": 14.6252, "step": 8238 }, { "epoch": 0.34342044933516735, "grad_norm": 656.0, "learning_rate": 7.637813067066886e-05, "loss": 19.5008, "step": 8239 }, { "epoch": 0.34346213163269557, "grad_norm": 422.0, "learning_rate": 7.637239618233276e-05, "loss": 15.3129, "step": 8240 }, { "epoch": 0.34350381393022383, "grad_norm": 115.5, "learning_rate": 7.63666612133574e-05, "loss": 11.3128, "step": 8241 }, { "epoch": 0.3435454962277521, "grad_norm": 476.0, "learning_rate": 7.63609257638473e-05, "loss": 16.3754, "step": 8242 }, { "epoch": 0.3435871785252803, "grad_norm": 238.0, "learning_rate": 7.635518983390699e-05, "loss": 12.1889, "step": 8243 }, { "epoch": 0.3436288608228086, "grad_norm": 478.0, "learning_rate": 7.634945342364101e-05, "loss": 14.6253, "step": 8244 }, { "epoch": 0.3436705431203368, "grad_norm": 276.0, "learning_rate": 7.63437165331539e-05, "loss": 11.6274, "step": 8245 }, { "epoch": 0.34371222541786506, "grad_norm": 572.0, "learning_rate": 7.63379791625502e-05, "loss": 17.8795, "step": 8246 }, { "epoch": 0.34375390771539327, "grad_norm": 416.0, "learning_rate": 7.633224131193452e-05, "loss": 15.1251, "step": 8247 }, { "epoch": 0.34379559001292154, "grad_norm": 223.0, "learning_rate": 7.632650298141139e-05, "loss": 12.5005, "step": 8248 }, { "epoch": 0.34383727231044975, "grad_norm": 221.0, "learning_rate": 7.63207641710854e-05, "loss": 11.9378, "step": 8249 }, { "epoch": 0.343878954607978, "grad_norm": 600.0, "learning_rate": 7.631502488106116e-05, "loss": 19.5007, "step": 8250 }, { "epoch": 0.3439206369055062, "grad_norm": 672.0, "learning_rate": 7.630928511144325e-05, "loss": 20.6251, "step": 8251 }, { "epoch": 0.3439623192030345, "grad_norm": 1632.0, "learning_rate": 7.630354486233628e-05, "loss": 31.5046, "step": 8252 }, { "epoch": 0.3440040015005627, "grad_norm": 372.0, "learning_rate": 7.629780413384488e-05, "loss": 16.0004, "step": 8253 }, { "epoch": 0.344045683798091, "grad_norm": 139.0, "learning_rate": 7.629206292607366e-05, "loss": 9.8129, "step": 8254 }, { "epoch": 0.3440873660956192, "grad_norm": 358.0, "learning_rate": 7.628632123912725e-05, "loss": 15.5004, "step": 8255 }, { "epoch": 0.34412904839314745, "grad_norm": 454.0, "learning_rate": 7.62805790731103e-05, "loss": 15.6885, "step": 8256 }, { "epoch": 0.34417073069067566, "grad_norm": 480.0, "learning_rate": 7.627483642812747e-05, "loss": 17.0008, "step": 8257 }, { "epoch": 0.34421241298820393, "grad_norm": 193.0, "learning_rate": 7.626909330428342e-05, "loss": 10.8127, "step": 8258 }, { "epoch": 0.34425409528573214, "grad_norm": 152.0, "learning_rate": 7.626334970168281e-05, "loss": 9.814, "step": 8259 }, { "epoch": 0.3442957775832604, "grad_norm": 808.0, "learning_rate": 7.625760562043032e-05, "loss": 22.3759, "step": 8260 }, { "epoch": 0.3443374598807886, "grad_norm": 358.0, "learning_rate": 7.625186106063065e-05, "loss": 15.3762, "step": 8261 }, { "epoch": 0.3443791421783169, "grad_norm": 302.0, "learning_rate": 7.624611602238846e-05, "loss": 12.5005, "step": 8262 }, { "epoch": 0.3444208244758451, "grad_norm": 136.0, "learning_rate": 7.624037050580848e-05, "loss": 9.8759, "step": 8263 }, { "epoch": 0.34446250677337337, "grad_norm": 556.0, "learning_rate": 7.623462451099542e-05, "loss": 18.1253, "step": 8264 }, { "epoch": 0.3445041890709016, "grad_norm": 492.0, "learning_rate": 7.622887803805401e-05, "loss": 16.3751, "step": 8265 }, { "epoch": 0.34454587136842985, "grad_norm": 552.0, "learning_rate": 7.622313108708896e-05, "loss": 17.3755, "step": 8266 }, { "epoch": 0.34458755366595806, "grad_norm": 260.0, "learning_rate": 7.621738365820501e-05, "loss": 11.501, "step": 8267 }, { "epoch": 0.3446292359634863, "grad_norm": 444.0, "learning_rate": 7.621163575150692e-05, "loss": 15.9383, "step": 8268 }, { "epoch": 0.34467091826101454, "grad_norm": 217.0, "learning_rate": 7.620588736709944e-05, "loss": 11.6253, "step": 8269 }, { "epoch": 0.3447126005585428, "grad_norm": 1240.0, "learning_rate": 7.620013850508733e-05, "loss": 29.1253, "step": 8270 }, { "epoch": 0.344754282856071, "grad_norm": 352.0, "learning_rate": 7.61943891655754e-05, "loss": 13.0003, "step": 8271 }, { "epoch": 0.3447959651535993, "grad_norm": 632.0, "learning_rate": 7.618863934866838e-05, "loss": 19.8767, "step": 8272 }, { "epoch": 0.3448376474511275, "grad_norm": 532.0, "learning_rate": 7.618288905447108e-05, "loss": 17.6253, "step": 8273 }, { "epoch": 0.34487932974865576, "grad_norm": 584.0, "learning_rate": 7.617713828308831e-05, "loss": 18.2502, "step": 8274 }, { "epoch": 0.344921012046184, "grad_norm": 316.0, "learning_rate": 7.617138703462488e-05, "loss": 12.9388, "step": 8275 }, { "epoch": 0.34496269434371224, "grad_norm": 304.0, "learning_rate": 7.616563530918558e-05, "loss": 12.5003, "step": 8276 }, { "epoch": 0.34500437664124045, "grad_norm": 332.0, "learning_rate": 7.615988310687525e-05, "loss": 13.8128, "step": 8277 }, { "epoch": 0.3450460589387687, "grad_norm": 270.0, "learning_rate": 7.615413042779876e-05, "loss": 12.0002, "step": 8278 }, { "epoch": 0.34508774123629693, "grad_norm": 644.0, "learning_rate": 7.614837727206089e-05, "loss": 18.8752, "step": 8279 }, { "epoch": 0.3451294235338252, "grad_norm": 374.0, "learning_rate": 7.614262363976654e-05, "loss": 14.9379, "step": 8280 }, { "epoch": 0.3451711058313534, "grad_norm": 43.75, "learning_rate": 7.613686953102053e-05, "loss": 7.1876, "step": 8281 }, { "epoch": 0.3452127881288817, "grad_norm": 632.0, "learning_rate": 7.613111494592777e-05, "loss": 19.1277, "step": 8282 }, { "epoch": 0.3452544704264099, "grad_norm": 143.0, "learning_rate": 7.612535988459312e-05, "loss": 9.1256, "step": 8283 }, { "epoch": 0.34529615272393815, "grad_norm": 172.0, "learning_rate": 7.611960434712147e-05, "loss": 11.3148, "step": 8284 }, { "epoch": 0.34533783502146637, "grad_norm": 159.0, "learning_rate": 7.61138483336177e-05, "loss": 11.5636, "step": 8285 }, { "epoch": 0.34537951731899463, "grad_norm": 374.0, "learning_rate": 7.610809184418674e-05, "loss": 15.0001, "step": 8286 }, { "epoch": 0.34542119961652284, "grad_norm": 532.0, "learning_rate": 7.610233487893349e-05, "loss": 17.2506, "step": 8287 }, { "epoch": 0.3454628819140511, "grad_norm": 310.0, "learning_rate": 7.609657743796285e-05, "loss": 11.9381, "step": 8288 }, { "epoch": 0.3455045642115793, "grad_norm": 488.0, "learning_rate": 7.609081952137979e-05, "loss": 14.7506, "step": 8289 }, { "epoch": 0.3455462465091076, "grad_norm": 330.0, "learning_rate": 7.608506112928923e-05, "loss": 12.8128, "step": 8290 }, { "epoch": 0.3455879288066358, "grad_norm": 181.0, "learning_rate": 7.60793022617961e-05, "loss": 8.8756, "step": 8291 }, { "epoch": 0.34562961110416407, "grad_norm": 414.0, "learning_rate": 7.607354291900538e-05, "loss": 15.0627, "step": 8292 }, { "epoch": 0.3456712934016923, "grad_norm": 484.0, "learning_rate": 7.606778310102203e-05, "loss": 16.3757, "step": 8293 }, { "epoch": 0.34571297569922055, "grad_norm": 342.0, "learning_rate": 7.6062022807951e-05, "loss": 13.7502, "step": 8294 }, { "epoch": 0.34575465799674876, "grad_norm": 430.0, "learning_rate": 7.60562620398973e-05, "loss": 15.4377, "step": 8295 }, { "epoch": 0.345796340294277, "grad_norm": 358.0, "learning_rate": 7.605050079696591e-05, "loss": 15.1253, "step": 8296 }, { "epoch": 0.34583802259180524, "grad_norm": 348.0, "learning_rate": 7.604473907926184e-05, "loss": 14.0632, "step": 8297 }, { "epoch": 0.3458797048893335, "grad_norm": 1224.0, "learning_rate": 7.603897688689009e-05, "loss": 27.7505, "step": 8298 }, { "epoch": 0.3459213871868617, "grad_norm": 262.0, "learning_rate": 7.603321421995567e-05, "loss": 12.6252, "step": 8299 }, { "epoch": 0.34596306948439, "grad_norm": 402.0, "learning_rate": 7.602745107856359e-05, "loss": 15.0006, "step": 8300 }, { "epoch": 0.3460047517819182, "grad_norm": 364.0, "learning_rate": 7.602168746281893e-05, "loss": 15.0009, "step": 8301 }, { "epoch": 0.34604643407944646, "grad_norm": 260.0, "learning_rate": 7.601592337282668e-05, "loss": 13.1253, "step": 8302 }, { "epoch": 0.3460881163769747, "grad_norm": 306.0, "learning_rate": 7.601015880869194e-05, "loss": 12.4394, "step": 8303 }, { "epoch": 0.34612979867450294, "grad_norm": 228.0, "learning_rate": 7.600439377051974e-05, "loss": 11.5011, "step": 8304 }, { "epoch": 0.34617148097203115, "grad_norm": 241.0, "learning_rate": 7.599862825841515e-05, "loss": 12.0629, "step": 8305 }, { "epoch": 0.3462131632695594, "grad_norm": 376.0, "learning_rate": 7.599286227248327e-05, "loss": 14.751, "step": 8306 }, { "epoch": 0.34625484556708763, "grad_norm": 438.0, "learning_rate": 7.598709581282915e-05, "loss": 15.813, "step": 8307 }, { "epoch": 0.3462965278646159, "grad_norm": 76.5, "learning_rate": 7.59813288795579e-05, "loss": 8.6255, "step": 8308 }, { "epoch": 0.3463382101621441, "grad_norm": 162.0, "learning_rate": 7.597556147277462e-05, "loss": 7.7512, "step": 8309 }, { "epoch": 0.3463798924596724, "grad_norm": 740.0, "learning_rate": 7.596979359258445e-05, "loss": 21.8755, "step": 8310 }, { "epoch": 0.3464215747572006, "grad_norm": 215.0, "learning_rate": 7.596402523909248e-05, "loss": 12.1254, "step": 8311 }, { "epoch": 0.34646325705472886, "grad_norm": 1256.0, "learning_rate": 7.595825641240384e-05, "loss": 29.7504, "step": 8312 }, { "epoch": 0.3465049393522571, "grad_norm": 1312.0, "learning_rate": 7.595248711262365e-05, "loss": 28.0007, "step": 8313 }, { "epoch": 0.34654662164978534, "grad_norm": 174.0, "learning_rate": 7.594671733985712e-05, "loss": 11.2504, "step": 8314 }, { "epoch": 0.3465883039473136, "grad_norm": 1160.0, "learning_rate": 7.594094709420934e-05, "loss": 30.7525, "step": 8315 }, { "epoch": 0.3466299862448418, "grad_norm": 348.0, "learning_rate": 7.593517637578549e-05, "loss": 14.0628, "step": 8316 }, { "epoch": 0.3466716685423701, "grad_norm": 308.0, "learning_rate": 7.592940518469076e-05, "loss": 13.6879, "step": 8317 }, { "epoch": 0.3467133508398983, "grad_norm": 215.0, "learning_rate": 7.592363352103031e-05, "loss": 11.7509, "step": 8318 }, { "epoch": 0.34675503313742656, "grad_norm": 1016.0, "learning_rate": 7.591786138490935e-05, "loss": 23.6312, "step": 8319 }, { "epoch": 0.34679671543495477, "grad_norm": 348.0, "learning_rate": 7.591208877643305e-05, "loss": 14.7519, "step": 8320 }, { "epoch": 0.34683839773248304, "grad_norm": 354.0, "learning_rate": 7.590631569570665e-05, "loss": 13.5628, "step": 8321 }, { "epoch": 0.34688008003001125, "grad_norm": 342.0, "learning_rate": 7.590054214283533e-05, "loss": 13.9376, "step": 8322 }, { "epoch": 0.3469217623275395, "grad_norm": 80.0, "learning_rate": 7.589476811792434e-05, "loss": 7.5941, "step": 8323 }, { "epoch": 0.34696344462506773, "grad_norm": 58.25, "learning_rate": 7.58889936210789e-05, "loss": 7.688, "step": 8324 }, { "epoch": 0.347005126922596, "grad_norm": 398.0, "learning_rate": 7.588321865240427e-05, "loss": 14.3752, "step": 8325 }, { "epoch": 0.3470468092201242, "grad_norm": 197.0, "learning_rate": 7.587744321200565e-05, "loss": 11.2502, "step": 8326 }, { "epoch": 0.3470884915176525, "grad_norm": 200.0, "learning_rate": 7.587166729998836e-05, "loss": 11.9377, "step": 8327 }, { "epoch": 0.3471301738151807, "grad_norm": 568.0, "learning_rate": 7.586589091645761e-05, "loss": 16.8769, "step": 8328 }, { "epoch": 0.34717185611270895, "grad_norm": 358.0, "learning_rate": 7.586011406151872e-05, "loss": 14.6254, "step": 8329 }, { "epoch": 0.34721353841023717, "grad_norm": 596.0, "learning_rate": 7.585433673527696e-05, "loss": 19.8754, "step": 8330 }, { "epoch": 0.34725522070776543, "grad_norm": 116.5, "learning_rate": 7.58485589378376e-05, "loss": 8.7502, "step": 8331 }, { "epoch": 0.34729690300529364, "grad_norm": 552.0, "learning_rate": 7.584278066930596e-05, "loss": 17.7504, "step": 8332 }, { "epoch": 0.3473385853028219, "grad_norm": 237.0, "learning_rate": 7.583700192978736e-05, "loss": 12.189, "step": 8333 }, { "epoch": 0.3473802676003501, "grad_norm": 284.0, "learning_rate": 7.58312227193871e-05, "loss": 13.0004, "step": 8334 }, { "epoch": 0.3474219498978784, "grad_norm": 490.0, "learning_rate": 7.582544303821052e-05, "loss": 18.3754, "step": 8335 }, { "epoch": 0.3474636321954066, "grad_norm": 177.0, "learning_rate": 7.581966288636293e-05, "loss": 10.1877, "step": 8336 }, { "epoch": 0.34750531449293487, "grad_norm": 179.0, "learning_rate": 7.58138822639497e-05, "loss": 10.6253, "step": 8337 }, { "epoch": 0.3475469967904631, "grad_norm": 234.0, "learning_rate": 7.580810117107619e-05, "loss": 13.9383, "step": 8338 }, { "epoch": 0.34758867908799135, "grad_norm": 406.0, "learning_rate": 7.580231960784773e-05, "loss": 15.0006, "step": 8339 }, { "epoch": 0.34763036138551956, "grad_norm": 580.0, "learning_rate": 7.579653757436971e-05, "loss": 19.5002, "step": 8340 }, { "epoch": 0.3476720436830478, "grad_norm": 612.0, "learning_rate": 7.57907550707475e-05, "loss": 19.3778, "step": 8341 }, { "epoch": 0.34771372598057604, "grad_norm": 460.0, "learning_rate": 7.578497209708648e-05, "loss": 15.8752, "step": 8342 }, { "epoch": 0.3477554082781043, "grad_norm": 78.0, "learning_rate": 7.577918865349207e-05, "loss": 8.813, "step": 8343 }, { "epoch": 0.3477970905756325, "grad_norm": 324.0, "learning_rate": 7.577340474006965e-05, "loss": 15.0628, "step": 8344 }, { "epoch": 0.3478387728731608, "grad_norm": 326.0, "learning_rate": 7.576762035692464e-05, "loss": 13.0017, "step": 8345 }, { "epoch": 0.347880455170689, "grad_norm": 198.0, "learning_rate": 7.576183550416247e-05, "loss": 11.0637, "step": 8346 }, { "epoch": 0.34792213746821726, "grad_norm": 270.0, "learning_rate": 7.575605018188855e-05, "loss": 10.7501, "step": 8347 }, { "epoch": 0.3479638197657455, "grad_norm": 324.0, "learning_rate": 7.575026439020833e-05, "loss": 12.5627, "step": 8348 }, { "epoch": 0.34800550206327374, "grad_norm": 149.0, "learning_rate": 7.574447812922728e-05, "loss": 9.6879, "step": 8349 }, { "epoch": 0.34804718436080195, "grad_norm": 312.0, "learning_rate": 7.57386913990508e-05, "loss": 14.3751, "step": 8350 }, { "epoch": 0.3480888666583302, "grad_norm": 120.0, "learning_rate": 7.57329041997844e-05, "loss": 9.2512, "step": 8351 }, { "epoch": 0.34813054895585843, "grad_norm": 346.0, "learning_rate": 7.572711653153353e-05, "loss": 13.3753, "step": 8352 }, { "epoch": 0.3481722312533867, "grad_norm": 384.0, "learning_rate": 7.572132839440367e-05, "loss": 14.379, "step": 8353 }, { "epoch": 0.3482139135509149, "grad_norm": 174.0, "learning_rate": 7.571553978850033e-05, "loss": 10.188, "step": 8354 }, { "epoch": 0.3482555958484432, "grad_norm": 220.0, "learning_rate": 7.5709750713929e-05, "loss": 13.9378, "step": 8355 }, { "epoch": 0.3482972781459714, "grad_norm": 190.0, "learning_rate": 7.570396117079517e-05, "loss": 8.563, "step": 8356 }, { "epoch": 0.34833896044349966, "grad_norm": 380.0, "learning_rate": 7.569817115920434e-05, "loss": 15.1253, "step": 8357 }, { "epoch": 0.34838064274102787, "grad_norm": 406.0, "learning_rate": 7.56923806792621e-05, "loss": 15.751, "step": 8358 }, { "epoch": 0.34842232503855614, "grad_norm": 188.0, "learning_rate": 7.568658973107393e-05, "loss": 11.2509, "step": 8359 }, { "epoch": 0.34846400733608435, "grad_norm": 422.0, "learning_rate": 7.568079831474537e-05, "loss": 16.1254, "step": 8360 }, { "epoch": 0.3485056896336126, "grad_norm": 75.5, "learning_rate": 7.567500643038201e-05, "loss": 9.4382, "step": 8361 }, { "epoch": 0.3485473719311408, "grad_norm": 264.0, "learning_rate": 7.566921407808936e-05, "loss": 12.8127, "step": 8362 }, { "epoch": 0.3485890542286691, "grad_norm": 488.0, "learning_rate": 7.566342125797299e-05, "loss": 15.6879, "step": 8363 }, { "epoch": 0.3486307365261973, "grad_norm": 239.0, "learning_rate": 7.56576279701385e-05, "loss": 10.0008, "step": 8364 }, { "epoch": 0.34867241882372557, "grad_norm": 362.0, "learning_rate": 7.565183421469148e-05, "loss": 14.8753, "step": 8365 }, { "epoch": 0.3487141011212538, "grad_norm": 91.0, "learning_rate": 7.564603999173749e-05, "loss": 7.8757, "step": 8366 }, { "epoch": 0.34875578341878205, "grad_norm": 159.0, "learning_rate": 7.564024530138214e-05, "loss": 6.5942, "step": 8367 }, { "epoch": 0.34879746571631026, "grad_norm": 147.0, "learning_rate": 7.563445014373105e-05, "loss": 10.2505, "step": 8368 }, { "epoch": 0.34883914801383853, "grad_norm": 164.0, "learning_rate": 7.562865451888983e-05, "loss": 10.2505, "step": 8369 }, { "epoch": 0.34888083031136674, "grad_norm": 624.0, "learning_rate": 7.562285842696409e-05, "loss": 20.8755, "step": 8370 }, { "epoch": 0.348922512608895, "grad_norm": 202.0, "learning_rate": 7.561706186805951e-05, "loss": 11.6252, "step": 8371 }, { "epoch": 0.3489641949064232, "grad_norm": 1016.0, "learning_rate": 7.561126484228167e-05, "loss": 24.5041, "step": 8372 }, { "epoch": 0.3490058772039515, "grad_norm": 394.0, "learning_rate": 7.560546734973628e-05, "loss": 13.3126, "step": 8373 }, { "epoch": 0.3490475595014797, "grad_norm": 452.0, "learning_rate": 7.559966939052897e-05, "loss": 15.0631, "step": 8374 }, { "epoch": 0.34908924179900797, "grad_norm": 204.0, "learning_rate": 7.559387096476542e-05, "loss": 10.6255, "step": 8375 }, { "epoch": 0.3491309240965362, "grad_norm": 67.0, "learning_rate": 7.558807207255128e-05, "loss": 8.3131, "step": 8376 }, { "epoch": 0.34917260639406444, "grad_norm": 266.0, "learning_rate": 7.558227271399228e-05, "loss": 12.0003, "step": 8377 }, { "epoch": 0.34921428869159266, "grad_norm": 225.0, "learning_rate": 7.557647288919407e-05, "loss": 14.3753, "step": 8378 }, { "epoch": 0.3492559709891209, "grad_norm": 296.0, "learning_rate": 7.557067259826236e-05, "loss": 12.5631, "step": 8379 }, { "epoch": 0.34929765328664913, "grad_norm": 1152.0, "learning_rate": 7.55648718413029e-05, "loss": 26.8788, "step": 8380 }, { "epoch": 0.3493393355841774, "grad_norm": 292.0, "learning_rate": 7.555907061842136e-05, "loss": 13.9377, "step": 8381 }, { "epoch": 0.3493810178817056, "grad_norm": 644.0, "learning_rate": 7.55532689297235e-05, "loss": 19.1258, "step": 8382 }, { "epoch": 0.3494227001792339, "grad_norm": 268.0, "learning_rate": 7.554746677531503e-05, "loss": 13.8129, "step": 8383 }, { "epoch": 0.3494643824767621, "grad_norm": 211.0, "learning_rate": 7.554166415530173e-05, "loss": 11.0005, "step": 8384 }, { "epoch": 0.34950606477429036, "grad_norm": 544.0, "learning_rate": 7.553586106978932e-05, "loss": 18.2505, "step": 8385 }, { "epoch": 0.3495477470718186, "grad_norm": 136.0, "learning_rate": 7.553005751888358e-05, "loss": 10.0002, "step": 8386 }, { "epoch": 0.34958942936934684, "grad_norm": 524.0, "learning_rate": 7.552425350269028e-05, "loss": 18.0012, "step": 8387 }, { "epoch": 0.3496311116668751, "grad_norm": 432.0, "learning_rate": 7.55184490213152e-05, "loss": 15.3752, "step": 8388 }, { "epoch": 0.3496727939644033, "grad_norm": 684.0, "learning_rate": 7.551264407486411e-05, "loss": 21.8765, "step": 8389 }, { "epoch": 0.3497144762619316, "grad_norm": 596.0, "learning_rate": 7.550683866344282e-05, "loss": 17.3752, "step": 8390 }, { "epoch": 0.3497561585594598, "grad_norm": 468.0, "learning_rate": 7.550103278715713e-05, "loss": 16.3752, "step": 8391 }, { "epoch": 0.34979784085698806, "grad_norm": 224.0, "learning_rate": 7.549522644611285e-05, "loss": 9.0007, "step": 8392 }, { "epoch": 0.3498395231545163, "grad_norm": 532.0, "learning_rate": 7.548941964041581e-05, "loss": 13.1267, "step": 8393 }, { "epoch": 0.34988120545204454, "grad_norm": 254.0, "learning_rate": 7.548361237017183e-05, "loss": 11.5629, "step": 8394 }, { "epoch": 0.34992288774957275, "grad_norm": 62.5, "learning_rate": 7.547780463548675e-05, "loss": 7.5314, "step": 8395 }, { "epoch": 0.349964570047101, "grad_norm": 135.0, "learning_rate": 7.547199643646642e-05, "loss": 10.7517, "step": 8396 }, { "epoch": 0.35000625234462923, "grad_norm": 135.0, "learning_rate": 7.54661877732167e-05, "loss": 8.8126, "step": 8397 }, { "epoch": 0.3500479346421575, "grad_norm": 548.0, "learning_rate": 7.546037864584344e-05, "loss": 17.6253, "step": 8398 }, { "epoch": 0.3500896169396857, "grad_norm": 234.0, "learning_rate": 7.545456905445253e-05, "loss": 12.6882, "step": 8399 }, { "epoch": 0.350131299237214, "grad_norm": 152.0, "learning_rate": 7.544875899914983e-05, "loss": 10.9401, "step": 8400 }, { "epoch": 0.3501729815347422, "grad_norm": 560.0, "learning_rate": 7.544294848004124e-05, "loss": 18.3763, "step": 8401 }, { "epoch": 0.35021466383227046, "grad_norm": 524.0, "learning_rate": 7.543713749723265e-05, "loss": 18.0004, "step": 8402 }, { "epoch": 0.35025634612979867, "grad_norm": 1104.0, "learning_rate": 7.543132605082999e-05, "loss": 24.132, "step": 8403 }, { "epoch": 0.35029802842732694, "grad_norm": 372.0, "learning_rate": 7.542551414093915e-05, "loss": 14.9377, "step": 8404 }, { "epoch": 0.35033971072485515, "grad_norm": 276.0, "learning_rate": 7.541970176766604e-05, "loss": 13.0005, "step": 8405 }, { "epoch": 0.3503813930223834, "grad_norm": 145.0, "learning_rate": 7.541388893111661e-05, "loss": 10.2505, "step": 8406 }, { "epoch": 0.3504230753199116, "grad_norm": 151.0, "learning_rate": 7.54080756313968e-05, "loss": 10.3757, "step": 8407 }, { "epoch": 0.3504647576174399, "grad_norm": 110.0, "learning_rate": 7.540226186861256e-05, "loss": 5.0629, "step": 8408 }, { "epoch": 0.3505064399149681, "grad_norm": 213.0, "learning_rate": 7.539644764286986e-05, "loss": 11.0631, "step": 8409 }, { "epoch": 0.35054812221249637, "grad_norm": 1136.0, "learning_rate": 7.539063295427463e-05, "loss": 29.5002, "step": 8410 }, { "epoch": 0.3505898045100246, "grad_norm": 426.0, "learning_rate": 7.538481780293286e-05, "loss": 15.5004, "step": 8411 }, { "epoch": 0.35063148680755285, "grad_norm": 414.0, "learning_rate": 7.537900218895053e-05, "loss": 14.3753, "step": 8412 }, { "epoch": 0.35067316910508106, "grad_norm": 588.0, "learning_rate": 7.537318611243365e-05, "loss": 15.0631, "step": 8413 }, { "epoch": 0.35071485140260933, "grad_norm": 884.0, "learning_rate": 7.536736957348817e-05, "loss": 22.1251, "step": 8414 }, { "epoch": 0.35075653370013754, "grad_norm": 332.0, "learning_rate": 7.536155257222016e-05, "loss": 15.5005, "step": 8415 }, { "epoch": 0.3507982159976658, "grad_norm": 197.0, "learning_rate": 7.535573510873559e-05, "loss": 11.0003, "step": 8416 }, { "epoch": 0.350839898295194, "grad_norm": 237.0, "learning_rate": 7.53499171831405e-05, "loss": 12.8127, "step": 8417 }, { "epoch": 0.3508815805927223, "grad_norm": 434.0, "learning_rate": 7.534409879554091e-05, "loss": 16.5002, "step": 8418 }, { "epoch": 0.3509232628902505, "grad_norm": 160.0, "learning_rate": 7.533827994604287e-05, "loss": 11.0627, "step": 8419 }, { "epoch": 0.35096494518777877, "grad_norm": 716.0, "learning_rate": 7.533246063475243e-05, "loss": 20.3762, "step": 8420 }, { "epoch": 0.351006627485307, "grad_norm": 466.0, "learning_rate": 7.532664086177564e-05, "loss": 16.5002, "step": 8421 }, { "epoch": 0.35104830978283524, "grad_norm": 304.0, "learning_rate": 7.53208206272186e-05, "loss": 13.7506, "step": 8422 }, { "epoch": 0.35108999208036346, "grad_norm": 536.0, "learning_rate": 7.531499993118732e-05, "loss": 17.2504, "step": 8423 }, { "epoch": 0.3511316743778917, "grad_norm": 221.0, "learning_rate": 7.530917877378794e-05, "loss": 10.8751, "step": 8424 }, { "epoch": 0.35117335667541993, "grad_norm": 984.0, "learning_rate": 7.530335715512653e-05, "loss": 22.8794, "step": 8425 }, { "epoch": 0.3512150389729482, "grad_norm": 179.0, "learning_rate": 7.529753507530918e-05, "loss": 10.2503, "step": 8426 }, { "epoch": 0.3512567212704764, "grad_norm": 516.0, "learning_rate": 7.529171253444202e-05, "loss": 18.0026, "step": 8427 }, { "epoch": 0.3512984035680047, "grad_norm": 370.0, "learning_rate": 7.528588953263114e-05, "loss": 15.0626, "step": 8428 }, { "epoch": 0.3513400858655329, "grad_norm": 384.0, "learning_rate": 7.528006606998268e-05, "loss": 14.7503, "step": 8429 }, { "epoch": 0.35138176816306116, "grad_norm": 412.0, "learning_rate": 7.527424214660279e-05, "loss": 14.5003, "step": 8430 }, { "epoch": 0.35142345046058937, "grad_norm": 176.0, "learning_rate": 7.526841776259757e-05, "loss": 10.563, "step": 8431 }, { "epoch": 0.35146513275811764, "grad_norm": 844.0, "learning_rate": 7.52625929180732e-05, "loss": 19.2545, "step": 8432 }, { "epoch": 0.35150681505564585, "grad_norm": 664.0, "learning_rate": 7.525676761313584e-05, "loss": 18.6255, "step": 8433 }, { "epoch": 0.3515484973531741, "grad_norm": 1136.0, "learning_rate": 7.525094184789163e-05, "loss": 28.6264, "step": 8434 }, { "epoch": 0.35159017965070233, "grad_norm": 173.0, "learning_rate": 7.524511562244679e-05, "loss": 10.8132, "step": 8435 }, { "epoch": 0.3516318619482306, "grad_norm": 390.0, "learning_rate": 7.523928893690746e-05, "loss": 14.1253, "step": 8436 }, { "epoch": 0.3516735442457588, "grad_norm": 136.0, "learning_rate": 7.523346179137985e-05, "loss": 9.688, "step": 8437 }, { "epoch": 0.3517152265432871, "grad_norm": 144.0, "learning_rate": 7.522763418597017e-05, "loss": 10.1881, "step": 8438 }, { "epoch": 0.3517569088408153, "grad_norm": 126.0, "learning_rate": 7.52218061207846e-05, "loss": 9.8754, "step": 8439 }, { "epoch": 0.35179859113834355, "grad_norm": 360.0, "learning_rate": 7.521597759592938e-05, "loss": 15.5627, "step": 8440 }, { "epoch": 0.35184027343587176, "grad_norm": 456.0, "learning_rate": 7.521014861151073e-05, "loss": 14.8131, "step": 8441 }, { "epoch": 0.35188195573340003, "grad_norm": 410.0, "learning_rate": 7.520431916763489e-05, "loss": 15.0634, "step": 8442 }, { "epoch": 0.35192363803092824, "grad_norm": 173.0, "learning_rate": 7.519848926440809e-05, "loss": 8.6254, "step": 8443 }, { "epoch": 0.3519653203284565, "grad_norm": 274.0, "learning_rate": 7.519265890193659e-05, "loss": 12.8753, "step": 8444 }, { "epoch": 0.3520070026259847, "grad_norm": 177.0, "learning_rate": 7.518682808032663e-05, "loss": 10.0014, "step": 8445 }, { "epoch": 0.352048684923513, "grad_norm": 120.0, "learning_rate": 7.518099679968451e-05, "loss": 7.2506, "step": 8446 }, { "epoch": 0.3520903672210412, "grad_norm": 237.0, "learning_rate": 7.517516506011648e-05, "loss": 11.5631, "step": 8447 }, { "epoch": 0.35213204951856947, "grad_norm": 364.0, "learning_rate": 7.516933286172883e-05, "loss": 14.0627, "step": 8448 }, { "epoch": 0.3521737318160977, "grad_norm": 270.0, "learning_rate": 7.516350020462785e-05, "loss": 11.0003, "step": 8449 }, { "epoch": 0.35221541411362595, "grad_norm": 496.0, "learning_rate": 7.515766708891987e-05, "loss": 15.8757, "step": 8450 }, { "epoch": 0.35225709641115416, "grad_norm": 264.0, "learning_rate": 7.515183351471116e-05, "loss": 11.7503, "step": 8451 }, { "epoch": 0.3522987787086824, "grad_norm": 193.0, "learning_rate": 7.514599948210805e-05, "loss": 9.001, "step": 8452 }, { "epoch": 0.35234046100621064, "grad_norm": 227.0, "learning_rate": 7.514016499121687e-05, "loss": 11.0628, "step": 8453 }, { "epoch": 0.3523821433037389, "grad_norm": 207.0, "learning_rate": 7.513433004214394e-05, "loss": 11.2501, "step": 8454 }, { "epoch": 0.3524238256012671, "grad_norm": 322.0, "learning_rate": 7.512849463499563e-05, "loss": 14.6252, "step": 8455 }, { "epoch": 0.3524655078987954, "grad_norm": 260.0, "learning_rate": 7.512265876987826e-05, "loss": 11.8752, "step": 8456 }, { "epoch": 0.3525071901963236, "grad_norm": 1112.0, "learning_rate": 7.511682244689821e-05, "loss": 25.5113, "step": 8457 }, { "epoch": 0.35254887249385186, "grad_norm": 1020.0, "learning_rate": 7.511098566616184e-05, "loss": 25.7519, "step": 8458 }, { "epoch": 0.35259055479138013, "grad_norm": 624.0, "learning_rate": 7.510514842777552e-05, "loss": 20.8764, "step": 8459 }, { "epoch": 0.35263223708890834, "grad_norm": 402.0, "learning_rate": 7.509931073184566e-05, "loss": 14.6253, "step": 8460 }, { "epoch": 0.3526739193864366, "grad_norm": 316.0, "learning_rate": 7.509347257847861e-05, "loss": 13.563, "step": 8461 }, { "epoch": 0.3527156016839648, "grad_norm": 340.0, "learning_rate": 7.508763396778081e-05, "loss": 14.3753, "step": 8462 }, { "epoch": 0.3527572839814931, "grad_norm": 255.0, "learning_rate": 7.508179489985865e-05, "loss": 12.6879, "step": 8463 }, { "epoch": 0.3527989662790213, "grad_norm": 608.0, "learning_rate": 7.507595537481856e-05, "loss": 18.751, "step": 8464 }, { "epoch": 0.35284064857654956, "grad_norm": 688.0, "learning_rate": 7.507011539276695e-05, "loss": 19.7525, "step": 8465 }, { "epoch": 0.3528823308740778, "grad_norm": 218.0, "learning_rate": 7.506427495381026e-05, "loss": 11.5627, "step": 8466 }, { "epoch": 0.35292401317160604, "grad_norm": 278.0, "learning_rate": 7.505843405805493e-05, "loss": 11.2502, "step": 8467 }, { "epoch": 0.35296569546913426, "grad_norm": 416.0, "learning_rate": 7.505259270560743e-05, "loss": 14.3128, "step": 8468 }, { "epoch": 0.3530073777666625, "grad_norm": 78.5, "learning_rate": 7.50467508965742e-05, "loss": 8.5633, "step": 8469 }, { "epoch": 0.35304906006419073, "grad_norm": 664.0, "learning_rate": 7.50409086310617e-05, "loss": 18.6265, "step": 8470 }, { "epoch": 0.353090742361719, "grad_norm": 148.0, "learning_rate": 7.503506590917642e-05, "loss": 8.5009, "step": 8471 }, { "epoch": 0.3531324246592472, "grad_norm": 912.0, "learning_rate": 7.502922273102484e-05, "loss": 21.5054, "step": 8472 }, { "epoch": 0.3531741069567755, "grad_norm": 157.0, "learning_rate": 7.502337909671347e-05, "loss": 9.1878, "step": 8473 }, { "epoch": 0.3532157892543037, "grad_norm": 330.0, "learning_rate": 7.501753500634877e-05, "loss": 13.2503, "step": 8474 }, { "epoch": 0.35325747155183196, "grad_norm": 358.0, "learning_rate": 7.50116904600373e-05, "loss": 15.0002, "step": 8475 }, { "epoch": 0.35329915384936017, "grad_norm": 195.0, "learning_rate": 7.500584545788552e-05, "loss": 10.8753, "step": 8476 }, { "epoch": 0.35334083614688844, "grad_norm": 604.0, "learning_rate": 7.500000000000001e-05, "loss": 19.2507, "step": 8477 }, { "epoch": 0.35338251844441665, "grad_norm": 1256.0, "learning_rate": 7.499415408648727e-05, "loss": 26.8756, "step": 8478 }, { "epoch": 0.3534242007419449, "grad_norm": 548.0, "learning_rate": 7.498830771745386e-05, "loss": 18.3753, "step": 8479 }, { "epoch": 0.3534658830394731, "grad_norm": 247.0, "learning_rate": 7.498246089300632e-05, "loss": 11.8128, "step": 8480 }, { "epoch": 0.3535075653370014, "grad_norm": 380.0, "learning_rate": 7.497661361325122e-05, "loss": 14.8753, "step": 8481 }, { "epoch": 0.3535492476345296, "grad_norm": 492.0, "learning_rate": 7.497076587829512e-05, "loss": 19.0003, "step": 8482 }, { "epoch": 0.3535909299320579, "grad_norm": 136.0, "learning_rate": 7.496491768824458e-05, "loss": 9.8128, "step": 8483 }, { "epoch": 0.3536326122295861, "grad_norm": 450.0, "learning_rate": 7.49590690432062e-05, "loss": 15.3128, "step": 8484 }, { "epoch": 0.35367429452711435, "grad_norm": 182.0, "learning_rate": 7.495321994328658e-05, "loss": 11.3753, "step": 8485 }, { "epoch": 0.35371597682464256, "grad_norm": 1160.0, "learning_rate": 7.494737038859231e-05, "loss": 31.8761, "step": 8486 }, { "epoch": 0.35375765912217083, "grad_norm": 1296.0, "learning_rate": 7.494152037923e-05, "loss": 26.7548, "step": 8487 }, { "epoch": 0.35379934141969904, "grad_norm": 152.0, "learning_rate": 7.493566991530627e-05, "loss": 9.6254, "step": 8488 }, { "epoch": 0.3538410237172273, "grad_norm": 604.0, "learning_rate": 7.492981899692773e-05, "loss": 19.5004, "step": 8489 }, { "epoch": 0.3538827060147555, "grad_norm": 362.0, "learning_rate": 7.492396762420103e-05, "loss": 15.0004, "step": 8490 }, { "epoch": 0.3539243883122838, "grad_norm": 568.0, "learning_rate": 7.491811579723282e-05, "loss": 18.8753, "step": 8491 }, { "epoch": 0.353966070609812, "grad_norm": 74.0, "learning_rate": 7.491226351612974e-05, "loss": 7.2504, "step": 8492 }, { "epoch": 0.35400775290734027, "grad_norm": 604.0, "learning_rate": 7.490641078099843e-05, "loss": 19.1256, "step": 8493 }, { "epoch": 0.3540494352048685, "grad_norm": 227.0, "learning_rate": 7.490055759194559e-05, "loss": 12.6879, "step": 8494 }, { "epoch": 0.35409111750239675, "grad_norm": 724.0, "learning_rate": 7.489470394907785e-05, "loss": 17.7542, "step": 8495 }, { "epoch": 0.35413279979992496, "grad_norm": 392.0, "learning_rate": 7.488884985250194e-05, "loss": 14.8129, "step": 8496 }, { "epoch": 0.3541744820974532, "grad_norm": 344.0, "learning_rate": 7.488299530232452e-05, "loss": 12.4379, "step": 8497 }, { "epoch": 0.35421616439498144, "grad_norm": 243.0, "learning_rate": 7.487714029865232e-05, "loss": 10.9376, "step": 8498 }, { "epoch": 0.3542578466925097, "grad_norm": 268.0, "learning_rate": 7.4871284841592e-05, "loss": 12.8137, "step": 8499 }, { "epoch": 0.3542995289900379, "grad_norm": 428.0, "learning_rate": 7.486542893125034e-05, "loss": 16.7503, "step": 8500 }, { "epoch": 0.3543412112875662, "grad_norm": 432.0, "learning_rate": 7.485957256773401e-05, "loss": 16.3752, "step": 8501 }, { "epoch": 0.3543828935850944, "grad_norm": 245.0, "learning_rate": 7.485371575114977e-05, "loss": 11.8131, "step": 8502 }, { "epoch": 0.35442457588262266, "grad_norm": 248.0, "learning_rate": 7.484785848160436e-05, "loss": 12.8752, "step": 8503 }, { "epoch": 0.3544662581801509, "grad_norm": 1640.0, "learning_rate": 7.484200075920451e-05, "loss": 32.7518, "step": 8504 }, { "epoch": 0.35450794047767914, "grad_norm": 544.0, "learning_rate": 7.4836142584057e-05, "loss": 16.5028, "step": 8505 }, { "epoch": 0.35454962277520735, "grad_norm": 63.75, "learning_rate": 7.483028395626859e-05, "loss": 6.6253, "step": 8506 }, { "epoch": 0.3545913050727356, "grad_norm": 47.25, "learning_rate": 7.482442487594604e-05, "loss": 7.6255, "step": 8507 }, { "epoch": 0.35463298737026383, "grad_norm": 628.0, "learning_rate": 7.481856534319614e-05, "loss": 20.0002, "step": 8508 }, { "epoch": 0.3546746696677921, "grad_norm": 576.0, "learning_rate": 7.48127053581257e-05, "loss": 17.5022, "step": 8509 }, { "epoch": 0.3547163519653203, "grad_norm": 308.0, "learning_rate": 7.480684492084148e-05, "loss": 13.0002, "step": 8510 }, { "epoch": 0.3547580342628486, "grad_norm": 328.0, "learning_rate": 7.480098403145033e-05, "loss": 13.6271, "step": 8511 }, { "epoch": 0.3547997165603768, "grad_norm": 270.0, "learning_rate": 7.479512269005904e-05, "loss": 13.3757, "step": 8512 }, { "epoch": 0.35484139885790505, "grad_norm": 179.0, "learning_rate": 7.478926089677443e-05, "loss": 10.0627, "step": 8513 }, { "epoch": 0.35488308115543327, "grad_norm": 764.0, "learning_rate": 7.478339865170334e-05, "loss": 23.0011, "step": 8514 }, { "epoch": 0.35492476345296153, "grad_norm": 466.0, "learning_rate": 7.477753595495261e-05, "loss": 15.8128, "step": 8515 }, { "epoch": 0.35496644575048975, "grad_norm": 274.0, "learning_rate": 7.477167280662909e-05, "loss": 12.2501, "step": 8516 }, { "epoch": 0.355008128048018, "grad_norm": 382.0, "learning_rate": 7.476580920683964e-05, "loss": 12.6276, "step": 8517 }, { "epoch": 0.3550498103455462, "grad_norm": 452.0, "learning_rate": 7.475994515569112e-05, "loss": 17.8755, "step": 8518 }, { "epoch": 0.3550914926430745, "grad_norm": 150.0, "learning_rate": 7.47540806532904e-05, "loss": 9.0002, "step": 8519 }, { "epoch": 0.3551331749406027, "grad_norm": 568.0, "learning_rate": 7.474821569974434e-05, "loss": 18.0001, "step": 8520 }, { "epoch": 0.35517485723813097, "grad_norm": 944.0, "learning_rate": 7.474235029515987e-05, "loss": 26.2511, "step": 8521 }, { "epoch": 0.3552165395356592, "grad_norm": 1080.0, "learning_rate": 7.473648443964387e-05, "loss": 26.7511, "step": 8522 }, { "epoch": 0.35525822183318745, "grad_norm": 492.0, "learning_rate": 7.473061813330325e-05, "loss": 12.8145, "step": 8523 }, { "epoch": 0.35529990413071566, "grad_norm": 732.0, "learning_rate": 7.472475137624491e-05, "loss": 21.5001, "step": 8524 }, { "epoch": 0.3553415864282439, "grad_norm": 69.5, "learning_rate": 7.471888416857578e-05, "loss": 8.1877, "step": 8525 }, { "epoch": 0.35538326872577214, "grad_norm": 474.0, "learning_rate": 7.47130165104028e-05, "loss": 17.1252, "step": 8526 }, { "epoch": 0.3554249510233004, "grad_norm": 164.0, "learning_rate": 7.470714840183291e-05, "loss": 10.3136, "step": 8527 }, { "epoch": 0.3554666333208286, "grad_norm": 115.5, "learning_rate": 7.470127984297303e-05, "loss": 9.252, "step": 8528 }, { "epoch": 0.3555083156183569, "grad_norm": 288.0, "learning_rate": 7.469541083393012e-05, "loss": 13.8127, "step": 8529 }, { "epoch": 0.3555499979158851, "grad_norm": 300.0, "learning_rate": 7.468954137481118e-05, "loss": 13.188, "step": 8530 }, { "epoch": 0.35559168021341336, "grad_norm": 712.0, "learning_rate": 7.468367146572315e-05, "loss": 20.0003, "step": 8531 }, { "epoch": 0.35563336251094163, "grad_norm": 352.0, "learning_rate": 7.467780110677302e-05, "loss": 12.5008, "step": 8532 }, { "epoch": 0.35567504480846984, "grad_norm": 284.0, "learning_rate": 7.467193029806775e-05, "loss": 12.5005, "step": 8533 }, { "epoch": 0.3557167271059981, "grad_norm": 106.5, "learning_rate": 7.466605903971438e-05, "loss": 10.813, "step": 8534 }, { "epoch": 0.3557584094035263, "grad_norm": 184.0, "learning_rate": 7.46601873318199e-05, "loss": 11.5002, "step": 8535 }, { "epoch": 0.3558000917010546, "grad_norm": 214.0, "learning_rate": 7.46543151744913e-05, "loss": 10.4377, "step": 8536 }, { "epoch": 0.3558417739985828, "grad_norm": 600.0, "learning_rate": 7.464844256783563e-05, "loss": 19.5009, "step": 8537 }, { "epoch": 0.35588345629611107, "grad_norm": 255.0, "learning_rate": 7.46425695119599e-05, "loss": 12.2501, "step": 8538 }, { "epoch": 0.3559251385936393, "grad_norm": 428.0, "learning_rate": 7.463669600697117e-05, "loss": 15.5627, "step": 8539 }, { "epoch": 0.35596682089116755, "grad_norm": 744.0, "learning_rate": 7.463082205297644e-05, "loss": 22.2503, "step": 8540 }, { "epoch": 0.35600850318869576, "grad_norm": 524.0, "learning_rate": 7.46249476500828e-05, "loss": 17.253, "step": 8541 }, { "epoch": 0.356050185486224, "grad_norm": 98.0, "learning_rate": 7.461907279839733e-05, "loss": 9.0632, "step": 8542 }, { "epoch": 0.35609186778375224, "grad_norm": 220.0, "learning_rate": 7.461319749802705e-05, "loss": 11.1252, "step": 8543 }, { "epoch": 0.3561335500812805, "grad_norm": 448.0, "learning_rate": 7.460732174907905e-05, "loss": 16.2506, "step": 8544 }, { "epoch": 0.3561752323788087, "grad_norm": 624.0, "learning_rate": 7.460144555166044e-05, "loss": 18.0004, "step": 8545 }, { "epoch": 0.356216914676337, "grad_norm": 199.0, "learning_rate": 7.459556890587828e-05, "loss": 11.1879, "step": 8546 }, { "epoch": 0.3562585969738652, "grad_norm": 100.5, "learning_rate": 7.458969181183972e-05, "loss": 8.4377, "step": 8547 }, { "epoch": 0.35630027927139346, "grad_norm": 73.0, "learning_rate": 7.458381426965184e-05, "loss": 7.7502, "step": 8548 }, { "epoch": 0.3563419615689217, "grad_norm": 262.0, "learning_rate": 7.457793627942176e-05, "loss": 13.2508, "step": 8549 }, { "epoch": 0.35638364386644994, "grad_norm": 1000.0, "learning_rate": 7.457205784125661e-05, "loss": 26.1254, "step": 8550 }, { "epoch": 0.35642532616397815, "grad_norm": 480.0, "learning_rate": 7.456617895526352e-05, "loss": 16.251, "step": 8551 }, { "epoch": 0.3564670084615064, "grad_norm": 456.0, "learning_rate": 7.456029962154965e-05, "loss": 16.2526, "step": 8552 }, { "epoch": 0.35650869075903463, "grad_norm": 54.75, "learning_rate": 7.455441984022214e-05, "loss": 6.0317, "step": 8553 }, { "epoch": 0.3565503730565629, "grad_norm": 516.0, "learning_rate": 7.454853961138813e-05, "loss": 16.7505, "step": 8554 }, { "epoch": 0.3565920553540911, "grad_norm": 696.0, "learning_rate": 7.454265893515482e-05, "loss": 20.2508, "step": 8555 }, { "epoch": 0.3566337376516194, "grad_norm": 302.0, "learning_rate": 7.453677781162936e-05, "loss": 13.4377, "step": 8556 }, { "epoch": 0.3566754199491476, "grad_norm": 179.0, "learning_rate": 7.453089624091896e-05, "loss": 11.5628, "step": 8557 }, { "epoch": 0.35671710224667585, "grad_norm": 280.0, "learning_rate": 7.452501422313078e-05, "loss": 11.6282, "step": 8558 }, { "epoch": 0.35675878454420407, "grad_norm": 258.0, "learning_rate": 7.451913175837206e-05, "loss": 12.9386, "step": 8559 }, { "epoch": 0.35680046684173233, "grad_norm": 332.0, "learning_rate": 7.451324884674998e-05, "loss": 15.6878, "step": 8560 }, { "epoch": 0.35684214913926054, "grad_norm": 412.0, "learning_rate": 7.450736548837176e-05, "loss": 16.1255, "step": 8561 }, { "epoch": 0.3568838314367888, "grad_norm": 482.0, "learning_rate": 7.450148168334462e-05, "loss": 17.0001, "step": 8562 }, { "epoch": 0.356925513734317, "grad_norm": 272.0, "learning_rate": 7.449559743177581e-05, "loss": 11.3763, "step": 8563 }, { "epoch": 0.3569671960318453, "grad_norm": 220.0, "learning_rate": 7.448971273377257e-05, "loss": 10.0634, "step": 8564 }, { "epoch": 0.3570088783293735, "grad_norm": 952.0, "learning_rate": 7.448382758944212e-05, "loss": 22.3766, "step": 8565 }, { "epoch": 0.35705056062690177, "grad_norm": 188.0, "learning_rate": 7.447794199889177e-05, "loss": 9.8128, "step": 8566 }, { "epoch": 0.35709224292443, "grad_norm": 100.0, "learning_rate": 7.447205596222872e-05, "loss": 8.4378, "step": 8567 }, { "epoch": 0.35713392522195825, "grad_norm": 120.5, "learning_rate": 7.446616947956031e-05, "loss": 7.844, "step": 8568 }, { "epoch": 0.35717560751948646, "grad_norm": 436.0, "learning_rate": 7.446028255099378e-05, "loss": 16.2501, "step": 8569 }, { "epoch": 0.3572172898170147, "grad_norm": 195.0, "learning_rate": 7.445439517663641e-05, "loss": 10.2503, "step": 8570 }, { "epoch": 0.35725897211454294, "grad_norm": 187.0, "learning_rate": 7.444850735659551e-05, "loss": 9.1879, "step": 8571 }, { "epoch": 0.3573006544120712, "grad_norm": 262.0, "learning_rate": 7.444261909097842e-05, "loss": 13.4379, "step": 8572 }, { "epoch": 0.3573423367095994, "grad_norm": 1496.0, "learning_rate": 7.443673037989243e-05, "loss": 27.3797, "step": 8573 }, { "epoch": 0.3573840190071277, "grad_norm": 253.0, "learning_rate": 7.443084122344486e-05, "loss": 13.8132, "step": 8574 }, { "epoch": 0.3574257013046559, "grad_norm": 372.0, "learning_rate": 7.442495162174303e-05, "loss": 14.0628, "step": 8575 }, { "epoch": 0.35746738360218416, "grad_norm": 148.0, "learning_rate": 7.441906157489428e-05, "loss": 11.0011, "step": 8576 }, { "epoch": 0.3575090658997124, "grad_norm": 352.0, "learning_rate": 7.441317108300598e-05, "loss": 11.6881, "step": 8577 }, { "epoch": 0.35755074819724064, "grad_norm": 332.0, "learning_rate": 7.440728014618546e-05, "loss": 13.6272, "step": 8578 }, { "epoch": 0.35759243049476885, "grad_norm": 234.0, "learning_rate": 7.440138876454009e-05, "loss": 12.0003, "step": 8579 }, { "epoch": 0.3576341127922971, "grad_norm": 788.0, "learning_rate": 7.439549693817726e-05, "loss": 19.2543, "step": 8580 }, { "epoch": 0.35767579508982533, "grad_norm": 360.0, "learning_rate": 7.438960466720431e-05, "loss": 15.3128, "step": 8581 }, { "epoch": 0.3577174773873536, "grad_norm": 436.0, "learning_rate": 7.438371195172867e-05, "loss": 16.2503, "step": 8582 }, { "epoch": 0.3577591596848818, "grad_norm": 206.0, "learning_rate": 7.437781879185772e-05, "loss": 8.2502, "step": 8583 }, { "epoch": 0.3578008419824101, "grad_norm": 338.0, "learning_rate": 7.437192518769882e-05, "loss": 13.8128, "step": 8584 }, { "epoch": 0.3578425242799383, "grad_norm": 1024.0, "learning_rate": 7.436603113935945e-05, "loss": 22.1297, "step": 8585 }, { "epoch": 0.35788420657746656, "grad_norm": 266.0, "learning_rate": 7.4360136646947e-05, "loss": 11.3757, "step": 8586 }, { "epoch": 0.35792588887499477, "grad_norm": 215.0, "learning_rate": 7.435424171056888e-05, "loss": 10.6895, "step": 8587 }, { "epoch": 0.35796757117252304, "grad_norm": 668.0, "learning_rate": 7.434834633033256e-05, "loss": 19.2512, "step": 8588 }, { "epoch": 0.35800925347005125, "grad_norm": 288.0, "learning_rate": 7.434245050634546e-05, "loss": 12.0004, "step": 8589 }, { "epoch": 0.3580509357675795, "grad_norm": 91.0, "learning_rate": 7.433655423871505e-05, "loss": 9.5002, "step": 8590 }, { "epoch": 0.3580926180651077, "grad_norm": 516.0, "learning_rate": 7.433065752754877e-05, "loss": 16.7502, "step": 8591 }, { "epoch": 0.358134300362636, "grad_norm": 172.0, "learning_rate": 7.43247603729541e-05, "loss": 11.1253, "step": 8592 }, { "epoch": 0.3581759826601642, "grad_norm": 132.0, "learning_rate": 7.43188627750385e-05, "loss": 9.3754, "step": 8593 }, { "epoch": 0.35821766495769247, "grad_norm": 163.0, "learning_rate": 7.431296473390947e-05, "loss": 9.8752, "step": 8594 }, { "epoch": 0.3582593472552207, "grad_norm": 696.0, "learning_rate": 7.430706624967452e-05, "loss": 20.8753, "step": 8595 }, { "epoch": 0.35830102955274895, "grad_norm": 920.0, "learning_rate": 7.43011673224411e-05, "loss": 20.3796, "step": 8596 }, { "epoch": 0.35834271185027716, "grad_norm": 418.0, "learning_rate": 7.429526795231677e-05, "loss": 15.0003, "step": 8597 }, { "epoch": 0.35838439414780543, "grad_norm": 95.5, "learning_rate": 7.4289368139409e-05, "loss": 8.9376, "step": 8598 }, { "epoch": 0.35842607644533364, "grad_norm": 664.0, "learning_rate": 7.428346788382536e-05, "loss": 19.7507, "step": 8599 }, { "epoch": 0.3584677587428619, "grad_norm": 225.0, "learning_rate": 7.427756718567335e-05, "loss": 12.1882, "step": 8600 }, { "epoch": 0.3585094410403901, "grad_norm": 188.0, "learning_rate": 7.427166604506056e-05, "loss": 10.6255, "step": 8601 }, { "epoch": 0.3585511233379184, "grad_norm": 130.0, "learning_rate": 7.426576446209445e-05, "loss": 9.2502, "step": 8602 }, { "epoch": 0.3585928056354466, "grad_norm": 366.0, "learning_rate": 7.425986243688266e-05, "loss": 13.9381, "step": 8603 }, { "epoch": 0.35863448793297487, "grad_norm": 245.0, "learning_rate": 7.425395996953272e-05, "loss": 13.3127, "step": 8604 }, { "epoch": 0.35867617023050313, "grad_norm": 340.0, "learning_rate": 7.42480570601522e-05, "loss": 13.6252, "step": 8605 }, { "epoch": 0.35871785252803134, "grad_norm": 196.0, "learning_rate": 7.42421537088487e-05, "loss": 10.0632, "step": 8606 }, { "epoch": 0.3587595348255596, "grad_norm": 63.75, "learning_rate": 7.423624991572977e-05, "loss": 7.2212, "step": 8607 }, { "epoch": 0.3588012171230878, "grad_norm": 478.0, "learning_rate": 7.423034568090306e-05, "loss": 18.0002, "step": 8608 }, { "epoch": 0.3588428994206161, "grad_norm": 1480.0, "learning_rate": 7.422444100447613e-05, "loss": 36.0005, "step": 8609 }, { "epoch": 0.3588845817181443, "grad_norm": 632.0, "learning_rate": 7.421853588655662e-05, "loss": 20.6254, "step": 8610 }, { "epoch": 0.35892626401567257, "grad_norm": 153.0, "learning_rate": 7.421263032725215e-05, "loss": 10.3753, "step": 8611 }, { "epoch": 0.3589679463132008, "grad_norm": 207.0, "learning_rate": 7.420672432667033e-05, "loss": 11.0005, "step": 8612 }, { "epoch": 0.35900962861072905, "grad_norm": 2208.0, "learning_rate": 7.420081788491882e-05, "loss": 41.7503, "step": 8613 }, { "epoch": 0.35905131090825726, "grad_norm": 976.0, "learning_rate": 7.419491100210524e-05, "loss": 20.6301, "step": 8614 }, { "epoch": 0.3590929932057855, "grad_norm": 338.0, "learning_rate": 7.418900367833729e-05, "loss": 14.0627, "step": 8615 }, { "epoch": 0.35913467550331374, "grad_norm": 129.0, "learning_rate": 7.418309591372259e-05, "loss": 9.8752, "step": 8616 }, { "epoch": 0.359176357800842, "grad_norm": 239.0, "learning_rate": 7.41771877083688e-05, "loss": 12.2506, "step": 8617 }, { "epoch": 0.3592180400983702, "grad_norm": 224.0, "learning_rate": 7.417127906238364e-05, "loss": 11.6252, "step": 8618 }, { "epoch": 0.3592597223958985, "grad_norm": 256.0, "learning_rate": 7.416536997587477e-05, "loss": 10.5005, "step": 8619 }, { "epoch": 0.3593014046934267, "grad_norm": 235.0, "learning_rate": 7.415946044894988e-05, "loss": 10.9378, "step": 8620 }, { "epoch": 0.35934308699095496, "grad_norm": 540.0, "learning_rate": 7.415355048171665e-05, "loss": 18.1296, "step": 8621 }, { "epoch": 0.3593847692884832, "grad_norm": 282.0, "learning_rate": 7.414764007428286e-05, "loss": 12.751, "step": 8622 }, { "epoch": 0.35942645158601144, "grad_norm": 708.0, "learning_rate": 7.414172922675618e-05, "loss": 21.7502, "step": 8623 }, { "epoch": 0.35946813388353965, "grad_norm": 466.0, "learning_rate": 7.413581793924434e-05, "loss": 16.7502, "step": 8624 }, { "epoch": 0.3595098161810679, "grad_norm": 442.0, "learning_rate": 7.412990621185508e-05, "loss": 15.8755, "step": 8625 }, { "epoch": 0.35955149847859613, "grad_norm": 93.0, "learning_rate": 7.412399404469612e-05, "loss": 9.2503, "step": 8626 }, { "epoch": 0.3595931807761244, "grad_norm": 358.0, "learning_rate": 7.411808143787523e-05, "loss": 15.0012, "step": 8627 }, { "epoch": 0.3596348630736526, "grad_norm": 292.0, "learning_rate": 7.411216839150016e-05, "loss": 11.5647, "step": 8628 }, { "epoch": 0.3596765453711809, "grad_norm": 198.0, "learning_rate": 7.41062549056787e-05, "loss": 11.2505, "step": 8629 }, { "epoch": 0.3597182276687091, "grad_norm": 532.0, "learning_rate": 7.41003409805186e-05, "loss": 18.6253, "step": 8630 }, { "epoch": 0.35975990996623736, "grad_norm": 552.0, "learning_rate": 7.409442661612764e-05, "loss": 16.8753, "step": 8631 }, { "epoch": 0.35980159226376557, "grad_norm": 596.0, "learning_rate": 7.408851181261363e-05, "loss": 19.1252, "step": 8632 }, { "epoch": 0.35984327456129384, "grad_norm": 644.0, "learning_rate": 7.408259657008433e-05, "loss": 19.2505, "step": 8633 }, { "epoch": 0.35988495685882205, "grad_norm": 370.0, "learning_rate": 7.407668088864757e-05, "loss": 15.0629, "step": 8634 }, { "epoch": 0.3599266391563503, "grad_norm": 406.0, "learning_rate": 7.407076476841118e-05, "loss": 15.8126, "step": 8635 }, { "epoch": 0.3599683214538785, "grad_norm": 243.0, "learning_rate": 7.406484820948296e-05, "loss": 11.7508, "step": 8636 }, { "epoch": 0.3600100037514068, "grad_norm": 264.0, "learning_rate": 7.405893121197075e-05, "loss": 11.9382, "step": 8637 }, { "epoch": 0.360051686048935, "grad_norm": 186.0, "learning_rate": 7.405301377598237e-05, "loss": 11.0004, "step": 8638 }, { "epoch": 0.36009336834646327, "grad_norm": 157.0, "learning_rate": 7.40470959016257e-05, "loss": 10.7508, "step": 8639 }, { "epoch": 0.3601350506439915, "grad_norm": 398.0, "learning_rate": 7.404117758900855e-05, "loss": 12.5635, "step": 8640 }, { "epoch": 0.36017673294151975, "grad_norm": 504.0, "learning_rate": 7.403525883823883e-05, "loss": 15.6259, "step": 8641 }, { "epoch": 0.36021841523904796, "grad_norm": 436.0, "learning_rate": 7.402933964942435e-05, "loss": 17.1253, "step": 8642 }, { "epoch": 0.36026009753657623, "grad_norm": 576.0, "learning_rate": 7.402342002267303e-05, "loss": 18.8759, "step": 8643 }, { "epoch": 0.36030177983410444, "grad_norm": 564.0, "learning_rate": 7.401749995809277e-05, "loss": 18.7502, "step": 8644 }, { "epoch": 0.3603434621316327, "grad_norm": 300.0, "learning_rate": 7.401157945579142e-05, "loss": 13.5629, "step": 8645 }, { "epoch": 0.3603851444291609, "grad_norm": 494.0, "learning_rate": 7.400565851587691e-05, "loss": 15.7507, "step": 8646 }, { "epoch": 0.3604268267266892, "grad_norm": 161.0, "learning_rate": 7.399973713845713e-05, "loss": 11.1253, "step": 8647 }, { "epoch": 0.3604685090242174, "grad_norm": 362.0, "learning_rate": 7.399381532364003e-05, "loss": 14.3752, "step": 8648 }, { "epoch": 0.36051019132174567, "grad_norm": 290.0, "learning_rate": 7.398789307153351e-05, "loss": 13.2503, "step": 8649 }, { "epoch": 0.3605518736192739, "grad_norm": 216.0, "learning_rate": 7.398197038224551e-05, "loss": 11.6878, "step": 8650 }, { "epoch": 0.36059355591680214, "grad_norm": 270.0, "learning_rate": 7.397604725588398e-05, "loss": 12.8755, "step": 8651 }, { "epoch": 0.36063523821433036, "grad_norm": 496.0, "learning_rate": 7.397012369255685e-05, "loss": 15.1879, "step": 8652 }, { "epoch": 0.3606769205118586, "grad_norm": 468.0, "learning_rate": 7.396419969237208e-05, "loss": 14.8753, "step": 8653 }, { "epoch": 0.36071860280938683, "grad_norm": 588.0, "learning_rate": 7.395827525543766e-05, "loss": 18.7502, "step": 8654 }, { "epoch": 0.3607602851069151, "grad_norm": 506.0, "learning_rate": 7.395235038186152e-05, "loss": 17.5019, "step": 8655 }, { "epoch": 0.3608019674044433, "grad_norm": 272.0, "learning_rate": 7.39464250717517e-05, "loss": 12.0628, "step": 8656 }, { "epoch": 0.3608436497019716, "grad_norm": 346.0, "learning_rate": 7.394049932521614e-05, "loss": 13.9379, "step": 8657 }, { "epoch": 0.3608853319994998, "grad_norm": 358.0, "learning_rate": 7.393457314236285e-05, "loss": 14.0646, "step": 8658 }, { "epoch": 0.36092701429702806, "grad_norm": 448.0, "learning_rate": 7.392864652329984e-05, "loss": 15.0002, "step": 8659 }, { "epoch": 0.36096869659455627, "grad_norm": 506.0, "learning_rate": 7.392271946813514e-05, "loss": 15.5633, "step": 8660 }, { "epoch": 0.36101037889208454, "grad_norm": 528.0, "learning_rate": 7.391679197697674e-05, "loss": 17.5002, "step": 8661 }, { "epoch": 0.36105206118961275, "grad_norm": 272.0, "learning_rate": 7.391086404993268e-05, "loss": 12.5666, "step": 8662 }, { "epoch": 0.361093743487141, "grad_norm": 490.0, "learning_rate": 7.3904935687111e-05, "loss": 16.6254, "step": 8663 }, { "epoch": 0.36113542578466923, "grad_norm": 452.0, "learning_rate": 7.389900688861977e-05, "loss": 15.5627, "step": 8664 }, { "epoch": 0.3611771080821975, "grad_norm": 312.0, "learning_rate": 7.389307765456699e-05, "loss": 13.3128, "step": 8665 }, { "epoch": 0.3612187903797257, "grad_norm": 88.5, "learning_rate": 7.388714798506076e-05, "loss": 8.0631, "step": 8666 }, { "epoch": 0.361260472677254, "grad_norm": 151.0, "learning_rate": 7.388121788020915e-05, "loss": 9.7503, "step": 8667 }, { "epoch": 0.3613021549747822, "grad_norm": 96.0, "learning_rate": 7.38752873401202e-05, "loss": 10.3753, "step": 8668 }, { "epoch": 0.36134383727231045, "grad_norm": 444.0, "learning_rate": 7.386935636490202e-05, "loss": 15.6252, "step": 8669 }, { "epoch": 0.36138551956983866, "grad_norm": 458.0, "learning_rate": 7.386342495466272e-05, "loss": 15.1255, "step": 8670 }, { "epoch": 0.36142720186736693, "grad_norm": 234.0, "learning_rate": 7.385749310951037e-05, "loss": 13.5009, "step": 8671 }, { "epoch": 0.36146888416489514, "grad_norm": 452.0, "learning_rate": 7.385156082955308e-05, "loss": 16.6261, "step": 8672 }, { "epoch": 0.3615105664624234, "grad_norm": 163.0, "learning_rate": 7.3845628114899e-05, "loss": 9.8126, "step": 8673 }, { "epoch": 0.3615522487599516, "grad_norm": 804.0, "learning_rate": 7.383969496565621e-05, "loss": 20.8776, "step": 8674 }, { "epoch": 0.3615939310574799, "grad_norm": 89.5, "learning_rate": 7.383376138193288e-05, "loss": 8.7503, "step": 8675 }, { "epoch": 0.3616356133550081, "grad_norm": 426.0, "learning_rate": 7.38278273638371e-05, "loss": 15.1258, "step": 8676 }, { "epoch": 0.36167729565253637, "grad_norm": 752.0, "learning_rate": 7.38218929114771e-05, "loss": 20.2515, "step": 8677 }, { "epoch": 0.36171897795006464, "grad_norm": 174.0, "learning_rate": 7.381595802496095e-05, "loss": 10.376, "step": 8678 }, { "epoch": 0.36176066024759285, "grad_norm": 430.0, "learning_rate": 7.381002270439687e-05, "loss": 15.5031, "step": 8679 }, { "epoch": 0.3618023425451211, "grad_norm": 168.0, "learning_rate": 7.3804086949893e-05, "loss": 11.2504, "step": 8680 }, { "epoch": 0.3618440248426493, "grad_norm": 652.0, "learning_rate": 7.379815076155755e-05, "loss": 18.2547, "step": 8681 }, { "epoch": 0.3618857071401776, "grad_norm": 226.0, "learning_rate": 7.379221413949867e-05, "loss": 11.1256, "step": 8682 }, { "epoch": 0.3619273894377058, "grad_norm": 316.0, "learning_rate": 7.37862770838246e-05, "loss": 13.5665, "step": 8683 }, { "epoch": 0.36196907173523407, "grad_norm": 234.0, "learning_rate": 7.378033959464348e-05, "loss": 12.2503, "step": 8684 }, { "epoch": 0.3620107540327623, "grad_norm": 324.0, "learning_rate": 7.377440167206359e-05, "loss": 13.8757, "step": 8685 }, { "epoch": 0.36205243633029055, "grad_norm": 380.0, "learning_rate": 7.37684633161931e-05, "loss": 13.3755, "step": 8686 }, { "epoch": 0.36209411862781876, "grad_norm": 286.0, "learning_rate": 7.376252452714027e-05, "loss": 11.5002, "step": 8687 }, { "epoch": 0.36213580092534703, "grad_norm": 576.0, "learning_rate": 7.375658530501332e-05, "loss": 19.3752, "step": 8688 }, { "epoch": 0.36217748322287524, "grad_norm": 40.25, "learning_rate": 7.37506456499205e-05, "loss": 7.0945, "step": 8689 }, { "epoch": 0.3622191655204035, "grad_norm": 262.0, "learning_rate": 7.374470556197005e-05, "loss": 11.876, "step": 8690 }, { "epoch": 0.3622608478179317, "grad_norm": 824.0, "learning_rate": 7.373876504127022e-05, "loss": 21.3785, "step": 8691 }, { "epoch": 0.36230253011546, "grad_norm": 119.0, "learning_rate": 7.37328240879293e-05, "loss": 10.063, "step": 8692 }, { "epoch": 0.3623442124129882, "grad_norm": 172.0, "learning_rate": 7.372688270205555e-05, "loss": 10.0627, "step": 8693 }, { "epoch": 0.36238589471051647, "grad_norm": 380.0, "learning_rate": 7.372094088375728e-05, "loss": 12.44, "step": 8694 }, { "epoch": 0.3624275770080447, "grad_norm": 278.0, "learning_rate": 7.371499863314272e-05, "loss": 13.3752, "step": 8695 }, { "epoch": 0.36246925930557294, "grad_norm": 386.0, "learning_rate": 7.370905595032023e-05, "loss": 14.6253, "step": 8696 }, { "epoch": 0.36251094160310116, "grad_norm": 258.0, "learning_rate": 7.370311283539807e-05, "loss": 12.8127, "step": 8697 }, { "epoch": 0.3625526239006294, "grad_norm": 408.0, "learning_rate": 7.369716928848458e-05, "loss": 14.1882, "step": 8698 }, { "epoch": 0.36259430619815763, "grad_norm": 406.0, "learning_rate": 7.369122530968809e-05, "loss": 14.1252, "step": 8699 }, { "epoch": 0.3626359884956859, "grad_norm": 380.0, "learning_rate": 7.36852808991169e-05, "loss": 14.6253, "step": 8700 }, { "epoch": 0.3626776707932141, "grad_norm": 374.0, "learning_rate": 7.367933605687938e-05, "loss": 14.6877, "step": 8701 }, { "epoch": 0.3627193530907424, "grad_norm": 278.0, "learning_rate": 7.367339078308385e-05, "loss": 12.7503, "step": 8702 }, { "epoch": 0.3627610353882706, "grad_norm": 456.0, "learning_rate": 7.366744507783866e-05, "loss": 15.3758, "step": 8703 }, { "epoch": 0.36280271768579886, "grad_norm": 430.0, "learning_rate": 7.36614989412522e-05, "loss": 16.0021, "step": 8704 }, { "epoch": 0.36284439998332707, "grad_norm": 680.0, "learning_rate": 7.365555237343283e-05, "loss": 20.0003, "step": 8705 }, { "epoch": 0.36288608228085534, "grad_norm": 282.0, "learning_rate": 7.36496053744889e-05, "loss": 13.2505, "step": 8706 }, { "epoch": 0.36292776457838355, "grad_norm": 294.0, "learning_rate": 7.364365794452881e-05, "loss": 9.3768, "step": 8707 }, { "epoch": 0.3629694468759118, "grad_norm": 278.0, "learning_rate": 7.363771008366097e-05, "loss": 12.8758, "step": 8708 }, { "epoch": 0.36301112917344003, "grad_norm": 227.0, "learning_rate": 7.363176179199373e-05, "loss": 11.9379, "step": 8709 }, { "epoch": 0.3630528114709683, "grad_norm": 700.0, "learning_rate": 7.362581306963556e-05, "loss": 19.2501, "step": 8710 }, { "epoch": 0.3630944937684965, "grad_norm": 348.0, "learning_rate": 7.361986391669486e-05, "loss": 13.8129, "step": 8711 }, { "epoch": 0.3631361760660248, "grad_norm": 215.0, "learning_rate": 7.361391433328002e-05, "loss": 11.188, "step": 8712 }, { "epoch": 0.363177858363553, "grad_norm": 183.0, "learning_rate": 7.360796431949951e-05, "loss": 11.2503, "step": 8713 }, { "epoch": 0.36321954066108125, "grad_norm": 248.0, "learning_rate": 7.360201387546175e-05, "loss": 12.4377, "step": 8714 }, { "epoch": 0.36326122295860946, "grad_norm": 250.0, "learning_rate": 7.35960630012752e-05, "loss": 9.8136, "step": 8715 }, { "epoch": 0.36330290525613773, "grad_norm": 812.0, "learning_rate": 7.35901116970483e-05, "loss": 21.8754, "step": 8716 }, { "epoch": 0.36334458755366594, "grad_norm": 306.0, "learning_rate": 7.358415996288952e-05, "loss": 13.8781, "step": 8717 }, { "epoch": 0.3633862698511942, "grad_norm": 744.0, "learning_rate": 7.357820779890733e-05, "loss": 21.1252, "step": 8718 }, { "epoch": 0.3634279521487224, "grad_norm": 340.0, "learning_rate": 7.357225520521022e-05, "loss": 12.938, "step": 8719 }, { "epoch": 0.3634696344462507, "grad_norm": 348.0, "learning_rate": 7.356630218190666e-05, "loss": 15.2503, "step": 8720 }, { "epoch": 0.3635113167437789, "grad_norm": 212.0, "learning_rate": 7.356034872910514e-05, "loss": 11.5632, "step": 8721 }, { "epoch": 0.36355299904130717, "grad_norm": 330.0, "learning_rate": 7.355439484691418e-05, "loss": 14.3767, "step": 8722 }, { "epoch": 0.3635946813388354, "grad_norm": 932.0, "learning_rate": 7.354844053544228e-05, "loss": 21.0054, "step": 8723 }, { "epoch": 0.36363636363636365, "grad_norm": 207.0, "learning_rate": 7.354248579479797e-05, "loss": 11.6252, "step": 8724 }, { "epoch": 0.36367804593389186, "grad_norm": 96.5, "learning_rate": 7.353653062508976e-05, "loss": 8.1878, "step": 8725 }, { "epoch": 0.3637197282314201, "grad_norm": 852.0, "learning_rate": 7.353057502642618e-05, "loss": 23.7504, "step": 8726 }, { "epoch": 0.36376141052894834, "grad_norm": 454.0, "learning_rate": 7.35246189989158e-05, "loss": 16.5003, "step": 8727 }, { "epoch": 0.3638030928264766, "grad_norm": 584.0, "learning_rate": 7.351866254266716e-05, "loss": 18.6254, "step": 8728 }, { "epoch": 0.3638447751240048, "grad_norm": 185.0, "learning_rate": 7.351270565778878e-05, "loss": 10.1897, "step": 8729 }, { "epoch": 0.3638864574215331, "grad_norm": 436.0, "learning_rate": 7.350674834438925e-05, "loss": 15.1889, "step": 8730 }, { "epoch": 0.3639281397190613, "grad_norm": 86.0, "learning_rate": 7.350079060257716e-05, "loss": 8.1878, "step": 8731 }, { "epoch": 0.36396982201658956, "grad_norm": 158.0, "learning_rate": 7.349483243246108e-05, "loss": 10.2505, "step": 8732 }, { "epoch": 0.3640115043141178, "grad_norm": 97.5, "learning_rate": 7.348887383414958e-05, "loss": 7.969, "step": 8733 }, { "epoch": 0.36405318661164604, "grad_norm": 296.0, "learning_rate": 7.348291480775128e-05, "loss": 12.8127, "step": 8734 }, { "epoch": 0.36409486890917425, "grad_norm": 604.0, "learning_rate": 7.347695535337475e-05, "loss": 16.7512, "step": 8735 }, { "epoch": 0.3641365512067025, "grad_norm": 1632.0, "learning_rate": 7.347099547112865e-05, "loss": 35.5003, "step": 8736 }, { "epoch": 0.36417823350423073, "grad_norm": 167.0, "learning_rate": 7.346503516112156e-05, "loss": 10.4382, "step": 8737 }, { "epoch": 0.364219915801759, "grad_norm": 352.0, "learning_rate": 7.345907442346213e-05, "loss": 13.5629, "step": 8738 }, { "epoch": 0.3642615980992872, "grad_norm": 358.0, "learning_rate": 7.345311325825899e-05, "loss": 14.5627, "step": 8739 }, { "epoch": 0.3643032803968155, "grad_norm": 368.0, "learning_rate": 7.344715166562077e-05, "loss": 13.9386, "step": 8740 }, { "epoch": 0.3643449626943437, "grad_norm": 438.0, "learning_rate": 7.344118964565614e-05, "loss": 16.3754, "step": 8741 }, { "epoch": 0.36438664499187196, "grad_norm": 436.0, "learning_rate": 7.343522719847373e-05, "loss": 15.1878, "step": 8742 }, { "epoch": 0.36442832728940017, "grad_norm": 102.0, "learning_rate": 7.342926432418223e-05, "loss": 8.9377, "step": 8743 }, { "epoch": 0.36447000958692843, "grad_norm": 207.0, "learning_rate": 7.342330102289032e-05, "loss": 10.6251, "step": 8744 }, { "epoch": 0.36451169188445665, "grad_norm": 280.0, "learning_rate": 7.341733729470666e-05, "loss": 13.1899, "step": 8745 }, { "epoch": 0.3645533741819849, "grad_norm": 548.0, "learning_rate": 7.341137313973995e-05, "loss": 18.0005, "step": 8746 }, { "epoch": 0.3645950564795131, "grad_norm": 474.0, "learning_rate": 7.340540855809887e-05, "loss": 16.7503, "step": 8747 }, { "epoch": 0.3646367387770414, "grad_norm": 452.0, "learning_rate": 7.339944354989216e-05, "loss": 16.3755, "step": 8748 }, { "epoch": 0.3646784210745696, "grad_norm": 1056.0, "learning_rate": 7.339347811522851e-05, "loss": 28.8754, "step": 8749 }, { "epoch": 0.36472010337209787, "grad_norm": 356.0, "learning_rate": 7.338751225421665e-05, "loss": 13.314, "step": 8750 }, { "epoch": 0.36476178566962614, "grad_norm": 378.0, "learning_rate": 7.33815459669653e-05, "loss": 15.1876, "step": 8751 }, { "epoch": 0.36480346796715435, "grad_norm": 118.0, "learning_rate": 7.33755792535832e-05, "loss": 7.8439, "step": 8752 }, { "epoch": 0.3648451502646826, "grad_norm": 508.0, "learning_rate": 7.33696121141791e-05, "loss": 18.1251, "step": 8753 }, { "epoch": 0.3648868325622108, "grad_norm": 350.0, "learning_rate": 7.336364454886173e-05, "loss": 14.3127, "step": 8754 }, { "epoch": 0.3649285148597391, "grad_norm": 412.0, "learning_rate": 7.335767655773987e-05, "loss": 14.9377, "step": 8755 }, { "epoch": 0.3649701971572673, "grad_norm": 376.0, "learning_rate": 7.335170814092228e-05, "loss": 13.7502, "step": 8756 }, { "epoch": 0.3650118794547956, "grad_norm": 290.0, "learning_rate": 7.334573929851773e-05, "loss": 11.8753, "step": 8757 }, { "epoch": 0.3650535617523238, "grad_norm": 368.0, "learning_rate": 7.333977003063502e-05, "loss": 14.8753, "step": 8758 }, { "epoch": 0.36509524404985205, "grad_norm": 528.0, "learning_rate": 7.333380033738293e-05, "loss": 18.0021, "step": 8759 }, { "epoch": 0.36513692634738026, "grad_norm": 348.0, "learning_rate": 7.332783021887024e-05, "loss": 15.4378, "step": 8760 }, { "epoch": 0.36517860864490853, "grad_norm": 988.0, "learning_rate": 7.332185967520579e-05, "loss": 23.0002, "step": 8761 }, { "epoch": 0.36522029094243674, "grad_norm": 314.0, "learning_rate": 7.331588870649836e-05, "loss": 13.6256, "step": 8762 }, { "epoch": 0.365261973239965, "grad_norm": 470.0, "learning_rate": 7.330991731285682e-05, "loss": 17.7513, "step": 8763 }, { "epoch": 0.3653036555374932, "grad_norm": 300.0, "learning_rate": 7.330394549438995e-05, "loss": 11.2502, "step": 8764 }, { "epoch": 0.3653453378350215, "grad_norm": 156.0, "learning_rate": 7.32979732512066e-05, "loss": 11.5002, "step": 8765 }, { "epoch": 0.3653870201325497, "grad_norm": 284.0, "learning_rate": 7.329200058341562e-05, "loss": 13.1254, "step": 8766 }, { "epoch": 0.36542870243007797, "grad_norm": 418.0, "learning_rate": 7.328602749112587e-05, "loss": 15.2502, "step": 8767 }, { "epoch": 0.3654703847276062, "grad_norm": 338.0, "learning_rate": 7.32800539744462e-05, "loss": 14.7502, "step": 8768 }, { "epoch": 0.36551206702513445, "grad_norm": 370.0, "learning_rate": 7.327408003348547e-05, "loss": 12.9377, "step": 8769 }, { "epoch": 0.36555374932266266, "grad_norm": 1008.0, "learning_rate": 7.326810566835258e-05, "loss": 25.1256, "step": 8770 }, { "epoch": 0.3655954316201909, "grad_norm": 556.0, "learning_rate": 7.32621308791564e-05, "loss": 18.0036, "step": 8771 }, { "epoch": 0.36563711391771914, "grad_norm": 318.0, "learning_rate": 7.325615566600579e-05, "loss": 13.1877, "step": 8772 }, { "epoch": 0.3656787962152474, "grad_norm": 280.0, "learning_rate": 7.325018002900969e-05, "loss": 12.7504, "step": 8773 }, { "epoch": 0.3657204785127756, "grad_norm": 748.0, "learning_rate": 7.324420396827702e-05, "loss": 23.5001, "step": 8774 }, { "epoch": 0.3657621608103039, "grad_norm": 368.0, "learning_rate": 7.323822748391664e-05, "loss": 15.8129, "step": 8775 }, { "epoch": 0.3658038431078321, "grad_norm": 1360.0, "learning_rate": 7.32322505760375e-05, "loss": 34.5005, "step": 8776 }, { "epoch": 0.36584552540536036, "grad_norm": 432.0, "learning_rate": 7.322627324474856e-05, "loss": 14.6262, "step": 8777 }, { "epoch": 0.3658872077028886, "grad_norm": 472.0, "learning_rate": 7.32202954901587e-05, "loss": 17.5008, "step": 8778 }, { "epoch": 0.36592889000041684, "grad_norm": 272.0, "learning_rate": 7.321431731237692e-05, "loss": 12.563, "step": 8779 }, { "epoch": 0.36597057229794505, "grad_norm": 384.0, "learning_rate": 7.320833871151212e-05, "loss": 14.0633, "step": 8780 }, { "epoch": 0.3660122545954733, "grad_norm": 490.0, "learning_rate": 7.32023596876733e-05, "loss": 17.0001, "step": 8781 }, { "epoch": 0.36605393689300153, "grad_norm": 245.0, "learning_rate": 7.319638024096942e-05, "loss": 11.5002, "step": 8782 }, { "epoch": 0.3660956191905298, "grad_norm": 159.0, "learning_rate": 7.319040037150945e-05, "loss": 9.8134, "step": 8783 }, { "epoch": 0.366137301488058, "grad_norm": 548.0, "learning_rate": 7.318442007940236e-05, "loss": 19.0005, "step": 8784 }, { "epoch": 0.3661789837855863, "grad_norm": 222.0, "learning_rate": 7.317843936475717e-05, "loss": 12.0634, "step": 8785 }, { "epoch": 0.3662206660831145, "grad_norm": 100.5, "learning_rate": 7.317245822768286e-05, "loss": 8.6265, "step": 8786 }, { "epoch": 0.36626234838064275, "grad_norm": 764.0, "learning_rate": 7.316647666828844e-05, "loss": 22.1256, "step": 8787 }, { "epoch": 0.36630403067817097, "grad_norm": 203.0, "learning_rate": 7.316049468668292e-05, "loss": 11.9379, "step": 8788 }, { "epoch": 0.36634571297569923, "grad_norm": 284.0, "learning_rate": 7.315451228297534e-05, "loss": 10.6879, "step": 8789 }, { "epoch": 0.36638739527322745, "grad_norm": 264.0, "learning_rate": 7.314852945727472e-05, "loss": 12.5006, "step": 8790 }, { "epoch": 0.3664290775707557, "grad_norm": 412.0, "learning_rate": 7.314254620969009e-05, "loss": 15.3129, "step": 8791 }, { "epoch": 0.3664707598682839, "grad_norm": 568.0, "learning_rate": 7.31365625403305e-05, "loss": 16.3786, "step": 8792 }, { "epoch": 0.3665124421658122, "grad_norm": 174.0, "learning_rate": 7.313057844930502e-05, "loss": 10.3752, "step": 8793 }, { "epoch": 0.3665541244633404, "grad_norm": 300.0, "learning_rate": 7.312459393672267e-05, "loss": 14.2508, "step": 8794 }, { "epoch": 0.36659580676086867, "grad_norm": 245.0, "learning_rate": 7.311860900269255e-05, "loss": 12.5627, "step": 8795 }, { "epoch": 0.3666374890583969, "grad_norm": 812.0, "learning_rate": 7.311262364732373e-05, "loss": 20.2515, "step": 8796 }, { "epoch": 0.36667917135592515, "grad_norm": 178.0, "learning_rate": 7.310663787072529e-05, "loss": 9.1252, "step": 8797 }, { "epoch": 0.36672085365345336, "grad_norm": 234.0, "learning_rate": 7.310065167300633e-05, "loss": 8.9388, "step": 8798 }, { "epoch": 0.3667625359509816, "grad_norm": 648.0, "learning_rate": 7.309466505427595e-05, "loss": 19.2504, "step": 8799 }, { "epoch": 0.36680421824850984, "grad_norm": 1104.0, "learning_rate": 7.308867801464322e-05, "loss": 29.3777, "step": 8800 }, { "epoch": 0.3668459005460381, "grad_norm": 266.0, "learning_rate": 7.30826905542173e-05, "loss": 13.438, "step": 8801 }, { "epoch": 0.3668875828435663, "grad_norm": 156.0, "learning_rate": 7.30767026731073e-05, "loss": 10.3753, "step": 8802 }, { "epoch": 0.3669292651410946, "grad_norm": 133.0, "learning_rate": 7.307071437142234e-05, "loss": 10.064, "step": 8803 }, { "epoch": 0.3669709474386228, "grad_norm": 1336.0, "learning_rate": 7.306472564927156e-05, "loss": 34.0002, "step": 8804 }, { "epoch": 0.36701262973615106, "grad_norm": 952.0, "learning_rate": 7.305873650676412e-05, "loss": 26.7502, "step": 8805 }, { "epoch": 0.3670543120336793, "grad_norm": 498.0, "learning_rate": 7.305274694400916e-05, "loss": 16.3754, "step": 8806 }, { "epoch": 0.36709599433120754, "grad_norm": 204.0, "learning_rate": 7.304675696111584e-05, "loss": 11.4377, "step": 8807 }, { "epoch": 0.36713767662873575, "grad_norm": 288.0, "learning_rate": 7.304076655819332e-05, "loss": 10.8755, "step": 8808 }, { "epoch": 0.367179358926264, "grad_norm": 544.0, "learning_rate": 7.303477573535078e-05, "loss": 18.2504, "step": 8809 }, { "epoch": 0.36722104122379223, "grad_norm": 560.0, "learning_rate": 7.302878449269741e-05, "loss": 18.7509, "step": 8810 }, { "epoch": 0.3672627235213205, "grad_norm": 210.0, "learning_rate": 7.302279283034241e-05, "loss": 5.2505, "step": 8811 }, { "epoch": 0.3673044058188487, "grad_norm": 330.0, "learning_rate": 7.301680074839497e-05, "loss": 14.8754, "step": 8812 }, { "epoch": 0.367346088116377, "grad_norm": 326.0, "learning_rate": 7.301080824696425e-05, "loss": 13.5006, "step": 8813 }, { "epoch": 0.3673877704139052, "grad_norm": 466.0, "learning_rate": 7.300481532615955e-05, "loss": 16.0003, "step": 8814 }, { "epoch": 0.36742945271143346, "grad_norm": 74.5, "learning_rate": 7.299882198609004e-05, "loss": 8.5629, "step": 8815 }, { "epoch": 0.36747113500896167, "grad_norm": 544.0, "learning_rate": 7.299282822686495e-05, "loss": 19.6253, "step": 8816 }, { "epoch": 0.36751281730648994, "grad_norm": 416.0, "learning_rate": 7.298683404859353e-05, "loss": 14.5632, "step": 8817 }, { "epoch": 0.36755449960401815, "grad_norm": 328.0, "learning_rate": 7.298083945138501e-05, "loss": 12.2505, "step": 8818 }, { "epoch": 0.3675961819015464, "grad_norm": 632.0, "learning_rate": 7.297484443534868e-05, "loss": 19.8753, "step": 8819 }, { "epoch": 0.3676378641990746, "grad_norm": 328.0, "learning_rate": 7.296884900059374e-05, "loss": 14.1253, "step": 8820 }, { "epoch": 0.3676795464966029, "grad_norm": 328.0, "learning_rate": 7.296285314722951e-05, "loss": 13.8137, "step": 8821 }, { "epoch": 0.3677212287941311, "grad_norm": 107.0, "learning_rate": 7.295685687536522e-05, "loss": 8.8754, "step": 8822 }, { "epoch": 0.3677629110916594, "grad_norm": 500.0, "learning_rate": 7.295086018511019e-05, "loss": 16.5006, "step": 8823 }, { "epoch": 0.36780459338918764, "grad_norm": 300.0, "learning_rate": 7.29448630765737e-05, "loss": 13.6291, "step": 8824 }, { "epoch": 0.36784627568671585, "grad_norm": 370.0, "learning_rate": 7.293886554986502e-05, "loss": 15.1877, "step": 8825 }, { "epoch": 0.3678879579842441, "grad_norm": 240.0, "learning_rate": 7.29328676050935e-05, "loss": 12.0629, "step": 8826 }, { "epoch": 0.36792964028177233, "grad_norm": 258.0, "learning_rate": 7.292686924236842e-05, "loss": 11.8127, "step": 8827 }, { "epoch": 0.3679713225793006, "grad_norm": 608.0, "learning_rate": 7.292087046179912e-05, "loss": 19.0003, "step": 8828 }, { "epoch": 0.3680130048768288, "grad_norm": 1288.0, "learning_rate": 7.291487126349492e-05, "loss": 27.0039, "step": 8829 }, { "epoch": 0.3680546871743571, "grad_norm": 286.0, "learning_rate": 7.290887164756516e-05, "loss": 12.2507, "step": 8830 }, { "epoch": 0.3680963694718853, "grad_norm": 195.0, "learning_rate": 7.290287161411918e-05, "loss": 11.7502, "step": 8831 }, { "epoch": 0.36813805176941355, "grad_norm": 668.0, "learning_rate": 7.289687116326632e-05, "loss": 22.6258, "step": 8832 }, { "epoch": 0.36817973406694177, "grad_norm": 868.0, "learning_rate": 7.289087029511596e-05, "loss": 21.1296, "step": 8833 }, { "epoch": 0.36822141636447003, "grad_norm": 229.0, "learning_rate": 7.288486900977747e-05, "loss": 11.6252, "step": 8834 }, { "epoch": 0.36826309866199824, "grad_norm": 224.0, "learning_rate": 7.287886730736018e-05, "loss": 11.5003, "step": 8835 }, { "epoch": 0.3683047809595265, "grad_norm": 208.0, "learning_rate": 7.287286518797353e-05, "loss": 10.8138, "step": 8836 }, { "epoch": 0.3683464632570547, "grad_norm": 636.0, "learning_rate": 7.286686265172687e-05, "loss": 20.6256, "step": 8837 }, { "epoch": 0.368388145554583, "grad_norm": 644.0, "learning_rate": 7.28608596987296e-05, "loss": 20.5015, "step": 8838 }, { "epoch": 0.3684298278521112, "grad_norm": 167.0, "learning_rate": 7.285485632909115e-05, "loss": 8.3756, "step": 8839 }, { "epoch": 0.36847151014963947, "grad_norm": 156.0, "learning_rate": 7.284885254292091e-05, "loss": 11.0001, "step": 8840 }, { "epoch": 0.3685131924471677, "grad_norm": 1544.0, "learning_rate": 7.28428483403283e-05, "loss": 36.7503, "step": 8841 }, { "epoch": 0.36855487474469595, "grad_norm": 280.0, "learning_rate": 7.283684372142274e-05, "loss": 11.8755, "step": 8842 }, { "epoch": 0.36859655704222416, "grad_norm": 576.0, "learning_rate": 7.28308386863137e-05, "loss": 18.5005, "step": 8843 }, { "epoch": 0.3686382393397524, "grad_norm": 131.0, "learning_rate": 7.282483323511058e-05, "loss": 7.2815, "step": 8844 }, { "epoch": 0.36867992163728064, "grad_norm": 338.0, "learning_rate": 7.281882736792285e-05, "loss": 14.6253, "step": 8845 }, { "epoch": 0.3687216039348089, "grad_norm": 174.0, "learning_rate": 7.281282108485997e-05, "loss": 11.9376, "step": 8846 }, { "epoch": 0.3687632862323371, "grad_norm": 187.0, "learning_rate": 7.280681438603141e-05, "loss": 5.8446, "step": 8847 }, { "epoch": 0.3688049685298654, "grad_norm": 334.0, "learning_rate": 7.280080727154663e-05, "loss": 15.3774, "step": 8848 }, { "epoch": 0.3688466508273936, "grad_norm": 552.0, "learning_rate": 7.27947997415151e-05, "loss": 16.6278, "step": 8849 }, { "epoch": 0.36888833312492186, "grad_norm": 256.0, "learning_rate": 7.278879179604631e-05, "loss": 13.2503, "step": 8850 }, { "epoch": 0.3689300154224501, "grad_norm": 196.0, "learning_rate": 7.278278343524979e-05, "loss": 10.1877, "step": 8851 }, { "epoch": 0.36897169771997834, "grad_norm": 266.0, "learning_rate": 7.277677465923502e-05, "loss": 13.6883, "step": 8852 }, { "epoch": 0.36901338001750655, "grad_norm": 202.0, "learning_rate": 7.27707654681115e-05, "loss": 10.8753, "step": 8853 }, { "epoch": 0.3690550623150348, "grad_norm": 328.0, "learning_rate": 7.276475586198876e-05, "loss": 13.0004, "step": 8854 }, { "epoch": 0.36909674461256303, "grad_norm": 236.0, "learning_rate": 7.275874584097632e-05, "loss": 8.6882, "step": 8855 }, { "epoch": 0.3691384269100913, "grad_norm": 482.0, "learning_rate": 7.275273540518373e-05, "loss": 15.8752, "step": 8856 }, { "epoch": 0.3691801092076195, "grad_norm": 1016.0, "learning_rate": 7.274672455472051e-05, "loss": 23.0056, "step": 8857 }, { "epoch": 0.3692217915051478, "grad_norm": 175.0, "learning_rate": 7.274071328969621e-05, "loss": 10.6253, "step": 8858 }, { "epoch": 0.369263473802676, "grad_norm": 174.0, "learning_rate": 7.27347016102204e-05, "loss": 9.7503, "step": 8859 }, { "epoch": 0.36930515610020426, "grad_norm": 460.0, "learning_rate": 7.272868951640263e-05, "loss": 16.0006, "step": 8860 }, { "epoch": 0.36934683839773247, "grad_norm": 322.0, "learning_rate": 7.272267700835248e-05, "loss": 13.0002, "step": 8861 }, { "epoch": 0.36938852069526074, "grad_norm": 81.0, "learning_rate": 7.271666408617952e-05, "loss": 7.5942, "step": 8862 }, { "epoch": 0.36943020299278895, "grad_norm": 83.5, "learning_rate": 7.271065074999333e-05, "loss": 8.9384, "step": 8863 }, { "epoch": 0.3694718852903172, "grad_norm": 608.0, "learning_rate": 7.270463699990354e-05, "loss": 18.6251, "step": 8864 }, { "epoch": 0.3695135675878454, "grad_norm": 294.0, "learning_rate": 7.26986228360197e-05, "loss": 12.2506, "step": 8865 }, { "epoch": 0.3695552498853737, "grad_norm": 364.0, "learning_rate": 7.269260825845146e-05, "loss": 13.6256, "step": 8866 }, { "epoch": 0.3695969321829019, "grad_norm": 290.0, "learning_rate": 7.268659326730841e-05, "loss": 13.1884, "step": 8867 }, { "epoch": 0.36963861448043017, "grad_norm": 236.0, "learning_rate": 7.26805778627002e-05, "loss": 12.1252, "step": 8868 }, { "epoch": 0.3696802967779584, "grad_norm": 340.0, "learning_rate": 7.267456204473642e-05, "loss": 14.1267, "step": 8869 }, { "epoch": 0.36972197907548665, "grad_norm": 92.0, "learning_rate": 7.266854581352676e-05, "loss": 8.5628, "step": 8870 }, { "epoch": 0.36976366137301486, "grad_norm": 418.0, "learning_rate": 7.266252916918082e-05, "loss": 14.441, "step": 8871 }, { "epoch": 0.36980534367054313, "grad_norm": 528.0, "learning_rate": 7.265651211180829e-05, "loss": 19.3753, "step": 8872 }, { "epoch": 0.36984702596807134, "grad_norm": 472.0, "learning_rate": 7.26504946415188e-05, "loss": 16.5002, "step": 8873 }, { "epoch": 0.3698887082655996, "grad_norm": 608.0, "learning_rate": 7.264447675842205e-05, "loss": 19.1251, "step": 8874 }, { "epoch": 0.3699303905631278, "grad_norm": 242.0, "learning_rate": 7.263845846262769e-05, "loss": 10.1253, "step": 8875 }, { "epoch": 0.3699720728606561, "grad_norm": 256.0, "learning_rate": 7.263243975424541e-05, "loss": 11.4378, "step": 8876 }, { "epoch": 0.3700137551581843, "grad_norm": 126.5, "learning_rate": 7.262642063338491e-05, "loss": 9.7503, "step": 8877 }, { "epoch": 0.37005543745571257, "grad_norm": 251.0, "learning_rate": 7.262040110015589e-05, "loss": 12.5629, "step": 8878 }, { "epoch": 0.3700971197532408, "grad_norm": 158.0, "learning_rate": 7.261438115466806e-05, "loss": 6.6889, "step": 8879 }, { "epoch": 0.37013880205076904, "grad_norm": 245.0, "learning_rate": 7.26083607970311e-05, "loss": 12.8128, "step": 8880 }, { "epoch": 0.37018048434829726, "grad_norm": 320.0, "learning_rate": 7.260234002735479e-05, "loss": 12.7504, "step": 8881 }, { "epoch": 0.3702221666458255, "grad_norm": 724.0, "learning_rate": 7.259631884574882e-05, "loss": 21.0008, "step": 8882 }, { "epoch": 0.37026384894335373, "grad_norm": 77.0, "learning_rate": 7.259029725232292e-05, "loss": 8.5633, "step": 8883 }, { "epoch": 0.370305531240882, "grad_norm": 183.0, "learning_rate": 7.258427524718685e-05, "loss": 11.6254, "step": 8884 }, { "epoch": 0.3703472135384102, "grad_norm": 468.0, "learning_rate": 7.257825283045035e-05, "loss": 16.0001, "step": 8885 }, { "epoch": 0.3703888958359385, "grad_norm": 356.0, "learning_rate": 7.257223000222321e-05, "loss": 14.3757, "step": 8886 }, { "epoch": 0.3704305781334667, "grad_norm": 239.0, "learning_rate": 7.256620676261516e-05, "loss": 12.2503, "step": 8887 }, { "epoch": 0.37047226043099496, "grad_norm": 107.0, "learning_rate": 7.256018311173599e-05, "loss": 8.6882, "step": 8888 }, { "epoch": 0.37051394272852317, "grad_norm": 466.0, "learning_rate": 7.255415904969548e-05, "loss": 15.5005, "step": 8889 }, { "epoch": 0.37055562502605144, "grad_norm": 500.0, "learning_rate": 7.254813457660341e-05, "loss": 13.6261, "step": 8890 }, { "epoch": 0.37059730732357965, "grad_norm": 844.0, "learning_rate": 7.254210969256959e-05, "loss": 21.1264, "step": 8891 }, { "epoch": 0.3706389896211079, "grad_norm": 1664.0, "learning_rate": 7.253608439770383e-05, "loss": 34.7547, "step": 8892 }, { "epoch": 0.37068067191863613, "grad_norm": 888.0, "learning_rate": 7.253005869211593e-05, "loss": 25.3767, "step": 8893 }, { "epoch": 0.3707223542161644, "grad_norm": 564.0, "learning_rate": 7.252403257591572e-05, "loss": 17.3753, "step": 8894 }, { "epoch": 0.3707640365136926, "grad_norm": 191.0, "learning_rate": 7.2518006049213e-05, "loss": 11.5005, "step": 8895 }, { "epoch": 0.3708057188112209, "grad_norm": 764.0, "learning_rate": 7.25119791121176e-05, "loss": 22.1254, "step": 8896 }, { "epoch": 0.37084740110874914, "grad_norm": 235.0, "learning_rate": 7.250595176473942e-05, "loss": 13.3133, "step": 8897 }, { "epoch": 0.37088908340627735, "grad_norm": 342.0, "learning_rate": 7.249992400718825e-05, "loss": 13.8127, "step": 8898 }, { "epoch": 0.3709307657038056, "grad_norm": 354.0, "learning_rate": 7.249389583957399e-05, "loss": 14.7504, "step": 8899 }, { "epoch": 0.37097244800133383, "grad_norm": 844.0, "learning_rate": 7.248786726200647e-05, "loss": 19.005, "step": 8900 }, { "epoch": 0.3710141302988621, "grad_norm": 696.0, "learning_rate": 7.248183827459556e-05, "loss": 17.8761, "step": 8901 }, { "epoch": 0.3710558125963903, "grad_norm": 498.0, "learning_rate": 7.247580887745116e-05, "loss": 18.7505, "step": 8902 }, { "epoch": 0.3710974948939186, "grad_norm": 228.0, "learning_rate": 7.246977907068316e-05, "loss": 11.6877, "step": 8903 }, { "epoch": 0.3711391771914468, "grad_norm": 496.0, "learning_rate": 7.246374885440141e-05, "loss": 18.3753, "step": 8904 }, { "epoch": 0.37118085948897506, "grad_norm": 410.0, "learning_rate": 7.245771822871588e-05, "loss": 15.8753, "step": 8905 }, { "epoch": 0.37122254178650327, "grad_norm": 464.0, "learning_rate": 7.245168719373642e-05, "loss": 17.2504, "step": 8906 }, { "epoch": 0.37126422408403154, "grad_norm": 442.0, "learning_rate": 7.244565574957297e-05, "loss": 16.1252, "step": 8907 }, { "epoch": 0.37130590638155975, "grad_norm": 288.0, "learning_rate": 7.243962389633546e-05, "loss": 13.1878, "step": 8908 }, { "epoch": 0.371347588679088, "grad_norm": 362.0, "learning_rate": 7.243359163413382e-05, "loss": 15.1877, "step": 8909 }, { "epoch": 0.3713892709766162, "grad_norm": 378.0, "learning_rate": 7.242755896307796e-05, "loss": 13.6881, "step": 8910 }, { "epoch": 0.3714309532741445, "grad_norm": 123.0, "learning_rate": 7.242152588327785e-05, "loss": 9.8758, "step": 8911 }, { "epoch": 0.3714726355716727, "grad_norm": 62.5, "learning_rate": 7.241549239484345e-05, "loss": 7.0628, "step": 8912 }, { "epoch": 0.37151431786920097, "grad_norm": 175.0, "learning_rate": 7.240945849788471e-05, "loss": 10.5009, "step": 8913 }, { "epoch": 0.3715560001667292, "grad_norm": 216.0, "learning_rate": 7.240342419251158e-05, "loss": 11.0629, "step": 8914 }, { "epoch": 0.37159768246425745, "grad_norm": 362.0, "learning_rate": 7.239738947883408e-05, "loss": 15.1252, "step": 8915 }, { "epoch": 0.37163936476178566, "grad_norm": 512.0, "learning_rate": 7.239135435696215e-05, "loss": 15.8133, "step": 8916 }, { "epoch": 0.37168104705931393, "grad_norm": 648.0, "learning_rate": 7.23853188270058e-05, "loss": 19.5002, "step": 8917 }, { "epoch": 0.37172272935684214, "grad_norm": 164.0, "learning_rate": 7.237928288907505e-05, "loss": 9.4377, "step": 8918 }, { "epoch": 0.3717644116543704, "grad_norm": 221.0, "learning_rate": 7.237324654327987e-05, "loss": 12.3764, "step": 8919 }, { "epoch": 0.3718060939518986, "grad_norm": 172.0, "learning_rate": 7.236720978973029e-05, "loss": 10.8129, "step": 8920 }, { "epoch": 0.3718477762494269, "grad_norm": 204.0, "learning_rate": 7.236117262853632e-05, "loss": 10.7503, "step": 8921 }, { "epoch": 0.3718894585469551, "grad_norm": 560.0, "learning_rate": 7.2355135059808e-05, "loss": 17.8754, "step": 8922 }, { "epoch": 0.37193114084448337, "grad_norm": 266.0, "learning_rate": 7.234909708365535e-05, "loss": 6.4378, "step": 8923 }, { "epoch": 0.3719728231420116, "grad_norm": 232.0, "learning_rate": 7.234305870018842e-05, "loss": 12.1258, "step": 8924 }, { "epoch": 0.37201450543953984, "grad_norm": 88.5, "learning_rate": 7.233701990951727e-05, "loss": 8.3127, "step": 8925 }, { "epoch": 0.37205618773706806, "grad_norm": 800.0, "learning_rate": 7.233098071175194e-05, "loss": 20.8796, "step": 8926 }, { "epoch": 0.3720978700345963, "grad_norm": 346.0, "learning_rate": 7.23249411070025e-05, "loss": 15.2509, "step": 8927 }, { "epoch": 0.37213955233212453, "grad_norm": 113.0, "learning_rate": 7.231890109537904e-05, "loss": 7.7189, "step": 8928 }, { "epoch": 0.3721812346296528, "grad_norm": 478.0, "learning_rate": 7.231286067699164e-05, "loss": 16.8753, "step": 8929 }, { "epoch": 0.372222916927181, "grad_norm": 668.0, "learning_rate": 7.230681985195035e-05, "loss": 20.1252, "step": 8930 }, { "epoch": 0.3722645992247093, "grad_norm": 516.0, "learning_rate": 7.23007786203653e-05, "loss": 16.3782, "step": 8931 }, { "epoch": 0.3723062815222375, "grad_norm": 394.0, "learning_rate": 7.229473698234656e-05, "loss": 14.2506, "step": 8932 }, { "epoch": 0.37234796381976576, "grad_norm": 264.0, "learning_rate": 7.228869493800429e-05, "loss": 12.563, "step": 8933 }, { "epoch": 0.37238964611729397, "grad_norm": 115.0, "learning_rate": 7.228265248744855e-05, "loss": 8.9378, "step": 8934 }, { "epoch": 0.37243132841482224, "grad_norm": 466.0, "learning_rate": 7.22766096307895e-05, "loss": 16.1253, "step": 8935 }, { "epoch": 0.37247301071235045, "grad_norm": 398.0, "learning_rate": 7.227056636813727e-05, "loss": 15.2512, "step": 8936 }, { "epoch": 0.3725146930098787, "grad_norm": 386.0, "learning_rate": 7.226452269960198e-05, "loss": 14.5627, "step": 8937 }, { "epoch": 0.37255637530740693, "grad_norm": 332.0, "learning_rate": 7.225847862529379e-05, "loss": 12.0033, "step": 8938 }, { "epoch": 0.3725980576049352, "grad_norm": 740.0, "learning_rate": 7.225243414532284e-05, "loss": 21.1252, "step": 8939 }, { "epoch": 0.3726397399024634, "grad_norm": 100.5, "learning_rate": 7.224638925979932e-05, "loss": 9.0627, "step": 8940 }, { "epoch": 0.3726814221999917, "grad_norm": 386.0, "learning_rate": 7.224034396883337e-05, "loss": 16.0004, "step": 8941 }, { "epoch": 0.3727231044975199, "grad_norm": 364.0, "learning_rate": 7.223429827253518e-05, "loss": 14.1252, "step": 8942 }, { "epoch": 0.37276478679504815, "grad_norm": 124.0, "learning_rate": 7.222825217101495e-05, "loss": 8.6879, "step": 8943 }, { "epoch": 0.37280646909257636, "grad_norm": 438.0, "learning_rate": 7.222220566438282e-05, "loss": 9.6883, "step": 8944 }, { "epoch": 0.37284815139010463, "grad_norm": 274.0, "learning_rate": 7.221615875274903e-05, "loss": 12.4398, "step": 8945 }, { "epoch": 0.37288983368763284, "grad_norm": 1744.0, "learning_rate": 7.221011143622378e-05, "loss": 34.2512, "step": 8946 }, { "epoch": 0.3729315159851611, "grad_norm": 132.0, "learning_rate": 7.220406371491727e-05, "loss": 9.8128, "step": 8947 }, { "epoch": 0.3729731982826893, "grad_norm": 166.0, "learning_rate": 7.219801558893974e-05, "loss": 9.0002, "step": 8948 }, { "epoch": 0.3730148805802176, "grad_norm": 516.0, "learning_rate": 7.219196705840139e-05, "loss": 18.3752, "step": 8949 }, { "epoch": 0.3730565628777458, "grad_norm": 832.0, "learning_rate": 7.218591812341248e-05, "loss": 21.7507, "step": 8950 }, { "epoch": 0.37309824517527407, "grad_norm": 406.0, "learning_rate": 7.217986878408324e-05, "loss": 15.2505, "step": 8951 }, { "epoch": 0.3731399274728023, "grad_norm": 298.0, "learning_rate": 7.217381904052392e-05, "loss": 12.0629, "step": 8952 }, { "epoch": 0.37318160977033055, "grad_norm": 848.0, "learning_rate": 7.216776889284478e-05, "loss": 24.7503, "step": 8953 }, { "epoch": 0.37322329206785876, "grad_norm": 408.0, "learning_rate": 7.216171834115608e-05, "loss": 15.5002, "step": 8954 }, { "epoch": 0.373264974365387, "grad_norm": 536.0, "learning_rate": 7.21556673855681e-05, "loss": 17.8757, "step": 8955 }, { "epoch": 0.37330665666291524, "grad_norm": 69.5, "learning_rate": 7.214961602619112e-05, "loss": 9.7512, "step": 8956 }, { "epoch": 0.3733483389604435, "grad_norm": 704.0, "learning_rate": 7.214356426313542e-05, "loss": 19.7541, "step": 8957 }, { "epoch": 0.3733900212579717, "grad_norm": 398.0, "learning_rate": 7.213751209651129e-05, "loss": 13.6879, "step": 8958 }, { "epoch": 0.3734317035555, "grad_norm": 1160.0, "learning_rate": 7.213145952642905e-05, "loss": 26.0003, "step": 8959 }, { "epoch": 0.3734733858530282, "grad_norm": 181.0, "learning_rate": 7.212540655299898e-05, "loss": 9.6877, "step": 8960 }, { "epoch": 0.37351506815055646, "grad_norm": 406.0, "learning_rate": 7.211935317633142e-05, "loss": 15.8131, "step": 8961 }, { "epoch": 0.3735567504480847, "grad_norm": 420.0, "learning_rate": 7.211329939653669e-05, "loss": 14.1253, "step": 8962 }, { "epoch": 0.37359843274561294, "grad_norm": 142.0, "learning_rate": 7.210724521372509e-05, "loss": 9.9379, "step": 8963 }, { "epoch": 0.37364011504314115, "grad_norm": 314.0, "learning_rate": 7.2101190628007e-05, "loss": 11.8757, "step": 8964 }, { "epoch": 0.3736817973406694, "grad_norm": 328.0, "learning_rate": 7.209513563949275e-05, "loss": 12.9377, "step": 8965 }, { "epoch": 0.37372347963819763, "grad_norm": 360.0, "learning_rate": 7.208908024829269e-05, "loss": 14.8759, "step": 8966 }, { "epoch": 0.3737651619357259, "grad_norm": 380.0, "learning_rate": 7.20830244545172e-05, "loss": 14.8752, "step": 8967 }, { "epoch": 0.3738068442332541, "grad_norm": 140.0, "learning_rate": 7.20769682582766e-05, "loss": 9.7502, "step": 8968 }, { "epoch": 0.3738485265307824, "grad_norm": 422.0, "learning_rate": 7.20709116596813e-05, "loss": 15.4377, "step": 8969 }, { "epoch": 0.37389020882831064, "grad_norm": 992.0, "learning_rate": 7.206485465884168e-05, "loss": 25.6255, "step": 8970 }, { "epoch": 0.37393189112583886, "grad_norm": 516.0, "learning_rate": 7.205879725586814e-05, "loss": 18.2501, "step": 8971 }, { "epoch": 0.3739735734233671, "grad_norm": 237.0, "learning_rate": 7.205273945087104e-05, "loss": 11.4379, "step": 8972 }, { "epoch": 0.37401525572089533, "grad_norm": 540.0, "learning_rate": 7.204668124396083e-05, "loss": 17.5005, "step": 8973 }, { "epoch": 0.3740569380184236, "grad_norm": 292.0, "learning_rate": 7.204062263524787e-05, "loss": 12.313, "step": 8974 }, { "epoch": 0.3740986203159518, "grad_norm": 122.5, "learning_rate": 7.203456362484262e-05, "loss": 8.9379, "step": 8975 }, { "epoch": 0.3741403026134801, "grad_norm": 1296.0, "learning_rate": 7.202850421285549e-05, "loss": 30.3771, "step": 8976 }, { "epoch": 0.3741819849110083, "grad_norm": 764.0, "learning_rate": 7.20224443993969e-05, "loss": 20.7503, "step": 8977 }, { "epoch": 0.37422366720853656, "grad_norm": 180.0, "learning_rate": 7.201638418457732e-05, "loss": 12.1255, "step": 8978 }, { "epoch": 0.37426534950606477, "grad_norm": 326.0, "learning_rate": 7.201032356850719e-05, "loss": 14.3129, "step": 8979 }, { "epoch": 0.37430703180359304, "grad_norm": 264.0, "learning_rate": 7.200426255129696e-05, "loss": 11.5629, "step": 8980 }, { "epoch": 0.37434871410112125, "grad_norm": 137.0, "learning_rate": 7.199820113305708e-05, "loss": 8.4379, "step": 8981 }, { "epoch": 0.3743903963986495, "grad_norm": 406.0, "learning_rate": 7.199213931389803e-05, "loss": 14.5001, "step": 8982 }, { "epoch": 0.37443207869617773, "grad_norm": 620.0, "learning_rate": 7.19860770939303e-05, "loss": 20.5004, "step": 8983 }, { "epoch": 0.374473760993706, "grad_norm": 146.0, "learning_rate": 7.198001447326436e-05, "loss": 9.5631, "step": 8984 }, { "epoch": 0.3745154432912342, "grad_norm": 328.0, "learning_rate": 7.197395145201071e-05, "loss": 13.9379, "step": 8985 }, { "epoch": 0.3745571255887625, "grad_norm": 438.0, "learning_rate": 7.196788803027983e-05, "loss": 16.6259, "step": 8986 }, { "epoch": 0.3745988078862907, "grad_norm": 2816.0, "learning_rate": 7.196182420818225e-05, "loss": 45.2618, "step": 8987 }, { "epoch": 0.37464049018381895, "grad_norm": 402.0, "learning_rate": 7.195575998582846e-05, "loss": 14.0002, "step": 8988 }, { "epoch": 0.37468217248134716, "grad_norm": 544.0, "learning_rate": 7.194969536332901e-05, "loss": 18.0002, "step": 8989 }, { "epoch": 0.37472385477887543, "grad_norm": 280.0, "learning_rate": 7.194363034079441e-05, "loss": 13.5628, "step": 8990 }, { "epoch": 0.37476553707640364, "grad_norm": 424.0, "learning_rate": 7.19375649183352e-05, "loss": 15.0003, "step": 8991 }, { "epoch": 0.3748072193739319, "grad_norm": 276.0, "learning_rate": 7.193149909606191e-05, "loss": 12.3754, "step": 8992 }, { "epoch": 0.3748489016714601, "grad_norm": 560.0, "learning_rate": 7.192543287408513e-05, "loss": 17.7505, "step": 8993 }, { "epoch": 0.3748905839689884, "grad_norm": 192.0, "learning_rate": 7.191936625251536e-05, "loss": 12.2502, "step": 8994 }, { "epoch": 0.3749322662665166, "grad_norm": 560.0, "learning_rate": 7.191329923146322e-05, "loss": 17.1274, "step": 8995 }, { "epoch": 0.37497394856404487, "grad_norm": 186.0, "learning_rate": 7.190723181103924e-05, "loss": 11.3752, "step": 8996 }, { "epoch": 0.3750156308615731, "grad_norm": 576.0, "learning_rate": 7.190116399135401e-05, "loss": 20.0002, "step": 8997 }, { "epoch": 0.37505731315910135, "grad_norm": 320.0, "learning_rate": 7.189509577251815e-05, "loss": 13.9379, "step": 8998 }, { "epoch": 0.37509899545662956, "grad_norm": 354.0, "learning_rate": 7.18890271546422e-05, "loss": 13.3758, "step": 8999 }, { "epoch": 0.3751406777541578, "grad_norm": 484.0, "learning_rate": 7.188295813783679e-05, "loss": 16.7503, "step": 9000 }, { "epoch": 0.37518236005168604, "grad_norm": 272.0, "learning_rate": 7.187688872221254e-05, "loss": 13.2502, "step": 9001 }, { "epoch": 0.3752240423492143, "grad_norm": 296.0, "learning_rate": 7.187081890788003e-05, "loss": 13.0629, "step": 9002 }, { "epoch": 0.3752657246467425, "grad_norm": 648.0, "learning_rate": 7.18647486949499e-05, "loss": 20.6252, "step": 9003 }, { "epoch": 0.3753074069442708, "grad_norm": 280.0, "learning_rate": 7.185867808353281e-05, "loss": 13.3129, "step": 9004 }, { "epoch": 0.375349089241799, "grad_norm": 462.0, "learning_rate": 7.185260707373936e-05, "loss": 15.7511, "step": 9005 }, { "epoch": 0.37539077153932726, "grad_norm": 243.0, "learning_rate": 7.18465356656802e-05, "loss": 12.1876, "step": 9006 }, { "epoch": 0.3754324538368555, "grad_norm": 760.0, "learning_rate": 7.184046385946599e-05, "loss": 21.7502, "step": 9007 }, { "epoch": 0.37547413613438374, "grad_norm": 576.0, "learning_rate": 7.18343916552074e-05, "loss": 17.7503, "step": 9008 }, { "epoch": 0.37551581843191195, "grad_norm": 458.0, "learning_rate": 7.182831905301506e-05, "loss": 14.3162, "step": 9009 }, { "epoch": 0.3755575007294402, "grad_norm": 600.0, "learning_rate": 7.18222460529997e-05, "loss": 22.2502, "step": 9010 }, { "epoch": 0.37559918302696843, "grad_norm": 109.5, "learning_rate": 7.181617265527193e-05, "loss": 9.1252, "step": 9011 }, { "epoch": 0.3756408653244967, "grad_norm": 102.0, "learning_rate": 7.181009885994252e-05, "loss": 7.6254, "step": 9012 }, { "epoch": 0.3756825476220249, "grad_norm": 256.0, "learning_rate": 7.18040246671221e-05, "loss": 11.7504, "step": 9013 }, { "epoch": 0.3757242299195532, "grad_norm": 350.0, "learning_rate": 7.179795007692138e-05, "loss": 14.2503, "step": 9014 }, { "epoch": 0.3757659122170814, "grad_norm": 172.0, "learning_rate": 7.17918750894511e-05, "loss": 11.4381, "step": 9015 }, { "epoch": 0.37580759451460966, "grad_norm": 229.0, "learning_rate": 7.178579970482195e-05, "loss": 12.0005, "step": 9016 }, { "epoch": 0.37584927681213787, "grad_norm": 434.0, "learning_rate": 7.177972392314469e-05, "loss": 16.2505, "step": 9017 }, { "epoch": 0.37589095910966613, "grad_norm": 290.0, "learning_rate": 7.177364774453002e-05, "loss": 12.5628, "step": 9018 }, { "epoch": 0.37593264140719435, "grad_norm": 232.0, "learning_rate": 7.176757116908868e-05, "loss": 11.1253, "step": 9019 }, { "epoch": 0.3759743237047226, "grad_norm": 126.0, "learning_rate": 7.176149419693142e-05, "loss": 9.1252, "step": 9020 }, { "epoch": 0.3760160060022508, "grad_norm": 128.0, "learning_rate": 7.1755416828169e-05, "loss": 8.8127, "step": 9021 }, { "epoch": 0.3760576882997791, "grad_norm": 568.0, "learning_rate": 7.17493390629122e-05, "loss": 18.6251, "step": 9022 }, { "epoch": 0.3760993705973073, "grad_norm": 2368.0, "learning_rate": 7.174326090127173e-05, "loss": 46.504, "step": 9023 }, { "epoch": 0.37614105289483557, "grad_norm": 816.0, "learning_rate": 7.173718234335842e-05, "loss": 22.7524, "step": 9024 }, { "epoch": 0.3761827351923638, "grad_norm": 374.0, "learning_rate": 7.173110338928301e-05, "loss": 14.0626, "step": 9025 }, { "epoch": 0.37622441748989205, "grad_norm": 532.0, "learning_rate": 7.172502403915634e-05, "loss": 18.1256, "step": 9026 }, { "epoch": 0.37626609978742026, "grad_norm": 212.0, "learning_rate": 7.171894429308916e-05, "loss": 11.4379, "step": 9027 }, { "epoch": 0.37630778208494853, "grad_norm": 330.0, "learning_rate": 7.171286415119229e-05, "loss": 13.8772, "step": 9028 }, { "epoch": 0.37634946438247674, "grad_norm": 896.0, "learning_rate": 7.170678361357655e-05, "loss": 22.6297, "step": 9029 }, { "epoch": 0.376391146680005, "grad_norm": 215.0, "learning_rate": 7.170070268035276e-05, "loss": 11.5007, "step": 9030 }, { "epoch": 0.3764328289775332, "grad_norm": 255.0, "learning_rate": 7.169462135163174e-05, "loss": 12.3752, "step": 9031 }, { "epoch": 0.3764745112750615, "grad_norm": 51.25, "learning_rate": 7.168853962752431e-05, "loss": 6.6252, "step": 9032 }, { "epoch": 0.3765161935725897, "grad_norm": 302.0, "learning_rate": 7.168245750814131e-05, "loss": 12.1877, "step": 9033 }, { "epoch": 0.37655787587011796, "grad_norm": 115.5, "learning_rate": 7.167637499359361e-05, "loss": 8.1259, "step": 9034 }, { "epoch": 0.3765995581676462, "grad_norm": 496.0, "learning_rate": 7.167029208399205e-05, "loss": 17.5003, "step": 9035 }, { "epoch": 0.37664124046517444, "grad_norm": 134.0, "learning_rate": 7.166420877944749e-05, "loss": 8.9377, "step": 9036 }, { "epoch": 0.37668292276270265, "grad_norm": 560.0, "learning_rate": 7.165812508007081e-05, "loss": 18.5002, "step": 9037 }, { "epoch": 0.3767246050602309, "grad_norm": 118.0, "learning_rate": 7.165204098597287e-05, "loss": 9.9388, "step": 9038 }, { "epoch": 0.37676628735775913, "grad_norm": 404.0, "learning_rate": 7.164595649726457e-05, "loss": 13.8756, "step": 9039 }, { "epoch": 0.3768079696552874, "grad_norm": 336.0, "learning_rate": 7.163987161405676e-05, "loss": 14.5005, "step": 9040 }, { "epoch": 0.3768496519528156, "grad_norm": 468.0, "learning_rate": 7.163378633646039e-05, "loss": 16.8753, "step": 9041 }, { "epoch": 0.3768913342503439, "grad_norm": 75.5, "learning_rate": 7.162770066458635e-05, "loss": 7.3128, "step": 9042 }, { "epoch": 0.37693301654787215, "grad_norm": 502.0, "learning_rate": 7.162161459854556e-05, "loss": 16.2526, "step": 9043 }, { "epoch": 0.37697469884540036, "grad_norm": 516.0, "learning_rate": 7.161552813844889e-05, "loss": 17.6252, "step": 9044 }, { "epoch": 0.3770163811429286, "grad_norm": 125.5, "learning_rate": 7.160944128440732e-05, "loss": 8.5003, "step": 9045 }, { "epoch": 0.37705806344045684, "grad_norm": 506.0, "learning_rate": 7.160335403653177e-05, "loss": 17.2502, "step": 9046 }, { "epoch": 0.3770997457379851, "grad_norm": 320.0, "learning_rate": 7.159726639493316e-05, "loss": 13.3758, "step": 9047 }, { "epoch": 0.3771414280355133, "grad_norm": 120.0, "learning_rate": 7.159117835972246e-05, "loss": 8.5628, "step": 9048 }, { "epoch": 0.3771831103330416, "grad_norm": 314.0, "learning_rate": 7.158508993101062e-05, "loss": 12.0629, "step": 9049 }, { "epoch": 0.3772247926305698, "grad_norm": 332.0, "learning_rate": 7.157900110890859e-05, "loss": 12.6879, "step": 9050 }, { "epoch": 0.37726647492809806, "grad_norm": 290.0, "learning_rate": 7.157291189352736e-05, "loss": 12.6251, "step": 9051 }, { "epoch": 0.3773081572256263, "grad_norm": 104.5, "learning_rate": 7.156682228497789e-05, "loss": 9.6255, "step": 9052 }, { "epoch": 0.37734983952315454, "grad_norm": 362.0, "learning_rate": 7.156073228337116e-05, "loss": 15.0003, "step": 9053 }, { "epoch": 0.37739152182068275, "grad_norm": 1448.0, "learning_rate": 7.155464188881818e-05, "loss": 32.5049, "step": 9054 }, { "epoch": 0.377433204118211, "grad_norm": 282.0, "learning_rate": 7.154855110142994e-05, "loss": 13.2506, "step": 9055 }, { "epoch": 0.37747488641573923, "grad_norm": 260.0, "learning_rate": 7.154245992131743e-05, "loss": 14.5634, "step": 9056 }, { "epoch": 0.3775165687132675, "grad_norm": 90.0, "learning_rate": 7.15363683485917e-05, "loss": 6.3759, "step": 9057 }, { "epoch": 0.3775582510107957, "grad_norm": 380.0, "learning_rate": 7.153027638336373e-05, "loss": 15.1252, "step": 9058 }, { "epoch": 0.377599933308324, "grad_norm": 186.0, "learning_rate": 7.152418402574457e-05, "loss": 11.8132, "step": 9059 }, { "epoch": 0.3776416156058522, "grad_norm": 100.5, "learning_rate": 7.151809127584523e-05, "loss": 10.0642, "step": 9060 }, { "epoch": 0.37768329790338045, "grad_norm": 193.0, "learning_rate": 7.151199813377678e-05, "loss": 11.5018, "step": 9061 }, { "epoch": 0.37772498020090867, "grad_norm": 237.0, "learning_rate": 7.150590459965025e-05, "loss": 11.5006, "step": 9062 }, { "epoch": 0.37776666249843693, "grad_norm": 264.0, "learning_rate": 7.14998106735767e-05, "loss": 13.1891, "step": 9063 }, { "epoch": 0.37780834479596515, "grad_norm": 512.0, "learning_rate": 7.14937163556672e-05, "loss": 17.3768, "step": 9064 }, { "epoch": 0.3778500270934934, "grad_norm": 426.0, "learning_rate": 7.148762164603279e-05, "loss": 14.8755, "step": 9065 }, { "epoch": 0.3778917093910216, "grad_norm": 205.0, "learning_rate": 7.148152654478458e-05, "loss": 11.3755, "step": 9066 }, { "epoch": 0.3779333916885499, "grad_norm": 556.0, "learning_rate": 7.147543105203364e-05, "loss": 18.2524, "step": 9067 }, { "epoch": 0.3779750739860781, "grad_norm": 484.0, "learning_rate": 7.146933516789107e-05, "loss": 17.1258, "step": 9068 }, { "epoch": 0.37801675628360637, "grad_norm": 552.0, "learning_rate": 7.146323889246796e-05, "loss": 18.6252, "step": 9069 }, { "epoch": 0.3780584385811346, "grad_norm": 226.0, "learning_rate": 7.145714222587541e-05, "loss": 11.3127, "step": 9070 }, { "epoch": 0.37810012087866285, "grad_norm": 588.0, "learning_rate": 7.145104516822454e-05, "loss": 19.3752, "step": 9071 }, { "epoch": 0.37814180317619106, "grad_norm": 231.0, "learning_rate": 7.144494771962647e-05, "loss": 9.627, "step": 9072 }, { "epoch": 0.3781834854737193, "grad_norm": 402.0, "learning_rate": 7.143884988019232e-05, "loss": 12.7503, "step": 9073 }, { "epoch": 0.37822516777124754, "grad_norm": 382.0, "learning_rate": 7.143275165003322e-05, "loss": 14.8754, "step": 9074 }, { "epoch": 0.3782668500687758, "grad_norm": 480.0, "learning_rate": 7.142665302926034e-05, "loss": 15.2507, "step": 9075 }, { "epoch": 0.378308532366304, "grad_norm": 358.0, "learning_rate": 7.14205540179848e-05, "loss": 14.3757, "step": 9076 }, { "epoch": 0.3783502146638323, "grad_norm": 154.0, "learning_rate": 7.141445461631775e-05, "loss": 10.0626, "step": 9077 }, { "epoch": 0.3783918969613605, "grad_norm": 552.0, "learning_rate": 7.140835482437036e-05, "loss": 18.7506, "step": 9078 }, { "epoch": 0.37843357925888876, "grad_norm": 190.0, "learning_rate": 7.140225464225381e-05, "loss": 10.7503, "step": 9079 }, { "epoch": 0.378475261556417, "grad_norm": 161.0, "learning_rate": 7.139615407007926e-05, "loss": 9.9378, "step": 9080 }, { "epoch": 0.37851694385394524, "grad_norm": 342.0, "learning_rate": 7.139005310795792e-05, "loss": 13.1254, "step": 9081 }, { "epoch": 0.37855862615147345, "grad_norm": 440.0, "learning_rate": 7.138395175600096e-05, "loss": 16.8812, "step": 9082 }, { "epoch": 0.3786003084490017, "grad_norm": 235.0, "learning_rate": 7.137785001431958e-05, "loss": 12.3126, "step": 9083 }, { "epoch": 0.37864199074652993, "grad_norm": 306.0, "learning_rate": 7.137174788302499e-05, "loss": 13.3129, "step": 9084 }, { "epoch": 0.3786836730440582, "grad_norm": 588.0, "learning_rate": 7.136564536222838e-05, "loss": 19.5007, "step": 9085 }, { "epoch": 0.3787253553415864, "grad_norm": 412.0, "learning_rate": 7.1359542452041e-05, "loss": 16.5006, "step": 9086 }, { "epoch": 0.3787670376391147, "grad_norm": 364.0, "learning_rate": 7.135343915257407e-05, "loss": 14.4377, "step": 9087 }, { "epoch": 0.3788087199366429, "grad_norm": 588.0, "learning_rate": 7.13473354639388e-05, "loss": 18.3757, "step": 9088 }, { "epoch": 0.37885040223417116, "grad_norm": 132.0, "learning_rate": 7.134123138624646e-05, "loss": 11.063, "step": 9089 }, { "epoch": 0.37889208453169937, "grad_norm": 1448.0, "learning_rate": 7.133512691960827e-05, "loss": 31.8756, "step": 9090 }, { "epoch": 0.37893376682922764, "grad_norm": 105.5, "learning_rate": 7.132902206413549e-05, "loss": 9.5005, "step": 9091 }, { "epoch": 0.37897544912675585, "grad_norm": 158.0, "learning_rate": 7.132291681993942e-05, "loss": 9.7506, "step": 9092 }, { "epoch": 0.3790171314242841, "grad_norm": 560.0, "learning_rate": 7.131681118713127e-05, "loss": 17.5018, "step": 9093 }, { "epoch": 0.3790588137218123, "grad_norm": 308.0, "learning_rate": 7.131070516582236e-05, "loss": 12.0628, "step": 9094 }, { "epoch": 0.3791004960193406, "grad_norm": 360.0, "learning_rate": 7.130459875612395e-05, "loss": 13.7502, "step": 9095 }, { "epoch": 0.3791421783168688, "grad_norm": 548.0, "learning_rate": 7.129849195814734e-05, "loss": 17.2508, "step": 9096 }, { "epoch": 0.3791838606143971, "grad_norm": 616.0, "learning_rate": 7.129238477200383e-05, "loss": 19.0006, "step": 9097 }, { "epoch": 0.3792255429119253, "grad_norm": 298.0, "learning_rate": 7.12862771978047e-05, "loss": 12.6259, "step": 9098 }, { "epoch": 0.37926722520945355, "grad_norm": 217.0, "learning_rate": 7.12801692356613e-05, "loss": 11.3128, "step": 9099 }, { "epoch": 0.37930890750698176, "grad_norm": 276.0, "learning_rate": 7.127406088568492e-05, "loss": 12.5627, "step": 9100 }, { "epoch": 0.37935058980451003, "grad_norm": 612.0, "learning_rate": 7.126795214798687e-05, "loss": 18.3753, "step": 9101 }, { "epoch": 0.37939227210203824, "grad_norm": 390.0, "learning_rate": 7.126184302267851e-05, "loss": 14.1891, "step": 9102 }, { "epoch": 0.3794339543995665, "grad_norm": 296.0, "learning_rate": 7.125573350987118e-05, "loss": 11.8754, "step": 9103 }, { "epoch": 0.3794756366970947, "grad_norm": 96.5, "learning_rate": 7.124962360967624e-05, "loss": 8.5004, "step": 9104 }, { "epoch": 0.379517318994623, "grad_norm": 422.0, "learning_rate": 7.1243513322205e-05, "loss": 16.6252, "step": 9105 }, { "epoch": 0.3795590012921512, "grad_norm": 452.0, "learning_rate": 7.123740264756885e-05, "loss": 13.7511, "step": 9106 }, { "epoch": 0.37960068358967947, "grad_norm": 181.0, "learning_rate": 7.123129158587915e-05, "loss": 10.4377, "step": 9107 }, { "epoch": 0.3796423658872077, "grad_norm": 106.0, "learning_rate": 7.122518013724728e-05, "loss": 9.8763, "step": 9108 }, { "epoch": 0.37968404818473594, "grad_norm": 460.0, "learning_rate": 7.121906830178462e-05, "loss": 16.6254, "step": 9109 }, { "epoch": 0.37972573048226416, "grad_norm": 75.0, "learning_rate": 7.121295607960254e-05, "loss": 6.9689, "step": 9110 }, { "epoch": 0.3797674127797924, "grad_norm": 290.0, "learning_rate": 7.120684347081248e-05, "loss": 13.1883, "step": 9111 }, { "epoch": 0.37980909507732064, "grad_norm": 266.0, "learning_rate": 7.12007304755258e-05, "loss": 12.6877, "step": 9112 }, { "epoch": 0.3798507773748489, "grad_norm": 396.0, "learning_rate": 7.119461709385392e-05, "loss": 15.5626, "step": 9113 }, { "epoch": 0.3798924596723771, "grad_norm": 226.0, "learning_rate": 7.118850332590827e-05, "loss": 10.8127, "step": 9114 }, { "epoch": 0.3799341419699054, "grad_norm": 410.0, "learning_rate": 7.118238917180025e-05, "loss": 15.5627, "step": 9115 }, { "epoch": 0.37997582426743365, "grad_norm": 350.0, "learning_rate": 7.117627463164132e-05, "loss": 12.6256, "step": 9116 }, { "epoch": 0.38001750656496186, "grad_norm": 676.0, "learning_rate": 7.117015970554291e-05, "loss": 19.3777, "step": 9117 }, { "epoch": 0.3800591888624901, "grad_norm": 788.0, "learning_rate": 7.116404439361645e-05, "loss": 24.1253, "step": 9118 }, { "epoch": 0.38010087116001834, "grad_norm": 229.0, "learning_rate": 7.11579286959734e-05, "loss": 13.5002, "step": 9119 }, { "epoch": 0.3801425534575466, "grad_norm": 324.0, "learning_rate": 7.115181261272523e-05, "loss": 13.0627, "step": 9120 }, { "epoch": 0.3801842357550748, "grad_norm": 482.0, "learning_rate": 7.11456961439834e-05, "loss": 17.6255, "step": 9121 }, { "epoch": 0.3802259180526031, "grad_norm": 250.0, "learning_rate": 7.113957928985938e-05, "loss": 11.4377, "step": 9122 }, { "epoch": 0.3802676003501313, "grad_norm": 254.0, "learning_rate": 7.113346205046465e-05, "loss": 11.1878, "step": 9123 }, { "epoch": 0.38030928264765956, "grad_norm": 214.0, "learning_rate": 7.11273444259107e-05, "loss": 11.3128, "step": 9124 }, { "epoch": 0.3803509649451878, "grad_norm": 251.0, "learning_rate": 7.1121226416309e-05, "loss": 5.2196, "step": 9125 }, { "epoch": 0.38039264724271604, "grad_norm": 304.0, "learning_rate": 7.11151080217711e-05, "loss": 13.7504, "step": 9126 }, { "epoch": 0.38043432954024425, "grad_norm": 556.0, "learning_rate": 7.110898924240847e-05, "loss": 17.8756, "step": 9127 }, { "epoch": 0.3804760118377725, "grad_norm": 171.0, "learning_rate": 7.110287007833262e-05, "loss": 10.5627, "step": 9128 }, { "epoch": 0.38051769413530073, "grad_norm": 478.0, "learning_rate": 7.109675052965512e-05, "loss": 16.7506, "step": 9129 }, { "epoch": 0.380559376432829, "grad_norm": 316.0, "learning_rate": 7.109063059648746e-05, "loss": 12.1253, "step": 9130 }, { "epoch": 0.3806010587303572, "grad_norm": 700.0, "learning_rate": 7.108451027894118e-05, "loss": 23.0004, "step": 9131 }, { "epoch": 0.3806427410278855, "grad_norm": 227.0, "learning_rate": 7.107838957712784e-05, "loss": 13.1252, "step": 9132 }, { "epoch": 0.3806844233254137, "grad_norm": 225.0, "learning_rate": 7.107226849115897e-05, "loss": 12.1251, "step": 9133 }, { "epoch": 0.38072610562294196, "grad_norm": 107.5, "learning_rate": 7.106614702114614e-05, "loss": 9.8754, "step": 9134 }, { "epoch": 0.38076778792047017, "grad_norm": 328.0, "learning_rate": 7.106002516720091e-05, "loss": 14.5004, "step": 9135 }, { "epoch": 0.38080947021799844, "grad_norm": 229.0, "learning_rate": 7.105390292943483e-05, "loss": 9.0668, "step": 9136 }, { "epoch": 0.38085115251552665, "grad_norm": 260.0, "learning_rate": 7.104778030795954e-05, "loss": 13.0016, "step": 9137 }, { "epoch": 0.3808928348130549, "grad_norm": 286.0, "learning_rate": 7.104165730288656e-05, "loss": 13.1291, "step": 9138 }, { "epoch": 0.3809345171105831, "grad_norm": 260.0, "learning_rate": 7.103553391432752e-05, "loss": 11.9377, "step": 9139 }, { "epoch": 0.3809761994081114, "grad_norm": 392.0, "learning_rate": 7.102941014239397e-05, "loss": 14.5007, "step": 9140 }, { "epoch": 0.3810178817056396, "grad_norm": 260.0, "learning_rate": 7.102328598719759e-05, "loss": 11.5003, "step": 9141 }, { "epoch": 0.38105956400316787, "grad_norm": 144.0, "learning_rate": 7.101716144884995e-05, "loss": 9.0003, "step": 9142 }, { "epoch": 0.3811012463006961, "grad_norm": 205.0, "learning_rate": 7.101103652746268e-05, "loss": 10.8128, "step": 9143 }, { "epoch": 0.38114292859822435, "grad_norm": 948.0, "learning_rate": 7.100491122314739e-05, "loss": 25.3783, "step": 9144 }, { "epoch": 0.38118461089575256, "grad_norm": 676.0, "learning_rate": 7.099878553601574e-05, "loss": 19.2501, "step": 9145 }, { "epoch": 0.38122629319328083, "grad_norm": 292.0, "learning_rate": 7.099265946617936e-05, "loss": 11.7503, "step": 9146 }, { "epoch": 0.38126797549080904, "grad_norm": 428.0, "learning_rate": 7.09865330137499e-05, "loss": 16.5003, "step": 9147 }, { "epoch": 0.3813096577883373, "grad_norm": 288.0, "learning_rate": 7.0980406178839e-05, "loss": 11.5634, "step": 9148 }, { "epoch": 0.3813513400858655, "grad_norm": 238.0, "learning_rate": 7.097427896155835e-05, "loss": 11.1255, "step": 9149 }, { "epoch": 0.3813930223833938, "grad_norm": 92.5, "learning_rate": 7.09681513620196e-05, "loss": 8.5003, "step": 9150 }, { "epoch": 0.381434704680922, "grad_norm": 60.0, "learning_rate": 7.096202338033441e-05, "loss": 7.9066, "step": 9151 }, { "epoch": 0.38147638697845027, "grad_norm": 436.0, "learning_rate": 7.095589501661452e-05, "loss": 16.7515, "step": 9152 }, { "epoch": 0.3815180692759785, "grad_norm": 126.0, "learning_rate": 7.094976627097155e-05, "loss": 8.1881, "step": 9153 }, { "epoch": 0.38155975157350674, "grad_norm": 796.0, "learning_rate": 7.094363714351724e-05, "loss": 19.8775, "step": 9154 }, { "epoch": 0.38160143387103496, "grad_norm": 292.0, "learning_rate": 7.093750763436328e-05, "loss": 12.1889, "step": 9155 }, { "epoch": 0.3816431161685632, "grad_norm": 676.0, "learning_rate": 7.093137774362139e-05, "loss": 18.0002, "step": 9156 }, { "epoch": 0.38168479846609144, "grad_norm": 474.0, "learning_rate": 7.09252474714033e-05, "loss": 17.3755, "step": 9157 }, { "epoch": 0.3817264807636197, "grad_norm": 1224.0, "learning_rate": 7.091911681782069e-05, "loss": 33.5005, "step": 9158 }, { "epoch": 0.3817681630611479, "grad_norm": 202.0, "learning_rate": 7.091298578298534e-05, "loss": 10.9378, "step": 9159 }, { "epoch": 0.3818098453586762, "grad_norm": 524.0, "learning_rate": 7.090685436700896e-05, "loss": 18.0039, "step": 9160 }, { "epoch": 0.3818515276562044, "grad_norm": 237.0, "learning_rate": 7.090072257000329e-05, "loss": 11.7508, "step": 9161 }, { "epoch": 0.38189320995373266, "grad_norm": 87.0, "learning_rate": 7.089459039208012e-05, "loss": 8.2504, "step": 9162 }, { "epoch": 0.38193489225126087, "grad_norm": 356.0, "learning_rate": 7.088845783335116e-05, "loss": 14.3127, "step": 9163 }, { "epoch": 0.38197657454878914, "grad_norm": 314.0, "learning_rate": 7.088232489392822e-05, "loss": 14.0631, "step": 9164 }, { "epoch": 0.38201825684631735, "grad_norm": 235.0, "learning_rate": 7.087619157392306e-05, "loss": 10.5003, "step": 9165 }, { "epoch": 0.3820599391438456, "grad_norm": 153.0, "learning_rate": 7.087005787344743e-05, "loss": 10.2503, "step": 9166 }, { "epoch": 0.38210162144137383, "grad_norm": 1368.0, "learning_rate": 7.086392379261315e-05, "loss": 30.2543, "step": 9167 }, { "epoch": 0.3821433037389021, "grad_norm": 772.0, "learning_rate": 7.085778933153202e-05, "loss": 23.3766, "step": 9168 }, { "epoch": 0.3821849860364303, "grad_norm": 428.0, "learning_rate": 7.085165449031583e-05, "loss": 12.6265, "step": 9169 }, { "epoch": 0.3822266683339586, "grad_norm": 486.0, "learning_rate": 7.084551926907636e-05, "loss": 16.8771, "step": 9170 }, { "epoch": 0.3822683506314868, "grad_norm": 952.0, "learning_rate": 7.083938366792548e-05, "loss": 25.7544, "step": 9171 }, { "epoch": 0.38231003292901505, "grad_norm": 800.0, "learning_rate": 7.083324768697497e-05, "loss": 23.7503, "step": 9172 }, { "epoch": 0.38235171522654327, "grad_norm": 160.0, "learning_rate": 7.082711132633668e-05, "loss": 11.626, "step": 9173 }, { "epoch": 0.38239339752407153, "grad_norm": 350.0, "learning_rate": 7.082097458612242e-05, "loss": 14.5012, "step": 9174 }, { "epoch": 0.38243507982159974, "grad_norm": 436.0, "learning_rate": 7.081483746644406e-05, "loss": 15.7504, "step": 9175 }, { "epoch": 0.382476762119128, "grad_norm": 346.0, "learning_rate": 7.080869996741343e-05, "loss": 14.7502, "step": 9176 }, { "epoch": 0.3825184444166562, "grad_norm": 358.0, "learning_rate": 7.08025620891424e-05, "loss": 13.6254, "step": 9177 }, { "epoch": 0.3825601267141845, "grad_norm": 520.0, "learning_rate": 7.079642383174283e-05, "loss": 17.6253, "step": 9178 }, { "epoch": 0.3826018090117127, "grad_norm": 245.0, "learning_rate": 7.07902851953266e-05, "loss": 12.188, "step": 9179 }, { "epoch": 0.38264349130924097, "grad_norm": 1288.0, "learning_rate": 7.078414618000558e-05, "loss": 32.5004, "step": 9180 }, { "epoch": 0.3826851736067692, "grad_norm": 143.0, "learning_rate": 7.077800678589162e-05, "loss": 10.8757, "step": 9181 }, { "epoch": 0.38272685590429745, "grad_norm": 131.0, "learning_rate": 7.077186701309667e-05, "loss": 8.5626, "step": 9182 }, { "epoch": 0.38276853820182566, "grad_norm": 286.0, "learning_rate": 7.076572686173259e-05, "loss": 12.5628, "step": 9183 }, { "epoch": 0.3828102204993539, "grad_norm": 206.0, "learning_rate": 7.07595863319113e-05, "loss": 10.7508, "step": 9184 }, { "epoch": 0.38285190279688214, "grad_norm": 568.0, "learning_rate": 7.07534454237447e-05, "loss": 18.1255, "step": 9185 }, { "epoch": 0.3828935850944104, "grad_norm": 380.0, "learning_rate": 7.074730413734472e-05, "loss": 14.5628, "step": 9186 }, { "epoch": 0.3829352673919386, "grad_norm": 490.0, "learning_rate": 7.074116247282329e-05, "loss": 15.3142, "step": 9187 }, { "epoch": 0.3829769496894669, "grad_norm": 556.0, "learning_rate": 7.073502043029232e-05, "loss": 18.3754, "step": 9188 }, { "epoch": 0.38301863198699515, "grad_norm": 438.0, "learning_rate": 7.072887800986375e-05, "loss": 18.1255, "step": 9189 }, { "epoch": 0.38306031428452336, "grad_norm": 114.0, "learning_rate": 7.072273521164955e-05, "loss": 8.0003, "step": 9190 }, { "epoch": 0.38310199658205163, "grad_norm": 1368.0, "learning_rate": 7.071659203576166e-05, "loss": 26.6298, "step": 9191 }, { "epoch": 0.38314367887957984, "grad_norm": 386.0, "learning_rate": 7.071044848231204e-05, "loss": 15.1878, "step": 9192 }, { "epoch": 0.3831853611771081, "grad_norm": 127.5, "learning_rate": 7.070430455141266e-05, "loss": 6.2819, "step": 9193 }, { "epoch": 0.3832270434746363, "grad_norm": 318.0, "learning_rate": 7.06981602431755e-05, "loss": 13.8753, "step": 9194 }, { "epoch": 0.3832687257721646, "grad_norm": 704.0, "learning_rate": 7.069201555771251e-05, "loss": 19.8757, "step": 9195 }, { "epoch": 0.3833104080696928, "grad_norm": 552.0, "learning_rate": 7.068587049513574e-05, "loss": 18.5009, "step": 9196 }, { "epoch": 0.38335209036722107, "grad_norm": 680.0, "learning_rate": 7.067972505555712e-05, "loss": 17.7544, "step": 9197 }, { "epoch": 0.3833937726647493, "grad_norm": 484.0, "learning_rate": 7.067357923908867e-05, "loss": 17.5005, "step": 9198 }, { "epoch": 0.38343545496227754, "grad_norm": 233.0, "learning_rate": 7.06674330458424e-05, "loss": 3.9534, "step": 9199 }, { "epoch": 0.38347713725980576, "grad_norm": 236.0, "learning_rate": 7.066128647593033e-05, "loss": 11.4377, "step": 9200 }, { "epoch": 0.383518819557334, "grad_norm": 354.0, "learning_rate": 7.065513952946449e-05, "loss": 15.0627, "step": 9201 }, { "epoch": 0.38356050185486223, "grad_norm": 312.0, "learning_rate": 7.064899220655688e-05, "loss": 11.1253, "step": 9202 }, { "epoch": 0.3836021841523905, "grad_norm": 608.0, "learning_rate": 7.064284450731956e-05, "loss": 19.5002, "step": 9203 }, { "epoch": 0.3836438664499187, "grad_norm": 964.0, "learning_rate": 7.063669643186458e-05, "loss": 23.8752, "step": 9204 }, { "epoch": 0.383685548747447, "grad_norm": 768.0, "learning_rate": 7.063054798030396e-05, "loss": 22.8762, "step": 9205 }, { "epoch": 0.3837272310449752, "grad_norm": 652.0, "learning_rate": 7.062439915274979e-05, "loss": 17.1252, "step": 9206 }, { "epoch": 0.38376891334250346, "grad_norm": 322.0, "learning_rate": 7.06182499493141e-05, "loss": 13.7502, "step": 9207 }, { "epoch": 0.38381059564003167, "grad_norm": 964.0, "learning_rate": 7.061210037010897e-05, "loss": 24.5003, "step": 9208 }, { "epoch": 0.38385227793755994, "grad_norm": 502.0, "learning_rate": 7.06059504152465e-05, "loss": 15.0039, "step": 9209 }, { "epoch": 0.38389396023508815, "grad_norm": 584.0, "learning_rate": 7.059980008483875e-05, "loss": 18.3755, "step": 9210 }, { "epoch": 0.3839356425326164, "grad_norm": 231.0, "learning_rate": 7.05936493789978e-05, "loss": 13.1255, "step": 9211 }, { "epoch": 0.38397732483014463, "grad_norm": 106.5, "learning_rate": 7.058749829783578e-05, "loss": 9.8777, "step": 9212 }, { "epoch": 0.3840190071276729, "grad_norm": 91.5, "learning_rate": 7.058134684146476e-05, "loss": 9.4377, "step": 9213 }, { "epoch": 0.3840606894252011, "grad_norm": 360.0, "learning_rate": 7.057519500999687e-05, "loss": 16.2506, "step": 9214 }, { "epoch": 0.3841023717227294, "grad_norm": 219.0, "learning_rate": 7.056904280354423e-05, "loss": 10.1877, "step": 9215 }, { "epoch": 0.3841440540202576, "grad_norm": 572.0, "learning_rate": 7.056289022221896e-05, "loss": 18.1254, "step": 9216 }, { "epoch": 0.38418573631778585, "grad_norm": 233.0, "learning_rate": 7.055673726613319e-05, "loss": 11.4377, "step": 9217 }, { "epoch": 0.38422741861531406, "grad_norm": 510.0, "learning_rate": 7.055058393539905e-05, "loss": 17.5003, "step": 9218 }, { "epoch": 0.38426910091284233, "grad_norm": 338.0, "learning_rate": 7.05444302301287e-05, "loss": 13.5627, "step": 9219 }, { "epoch": 0.38431078321037054, "grad_norm": 390.0, "learning_rate": 7.053827615043427e-05, "loss": 13.6879, "step": 9220 }, { "epoch": 0.3843524655078988, "grad_norm": 768.0, "learning_rate": 7.053212169642795e-05, "loss": 22.3763, "step": 9221 }, { "epoch": 0.384394147805427, "grad_norm": 400.0, "learning_rate": 7.05259668682219e-05, "loss": 14.6259, "step": 9222 }, { "epoch": 0.3844358301029553, "grad_norm": 262.0, "learning_rate": 7.051981166592827e-05, "loss": 13.3128, "step": 9223 }, { "epoch": 0.3844775124004835, "grad_norm": 668.0, "learning_rate": 7.051365608965925e-05, "loss": 20.7502, "step": 9224 }, { "epoch": 0.38451919469801177, "grad_norm": 82.5, "learning_rate": 7.050750013952702e-05, "loss": 7.6564, "step": 9225 }, { "epoch": 0.38456087699554, "grad_norm": 322.0, "learning_rate": 7.05013438156438e-05, "loss": 14.0627, "step": 9226 }, { "epoch": 0.38460255929306825, "grad_norm": 406.0, "learning_rate": 7.049518711812175e-05, "loss": 15.2511, "step": 9227 }, { "epoch": 0.38464424159059646, "grad_norm": 400.0, "learning_rate": 7.04890300470731e-05, "loss": 16.1256, "step": 9228 }, { "epoch": 0.3846859238881247, "grad_norm": 243.0, "learning_rate": 7.048287260261005e-05, "loss": 12.5009, "step": 9229 }, { "epoch": 0.38472760618565294, "grad_norm": 2240.0, "learning_rate": 7.047671478484485e-05, "loss": 43.2511, "step": 9230 }, { "epoch": 0.3847692884831812, "grad_norm": 156.0, "learning_rate": 7.047055659388968e-05, "loss": 10.0632, "step": 9231 }, { "epoch": 0.3848109707807094, "grad_norm": 532.0, "learning_rate": 7.046439802985682e-05, "loss": 18.5002, "step": 9232 }, { "epoch": 0.3848526530782377, "grad_norm": 225.0, "learning_rate": 7.045823909285848e-05, "loss": 11.0008, "step": 9233 }, { "epoch": 0.3848943353757659, "grad_norm": 194.0, "learning_rate": 7.045207978300692e-05, "loss": 10.2502, "step": 9234 }, { "epoch": 0.38493601767329416, "grad_norm": 328.0, "learning_rate": 7.044592010041439e-05, "loss": 14.2508, "step": 9235 }, { "epoch": 0.3849776999708224, "grad_norm": 88.5, "learning_rate": 7.043976004519314e-05, "loss": 8.4385, "step": 9236 }, { "epoch": 0.38501938226835064, "grad_norm": 326.0, "learning_rate": 7.043359961745545e-05, "loss": 14.3127, "step": 9237 }, { "epoch": 0.38506106456587885, "grad_norm": 360.0, "learning_rate": 7.042743881731362e-05, "loss": 14.3754, "step": 9238 }, { "epoch": 0.3851027468634071, "grad_norm": 772.0, "learning_rate": 7.042127764487987e-05, "loss": 23.5001, "step": 9239 }, { "epoch": 0.38514442916093533, "grad_norm": 672.0, "learning_rate": 7.041511610026655e-05, "loss": 20.2541, "step": 9240 }, { "epoch": 0.3851861114584636, "grad_norm": 272.0, "learning_rate": 7.040895418358591e-05, "loss": 11.5636, "step": 9241 }, { "epoch": 0.3852277937559918, "grad_norm": 932.0, "learning_rate": 7.040279189495026e-05, "loss": 25.6252, "step": 9242 }, { "epoch": 0.3852694760535201, "grad_norm": 253.0, "learning_rate": 7.039662923447194e-05, "loss": 11.5628, "step": 9243 }, { "epoch": 0.3853111583510483, "grad_norm": 800.0, "learning_rate": 7.039046620226322e-05, "loss": 19.7548, "step": 9244 }, { "epoch": 0.38535284064857656, "grad_norm": 322.0, "learning_rate": 7.038430279843645e-05, "loss": 13.813, "step": 9245 }, { "epoch": 0.38539452294610477, "grad_norm": 278.0, "learning_rate": 7.037813902310397e-05, "loss": 13.0005, "step": 9246 }, { "epoch": 0.38543620524363303, "grad_norm": 446.0, "learning_rate": 7.037197487637808e-05, "loss": 17.2504, "step": 9247 }, { "epoch": 0.38547788754116125, "grad_norm": 193.0, "learning_rate": 7.036581035837113e-05, "loss": 10.5627, "step": 9248 }, { "epoch": 0.3855195698386895, "grad_norm": 560.0, "learning_rate": 7.03596454691955e-05, "loss": 17.7503, "step": 9249 }, { "epoch": 0.3855612521362177, "grad_norm": 704.0, "learning_rate": 7.03534802089635e-05, "loss": 22.3757, "step": 9250 }, { "epoch": 0.385602934433746, "grad_norm": 652.0, "learning_rate": 7.034731457778753e-05, "loss": 15.7502, "step": 9251 }, { "epoch": 0.3856446167312742, "grad_norm": 184.0, "learning_rate": 7.034114857577996e-05, "loss": 5.9695, "step": 9252 }, { "epoch": 0.38568629902880247, "grad_norm": 226.0, "learning_rate": 7.033498220305314e-05, "loss": 11.8128, "step": 9253 }, { "epoch": 0.3857279813263307, "grad_norm": 126.0, "learning_rate": 7.032881545971945e-05, "loss": 9.4382, "step": 9254 }, { "epoch": 0.38576966362385895, "grad_norm": 548.0, "learning_rate": 7.03226483458913e-05, "loss": 17.1253, "step": 9255 }, { "epoch": 0.38581134592138716, "grad_norm": 76.0, "learning_rate": 7.031648086168109e-05, "loss": 8.4377, "step": 9256 }, { "epoch": 0.38585302821891543, "grad_norm": 294.0, "learning_rate": 7.031031300720121e-05, "loss": 12.9378, "step": 9257 }, { "epoch": 0.38589471051644364, "grad_norm": 564.0, "learning_rate": 7.030414478256408e-05, "loss": 17.7502, "step": 9258 }, { "epoch": 0.3859363928139719, "grad_norm": 560.0, "learning_rate": 7.02979761878821e-05, "loss": 19.0002, "step": 9259 }, { "epoch": 0.3859780751115001, "grad_norm": 408.0, "learning_rate": 7.029180722326769e-05, "loss": 11.8752, "step": 9260 }, { "epoch": 0.3860197574090284, "grad_norm": 928.0, "learning_rate": 7.028563788883332e-05, "loss": 24.6257, "step": 9261 }, { "epoch": 0.38606143970655665, "grad_norm": 580.0, "learning_rate": 7.027946818469137e-05, "loss": 18.5002, "step": 9262 }, { "epoch": 0.38610312200408486, "grad_norm": 300.0, "learning_rate": 7.027329811095432e-05, "loss": 14.1253, "step": 9263 }, { "epoch": 0.38614480430161313, "grad_norm": 412.0, "learning_rate": 7.026712766773462e-05, "loss": 13.8129, "step": 9264 }, { "epoch": 0.38618648659914134, "grad_norm": 340.0, "learning_rate": 7.026095685514471e-05, "loss": 13.7502, "step": 9265 }, { "epoch": 0.3862281688966696, "grad_norm": 138.0, "learning_rate": 7.025478567329706e-05, "loss": 9.4378, "step": 9266 }, { "epoch": 0.3862698511941978, "grad_norm": 470.0, "learning_rate": 7.024861412230415e-05, "loss": 15.5022, "step": 9267 }, { "epoch": 0.3863115334917261, "grad_norm": 332.0, "learning_rate": 7.024244220227845e-05, "loss": 10.5629, "step": 9268 }, { "epoch": 0.3863532157892543, "grad_norm": 376.0, "learning_rate": 7.023626991333246e-05, "loss": 14.6271, "step": 9269 }, { "epoch": 0.38639489808678257, "grad_norm": 820.0, "learning_rate": 7.023009725557863e-05, "loss": 21.8777, "step": 9270 }, { "epoch": 0.3864365803843108, "grad_norm": 780.0, "learning_rate": 7.022392422912949e-05, "loss": 18.3777, "step": 9271 }, { "epoch": 0.38647826268183905, "grad_norm": 330.0, "learning_rate": 7.021775083409754e-05, "loss": 14.7501, "step": 9272 }, { "epoch": 0.38651994497936726, "grad_norm": 364.0, "learning_rate": 7.02115770705953e-05, "loss": 14.5627, "step": 9273 }, { "epoch": 0.3865616272768955, "grad_norm": 422.0, "learning_rate": 7.020540293873524e-05, "loss": 15.5007, "step": 9274 }, { "epoch": 0.38660330957442374, "grad_norm": 348.0, "learning_rate": 7.019922843862993e-05, "loss": 13.8134, "step": 9275 }, { "epoch": 0.386644991871952, "grad_norm": 120.0, "learning_rate": 7.01930535703919e-05, "loss": 8.3753, "step": 9276 }, { "epoch": 0.3866866741694802, "grad_norm": 380.0, "learning_rate": 7.018687833413366e-05, "loss": 13.563, "step": 9277 }, { "epoch": 0.3867283564670085, "grad_norm": 272.0, "learning_rate": 7.01807027299678e-05, "loss": 12.5646, "step": 9278 }, { "epoch": 0.3867700387645367, "grad_norm": 470.0, "learning_rate": 7.01745267580068e-05, "loss": 16.6252, "step": 9279 }, { "epoch": 0.38681172106206496, "grad_norm": 324.0, "learning_rate": 7.016835041836328e-05, "loss": 13.3765, "step": 9280 }, { "epoch": 0.3868534033595932, "grad_norm": 338.0, "learning_rate": 7.016217371114978e-05, "loss": 14.7504, "step": 9281 }, { "epoch": 0.38689508565712144, "grad_norm": 201.0, "learning_rate": 7.015599663647888e-05, "loss": 11.9377, "step": 9282 }, { "epoch": 0.38693676795464965, "grad_norm": 232.0, "learning_rate": 7.014981919446315e-05, "loss": 12.5015, "step": 9283 }, { "epoch": 0.3869784502521779, "grad_norm": 222.0, "learning_rate": 7.014364138521517e-05, "loss": 10.1879, "step": 9284 }, { "epoch": 0.38702013254970613, "grad_norm": 87.5, "learning_rate": 7.013746320884755e-05, "loss": 7.3127, "step": 9285 }, { "epoch": 0.3870618148472344, "grad_norm": 86.5, "learning_rate": 7.013128466547287e-05, "loss": 8.1877, "step": 9286 }, { "epoch": 0.3871034971447626, "grad_norm": 270.0, "learning_rate": 7.012510575520373e-05, "loss": 12.8753, "step": 9287 }, { "epoch": 0.3871451794422909, "grad_norm": 1224.0, "learning_rate": 7.011892647815276e-05, "loss": 26.2506, "step": 9288 }, { "epoch": 0.3871868617398191, "grad_norm": 608.0, "learning_rate": 7.011274683443258e-05, "loss": 19.0002, "step": 9289 }, { "epoch": 0.38722854403734736, "grad_norm": 237.0, "learning_rate": 7.010656682415579e-05, "loss": 9.6894, "step": 9290 }, { "epoch": 0.38727022633487557, "grad_norm": 424.0, "learning_rate": 7.010038644743504e-05, "loss": 14.4377, "step": 9291 }, { "epoch": 0.38731190863240383, "grad_norm": 334.0, "learning_rate": 7.009420570438294e-05, "loss": 15.0002, "step": 9292 }, { "epoch": 0.38735359092993205, "grad_norm": 78.5, "learning_rate": 7.008802459511217e-05, "loss": 5.0638, "step": 9293 }, { "epoch": 0.3873952732274603, "grad_norm": 134.0, "learning_rate": 7.008184311973539e-05, "loss": 9.6252, "step": 9294 }, { "epoch": 0.3874369555249885, "grad_norm": 776.0, "learning_rate": 7.007566127836522e-05, "loss": 20.1276, "step": 9295 }, { "epoch": 0.3874786378225168, "grad_norm": 262.0, "learning_rate": 7.006947907111434e-05, "loss": 11.5002, "step": 9296 }, { "epoch": 0.387520320120045, "grad_norm": 368.0, "learning_rate": 7.006329649809543e-05, "loss": 13.9377, "step": 9297 }, { "epoch": 0.38756200241757327, "grad_norm": 260.0, "learning_rate": 7.005711355942115e-05, "loss": 12.5004, "step": 9298 }, { "epoch": 0.3876036847151015, "grad_norm": 708.0, "learning_rate": 7.00509302552042e-05, "loss": 23.7501, "step": 9299 }, { "epoch": 0.38764536701262975, "grad_norm": 326.0, "learning_rate": 7.004474658555726e-05, "loss": 13.0003, "step": 9300 }, { "epoch": 0.38768704931015796, "grad_norm": 266.0, "learning_rate": 7.003856255059305e-05, "loss": 11.0631, "step": 9301 }, { "epoch": 0.38772873160768623, "grad_norm": 266.0, "learning_rate": 7.003237815042425e-05, "loss": 13.938, "step": 9302 }, { "epoch": 0.38777041390521444, "grad_norm": 316.0, "learning_rate": 7.002619338516358e-05, "loss": 13.0627, "step": 9303 }, { "epoch": 0.3878120962027427, "grad_norm": 480.0, "learning_rate": 7.002000825492375e-05, "loss": 16.8776, "step": 9304 }, { "epoch": 0.3878537785002709, "grad_norm": 336.0, "learning_rate": 7.001382275981749e-05, "loss": 12.3148, "step": 9305 }, { "epoch": 0.3878954607977992, "grad_norm": 1048.0, "learning_rate": 7.000763689995755e-05, "loss": 31.3753, "step": 9306 }, { "epoch": 0.3879371430953274, "grad_norm": 412.0, "learning_rate": 7.000145067545664e-05, "loss": 13.5632, "step": 9307 }, { "epoch": 0.38797882539285566, "grad_norm": 486.0, "learning_rate": 6.999526408642751e-05, "loss": 17.1256, "step": 9308 }, { "epoch": 0.3880205076903839, "grad_norm": 224.0, "learning_rate": 6.998907713298293e-05, "loss": 10.1896, "step": 9309 }, { "epoch": 0.38806218998791214, "grad_norm": 424.0, "learning_rate": 6.998288981523564e-05, "loss": 15.3754, "step": 9310 }, { "epoch": 0.38810387228544035, "grad_norm": 500.0, "learning_rate": 6.997670213329841e-05, "loss": 16.2525, "step": 9311 }, { "epoch": 0.3881455545829686, "grad_norm": 92.5, "learning_rate": 6.997051408728401e-05, "loss": 9.5007, "step": 9312 }, { "epoch": 0.38818723688049683, "grad_norm": 350.0, "learning_rate": 6.996432567730522e-05, "loss": 14.4378, "step": 9313 }, { "epoch": 0.3882289191780251, "grad_norm": 127.0, "learning_rate": 6.995813690347483e-05, "loss": 7.0945, "step": 9314 }, { "epoch": 0.3882706014755533, "grad_norm": 394.0, "learning_rate": 6.995194776590561e-05, "loss": 15.2502, "step": 9315 }, { "epoch": 0.3883122837730816, "grad_norm": 704.0, "learning_rate": 6.994575826471037e-05, "loss": 19.1294, "step": 9316 }, { "epoch": 0.3883539660706098, "grad_norm": 184.0, "learning_rate": 6.99395684000019e-05, "loss": 10.3126, "step": 9317 }, { "epoch": 0.38839564836813806, "grad_norm": 246.0, "learning_rate": 6.993337817189306e-05, "loss": 12.1256, "step": 9318 }, { "epoch": 0.38843733066566627, "grad_norm": 235.0, "learning_rate": 6.992718758049662e-05, "loss": 12.0627, "step": 9319 }, { "epoch": 0.38847901296319454, "grad_norm": 1040.0, "learning_rate": 6.992099662592542e-05, "loss": 29.5003, "step": 9320 }, { "epoch": 0.38852069526072275, "grad_norm": 212.0, "learning_rate": 6.991480530829228e-05, "loss": 8.0022, "step": 9321 }, { "epoch": 0.388562377558251, "grad_norm": 235.0, "learning_rate": 6.990861362771005e-05, "loss": 12.3136, "step": 9322 }, { "epoch": 0.3886040598557792, "grad_norm": 628.0, "learning_rate": 6.990242158429156e-05, "loss": 19.1253, "step": 9323 }, { "epoch": 0.3886457421533075, "grad_norm": 324.0, "learning_rate": 6.989622917814968e-05, "loss": 10.5003, "step": 9324 }, { "epoch": 0.3886874244508357, "grad_norm": 316.0, "learning_rate": 6.989003640939726e-05, "loss": 13.7503, "step": 9325 }, { "epoch": 0.388729106748364, "grad_norm": 176.0, "learning_rate": 6.988384327814716e-05, "loss": 10.6877, "step": 9326 }, { "epoch": 0.3887707890458922, "grad_norm": 360.0, "learning_rate": 6.987764978451226e-05, "loss": 13.8131, "step": 9327 }, { "epoch": 0.38881247134342045, "grad_norm": 362.0, "learning_rate": 6.98714559286054e-05, "loss": 16.0005, "step": 9328 }, { "epoch": 0.38885415364094866, "grad_norm": 612.0, "learning_rate": 6.986526171053951e-05, "loss": 19.7505, "step": 9329 }, { "epoch": 0.38889583593847693, "grad_norm": 540.0, "learning_rate": 6.985906713042745e-05, "loss": 17.8758, "step": 9330 }, { "epoch": 0.38893751823600514, "grad_norm": 696.0, "learning_rate": 6.985287218838214e-05, "loss": 18.3752, "step": 9331 }, { "epoch": 0.3889792005335334, "grad_norm": 95.5, "learning_rate": 6.984667688451648e-05, "loss": 10.3127, "step": 9332 }, { "epoch": 0.3890208828310616, "grad_norm": 310.0, "learning_rate": 6.984048121894335e-05, "loss": 12.1254, "step": 9333 }, { "epoch": 0.3890625651285899, "grad_norm": 736.0, "learning_rate": 6.983428519177571e-05, "loss": 21.7503, "step": 9334 }, { "epoch": 0.38910424742611815, "grad_norm": 584.0, "learning_rate": 6.982808880312644e-05, "loss": 19.6252, "step": 9335 }, { "epoch": 0.38914592972364637, "grad_norm": 202.0, "learning_rate": 6.982189205310851e-05, "loss": 12.313, "step": 9336 }, { "epoch": 0.38918761202117463, "grad_norm": 268.0, "learning_rate": 6.981569494183483e-05, "loss": 11.5033, "step": 9337 }, { "epoch": 0.38922929431870285, "grad_norm": 402.0, "learning_rate": 6.980949746941836e-05, "loss": 14.5012, "step": 9338 }, { "epoch": 0.3892709766162311, "grad_norm": 268.0, "learning_rate": 6.980329963597202e-05, "loss": 12.1253, "step": 9339 }, { "epoch": 0.3893126589137593, "grad_norm": 456.0, "learning_rate": 6.97971014416088e-05, "loss": 16.6254, "step": 9340 }, { "epoch": 0.3893543412112876, "grad_norm": 233.0, "learning_rate": 6.979090288644164e-05, "loss": 11.6877, "step": 9341 }, { "epoch": 0.3893960235088158, "grad_norm": 478.0, "learning_rate": 6.978470397058352e-05, "loss": 17.6261, "step": 9342 }, { "epoch": 0.38943770580634407, "grad_norm": 620.0, "learning_rate": 6.977850469414742e-05, "loss": 17.2504, "step": 9343 }, { "epoch": 0.3894793881038723, "grad_norm": 300.0, "learning_rate": 6.977230505724632e-05, "loss": 13.3751, "step": 9344 }, { "epoch": 0.38952107040140055, "grad_norm": 192.0, "learning_rate": 6.97661050599932e-05, "loss": 11.313, "step": 9345 }, { "epoch": 0.38956275269892876, "grad_norm": 568.0, "learning_rate": 6.975990470250106e-05, "loss": 19.377, "step": 9346 }, { "epoch": 0.389604434996457, "grad_norm": 290.0, "learning_rate": 6.97537039848829e-05, "loss": 13.6877, "step": 9347 }, { "epoch": 0.38964611729398524, "grad_norm": 612.0, "learning_rate": 6.974750290725174e-05, "loss": 17.3786, "step": 9348 }, { "epoch": 0.3896877995915135, "grad_norm": 188.0, "learning_rate": 6.974130146972057e-05, "loss": 10.8127, "step": 9349 }, { "epoch": 0.3897294818890417, "grad_norm": 136.0, "learning_rate": 6.973509967240243e-05, "loss": 10.2524, "step": 9350 }, { "epoch": 0.38977116418657, "grad_norm": 1072.0, "learning_rate": 6.972889751541036e-05, "loss": 22.0008, "step": 9351 }, { "epoch": 0.3898128464840982, "grad_norm": 458.0, "learning_rate": 6.972269499885738e-05, "loss": 14.3752, "step": 9352 }, { "epoch": 0.38985452878162646, "grad_norm": 260.0, "learning_rate": 6.971649212285654e-05, "loss": 13.2526, "step": 9353 }, { "epoch": 0.3898962110791547, "grad_norm": 49.25, "learning_rate": 6.971028888752087e-05, "loss": 7.5628, "step": 9354 }, { "epoch": 0.38993789337668294, "grad_norm": 125.0, "learning_rate": 6.970408529296342e-05, "loss": 8.3127, "step": 9355 }, { "epoch": 0.38997957567421115, "grad_norm": 1384.0, "learning_rate": 6.969788133929729e-05, "loss": 29.505, "step": 9356 }, { "epoch": 0.3900212579717394, "grad_norm": 314.0, "learning_rate": 6.96916770266355e-05, "loss": 13.6253, "step": 9357 }, { "epoch": 0.39006294026926763, "grad_norm": 112.5, "learning_rate": 6.968547235509118e-05, "loss": 8.5002, "step": 9358 }, { "epoch": 0.3901046225667959, "grad_norm": 168.0, "learning_rate": 6.967926732477735e-05, "loss": 8.4381, "step": 9359 }, { "epoch": 0.3901463048643241, "grad_norm": 145.0, "learning_rate": 6.967306193580715e-05, "loss": 10.3128, "step": 9360 }, { "epoch": 0.3901879871618524, "grad_norm": 179.0, "learning_rate": 6.966685618829362e-05, "loss": 11.3127, "step": 9361 }, { "epoch": 0.3902296694593806, "grad_norm": 724.0, "learning_rate": 6.966065008234992e-05, "loss": 22.2503, "step": 9362 }, { "epoch": 0.39027135175690886, "grad_norm": 199.0, "learning_rate": 6.965444361808912e-05, "loss": 11.1897, "step": 9363 }, { "epoch": 0.39031303405443707, "grad_norm": 900.0, "learning_rate": 6.964823679562434e-05, "loss": 23.5002, "step": 9364 }, { "epoch": 0.39035471635196534, "grad_norm": 350.0, "learning_rate": 6.964202961506869e-05, "loss": 14.8753, "step": 9365 }, { "epoch": 0.39039639864949355, "grad_norm": 217.0, "learning_rate": 6.963582207653532e-05, "loss": 12.1263, "step": 9366 }, { "epoch": 0.3904380809470218, "grad_norm": 282.0, "learning_rate": 6.962961418013732e-05, "loss": 11.3755, "step": 9367 }, { "epoch": 0.39047976324455, "grad_norm": 338.0, "learning_rate": 6.962340592598789e-05, "loss": 14.6251, "step": 9368 }, { "epoch": 0.3905214455420783, "grad_norm": 446.0, "learning_rate": 6.961719731420013e-05, "loss": 15.3127, "step": 9369 }, { "epoch": 0.3905631278396065, "grad_norm": 177.0, "learning_rate": 6.961098834488722e-05, "loss": 10.8126, "step": 9370 }, { "epoch": 0.3906048101371348, "grad_norm": 354.0, "learning_rate": 6.96047790181623e-05, "loss": 13.6878, "step": 9371 }, { "epoch": 0.390646492434663, "grad_norm": 368.0, "learning_rate": 6.959856933413854e-05, "loss": 13.1912, "step": 9372 }, { "epoch": 0.39068817473219125, "grad_norm": 171.0, "learning_rate": 6.959235929292912e-05, "loss": 11.3127, "step": 9373 }, { "epoch": 0.39072985702971946, "grad_norm": 368.0, "learning_rate": 6.95861488946472e-05, "loss": 13.7504, "step": 9374 }, { "epoch": 0.39077153932724773, "grad_norm": 162.0, "learning_rate": 6.9579938139406e-05, "loss": 10.2504, "step": 9375 }, { "epoch": 0.39081322162477594, "grad_norm": 204.0, "learning_rate": 6.957372702731867e-05, "loss": 9.8753, "step": 9376 }, { "epoch": 0.3908549039223042, "grad_norm": 548.0, "learning_rate": 6.956751555849843e-05, "loss": 17.5011, "step": 9377 }, { "epoch": 0.3908965862198324, "grad_norm": 156.0, "learning_rate": 6.956130373305849e-05, "loss": 10.4377, "step": 9378 }, { "epoch": 0.3909382685173607, "grad_norm": 216.0, "learning_rate": 6.955509155111204e-05, "loss": 12.3128, "step": 9379 }, { "epoch": 0.3909799508148889, "grad_norm": 82.5, "learning_rate": 6.954887901277231e-05, "loss": 10.4387, "step": 9380 }, { "epoch": 0.39102163311241717, "grad_norm": 229.0, "learning_rate": 6.954266611815255e-05, "loss": 13.4395, "step": 9381 }, { "epoch": 0.3910633154099454, "grad_norm": 380.0, "learning_rate": 6.953645286736594e-05, "loss": 13.5638, "step": 9382 }, { "epoch": 0.39110499770747365, "grad_norm": 390.0, "learning_rate": 6.953023926052576e-05, "loss": 14.3751, "step": 9383 }, { "epoch": 0.39114668000500186, "grad_norm": 302.0, "learning_rate": 6.952402529774522e-05, "loss": 13.5629, "step": 9384 }, { "epoch": 0.3911883623025301, "grad_norm": 278.0, "learning_rate": 6.951781097913758e-05, "loss": 12.5646, "step": 9385 }, { "epoch": 0.39123004460005834, "grad_norm": 187.0, "learning_rate": 6.951159630481612e-05, "loss": 11.1252, "step": 9386 }, { "epoch": 0.3912717268975866, "grad_norm": 556.0, "learning_rate": 6.950538127489408e-05, "loss": 18.2502, "step": 9387 }, { "epoch": 0.3913134091951148, "grad_norm": 252.0, "learning_rate": 6.949916588948473e-05, "loss": 12.6892, "step": 9388 }, { "epoch": 0.3913550914926431, "grad_norm": 166.0, "learning_rate": 6.949295014870137e-05, "loss": 8.5006, "step": 9389 }, { "epoch": 0.3913967737901713, "grad_norm": 524.0, "learning_rate": 6.948673405265725e-05, "loss": 17.3756, "step": 9390 }, { "epoch": 0.39143845608769956, "grad_norm": 280.0, "learning_rate": 6.948051760146565e-05, "loss": 11.8127, "step": 9391 }, { "epoch": 0.39148013838522777, "grad_norm": 134.0, "learning_rate": 6.94743007952399e-05, "loss": 9.3129, "step": 9392 }, { "epoch": 0.39152182068275604, "grad_norm": 944.0, "learning_rate": 6.94680836340933e-05, "loss": 25.5003, "step": 9393 }, { "epoch": 0.39156350298028425, "grad_norm": 224.0, "learning_rate": 6.946186611813914e-05, "loss": 10.1877, "step": 9394 }, { "epoch": 0.3916051852778125, "grad_norm": 520.0, "learning_rate": 6.945564824749075e-05, "loss": 17.2508, "step": 9395 }, { "epoch": 0.39164686757534073, "grad_norm": 776.0, "learning_rate": 6.944943002226144e-05, "loss": 21.5003, "step": 9396 }, { "epoch": 0.391688549872869, "grad_norm": 82.5, "learning_rate": 6.944321144256454e-05, "loss": 9.5628, "step": 9397 }, { "epoch": 0.3917302321703972, "grad_norm": 87.0, "learning_rate": 6.943699250851338e-05, "loss": 7.6256, "step": 9398 }, { "epoch": 0.3917719144679255, "grad_norm": 360.0, "learning_rate": 6.943077322022132e-05, "loss": 12.2501, "step": 9399 }, { "epoch": 0.3918135967654537, "grad_norm": 856.0, "learning_rate": 6.942455357780169e-05, "loss": 27.3752, "step": 9400 }, { "epoch": 0.39185527906298195, "grad_norm": 422.0, "learning_rate": 6.941833358136784e-05, "loss": 14.063, "step": 9401 }, { "epoch": 0.39189696136051017, "grad_norm": 231.0, "learning_rate": 6.941211323103314e-05, "loss": 13.0629, "step": 9402 }, { "epoch": 0.39193864365803843, "grad_norm": 544.0, "learning_rate": 6.940589252691096e-05, "loss": 13.5642, "step": 9403 }, { "epoch": 0.39198032595556664, "grad_norm": 251.0, "learning_rate": 6.939967146911466e-05, "loss": 12.3127, "step": 9404 }, { "epoch": 0.3920220082530949, "grad_norm": 692.0, "learning_rate": 6.93934500577576e-05, "loss": 18.5003, "step": 9405 }, { "epoch": 0.3920636905506231, "grad_norm": 230.0, "learning_rate": 6.938722829295322e-05, "loss": 13.1253, "step": 9406 }, { "epoch": 0.3921053728481514, "grad_norm": 216.0, "learning_rate": 6.938100617481488e-05, "loss": 12.5002, "step": 9407 }, { "epoch": 0.39214705514567966, "grad_norm": 520.0, "learning_rate": 6.937478370345598e-05, "loss": 18.1255, "step": 9408 }, { "epoch": 0.39218873744320787, "grad_norm": 350.0, "learning_rate": 6.936856087898993e-05, "loss": 14.4379, "step": 9409 }, { "epoch": 0.39223041974073614, "grad_norm": 188.0, "learning_rate": 6.936233770153013e-05, "loss": 11.8753, "step": 9410 }, { "epoch": 0.39227210203826435, "grad_norm": 592.0, "learning_rate": 6.935611417119002e-05, "loss": 18.0015, "step": 9411 }, { "epoch": 0.3923137843357926, "grad_norm": 328.0, "learning_rate": 6.934989028808299e-05, "loss": 14.5629, "step": 9412 }, { "epoch": 0.3923554666333208, "grad_norm": 584.0, "learning_rate": 6.934366605232251e-05, "loss": 16.6269, "step": 9413 }, { "epoch": 0.3923971489308491, "grad_norm": 516.0, "learning_rate": 6.933744146402199e-05, "loss": 17.6257, "step": 9414 }, { "epoch": 0.3924388312283773, "grad_norm": 247.0, "learning_rate": 6.933121652329489e-05, "loss": 12.3131, "step": 9415 }, { "epoch": 0.39248051352590557, "grad_norm": 402.0, "learning_rate": 6.932499123025466e-05, "loss": 14.2516, "step": 9416 }, { "epoch": 0.3925221958234338, "grad_norm": 298.0, "learning_rate": 6.931876558501474e-05, "loss": 13.2506, "step": 9417 }, { "epoch": 0.39256387812096205, "grad_norm": 394.0, "learning_rate": 6.931253958768858e-05, "loss": 14.0651, "step": 9418 }, { "epoch": 0.39260556041849026, "grad_norm": 173.0, "learning_rate": 6.93063132383897e-05, "loss": 11.3131, "step": 9419 }, { "epoch": 0.39264724271601853, "grad_norm": 956.0, "learning_rate": 6.930008653723154e-05, "loss": 24.0002, "step": 9420 }, { "epoch": 0.39268892501354674, "grad_norm": 520.0, "learning_rate": 6.929385948432758e-05, "loss": 17.7509, "step": 9421 }, { "epoch": 0.392730607311075, "grad_norm": 354.0, "learning_rate": 6.928763207979133e-05, "loss": 13.2501, "step": 9422 }, { "epoch": 0.3927722896086032, "grad_norm": 360.0, "learning_rate": 6.928140432373628e-05, "loss": 14.5628, "step": 9423 }, { "epoch": 0.3928139719061315, "grad_norm": 410.0, "learning_rate": 6.927517621627591e-05, "loss": 13.3131, "step": 9424 }, { "epoch": 0.3928556542036597, "grad_norm": 384.0, "learning_rate": 6.926894775752375e-05, "loss": 14.8127, "step": 9425 }, { "epoch": 0.39289733650118797, "grad_norm": 346.0, "learning_rate": 6.92627189475933e-05, "loss": 13.9377, "step": 9426 }, { "epoch": 0.3929390187987162, "grad_norm": 980.0, "learning_rate": 6.92564897865981e-05, "loss": 19.1295, "step": 9427 }, { "epoch": 0.39298070109624444, "grad_norm": 227.0, "learning_rate": 6.925026027465165e-05, "loss": 11.0627, "step": 9428 }, { "epoch": 0.39302238339377266, "grad_norm": 460.0, "learning_rate": 6.92440304118675e-05, "loss": 15.1877, "step": 9429 }, { "epoch": 0.3930640656913009, "grad_norm": 43.0, "learning_rate": 6.923780019835918e-05, "loss": 6.2502, "step": 9430 }, { "epoch": 0.39310574798882914, "grad_norm": 420.0, "learning_rate": 6.923156963424025e-05, "loss": 15.1878, "step": 9431 }, { "epoch": 0.3931474302863574, "grad_norm": 185.0, "learning_rate": 6.922533871962426e-05, "loss": 9.7503, "step": 9432 }, { "epoch": 0.3931891125838856, "grad_norm": 342.0, "learning_rate": 6.921910745462476e-05, "loss": 14.1252, "step": 9433 }, { "epoch": 0.3932307948814139, "grad_norm": 434.0, "learning_rate": 6.921287583935535e-05, "loss": 16.7501, "step": 9434 }, { "epoch": 0.3932724771789421, "grad_norm": 240.0, "learning_rate": 6.920664387392953e-05, "loss": 11.438, "step": 9435 }, { "epoch": 0.39331415947647036, "grad_norm": 124.5, "learning_rate": 6.920041155846094e-05, "loss": 9.4389, "step": 9436 }, { "epoch": 0.39335584177399857, "grad_norm": 134.0, "learning_rate": 6.919417889306314e-05, "loss": 9.1878, "step": 9437 }, { "epoch": 0.39339752407152684, "grad_norm": 144.0, "learning_rate": 6.918794587784973e-05, "loss": 9.6256, "step": 9438 }, { "epoch": 0.39343920636905505, "grad_norm": 466.0, "learning_rate": 6.91817125129343e-05, "loss": 15.5003, "step": 9439 }, { "epoch": 0.3934808886665833, "grad_norm": 296.0, "learning_rate": 6.917547879843047e-05, "loss": 12.7502, "step": 9440 }, { "epoch": 0.39352257096411153, "grad_norm": 628.0, "learning_rate": 6.916924473445182e-05, "loss": 18.379, "step": 9441 }, { "epoch": 0.3935642532616398, "grad_norm": 278.0, "learning_rate": 6.916301032111198e-05, "loss": 12.1883, "step": 9442 }, { "epoch": 0.393605935559168, "grad_norm": 282.0, "learning_rate": 6.915677555852459e-05, "loss": 13.5006, "step": 9443 }, { "epoch": 0.3936476178566963, "grad_norm": 57.5, "learning_rate": 6.915054044680326e-05, "loss": 6.7818, "step": 9444 }, { "epoch": 0.3936893001542245, "grad_norm": 294.0, "learning_rate": 6.914430498606162e-05, "loss": 13.1251, "step": 9445 }, { "epoch": 0.39373098245175275, "grad_norm": 440.0, "learning_rate": 6.913806917641334e-05, "loss": 15.1877, "step": 9446 }, { "epoch": 0.39377266474928097, "grad_norm": 92.0, "learning_rate": 6.913183301797204e-05, "loss": 6.1564, "step": 9447 }, { "epoch": 0.39381434704680923, "grad_norm": 173.0, "learning_rate": 6.912559651085139e-05, "loss": 10.8126, "step": 9448 }, { "epoch": 0.39385602934433744, "grad_norm": 290.0, "learning_rate": 6.911935965516504e-05, "loss": 14.3128, "step": 9449 }, { "epoch": 0.3938977116418657, "grad_norm": 440.0, "learning_rate": 6.911312245102665e-05, "loss": 15.1256, "step": 9450 }, { "epoch": 0.3939393939393939, "grad_norm": 744.0, "learning_rate": 6.910688489854993e-05, "loss": 20.6252, "step": 9451 }, { "epoch": 0.3939810762369222, "grad_norm": 388.0, "learning_rate": 6.910064699784853e-05, "loss": 14.5002, "step": 9452 }, { "epoch": 0.3940227585344504, "grad_norm": 236.0, "learning_rate": 6.909440874903615e-05, "loss": 12.1252, "step": 9453 }, { "epoch": 0.39406444083197867, "grad_norm": 140.0, "learning_rate": 6.908817015222646e-05, "loss": 10.6877, "step": 9454 }, { "epoch": 0.3941061231295069, "grad_norm": 400.0, "learning_rate": 6.908193120753317e-05, "loss": 15.6263, "step": 9455 }, { "epoch": 0.39414780542703515, "grad_norm": 436.0, "learning_rate": 6.907569191507001e-05, "loss": 16.5003, "step": 9456 }, { "epoch": 0.39418948772456336, "grad_norm": 306.0, "learning_rate": 6.906945227495066e-05, "loss": 14.1877, "step": 9457 }, { "epoch": 0.3942311700220916, "grad_norm": 236.0, "learning_rate": 6.906321228728885e-05, "loss": 11.9377, "step": 9458 }, { "epoch": 0.39427285231961984, "grad_norm": 302.0, "learning_rate": 6.905697195219832e-05, "loss": 12.9377, "step": 9459 }, { "epoch": 0.3943145346171481, "grad_norm": 116.0, "learning_rate": 6.905073126979275e-05, "loss": 8.6887, "step": 9460 }, { "epoch": 0.3943562169146763, "grad_norm": 170.0, "learning_rate": 6.904449024018593e-05, "loss": 10.3131, "step": 9461 }, { "epoch": 0.3943978992122046, "grad_norm": 312.0, "learning_rate": 6.90382488634916e-05, "loss": 13.7505, "step": 9462 }, { "epoch": 0.3944395815097328, "grad_norm": 191.0, "learning_rate": 6.903200713982347e-05, "loss": 10.8756, "step": 9463 }, { "epoch": 0.39448126380726106, "grad_norm": 284.0, "learning_rate": 6.902576506929534e-05, "loss": 12.1877, "step": 9464 }, { "epoch": 0.3945229461047893, "grad_norm": 320.0, "learning_rate": 6.901952265202094e-05, "loss": 13.4377, "step": 9465 }, { "epoch": 0.39456462840231754, "grad_norm": 624.0, "learning_rate": 6.901327988811405e-05, "loss": 20.7503, "step": 9466 }, { "epoch": 0.39460631069984575, "grad_norm": 446.0, "learning_rate": 6.900703677768846e-05, "loss": 15.6877, "step": 9467 }, { "epoch": 0.394647992997374, "grad_norm": 145.0, "learning_rate": 6.900079332085791e-05, "loss": 8.8132, "step": 9468 }, { "epoch": 0.39468967529490223, "grad_norm": 452.0, "learning_rate": 6.899454951773624e-05, "loss": 16.6254, "step": 9469 }, { "epoch": 0.3947313575924305, "grad_norm": 274.0, "learning_rate": 6.898830536843721e-05, "loss": 12.1877, "step": 9470 }, { "epoch": 0.3947730398899587, "grad_norm": 484.0, "learning_rate": 6.898206087307465e-05, "loss": 17.2502, "step": 9471 }, { "epoch": 0.394814722187487, "grad_norm": 280.0, "learning_rate": 6.897581603176231e-05, "loss": 13.3129, "step": 9472 }, { "epoch": 0.3948564044850152, "grad_norm": 410.0, "learning_rate": 6.896957084461406e-05, "loss": 14.2502, "step": 9473 }, { "epoch": 0.39489808678254346, "grad_norm": 632.0, "learning_rate": 6.896332531174369e-05, "loss": 19.6252, "step": 9474 }, { "epoch": 0.39493976908007167, "grad_norm": 508.0, "learning_rate": 6.895707943326504e-05, "loss": 16.3752, "step": 9475 }, { "epoch": 0.39498145137759993, "grad_norm": 1504.0, "learning_rate": 6.895083320929192e-05, "loss": 31.0039, "step": 9476 }, { "epoch": 0.39502313367512815, "grad_norm": 57.75, "learning_rate": 6.89445866399382e-05, "loss": 6.0315, "step": 9477 }, { "epoch": 0.3950648159726564, "grad_norm": 330.0, "learning_rate": 6.89383397253177e-05, "loss": 14.0017, "step": 9478 }, { "epoch": 0.3951064982701846, "grad_norm": 195.0, "learning_rate": 6.893209246554428e-05, "loss": 12.1879, "step": 9479 }, { "epoch": 0.3951481805677129, "grad_norm": 249.0, "learning_rate": 6.89258448607318e-05, "loss": 12.3137, "step": 9480 }, { "epoch": 0.39518986286524116, "grad_norm": 476.0, "learning_rate": 6.891959691099407e-05, "loss": 17.1252, "step": 9481 }, { "epoch": 0.39523154516276937, "grad_norm": 448.0, "learning_rate": 6.891334861644506e-05, "loss": 16.3752, "step": 9482 }, { "epoch": 0.39527322746029764, "grad_norm": 258.0, "learning_rate": 6.890709997719858e-05, "loss": 7.157, "step": 9483 }, { "epoch": 0.39531490975782585, "grad_norm": 152.0, "learning_rate": 6.890085099336851e-05, "loss": 10.0628, "step": 9484 }, { "epoch": 0.3953565920553541, "grad_norm": 414.0, "learning_rate": 6.889460166506877e-05, "loss": 17.0002, "step": 9485 }, { "epoch": 0.39539827435288233, "grad_norm": 232.0, "learning_rate": 6.888835199241324e-05, "loss": 7.283, "step": 9486 }, { "epoch": 0.3954399566504106, "grad_norm": 296.0, "learning_rate": 6.88821019755158e-05, "loss": 12.688, "step": 9487 }, { "epoch": 0.3954816389479388, "grad_norm": 414.0, "learning_rate": 6.88758516144904e-05, "loss": 17.1262, "step": 9488 }, { "epoch": 0.3955233212454671, "grad_norm": 470.0, "learning_rate": 6.886960090945091e-05, "loss": 15.6255, "step": 9489 }, { "epoch": 0.3955650035429953, "grad_norm": 318.0, "learning_rate": 6.886334986051126e-05, "loss": 12.0005, "step": 9490 }, { "epoch": 0.39560668584052355, "grad_norm": 600.0, "learning_rate": 6.885709846778541e-05, "loss": 20.5002, "step": 9491 }, { "epoch": 0.39564836813805176, "grad_norm": 214.0, "learning_rate": 6.885084673138724e-05, "loss": 11.2502, "step": 9492 }, { "epoch": 0.39569005043558003, "grad_norm": 1264.0, "learning_rate": 6.884459465143071e-05, "loss": 38.0012, "step": 9493 }, { "epoch": 0.39573173273310824, "grad_norm": 170.0, "learning_rate": 6.88383422280298e-05, "loss": 10.3754, "step": 9494 }, { "epoch": 0.3957734150306365, "grad_norm": 380.0, "learning_rate": 6.883208946129842e-05, "loss": 13.1878, "step": 9495 }, { "epoch": 0.3958150973281647, "grad_norm": 684.0, "learning_rate": 6.882583635135054e-05, "loss": 21.1255, "step": 9496 }, { "epoch": 0.395856779625693, "grad_norm": 256.0, "learning_rate": 6.881958289830011e-05, "loss": 13.6269, "step": 9497 }, { "epoch": 0.3958984619232212, "grad_norm": 416.0, "learning_rate": 6.881332910226113e-05, "loss": 15.1261, "step": 9498 }, { "epoch": 0.39594014422074947, "grad_norm": 376.0, "learning_rate": 6.880707496334753e-05, "loss": 15.0633, "step": 9499 }, { "epoch": 0.3959818265182777, "grad_norm": 444.0, "learning_rate": 6.880082048167333e-05, "loss": 16.3752, "step": 9500 }, { "epoch": 0.39602350881580595, "grad_norm": 432.0, "learning_rate": 6.879456565735252e-05, "loss": 15.6879, "step": 9501 }, { "epoch": 0.39606519111333416, "grad_norm": 240.0, "learning_rate": 6.878831049049908e-05, "loss": 11.3128, "step": 9502 }, { "epoch": 0.3961068734108624, "grad_norm": 360.0, "learning_rate": 6.8782054981227e-05, "loss": 14.0627, "step": 9503 }, { "epoch": 0.39614855570839064, "grad_norm": 474.0, "learning_rate": 6.877579912965032e-05, "loss": 17.0002, "step": 9504 }, { "epoch": 0.3961902380059189, "grad_norm": 302.0, "learning_rate": 6.876954293588301e-05, "loss": 13.0633, "step": 9505 }, { "epoch": 0.3962319203034471, "grad_norm": 256.0, "learning_rate": 6.876328640003912e-05, "loss": 12.8127, "step": 9506 }, { "epoch": 0.3962736026009754, "grad_norm": 466.0, "learning_rate": 6.875702952223267e-05, "loss": 11.8753, "step": 9507 }, { "epoch": 0.3963152848985036, "grad_norm": 150.0, "learning_rate": 6.87507723025777e-05, "loss": 10.5008, "step": 9508 }, { "epoch": 0.39635696719603186, "grad_norm": 656.0, "learning_rate": 6.874451474118824e-05, "loss": 17.2513, "step": 9509 }, { "epoch": 0.3963986494935601, "grad_norm": 122.0, "learning_rate": 6.873825683817833e-05, "loss": 8.7505, "step": 9510 }, { "epoch": 0.39644033179108834, "grad_norm": 310.0, "learning_rate": 6.873199859366204e-05, "loss": 14.313, "step": 9511 }, { "epoch": 0.39648201408861655, "grad_norm": 472.0, "learning_rate": 6.87257400077534e-05, "loss": 17.2504, "step": 9512 }, { "epoch": 0.3965236963861448, "grad_norm": 376.0, "learning_rate": 6.871948108056649e-05, "loss": 12.1253, "step": 9513 }, { "epoch": 0.39656537868367303, "grad_norm": 162.0, "learning_rate": 6.871322181221537e-05, "loss": 9.313, "step": 9514 }, { "epoch": 0.3966070609812013, "grad_norm": 354.0, "learning_rate": 6.870696220281412e-05, "loss": 15.0004, "step": 9515 }, { "epoch": 0.3966487432787295, "grad_norm": 223.0, "learning_rate": 6.870070225247682e-05, "loss": 12.0628, "step": 9516 }, { "epoch": 0.3966904255762578, "grad_norm": 258.0, "learning_rate": 6.869444196131758e-05, "loss": 12.5004, "step": 9517 }, { "epoch": 0.396732107873786, "grad_norm": 720.0, "learning_rate": 6.868818132945045e-05, "loss": 22.3756, "step": 9518 }, { "epoch": 0.39677379017131426, "grad_norm": 272.0, "learning_rate": 6.868192035698957e-05, "loss": 9.8143, "step": 9519 }, { "epoch": 0.39681547246884247, "grad_norm": 1192.0, "learning_rate": 6.867565904404903e-05, "loss": 29.0001, "step": 9520 }, { "epoch": 0.39685715476637073, "grad_norm": 296.0, "learning_rate": 6.866939739074297e-05, "loss": 13.1253, "step": 9521 }, { "epoch": 0.39689883706389895, "grad_norm": 486.0, "learning_rate": 6.866313539718545e-05, "loss": 17.3753, "step": 9522 }, { "epoch": 0.3969405193614272, "grad_norm": 1480.0, "learning_rate": 6.865687306349064e-05, "loss": 34.0002, "step": 9523 }, { "epoch": 0.3969822016589554, "grad_norm": 1152.0, "learning_rate": 6.865061038977268e-05, "loss": 25.8796, "step": 9524 }, { "epoch": 0.3970238839564837, "grad_norm": 406.0, "learning_rate": 6.864434737614567e-05, "loss": 16.1252, "step": 9525 }, { "epoch": 0.3970655662540119, "grad_norm": 1024.0, "learning_rate": 6.863808402272379e-05, "loss": 24.0027, "step": 9526 }, { "epoch": 0.39710724855154017, "grad_norm": 608.0, "learning_rate": 6.863182032962117e-05, "loss": 16.8758, "step": 9527 }, { "epoch": 0.3971489308490684, "grad_norm": 416.0, "learning_rate": 6.862555629695196e-05, "loss": 14.9385, "step": 9528 }, { "epoch": 0.39719061314659665, "grad_norm": 74.0, "learning_rate": 6.861929192483034e-05, "loss": 8.0628, "step": 9529 }, { "epoch": 0.39723229544412486, "grad_norm": 684.0, "learning_rate": 6.861302721337046e-05, "loss": 19.2547, "step": 9530 }, { "epoch": 0.39727397774165313, "grad_norm": 276.0, "learning_rate": 6.86067621626865e-05, "loss": 12.6251, "step": 9531 }, { "epoch": 0.39731566003918134, "grad_norm": 211.0, "learning_rate": 6.860049677289269e-05, "loss": 12.4387, "step": 9532 }, { "epoch": 0.3973573423367096, "grad_norm": 736.0, "learning_rate": 6.859423104410313e-05, "loss": 22.0004, "step": 9533 }, { "epoch": 0.3973990246342378, "grad_norm": 484.0, "learning_rate": 6.858796497643209e-05, "loss": 14.6277, "step": 9534 }, { "epoch": 0.3974407069317661, "grad_norm": 245.0, "learning_rate": 6.858169856999372e-05, "loss": 10.9378, "step": 9535 }, { "epoch": 0.3974823892292943, "grad_norm": 172.0, "learning_rate": 6.857543182490225e-05, "loss": 10.0011, "step": 9536 }, { "epoch": 0.39752407152682256, "grad_norm": 216.0, "learning_rate": 6.85691647412719e-05, "loss": 11.7503, "step": 9537 }, { "epoch": 0.3975657538243508, "grad_norm": 125.0, "learning_rate": 6.856289731921685e-05, "loss": 8.8753, "step": 9538 }, { "epoch": 0.39760743612187904, "grad_norm": 876.0, "learning_rate": 6.855662955885137e-05, "loss": 23.3756, "step": 9539 }, { "epoch": 0.39764911841940725, "grad_norm": 105.0, "learning_rate": 6.855036146028966e-05, "loss": 9.1883, "step": 9540 }, { "epoch": 0.3976908007169355, "grad_norm": 1200.0, "learning_rate": 6.854409302364597e-05, "loss": 25.3792, "step": 9541 }, { "epoch": 0.39773248301446373, "grad_norm": 266.0, "learning_rate": 6.853782424903453e-05, "loss": 12.0627, "step": 9542 }, { "epoch": 0.397774165311992, "grad_norm": 344.0, "learning_rate": 6.853155513656959e-05, "loss": 12.6253, "step": 9543 }, { "epoch": 0.3978158476095202, "grad_norm": 576.0, "learning_rate": 6.852528568636543e-05, "loss": 19.8758, "step": 9544 }, { "epoch": 0.3978575299070485, "grad_norm": 162.0, "learning_rate": 6.851901589853627e-05, "loss": 10.188, "step": 9545 }, { "epoch": 0.3978992122045767, "grad_norm": 324.0, "learning_rate": 6.851274577319642e-05, "loss": 13.3131, "step": 9546 }, { "epoch": 0.39794089450210496, "grad_norm": 384.0, "learning_rate": 6.850647531046014e-05, "loss": 14.0628, "step": 9547 }, { "epoch": 0.39798257679963317, "grad_norm": 480.0, "learning_rate": 6.85002045104417e-05, "loss": 16.5003, "step": 9548 }, { "epoch": 0.39802425909716144, "grad_norm": 462.0, "learning_rate": 6.849393337325538e-05, "loss": 15.8754, "step": 9549 }, { "epoch": 0.39806594139468965, "grad_norm": 880.0, "learning_rate": 6.848766189901549e-05, "loss": 19.1304, "step": 9550 }, { "epoch": 0.3981076236922179, "grad_norm": 76.0, "learning_rate": 6.848139008783632e-05, "loss": 9.1885, "step": 9551 }, { "epoch": 0.3981493059897461, "grad_norm": 416.0, "learning_rate": 6.847511793983219e-05, "loss": 16.1253, "step": 9552 }, { "epoch": 0.3981909882872744, "grad_norm": 316.0, "learning_rate": 6.846884545511738e-05, "loss": 12.2502, "step": 9553 }, { "epoch": 0.39823267058480266, "grad_norm": 177.0, "learning_rate": 6.846257263380623e-05, "loss": 12.001, "step": 9554 }, { "epoch": 0.3982743528823309, "grad_norm": 736.0, "learning_rate": 6.845629947601304e-05, "loss": 20.7524, "step": 9555 }, { "epoch": 0.39831603517985914, "grad_norm": 230.0, "learning_rate": 6.845002598185215e-05, "loss": 10.3131, "step": 9556 }, { "epoch": 0.39835771747738735, "grad_norm": 93.5, "learning_rate": 6.844375215143792e-05, "loss": 7.8441, "step": 9557 }, { "epoch": 0.3983993997749156, "grad_norm": 76.0, "learning_rate": 6.843747798488466e-05, "loss": 7.5004, "step": 9558 }, { "epoch": 0.39844108207244383, "grad_norm": 136.0, "learning_rate": 6.843120348230672e-05, "loss": 9.5628, "step": 9559 }, { "epoch": 0.3984827643699721, "grad_norm": 300.0, "learning_rate": 6.842492864381848e-05, "loss": 13.5017, "step": 9560 }, { "epoch": 0.3985244466675003, "grad_norm": 152.0, "learning_rate": 6.841865346953427e-05, "loss": 9.1263, "step": 9561 }, { "epoch": 0.3985661289650286, "grad_norm": 170.0, "learning_rate": 6.841237795956845e-05, "loss": 10.6878, "step": 9562 }, { "epoch": 0.3986078112625568, "grad_norm": 434.0, "learning_rate": 6.840610211403542e-05, "loss": 16.3754, "step": 9563 }, { "epoch": 0.39864949356008506, "grad_norm": 350.0, "learning_rate": 6.839982593304954e-05, "loss": 15.2503, "step": 9564 }, { "epoch": 0.39869117585761327, "grad_norm": 115.5, "learning_rate": 6.839354941672522e-05, "loss": 9.5003, "step": 9565 }, { "epoch": 0.39873285815514153, "grad_norm": 392.0, "learning_rate": 6.83872725651768e-05, "loss": 14.8127, "step": 9566 }, { "epoch": 0.39877454045266975, "grad_norm": 94.5, "learning_rate": 6.838099537851874e-05, "loss": 7.5939, "step": 9567 }, { "epoch": 0.398816222750198, "grad_norm": 272.0, "learning_rate": 6.837471785686537e-05, "loss": 14.0012, "step": 9568 }, { "epoch": 0.3988579050477262, "grad_norm": 141.0, "learning_rate": 6.836844000033115e-05, "loss": 8.1885, "step": 9569 }, { "epoch": 0.3988995873452545, "grad_norm": 245.0, "learning_rate": 6.836216180903048e-05, "loss": 12.3127, "step": 9570 }, { "epoch": 0.3989412696427827, "grad_norm": 472.0, "learning_rate": 6.835588328307779e-05, "loss": 17.0001, "step": 9571 }, { "epoch": 0.39898295194031097, "grad_norm": 160.0, "learning_rate": 6.834960442258749e-05, "loss": 7.5941, "step": 9572 }, { "epoch": 0.3990246342378392, "grad_norm": 450.0, "learning_rate": 6.834332522767402e-05, "loss": 15.6253, "step": 9573 }, { "epoch": 0.39906631653536745, "grad_norm": 272.0, "learning_rate": 6.833704569845182e-05, "loss": 12.7503, "step": 9574 }, { "epoch": 0.39910799883289566, "grad_norm": 548.0, "learning_rate": 6.833076583503533e-05, "loss": 16.6252, "step": 9575 }, { "epoch": 0.39914968113042393, "grad_norm": 290.0, "learning_rate": 6.832448563753902e-05, "loss": 13.3752, "step": 9576 }, { "epoch": 0.39919136342795214, "grad_norm": 296.0, "learning_rate": 6.831820510607731e-05, "loss": 13.0627, "step": 9577 }, { "epoch": 0.3992330457254804, "grad_norm": 66.0, "learning_rate": 6.831192424076471e-05, "loss": 8.5628, "step": 9578 }, { "epoch": 0.3992747280230086, "grad_norm": 548.0, "learning_rate": 6.830564304171565e-05, "loss": 17.8755, "step": 9579 }, { "epoch": 0.3993164103205369, "grad_norm": 146.0, "learning_rate": 6.829936150904463e-05, "loss": 9.7503, "step": 9580 }, { "epoch": 0.3993580926180651, "grad_norm": 145.0, "learning_rate": 6.829307964286609e-05, "loss": 7.9692, "step": 9581 }, { "epoch": 0.39939977491559336, "grad_norm": 194.0, "learning_rate": 6.828679744329459e-05, "loss": 9.3141, "step": 9582 }, { "epoch": 0.3994414572131216, "grad_norm": 324.0, "learning_rate": 6.828051491044455e-05, "loss": 14.5629, "step": 9583 }, { "epoch": 0.39948313951064984, "grad_norm": 768.0, "learning_rate": 6.827423204443054e-05, "loss": 22.5002, "step": 9584 }, { "epoch": 0.39952482180817805, "grad_norm": 1160.0, "learning_rate": 6.826794884536701e-05, "loss": 22.7553, "step": 9585 }, { "epoch": 0.3995665041057063, "grad_norm": 436.0, "learning_rate": 6.82616653133685e-05, "loss": 16.1258, "step": 9586 }, { "epoch": 0.39960818640323453, "grad_norm": 132.0, "learning_rate": 6.825538144854951e-05, "loss": 9.6877, "step": 9587 }, { "epoch": 0.3996498687007628, "grad_norm": 592.0, "learning_rate": 6.824909725102457e-05, "loss": 18.7502, "step": 9588 }, { "epoch": 0.399691550998291, "grad_norm": 296.0, "learning_rate": 6.82428127209082e-05, "loss": 14.5004, "step": 9589 }, { "epoch": 0.3997332332958193, "grad_norm": 68.0, "learning_rate": 6.823652785831498e-05, "loss": 7.4689, "step": 9590 }, { "epoch": 0.3997749155933475, "grad_norm": 912.0, "learning_rate": 6.82302426633594e-05, "loss": 21.7576, "step": 9591 }, { "epoch": 0.39981659789087576, "grad_norm": 438.0, "learning_rate": 6.822395713615603e-05, "loss": 15.3145, "step": 9592 }, { "epoch": 0.39985828018840397, "grad_norm": 462.0, "learning_rate": 6.821767127681942e-05, "loss": 16.6258, "step": 9593 }, { "epoch": 0.39989996248593224, "grad_norm": 232.0, "learning_rate": 6.821138508546414e-05, "loss": 11.6878, "step": 9594 }, { "epoch": 0.39994164478346045, "grad_norm": 254.0, "learning_rate": 6.820509856220476e-05, "loss": 11.4379, "step": 9595 }, { "epoch": 0.3999833270809887, "grad_norm": 100.0, "learning_rate": 6.819881170715584e-05, "loss": 9.2503, "step": 9596 }, { "epoch": 0.4000250093785169, "grad_norm": 250.0, "learning_rate": 6.819252452043194e-05, "loss": 12.2501, "step": 9597 }, { "epoch": 0.4000666916760452, "grad_norm": 217.0, "learning_rate": 6.818623700214768e-05, "loss": 11.6876, "step": 9598 }, { "epoch": 0.4001083739735734, "grad_norm": 1048.0, "learning_rate": 6.817994915241764e-05, "loss": 26.5002, "step": 9599 }, { "epoch": 0.4001500562711017, "grad_norm": 748.0, "learning_rate": 6.817366097135641e-05, "loss": 18.0003, "step": 9600 }, { "epoch": 0.4001917385686299, "grad_norm": 382.0, "learning_rate": 6.81673724590786e-05, "loss": 15.3756, "step": 9601 }, { "epoch": 0.40023342086615815, "grad_norm": 63.25, "learning_rate": 6.816108361569881e-05, "loss": 7.2504, "step": 9602 }, { "epoch": 0.40027510316368636, "grad_norm": 528.0, "learning_rate": 6.815479444133166e-05, "loss": 18.3757, "step": 9603 }, { "epoch": 0.40031678546121463, "grad_norm": 390.0, "learning_rate": 6.814850493609176e-05, "loss": 15.9379, "step": 9604 }, { "epoch": 0.40035846775874284, "grad_norm": 384.0, "learning_rate": 6.814221510009376e-05, "loss": 15.1882, "step": 9605 }, { "epoch": 0.4004001500562711, "grad_norm": 416.0, "learning_rate": 6.813592493345227e-05, "loss": 15.1255, "step": 9606 }, { "epoch": 0.4004418323537993, "grad_norm": 572.0, "learning_rate": 6.812963443628194e-05, "loss": 18.7501, "step": 9607 }, { "epoch": 0.4004835146513276, "grad_norm": 320.0, "learning_rate": 6.812334360869742e-05, "loss": 14.2502, "step": 9608 }, { "epoch": 0.4005251969488558, "grad_norm": 133.0, "learning_rate": 6.811705245081333e-05, "loss": 9.5003, "step": 9609 }, { "epoch": 0.40056687924638407, "grad_norm": 130.0, "learning_rate": 6.811076096274438e-05, "loss": 11.2509, "step": 9610 }, { "epoch": 0.4006085615439123, "grad_norm": 712.0, "learning_rate": 6.810446914460519e-05, "loss": 18.3766, "step": 9611 }, { "epoch": 0.40065024384144055, "grad_norm": 203.0, "learning_rate": 6.809817699651045e-05, "loss": 10.7506, "step": 9612 }, { "epoch": 0.40069192613896876, "grad_norm": 268.0, "learning_rate": 6.809188451857482e-05, "loss": 12.1254, "step": 9613 }, { "epoch": 0.400733608436497, "grad_norm": 600.0, "learning_rate": 6.808559171091298e-05, "loss": 18.2513, "step": 9614 }, { "epoch": 0.40077529073402524, "grad_norm": 95.5, "learning_rate": 6.807929857363964e-05, "loss": 9.2501, "step": 9615 }, { "epoch": 0.4008169730315535, "grad_norm": 600.0, "learning_rate": 6.807300510686949e-05, "loss": 19.8754, "step": 9616 }, { "epoch": 0.4008586553290817, "grad_norm": 508.0, "learning_rate": 6.80667113107172e-05, "loss": 17.6253, "step": 9617 }, { "epoch": 0.40090033762661, "grad_norm": 320.0, "learning_rate": 6.806041718529749e-05, "loss": 12.313, "step": 9618 }, { "epoch": 0.4009420199241382, "grad_norm": 382.0, "learning_rate": 6.805412273072506e-05, "loss": 16.1251, "step": 9619 }, { "epoch": 0.40098370222166646, "grad_norm": 386.0, "learning_rate": 6.804782794711467e-05, "loss": 16.3755, "step": 9620 }, { "epoch": 0.40102538451919467, "grad_norm": 516.0, "learning_rate": 6.804153283458102e-05, "loss": 18.7502, "step": 9621 }, { "epoch": 0.40106706681672294, "grad_norm": 294.0, "learning_rate": 6.803523739323881e-05, "loss": 15.1258, "step": 9622 }, { "epoch": 0.40110874911425115, "grad_norm": 960.0, "learning_rate": 6.80289416232028e-05, "loss": 24.8761, "step": 9623 }, { "epoch": 0.4011504314117794, "grad_norm": 580.0, "learning_rate": 6.802264552458773e-05, "loss": 17.7502, "step": 9624 }, { "epoch": 0.40119211370930763, "grad_norm": 38.5, "learning_rate": 6.801634909750834e-05, "loss": 6.188, "step": 9625 }, { "epoch": 0.4012337960068359, "grad_norm": 472.0, "learning_rate": 6.80100523420794e-05, "loss": 17.3753, "step": 9626 }, { "epoch": 0.40127547830436416, "grad_norm": 342.0, "learning_rate": 6.800375525841566e-05, "loss": 13.3755, "step": 9627 }, { "epoch": 0.4013171606018924, "grad_norm": 440.0, "learning_rate": 6.799745784663187e-05, "loss": 16.7502, "step": 9628 }, { "epoch": 0.40135884289942064, "grad_norm": 378.0, "learning_rate": 6.799116010684282e-05, "loss": 17.5012, "step": 9629 }, { "epoch": 0.40140052519694885, "grad_norm": 380.0, "learning_rate": 6.798486203916328e-05, "loss": 15.6877, "step": 9630 }, { "epoch": 0.4014422074944771, "grad_norm": 324.0, "learning_rate": 6.797856364370802e-05, "loss": 15.6253, "step": 9631 }, { "epoch": 0.40148388979200533, "grad_norm": 596.0, "learning_rate": 6.797226492059186e-05, "loss": 18.3751, "step": 9632 }, { "epoch": 0.4015255720895336, "grad_norm": 1064.0, "learning_rate": 6.796596586992956e-05, "loss": 23.5056, "step": 9633 }, { "epoch": 0.4015672543870618, "grad_norm": 848.0, "learning_rate": 6.795966649183596e-05, "loss": 23.8779, "step": 9634 }, { "epoch": 0.4016089366845901, "grad_norm": 474.0, "learning_rate": 6.795336678642582e-05, "loss": 14.938, "step": 9635 }, { "epoch": 0.4016506189821183, "grad_norm": 133.0, "learning_rate": 6.794706675381398e-05, "loss": 10.1255, "step": 9636 }, { "epoch": 0.40169230127964656, "grad_norm": 95.5, "learning_rate": 6.794076639411526e-05, "loss": 7.7191, "step": 9637 }, { "epoch": 0.40173398357717477, "grad_norm": 532.0, "learning_rate": 6.793446570744448e-05, "loss": 16.2505, "step": 9638 }, { "epoch": 0.40177566587470304, "grad_norm": 183.0, "learning_rate": 6.792816469391647e-05, "loss": 10.8127, "step": 9639 }, { "epoch": 0.40181734817223125, "grad_norm": 212.0, "learning_rate": 6.792186335364608e-05, "loss": 11.8131, "step": 9640 }, { "epoch": 0.4018590304697595, "grad_norm": 166.0, "learning_rate": 6.791556168674813e-05, "loss": 10.0002, "step": 9641 }, { "epoch": 0.4019007127672877, "grad_norm": 148.0, "learning_rate": 6.790925969333748e-05, "loss": 8.9386, "step": 9642 }, { "epoch": 0.401942395064816, "grad_norm": 70.0, "learning_rate": 6.790295737352898e-05, "loss": 5.0627, "step": 9643 }, { "epoch": 0.4019840773623442, "grad_norm": 187.0, "learning_rate": 6.789665472743747e-05, "loss": 11.0005, "step": 9644 }, { "epoch": 0.4020257596598725, "grad_norm": 438.0, "learning_rate": 6.789035175517786e-05, "loss": 15.8133, "step": 9645 }, { "epoch": 0.4020674419574007, "grad_norm": 418.0, "learning_rate": 6.7884048456865e-05, "loss": 15.0047, "step": 9646 }, { "epoch": 0.40210912425492895, "grad_norm": 422.0, "learning_rate": 6.787774483261377e-05, "loss": 15.9381, "step": 9647 }, { "epoch": 0.40215080655245716, "grad_norm": 107.0, "learning_rate": 6.787144088253906e-05, "loss": 8.5634, "step": 9648 }, { "epoch": 0.40219248884998543, "grad_norm": 89.0, "learning_rate": 6.786513660675573e-05, "loss": 8.6252, "step": 9649 }, { "epoch": 0.40223417114751364, "grad_norm": 442.0, "learning_rate": 6.785883200537872e-05, "loss": 14.8796, "step": 9650 }, { "epoch": 0.4022758534450419, "grad_norm": 243.0, "learning_rate": 6.785252707852292e-05, "loss": 11.5001, "step": 9651 }, { "epoch": 0.4023175357425701, "grad_norm": 225.0, "learning_rate": 6.784622182630322e-05, "loss": 13.2503, "step": 9652 }, { "epoch": 0.4023592180400984, "grad_norm": 57.75, "learning_rate": 6.783991624883453e-05, "loss": 7.2818, "step": 9653 }, { "epoch": 0.4024009003376266, "grad_norm": 51.25, "learning_rate": 6.78336103462318e-05, "loss": 8.0004, "step": 9654 }, { "epoch": 0.40244258263515487, "grad_norm": 243.0, "learning_rate": 6.782730411860993e-05, "loss": 11.1252, "step": 9655 }, { "epoch": 0.4024842649326831, "grad_norm": 540.0, "learning_rate": 6.782099756608387e-05, "loss": 17.2502, "step": 9656 }, { "epoch": 0.40252594723021135, "grad_norm": 434.0, "learning_rate": 6.78146906887685e-05, "loss": 16.626, "step": 9657 }, { "epoch": 0.40256762952773956, "grad_norm": 170.0, "learning_rate": 6.780838348677887e-05, "loss": 10.5629, "step": 9658 }, { "epoch": 0.4026093118252678, "grad_norm": 202.0, "learning_rate": 6.780207596022985e-05, "loss": 11.5006, "step": 9659 }, { "epoch": 0.40265099412279604, "grad_norm": 340.0, "learning_rate": 6.779576810923641e-05, "loss": 14.5629, "step": 9660 }, { "epoch": 0.4026926764203243, "grad_norm": 145.0, "learning_rate": 6.778945993391353e-05, "loss": 7.9065, "step": 9661 }, { "epoch": 0.4027343587178525, "grad_norm": 404.0, "learning_rate": 6.778315143437615e-05, "loss": 15.0006, "step": 9662 }, { "epoch": 0.4027760410153808, "grad_norm": 53.5, "learning_rate": 6.777684261073925e-05, "loss": 7.1262, "step": 9663 }, { "epoch": 0.402817723312909, "grad_norm": 158.0, "learning_rate": 6.777053346311783e-05, "loss": 10.7503, "step": 9664 }, { "epoch": 0.40285940561043726, "grad_norm": 928.0, "learning_rate": 6.776422399162685e-05, "loss": 21.1301, "step": 9665 }, { "epoch": 0.40290108790796547, "grad_norm": 458.0, "learning_rate": 6.775791419638132e-05, "loss": 16.3752, "step": 9666 }, { "epoch": 0.40294277020549374, "grad_norm": 172.0, "learning_rate": 6.775160407749623e-05, "loss": 10.1253, "step": 9667 }, { "epoch": 0.40298445250302195, "grad_norm": 300.0, "learning_rate": 6.774529363508656e-05, "loss": 13.6262, "step": 9668 }, { "epoch": 0.4030261348005502, "grad_norm": 178.0, "learning_rate": 6.773898286926736e-05, "loss": 10.8127, "step": 9669 }, { "epoch": 0.40306781709807843, "grad_norm": 48.25, "learning_rate": 6.773267178015361e-05, "loss": 7.4694, "step": 9670 }, { "epoch": 0.4031094993956067, "grad_norm": 296.0, "learning_rate": 6.772636036786034e-05, "loss": 13.3767, "step": 9671 }, { "epoch": 0.4031511816931349, "grad_norm": 636.0, "learning_rate": 6.772004863250259e-05, "loss": 16.8751, "step": 9672 }, { "epoch": 0.4031928639906632, "grad_norm": 228.0, "learning_rate": 6.771373657419538e-05, "loss": 10.3135, "step": 9673 }, { "epoch": 0.4032345462881914, "grad_norm": 386.0, "learning_rate": 6.770742419305374e-05, "loss": 12.8142, "step": 9674 }, { "epoch": 0.40327622858571965, "grad_norm": 284.0, "learning_rate": 6.770111148919274e-05, "loss": 11.6255, "step": 9675 }, { "epoch": 0.40331791088324787, "grad_norm": 412.0, "learning_rate": 6.769479846272739e-05, "loss": 15.8126, "step": 9676 }, { "epoch": 0.40335959318077613, "grad_norm": 716.0, "learning_rate": 6.768848511377277e-05, "loss": 22.5008, "step": 9677 }, { "epoch": 0.40340127547830434, "grad_norm": 528.0, "learning_rate": 6.768217144244395e-05, "loss": 16.1254, "step": 9678 }, { "epoch": 0.4034429577758326, "grad_norm": 272.0, "learning_rate": 6.767585744885598e-05, "loss": 13.0005, "step": 9679 }, { "epoch": 0.4034846400733608, "grad_norm": 210.0, "learning_rate": 6.766954313312393e-05, "loss": 7.6565, "step": 9680 }, { "epoch": 0.4035263223708891, "grad_norm": 104.0, "learning_rate": 6.766322849536291e-05, "loss": 9.8133, "step": 9681 }, { "epoch": 0.4035680046684173, "grad_norm": 676.0, "learning_rate": 6.765691353568795e-05, "loss": 21.376, "step": 9682 }, { "epoch": 0.40360968696594557, "grad_norm": 282.0, "learning_rate": 6.76505982542142e-05, "loss": 12.2502, "step": 9683 }, { "epoch": 0.4036513692634738, "grad_norm": 210.0, "learning_rate": 6.764428265105673e-05, "loss": 9.3755, "step": 9684 }, { "epoch": 0.40369305156100205, "grad_norm": 632.0, "learning_rate": 6.763796672633062e-05, "loss": 18.3772, "step": 9685 }, { "epoch": 0.40373473385853026, "grad_norm": 222.0, "learning_rate": 6.763165048015102e-05, "loss": 11.6889, "step": 9686 }, { "epoch": 0.4037764161560585, "grad_norm": 572.0, "learning_rate": 6.762533391263302e-05, "loss": 18.3753, "step": 9687 }, { "epoch": 0.40381809845358674, "grad_norm": 480.0, "learning_rate": 6.761901702389175e-05, "loss": 16.7523, "step": 9688 }, { "epoch": 0.403859780751115, "grad_norm": 330.0, "learning_rate": 6.761269981404233e-05, "loss": 14.1256, "step": 9689 }, { "epoch": 0.4039014630486432, "grad_norm": 428.0, "learning_rate": 6.760638228319989e-05, "loss": 16.6254, "step": 9690 }, { "epoch": 0.4039431453461715, "grad_norm": 434.0, "learning_rate": 6.760006443147956e-05, "loss": 15.1903, "step": 9691 }, { "epoch": 0.4039848276436997, "grad_norm": 250.0, "learning_rate": 6.759374625899651e-05, "loss": 13.1889, "step": 9692 }, { "epoch": 0.40402650994122796, "grad_norm": 186.0, "learning_rate": 6.758742776586586e-05, "loss": 10.6253, "step": 9693 }, { "epoch": 0.4040681922387562, "grad_norm": 103.0, "learning_rate": 6.758110895220277e-05, "loss": 9.5627, "step": 9694 }, { "epoch": 0.40410987453628444, "grad_norm": 173.0, "learning_rate": 6.757478981812242e-05, "loss": 11.0633, "step": 9695 }, { "epoch": 0.40415155683381265, "grad_norm": 644.0, "learning_rate": 6.756847036373997e-05, "loss": 19.5007, "step": 9696 }, { "epoch": 0.4041932391313409, "grad_norm": 256.0, "learning_rate": 6.756215058917058e-05, "loss": 12.1876, "step": 9697 }, { "epoch": 0.40423492142886913, "grad_norm": 187.0, "learning_rate": 6.755583049452944e-05, "loss": 11.8126, "step": 9698 }, { "epoch": 0.4042766037263974, "grad_norm": 249.0, "learning_rate": 6.754951007993173e-05, "loss": 11.6877, "step": 9699 }, { "epoch": 0.40431828602392567, "grad_norm": 456.0, "learning_rate": 6.754318934549264e-05, "loss": 16.0006, "step": 9700 }, { "epoch": 0.4043599683214539, "grad_norm": 320.0, "learning_rate": 6.753686829132737e-05, "loss": 14.2511, "step": 9701 }, { "epoch": 0.40440165061898214, "grad_norm": 772.0, "learning_rate": 6.753054691755112e-05, "loss": 23.3752, "step": 9702 }, { "epoch": 0.40444333291651036, "grad_norm": 372.0, "learning_rate": 6.75242252242791e-05, "loss": 15.7509, "step": 9703 }, { "epoch": 0.4044850152140386, "grad_norm": 380.0, "learning_rate": 6.751790321162651e-05, "loss": 13.5627, "step": 9704 }, { "epoch": 0.40452669751156684, "grad_norm": 416.0, "learning_rate": 6.751158087970858e-05, "loss": 14.8751, "step": 9705 }, { "epoch": 0.4045683798090951, "grad_norm": 140.0, "learning_rate": 6.750525822864055e-05, "loss": 10.1257, "step": 9706 }, { "epoch": 0.4046100621066233, "grad_norm": 284.0, "learning_rate": 6.74989352585376e-05, "loss": 13.0004, "step": 9707 }, { "epoch": 0.4046517444041516, "grad_norm": 632.0, "learning_rate": 6.749261196951502e-05, "loss": 19.6252, "step": 9708 }, { "epoch": 0.4046934267016798, "grad_norm": 356.0, "learning_rate": 6.748628836168804e-05, "loss": 14.5004, "step": 9709 }, { "epoch": 0.40473510899920806, "grad_norm": 142.0, "learning_rate": 6.747996443517191e-05, "loss": 9.6253, "step": 9710 }, { "epoch": 0.40477679129673627, "grad_norm": 112.0, "learning_rate": 6.747364019008185e-05, "loss": 8.7502, "step": 9711 }, { "epoch": 0.40481847359426454, "grad_norm": 736.0, "learning_rate": 6.746731562653317e-05, "loss": 24.2506, "step": 9712 }, { "epoch": 0.40486015589179275, "grad_norm": 68.0, "learning_rate": 6.74609907446411e-05, "loss": 7.3755, "step": 9713 }, { "epoch": 0.404901838189321, "grad_norm": 288.0, "learning_rate": 6.745466554452094e-05, "loss": 9.1254, "step": 9714 }, { "epoch": 0.40494352048684923, "grad_norm": 584.0, "learning_rate": 6.744834002628792e-05, "loss": 15.7502, "step": 9715 }, { "epoch": 0.4049852027843775, "grad_norm": 290.0, "learning_rate": 6.744201419005738e-05, "loss": 11.4378, "step": 9716 }, { "epoch": 0.4050268850819057, "grad_norm": 290.0, "learning_rate": 6.743568803594458e-05, "loss": 12.8753, "step": 9717 }, { "epoch": 0.405068567379434, "grad_norm": 217.0, "learning_rate": 6.74293615640648e-05, "loss": 12.1252, "step": 9718 }, { "epoch": 0.4051102496769622, "grad_norm": 162.0, "learning_rate": 6.742303477453337e-05, "loss": 7.8132, "step": 9719 }, { "epoch": 0.40515193197449045, "grad_norm": 390.0, "learning_rate": 6.741670766746558e-05, "loss": 15.1878, "step": 9720 }, { "epoch": 0.40519361427201867, "grad_norm": 900.0, "learning_rate": 6.741038024297676e-05, "loss": 30.3775, "step": 9721 }, { "epoch": 0.40523529656954693, "grad_norm": 584.0, "learning_rate": 6.74040525011822e-05, "loss": 16.8757, "step": 9722 }, { "epoch": 0.40527697886707514, "grad_norm": 119.0, "learning_rate": 6.739772444219725e-05, "loss": 9.8127, "step": 9723 }, { "epoch": 0.4053186611646034, "grad_norm": 752.0, "learning_rate": 6.739139606613722e-05, "loss": 20.8793, "step": 9724 }, { "epoch": 0.4053603434621316, "grad_norm": 604.0, "learning_rate": 6.738506737311747e-05, "loss": 19.1256, "step": 9725 }, { "epoch": 0.4054020257596599, "grad_norm": 272.0, "learning_rate": 6.73787383632533e-05, "loss": 13.2501, "step": 9726 }, { "epoch": 0.4054437080571881, "grad_norm": 320.0, "learning_rate": 6.737240903666008e-05, "loss": 14.2502, "step": 9727 }, { "epoch": 0.40548539035471637, "grad_norm": 163.0, "learning_rate": 6.736607939345319e-05, "loss": 9.6251, "step": 9728 }, { "epoch": 0.4055270726522446, "grad_norm": 600.0, "learning_rate": 6.735974943374793e-05, "loss": 19.2513, "step": 9729 }, { "epoch": 0.40556875494977285, "grad_norm": 167.0, "learning_rate": 6.735341915765972e-05, "loss": 9.3755, "step": 9730 }, { "epoch": 0.40561043724730106, "grad_norm": 484.0, "learning_rate": 6.73470885653039e-05, "loss": 16.1268, "step": 9731 }, { "epoch": 0.4056521195448293, "grad_norm": 332.0, "learning_rate": 6.734075765679583e-05, "loss": 14.0012, "step": 9732 }, { "epoch": 0.40569380184235754, "grad_norm": 334.0, "learning_rate": 6.733442643225094e-05, "loss": 11.1885, "step": 9733 }, { "epoch": 0.4057354841398858, "grad_norm": 237.0, "learning_rate": 6.732809489178456e-05, "loss": 11.5627, "step": 9734 }, { "epoch": 0.405777166437414, "grad_norm": 332.0, "learning_rate": 6.732176303551214e-05, "loss": 14.1886, "step": 9735 }, { "epoch": 0.4058188487349423, "grad_norm": 528.0, "learning_rate": 6.731543086354904e-05, "loss": 16.6253, "step": 9736 }, { "epoch": 0.4058605310324705, "grad_norm": 60.75, "learning_rate": 6.730909837601067e-05, "loss": 7.5941, "step": 9737 }, { "epoch": 0.40590221332999876, "grad_norm": 404.0, "learning_rate": 6.730276557301246e-05, "loss": 16.1252, "step": 9738 }, { "epoch": 0.405943895627527, "grad_norm": 784.0, "learning_rate": 6.72964324546698e-05, "loss": 23.6254, "step": 9739 }, { "epoch": 0.40598557792505524, "grad_norm": 334.0, "learning_rate": 6.729009902109813e-05, "loss": 15.3131, "step": 9740 }, { "epoch": 0.40602726022258345, "grad_norm": 113.0, "learning_rate": 6.728376527241286e-05, "loss": 9.4378, "step": 9741 }, { "epoch": 0.4060689425201117, "grad_norm": 482.0, "learning_rate": 6.727743120872945e-05, "loss": 17.7502, "step": 9742 }, { "epoch": 0.40611062481763993, "grad_norm": 944.0, "learning_rate": 6.72710968301633e-05, "loss": 23.6254, "step": 9743 }, { "epoch": 0.4061523071151682, "grad_norm": 316.0, "learning_rate": 6.72647621368299e-05, "loss": 13.9379, "step": 9744 }, { "epoch": 0.4061939894126964, "grad_norm": 800.0, "learning_rate": 6.725842712884466e-05, "loss": 21.7506, "step": 9745 }, { "epoch": 0.4062356717102247, "grad_norm": 186.0, "learning_rate": 6.725209180632305e-05, "loss": 10.2504, "step": 9746 }, { "epoch": 0.4062773540077529, "grad_norm": 94.0, "learning_rate": 6.724575616938055e-05, "loss": 8.2505, "step": 9747 }, { "epoch": 0.40631903630528116, "grad_norm": 219.0, "learning_rate": 6.723942021813261e-05, "loss": 12.5628, "step": 9748 }, { "epoch": 0.40636071860280937, "grad_norm": 564.0, "learning_rate": 6.72330839526947e-05, "loss": 19.1252, "step": 9749 }, { "epoch": 0.40640240090033763, "grad_norm": 242.0, "learning_rate": 6.722674737318231e-05, "loss": 12.1254, "step": 9750 }, { "epoch": 0.40644408319786585, "grad_norm": 206.0, "learning_rate": 6.722041047971093e-05, "loss": 9.9377, "step": 9751 }, { "epoch": 0.4064857654953941, "grad_norm": 266.0, "learning_rate": 6.721407327239603e-05, "loss": 11.5627, "step": 9752 }, { "epoch": 0.4065274477929223, "grad_norm": 248.0, "learning_rate": 6.720773575135312e-05, "loss": 12.4387, "step": 9753 }, { "epoch": 0.4065691300904506, "grad_norm": 157.0, "learning_rate": 6.720139791669769e-05, "loss": 10.1254, "step": 9754 }, { "epoch": 0.4066108123879788, "grad_norm": 218.0, "learning_rate": 6.719505976854527e-05, "loss": 11.938, "step": 9755 }, { "epoch": 0.40665249468550707, "grad_norm": 82.0, "learning_rate": 6.718872130701135e-05, "loss": 7.844, "step": 9756 }, { "epoch": 0.4066941769830353, "grad_norm": 668.0, "learning_rate": 6.718238253221145e-05, "loss": 19.7545, "step": 9757 }, { "epoch": 0.40673585928056355, "grad_norm": 159.0, "learning_rate": 6.717604344426111e-05, "loss": 9.7505, "step": 9758 }, { "epoch": 0.40677754157809176, "grad_norm": 402.0, "learning_rate": 6.716970404327585e-05, "loss": 14.9377, "step": 9759 }, { "epoch": 0.40681922387562003, "grad_norm": 324.0, "learning_rate": 6.716336432937123e-05, "loss": 12.3127, "step": 9760 }, { "epoch": 0.40686090617314824, "grad_norm": 382.0, "learning_rate": 6.715702430266275e-05, "loss": 14.3761, "step": 9761 }, { "epoch": 0.4069025884706765, "grad_norm": 588.0, "learning_rate": 6.715068396326598e-05, "loss": 19.3754, "step": 9762 }, { "epoch": 0.4069442707682047, "grad_norm": 1144.0, "learning_rate": 6.714434331129648e-05, "loss": 28.7502, "step": 9763 }, { "epoch": 0.406985953065733, "grad_norm": 396.0, "learning_rate": 6.71380023468698e-05, "loss": 15.1253, "step": 9764 }, { "epoch": 0.4070276353632612, "grad_norm": 1192.0, "learning_rate": 6.713166107010151e-05, "loss": 29.1252, "step": 9765 }, { "epoch": 0.40706931766078946, "grad_norm": 292.0, "learning_rate": 6.712531948110716e-05, "loss": 11.1254, "step": 9766 }, { "epoch": 0.4071109999583177, "grad_norm": 422.0, "learning_rate": 6.711897758000236e-05, "loss": 15.4377, "step": 9767 }, { "epoch": 0.40715268225584594, "grad_norm": 117.5, "learning_rate": 6.711263536690266e-05, "loss": 10.8127, "step": 9768 }, { "epoch": 0.40719436455337416, "grad_norm": 270.0, "learning_rate": 6.710629284192367e-05, "loss": 13.0629, "step": 9769 }, { "epoch": 0.4072360468509024, "grad_norm": 528.0, "learning_rate": 6.709995000518097e-05, "loss": 17.3764, "step": 9770 }, { "epoch": 0.40727772914843063, "grad_norm": 177.0, "learning_rate": 6.709360685679016e-05, "loss": 10.1878, "step": 9771 }, { "epoch": 0.4073194114459589, "grad_norm": 324.0, "learning_rate": 6.708726339686686e-05, "loss": 14.3127, "step": 9772 }, { "epoch": 0.40736109374348717, "grad_norm": 251.0, "learning_rate": 6.708091962552666e-05, "loss": 12.3139, "step": 9773 }, { "epoch": 0.4074027760410154, "grad_norm": 560.0, "learning_rate": 6.707457554288519e-05, "loss": 20.0003, "step": 9774 }, { "epoch": 0.40744445833854365, "grad_norm": 302.0, "learning_rate": 6.706823114905805e-05, "loss": 13.7502, "step": 9775 }, { "epoch": 0.40748614063607186, "grad_norm": 220.0, "learning_rate": 6.706188644416089e-05, "loss": 10.6877, "step": 9776 }, { "epoch": 0.4075278229336001, "grad_norm": 348.0, "learning_rate": 6.705554142830935e-05, "loss": 14.1269, "step": 9777 }, { "epoch": 0.40756950523112834, "grad_norm": 616.0, "learning_rate": 6.704919610161903e-05, "loss": 17.1263, "step": 9778 }, { "epoch": 0.4076111875286566, "grad_norm": 396.0, "learning_rate": 6.704285046420562e-05, "loss": 15.5011, "step": 9779 }, { "epoch": 0.4076528698261848, "grad_norm": 360.0, "learning_rate": 6.703650451618473e-05, "loss": 12.8752, "step": 9780 }, { "epoch": 0.4076945521237131, "grad_norm": 253.0, "learning_rate": 6.703015825767204e-05, "loss": 11.5002, "step": 9781 }, { "epoch": 0.4077362344212413, "grad_norm": 282.0, "learning_rate": 6.70238116887832e-05, "loss": 13.6254, "step": 9782 }, { "epoch": 0.40777791671876956, "grad_norm": 130.0, "learning_rate": 6.701746480963387e-05, "loss": 9.6879, "step": 9783 }, { "epoch": 0.4078195990162978, "grad_norm": 312.0, "learning_rate": 6.701111762033975e-05, "loss": 13.3755, "step": 9784 }, { "epoch": 0.40786128131382604, "grad_norm": 516.0, "learning_rate": 6.700477012101649e-05, "loss": 18.8755, "step": 9785 }, { "epoch": 0.40790296361135425, "grad_norm": 205.0, "learning_rate": 6.69984223117798e-05, "loss": 11.5005, "step": 9786 }, { "epoch": 0.4079446459088825, "grad_norm": 256.0, "learning_rate": 6.699207419274534e-05, "loss": 12.4377, "step": 9787 }, { "epoch": 0.40798632820641073, "grad_norm": 246.0, "learning_rate": 6.698572576402883e-05, "loss": 12.438, "step": 9788 }, { "epoch": 0.408028010503939, "grad_norm": 384.0, "learning_rate": 6.697937702574596e-05, "loss": 13.3777, "step": 9789 }, { "epoch": 0.4080696928014672, "grad_norm": 454.0, "learning_rate": 6.697302797801244e-05, "loss": 16.6251, "step": 9790 }, { "epoch": 0.4081113750989955, "grad_norm": 728.0, "learning_rate": 6.696667862094397e-05, "loss": 21.0004, "step": 9791 }, { "epoch": 0.4081530573965237, "grad_norm": 204.0, "learning_rate": 6.696032895465628e-05, "loss": 11.8758, "step": 9792 }, { "epoch": 0.40819473969405196, "grad_norm": 330.0, "learning_rate": 6.695397897926508e-05, "loss": 13.7502, "step": 9793 }, { "epoch": 0.40823642199158017, "grad_norm": 928.0, "learning_rate": 6.694762869488612e-05, "loss": 26.1256, "step": 9794 }, { "epoch": 0.40827810428910843, "grad_norm": 225.0, "learning_rate": 6.69412781016351e-05, "loss": 12.0003, "step": 9795 }, { "epoch": 0.40831978658663665, "grad_norm": 316.0, "learning_rate": 6.693492719962779e-05, "loss": 13.1878, "step": 9796 }, { "epoch": 0.4083614688841649, "grad_norm": 68.5, "learning_rate": 6.692857598897995e-05, "loss": 6.7817, "step": 9797 }, { "epoch": 0.4084031511816931, "grad_norm": 362.0, "learning_rate": 6.692222446980729e-05, "loss": 15.0627, "step": 9798 }, { "epoch": 0.4084448334792214, "grad_norm": 63.25, "learning_rate": 6.69158726422256e-05, "loss": 8.3128, "step": 9799 }, { "epoch": 0.4084865157767496, "grad_norm": 736.0, "learning_rate": 6.690952050635062e-05, "loss": 19.7503, "step": 9800 }, { "epoch": 0.40852819807427787, "grad_norm": 314.0, "learning_rate": 6.690316806229812e-05, "loss": 14.0011, "step": 9801 }, { "epoch": 0.4085698803718061, "grad_norm": 215.0, "learning_rate": 6.68968153101839e-05, "loss": 11.251, "step": 9802 }, { "epoch": 0.40861156266933435, "grad_norm": 178.0, "learning_rate": 6.689046225012372e-05, "loss": 11.0628, "step": 9803 }, { "epoch": 0.40865324496686256, "grad_norm": 229.0, "learning_rate": 6.688410888223335e-05, "loss": 11.0007, "step": 9804 }, { "epoch": 0.40869492726439083, "grad_norm": 198.0, "learning_rate": 6.687775520662861e-05, "loss": 11.6877, "step": 9805 }, { "epoch": 0.40873660956191904, "grad_norm": 466.0, "learning_rate": 6.687140122342528e-05, "loss": 13.3771, "step": 9806 }, { "epoch": 0.4087782918594473, "grad_norm": 332.0, "learning_rate": 6.686504693273917e-05, "loss": 13.8127, "step": 9807 }, { "epoch": 0.4088199741569755, "grad_norm": 193.0, "learning_rate": 6.685869233468607e-05, "loss": 10.8752, "step": 9808 }, { "epoch": 0.4088616564545038, "grad_norm": 95.0, "learning_rate": 6.68523374293818e-05, "loss": 9.1879, "step": 9809 }, { "epoch": 0.408903338752032, "grad_norm": 580.0, "learning_rate": 6.684598221694221e-05, "loss": 18.3753, "step": 9810 }, { "epoch": 0.40894502104956026, "grad_norm": 544.0, "learning_rate": 6.683962669748308e-05, "loss": 18.3756, "step": 9811 }, { "epoch": 0.4089867033470885, "grad_norm": 238.0, "learning_rate": 6.683327087112027e-05, "loss": 9.7502, "step": 9812 }, { "epoch": 0.40902838564461674, "grad_norm": 208.0, "learning_rate": 6.682691473796959e-05, "loss": 10.2513, "step": 9813 }, { "epoch": 0.40907006794214495, "grad_norm": 462.0, "learning_rate": 6.68205582981469e-05, "loss": 16.0008, "step": 9814 }, { "epoch": 0.4091117502396732, "grad_norm": 112.5, "learning_rate": 6.681420155176805e-05, "loss": 10.5004, "step": 9815 }, { "epoch": 0.40915343253720143, "grad_norm": 282.0, "learning_rate": 6.680784449894888e-05, "loss": 12.0628, "step": 9816 }, { "epoch": 0.4091951148347297, "grad_norm": 564.0, "learning_rate": 6.680148713980525e-05, "loss": 19.1254, "step": 9817 }, { "epoch": 0.4092367971322579, "grad_norm": 664.0, "learning_rate": 6.679512947445304e-05, "loss": 20.8768, "step": 9818 }, { "epoch": 0.4092784794297862, "grad_norm": 564.0, "learning_rate": 6.678877150300808e-05, "loss": 19.1258, "step": 9819 }, { "epoch": 0.4093201617273144, "grad_norm": 136.0, "learning_rate": 6.678241322558629e-05, "loss": 10.7502, "step": 9820 }, { "epoch": 0.40936184402484266, "grad_norm": 85.0, "learning_rate": 6.67760546423035e-05, "loss": 7.1881, "step": 9821 }, { "epoch": 0.40940352632237087, "grad_norm": 1232.0, "learning_rate": 6.676969575327565e-05, "loss": 27.1296, "step": 9822 }, { "epoch": 0.40944520861989914, "grad_norm": 600.0, "learning_rate": 6.676333655861859e-05, "loss": 17.2507, "step": 9823 }, { "epoch": 0.40948689091742735, "grad_norm": 344.0, "learning_rate": 6.675697705844825e-05, "loss": 13.3129, "step": 9824 }, { "epoch": 0.4095285732149556, "grad_norm": 884.0, "learning_rate": 6.67506172528805e-05, "loss": 25.1252, "step": 9825 }, { "epoch": 0.4095702555124838, "grad_norm": 426.0, "learning_rate": 6.674425714203128e-05, "loss": 15.6891, "step": 9826 }, { "epoch": 0.4096119378100121, "grad_norm": 336.0, "learning_rate": 6.673789672601649e-05, "loss": 13.9377, "step": 9827 }, { "epoch": 0.4096536201075403, "grad_norm": 344.0, "learning_rate": 6.673153600495203e-05, "loss": 15.3128, "step": 9828 }, { "epoch": 0.4096953024050686, "grad_norm": 660.0, "learning_rate": 6.672517497895385e-05, "loss": 20.3753, "step": 9829 }, { "epoch": 0.4097369847025968, "grad_norm": 426.0, "learning_rate": 6.671881364813787e-05, "loss": 15.0003, "step": 9830 }, { "epoch": 0.40977866700012505, "grad_norm": 692.0, "learning_rate": 6.671245201262002e-05, "loss": 20.6252, "step": 9831 }, { "epoch": 0.40982034929765326, "grad_norm": 540.0, "learning_rate": 6.670609007251625e-05, "loss": 16.8759, "step": 9832 }, { "epoch": 0.40986203159518153, "grad_norm": 204.0, "learning_rate": 6.66997278279425e-05, "loss": 10.2511, "step": 9833 }, { "epoch": 0.40990371389270974, "grad_norm": 255.0, "learning_rate": 6.669336527901474e-05, "loss": 11.8128, "step": 9834 }, { "epoch": 0.409945396190238, "grad_norm": 234.0, "learning_rate": 6.66870024258489e-05, "loss": 12.1877, "step": 9835 }, { "epoch": 0.4099870784877662, "grad_norm": 242.0, "learning_rate": 6.668063926856098e-05, "loss": 12.0002, "step": 9836 }, { "epoch": 0.4100287607852945, "grad_norm": 368.0, "learning_rate": 6.667427580726692e-05, "loss": 14.6878, "step": 9837 }, { "epoch": 0.4100704430828227, "grad_norm": 282.0, "learning_rate": 6.666791204208272e-05, "loss": 12.8756, "step": 9838 }, { "epoch": 0.41011212538035097, "grad_norm": 560.0, "learning_rate": 6.666154797312432e-05, "loss": 16.1252, "step": 9839 }, { "epoch": 0.4101538076778792, "grad_norm": 378.0, "learning_rate": 6.665518360050773e-05, "loss": 14.8757, "step": 9840 }, { "epoch": 0.41019548997540745, "grad_norm": 161.0, "learning_rate": 6.664881892434895e-05, "loss": 10.2503, "step": 9841 }, { "epoch": 0.41023717227293566, "grad_norm": 904.0, "learning_rate": 6.664245394476397e-05, "loss": 22.7539, "step": 9842 }, { "epoch": 0.4102788545704639, "grad_norm": 181.0, "learning_rate": 6.663608866186878e-05, "loss": 10.1888, "step": 9843 }, { "epoch": 0.41032053686799214, "grad_norm": 532.0, "learning_rate": 6.66297230757794e-05, "loss": 17.7538, "step": 9844 }, { "epoch": 0.4103622191655204, "grad_norm": 262.0, "learning_rate": 6.662335718661185e-05, "loss": 13.1257, "step": 9845 }, { "epoch": 0.41040390146304867, "grad_norm": 1608.0, "learning_rate": 6.661699099448211e-05, "loss": 44.5013, "step": 9846 }, { "epoch": 0.4104455837605769, "grad_norm": 468.0, "learning_rate": 6.661062449950625e-05, "loss": 15.7542, "step": 9847 }, { "epoch": 0.41048726605810515, "grad_norm": 294.0, "learning_rate": 6.66042577018003e-05, "loss": 12.3755, "step": 9848 }, { "epoch": 0.41052894835563336, "grad_norm": 420.0, "learning_rate": 6.659789060148027e-05, "loss": 14.877, "step": 9849 }, { "epoch": 0.41057063065316163, "grad_norm": 262.0, "learning_rate": 6.659152319866221e-05, "loss": 12.5011, "step": 9850 }, { "epoch": 0.41061231295068984, "grad_norm": 544.0, "learning_rate": 6.658515549346215e-05, "loss": 18.5005, "step": 9851 }, { "epoch": 0.4106539952482181, "grad_norm": 260.0, "learning_rate": 6.657878748599618e-05, "loss": 11.2511, "step": 9852 }, { "epoch": 0.4106956775457463, "grad_norm": 74.0, "learning_rate": 6.657241917638035e-05, "loss": 8.9379, "step": 9853 }, { "epoch": 0.4107373598432746, "grad_norm": 936.0, "learning_rate": 6.656605056473069e-05, "loss": 20.6296, "step": 9854 }, { "epoch": 0.4107790421408028, "grad_norm": 272.0, "learning_rate": 6.655968165116327e-05, "loss": 13.4383, "step": 9855 }, { "epoch": 0.41082072443833106, "grad_norm": 620.0, "learning_rate": 6.655331243579421e-05, "loss": 19.0001, "step": 9856 }, { "epoch": 0.4108624067358593, "grad_norm": 612.0, "learning_rate": 6.654694291873956e-05, "loss": 18.7512, "step": 9857 }, { "epoch": 0.41090408903338754, "grad_norm": 292.0, "learning_rate": 6.654057310011537e-05, "loss": 12.3149, "step": 9858 }, { "epoch": 0.41094577133091575, "grad_norm": 1088.0, "learning_rate": 6.65342029800378e-05, "loss": 30.6252, "step": 9859 }, { "epoch": 0.410987453628444, "grad_norm": 2128.0, "learning_rate": 6.652783255862292e-05, "loss": 52.5002, "step": 9860 }, { "epoch": 0.41102913592597223, "grad_norm": 498.0, "learning_rate": 6.65214618359868e-05, "loss": 17.5018, "step": 9861 }, { "epoch": 0.4110708182235005, "grad_norm": 210.0, "learning_rate": 6.651509081224558e-05, "loss": 10.0002, "step": 9862 }, { "epoch": 0.4111125005210287, "grad_norm": 131.0, "learning_rate": 6.650871948751536e-05, "loss": 6.0628, "step": 9863 }, { "epoch": 0.411154182818557, "grad_norm": 334.0, "learning_rate": 6.650234786191228e-05, "loss": 13.6883, "step": 9864 }, { "epoch": 0.4111958651160852, "grad_norm": 121.0, "learning_rate": 6.649597593555243e-05, "loss": 8.1884, "step": 9865 }, { "epoch": 0.41123754741361346, "grad_norm": 362.0, "learning_rate": 6.648960370855196e-05, "loss": 14.5034, "step": 9866 }, { "epoch": 0.41127922971114167, "grad_norm": 360.0, "learning_rate": 6.648323118102699e-05, "loss": 14.0005, "step": 9867 }, { "epoch": 0.41132091200866994, "grad_norm": 596.0, "learning_rate": 6.647685835309369e-05, "loss": 19.1254, "step": 9868 }, { "epoch": 0.41136259430619815, "grad_norm": 78.5, "learning_rate": 6.647048522486816e-05, "loss": 9.6252, "step": 9869 }, { "epoch": 0.4114042766037264, "grad_norm": 304.0, "learning_rate": 6.64641117964666e-05, "loss": 13.6263, "step": 9870 }, { "epoch": 0.4114459589012546, "grad_norm": 248.0, "learning_rate": 6.64577380680051e-05, "loss": 11.0629, "step": 9871 }, { "epoch": 0.4114876411987829, "grad_norm": 241.0, "learning_rate": 6.645136403959989e-05, "loss": 10.8752, "step": 9872 }, { "epoch": 0.4115293234963111, "grad_norm": 216.0, "learning_rate": 6.644498971136712e-05, "loss": 11.9378, "step": 9873 }, { "epoch": 0.4115710057938394, "grad_norm": 180.0, "learning_rate": 6.643861508342293e-05, "loss": 10.7503, "step": 9874 }, { "epoch": 0.4116126880913676, "grad_norm": 200.0, "learning_rate": 6.643224015588353e-05, "loss": 11.6252, "step": 9875 }, { "epoch": 0.41165437038889585, "grad_norm": 556.0, "learning_rate": 6.64258649288651e-05, "loss": 18.0028, "step": 9876 }, { "epoch": 0.41169605268642406, "grad_norm": 103.5, "learning_rate": 6.641948940248382e-05, "loss": 8.6254, "step": 9877 }, { "epoch": 0.41173773498395233, "grad_norm": 69.5, "learning_rate": 6.641311357685588e-05, "loss": 8.7505, "step": 9878 }, { "epoch": 0.41177941728148054, "grad_norm": 404.0, "learning_rate": 6.64067374520975e-05, "loss": 13.4388, "step": 9879 }, { "epoch": 0.4118210995790088, "grad_norm": 476.0, "learning_rate": 6.640036102832486e-05, "loss": 16.6252, "step": 9880 }, { "epoch": 0.411862781876537, "grad_norm": 266.0, "learning_rate": 6.63939843056542e-05, "loss": 11.8762, "step": 9881 }, { "epoch": 0.4119044641740653, "grad_norm": 434.0, "learning_rate": 6.638760728420171e-05, "loss": 15.251, "step": 9882 }, { "epoch": 0.4119461464715935, "grad_norm": 1168.0, "learning_rate": 6.638122996408362e-05, "loss": 26.2537, "step": 9883 }, { "epoch": 0.41198782876912177, "grad_norm": 150.0, "learning_rate": 6.637485234541616e-05, "loss": 10.938, "step": 9884 }, { "epoch": 0.41202951106665, "grad_norm": 338.0, "learning_rate": 6.636847442831557e-05, "loss": 15.7503, "step": 9885 }, { "epoch": 0.41207119336417825, "grad_norm": 180.0, "learning_rate": 6.636209621289808e-05, "loss": 10.7502, "step": 9886 }, { "epoch": 0.41211287566170646, "grad_norm": 440.0, "learning_rate": 6.635571769927993e-05, "loss": 14.1254, "step": 9887 }, { "epoch": 0.4121545579592347, "grad_norm": 358.0, "learning_rate": 6.634933888757737e-05, "loss": 14.1269, "step": 9888 }, { "epoch": 0.41219624025676294, "grad_norm": 482.0, "learning_rate": 6.634295977790668e-05, "loss": 15.8131, "step": 9889 }, { "epoch": 0.4122379225542912, "grad_norm": 352.0, "learning_rate": 6.633658037038407e-05, "loss": 13.8756, "step": 9890 }, { "epoch": 0.4122796048518194, "grad_norm": 288.0, "learning_rate": 6.633020066512584e-05, "loss": 12.6877, "step": 9891 }, { "epoch": 0.4123212871493477, "grad_norm": 2048.0, "learning_rate": 6.632382066224826e-05, "loss": 41.7546, "step": 9892 }, { "epoch": 0.4123629694468759, "grad_norm": 900.0, "learning_rate": 6.63174403618676e-05, "loss": 22.8781, "step": 9893 }, { "epoch": 0.41240465174440416, "grad_norm": 1144.0, "learning_rate": 6.631105976410013e-05, "loss": 29.3757, "step": 9894 }, { "epoch": 0.41244633404193237, "grad_norm": 1992.0, "learning_rate": 6.630467886906216e-05, "loss": 39.0004, "step": 9895 }, { "epoch": 0.41248801633946064, "grad_norm": 106.5, "learning_rate": 6.629829767686995e-05, "loss": 7.9068, "step": 9896 }, { "epoch": 0.41252969863698885, "grad_norm": 85.5, "learning_rate": 6.629191618763984e-05, "loss": 8.3758, "step": 9897 }, { "epoch": 0.4125713809345171, "grad_norm": 512.0, "learning_rate": 6.628553440148809e-05, "loss": 15.6256, "step": 9898 }, { "epoch": 0.41261306323204533, "grad_norm": 736.0, "learning_rate": 6.627915231853105e-05, "loss": 20.1289, "step": 9899 }, { "epoch": 0.4126547455295736, "grad_norm": 274.0, "learning_rate": 6.6272769938885e-05, "loss": 11.6253, "step": 9900 }, { "epoch": 0.4126964278271018, "grad_norm": 80.0, "learning_rate": 6.626638726266629e-05, "loss": 8.3129, "step": 9901 }, { "epoch": 0.4127381101246301, "grad_norm": 296.0, "learning_rate": 6.626000428999122e-05, "loss": 12.2503, "step": 9902 }, { "epoch": 0.4127797924221583, "grad_norm": 225.0, "learning_rate": 6.625362102097612e-05, "loss": 11.1877, "step": 9903 }, { "epoch": 0.41282147471968655, "grad_norm": 84.0, "learning_rate": 6.624723745573734e-05, "loss": 7.5322, "step": 9904 }, { "epoch": 0.41286315701721477, "grad_norm": 310.0, "learning_rate": 6.624085359439122e-05, "loss": 13.8757, "step": 9905 }, { "epoch": 0.41290483931474303, "grad_norm": 91.0, "learning_rate": 6.623446943705409e-05, "loss": 9.0009, "step": 9906 }, { "epoch": 0.41294652161227124, "grad_norm": 224.0, "learning_rate": 6.622808498384231e-05, "loss": 10.7502, "step": 9907 }, { "epoch": 0.4129882039097995, "grad_norm": 346.0, "learning_rate": 6.622170023487226e-05, "loss": 14.5002, "step": 9908 }, { "epoch": 0.4130298862073277, "grad_norm": 262.0, "learning_rate": 6.621531519026027e-05, "loss": 12.6264, "step": 9909 }, { "epoch": 0.413071568504856, "grad_norm": 348.0, "learning_rate": 6.620892985012272e-05, "loss": 13.8752, "step": 9910 }, { "epoch": 0.4131132508023842, "grad_norm": 332.0, "learning_rate": 6.6202544214576e-05, "loss": 14.6894, "step": 9911 }, { "epoch": 0.41315493309991247, "grad_norm": 696.0, "learning_rate": 6.619615828373646e-05, "loss": 20.2527, "step": 9912 }, { "epoch": 0.4131966153974407, "grad_norm": 109.5, "learning_rate": 6.61897720577205e-05, "loss": 10.6254, "step": 9913 }, { "epoch": 0.41323829769496895, "grad_norm": 768.0, "learning_rate": 6.618338553664452e-05, "loss": 22.6254, "step": 9914 }, { "epoch": 0.41327997999249716, "grad_norm": 140.0, "learning_rate": 6.617699872062489e-05, "loss": 12.2502, "step": 9915 }, { "epoch": 0.4133216622900254, "grad_norm": 183.0, "learning_rate": 6.617061160977801e-05, "loss": 10.6252, "step": 9916 }, { "epoch": 0.41336334458755364, "grad_norm": 372.0, "learning_rate": 6.616422420422032e-05, "loss": 15.5629, "step": 9917 }, { "epoch": 0.4134050268850819, "grad_norm": 796.0, "learning_rate": 6.61578365040682e-05, "loss": 21.6253, "step": 9918 }, { "epoch": 0.4134467091826102, "grad_norm": 320.0, "learning_rate": 6.615144850943807e-05, "loss": 14.0005, "step": 9919 }, { "epoch": 0.4134883914801384, "grad_norm": 166.0, "learning_rate": 6.614506022044636e-05, "loss": 9.8753, "step": 9920 }, { "epoch": 0.41353007377766665, "grad_norm": 716.0, "learning_rate": 6.613867163720948e-05, "loss": 22.3753, "step": 9921 }, { "epoch": 0.41357175607519486, "grad_norm": 378.0, "learning_rate": 6.61322827598439e-05, "loss": 14.3131, "step": 9922 }, { "epoch": 0.41361343837272313, "grad_norm": 540.0, "learning_rate": 6.612589358846602e-05, "loss": 17.0004, "step": 9923 }, { "epoch": 0.41365512067025134, "grad_norm": 592.0, "learning_rate": 6.611950412319231e-05, "loss": 20.1252, "step": 9924 }, { "epoch": 0.4136968029677796, "grad_norm": 672.0, "learning_rate": 6.611311436413921e-05, "loss": 20.8756, "step": 9925 }, { "epoch": 0.4137384852653078, "grad_norm": 416.0, "learning_rate": 6.610672431142316e-05, "loss": 15.1879, "step": 9926 }, { "epoch": 0.4137801675628361, "grad_norm": 564.0, "learning_rate": 6.610033396516063e-05, "loss": 18.5012, "step": 9927 }, { "epoch": 0.4138218498603643, "grad_norm": 478.0, "learning_rate": 6.609394332546808e-05, "loss": 16.5003, "step": 9928 }, { "epoch": 0.41386353215789257, "grad_norm": 486.0, "learning_rate": 6.608755239246198e-05, "loss": 16.3752, "step": 9929 }, { "epoch": 0.4139052144554208, "grad_norm": 264.0, "learning_rate": 6.608116116625883e-05, "loss": 11.1878, "step": 9930 }, { "epoch": 0.41394689675294905, "grad_norm": 260.0, "learning_rate": 6.607476964697508e-05, "loss": 12.5006, "step": 9931 }, { "epoch": 0.41398857905047726, "grad_norm": 412.0, "learning_rate": 6.606837783472723e-05, "loss": 15.2505, "step": 9932 }, { "epoch": 0.4140302613480055, "grad_norm": 656.0, "learning_rate": 6.606198572963175e-05, "loss": 18.0002, "step": 9933 }, { "epoch": 0.41407194364553374, "grad_norm": 1776.0, "learning_rate": 6.605559333180516e-05, "loss": 36.5007, "step": 9934 }, { "epoch": 0.414113625943062, "grad_norm": 258.0, "learning_rate": 6.604920064136396e-05, "loss": 11.0632, "step": 9935 }, { "epoch": 0.4141553082405902, "grad_norm": 380.0, "learning_rate": 6.604280765842467e-05, "loss": 16.6253, "step": 9936 }, { "epoch": 0.4141969905381185, "grad_norm": 101.0, "learning_rate": 6.603641438310376e-05, "loss": 9.5627, "step": 9937 }, { "epoch": 0.4142386728356467, "grad_norm": 366.0, "learning_rate": 6.60300208155178e-05, "loss": 13.9376, "step": 9938 }, { "epoch": 0.41428035513317496, "grad_norm": 250.0, "learning_rate": 6.602362695578327e-05, "loss": 13.6879, "step": 9939 }, { "epoch": 0.41432203743070317, "grad_norm": 346.0, "learning_rate": 6.601723280401672e-05, "loss": 13.8136, "step": 9940 }, { "epoch": 0.41436371972823144, "grad_norm": 652.0, "learning_rate": 6.601083836033469e-05, "loss": 21.2534, "step": 9941 }, { "epoch": 0.41440540202575965, "grad_norm": 302.0, "learning_rate": 6.600444362485369e-05, "loss": 12.5628, "step": 9942 }, { "epoch": 0.4144470843232879, "grad_norm": 159.0, "learning_rate": 6.59980485976903e-05, "loss": 9.188, "step": 9943 }, { "epoch": 0.41448876662081613, "grad_norm": 1104.0, "learning_rate": 6.599165327896105e-05, "loss": 25.1291, "step": 9944 }, { "epoch": 0.4145304489183444, "grad_norm": 324.0, "learning_rate": 6.59852576687825e-05, "loss": 15.0001, "step": 9945 }, { "epoch": 0.4145721312158726, "grad_norm": 146.0, "learning_rate": 6.597886176727119e-05, "loss": 10.9381, "step": 9946 }, { "epoch": 0.4146138135134009, "grad_norm": 1720.0, "learning_rate": 6.597246557454373e-05, "loss": 38.5005, "step": 9947 }, { "epoch": 0.4146554958109291, "grad_norm": 189.0, "learning_rate": 6.596606909071667e-05, "loss": 11.938, "step": 9948 }, { "epoch": 0.41469717810845735, "grad_norm": 346.0, "learning_rate": 6.595967231590656e-05, "loss": 14.8755, "step": 9949 }, { "epoch": 0.41473886040598557, "grad_norm": 139.0, "learning_rate": 6.595327525023004e-05, "loss": 10.7508, "step": 9950 }, { "epoch": 0.41478054270351383, "grad_norm": 157.0, "learning_rate": 6.594687789380363e-05, "loss": 10.6878, "step": 9951 }, { "epoch": 0.41482222500104204, "grad_norm": 330.0, "learning_rate": 6.594048024674398e-05, "loss": 13.5627, "step": 9952 }, { "epoch": 0.4148639072985703, "grad_norm": 164.0, "learning_rate": 6.593408230916764e-05, "loss": 10.3752, "step": 9953 }, { "epoch": 0.4149055895960985, "grad_norm": 332.0, "learning_rate": 6.592768408119124e-05, "loss": 14.1264, "step": 9954 }, { "epoch": 0.4149472718936268, "grad_norm": 588.0, "learning_rate": 6.59212855629314e-05, "loss": 19.6252, "step": 9955 }, { "epoch": 0.414988954191155, "grad_norm": 181.0, "learning_rate": 6.59148867545047e-05, "loss": 10.688, "step": 9956 }, { "epoch": 0.41503063648868327, "grad_norm": 536.0, "learning_rate": 6.590848765602779e-05, "loss": 15.8752, "step": 9957 }, { "epoch": 0.4150723187862115, "grad_norm": 316.0, "learning_rate": 6.590208826761726e-05, "loss": 13.1899, "step": 9958 }, { "epoch": 0.41511400108373975, "grad_norm": 114.0, "learning_rate": 6.589568858938976e-05, "loss": 9.628, "step": 9959 }, { "epoch": 0.41515568338126796, "grad_norm": 127.0, "learning_rate": 6.588928862146194e-05, "loss": 9.6878, "step": 9960 }, { "epoch": 0.4151973656787962, "grad_norm": 229.0, "learning_rate": 6.588288836395043e-05, "loss": 9.4376, "step": 9961 }, { "epoch": 0.41523904797632444, "grad_norm": 342.0, "learning_rate": 6.587648781697185e-05, "loss": 13.0006, "step": 9962 }, { "epoch": 0.4152807302738527, "grad_norm": 536.0, "learning_rate": 6.587008698064288e-05, "loss": 16.6266, "step": 9963 }, { "epoch": 0.4153224125713809, "grad_norm": 105.5, "learning_rate": 6.586368585508017e-05, "loss": 9.688, "step": 9964 }, { "epoch": 0.4153640948689092, "grad_norm": 284.0, "learning_rate": 6.585728444040038e-05, "loss": 13.0005, "step": 9965 }, { "epoch": 0.4154057771664374, "grad_norm": 306.0, "learning_rate": 6.585088273672016e-05, "loss": 13.5627, "step": 9966 }, { "epoch": 0.41544745946396566, "grad_norm": 205.0, "learning_rate": 6.584448074415621e-05, "loss": 9.8754, "step": 9967 }, { "epoch": 0.4154891417614939, "grad_norm": 466.0, "learning_rate": 6.583807846282519e-05, "loss": 16.8755, "step": 9968 }, { "epoch": 0.41553082405902214, "grad_norm": 300.0, "learning_rate": 6.583167589284377e-05, "loss": 13.5636, "step": 9969 }, { "epoch": 0.41557250635655035, "grad_norm": 410.0, "learning_rate": 6.582527303432865e-05, "loss": 15.1253, "step": 9970 }, { "epoch": 0.4156141886540786, "grad_norm": 740.0, "learning_rate": 6.581886988739654e-05, "loss": 22.8752, "step": 9971 }, { "epoch": 0.41565587095160683, "grad_norm": 191.0, "learning_rate": 6.58124664521641e-05, "loss": 10.9378, "step": 9972 }, { "epoch": 0.4156975532491351, "grad_norm": 216.0, "learning_rate": 6.580606272874807e-05, "loss": 11.5004, "step": 9973 }, { "epoch": 0.4157392355466633, "grad_norm": 262.0, "learning_rate": 6.579965871726514e-05, "loss": 11.6898, "step": 9974 }, { "epoch": 0.4157809178441916, "grad_norm": 434.0, "learning_rate": 6.579325441783204e-05, "loss": 16.1257, "step": 9975 }, { "epoch": 0.4158226001417198, "grad_norm": 736.0, "learning_rate": 6.578684983056547e-05, "loss": 19.6284, "step": 9976 }, { "epoch": 0.41586428243924806, "grad_norm": 272.0, "learning_rate": 6.578044495558216e-05, "loss": 11.8129, "step": 9977 }, { "epoch": 0.41590596473677627, "grad_norm": 181.0, "learning_rate": 6.577403979299884e-05, "loss": 10.5007, "step": 9978 }, { "epoch": 0.41594764703430454, "grad_norm": 824.0, "learning_rate": 6.576763434293224e-05, "loss": 20.5005, "step": 9979 }, { "epoch": 0.41598932933183275, "grad_norm": 217.0, "learning_rate": 6.576122860549911e-05, "loss": 12.0016, "step": 9980 }, { "epoch": 0.416031011629361, "grad_norm": 448.0, "learning_rate": 6.575482258081617e-05, "loss": 16.2502, "step": 9981 }, { "epoch": 0.4160726939268892, "grad_norm": 440.0, "learning_rate": 6.574841626900021e-05, "loss": 15.5002, "step": 9982 }, { "epoch": 0.4161143762244175, "grad_norm": 197.0, "learning_rate": 6.574200967016797e-05, "loss": 10.1258, "step": 9983 }, { "epoch": 0.4161560585219457, "grad_norm": 1520.0, "learning_rate": 6.57356027844362e-05, "loss": 32.2544, "step": 9984 }, { "epoch": 0.41619774081947397, "grad_norm": 416.0, "learning_rate": 6.572919561192166e-05, "loss": 15.8127, "step": 9985 }, { "epoch": 0.4162394231170022, "grad_norm": 161.0, "learning_rate": 6.572278815274114e-05, "loss": 10.1876, "step": 9986 }, { "epoch": 0.41628110541453045, "grad_norm": 133.0, "learning_rate": 6.571638040701141e-05, "loss": 9.938, "step": 9987 }, { "epoch": 0.41632278771205866, "grad_norm": 318.0, "learning_rate": 6.570997237484926e-05, "loss": 14.1252, "step": 9988 }, { "epoch": 0.41636447000958693, "grad_norm": 808.0, "learning_rate": 6.570356405637147e-05, "loss": 21.6253, "step": 9989 }, { "epoch": 0.41640615230711514, "grad_norm": 488.0, "learning_rate": 6.569715545169483e-05, "loss": 17.7502, "step": 9990 }, { "epoch": 0.4164478346046434, "grad_norm": 648.0, "learning_rate": 6.569074656093612e-05, "loss": 19.5002, "step": 9991 }, { "epoch": 0.4164895169021717, "grad_norm": 512.0, "learning_rate": 6.568433738421218e-05, "loss": 16.8754, "step": 9992 }, { "epoch": 0.4165311991996999, "grad_norm": 1024.0, "learning_rate": 6.567792792163981e-05, "loss": 25.8754, "step": 9993 }, { "epoch": 0.41657288149722815, "grad_norm": 170.0, "learning_rate": 6.567151817333578e-05, "loss": 10.0003, "step": 9994 }, { "epoch": 0.41661456379475637, "grad_norm": 340.0, "learning_rate": 6.566510813941695e-05, "loss": 13.6878, "step": 9995 }, { "epoch": 0.41665624609228463, "grad_norm": 324.0, "learning_rate": 6.565869782000015e-05, "loss": 13.6253, "step": 9996 }, { "epoch": 0.41669792838981284, "grad_norm": 187.0, "learning_rate": 6.565228721520217e-05, "loss": 11.3127, "step": 9997 }, { "epoch": 0.4167396106873411, "grad_norm": 243.0, "learning_rate": 6.564587632513988e-05, "loss": 12.8134, "step": 9998 }, { "epoch": 0.4167812929848693, "grad_norm": 624.0, "learning_rate": 6.56394651499301e-05, "loss": 19.5003, "step": 9999 }, { "epoch": 0.4168229752823976, "grad_norm": 232.0, "learning_rate": 6.563305368968968e-05, "loss": 12.3128, "step": 10000 }, { "epoch": 0.4168646575799258, "grad_norm": 676.0, "learning_rate": 6.562664194453548e-05, "loss": 18.6295, "step": 10001 }, { "epoch": 0.41690633987745407, "grad_norm": 416.0, "learning_rate": 6.562022991458433e-05, "loss": 15.3129, "step": 10002 }, { "epoch": 0.4169480221749823, "grad_norm": 644.0, "learning_rate": 6.561381759995311e-05, "loss": 20.3752, "step": 10003 }, { "epoch": 0.41698970447251055, "grad_norm": 428.0, "learning_rate": 6.560740500075868e-05, "loss": 15.2507, "step": 10004 }, { "epoch": 0.41703138677003876, "grad_norm": 404.0, "learning_rate": 6.560099211711789e-05, "loss": 14.254, "step": 10005 }, { "epoch": 0.417073069067567, "grad_norm": 230.0, "learning_rate": 6.559457894914765e-05, "loss": 12.3129, "step": 10006 }, { "epoch": 0.41711475136509524, "grad_norm": 96.5, "learning_rate": 6.558816549696483e-05, "loss": 4.8129, "step": 10007 }, { "epoch": 0.4171564336626235, "grad_norm": 157.0, "learning_rate": 6.55817517606863e-05, "loss": 11.6253, "step": 10008 }, { "epoch": 0.4171981159601517, "grad_norm": 108.5, "learning_rate": 6.557533774042895e-05, "loss": 9.1254, "step": 10009 }, { "epoch": 0.41723979825768, "grad_norm": 342.0, "learning_rate": 6.55689234363097e-05, "loss": 14.3133, "step": 10010 }, { "epoch": 0.4172814805552082, "grad_norm": 544.0, "learning_rate": 6.556250884844544e-05, "loss": 18.1255, "step": 10011 }, { "epoch": 0.41732316285273646, "grad_norm": 600.0, "learning_rate": 6.555609397695307e-05, "loss": 19.2502, "step": 10012 }, { "epoch": 0.4173648451502647, "grad_norm": 490.0, "learning_rate": 6.554967882194952e-05, "loss": 17.3753, "step": 10013 }, { "epoch": 0.41740652744779294, "grad_norm": 592.0, "learning_rate": 6.554326338355168e-05, "loss": 19.5009, "step": 10014 }, { "epoch": 0.41744820974532115, "grad_norm": 652.0, "learning_rate": 6.553684766187649e-05, "loss": 18.8753, "step": 10015 }, { "epoch": 0.4174898920428494, "grad_norm": 1176.0, "learning_rate": 6.553043165704086e-05, "loss": 25.6293, "step": 10016 }, { "epoch": 0.41753157434037763, "grad_norm": 74.0, "learning_rate": 6.552401536916175e-05, "loss": 8.3753, "step": 10017 }, { "epoch": 0.4175732566379059, "grad_norm": 198.0, "learning_rate": 6.551759879835608e-05, "loss": 9.7503, "step": 10018 }, { "epoch": 0.4176149389354341, "grad_norm": 176.0, "learning_rate": 6.551118194474077e-05, "loss": 10.6877, "step": 10019 }, { "epoch": 0.4176566212329624, "grad_norm": 258.0, "learning_rate": 6.550476480843281e-05, "loss": 13.3753, "step": 10020 }, { "epoch": 0.4176983035304906, "grad_norm": 69.5, "learning_rate": 6.549834738954915e-05, "loss": 7.7815, "step": 10021 }, { "epoch": 0.41773998582801886, "grad_norm": 223.0, "learning_rate": 6.549192968820671e-05, "loss": 12.5629, "step": 10022 }, { "epoch": 0.41778166812554707, "grad_norm": 222.0, "learning_rate": 6.548551170452248e-05, "loss": 9.938, "step": 10023 }, { "epoch": 0.41782335042307533, "grad_norm": 176.0, "learning_rate": 6.547909343861344e-05, "loss": 8.6888, "step": 10024 }, { "epoch": 0.41786503272060355, "grad_norm": 520.0, "learning_rate": 6.547267489059655e-05, "loss": 16.5005, "step": 10025 }, { "epoch": 0.4179067150181318, "grad_norm": 676.0, "learning_rate": 6.546625606058876e-05, "loss": 17.2504, "step": 10026 }, { "epoch": 0.41794839731566, "grad_norm": 384.0, "learning_rate": 6.54598369487071e-05, "loss": 14.9379, "step": 10027 }, { "epoch": 0.4179900796131883, "grad_norm": 540.0, "learning_rate": 6.545341755506854e-05, "loss": 18.8752, "step": 10028 }, { "epoch": 0.4180317619107165, "grad_norm": 588.0, "learning_rate": 6.544699787979007e-05, "loss": 18.2501, "step": 10029 }, { "epoch": 0.41807344420824477, "grad_norm": 434.0, "learning_rate": 6.544057792298868e-05, "loss": 15.563, "step": 10030 }, { "epoch": 0.418115126505773, "grad_norm": 270.0, "learning_rate": 6.54341576847814e-05, "loss": 11.8752, "step": 10031 }, { "epoch": 0.41815680880330125, "grad_norm": 210.0, "learning_rate": 6.542773716528522e-05, "loss": 8.7504, "step": 10032 }, { "epoch": 0.41819849110082946, "grad_norm": 348.0, "learning_rate": 6.542131636461717e-05, "loss": 14.1877, "step": 10033 }, { "epoch": 0.41824017339835773, "grad_norm": 175.0, "learning_rate": 6.541489528289425e-05, "loss": 10.3753, "step": 10034 }, { "epoch": 0.41828185569588594, "grad_norm": 151.0, "learning_rate": 6.540847392023348e-05, "loss": 10.2503, "step": 10035 }, { "epoch": 0.4183235379934142, "grad_norm": 200.0, "learning_rate": 6.540205227675193e-05, "loss": 12.0636, "step": 10036 }, { "epoch": 0.4183652202909424, "grad_norm": 170.0, "learning_rate": 6.53956303525666e-05, "loss": 10.1879, "step": 10037 }, { "epoch": 0.4184069025884707, "grad_norm": 211.0, "learning_rate": 6.538920814779454e-05, "loss": 12.3756, "step": 10038 }, { "epoch": 0.4184485848859989, "grad_norm": 284.0, "learning_rate": 6.53827856625528e-05, "loss": 9.4381, "step": 10039 }, { "epoch": 0.41849026718352716, "grad_norm": 276.0, "learning_rate": 6.537636289695843e-05, "loss": 12.7505, "step": 10040 }, { "epoch": 0.4185319494810554, "grad_norm": 344.0, "learning_rate": 6.536993985112849e-05, "loss": 13.8762, "step": 10041 }, { "epoch": 0.41857363177858364, "grad_norm": 486.0, "learning_rate": 6.536351652518e-05, "loss": 14.0027, "step": 10042 }, { "epoch": 0.41861531407611186, "grad_norm": 316.0, "learning_rate": 6.535709291923008e-05, "loss": 12.7503, "step": 10043 }, { "epoch": 0.4186569963736401, "grad_norm": 442.0, "learning_rate": 6.535066903339577e-05, "loss": 16.2501, "step": 10044 }, { "epoch": 0.41869867867116833, "grad_norm": 188.0, "learning_rate": 6.534424486779416e-05, "loss": 11.8752, "step": 10045 }, { "epoch": 0.4187403609686966, "grad_norm": 336.0, "learning_rate": 6.533782042254232e-05, "loss": 14.4383, "step": 10046 }, { "epoch": 0.4187820432662248, "grad_norm": 234.0, "learning_rate": 6.533139569775734e-05, "loss": 12.7503, "step": 10047 }, { "epoch": 0.4188237255637531, "grad_norm": 808.0, "learning_rate": 6.532497069355632e-05, "loss": 20.2544, "step": 10048 }, { "epoch": 0.4188654078612813, "grad_norm": 688.0, "learning_rate": 6.531854541005634e-05, "loss": 19.3779, "step": 10049 }, { "epoch": 0.41890709015880956, "grad_norm": 292.0, "learning_rate": 6.531211984737452e-05, "loss": 14.5634, "step": 10050 }, { "epoch": 0.41894877245633777, "grad_norm": 106.0, "learning_rate": 6.530569400562795e-05, "loss": 8.9377, "step": 10051 }, { "epoch": 0.41899045475386604, "grad_norm": 636.0, "learning_rate": 6.529926788493374e-05, "loss": 19.8752, "step": 10052 }, { "epoch": 0.41903213705139425, "grad_norm": 298.0, "learning_rate": 6.529284148540903e-05, "loss": 13.7502, "step": 10053 }, { "epoch": 0.4190738193489225, "grad_norm": 438.0, "learning_rate": 6.528641480717092e-05, "loss": 16.0025, "step": 10054 }, { "epoch": 0.4191155016464507, "grad_norm": 764.0, "learning_rate": 6.527998785033655e-05, "loss": 21.8751, "step": 10055 }, { "epoch": 0.419157183943979, "grad_norm": 374.0, "learning_rate": 6.527356061502303e-05, "loss": 14.7517, "step": 10056 }, { "epoch": 0.4191988662415072, "grad_norm": 736.0, "learning_rate": 6.526713310134753e-05, "loss": 19.2529, "step": 10057 }, { "epoch": 0.4192405485390355, "grad_norm": 152.0, "learning_rate": 6.526070530942716e-05, "loss": 9.8761, "step": 10058 }, { "epoch": 0.4192822308365637, "grad_norm": 235.0, "learning_rate": 6.525427723937909e-05, "loss": 13.0627, "step": 10059 }, { "epoch": 0.41932391313409195, "grad_norm": 552.0, "learning_rate": 6.524784889132044e-05, "loss": 19.3754, "step": 10060 }, { "epoch": 0.41936559543162016, "grad_norm": 160.0, "learning_rate": 6.524142026536841e-05, "loss": 8.9378, "step": 10061 }, { "epoch": 0.41940727772914843, "grad_norm": 860.0, "learning_rate": 6.523499136164015e-05, "loss": 24.8754, "step": 10062 }, { "epoch": 0.41944896002667664, "grad_norm": 632.0, "learning_rate": 6.522856218025282e-05, "loss": 18.0051, "step": 10063 }, { "epoch": 0.4194906423242049, "grad_norm": 328.0, "learning_rate": 6.522213272132358e-05, "loss": 11.8753, "step": 10064 }, { "epoch": 0.4195323246217332, "grad_norm": 290.0, "learning_rate": 6.521570298496961e-05, "loss": 10.8127, "step": 10065 }, { "epoch": 0.4195740069192614, "grad_norm": 144.0, "learning_rate": 6.520927297130812e-05, "loss": 10.4389, "step": 10066 }, { "epoch": 0.41961568921678966, "grad_norm": 93.0, "learning_rate": 6.520284268045629e-05, "loss": 8.2507, "step": 10067 }, { "epoch": 0.41965737151431787, "grad_norm": 276.0, "learning_rate": 6.519641211253129e-05, "loss": 12.6878, "step": 10068 }, { "epoch": 0.41969905381184613, "grad_norm": 604.0, "learning_rate": 6.518998126765032e-05, "loss": 16.5001, "step": 10069 }, { "epoch": 0.41974073610937435, "grad_norm": 1600.0, "learning_rate": 6.51835501459306e-05, "loss": 32.254, "step": 10070 }, { "epoch": 0.4197824184069026, "grad_norm": 420.0, "learning_rate": 6.517711874748934e-05, "loss": 15.256, "step": 10071 }, { "epoch": 0.4198241007044308, "grad_norm": 844.0, "learning_rate": 6.517068707244373e-05, "loss": 24.8755, "step": 10072 }, { "epoch": 0.4198657830019591, "grad_norm": 544.0, "learning_rate": 6.5164255120911e-05, "loss": 17.626, "step": 10073 }, { "epoch": 0.4199074652994873, "grad_norm": 178.0, "learning_rate": 6.515782289300839e-05, "loss": 10.1255, "step": 10074 }, { "epoch": 0.41994914759701557, "grad_norm": 314.0, "learning_rate": 6.51513903888531e-05, "loss": 13.5631, "step": 10075 }, { "epoch": 0.4199908298945438, "grad_norm": 243.0, "learning_rate": 6.514495760856239e-05, "loss": 11.3757, "step": 10076 }, { "epoch": 0.42003251219207205, "grad_norm": 215.0, "learning_rate": 6.513852455225347e-05, "loss": 11.7511, "step": 10077 }, { "epoch": 0.42007419448960026, "grad_norm": 868.0, "learning_rate": 6.513209122004359e-05, "loss": 24.2502, "step": 10078 }, { "epoch": 0.42011587678712853, "grad_norm": 1464.0, "learning_rate": 6.512565761205e-05, "loss": 33.7503, "step": 10079 }, { "epoch": 0.42015755908465674, "grad_norm": 156.0, "learning_rate": 6.511922372838999e-05, "loss": 10.5628, "step": 10080 }, { "epoch": 0.420199241382185, "grad_norm": 366.0, "learning_rate": 6.511278956918077e-05, "loss": 15.4377, "step": 10081 }, { "epoch": 0.4202409236797132, "grad_norm": 237.0, "learning_rate": 6.51063551345396e-05, "loss": 12.4381, "step": 10082 }, { "epoch": 0.4202826059772415, "grad_norm": 202.0, "learning_rate": 6.509992042458378e-05, "loss": 10.8128, "step": 10083 }, { "epoch": 0.4203242882747697, "grad_norm": 258.0, "learning_rate": 6.509348543943056e-05, "loss": 13.6255, "step": 10084 }, { "epoch": 0.42036597057229796, "grad_norm": 304.0, "learning_rate": 6.508705017919725e-05, "loss": 14.3129, "step": 10085 }, { "epoch": 0.4204076528698262, "grad_norm": 235.0, "learning_rate": 6.50806146440011e-05, "loss": 9.9415, "step": 10086 }, { "epoch": 0.42044933516735444, "grad_norm": 210.0, "learning_rate": 6.50741788339594e-05, "loss": 11.5009, "step": 10087 }, { "epoch": 0.42049101746488265, "grad_norm": 1680.0, "learning_rate": 6.506774274918947e-05, "loss": 34.0048, "step": 10088 }, { "epoch": 0.4205326997624109, "grad_norm": 183.0, "learning_rate": 6.506130638980858e-05, "loss": 10.4377, "step": 10089 }, { "epoch": 0.42057438205993913, "grad_norm": 119.0, "learning_rate": 6.505486975593404e-05, "loss": 9.1253, "step": 10090 }, { "epoch": 0.4206160643574674, "grad_norm": 600.0, "learning_rate": 6.504843284768317e-05, "loss": 16.8809, "step": 10091 }, { "epoch": 0.4206577466549956, "grad_norm": 181.0, "learning_rate": 6.504199566517328e-05, "loss": 11.001, "step": 10092 }, { "epoch": 0.4206994289525239, "grad_norm": 362.0, "learning_rate": 6.503555820852167e-05, "loss": 15.0004, "step": 10093 }, { "epoch": 0.4207411112500521, "grad_norm": 632.0, "learning_rate": 6.502912047784568e-05, "loss": 20.0001, "step": 10094 }, { "epoch": 0.42078279354758036, "grad_norm": 390.0, "learning_rate": 6.502268247326264e-05, "loss": 15.8131, "step": 10095 }, { "epoch": 0.42082447584510857, "grad_norm": 52.75, "learning_rate": 6.501624419488988e-05, "loss": 8.063, "step": 10096 }, { "epoch": 0.42086615814263684, "grad_norm": 350.0, "learning_rate": 6.500980564284473e-05, "loss": 12.7531, "step": 10097 }, { "epoch": 0.42090784044016505, "grad_norm": 139.0, "learning_rate": 6.500336681724455e-05, "loss": 9.2503, "step": 10098 }, { "epoch": 0.4209495227376933, "grad_norm": 201.0, "learning_rate": 6.499692771820667e-05, "loss": 9.6881, "step": 10099 }, { "epoch": 0.4209912050352215, "grad_norm": 312.0, "learning_rate": 6.499048834584845e-05, "loss": 12.2502, "step": 10100 }, { "epoch": 0.4210328873327498, "grad_norm": 494.0, "learning_rate": 6.498404870028725e-05, "loss": 17.7505, "step": 10101 }, { "epoch": 0.421074569630278, "grad_norm": 166.0, "learning_rate": 6.497760878164043e-05, "loss": 9.5627, "step": 10102 }, { "epoch": 0.4211162519278063, "grad_norm": 524.0, "learning_rate": 6.497116859002536e-05, "loss": 17.8751, "step": 10103 }, { "epoch": 0.4211579342253345, "grad_norm": 960.0, "learning_rate": 6.496472812555942e-05, "loss": 25.6253, "step": 10104 }, { "epoch": 0.42119961652286275, "grad_norm": 136.0, "learning_rate": 6.495828738835999e-05, "loss": 10.1262, "step": 10105 }, { "epoch": 0.42124129882039096, "grad_norm": 352.0, "learning_rate": 6.495184637854443e-05, "loss": 15.1254, "step": 10106 }, { "epoch": 0.42128298111791923, "grad_norm": 141.0, "learning_rate": 6.494540509623016e-05, "loss": 10.3753, "step": 10107 }, { "epoch": 0.42132466341544744, "grad_norm": 111.0, "learning_rate": 6.493896354153453e-05, "loss": 9.3757, "step": 10108 }, { "epoch": 0.4213663457129757, "grad_norm": 125.0, "learning_rate": 6.493252171457498e-05, "loss": 10.188, "step": 10109 }, { "epoch": 0.4214080280105039, "grad_norm": 324.0, "learning_rate": 6.492607961546889e-05, "loss": 12.4379, "step": 10110 }, { "epoch": 0.4214497103080322, "grad_norm": 320.0, "learning_rate": 6.491963724433367e-05, "loss": 14.4377, "step": 10111 }, { "epoch": 0.4214913926055604, "grad_norm": 426.0, "learning_rate": 6.491319460128672e-05, "loss": 16.1252, "step": 10112 }, { "epoch": 0.42153307490308867, "grad_norm": 217.0, "learning_rate": 6.490675168644552e-05, "loss": 12.0002, "step": 10113 }, { "epoch": 0.4215747572006169, "grad_norm": 106.5, "learning_rate": 6.490030849992742e-05, "loss": 9.3127, "step": 10114 }, { "epoch": 0.42161643949814515, "grad_norm": 304.0, "learning_rate": 6.489386504184988e-05, "loss": 13.1252, "step": 10115 }, { "epoch": 0.42165812179567336, "grad_norm": 486.0, "learning_rate": 6.488742131233032e-05, "loss": 18.1255, "step": 10116 }, { "epoch": 0.4216998040932016, "grad_norm": 205.0, "learning_rate": 6.488097731148619e-05, "loss": 10.7504, "step": 10117 }, { "epoch": 0.42174148639072984, "grad_norm": 171.0, "learning_rate": 6.487453303943494e-05, "loss": 9.6879, "step": 10118 }, { "epoch": 0.4217831686882581, "grad_norm": 237.0, "learning_rate": 6.486808849629398e-05, "loss": 11.5627, "step": 10119 }, { "epoch": 0.4218248509857863, "grad_norm": 434.0, "learning_rate": 6.48616436821808e-05, "loss": 14.4378, "step": 10120 }, { "epoch": 0.4218665332833146, "grad_norm": 410.0, "learning_rate": 6.485519859721285e-05, "loss": 15.2501, "step": 10121 }, { "epoch": 0.4219082155808428, "grad_norm": 1012.0, "learning_rate": 6.484875324150759e-05, "loss": 29.8757, "step": 10122 }, { "epoch": 0.42194989787837106, "grad_norm": 400.0, "learning_rate": 6.484230761518246e-05, "loss": 15.0004, "step": 10123 }, { "epoch": 0.4219915801758993, "grad_norm": 245.0, "learning_rate": 6.483586171835497e-05, "loss": 13.1917, "step": 10124 }, { "epoch": 0.42203326247342754, "grad_norm": 147.0, "learning_rate": 6.48294155511426e-05, "loss": 5.8754, "step": 10125 }, { "epoch": 0.42207494477095575, "grad_norm": 340.0, "learning_rate": 6.48229691136628e-05, "loss": 13.438, "step": 10126 }, { "epoch": 0.422116627068484, "grad_norm": 89.0, "learning_rate": 6.481652240603306e-05, "loss": 8.9378, "step": 10127 }, { "epoch": 0.42215830936601223, "grad_norm": 1152.0, "learning_rate": 6.48100754283709e-05, "loss": 32.0044, "step": 10128 }, { "epoch": 0.4221999916635405, "grad_norm": 580.0, "learning_rate": 6.48036281807938e-05, "loss": 19.0005, "step": 10129 }, { "epoch": 0.4222416739610687, "grad_norm": 1056.0, "learning_rate": 6.479718066341925e-05, "loss": 30.2502, "step": 10130 }, { "epoch": 0.422283356258597, "grad_norm": 1004.0, "learning_rate": 6.479073287636479e-05, "loss": 23.0048, "step": 10131 }, { "epoch": 0.4223250385561252, "grad_norm": 101.5, "learning_rate": 6.47842848197479e-05, "loss": 9.5004, "step": 10132 }, { "epoch": 0.42236672085365345, "grad_norm": 214.0, "learning_rate": 6.477783649368609e-05, "loss": 11.2501, "step": 10133 }, { "epoch": 0.42240840315118167, "grad_norm": 64.5, "learning_rate": 6.477138789829692e-05, "loss": 8.1881, "step": 10134 }, { "epoch": 0.42245008544870993, "grad_norm": 464.0, "learning_rate": 6.476493903369788e-05, "loss": 17.2515, "step": 10135 }, { "epoch": 0.42249176774623814, "grad_norm": 121.5, "learning_rate": 6.475848990000653e-05, "loss": 9.4386, "step": 10136 }, { "epoch": 0.4225334500437664, "grad_norm": 494.0, "learning_rate": 6.475204049734038e-05, "loss": 17.1252, "step": 10137 }, { "epoch": 0.4225751323412947, "grad_norm": 568.0, "learning_rate": 6.474559082581699e-05, "loss": 21.1254, "step": 10138 }, { "epoch": 0.4226168146388229, "grad_norm": 604.0, "learning_rate": 6.473914088555388e-05, "loss": 19.8754, "step": 10139 }, { "epoch": 0.42265849693635116, "grad_norm": 390.0, "learning_rate": 6.473269067666865e-05, "loss": 14.8137, "step": 10140 }, { "epoch": 0.42270017923387937, "grad_norm": 640.0, "learning_rate": 6.472624019927879e-05, "loss": 19.2505, "step": 10141 }, { "epoch": 0.42274186153140764, "grad_norm": 67.0, "learning_rate": 6.471978945350192e-05, "loss": 8.3756, "step": 10142 }, { "epoch": 0.42278354382893585, "grad_norm": 720.0, "learning_rate": 6.471333843945558e-05, "loss": 22.1285, "step": 10143 }, { "epoch": 0.4228252261264641, "grad_norm": 362.0, "learning_rate": 6.470688715725734e-05, "loss": 12.6273, "step": 10144 }, { "epoch": 0.4228669084239923, "grad_norm": 708.0, "learning_rate": 6.470043560702476e-05, "loss": 23.3753, "step": 10145 }, { "epoch": 0.4229085907215206, "grad_norm": 338.0, "learning_rate": 6.469398378887546e-05, "loss": 13.8127, "step": 10146 }, { "epoch": 0.4229502730190488, "grad_norm": 680.0, "learning_rate": 6.468753170292698e-05, "loss": 18.5003, "step": 10147 }, { "epoch": 0.4229919553165771, "grad_norm": 342.0, "learning_rate": 6.468107934929692e-05, "loss": 14.8129, "step": 10148 }, { "epoch": 0.4230336376141053, "grad_norm": 494.0, "learning_rate": 6.467462672810291e-05, "loss": 17.1253, "step": 10149 }, { "epoch": 0.42307531991163355, "grad_norm": 253.0, "learning_rate": 6.466817383946252e-05, "loss": 11.7506, "step": 10150 }, { "epoch": 0.42311700220916176, "grad_norm": 136.0, "learning_rate": 6.466172068349336e-05, "loss": 7.907, "step": 10151 }, { "epoch": 0.42315868450669003, "grad_norm": 308.0, "learning_rate": 6.465526726031304e-05, "loss": 11.6278, "step": 10152 }, { "epoch": 0.42320036680421824, "grad_norm": 120.0, "learning_rate": 6.464881357003917e-05, "loss": 9.1881, "step": 10153 }, { "epoch": 0.4232420491017465, "grad_norm": 262.0, "learning_rate": 6.464235961278937e-05, "loss": 12.813, "step": 10154 }, { "epoch": 0.4232837313992747, "grad_norm": 448.0, "learning_rate": 6.463590538868127e-05, "loss": 16.7503, "step": 10155 }, { "epoch": 0.423325413696803, "grad_norm": 99.5, "learning_rate": 6.462945089783249e-05, "loss": 8.5002, "step": 10156 }, { "epoch": 0.4233670959943312, "grad_norm": 207.0, "learning_rate": 6.462299614036067e-05, "loss": 12.3128, "step": 10157 }, { "epoch": 0.42340877829185947, "grad_norm": 548.0, "learning_rate": 6.461654111638346e-05, "loss": 19.3768, "step": 10158 }, { "epoch": 0.4234504605893877, "grad_norm": 564.0, "learning_rate": 6.461008582601849e-05, "loss": 20.5002, "step": 10159 }, { "epoch": 0.42349214288691595, "grad_norm": 696.0, "learning_rate": 6.46036302693834e-05, "loss": 18.7537, "step": 10160 }, { "epoch": 0.42353382518444416, "grad_norm": 233.0, "learning_rate": 6.459717444659585e-05, "loss": 12.3752, "step": 10161 }, { "epoch": 0.4235755074819724, "grad_norm": 1200.0, "learning_rate": 6.45907183577735e-05, "loss": 28.3754, "step": 10162 }, { "epoch": 0.42361718977950064, "grad_norm": 608.0, "learning_rate": 6.4584262003034e-05, "loss": 16.0018, "step": 10163 }, { "epoch": 0.4236588720770289, "grad_norm": 372.0, "learning_rate": 6.457780538249504e-05, "loss": 14.5629, "step": 10164 }, { "epoch": 0.4237005543745571, "grad_norm": 360.0, "learning_rate": 6.45713484962743e-05, "loss": 12.3129, "step": 10165 }, { "epoch": 0.4237422366720854, "grad_norm": 426.0, "learning_rate": 6.456489134448943e-05, "loss": 16.5004, "step": 10166 }, { "epoch": 0.4237839189696136, "grad_norm": 213.0, "learning_rate": 6.455843392725813e-05, "loss": 11.8126, "step": 10167 }, { "epoch": 0.42382560126714186, "grad_norm": 107.5, "learning_rate": 6.455197624469805e-05, "loss": 10.5631, "step": 10168 }, { "epoch": 0.42386728356467007, "grad_norm": 223.0, "learning_rate": 6.454551829692694e-05, "loss": 11.1877, "step": 10169 }, { "epoch": 0.42390896586219834, "grad_norm": 121.5, "learning_rate": 6.453906008406245e-05, "loss": 8.0628, "step": 10170 }, { "epoch": 0.42395064815972655, "grad_norm": 159.0, "learning_rate": 6.453260160622232e-05, "loss": 9.4382, "step": 10171 }, { "epoch": 0.4239923304572548, "grad_norm": 130.0, "learning_rate": 6.452614286352422e-05, "loss": 9.5005, "step": 10172 }, { "epoch": 0.42403401275478303, "grad_norm": 442.0, "learning_rate": 6.451968385608586e-05, "loss": 15.8751, "step": 10173 }, { "epoch": 0.4240756950523113, "grad_norm": 516.0, "learning_rate": 6.4513224584025e-05, "loss": 18.0004, "step": 10174 }, { "epoch": 0.4241173773498395, "grad_norm": 312.0, "learning_rate": 6.450676504745933e-05, "loss": 12.3158, "step": 10175 }, { "epoch": 0.4241590596473678, "grad_norm": 176.0, "learning_rate": 6.450030524650657e-05, "loss": 12.1882, "step": 10176 }, { "epoch": 0.424200741944896, "grad_norm": 243.0, "learning_rate": 6.449384518128448e-05, "loss": 12.5627, "step": 10177 }, { "epoch": 0.42424242424242425, "grad_norm": 1184.0, "learning_rate": 6.448738485191075e-05, "loss": 26.3843, "step": 10178 }, { "epoch": 0.42428410653995247, "grad_norm": 1760.0, "learning_rate": 6.448092425850317e-05, "loss": 35.256, "step": 10179 }, { "epoch": 0.42432578883748073, "grad_norm": 664.0, "learning_rate": 6.447446340117943e-05, "loss": 20.6251, "step": 10180 }, { "epoch": 0.42436747113500894, "grad_norm": 136.0, "learning_rate": 6.446800228005732e-05, "loss": 11.0641, "step": 10181 }, { "epoch": 0.4244091534325372, "grad_norm": 462.0, "learning_rate": 6.446154089525459e-05, "loss": 15.7502, "step": 10182 }, { "epoch": 0.4244508357300654, "grad_norm": 394.0, "learning_rate": 6.445507924688899e-05, "loss": 15.9377, "step": 10183 }, { "epoch": 0.4244925180275937, "grad_norm": 1264.0, "learning_rate": 6.44486173350783e-05, "loss": 28.8767, "step": 10184 }, { "epoch": 0.4245342003251219, "grad_norm": 368.0, "learning_rate": 6.444215515994027e-05, "loss": 13.8753, "step": 10185 }, { "epoch": 0.42457588262265017, "grad_norm": 120.0, "learning_rate": 6.443569272159266e-05, "loss": 10.4387, "step": 10186 }, { "epoch": 0.4246175649201784, "grad_norm": 656.0, "learning_rate": 6.442923002015329e-05, "loss": 18.2548, "step": 10187 }, { "epoch": 0.42465924721770665, "grad_norm": 454.0, "learning_rate": 6.44227670557399e-05, "loss": 15.8753, "step": 10188 }, { "epoch": 0.42470092951523486, "grad_norm": 292.0, "learning_rate": 6.441630382847033e-05, "loss": 13.3129, "step": 10189 }, { "epoch": 0.4247426118127631, "grad_norm": 88.0, "learning_rate": 6.440984033846232e-05, "loss": 6.3442, "step": 10190 }, { "epoch": 0.42478429411029134, "grad_norm": 294.0, "learning_rate": 6.440337658583371e-05, "loss": 13.6263, "step": 10191 }, { "epoch": 0.4248259764078196, "grad_norm": 656.0, "learning_rate": 6.439691257070227e-05, "loss": 18.7508, "step": 10192 }, { "epoch": 0.4248676587053478, "grad_norm": 932.0, "learning_rate": 6.439044829318583e-05, "loss": 25.7502, "step": 10193 }, { "epoch": 0.4249093410028761, "grad_norm": 234.0, "learning_rate": 6.438398375340218e-05, "loss": 11.5627, "step": 10194 }, { "epoch": 0.4249510233004043, "grad_norm": 256.0, "learning_rate": 6.437751895146916e-05, "loss": 11.0007, "step": 10195 }, { "epoch": 0.42499270559793256, "grad_norm": 356.0, "learning_rate": 6.437105388750458e-05, "loss": 13.3751, "step": 10196 }, { "epoch": 0.4250343878954608, "grad_norm": 167.0, "learning_rate": 6.436458856162626e-05, "loss": 9.6265, "step": 10197 }, { "epoch": 0.42507607019298904, "grad_norm": 312.0, "learning_rate": 6.435812297395204e-05, "loss": 12.8129, "step": 10198 }, { "epoch": 0.42511775249051725, "grad_norm": 128.0, "learning_rate": 6.435165712459974e-05, "loss": 8.8755, "step": 10199 }, { "epoch": 0.4251594347880455, "grad_norm": 149.0, "learning_rate": 6.434519101368723e-05, "loss": 9.8758, "step": 10200 }, { "epoch": 0.42520111708557373, "grad_norm": 404.0, "learning_rate": 6.433872464133235e-05, "loss": 15.3754, "step": 10201 }, { "epoch": 0.425242799383102, "grad_norm": 888.0, "learning_rate": 6.433225800765293e-05, "loss": 21.376, "step": 10202 }, { "epoch": 0.4252844816806302, "grad_norm": 334.0, "learning_rate": 6.432579111276684e-05, "loss": 13.7504, "step": 10203 }, { "epoch": 0.4253261639781585, "grad_norm": 175.0, "learning_rate": 6.431932395679193e-05, "loss": 11.3752, "step": 10204 }, { "epoch": 0.4253678462756867, "grad_norm": 108.0, "learning_rate": 6.431285653984607e-05, "loss": 8.0002, "step": 10205 }, { "epoch": 0.42540952857321496, "grad_norm": 137.0, "learning_rate": 6.430638886204713e-05, "loss": 10.6253, "step": 10206 }, { "epoch": 0.42545121087074317, "grad_norm": 632.0, "learning_rate": 6.429992092351299e-05, "loss": 18.8755, "step": 10207 }, { "epoch": 0.42549289316827144, "grad_norm": 720.0, "learning_rate": 6.429345272436151e-05, "loss": 20.5002, "step": 10208 }, { "epoch": 0.4255345754657997, "grad_norm": 640.0, "learning_rate": 6.428698426471059e-05, "loss": 17.7523, "step": 10209 }, { "epoch": 0.4255762577633279, "grad_norm": 612.0, "learning_rate": 6.428051554467812e-05, "loss": 20.2503, "step": 10210 }, { "epoch": 0.4256179400608562, "grad_norm": 536.0, "learning_rate": 6.427404656438196e-05, "loss": 18.0003, "step": 10211 }, { "epoch": 0.4256596223583844, "grad_norm": 134.0, "learning_rate": 6.426757732394006e-05, "loss": 9.6921, "step": 10212 }, { "epoch": 0.42570130465591266, "grad_norm": 376.0, "learning_rate": 6.42611078234703e-05, "loss": 15.2502, "step": 10213 }, { "epoch": 0.42574298695344087, "grad_norm": 308.0, "learning_rate": 6.425463806309058e-05, "loss": 12.3751, "step": 10214 }, { "epoch": 0.42578466925096914, "grad_norm": 153.0, "learning_rate": 6.42481680429188e-05, "loss": 7.3762, "step": 10215 }, { "epoch": 0.42582635154849735, "grad_norm": 290.0, "learning_rate": 6.42416977630729e-05, "loss": 12.2527, "step": 10216 }, { "epoch": 0.4258680338460256, "grad_norm": 302.0, "learning_rate": 6.423522722367081e-05, "loss": 13.814, "step": 10217 }, { "epoch": 0.42590971614355383, "grad_norm": 184.0, "learning_rate": 6.422875642483043e-05, "loss": 12.1877, "step": 10218 }, { "epoch": 0.4259513984410821, "grad_norm": 378.0, "learning_rate": 6.42222853666697e-05, "loss": 14.4378, "step": 10219 }, { "epoch": 0.4259930807386103, "grad_norm": 197.0, "learning_rate": 6.421581404930654e-05, "loss": 9.8776, "step": 10220 }, { "epoch": 0.4260347630361386, "grad_norm": 320.0, "learning_rate": 6.420934247285893e-05, "loss": 14.6877, "step": 10221 }, { "epoch": 0.4260764453336668, "grad_norm": 262.0, "learning_rate": 6.42028706374448e-05, "loss": 12.3139, "step": 10222 }, { "epoch": 0.42611812763119505, "grad_norm": 430.0, "learning_rate": 6.419639854318206e-05, "loss": 14.3777, "step": 10223 }, { "epoch": 0.42615980992872327, "grad_norm": 272.0, "learning_rate": 6.41899261901887e-05, "loss": 12.6877, "step": 10224 }, { "epoch": 0.42620149222625153, "grad_norm": 904.0, "learning_rate": 6.41834535785827e-05, "loss": 26.0003, "step": 10225 }, { "epoch": 0.42624317452377974, "grad_norm": 700.0, "learning_rate": 6.4176980708482e-05, "loss": 17.7553, "step": 10226 }, { "epoch": 0.426284856821308, "grad_norm": 752.0, "learning_rate": 6.417050758000455e-05, "loss": 17.754, "step": 10227 }, { "epoch": 0.4263265391188362, "grad_norm": 227.0, "learning_rate": 6.416403419326834e-05, "loss": 11.3752, "step": 10228 }, { "epoch": 0.4263682214163645, "grad_norm": 344.0, "learning_rate": 6.415756054839136e-05, "loss": 14.9376, "step": 10229 }, { "epoch": 0.4264099037138927, "grad_norm": 992.0, "learning_rate": 6.415108664549158e-05, "loss": 26.877, "step": 10230 }, { "epoch": 0.42645158601142097, "grad_norm": 189.0, "learning_rate": 6.414461248468698e-05, "loss": 9.9378, "step": 10231 }, { "epoch": 0.4264932683089492, "grad_norm": 628.0, "learning_rate": 6.413813806609557e-05, "loss": 19.7502, "step": 10232 }, { "epoch": 0.42653495060647745, "grad_norm": 366.0, "learning_rate": 6.413166338983535e-05, "loss": 14.8128, "step": 10233 }, { "epoch": 0.42657663290400566, "grad_norm": 350.0, "learning_rate": 6.41251884560243e-05, "loss": 14.0005, "step": 10234 }, { "epoch": 0.4266183152015339, "grad_norm": 370.0, "learning_rate": 6.411871326478042e-05, "loss": 10.3133, "step": 10235 }, { "epoch": 0.42665999749906214, "grad_norm": 302.0, "learning_rate": 6.411223781622175e-05, "loss": 13.2507, "step": 10236 }, { "epoch": 0.4267016797965904, "grad_norm": 316.0, "learning_rate": 6.410576211046631e-05, "loss": 13.5628, "step": 10237 }, { "epoch": 0.4267433620941186, "grad_norm": 516.0, "learning_rate": 6.409928614763208e-05, "loss": 16.0002, "step": 10238 }, { "epoch": 0.4267850443916469, "grad_norm": 708.0, "learning_rate": 6.409280992783711e-05, "loss": 21.8752, "step": 10239 }, { "epoch": 0.4268267266891751, "grad_norm": 616.0, "learning_rate": 6.408633345119944e-05, "loss": 19.0014, "step": 10240 }, { "epoch": 0.42686840898670336, "grad_norm": 97.5, "learning_rate": 6.407985671783709e-05, "loss": 8.4378, "step": 10241 }, { "epoch": 0.4269100912842316, "grad_norm": 310.0, "learning_rate": 6.407337972786811e-05, "loss": 11.8755, "step": 10242 }, { "epoch": 0.42695177358175984, "grad_norm": 464.0, "learning_rate": 6.406690248141052e-05, "loss": 14.8768, "step": 10243 }, { "epoch": 0.42699345587928805, "grad_norm": 410.0, "learning_rate": 6.406042497858239e-05, "loss": 16.0007, "step": 10244 }, { "epoch": 0.4270351381768163, "grad_norm": 81.0, "learning_rate": 6.405394721950176e-05, "loss": 9.4381, "step": 10245 }, { "epoch": 0.42707682047434453, "grad_norm": 232.0, "learning_rate": 6.40474692042867e-05, "loss": 11.1878, "step": 10246 }, { "epoch": 0.4271185027718728, "grad_norm": 350.0, "learning_rate": 6.404099093305527e-05, "loss": 14.0002, "step": 10247 }, { "epoch": 0.427160185069401, "grad_norm": 320.0, "learning_rate": 6.403451240592553e-05, "loss": 14.2503, "step": 10248 }, { "epoch": 0.4272018673669293, "grad_norm": 466.0, "learning_rate": 6.402803362301555e-05, "loss": 17.3752, "step": 10249 }, { "epoch": 0.4272435496644575, "grad_norm": 1080.0, "learning_rate": 6.402155458444341e-05, "loss": 29.3752, "step": 10250 }, { "epoch": 0.42728523196198576, "grad_norm": 1408.0, "learning_rate": 6.40150752903272e-05, "loss": 34.7509, "step": 10251 }, { "epoch": 0.42732691425951397, "grad_norm": 732.0, "learning_rate": 6.4008595740785e-05, "loss": 22.6274, "step": 10252 }, { "epoch": 0.42736859655704224, "grad_norm": 278.0, "learning_rate": 6.40021159359349e-05, "loss": 13.7505, "step": 10253 }, { "epoch": 0.42741027885457045, "grad_norm": 316.0, "learning_rate": 6.399563587589499e-05, "loss": 14.1884, "step": 10254 }, { "epoch": 0.4274519611520987, "grad_norm": 426.0, "learning_rate": 6.398915556078337e-05, "loss": 15.1252, "step": 10255 }, { "epoch": 0.4274936434496269, "grad_norm": 105.5, "learning_rate": 6.398267499071816e-05, "loss": 9.3756, "step": 10256 }, { "epoch": 0.4275353257471552, "grad_norm": 512.0, "learning_rate": 6.397619416581746e-05, "loss": 17.2503, "step": 10257 }, { "epoch": 0.4275770080446834, "grad_norm": 169.0, "learning_rate": 6.396971308619937e-05, "loss": 10.3754, "step": 10258 }, { "epoch": 0.42761869034221167, "grad_norm": 185.0, "learning_rate": 6.396323175198202e-05, "loss": 11.8769, "step": 10259 }, { "epoch": 0.4276603726397399, "grad_norm": 1728.0, "learning_rate": 6.395675016328352e-05, "loss": 34.0051, "step": 10260 }, { "epoch": 0.42770205493726815, "grad_norm": 374.0, "learning_rate": 6.395026832022202e-05, "loss": 14.2504, "step": 10261 }, { "epoch": 0.42774373723479636, "grad_norm": 172.0, "learning_rate": 6.394378622291565e-05, "loss": 10.0007, "step": 10262 }, { "epoch": 0.42778541953232463, "grad_norm": 264.0, "learning_rate": 6.393730387148252e-05, "loss": 12.8751, "step": 10263 }, { "epoch": 0.42782710182985284, "grad_norm": 98.5, "learning_rate": 6.39308212660408e-05, "loss": 7.8753, "step": 10264 }, { "epoch": 0.4278687841273811, "grad_norm": 1280.0, "learning_rate": 6.392433840670864e-05, "loss": 26.1294, "step": 10265 }, { "epoch": 0.4279104664249093, "grad_norm": 434.0, "learning_rate": 6.391785529360416e-05, "loss": 15.2503, "step": 10266 }, { "epoch": 0.4279521487224376, "grad_norm": 306.0, "learning_rate": 6.391137192684553e-05, "loss": 13.0629, "step": 10267 }, { "epoch": 0.4279938310199658, "grad_norm": 504.0, "learning_rate": 6.390488830655092e-05, "loss": 19.626, "step": 10268 }, { "epoch": 0.42803551331749407, "grad_norm": 804.0, "learning_rate": 6.389840443283847e-05, "loss": 23.5, "step": 10269 }, { "epoch": 0.4280771956150223, "grad_norm": 500.0, "learning_rate": 6.38919203058264e-05, "loss": 17.2508, "step": 10270 }, { "epoch": 0.42811887791255054, "grad_norm": 502.0, "learning_rate": 6.388543592563282e-05, "loss": 16.7501, "step": 10271 }, { "epoch": 0.42816056021007876, "grad_norm": 229.0, "learning_rate": 6.387895129237594e-05, "loss": 11.8753, "step": 10272 }, { "epoch": 0.428202242507607, "grad_norm": 114.5, "learning_rate": 6.387246640617395e-05, "loss": 9.7515, "step": 10273 }, { "epoch": 0.42824392480513523, "grad_norm": 264.0, "learning_rate": 6.386598126714501e-05, "loss": 12.8129, "step": 10274 }, { "epoch": 0.4282856071026635, "grad_norm": 1368.0, "learning_rate": 6.385949587540735e-05, "loss": 34.7503, "step": 10275 }, { "epoch": 0.4283272894001917, "grad_norm": 158.0, "learning_rate": 6.385301023107914e-05, "loss": 9.7503, "step": 10276 }, { "epoch": 0.42836897169772, "grad_norm": 776.0, "learning_rate": 6.384652433427859e-05, "loss": 21.7528, "step": 10277 }, { "epoch": 0.4284106539952482, "grad_norm": 358.0, "learning_rate": 6.384003818512391e-05, "loss": 15.0006, "step": 10278 }, { "epoch": 0.42845233629277646, "grad_norm": 374.0, "learning_rate": 6.38335517837333e-05, "loss": 16.3752, "step": 10279 }, { "epoch": 0.42849401859030467, "grad_norm": 266.0, "learning_rate": 6.382706513022497e-05, "loss": 13.7504, "step": 10280 }, { "epoch": 0.42853570088783294, "grad_norm": 450.0, "learning_rate": 6.382057822471717e-05, "loss": 15.6256, "step": 10281 }, { "epoch": 0.4285773831853612, "grad_norm": 468.0, "learning_rate": 6.38140910673281e-05, "loss": 15.1276, "step": 10282 }, { "epoch": 0.4286190654828894, "grad_norm": 241.0, "learning_rate": 6.380760365817598e-05, "loss": 11.8128, "step": 10283 }, { "epoch": 0.4286607477804177, "grad_norm": 292.0, "learning_rate": 6.380111599737908e-05, "loss": 9.6891, "step": 10284 }, { "epoch": 0.4287024300779459, "grad_norm": 1056.0, "learning_rate": 6.37946280850556e-05, "loss": 23.8802, "step": 10285 }, { "epoch": 0.42874411237547416, "grad_norm": 356.0, "learning_rate": 6.37881399213238e-05, "loss": 14.6877, "step": 10286 }, { "epoch": 0.4287857946730024, "grad_norm": 520.0, "learning_rate": 6.378165150630192e-05, "loss": 17.8756, "step": 10287 }, { "epoch": 0.42882747697053064, "grad_norm": 524.0, "learning_rate": 6.377516284010822e-05, "loss": 17.2504, "step": 10288 }, { "epoch": 0.42886915926805885, "grad_norm": 249.0, "learning_rate": 6.376867392286096e-05, "loss": 12.0011, "step": 10289 }, { "epoch": 0.4289108415655871, "grad_norm": 288.0, "learning_rate": 6.376218475467841e-05, "loss": 13.7508, "step": 10290 }, { "epoch": 0.42895252386311533, "grad_norm": 384.0, "learning_rate": 6.37556953356788e-05, "loss": 14.9417, "step": 10291 }, { "epoch": 0.4289942061606436, "grad_norm": 744.0, "learning_rate": 6.374920566598044e-05, "loss": 20.8752, "step": 10292 }, { "epoch": 0.4290358884581718, "grad_norm": 356.0, "learning_rate": 6.374271574570156e-05, "loss": 13.1878, "step": 10293 }, { "epoch": 0.4290775707557001, "grad_norm": 98.0, "learning_rate": 6.373622557496049e-05, "loss": 9.8129, "step": 10294 }, { "epoch": 0.4291192530532283, "grad_norm": 270.0, "learning_rate": 6.372973515387548e-05, "loss": 12.6254, "step": 10295 }, { "epoch": 0.42916093535075656, "grad_norm": 282.0, "learning_rate": 6.372324448256482e-05, "loss": 12.8127, "step": 10296 }, { "epoch": 0.42920261764828477, "grad_norm": 552.0, "learning_rate": 6.371675356114683e-05, "loss": 19.0004, "step": 10297 }, { "epoch": 0.42924429994581303, "grad_norm": 218.0, "learning_rate": 6.371026238973978e-05, "loss": 12.0002, "step": 10298 }, { "epoch": 0.42928598224334125, "grad_norm": 390.0, "learning_rate": 6.370377096846196e-05, "loss": 15.6882, "step": 10299 }, { "epoch": 0.4293276645408695, "grad_norm": 438.0, "learning_rate": 6.369727929743172e-05, "loss": 15.5628, "step": 10300 }, { "epoch": 0.4293693468383977, "grad_norm": 262.0, "learning_rate": 6.369078737676735e-05, "loss": 12.3127, "step": 10301 }, { "epoch": 0.429411029135926, "grad_norm": 360.0, "learning_rate": 6.368429520658716e-05, "loss": 13.8127, "step": 10302 }, { "epoch": 0.4294527114334542, "grad_norm": 520.0, "learning_rate": 6.367780278700948e-05, "loss": 17.1258, "step": 10303 }, { "epoch": 0.42949439373098247, "grad_norm": 231.0, "learning_rate": 6.367131011815261e-05, "loss": 12.3132, "step": 10304 }, { "epoch": 0.4295360760285107, "grad_norm": 476.0, "learning_rate": 6.366481720013492e-05, "loss": 16.7504, "step": 10305 }, { "epoch": 0.42957775832603895, "grad_norm": 438.0, "learning_rate": 6.365832403307472e-05, "loss": 15.8133, "step": 10306 }, { "epoch": 0.42961944062356716, "grad_norm": 684.0, "learning_rate": 6.365183061709034e-05, "loss": 21.8753, "step": 10307 }, { "epoch": 0.42966112292109543, "grad_norm": 174.0, "learning_rate": 6.364533695230015e-05, "loss": 10.0634, "step": 10308 }, { "epoch": 0.42970280521862364, "grad_norm": 392.0, "learning_rate": 6.363884303882248e-05, "loss": 14.5627, "step": 10309 }, { "epoch": 0.4297444875161519, "grad_norm": 202.0, "learning_rate": 6.363234887677568e-05, "loss": 12.0656, "step": 10310 }, { "epoch": 0.4297861698136801, "grad_norm": 644.0, "learning_rate": 6.362585446627812e-05, "loss": 21.7503, "step": 10311 }, { "epoch": 0.4298278521112084, "grad_norm": 152.0, "learning_rate": 6.361935980744813e-05, "loss": 10.8127, "step": 10312 }, { "epoch": 0.4298695344087366, "grad_norm": 438.0, "learning_rate": 6.361286490040412e-05, "loss": 16.2502, "step": 10313 }, { "epoch": 0.42991121670626486, "grad_norm": 249.0, "learning_rate": 6.360636974526444e-05, "loss": 13.7504, "step": 10314 }, { "epoch": 0.4299528990037931, "grad_norm": 476.0, "learning_rate": 6.359987434214744e-05, "loss": 16.2501, "step": 10315 }, { "epoch": 0.42999458130132134, "grad_norm": 372.0, "learning_rate": 6.359337869117156e-05, "loss": 13.1264, "step": 10316 }, { "epoch": 0.43003626359884956, "grad_norm": 220.0, "learning_rate": 6.358688279245513e-05, "loss": 12.7506, "step": 10317 }, { "epoch": 0.4300779458963778, "grad_norm": 406.0, "learning_rate": 6.358038664611654e-05, "loss": 15.4392, "step": 10318 }, { "epoch": 0.43011962819390603, "grad_norm": 448.0, "learning_rate": 6.357389025227421e-05, "loss": 16.7504, "step": 10319 }, { "epoch": 0.4301613104914343, "grad_norm": 368.0, "learning_rate": 6.356739361104653e-05, "loss": 13.1877, "step": 10320 }, { "epoch": 0.4302029927889625, "grad_norm": 258.0, "learning_rate": 6.35608967225519e-05, "loss": 12.8129, "step": 10321 }, { "epoch": 0.4302446750864908, "grad_norm": 181.0, "learning_rate": 6.355439958690871e-05, "loss": 11.2501, "step": 10322 }, { "epoch": 0.430286357384019, "grad_norm": 242.0, "learning_rate": 6.354790220423539e-05, "loss": 11.1253, "step": 10323 }, { "epoch": 0.43032803968154726, "grad_norm": 380.0, "learning_rate": 6.354140457465035e-05, "loss": 15.2503, "step": 10324 }, { "epoch": 0.43036972197907547, "grad_norm": 446.0, "learning_rate": 6.3534906698272e-05, "loss": 14.441, "step": 10325 }, { "epoch": 0.43041140427660374, "grad_norm": 326.0, "learning_rate": 6.352840857521878e-05, "loss": 14.1877, "step": 10326 }, { "epoch": 0.43045308657413195, "grad_norm": 796.0, "learning_rate": 6.352191020560912e-05, "loss": 21.0003, "step": 10327 }, { "epoch": 0.4304947688716602, "grad_norm": 628.0, "learning_rate": 6.351541158956144e-05, "loss": 19.2503, "step": 10328 }, { "epoch": 0.4305364511691884, "grad_norm": 215.0, "learning_rate": 6.350891272719417e-05, "loss": 11.6252, "step": 10329 }, { "epoch": 0.4305781334667167, "grad_norm": 446.0, "learning_rate": 6.350241361862579e-05, "loss": 17.2502, "step": 10330 }, { "epoch": 0.4306198157642449, "grad_norm": 516.0, "learning_rate": 6.349591426397472e-05, "loss": 18.2502, "step": 10331 }, { "epoch": 0.4306614980617732, "grad_norm": 166.0, "learning_rate": 6.348941466335941e-05, "loss": 9.7502, "step": 10332 }, { "epoch": 0.4307031803593014, "grad_norm": 346.0, "learning_rate": 6.348291481689831e-05, "loss": 14.5627, "step": 10333 }, { "epoch": 0.43074486265682965, "grad_norm": 56.25, "learning_rate": 6.347641472470991e-05, "loss": 7.0943, "step": 10334 }, { "epoch": 0.43078654495435786, "grad_norm": 212.0, "learning_rate": 6.346991438691265e-05, "loss": 11.5003, "step": 10335 }, { "epoch": 0.43082822725188613, "grad_norm": 88.5, "learning_rate": 6.346341380362499e-05, "loss": 9.6885, "step": 10336 }, { "epoch": 0.43086990954941434, "grad_norm": 410.0, "learning_rate": 6.345691297496543e-05, "loss": 15.626, "step": 10337 }, { "epoch": 0.4309115918469426, "grad_norm": 183.0, "learning_rate": 6.345041190105243e-05, "loss": 10.7502, "step": 10338 }, { "epoch": 0.4309532741444708, "grad_norm": 200.0, "learning_rate": 6.344391058200449e-05, "loss": 10.5627, "step": 10339 }, { "epoch": 0.4309949564419991, "grad_norm": 116.0, "learning_rate": 6.343740901794008e-05, "loss": 9.7505, "step": 10340 }, { "epoch": 0.4310366387395273, "grad_norm": 426.0, "learning_rate": 6.34309072089777e-05, "loss": 15.2503, "step": 10341 }, { "epoch": 0.43107832103705557, "grad_norm": 780.0, "learning_rate": 6.342440515523584e-05, "loss": 22.3755, "step": 10342 }, { "epoch": 0.4311200033345838, "grad_norm": 564.0, "learning_rate": 6.3417902856833e-05, "loss": 18.876, "step": 10343 }, { "epoch": 0.43116168563211205, "grad_norm": 692.0, "learning_rate": 6.34114003138877e-05, "loss": 20.6256, "step": 10344 }, { "epoch": 0.43120336792964026, "grad_norm": 326.0, "learning_rate": 6.340489752651843e-05, "loss": 13.1254, "step": 10345 }, { "epoch": 0.4312450502271685, "grad_norm": 324.0, "learning_rate": 6.339839449484371e-05, "loss": 12.7515, "step": 10346 }, { "epoch": 0.43128673252469674, "grad_norm": 374.0, "learning_rate": 6.339189121898208e-05, "loss": 15.6255, "step": 10347 }, { "epoch": 0.431328414822225, "grad_norm": 852.0, "learning_rate": 6.338538769905202e-05, "loss": 22.5002, "step": 10348 }, { "epoch": 0.4313700971197532, "grad_norm": 215.0, "learning_rate": 6.33788839351721e-05, "loss": 9.8758, "step": 10349 }, { "epoch": 0.4314117794172815, "grad_norm": 438.0, "learning_rate": 6.337237992746082e-05, "loss": 13.3164, "step": 10350 }, { "epoch": 0.4314534617148097, "grad_norm": 448.0, "learning_rate": 6.336587567603673e-05, "loss": 16.6255, "step": 10351 }, { "epoch": 0.43149514401233796, "grad_norm": 744.0, "learning_rate": 6.335937118101836e-05, "loss": 20.2508, "step": 10352 }, { "epoch": 0.4315368263098662, "grad_norm": 260.0, "learning_rate": 6.33528664425243e-05, "loss": 13.2503, "step": 10353 }, { "epoch": 0.43157850860739444, "grad_norm": 568.0, "learning_rate": 6.334636146067304e-05, "loss": 18.0002, "step": 10354 }, { "epoch": 0.4316201909049227, "grad_norm": 502.0, "learning_rate": 6.333985623558315e-05, "loss": 14.7539, "step": 10355 }, { "epoch": 0.4316618732024509, "grad_norm": 304.0, "learning_rate": 6.33333507673732e-05, "loss": 13.1877, "step": 10356 }, { "epoch": 0.4317035554999792, "grad_norm": 544.0, "learning_rate": 6.332684505616175e-05, "loss": 19.3753, "step": 10357 }, { "epoch": 0.4317452377975074, "grad_norm": 374.0, "learning_rate": 6.332033910206737e-05, "loss": 14.5629, "step": 10358 }, { "epoch": 0.43178692009503566, "grad_norm": 436.0, "learning_rate": 6.331383290520862e-05, "loss": 16.7503, "step": 10359 }, { "epoch": 0.4318286023925639, "grad_norm": 604.0, "learning_rate": 6.33073264657041e-05, "loss": 20.7502, "step": 10360 }, { "epoch": 0.43187028469009214, "grad_norm": 219.0, "learning_rate": 6.330081978367238e-05, "loss": 12.1264, "step": 10361 }, { "epoch": 0.43191196698762035, "grad_norm": 820.0, "learning_rate": 6.329431285923199e-05, "loss": 24.1254, "step": 10362 }, { "epoch": 0.4319536492851486, "grad_norm": 98.5, "learning_rate": 6.328780569250161e-05, "loss": 8.2502, "step": 10363 }, { "epoch": 0.43199533158267683, "grad_norm": 968.0, "learning_rate": 6.328129828359977e-05, "loss": 24.6259, "step": 10364 }, { "epoch": 0.4320370138802051, "grad_norm": 266.0, "learning_rate": 6.32747906326451e-05, "loss": 13.5003, "step": 10365 }, { "epoch": 0.4320786961777333, "grad_norm": 680.0, "learning_rate": 6.32682827397562e-05, "loss": 16.8801, "step": 10366 }, { "epoch": 0.4321203784752616, "grad_norm": 72.5, "learning_rate": 6.326177460505167e-05, "loss": 8.1254, "step": 10367 }, { "epoch": 0.4321620607727898, "grad_norm": 318.0, "learning_rate": 6.325526622865012e-05, "loss": 13.3752, "step": 10368 }, { "epoch": 0.43220374307031806, "grad_norm": 506.0, "learning_rate": 6.324875761067015e-05, "loss": 17.0003, "step": 10369 }, { "epoch": 0.43224542536784627, "grad_norm": 173.0, "learning_rate": 6.32422487512304e-05, "loss": 10.4378, "step": 10370 }, { "epoch": 0.43228710766537454, "grad_norm": 368.0, "learning_rate": 6.32357396504495e-05, "loss": 10.7503, "step": 10371 }, { "epoch": 0.43232878996290275, "grad_norm": 460.0, "learning_rate": 6.322923030844608e-05, "loss": 16.8752, "step": 10372 }, { "epoch": 0.432370472260431, "grad_norm": 304.0, "learning_rate": 6.322272072533874e-05, "loss": 11.7502, "step": 10373 }, { "epoch": 0.4324121545579592, "grad_norm": 370.0, "learning_rate": 6.321621090124616e-05, "loss": 13.5628, "step": 10374 }, { "epoch": 0.4324538368554875, "grad_norm": 652.0, "learning_rate": 6.320970083628695e-05, "loss": 19.7501, "step": 10375 }, { "epoch": 0.4324955191530157, "grad_norm": 772.0, "learning_rate": 6.320319053057976e-05, "loss": 21.6251, "step": 10376 }, { "epoch": 0.432537201450544, "grad_norm": 340.0, "learning_rate": 6.319667998424327e-05, "loss": 15.1264, "step": 10377 }, { "epoch": 0.4325788837480722, "grad_norm": 107.5, "learning_rate": 6.319016919739611e-05, "loss": 7.1564, "step": 10378 }, { "epoch": 0.43262056604560045, "grad_norm": 414.0, "learning_rate": 6.318365817015695e-05, "loss": 15.3135, "step": 10379 }, { "epoch": 0.43266224834312866, "grad_norm": 468.0, "learning_rate": 6.317714690264445e-05, "loss": 15.7534, "step": 10380 }, { "epoch": 0.43270393064065693, "grad_norm": 382.0, "learning_rate": 6.317063539497727e-05, "loss": 14.1877, "step": 10381 }, { "epoch": 0.43274561293818514, "grad_norm": 139.0, "learning_rate": 6.316412364727408e-05, "loss": 8.7507, "step": 10382 }, { "epoch": 0.4327872952357134, "grad_norm": 336.0, "learning_rate": 6.31576116596536e-05, "loss": 13.2502, "step": 10383 }, { "epoch": 0.4328289775332416, "grad_norm": 62.75, "learning_rate": 6.315109943223445e-05, "loss": 7.469, "step": 10384 }, { "epoch": 0.4328706598307699, "grad_norm": 328.0, "learning_rate": 6.314458696513535e-05, "loss": 13.2508, "step": 10385 }, { "epoch": 0.4329123421282981, "grad_norm": 64.5, "learning_rate": 6.3138074258475e-05, "loss": 8.7504, "step": 10386 }, { "epoch": 0.43295402442582637, "grad_norm": 430.0, "learning_rate": 6.313156131237206e-05, "loss": 15.8753, "step": 10387 }, { "epoch": 0.4329957067233546, "grad_norm": 524.0, "learning_rate": 6.312504812694526e-05, "loss": 17.1273, "step": 10388 }, { "epoch": 0.43303738902088285, "grad_norm": 139.0, "learning_rate": 6.31185347023133e-05, "loss": 9.1884, "step": 10389 }, { "epoch": 0.43307907131841106, "grad_norm": 210.0, "learning_rate": 6.311202103859487e-05, "loss": 11.8757, "step": 10390 }, { "epoch": 0.4331207536159393, "grad_norm": 264.0, "learning_rate": 6.31055071359087e-05, "loss": 12.5004, "step": 10391 }, { "epoch": 0.43316243591346754, "grad_norm": 92.5, "learning_rate": 6.309899299437349e-05, "loss": 9.9392, "step": 10392 }, { "epoch": 0.4332041182109958, "grad_norm": 328.0, "learning_rate": 6.309247861410798e-05, "loss": 13.0629, "step": 10393 }, { "epoch": 0.433245800508524, "grad_norm": 636.0, "learning_rate": 6.308596399523089e-05, "loss": 18.3756, "step": 10394 }, { "epoch": 0.4332874828060523, "grad_norm": 122.0, "learning_rate": 6.307944913786093e-05, "loss": 9.0631, "step": 10395 }, { "epoch": 0.4333291651035805, "grad_norm": 217.0, "learning_rate": 6.307293404211687e-05, "loss": 11.3127, "step": 10396 }, { "epoch": 0.43337084740110876, "grad_norm": 366.0, "learning_rate": 6.306641870811741e-05, "loss": 13.5627, "step": 10397 }, { "epoch": 0.433412529698637, "grad_norm": 416.0, "learning_rate": 6.30599031359813e-05, "loss": 15.9385, "step": 10398 }, { "epoch": 0.43345421199616524, "grad_norm": 784.0, "learning_rate": 6.30533873258273e-05, "loss": 23.8752, "step": 10399 }, { "epoch": 0.43349589429369345, "grad_norm": 284.0, "learning_rate": 6.304687127777415e-05, "loss": 15.2504, "step": 10400 }, { "epoch": 0.4335375765912217, "grad_norm": 556.0, "learning_rate": 6.304035499194063e-05, "loss": 19.6254, "step": 10401 }, { "epoch": 0.43357925888874993, "grad_norm": 520.0, "learning_rate": 6.303383846844548e-05, "loss": 17.3755, "step": 10402 }, { "epoch": 0.4336209411862782, "grad_norm": 704.0, "learning_rate": 6.302732170740748e-05, "loss": 20.8752, "step": 10403 }, { "epoch": 0.4336626234838064, "grad_norm": 218.0, "learning_rate": 6.302080470894536e-05, "loss": 10.1253, "step": 10404 }, { "epoch": 0.4337043057813347, "grad_norm": 204.0, "learning_rate": 6.301428747317793e-05, "loss": 10.5005, "step": 10405 }, { "epoch": 0.4337459880788629, "grad_norm": 177.0, "learning_rate": 6.300777000022396e-05, "loss": 11.1253, "step": 10406 }, { "epoch": 0.43378767037639115, "grad_norm": 488.0, "learning_rate": 6.300125229020221e-05, "loss": 16.6252, "step": 10407 }, { "epoch": 0.43382935267391937, "grad_norm": 328.0, "learning_rate": 6.299473434323151e-05, "loss": 15.0012, "step": 10408 }, { "epoch": 0.43387103497144763, "grad_norm": 145.0, "learning_rate": 6.29882161594306e-05, "loss": 8.8757, "step": 10409 }, { "epoch": 0.43391271726897584, "grad_norm": 118.5, "learning_rate": 6.29816977389183e-05, "loss": 9.5003, "step": 10410 }, { "epoch": 0.4339543995665041, "grad_norm": 100.0, "learning_rate": 6.297517908181342e-05, "loss": 8.7504, "step": 10411 }, { "epoch": 0.4339960818640323, "grad_norm": 454.0, "learning_rate": 6.296866018823473e-05, "loss": 15.6252, "step": 10412 }, { "epoch": 0.4340377641615606, "grad_norm": 386.0, "learning_rate": 6.296214105830108e-05, "loss": 13.1255, "step": 10413 }, { "epoch": 0.4340794464590888, "grad_norm": 221.0, "learning_rate": 6.295562169213124e-05, "loss": 12.5003, "step": 10414 }, { "epoch": 0.43412112875661707, "grad_norm": 298.0, "learning_rate": 6.294910208984405e-05, "loss": 12.5627, "step": 10415 }, { "epoch": 0.4341628110541453, "grad_norm": 204.0, "learning_rate": 6.294258225155832e-05, "loss": 10.4381, "step": 10416 }, { "epoch": 0.43420449335167355, "grad_norm": 442.0, "learning_rate": 6.293606217739288e-05, "loss": 15.2503, "step": 10417 }, { "epoch": 0.43424617564920176, "grad_norm": 53.0, "learning_rate": 6.292954186746657e-05, "loss": 6.9065, "step": 10418 }, { "epoch": 0.43428785794673, "grad_norm": 436.0, "learning_rate": 6.29230213218982e-05, "loss": 16.0003, "step": 10419 }, { "epoch": 0.43432954024425824, "grad_norm": 258.0, "learning_rate": 6.291650054080663e-05, "loss": 12.3127, "step": 10420 }, { "epoch": 0.4343712225417865, "grad_norm": 358.0, "learning_rate": 6.290997952431069e-05, "loss": 14.6256, "step": 10421 }, { "epoch": 0.4344129048393147, "grad_norm": 185.0, "learning_rate": 6.290345827252922e-05, "loss": 10.938, "step": 10422 }, { "epoch": 0.434454587136843, "grad_norm": 207.0, "learning_rate": 6.289693678558109e-05, "loss": 10.6258, "step": 10423 }, { "epoch": 0.4344962694343712, "grad_norm": 274.0, "learning_rate": 6.289041506358513e-05, "loss": 12.5002, "step": 10424 }, { "epoch": 0.43453795173189946, "grad_norm": 474.0, "learning_rate": 6.28838931066602e-05, "loss": 17.5003, "step": 10425 }, { "epoch": 0.4345796340294277, "grad_norm": 151.0, "learning_rate": 6.287737091492519e-05, "loss": 9.7502, "step": 10426 }, { "epoch": 0.43462131632695594, "grad_norm": 40.75, "learning_rate": 6.287084848849894e-05, "loss": 7.2503, "step": 10427 }, { "epoch": 0.4346629986244842, "grad_norm": 161.0, "learning_rate": 6.286432582750034e-05, "loss": 8.3141, "step": 10428 }, { "epoch": 0.4347046809220124, "grad_norm": 138.0, "learning_rate": 6.285780293204827e-05, "loss": 10.1258, "step": 10429 }, { "epoch": 0.4347463632195407, "grad_norm": 310.0, "learning_rate": 6.28512798022616e-05, "loss": 12.6878, "step": 10430 }, { "epoch": 0.4347880455170689, "grad_norm": 199.0, "learning_rate": 6.28447564382592e-05, "loss": 11.2502, "step": 10431 }, { "epoch": 0.43482972781459717, "grad_norm": 196.0, "learning_rate": 6.283823284015999e-05, "loss": 10.9377, "step": 10432 }, { "epoch": 0.4348714101121254, "grad_norm": 112.0, "learning_rate": 6.283170900808284e-05, "loss": 5.7504, "step": 10433 }, { "epoch": 0.43491309240965365, "grad_norm": 442.0, "learning_rate": 6.282518494214665e-05, "loss": 15.6879, "step": 10434 }, { "epoch": 0.43495477470718186, "grad_norm": 163.0, "learning_rate": 6.281866064247033e-05, "loss": 10.3754, "step": 10435 }, { "epoch": 0.4349964570047101, "grad_norm": 2192.0, "learning_rate": 6.281213610917278e-05, "loss": 41.7507, "step": 10436 }, { "epoch": 0.43503813930223834, "grad_norm": 53.25, "learning_rate": 6.280561134237292e-05, "loss": 7.5946, "step": 10437 }, { "epoch": 0.4350798215997666, "grad_norm": 270.0, "learning_rate": 6.279908634218963e-05, "loss": 11.8129, "step": 10438 }, { "epoch": 0.4351215038972948, "grad_norm": 364.0, "learning_rate": 6.279256110874187e-05, "loss": 14.6254, "step": 10439 }, { "epoch": 0.4351631861948231, "grad_norm": 272.0, "learning_rate": 6.278603564214855e-05, "loss": 12.8752, "step": 10440 }, { "epoch": 0.4352048684923513, "grad_norm": 276.0, "learning_rate": 6.277950994252861e-05, "loss": 13.1885, "step": 10441 }, { "epoch": 0.43524655078987956, "grad_norm": 249.0, "learning_rate": 6.277298401000095e-05, "loss": 12.6255, "step": 10442 }, { "epoch": 0.43528823308740777, "grad_norm": 116.5, "learning_rate": 6.276645784468453e-05, "loss": 10.5632, "step": 10443 }, { "epoch": 0.43532991538493604, "grad_norm": 492.0, "learning_rate": 6.275993144669828e-05, "loss": 17.0007, "step": 10444 }, { "epoch": 0.43537159768246425, "grad_norm": 402.0, "learning_rate": 6.275340481616114e-05, "loss": 16.6281, "step": 10445 }, { "epoch": 0.4354132799799925, "grad_norm": 154.0, "learning_rate": 6.274687795319208e-05, "loss": 7.5009, "step": 10446 }, { "epoch": 0.43545496227752073, "grad_norm": 336.0, "learning_rate": 6.274035085791003e-05, "loss": 13.1278, "step": 10447 }, { "epoch": 0.435496644575049, "grad_norm": 382.0, "learning_rate": 6.273382353043396e-05, "loss": 16.0024, "step": 10448 }, { "epoch": 0.4355383268725772, "grad_norm": 298.0, "learning_rate": 6.272729597088281e-05, "loss": 12.2502, "step": 10449 }, { "epoch": 0.4355800091701055, "grad_norm": 288.0, "learning_rate": 6.272076817937556e-05, "loss": 13.0628, "step": 10450 }, { "epoch": 0.4356216914676337, "grad_norm": 524.0, "learning_rate": 6.27142401560312e-05, "loss": 18.6254, "step": 10451 }, { "epoch": 0.43566337376516195, "grad_norm": 414.0, "learning_rate": 6.270771190096867e-05, "loss": 15.5626, "step": 10452 }, { "epoch": 0.43570505606269017, "grad_norm": 596.0, "learning_rate": 6.270118341430697e-05, "loss": 18.8754, "step": 10453 }, { "epoch": 0.43574673836021843, "grad_norm": 454.0, "learning_rate": 6.269465469616507e-05, "loss": 15.7516, "step": 10454 }, { "epoch": 0.43578842065774664, "grad_norm": 221.0, "learning_rate": 6.268812574666196e-05, "loss": 12.1878, "step": 10455 }, { "epoch": 0.4358301029552749, "grad_norm": 227.0, "learning_rate": 6.268159656591664e-05, "loss": 13.0635, "step": 10456 }, { "epoch": 0.4358717852528031, "grad_norm": 1128.0, "learning_rate": 6.267506715404809e-05, "loss": 26.7508, "step": 10457 }, { "epoch": 0.4359134675503314, "grad_norm": 188.0, "learning_rate": 6.266853751117533e-05, "loss": 10.0002, "step": 10458 }, { "epoch": 0.4359551498478596, "grad_norm": 274.0, "learning_rate": 6.266200763741733e-05, "loss": 14.5003, "step": 10459 }, { "epoch": 0.43599683214538787, "grad_norm": 183.0, "learning_rate": 6.265547753289313e-05, "loss": 10.563, "step": 10460 }, { "epoch": 0.4360385144429161, "grad_norm": 426.0, "learning_rate": 6.264894719772172e-05, "loss": 15.8127, "step": 10461 }, { "epoch": 0.43608019674044435, "grad_norm": 143.0, "learning_rate": 6.264241663202212e-05, "loss": 9.6262, "step": 10462 }, { "epoch": 0.43612187903797256, "grad_norm": 260.0, "learning_rate": 6.263588583591337e-05, "loss": 12.5627, "step": 10463 }, { "epoch": 0.4361635613355008, "grad_norm": 412.0, "learning_rate": 6.262935480951446e-05, "loss": 15.5002, "step": 10464 }, { "epoch": 0.43620524363302904, "grad_norm": 62.75, "learning_rate": 6.262282355294445e-05, "loss": 9.0628, "step": 10465 }, { "epoch": 0.4362469259305573, "grad_norm": 176.0, "learning_rate": 6.261629206632235e-05, "loss": 10.6252, "step": 10466 }, { "epoch": 0.4362886082280855, "grad_norm": 181.0, "learning_rate": 6.260976034976723e-05, "loss": 10.7506, "step": 10467 }, { "epoch": 0.4363302905256138, "grad_norm": 158.0, "learning_rate": 6.260322840339809e-05, "loss": 10.3752, "step": 10468 }, { "epoch": 0.436371972823142, "grad_norm": 418.0, "learning_rate": 6.259669622733401e-05, "loss": 15.1255, "step": 10469 }, { "epoch": 0.43641365512067026, "grad_norm": 237.0, "learning_rate": 6.2590163821694e-05, "loss": 11.5008, "step": 10470 }, { "epoch": 0.4364553374181985, "grad_norm": 199.0, "learning_rate": 6.258363118659716e-05, "loss": 10.4377, "step": 10471 }, { "epoch": 0.43649701971572674, "grad_norm": 294.0, "learning_rate": 6.25770983221625e-05, "loss": 11.3129, "step": 10472 }, { "epoch": 0.43653870201325495, "grad_norm": 234.0, "learning_rate": 6.257056522850913e-05, "loss": 12.5007, "step": 10473 }, { "epoch": 0.4365803843107832, "grad_norm": 148.0, "learning_rate": 6.256403190575607e-05, "loss": 10.2504, "step": 10474 }, { "epoch": 0.43662206660831143, "grad_norm": 494.0, "learning_rate": 6.255749835402243e-05, "loss": 18.2503, "step": 10475 }, { "epoch": 0.4366637489058397, "grad_norm": 1056.0, "learning_rate": 6.255096457342725e-05, "loss": 24.6301, "step": 10476 }, { "epoch": 0.4367054312033679, "grad_norm": 286.0, "learning_rate": 6.254443056408963e-05, "loss": 14.2507, "step": 10477 }, { "epoch": 0.4367471135008962, "grad_norm": 964.0, "learning_rate": 6.253789632612868e-05, "loss": 24.8752, "step": 10478 }, { "epoch": 0.4367887957984244, "grad_norm": 676.0, "learning_rate": 6.253136185966342e-05, "loss": 19.5002, "step": 10479 }, { "epoch": 0.43683047809595266, "grad_norm": 214.0, "learning_rate": 6.252482716481299e-05, "loss": 10.8129, "step": 10480 }, { "epoch": 0.43687216039348087, "grad_norm": 448.0, "learning_rate": 6.251829224169649e-05, "loss": 15.6252, "step": 10481 }, { "epoch": 0.43691384269100914, "grad_norm": 290.0, "learning_rate": 6.251175709043297e-05, "loss": 13.7504, "step": 10482 }, { "epoch": 0.43695552498853735, "grad_norm": 180.0, "learning_rate": 6.250522171114159e-05, "loss": 11.1253, "step": 10483 }, { "epoch": 0.4369972072860656, "grad_norm": 314.0, "learning_rate": 6.249868610394144e-05, "loss": 13.3127, "step": 10484 }, { "epoch": 0.4370388895835938, "grad_norm": 404.0, "learning_rate": 6.249215026895162e-05, "loss": 17.0004, "step": 10485 }, { "epoch": 0.4370805718811221, "grad_norm": 388.0, "learning_rate": 6.248561420629124e-05, "loss": 15.0627, "step": 10486 }, { "epoch": 0.4371222541786503, "grad_norm": 185.0, "learning_rate": 6.247907791607943e-05, "loss": 11.0004, "step": 10487 }, { "epoch": 0.43716393647617857, "grad_norm": 100.0, "learning_rate": 6.247254139843533e-05, "loss": 6.6253, "step": 10488 }, { "epoch": 0.4372056187737068, "grad_norm": 234.0, "learning_rate": 6.246600465347805e-05, "loss": 11.3128, "step": 10489 }, { "epoch": 0.43724730107123505, "grad_norm": 83.0, "learning_rate": 6.245946768132674e-05, "loss": 10.0002, "step": 10490 }, { "epoch": 0.43728898336876326, "grad_norm": 384.0, "learning_rate": 6.24529304821005e-05, "loss": 13.8128, "step": 10491 }, { "epoch": 0.43733066566629153, "grad_norm": 212.0, "learning_rate": 6.244639305591852e-05, "loss": 11.3757, "step": 10492 }, { "epoch": 0.43737234796381974, "grad_norm": 172.0, "learning_rate": 6.243985540289991e-05, "loss": 10.0628, "step": 10493 }, { "epoch": 0.437414030261348, "grad_norm": 256.0, "learning_rate": 6.243331752316384e-05, "loss": 11.8131, "step": 10494 }, { "epoch": 0.4374557125588762, "grad_norm": 812.0, "learning_rate": 6.242677941682945e-05, "loss": 26.5003, "step": 10495 }, { "epoch": 0.4374973948564045, "grad_norm": 388.0, "learning_rate": 6.242024108401591e-05, "loss": 15.1252, "step": 10496 }, { "epoch": 0.4375390771539327, "grad_norm": 96.5, "learning_rate": 6.241370252484235e-05, "loss": 8.0005, "step": 10497 }, { "epoch": 0.43758075945146097, "grad_norm": 236.0, "learning_rate": 6.240716373942798e-05, "loss": 11.8755, "step": 10498 }, { "epoch": 0.4376224417489892, "grad_norm": 374.0, "learning_rate": 6.240062472789195e-05, "loss": 15.5628, "step": 10499 }, { "epoch": 0.43766412404651744, "grad_norm": 274.0, "learning_rate": 6.239408549035343e-05, "loss": 11.6879, "step": 10500 }, { "epoch": 0.4377058063440457, "grad_norm": 246.0, "learning_rate": 6.23875460269316e-05, "loss": 10.0631, "step": 10501 }, { "epoch": 0.4377474886415739, "grad_norm": 414.0, "learning_rate": 6.238100633774563e-05, "loss": 14.6253, "step": 10502 }, { "epoch": 0.4377891709391022, "grad_norm": 83.0, "learning_rate": 6.237446642291473e-05, "loss": 9.0006, "step": 10503 }, { "epoch": 0.4378308532366304, "grad_norm": 356.0, "learning_rate": 6.23679262825581e-05, "loss": 14.5002, "step": 10504 }, { "epoch": 0.43787253553415867, "grad_norm": 306.0, "learning_rate": 6.236138591679491e-05, "loss": 12.0638, "step": 10505 }, { "epoch": 0.4379142178316869, "grad_norm": 251.0, "learning_rate": 6.235484532574433e-05, "loss": 12.2502, "step": 10506 }, { "epoch": 0.43795590012921515, "grad_norm": 308.0, "learning_rate": 6.234830450952563e-05, "loss": 14.9384, "step": 10507 }, { "epoch": 0.43799758242674336, "grad_norm": 215.0, "learning_rate": 6.234176346825798e-05, "loss": 10.2502, "step": 10508 }, { "epoch": 0.4380392647242716, "grad_norm": 386.0, "learning_rate": 6.233522220206059e-05, "loss": 14.8752, "step": 10509 }, { "epoch": 0.43808094702179984, "grad_norm": 368.0, "learning_rate": 6.232868071105269e-05, "loss": 14.4377, "step": 10510 }, { "epoch": 0.4381226293193281, "grad_norm": 184.0, "learning_rate": 6.232213899535348e-05, "loss": 8.5004, "step": 10511 }, { "epoch": 0.4381643116168563, "grad_norm": 163.0, "learning_rate": 6.231559705508219e-05, "loss": 9.5628, "step": 10512 }, { "epoch": 0.4382059939143846, "grad_norm": 486.0, "learning_rate": 6.230905489035805e-05, "loss": 17.2504, "step": 10513 }, { "epoch": 0.4382476762119128, "grad_norm": 430.0, "learning_rate": 6.23025125013003e-05, "loss": 16.6259, "step": 10514 }, { "epoch": 0.43828935850944106, "grad_norm": 454.0, "learning_rate": 6.229596988802817e-05, "loss": 17.6257, "step": 10515 }, { "epoch": 0.4383310408069693, "grad_norm": 140.0, "learning_rate": 6.228942705066088e-05, "loss": 10.0008, "step": 10516 }, { "epoch": 0.43837272310449754, "grad_norm": 266.0, "learning_rate": 6.22828839893177e-05, "loss": 11.3133, "step": 10517 }, { "epoch": 0.43841440540202575, "grad_norm": 224.0, "learning_rate": 6.227634070411787e-05, "loss": 11.7503, "step": 10518 }, { "epoch": 0.438456087699554, "grad_norm": 400.0, "learning_rate": 6.226979719518065e-05, "loss": 15.6253, "step": 10519 }, { "epoch": 0.43849776999708223, "grad_norm": 239.0, "learning_rate": 6.226325346262529e-05, "loss": 12.0627, "step": 10520 }, { "epoch": 0.4385394522946105, "grad_norm": 412.0, "learning_rate": 6.225670950657102e-05, "loss": 14.6254, "step": 10521 }, { "epoch": 0.4385811345921387, "grad_norm": 1016.0, "learning_rate": 6.225016532713716e-05, "loss": 27.8759, "step": 10522 }, { "epoch": 0.438622816889667, "grad_norm": 221.0, "learning_rate": 6.224362092444293e-05, "loss": 11.2508, "step": 10523 }, { "epoch": 0.4386644991871952, "grad_norm": 228.0, "learning_rate": 6.223707629860763e-05, "loss": 11.2503, "step": 10524 }, { "epoch": 0.43870618148472346, "grad_norm": 249.0, "learning_rate": 6.223053144975053e-05, "loss": 12.2504, "step": 10525 }, { "epoch": 0.43874786378225167, "grad_norm": 217.0, "learning_rate": 6.22239863779909e-05, "loss": 9.4378, "step": 10526 }, { "epoch": 0.43878954607977994, "grad_norm": 63.25, "learning_rate": 6.221744108344802e-05, "loss": 6.2504, "step": 10527 }, { "epoch": 0.43883122837730815, "grad_norm": 282.0, "learning_rate": 6.221089556624122e-05, "loss": 15.1252, "step": 10528 }, { "epoch": 0.4388729106748364, "grad_norm": 496.0, "learning_rate": 6.220434982648975e-05, "loss": 17.5003, "step": 10529 }, { "epoch": 0.4389145929723646, "grad_norm": 444.0, "learning_rate": 6.219780386431293e-05, "loss": 15.9381, "step": 10530 }, { "epoch": 0.4389562752698929, "grad_norm": 210.0, "learning_rate": 6.219125767983004e-05, "loss": 12.3129, "step": 10531 }, { "epoch": 0.4389979575674211, "grad_norm": 402.0, "learning_rate": 6.218471127316042e-05, "loss": 15.6252, "step": 10532 }, { "epoch": 0.43903963986494937, "grad_norm": 664.0, "learning_rate": 6.217816464442333e-05, "loss": 20.1254, "step": 10533 }, { "epoch": 0.4390813221624776, "grad_norm": 155.0, "learning_rate": 6.217161779373812e-05, "loss": 10.5003, "step": 10534 }, { "epoch": 0.43912300446000585, "grad_norm": 352.0, "learning_rate": 6.21650707212241e-05, "loss": 13.6877, "step": 10535 }, { "epoch": 0.43916468675753406, "grad_norm": 146.0, "learning_rate": 6.215852342700056e-05, "loss": 9.2537, "step": 10536 }, { "epoch": 0.43920636905506233, "grad_norm": 336.0, "learning_rate": 6.215197591118689e-05, "loss": 15.438, "step": 10537 }, { "epoch": 0.43924805135259054, "grad_norm": 500.0, "learning_rate": 6.214542817390235e-05, "loss": 16.6261, "step": 10538 }, { "epoch": 0.4392897336501188, "grad_norm": 183.0, "learning_rate": 6.213888021526631e-05, "loss": 11.2502, "step": 10539 }, { "epoch": 0.439331415947647, "grad_norm": 676.0, "learning_rate": 6.21323320353981e-05, "loss": 20.6252, "step": 10540 }, { "epoch": 0.4393730982451753, "grad_norm": 716.0, "learning_rate": 6.212578363441707e-05, "loss": 18.2564, "step": 10541 }, { "epoch": 0.4394147805427035, "grad_norm": 348.0, "learning_rate": 6.211923501244255e-05, "loss": 13.5641, "step": 10542 }, { "epoch": 0.43945646284023177, "grad_norm": 1536.0, "learning_rate": 6.21126861695939e-05, "loss": 30.6284, "step": 10543 }, { "epoch": 0.43949814513776, "grad_norm": 210.0, "learning_rate": 6.210613710599047e-05, "loss": 9.5001, "step": 10544 }, { "epoch": 0.43953982743528824, "grad_norm": 1448.0, "learning_rate": 6.209958782175162e-05, "loss": 28.2503, "step": 10545 }, { "epoch": 0.43958150973281646, "grad_norm": 976.0, "learning_rate": 6.20930383169967e-05, "loss": 30.0001, "step": 10546 }, { "epoch": 0.4396231920303447, "grad_norm": 644.0, "learning_rate": 6.208648859184508e-05, "loss": 20.7502, "step": 10547 }, { "epoch": 0.43966487432787293, "grad_norm": 904.0, "learning_rate": 6.207993864641614e-05, "loss": 27.1267, "step": 10548 }, { "epoch": 0.4397065566254012, "grad_norm": 348.0, "learning_rate": 6.207338848082924e-05, "loss": 15.1877, "step": 10549 }, { "epoch": 0.4397482389229294, "grad_norm": 396.0, "learning_rate": 6.206683809520378e-05, "loss": 16.8759, "step": 10550 }, { "epoch": 0.4397899212204577, "grad_norm": 87.0, "learning_rate": 6.206028748965908e-05, "loss": 8.813, "step": 10551 }, { "epoch": 0.4398316035179859, "grad_norm": 600.0, "learning_rate": 6.205373666431462e-05, "loss": 17.2512, "step": 10552 }, { "epoch": 0.43987328581551416, "grad_norm": 446.0, "learning_rate": 6.204718561928971e-05, "loss": 16.0003, "step": 10553 }, { "epoch": 0.43991496811304237, "grad_norm": 366.0, "learning_rate": 6.204063435470378e-05, "loss": 14.7503, "step": 10554 }, { "epoch": 0.43995665041057064, "grad_norm": 192.0, "learning_rate": 6.203408287067623e-05, "loss": 7.6882, "step": 10555 }, { "epoch": 0.43999833270809885, "grad_norm": 105.0, "learning_rate": 6.202753116732645e-05, "loss": 10.0638, "step": 10556 }, { "epoch": 0.4400400150056271, "grad_norm": 176.0, "learning_rate": 6.202097924477383e-05, "loss": 11.4377, "step": 10557 }, { "epoch": 0.44008169730315533, "grad_norm": 398.0, "learning_rate": 6.201442710313782e-05, "loss": 15.1879, "step": 10558 }, { "epoch": 0.4401233796006836, "grad_norm": 85.5, "learning_rate": 6.20078747425378e-05, "loss": 9.4379, "step": 10559 }, { "epoch": 0.4401650618982118, "grad_norm": 230.0, "learning_rate": 6.200132216309319e-05, "loss": 12.2504, "step": 10560 }, { "epoch": 0.4402067441957401, "grad_norm": 500.0, "learning_rate": 6.199476936492342e-05, "loss": 16.8755, "step": 10561 }, { "epoch": 0.4402484264932683, "grad_norm": 183.0, "learning_rate": 6.198821634814791e-05, "loss": 10.3135, "step": 10562 }, { "epoch": 0.44029010879079655, "grad_norm": 660.0, "learning_rate": 6.19816631128861e-05, "loss": 20.7502, "step": 10563 }, { "epoch": 0.44033179108832476, "grad_norm": 740.0, "learning_rate": 6.197510965925741e-05, "loss": 21.8752, "step": 10564 }, { "epoch": 0.44037347338585303, "grad_norm": 110.5, "learning_rate": 6.196855598738128e-05, "loss": 9.4377, "step": 10565 }, { "epoch": 0.44041515568338124, "grad_norm": 428.0, "learning_rate": 6.196200209737716e-05, "loss": 15.2513, "step": 10566 }, { "epoch": 0.4404568379809095, "grad_norm": 1072.0, "learning_rate": 6.195544798936449e-05, "loss": 25.8783, "step": 10567 }, { "epoch": 0.4404985202784377, "grad_norm": 320.0, "learning_rate": 6.194889366346273e-05, "loss": 14.8127, "step": 10568 }, { "epoch": 0.440540202575966, "grad_norm": 944.0, "learning_rate": 6.19423391197913e-05, "loss": 25.7505, "step": 10569 }, { "epoch": 0.4405818848734942, "grad_norm": 258.0, "learning_rate": 6.193578435846969e-05, "loss": 11.9377, "step": 10570 }, { "epoch": 0.44062356717102247, "grad_norm": 500.0, "learning_rate": 6.192922937961736e-05, "loss": 17.5002, "step": 10571 }, { "epoch": 0.4406652494685507, "grad_norm": 580.0, "learning_rate": 6.192267418335375e-05, "loss": 15.815, "step": 10572 }, { "epoch": 0.44070693176607895, "grad_norm": 460.0, "learning_rate": 6.191611876979837e-05, "loss": 15.5001, "step": 10573 }, { "epoch": 0.4407486140636072, "grad_norm": 318.0, "learning_rate": 6.190956313907065e-05, "loss": 14.8778, "step": 10574 }, { "epoch": 0.4407902963611354, "grad_norm": 292.0, "learning_rate": 6.190300729129008e-05, "loss": 13.1251, "step": 10575 }, { "epoch": 0.4408319786586637, "grad_norm": 426.0, "learning_rate": 6.189645122657616e-05, "loss": 16.501, "step": 10576 }, { "epoch": 0.4408736609561919, "grad_norm": 1576.0, "learning_rate": 6.188989494504836e-05, "loss": 32.255, "step": 10577 }, { "epoch": 0.44091534325372017, "grad_norm": 252.0, "learning_rate": 6.188333844682615e-05, "loss": 11.6877, "step": 10578 }, { "epoch": 0.4409570255512484, "grad_norm": 132.0, "learning_rate": 6.187678173202905e-05, "loss": 9.0006, "step": 10579 }, { "epoch": 0.44099870784877665, "grad_norm": 406.0, "learning_rate": 6.187022480077655e-05, "loss": 13.9378, "step": 10580 }, { "epoch": 0.44104039014630486, "grad_norm": 322.0, "learning_rate": 6.186366765318813e-05, "loss": 13.1877, "step": 10581 }, { "epoch": 0.44108207244383313, "grad_norm": 175.0, "learning_rate": 6.185711028938334e-05, "loss": 10.1255, "step": 10582 }, { "epoch": 0.44112375474136134, "grad_norm": 326.0, "learning_rate": 6.185055270948167e-05, "loss": 14.5005, "step": 10583 }, { "epoch": 0.4411654370388896, "grad_norm": 205.0, "learning_rate": 6.18439949136026e-05, "loss": 11.0021, "step": 10584 }, { "epoch": 0.4412071193364178, "grad_norm": 255.0, "learning_rate": 6.18374369018657e-05, "loss": 13.0636, "step": 10585 }, { "epoch": 0.4412488016339461, "grad_norm": 254.0, "learning_rate": 6.183087867439043e-05, "loss": 12.4378, "step": 10586 }, { "epoch": 0.4412904839314743, "grad_norm": 183.0, "learning_rate": 6.182432023129636e-05, "loss": 9.8137, "step": 10587 }, { "epoch": 0.44133216622900256, "grad_norm": 724.0, "learning_rate": 6.181776157270302e-05, "loss": 20.127, "step": 10588 }, { "epoch": 0.4413738485265308, "grad_norm": 596.0, "learning_rate": 6.18112026987299e-05, "loss": 19.1276, "step": 10589 }, { "epoch": 0.44141553082405904, "grad_norm": 752.0, "learning_rate": 6.180464360949658e-05, "loss": 21.5003, "step": 10590 }, { "epoch": 0.44145721312158726, "grad_norm": 97.5, "learning_rate": 6.179808430512256e-05, "loss": 7.8754, "step": 10591 }, { "epoch": 0.4414988954191155, "grad_norm": 592.0, "learning_rate": 6.179152478572743e-05, "loss": 16.2548, "step": 10592 }, { "epoch": 0.44154057771664373, "grad_norm": 1896.0, "learning_rate": 6.17849650514307e-05, "loss": 37.2503, "step": 10593 }, { "epoch": 0.441582260014172, "grad_norm": 276.0, "learning_rate": 6.177840510235195e-05, "loss": 12.9378, "step": 10594 }, { "epoch": 0.4416239423117002, "grad_norm": 254.0, "learning_rate": 6.17718449386107e-05, "loss": 13.5627, "step": 10595 }, { "epoch": 0.4416656246092285, "grad_norm": 366.0, "learning_rate": 6.176528456032656e-05, "loss": 14.6878, "step": 10596 }, { "epoch": 0.4417073069067567, "grad_norm": 350.0, "learning_rate": 6.175872396761904e-05, "loss": 15.4379, "step": 10597 }, { "epoch": 0.44174898920428496, "grad_norm": 472.0, "learning_rate": 6.175216316060773e-05, "loss": 16.2504, "step": 10598 }, { "epoch": 0.44179067150181317, "grad_norm": 126.5, "learning_rate": 6.174560213941222e-05, "loss": 9.0007, "step": 10599 }, { "epoch": 0.44183235379934144, "grad_norm": 676.0, "learning_rate": 6.173904090415205e-05, "loss": 19.5011, "step": 10600 }, { "epoch": 0.44187403609686965, "grad_norm": 142.0, "learning_rate": 6.173247945494684e-05, "loss": 11.6888, "step": 10601 }, { "epoch": 0.4419157183943979, "grad_norm": 1544.0, "learning_rate": 6.172591779191614e-05, "loss": 35.2537, "step": 10602 }, { "epoch": 0.44195740069192613, "grad_norm": 362.0, "learning_rate": 6.171935591517954e-05, "loss": 14.7503, "step": 10603 }, { "epoch": 0.4419990829894544, "grad_norm": 424.0, "learning_rate": 6.171279382485665e-05, "loss": 15.7503, "step": 10604 }, { "epoch": 0.4420407652869826, "grad_norm": 604.0, "learning_rate": 6.170623152106704e-05, "loss": 16.8754, "step": 10605 }, { "epoch": 0.4420824475845109, "grad_norm": 432.0, "learning_rate": 6.169966900393033e-05, "loss": 16.2502, "step": 10606 }, { "epoch": 0.4421241298820391, "grad_norm": 235.0, "learning_rate": 6.169310627356611e-05, "loss": 10.7504, "step": 10607 }, { "epoch": 0.44216581217956735, "grad_norm": 126.0, "learning_rate": 6.168654333009399e-05, "loss": 9.6256, "step": 10608 }, { "epoch": 0.44220749447709556, "grad_norm": 192.0, "learning_rate": 6.167998017363359e-05, "loss": 11.4381, "step": 10609 }, { "epoch": 0.44224917677462383, "grad_norm": 636.0, "learning_rate": 6.167341680430451e-05, "loss": 20.3781, "step": 10610 }, { "epoch": 0.44229085907215204, "grad_norm": 225.0, "learning_rate": 6.166685322222637e-05, "loss": 11.8127, "step": 10611 }, { "epoch": 0.4423325413696803, "grad_norm": 382.0, "learning_rate": 6.166028942751879e-05, "loss": 14.5003, "step": 10612 }, { "epoch": 0.4423742236672085, "grad_norm": 402.0, "learning_rate": 6.165372542030141e-05, "loss": 14.6252, "step": 10613 }, { "epoch": 0.4424159059647368, "grad_norm": 190.0, "learning_rate": 6.164716120069384e-05, "loss": 10.8758, "step": 10614 }, { "epoch": 0.442457588262265, "grad_norm": 508.0, "learning_rate": 6.164059676881573e-05, "loss": 17.0028, "step": 10615 }, { "epoch": 0.44249927055979327, "grad_norm": 147.0, "learning_rate": 6.163403212478672e-05, "loss": 10.3129, "step": 10616 }, { "epoch": 0.4425409528573215, "grad_norm": 454.0, "learning_rate": 6.16274672687264e-05, "loss": 14.8127, "step": 10617 }, { "epoch": 0.44258263515484975, "grad_norm": 336.0, "learning_rate": 6.162090220075449e-05, "loss": 14.2503, "step": 10618 }, { "epoch": 0.44262431745237796, "grad_norm": 148.0, "learning_rate": 6.16143369209906e-05, "loss": 10.7507, "step": 10619 }, { "epoch": 0.4426659997499062, "grad_norm": 426.0, "learning_rate": 6.16077714295544e-05, "loss": 17.6253, "step": 10620 }, { "epoch": 0.44270768204743444, "grad_norm": 227.0, "learning_rate": 6.160120572656553e-05, "loss": 12.5002, "step": 10621 }, { "epoch": 0.4427493643449627, "grad_norm": 592.0, "learning_rate": 6.159463981214363e-05, "loss": 17.6254, "step": 10622 }, { "epoch": 0.4427910466424909, "grad_norm": 300.0, "learning_rate": 6.158807368640842e-05, "loss": 10.1271, "step": 10623 }, { "epoch": 0.4428327289400192, "grad_norm": 154.0, "learning_rate": 6.158150734947955e-05, "loss": 10.6253, "step": 10624 }, { "epoch": 0.4428744112375474, "grad_norm": 596.0, "learning_rate": 6.157494080147664e-05, "loss": 16.5042, "step": 10625 }, { "epoch": 0.44291609353507566, "grad_norm": 892.0, "learning_rate": 6.156837404251944e-05, "loss": 22.7546, "step": 10626 }, { "epoch": 0.4429577758326039, "grad_norm": 424.0, "learning_rate": 6.156180707272758e-05, "loss": 16.6258, "step": 10627 }, { "epoch": 0.44299945813013214, "grad_norm": 230.0, "learning_rate": 6.155523989222076e-05, "loss": 11.1254, "step": 10628 }, { "epoch": 0.44304114042766035, "grad_norm": 414.0, "learning_rate": 6.154867250111866e-05, "loss": 15.9378, "step": 10629 }, { "epoch": 0.4430828227251886, "grad_norm": 322.0, "learning_rate": 6.154210489954099e-05, "loss": 12.9381, "step": 10630 }, { "epoch": 0.44312450502271683, "grad_norm": 424.0, "learning_rate": 6.153553708760743e-05, "loss": 15.8126, "step": 10631 }, { "epoch": 0.4431661873202451, "grad_norm": 199.0, "learning_rate": 6.152896906543769e-05, "loss": 11.1253, "step": 10632 }, { "epoch": 0.4432078696177733, "grad_norm": 216.0, "learning_rate": 6.152240083315146e-05, "loss": 12.6253, "step": 10633 }, { "epoch": 0.4432495519153016, "grad_norm": 360.0, "learning_rate": 6.151583239086844e-05, "loss": 12.3754, "step": 10634 }, { "epoch": 0.4432912342128298, "grad_norm": 386.0, "learning_rate": 6.150926373870837e-05, "loss": 13.6877, "step": 10635 }, { "epoch": 0.44333291651035805, "grad_norm": 94.5, "learning_rate": 6.150269487679095e-05, "loss": 9.0638, "step": 10636 }, { "epoch": 0.44337459880788627, "grad_norm": 382.0, "learning_rate": 6.149612580523588e-05, "loss": 14.9379, "step": 10637 }, { "epoch": 0.44341628110541453, "grad_norm": 96.5, "learning_rate": 6.14895565241629e-05, "loss": 9.2507, "step": 10638 }, { "epoch": 0.44345796340294275, "grad_norm": 796.0, "learning_rate": 6.148298703369174e-05, "loss": 23.0002, "step": 10639 }, { "epoch": 0.443499645700471, "grad_norm": 149.0, "learning_rate": 6.147641733394212e-05, "loss": 9.9377, "step": 10640 }, { "epoch": 0.4435413279979992, "grad_norm": 450.0, "learning_rate": 6.146984742503377e-05, "loss": 17.0022, "step": 10641 }, { "epoch": 0.4435830102955275, "grad_norm": 346.0, "learning_rate": 6.146327730708642e-05, "loss": 13.7502, "step": 10642 }, { "epoch": 0.4436246925930557, "grad_norm": 108.0, "learning_rate": 6.145670698021984e-05, "loss": 8.6876, "step": 10643 }, { "epoch": 0.44366637489058397, "grad_norm": 262.0, "learning_rate": 6.145013644455375e-05, "loss": 13.1252, "step": 10644 }, { "epoch": 0.4437080571881122, "grad_norm": 506.0, "learning_rate": 6.144356570020791e-05, "loss": 16.1253, "step": 10645 }, { "epoch": 0.44374973948564045, "grad_norm": 231.0, "learning_rate": 6.143699474730208e-05, "loss": 11.5628, "step": 10646 }, { "epoch": 0.4437914217831687, "grad_norm": 180.0, "learning_rate": 6.1430423585956e-05, "loss": 10.6251, "step": 10647 }, { "epoch": 0.4438331040806969, "grad_norm": 225.0, "learning_rate": 6.142385221628944e-05, "loss": 12.8753, "step": 10648 }, { "epoch": 0.4438747863782252, "grad_norm": 208.0, "learning_rate": 6.141728063842215e-05, "loss": 12.3755, "step": 10649 }, { "epoch": 0.4439164686757534, "grad_norm": 352.0, "learning_rate": 6.141070885247391e-05, "loss": 14.1252, "step": 10650 }, { "epoch": 0.4439581509732817, "grad_norm": 624.0, "learning_rate": 6.140413685856449e-05, "loss": 19.5002, "step": 10651 }, { "epoch": 0.4439998332708099, "grad_norm": 288.0, "learning_rate": 6.139756465681365e-05, "loss": 13.1878, "step": 10652 }, { "epoch": 0.44404151556833815, "grad_norm": 468.0, "learning_rate": 6.13909922473412e-05, "loss": 16.3755, "step": 10653 }, { "epoch": 0.44408319786586636, "grad_norm": 1312.0, "learning_rate": 6.138441963026689e-05, "loss": 31.3765, "step": 10654 }, { "epoch": 0.44412488016339463, "grad_norm": 338.0, "learning_rate": 6.137784680571053e-05, "loss": 13.4379, "step": 10655 }, { "epoch": 0.44416656246092284, "grad_norm": 376.0, "learning_rate": 6.13712737737919e-05, "loss": 13.5629, "step": 10656 }, { "epoch": 0.4442082447584511, "grad_norm": 1640.0, "learning_rate": 6.13647005346308e-05, "loss": 34.501, "step": 10657 }, { "epoch": 0.4442499270559793, "grad_norm": 1040.0, "learning_rate": 6.135812708834701e-05, "loss": 25.8766, "step": 10658 }, { "epoch": 0.4442916093535076, "grad_norm": 1768.0, "learning_rate": 6.135155343506036e-05, "loss": 28.0054, "step": 10659 }, { "epoch": 0.4443332916510358, "grad_norm": 45.75, "learning_rate": 6.134497957489065e-05, "loss": 7.0629, "step": 10660 }, { "epoch": 0.44437497394856407, "grad_norm": 338.0, "learning_rate": 6.133840550795766e-05, "loss": 13.6266, "step": 10661 }, { "epoch": 0.4444166562460923, "grad_norm": 1012.0, "learning_rate": 6.133183123438123e-05, "loss": 23.2524, "step": 10662 }, { "epoch": 0.44445833854362055, "grad_norm": 120.5, "learning_rate": 6.132525675428118e-05, "loss": 9.7504, "step": 10663 }, { "epoch": 0.44450002084114876, "grad_norm": 516.0, "learning_rate": 6.131868206777731e-05, "loss": 17.1252, "step": 10664 }, { "epoch": 0.444541703138677, "grad_norm": 772.0, "learning_rate": 6.131210717498945e-05, "loss": 20.626, "step": 10665 }, { "epoch": 0.44458338543620524, "grad_norm": 276.0, "learning_rate": 6.130553207603744e-05, "loss": 12.9384, "step": 10666 }, { "epoch": 0.4446250677337335, "grad_norm": 192.0, "learning_rate": 6.129895677104109e-05, "loss": 11.6252, "step": 10667 }, { "epoch": 0.4446667500312617, "grad_norm": 170.0, "learning_rate": 6.129238126012027e-05, "loss": 10.1252, "step": 10668 }, { "epoch": 0.44470843232879, "grad_norm": 157.0, "learning_rate": 6.128580554339479e-05, "loss": 10.6256, "step": 10669 }, { "epoch": 0.4447501146263182, "grad_norm": 182.0, "learning_rate": 6.12792296209845e-05, "loss": 10.2504, "step": 10670 }, { "epoch": 0.44479179692384646, "grad_norm": 436.0, "learning_rate": 6.127265349300926e-05, "loss": 16.5014, "step": 10671 }, { "epoch": 0.4448334792213747, "grad_norm": 170.0, "learning_rate": 6.126607715958889e-05, "loss": 11.8753, "step": 10672 }, { "epoch": 0.44487516151890294, "grad_norm": 502.0, "learning_rate": 6.125950062084327e-05, "loss": 16.5002, "step": 10673 }, { "epoch": 0.44491684381643115, "grad_norm": 328.0, "learning_rate": 6.125292387689228e-05, "loss": 13.4376, "step": 10674 }, { "epoch": 0.4449585261139594, "grad_norm": 246.0, "learning_rate": 6.124634692785572e-05, "loss": 9.0007, "step": 10675 }, { "epoch": 0.44500020841148763, "grad_norm": 270.0, "learning_rate": 6.12397697738535e-05, "loss": 13.0002, "step": 10676 }, { "epoch": 0.4450418907090159, "grad_norm": 127.0, "learning_rate": 6.123319241500548e-05, "loss": 9.4378, "step": 10677 }, { "epoch": 0.4450835730065441, "grad_norm": 292.0, "learning_rate": 6.122661485143153e-05, "loss": 14.0003, "step": 10678 }, { "epoch": 0.4451252553040724, "grad_norm": 342.0, "learning_rate": 6.122003708325152e-05, "loss": 11.6879, "step": 10679 }, { "epoch": 0.4451669376016006, "grad_norm": 274.0, "learning_rate": 6.121345911058534e-05, "loss": 13.3132, "step": 10680 }, { "epoch": 0.44520861989912885, "grad_norm": 96.5, "learning_rate": 6.120688093355288e-05, "loss": 7.5627, "step": 10681 }, { "epoch": 0.44525030219665707, "grad_norm": 151.0, "learning_rate": 6.120030255227402e-05, "loss": 10.8754, "step": 10682 }, { "epoch": 0.44529198449418533, "grad_norm": 576.0, "learning_rate": 6.119372396686864e-05, "loss": 18.6253, "step": 10683 }, { "epoch": 0.44533366679171354, "grad_norm": 370.0, "learning_rate": 6.118714517745667e-05, "loss": 14.8752, "step": 10684 }, { "epoch": 0.4453753490892418, "grad_norm": 278.0, "learning_rate": 6.118056618415795e-05, "loss": 12.5004, "step": 10685 }, { "epoch": 0.44541703138677, "grad_norm": 684.0, "learning_rate": 6.117398698709244e-05, "loss": 20.0017, "step": 10686 }, { "epoch": 0.4454587136842983, "grad_norm": 876.0, "learning_rate": 6.116740758638003e-05, "loss": 26.6256, "step": 10687 }, { "epoch": 0.4455003959818265, "grad_norm": 239.0, "learning_rate": 6.116082798214062e-05, "loss": 12.8753, "step": 10688 }, { "epoch": 0.44554207827935477, "grad_norm": 520.0, "learning_rate": 6.115424817449413e-05, "loss": 16.8754, "step": 10689 }, { "epoch": 0.445583760576883, "grad_norm": 386.0, "learning_rate": 6.11476681635605e-05, "loss": 15.2505, "step": 10690 }, { "epoch": 0.44562544287441125, "grad_norm": 196.0, "learning_rate": 6.114108794945958e-05, "loss": 10.938, "step": 10691 }, { "epoch": 0.44566712517193946, "grad_norm": 121.5, "learning_rate": 6.113450753231137e-05, "loss": 8.438, "step": 10692 }, { "epoch": 0.4457088074694677, "grad_norm": 410.0, "learning_rate": 6.112792691223577e-05, "loss": 14.1905, "step": 10693 }, { "epoch": 0.44575048976699594, "grad_norm": 280.0, "learning_rate": 6.112134608935272e-05, "loss": 12.2502, "step": 10694 }, { "epoch": 0.4457921720645242, "grad_norm": 326.0, "learning_rate": 6.111476506378214e-05, "loss": 12.5008, "step": 10695 }, { "epoch": 0.4458338543620524, "grad_norm": 318.0, "learning_rate": 6.110818383564399e-05, "loss": 13.4397, "step": 10696 }, { "epoch": 0.4458755366595807, "grad_norm": 568.0, "learning_rate": 6.110160240505819e-05, "loss": 18.7503, "step": 10697 }, { "epoch": 0.4459172189571089, "grad_norm": 202.0, "learning_rate": 6.10950207721447e-05, "loss": 12.3766, "step": 10698 }, { "epoch": 0.44595890125463716, "grad_norm": 120.5, "learning_rate": 6.10884389370235e-05, "loss": 10.6879, "step": 10699 }, { "epoch": 0.4460005835521654, "grad_norm": 147.0, "learning_rate": 6.108185689981449e-05, "loss": 9.0628, "step": 10700 }, { "epoch": 0.44604226584969364, "grad_norm": 141.0, "learning_rate": 6.107527466063767e-05, "loss": 10.0004, "step": 10701 }, { "epoch": 0.44608394814722185, "grad_norm": 199.0, "learning_rate": 6.106869221961296e-05, "loss": 8.5002, "step": 10702 }, { "epoch": 0.4461256304447501, "grad_norm": 270.0, "learning_rate": 6.106210957686037e-05, "loss": 13.1877, "step": 10703 }, { "epoch": 0.44616731274227833, "grad_norm": 270.0, "learning_rate": 6.105552673249985e-05, "loss": 12.6877, "step": 10704 }, { "epoch": 0.4462089950398066, "grad_norm": 256.0, "learning_rate": 6.104894368665137e-05, "loss": 10.5001, "step": 10705 }, { "epoch": 0.4462506773373348, "grad_norm": 292.0, "learning_rate": 6.104236043943491e-05, "loss": 12.5629, "step": 10706 }, { "epoch": 0.4462923596348631, "grad_norm": 148.0, "learning_rate": 6.103577699097045e-05, "loss": 7.9064, "step": 10707 }, { "epoch": 0.4463340419323913, "grad_norm": 364.0, "learning_rate": 6.102919334137798e-05, "loss": 14.7503, "step": 10708 }, { "epoch": 0.44637572422991956, "grad_norm": 900.0, "learning_rate": 6.102260949077748e-05, "loss": 25.502, "step": 10709 }, { "epoch": 0.44641740652744777, "grad_norm": 212.0, "learning_rate": 6.101602543928895e-05, "loss": 11.8127, "step": 10710 }, { "epoch": 0.44645908882497604, "grad_norm": 708.0, "learning_rate": 6.100944118703237e-05, "loss": 21.0003, "step": 10711 }, { "epoch": 0.44650077112250425, "grad_norm": 298.0, "learning_rate": 6.1002856734127756e-05, "loss": 14.0006, "step": 10712 }, { "epoch": 0.4465424534200325, "grad_norm": 340.0, "learning_rate": 6.0996272080695095e-05, "loss": 14.2502, "step": 10713 }, { "epoch": 0.4465841357175607, "grad_norm": 348.0, "learning_rate": 6.09896872268544e-05, "loss": 15.2503, "step": 10714 }, { "epoch": 0.446625818015089, "grad_norm": 784.0, "learning_rate": 6.098310217272568e-05, "loss": 21.1257, "step": 10715 }, { "epoch": 0.4466675003126172, "grad_norm": 792.0, "learning_rate": 6.097651691842894e-05, "loss": 23.6251, "step": 10716 }, { "epoch": 0.44670918261014547, "grad_norm": 474.0, "learning_rate": 6.096993146408421e-05, "loss": 15.5631, "step": 10717 }, { "epoch": 0.4467508649076737, "grad_norm": 219.0, "learning_rate": 6.09633458098115e-05, "loss": 10.2503, "step": 10718 }, { "epoch": 0.44679254720520195, "grad_norm": 165.0, "learning_rate": 6.095675995573085e-05, "loss": 5.9379, "step": 10719 }, { "epoch": 0.4468342295027302, "grad_norm": 430.0, "learning_rate": 6.095017390196227e-05, "loss": 16.7502, "step": 10720 }, { "epoch": 0.44687591180025843, "grad_norm": 314.0, "learning_rate": 6.094358764862581e-05, "loss": 13.2501, "step": 10721 }, { "epoch": 0.4469175940977867, "grad_norm": 350.0, "learning_rate": 6.0937001195841484e-05, "loss": 13.9376, "step": 10722 }, { "epoch": 0.4469592763953149, "grad_norm": 290.0, "learning_rate": 6.093041454372934e-05, "loss": 13.0627, "step": 10723 }, { "epoch": 0.4470009586928432, "grad_norm": 382.0, "learning_rate": 6.092382769240943e-05, "loss": 15.063, "step": 10724 }, { "epoch": 0.4470426409903714, "grad_norm": 316.0, "learning_rate": 6.0917240642001774e-05, "loss": 13.3752, "step": 10725 }, { "epoch": 0.44708432328789965, "grad_norm": 624.0, "learning_rate": 6.091065339262645e-05, "loss": 17.7502, "step": 10726 }, { "epoch": 0.44712600558542787, "grad_norm": 1720.0, "learning_rate": 6.09040659444035e-05, "loss": 34.5049, "step": 10727 }, { "epoch": 0.44716768788295613, "grad_norm": 374.0, "learning_rate": 6.0897478297452984e-05, "loss": 13.8128, "step": 10728 }, { "epoch": 0.44720937018048434, "grad_norm": 506.0, "learning_rate": 6.0890890451894946e-05, "loss": 17.1256, "step": 10729 }, { "epoch": 0.4472510524780126, "grad_norm": 482.0, "learning_rate": 6.0884302407849455e-05, "loss": 16.126, "step": 10730 }, { "epoch": 0.4472927347755408, "grad_norm": 584.0, "learning_rate": 6.087771416543661e-05, "loss": 20.0026, "step": 10731 }, { "epoch": 0.4473344170730691, "grad_norm": 100.5, "learning_rate": 6.087112572477644e-05, "loss": 8.9381, "step": 10732 }, { "epoch": 0.4473760993705973, "grad_norm": 354.0, "learning_rate": 6.086453708598905e-05, "loss": 14.2505, "step": 10733 }, { "epoch": 0.44741778166812557, "grad_norm": 438.0, "learning_rate": 6.085794824919451e-05, "loss": 15.6877, "step": 10734 }, { "epoch": 0.4474594639656538, "grad_norm": 422.0, "learning_rate": 6.085135921451288e-05, "loss": 15.4377, "step": 10735 }, { "epoch": 0.44750114626318205, "grad_norm": 276.0, "learning_rate": 6.084476998206429e-05, "loss": 11.6879, "step": 10736 }, { "epoch": 0.44754282856071026, "grad_norm": 298.0, "learning_rate": 6.083818055196879e-05, "loss": 12.2507, "step": 10737 }, { "epoch": 0.4475845108582385, "grad_norm": 209.0, "learning_rate": 6.083159092434649e-05, "loss": 11.5014, "step": 10738 }, { "epoch": 0.44762619315576674, "grad_norm": 720.0, "learning_rate": 6.0825001099317483e-05, "loss": 19.5004, "step": 10739 }, { "epoch": 0.447667875453295, "grad_norm": 388.0, "learning_rate": 6.081841107700187e-05, "loss": 14.6883, "step": 10740 }, { "epoch": 0.4477095577508232, "grad_norm": 255.0, "learning_rate": 6.081182085751975e-05, "loss": 12.1253, "step": 10741 }, { "epoch": 0.4477512400483515, "grad_norm": 660.0, "learning_rate": 6.0805230440991245e-05, "loss": 21.6252, "step": 10742 }, { "epoch": 0.4477929223458797, "grad_norm": 406.0, "learning_rate": 6.079863982753644e-05, "loss": 15.8754, "step": 10743 }, { "epoch": 0.44783460464340796, "grad_norm": 318.0, "learning_rate": 6.079204901727548e-05, "loss": 13.1879, "step": 10744 }, { "epoch": 0.4478762869409362, "grad_norm": 350.0, "learning_rate": 6.0785458010328463e-05, "loss": 13.9377, "step": 10745 }, { "epoch": 0.44791796923846444, "grad_norm": 342.0, "learning_rate": 6.077886680681553e-05, "loss": 13.5649, "step": 10746 }, { "epoch": 0.44795965153599265, "grad_norm": 316.0, "learning_rate": 6.077227540685677e-05, "loss": 13.8128, "step": 10747 }, { "epoch": 0.4480013338335209, "grad_norm": 398.0, "learning_rate": 6.0765683810572346e-05, "loss": 15.8754, "step": 10748 }, { "epoch": 0.44804301613104913, "grad_norm": 1376.0, "learning_rate": 6.075909201808239e-05, "loss": 26.0052, "step": 10749 }, { "epoch": 0.4480846984285774, "grad_norm": 376.0, "learning_rate": 6.075250002950701e-05, "loss": 15.7518, "step": 10750 }, { "epoch": 0.4481263807261056, "grad_norm": 194.0, "learning_rate": 6.0745907844966366e-05, "loss": 10.1252, "step": 10751 }, { "epoch": 0.4481680630236339, "grad_norm": 884.0, "learning_rate": 6.07393154645806e-05, "loss": 22.3752, "step": 10752 }, { "epoch": 0.4482097453211621, "grad_norm": 368.0, "learning_rate": 6.073272288846986e-05, "loss": 14.0629, "step": 10753 }, { "epoch": 0.44825142761869036, "grad_norm": 124.0, "learning_rate": 6.0726130116754286e-05, "loss": 9.9378, "step": 10754 }, { "epoch": 0.44829310991621857, "grad_norm": 146.0, "learning_rate": 6.071953714955404e-05, "loss": 9.9377, "step": 10755 }, { "epoch": 0.44833479221374684, "grad_norm": 346.0, "learning_rate": 6.071294398698928e-05, "loss": 15.438, "step": 10756 }, { "epoch": 0.44837647451127505, "grad_norm": 286.0, "learning_rate": 6.070635062918016e-05, "loss": 12.5628, "step": 10757 }, { "epoch": 0.4484181568088033, "grad_norm": 856.0, "learning_rate": 6.069975707624686e-05, "loss": 22.88, "step": 10758 }, { "epoch": 0.4484598391063315, "grad_norm": 276.0, "learning_rate": 6.069316332830952e-05, "loss": 10.8752, "step": 10759 }, { "epoch": 0.4485015214038598, "grad_norm": 688.0, "learning_rate": 6.0686569385488345e-05, "loss": 21.5006, "step": 10760 }, { "epoch": 0.448543203701388, "grad_norm": 680.0, "learning_rate": 6.0679975247903484e-05, "loss": 20.2505, "step": 10761 }, { "epoch": 0.44858488599891627, "grad_norm": 556.0, "learning_rate": 6.0673380915675135e-05, "loss": 17.5003, "step": 10762 }, { "epoch": 0.4486265682964445, "grad_norm": 179.0, "learning_rate": 6.066678638892347e-05, "loss": 9.9378, "step": 10763 }, { "epoch": 0.44866825059397275, "grad_norm": 460.0, "learning_rate": 6.066019166776867e-05, "loss": 13.6888, "step": 10764 }, { "epoch": 0.44870993289150096, "grad_norm": 398.0, "learning_rate": 6.065359675233093e-05, "loss": 15.0631, "step": 10765 }, { "epoch": 0.44875161518902923, "grad_norm": 378.0, "learning_rate": 6.064700164273045e-05, "loss": 14.1254, "step": 10766 }, { "epoch": 0.44879329748655744, "grad_norm": 396.0, "learning_rate": 6.064040633908742e-05, "loss": 14.5002, "step": 10767 }, { "epoch": 0.4488349797840857, "grad_norm": 207.0, "learning_rate": 6.063381084152203e-05, "loss": 11.6252, "step": 10768 }, { "epoch": 0.4488766620816139, "grad_norm": 141.0, "learning_rate": 6.06272151501545e-05, "loss": 9.8754, "step": 10769 }, { "epoch": 0.4489183443791422, "grad_norm": 262.0, "learning_rate": 6.062061926510503e-05, "loss": 12.6262, "step": 10770 }, { "epoch": 0.4489600266766704, "grad_norm": 880.0, "learning_rate": 6.061402318649383e-05, "loss": 24.2503, "step": 10771 }, { "epoch": 0.44900170897419867, "grad_norm": 111.5, "learning_rate": 6.0607426914441126e-05, "loss": 9.5636, "step": 10772 }, { "epoch": 0.4490433912717269, "grad_norm": 107.5, "learning_rate": 6.0600830449067114e-05, "loss": 9.4379, "step": 10773 }, { "epoch": 0.44908507356925514, "grad_norm": 318.0, "learning_rate": 6.059423379049203e-05, "loss": 14.8755, "step": 10774 }, { "epoch": 0.44912675586678336, "grad_norm": 336.0, "learning_rate": 6.058763693883609e-05, "loss": 12.6876, "step": 10775 }, { "epoch": 0.4491684381643116, "grad_norm": 166.0, "learning_rate": 6.058103989421953e-05, "loss": 9.8126, "step": 10776 }, { "epoch": 0.44921012046183983, "grad_norm": 66.5, "learning_rate": 6.0574442656762576e-05, "loss": 9.3756, "step": 10777 }, { "epoch": 0.4492518027593681, "grad_norm": 404.0, "learning_rate": 6.056784522658547e-05, "loss": 15.0008, "step": 10778 }, { "epoch": 0.4492934850568963, "grad_norm": 376.0, "learning_rate": 6.056124760380845e-05, "loss": 15.2509, "step": 10779 }, { "epoch": 0.4493351673544246, "grad_norm": 290.0, "learning_rate": 6.0554649788551745e-05, "loss": 13.5019, "step": 10780 }, { "epoch": 0.4493768496519528, "grad_norm": 516.0, "learning_rate": 6.054805178093561e-05, "loss": 18.0002, "step": 10781 }, { "epoch": 0.44941853194948106, "grad_norm": 434.0, "learning_rate": 6.05414535810803e-05, "loss": 13.6258, "step": 10782 }, { "epoch": 0.44946021424700927, "grad_norm": 332.0, "learning_rate": 6.053485518910607e-05, "loss": 14.3127, "step": 10783 }, { "epoch": 0.44950189654453754, "grad_norm": 195.0, "learning_rate": 6.052825660513316e-05, "loss": 10.2504, "step": 10784 }, { "epoch": 0.44954357884206575, "grad_norm": 896.0, "learning_rate": 6.052165782928184e-05, "loss": 25.8753, "step": 10785 }, { "epoch": 0.449585261139594, "grad_norm": 440.0, "learning_rate": 6.051505886167237e-05, "loss": 16.7503, "step": 10786 }, { "epoch": 0.44962694343712223, "grad_norm": 246.0, "learning_rate": 6.050845970242502e-05, "loss": 10.9378, "step": 10787 }, { "epoch": 0.4496686257346505, "grad_norm": 498.0, "learning_rate": 6.050186035166007e-05, "loss": 16.6253, "step": 10788 }, { "epoch": 0.4497103080321787, "grad_norm": 234.0, "learning_rate": 6.049526080949777e-05, "loss": 11.0638, "step": 10789 }, { "epoch": 0.449751990329707, "grad_norm": 147.0, "learning_rate": 6.048866107605842e-05, "loss": 9.8757, "step": 10790 }, { "epoch": 0.4497936726272352, "grad_norm": 210.0, "learning_rate": 6.048206115146228e-05, "loss": 10.5636, "step": 10791 }, { "epoch": 0.44983535492476345, "grad_norm": 436.0, "learning_rate": 6.0475461035829637e-05, "loss": 15.0005, "step": 10792 }, { "epoch": 0.4498770372222917, "grad_norm": 540.0, "learning_rate": 6.0468860729280796e-05, "loss": 16.8756, "step": 10793 }, { "epoch": 0.44991871951981993, "grad_norm": 1624.0, "learning_rate": 6.046226023193604e-05, "loss": 37.2503, "step": 10794 }, { "epoch": 0.4499604018173482, "grad_norm": 81.5, "learning_rate": 6.045565954391567e-05, "loss": 9.1884, "step": 10795 }, { "epoch": 0.4500020841148764, "grad_norm": 219.0, "learning_rate": 6.0449058665339964e-05, "loss": 11.0628, "step": 10796 }, { "epoch": 0.4500437664124047, "grad_norm": 85.5, "learning_rate": 6.044245759632925e-05, "loss": 9.8753, "step": 10797 }, { "epoch": 0.4500854487099329, "grad_norm": 192.0, "learning_rate": 6.0435856337003816e-05, "loss": 10.9378, "step": 10798 }, { "epoch": 0.45012713100746116, "grad_norm": 338.0, "learning_rate": 6.042925488748396e-05, "loss": 13.8131, "step": 10799 }, { "epoch": 0.45016881330498937, "grad_norm": 252.0, "learning_rate": 6.0422653247890024e-05, "loss": 11.7502, "step": 10800 }, { "epoch": 0.45021049560251764, "grad_norm": 310.0, "learning_rate": 6.04160514183423e-05, "loss": 13.3752, "step": 10801 }, { "epoch": 0.45025217790004585, "grad_norm": 350.0, "learning_rate": 6.0409449398961116e-05, "loss": 13.2502, "step": 10802 }, { "epoch": 0.4502938601975741, "grad_norm": 147.0, "learning_rate": 6.04028471898668e-05, "loss": 8.8127, "step": 10803 }, { "epoch": 0.4503355424951023, "grad_norm": 165.0, "learning_rate": 6.039624479117966e-05, "loss": 9.5626, "step": 10804 }, { "epoch": 0.4503772247926306, "grad_norm": 255.0, "learning_rate": 6.038964220302004e-05, "loss": 12.5637, "step": 10805 }, { "epoch": 0.4504189070901588, "grad_norm": 448.0, "learning_rate": 6.0383039425508256e-05, "loss": 14.0004, "step": 10806 }, { "epoch": 0.45046058938768707, "grad_norm": 242.0, "learning_rate": 6.037643645876467e-05, "loss": 12.0631, "step": 10807 }, { "epoch": 0.4505022716852153, "grad_norm": 1208.0, "learning_rate": 6.03698333029096e-05, "loss": 29.1253, "step": 10808 }, { "epoch": 0.45054395398274355, "grad_norm": 170.0, "learning_rate": 6.0363229958063406e-05, "loss": 11.1252, "step": 10809 }, { "epoch": 0.45058563628027176, "grad_norm": 144.0, "learning_rate": 6.035662642434643e-05, "loss": 10.4379, "step": 10810 }, { "epoch": 0.45062731857780003, "grad_norm": 796.0, "learning_rate": 6.035002270187901e-05, "loss": 20.0002, "step": 10811 }, { "epoch": 0.45066900087532824, "grad_norm": 117.5, "learning_rate": 6.0343418790781515e-05, "loss": 9.8751, "step": 10812 }, { "epoch": 0.4507106831728565, "grad_norm": 354.0, "learning_rate": 6.0336814691174284e-05, "loss": 14.0002, "step": 10813 }, { "epoch": 0.4507523654703847, "grad_norm": 488.0, "learning_rate": 6.033021040317769e-05, "loss": 16.2511, "step": 10814 }, { "epoch": 0.450794047767913, "grad_norm": 296.0, "learning_rate": 6.03236059269121e-05, "loss": 13.1877, "step": 10815 }, { "epoch": 0.4508357300654412, "grad_norm": 520.0, "learning_rate": 6.031700126249788e-05, "loss": 15.6253, "step": 10816 }, { "epoch": 0.45087741236296947, "grad_norm": 460.0, "learning_rate": 6.031039641005538e-05, "loss": 16.6253, "step": 10817 }, { "epoch": 0.4509190946604977, "grad_norm": 250.0, "learning_rate": 6.0303791369704984e-05, "loss": 11.6877, "step": 10818 }, { "epoch": 0.45096077695802594, "grad_norm": 180.0, "learning_rate": 6.0297186141567094e-05, "loss": 10.0012, "step": 10819 }, { "epoch": 0.45100245925555416, "grad_norm": 572.0, "learning_rate": 6.029058072576207e-05, "loss": 18.7503, "step": 10820 }, { "epoch": 0.4510441415530824, "grad_norm": 276.0, "learning_rate": 6.0283975122410294e-05, "loss": 12.5627, "step": 10821 }, { "epoch": 0.45108582385061063, "grad_norm": 306.0, "learning_rate": 6.0277369331632164e-05, "loss": 14.0005, "step": 10822 }, { "epoch": 0.4511275061481389, "grad_norm": 95.5, "learning_rate": 6.027076335354807e-05, "loss": 9.1254, "step": 10823 }, { "epoch": 0.4511691884456671, "grad_norm": 900.0, "learning_rate": 6.026415718827839e-05, "loss": 23.5004, "step": 10824 }, { "epoch": 0.4512108707431954, "grad_norm": 135.0, "learning_rate": 6.0257550835943545e-05, "loss": 10.7508, "step": 10825 }, { "epoch": 0.4512525530407236, "grad_norm": 171.0, "learning_rate": 6.0250944296663915e-05, "loss": 10.6253, "step": 10826 }, { "epoch": 0.45129423533825186, "grad_norm": 136.0, "learning_rate": 6.024433757055992e-05, "loss": 9.6252, "step": 10827 }, { "epoch": 0.45133591763578007, "grad_norm": 376.0, "learning_rate": 6.0237730657751966e-05, "loss": 12.8129, "step": 10828 }, { "epoch": 0.45137759993330834, "grad_norm": 284.0, "learning_rate": 6.0231123558360456e-05, "loss": 12.9377, "step": 10829 }, { "epoch": 0.45141928223083655, "grad_norm": 816.0, "learning_rate": 6.0224516272505816e-05, "loss": 24.3752, "step": 10830 }, { "epoch": 0.4514609645283648, "grad_norm": 268.0, "learning_rate": 6.0217908800308455e-05, "loss": 11.3159, "step": 10831 }, { "epoch": 0.45150264682589303, "grad_norm": 428.0, "learning_rate": 6.02113011418888e-05, "loss": 15.6891, "step": 10832 }, { "epoch": 0.4515443291234213, "grad_norm": 108.0, "learning_rate": 6.020469329736728e-05, "loss": 8.5005, "step": 10833 }, { "epoch": 0.4515860114209495, "grad_norm": 448.0, "learning_rate": 6.019808526686431e-05, "loss": 16.2503, "step": 10834 }, { "epoch": 0.4516276937184778, "grad_norm": 241.0, "learning_rate": 6.019147705050033e-05, "loss": 11.5003, "step": 10835 }, { "epoch": 0.451669376016006, "grad_norm": 708.0, "learning_rate": 6.0184868648395786e-05, "loss": 22.6254, "step": 10836 }, { "epoch": 0.45171105831353425, "grad_norm": 318.0, "learning_rate": 6.017826006067111e-05, "loss": 13.3151, "step": 10837 }, { "epoch": 0.45175274061106246, "grad_norm": 564.0, "learning_rate": 6.017165128744673e-05, "loss": 19.1253, "step": 10838 }, { "epoch": 0.45179442290859073, "grad_norm": 211.0, "learning_rate": 6.01650423288431e-05, "loss": 12.5628, "step": 10839 }, { "epoch": 0.45183610520611894, "grad_norm": 348.0, "learning_rate": 6.0158433184980686e-05, "loss": 14.4377, "step": 10840 }, { "epoch": 0.4518777875036472, "grad_norm": 262.0, "learning_rate": 6.015182385597992e-05, "loss": 13.3752, "step": 10841 }, { "epoch": 0.4519194698011754, "grad_norm": 1264.0, "learning_rate": 6.0145214341961254e-05, "loss": 27.3803, "step": 10842 }, { "epoch": 0.4519611520987037, "grad_norm": 1112.0, "learning_rate": 6.013860464304515e-05, "loss": 26.5007, "step": 10843 }, { "epoch": 0.4520028343962319, "grad_norm": 278.0, "learning_rate": 6.01319947593521e-05, "loss": 11.8753, "step": 10844 }, { "epoch": 0.45204451669376017, "grad_norm": 300.0, "learning_rate": 6.012538469100253e-05, "loss": 12.6876, "step": 10845 }, { "epoch": 0.4520861989912884, "grad_norm": 474.0, "learning_rate": 6.011877443811693e-05, "loss": 17.7504, "step": 10846 }, { "epoch": 0.45212788128881665, "grad_norm": 580.0, "learning_rate": 6.0112164000815766e-05, "loss": 18.2505, "step": 10847 }, { "epoch": 0.45216956358634486, "grad_norm": 197.0, "learning_rate": 6.010555337921952e-05, "loss": 11.0003, "step": 10848 }, { "epoch": 0.4522112458838731, "grad_norm": 230.0, "learning_rate": 6.009894257344866e-05, "loss": 12.6253, "step": 10849 }, { "epoch": 0.45225292818140134, "grad_norm": 328.0, "learning_rate": 6.009233158362367e-05, "loss": 13.8754, "step": 10850 }, { "epoch": 0.4522946104789296, "grad_norm": 290.0, "learning_rate": 6.0085720409865055e-05, "loss": 12.6881, "step": 10851 }, { "epoch": 0.4523362927764578, "grad_norm": 96.0, "learning_rate": 6.007910905229328e-05, "loss": 7.6255, "step": 10852 }, { "epoch": 0.4523779750739861, "grad_norm": 792.0, "learning_rate": 6.007249751102886e-05, "loss": 23.7518, "step": 10853 }, { "epoch": 0.4524196573715143, "grad_norm": 752.0, "learning_rate": 6.006588578619227e-05, "loss": 20.0002, "step": 10854 }, { "epoch": 0.45246133966904256, "grad_norm": 239.0, "learning_rate": 6.005927387790402e-05, "loss": 12.4385, "step": 10855 }, { "epoch": 0.4525030219665708, "grad_norm": 219.0, "learning_rate": 6.005266178628459e-05, "loss": 9.313, "step": 10856 }, { "epoch": 0.45254470426409904, "grad_norm": 372.0, "learning_rate": 6.004604951145454e-05, "loss": 15.5003, "step": 10857 }, { "epoch": 0.45258638656162725, "grad_norm": 616.0, "learning_rate": 6.003943705353433e-05, "loss": 18.0002, "step": 10858 }, { "epoch": 0.4526280688591555, "grad_norm": 440.0, "learning_rate": 6.0032824412644485e-05, "loss": 15.4377, "step": 10859 }, { "epoch": 0.45266975115668373, "grad_norm": 174.0, "learning_rate": 6.002621158890553e-05, "loss": 10.5632, "step": 10860 }, { "epoch": 0.452711433454212, "grad_norm": 1328.0, "learning_rate": 6.001959858243797e-05, "loss": 33.2505, "step": 10861 }, { "epoch": 0.4527531157517402, "grad_norm": 552.0, "learning_rate": 6.001298539336235e-05, "loss": 16.6291, "step": 10862 }, { "epoch": 0.4527947980492685, "grad_norm": 474.0, "learning_rate": 6.0006372021799184e-05, "loss": 17.2506, "step": 10863 }, { "epoch": 0.4528364803467967, "grad_norm": 226.0, "learning_rate": 5.9999758467868995e-05, "loss": 12.3755, "step": 10864 }, { "epoch": 0.45287816264432496, "grad_norm": 181.0, "learning_rate": 5.999314473169232e-05, "loss": 10.9377, "step": 10865 }, { "epoch": 0.4529198449418532, "grad_norm": 524.0, "learning_rate": 5.998653081338969e-05, "loss": 17.1257, "step": 10866 }, { "epoch": 0.45296152723938143, "grad_norm": 446.0, "learning_rate": 5.9979916713081655e-05, "loss": 15.4377, "step": 10867 }, { "epoch": 0.4530032095369097, "grad_norm": 186.0, "learning_rate": 5.9973302430888746e-05, "loss": 11.2505, "step": 10868 }, { "epoch": 0.4530448918344379, "grad_norm": 146.0, "learning_rate": 5.996668796693151e-05, "loss": 10.4378, "step": 10869 }, { "epoch": 0.4530865741319662, "grad_norm": 1256.0, "learning_rate": 5.9960073321330515e-05, "loss": 30.1253, "step": 10870 }, { "epoch": 0.4531282564294944, "grad_norm": 492.0, "learning_rate": 5.9953458494206285e-05, "loss": 17.3752, "step": 10871 }, { "epoch": 0.45316993872702266, "grad_norm": 278.0, "learning_rate": 5.9946843485679406e-05, "loss": 13.9379, "step": 10872 }, { "epoch": 0.45321162102455087, "grad_norm": 262.0, "learning_rate": 5.994022829587041e-05, "loss": 13.3139, "step": 10873 }, { "epoch": 0.45325330332207914, "grad_norm": 364.0, "learning_rate": 5.993361292489987e-05, "loss": 13.3128, "step": 10874 }, { "epoch": 0.45329498561960735, "grad_norm": 784.0, "learning_rate": 5.992699737288836e-05, "loss": 25.0002, "step": 10875 }, { "epoch": 0.4533366679171356, "grad_norm": 270.0, "learning_rate": 5.992038163995645e-05, "loss": 12.0626, "step": 10876 }, { "epoch": 0.45337835021466383, "grad_norm": 266.0, "learning_rate": 5.991376572622469e-05, "loss": 12.0627, "step": 10877 }, { "epoch": 0.4534200325121921, "grad_norm": 53.75, "learning_rate": 5.9907149631813675e-05, "loss": 7.719, "step": 10878 }, { "epoch": 0.4534617148097203, "grad_norm": 196.0, "learning_rate": 5.990053335684398e-05, "loss": 12.6884, "step": 10879 }, { "epoch": 0.4535033971072486, "grad_norm": 596.0, "learning_rate": 5.9893916901436176e-05, "loss": 18.2522, "step": 10880 }, { "epoch": 0.4535450794047768, "grad_norm": 312.0, "learning_rate": 5.988730026571085e-05, "loss": 13.6254, "step": 10881 }, { "epoch": 0.45358676170230505, "grad_norm": 212.0, "learning_rate": 5.988068344978862e-05, "loss": 11.0003, "step": 10882 }, { "epoch": 0.45362844399983326, "grad_norm": 298.0, "learning_rate": 5.9874066453790045e-05, "loss": 13.7503, "step": 10883 }, { "epoch": 0.45367012629736153, "grad_norm": 201.0, "learning_rate": 5.986744927783574e-05, "loss": 11.3127, "step": 10884 }, { "epoch": 0.45371180859488974, "grad_norm": 544.0, "learning_rate": 5.98608319220463e-05, "loss": 18.5002, "step": 10885 }, { "epoch": 0.453753490892418, "grad_norm": 340.0, "learning_rate": 5.985421438654232e-05, "loss": 14.189, "step": 10886 }, { "epoch": 0.4537951731899462, "grad_norm": 832.0, "learning_rate": 5.9847596671444395e-05, "loss": 25.7503, "step": 10887 }, { "epoch": 0.4538368554874745, "grad_norm": 374.0, "learning_rate": 5.984097877687316e-05, "loss": 14.3762, "step": 10888 }, { "epoch": 0.4538785377850027, "grad_norm": 195.0, "learning_rate": 5.983436070294921e-05, "loss": 10.3127, "step": 10889 }, { "epoch": 0.45392022008253097, "grad_norm": 1224.0, "learning_rate": 5.982774244979317e-05, "loss": 23.5051, "step": 10890 }, { "epoch": 0.4539619023800592, "grad_norm": 660.0, "learning_rate": 5.982112401752564e-05, "loss": 21.1271, "step": 10891 }, { "epoch": 0.45400358467758745, "grad_norm": 220.0, "learning_rate": 5.981450540626725e-05, "loss": 12.1253, "step": 10892 }, { "epoch": 0.45404526697511566, "grad_norm": 206.0, "learning_rate": 5.980788661613864e-05, "loss": 12.1254, "step": 10893 }, { "epoch": 0.4540869492726439, "grad_norm": 856.0, "learning_rate": 5.9801267647260405e-05, "loss": 22.6254, "step": 10894 }, { "epoch": 0.45412863157017214, "grad_norm": 284.0, "learning_rate": 5.9794648499753216e-05, "loss": 12.8129, "step": 10895 }, { "epoch": 0.4541703138677004, "grad_norm": 243.0, "learning_rate": 5.978802917373769e-05, "loss": 12.1879, "step": 10896 }, { "epoch": 0.4542119961652286, "grad_norm": 144.0, "learning_rate": 5.9781409669334455e-05, "loss": 7.8755, "step": 10897 }, { "epoch": 0.4542536784627569, "grad_norm": 404.0, "learning_rate": 5.977478998666417e-05, "loss": 15.4418, "step": 10898 }, { "epoch": 0.4542953607602851, "grad_norm": 227.0, "learning_rate": 5.976817012584746e-05, "loss": 6.2823, "step": 10899 }, { "epoch": 0.45433704305781336, "grad_norm": 306.0, "learning_rate": 5.976155008700498e-05, "loss": 14.3752, "step": 10900 }, { "epoch": 0.4543787253553416, "grad_norm": 406.0, "learning_rate": 5.975492987025739e-05, "loss": 15.3133, "step": 10901 }, { "epoch": 0.45442040765286984, "grad_norm": 486.0, "learning_rate": 5.974830947572534e-05, "loss": 16.8753, "step": 10902 }, { "epoch": 0.45446208995039805, "grad_norm": 412.0, "learning_rate": 5.974168890352948e-05, "loss": 14.5632, "step": 10903 }, { "epoch": 0.4545037722479263, "grad_norm": 120.0, "learning_rate": 5.9735068153790476e-05, "loss": 9.6256, "step": 10904 }, { "epoch": 0.45454545454545453, "grad_norm": 92.0, "learning_rate": 5.972844722662899e-05, "loss": 9.8759, "step": 10905 }, { "epoch": 0.4545871368429828, "grad_norm": 142.0, "learning_rate": 5.972182612216568e-05, "loss": 10.1259, "step": 10906 }, { "epoch": 0.454628819140511, "grad_norm": 182.0, "learning_rate": 5.9715204840521234e-05, "loss": 11.3131, "step": 10907 }, { "epoch": 0.4546705014380393, "grad_norm": 196.0, "learning_rate": 5.9708583381816316e-05, "loss": 11.7501, "step": 10908 }, { "epoch": 0.4547121837355675, "grad_norm": 356.0, "learning_rate": 5.970196174617162e-05, "loss": 14.2502, "step": 10909 }, { "epoch": 0.45475386603309575, "grad_norm": 126.0, "learning_rate": 5.969533993370779e-05, "loss": 9.438, "step": 10910 }, { "epoch": 0.45479554833062397, "grad_norm": 104.0, "learning_rate": 5.968871794454554e-05, "loss": 8.3126, "step": 10911 }, { "epoch": 0.45483723062815223, "grad_norm": 256.0, "learning_rate": 5.9682095778805536e-05, "loss": 11.8752, "step": 10912 }, { "epoch": 0.45487891292568045, "grad_norm": 292.0, "learning_rate": 5.967547343660849e-05, "loss": 8.8755, "step": 10913 }, { "epoch": 0.4549205952232087, "grad_norm": 240.0, "learning_rate": 5.966885091807507e-05, "loss": 13.1256, "step": 10914 }, { "epoch": 0.4549622775207369, "grad_norm": 190.0, "learning_rate": 5.9662228223325986e-05, "loss": 11.5628, "step": 10915 }, { "epoch": 0.4550039598182652, "grad_norm": 292.0, "learning_rate": 5.965560535248194e-05, "loss": 13.5647, "step": 10916 }, { "epoch": 0.4550456421157934, "grad_norm": 272.0, "learning_rate": 5.9648982305663625e-05, "loss": 13.688, "step": 10917 }, { "epoch": 0.45508732441332167, "grad_norm": 292.0, "learning_rate": 5.964235908299175e-05, "loss": 13.4377, "step": 10918 }, { "epoch": 0.4551290067108499, "grad_norm": 312.0, "learning_rate": 5.963573568458702e-05, "loss": 14.1877, "step": 10919 }, { "epoch": 0.45517068900837815, "grad_norm": 320.0, "learning_rate": 5.9629112110570164e-05, "loss": 14.1879, "step": 10920 }, { "epoch": 0.45521237130590636, "grad_norm": 201.0, "learning_rate": 5.962248836106187e-05, "loss": 11.2502, "step": 10921 }, { "epoch": 0.4552540536034346, "grad_norm": 334.0, "learning_rate": 5.961586443618288e-05, "loss": 14.2502, "step": 10922 }, { "epoch": 0.45529573590096284, "grad_norm": 306.0, "learning_rate": 5.9609240336053906e-05, "loss": 12.813, "step": 10923 }, { "epoch": 0.4553374181984911, "grad_norm": 356.0, "learning_rate": 5.960261606079568e-05, "loss": 13.8131, "step": 10924 }, { "epoch": 0.4553791004960193, "grad_norm": 620.0, "learning_rate": 5.9595991610528926e-05, "loss": 19.3784, "step": 10925 }, { "epoch": 0.4554207827935476, "grad_norm": 588.0, "learning_rate": 5.958936698537436e-05, "loss": 17.3755, "step": 10926 }, { "epoch": 0.4554624650910758, "grad_norm": 420.0, "learning_rate": 5.958274218545273e-05, "loss": 16.0002, "step": 10927 }, { "epoch": 0.45550414738860406, "grad_norm": 328.0, "learning_rate": 5.9576117210884783e-05, "loss": 10.563, "step": 10928 }, { "epoch": 0.4555458296861323, "grad_norm": 138.0, "learning_rate": 5.9569492061791254e-05, "loss": 9.3772, "step": 10929 }, { "epoch": 0.45558751198366054, "grad_norm": 600.0, "learning_rate": 5.956286673829287e-05, "loss": 20.626, "step": 10930 }, { "epoch": 0.45562919428118875, "grad_norm": 208.0, "learning_rate": 5.95562412405104e-05, "loss": 11.1252, "step": 10931 }, { "epoch": 0.455670876578717, "grad_norm": 748.0, "learning_rate": 5.954961556856457e-05, "loss": 20.1271, "step": 10932 }, { "epoch": 0.45571255887624523, "grad_norm": 298.0, "learning_rate": 5.954298972257616e-05, "loss": 12.5637, "step": 10933 }, { "epoch": 0.4557542411737735, "grad_norm": 544.0, "learning_rate": 5.953636370266591e-05, "loss": 20.1255, "step": 10934 }, { "epoch": 0.4557959234713017, "grad_norm": 400.0, "learning_rate": 5.952973750895459e-05, "loss": 15.064, "step": 10935 }, { "epoch": 0.45583760576883, "grad_norm": 204.0, "learning_rate": 5.952311114156296e-05, "loss": 8.065, "step": 10936 }, { "epoch": 0.4558792880663582, "grad_norm": 382.0, "learning_rate": 5.951648460061178e-05, "loss": 15.0001, "step": 10937 }, { "epoch": 0.45592097036388646, "grad_norm": 312.0, "learning_rate": 5.950985788622182e-05, "loss": 14.1254, "step": 10938 }, { "epoch": 0.4559626526614147, "grad_norm": 136.0, "learning_rate": 5.950323099851386e-05, "loss": 9.9399, "step": 10939 }, { "epoch": 0.45600433495894294, "grad_norm": 372.0, "learning_rate": 5.949660393760868e-05, "loss": 13.1261, "step": 10940 }, { "epoch": 0.4560460172564712, "grad_norm": 436.0, "learning_rate": 5.948997670362704e-05, "loss": 16.0025, "step": 10941 }, { "epoch": 0.4560876995539994, "grad_norm": 89.5, "learning_rate": 5.948334929668973e-05, "loss": 7.2815, "step": 10942 }, { "epoch": 0.4561293818515277, "grad_norm": 191.0, "learning_rate": 5.9476721716917536e-05, "loss": 11.813, "step": 10943 }, { "epoch": 0.4561710641490559, "grad_norm": 652.0, "learning_rate": 5.947009396443124e-05, "loss": 21.5005, "step": 10944 }, { "epoch": 0.45621274644658416, "grad_norm": 1020.0, "learning_rate": 5.946346603935166e-05, "loss": 22.7541, "step": 10945 }, { "epoch": 0.4562544287441124, "grad_norm": 868.0, "learning_rate": 5.945683794179956e-05, "loss": 19.254, "step": 10946 }, { "epoch": 0.45629611104164064, "grad_norm": 207.0, "learning_rate": 5.945020967189575e-05, "loss": 11.2506, "step": 10947 }, { "epoch": 0.45633779333916885, "grad_norm": 109.0, "learning_rate": 5.944358122976104e-05, "loss": 9.1254, "step": 10948 }, { "epoch": 0.4563794756366971, "grad_norm": 181.0, "learning_rate": 5.943695261551622e-05, "loss": 10.813, "step": 10949 }, { "epoch": 0.45642115793422533, "grad_norm": 478.0, "learning_rate": 5.94303238292821e-05, "loss": 17.1254, "step": 10950 }, { "epoch": 0.4564628402317536, "grad_norm": 122.0, "learning_rate": 5.942369487117948e-05, "loss": 9.313, "step": 10951 }, { "epoch": 0.4565045225292818, "grad_norm": 253.0, "learning_rate": 5.9417065741329193e-05, "loss": 11.0627, "step": 10952 }, { "epoch": 0.4565462048268101, "grad_norm": 302.0, "learning_rate": 5.941043643985205e-05, "loss": 12.1877, "step": 10953 }, { "epoch": 0.4565878871243383, "grad_norm": 616.0, "learning_rate": 5.940380696686887e-05, "loss": 20.0003, "step": 10954 }, { "epoch": 0.45662956942186655, "grad_norm": 524.0, "learning_rate": 5.939717732250046e-05, "loss": 18.3751, "step": 10955 }, { "epoch": 0.45667125171939477, "grad_norm": 109.0, "learning_rate": 5.9390547506867675e-05, "loss": 8.5628, "step": 10956 }, { "epoch": 0.45671293401692303, "grad_norm": 964.0, "learning_rate": 5.938391752009131e-05, "loss": 22.0045, "step": 10957 }, { "epoch": 0.45675461631445124, "grad_norm": 296.0, "learning_rate": 5.937728736229222e-05, "loss": 13.0011, "step": 10958 }, { "epoch": 0.4567962986119795, "grad_norm": 352.0, "learning_rate": 5.937065703359124e-05, "loss": 14.6887, "step": 10959 }, { "epoch": 0.4568379809095077, "grad_norm": 572.0, "learning_rate": 5.936402653410921e-05, "loss": 18.2548, "step": 10960 }, { "epoch": 0.456879663207036, "grad_norm": 502.0, "learning_rate": 5.935739586396696e-05, "loss": 16.1256, "step": 10961 }, { "epoch": 0.4569213455045642, "grad_norm": 536.0, "learning_rate": 5.9350765023285334e-05, "loss": 19.2505, "step": 10962 }, { "epoch": 0.45696302780209247, "grad_norm": 478.0, "learning_rate": 5.934413401218519e-05, "loss": 17.6253, "step": 10963 }, { "epoch": 0.4570047100996207, "grad_norm": 294.0, "learning_rate": 5.933750283078738e-05, "loss": 12.5628, "step": 10964 }, { "epoch": 0.45704639239714895, "grad_norm": 276.0, "learning_rate": 5.9330871479212744e-05, "loss": 8.814, "step": 10965 }, { "epoch": 0.45708807469467716, "grad_norm": 227.0, "learning_rate": 5.932423995758215e-05, "loss": 11.5008, "step": 10966 }, { "epoch": 0.4571297569922054, "grad_norm": 428.0, "learning_rate": 5.9317608266016455e-05, "loss": 16.1251, "step": 10967 }, { "epoch": 0.45717143928973364, "grad_norm": 664.0, "learning_rate": 5.931097640463652e-05, "loss": 19.2502, "step": 10968 }, { "epoch": 0.4572131215872619, "grad_norm": 454.0, "learning_rate": 5.93043443735632e-05, "loss": 14.1876, "step": 10969 }, { "epoch": 0.4572548038847901, "grad_norm": 260.0, "learning_rate": 5.92977121729174e-05, "loss": 11.3128, "step": 10970 }, { "epoch": 0.4572964861823184, "grad_norm": 386.0, "learning_rate": 5.929107980281996e-05, "loss": 13.8128, "step": 10971 }, { "epoch": 0.4573381684798466, "grad_norm": 195.0, "learning_rate": 5.928444726339177e-05, "loss": 11.5028, "step": 10972 }, { "epoch": 0.45737985077737486, "grad_norm": 190.0, "learning_rate": 5.927781455475371e-05, "loss": 11.8755, "step": 10973 }, { "epoch": 0.4574215330749031, "grad_norm": 229.0, "learning_rate": 5.927118167702664e-05, "loss": 11.5018, "step": 10974 }, { "epoch": 0.45746321537243134, "grad_norm": 772.0, "learning_rate": 5.926454863033149e-05, "loss": 21.2505, "step": 10975 }, { "epoch": 0.45750489766995955, "grad_norm": 366.0, "learning_rate": 5.925791541478909e-05, "loss": 14.6878, "step": 10976 }, { "epoch": 0.4575465799674878, "grad_norm": 280.0, "learning_rate": 5.925128203052037e-05, "loss": 12.2504, "step": 10977 }, { "epoch": 0.45758826226501603, "grad_norm": 360.0, "learning_rate": 5.924464847764621e-05, "loss": 12.6256, "step": 10978 }, { "epoch": 0.4576299445625443, "grad_norm": 492.0, "learning_rate": 5.923801475628752e-05, "loss": 16.3754, "step": 10979 }, { "epoch": 0.4576716268600725, "grad_norm": 684.0, "learning_rate": 5.923138086656518e-05, "loss": 19.753, "step": 10980 }, { "epoch": 0.4577133091576008, "grad_norm": 253.0, "learning_rate": 5.922474680860011e-05, "loss": 10.9376, "step": 10981 }, { "epoch": 0.457754991455129, "grad_norm": 524.0, "learning_rate": 5.92181125825132e-05, "loss": 16.7502, "step": 10982 }, { "epoch": 0.45779667375265726, "grad_norm": 438.0, "learning_rate": 5.921147818842537e-05, "loss": 15.0002, "step": 10983 }, { "epoch": 0.45783835605018547, "grad_norm": 298.0, "learning_rate": 5.920484362645755e-05, "loss": 14.1255, "step": 10984 }, { "epoch": 0.45788003834771374, "grad_norm": 454.0, "learning_rate": 5.9198208896730634e-05, "loss": 13.4401, "step": 10985 }, { "epoch": 0.45792172064524195, "grad_norm": 346.0, "learning_rate": 5.919157399936554e-05, "loss": 11.8764, "step": 10986 }, { "epoch": 0.4579634029427702, "grad_norm": 284.0, "learning_rate": 5.918493893448319e-05, "loss": 13.8127, "step": 10987 }, { "epoch": 0.4580050852402984, "grad_norm": 494.0, "learning_rate": 5.917830370220452e-05, "loss": 17.3752, "step": 10988 }, { "epoch": 0.4580467675378267, "grad_norm": 490.0, "learning_rate": 5.917166830265044e-05, "loss": 17.2503, "step": 10989 }, { "epoch": 0.4580884498353549, "grad_norm": 748.0, "learning_rate": 5.9165032735941894e-05, "loss": 20.6263, "step": 10990 }, { "epoch": 0.45813013213288317, "grad_norm": 330.0, "learning_rate": 5.915839700219982e-05, "loss": 14.0004, "step": 10991 }, { "epoch": 0.4581718144304114, "grad_norm": 494.0, "learning_rate": 5.915176110154515e-05, "loss": 17.7501, "step": 10992 }, { "epoch": 0.45821349672793965, "grad_norm": 162.0, "learning_rate": 5.9145125034098815e-05, "loss": 9.8128, "step": 10993 }, { "epoch": 0.45825517902546786, "grad_norm": 237.0, "learning_rate": 5.913848879998176e-05, "loss": 12.7504, "step": 10994 }, { "epoch": 0.45829686132299613, "grad_norm": 332.0, "learning_rate": 5.913185239931494e-05, "loss": 14.4377, "step": 10995 }, { "epoch": 0.45833854362052434, "grad_norm": 458.0, "learning_rate": 5.912521583221929e-05, "loss": 17.5006, "step": 10996 }, { "epoch": 0.4583802259180526, "grad_norm": 784.0, "learning_rate": 5.911857909881579e-05, "loss": 23.2503, "step": 10997 }, { "epoch": 0.4584219082155808, "grad_norm": 292.0, "learning_rate": 5.9111942199225355e-05, "loss": 12.6881, "step": 10998 }, { "epoch": 0.4584635905131091, "grad_norm": 396.0, "learning_rate": 5.9105305133568976e-05, "loss": 14.8754, "step": 10999 }, { "epoch": 0.4585052728106373, "grad_norm": 474.0, "learning_rate": 5.909866790196761e-05, "loss": 15.9385, "step": 11000 }, { "epoch": 0.45854695510816557, "grad_norm": 276.0, "learning_rate": 5.909203050454221e-05, "loss": 10.6253, "step": 11001 }, { "epoch": 0.4585886374056938, "grad_norm": 556.0, "learning_rate": 5.908539294141374e-05, "loss": 18.8755, "step": 11002 }, { "epoch": 0.45863031970322204, "grad_norm": 302.0, "learning_rate": 5.9078755212703185e-05, "loss": 13.8754, "step": 11003 }, { "epoch": 0.45867200200075026, "grad_norm": 378.0, "learning_rate": 5.90721173185315e-05, "loss": 13.1877, "step": 11004 }, { "epoch": 0.4587136842982785, "grad_norm": 226.0, "learning_rate": 5.906547925901968e-05, "loss": 12.876, "step": 11005 }, { "epoch": 0.45875536659580674, "grad_norm": 174.0, "learning_rate": 5.905884103428869e-05, "loss": 9.0627, "step": 11006 }, { "epoch": 0.458797048893335, "grad_norm": 684.0, "learning_rate": 5.905220264445952e-05, "loss": 19.2505, "step": 11007 }, { "epoch": 0.4588387311908632, "grad_norm": 260.0, "learning_rate": 5.904556408965315e-05, "loss": 12.1254, "step": 11008 }, { "epoch": 0.4588804134883915, "grad_norm": 286.0, "learning_rate": 5.903892536999058e-05, "loss": 11.0016, "step": 11009 }, { "epoch": 0.4589220957859197, "grad_norm": 221.0, "learning_rate": 5.903228648559279e-05, "loss": 11.5629, "step": 11010 }, { "epoch": 0.45896377808344796, "grad_norm": 804.0, "learning_rate": 5.902564743658078e-05, "loss": 20.3753, "step": 11011 }, { "epoch": 0.4590054603809762, "grad_norm": 172.0, "learning_rate": 5.901900822307553e-05, "loss": 10.2503, "step": 11012 }, { "epoch": 0.45904714267850444, "grad_norm": 516.0, "learning_rate": 5.9012368845198074e-05, "loss": 16.8753, "step": 11013 }, { "epoch": 0.4590888249760327, "grad_norm": 584.0, "learning_rate": 5.900572930306938e-05, "loss": 17.8757, "step": 11014 }, { "epoch": 0.4591305072735609, "grad_norm": 194.0, "learning_rate": 5.8999089596810476e-05, "loss": 10.563, "step": 11015 }, { "epoch": 0.4591721895710892, "grad_norm": 260.0, "learning_rate": 5.899244972654236e-05, "loss": 12.314, "step": 11016 }, { "epoch": 0.4592138718686174, "grad_norm": 740.0, "learning_rate": 5.898580969238606e-05, "loss": 21.0003, "step": 11017 }, { "epoch": 0.45925555416614566, "grad_norm": 241.0, "learning_rate": 5.8979169494462586e-05, "loss": 13.0003, "step": 11018 }, { "epoch": 0.4592972364636739, "grad_norm": 194.0, "learning_rate": 5.897252913289294e-05, "loss": 11.0006, "step": 11019 }, { "epoch": 0.45933891876120214, "grad_norm": 110.5, "learning_rate": 5.896588860779814e-05, "loss": 8.8753, "step": 11020 }, { "epoch": 0.45938060105873035, "grad_norm": 792.0, "learning_rate": 5.895924791929924e-05, "loss": 21.6251, "step": 11021 }, { "epoch": 0.4594222833562586, "grad_norm": 760.0, "learning_rate": 5.895260706751725e-05, "loss": 23.0003, "step": 11022 }, { "epoch": 0.45946396565378683, "grad_norm": 432.0, "learning_rate": 5.8945966052573195e-05, "loss": 16.7503, "step": 11023 }, { "epoch": 0.4595056479513151, "grad_norm": 98.5, "learning_rate": 5.8939324874588134e-05, "loss": 9.3133, "step": 11024 }, { "epoch": 0.4595473302488433, "grad_norm": 320.0, "learning_rate": 5.893268353368306e-05, "loss": 11.9377, "step": 11025 }, { "epoch": 0.4595890125463716, "grad_norm": 290.0, "learning_rate": 5.892604202997906e-05, "loss": 13.126, "step": 11026 }, { "epoch": 0.4596306948438998, "grad_norm": 684.0, "learning_rate": 5.891940036359713e-05, "loss": 22.0009, "step": 11027 }, { "epoch": 0.45967237714142806, "grad_norm": 126.5, "learning_rate": 5.891275853465834e-05, "loss": 7.9065, "step": 11028 }, { "epoch": 0.45971405943895627, "grad_norm": 676.0, "learning_rate": 5.890611654328375e-05, "loss": 21.5033, "step": 11029 }, { "epoch": 0.45975574173648454, "grad_norm": 268.0, "learning_rate": 5.889947438959438e-05, "loss": 13.8759, "step": 11030 }, { "epoch": 0.45979742403401275, "grad_norm": 580.0, "learning_rate": 5.88928320737113e-05, "loss": 18.0002, "step": 11031 }, { "epoch": 0.459839106331541, "grad_norm": 528.0, "learning_rate": 5.888618959575556e-05, "loss": 17.2513, "step": 11032 }, { "epoch": 0.4598807886290692, "grad_norm": 258.0, "learning_rate": 5.8879546955848245e-05, "loss": 11.8127, "step": 11033 }, { "epoch": 0.4599224709265975, "grad_norm": 282.0, "learning_rate": 5.887290415411039e-05, "loss": 13.8127, "step": 11034 }, { "epoch": 0.4599641532241257, "grad_norm": 328.0, "learning_rate": 5.886626119066307e-05, "loss": 13.8756, "step": 11035 }, { "epoch": 0.46000583552165397, "grad_norm": 544.0, "learning_rate": 5.8859618065627344e-05, "loss": 16.8753, "step": 11036 }, { "epoch": 0.4600475178191822, "grad_norm": 282.0, "learning_rate": 5.8852974779124306e-05, "loss": 13.6255, "step": 11037 }, { "epoch": 0.46008920011671045, "grad_norm": 304.0, "learning_rate": 5.8846331331275e-05, "loss": 12.8753, "step": 11038 }, { "epoch": 0.46013088241423866, "grad_norm": 40.0, "learning_rate": 5.8839687722200534e-05, "loss": 5.4069, "step": 11039 }, { "epoch": 0.46017256471176693, "grad_norm": 812.0, "learning_rate": 5.883304395202197e-05, "loss": 24.1262, "step": 11040 }, { "epoch": 0.46021424700929514, "grad_norm": 290.0, "learning_rate": 5.882640002086039e-05, "loss": 13.3126, "step": 11041 }, { "epoch": 0.4602559293068234, "grad_norm": 260.0, "learning_rate": 5.881975592883691e-05, "loss": 13.0004, "step": 11042 }, { "epoch": 0.4602976116043516, "grad_norm": 664.0, "learning_rate": 5.8813111676072565e-05, "loss": 22.126, "step": 11043 }, { "epoch": 0.4603392939018799, "grad_norm": 1120.0, "learning_rate": 5.8806467262688495e-05, "loss": 25.2552, "step": 11044 }, { "epoch": 0.4603809761994081, "grad_norm": 354.0, "learning_rate": 5.879982268880576e-05, "loss": 12.8152, "step": 11045 }, { "epoch": 0.46042265849693637, "grad_norm": 120.0, "learning_rate": 5.8793177954545486e-05, "loss": 9.4378, "step": 11046 }, { "epoch": 0.4604643407944646, "grad_norm": 556.0, "learning_rate": 5.878653306002877e-05, "loss": 17.3757, "step": 11047 }, { "epoch": 0.46050602309199284, "grad_norm": 229.0, "learning_rate": 5.8779888005376704e-05, "loss": 11.7503, "step": 11048 }, { "epoch": 0.46054770538952106, "grad_norm": 1120.0, "learning_rate": 5.877324279071039e-05, "loss": 25.8793, "step": 11049 }, { "epoch": 0.4605893876870493, "grad_norm": 604.0, "learning_rate": 5.876659741615096e-05, "loss": 18.3756, "step": 11050 }, { "epoch": 0.46063106998457753, "grad_norm": 332.0, "learning_rate": 5.875995188181952e-05, "loss": 13.563, "step": 11051 }, { "epoch": 0.4606727522821058, "grad_norm": 408.0, "learning_rate": 5.875330618783717e-05, "loss": 14.2509, "step": 11052 }, { "epoch": 0.460714434579634, "grad_norm": 1488.0, "learning_rate": 5.874666033432503e-05, "loss": 33.0041, "step": 11053 }, { "epoch": 0.4607561168771623, "grad_norm": 202.0, "learning_rate": 5.8740014321404234e-05, "loss": 12.3127, "step": 11054 }, { "epoch": 0.4607977991746905, "grad_norm": 334.0, "learning_rate": 5.873336814919591e-05, "loss": 12.8129, "step": 11055 }, { "epoch": 0.46083948147221876, "grad_norm": 334.0, "learning_rate": 5.872672181782117e-05, "loss": 14.3128, "step": 11056 }, { "epoch": 0.46088116376974697, "grad_norm": 1488.0, "learning_rate": 5.8720075327401137e-05, "loss": 29.0047, "step": 11057 }, { "epoch": 0.46092284606727524, "grad_norm": 492.0, "learning_rate": 5.871342867805698e-05, "loss": 17.2507, "step": 11058 }, { "epoch": 0.46096452836480345, "grad_norm": 298.0, "learning_rate": 5.87067818699098e-05, "loss": 12.813, "step": 11059 }, { "epoch": 0.4610062106623317, "grad_norm": 292.0, "learning_rate": 5.870013490308075e-05, "loss": 12.4386, "step": 11060 }, { "epoch": 0.46104789295985993, "grad_norm": 250.0, "learning_rate": 5.869348777769097e-05, "loss": 11.8771, "step": 11061 }, { "epoch": 0.4610895752573882, "grad_norm": 576.0, "learning_rate": 5.8686840493861596e-05, "loss": 17.3752, "step": 11062 }, { "epoch": 0.4611312575549164, "grad_norm": 356.0, "learning_rate": 5.8680193051713796e-05, "loss": 14.0627, "step": 11063 }, { "epoch": 0.4611729398524447, "grad_norm": 177.0, "learning_rate": 5.8673545451368695e-05, "loss": 11.8142, "step": 11064 }, { "epoch": 0.4612146221499729, "grad_norm": 314.0, "learning_rate": 5.866689769294747e-05, "loss": 13.8755, "step": 11065 }, { "epoch": 0.46125630444750115, "grad_norm": 210.0, "learning_rate": 5.866024977657125e-05, "loss": 11.1257, "step": 11066 }, { "epoch": 0.46129798674502936, "grad_norm": 568.0, "learning_rate": 5.8653601702361224e-05, "loss": 18.7502, "step": 11067 }, { "epoch": 0.46133966904255763, "grad_norm": 572.0, "learning_rate": 5.864695347043853e-05, "loss": 18.0006, "step": 11068 }, { "epoch": 0.46138135134008584, "grad_norm": 294.0, "learning_rate": 5.864030508092434e-05, "loss": 12.4383, "step": 11069 }, { "epoch": 0.4614230336376141, "grad_norm": 166.0, "learning_rate": 5.8633656533939816e-05, "loss": 10.5629, "step": 11070 }, { "epoch": 0.4614647159351423, "grad_norm": 306.0, "learning_rate": 5.862700782960615e-05, "loss": 13.6908, "step": 11071 }, { "epoch": 0.4615063982326706, "grad_norm": 520.0, "learning_rate": 5.8620358968044496e-05, "loss": 16.376, "step": 11072 }, { "epoch": 0.4615480805301988, "grad_norm": 844.0, "learning_rate": 5.861370994937604e-05, "loss": 20.3798, "step": 11073 }, { "epoch": 0.46158976282772707, "grad_norm": 221.0, "learning_rate": 5.860706077372194e-05, "loss": 11.1253, "step": 11074 }, { "epoch": 0.4616314451252553, "grad_norm": 262.0, "learning_rate": 5.860041144120341e-05, "loss": 12.4379, "step": 11075 }, { "epoch": 0.46167312742278355, "grad_norm": 296.0, "learning_rate": 5.859376195194162e-05, "loss": 13.5629, "step": 11076 }, { "epoch": 0.46171480972031176, "grad_norm": 157.0, "learning_rate": 5.858711230605774e-05, "loss": 8.7506, "step": 11077 }, { "epoch": 0.46175649201784, "grad_norm": 354.0, "learning_rate": 5.8580462503672986e-05, "loss": 14.688, "step": 11078 }, { "epoch": 0.46179817431536824, "grad_norm": 584.0, "learning_rate": 5.857381254490854e-05, "loss": 19.3752, "step": 11079 }, { "epoch": 0.4618398566128965, "grad_norm": 648.0, "learning_rate": 5.856716242988559e-05, "loss": 19.1252, "step": 11080 }, { "epoch": 0.4618815389104247, "grad_norm": 348.0, "learning_rate": 5.856051215872536e-05, "loss": 14.4378, "step": 11081 }, { "epoch": 0.461923221207953, "grad_norm": 470.0, "learning_rate": 5.855386173154902e-05, "loss": 17.0003, "step": 11082 }, { "epoch": 0.4619649035054812, "grad_norm": 498.0, "learning_rate": 5.8547211148477785e-05, "loss": 17.0001, "step": 11083 }, { "epoch": 0.46200658580300946, "grad_norm": 512.0, "learning_rate": 5.854056040963288e-05, "loss": 18.1252, "step": 11084 }, { "epoch": 0.46204826810053773, "grad_norm": 784.0, "learning_rate": 5.853390951513551e-05, "loss": 22.2502, "step": 11085 }, { "epoch": 0.46208995039806594, "grad_norm": 362.0, "learning_rate": 5.8527258465106885e-05, "loss": 13.8752, "step": 11086 }, { "epoch": 0.4621316326955942, "grad_norm": 320.0, "learning_rate": 5.8520607259668205e-05, "loss": 12.6879, "step": 11087 }, { "epoch": 0.4621733149931224, "grad_norm": 332.0, "learning_rate": 5.85139558989407e-05, "loss": 14.5012, "step": 11088 }, { "epoch": 0.4622149972906507, "grad_norm": 382.0, "learning_rate": 5.8507304383045604e-05, "loss": 14.3757, "step": 11089 }, { "epoch": 0.4622566795881789, "grad_norm": 716.0, "learning_rate": 5.850065271210412e-05, "loss": 19.7501, "step": 11090 }, { "epoch": 0.46229836188570717, "grad_norm": 1584.0, "learning_rate": 5.849400088623749e-05, "loss": 33.2553, "step": 11091 }, { "epoch": 0.4623400441832354, "grad_norm": 146.0, "learning_rate": 5.848734890556694e-05, "loss": 9.2519, "step": 11092 }, { "epoch": 0.46238172648076364, "grad_norm": 48.75, "learning_rate": 5.8480696770213706e-05, "loss": 7.7827, "step": 11093 }, { "epoch": 0.46242340877829186, "grad_norm": 164.0, "learning_rate": 5.847404448029902e-05, "loss": 10.0004, "step": 11094 }, { "epoch": 0.4624650910758201, "grad_norm": 498.0, "learning_rate": 5.84673920359441e-05, "loss": 17.3753, "step": 11095 }, { "epoch": 0.46250677337334833, "grad_norm": 488.0, "learning_rate": 5.846073943727023e-05, "loss": 15.1888, "step": 11096 }, { "epoch": 0.4625484556708766, "grad_norm": 1144.0, "learning_rate": 5.8454086684398625e-05, "loss": 30.8753, "step": 11097 }, { "epoch": 0.4625901379684048, "grad_norm": 988.0, "learning_rate": 5.844743377745054e-05, "loss": 23.6298, "step": 11098 }, { "epoch": 0.4626318202659331, "grad_norm": 426.0, "learning_rate": 5.844078071654724e-05, "loss": 15.4377, "step": 11099 }, { "epoch": 0.4626735025634613, "grad_norm": 536.0, "learning_rate": 5.843412750180994e-05, "loss": 16.5006, "step": 11100 }, { "epoch": 0.46271518486098956, "grad_norm": 95.5, "learning_rate": 5.842747413335994e-05, "loss": 9.3755, "step": 11101 }, { "epoch": 0.46275686715851777, "grad_norm": 616.0, "learning_rate": 5.842082061131846e-05, "loss": 16.7505, "step": 11102 }, { "epoch": 0.46279854945604604, "grad_norm": 520.0, "learning_rate": 5.841416693580678e-05, "loss": 17.6253, "step": 11103 }, { "epoch": 0.46284023175357425, "grad_norm": 968.0, "learning_rate": 5.8407513106946165e-05, "loss": 23.7506, "step": 11104 }, { "epoch": 0.4628819140511025, "grad_norm": 137.0, "learning_rate": 5.8400859124857874e-05, "loss": 9.9386, "step": 11105 }, { "epoch": 0.46292359634863073, "grad_norm": 157.0, "learning_rate": 5.839420498966318e-05, "loss": 11.2517, "step": 11106 }, { "epoch": 0.462965278646159, "grad_norm": 664.0, "learning_rate": 5.838755070148335e-05, "loss": 19.3755, "step": 11107 }, { "epoch": 0.4630069609436872, "grad_norm": 768.0, "learning_rate": 5.838089626043966e-05, "loss": 20.8752, "step": 11108 }, { "epoch": 0.4630486432412155, "grad_norm": 256.0, "learning_rate": 5.83742416666534e-05, "loss": 11.8132, "step": 11109 }, { "epoch": 0.4630903255387437, "grad_norm": 229.0, "learning_rate": 5.836758692024584e-05, "loss": 10.7504, "step": 11110 }, { "epoch": 0.46313200783627195, "grad_norm": 418.0, "learning_rate": 5.8360932021338264e-05, "loss": 15.3759, "step": 11111 }, { "epoch": 0.46317369013380016, "grad_norm": 536.0, "learning_rate": 5.8354276970051966e-05, "loss": 17.3753, "step": 11112 }, { "epoch": 0.46321537243132843, "grad_norm": 146.0, "learning_rate": 5.834762176650823e-05, "loss": 10.3752, "step": 11113 }, { "epoch": 0.46325705472885664, "grad_norm": 71.5, "learning_rate": 5.834096641082834e-05, "loss": 8.563, "step": 11114 }, { "epoch": 0.4632987370263849, "grad_norm": 476.0, "learning_rate": 5.83343109031336e-05, "loss": 15.3131, "step": 11115 }, { "epoch": 0.4633404193239131, "grad_norm": 884.0, "learning_rate": 5.832765524354531e-05, "loss": 21.7545, "step": 11116 }, { "epoch": 0.4633821016214414, "grad_norm": 404.0, "learning_rate": 5.8320999432184755e-05, "loss": 16.0004, "step": 11117 }, { "epoch": 0.4634237839189696, "grad_norm": 348.0, "learning_rate": 5.8314343469173246e-05, "loss": 13.5628, "step": 11118 }, { "epoch": 0.46346546621649787, "grad_norm": 215.0, "learning_rate": 5.830768735463209e-05, "loss": 12.1882, "step": 11119 }, { "epoch": 0.4635071485140261, "grad_norm": 600.0, "learning_rate": 5.830103108868259e-05, "loss": 18.3759, "step": 11120 }, { "epoch": 0.46354883081155435, "grad_norm": 334.0, "learning_rate": 5.8294374671446064e-05, "loss": 14.5627, "step": 11121 }, { "epoch": 0.46359051310908256, "grad_norm": 138.0, "learning_rate": 5.828771810304383e-05, "loss": 10.0012, "step": 11122 }, { "epoch": 0.4636321954066108, "grad_norm": 154.0, "learning_rate": 5.828106138359719e-05, "loss": 9.9381, "step": 11123 }, { "epoch": 0.46367387770413904, "grad_norm": 112.0, "learning_rate": 5.827440451322748e-05, "loss": 9.8754, "step": 11124 }, { "epoch": 0.4637155600016673, "grad_norm": 462.0, "learning_rate": 5.8267747492056015e-05, "loss": 17.5004, "step": 11125 }, { "epoch": 0.4637572422991955, "grad_norm": 358.0, "learning_rate": 5.8261090320204105e-05, "loss": 14.0004, "step": 11126 }, { "epoch": 0.4637989245967238, "grad_norm": 548.0, "learning_rate": 5.82544329977931e-05, "loss": 16.3758, "step": 11127 }, { "epoch": 0.463840606894252, "grad_norm": 92.5, "learning_rate": 5.824777552494431e-05, "loss": 8.4383, "step": 11128 }, { "epoch": 0.46388228919178026, "grad_norm": 688.0, "learning_rate": 5.82411179017791e-05, "loss": 17.8795, "step": 11129 }, { "epoch": 0.4639239714893085, "grad_norm": 330.0, "learning_rate": 5.8234460128418764e-05, "loss": 12.6252, "step": 11130 }, { "epoch": 0.46396565378683674, "grad_norm": 154.0, "learning_rate": 5.8227802204984674e-05, "loss": 10.6252, "step": 11131 }, { "epoch": 0.46400733608436495, "grad_norm": 274.0, "learning_rate": 5.822114413159815e-05, "loss": 12.2504, "step": 11132 }, { "epoch": 0.4640490183818932, "grad_norm": 127.5, "learning_rate": 5.8214485908380544e-05, "loss": 9.3127, "step": 11133 }, { "epoch": 0.46409070067942143, "grad_norm": 240.0, "learning_rate": 5.8207827535453195e-05, "loss": 10.8135, "step": 11134 }, { "epoch": 0.4641323829769497, "grad_norm": 58.0, "learning_rate": 5.820116901293748e-05, "loss": 6.6877, "step": 11135 }, { "epoch": 0.4641740652744779, "grad_norm": 253.0, "learning_rate": 5.819451034095472e-05, "loss": 11.3132, "step": 11136 }, { "epoch": 0.4642157475720062, "grad_norm": 195.0, "learning_rate": 5.818785151962629e-05, "loss": 10.8752, "step": 11137 }, { "epoch": 0.4642574298695344, "grad_norm": 101.5, "learning_rate": 5.818119254907354e-05, "loss": 8.2502, "step": 11138 }, { "epoch": 0.46429911216706266, "grad_norm": 480.0, "learning_rate": 5.817453342941782e-05, "loss": 15.6279, "step": 11139 }, { "epoch": 0.46434079446459087, "grad_norm": 372.0, "learning_rate": 5.816787416078051e-05, "loss": 14.8753, "step": 11140 }, { "epoch": 0.46438247676211913, "grad_norm": 1168.0, "learning_rate": 5.8161214743282964e-05, "loss": 22.2543, "step": 11141 }, { "epoch": 0.46442415905964735, "grad_norm": 540.0, "learning_rate": 5.815455517704655e-05, "loss": 16.3753, "step": 11142 }, { "epoch": 0.4644658413571756, "grad_norm": 174.0, "learning_rate": 5.814789546219266e-05, "loss": 9.5002, "step": 11143 }, { "epoch": 0.4645075236547038, "grad_norm": 326.0, "learning_rate": 5.814123559884264e-05, "loss": 8.3754, "step": 11144 }, { "epoch": 0.4645492059522321, "grad_norm": 552.0, "learning_rate": 5.813457558711788e-05, "loss": 18.5005, "step": 11145 }, { "epoch": 0.4645908882497603, "grad_norm": 254.0, "learning_rate": 5.8127915427139746e-05, "loss": 13.0001, "step": 11146 }, { "epoch": 0.46463257054728857, "grad_norm": 215.0, "learning_rate": 5.812125511902965e-05, "loss": 9.9388, "step": 11147 }, { "epoch": 0.4646742528448168, "grad_norm": 340.0, "learning_rate": 5.811459466290895e-05, "loss": 14.8753, "step": 11148 }, { "epoch": 0.46471593514234505, "grad_norm": 284.0, "learning_rate": 5.810793405889905e-05, "loss": 12.0008, "step": 11149 }, { "epoch": 0.46475761743987326, "grad_norm": 135.0, "learning_rate": 5.810127330712132e-05, "loss": 9.5005, "step": 11150 }, { "epoch": 0.46479929973740153, "grad_norm": 284.0, "learning_rate": 5.809461240769718e-05, "loss": 13.3128, "step": 11151 }, { "epoch": 0.46484098203492974, "grad_norm": 153.0, "learning_rate": 5.8087951360747994e-05, "loss": 10.7505, "step": 11152 }, { "epoch": 0.464882664332458, "grad_norm": 520.0, "learning_rate": 5.8081290166395186e-05, "loss": 17.1251, "step": 11153 }, { "epoch": 0.4649243466299862, "grad_norm": 348.0, "learning_rate": 5.8074628824760146e-05, "loss": 13.3127, "step": 11154 }, { "epoch": 0.4649660289275145, "grad_norm": 448.0, "learning_rate": 5.806796733596428e-05, "loss": 17.0008, "step": 11155 }, { "epoch": 0.4650077112250427, "grad_norm": 458.0, "learning_rate": 5.806130570012899e-05, "loss": 16.0002, "step": 11156 }, { "epoch": 0.46504939352257096, "grad_norm": 648.0, "learning_rate": 5.8054643917375695e-05, "loss": 20.2504, "step": 11157 }, { "epoch": 0.46509107582009923, "grad_norm": 560.0, "learning_rate": 5.8047981987825787e-05, "loss": 19.0002, "step": 11158 }, { "epoch": 0.46513275811762744, "grad_norm": 288.0, "learning_rate": 5.804131991160069e-05, "loss": 12.1252, "step": 11159 }, { "epoch": 0.4651744404151557, "grad_norm": 440.0, "learning_rate": 5.8034657688821834e-05, "loss": 14.0005, "step": 11160 }, { "epoch": 0.4652161227126839, "grad_norm": 564.0, "learning_rate": 5.802799531961063e-05, "loss": 18.8753, "step": 11161 }, { "epoch": 0.4652578050102122, "grad_norm": 95.0, "learning_rate": 5.80213328040885e-05, "loss": 8.2505, "step": 11162 }, { "epoch": 0.4652994873077404, "grad_norm": 472.0, "learning_rate": 5.801467014237686e-05, "loss": 15.3127, "step": 11163 }, { "epoch": 0.46534116960526867, "grad_norm": 316.0, "learning_rate": 5.800800733459715e-05, "loss": 14.1283, "step": 11164 }, { "epoch": 0.4653828519027969, "grad_norm": 680.0, "learning_rate": 5.8001344380870794e-05, "loss": 17.1255, "step": 11165 }, { "epoch": 0.46542453420032515, "grad_norm": 173.0, "learning_rate": 5.799468128131923e-05, "loss": 10.7501, "step": 11166 }, { "epoch": 0.46546621649785336, "grad_norm": 398.0, "learning_rate": 5.798801803606388e-05, "loss": 14.9381, "step": 11167 }, { "epoch": 0.4655078987953816, "grad_norm": 1496.0, "learning_rate": 5.7981354645226203e-05, "loss": 37.2504, "step": 11168 }, { "epoch": 0.46554958109290984, "grad_norm": 324.0, "learning_rate": 5.797469110892764e-05, "loss": 14.3754, "step": 11169 }, { "epoch": 0.4655912633904381, "grad_norm": 420.0, "learning_rate": 5.79680274272896e-05, "loss": 16.2503, "step": 11170 }, { "epoch": 0.4656329456879663, "grad_norm": 294.0, "learning_rate": 5.796136360043355e-05, "loss": 14.2503, "step": 11171 }, { "epoch": 0.4656746279854946, "grad_norm": 322.0, "learning_rate": 5.795469962848096e-05, "loss": 13.0002, "step": 11172 }, { "epoch": 0.4657163102830228, "grad_norm": 356.0, "learning_rate": 5.7948035511553254e-05, "loss": 14.5628, "step": 11173 }, { "epoch": 0.46575799258055106, "grad_norm": 222.0, "learning_rate": 5.794137124977189e-05, "loss": 13.3151, "step": 11174 }, { "epoch": 0.4657996748780793, "grad_norm": 238.0, "learning_rate": 5.793470684325835e-05, "loss": 12.0003, "step": 11175 }, { "epoch": 0.46584135717560754, "grad_norm": 127.0, "learning_rate": 5.7928042292134054e-05, "loss": 11.688, "step": 11176 }, { "epoch": 0.46588303947313575, "grad_norm": 708.0, "learning_rate": 5.79213775965205e-05, "loss": 25.0033, "step": 11177 }, { "epoch": 0.465924721770664, "grad_norm": 203.0, "learning_rate": 5.791471275653913e-05, "loss": 11.8126, "step": 11178 }, { "epoch": 0.46596640406819223, "grad_norm": 468.0, "learning_rate": 5.7908047772311404e-05, "loss": 15.6876, "step": 11179 }, { "epoch": 0.4660080863657205, "grad_norm": 408.0, "learning_rate": 5.7901382643958816e-05, "loss": 15.3127, "step": 11180 }, { "epoch": 0.4660497686632487, "grad_norm": 476.0, "learning_rate": 5.789471737160283e-05, "loss": 15.2502, "step": 11181 }, { "epoch": 0.466091450960777, "grad_norm": 188.0, "learning_rate": 5.788805195536492e-05, "loss": 11.5633, "step": 11182 }, { "epoch": 0.4661331332583052, "grad_norm": 356.0, "learning_rate": 5.7881386395366546e-05, "loss": 14.4378, "step": 11183 }, { "epoch": 0.46617481555583345, "grad_norm": 540.0, "learning_rate": 5.787472069172921e-05, "loss": 18.3763, "step": 11184 }, { "epoch": 0.46621649785336167, "grad_norm": 720.0, "learning_rate": 5.786805484457441e-05, "loss": 20.1251, "step": 11185 }, { "epoch": 0.46625818015088993, "grad_norm": 836.0, "learning_rate": 5.786138885402359e-05, "loss": 24.6251, "step": 11186 }, { "epoch": 0.46629986244841815, "grad_norm": 388.0, "learning_rate": 5.7854722720198275e-05, "loss": 15.0626, "step": 11187 }, { "epoch": 0.4663415447459464, "grad_norm": 248.0, "learning_rate": 5.784805644321994e-05, "loss": 12.0627, "step": 11188 }, { "epoch": 0.4663832270434746, "grad_norm": 636.0, "learning_rate": 5.7841390023210076e-05, "loss": 19.0018, "step": 11189 }, { "epoch": 0.4664249093410029, "grad_norm": 688.0, "learning_rate": 5.7834723460290185e-05, "loss": 19.3776, "step": 11190 }, { "epoch": 0.4664665916385311, "grad_norm": 278.0, "learning_rate": 5.782805675458176e-05, "loss": 12.5003, "step": 11191 }, { "epoch": 0.46650827393605937, "grad_norm": 536.0, "learning_rate": 5.7821389906206315e-05, "loss": 16.0005, "step": 11192 }, { "epoch": 0.4665499562335876, "grad_norm": 191.0, "learning_rate": 5.781472291528534e-05, "loss": 5.5942, "step": 11193 }, { "epoch": 0.46659163853111585, "grad_norm": 784.0, "learning_rate": 5.780805578194034e-05, "loss": 18.8793, "step": 11194 }, { "epoch": 0.46663332082864406, "grad_norm": 245.0, "learning_rate": 5.780138850629283e-05, "loss": 11.7503, "step": 11195 }, { "epoch": 0.4666750031261723, "grad_norm": 736.0, "learning_rate": 5.779472108846432e-05, "loss": 19.1253, "step": 11196 }, { "epoch": 0.46671668542370054, "grad_norm": 498.0, "learning_rate": 5.778805352857632e-05, "loss": 16.6252, "step": 11197 }, { "epoch": 0.4667583677212288, "grad_norm": 229.0, "learning_rate": 5.778138582675038e-05, "loss": 9.5003, "step": 11198 }, { "epoch": 0.466800050018757, "grad_norm": 146.0, "learning_rate": 5.777471798310797e-05, "loss": 10.6882, "step": 11199 }, { "epoch": 0.4668417323162853, "grad_norm": 69.5, "learning_rate": 5.7768049997770647e-05, "loss": 7.0939, "step": 11200 }, { "epoch": 0.4668834146138135, "grad_norm": 432.0, "learning_rate": 5.776138187085992e-05, "loss": 17.1267, "step": 11201 }, { "epoch": 0.46692509691134176, "grad_norm": 230.0, "learning_rate": 5.7754713602497314e-05, "loss": 11.5627, "step": 11202 }, { "epoch": 0.46696677920887, "grad_norm": 127.5, "learning_rate": 5.774804519280437e-05, "loss": 8.0637, "step": 11203 }, { "epoch": 0.46700846150639824, "grad_norm": 94.5, "learning_rate": 5.774137664190261e-05, "loss": 8.0002, "step": 11204 }, { "epoch": 0.46705014380392645, "grad_norm": 326.0, "learning_rate": 5.773470794991358e-05, "loss": 13.938, "step": 11205 }, { "epoch": 0.4670918261014547, "grad_norm": 270.0, "learning_rate": 5.772803911695881e-05, "loss": 12.0006, "step": 11206 }, { "epoch": 0.46713350839898293, "grad_norm": 430.0, "learning_rate": 5.7721370143159834e-05, "loss": 15.3756, "step": 11207 }, { "epoch": 0.4671751906965112, "grad_norm": 255.0, "learning_rate": 5.7714701028638205e-05, "loss": 12.3758, "step": 11208 }, { "epoch": 0.4672168729940394, "grad_norm": 56.5, "learning_rate": 5.7708031773515456e-05, "loss": 8.2503, "step": 11209 }, { "epoch": 0.4672585552915677, "grad_norm": 191.0, "learning_rate": 5.770136237791315e-05, "loss": 11.3129, "step": 11210 }, { "epoch": 0.4673002375890959, "grad_norm": 462.0, "learning_rate": 5.7694692841952837e-05, "loss": 15.8755, "step": 11211 }, { "epoch": 0.46734191988662416, "grad_norm": 1864.0, "learning_rate": 5.768802316575606e-05, "loss": 34.5044, "step": 11212 }, { "epoch": 0.46738360218415237, "grad_norm": 152.0, "learning_rate": 5.7681353349444375e-05, "loss": 9.6878, "step": 11213 }, { "epoch": 0.46742528448168064, "grad_norm": 288.0, "learning_rate": 5.767468339313935e-05, "loss": 12.1898, "step": 11214 }, { "epoch": 0.46746696677920885, "grad_norm": 103.0, "learning_rate": 5.766801329696254e-05, "loss": 10.5017, "step": 11215 }, { "epoch": 0.4675086490767371, "grad_norm": 1032.0, "learning_rate": 5.76613430610355e-05, "loss": 25.2501, "step": 11216 }, { "epoch": 0.4675503313742653, "grad_norm": 484.0, "learning_rate": 5.7654672685479816e-05, "loss": 17.6262, "step": 11217 }, { "epoch": 0.4675920136717936, "grad_norm": 414.0, "learning_rate": 5.7648002170417025e-05, "loss": 15.7505, "step": 11218 }, { "epoch": 0.4676336959693218, "grad_norm": 133.0, "learning_rate": 5.7641331515968735e-05, "loss": 8.3754, "step": 11219 }, { "epoch": 0.4676753782668501, "grad_norm": 179.0, "learning_rate": 5.7634660722256486e-05, "loss": 12.1276, "step": 11220 }, { "epoch": 0.4677170605643783, "grad_norm": 149.0, "learning_rate": 5.762798978940185e-05, "loss": 12.0014, "step": 11221 }, { "epoch": 0.46775874286190655, "grad_norm": 151.0, "learning_rate": 5.7621318717526454e-05, "loss": 9.6876, "step": 11222 }, { "epoch": 0.46780042515943476, "grad_norm": 288.0, "learning_rate": 5.761464750675183e-05, "loss": 13.0003, "step": 11223 }, { "epoch": 0.46784210745696303, "grad_norm": 163.0, "learning_rate": 5.760797615719959e-05, "loss": 11.1254, "step": 11224 }, { "epoch": 0.46788378975449124, "grad_norm": 166.0, "learning_rate": 5.7601304668991295e-05, "loss": 9.5628, "step": 11225 }, { "epoch": 0.4679254720520195, "grad_norm": 336.0, "learning_rate": 5.759463304224857e-05, "loss": 13.7511, "step": 11226 }, { "epoch": 0.4679671543495477, "grad_norm": 262.0, "learning_rate": 5.758796127709296e-05, "loss": 13.438, "step": 11227 }, { "epoch": 0.468008836647076, "grad_norm": 290.0, "learning_rate": 5.7581289373646095e-05, "loss": 12.5639, "step": 11228 }, { "epoch": 0.4680505189446042, "grad_norm": 51.5, "learning_rate": 5.757461733202956e-05, "loss": 7.5003, "step": 11229 }, { "epoch": 0.46809220124213247, "grad_norm": 274.0, "learning_rate": 5.756794515236494e-05, "loss": 12.9388, "step": 11230 }, { "epoch": 0.46813388353966073, "grad_norm": 253.0, "learning_rate": 5.7561272834773864e-05, "loss": 13.4392, "step": 11231 }, { "epoch": 0.46817556583718895, "grad_norm": 988.0, "learning_rate": 5.755460037937791e-05, "loss": 24.6301, "step": 11232 }, { "epoch": 0.4682172481347172, "grad_norm": 510.0, "learning_rate": 5.754792778629869e-05, "loss": 18.7502, "step": 11233 }, { "epoch": 0.4682589304322454, "grad_norm": 56.5, "learning_rate": 5.754125505565782e-05, "loss": 6.7503, "step": 11234 }, { "epoch": 0.4683006127297737, "grad_norm": 274.0, "learning_rate": 5.7534582187576904e-05, "loss": 11.5636, "step": 11235 }, { "epoch": 0.4683422950273019, "grad_norm": 292.0, "learning_rate": 5.752790918217756e-05, "loss": 13.313, "step": 11236 }, { "epoch": 0.46838397732483017, "grad_norm": 362.0, "learning_rate": 5.7521236039581415e-05, "loss": 14.1252, "step": 11237 }, { "epoch": 0.4684256596223584, "grad_norm": 418.0, "learning_rate": 5.751456275991006e-05, "loss": 16.1252, "step": 11238 }, { "epoch": 0.46846734191988665, "grad_norm": 406.0, "learning_rate": 5.7507889343285135e-05, "loss": 14.5003, "step": 11239 }, { "epoch": 0.46850902421741486, "grad_norm": 280.0, "learning_rate": 5.7501215789828264e-05, "loss": 12.4379, "step": 11240 }, { "epoch": 0.4685507065149431, "grad_norm": 121.5, "learning_rate": 5.7494542099661075e-05, "loss": 9.1878, "step": 11241 }, { "epoch": 0.46859238881247134, "grad_norm": 207.0, "learning_rate": 5.7487868272905174e-05, "loss": 12.1883, "step": 11242 }, { "epoch": 0.4686340711099996, "grad_norm": 844.0, "learning_rate": 5.748119430968223e-05, "loss": 24.2573, "step": 11243 }, { "epoch": 0.4686757534075278, "grad_norm": 792.0, "learning_rate": 5.747452021011385e-05, "loss": 21.3762, "step": 11244 }, { "epoch": 0.4687174357050561, "grad_norm": 470.0, "learning_rate": 5.7467845974321666e-05, "loss": 15.8126, "step": 11245 }, { "epoch": 0.4687591180025843, "grad_norm": 388.0, "learning_rate": 5.746117160242732e-05, "loss": 15.2505, "step": 11246 }, { "epoch": 0.46880080030011256, "grad_norm": 215.0, "learning_rate": 5.745449709455246e-05, "loss": 10.9377, "step": 11247 }, { "epoch": 0.4688424825976408, "grad_norm": 406.0, "learning_rate": 5.744782245081875e-05, "loss": 15.6878, "step": 11248 }, { "epoch": 0.46888416489516904, "grad_norm": 1128.0, "learning_rate": 5.744114767134781e-05, "loss": 25.3752, "step": 11249 }, { "epoch": 0.46892584719269725, "grad_norm": 255.0, "learning_rate": 5.743447275626128e-05, "loss": 13.2503, "step": 11250 }, { "epoch": 0.4689675294902255, "grad_norm": 360.0, "learning_rate": 5.742779770568083e-05, "loss": 14.8752, "step": 11251 }, { "epoch": 0.46900921178775373, "grad_norm": 560.0, "learning_rate": 5.742112251972811e-05, "loss": 16.0024, "step": 11252 }, { "epoch": 0.469050894085282, "grad_norm": 310.0, "learning_rate": 5.741444719852477e-05, "loss": 13.688, "step": 11253 }, { "epoch": 0.4690925763828102, "grad_norm": 152.0, "learning_rate": 5.740777174219247e-05, "loss": 10.7501, "step": 11254 }, { "epoch": 0.4691342586803385, "grad_norm": 240.0, "learning_rate": 5.740109615085287e-05, "loss": 11.6253, "step": 11255 }, { "epoch": 0.4691759409778667, "grad_norm": 222.0, "learning_rate": 5.739442042462765e-05, "loss": 11.6253, "step": 11256 }, { "epoch": 0.46921762327539496, "grad_norm": 748.0, "learning_rate": 5.7387744563638444e-05, "loss": 20.5002, "step": 11257 }, { "epoch": 0.46925930557292317, "grad_norm": 354.0, "learning_rate": 5.738106856800694e-05, "loss": 15.688, "step": 11258 }, { "epoch": 0.46930098787045144, "grad_norm": 356.0, "learning_rate": 5.7374392437854806e-05, "loss": 14.3127, "step": 11259 }, { "epoch": 0.46934267016797965, "grad_norm": 344.0, "learning_rate": 5.736771617330372e-05, "loss": 14.8127, "step": 11260 }, { "epoch": 0.4693843524655079, "grad_norm": 1012.0, "learning_rate": 5.7361039774475355e-05, "loss": 21.3799, "step": 11261 }, { "epoch": 0.4694260347630361, "grad_norm": 414.0, "learning_rate": 5.735436324149139e-05, "loss": 16.2508, "step": 11262 }, { "epoch": 0.4694677170605644, "grad_norm": 104.5, "learning_rate": 5.7347686574473494e-05, "loss": 9.0626, "step": 11263 }, { "epoch": 0.4695093993580926, "grad_norm": 756.0, "learning_rate": 5.734100977354336e-05, "loss": 21.1282, "step": 11264 }, { "epoch": 0.46955108165562087, "grad_norm": 218.0, "learning_rate": 5.733433283882268e-05, "loss": 11.8755, "step": 11265 }, { "epoch": 0.4695927639531491, "grad_norm": 408.0, "learning_rate": 5.732765577043312e-05, "loss": 16.6254, "step": 11266 }, { "epoch": 0.46963444625067735, "grad_norm": 524.0, "learning_rate": 5.732097856849638e-05, "loss": 15.0637, "step": 11267 }, { "epoch": 0.46967612854820556, "grad_norm": 233.0, "learning_rate": 5.7314301233134174e-05, "loss": 10.813, "step": 11268 }, { "epoch": 0.46971781084573383, "grad_norm": 312.0, "learning_rate": 5.730762376446816e-05, "loss": 13.1257, "step": 11269 }, { "epoch": 0.46975949314326204, "grad_norm": 340.0, "learning_rate": 5.730094616262007e-05, "loss": 14.5627, "step": 11270 }, { "epoch": 0.4698011754407903, "grad_norm": 544.0, "learning_rate": 5.729426842771158e-05, "loss": 21.0003, "step": 11271 }, { "epoch": 0.4698428577383185, "grad_norm": 145.0, "learning_rate": 5.728759055986439e-05, "loss": 11.1878, "step": 11272 }, { "epoch": 0.4698845400358468, "grad_norm": 112.0, "learning_rate": 5.728091255920023e-05, "loss": 7.3126, "step": 11273 }, { "epoch": 0.469926222333375, "grad_norm": 764.0, "learning_rate": 5.727423442584079e-05, "loss": 19.7554, "step": 11274 }, { "epoch": 0.46996790463090327, "grad_norm": 764.0, "learning_rate": 5.726755615990778e-05, "loss": 23.6252, "step": 11275 }, { "epoch": 0.4700095869284315, "grad_norm": 139.0, "learning_rate": 5.7260877761522914e-05, "loss": 9.7502, "step": 11276 }, { "epoch": 0.47005126922595974, "grad_norm": 192.0, "learning_rate": 5.72541992308079e-05, "loss": 7.5317, "step": 11277 }, { "epoch": 0.47009295152348796, "grad_norm": 688.0, "learning_rate": 5.724752056788447e-05, "loss": 18.0012, "step": 11278 }, { "epoch": 0.4701346338210162, "grad_norm": 266.0, "learning_rate": 5.724084177287434e-05, "loss": 12.2507, "step": 11279 }, { "epoch": 0.47017631611854444, "grad_norm": 376.0, "learning_rate": 5.723416284589922e-05, "loss": 15.3127, "step": 11280 }, { "epoch": 0.4702179984160727, "grad_norm": 656.0, "learning_rate": 5.722748378708084e-05, "loss": 19.6257, "step": 11281 }, { "epoch": 0.4702596807136009, "grad_norm": 251.0, "learning_rate": 5.722080459654092e-05, "loss": 13.0003, "step": 11282 }, { "epoch": 0.4703013630111292, "grad_norm": 316.0, "learning_rate": 5.7214125274401195e-05, "loss": 14.3777, "step": 11283 }, { "epoch": 0.4703430453086574, "grad_norm": 209.0, "learning_rate": 5.720744582078339e-05, "loss": 11.1885, "step": 11284 }, { "epoch": 0.47038472760618566, "grad_norm": 160.0, "learning_rate": 5.720076623580925e-05, "loss": 9.9377, "step": 11285 }, { "epoch": 0.47042640990371387, "grad_norm": 360.0, "learning_rate": 5.719408651960052e-05, "loss": 14.7502, "step": 11286 }, { "epoch": 0.47046809220124214, "grad_norm": 796.0, "learning_rate": 5.718740667227892e-05, "loss": 21.2508, "step": 11287 }, { "epoch": 0.47050977449877035, "grad_norm": 302.0, "learning_rate": 5.718072669396619e-05, "loss": 13.1877, "step": 11288 }, { "epoch": 0.4705514567962986, "grad_norm": 203.0, "learning_rate": 5.717404658478408e-05, "loss": 11.6252, "step": 11289 }, { "epoch": 0.47059313909382683, "grad_norm": 512.0, "learning_rate": 5.716736634485433e-05, "loss": 17.6252, "step": 11290 }, { "epoch": 0.4706348213913551, "grad_norm": 117.5, "learning_rate": 5.71606859742987e-05, "loss": 9.0001, "step": 11291 }, { "epoch": 0.4706765036888833, "grad_norm": 1416.0, "learning_rate": 5.7154005473238936e-05, "loss": 31.3755, "step": 11292 }, { "epoch": 0.4707181859864116, "grad_norm": 408.0, "learning_rate": 5.714732484179678e-05, "loss": 16.7504, "step": 11293 }, { "epoch": 0.4707598682839398, "grad_norm": 177.0, "learning_rate": 5.7140644080094e-05, "loss": 8.3757, "step": 11294 }, { "epoch": 0.47080155058146805, "grad_norm": 784.0, "learning_rate": 5.713396318825234e-05, "loss": 21.6257, "step": 11295 }, { "epoch": 0.47084323287899627, "grad_norm": 264.0, "learning_rate": 5.712728216639357e-05, "loss": 12.8129, "step": 11296 }, { "epoch": 0.47088491517652453, "grad_norm": 476.0, "learning_rate": 5.712060101463943e-05, "loss": 18.1254, "step": 11297 }, { "epoch": 0.47092659747405274, "grad_norm": 71.5, "learning_rate": 5.711391973311173e-05, "loss": 8.6879, "step": 11298 }, { "epoch": 0.470968279771581, "grad_norm": 119.5, "learning_rate": 5.71072383219322e-05, "loss": 8.0627, "step": 11299 }, { "epoch": 0.4710099620691092, "grad_norm": 225.0, "learning_rate": 5.7100556781222634e-05, "loss": 12.0003, "step": 11300 }, { "epoch": 0.4710516443666375, "grad_norm": 71.5, "learning_rate": 5.709387511110478e-05, "loss": 8.9379, "step": 11301 }, { "epoch": 0.4710933266641657, "grad_norm": 478.0, "learning_rate": 5.7087193311700425e-05, "loss": 16.6251, "step": 11302 }, { "epoch": 0.47113500896169397, "grad_norm": 214.0, "learning_rate": 5.708051138313134e-05, "loss": 12.1264, "step": 11303 }, { "epoch": 0.47117669125922224, "grad_norm": 408.0, "learning_rate": 5.707382932551931e-05, "loss": 16.7502, "step": 11304 }, { "epoch": 0.47121837355675045, "grad_norm": 248.0, "learning_rate": 5.706714713898611e-05, "loss": 10.0629, "step": 11305 }, { "epoch": 0.4712600558542787, "grad_norm": 310.0, "learning_rate": 5.7060464823653525e-05, "loss": 13.3127, "step": 11306 }, { "epoch": 0.4713017381518069, "grad_norm": 502.0, "learning_rate": 5.705378237964335e-05, "loss": 18.1255, "step": 11307 }, { "epoch": 0.4713434204493352, "grad_norm": 454.0, "learning_rate": 5.704709980707736e-05, "loss": 16.8753, "step": 11308 }, { "epoch": 0.4713851027468634, "grad_norm": 336.0, "learning_rate": 5.7040417106077334e-05, "loss": 12.7502, "step": 11309 }, { "epoch": 0.47142678504439167, "grad_norm": 300.0, "learning_rate": 5.7033734276765104e-05, "loss": 12.4402, "step": 11310 }, { "epoch": 0.4714684673419199, "grad_norm": 196.0, "learning_rate": 5.702705131926244e-05, "loss": 12.3754, "step": 11311 }, { "epoch": 0.47151014963944815, "grad_norm": 85.5, "learning_rate": 5.702036823369114e-05, "loss": 6.5002, "step": 11312 }, { "epoch": 0.47155183193697636, "grad_norm": 268.0, "learning_rate": 5.701368502017299e-05, "loss": 12.3128, "step": 11313 }, { "epoch": 0.47159351423450463, "grad_norm": 628.0, "learning_rate": 5.700700167882983e-05, "loss": 18.6281, "step": 11314 }, { "epoch": 0.47163519653203284, "grad_norm": 195.0, "learning_rate": 5.700031820978343e-05, "loss": 9.6257, "step": 11315 }, { "epoch": 0.4716768788295611, "grad_norm": 328.0, "learning_rate": 5.699363461315561e-05, "loss": 15.0626, "step": 11316 }, { "epoch": 0.4717185611270893, "grad_norm": 320.0, "learning_rate": 5.698695088906818e-05, "loss": 12.5009, "step": 11317 }, { "epoch": 0.4717602434246176, "grad_norm": 304.0, "learning_rate": 5.6980267037642954e-05, "loss": 13.0628, "step": 11318 }, { "epoch": 0.4718019257221458, "grad_norm": 197.0, "learning_rate": 5.697358305900173e-05, "loss": 11.0007, "step": 11319 }, { "epoch": 0.47184360801967407, "grad_norm": 188.0, "learning_rate": 5.6966898953266355e-05, "loss": 10.7503, "step": 11320 }, { "epoch": 0.4718852903172023, "grad_norm": 199.0, "learning_rate": 5.696021472055861e-05, "loss": 12.3757, "step": 11321 }, { "epoch": 0.47192697261473054, "grad_norm": 366.0, "learning_rate": 5.695353036100034e-05, "loss": 14.5005, "step": 11322 }, { "epoch": 0.47196865491225876, "grad_norm": 548.0, "learning_rate": 5.694684587471336e-05, "loss": 17.5002, "step": 11323 }, { "epoch": 0.472010337209787, "grad_norm": 227.0, "learning_rate": 5.6940161261819504e-05, "loss": 11.5003, "step": 11324 }, { "epoch": 0.47205201950731523, "grad_norm": 240.0, "learning_rate": 5.6933476522440585e-05, "loss": 12.7502, "step": 11325 }, { "epoch": 0.4720937018048435, "grad_norm": 580.0, "learning_rate": 5.6926791656698444e-05, "loss": 18.2504, "step": 11326 }, { "epoch": 0.4721353841023717, "grad_norm": 322.0, "learning_rate": 5.692010666471491e-05, "loss": 14.6251, "step": 11327 }, { "epoch": 0.4721770663999, "grad_norm": 88.0, "learning_rate": 5.691342154661182e-05, "loss": 8.2504, "step": 11328 }, { "epoch": 0.4722187486974282, "grad_norm": 85.5, "learning_rate": 5.690673630251101e-05, "loss": 8.5002, "step": 11329 }, { "epoch": 0.47226043099495646, "grad_norm": 148.0, "learning_rate": 5.690005093253431e-05, "loss": 10.1877, "step": 11330 }, { "epoch": 0.47230211329248467, "grad_norm": 224.0, "learning_rate": 5.689336543680358e-05, "loss": 10.8131, "step": 11331 }, { "epoch": 0.47234379559001294, "grad_norm": 576.0, "learning_rate": 5.6886679815440646e-05, "loss": 18.0002, "step": 11332 }, { "epoch": 0.47238547788754115, "grad_norm": 245.0, "learning_rate": 5.6879994068567366e-05, "loss": 13.3752, "step": 11333 }, { "epoch": 0.4724271601850694, "grad_norm": 268.0, "learning_rate": 5.6873308196305576e-05, "loss": 11.0002, "step": 11334 }, { "epoch": 0.47246884248259763, "grad_norm": 116.0, "learning_rate": 5.6866622198777134e-05, "loss": 9.1256, "step": 11335 }, { "epoch": 0.4725105247801259, "grad_norm": 420.0, "learning_rate": 5.685993607610389e-05, "loss": 15.0001, "step": 11336 }, { "epoch": 0.4725522070776541, "grad_norm": 246.0, "learning_rate": 5.685324982840771e-05, "loss": 11.8128, "step": 11337 }, { "epoch": 0.4725938893751824, "grad_norm": 328.0, "learning_rate": 5.684656345581044e-05, "loss": 13.5006, "step": 11338 }, { "epoch": 0.4726355716727106, "grad_norm": 46.75, "learning_rate": 5.683987695843395e-05, "loss": 7.6878, "step": 11339 }, { "epoch": 0.47267725397023885, "grad_norm": 532.0, "learning_rate": 5.6833190336400086e-05, "loss": 16.8767, "step": 11340 }, { "epoch": 0.47271893626776706, "grad_norm": 408.0, "learning_rate": 5.6826503589830726e-05, "loss": 15.5641, "step": 11341 }, { "epoch": 0.47276061856529533, "grad_norm": 143.0, "learning_rate": 5.681981671884773e-05, "loss": 9.3756, "step": 11342 }, { "epoch": 0.47280230086282354, "grad_norm": 548.0, "learning_rate": 5.6813129723572975e-05, "loss": 17.8753, "step": 11343 }, { "epoch": 0.4728439831603518, "grad_norm": 812.0, "learning_rate": 5.680644260412832e-05, "loss": 22.8753, "step": 11344 }, { "epoch": 0.47288566545788, "grad_norm": 378.0, "learning_rate": 5.679975536063564e-05, "loss": 12.5641, "step": 11345 }, { "epoch": 0.4729273477554083, "grad_norm": 62.25, "learning_rate": 5.6793067993216834e-05, "loss": 8.3131, "step": 11346 }, { "epoch": 0.4729690300529365, "grad_norm": 628.0, "learning_rate": 5.678638050199373e-05, "loss": 19.3755, "step": 11347 }, { "epoch": 0.47301071235046477, "grad_norm": 680.0, "learning_rate": 5.6779692887088254e-05, "loss": 19.3782, "step": 11348 }, { "epoch": 0.473052394647993, "grad_norm": 314.0, "learning_rate": 5.6773005148622285e-05, "loss": 13.8753, "step": 11349 }, { "epoch": 0.47309407694552125, "grad_norm": 450.0, "learning_rate": 5.6766317286717683e-05, "loss": 16.8758, "step": 11350 }, { "epoch": 0.47313575924304946, "grad_norm": 406.0, "learning_rate": 5.6759629301496353e-05, "loss": 14.8757, "step": 11351 }, { "epoch": 0.4731774415405777, "grad_norm": 230.0, "learning_rate": 5.675294119308018e-05, "loss": 11.6879, "step": 11352 }, { "epoch": 0.47321912383810594, "grad_norm": 1056.0, "learning_rate": 5.674625296159105e-05, "loss": 29.8751, "step": 11353 }, { "epoch": 0.4732608061356342, "grad_norm": 139.0, "learning_rate": 5.673956460715086e-05, "loss": 9.6254, "step": 11354 }, { "epoch": 0.4733024884331624, "grad_norm": 235.0, "learning_rate": 5.6732876129881506e-05, "loss": 11.3753, "step": 11355 }, { "epoch": 0.4733441707306907, "grad_norm": 298.0, "learning_rate": 5.672618752990489e-05, "loss": 12.9379, "step": 11356 }, { "epoch": 0.4733858530282189, "grad_norm": 203.0, "learning_rate": 5.671949880734292e-05, "loss": 9.9377, "step": 11357 }, { "epoch": 0.47342753532574716, "grad_norm": 338.0, "learning_rate": 5.6712809962317474e-05, "loss": 13.9377, "step": 11358 }, { "epoch": 0.4734692176232754, "grad_norm": 262.0, "learning_rate": 5.670612099495047e-05, "loss": 11.1881, "step": 11359 }, { "epoch": 0.47351089992080364, "grad_norm": 434.0, "learning_rate": 5.669943190536381e-05, "loss": 13.6252, "step": 11360 }, { "epoch": 0.47355258221833185, "grad_norm": 149.0, "learning_rate": 5.669274269367942e-05, "loss": 10.504, "step": 11361 }, { "epoch": 0.4735942645158601, "grad_norm": 69.0, "learning_rate": 5.6686053360019195e-05, "loss": 7.6881, "step": 11362 }, { "epoch": 0.47363594681338833, "grad_norm": 246.0, "learning_rate": 5.667936390450506e-05, "loss": 11.8128, "step": 11363 }, { "epoch": 0.4736776291109166, "grad_norm": 584.0, "learning_rate": 5.6672674327258924e-05, "loss": 18.0012, "step": 11364 }, { "epoch": 0.4737193114084448, "grad_norm": 139.0, "learning_rate": 5.6665984628402704e-05, "loss": 9.3129, "step": 11365 }, { "epoch": 0.4737609937059731, "grad_norm": 394.0, "learning_rate": 5.665929480805833e-05, "loss": 13.8127, "step": 11366 }, { "epoch": 0.4738026760035013, "grad_norm": 183.0, "learning_rate": 5.66526048663477e-05, "loss": 11.2506, "step": 11367 }, { "epoch": 0.47384435830102956, "grad_norm": 189.0, "learning_rate": 5.664591480339278e-05, "loss": 11.7503, "step": 11368 }, { "epoch": 0.47388604059855777, "grad_norm": 396.0, "learning_rate": 5.663922461931545e-05, "loss": 15.0009, "step": 11369 }, { "epoch": 0.47392772289608603, "grad_norm": 422.0, "learning_rate": 5.663253431423767e-05, "loss": 15.6885, "step": 11370 }, { "epoch": 0.47396940519361425, "grad_norm": 151.0, "learning_rate": 5.662584388828136e-05, "loss": 9.8127, "step": 11371 }, { "epoch": 0.4740110874911425, "grad_norm": 632.0, "learning_rate": 5.6619153341568455e-05, "loss": 20.0001, "step": 11372 }, { "epoch": 0.4740527697886707, "grad_norm": 138.0, "learning_rate": 5.661246267422089e-05, "loss": 5.595, "step": 11373 }, { "epoch": 0.474094452086199, "grad_norm": 235.0, "learning_rate": 5.6605771886360626e-05, "loss": 10.4378, "step": 11374 }, { "epoch": 0.4741361343837272, "grad_norm": 368.0, "learning_rate": 5.6599080978109565e-05, "loss": 14.1251, "step": 11375 }, { "epoch": 0.47417781668125547, "grad_norm": 242.0, "learning_rate": 5.659238994958968e-05, "loss": 11.9379, "step": 11376 }, { "epoch": 0.47421949897878374, "grad_norm": 41.75, "learning_rate": 5.658569880092289e-05, "loss": 6.4691, "step": 11377 }, { "epoch": 0.47426118127631195, "grad_norm": 219.0, "learning_rate": 5.657900753223117e-05, "loss": 11.1252, "step": 11378 }, { "epoch": 0.4743028635738402, "grad_norm": 94.5, "learning_rate": 5.6572316143636436e-05, "loss": 9.438, "step": 11379 }, { "epoch": 0.47434454587136843, "grad_norm": 322.0, "learning_rate": 5.656562463526066e-05, "loss": 15.3756, "step": 11380 }, { "epoch": 0.4743862281688967, "grad_norm": 274.0, "learning_rate": 5.65589330072258e-05, "loss": 12.9391, "step": 11381 }, { "epoch": 0.4744279104664249, "grad_norm": 187.0, "learning_rate": 5.6552241259653806e-05, "loss": 11.0631, "step": 11382 }, { "epoch": 0.4744695927639532, "grad_norm": 54.75, "learning_rate": 5.654554939266663e-05, "loss": 7.9073, "step": 11383 }, { "epoch": 0.4745112750614814, "grad_norm": 130.0, "learning_rate": 5.6538857406386226e-05, "loss": 9.6881, "step": 11384 }, { "epoch": 0.47455295735900965, "grad_norm": 207.0, "learning_rate": 5.653216530093457e-05, "loss": 9.1253, "step": 11385 }, { "epoch": 0.47459463965653786, "grad_norm": 430.0, "learning_rate": 5.652547307643362e-05, "loss": 15.7503, "step": 11386 }, { "epoch": 0.47463632195406613, "grad_norm": 141.0, "learning_rate": 5.651878073300535e-05, "loss": 5.6256, "step": 11387 }, { "epoch": 0.47467800425159434, "grad_norm": 504.0, "learning_rate": 5.6512088270771725e-05, "loss": 17.7502, "step": 11388 }, { "epoch": 0.4747196865491226, "grad_norm": 382.0, "learning_rate": 5.650539568985471e-05, "loss": 16.6252, "step": 11389 }, { "epoch": 0.4747613688466508, "grad_norm": 704.0, "learning_rate": 5.649870299037627e-05, "loss": 20.5024, "step": 11390 }, { "epoch": 0.4748030511441791, "grad_norm": 276.0, "learning_rate": 5.649201017245841e-05, "loss": 13.5635, "step": 11391 }, { "epoch": 0.4748447334417073, "grad_norm": 217.0, "learning_rate": 5.648531723622308e-05, "loss": 11.6877, "step": 11392 }, { "epoch": 0.47488641573923557, "grad_norm": 116.0, "learning_rate": 5.647862418179226e-05, "loss": 6.9382, "step": 11393 }, { "epoch": 0.4749280980367638, "grad_norm": 362.0, "learning_rate": 5.647193100928796e-05, "loss": 14.5629, "step": 11394 }, { "epoch": 0.47496978033429205, "grad_norm": 484.0, "learning_rate": 5.646523771883212e-05, "loss": 16.7503, "step": 11395 }, { "epoch": 0.47501146263182026, "grad_norm": 378.0, "learning_rate": 5.6458544310546756e-05, "loss": 6.5003, "step": 11396 }, { "epoch": 0.4750531449293485, "grad_norm": 240.0, "learning_rate": 5.645185078455386e-05, "loss": 13.5003, "step": 11397 }, { "epoch": 0.47509482722687674, "grad_norm": 588.0, "learning_rate": 5.644515714097539e-05, "loss": 19.3759, "step": 11398 }, { "epoch": 0.475136509524405, "grad_norm": 334.0, "learning_rate": 5.643846337993337e-05, "loss": 14.3753, "step": 11399 }, { "epoch": 0.4751781918219332, "grad_norm": 268.0, "learning_rate": 5.643176950154978e-05, "loss": 11.5627, "step": 11400 }, { "epoch": 0.4752198741194615, "grad_norm": 208.0, "learning_rate": 5.6425075505946624e-05, "loss": 11.7501, "step": 11401 }, { "epoch": 0.4752615564169897, "grad_norm": 350.0, "learning_rate": 5.64183813932459e-05, "loss": 13.2503, "step": 11402 }, { "epoch": 0.47530323871451796, "grad_norm": 572.0, "learning_rate": 5.6411687163569596e-05, "loss": 17.0003, "step": 11403 }, { "epoch": 0.4753449210120462, "grad_norm": 404.0, "learning_rate": 5.640499281703974e-05, "loss": 15.7502, "step": 11404 }, { "epoch": 0.47538660330957444, "grad_norm": 1040.0, "learning_rate": 5.63982983537783e-05, "loss": 26.7502, "step": 11405 }, { "epoch": 0.47542828560710265, "grad_norm": 764.0, "learning_rate": 5.639160377390732e-05, "loss": 22.0002, "step": 11406 }, { "epoch": 0.4754699679046309, "grad_norm": 278.0, "learning_rate": 5.638490907754879e-05, "loss": 12.5626, "step": 11407 }, { "epoch": 0.47551165020215913, "grad_norm": 604.0, "learning_rate": 5.637821426482472e-05, "loss": 19.6253, "step": 11408 }, { "epoch": 0.4755533324996874, "grad_norm": 852.0, "learning_rate": 5.6371519335857135e-05, "loss": 24.8753, "step": 11409 }, { "epoch": 0.4755950147972156, "grad_norm": 956.0, "learning_rate": 5.636482429076804e-05, "loss": 23.3839, "step": 11410 }, { "epoch": 0.4756366970947439, "grad_norm": 636.0, "learning_rate": 5.635812912967946e-05, "loss": 20.2502, "step": 11411 }, { "epoch": 0.4756783793922721, "grad_norm": 188.0, "learning_rate": 5.635143385271341e-05, "loss": 10.938, "step": 11412 }, { "epoch": 0.47572006168980036, "grad_norm": 330.0, "learning_rate": 5.634473845999191e-05, "loss": 13.8133, "step": 11413 }, { "epoch": 0.47576174398732857, "grad_norm": 308.0, "learning_rate": 5.633804295163699e-05, "loss": 13.3135, "step": 11414 }, { "epoch": 0.47580342628485683, "grad_norm": 218.0, "learning_rate": 5.633134732777069e-05, "loss": 11.3752, "step": 11415 }, { "epoch": 0.47584510858238505, "grad_norm": 151.0, "learning_rate": 5.632465158851501e-05, "loss": 9.1253, "step": 11416 }, { "epoch": 0.4758867908799133, "grad_norm": 664.0, "learning_rate": 5.6317955733992e-05, "loss": 19.7502, "step": 11417 }, { "epoch": 0.4759284731774415, "grad_norm": 820.0, "learning_rate": 5.6311259764323675e-05, "loss": 22.0002, "step": 11418 }, { "epoch": 0.4759701554749698, "grad_norm": 251.0, "learning_rate": 5.630456367963209e-05, "loss": 9.0006, "step": 11419 }, { "epoch": 0.476011837772498, "grad_norm": 292.0, "learning_rate": 5.6297867480039265e-05, "loss": 12.6259, "step": 11420 }, { "epoch": 0.47605352007002627, "grad_norm": 272.0, "learning_rate": 5.629117116566726e-05, "loss": 10.7511, "step": 11421 }, { "epoch": 0.4760952023675545, "grad_norm": 1216.0, "learning_rate": 5.6284474736638095e-05, "loss": 29.0004, "step": 11422 }, { "epoch": 0.47613688466508275, "grad_norm": 804.0, "learning_rate": 5.6277778193073806e-05, "loss": 25.8751, "step": 11423 }, { "epoch": 0.47617856696261096, "grad_norm": 206.0, "learning_rate": 5.627108153509646e-05, "loss": 11.8127, "step": 11424 }, { "epoch": 0.47622024926013923, "grad_norm": 340.0, "learning_rate": 5.626438476282809e-05, "loss": 14.0627, "step": 11425 }, { "epoch": 0.47626193155766744, "grad_norm": 172.0, "learning_rate": 5.625768787639076e-05, "loss": 10.8753, "step": 11426 }, { "epoch": 0.4763036138551957, "grad_norm": 804.0, "learning_rate": 5.625099087590653e-05, "loss": 22.8752, "step": 11427 }, { "epoch": 0.4763452961527239, "grad_norm": 540.0, "learning_rate": 5.624429376149741e-05, "loss": 16.1302, "step": 11428 }, { "epoch": 0.4763869784502522, "grad_norm": 852.0, "learning_rate": 5.62375965332855e-05, "loss": 23.2502, "step": 11429 }, { "epoch": 0.4764286607477804, "grad_norm": 716.0, "learning_rate": 5.623089919139283e-05, "loss": 23.0007, "step": 11430 }, { "epoch": 0.47647034304530866, "grad_norm": 255.0, "learning_rate": 5.622420173594147e-05, "loss": 12.5627, "step": 11431 }, { "epoch": 0.4765120253428369, "grad_norm": 616.0, "learning_rate": 5.6217504167053484e-05, "loss": 19.6273, "step": 11432 }, { "epoch": 0.47655370764036514, "grad_norm": 210.0, "learning_rate": 5.621080648485093e-05, "loss": 4.6256, "step": 11433 }, { "epoch": 0.47659538993789335, "grad_norm": 109.0, "learning_rate": 5.620410868945588e-05, "loss": 8.4379, "step": 11434 }, { "epoch": 0.4766370722354216, "grad_norm": 123.0, "learning_rate": 5.619741078099038e-05, "loss": 8.8127, "step": 11435 }, { "epoch": 0.47667875453294983, "grad_norm": 800.0, "learning_rate": 5.6190712759576535e-05, "loss": 18.1293, "step": 11436 }, { "epoch": 0.4767204368304781, "grad_norm": 114.0, "learning_rate": 5.61840146253364e-05, "loss": 7.9689, "step": 11437 }, { "epoch": 0.4767621191280063, "grad_norm": 498.0, "learning_rate": 5.617731637839205e-05, "loss": 16.8753, "step": 11438 }, { "epoch": 0.4768038014255346, "grad_norm": 91.0, "learning_rate": 5.617061801886556e-05, "loss": 8.2504, "step": 11439 }, { "epoch": 0.4768454837230628, "grad_norm": 474.0, "learning_rate": 5.616391954687901e-05, "loss": 16.8751, "step": 11440 }, { "epoch": 0.47688716602059106, "grad_norm": 450.0, "learning_rate": 5.615722096255448e-05, "loss": 16.3756, "step": 11441 }, { "epoch": 0.47692884831811927, "grad_norm": 372.0, "learning_rate": 5.6150522266014035e-05, "loss": 14.0626, "step": 11442 }, { "epoch": 0.47697053061564754, "grad_norm": 82.5, "learning_rate": 5.614382345737979e-05, "loss": 8.9377, "step": 11443 }, { "epoch": 0.47701221291317575, "grad_norm": 97.0, "learning_rate": 5.613712453677382e-05, "loss": 7.5941, "step": 11444 }, { "epoch": 0.477053895210704, "grad_norm": 402.0, "learning_rate": 5.613042550431821e-05, "loss": 15.1255, "step": 11445 }, { "epoch": 0.4770955775082322, "grad_norm": 228.0, "learning_rate": 5.6123726360135055e-05, "loss": 12.2501, "step": 11446 }, { "epoch": 0.4771372598057605, "grad_norm": 306.0, "learning_rate": 5.611702710434643e-05, "loss": 11.9381, "step": 11447 }, { "epoch": 0.4771789421032887, "grad_norm": 328.0, "learning_rate": 5.611032773707444e-05, "loss": 13.5627, "step": 11448 }, { "epoch": 0.477220624400817, "grad_norm": 306.0, "learning_rate": 5.6103628258441197e-05, "loss": 13.2503, "step": 11449 }, { "epoch": 0.47726230669834524, "grad_norm": 502.0, "learning_rate": 5.609692866856878e-05, "loss": 14.7514, "step": 11450 }, { "epoch": 0.47730398899587345, "grad_norm": 506.0, "learning_rate": 5.6090228967579305e-05, "loss": 16.3767, "step": 11451 }, { "epoch": 0.4773456712934017, "grad_norm": 292.0, "learning_rate": 5.608352915559486e-05, "loss": 14.1878, "step": 11452 }, { "epoch": 0.47738735359092993, "grad_norm": 370.0, "learning_rate": 5.607682923273756e-05, "loss": 14.4377, "step": 11453 }, { "epoch": 0.4774290358884582, "grad_norm": 222.0, "learning_rate": 5.607012919912951e-05, "loss": 13.2507, "step": 11454 }, { "epoch": 0.4774707181859864, "grad_norm": 392.0, "learning_rate": 5.606342905489281e-05, "loss": 14.6252, "step": 11455 }, { "epoch": 0.4775124004835147, "grad_norm": 144.0, "learning_rate": 5.6056728800149584e-05, "loss": 9.2503, "step": 11456 }, { "epoch": 0.4775540827810429, "grad_norm": 133.0, "learning_rate": 5.605002843502193e-05, "loss": 8.2508, "step": 11457 }, { "epoch": 0.47759576507857116, "grad_norm": 540.0, "learning_rate": 5.604332795963198e-05, "loss": 17.0005, "step": 11458 }, { "epoch": 0.47763744737609937, "grad_norm": 720.0, "learning_rate": 5.6036627374101824e-05, "loss": 20.7503, "step": 11459 }, { "epoch": 0.47767912967362763, "grad_norm": 173.0, "learning_rate": 5.60299266785536e-05, "loss": 11.5635, "step": 11460 }, { "epoch": 0.47772081197115585, "grad_norm": 148.0, "learning_rate": 5.6023225873109444e-05, "loss": 10.1879, "step": 11461 }, { "epoch": 0.4777624942686841, "grad_norm": 203.0, "learning_rate": 5.601652495789145e-05, "loss": 10.9378, "step": 11462 }, { "epoch": 0.4778041765662123, "grad_norm": 876.0, "learning_rate": 5.6009823933021763e-05, "loss": 26.5003, "step": 11463 }, { "epoch": 0.4778458588637406, "grad_norm": 426.0, "learning_rate": 5.60031227986225e-05, "loss": 14.9378, "step": 11464 }, { "epoch": 0.4778875411612688, "grad_norm": 414.0, "learning_rate": 5.599642155481578e-05, "loss": 14.6878, "step": 11465 }, { "epoch": 0.47792922345879707, "grad_norm": 104.5, "learning_rate": 5.598972020172376e-05, "loss": 9.5641, "step": 11466 }, { "epoch": 0.4779709057563253, "grad_norm": 1184.0, "learning_rate": 5.598301873946855e-05, "loss": 24.6303, "step": 11467 }, { "epoch": 0.47801258805385355, "grad_norm": 154.0, "learning_rate": 5.59763171681723e-05, "loss": 10.0632, "step": 11468 }, { "epoch": 0.47805427035138176, "grad_norm": 676.0, "learning_rate": 5.596961548795713e-05, "loss": 19.8762, "step": 11469 }, { "epoch": 0.47809595264891, "grad_norm": 412.0, "learning_rate": 5.596291369894518e-05, "loss": 15.1253, "step": 11470 }, { "epoch": 0.47813763494643824, "grad_norm": 420.0, "learning_rate": 5.595621180125862e-05, "loss": 17.5008, "step": 11471 }, { "epoch": 0.4781793172439665, "grad_norm": 422.0, "learning_rate": 5.594950979501956e-05, "loss": 16.2502, "step": 11472 }, { "epoch": 0.4782209995414947, "grad_norm": 364.0, "learning_rate": 5.594280768035014e-05, "loss": 13.0009, "step": 11473 }, { "epoch": 0.478262681839023, "grad_norm": 468.0, "learning_rate": 5.5936105457372545e-05, "loss": 14.938, "step": 11474 }, { "epoch": 0.4783043641365512, "grad_norm": 600.0, "learning_rate": 5.5929403126208893e-05, "loss": 18.7502, "step": 11475 }, { "epoch": 0.47834604643407946, "grad_norm": 348.0, "learning_rate": 5.592270068698134e-05, "loss": 15.3766, "step": 11476 }, { "epoch": 0.4783877287316077, "grad_norm": 624.0, "learning_rate": 5.591599813981205e-05, "loss": 16.6297, "step": 11477 }, { "epoch": 0.47842941102913594, "grad_norm": 466.0, "learning_rate": 5.590929548482316e-05, "loss": 16.5008, "step": 11478 }, { "epoch": 0.47847109332666415, "grad_norm": 282.0, "learning_rate": 5.5902592722136835e-05, "loss": 13.7504, "step": 11479 }, { "epoch": 0.4785127756241924, "grad_norm": 456.0, "learning_rate": 5.589588985187525e-05, "loss": 16.6254, "step": 11480 }, { "epoch": 0.47855445792172063, "grad_norm": 1232.0, "learning_rate": 5.5889186874160535e-05, "loss": 31.6259, "step": 11481 }, { "epoch": 0.4785961402192489, "grad_norm": 314.0, "learning_rate": 5.588248378911487e-05, "loss": 14.063, "step": 11482 }, { "epoch": 0.4786378225167771, "grad_norm": 260.0, "learning_rate": 5.587578059686041e-05, "loss": 12.3759, "step": 11483 }, { "epoch": 0.4786795048143054, "grad_norm": 520.0, "learning_rate": 5.5869077297519334e-05, "loss": 15.6889, "step": 11484 }, { "epoch": 0.4787211871118336, "grad_norm": 207.0, "learning_rate": 5.58623738912138e-05, "loss": 11.3128, "step": 11485 }, { "epoch": 0.47876286940936186, "grad_norm": 292.0, "learning_rate": 5.585567037806597e-05, "loss": 13.6257, "step": 11486 }, { "epoch": 0.47880455170689007, "grad_norm": 494.0, "learning_rate": 5.584896675819804e-05, "loss": 15.9382, "step": 11487 }, { "epoch": 0.47884623400441834, "grad_norm": 189.0, "learning_rate": 5.584226303173217e-05, "loss": 11.7502, "step": 11488 }, { "epoch": 0.47888791630194655, "grad_norm": 250.0, "learning_rate": 5.583555919879054e-05, "loss": 12.8128, "step": 11489 }, { "epoch": 0.4789295985994748, "grad_norm": 280.0, "learning_rate": 5.582885525949533e-05, "loss": 11.6877, "step": 11490 }, { "epoch": 0.478971280897003, "grad_norm": 320.0, "learning_rate": 5.5822151213968696e-05, "loss": 13.4379, "step": 11491 }, { "epoch": 0.4790129631945313, "grad_norm": 402.0, "learning_rate": 5.581544706233286e-05, "loss": 14.5002, "step": 11492 }, { "epoch": 0.4790546454920595, "grad_norm": 189.0, "learning_rate": 5.580874280470998e-05, "loss": 11.2502, "step": 11493 }, { "epoch": 0.4790963277895878, "grad_norm": 580.0, "learning_rate": 5.580203844122225e-05, "loss": 18.5006, "step": 11494 }, { "epoch": 0.479138010087116, "grad_norm": 139.0, "learning_rate": 5.579533397199185e-05, "loss": 10.938, "step": 11495 }, { "epoch": 0.47917969238464425, "grad_norm": 280.0, "learning_rate": 5.578862939714097e-05, "loss": 11.3128, "step": 11496 }, { "epoch": 0.47922137468217246, "grad_norm": 232.0, "learning_rate": 5.57819247167918e-05, "loss": 10.4379, "step": 11497 }, { "epoch": 0.47926305697970073, "grad_norm": 194.0, "learning_rate": 5.5775219931066537e-05, "loss": 10.0007, "step": 11498 }, { "epoch": 0.47930473927722894, "grad_norm": 226.0, "learning_rate": 5.576851504008739e-05, "loss": 10.4377, "step": 11499 }, { "epoch": 0.4793464215747572, "grad_norm": 308.0, "learning_rate": 5.576181004397655e-05, "loss": 12.6877, "step": 11500 }, { "epoch": 0.4793881038722854, "grad_norm": 372.0, "learning_rate": 5.5755104942856194e-05, "loss": 14.8128, "step": 11501 }, { "epoch": 0.4794297861698137, "grad_norm": 364.0, "learning_rate": 5.574839973684856e-05, "loss": 13.9377, "step": 11502 }, { "epoch": 0.4794714684673419, "grad_norm": 154.0, "learning_rate": 5.5741694426075806e-05, "loss": 8.5018, "step": 11503 }, { "epoch": 0.47951315076487017, "grad_norm": 442.0, "learning_rate": 5.573498901066018e-05, "loss": 13.313, "step": 11504 }, { "epoch": 0.4795548330623984, "grad_norm": 300.0, "learning_rate": 5.572828349072386e-05, "loss": 12.4378, "step": 11505 }, { "epoch": 0.47959651535992665, "grad_norm": 268.0, "learning_rate": 5.5721577866389075e-05, "loss": 13.2503, "step": 11506 }, { "epoch": 0.47963819765745486, "grad_norm": 154.0, "learning_rate": 5.571487213777802e-05, "loss": 10.9379, "step": 11507 }, { "epoch": 0.4796798799549831, "grad_norm": 346.0, "learning_rate": 5.570816630501291e-05, "loss": 14.3753, "step": 11508 }, { "epoch": 0.47972156225251134, "grad_norm": 112.5, "learning_rate": 5.570146036821596e-05, "loss": 8.5004, "step": 11509 }, { "epoch": 0.4797632445500396, "grad_norm": 424.0, "learning_rate": 5.5694754327509404e-05, "loss": 16.7503, "step": 11510 }, { "epoch": 0.4798049268475678, "grad_norm": 580.0, "learning_rate": 5.568804818301542e-05, "loss": 17.6263, "step": 11511 }, { "epoch": 0.4798466091450961, "grad_norm": 282.0, "learning_rate": 5.568134193485627e-05, "loss": 13.8768, "step": 11512 }, { "epoch": 0.4798882914426243, "grad_norm": 438.0, "learning_rate": 5.567463558315416e-05, "loss": 14.2505, "step": 11513 }, { "epoch": 0.47992997374015256, "grad_norm": 760.0, "learning_rate": 5.56679291280313e-05, "loss": 21.6291, "step": 11514 }, { "epoch": 0.47997165603768077, "grad_norm": 177.0, "learning_rate": 5.566122256960994e-05, "loss": 10.6878, "step": 11515 }, { "epoch": 0.48001333833520904, "grad_norm": 704.0, "learning_rate": 5.5654515908012294e-05, "loss": 18.1304, "step": 11516 }, { "epoch": 0.48005502063273725, "grad_norm": 512.0, "learning_rate": 5.5647809143360595e-05, "loss": 18.7503, "step": 11517 }, { "epoch": 0.4800967029302655, "grad_norm": 366.0, "learning_rate": 5.5641102275777065e-05, "loss": 15.4376, "step": 11518 }, { "epoch": 0.48013838522779373, "grad_norm": 230.0, "learning_rate": 5.5634395305383957e-05, "loss": 12.0009, "step": 11519 }, { "epoch": 0.480180067525322, "grad_norm": 251.0, "learning_rate": 5.5627688232303485e-05, "loss": 12.3753, "step": 11520 }, { "epoch": 0.4802217498228502, "grad_norm": 386.0, "learning_rate": 5.562098105665791e-05, "loss": 12.627, "step": 11521 }, { "epoch": 0.4802634321203785, "grad_norm": 221.0, "learning_rate": 5.561427377856945e-05, "loss": 12.1878, "step": 11522 }, { "epoch": 0.48030511441790674, "grad_norm": 1072.0, "learning_rate": 5.5607566398160325e-05, "loss": 24.5052, "step": 11523 }, { "epoch": 0.48034679671543495, "grad_norm": 103.0, "learning_rate": 5.5600858915552834e-05, "loss": 6.1255, "step": 11524 }, { "epoch": 0.4803884790129632, "grad_norm": 552.0, "learning_rate": 5.5594151330869185e-05, "loss": 18.1252, "step": 11525 }, { "epoch": 0.48043016131049143, "grad_norm": 484.0, "learning_rate": 5.558744364423163e-05, "loss": 16.2514, "step": 11526 }, { "epoch": 0.4804718436080197, "grad_norm": 370.0, "learning_rate": 5.5580735855762425e-05, "loss": 13.8753, "step": 11527 }, { "epoch": 0.4805135259055479, "grad_norm": 576.0, "learning_rate": 5.557402796558381e-05, "loss": 16.0012, "step": 11528 }, { "epoch": 0.4805552082030762, "grad_norm": 268.0, "learning_rate": 5.5567319973818036e-05, "loss": 13.2504, "step": 11529 }, { "epoch": 0.4805968905006044, "grad_norm": 552.0, "learning_rate": 5.5560611880587366e-05, "loss": 17.1253, "step": 11530 }, { "epoch": 0.48063857279813266, "grad_norm": 1032.0, "learning_rate": 5.555390368601404e-05, "loss": 27.0008, "step": 11531 }, { "epoch": 0.48068025509566087, "grad_norm": 744.0, "learning_rate": 5.554719539022034e-05, "loss": 21.3752, "step": 11532 }, { "epoch": 0.48072193739318914, "grad_norm": 213.0, "learning_rate": 5.554048699332851e-05, "loss": 6.9075, "step": 11533 }, { "epoch": 0.48076361969071735, "grad_norm": 86.5, "learning_rate": 5.553377849546081e-05, "loss": 5.5942, "step": 11534 }, { "epoch": 0.4808053019882456, "grad_norm": 612.0, "learning_rate": 5.5527069896739505e-05, "loss": 17.1281, "step": 11535 }, { "epoch": 0.4808469842857738, "grad_norm": 129.0, "learning_rate": 5.552036119728685e-05, "loss": 10.563, "step": 11536 }, { "epoch": 0.4808886665833021, "grad_norm": 564.0, "learning_rate": 5.551365239722513e-05, "loss": 18.3799, "step": 11537 }, { "epoch": 0.4809303488808303, "grad_norm": 564.0, "learning_rate": 5.550694349667661e-05, "loss": 15.2502, "step": 11538 }, { "epoch": 0.48097203117835857, "grad_norm": 482.0, "learning_rate": 5.550023449576356e-05, "loss": 16.5028, "step": 11539 }, { "epoch": 0.4810137134758868, "grad_norm": 368.0, "learning_rate": 5.549352539460824e-05, "loss": 15.626, "step": 11540 }, { "epoch": 0.48105539577341505, "grad_norm": 67.0, "learning_rate": 5.5486816193332935e-05, "loss": 6.7195, "step": 11541 }, { "epoch": 0.48109707807094326, "grad_norm": 576.0, "learning_rate": 5.5480106892059925e-05, "loss": 19.3773, "step": 11542 }, { "epoch": 0.48113876036847153, "grad_norm": 238.0, "learning_rate": 5.547339749091147e-05, "loss": 12.6271, "step": 11543 }, { "epoch": 0.48118044266599974, "grad_norm": 684.0, "learning_rate": 5.546668799000986e-05, "loss": 20.3769, "step": 11544 }, { "epoch": 0.481222124963528, "grad_norm": 258.0, "learning_rate": 5.5459978389477385e-05, "loss": 12.9382, "step": 11545 }, { "epoch": 0.4812638072610562, "grad_norm": 992.0, "learning_rate": 5.5453268689436313e-05, "loss": 24.0039, "step": 11546 }, { "epoch": 0.4813054895585845, "grad_norm": 1152.0, "learning_rate": 5.544655889000892e-05, "loss": 27.1285, "step": 11547 }, { "epoch": 0.4813471718561127, "grad_norm": 604.0, "learning_rate": 5.543984899131753e-05, "loss": 18.5004, "step": 11548 }, { "epoch": 0.48138885415364097, "grad_norm": 486.0, "learning_rate": 5.543313899348439e-05, "loss": 17.6253, "step": 11549 }, { "epoch": 0.4814305364511692, "grad_norm": 364.0, "learning_rate": 5.5426428896631834e-05, "loss": 12.7501, "step": 11550 }, { "epoch": 0.48147221874869744, "grad_norm": 740.0, "learning_rate": 5.5419718700882105e-05, "loss": 23.3751, "step": 11551 }, { "epoch": 0.48151390104622566, "grad_norm": 266.0, "learning_rate": 5.541300840635754e-05, "loss": 11.5004, "step": 11552 }, { "epoch": 0.4815555833437539, "grad_norm": 2080.0, "learning_rate": 5.5406298013180415e-05, "loss": 39.7508, "step": 11553 }, { "epoch": 0.48159726564128214, "grad_norm": 462.0, "learning_rate": 5.539958752147302e-05, "loss": 17.126, "step": 11554 }, { "epoch": 0.4816389479388104, "grad_norm": 57.75, "learning_rate": 5.539287693135766e-05, "loss": 8.5628, "step": 11555 }, { "epoch": 0.4816806302363386, "grad_norm": 334.0, "learning_rate": 5.538616624295665e-05, "loss": 14.5006, "step": 11556 }, { "epoch": 0.4817223125338669, "grad_norm": 302.0, "learning_rate": 5.537945545639228e-05, "loss": 14.2504, "step": 11557 }, { "epoch": 0.4817639948313951, "grad_norm": 896.0, "learning_rate": 5.537274457178685e-05, "loss": 22.505, "step": 11558 }, { "epoch": 0.48180567712892336, "grad_norm": 264.0, "learning_rate": 5.536603358926269e-05, "loss": 12.2503, "step": 11559 }, { "epoch": 0.48184735942645157, "grad_norm": 184.0, "learning_rate": 5.535932250894207e-05, "loss": 11.2502, "step": 11560 }, { "epoch": 0.48188904172397984, "grad_norm": 266.0, "learning_rate": 5.5352611330947325e-05, "loss": 11.938, "step": 11561 }, { "epoch": 0.48193072402150805, "grad_norm": 396.0, "learning_rate": 5.534590005540077e-05, "loss": 15.3127, "step": 11562 }, { "epoch": 0.4819724063190363, "grad_norm": 432.0, "learning_rate": 5.533918868242471e-05, "loss": 14.7517, "step": 11563 }, { "epoch": 0.48201408861656453, "grad_norm": 382.0, "learning_rate": 5.5332477212141465e-05, "loss": 14.0005, "step": 11564 }, { "epoch": 0.4820557709140928, "grad_norm": 206.0, "learning_rate": 5.532576564467334e-05, "loss": 12.6879, "step": 11565 }, { "epoch": 0.482097453211621, "grad_norm": 78.5, "learning_rate": 5.531905398014268e-05, "loss": 8.0629, "step": 11566 }, { "epoch": 0.4821391355091493, "grad_norm": 414.0, "learning_rate": 5.531234221867178e-05, "loss": 14.6893, "step": 11567 }, { "epoch": 0.4821808178066775, "grad_norm": 356.0, "learning_rate": 5.530563036038298e-05, "loss": 15.1884, "step": 11568 }, { "epoch": 0.48222250010420575, "grad_norm": 536.0, "learning_rate": 5.5298918405398584e-05, "loss": 19.8753, "step": 11569 }, { "epoch": 0.48226418240173397, "grad_norm": 564.0, "learning_rate": 5.529220635384093e-05, "loss": 17.5003, "step": 11570 }, { "epoch": 0.48230586469926223, "grad_norm": 332.0, "learning_rate": 5.528549420583234e-05, "loss": 12.6258, "step": 11571 }, { "epoch": 0.48234754699679044, "grad_norm": 294.0, "learning_rate": 5.5278781961495164e-05, "loss": 14.438, "step": 11572 }, { "epoch": 0.4823892292943187, "grad_norm": 616.0, "learning_rate": 5.527206962095172e-05, "loss": 19.0002, "step": 11573 }, { "epoch": 0.4824309115918469, "grad_norm": 724.0, "learning_rate": 5.526535718432432e-05, "loss": 21.3751, "step": 11574 }, { "epoch": 0.4824725938893752, "grad_norm": 294.0, "learning_rate": 5.5258644651735325e-05, "loss": 12.2515, "step": 11575 }, { "epoch": 0.4825142761869034, "grad_norm": 788.0, "learning_rate": 5.525193202330706e-05, "loss": 22.5002, "step": 11576 }, { "epoch": 0.48255595848443167, "grad_norm": 253.0, "learning_rate": 5.524521929916189e-05, "loss": 13.0003, "step": 11577 }, { "epoch": 0.4825976407819599, "grad_norm": 476.0, "learning_rate": 5.523850647942211e-05, "loss": 16.2504, "step": 11578 }, { "epoch": 0.48263932307948815, "grad_norm": 94.5, "learning_rate": 5.52317935642101e-05, "loss": 8.3128, "step": 11579 }, { "epoch": 0.48268100537701636, "grad_norm": 436.0, "learning_rate": 5.522508055364818e-05, "loss": 15.4377, "step": 11580 }, { "epoch": 0.4827226876745446, "grad_norm": 304.0, "learning_rate": 5.52183674478587e-05, "loss": 13.2502, "step": 11581 }, { "epoch": 0.48276436997207284, "grad_norm": 268.0, "learning_rate": 5.5211654246964016e-05, "loss": 13.7504, "step": 11582 }, { "epoch": 0.4828060522696011, "grad_norm": 120.5, "learning_rate": 5.520494095108647e-05, "loss": 11.1257, "step": 11583 }, { "epoch": 0.4828477345671293, "grad_norm": 414.0, "learning_rate": 5.51982275603484e-05, "loss": 15.7544, "step": 11584 }, { "epoch": 0.4828894168646576, "grad_norm": 404.0, "learning_rate": 5.51915140748722e-05, "loss": 12.314, "step": 11585 }, { "epoch": 0.4829310991621858, "grad_norm": 876.0, "learning_rate": 5.518480049478016e-05, "loss": 22.8765, "step": 11586 }, { "epoch": 0.48297278145971406, "grad_norm": 96.0, "learning_rate": 5.517808682019468e-05, "loss": 7.9691, "step": 11587 }, { "epoch": 0.4830144637572423, "grad_norm": 444.0, "learning_rate": 5.517137305123813e-05, "loss": 16.6251, "step": 11588 }, { "epoch": 0.48305614605477054, "grad_norm": 320.0, "learning_rate": 5.516465918803283e-05, "loss": 13.5628, "step": 11589 }, { "epoch": 0.48309782835229875, "grad_norm": 732.0, "learning_rate": 5.515794523070116e-05, "loss": 20.2502, "step": 11590 }, { "epoch": 0.483139510649827, "grad_norm": 120.0, "learning_rate": 5.515123117936548e-05, "loss": 10.1257, "step": 11591 }, { "epoch": 0.48318119294735523, "grad_norm": 182.0, "learning_rate": 5.514451703414816e-05, "loss": 11.0637, "step": 11592 }, { "epoch": 0.4832228752448835, "grad_norm": 494.0, "learning_rate": 5.513780279517156e-05, "loss": 17.3754, "step": 11593 }, { "epoch": 0.4832645575424117, "grad_norm": 540.0, "learning_rate": 5.5131088462558044e-05, "loss": 18.7508, "step": 11594 }, { "epoch": 0.48330623983994, "grad_norm": 1728.0, "learning_rate": 5.5124374036429985e-05, "loss": 34.5004, "step": 11595 }, { "epoch": 0.48334792213746824, "grad_norm": 326.0, "learning_rate": 5.511765951690976e-05, "loss": 13.129, "step": 11596 }, { "epoch": 0.48338960443499646, "grad_norm": 133.0, "learning_rate": 5.5110944904119724e-05, "loss": 9.9377, "step": 11597 }, { "epoch": 0.4834312867325247, "grad_norm": 119.5, "learning_rate": 5.510423019818227e-05, "loss": 8.6876, "step": 11598 }, { "epoch": 0.48347296903005293, "grad_norm": 202.0, "learning_rate": 5.5097515399219754e-05, "loss": 11.7504, "step": 11599 }, { "epoch": 0.4835146513275812, "grad_norm": 95.0, "learning_rate": 5.5090800507354586e-05, "loss": 7.7191, "step": 11600 }, { "epoch": 0.4835563336251094, "grad_norm": 198.0, "learning_rate": 5.508408552270913e-05, "loss": 9.9377, "step": 11601 }, { "epoch": 0.4835980159226377, "grad_norm": 181.0, "learning_rate": 5.507737044540575e-05, "loss": 10.6252, "step": 11602 }, { "epoch": 0.4836396982201659, "grad_norm": 115.0, "learning_rate": 5.507065527556685e-05, "loss": 9.8127, "step": 11603 }, { "epoch": 0.48368138051769416, "grad_norm": 584.0, "learning_rate": 5.5063940013314805e-05, "loss": 19.6252, "step": 11604 }, { "epoch": 0.48372306281522237, "grad_norm": 210.0, "learning_rate": 5.505722465877201e-05, "loss": 11.2503, "step": 11605 }, { "epoch": 0.48376474511275064, "grad_norm": 179.0, "learning_rate": 5.505050921206084e-05, "loss": 10.5003, "step": 11606 }, { "epoch": 0.48380642741027885, "grad_norm": 520.0, "learning_rate": 5.504379367330369e-05, "loss": 18.3757, "step": 11607 }, { "epoch": 0.4838481097078071, "grad_norm": 256.0, "learning_rate": 5.503707804262296e-05, "loss": 11.8751, "step": 11608 }, { "epoch": 0.48388979200533533, "grad_norm": 324.0, "learning_rate": 5.5030362320141026e-05, "loss": 13.2502, "step": 11609 }, { "epoch": 0.4839314743028636, "grad_norm": 410.0, "learning_rate": 5.502364650598031e-05, "loss": 16.3754, "step": 11610 }, { "epoch": 0.4839731566003918, "grad_norm": 78.0, "learning_rate": 5.501693060026317e-05, "loss": 9.3765, "step": 11611 }, { "epoch": 0.4840148388979201, "grad_norm": 172.0, "learning_rate": 5.501021460311202e-05, "loss": 10.9378, "step": 11612 }, { "epoch": 0.4840565211954483, "grad_norm": 580.0, "learning_rate": 5.5003498514649274e-05, "loss": 18.2503, "step": 11613 }, { "epoch": 0.48409820349297655, "grad_norm": 61.5, "learning_rate": 5.4996782334997335e-05, "loss": 6.844, "step": 11614 }, { "epoch": 0.48413988579050476, "grad_norm": 416.0, "learning_rate": 5.499006606427858e-05, "loss": 14.0638, "step": 11615 }, { "epoch": 0.48418156808803303, "grad_norm": 252.0, "learning_rate": 5.4983349702615436e-05, "loss": 12.44, "step": 11616 }, { "epoch": 0.48422325038556124, "grad_norm": 528.0, "learning_rate": 5.4976633250130295e-05, "loss": 17.2503, "step": 11617 }, { "epoch": 0.4842649326830895, "grad_norm": 460.0, "learning_rate": 5.496991670694558e-05, "loss": 16.0005, "step": 11618 }, { "epoch": 0.4843066149806177, "grad_norm": 148.0, "learning_rate": 5.496320007318368e-05, "loss": 9.7502, "step": 11619 }, { "epoch": 0.484348297278146, "grad_norm": 172.0, "learning_rate": 5.495648334896704e-05, "loss": 10.438, "step": 11620 }, { "epoch": 0.4843899795756742, "grad_norm": 904.0, "learning_rate": 5.4949766534418024e-05, "loss": 25.2503, "step": 11621 }, { "epoch": 0.48443166187320247, "grad_norm": 484.0, "learning_rate": 5.494304962965909e-05, "loss": 15.4383, "step": 11622 }, { "epoch": 0.4844733441707307, "grad_norm": 292.0, "learning_rate": 5.4936332634812636e-05, "loss": 12.3751, "step": 11623 }, { "epoch": 0.48451502646825895, "grad_norm": 458.0, "learning_rate": 5.492961555000107e-05, "loss": 16.7504, "step": 11624 }, { "epoch": 0.48455670876578716, "grad_norm": 260.0, "learning_rate": 5.492289837534682e-05, "loss": 11.7506, "step": 11625 }, { "epoch": 0.4845983910633154, "grad_norm": 498.0, "learning_rate": 5.491618111097233e-05, "loss": 16.7506, "step": 11626 }, { "epoch": 0.48464007336084364, "grad_norm": 188.0, "learning_rate": 5.490946375699999e-05, "loss": 10.0628, "step": 11627 }, { "epoch": 0.4846817556583719, "grad_norm": 528.0, "learning_rate": 5.490274631355224e-05, "loss": 18.8752, "step": 11628 }, { "epoch": 0.4847234379559001, "grad_norm": 454.0, "learning_rate": 5.489602878075151e-05, "loss": 15.2502, "step": 11629 }, { "epoch": 0.4847651202534284, "grad_norm": 588.0, "learning_rate": 5.488931115872021e-05, "loss": 16.1255, "step": 11630 }, { "epoch": 0.4848068025509566, "grad_norm": 392.0, "learning_rate": 5.488259344758079e-05, "loss": 15.5012, "step": 11631 }, { "epoch": 0.48484848484848486, "grad_norm": 123.5, "learning_rate": 5.487587564745567e-05, "loss": 10.2502, "step": 11632 }, { "epoch": 0.4848901671460131, "grad_norm": 340.0, "learning_rate": 5.486915775846728e-05, "loss": 12.938, "step": 11633 }, { "epoch": 0.48493184944354134, "grad_norm": 640.0, "learning_rate": 5.486243978073805e-05, "loss": 20.2502, "step": 11634 }, { "epoch": 0.48497353174106955, "grad_norm": 188.0, "learning_rate": 5.485572171439044e-05, "loss": 9.3752, "step": 11635 }, { "epoch": 0.4850152140385978, "grad_norm": 352.0, "learning_rate": 5.4849003559546866e-05, "loss": 14.6879, "step": 11636 }, { "epoch": 0.48505689633612603, "grad_norm": 144.0, "learning_rate": 5.484228531632975e-05, "loss": 10.8752, "step": 11637 }, { "epoch": 0.4850985786336543, "grad_norm": 208.0, "learning_rate": 5.4835566984861573e-05, "loss": 11.3754, "step": 11638 }, { "epoch": 0.4851402609311825, "grad_norm": 282.0, "learning_rate": 5.482884856526476e-05, "loss": 11.3752, "step": 11639 }, { "epoch": 0.4851819432287108, "grad_norm": 139.0, "learning_rate": 5.482213005766175e-05, "loss": 9.6877, "step": 11640 }, { "epoch": 0.485223625526239, "grad_norm": 364.0, "learning_rate": 5.481541146217499e-05, "loss": 15.0003, "step": 11641 }, { "epoch": 0.48526530782376726, "grad_norm": 246.0, "learning_rate": 5.480869277892693e-05, "loss": 12.2501, "step": 11642 }, { "epoch": 0.48530699012129547, "grad_norm": 53.5, "learning_rate": 5.480197400804001e-05, "loss": 7.3752, "step": 11643 }, { "epoch": 0.48534867241882373, "grad_norm": 131.0, "learning_rate": 5.47952551496367e-05, "loss": 9.9382, "step": 11644 }, { "epoch": 0.48539035471635195, "grad_norm": 482.0, "learning_rate": 5.478853620383944e-05, "loss": 16.7503, "step": 11645 }, { "epoch": 0.4854320370138802, "grad_norm": 1080.0, "learning_rate": 5.4781817170770676e-05, "loss": 26.0004, "step": 11646 }, { "epoch": 0.4854737193114084, "grad_norm": 63.25, "learning_rate": 5.477509805055286e-05, "loss": 8.4378, "step": 11647 }, { "epoch": 0.4855154016089367, "grad_norm": 434.0, "learning_rate": 5.476837884330848e-05, "loss": 14.9379, "step": 11648 }, { "epoch": 0.4855570839064649, "grad_norm": 107.0, "learning_rate": 5.476165954915995e-05, "loss": 8.6252, "step": 11649 }, { "epoch": 0.48559876620399317, "grad_norm": 213.0, "learning_rate": 5.4754940168229765e-05, "loss": 11.8129, "step": 11650 }, { "epoch": 0.4856404485015214, "grad_norm": 704.0, "learning_rate": 5.474822070064037e-05, "loss": 21.3759, "step": 11651 }, { "epoch": 0.48568213079904965, "grad_norm": 1440.0, "learning_rate": 5.474150114651423e-05, "loss": 27.3799, "step": 11652 }, { "epoch": 0.48572381309657786, "grad_norm": 142.0, "learning_rate": 5.473478150597382e-05, "loss": 9.8128, "step": 11653 }, { "epoch": 0.48576549539410613, "grad_norm": 1312.0, "learning_rate": 5.4728061779141585e-05, "loss": 26.7533, "step": 11654 }, { "epoch": 0.48580717769163434, "grad_norm": 632.0, "learning_rate": 5.472134196614e-05, "loss": 20.6253, "step": 11655 }, { "epoch": 0.4858488599891626, "grad_norm": 180.0, "learning_rate": 5.471462206709156e-05, "loss": 9.5006, "step": 11656 }, { "epoch": 0.4858905422866908, "grad_norm": 700.0, "learning_rate": 5.47079020821187e-05, "loss": 20.1252, "step": 11657 }, { "epoch": 0.4859322245842191, "grad_norm": 418.0, "learning_rate": 5.470118201134391e-05, "loss": 14.1881, "step": 11658 }, { "epoch": 0.4859739068817473, "grad_norm": 464.0, "learning_rate": 5.4694461854889655e-05, "loss": 15.2526, "step": 11659 }, { "epoch": 0.48601558917927556, "grad_norm": 540.0, "learning_rate": 5.468774161287843e-05, "loss": 18.8756, "step": 11660 }, { "epoch": 0.4860572714768038, "grad_norm": 736.0, "learning_rate": 5.4681021285432686e-05, "loss": 22.2504, "step": 11661 }, { "epoch": 0.48609895377433204, "grad_norm": 588.0, "learning_rate": 5.46743008726749e-05, "loss": 19.5007, "step": 11662 }, { "epoch": 0.48614063607186025, "grad_norm": 51.0, "learning_rate": 5.4667580374727576e-05, "loss": 6.7815, "step": 11663 }, { "epoch": 0.4861823183693885, "grad_norm": 632.0, "learning_rate": 5.46608597917132e-05, "loss": 21.3754, "step": 11664 }, { "epoch": 0.48622400066691673, "grad_norm": 268.0, "learning_rate": 5.465413912375423e-05, "loss": 12.8752, "step": 11665 }, { "epoch": 0.486265682964445, "grad_norm": 264.0, "learning_rate": 5.464741837097316e-05, "loss": 7.4069, "step": 11666 }, { "epoch": 0.4863073652619732, "grad_norm": 752.0, "learning_rate": 5.464069753349248e-05, "loss": 18.8752, "step": 11667 }, { "epoch": 0.4863490475595015, "grad_norm": 82.0, "learning_rate": 5.463397661143468e-05, "loss": 8.8129, "step": 11668 }, { "epoch": 0.48639072985702975, "grad_norm": 1176.0, "learning_rate": 5.462725560492224e-05, "loss": 26.3781, "step": 11669 }, { "epoch": 0.48643241215455796, "grad_norm": 624.0, "learning_rate": 5.462053451407766e-05, "loss": 20.3751, "step": 11670 }, { "epoch": 0.4864740944520862, "grad_norm": 556.0, "learning_rate": 5.4613813339023424e-05, "loss": 18.3754, "step": 11671 }, { "epoch": 0.48651577674961444, "grad_norm": 420.0, "learning_rate": 5.460709207988203e-05, "loss": 15.0002, "step": 11672 }, { "epoch": 0.4865574590471427, "grad_norm": 468.0, "learning_rate": 5.4600370736775974e-05, "loss": 15.4378, "step": 11673 }, { "epoch": 0.4865991413446709, "grad_norm": 672.0, "learning_rate": 5.459364930982775e-05, "loss": 20.3753, "step": 11674 }, { "epoch": 0.4866408236421992, "grad_norm": 472.0, "learning_rate": 5.4586927799159856e-05, "loss": 17.5001, "step": 11675 }, { "epoch": 0.4866825059397274, "grad_norm": 1504.0, "learning_rate": 5.45802062048948e-05, "loss": 36.0002, "step": 11676 }, { "epoch": 0.48672418823725566, "grad_norm": 904.0, "learning_rate": 5.4573484527155086e-05, "loss": 25.6255, "step": 11677 }, { "epoch": 0.4867658705347839, "grad_norm": 82.5, "learning_rate": 5.456676276606321e-05, "loss": 8.5002, "step": 11678 }, { "epoch": 0.48680755283231214, "grad_norm": 408.0, "learning_rate": 5.4560040921741676e-05, "loss": 15.813, "step": 11679 }, { "epoch": 0.48684923512984035, "grad_norm": 620.0, "learning_rate": 5.4553318994312984e-05, "loss": 18.8776, "step": 11680 }, { "epoch": 0.4868909174273686, "grad_norm": 340.0, "learning_rate": 5.4546596983899654e-05, "loss": 14.2503, "step": 11681 }, { "epoch": 0.48693259972489683, "grad_norm": 912.0, "learning_rate": 5.45398748906242e-05, "loss": 24.6266, "step": 11682 }, { "epoch": 0.4869742820224251, "grad_norm": 116.0, "learning_rate": 5.45331527146091e-05, "loss": 8.1878, "step": 11683 }, { "epoch": 0.4870159643199533, "grad_norm": 358.0, "learning_rate": 5.4526430455976906e-05, "loss": 14.5005, "step": 11684 }, { "epoch": 0.4870576466174816, "grad_norm": 358.0, "learning_rate": 5.451970811485012e-05, "loss": 15.3751, "step": 11685 }, { "epoch": 0.4870993289150098, "grad_norm": 245.0, "learning_rate": 5.4512985691351236e-05, "loss": 12.5627, "step": 11686 }, { "epoch": 0.48714101121253806, "grad_norm": 126.5, "learning_rate": 5.450626318560279e-05, "loss": 10.2503, "step": 11687 }, { "epoch": 0.48718269351006627, "grad_norm": 70.0, "learning_rate": 5.4499540597727303e-05, "loss": 7.876, "step": 11688 }, { "epoch": 0.48722437580759453, "grad_norm": 304.0, "learning_rate": 5.44928179278473e-05, "loss": 13.9381, "step": 11689 }, { "epoch": 0.48726605810512275, "grad_norm": 235.0, "learning_rate": 5.4486095176085274e-05, "loss": 11.8126, "step": 11690 }, { "epoch": 0.487307740402651, "grad_norm": 170.0, "learning_rate": 5.4479372342563775e-05, "loss": 10.5004, "step": 11691 }, { "epoch": 0.4873494227001792, "grad_norm": 426.0, "learning_rate": 5.447264942740531e-05, "loss": 16.2505, "step": 11692 }, { "epoch": 0.4873911049977075, "grad_norm": 286.0, "learning_rate": 5.4465926430732416e-05, "loss": 13.1892, "step": 11693 }, { "epoch": 0.4874327872952357, "grad_norm": 560.0, "learning_rate": 5.445920335266762e-05, "loss": 15.8752, "step": 11694 }, { "epoch": 0.48747446959276397, "grad_norm": 552.0, "learning_rate": 5.445248019333345e-05, "loss": 16.6254, "step": 11695 }, { "epoch": 0.4875161518902922, "grad_norm": 492.0, "learning_rate": 5.444575695285242e-05, "loss": 16.1253, "step": 11696 }, { "epoch": 0.48755783418782045, "grad_norm": 64.0, "learning_rate": 5.443903363134708e-05, "loss": 6.0003, "step": 11697 }, { "epoch": 0.48759951648534866, "grad_norm": 226.0, "learning_rate": 5.4432310228939966e-05, "loss": 12.0004, "step": 11698 }, { "epoch": 0.48764119878287693, "grad_norm": 231.0, "learning_rate": 5.4425586745753595e-05, "loss": 11.7506, "step": 11699 }, { "epoch": 0.48768288108040514, "grad_norm": 452.0, "learning_rate": 5.4418863181910504e-05, "loss": 15.3753, "step": 11700 }, { "epoch": 0.4877245633779334, "grad_norm": 284.0, "learning_rate": 5.4412139537533255e-05, "loss": 13.1879, "step": 11701 }, { "epoch": 0.4877662456754616, "grad_norm": 218.0, "learning_rate": 5.440541581274436e-05, "loss": 9.313, "step": 11702 }, { "epoch": 0.4878079279729899, "grad_norm": 135.0, "learning_rate": 5.439869200766638e-05, "loss": 9.8752, "step": 11703 }, { "epoch": 0.4878496102705181, "grad_norm": 916.0, "learning_rate": 5.439196812242186e-05, "loss": 22.0005, "step": 11704 }, { "epoch": 0.48789129256804636, "grad_norm": 434.0, "learning_rate": 5.438524415713331e-05, "loss": 16.0002, "step": 11705 }, { "epoch": 0.4879329748655746, "grad_norm": 262.0, "learning_rate": 5.4378520111923304e-05, "loss": 13.5643, "step": 11706 }, { "epoch": 0.48797465716310284, "grad_norm": 235.0, "learning_rate": 5.437179598691439e-05, "loss": 11.3754, "step": 11707 }, { "epoch": 0.48801633946063105, "grad_norm": 126.5, "learning_rate": 5.436507178222909e-05, "loss": 10.0631, "step": 11708 }, { "epoch": 0.4880580217581593, "grad_norm": 173.0, "learning_rate": 5.435834749798997e-05, "loss": 10.6877, "step": 11709 }, { "epoch": 0.48809970405568753, "grad_norm": 334.0, "learning_rate": 5.4351623134319584e-05, "loss": 13.6877, "step": 11710 }, { "epoch": 0.4881413863532158, "grad_norm": 436.0, "learning_rate": 5.434489869134048e-05, "loss": 16.5003, "step": 11711 }, { "epoch": 0.488183068650744, "grad_norm": 298.0, "learning_rate": 5.4338174169175204e-05, "loss": 13.5629, "step": 11712 }, { "epoch": 0.4882247509482723, "grad_norm": 280.0, "learning_rate": 5.433144956794634e-05, "loss": 12.2505, "step": 11713 }, { "epoch": 0.4882664332458005, "grad_norm": 270.0, "learning_rate": 5.43247248877764e-05, "loss": 11.6252, "step": 11714 }, { "epoch": 0.48830811554332876, "grad_norm": 284.0, "learning_rate": 5.431800012878798e-05, "loss": 12.0641, "step": 11715 }, { "epoch": 0.48834979784085697, "grad_norm": 92.5, "learning_rate": 5.4311275291103616e-05, "loss": 8.1877, "step": 11716 }, { "epoch": 0.48839148013838524, "grad_norm": 424.0, "learning_rate": 5.4304550374845884e-05, "loss": 14.0654, "step": 11717 }, { "epoch": 0.48843316243591345, "grad_norm": 300.0, "learning_rate": 5.429782538013734e-05, "loss": 13.0628, "step": 11718 }, { "epoch": 0.4884748447334417, "grad_norm": 188.0, "learning_rate": 5.429110030710054e-05, "loss": 11.1879, "step": 11719 }, { "epoch": 0.4885165270309699, "grad_norm": 366.0, "learning_rate": 5.428437515585806e-05, "loss": 14.9378, "step": 11720 }, { "epoch": 0.4885582093284982, "grad_norm": 736.0, "learning_rate": 5.427764992653246e-05, "loss": 21.5036, "step": 11721 }, { "epoch": 0.4885998916260264, "grad_norm": 235.0, "learning_rate": 5.427092461924631e-05, "loss": 8.0636, "step": 11722 }, { "epoch": 0.4886415739235547, "grad_norm": 446.0, "learning_rate": 5.426419923412218e-05, "loss": 14.4379, "step": 11723 }, { "epoch": 0.4886832562210829, "grad_norm": 1272.0, "learning_rate": 5.4257473771282655e-05, "loss": 32.7501, "step": 11724 }, { "epoch": 0.48872493851861115, "grad_norm": 98.5, "learning_rate": 5.4250748230850255e-05, "loss": 9.5629, "step": 11725 }, { "epoch": 0.48876662081613936, "grad_norm": 129.0, "learning_rate": 5.424402261294762e-05, "loss": 6.2818, "step": 11726 }, { "epoch": 0.48880830311366763, "grad_norm": 800.0, "learning_rate": 5.4237296917697286e-05, "loss": 23.3753, "step": 11727 }, { "epoch": 0.48884998541119584, "grad_norm": 376.0, "learning_rate": 5.423057114522185e-05, "loss": 15.0627, "step": 11728 }, { "epoch": 0.4888916677087241, "grad_norm": 171.0, "learning_rate": 5.4223845295643884e-05, "loss": 11.7508, "step": 11729 }, { "epoch": 0.4889333500062523, "grad_norm": 245.0, "learning_rate": 5.4217119369085945e-05, "loss": 13.0015, "step": 11730 }, { "epoch": 0.4889750323037806, "grad_norm": 374.0, "learning_rate": 5.421039336567064e-05, "loss": 14.8753, "step": 11731 }, { "epoch": 0.4890167146013088, "grad_norm": 147.0, "learning_rate": 5.420366728552054e-05, "loss": 9.5004, "step": 11732 }, { "epoch": 0.48905839689883707, "grad_norm": 109.0, "learning_rate": 5.419694112875824e-05, "loss": 9.5626, "step": 11733 }, { "epoch": 0.4891000791963653, "grad_norm": 416.0, "learning_rate": 5.4190214895506305e-05, "loss": 14.4402, "step": 11734 }, { "epoch": 0.48914176149389355, "grad_norm": 556.0, "learning_rate": 5.418348858588733e-05, "loss": 17.3752, "step": 11735 }, { "epoch": 0.48918344379142176, "grad_norm": 156.0, "learning_rate": 5.417676220002391e-05, "loss": 10.5009, "step": 11736 }, { "epoch": 0.48922512608895, "grad_norm": 500.0, "learning_rate": 5.4170035738038625e-05, "loss": 13.689, "step": 11737 }, { "epoch": 0.48926680838647824, "grad_norm": 346.0, "learning_rate": 5.416330920005406e-05, "loss": 13.6895, "step": 11738 }, { "epoch": 0.4893084906840065, "grad_norm": 194.0, "learning_rate": 5.415658258619283e-05, "loss": 11.7502, "step": 11739 }, { "epoch": 0.4893501729815347, "grad_norm": 230.0, "learning_rate": 5.414985589657751e-05, "loss": 11.1258, "step": 11740 }, { "epoch": 0.489391855279063, "grad_norm": 197.0, "learning_rate": 5.4143129131330696e-05, "loss": 12.2507, "step": 11741 }, { "epoch": 0.48943353757659125, "grad_norm": 247.0, "learning_rate": 5.413640229057498e-05, "loss": 11.6883, "step": 11742 }, { "epoch": 0.48947521987411946, "grad_norm": 103.5, "learning_rate": 5.412967537443298e-05, "loss": 7.6566, "step": 11743 }, { "epoch": 0.4895169021716477, "grad_norm": 272.0, "learning_rate": 5.412294838302726e-05, "loss": 13.6879, "step": 11744 }, { "epoch": 0.48955858446917594, "grad_norm": 572.0, "learning_rate": 5.411622131648045e-05, "loss": 18.5004, "step": 11745 }, { "epoch": 0.4896002667667042, "grad_norm": 123.5, "learning_rate": 5.410949417491514e-05, "loss": 11.1256, "step": 11746 }, { "epoch": 0.4896419490642324, "grad_norm": 50.5, "learning_rate": 5.4102766958453945e-05, "loss": 7.7191, "step": 11747 }, { "epoch": 0.4896836313617607, "grad_norm": 332.0, "learning_rate": 5.4096039667219445e-05, "loss": 12.0002, "step": 11748 }, { "epoch": 0.4897253136592889, "grad_norm": 170.0, "learning_rate": 5.408931230133426e-05, "loss": 10.0627, "step": 11749 }, { "epoch": 0.48976699595681716, "grad_norm": 72.0, "learning_rate": 5.4082584860920993e-05, "loss": 8.4383, "step": 11750 }, { "epoch": 0.4898086782543454, "grad_norm": 800.0, "learning_rate": 5.4075857346102254e-05, "loss": 25.5002, "step": 11751 }, { "epoch": 0.48985036055187364, "grad_norm": 132.0, "learning_rate": 5.4069129757000656e-05, "loss": 9.8753, "step": 11752 }, { "epoch": 0.48989204284940185, "grad_norm": 380.0, "learning_rate": 5.40624020937388e-05, "loss": 13.9379, "step": 11753 }, { "epoch": 0.4899337251469301, "grad_norm": 306.0, "learning_rate": 5.4055674356439325e-05, "loss": 12.5627, "step": 11754 }, { "epoch": 0.48997540744445833, "grad_norm": 692.0, "learning_rate": 5.404894654522481e-05, "loss": 17.3758, "step": 11755 }, { "epoch": 0.4900170897419866, "grad_norm": 136.0, "learning_rate": 5.404221866021789e-05, "loss": 10.3129, "step": 11756 }, { "epoch": 0.4900587720395148, "grad_norm": 192.0, "learning_rate": 5.403549070154118e-05, "loss": 11.2511, "step": 11757 }, { "epoch": 0.4901004543370431, "grad_norm": 98.5, "learning_rate": 5.402876266931729e-05, "loss": 9.3755, "step": 11758 }, { "epoch": 0.4901421366345713, "grad_norm": 756.0, "learning_rate": 5.4022034563668834e-05, "loss": 21.8758, "step": 11759 }, { "epoch": 0.49018381893209956, "grad_norm": 58.75, "learning_rate": 5.401530638471844e-05, "loss": 7.0316, "step": 11760 }, { "epoch": 0.49022550122962777, "grad_norm": 358.0, "learning_rate": 5.400857813258875e-05, "loss": 14.9378, "step": 11761 }, { "epoch": 0.49026718352715604, "grad_norm": 302.0, "learning_rate": 5.400184980740235e-05, "loss": 13.1256, "step": 11762 }, { "epoch": 0.49030886582468425, "grad_norm": 201.0, "learning_rate": 5.399512140928188e-05, "loss": 11.1877, "step": 11763 }, { "epoch": 0.4903505481222125, "grad_norm": 464.0, "learning_rate": 5.3988392938349975e-05, "loss": 15.6252, "step": 11764 }, { "epoch": 0.4903922304197407, "grad_norm": 796.0, "learning_rate": 5.398166439472926e-05, "loss": 26.2504, "step": 11765 }, { "epoch": 0.490433912717269, "grad_norm": 169.0, "learning_rate": 5.397493577854236e-05, "loss": 10.5004, "step": 11766 }, { "epoch": 0.4904755950147972, "grad_norm": 254.0, "learning_rate": 5.396820708991189e-05, "loss": 10.313, "step": 11767 }, { "epoch": 0.4905172773123255, "grad_norm": 89.5, "learning_rate": 5.39614783289605e-05, "loss": 7.5014, "step": 11768 }, { "epoch": 0.4905589596098537, "grad_norm": 210.0, "learning_rate": 5.395474949581082e-05, "loss": 11.9377, "step": 11769 }, { "epoch": 0.49060064190738195, "grad_norm": 168.0, "learning_rate": 5.394802059058547e-05, "loss": 8.626, "step": 11770 }, { "epoch": 0.49064232420491016, "grad_norm": 628.0, "learning_rate": 5.39412916134071e-05, "loss": 18.8753, "step": 11771 }, { "epoch": 0.49068400650243843, "grad_norm": 336.0, "learning_rate": 5.393456256439834e-05, "loss": 14.3753, "step": 11772 }, { "epoch": 0.49072568879996664, "grad_norm": 540.0, "learning_rate": 5.392783344368183e-05, "loss": 18.0012, "step": 11773 }, { "epoch": 0.4907673710974949, "grad_norm": 478.0, "learning_rate": 5.392110425138021e-05, "loss": 16.5003, "step": 11774 }, { "epoch": 0.4908090533950231, "grad_norm": 236.0, "learning_rate": 5.391437498761609e-05, "loss": 12.3129, "step": 11775 }, { "epoch": 0.4908507356925514, "grad_norm": 568.0, "learning_rate": 5.3907645652512165e-05, "loss": 17.7509, "step": 11776 }, { "epoch": 0.4908924179900796, "grad_norm": 245.0, "learning_rate": 5.390091624619105e-05, "loss": 12.6878, "step": 11777 }, { "epoch": 0.49093410028760787, "grad_norm": 472.0, "learning_rate": 5.389418676877538e-05, "loss": 17.2508, "step": 11778 }, { "epoch": 0.4909757825851361, "grad_norm": 446.0, "learning_rate": 5.388745722038781e-05, "loss": 16.3762, "step": 11779 }, { "epoch": 0.49101746488266435, "grad_norm": 71.5, "learning_rate": 5.388072760115099e-05, "loss": 7.344, "step": 11780 }, { "epoch": 0.49105914718019256, "grad_norm": 424.0, "learning_rate": 5.387399791118758e-05, "loss": 15.5629, "step": 11781 }, { "epoch": 0.4911008294777208, "grad_norm": 374.0, "learning_rate": 5.3867268150620196e-05, "loss": 13.9384, "step": 11782 }, { "epoch": 0.49114251177524904, "grad_norm": 326.0, "learning_rate": 5.386053831957152e-05, "loss": 14.0004, "step": 11783 }, { "epoch": 0.4911841940727773, "grad_norm": 358.0, "learning_rate": 5.385380841816418e-05, "loss": 13.8126, "step": 11784 }, { "epoch": 0.4912258763703055, "grad_norm": 232.0, "learning_rate": 5.384707844652084e-05, "loss": 12.1878, "step": 11785 }, { "epoch": 0.4912675586678338, "grad_norm": 223.0, "learning_rate": 5.3840348404764165e-05, "loss": 5.7818, "step": 11786 }, { "epoch": 0.491309240965362, "grad_norm": 296.0, "learning_rate": 5.3833618293016786e-05, "loss": 10.6879, "step": 11787 }, { "epoch": 0.49135092326289026, "grad_norm": 352.0, "learning_rate": 5.3826888111401365e-05, "loss": 13.6252, "step": 11788 }, { "epoch": 0.49139260556041847, "grad_norm": 384.0, "learning_rate": 5.382015786004059e-05, "loss": 14.9384, "step": 11789 }, { "epoch": 0.49143428785794674, "grad_norm": 540.0, "learning_rate": 5.381342753905708e-05, "loss": 17.2502, "step": 11790 }, { "epoch": 0.49147597015547495, "grad_norm": 1528.0, "learning_rate": 5.380669714857353e-05, "loss": 31.6253, "step": 11791 }, { "epoch": 0.4915176524530032, "grad_norm": 282.0, "learning_rate": 5.379996668871259e-05, "loss": 12.6883, "step": 11792 }, { "epoch": 0.49155933475053143, "grad_norm": 60.75, "learning_rate": 5.379323615959691e-05, "loss": 7.8754, "step": 11793 }, { "epoch": 0.4916010170480597, "grad_norm": 948.0, "learning_rate": 5.378650556134916e-05, "loss": 24.7561, "step": 11794 }, { "epoch": 0.4916426993455879, "grad_norm": 324.0, "learning_rate": 5.3779774894092016e-05, "loss": 11.0004, "step": 11795 }, { "epoch": 0.4916843816431162, "grad_norm": 296.0, "learning_rate": 5.377304415794814e-05, "loss": 13.563, "step": 11796 }, { "epoch": 0.4917260639406444, "grad_norm": 696.0, "learning_rate": 5.37663133530402e-05, "loss": 20.8753, "step": 11797 }, { "epoch": 0.49176774623817265, "grad_norm": 960.0, "learning_rate": 5.375958247949087e-05, "loss": 24.6253, "step": 11798 }, { "epoch": 0.49180942853570087, "grad_norm": 668.0, "learning_rate": 5.37528515374228e-05, "loss": 21.0001, "step": 11799 }, { "epoch": 0.49185111083322913, "grad_norm": 506.0, "learning_rate": 5.374612052695869e-05, "loss": 16.6252, "step": 11800 }, { "epoch": 0.49189279313075734, "grad_norm": 320.0, "learning_rate": 5.373938944822119e-05, "loss": 14.2508, "step": 11801 }, { "epoch": 0.4919344754282856, "grad_norm": 177.0, "learning_rate": 5.373265830133298e-05, "loss": 10.0009, "step": 11802 }, { "epoch": 0.4919761577258138, "grad_norm": 352.0, "learning_rate": 5.372592708641676e-05, "loss": 15.1252, "step": 11803 }, { "epoch": 0.4920178400233421, "grad_norm": 127.5, "learning_rate": 5.371919580359518e-05, "loss": 10.5628, "step": 11804 }, { "epoch": 0.4920595223208703, "grad_norm": 376.0, "learning_rate": 5.371246445299093e-05, "loss": 15.6877, "step": 11805 }, { "epoch": 0.49210120461839857, "grad_norm": 352.0, "learning_rate": 5.370573303472668e-05, "loss": 13.6252, "step": 11806 }, { "epoch": 0.4921428869159268, "grad_norm": 2160.0, "learning_rate": 5.369900154892512e-05, "loss": 45.251, "step": 11807 }, { "epoch": 0.49218456921345505, "grad_norm": 438.0, "learning_rate": 5.369226999570893e-05, "loss": 15.8752, "step": 11808 }, { "epoch": 0.49222625151098326, "grad_norm": 268.0, "learning_rate": 5.3685538375200796e-05, "loss": 10.8134, "step": 11809 }, { "epoch": 0.4922679338085115, "grad_norm": 312.0, "learning_rate": 5.3678806687523384e-05, "loss": 15.3755, "step": 11810 }, { "epoch": 0.49230961610603974, "grad_norm": 652.0, "learning_rate": 5.367207493279941e-05, "loss": 21.2505, "step": 11811 }, { "epoch": 0.492351298403568, "grad_norm": 516.0, "learning_rate": 5.366534311115153e-05, "loss": 17.001, "step": 11812 }, { "epoch": 0.4923929807010962, "grad_norm": 1848.0, "learning_rate": 5.365861122270245e-05, "loss": 42.7503, "step": 11813 }, { "epoch": 0.4924346629986245, "grad_norm": 96.5, "learning_rate": 5.365187926757486e-05, "loss": 8.5005, "step": 11814 }, { "epoch": 0.49247634529615275, "grad_norm": 190.0, "learning_rate": 5.3645147245891436e-05, "loss": 10.9377, "step": 11815 }, { "epoch": 0.49251802759368096, "grad_norm": 204.0, "learning_rate": 5.363841515777489e-05, "loss": 10.2507, "step": 11816 }, { "epoch": 0.49255970989120923, "grad_norm": 53.5, "learning_rate": 5.36316830033479e-05, "loss": 7.3757, "step": 11817 }, { "epoch": 0.49260139218873744, "grad_norm": 386.0, "learning_rate": 5.362495078273318e-05, "loss": 13.0003, "step": 11818 }, { "epoch": 0.4926430744862657, "grad_norm": 474.0, "learning_rate": 5.3618218496053384e-05, "loss": 15.563, "step": 11819 }, { "epoch": 0.4926847567837939, "grad_norm": 206.0, "learning_rate": 5.3611486143431255e-05, "loss": 12.4378, "step": 11820 }, { "epoch": 0.4927264390813222, "grad_norm": 210.0, "learning_rate": 5.360475372498946e-05, "loss": 11.1254, "step": 11821 }, { "epoch": 0.4927681213788504, "grad_norm": 159.0, "learning_rate": 5.359802124085072e-05, "loss": 10.3127, "step": 11822 }, { "epoch": 0.49280980367637867, "grad_norm": 616.0, "learning_rate": 5.359128869113771e-05, "loss": 19.2502, "step": 11823 }, { "epoch": 0.4928514859739069, "grad_norm": 1112.0, "learning_rate": 5.3584556075973145e-05, "loss": 27.7503, "step": 11824 }, { "epoch": 0.49289316827143514, "grad_norm": 368.0, "learning_rate": 5.357782339547974e-05, "loss": 15.3754, "step": 11825 }, { "epoch": 0.49293485056896336, "grad_norm": 73.0, "learning_rate": 5.357109064978016e-05, "loss": 5.9066, "step": 11826 }, { "epoch": 0.4929765328664916, "grad_norm": 450.0, "learning_rate": 5.356435783899716e-05, "loss": 17.7503, "step": 11827 }, { "epoch": 0.49301821516401984, "grad_norm": 330.0, "learning_rate": 5.355762496325342e-05, "loss": 14.3752, "step": 11828 }, { "epoch": 0.4930598974615481, "grad_norm": 480.0, "learning_rate": 5.355089202267165e-05, "loss": 17.2504, "step": 11829 }, { "epoch": 0.4931015797590763, "grad_norm": 332.0, "learning_rate": 5.354415901737455e-05, "loss": 13.0001, "step": 11830 }, { "epoch": 0.4931432620566046, "grad_norm": 58.25, "learning_rate": 5.353742594748484e-05, "loss": 8.9378, "step": 11831 }, { "epoch": 0.4931849443541328, "grad_norm": 75.5, "learning_rate": 5.3530692813125226e-05, "loss": 8.2506, "step": 11832 }, { "epoch": 0.49322662665166106, "grad_norm": 260.0, "learning_rate": 5.352395961441843e-05, "loss": 12.6877, "step": 11833 }, { "epoch": 0.49326830894918927, "grad_norm": 516.0, "learning_rate": 5.351722635148715e-05, "loss": 17.7502, "step": 11834 }, { "epoch": 0.49330999124671754, "grad_norm": 73.5, "learning_rate": 5.351049302445411e-05, "loss": 8.1883, "step": 11835 }, { "epoch": 0.49335167354424575, "grad_norm": 404.0, "learning_rate": 5.350375963344203e-05, "loss": 15.0629, "step": 11836 }, { "epoch": 0.493393355841774, "grad_norm": 113.0, "learning_rate": 5.3497026178573604e-05, "loss": 9.3753, "step": 11837 }, { "epoch": 0.49343503813930223, "grad_norm": 386.0, "learning_rate": 5.3490292659971565e-05, "loss": 14.2534, "step": 11838 }, { "epoch": 0.4934767204368305, "grad_norm": 676.0, "learning_rate": 5.348355907775864e-05, "loss": 19.0003, "step": 11839 }, { "epoch": 0.4935184027343587, "grad_norm": 143.0, "learning_rate": 5.3476825432057545e-05, "loss": 9.8133, "step": 11840 }, { "epoch": 0.493560085031887, "grad_norm": 238.0, "learning_rate": 5.3470091722991e-05, "loss": 11.1877, "step": 11841 }, { "epoch": 0.4936017673294152, "grad_norm": 101.5, "learning_rate": 5.3463357950681716e-05, "loss": 9.3754, "step": 11842 }, { "epoch": 0.49364344962694345, "grad_norm": 143.0, "learning_rate": 5.345662411525243e-05, "loss": 9.6253, "step": 11843 }, { "epoch": 0.49368513192447167, "grad_norm": 106.5, "learning_rate": 5.344989021682587e-05, "loss": 9.2505, "step": 11844 }, { "epoch": 0.49372681422199993, "grad_norm": 772.0, "learning_rate": 5.344315625552474e-05, "loss": 23.3755, "step": 11845 }, { "epoch": 0.49376849651952814, "grad_norm": 330.0, "learning_rate": 5.343642223147179e-05, "loss": 13.8752, "step": 11846 }, { "epoch": 0.4938101788170564, "grad_norm": 58.25, "learning_rate": 5.342968814478975e-05, "loss": 6.9689, "step": 11847 }, { "epoch": 0.4938518611145846, "grad_norm": 664.0, "learning_rate": 5.342295399560132e-05, "loss": 19.3753, "step": 11848 }, { "epoch": 0.4938935434121129, "grad_norm": 227.0, "learning_rate": 5.3416219784029265e-05, "loss": 11.4381, "step": 11849 }, { "epoch": 0.4939352257096411, "grad_norm": 246.0, "learning_rate": 5.3409485510196286e-05, "loss": 12.563, "step": 11850 }, { "epoch": 0.49397690800716937, "grad_norm": 181.0, "learning_rate": 5.340275117422513e-05, "loss": 11.0011, "step": 11851 }, { "epoch": 0.4940185903046976, "grad_norm": 243.0, "learning_rate": 5.339601677623854e-05, "loss": 12.0626, "step": 11852 }, { "epoch": 0.49406027260222585, "grad_norm": 2384.0, "learning_rate": 5.338928231635925e-05, "loss": 47.5002, "step": 11853 }, { "epoch": 0.49410195489975406, "grad_norm": 294.0, "learning_rate": 5.338254779470998e-05, "loss": 12.6877, "step": 11854 }, { "epoch": 0.4941436371972823, "grad_norm": 280.0, "learning_rate": 5.337581321141348e-05, "loss": 13.0632, "step": 11855 }, { "epoch": 0.49418531949481054, "grad_norm": 548.0, "learning_rate": 5.336907856659248e-05, "loss": 18.0007, "step": 11856 }, { "epoch": 0.4942270017923388, "grad_norm": 138.0, "learning_rate": 5.336234386036973e-05, "loss": 10.0001, "step": 11857 }, { "epoch": 0.494268684089867, "grad_norm": 188.0, "learning_rate": 5.3355609092867966e-05, "loss": 11.4378, "step": 11858 }, { "epoch": 0.4943103663873953, "grad_norm": 79.5, "learning_rate": 5.334887426420993e-05, "loss": 6.9065, "step": 11859 }, { "epoch": 0.4943520486849235, "grad_norm": 410.0, "learning_rate": 5.3342139374518354e-05, "loss": 16.5003, "step": 11860 }, { "epoch": 0.49439373098245176, "grad_norm": 188.0, "learning_rate": 5.333540442391599e-05, "loss": 10.3753, "step": 11861 }, { "epoch": 0.49443541327998, "grad_norm": 448.0, "learning_rate": 5.332866941252559e-05, "loss": 16.8753, "step": 11862 }, { "epoch": 0.49447709557750824, "grad_norm": 268.0, "learning_rate": 5.332193434046988e-05, "loss": 12.6879, "step": 11863 }, { "epoch": 0.49451877787503645, "grad_norm": 928.0, "learning_rate": 5.3315199207871634e-05, "loss": 27.2504, "step": 11864 }, { "epoch": 0.4945604601725647, "grad_norm": 454.0, "learning_rate": 5.33084640148536e-05, "loss": 13.8128, "step": 11865 }, { "epoch": 0.49460214247009293, "grad_norm": 152.0, "learning_rate": 5.3301728761538505e-05, "loss": 10.8763, "step": 11866 }, { "epoch": 0.4946438247676212, "grad_norm": 364.0, "learning_rate": 5.32949934480491e-05, "loss": 14.2501, "step": 11867 }, { "epoch": 0.4946855070651494, "grad_norm": 532.0, "learning_rate": 5.328825807450817e-05, "loss": 19.0002, "step": 11868 }, { "epoch": 0.4947271893626777, "grad_norm": 298.0, "learning_rate": 5.3281522641038426e-05, "loss": 13.4381, "step": 11869 }, { "epoch": 0.4947688716602059, "grad_norm": 200.0, "learning_rate": 5.327478714776265e-05, "loss": 8.814, "step": 11870 }, { "epoch": 0.49481055395773416, "grad_norm": 151.0, "learning_rate": 5.326805159480358e-05, "loss": 8.6879, "step": 11871 }, { "epoch": 0.49485223625526237, "grad_norm": 348.0, "learning_rate": 5.326131598228399e-05, "loss": 15.2503, "step": 11872 }, { "epoch": 0.49489391855279063, "grad_norm": 628.0, "learning_rate": 5.3254580310326616e-05, "loss": 21.3752, "step": 11873 }, { "epoch": 0.49493560085031885, "grad_norm": 228.0, "learning_rate": 5.324784457905423e-05, "loss": 11.4384, "step": 11874 }, { "epoch": 0.4949772831478471, "grad_norm": 422.0, "learning_rate": 5.324110878858959e-05, "loss": 16.1251, "step": 11875 }, { "epoch": 0.4950189654453753, "grad_norm": 308.0, "learning_rate": 5.323437293905544e-05, "loss": 14.0004, "step": 11876 }, { "epoch": 0.4950606477429036, "grad_norm": 223.0, "learning_rate": 5.3227637030574575e-05, "loss": 11.0002, "step": 11877 }, { "epoch": 0.4951023300404318, "grad_norm": 640.0, "learning_rate": 5.3220901063269736e-05, "loss": 21.5001, "step": 11878 }, { "epoch": 0.49514401233796007, "grad_norm": 213.0, "learning_rate": 5.3214165037263684e-05, "loss": 11.2504, "step": 11879 }, { "epoch": 0.4951856946354883, "grad_norm": 340.0, "learning_rate": 5.32074289526792e-05, "loss": 13.5003, "step": 11880 }, { "epoch": 0.49522737693301655, "grad_norm": 374.0, "learning_rate": 5.3200692809639016e-05, "loss": 15.6877, "step": 11881 }, { "epoch": 0.49526905923054476, "grad_norm": 274.0, "learning_rate": 5.319395660826594e-05, "loss": 12.1252, "step": 11882 }, { "epoch": 0.49531074152807303, "grad_norm": 294.0, "learning_rate": 5.318722034868272e-05, "loss": 14.1877, "step": 11883 }, { "epoch": 0.49535242382560124, "grad_norm": 241.0, "learning_rate": 5.3180484031012126e-05, "loss": 10.8129, "step": 11884 }, { "epoch": 0.4953941061231295, "grad_norm": 318.0, "learning_rate": 5.317374765537693e-05, "loss": 13.5634, "step": 11885 }, { "epoch": 0.4954357884206577, "grad_norm": 380.0, "learning_rate": 5.316701122189989e-05, "loss": 15.6877, "step": 11886 }, { "epoch": 0.495477470718186, "grad_norm": 217.0, "learning_rate": 5.3160274730703796e-05, "loss": 11.0003, "step": 11887 }, { "epoch": 0.49551915301571425, "grad_norm": 400.0, "learning_rate": 5.3153538181911414e-05, "loss": 15.0004, "step": 11888 }, { "epoch": 0.49556083531324246, "grad_norm": 326.0, "learning_rate": 5.314680157564551e-05, "loss": 13.5633, "step": 11889 }, { "epoch": 0.49560251761077073, "grad_norm": 119.0, "learning_rate": 5.314006491202887e-05, "loss": 9.5628, "step": 11890 }, { "epoch": 0.49564419990829894, "grad_norm": 502.0, "learning_rate": 5.3133328191184286e-05, "loss": 17.5017, "step": 11891 }, { "epoch": 0.4956858822058272, "grad_norm": 536.0, "learning_rate": 5.3126591413234506e-05, "loss": 16.5033, "step": 11892 }, { "epoch": 0.4957275645033554, "grad_norm": 712.0, "learning_rate": 5.311985457830232e-05, "loss": 20.1254, "step": 11893 }, { "epoch": 0.4957692468008837, "grad_norm": 161.0, "learning_rate": 5.3113117686510505e-05, "loss": 10.8132, "step": 11894 }, { "epoch": 0.4958109290984119, "grad_norm": 360.0, "learning_rate": 5.3106380737981855e-05, "loss": 15.6878, "step": 11895 }, { "epoch": 0.49585261139594017, "grad_norm": 1520.0, "learning_rate": 5.309964373283913e-05, "loss": 35.0002, "step": 11896 }, { "epoch": 0.4958942936934684, "grad_norm": 166.0, "learning_rate": 5.309290667120512e-05, "loss": 11.8754, "step": 11897 }, { "epoch": 0.49593597599099665, "grad_norm": 366.0, "learning_rate": 5.308616955320263e-05, "loss": 13.6252, "step": 11898 }, { "epoch": 0.49597765828852486, "grad_norm": 568.0, "learning_rate": 5.307943237895441e-05, "loss": 16.7542, "step": 11899 }, { "epoch": 0.4960193405860531, "grad_norm": 122.5, "learning_rate": 5.3072695148583264e-05, "loss": 10.0002, "step": 11900 }, { "epoch": 0.49606102288358134, "grad_norm": 296.0, "learning_rate": 5.306595786221196e-05, "loss": 15.1877, "step": 11901 }, { "epoch": 0.4961027051811096, "grad_norm": 548.0, "learning_rate": 5.3059220519963314e-05, "loss": 18.5002, "step": 11902 }, { "epoch": 0.4961443874786378, "grad_norm": 245.0, "learning_rate": 5.305248312196011e-05, "loss": 13.1282, "step": 11903 }, { "epoch": 0.4961860697761661, "grad_norm": 260.0, "learning_rate": 5.304574566832513e-05, "loss": 12.3753, "step": 11904 }, { "epoch": 0.4962277520736943, "grad_norm": 282.0, "learning_rate": 5.303900815918116e-05, "loss": 13.6879, "step": 11905 }, { "epoch": 0.49626943437122256, "grad_norm": 500.0, "learning_rate": 5.303227059465099e-05, "loss": 18.2506, "step": 11906 }, { "epoch": 0.4963111166687508, "grad_norm": 524.0, "learning_rate": 5.3025532974857426e-05, "loss": 18.8751, "step": 11907 }, { "epoch": 0.49635279896627904, "grad_norm": 302.0, "learning_rate": 5.301879529992326e-05, "loss": 13.3756, "step": 11908 }, { "epoch": 0.49639448126380725, "grad_norm": 147.0, "learning_rate": 5.3012057569971285e-05, "loss": 10.0002, "step": 11909 }, { "epoch": 0.4964361635613355, "grad_norm": 130.0, "learning_rate": 5.300531978512428e-05, "loss": 9.5627, "step": 11910 }, { "epoch": 0.49647784585886373, "grad_norm": 460.0, "learning_rate": 5.2998581945505067e-05, "loss": 17.2501, "step": 11911 }, { "epoch": 0.496519528156392, "grad_norm": 336.0, "learning_rate": 5.299184405123643e-05, "loss": 14.0004, "step": 11912 }, { "epoch": 0.4965612104539202, "grad_norm": 207.0, "learning_rate": 5.298510610244116e-05, "loss": 11.7504, "step": 11913 }, { "epoch": 0.4966028927514485, "grad_norm": 510.0, "learning_rate": 5.297836809924206e-05, "loss": 17.0002, "step": 11914 }, { "epoch": 0.4966445750489767, "grad_norm": 212.0, "learning_rate": 5.2971630041761945e-05, "loss": 11.5627, "step": 11915 }, { "epoch": 0.49668625734650496, "grad_norm": 212.0, "learning_rate": 5.2964891930123614e-05, "loss": 11.1878, "step": 11916 }, { "epoch": 0.49672793964403317, "grad_norm": 210.0, "learning_rate": 5.2958153764449866e-05, "loss": 12.2502, "step": 11917 }, { "epoch": 0.49676962194156143, "grad_norm": 88.5, "learning_rate": 5.29514155448635e-05, "loss": 8.7503, "step": 11918 }, { "epoch": 0.49681130423908965, "grad_norm": 255.0, "learning_rate": 5.294467727148732e-05, "loss": 13.2501, "step": 11919 }, { "epoch": 0.4968529865366179, "grad_norm": 354.0, "learning_rate": 5.2937938944444146e-05, "loss": 10.4401, "step": 11920 }, { "epoch": 0.4968946688341461, "grad_norm": 181.0, "learning_rate": 5.293120056385677e-05, "loss": 8.9379, "step": 11921 }, { "epoch": 0.4969363511316744, "grad_norm": 107.0, "learning_rate": 5.2924462129847997e-05, "loss": 9.7503, "step": 11922 }, { "epoch": 0.4969780334292026, "grad_norm": 222.0, "learning_rate": 5.291772364254064e-05, "loss": 11.6254, "step": 11923 }, { "epoch": 0.49701971572673087, "grad_norm": 504.0, "learning_rate": 5.291098510205752e-05, "loss": 17.5003, "step": 11924 }, { "epoch": 0.4970613980242591, "grad_norm": 213.0, "learning_rate": 5.290424650852144e-05, "loss": 10.4379, "step": 11925 }, { "epoch": 0.49710308032178735, "grad_norm": 282.0, "learning_rate": 5.2897507862055184e-05, "loss": 14.001, "step": 11926 }, { "epoch": 0.49714476261931556, "grad_norm": 644.0, "learning_rate": 5.289076916278162e-05, "loss": 19.5018, "step": 11927 }, { "epoch": 0.49718644491684383, "grad_norm": 396.0, "learning_rate": 5.2884030410823515e-05, "loss": 14.3755, "step": 11928 }, { "epoch": 0.49722812721437204, "grad_norm": 652.0, "learning_rate": 5.28772916063037e-05, "loss": 18.376, "step": 11929 }, { "epoch": 0.4972698095119003, "grad_norm": 270.0, "learning_rate": 5.287055274934501e-05, "loss": 12.6877, "step": 11930 }, { "epoch": 0.4973114918094285, "grad_norm": 136.0, "learning_rate": 5.286381384007022e-05, "loss": 9.3127, "step": 11931 }, { "epoch": 0.4973531741069568, "grad_norm": 520.0, "learning_rate": 5.285707487860218e-05, "loss": 16.5003, "step": 11932 }, { "epoch": 0.497394856404485, "grad_norm": 72.0, "learning_rate": 5.285033586506369e-05, "loss": 6.4065, "step": 11933 }, { "epoch": 0.49743653870201326, "grad_norm": 152.0, "learning_rate": 5.284359679957758e-05, "loss": 10.5626, "step": 11934 }, { "epoch": 0.4974782209995415, "grad_norm": 318.0, "learning_rate": 5.283685768226666e-05, "loss": 10.3138, "step": 11935 }, { "epoch": 0.49751990329706974, "grad_norm": 616.0, "learning_rate": 5.283011851325377e-05, "loss": 18.5002, "step": 11936 }, { "epoch": 0.49756158559459795, "grad_norm": 324.0, "learning_rate": 5.2823379292661703e-05, "loss": 13.3129, "step": 11937 }, { "epoch": 0.4976032678921262, "grad_norm": 656.0, "learning_rate": 5.281664002061331e-05, "loss": 19.7501, "step": 11938 }, { "epoch": 0.49764495018965443, "grad_norm": 444.0, "learning_rate": 5.280990069723139e-05, "loss": 12.7521, "step": 11939 }, { "epoch": 0.4976866324871827, "grad_norm": 532.0, "learning_rate": 5.280316132263878e-05, "loss": 16.7502, "step": 11940 }, { "epoch": 0.4977283147847109, "grad_norm": 288.0, "learning_rate": 5.2796421896958315e-05, "loss": 12.8751, "step": 11941 }, { "epoch": 0.4977699970822392, "grad_norm": 736.0, "learning_rate": 5.278968242031282e-05, "loss": 24.1252, "step": 11942 }, { "epoch": 0.4978116793797674, "grad_norm": 237.0, "learning_rate": 5.2782942892825094e-05, "loss": 12.3128, "step": 11943 }, { "epoch": 0.49785336167729566, "grad_norm": 888.0, "learning_rate": 5.2776203314618e-05, "loss": 24.2527, "step": 11944 }, { "epoch": 0.49789504397482387, "grad_norm": 234.0, "learning_rate": 5.2769463685814357e-05, "loss": 11.5004, "step": 11945 }, { "epoch": 0.49793672627235214, "grad_norm": 332.0, "learning_rate": 5.276272400653699e-05, "loss": 13.6253, "step": 11946 }, { "epoch": 0.49797840856988035, "grad_norm": 1072.0, "learning_rate": 5.275598427690873e-05, "loss": 24.5046, "step": 11947 }, { "epoch": 0.4980200908674086, "grad_norm": 316.0, "learning_rate": 5.274924449705242e-05, "loss": 13.3751, "step": 11948 }, { "epoch": 0.4980617731649368, "grad_norm": 131.0, "learning_rate": 5.274250466709088e-05, "loss": 8.5006, "step": 11949 }, { "epoch": 0.4981034554624651, "grad_norm": 896.0, "learning_rate": 5.2735764787146944e-05, "loss": 20.8807, "step": 11950 }, { "epoch": 0.4981451377599933, "grad_norm": 316.0, "learning_rate": 5.2729024857343454e-05, "loss": 12.8755, "step": 11951 }, { "epoch": 0.4981868200575216, "grad_norm": 338.0, "learning_rate": 5.272228487780323e-05, "loss": 13.7502, "step": 11952 }, { "epoch": 0.4982285023550498, "grad_norm": 568.0, "learning_rate": 5.271554484864915e-05, "loss": 13.5667, "step": 11953 }, { "epoch": 0.49827018465257805, "grad_norm": 334.0, "learning_rate": 5.2708804770004005e-05, "loss": 12.5003, "step": 11954 }, { "epoch": 0.49831186695010626, "grad_norm": 616.0, "learning_rate": 5.270206464199066e-05, "loss": 19.0003, "step": 11955 }, { "epoch": 0.49835354924763453, "grad_norm": 318.0, "learning_rate": 5.269532446473194e-05, "loss": 13.5002, "step": 11956 }, { "epoch": 0.49839523154516274, "grad_norm": 580.0, "learning_rate": 5.26885842383507e-05, "loss": 19.1257, "step": 11957 }, { "epoch": 0.498436913842691, "grad_norm": 149.0, "learning_rate": 5.268184396296978e-05, "loss": 9.9377, "step": 11958 }, { "epoch": 0.4984785961402192, "grad_norm": 418.0, "learning_rate": 5.2675103638712e-05, "loss": 14.0627, "step": 11959 }, { "epoch": 0.4985202784377475, "grad_norm": 198.0, "learning_rate": 5.2668363265700227e-05, "loss": 9.2503, "step": 11960 }, { "epoch": 0.49856196073527576, "grad_norm": 175.0, "learning_rate": 5.2661622844057305e-05, "loss": 9.8129, "step": 11961 }, { "epoch": 0.49860364303280397, "grad_norm": 306.0, "learning_rate": 5.265488237390606e-05, "loss": 14.3755, "step": 11962 }, { "epoch": 0.49864532533033223, "grad_norm": 294.0, "learning_rate": 5.264814185536935e-05, "loss": 13.6879, "step": 11963 }, { "epoch": 0.49868700762786045, "grad_norm": 406.0, "learning_rate": 5.264140128857e-05, "loss": 14.2505, "step": 11964 }, { "epoch": 0.4987286899253887, "grad_norm": 314.0, "learning_rate": 5.26346606736309e-05, "loss": 13.0626, "step": 11965 }, { "epoch": 0.4987703722229169, "grad_norm": 150.0, "learning_rate": 5.262792001067487e-05, "loss": 9.8753, "step": 11966 }, { "epoch": 0.4988120545204452, "grad_norm": 243.0, "learning_rate": 5.2621179299824774e-05, "loss": 12.2505, "step": 11967 }, { "epoch": 0.4988537368179734, "grad_norm": 179.0, "learning_rate": 5.2614438541203434e-05, "loss": 8.6886, "step": 11968 }, { "epoch": 0.49889541911550167, "grad_norm": 264.0, "learning_rate": 5.2607697734933733e-05, "loss": 13.0005, "step": 11969 }, { "epoch": 0.4989371014130299, "grad_norm": 700.0, "learning_rate": 5.2600956881138505e-05, "loss": 19.0011, "step": 11970 }, { "epoch": 0.49897878371055815, "grad_norm": 308.0, "learning_rate": 5.259421597994062e-05, "loss": 13.3753, "step": 11971 }, { "epoch": 0.49902046600808636, "grad_norm": 186.0, "learning_rate": 5.2587475031462906e-05, "loss": 10.9379, "step": 11972 }, { "epoch": 0.49906214830561463, "grad_norm": 434.0, "learning_rate": 5.258073403582823e-05, "loss": 16.5018, "step": 11973 }, { "epoch": 0.49910383060314284, "grad_norm": 193.0, "learning_rate": 5.2573992993159446e-05, "loss": 12.4389, "step": 11974 }, { "epoch": 0.4991455129006711, "grad_norm": 326.0, "learning_rate": 5.256725190357942e-05, "loss": 14.2508, "step": 11975 }, { "epoch": 0.4991871951981993, "grad_norm": 684.0, "learning_rate": 5.256051076721099e-05, "loss": 22.2504, "step": 11976 }, { "epoch": 0.4992288774957276, "grad_norm": 184.0, "learning_rate": 5.2553769584177014e-05, "loss": 11.7502, "step": 11977 }, { "epoch": 0.4992705597932558, "grad_norm": 139.0, "learning_rate": 5.254702835460037e-05, "loss": 9.6254, "step": 11978 }, { "epoch": 0.49931224209078406, "grad_norm": 584.0, "learning_rate": 5.254028707860391e-05, "loss": 18.8777, "step": 11979 }, { "epoch": 0.4993539243883123, "grad_norm": 462.0, "learning_rate": 5.25335457563105e-05, "loss": 17.2502, "step": 11980 }, { "epoch": 0.49939560668584054, "grad_norm": 312.0, "learning_rate": 5.252680438784299e-05, "loss": 13.1882, "step": 11981 }, { "epoch": 0.49943728898336875, "grad_norm": 354.0, "learning_rate": 5.252006297332425e-05, "loss": 13.8133, "step": 11982 }, { "epoch": 0.499478971280897, "grad_norm": 510.0, "learning_rate": 5.251332151287712e-05, "loss": 19.0026, "step": 11983 }, { "epoch": 0.49952065357842523, "grad_norm": 208.0, "learning_rate": 5.25065800066245e-05, "loss": 11.3756, "step": 11984 }, { "epoch": 0.4995623358759535, "grad_norm": 296.0, "learning_rate": 5.249983845468923e-05, "loss": 12.2506, "step": 11985 }, { "epoch": 0.4996040181734817, "grad_norm": 792.0, "learning_rate": 5.249309685719419e-05, "loss": 18.3794, "step": 11986 }, { "epoch": 0.49964570047101, "grad_norm": 620.0, "learning_rate": 5.2486355214262225e-05, "loss": 18.6254, "step": 11987 }, { "epoch": 0.4996873827685382, "grad_norm": 426.0, "learning_rate": 5.247961352601622e-05, "loss": 13.5634, "step": 11988 }, { "epoch": 0.49972906506606646, "grad_norm": 157.0, "learning_rate": 5.247287179257904e-05, "loss": 11.1256, "step": 11989 }, { "epoch": 0.49977074736359467, "grad_norm": 152.0, "learning_rate": 5.246613001407356e-05, "loss": 8.9381, "step": 11990 }, { "epoch": 0.49981242966112294, "grad_norm": 438.0, "learning_rate": 5.2459388190622625e-05, "loss": 16.3752, "step": 11991 }, { "epoch": 0.49985411195865115, "grad_norm": 264.0, "learning_rate": 5.245264632234913e-05, "loss": 12.3128, "step": 11992 }, { "epoch": 0.4998957942561794, "grad_norm": 478.0, "learning_rate": 5.2445904409375946e-05, "loss": 15.627, "step": 11993 }, { "epoch": 0.4999374765537076, "grad_norm": 412.0, "learning_rate": 5.243916245182593e-05, "loss": 15.3752, "step": 11994 }, { "epoch": 0.4999791588512359, "grad_norm": 185.0, "learning_rate": 5.243242044982196e-05, "loss": 10.6253, "step": 11995 }, { "epoch": 0.5000208411487641, "grad_norm": 460.0, "learning_rate": 5.242567840348691e-05, "loss": 16.3753, "step": 11996 }, { "epoch": 0.5000625234462923, "grad_norm": 211.0, "learning_rate": 5.241893631294367e-05, "loss": 11.3128, "step": 11997 }, { "epoch": 0.5001042057438206, "grad_norm": 648.0, "learning_rate": 5.241219417831509e-05, "loss": 16.6287, "step": 11998 }, { "epoch": 0.5001458880413489, "grad_norm": 596.0, "learning_rate": 5.240545199972405e-05, "loss": 19.5012, "step": 11999 }, { "epoch": 0.5001875703388771, "grad_norm": 182.0, "learning_rate": 5.239870977729344e-05, "loss": 9.3129, "step": 12000 }, { "epoch": 0.5002292526364053, "grad_norm": 276.0, "learning_rate": 5.2391967511146144e-05, "loss": 13.3127, "step": 12001 }, { "epoch": 0.5002709349339336, "grad_norm": 184.0, "learning_rate": 5.2385225201405e-05, "loss": 11.314, "step": 12002 }, { "epoch": 0.5003126172314618, "grad_norm": 262.0, "learning_rate": 5.237848284819293e-05, "loss": 10.7508, "step": 12003 }, { "epoch": 0.50035429952899, "grad_norm": 183.0, "learning_rate": 5.23717404516328e-05, "loss": 10.8759, "step": 12004 }, { "epoch": 0.5003959818265182, "grad_norm": 330.0, "learning_rate": 5.2364998011847496e-05, "loss": 14.1256, "step": 12005 }, { "epoch": 0.5004376641240466, "grad_norm": 134.0, "learning_rate": 5.2358255528959885e-05, "loss": 7.8441, "step": 12006 }, { "epoch": 0.5004793464215748, "grad_norm": 226.0, "learning_rate": 5.235151300309287e-05, "loss": 11.8752, "step": 12007 }, { "epoch": 0.500521028719103, "grad_norm": 402.0, "learning_rate": 5.234477043436931e-05, "loss": 15.7508, "step": 12008 }, { "epoch": 0.5005627110166312, "grad_norm": 316.0, "learning_rate": 5.233802782291209e-05, "loss": 12.8126, "step": 12009 }, { "epoch": 0.5006043933141595, "grad_norm": 300.0, "learning_rate": 5.233128516884412e-05, "loss": 12.7502, "step": 12010 }, { "epoch": 0.5006460756116877, "grad_norm": 226.0, "learning_rate": 5.232454247228828e-05, "loss": 11.6251, "step": 12011 }, { "epoch": 0.5006877579092159, "grad_norm": 119.5, "learning_rate": 5.2317799733367434e-05, "loss": 9.8754, "step": 12012 }, { "epoch": 0.5007294402067441, "grad_norm": 402.0, "learning_rate": 5.231105695220448e-05, "loss": 15.813, "step": 12013 }, { "epoch": 0.5007711225042725, "grad_norm": 528.0, "learning_rate": 5.2304314128922316e-05, "loss": 17.7502, "step": 12014 }, { "epoch": 0.5008128048018007, "grad_norm": 604.0, "learning_rate": 5.229757126364381e-05, "loss": 19.5003, "step": 12015 }, { "epoch": 0.5008544870993289, "grad_norm": 528.0, "learning_rate": 5.229082835649186e-05, "loss": 16.8751, "step": 12016 }, { "epoch": 0.5008961693968571, "grad_norm": 346.0, "learning_rate": 5.2284085407589376e-05, "loss": 14.0002, "step": 12017 }, { "epoch": 0.5009378516943854, "grad_norm": 408.0, "learning_rate": 5.2277342417059226e-05, "loss": 15.5003, "step": 12018 }, { "epoch": 0.5009795339919136, "grad_norm": 908.0, "learning_rate": 5.227059938502432e-05, "loss": 25.6255, "step": 12019 }, { "epoch": 0.5010212162894419, "grad_norm": 378.0, "learning_rate": 5.226385631160753e-05, "loss": 15.1252, "step": 12020 }, { "epoch": 0.5010628985869701, "grad_norm": 992.0, "learning_rate": 5.225711319693175e-05, "loss": 23.5036, "step": 12021 }, { "epoch": 0.5011045808844984, "grad_norm": 464.0, "learning_rate": 5.225037004111989e-05, "loss": 15.4378, "step": 12022 }, { "epoch": 0.5011462631820266, "grad_norm": 1088.0, "learning_rate": 5.2243626844294835e-05, "loss": 24.634, "step": 12023 }, { "epoch": 0.5011879454795548, "grad_norm": 644.0, "learning_rate": 5.223688360657949e-05, "loss": 19.5002, "step": 12024 }, { "epoch": 0.501229627777083, "grad_norm": 444.0, "learning_rate": 5.223014032809673e-05, "loss": 16.5002, "step": 12025 }, { "epoch": 0.5012713100746113, "grad_norm": 382.0, "learning_rate": 5.2223397008969466e-05, "loss": 14.2506, "step": 12026 }, { "epoch": 0.5013129923721396, "grad_norm": 260.0, "learning_rate": 5.221665364932059e-05, "loss": 11.8126, "step": 12027 }, { "epoch": 0.5013546746696678, "grad_norm": 338.0, "learning_rate": 5.220991024927301e-05, "loss": 12.8141, "step": 12028 }, { "epoch": 0.501396356967196, "grad_norm": 278.0, "learning_rate": 5.220316680894962e-05, "loss": 12.6878, "step": 12029 }, { "epoch": 0.5014380392647243, "grad_norm": 880.0, "learning_rate": 5.219642332847332e-05, "loss": 24.5003, "step": 12030 }, { "epoch": 0.5014797215622525, "grad_norm": 276.0, "learning_rate": 5.218967980796702e-05, "loss": 12.8752, "step": 12031 }, { "epoch": 0.5015214038597807, "grad_norm": 474.0, "learning_rate": 5.2182936247553595e-05, "loss": 17.0002, "step": 12032 }, { "epoch": 0.501563086157309, "grad_norm": 230.0, "learning_rate": 5.217619264735597e-05, "loss": 11.8128, "step": 12033 }, { "epoch": 0.5016047684548373, "grad_norm": 556.0, "learning_rate": 5.216944900749704e-05, "loss": 17.8753, "step": 12034 }, { "epoch": 0.5016464507523655, "grad_norm": 540.0, "learning_rate": 5.216270532809972e-05, "loss": 18.3766, "step": 12035 }, { "epoch": 0.5016881330498937, "grad_norm": 708.0, "learning_rate": 5.2155961609286886e-05, "loss": 21.2531, "step": 12036 }, { "epoch": 0.501729815347422, "grad_norm": 596.0, "learning_rate": 5.214921785118146e-05, "loss": 17.5006, "step": 12037 }, { "epoch": 0.5017714976449502, "grad_norm": 229.0, "learning_rate": 5.2142474053906356e-05, "loss": 11.7514, "step": 12038 }, { "epoch": 0.5018131799424784, "grad_norm": 476.0, "learning_rate": 5.213573021758448e-05, "loss": 15.6879, "step": 12039 }, { "epoch": 0.5018548622400066, "grad_norm": 430.0, "learning_rate": 5.21289863423387e-05, "loss": 13.6257, "step": 12040 }, { "epoch": 0.501896544537535, "grad_norm": 169.0, "learning_rate": 5.2122242428291986e-05, "loss": 10.313, "step": 12041 }, { "epoch": 0.5019382268350632, "grad_norm": 183.0, "learning_rate": 5.2115498475567204e-05, "loss": 10.7508, "step": 12042 }, { "epoch": 0.5019799091325914, "grad_norm": 482.0, "learning_rate": 5.210875448428728e-05, "loss": 17.8752, "step": 12043 }, { "epoch": 0.5020215914301196, "grad_norm": 332.0, "learning_rate": 5.210201045457511e-05, "loss": 13.5628, "step": 12044 }, { "epoch": 0.5020632737276479, "grad_norm": 238.0, "learning_rate": 5.209526638655362e-05, "loss": 11.5634, "step": 12045 }, { "epoch": 0.5021049560251761, "grad_norm": 328.0, "learning_rate": 5.208852228034572e-05, "loss": 14.1877, "step": 12046 }, { "epoch": 0.5021466383227043, "grad_norm": 70.0, "learning_rate": 5.20817781360743e-05, "loss": 8.2504, "step": 12047 }, { "epoch": 0.5021883206202326, "grad_norm": 320.0, "learning_rate": 5.2075033953862303e-05, "loss": 13.4377, "step": 12048 }, { "epoch": 0.5022300029177609, "grad_norm": 1120.0, "learning_rate": 5.206828973383262e-05, "loss": 29.5008, "step": 12049 }, { "epoch": 0.5022716852152891, "grad_norm": 444.0, "learning_rate": 5.206154547610817e-05, "loss": 14.0675, "step": 12050 }, { "epoch": 0.5023133675128173, "grad_norm": 788.0, "learning_rate": 5.2054801180811886e-05, "loss": 27.5003, "step": 12051 }, { "epoch": 0.5023550498103455, "grad_norm": 199.0, "learning_rate": 5.204805684806664e-05, "loss": 13.3128, "step": 12052 }, { "epoch": 0.5023967321078738, "grad_norm": 482.0, "learning_rate": 5.2041312477995395e-05, "loss": 17.2502, "step": 12053 }, { "epoch": 0.502438414405402, "grad_norm": 668.0, "learning_rate": 5.2034568070721055e-05, "loss": 18.5003, "step": 12054 }, { "epoch": 0.5024800967029303, "grad_norm": 334.0, "learning_rate": 5.2027823626366526e-05, "loss": 14.8128, "step": 12055 }, { "epoch": 0.5025217790004585, "grad_norm": 326.0, "learning_rate": 5.202107914505473e-05, "loss": 13.0002, "step": 12056 }, { "epoch": 0.5025634612979868, "grad_norm": 96.5, "learning_rate": 5.201433462690858e-05, "loss": 8.1883, "step": 12057 }, { "epoch": 0.502605143595515, "grad_norm": 512.0, "learning_rate": 5.2007590072051014e-05, "loss": 16.3783, "step": 12058 }, { "epoch": 0.5026468258930432, "grad_norm": 310.0, "learning_rate": 5.200084548060493e-05, "loss": 13.4376, "step": 12059 }, { "epoch": 0.5026885081905714, "grad_norm": 190.0, "learning_rate": 5.199410085269327e-05, "loss": 12.3754, "step": 12060 }, { "epoch": 0.5027301904880997, "grad_norm": 234.0, "learning_rate": 5.198735618843894e-05, "loss": 11.6277, "step": 12061 }, { "epoch": 0.502771872785628, "grad_norm": 440.0, "learning_rate": 5.198061148796487e-05, "loss": 15.3126, "step": 12062 }, { "epoch": 0.5028135550831562, "grad_norm": 249.0, "learning_rate": 5.197386675139398e-05, "loss": 12.0002, "step": 12063 }, { "epoch": 0.5028552373806844, "grad_norm": 190.0, "learning_rate": 5.196712197884919e-05, "loss": 10.2502, "step": 12064 }, { "epoch": 0.5028969196782127, "grad_norm": 776.0, "learning_rate": 5.196037717045341e-05, "loss": 22.8756, "step": 12065 }, { "epoch": 0.5029386019757409, "grad_norm": 494.0, "learning_rate": 5.1953632326329605e-05, "loss": 19.1253, "step": 12066 }, { "epoch": 0.5029802842732691, "grad_norm": 243.0, "learning_rate": 5.194688744660067e-05, "loss": 12.0038, "step": 12067 }, { "epoch": 0.5030219665707973, "grad_norm": 358.0, "learning_rate": 5.1940142531389544e-05, "loss": 14.6252, "step": 12068 }, { "epoch": 0.5030636488683257, "grad_norm": 896.0, "learning_rate": 5.1933397580819143e-05, "loss": 25.5011, "step": 12069 }, { "epoch": 0.5031053311658539, "grad_norm": 362.0, "learning_rate": 5.19266525950124e-05, "loss": 14.4379, "step": 12070 }, { "epoch": 0.5031470134633821, "grad_norm": 704.0, "learning_rate": 5.191990757409225e-05, "loss": 20.8755, "step": 12071 }, { "epoch": 0.5031886957609103, "grad_norm": 404.0, "learning_rate": 5.19131625181816e-05, "loss": 14.4376, "step": 12072 }, { "epoch": 0.5032303780584386, "grad_norm": 396.0, "learning_rate": 5.190641742740341e-05, "loss": 13.1253, "step": 12073 }, { "epoch": 0.5032720603559668, "grad_norm": 272.0, "learning_rate": 5.189967230188059e-05, "loss": 12.3753, "step": 12074 }, { "epoch": 0.503313742653495, "grad_norm": 470.0, "learning_rate": 5.1892927141736056e-05, "loss": 16.8754, "step": 12075 }, { "epoch": 0.5033554249510233, "grad_norm": 159.0, "learning_rate": 5.188618194709277e-05, "loss": 10.3752, "step": 12076 }, { "epoch": 0.5033971072485516, "grad_norm": 474.0, "learning_rate": 5.1879436718073647e-05, "loss": 16.2502, "step": 12077 }, { "epoch": 0.5034387895460798, "grad_norm": 350.0, "learning_rate": 5.1872691454801626e-05, "loss": 14.1887, "step": 12078 }, { "epoch": 0.503480471843608, "grad_norm": 215.0, "learning_rate": 5.186594615739963e-05, "loss": 11.1877, "step": 12079 }, { "epoch": 0.5035221541411362, "grad_norm": 400.0, "learning_rate": 5.185920082599061e-05, "loss": 14.1886, "step": 12080 }, { "epoch": 0.5035638364386645, "grad_norm": 314.0, "learning_rate": 5.185245546069748e-05, "loss": 12.4377, "step": 12081 }, { "epoch": 0.5036055187361927, "grad_norm": 456.0, "learning_rate": 5.1845710061643193e-05, "loss": 15.8761, "step": 12082 }, { "epoch": 0.503647201033721, "grad_norm": 628.0, "learning_rate": 5.183896462895067e-05, "loss": 20.126, "step": 12083 }, { "epoch": 0.5036888833312492, "grad_norm": 370.0, "learning_rate": 5.1832219162742866e-05, "loss": 15.0627, "step": 12084 }, { "epoch": 0.5037305656287775, "grad_norm": 1344.0, "learning_rate": 5.18254736631427e-05, "loss": 30.8762, "step": 12085 }, { "epoch": 0.5037722479263057, "grad_norm": 336.0, "learning_rate": 5.181872813027311e-05, "loss": 14.4377, "step": 12086 }, { "epoch": 0.5038139302238339, "grad_norm": 720.0, "learning_rate": 5.181198256425703e-05, "loss": 21.3752, "step": 12087 }, { "epoch": 0.5038556125213621, "grad_norm": 560.0, "learning_rate": 5.1805236965217417e-05, "loss": 18.7502, "step": 12088 }, { "epoch": 0.5038972948188905, "grad_norm": 720.0, "learning_rate": 5.17984913332772e-05, "loss": 21.6251, "step": 12089 }, { "epoch": 0.5039389771164187, "grad_norm": 374.0, "learning_rate": 5.179174566855931e-05, "loss": 13.502, "step": 12090 }, { "epoch": 0.5039806594139469, "grad_norm": 282.0, "learning_rate": 5.178499997118671e-05, "loss": 12.0627, "step": 12091 }, { "epoch": 0.5040223417114751, "grad_norm": 462.0, "learning_rate": 5.177825424128232e-05, "loss": 16.2503, "step": 12092 }, { "epoch": 0.5040640240090034, "grad_norm": 836.0, "learning_rate": 5.177150847896909e-05, "loss": 20.5033, "step": 12093 }, { "epoch": 0.5041057063065316, "grad_norm": 588.0, "learning_rate": 5.176476268436996e-05, "loss": 17.2502, "step": 12094 }, { "epoch": 0.5041473886040598, "grad_norm": 572.0, "learning_rate": 5.1758016857607874e-05, "loss": 16.8762, "step": 12095 }, { "epoch": 0.504189070901588, "grad_norm": 528.0, "learning_rate": 5.1751270998805766e-05, "loss": 18.5022, "step": 12096 }, { "epoch": 0.5042307531991164, "grad_norm": 296.0, "learning_rate": 5.17445251080866e-05, "loss": 13.7502, "step": 12097 }, { "epoch": 0.5042724354966446, "grad_norm": 432.0, "learning_rate": 5.173777918557331e-05, "loss": 15.9378, "step": 12098 }, { "epoch": 0.5043141177941728, "grad_norm": 284.0, "learning_rate": 5.1731033231388835e-05, "loss": 12.5006, "step": 12099 }, { "epoch": 0.504355800091701, "grad_norm": 310.0, "learning_rate": 5.1724287245656136e-05, "loss": 13.6257, "step": 12100 }, { "epoch": 0.5043974823892293, "grad_norm": 490.0, "learning_rate": 5.1717541228498135e-05, "loss": 17.7504, "step": 12101 }, { "epoch": 0.5044391646867575, "grad_norm": 382.0, "learning_rate": 5.1710795180037794e-05, "loss": 14.8752, "step": 12102 }, { "epoch": 0.5044808469842857, "grad_norm": 336.0, "learning_rate": 5.1704049100398055e-05, "loss": 14.2505, "step": 12103 }, { "epoch": 0.504522529281814, "grad_norm": 268.0, "learning_rate": 5.169730298970188e-05, "loss": 11.8127, "step": 12104 }, { "epoch": 0.5045642115793423, "grad_norm": 1152.0, "learning_rate": 5.1690556848072205e-05, "loss": 24.1299, "step": 12105 }, { "epoch": 0.5046058938768705, "grad_norm": 59.5, "learning_rate": 5.168381067563197e-05, "loss": 6.6879, "step": 12106 }, { "epoch": 0.5046475761743987, "grad_norm": 462.0, "learning_rate": 5.167706447250416e-05, "loss": 16.7505, "step": 12107 }, { "epoch": 0.504689258471927, "grad_norm": 304.0, "learning_rate": 5.167031823881168e-05, "loss": 14.1261, "step": 12108 }, { "epoch": 0.5047309407694552, "grad_norm": 316.0, "learning_rate": 5.166357197467752e-05, "loss": 15.0036, "step": 12109 }, { "epoch": 0.5047726230669835, "grad_norm": 107.5, "learning_rate": 5.16568256802246e-05, "loss": 8.3757, "step": 12110 }, { "epoch": 0.5048143053645117, "grad_norm": 608.0, "learning_rate": 5.1650079355575884e-05, "loss": 18.7504, "step": 12111 }, { "epoch": 0.50485598766204, "grad_norm": 203.0, "learning_rate": 5.1643333000854335e-05, "loss": 11.0001, "step": 12112 }, { "epoch": 0.5048976699595682, "grad_norm": 173.0, "learning_rate": 5.163658661618288e-05, "loss": 11.3753, "step": 12113 }, { "epoch": 0.5049393522570964, "grad_norm": 572.0, "learning_rate": 5.162984020168451e-05, "loss": 15.192, "step": 12114 }, { "epoch": 0.5049810345546246, "grad_norm": 306.0, "learning_rate": 5.1623093757482135e-05, "loss": 13.5627, "step": 12115 }, { "epoch": 0.5050227168521529, "grad_norm": 568.0, "learning_rate": 5.161634728369874e-05, "loss": 19.2505, "step": 12116 }, { "epoch": 0.5050643991496812, "grad_norm": 412.0, "learning_rate": 5.160960078045728e-05, "loss": 14.6876, "step": 12117 }, { "epoch": 0.5051060814472094, "grad_norm": 246.0, "learning_rate": 5.1602854247880697e-05, "loss": 12.0001, "step": 12118 }, { "epoch": 0.5051477637447376, "grad_norm": 384.0, "learning_rate": 5.1596107686091955e-05, "loss": 14.1252, "step": 12119 }, { "epoch": 0.5051894460422659, "grad_norm": 360.0, "learning_rate": 5.158936109521401e-05, "loss": 14.688, "step": 12120 }, { "epoch": 0.5052311283397941, "grad_norm": 190.0, "learning_rate": 5.158261447536982e-05, "loss": 10.1254, "step": 12121 }, { "epoch": 0.5052728106373223, "grad_norm": 362.0, "learning_rate": 5.1575867826682335e-05, "loss": 15.0002, "step": 12122 }, { "epoch": 0.5053144929348505, "grad_norm": 664.0, "learning_rate": 5.156912114927451e-05, "loss": 21.5002, "step": 12123 }, { "epoch": 0.5053561752323789, "grad_norm": 364.0, "learning_rate": 5.156237444326934e-05, "loss": 13.9378, "step": 12124 }, { "epoch": 0.5053978575299071, "grad_norm": 282.0, "learning_rate": 5.1555627708789735e-05, "loss": 12.6252, "step": 12125 }, { "epoch": 0.5054395398274353, "grad_norm": 172.0, "learning_rate": 5.154888094595868e-05, "loss": 11.9389, "step": 12126 }, { "epoch": 0.5054812221249635, "grad_norm": 374.0, "learning_rate": 5.154213415489913e-05, "loss": 15.1889, "step": 12127 }, { "epoch": 0.5055229044224918, "grad_norm": 724.0, "learning_rate": 5.153538733573405e-05, "loss": 20.6284, "step": 12128 }, { "epoch": 0.50556458672002, "grad_norm": 502.0, "learning_rate": 5.15286404885864e-05, "loss": 17.2507, "step": 12129 }, { "epoch": 0.5056062690175482, "grad_norm": 380.0, "learning_rate": 5.1521893613579154e-05, "loss": 12.1259, "step": 12130 }, { "epoch": 0.5056479513150764, "grad_norm": 348.0, "learning_rate": 5.151514671083525e-05, "loss": 13.8128, "step": 12131 }, { "epoch": 0.5056896336126048, "grad_norm": 278.0, "learning_rate": 5.1508399780477666e-05, "loss": 13.3134, "step": 12132 }, { "epoch": 0.505731315910133, "grad_norm": 135.0, "learning_rate": 5.1501652822629356e-05, "loss": 8.3754, "step": 12133 }, { "epoch": 0.5057729982076612, "grad_norm": 350.0, "learning_rate": 5.14949058374133e-05, "loss": 14.3753, "step": 12134 }, { "epoch": 0.5058146805051894, "grad_norm": 316.0, "learning_rate": 5.148815882495245e-05, "loss": 13.7502, "step": 12135 }, { "epoch": 0.5058563628027177, "grad_norm": 308.0, "learning_rate": 5.148141178536976e-05, "loss": 12.7502, "step": 12136 }, { "epoch": 0.5058980451002459, "grad_norm": 532.0, "learning_rate": 5.147466471878822e-05, "loss": 19.0004, "step": 12137 }, { "epoch": 0.5059397273977742, "grad_norm": 644.0, "learning_rate": 5.146791762533078e-05, "loss": 17.1269, "step": 12138 }, { "epoch": 0.5059814096953024, "grad_norm": 243.0, "learning_rate": 5.1461170505120426e-05, "loss": 13.1879, "step": 12139 }, { "epoch": 0.5060230919928307, "grad_norm": 260.0, "learning_rate": 5.14544233582801e-05, "loss": 13.3755, "step": 12140 }, { "epoch": 0.5060647742903589, "grad_norm": 42.0, "learning_rate": 5.144767618493277e-05, "loss": 6.8442, "step": 12141 }, { "epoch": 0.5061064565878871, "grad_norm": 336.0, "learning_rate": 5.144092898520142e-05, "loss": 14.0634, "step": 12142 }, { "epoch": 0.5061481388854153, "grad_norm": 1256.0, "learning_rate": 5.143418175920901e-05, "loss": 25.8797, "step": 12143 }, { "epoch": 0.5061898211829436, "grad_norm": 350.0, "learning_rate": 5.142743450707851e-05, "loss": 15.3775, "step": 12144 }, { "epoch": 0.5062315034804719, "grad_norm": 266.0, "learning_rate": 5.14206872289329e-05, "loss": 12.7503, "step": 12145 }, { "epoch": 0.5062731857780001, "grad_norm": 414.0, "learning_rate": 5.141393992489513e-05, "loss": 15.314, "step": 12146 }, { "epoch": 0.5063148680755283, "grad_norm": 378.0, "learning_rate": 5.140719259508817e-05, "loss": 14.8129, "step": 12147 }, { "epoch": 0.5063565503730566, "grad_norm": 506.0, "learning_rate": 5.140044523963502e-05, "loss": 17.0004, "step": 12148 }, { "epoch": 0.5063982326705848, "grad_norm": 180.0, "learning_rate": 5.139369785865862e-05, "loss": 11.7503, "step": 12149 }, { "epoch": 0.506439914968113, "grad_norm": 588.0, "learning_rate": 5.1386950452281954e-05, "loss": 17.7513, "step": 12150 }, { "epoch": 0.5064815972656412, "grad_norm": 245.0, "learning_rate": 5.138020302062799e-05, "loss": 13.2507, "step": 12151 }, { "epoch": 0.5065232795631696, "grad_norm": 616.0, "learning_rate": 5.1373455563819704e-05, "loss": 19.1253, "step": 12152 }, { "epoch": 0.5065649618606978, "grad_norm": 286.0, "learning_rate": 5.136670808198006e-05, "loss": 11.8129, "step": 12153 }, { "epoch": 0.506606644158226, "grad_norm": 310.0, "learning_rate": 5.1359960575232055e-05, "loss": 12.6876, "step": 12154 }, { "epoch": 0.5066483264557542, "grad_norm": 115.0, "learning_rate": 5.1353213043698644e-05, "loss": 11.1254, "step": 12155 }, { "epoch": 0.5066900087532825, "grad_norm": 201.0, "learning_rate": 5.1346465487502804e-05, "loss": 12.376, "step": 12156 }, { "epoch": 0.5067316910508107, "grad_norm": 210.0, "learning_rate": 5.133971790676751e-05, "loss": 9.9378, "step": 12157 }, { "epoch": 0.5067733733483389, "grad_norm": 233.0, "learning_rate": 5.133297030161574e-05, "loss": 12.3132, "step": 12158 }, { "epoch": 0.5068150556458672, "grad_norm": 192.0, "learning_rate": 5.132622267217047e-05, "loss": 9.8752, "step": 12159 }, { "epoch": 0.5068567379433955, "grad_norm": 302.0, "learning_rate": 5.131947501855468e-05, "loss": 13.5629, "step": 12160 }, { "epoch": 0.5068984202409237, "grad_norm": 135.0, "learning_rate": 5.131272734089133e-05, "loss": 10.8134, "step": 12161 }, { "epoch": 0.5069401025384519, "grad_norm": 282.0, "learning_rate": 5.1305979639303405e-05, "loss": 14.1899, "step": 12162 }, { "epoch": 0.5069817848359801, "grad_norm": 426.0, "learning_rate": 5.129923191391389e-05, "loss": 15.5003, "step": 12163 }, { "epoch": 0.5070234671335084, "grad_norm": 322.0, "learning_rate": 5.1292484164845764e-05, "loss": 11.2512, "step": 12164 }, { "epoch": 0.5070651494310366, "grad_norm": 462.0, "learning_rate": 5.1285736392221995e-05, "loss": 17.0006, "step": 12165 }, { "epoch": 0.5071068317285649, "grad_norm": 86.5, "learning_rate": 5.1278988596165555e-05, "loss": 9.8132, "step": 12166 }, { "epoch": 0.5071485140260931, "grad_norm": 376.0, "learning_rate": 5.127224077679944e-05, "loss": 14.3143, "step": 12167 }, { "epoch": 0.5071901963236214, "grad_norm": 816.0, "learning_rate": 5.126549293424663e-05, "loss": 21.5005, "step": 12168 }, { "epoch": 0.5072318786211496, "grad_norm": 211.0, "learning_rate": 5.12587450686301e-05, "loss": 12.7503, "step": 12169 }, { "epoch": 0.5072735609186778, "grad_norm": 156.0, "learning_rate": 5.1251997180072816e-05, "loss": 9.1886, "step": 12170 }, { "epoch": 0.507315243216206, "grad_norm": 386.0, "learning_rate": 5.124524926869779e-05, "loss": 15.688, "step": 12171 }, { "epoch": 0.5073569255137343, "grad_norm": 203.0, "learning_rate": 5.123850133462797e-05, "loss": 11.0004, "step": 12172 }, { "epoch": 0.5073986078112626, "grad_norm": 149.0, "learning_rate": 5.123175337798637e-05, "loss": 7.8438, "step": 12173 }, { "epoch": 0.5074402901087908, "grad_norm": 712.0, "learning_rate": 5.1225005398895955e-05, "loss": 20.1258, "step": 12174 }, { "epoch": 0.507481972406319, "grad_norm": 89.0, "learning_rate": 5.12182573974797e-05, "loss": 8.6879, "step": 12175 }, { "epoch": 0.5075236547038473, "grad_norm": 250.0, "learning_rate": 5.121150937386059e-05, "loss": 11.6252, "step": 12176 }, { "epoch": 0.5075653370013755, "grad_norm": 508.0, "learning_rate": 5.120476132816162e-05, "loss": 15.8128, "step": 12177 }, { "epoch": 0.5076070192989037, "grad_norm": 1440.0, "learning_rate": 5.1198013260505765e-05, "loss": 27.6303, "step": 12178 }, { "epoch": 0.507648701596432, "grad_norm": 89.0, "learning_rate": 5.1191265171016e-05, "loss": 7.8442, "step": 12179 }, { "epoch": 0.5076903838939603, "grad_norm": 282.0, "learning_rate": 5.118451705981534e-05, "loss": 11.6877, "step": 12180 }, { "epoch": 0.5077320661914885, "grad_norm": 247.0, "learning_rate": 5.117776892702675e-05, "loss": 12.1878, "step": 12181 }, { "epoch": 0.5077737484890167, "grad_norm": 340.0, "learning_rate": 5.117102077277321e-05, "loss": 15.0005, "step": 12182 }, { "epoch": 0.507815430786545, "grad_norm": 237.0, "learning_rate": 5.116427259717772e-05, "loss": 11.8759, "step": 12183 }, { "epoch": 0.5078571130840732, "grad_norm": 412.0, "learning_rate": 5.115752440036325e-05, "loss": 14.4378, "step": 12184 }, { "epoch": 0.5078987953816014, "grad_norm": 752.0, "learning_rate": 5.115077618245281e-05, "loss": 20.1252, "step": 12185 }, { "epoch": 0.5079404776791296, "grad_norm": 294.0, "learning_rate": 5.1144027943569364e-05, "loss": 13.3132, "step": 12186 }, { "epoch": 0.507982159976658, "grad_norm": 494.0, "learning_rate": 5.11372796838359e-05, "loss": 15.6877, "step": 12187 }, { "epoch": 0.5080238422741862, "grad_norm": 183.0, "learning_rate": 5.113053140337541e-05, "loss": 11.2503, "step": 12188 }, { "epoch": 0.5080655245717144, "grad_norm": 219.0, "learning_rate": 5.112378310231091e-05, "loss": 13.3131, "step": 12189 }, { "epoch": 0.5081072068692426, "grad_norm": 548.0, "learning_rate": 5.1117034780765336e-05, "loss": 18.6254, "step": 12190 }, { "epoch": 0.5081488891667709, "grad_norm": 100.0, "learning_rate": 5.111028643886171e-05, "loss": 8.0012, "step": 12191 }, { "epoch": 0.5081905714642991, "grad_norm": 392.0, "learning_rate": 5.110353807672301e-05, "loss": 13.5633, "step": 12192 }, { "epoch": 0.5082322537618273, "grad_norm": 1012.0, "learning_rate": 5.109678969447225e-05, "loss": 26.0, "step": 12193 }, { "epoch": 0.5082739360593556, "grad_norm": 308.0, "learning_rate": 5.109004129223238e-05, "loss": 13.7502, "step": 12194 }, { "epoch": 0.5083156183568839, "grad_norm": 155.0, "learning_rate": 5.108329287012643e-05, "loss": 11.0629, "step": 12195 }, { "epoch": 0.5083573006544121, "grad_norm": 712.0, "learning_rate": 5.107654442827736e-05, "loss": 20.0049, "step": 12196 }, { "epoch": 0.5083989829519403, "grad_norm": 274.0, "learning_rate": 5.106979596680817e-05, "loss": 12.5634, "step": 12197 }, { "epoch": 0.5084406652494685, "grad_norm": 243.0, "learning_rate": 5.106304748584187e-05, "loss": 12.1879, "step": 12198 }, { "epoch": 0.5084823475469968, "grad_norm": 177.0, "learning_rate": 5.105629898550142e-05, "loss": 9.3128, "step": 12199 }, { "epoch": 0.508524029844525, "grad_norm": 404.0, "learning_rate": 5.1049550465909825e-05, "loss": 14.5626, "step": 12200 }, { "epoch": 0.5085657121420533, "grad_norm": 113.5, "learning_rate": 5.104280192719009e-05, "loss": 6.5955, "step": 12201 }, { "epoch": 0.5086073944395815, "grad_norm": 170.0, "learning_rate": 5.103605336946519e-05, "loss": 5.1251, "step": 12202 }, { "epoch": 0.5086490767371098, "grad_norm": 366.0, "learning_rate": 5.102930479285812e-05, "loss": 13.9378, "step": 12203 }, { "epoch": 0.508690759034638, "grad_norm": 900.0, "learning_rate": 5.102255619749188e-05, "loss": 24.1254, "step": 12204 }, { "epoch": 0.5087324413321662, "grad_norm": 290.0, "learning_rate": 5.1015807583489474e-05, "loss": 13.2511, "step": 12205 }, { "epoch": 0.5087741236296944, "grad_norm": 348.0, "learning_rate": 5.1009058950973876e-05, "loss": 13.5002, "step": 12206 }, { "epoch": 0.5088158059272228, "grad_norm": 656.0, "learning_rate": 5.100231030006809e-05, "loss": 20.3752, "step": 12207 }, { "epoch": 0.508857488224751, "grad_norm": 354.0, "learning_rate": 5.099556163089512e-05, "loss": 13.0005, "step": 12208 }, { "epoch": 0.5088991705222792, "grad_norm": 512.0, "learning_rate": 5.098881294357795e-05, "loss": 16.8752, "step": 12209 }, { "epoch": 0.5089408528198074, "grad_norm": 352.0, "learning_rate": 5.098206423823956e-05, "loss": 14.6253, "step": 12210 }, { "epoch": 0.5089825351173357, "grad_norm": 516.0, "learning_rate": 5.097531551500297e-05, "loss": 16.2502, "step": 12211 }, { "epoch": 0.5090242174148639, "grad_norm": 700.0, "learning_rate": 5.096856677399118e-05, "loss": 19.7505, "step": 12212 }, { "epoch": 0.5090658997123921, "grad_norm": 2400.0, "learning_rate": 5.0961818015327156e-05, "loss": 43.5073, "step": 12213 }, { "epoch": 0.5091075820099203, "grad_norm": 109.0, "learning_rate": 5.0955069239133926e-05, "loss": 9.8753, "step": 12214 }, { "epoch": 0.5091492643074487, "grad_norm": 346.0, "learning_rate": 5.094832044553447e-05, "loss": 13.6877, "step": 12215 }, { "epoch": 0.5091909466049769, "grad_norm": 524.0, "learning_rate": 5.0941571634651776e-05, "loss": 17.8763, "step": 12216 }, { "epoch": 0.5092326289025051, "grad_norm": 418.0, "learning_rate": 5.0934822806608875e-05, "loss": 16.5003, "step": 12217 }, { "epoch": 0.5092743112000333, "grad_norm": 704.0, "learning_rate": 5.092807396152873e-05, "loss": 19.755, "step": 12218 }, { "epoch": 0.5093159934975616, "grad_norm": 520.0, "learning_rate": 5.0921325099534365e-05, "loss": 18.5003, "step": 12219 }, { "epoch": 0.5093576757950898, "grad_norm": 184.0, "learning_rate": 5.091457622074877e-05, "loss": 10.6877, "step": 12220 }, { "epoch": 0.509399358092618, "grad_norm": 54.0, "learning_rate": 5.090782732529494e-05, "loss": 7.2816, "step": 12221 }, { "epoch": 0.5094410403901463, "grad_norm": 416.0, "learning_rate": 5.0901078413295875e-05, "loss": 16.8753, "step": 12222 }, { "epoch": 0.5094827226876746, "grad_norm": 628.0, "learning_rate": 5.089432948487458e-05, "loss": 18.5014, "step": 12223 }, { "epoch": 0.5095244049852028, "grad_norm": 328.0, "learning_rate": 5.0887580540154045e-05, "loss": 13.5007, "step": 12224 }, { "epoch": 0.509566087282731, "grad_norm": 564.0, "learning_rate": 5.0880831579257285e-05, "loss": 17.0012, "step": 12225 }, { "epoch": 0.5096077695802592, "grad_norm": 167.0, "learning_rate": 5.0874082602307286e-05, "loss": 10.1878, "step": 12226 }, { "epoch": 0.5096494518777875, "grad_norm": 178.0, "learning_rate": 5.086733360942705e-05, "loss": 8.6253, "step": 12227 }, { "epoch": 0.5096911341753158, "grad_norm": 384.0, "learning_rate": 5.086058460073958e-05, "loss": 14.8753, "step": 12228 }, { "epoch": 0.509732816472844, "grad_norm": 258.0, "learning_rate": 5.085383557636788e-05, "loss": 11.8128, "step": 12229 }, { "epoch": 0.5097744987703722, "grad_norm": 304.0, "learning_rate": 5.084708653643495e-05, "loss": 13.5637, "step": 12230 }, { "epoch": 0.5098161810679005, "grad_norm": 182.0, "learning_rate": 5.084033748106381e-05, "loss": 10.6879, "step": 12231 }, { "epoch": 0.5098578633654287, "grad_norm": 324.0, "learning_rate": 5.083358841037742e-05, "loss": 14.6878, "step": 12232 }, { "epoch": 0.5098995456629569, "grad_norm": 264.0, "learning_rate": 5.082683932449882e-05, "loss": 14.7506, "step": 12233 }, { "epoch": 0.5099412279604851, "grad_norm": 486.0, "learning_rate": 5.0820090223551e-05, "loss": 17.7502, "step": 12234 }, { "epoch": 0.5099829102580135, "grad_norm": 568.0, "learning_rate": 5.081334110765696e-05, "loss": 18.1255, "step": 12235 }, { "epoch": 0.5100245925555417, "grad_norm": 318.0, "learning_rate": 5.080659197693971e-05, "loss": 12.9391, "step": 12236 }, { "epoch": 0.5100662748530699, "grad_norm": 492.0, "learning_rate": 5.0799842831522245e-05, "loss": 16.876, "step": 12237 }, { "epoch": 0.5101079571505981, "grad_norm": 422.0, "learning_rate": 5.079309367152758e-05, "loss": 15.3753, "step": 12238 }, { "epoch": 0.5101496394481264, "grad_norm": 386.0, "learning_rate": 5.078634449707871e-05, "loss": 13.8131, "step": 12239 }, { "epoch": 0.5101913217456546, "grad_norm": 340.0, "learning_rate": 5.0779595308298645e-05, "loss": 14.5628, "step": 12240 }, { "epoch": 0.5102330040431828, "grad_norm": 162.0, "learning_rate": 5.077284610531037e-05, "loss": 10.2503, "step": 12241 }, { "epoch": 0.510274686340711, "grad_norm": 808.0, "learning_rate": 5.0766096888236917e-05, "loss": 20.5008, "step": 12242 }, { "epoch": 0.5103163686382394, "grad_norm": 140.0, "learning_rate": 5.0759347657201285e-05, "loss": 8.1256, "step": 12243 }, { "epoch": 0.5103580509357676, "grad_norm": 1592.0, "learning_rate": 5.075259841232647e-05, "loss": 34.5033, "step": 12244 }, { "epoch": 0.5103997332332958, "grad_norm": 286.0, "learning_rate": 5.074584915373548e-05, "loss": 12.8127, "step": 12245 }, { "epoch": 0.510441415530824, "grad_norm": 864.0, "learning_rate": 5.073909988155132e-05, "loss": 23.6252, "step": 12246 }, { "epoch": 0.5104830978283523, "grad_norm": 484.0, "learning_rate": 5.0732350595897015e-05, "loss": 16.7528, "step": 12247 }, { "epoch": 0.5105247801258805, "grad_norm": 328.0, "learning_rate": 5.072560129689554e-05, "loss": 13.5004, "step": 12248 }, { "epoch": 0.5105664624234088, "grad_norm": 129.0, "learning_rate": 5.0718851984669925e-05, "loss": 10.8752, "step": 12249 }, { "epoch": 0.510608144720937, "grad_norm": 378.0, "learning_rate": 5.071210265934316e-05, "loss": 15.5002, "step": 12250 }, { "epoch": 0.5106498270184653, "grad_norm": 362.0, "learning_rate": 5.0705353321038276e-05, "loss": 15.6253, "step": 12251 }, { "epoch": 0.5106915093159935, "grad_norm": 209.0, "learning_rate": 5.0698603969878255e-05, "loss": 12.0628, "step": 12252 }, { "epoch": 0.5107331916135217, "grad_norm": 202.0, "learning_rate": 5.069185460598611e-05, "loss": 10.6882, "step": 12253 }, { "epoch": 0.51077487391105, "grad_norm": 82.5, "learning_rate": 5.0685105229484855e-05, "loss": 7.3758, "step": 12254 }, { "epoch": 0.5108165562085782, "grad_norm": 560.0, "learning_rate": 5.0678355840497495e-05, "loss": 18.7503, "step": 12255 }, { "epoch": 0.5108582385061065, "grad_norm": 1472.0, "learning_rate": 5.0671606439147045e-05, "loss": 34.7526, "step": 12256 }, { "epoch": 0.5108999208036347, "grad_norm": 306.0, "learning_rate": 5.0664857025556515e-05, "loss": 9.063, "step": 12257 }, { "epoch": 0.510941603101163, "grad_norm": 253.0, "learning_rate": 5.0658107599848894e-05, "loss": 10.5628, "step": 12258 }, { "epoch": 0.5109832853986912, "grad_norm": 166.0, "learning_rate": 5.065135816214721e-05, "loss": 10.9377, "step": 12259 }, { "epoch": 0.5110249676962194, "grad_norm": 458.0, "learning_rate": 5.064460871257447e-05, "loss": 17.251, "step": 12260 }, { "epoch": 0.5110666499937476, "grad_norm": 288.0, "learning_rate": 5.0637859251253675e-05, "loss": 12.5626, "step": 12261 }, { "epoch": 0.511108332291276, "grad_norm": 202.0, "learning_rate": 5.0631109778307826e-05, "loss": 11.5008, "step": 12262 }, { "epoch": 0.5111500145888042, "grad_norm": 334.0, "learning_rate": 5.062436029385996e-05, "loss": 13.7504, "step": 12263 }, { "epoch": 0.5111916968863324, "grad_norm": 118.5, "learning_rate": 5.0617610798033076e-05, "loss": 9.3757, "step": 12264 }, { "epoch": 0.5112333791838606, "grad_norm": 131.0, "learning_rate": 5.061086129095017e-05, "loss": 9.0627, "step": 12265 }, { "epoch": 0.5112750614813889, "grad_norm": 340.0, "learning_rate": 5.0604111772734255e-05, "loss": 13.2501, "step": 12266 }, { "epoch": 0.5113167437789171, "grad_norm": 864.0, "learning_rate": 5.059736224350836e-05, "loss": 28.3754, "step": 12267 }, { "epoch": 0.5113584260764453, "grad_norm": 476.0, "learning_rate": 5.059061270339549e-05, "loss": 17.1255, "step": 12268 }, { "epoch": 0.5114001083739735, "grad_norm": 516.0, "learning_rate": 5.058386315251864e-05, "loss": 17.8752, "step": 12269 }, { "epoch": 0.5114417906715019, "grad_norm": 330.0, "learning_rate": 5.057711359100084e-05, "loss": 14.3128, "step": 12270 }, { "epoch": 0.5114834729690301, "grad_norm": 996.0, "learning_rate": 5.05703640189651e-05, "loss": 24.1297, "step": 12271 }, { "epoch": 0.5115251552665583, "grad_norm": 528.0, "learning_rate": 5.056361443653441e-05, "loss": 16.5007, "step": 12272 }, { "epoch": 0.5115668375640865, "grad_norm": 1320.0, "learning_rate": 5.0556864843831805e-05, "loss": 30.8759, "step": 12273 }, { "epoch": 0.5116085198616148, "grad_norm": 668.0, "learning_rate": 5.055011524098029e-05, "loss": 18.7508, "step": 12274 }, { "epoch": 0.511650202159143, "grad_norm": 372.0, "learning_rate": 5.054336562810288e-05, "loss": 14.8129, "step": 12275 }, { "epoch": 0.5116918844566712, "grad_norm": 576.0, "learning_rate": 5.053661600532257e-05, "loss": 19.2502, "step": 12276 }, { "epoch": 0.5117335667541995, "grad_norm": 492.0, "learning_rate": 5.0529866372762394e-05, "loss": 19.0002, "step": 12277 }, { "epoch": 0.5117752490517278, "grad_norm": 780.0, "learning_rate": 5.052311673054536e-05, "loss": 20.0071, "step": 12278 }, { "epoch": 0.511816931349256, "grad_norm": 608.0, "learning_rate": 5.051636707879446e-05, "loss": 19.3751, "step": 12279 }, { "epoch": 0.5118586136467842, "grad_norm": 197.0, "learning_rate": 5.050961741763274e-05, "loss": 11.2502, "step": 12280 }, { "epoch": 0.5119002959443124, "grad_norm": 414.0, "learning_rate": 5.050286774718319e-05, "loss": 15.5002, "step": 12281 }, { "epoch": 0.5119419782418407, "grad_norm": 239.0, "learning_rate": 5.049611806756883e-05, "loss": 12.1253, "step": 12282 }, { "epoch": 0.511983660539369, "grad_norm": 432.0, "learning_rate": 5.0489368378912685e-05, "loss": 15.5012, "step": 12283 }, { "epoch": 0.5120253428368972, "grad_norm": 90.5, "learning_rate": 5.0482618681337744e-05, "loss": 6.7818, "step": 12284 }, { "epoch": 0.5120670251344254, "grad_norm": 804.0, "learning_rate": 5.047586897496704e-05, "loss": 22.3761, "step": 12285 }, { "epoch": 0.5121087074319537, "grad_norm": 354.0, "learning_rate": 5.046911925992359e-05, "loss": 14.0628, "step": 12286 }, { "epoch": 0.5121503897294819, "grad_norm": 207.0, "learning_rate": 5.046236953633039e-05, "loss": 12.1878, "step": 12287 }, { "epoch": 0.5121920720270101, "grad_norm": 322.0, "learning_rate": 5.045561980431047e-05, "loss": 13.5629, "step": 12288 }, { "epoch": 0.5122337543245383, "grad_norm": 324.0, "learning_rate": 5.044887006398684e-05, "loss": 13.8129, "step": 12289 }, { "epoch": 0.5122754366220666, "grad_norm": 592.0, "learning_rate": 5.044212031548251e-05, "loss": 17.6251, "step": 12290 }, { "epoch": 0.5123171189195949, "grad_norm": 169.0, "learning_rate": 5.043537055892049e-05, "loss": 10.0003, "step": 12291 }, { "epoch": 0.5123588012171231, "grad_norm": 506.0, "learning_rate": 5.042862079442381e-05, "loss": 17.6253, "step": 12292 }, { "epoch": 0.5124004835146513, "grad_norm": 88.0, "learning_rate": 5.0421871022115474e-05, "loss": 8.1881, "step": 12293 }, { "epoch": 0.5124421658121796, "grad_norm": 156.0, "learning_rate": 5.0415121242118515e-05, "loss": 10.0627, "step": 12294 }, { "epoch": 0.5124838481097078, "grad_norm": 294.0, "learning_rate": 5.040837145455591e-05, "loss": 14.3754, "step": 12295 }, { "epoch": 0.512525530407236, "grad_norm": 440.0, "learning_rate": 5.040162165955072e-05, "loss": 17.2501, "step": 12296 }, { "epoch": 0.5125672127047642, "grad_norm": 155.0, "learning_rate": 5.039487185722593e-05, "loss": 7.2506, "step": 12297 }, { "epoch": 0.5126088950022926, "grad_norm": 123.0, "learning_rate": 5.038812204770458e-05, "loss": 7.5942, "step": 12298 }, { "epoch": 0.5126505772998208, "grad_norm": 524.0, "learning_rate": 5.0381372231109655e-05, "loss": 16.1253, "step": 12299 }, { "epoch": 0.512692259597349, "grad_norm": 241.0, "learning_rate": 5.03746224075642e-05, "loss": 10.9378, "step": 12300 }, { "epoch": 0.5127339418948772, "grad_norm": 258.0, "learning_rate": 5.03678725771912e-05, "loss": 12.0628, "step": 12301 }, { "epoch": 0.5127756241924055, "grad_norm": 676.0, "learning_rate": 5.036112274011371e-05, "loss": 21.8756, "step": 12302 }, { "epoch": 0.5128173064899337, "grad_norm": 1448.0, "learning_rate": 5.0354372896454715e-05, "loss": 29.6299, "step": 12303 }, { "epoch": 0.512858988787462, "grad_norm": 320.0, "learning_rate": 5.034762304633723e-05, "loss": 14.313, "step": 12304 }, { "epoch": 0.5129006710849902, "grad_norm": 796.0, "learning_rate": 5.03408731898843e-05, "loss": 22.0002, "step": 12305 }, { "epoch": 0.5129423533825185, "grad_norm": 219.0, "learning_rate": 5.033412332721892e-05, "loss": 12.0639, "step": 12306 }, { "epoch": 0.5129840356800467, "grad_norm": 153.0, "learning_rate": 5.032737345846412e-05, "loss": 10.1253, "step": 12307 }, { "epoch": 0.5130257179775749, "grad_norm": 246.0, "learning_rate": 5.032062358374291e-05, "loss": 12.5627, "step": 12308 }, { "epoch": 0.5130674002751031, "grad_norm": 430.0, "learning_rate": 5.0313873703178305e-05, "loss": 15.6884, "step": 12309 }, { "epoch": 0.5131090825726314, "grad_norm": 494.0, "learning_rate": 5.030712381689332e-05, "loss": 17.8761, "step": 12310 }, { "epoch": 0.5131507648701596, "grad_norm": 328.0, "learning_rate": 5.030037392501098e-05, "loss": 13.8754, "step": 12311 }, { "epoch": 0.5131924471676879, "grad_norm": 1272.0, "learning_rate": 5.02936240276543e-05, "loss": 28.5057, "step": 12312 }, { "epoch": 0.5132341294652161, "grad_norm": 916.0, "learning_rate": 5.028687412494628e-05, "loss": 21.6294, "step": 12313 }, { "epoch": 0.5132758117627444, "grad_norm": 396.0, "learning_rate": 5.028012421700997e-05, "loss": 15.1262, "step": 12314 }, { "epoch": 0.5133174940602726, "grad_norm": 588.0, "learning_rate": 5.0273374303968365e-05, "loss": 19.8752, "step": 12315 }, { "epoch": 0.5133591763578008, "grad_norm": 676.0, "learning_rate": 5.0266624385944494e-05, "loss": 20.5025, "step": 12316 }, { "epoch": 0.513400858655329, "grad_norm": 217.0, "learning_rate": 5.0259874463061364e-05, "loss": 11.6252, "step": 12317 }, { "epoch": 0.5134425409528574, "grad_norm": 134.0, "learning_rate": 5.0253124535442e-05, "loss": 9.9383, "step": 12318 }, { "epoch": 0.5134842232503856, "grad_norm": 410.0, "learning_rate": 5.024637460320942e-05, "loss": 16.1266, "step": 12319 }, { "epoch": 0.5135259055479138, "grad_norm": 728.0, "learning_rate": 5.023962466648664e-05, "loss": 20.7503, "step": 12320 }, { "epoch": 0.513567587845442, "grad_norm": 154.0, "learning_rate": 5.0232874725396685e-05, "loss": 10.3756, "step": 12321 }, { "epoch": 0.5136092701429703, "grad_norm": 544.0, "learning_rate": 5.022612478006257e-05, "loss": 17.8752, "step": 12322 }, { "epoch": 0.5136509524404985, "grad_norm": 224.0, "learning_rate": 5.02193748306073e-05, "loss": 7.7195, "step": 12323 }, { "epoch": 0.5136926347380267, "grad_norm": 105.0, "learning_rate": 5.021262487715391e-05, "loss": 8.2502, "step": 12324 }, { "epoch": 0.513734317035555, "grad_norm": 346.0, "learning_rate": 5.0205874919825416e-05, "loss": 14.7504, "step": 12325 }, { "epoch": 0.5137759993330833, "grad_norm": 253.0, "learning_rate": 5.019912495874483e-05, "loss": 13.1251, "step": 12326 }, { "epoch": 0.5138176816306115, "grad_norm": 318.0, "learning_rate": 5.019237499403516e-05, "loss": 13.1254, "step": 12327 }, { "epoch": 0.5138593639281397, "grad_norm": 476.0, "learning_rate": 5.018562502581946e-05, "loss": 16.6253, "step": 12328 }, { "epoch": 0.513901046225668, "grad_norm": 386.0, "learning_rate": 5.0178875054220707e-05, "loss": 14.6251, "step": 12329 }, { "epoch": 0.5139427285231962, "grad_norm": 340.0, "learning_rate": 5.0172125079361954e-05, "loss": 14.1251, "step": 12330 }, { "epoch": 0.5139844108207244, "grad_norm": 213.0, "learning_rate": 5.0165375101366206e-05, "loss": 11.0633, "step": 12331 }, { "epoch": 0.5140260931182526, "grad_norm": 396.0, "learning_rate": 5.015862512035647e-05, "loss": 14.5629, "step": 12332 }, { "epoch": 0.514067775415781, "grad_norm": 276.0, "learning_rate": 5.0151875136455797e-05, "loss": 13.6256, "step": 12333 }, { "epoch": 0.5141094577133092, "grad_norm": 334.0, "learning_rate": 5.0145125149787176e-05, "loss": 12.688, "step": 12334 }, { "epoch": 0.5141511400108374, "grad_norm": 296.0, "learning_rate": 5.013837516047364e-05, "loss": 13.688, "step": 12335 }, { "epoch": 0.5141928223083656, "grad_norm": 412.0, "learning_rate": 5.0131625168638196e-05, "loss": 16.1253, "step": 12336 }, { "epoch": 0.5142345046058939, "grad_norm": 402.0, "learning_rate": 5.0124875174403884e-05, "loss": 17.2506, "step": 12337 }, { "epoch": 0.5142761869034221, "grad_norm": 344.0, "learning_rate": 5.01181251778937e-05, "loss": 15.063, "step": 12338 }, { "epoch": 0.5143178692009504, "grad_norm": 1400.0, "learning_rate": 5.011137517923068e-05, "loss": 29.2539, "step": 12339 }, { "epoch": 0.5143595514984786, "grad_norm": 620.0, "learning_rate": 5.010462517853783e-05, "loss": 18.7506, "step": 12340 }, { "epoch": 0.5144012337960069, "grad_norm": 1528.0, "learning_rate": 5.0097875175938195e-05, "loss": 28.253, "step": 12341 }, { "epoch": 0.5144429160935351, "grad_norm": 172.0, "learning_rate": 5.009112517155474e-05, "loss": 3.1877, "step": 12342 }, { "epoch": 0.5144845983910633, "grad_norm": 608.0, "learning_rate": 5.008437516551055e-05, "loss": 19.0005, "step": 12343 }, { "epoch": 0.5145262806885915, "grad_norm": 462.0, "learning_rate": 5.007762515792861e-05, "loss": 15.8757, "step": 12344 }, { "epoch": 0.5145679629861198, "grad_norm": 338.0, "learning_rate": 5.007087514893194e-05, "loss": 14.1257, "step": 12345 }, { "epoch": 0.514609645283648, "grad_norm": 420.0, "learning_rate": 5.006412513864357e-05, "loss": 14.3128, "step": 12346 }, { "epoch": 0.5146513275811763, "grad_norm": 484.0, "learning_rate": 5.0057375127186515e-05, "loss": 15.8767, "step": 12347 }, { "epoch": 0.5146930098787045, "grad_norm": 552.0, "learning_rate": 5.005062511468379e-05, "loss": 18.7504, "step": 12348 }, { "epoch": 0.5147346921762328, "grad_norm": 204.0, "learning_rate": 5.004387510125842e-05, "loss": 10.2506, "step": 12349 }, { "epoch": 0.514776374473761, "grad_norm": 508.0, "learning_rate": 5.003712508703342e-05, "loss": 16.8752, "step": 12350 }, { "epoch": 0.5148180567712892, "grad_norm": 344.0, "learning_rate": 5.003037507213181e-05, "loss": 12.7502, "step": 12351 }, { "epoch": 0.5148597390688174, "grad_norm": 498.0, "learning_rate": 5.0023625056676616e-05, "loss": 19.0003, "step": 12352 }, { "epoch": 0.5149014213663458, "grad_norm": 338.0, "learning_rate": 5.001687504079085e-05, "loss": 14.0628, "step": 12353 }, { "epoch": 0.514943103663874, "grad_norm": 366.0, "learning_rate": 5.001012502459753e-05, "loss": 12.8127, "step": 12354 }, { "epoch": 0.5149847859614022, "grad_norm": 206.0, "learning_rate": 5.000337500821968e-05, "loss": 10.8754, "step": 12355 }, { "epoch": 0.5150264682589304, "grad_norm": 880.0, "learning_rate": 4.999662499178033e-05, "loss": 24.8756, "step": 12356 }, { "epoch": 0.5150681505564587, "grad_norm": 266.0, "learning_rate": 4.9989874975402474e-05, "loss": 14.6251, "step": 12357 }, { "epoch": 0.5151098328539869, "grad_norm": 1896.0, "learning_rate": 4.998312495920917e-05, "loss": 38.5004, "step": 12358 }, { "epoch": 0.5151515151515151, "grad_norm": 334.0, "learning_rate": 4.9976374943323396e-05, "loss": 13.0626, "step": 12359 }, { "epoch": 0.5151931974490433, "grad_norm": 548.0, "learning_rate": 4.996962492786821e-05, "loss": 17.1254, "step": 12360 }, { "epoch": 0.5152348797465717, "grad_norm": 302.0, "learning_rate": 4.996287491296659e-05, "loss": 13.8132, "step": 12361 }, { "epoch": 0.5152765620440999, "grad_norm": 344.0, "learning_rate": 4.9956124898741605e-05, "loss": 14.063, "step": 12362 }, { "epoch": 0.5153182443416281, "grad_norm": 103.5, "learning_rate": 4.9949374885316216e-05, "loss": 7.6877, "step": 12363 }, { "epoch": 0.5153599266391563, "grad_norm": 276.0, "learning_rate": 4.994262487281351e-05, "loss": 12.8131, "step": 12364 }, { "epoch": 0.5154016089366846, "grad_norm": 478.0, "learning_rate": 4.9935874861356443e-05, "loss": 16.7503, "step": 12365 }, { "epoch": 0.5154432912342128, "grad_norm": 628.0, "learning_rate": 4.992912485106808e-05, "loss": 23.1265, "step": 12366 }, { "epoch": 0.515484973531741, "grad_norm": 360.0, "learning_rate": 4.99223748420714e-05, "loss": 15.0628, "step": 12367 }, { "epoch": 0.5155266558292693, "grad_norm": 223.0, "learning_rate": 4.991562483448946e-05, "loss": 13.1883, "step": 12368 }, { "epoch": 0.5155683381267976, "grad_norm": 342.0, "learning_rate": 4.9908874828445265e-05, "loss": 12.5657, "step": 12369 }, { "epoch": 0.5156100204243258, "grad_norm": 400.0, "learning_rate": 4.990212482406184e-05, "loss": 15.2508, "step": 12370 }, { "epoch": 0.515651702721854, "grad_norm": 137.0, "learning_rate": 4.9895374821462173e-05, "loss": 10.1251, "step": 12371 }, { "epoch": 0.5156933850193822, "grad_norm": 454.0, "learning_rate": 4.988862482076934e-05, "loss": 16.0002, "step": 12372 }, { "epoch": 0.5157350673169105, "grad_norm": 280.0, "learning_rate": 4.988187482210631e-05, "loss": 11.9379, "step": 12373 }, { "epoch": 0.5157767496144388, "grad_norm": 66.5, "learning_rate": 4.987512482559614e-05, "loss": 8.814, "step": 12374 }, { "epoch": 0.515818431911967, "grad_norm": 85.0, "learning_rate": 4.9868374831361815e-05, "loss": 8.1877, "step": 12375 }, { "epoch": 0.5158601142094952, "grad_norm": 134.0, "learning_rate": 4.986162483952638e-05, "loss": 10.5002, "step": 12376 }, { "epoch": 0.5159017965070235, "grad_norm": 556.0, "learning_rate": 4.9854874850212836e-05, "loss": 16.7511, "step": 12377 }, { "epoch": 0.5159434788045517, "grad_norm": 193.0, "learning_rate": 4.984812486354423e-05, "loss": 11.3753, "step": 12378 }, { "epoch": 0.5159851611020799, "grad_norm": 270.0, "learning_rate": 4.9841374879643535e-05, "loss": 11.2503, "step": 12379 }, { "epoch": 0.5160268433996081, "grad_norm": 486.0, "learning_rate": 4.9834624898633806e-05, "loss": 17.6256, "step": 12380 }, { "epoch": 0.5160685256971365, "grad_norm": 302.0, "learning_rate": 4.982787492063806e-05, "loss": 14.8129, "step": 12381 }, { "epoch": 0.5161102079946647, "grad_norm": 420.0, "learning_rate": 4.9821124945779305e-05, "loss": 15.3127, "step": 12382 }, { "epoch": 0.5161518902921929, "grad_norm": 288.0, "learning_rate": 4.981437497418055e-05, "loss": 13.7503, "step": 12383 }, { "epoch": 0.5161935725897211, "grad_norm": 644.0, "learning_rate": 4.9807625005964856e-05, "loss": 20.5008, "step": 12384 }, { "epoch": 0.5162352548872494, "grad_norm": 249.0, "learning_rate": 4.9800875041255185e-05, "loss": 10.1895, "step": 12385 }, { "epoch": 0.5162769371847776, "grad_norm": 117.5, "learning_rate": 4.9794125080174616e-05, "loss": 8.6256, "step": 12386 }, { "epoch": 0.5163186194823058, "grad_norm": 434.0, "learning_rate": 4.9787375122846105e-05, "loss": 14.1904, "step": 12387 }, { "epoch": 0.516360301779834, "grad_norm": 296.0, "learning_rate": 4.978062516939272e-05, "loss": 13.2503, "step": 12388 }, { "epoch": 0.5164019840773624, "grad_norm": 648.0, "learning_rate": 4.977387521993745e-05, "loss": 20.2503, "step": 12389 }, { "epoch": 0.5164436663748906, "grad_norm": 202.0, "learning_rate": 4.976712527460334e-05, "loss": 11.1877, "step": 12390 }, { "epoch": 0.5164853486724188, "grad_norm": 78.5, "learning_rate": 4.9760375333513365e-05, "loss": 8.6879, "step": 12391 }, { "epoch": 0.516527030969947, "grad_norm": 916.0, "learning_rate": 4.9753625396790584e-05, "loss": 21.2553, "step": 12392 }, { "epoch": 0.5165687132674753, "grad_norm": 608.0, "learning_rate": 4.974687546455801e-05, "loss": 18.6259, "step": 12393 }, { "epoch": 0.5166103955650035, "grad_norm": 158.0, "learning_rate": 4.974012553693865e-05, "loss": 9.9379, "step": 12394 }, { "epoch": 0.5166520778625318, "grad_norm": 412.0, "learning_rate": 4.973337561405551e-05, "loss": 15.5007, "step": 12395 }, { "epoch": 0.51669376016006, "grad_norm": 660.0, "learning_rate": 4.972662569603165e-05, "loss": 17.3802, "step": 12396 }, { "epoch": 0.5167354424575883, "grad_norm": 430.0, "learning_rate": 4.9719875782990036e-05, "loss": 15.4381, "step": 12397 }, { "epoch": 0.5167771247551165, "grad_norm": 784.0, "learning_rate": 4.971312587505374e-05, "loss": 20.6295, "step": 12398 }, { "epoch": 0.5168188070526447, "grad_norm": 456.0, "learning_rate": 4.970637597234572e-05, "loss": 16.3751, "step": 12399 }, { "epoch": 0.516860489350173, "grad_norm": 282.0, "learning_rate": 4.9699626074989045e-05, "loss": 12.8128, "step": 12400 }, { "epoch": 0.5169021716477012, "grad_norm": 386.0, "learning_rate": 4.96928761831067e-05, "loss": 12.5044, "step": 12401 }, { "epoch": 0.5169438539452295, "grad_norm": 224.0, "learning_rate": 4.968612629682172e-05, "loss": 12.0628, "step": 12402 }, { "epoch": 0.5169855362427577, "grad_norm": 580.0, "learning_rate": 4.967937641625711e-05, "loss": 18.3755, "step": 12403 }, { "epoch": 0.517027218540286, "grad_norm": 227.0, "learning_rate": 4.96726265415359e-05, "loss": 12.2502, "step": 12404 }, { "epoch": 0.5170689008378142, "grad_norm": 130.0, "learning_rate": 4.966587667278109e-05, "loss": 7.5942, "step": 12405 }, { "epoch": 0.5171105831353424, "grad_norm": 101.0, "learning_rate": 4.965912681011571e-05, "loss": 7.9377, "step": 12406 }, { "epoch": 0.5171522654328706, "grad_norm": 412.0, "learning_rate": 4.965237695366278e-05, "loss": 15.876, "step": 12407 }, { "epoch": 0.517193947730399, "grad_norm": 440.0, "learning_rate": 4.964562710354531e-05, "loss": 14.5628, "step": 12408 }, { "epoch": 0.5172356300279272, "grad_norm": 310.0, "learning_rate": 4.963887725988631e-05, "loss": 12.0008, "step": 12409 }, { "epoch": 0.5172773123254554, "grad_norm": 520.0, "learning_rate": 4.963212742280882e-05, "loss": 14.0046, "step": 12410 }, { "epoch": 0.5173189946229836, "grad_norm": 536.0, "learning_rate": 4.962537759243582e-05, "loss": 17.6256, "step": 12411 }, { "epoch": 0.5173606769205119, "grad_norm": 524.0, "learning_rate": 4.961862776889036e-05, "loss": 16.5003, "step": 12412 }, { "epoch": 0.5174023592180401, "grad_norm": 420.0, "learning_rate": 4.9611877952295435e-05, "loss": 15.376, "step": 12413 }, { "epoch": 0.5174440415155683, "grad_norm": 284.0, "learning_rate": 4.9605128142774085e-05, "loss": 13.1253, "step": 12414 }, { "epoch": 0.5174857238130965, "grad_norm": 480.0, "learning_rate": 4.959837834044928e-05, "loss": 17.0002, "step": 12415 }, { "epoch": 0.5175274061106249, "grad_norm": 151.0, "learning_rate": 4.95916285454441e-05, "loss": 11.1878, "step": 12416 }, { "epoch": 0.5175690884081531, "grad_norm": 832.0, "learning_rate": 4.95848787578815e-05, "loss": 21.5008, "step": 12417 }, { "epoch": 0.5176107707056813, "grad_norm": 588.0, "learning_rate": 4.957812897788454e-05, "loss": 18.2504, "step": 12418 }, { "epoch": 0.5176524530032095, "grad_norm": 227.0, "learning_rate": 4.9571379205576204e-05, "loss": 10.4385, "step": 12419 }, { "epoch": 0.5176941353007378, "grad_norm": 716.0, "learning_rate": 4.956462944107952e-05, "loss": 20.7506, "step": 12420 }, { "epoch": 0.517735817598266, "grad_norm": 192.0, "learning_rate": 4.95578796845175e-05, "loss": 10.0627, "step": 12421 }, { "epoch": 0.5177774998957942, "grad_norm": 130.0, "learning_rate": 4.955112993601318e-05, "loss": 9.7503, "step": 12422 }, { "epoch": 0.5178191821933225, "grad_norm": 592.0, "learning_rate": 4.954438019568954e-05, "loss": 17.1252, "step": 12423 }, { "epoch": 0.5178608644908508, "grad_norm": 320.0, "learning_rate": 4.9537630463669624e-05, "loss": 13.5004, "step": 12424 }, { "epoch": 0.517902546788379, "grad_norm": 300.0, "learning_rate": 4.953088074007642e-05, "loss": 11.8752, "step": 12425 }, { "epoch": 0.5179442290859072, "grad_norm": 115.5, "learning_rate": 4.9524131025032975e-05, "loss": 8.9385, "step": 12426 }, { "epoch": 0.5179859113834354, "grad_norm": 1560.0, "learning_rate": 4.951738131866227e-05, "loss": 36.2502, "step": 12427 }, { "epoch": 0.5180275936809637, "grad_norm": 238.0, "learning_rate": 4.951063162108734e-05, "loss": 9.8763, "step": 12428 }, { "epoch": 0.518069275978492, "grad_norm": 458.0, "learning_rate": 4.950388193243118e-05, "loss": 17.2503, "step": 12429 }, { "epoch": 0.5181109582760202, "grad_norm": 720.0, "learning_rate": 4.949713225281682e-05, "loss": 21.6254, "step": 12430 }, { "epoch": 0.5181526405735484, "grad_norm": 512.0, "learning_rate": 4.949038258236728e-05, "loss": 17.6252, "step": 12431 }, { "epoch": 0.5181943228710767, "grad_norm": 322.0, "learning_rate": 4.948363292120555e-05, "loss": 13.2501, "step": 12432 }, { "epoch": 0.5182360051686049, "grad_norm": 532.0, "learning_rate": 4.947688326945465e-05, "loss": 17.7502, "step": 12433 }, { "epoch": 0.5182776874661331, "grad_norm": 82.5, "learning_rate": 4.9470133627237624e-05, "loss": 6.6878, "step": 12434 }, { "epoch": 0.5183193697636613, "grad_norm": 1288.0, "learning_rate": 4.9463383994677436e-05, "loss": 31.1255, "step": 12435 }, { "epoch": 0.5183610520611897, "grad_norm": 155.0, "learning_rate": 4.945663437189715e-05, "loss": 9.1256, "step": 12436 }, { "epoch": 0.5184027343587179, "grad_norm": 340.0, "learning_rate": 4.9449884759019724e-05, "loss": 12.8754, "step": 12437 }, { "epoch": 0.5184444166562461, "grad_norm": 198.0, "learning_rate": 4.944313515616821e-05, "loss": 10.9378, "step": 12438 }, { "epoch": 0.5184860989537743, "grad_norm": 414.0, "learning_rate": 4.94363855634656e-05, "loss": 15.063, "step": 12439 }, { "epoch": 0.5185277812513026, "grad_norm": 251.0, "learning_rate": 4.942963598103493e-05, "loss": 12.6253, "step": 12440 }, { "epoch": 0.5185694635488308, "grad_norm": 240.0, "learning_rate": 4.942288640899917e-05, "loss": 13.1885, "step": 12441 }, { "epoch": 0.518611145846359, "grad_norm": 342.0, "learning_rate": 4.9416136847481375e-05, "loss": 12.4386, "step": 12442 }, { "epoch": 0.5186528281438872, "grad_norm": 402.0, "learning_rate": 4.9409387296604524e-05, "loss": 15.5006, "step": 12443 }, { "epoch": 0.5186945104414156, "grad_norm": 294.0, "learning_rate": 4.9402637756491645e-05, "loss": 13.938, "step": 12444 }, { "epoch": 0.5187361927389438, "grad_norm": 142.0, "learning_rate": 4.939588822726575e-05, "loss": 10.2504, "step": 12445 }, { "epoch": 0.518777875036472, "grad_norm": 184.0, "learning_rate": 4.938913870904985e-05, "loss": 10.5627, "step": 12446 }, { "epoch": 0.5188195573340002, "grad_norm": 162.0, "learning_rate": 4.9382389201966936e-05, "loss": 12.0003, "step": 12447 }, { "epoch": 0.5188612396315285, "grad_norm": 592.0, "learning_rate": 4.937563970614006e-05, "loss": 18.8755, "step": 12448 }, { "epoch": 0.5189029219290567, "grad_norm": 255.0, "learning_rate": 4.936889022169218e-05, "loss": 13.0631, "step": 12449 }, { "epoch": 0.518944604226585, "grad_norm": 498.0, "learning_rate": 4.936214074874635e-05, "loss": 16.6258, "step": 12450 }, { "epoch": 0.5189862865241132, "grad_norm": 1104.0, "learning_rate": 4.935539128742555e-05, "loss": 26.0006, "step": 12451 }, { "epoch": 0.5190279688216415, "grad_norm": 123.0, "learning_rate": 4.9348641837852814e-05, "loss": 11.6259, "step": 12452 }, { "epoch": 0.5190696511191697, "grad_norm": 544.0, "learning_rate": 4.934189240015111e-05, "loss": 17.7518, "step": 12453 }, { "epoch": 0.5191113334166979, "grad_norm": 193.0, "learning_rate": 4.933514297444351e-05, "loss": 9.8752, "step": 12454 }, { "epoch": 0.5191530157142261, "grad_norm": 660.0, "learning_rate": 4.932839356085297e-05, "loss": 19.7505, "step": 12455 }, { "epoch": 0.5191946980117544, "grad_norm": 320.0, "learning_rate": 4.932164415950251e-05, "loss": 12.8756, "step": 12456 }, { "epoch": 0.5192363803092827, "grad_norm": 82.5, "learning_rate": 4.931489477051516e-05, "loss": 7.0319, "step": 12457 }, { "epoch": 0.5192780626068109, "grad_norm": 442.0, "learning_rate": 4.93081453940139e-05, "loss": 17.1258, "step": 12458 }, { "epoch": 0.5193197449043391, "grad_norm": 352.0, "learning_rate": 4.9301396030121756e-05, "loss": 13.1878, "step": 12459 }, { "epoch": 0.5193614272018674, "grad_norm": 163.0, "learning_rate": 4.929464667896175e-05, "loss": 8.5627, "step": 12460 }, { "epoch": 0.5194031094993956, "grad_norm": 660.0, "learning_rate": 4.928789734065685e-05, "loss": 18.5036, "step": 12461 }, { "epoch": 0.5194447917969238, "grad_norm": 536.0, "learning_rate": 4.92811480153301e-05, "loss": 18.2502, "step": 12462 }, { "epoch": 0.519486474094452, "grad_norm": 384.0, "learning_rate": 4.927439870310447e-05, "loss": 15.6253, "step": 12463 }, { "epoch": 0.5195281563919804, "grad_norm": 580.0, "learning_rate": 4.926764940410301e-05, "loss": 20.6255, "step": 12464 }, { "epoch": 0.5195698386895086, "grad_norm": 580.0, "learning_rate": 4.926090011844868e-05, "loss": 19.2501, "step": 12465 }, { "epoch": 0.5196115209870368, "grad_norm": 768.0, "learning_rate": 4.925415084626454e-05, "loss": 20.7506, "step": 12466 }, { "epoch": 0.519653203284565, "grad_norm": 1296.0, "learning_rate": 4.924740158767354e-05, "loss": 26.0039, "step": 12467 }, { "epoch": 0.5196948855820933, "grad_norm": 454.0, "learning_rate": 4.9240652342798726e-05, "loss": 16.252, "step": 12468 }, { "epoch": 0.5197365678796215, "grad_norm": 207.0, "learning_rate": 4.923390311176309e-05, "loss": 11.6255, "step": 12469 }, { "epoch": 0.5197782501771497, "grad_norm": 756.0, "learning_rate": 4.922715389468964e-05, "loss": 19.3764, "step": 12470 }, { "epoch": 0.5198199324746781, "grad_norm": 418.0, "learning_rate": 4.922040469170137e-05, "loss": 13.8753, "step": 12471 }, { "epoch": 0.5198616147722063, "grad_norm": 736.0, "learning_rate": 4.921365550292131e-05, "loss": 21.1257, "step": 12472 }, { "epoch": 0.5199032970697345, "grad_norm": 460.0, "learning_rate": 4.920690632847243e-05, "loss": 18.2503, "step": 12473 }, { "epoch": 0.5199449793672627, "grad_norm": 268.0, "learning_rate": 4.920015716847777e-05, "loss": 13.7506, "step": 12474 }, { "epoch": 0.519986661664791, "grad_norm": 344.0, "learning_rate": 4.91934080230603e-05, "loss": 14.8136, "step": 12475 }, { "epoch": 0.5200283439623192, "grad_norm": 302.0, "learning_rate": 4.918665889234306e-05, "loss": 14.0004, "step": 12476 }, { "epoch": 0.5200700262598474, "grad_norm": 119.0, "learning_rate": 4.9179909776449005e-05, "loss": 8.6255, "step": 12477 }, { "epoch": 0.5201117085573757, "grad_norm": 282.0, "learning_rate": 4.91731606755012e-05, "loss": 14.1877, "step": 12478 }, { "epoch": 0.520153390854904, "grad_norm": 472.0, "learning_rate": 4.916641158962259e-05, "loss": 17.0001, "step": 12479 }, { "epoch": 0.5201950731524322, "grad_norm": 204.0, "learning_rate": 4.9159662518936225e-05, "loss": 8.6876, "step": 12480 }, { "epoch": 0.5202367554499604, "grad_norm": 217.0, "learning_rate": 4.9152913463565056e-05, "loss": 11.3752, "step": 12481 }, { "epoch": 0.5202784377474886, "grad_norm": 240.0, "learning_rate": 4.914616442363213e-05, "loss": 10.7504, "step": 12482 }, { "epoch": 0.5203201200450169, "grad_norm": 245.0, "learning_rate": 4.9139415399260425e-05, "loss": 11.001, "step": 12483 }, { "epoch": 0.5203618023425451, "grad_norm": 360.0, "learning_rate": 4.9132666390572975e-05, "loss": 14.5002, "step": 12484 }, { "epoch": 0.5204034846400734, "grad_norm": 274.0, "learning_rate": 4.9125917397692726e-05, "loss": 13.1879, "step": 12485 }, { "epoch": 0.5204451669376016, "grad_norm": 207.0, "learning_rate": 4.911916842074274e-05, "loss": 12.1252, "step": 12486 }, { "epoch": 0.5204868492351299, "grad_norm": 496.0, "learning_rate": 4.911241945984597e-05, "loss": 17.5003, "step": 12487 }, { "epoch": 0.5205285315326581, "grad_norm": 668.0, "learning_rate": 4.910567051512544e-05, "loss": 19.6253, "step": 12488 }, { "epoch": 0.5205702138301863, "grad_norm": 456.0, "learning_rate": 4.9098921586704136e-05, "loss": 15.6256, "step": 12489 }, { "epoch": 0.5206118961277145, "grad_norm": 488.0, "learning_rate": 4.909217267470508e-05, "loss": 17.0002, "step": 12490 }, { "epoch": 0.5206535784252428, "grad_norm": 310.0, "learning_rate": 4.9085423779251235e-05, "loss": 14.0627, "step": 12491 }, { "epoch": 0.5206952607227711, "grad_norm": 458.0, "learning_rate": 4.907867490046565e-05, "loss": 13.6279, "step": 12492 }, { "epoch": 0.5207369430202993, "grad_norm": 370.0, "learning_rate": 4.907192603847128e-05, "loss": 13.1252, "step": 12493 }, { "epoch": 0.5207786253178275, "grad_norm": 604.0, "learning_rate": 4.9065177193391143e-05, "loss": 16.8793, "step": 12494 }, { "epoch": 0.5208203076153558, "grad_norm": 1144.0, "learning_rate": 4.9058428365348235e-05, "loss": 33.0004, "step": 12495 }, { "epoch": 0.520861989912884, "grad_norm": 512.0, "learning_rate": 4.905167955446555e-05, "loss": 17.8753, "step": 12496 }, { "epoch": 0.5209036722104122, "grad_norm": 364.0, "learning_rate": 4.9044930760866085e-05, "loss": 13.0003, "step": 12497 }, { "epoch": 0.5209453545079404, "grad_norm": 280.0, "learning_rate": 4.903818198467286e-05, "loss": 12.7504, "step": 12498 }, { "epoch": 0.5209870368054688, "grad_norm": 296.0, "learning_rate": 4.903143322600884e-05, "loss": 14.1877, "step": 12499 }, { "epoch": 0.521028719102997, "grad_norm": 390.0, "learning_rate": 4.902468448499705e-05, "loss": 15.1252, "step": 12500 }, { "epoch": 0.5210704014005252, "grad_norm": 490.0, "learning_rate": 4.9017935761760446e-05, "loss": 17.6255, "step": 12501 }, { "epoch": 0.5211120836980534, "grad_norm": 376.0, "learning_rate": 4.901118705642208e-05, "loss": 15.688, "step": 12502 }, { "epoch": 0.5211537659955817, "grad_norm": 1296.0, "learning_rate": 4.900443836910489e-05, "loss": 29.7545, "step": 12503 }, { "epoch": 0.5211954482931099, "grad_norm": 164.0, "learning_rate": 4.8997689699931924e-05, "loss": 10.6256, "step": 12504 }, { "epoch": 0.5212371305906381, "grad_norm": 528.0, "learning_rate": 4.8990941049026136e-05, "loss": 17.6252, "step": 12505 }, { "epoch": 0.5212788128881664, "grad_norm": 127.5, "learning_rate": 4.898419241651054e-05, "loss": 9.1879, "step": 12506 }, { "epoch": 0.5213204951856947, "grad_norm": 197.0, "learning_rate": 4.8977443802508126e-05, "loss": 12.8758, "step": 12507 }, { "epoch": 0.5213621774832229, "grad_norm": 640.0, "learning_rate": 4.8970695207141895e-05, "loss": 21.0005, "step": 12508 }, { "epoch": 0.5214038597807511, "grad_norm": 142.0, "learning_rate": 4.896394663053482e-05, "loss": 10.2502, "step": 12509 }, { "epoch": 0.5214455420782793, "grad_norm": 592.0, "learning_rate": 4.8957198072809936e-05, "loss": 20.2502, "step": 12510 }, { "epoch": 0.5214872243758076, "grad_norm": 354.0, "learning_rate": 4.895044953409018e-05, "loss": 14.1251, "step": 12511 }, { "epoch": 0.5215289066733358, "grad_norm": 478.0, "learning_rate": 4.894370101449861e-05, "loss": 14.8752, "step": 12512 }, { "epoch": 0.5215705889708641, "grad_norm": 362.0, "learning_rate": 4.8936952514158145e-05, "loss": 10.8165, "step": 12513 }, { "epoch": 0.5216122712683923, "grad_norm": 196.0, "learning_rate": 4.8930204033191845e-05, "loss": 11.0003, "step": 12514 }, { "epoch": 0.5216539535659206, "grad_norm": 360.0, "learning_rate": 4.8923455571722645e-05, "loss": 12.3127, "step": 12515 }, { "epoch": 0.5216956358634488, "grad_norm": 223.0, "learning_rate": 4.8916707129873595e-05, "loss": 10.1878, "step": 12516 }, { "epoch": 0.521737318160977, "grad_norm": 434.0, "learning_rate": 4.890995870776762e-05, "loss": 16.1252, "step": 12517 }, { "epoch": 0.5217790004585052, "grad_norm": 334.0, "learning_rate": 4.890321030552776e-05, "loss": 13.5002, "step": 12518 }, { "epoch": 0.5218206827560335, "grad_norm": 478.0, "learning_rate": 4.8896461923276994e-05, "loss": 17.1278, "step": 12519 }, { "epoch": 0.5218623650535618, "grad_norm": 362.0, "learning_rate": 4.8889713561138304e-05, "loss": 12.9379, "step": 12520 }, { "epoch": 0.52190404735109, "grad_norm": 576.0, "learning_rate": 4.8882965219234675e-05, "loss": 18.2503, "step": 12521 }, { "epoch": 0.5219457296486182, "grad_norm": 262.0, "learning_rate": 4.887621689768912e-05, "loss": 12.5006, "step": 12522 }, { "epoch": 0.5219874119461465, "grad_norm": 406.0, "learning_rate": 4.886946859662459e-05, "loss": 15.3755, "step": 12523 }, { "epoch": 0.5220290942436747, "grad_norm": 346.0, "learning_rate": 4.8862720316164125e-05, "loss": 14.1886, "step": 12524 }, { "epoch": 0.5220707765412029, "grad_norm": 656.0, "learning_rate": 4.885597205643065e-05, "loss": 19.6253, "step": 12525 }, { "epoch": 0.5221124588387311, "grad_norm": 219.0, "learning_rate": 4.884922381754721e-05, "loss": 13.5013, "step": 12526 }, { "epoch": 0.5221541411362595, "grad_norm": 1176.0, "learning_rate": 4.884247559963676e-05, "loss": 28.5021, "step": 12527 }, { "epoch": 0.5221958234337877, "grad_norm": 344.0, "learning_rate": 4.88357274028223e-05, "loss": 15.1883, "step": 12528 }, { "epoch": 0.5222375057313159, "grad_norm": 390.0, "learning_rate": 4.88289792272268e-05, "loss": 14.6879, "step": 12529 }, { "epoch": 0.5222791880288441, "grad_norm": 165.0, "learning_rate": 4.882223107297327e-05, "loss": 11.2502, "step": 12530 }, { "epoch": 0.5223208703263724, "grad_norm": 1012.0, "learning_rate": 4.881548294018467e-05, "loss": 22.129, "step": 12531 }, { "epoch": 0.5223625526239006, "grad_norm": 127.5, "learning_rate": 4.8808734828984e-05, "loss": 8.0635, "step": 12532 }, { "epoch": 0.5224042349214288, "grad_norm": 276.0, "learning_rate": 4.8801986739494254e-05, "loss": 13.3753, "step": 12533 }, { "epoch": 0.5224459172189571, "grad_norm": 350.0, "learning_rate": 4.87952386718384e-05, "loss": 14.3126, "step": 12534 }, { "epoch": 0.5224875995164854, "grad_norm": 394.0, "learning_rate": 4.878849062613942e-05, "loss": 15.5648, "step": 12535 }, { "epoch": 0.5225292818140136, "grad_norm": 434.0, "learning_rate": 4.878174260252033e-05, "loss": 15.5003, "step": 12536 }, { "epoch": 0.5225709641115418, "grad_norm": 252.0, "learning_rate": 4.877499460110406e-05, "loss": 13.6878, "step": 12537 }, { "epoch": 0.52261264640907, "grad_norm": 392.0, "learning_rate": 4.8768246622013655e-05, "loss": 15.5002, "step": 12538 }, { "epoch": 0.5226543287065983, "grad_norm": 282.0, "learning_rate": 4.876149866537203e-05, "loss": 14.3755, "step": 12539 }, { "epoch": 0.5226960110041265, "grad_norm": 844.0, "learning_rate": 4.875475073130223e-05, "loss": 24.1266, "step": 12540 }, { "epoch": 0.5227376933016548, "grad_norm": 316.0, "learning_rate": 4.874800281992719e-05, "loss": 11.7504, "step": 12541 }, { "epoch": 0.522779375599183, "grad_norm": 310.0, "learning_rate": 4.8741254931369925e-05, "loss": 13.3127, "step": 12542 }, { "epoch": 0.5228210578967113, "grad_norm": 215.0, "learning_rate": 4.8734507065753384e-05, "loss": 11.313, "step": 12543 }, { "epoch": 0.5228627401942395, "grad_norm": 382.0, "learning_rate": 4.872775922320057e-05, "loss": 15.3755, "step": 12544 }, { "epoch": 0.5229044224917677, "grad_norm": 466.0, "learning_rate": 4.872101140383446e-05, "loss": 16.7501, "step": 12545 }, { "epoch": 0.522946104789296, "grad_norm": 432.0, "learning_rate": 4.871426360777803e-05, "loss": 16.5016, "step": 12546 }, { "epoch": 0.5229877870868243, "grad_norm": 406.0, "learning_rate": 4.870751583515425e-05, "loss": 13.5004, "step": 12547 }, { "epoch": 0.5230294693843525, "grad_norm": 498.0, "learning_rate": 4.870076808608613e-05, "loss": 18.3752, "step": 12548 }, { "epoch": 0.5230711516818807, "grad_norm": 202.0, "learning_rate": 4.86940203606966e-05, "loss": 9.5629, "step": 12549 }, { "epoch": 0.523112833979409, "grad_norm": 330.0, "learning_rate": 4.8687272659108694e-05, "loss": 14.2502, "step": 12550 }, { "epoch": 0.5231545162769372, "grad_norm": 192.0, "learning_rate": 4.8680524981445334e-05, "loss": 10.938, "step": 12551 }, { "epoch": 0.5231961985744654, "grad_norm": 482.0, "learning_rate": 4.867377732782955e-05, "loss": 17.3754, "step": 12552 }, { "epoch": 0.5232378808719936, "grad_norm": 390.0, "learning_rate": 4.866702969838426e-05, "loss": 15.751, "step": 12553 }, { "epoch": 0.523279563169522, "grad_norm": 89.5, "learning_rate": 4.8660282093232506e-05, "loss": 8.2512, "step": 12554 }, { "epoch": 0.5233212454670502, "grad_norm": 298.0, "learning_rate": 4.865353451249721e-05, "loss": 12.2504, "step": 12555 }, { "epoch": 0.5233629277645784, "grad_norm": 486.0, "learning_rate": 4.864678695630137e-05, "loss": 17.5005, "step": 12556 }, { "epoch": 0.5234046100621066, "grad_norm": 448.0, "learning_rate": 4.8640039424767957e-05, "loss": 16.8753, "step": 12557 }, { "epoch": 0.5234462923596349, "grad_norm": 241.0, "learning_rate": 4.863329191801995e-05, "loss": 8.8139, "step": 12558 }, { "epoch": 0.5234879746571631, "grad_norm": 182.0, "learning_rate": 4.862654443618031e-05, "loss": 11.1877, "step": 12559 }, { "epoch": 0.5235296569546913, "grad_norm": 828.0, "learning_rate": 4.861979697937203e-05, "loss": 19.1348, "step": 12560 }, { "epoch": 0.5235713392522195, "grad_norm": 664.0, "learning_rate": 4.861304954771806e-05, "loss": 21.0003, "step": 12561 }, { "epoch": 0.5236130215497479, "grad_norm": 442.0, "learning_rate": 4.86063021413414e-05, "loss": 15.8757, "step": 12562 }, { "epoch": 0.5236547038472761, "grad_norm": 286.0, "learning_rate": 4.8599554760365e-05, "loss": 14.1879, "step": 12563 }, { "epoch": 0.5236963861448043, "grad_norm": 194.0, "learning_rate": 4.859280740491185e-05, "loss": 12.1253, "step": 12564 }, { "epoch": 0.5237380684423325, "grad_norm": 187.0, "learning_rate": 4.858606007510489e-05, "loss": 11.688, "step": 12565 }, { "epoch": 0.5237797507398608, "grad_norm": 2304.0, "learning_rate": 4.857931277106713e-05, "loss": 42.252, "step": 12566 }, { "epoch": 0.523821433037389, "grad_norm": 354.0, "learning_rate": 4.85725654929215e-05, "loss": 9.8754, "step": 12567 }, { "epoch": 0.5238631153349173, "grad_norm": 66.5, "learning_rate": 4.8565818240791014e-05, "loss": 8.3752, "step": 12568 }, { "epoch": 0.5239047976324455, "grad_norm": 244.0, "learning_rate": 4.8559071014798595e-05, "loss": 11.8131, "step": 12569 }, { "epoch": 0.5239464799299738, "grad_norm": 114.0, "learning_rate": 4.855232381506725e-05, "loss": 9.938, "step": 12570 }, { "epoch": 0.523988162227502, "grad_norm": 484.0, "learning_rate": 4.854557664171991e-05, "loss": 17.2503, "step": 12571 }, { "epoch": 0.5240298445250302, "grad_norm": 145.0, "learning_rate": 4.853882949487959e-05, "loss": 10.2508, "step": 12572 }, { "epoch": 0.5240715268225584, "grad_norm": 123.5, "learning_rate": 4.853208237466922e-05, "loss": 9.1253, "step": 12573 }, { "epoch": 0.5241132091200867, "grad_norm": 544.0, "learning_rate": 4.85253352812118e-05, "loss": 18.1255, "step": 12574 }, { "epoch": 0.524154891417615, "grad_norm": 208.0, "learning_rate": 4.851858821463025e-05, "loss": 10.7503, "step": 12575 }, { "epoch": 0.5241965737151432, "grad_norm": 88.0, "learning_rate": 4.8511841175047576e-05, "loss": 8.6918, "step": 12576 }, { "epoch": 0.5242382560126714, "grad_norm": 532.0, "learning_rate": 4.8505094162586715e-05, "loss": 18.3787, "step": 12577 }, { "epoch": 0.5242799383101997, "grad_norm": 102.5, "learning_rate": 4.849834717737066e-05, "loss": 8.8755, "step": 12578 }, { "epoch": 0.5243216206077279, "grad_norm": 89.0, "learning_rate": 4.8491600219522346e-05, "loss": 8.3753, "step": 12579 }, { "epoch": 0.5243633029052561, "grad_norm": 412.0, "learning_rate": 4.8484853289164775e-05, "loss": 14.4379, "step": 12580 }, { "epoch": 0.5244049852027843, "grad_norm": 222.0, "learning_rate": 4.8478106386420865e-05, "loss": 11.6878, "step": 12581 }, { "epoch": 0.5244466675003127, "grad_norm": 772.0, "learning_rate": 4.8471359511413604e-05, "loss": 22.5002, "step": 12582 }, { "epoch": 0.5244883497978409, "grad_norm": 182.0, "learning_rate": 4.846461266426596e-05, "loss": 11.9385, "step": 12583 }, { "epoch": 0.5245300320953691, "grad_norm": 494.0, "learning_rate": 4.8457865845100885e-05, "loss": 17.7507, "step": 12584 }, { "epoch": 0.5245717143928973, "grad_norm": 1688.0, "learning_rate": 4.8451119054041325e-05, "loss": 37.7503, "step": 12585 }, { "epoch": 0.5246133966904256, "grad_norm": 362.0, "learning_rate": 4.844437229121029e-05, "loss": 15.5008, "step": 12586 }, { "epoch": 0.5246550789879538, "grad_norm": 422.0, "learning_rate": 4.8437625556730674e-05, "loss": 13.5001, "step": 12587 }, { "epoch": 0.524696761285482, "grad_norm": 588.0, "learning_rate": 4.84308788507255e-05, "loss": 20.2518, "step": 12588 }, { "epoch": 0.5247384435830103, "grad_norm": 1184.0, "learning_rate": 4.842413217331768e-05, "loss": 28.8769, "step": 12589 }, { "epoch": 0.5247801258805386, "grad_norm": 133.0, "learning_rate": 4.841738552463021e-05, "loss": 10.1251, "step": 12590 }, { "epoch": 0.5248218081780668, "grad_norm": 364.0, "learning_rate": 4.8410638904786e-05, "loss": 15.1881, "step": 12591 }, { "epoch": 0.524863490475595, "grad_norm": 804.0, "learning_rate": 4.840389231390807e-05, "loss": 22.6254, "step": 12592 }, { "epoch": 0.5249051727731232, "grad_norm": 322.0, "learning_rate": 4.8397145752119315e-05, "loss": 13.6878, "step": 12593 }, { "epoch": 0.5249468550706515, "grad_norm": 396.0, "learning_rate": 4.8390399219542735e-05, "loss": 15.2524, "step": 12594 }, { "epoch": 0.5249885373681797, "grad_norm": 274.0, "learning_rate": 4.838365271630127e-05, "loss": 13.3127, "step": 12595 }, { "epoch": 0.525030219665708, "grad_norm": 342.0, "learning_rate": 4.837690624251788e-05, "loss": 13.3127, "step": 12596 }, { "epoch": 0.5250719019632362, "grad_norm": 220.0, "learning_rate": 4.837015979831551e-05, "loss": 7.9689, "step": 12597 }, { "epoch": 0.5251135842607645, "grad_norm": 298.0, "learning_rate": 4.836341338381714e-05, "loss": 13.1255, "step": 12598 }, { "epoch": 0.5251552665582927, "grad_norm": 312.0, "learning_rate": 4.835666699914568e-05, "loss": 14.1879, "step": 12599 }, { "epoch": 0.5251969488558209, "grad_norm": 736.0, "learning_rate": 4.834992064442414e-05, "loss": 25.7504, "step": 12600 }, { "epoch": 0.5252386311533491, "grad_norm": 452.0, "learning_rate": 4.834317431977541e-05, "loss": 16.001, "step": 12601 }, { "epoch": 0.5252803134508774, "grad_norm": 203.0, "learning_rate": 4.8336428025322514e-05, "loss": 5.6252, "step": 12602 }, { "epoch": 0.5253219957484057, "grad_norm": 113.0, "learning_rate": 4.832968176118833e-05, "loss": 6.2504, "step": 12603 }, { "epoch": 0.5253636780459339, "grad_norm": 85.0, "learning_rate": 4.832293552749587e-05, "loss": 9.9391, "step": 12604 }, { "epoch": 0.5254053603434621, "grad_norm": 344.0, "learning_rate": 4.8316189324368035e-05, "loss": 12.5651, "step": 12605 }, { "epoch": 0.5254470426409904, "grad_norm": 264.0, "learning_rate": 4.830944315192783e-05, "loss": 12.9384, "step": 12606 }, { "epoch": 0.5254887249385186, "grad_norm": 588.0, "learning_rate": 4.830269701029814e-05, "loss": 17.8755, "step": 12607 }, { "epoch": 0.5255304072360468, "grad_norm": 272.0, "learning_rate": 4.829595089960196e-05, "loss": 12.7502, "step": 12608 }, { "epoch": 0.525572089533575, "grad_norm": 414.0, "learning_rate": 4.828920481996221e-05, "loss": 15.6253, "step": 12609 }, { "epoch": 0.5256137718311034, "grad_norm": 160.0, "learning_rate": 4.828245877150189e-05, "loss": 10.7503, "step": 12610 }, { "epoch": 0.5256554541286316, "grad_norm": 516.0, "learning_rate": 4.827571275434388e-05, "loss": 17.5002, "step": 12611 }, { "epoch": 0.5256971364261598, "grad_norm": 1848.0, "learning_rate": 4.826896676861118e-05, "loss": 34.7504, "step": 12612 }, { "epoch": 0.525738818723688, "grad_norm": 308.0, "learning_rate": 4.82622208144267e-05, "loss": 13.6258, "step": 12613 }, { "epoch": 0.5257805010212163, "grad_norm": 452.0, "learning_rate": 4.825547489191342e-05, "loss": 16.2504, "step": 12614 }, { "epoch": 0.5258221833187445, "grad_norm": 114.5, "learning_rate": 4.824872900119424e-05, "loss": 9.7508, "step": 12615 }, { "epoch": 0.5258638656162727, "grad_norm": 564.0, "learning_rate": 4.824198314239215e-05, "loss": 17.254, "step": 12616 }, { "epoch": 0.5259055479138011, "grad_norm": 94.0, "learning_rate": 4.823523731563005e-05, "loss": 8.5629, "step": 12617 }, { "epoch": 0.5259472302113293, "grad_norm": 384.0, "learning_rate": 4.822849152103094e-05, "loss": 14.7534, "step": 12618 }, { "epoch": 0.5259889125088575, "grad_norm": 280.0, "learning_rate": 4.8221745758717697e-05, "loss": 13.3752, "step": 12619 }, { "epoch": 0.5260305948063857, "grad_norm": 484.0, "learning_rate": 4.821500002881331e-05, "loss": 17.001, "step": 12620 }, { "epoch": 0.526072277103914, "grad_norm": 386.0, "learning_rate": 4.82082543314407e-05, "loss": 14.5642, "step": 12621 }, { "epoch": 0.5261139594014422, "grad_norm": 960.0, "learning_rate": 4.820150866672282e-05, "loss": 23.0004, "step": 12622 }, { "epoch": 0.5261556416989704, "grad_norm": 644.0, "learning_rate": 4.8194763034782595e-05, "loss": 23.0018, "step": 12623 }, { "epoch": 0.5261973239964987, "grad_norm": 162.0, "learning_rate": 4.818801743574299e-05, "loss": 10.0629, "step": 12624 }, { "epoch": 0.526239006294027, "grad_norm": 173.0, "learning_rate": 4.8181271869726904e-05, "loss": 9.8134, "step": 12625 }, { "epoch": 0.5262806885915552, "grad_norm": 684.0, "learning_rate": 4.817452633685733e-05, "loss": 21.6255, "step": 12626 }, { "epoch": 0.5263223708890834, "grad_norm": 219.0, "learning_rate": 4.8167780837257145e-05, "loss": 11.3755, "step": 12627 }, { "epoch": 0.5263640531866116, "grad_norm": 464.0, "learning_rate": 4.8161035371049346e-05, "loss": 15.8133, "step": 12628 }, { "epoch": 0.5264057354841399, "grad_norm": 260.0, "learning_rate": 4.815428993835682e-05, "loss": 13.0005, "step": 12629 }, { "epoch": 0.5264474177816681, "grad_norm": 356.0, "learning_rate": 4.814754453930254e-05, "loss": 14.3129, "step": 12630 }, { "epoch": 0.5264891000791964, "grad_norm": 330.0, "learning_rate": 4.81407991740094e-05, "loss": 10.0653, "step": 12631 }, { "epoch": 0.5265307823767246, "grad_norm": 300.0, "learning_rate": 4.813405384260038e-05, "loss": 14.5005, "step": 12632 }, { "epoch": 0.5265724646742529, "grad_norm": 366.0, "learning_rate": 4.812730854519839e-05, "loss": 14.4381, "step": 12633 }, { "epoch": 0.5266141469717811, "grad_norm": 442.0, "learning_rate": 4.8120563281926365e-05, "loss": 16.1253, "step": 12634 }, { "epoch": 0.5266558292693093, "grad_norm": 1472.0, "learning_rate": 4.811381805290724e-05, "loss": 30.1292, "step": 12635 }, { "epoch": 0.5266975115668375, "grad_norm": 221.0, "learning_rate": 4.810707285826396e-05, "loss": 9.1261, "step": 12636 }, { "epoch": 0.5267391938643659, "grad_norm": 588.0, "learning_rate": 4.810032769811943e-05, "loss": 19.6254, "step": 12637 }, { "epoch": 0.5267808761618941, "grad_norm": 322.0, "learning_rate": 4.809358257259661e-05, "loss": 14.1258, "step": 12638 }, { "epoch": 0.5268225584594223, "grad_norm": 201.0, "learning_rate": 4.8086837481818405e-05, "loss": 11.1877, "step": 12639 }, { "epoch": 0.5268642407569505, "grad_norm": 544.0, "learning_rate": 4.8080092425907775e-05, "loss": 17.8754, "step": 12640 }, { "epoch": 0.5269059230544788, "grad_norm": 294.0, "learning_rate": 4.807334740498761e-05, "loss": 10.1252, "step": 12641 }, { "epoch": 0.526947605352007, "grad_norm": 248.0, "learning_rate": 4.806660241918088e-05, "loss": 11.5628, "step": 12642 }, { "epoch": 0.5269892876495352, "grad_norm": 408.0, "learning_rate": 4.805985746861047e-05, "loss": 15.1255, "step": 12643 }, { "epoch": 0.5270309699470634, "grad_norm": 30.375, "learning_rate": 4.8053112553399335e-05, "loss": 6.0951, "step": 12644 }, { "epoch": 0.5270726522445918, "grad_norm": 93.0, "learning_rate": 4.804636767367041e-05, "loss": 8.938, "step": 12645 }, { "epoch": 0.52711433454212, "grad_norm": 199.0, "learning_rate": 4.8039622829546596e-05, "loss": 10.7503, "step": 12646 }, { "epoch": 0.5271560168396482, "grad_norm": 180.0, "learning_rate": 4.8032878021150825e-05, "loss": 11.0626, "step": 12647 }, { "epoch": 0.5271976991371764, "grad_norm": 258.0, "learning_rate": 4.802613324860605e-05, "loss": 13.0627, "step": 12648 }, { "epoch": 0.5272393814347047, "grad_norm": 324.0, "learning_rate": 4.801938851203514e-05, "loss": 13.6887, "step": 12649 }, { "epoch": 0.5272810637322329, "grad_norm": 354.0, "learning_rate": 4.8012643811561084e-05, "loss": 15.2503, "step": 12650 }, { "epoch": 0.5273227460297611, "grad_norm": 266.0, "learning_rate": 4.8005899147306746e-05, "loss": 14.063, "step": 12651 }, { "epoch": 0.5273644283272894, "grad_norm": 238.0, "learning_rate": 4.799915451939509e-05, "loss": 12.2506, "step": 12652 }, { "epoch": 0.5274061106248177, "grad_norm": 464.0, "learning_rate": 4.7992409927949e-05, "loss": 16.5002, "step": 12653 }, { "epoch": 0.5274477929223459, "grad_norm": 310.0, "learning_rate": 4.798566537309144e-05, "loss": 13.5001, "step": 12654 }, { "epoch": 0.5274894752198741, "grad_norm": 604.0, "learning_rate": 4.797892085494529e-05, "loss": 19.7507, "step": 12655 }, { "epoch": 0.5275311575174023, "grad_norm": 336.0, "learning_rate": 4.79721763736335e-05, "loss": 14.0004, "step": 12656 }, { "epoch": 0.5275728398149306, "grad_norm": 392.0, "learning_rate": 4.796543192927896e-05, "loss": 15.3753, "step": 12657 }, { "epoch": 0.5276145221124589, "grad_norm": 230.0, "learning_rate": 4.7958687522004616e-05, "loss": 12.251, "step": 12658 }, { "epoch": 0.5276562044099871, "grad_norm": 800.0, "learning_rate": 4.7951943151933365e-05, "loss": 22.3753, "step": 12659 }, { "epoch": 0.5276978867075153, "grad_norm": 560.0, "learning_rate": 4.794519881918814e-05, "loss": 17.6259, "step": 12660 }, { "epoch": 0.5277395690050436, "grad_norm": 356.0, "learning_rate": 4.793845452389183e-05, "loss": 13.6879, "step": 12661 }, { "epoch": 0.5277812513025718, "grad_norm": 130.0, "learning_rate": 4.79317102661674e-05, "loss": 6.6252, "step": 12662 }, { "epoch": 0.5278229336001, "grad_norm": 620.0, "learning_rate": 4.792496604613771e-05, "loss": 17.7504, "step": 12663 }, { "epoch": 0.5278646158976282, "grad_norm": 436.0, "learning_rate": 4.7918221863925714e-05, "loss": 15.8132, "step": 12664 }, { "epoch": 0.5279062981951566, "grad_norm": 368.0, "learning_rate": 4.79114777196543e-05, "loss": 15.0007, "step": 12665 }, { "epoch": 0.5279479804926848, "grad_norm": 340.0, "learning_rate": 4.79047336134464e-05, "loss": 14.5628, "step": 12666 }, { "epoch": 0.527989662790213, "grad_norm": 160.0, "learning_rate": 4.7897989545424895e-05, "loss": 8.9379, "step": 12667 }, { "epoch": 0.5280313450877412, "grad_norm": 308.0, "learning_rate": 4.789124551571275e-05, "loss": 13.3752, "step": 12668 }, { "epoch": 0.5280730273852695, "grad_norm": 296.0, "learning_rate": 4.788450152443281e-05, "loss": 12.8753, "step": 12669 }, { "epoch": 0.5281147096827977, "grad_norm": 352.0, "learning_rate": 4.7877757571708025e-05, "loss": 14.2507, "step": 12670 }, { "epoch": 0.5281563919803259, "grad_norm": 382.0, "learning_rate": 4.787101365766131e-05, "loss": 14.1252, "step": 12671 }, { "epoch": 0.5281980742778541, "grad_norm": 426.0, "learning_rate": 4.786426978241555e-05, "loss": 17.3754, "step": 12672 }, { "epoch": 0.5282397565753825, "grad_norm": 278.0, "learning_rate": 4.785752594609365e-05, "loss": 13.3128, "step": 12673 }, { "epoch": 0.5282814388729107, "grad_norm": 434.0, "learning_rate": 4.7850782148818556e-05, "loss": 16.2502, "step": 12674 }, { "epoch": 0.5283231211704389, "grad_norm": 952.0, "learning_rate": 4.784403839071313e-05, "loss": 24.2511, "step": 12675 }, { "epoch": 0.5283648034679671, "grad_norm": 124.5, "learning_rate": 4.7837294671900314e-05, "loss": 10.7503, "step": 12676 }, { "epoch": 0.5284064857654954, "grad_norm": 240.0, "learning_rate": 4.783055099250297e-05, "loss": 12.6878, "step": 12677 }, { "epoch": 0.5284481680630236, "grad_norm": 302.0, "learning_rate": 4.782380735264405e-05, "loss": 13.6251, "step": 12678 }, { "epoch": 0.5284898503605518, "grad_norm": 148.0, "learning_rate": 4.781706375244642e-05, "loss": 9.9381, "step": 12679 }, { "epoch": 0.5285315326580801, "grad_norm": 133.0, "learning_rate": 4.781032019203301e-05, "loss": 11.313, "step": 12680 }, { "epoch": 0.5285732149556084, "grad_norm": 360.0, "learning_rate": 4.780357667152669e-05, "loss": 14.5628, "step": 12681 }, { "epoch": 0.5286148972531366, "grad_norm": 124.0, "learning_rate": 4.779683319105039e-05, "loss": 8.9378, "step": 12682 }, { "epoch": 0.5286565795506648, "grad_norm": 102.5, "learning_rate": 4.7790089750727e-05, "loss": 10.1257, "step": 12683 }, { "epoch": 0.528698261848193, "grad_norm": 466.0, "learning_rate": 4.778334635067942e-05, "loss": 16.6253, "step": 12684 }, { "epoch": 0.5287399441457213, "grad_norm": 564.0, "learning_rate": 4.7776602991030545e-05, "loss": 17.5031, "step": 12685 }, { "epoch": 0.5287816264432496, "grad_norm": 544.0, "learning_rate": 4.7769859671903294e-05, "loss": 17.6253, "step": 12686 }, { "epoch": 0.5288233087407778, "grad_norm": 308.0, "learning_rate": 4.7763116393420526e-05, "loss": 13.1877, "step": 12687 }, { "epoch": 0.528864991038306, "grad_norm": 326.0, "learning_rate": 4.775637315570519e-05, "loss": 13.2503, "step": 12688 }, { "epoch": 0.5289066733358343, "grad_norm": 402.0, "learning_rate": 4.774962995888012e-05, "loss": 14.6877, "step": 12689 }, { "epoch": 0.5289483556333625, "grad_norm": 420.0, "learning_rate": 4.774288680306827e-05, "loss": 15.8127, "step": 12690 }, { "epoch": 0.5289900379308907, "grad_norm": 446.0, "learning_rate": 4.773614368839249e-05, "loss": 15.5004, "step": 12691 }, { "epoch": 0.529031720228419, "grad_norm": 396.0, "learning_rate": 4.7729400614975706e-05, "loss": 14.5019, "step": 12692 }, { "epoch": 0.5290734025259473, "grad_norm": 368.0, "learning_rate": 4.772265758294078e-05, "loss": 15.1256, "step": 12693 }, { "epoch": 0.5291150848234755, "grad_norm": 91.5, "learning_rate": 4.771591459241064e-05, "loss": 10.4379, "step": 12694 }, { "epoch": 0.5291567671210037, "grad_norm": 219.0, "learning_rate": 4.770917164350814e-05, "loss": 11.6879, "step": 12695 }, { "epoch": 0.529198449418532, "grad_norm": 408.0, "learning_rate": 4.770242873635621e-05, "loss": 15.0628, "step": 12696 }, { "epoch": 0.5292401317160602, "grad_norm": 47.25, "learning_rate": 4.76956858710777e-05, "loss": 7.2814, "step": 12697 }, { "epoch": 0.5292818140135884, "grad_norm": 370.0, "learning_rate": 4.768894304779554e-05, "loss": 14.688, "step": 12698 }, { "epoch": 0.5293234963111166, "grad_norm": 316.0, "learning_rate": 4.7682200266632584e-05, "loss": 14.0629, "step": 12699 }, { "epoch": 0.529365178608645, "grad_norm": 306.0, "learning_rate": 4.767545752771175e-05, "loss": 12.5632, "step": 12700 }, { "epoch": 0.5294068609061732, "grad_norm": 366.0, "learning_rate": 4.766871483115589e-05, "loss": 15.2503, "step": 12701 }, { "epoch": 0.5294485432037014, "grad_norm": 56.25, "learning_rate": 4.766197217708793e-05, "loss": 8.6251, "step": 12702 }, { "epoch": 0.5294902255012296, "grad_norm": 352.0, "learning_rate": 4.765522956563071e-05, "loss": 14.9389, "step": 12703 }, { "epoch": 0.5295319077987579, "grad_norm": 223.0, "learning_rate": 4.764848699690716e-05, "loss": 11.8128, "step": 12704 }, { "epoch": 0.5295735900962861, "grad_norm": 456.0, "learning_rate": 4.764174447104012e-05, "loss": 16.2503, "step": 12705 }, { "epoch": 0.5296152723938143, "grad_norm": 186.0, "learning_rate": 4.763500198815253e-05, "loss": 11.0628, "step": 12706 }, { "epoch": 0.5296569546913426, "grad_norm": 203.0, "learning_rate": 4.7628259548367206e-05, "loss": 10.6252, "step": 12707 }, { "epoch": 0.5296986369888709, "grad_norm": 144.0, "learning_rate": 4.762151715180708e-05, "loss": 10.0009, "step": 12708 }, { "epoch": 0.5297403192863991, "grad_norm": 704.0, "learning_rate": 4.761477479859501e-05, "loss": 21.8757, "step": 12709 }, { "epoch": 0.5297820015839273, "grad_norm": 348.0, "learning_rate": 4.760803248885388e-05, "loss": 14.3127, "step": 12710 }, { "epoch": 0.5298236838814555, "grad_norm": 96.0, "learning_rate": 4.7601290222706565e-05, "loss": 9.9378, "step": 12711 }, { "epoch": 0.5298653661789838, "grad_norm": 368.0, "learning_rate": 4.759454800027597e-05, "loss": 15.3756, "step": 12712 }, { "epoch": 0.529907048476512, "grad_norm": 344.0, "learning_rate": 4.758780582168492e-05, "loss": 15.1255, "step": 12713 }, { "epoch": 0.5299487307740403, "grad_norm": 928.0, "learning_rate": 4.758106368705635e-05, "loss": 27.626, "step": 12714 }, { "epoch": 0.5299904130715685, "grad_norm": 278.0, "learning_rate": 4.7574321596513094e-05, "loss": 12.3128, "step": 12715 }, { "epoch": 0.5300320953690968, "grad_norm": 300.0, "learning_rate": 4.756757955017806e-05, "loss": 12.6881, "step": 12716 }, { "epoch": 0.530073777666625, "grad_norm": 348.0, "learning_rate": 4.7560837548174084e-05, "loss": 15.5005, "step": 12717 }, { "epoch": 0.5301154599641532, "grad_norm": 648.0, "learning_rate": 4.755409559062408e-05, "loss": 18.8752, "step": 12718 }, { "epoch": 0.5301571422616814, "grad_norm": 221.0, "learning_rate": 4.754735367765088e-05, "loss": 12.0005, "step": 12719 }, { "epoch": 0.5301988245592097, "grad_norm": 344.0, "learning_rate": 4.7540611809377386e-05, "loss": 13.6877, "step": 12720 }, { "epoch": 0.530240506856738, "grad_norm": 1672.0, "learning_rate": 4.753386998592646e-05, "loss": 39.5004, "step": 12721 }, { "epoch": 0.5302821891542662, "grad_norm": 490.0, "learning_rate": 4.752712820742097e-05, "loss": 16.7501, "step": 12722 }, { "epoch": 0.5303238714517944, "grad_norm": 536.0, "learning_rate": 4.752038647398378e-05, "loss": 16.7503, "step": 12723 }, { "epoch": 0.5303655537493227, "grad_norm": 191.0, "learning_rate": 4.751364478573779e-05, "loss": 11.0002, "step": 12724 }, { "epoch": 0.5304072360468509, "grad_norm": 596.0, "learning_rate": 4.7506903142805824e-05, "loss": 19.6252, "step": 12725 }, { "epoch": 0.5304489183443791, "grad_norm": 74.0, "learning_rate": 4.750016154531079e-05, "loss": 8.2502, "step": 12726 }, { "epoch": 0.5304906006419073, "grad_norm": 528.0, "learning_rate": 4.749341999337551e-05, "loss": 16.8753, "step": 12727 }, { "epoch": 0.5305322829394357, "grad_norm": 288.0, "learning_rate": 4.74866784871229e-05, "loss": 12.1877, "step": 12728 }, { "epoch": 0.5305739652369639, "grad_norm": 700.0, "learning_rate": 4.747993702667577e-05, "loss": 19.5005, "step": 12729 }, { "epoch": 0.5306156475344921, "grad_norm": 374.0, "learning_rate": 4.747319561215703e-05, "loss": 13.0627, "step": 12730 }, { "epoch": 0.5306573298320203, "grad_norm": 608.0, "learning_rate": 4.746645424368951e-05, "loss": 17.6253, "step": 12731 }, { "epoch": 0.5306990121295486, "grad_norm": 1056.0, "learning_rate": 4.745971292139609e-05, "loss": 26.8764, "step": 12732 }, { "epoch": 0.5307406944270768, "grad_norm": 512.0, "learning_rate": 4.7452971645399636e-05, "loss": 16.1253, "step": 12733 }, { "epoch": 0.530782376724605, "grad_norm": 274.0, "learning_rate": 4.7446230415823e-05, "loss": 13.6877, "step": 12734 }, { "epoch": 0.5308240590221333, "grad_norm": 302.0, "learning_rate": 4.743948923278902e-05, "loss": 13.1885, "step": 12735 }, { "epoch": 0.5308657413196616, "grad_norm": 362.0, "learning_rate": 4.743274809642061e-05, "loss": 14.0011, "step": 12736 }, { "epoch": 0.5309074236171898, "grad_norm": 350.0, "learning_rate": 4.7426007006840566e-05, "loss": 12.5005, "step": 12737 }, { "epoch": 0.530949105914718, "grad_norm": 59.5, "learning_rate": 4.7419265964171796e-05, "loss": 8.3129, "step": 12738 }, { "epoch": 0.5309907882122462, "grad_norm": 107.0, "learning_rate": 4.7412524968537106e-05, "loss": 10.4379, "step": 12739 }, { "epoch": 0.5310324705097745, "grad_norm": 302.0, "learning_rate": 4.740578402005941e-05, "loss": 11.5628, "step": 12740 }, { "epoch": 0.5310741528073027, "grad_norm": 146.0, "learning_rate": 4.73990431188615e-05, "loss": 10.2505, "step": 12741 }, { "epoch": 0.531115835104831, "grad_norm": 292.0, "learning_rate": 4.7392302265066285e-05, "loss": 12.0003, "step": 12742 }, { "epoch": 0.5311575174023592, "grad_norm": 560.0, "learning_rate": 4.738556145879657e-05, "loss": 18.3753, "step": 12743 }, { "epoch": 0.5311991996998875, "grad_norm": 165.0, "learning_rate": 4.737882070017525e-05, "loss": 9.5003, "step": 12744 }, { "epoch": 0.5312408819974157, "grad_norm": 142.0, "learning_rate": 4.7372079989325134e-05, "loss": 11.0002, "step": 12745 }, { "epoch": 0.5312825642949439, "grad_norm": 276.0, "learning_rate": 4.7365339326369105e-05, "loss": 10.2502, "step": 12746 }, { "epoch": 0.5313242465924721, "grad_norm": 584.0, "learning_rate": 4.7358598711430005e-05, "loss": 19.3753, "step": 12747 }, { "epoch": 0.5313659288900004, "grad_norm": 952.0, "learning_rate": 4.7351858144630674e-05, "loss": 28.5003, "step": 12748 }, { "epoch": 0.5314076111875287, "grad_norm": 188.0, "learning_rate": 4.734511762609395e-05, "loss": 10.7502, "step": 12749 }, { "epoch": 0.5314492934850569, "grad_norm": 102.0, "learning_rate": 4.733837715594272e-05, "loss": 4.6876, "step": 12750 }, { "epoch": 0.5314909757825851, "grad_norm": 536.0, "learning_rate": 4.733163673429978e-05, "loss": 18.3763, "step": 12751 }, { "epoch": 0.5315326580801134, "grad_norm": 109.0, "learning_rate": 4.732489636128802e-05, "loss": 9.7508, "step": 12752 }, { "epoch": 0.5315743403776416, "grad_norm": 219.0, "learning_rate": 4.731815603703024e-05, "loss": 11.6877, "step": 12753 }, { "epoch": 0.5316160226751698, "grad_norm": 83.0, "learning_rate": 4.731141576164932e-05, "loss": 7.3757, "step": 12754 }, { "epoch": 0.531657704972698, "grad_norm": 193.0, "learning_rate": 4.7304675535268064e-05, "loss": 12.0006, "step": 12755 }, { "epoch": 0.5316993872702264, "grad_norm": 175.0, "learning_rate": 4.729793535800937e-05, "loss": 11.4378, "step": 12756 }, { "epoch": 0.5317410695677546, "grad_norm": 146.0, "learning_rate": 4.729119522999601e-05, "loss": 10.1254, "step": 12757 }, { "epoch": 0.5317827518652828, "grad_norm": 228.0, "learning_rate": 4.728445515135087e-05, "loss": 12.0628, "step": 12758 }, { "epoch": 0.531824434162811, "grad_norm": 145.0, "learning_rate": 4.727771512219677e-05, "loss": 8.8127, "step": 12759 }, { "epoch": 0.5318661164603393, "grad_norm": 330.0, "learning_rate": 4.727097514265657e-05, "loss": 12.8138, "step": 12760 }, { "epoch": 0.5319077987578675, "grad_norm": 308.0, "learning_rate": 4.726423521285307e-05, "loss": 14.6265, "step": 12761 }, { "epoch": 0.5319494810553957, "grad_norm": 219.0, "learning_rate": 4.7257495332909155e-05, "loss": 11.9378, "step": 12762 }, { "epoch": 0.5319911633529241, "grad_norm": 496.0, "learning_rate": 4.72507555029476e-05, "loss": 17.8753, "step": 12763 }, { "epoch": 0.5320328456504523, "grad_norm": 362.0, "learning_rate": 4.724401572309129e-05, "loss": 13.1251, "step": 12764 }, { "epoch": 0.5320745279479805, "grad_norm": 109.0, "learning_rate": 4.7237275993463023e-05, "loss": 9.3145, "step": 12765 }, { "epoch": 0.5321162102455087, "grad_norm": 247.0, "learning_rate": 4.723053631418566e-05, "loss": 12.7506, "step": 12766 }, { "epoch": 0.532157892543037, "grad_norm": 488.0, "learning_rate": 4.722379668538201e-05, "loss": 15.5003, "step": 12767 }, { "epoch": 0.5321995748405652, "grad_norm": 470.0, "learning_rate": 4.7217057107174924e-05, "loss": 16.2503, "step": 12768 }, { "epoch": 0.5322412571380934, "grad_norm": 672.0, "learning_rate": 4.72103175796872e-05, "loss": 21.2502, "step": 12769 }, { "epoch": 0.5322829394356217, "grad_norm": 604.0, "learning_rate": 4.7203578103041697e-05, "loss": 21.3754, "step": 12770 }, { "epoch": 0.53232462173315, "grad_norm": 225.0, "learning_rate": 4.7196838677361236e-05, "loss": 10.6253, "step": 12771 }, { "epoch": 0.5323663040306782, "grad_norm": 69.5, "learning_rate": 4.719009930276863e-05, "loss": 8.5003, "step": 12772 }, { "epoch": 0.5324079863282064, "grad_norm": 255.0, "learning_rate": 4.7183359979386705e-05, "loss": 12.7502, "step": 12773 }, { "epoch": 0.5324496686257346, "grad_norm": 272.0, "learning_rate": 4.717662070733832e-05, "loss": 11.003, "step": 12774 }, { "epoch": 0.5324913509232629, "grad_norm": 1048.0, "learning_rate": 4.716988148674625e-05, "loss": 24.3789, "step": 12775 }, { "epoch": 0.5325330332207912, "grad_norm": 324.0, "learning_rate": 4.716314231773336e-05, "loss": 13.9377, "step": 12776 }, { "epoch": 0.5325747155183194, "grad_norm": 175.0, "learning_rate": 4.715640320042243e-05, "loss": 10.1879, "step": 12777 }, { "epoch": 0.5326163978158476, "grad_norm": 156.0, "learning_rate": 4.7149664134936335e-05, "loss": 9.0003, "step": 12778 }, { "epoch": 0.5326580801133759, "grad_norm": 352.0, "learning_rate": 4.714292512139783e-05, "loss": 14.1255, "step": 12779 }, { "epoch": 0.5326997624109041, "grad_norm": 454.0, "learning_rate": 4.71361861599298e-05, "loss": 17.1253, "step": 12780 }, { "epoch": 0.5327414447084323, "grad_norm": 324.0, "learning_rate": 4.7129447250655004e-05, "loss": 13.7504, "step": 12781 }, { "epoch": 0.5327831270059605, "grad_norm": 124.5, "learning_rate": 4.712270839369632e-05, "loss": 6.5324, "step": 12782 }, { "epoch": 0.5328248093034889, "grad_norm": 2128.0, "learning_rate": 4.71159695891765e-05, "loss": 42.0002, "step": 12783 }, { "epoch": 0.5328664916010171, "grad_norm": 414.0, "learning_rate": 4.71092308372184e-05, "loss": 14.9378, "step": 12784 }, { "epoch": 0.5329081738985453, "grad_norm": 310.0, "learning_rate": 4.710249213794483e-05, "loss": 12.4378, "step": 12785 }, { "epoch": 0.5329498561960735, "grad_norm": 356.0, "learning_rate": 4.709575349147859e-05, "loss": 15.3129, "step": 12786 }, { "epoch": 0.5329915384936018, "grad_norm": 116.5, "learning_rate": 4.708901489794249e-05, "loss": 8.2506, "step": 12787 }, { "epoch": 0.53303322079113, "grad_norm": 628.0, "learning_rate": 4.708227635745938e-05, "loss": 20.7502, "step": 12788 }, { "epoch": 0.5330749030886582, "grad_norm": 266.0, "learning_rate": 4.7075537870152015e-05, "loss": 13.3752, "step": 12789 }, { "epoch": 0.5331165853861864, "grad_norm": 154.0, "learning_rate": 4.706879943614326e-05, "loss": 8.938, "step": 12790 }, { "epoch": 0.5331582676837148, "grad_norm": 100.5, "learning_rate": 4.7062061055555866e-05, "loss": 9.2502, "step": 12791 }, { "epoch": 0.533199949981243, "grad_norm": 484.0, "learning_rate": 4.70553227285127e-05, "loss": 17.1253, "step": 12792 }, { "epoch": 0.5332416322787712, "grad_norm": 952.0, "learning_rate": 4.704858445513651e-05, "loss": 25.0015, "step": 12793 }, { "epoch": 0.5332833145762994, "grad_norm": 204.0, "learning_rate": 4.704184623555016e-05, "loss": 12.188, "step": 12794 }, { "epoch": 0.5333249968738277, "grad_norm": 176.0, "learning_rate": 4.703510806987639e-05, "loss": 11.5638, "step": 12795 }, { "epoch": 0.5333666791713559, "grad_norm": 390.0, "learning_rate": 4.702836995823806e-05, "loss": 15.1877, "step": 12796 }, { "epoch": 0.5334083614688842, "grad_norm": 548.0, "learning_rate": 4.702163190075795e-05, "loss": 16.1257, "step": 12797 }, { "epoch": 0.5334500437664124, "grad_norm": 390.0, "learning_rate": 4.7014893897558866e-05, "loss": 15.1261, "step": 12798 }, { "epoch": 0.5334917260639407, "grad_norm": 442.0, "learning_rate": 4.700815594876359e-05, "loss": 12.8755, "step": 12799 }, { "epoch": 0.5335334083614689, "grad_norm": 108.0, "learning_rate": 4.700141805449496e-05, "loss": 8.1252, "step": 12800 }, { "epoch": 0.5335750906589971, "grad_norm": 151.0, "learning_rate": 4.699468021487573e-05, "loss": 10.563, "step": 12801 }, { "epoch": 0.5336167729565253, "grad_norm": 540.0, "learning_rate": 4.698794243002874e-05, "loss": 17.2502, "step": 12802 }, { "epoch": 0.5336584552540536, "grad_norm": 154.0, "learning_rate": 4.698120470007675e-05, "loss": 9.4381, "step": 12803 }, { "epoch": 0.5337001375515819, "grad_norm": 524.0, "learning_rate": 4.6974467025142586e-05, "loss": 16.8752, "step": 12804 }, { "epoch": 0.5337418198491101, "grad_norm": 178.0, "learning_rate": 4.696772940534901e-05, "loss": 10.8767, "step": 12805 }, { "epoch": 0.5337835021466383, "grad_norm": 552.0, "learning_rate": 4.6960991840818865e-05, "loss": 16.3753, "step": 12806 }, { "epoch": 0.5338251844441666, "grad_norm": 111.5, "learning_rate": 4.6954254331674886e-05, "loss": 9.4377, "step": 12807 }, { "epoch": 0.5338668667416948, "grad_norm": 756.0, "learning_rate": 4.6947516878039904e-05, "loss": 20.7502, "step": 12808 }, { "epoch": 0.533908549039223, "grad_norm": 640.0, "learning_rate": 4.69407794800367e-05, "loss": 16.8777, "step": 12809 }, { "epoch": 0.5339502313367512, "grad_norm": 328.0, "learning_rate": 4.693404213778805e-05, "loss": 14.0005, "step": 12810 }, { "epoch": 0.5339919136342796, "grad_norm": 478.0, "learning_rate": 4.6927304851416754e-05, "loss": 17.3752, "step": 12811 }, { "epoch": 0.5340335959318078, "grad_norm": 212.0, "learning_rate": 4.6920567621045616e-05, "loss": 12.0002, "step": 12812 }, { "epoch": 0.534075278229336, "grad_norm": 168.0, "learning_rate": 4.691383044679739e-05, "loss": 10.8137, "step": 12813 }, { "epoch": 0.5341169605268642, "grad_norm": 338.0, "learning_rate": 4.69070933287949e-05, "loss": 14.3752, "step": 12814 }, { "epoch": 0.5341586428243925, "grad_norm": 163.0, "learning_rate": 4.690035626716088e-05, "loss": 9.626, "step": 12815 }, { "epoch": 0.5342003251219207, "grad_norm": 107.5, "learning_rate": 4.6893619262018177e-05, "loss": 8.0005, "step": 12816 }, { "epoch": 0.5342420074194489, "grad_norm": 604.0, "learning_rate": 4.688688231348951e-05, "loss": 18.2506, "step": 12817 }, { "epoch": 0.5342836897169772, "grad_norm": 350.0, "learning_rate": 4.68801454216977e-05, "loss": 13.5003, "step": 12818 }, { "epoch": 0.5343253720145055, "grad_norm": 362.0, "learning_rate": 4.6873408586765506e-05, "loss": 12.4399, "step": 12819 }, { "epoch": 0.5343670543120337, "grad_norm": 832.0, "learning_rate": 4.686667180881574e-05, "loss": 22.5005, "step": 12820 }, { "epoch": 0.5344087366095619, "grad_norm": 320.0, "learning_rate": 4.685993508797114e-05, "loss": 12.5638, "step": 12821 }, { "epoch": 0.5344504189070901, "grad_norm": 298.0, "learning_rate": 4.68531984243545e-05, "loss": 13.0628, "step": 12822 }, { "epoch": 0.5344921012046184, "grad_norm": 314.0, "learning_rate": 4.684646181808859e-05, "loss": 12.1275, "step": 12823 }, { "epoch": 0.5345337835021466, "grad_norm": 290.0, "learning_rate": 4.683972526929622e-05, "loss": 13.0629, "step": 12824 }, { "epoch": 0.5345754657996749, "grad_norm": 238.0, "learning_rate": 4.6832988778100116e-05, "loss": 11.6878, "step": 12825 }, { "epoch": 0.5346171480972031, "grad_norm": 708.0, "learning_rate": 4.68262523446231e-05, "loss": 21.8754, "step": 12826 }, { "epoch": 0.5346588303947314, "grad_norm": 780.0, "learning_rate": 4.6819515968987886e-05, "loss": 21.0004, "step": 12827 }, { "epoch": 0.5347005126922596, "grad_norm": 47.5, "learning_rate": 4.68127796513173e-05, "loss": 6.7816, "step": 12828 }, { "epoch": 0.5347421949897878, "grad_norm": 552.0, "learning_rate": 4.6806043391734066e-05, "loss": 17.0002, "step": 12829 }, { "epoch": 0.534783877287316, "grad_norm": 390.0, "learning_rate": 4.6799307190360995e-05, "loss": 15.4386, "step": 12830 }, { "epoch": 0.5348255595848443, "grad_norm": 420.0, "learning_rate": 4.679257104732082e-05, "loss": 15.8752, "step": 12831 }, { "epoch": 0.5348672418823726, "grad_norm": 194.0, "learning_rate": 4.6785834962736335e-05, "loss": 11.0003, "step": 12832 }, { "epoch": 0.5349089241799008, "grad_norm": 636.0, "learning_rate": 4.6779098936730276e-05, "loss": 22.1251, "step": 12833 }, { "epoch": 0.534950606477429, "grad_norm": 388.0, "learning_rate": 4.6772362969425436e-05, "loss": 13.3164, "step": 12834 }, { "epoch": 0.5349922887749573, "grad_norm": 296.0, "learning_rate": 4.676562706094457e-05, "loss": 13.6252, "step": 12835 }, { "epoch": 0.5350339710724855, "grad_norm": 378.0, "learning_rate": 4.675889121141043e-05, "loss": 15.3128, "step": 12836 }, { "epoch": 0.5350756533700137, "grad_norm": 406.0, "learning_rate": 4.675215542094578e-05, "loss": 14.876, "step": 12837 }, { "epoch": 0.535117335667542, "grad_norm": 108.5, "learning_rate": 4.674541968967341e-05, "loss": 11.5005, "step": 12838 }, { "epoch": 0.5351590179650703, "grad_norm": 272.0, "learning_rate": 4.673868401771603e-05, "loss": 11.126, "step": 12839 }, { "epoch": 0.5352007002625985, "grad_norm": 149.0, "learning_rate": 4.673194840519644e-05, "loss": 9.6257, "step": 12840 }, { "epoch": 0.5352423825601267, "grad_norm": 288.0, "learning_rate": 4.6725212852237366e-05, "loss": 12.1254, "step": 12841 }, { "epoch": 0.535284064857655, "grad_norm": 227.0, "learning_rate": 4.67184773589616e-05, "loss": 12.1252, "step": 12842 }, { "epoch": 0.5353257471551832, "grad_norm": 2008.0, "learning_rate": 4.671174192549185e-05, "loss": 43.0003, "step": 12843 }, { "epoch": 0.5353674294527114, "grad_norm": 416.0, "learning_rate": 4.670500655195091e-05, "loss": 15.0643, "step": 12844 }, { "epoch": 0.5354091117502396, "grad_norm": 512.0, "learning_rate": 4.669827123846151e-05, "loss": 16.7548, "step": 12845 }, { "epoch": 0.535450794047768, "grad_norm": 344.0, "learning_rate": 4.669153598514642e-05, "loss": 15.001, "step": 12846 }, { "epoch": 0.5354924763452962, "grad_norm": 420.0, "learning_rate": 4.668480079212837e-05, "loss": 16.8754, "step": 12847 }, { "epoch": 0.5355341586428244, "grad_norm": 229.0, "learning_rate": 4.667806565953013e-05, "loss": 8.1894, "step": 12848 }, { "epoch": 0.5355758409403526, "grad_norm": 320.0, "learning_rate": 4.667133058747442e-05, "loss": 12.1253, "step": 12849 }, { "epoch": 0.5356175232378809, "grad_norm": 548.0, "learning_rate": 4.666459557608403e-05, "loss": 18.6251, "step": 12850 }, { "epoch": 0.5356592055354091, "grad_norm": 112.5, "learning_rate": 4.665786062548166e-05, "loss": 5.7502, "step": 12851 }, { "epoch": 0.5357008878329373, "grad_norm": 274.0, "learning_rate": 4.6651125735790104e-05, "loss": 12.938, "step": 12852 }, { "epoch": 0.5357425701304656, "grad_norm": 668.0, "learning_rate": 4.6644390907132045e-05, "loss": 18.6273, "step": 12853 }, { "epoch": 0.5357842524279939, "grad_norm": 310.0, "learning_rate": 4.6637656139630293e-05, "loss": 12.8754, "step": 12854 }, { "epoch": 0.5358259347255221, "grad_norm": 274.0, "learning_rate": 4.6630921433407527e-05, "loss": 12.1252, "step": 12855 }, { "epoch": 0.5358676170230503, "grad_norm": 215.0, "learning_rate": 4.6624186788586544e-05, "loss": 11.7501, "step": 12856 }, { "epoch": 0.5359092993205785, "grad_norm": 474.0, "learning_rate": 4.661745220529003e-05, "loss": 17.0004, "step": 12857 }, { "epoch": 0.5359509816181068, "grad_norm": 97.5, "learning_rate": 4.661071768364076e-05, "loss": 8.6252, "step": 12858 }, { "epoch": 0.535992663915635, "grad_norm": 85.0, "learning_rate": 4.660398322376146e-05, "loss": 7.9076, "step": 12859 }, { "epoch": 0.5360343462131633, "grad_norm": 330.0, "learning_rate": 4.659724882577487e-05, "loss": 13.8751, "step": 12860 }, { "epoch": 0.5360760285106915, "grad_norm": 358.0, "learning_rate": 4.659051448980372e-05, "loss": 15.1253, "step": 12861 }, { "epoch": 0.5361177108082198, "grad_norm": 130.0, "learning_rate": 4.658378021597076e-05, "loss": 10.0628, "step": 12862 }, { "epoch": 0.536159393105748, "grad_norm": 628.0, "learning_rate": 4.657704600439869e-05, "loss": 20.7519, "step": 12863 }, { "epoch": 0.5362010754032762, "grad_norm": 1360.0, "learning_rate": 4.657031185521028e-05, "loss": 25.5042, "step": 12864 }, { "epoch": 0.5362427577008044, "grad_norm": 163.0, "learning_rate": 4.656357776852822e-05, "loss": 9.0627, "step": 12865 }, { "epoch": 0.5362844399983328, "grad_norm": 175.0, "learning_rate": 4.6556843744475274e-05, "loss": 11.5629, "step": 12866 }, { "epoch": 0.536326122295861, "grad_norm": 219.0, "learning_rate": 4.655010978317414e-05, "loss": 11.5631, "step": 12867 }, { "epoch": 0.5363678045933892, "grad_norm": 438.0, "learning_rate": 4.654337588474759e-05, "loss": 16.001, "step": 12868 }, { "epoch": 0.5364094868909174, "grad_norm": 960.0, "learning_rate": 4.653664204931829e-05, "loss": 21.1287, "step": 12869 }, { "epoch": 0.5364511691884457, "grad_norm": 1528.0, "learning_rate": 4.6529908277009025e-05, "loss": 33.5006, "step": 12870 }, { "epoch": 0.5364928514859739, "grad_norm": 75.5, "learning_rate": 4.6523174567942467e-05, "loss": 8.1879, "step": 12871 }, { "epoch": 0.5365345337835021, "grad_norm": 664.0, "learning_rate": 4.6516440922241365e-05, "loss": 19.5006, "step": 12872 }, { "epoch": 0.5365762160810303, "grad_norm": 628.0, "learning_rate": 4.6509707340028446e-05, "loss": 18.2503, "step": 12873 }, { "epoch": 0.5366178983785587, "grad_norm": 239.0, "learning_rate": 4.6502973821426414e-05, "loss": 13.0002, "step": 12874 }, { "epoch": 0.5366595806760869, "grad_norm": 556.0, "learning_rate": 4.649624036655799e-05, "loss": 16.2549, "step": 12875 }, { "epoch": 0.5367012629736151, "grad_norm": 338.0, "learning_rate": 4.648950697554591e-05, "loss": 14.3129, "step": 12876 }, { "epoch": 0.5367429452711433, "grad_norm": 536.0, "learning_rate": 4.648277364851286e-05, "loss": 18.2522, "step": 12877 }, { "epoch": 0.5367846275686716, "grad_norm": 648.0, "learning_rate": 4.647604038558159e-05, "loss": 20.1274, "step": 12878 }, { "epoch": 0.5368263098661998, "grad_norm": 684.0, "learning_rate": 4.6469307186874786e-05, "loss": 20.3752, "step": 12879 }, { "epoch": 0.536867992163728, "grad_norm": 256.0, "learning_rate": 4.646257405251518e-05, "loss": 12.8128, "step": 12880 }, { "epoch": 0.5369096744612563, "grad_norm": 532.0, "learning_rate": 4.6455840982625466e-05, "loss": 18.2502, "step": 12881 }, { "epoch": 0.5369513567587846, "grad_norm": 145.0, "learning_rate": 4.6449107977328374e-05, "loss": 7.1877, "step": 12882 }, { "epoch": 0.5369930390563128, "grad_norm": 185.0, "learning_rate": 4.644237503674659e-05, "loss": 10.6266, "step": 12883 }, { "epoch": 0.537034721353841, "grad_norm": 251.0, "learning_rate": 4.643564216100285e-05, "loss": 11.9376, "step": 12884 }, { "epoch": 0.5370764036513692, "grad_norm": 171.0, "learning_rate": 4.642890935021984e-05, "loss": 10.1878, "step": 12885 }, { "epoch": 0.5371180859488975, "grad_norm": 496.0, "learning_rate": 4.6422176604520284e-05, "loss": 14.4384, "step": 12886 }, { "epoch": 0.5371597682464258, "grad_norm": 356.0, "learning_rate": 4.641544392402686e-05, "loss": 14.6252, "step": 12887 }, { "epoch": 0.537201450543954, "grad_norm": 612.0, "learning_rate": 4.6408711308862316e-05, "loss": 18.8757, "step": 12888 }, { "epoch": 0.5372431328414822, "grad_norm": 238.0, "learning_rate": 4.6401978759149295e-05, "loss": 12.1876, "step": 12889 }, { "epoch": 0.5372848151390105, "grad_norm": 292.0, "learning_rate": 4.639524627501056e-05, "loss": 13.8754, "step": 12890 }, { "epoch": 0.5373264974365387, "grad_norm": 146.0, "learning_rate": 4.638851385656876e-05, "loss": 10.0631, "step": 12891 }, { "epoch": 0.5373681797340669, "grad_norm": 136.0, "learning_rate": 4.6381781503946635e-05, "loss": 9.6877, "step": 12892 }, { "epoch": 0.5374098620315951, "grad_norm": 125.5, "learning_rate": 4.637504921726684e-05, "loss": 6.4067, "step": 12893 }, { "epoch": 0.5374515443291235, "grad_norm": 688.0, "learning_rate": 4.636831699665212e-05, "loss": 18.0013, "step": 12894 }, { "epoch": 0.5374932266266517, "grad_norm": 340.0, "learning_rate": 4.6361584842225124e-05, "loss": 14.0003, "step": 12895 }, { "epoch": 0.5375349089241799, "grad_norm": 322.0, "learning_rate": 4.6354852754108575e-05, "loss": 14.8752, "step": 12896 }, { "epoch": 0.5375765912217081, "grad_norm": 272.0, "learning_rate": 4.634812073242516e-05, "loss": 12.3751, "step": 12897 }, { "epoch": 0.5376182735192364, "grad_norm": 616.0, "learning_rate": 4.634138877729757e-05, "loss": 19.6251, "step": 12898 }, { "epoch": 0.5376599558167646, "grad_norm": 162.0, "learning_rate": 4.633465688884848e-05, "loss": 10.3761, "step": 12899 }, { "epoch": 0.5377016381142928, "grad_norm": 274.0, "learning_rate": 4.6327925067200615e-05, "loss": 11.0627, "step": 12900 }, { "epoch": 0.537743320411821, "grad_norm": 366.0, "learning_rate": 4.632119331247662e-05, "loss": 15.6877, "step": 12901 }, { "epoch": 0.5377850027093494, "grad_norm": 310.0, "learning_rate": 4.6314461624799236e-05, "loss": 14.1253, "step": 12902 }, { "epoch": 0.5378266850068776, "grad_norm": 368.0, "learning_rate": 4.6307730004291075e-05, "loss": 13.9379, "step": 12903 }, { "epoch": 0.5378683673044058, "grad_norm": 176.0, "learning_rate": 4.63009984510749e-05, "loss": 9.0647, "step": 12904 }, { "epoch": 0.537910049601934, "grad_norm": 556.0, "learning_rate": 4.629426696527333e-05, "loss": 17.8758, "step": 12905 }, { "epoch": 0.5379517318994623, "grad_norm": 152.0, "learning_rate": 4.628753554700909e-05, "loss": 10.1256, "step": 12906 }, { "epoch": 0.5379934141969905, "grad_norm": 444.0, "learning_rate": 4.628080419640483e-05, "loss": 16.5005, "step": 12907 }, { "epoch": 0.5380350964945187, "grad_norm": 968.0, "learning_rate": 4.6274072913583263e-05, "loss": 24.3752, "step": 12908 }, { "epoch": 0.5380767787920471, "grad_norm": 326.0, "learning_rate": 4.6267341698667024e-05, "loss": 11.1253, "step": 12909 }, { "epoch": 0.5381184610895753, "grad_norm": 294.0, "learning_rate": 4.626061055177883e-05, "loss": 13.7507, "step": 12910 }, { "epoch": 0.5381601433871035, "grad_norm": 98.5, "learning_rate": 4.625387947304132e-05, "loss": 10.2511, "step": 12911 }, { "epoch": 0.5382018256846317, "grad_norm": 352.0, "learning_rate": 4.624714846257722e-05, "loss": 15.0629, "step": 12912 }, { "epoch": 0.53824350798216, "grad_norm": 160.0, "learning_rate": 4.624041752050915e-05, "loss": 9.0629, "step": 12913 }, { "epoch": 0.5382851902796882, "grad_norm": 308.0, "learning_rate": 4.623368664695982e-05, "loss": 13.5628, "step": 12914 }, { "epoch": 0.5383268725772165, "grad_norm": 450.0, "learning_rate": 4.622695584205187e-05, "loss": 16.3751, "step": 12915 }, { "epoch": 0.5383685548747447, "grad_norm": 59.5, "learning_rate": 4.6220225105908e-05, "loss": 6.9377, "step": 12916 }, { "epoch": 0.538410237172273, "grad_norm": 628.0, "learning_rate": 4.621349443865085e-05, "loss": 21.0005, "step": 12917 }, { "epoch": 0.5384519194698012, "grad_norm": 129.0, "learning_rate": 4.620676384040312e-05, "loss": 10.0627, "step": 12918 }, { "epoch": 0.5384936017673294, "grad_norm": 83.0, "learning_rate": 4.620003331128743e-05, "loss": 8.4379, "step": 12919 }, { "epoch": 0.5385352840648576, "grad_norm": 96.5, "learning_rate": 4.619330285142649e-05, "loss": 8.3128, "step": 12920 }, { "epoch": 0.538576966362386, "grad_norm": 516.0, "learning_rate": 4.618657246094292e-05, "loss": 17.7503, "step": 12921 }, { "epoch": 0.5386186486599142, "grad_norm": 175.0, "learning_rate": 4.617984213995943e-05, "loss": 9.8752, "step": 12922 }, { "epoch": 0.5386603309574424, "grad_norm": 488.0, "learning_rate": 4.617311188859864e-05, "loss": 15.7507, "step": 12923 }, { "epoch": 0.5387020132549706, "grad_norm": 199.0, "learning_rate": 4.616638170698324e-05, "loss": 11.0005, "step": 12924 }, { "epoch": 0.5387436955524989, "grad_norm": 258.0, "learning_rate": 4.615965159523585e-05, "loss": 7.313, "step": 12925 }, { "epoch": 0.5387853778500271, "grad_norm": 308.0, "learning_rate": 4.615292155347918e-05, "loss": 13.6877, "step": 12926 }, { "epoch": 0.5388270601475553, "grad_norm": 225.0, "learning_rate": 4.6146191581835826e-05, "loss": 10.0003, "step": 12927 }, { "epoch": 0.5388687424450835, "grad_norm": 872.0, "learning_rate": 4.6139461680428506e-05, "loss": 22.0003, "step": 12928 }, { "epoch": 0.5389104247426119, "grad_norm": 426.0, "learning_rate": 4.613273184937981e-05, "loss": 15.5002, "step": 12929 }, { "epoch": 0.5389521070401401, "grad_norm": 248.0, "learning_rate": 4.6126002088812445e-05, "loss": 10.2503, "step": 12930 }, { "epoch": 0.5389937893376683, "grad_norm": 213.0, "learning_rate": 4.611927239884901e-05, "loss": 11.1879, "step": 12931 }, { "epoch": 0.5390354716351965, "grad_norm": 152.0, "learning_rate": 4.6112542779612205e-05, "loss": 9.1884, "step": 12932 }, { "epoch": 0.5390771539327248, "grad_norm": 276.0, "learning_rate": 4.610581323122463e-05, "loss": 11.9379, "step": 12933 }, { "epoch": 0.539118836230253, "grad_norm": 127.0, "learning_rate": 4.609908375380897e-05, "loss": 10.2501, "step": 12934 }, { "epoch": 0.5391605185277812, "grad_norm": 636.0, "learning_rate": 4.609235434748785e-05, "loss": 19.3752, "step": 12935 }, { "epoch": 0.5392022008253095, "grad_norm": 158.0, "learning_rate": 4.608562501238392e-05, "loss": 9.6878, "step": 12936 }, { "epoch": 0.5392438831228378, "grad_norm": 628.0, "learning_rate": 4.607889574861981e-05, "loss": 20.1252, "step": 12937 }, { "epoch": 0.539285565420366, "grad_norm": 1320.0, "learning_rate": 4.60721665563182e-05, "loss": 33.2508, "step": 12938 }, { "epoch": 0.5393272477178942, "grad_norm": 148.0, "learning_rate": 4.606543743560167e-05, "loss": 9.9377, "step": 12939 }, { "epoch": 0.5393689300154224, "grad_norm": 524.0, "learning_rate": 4.605870838659293e-05, "loss": 18.0012, "step": 12940 }, { "epoch": 0.5394106123129507, "grad_norm": 172.0, "learning_rate": 4.605197940941454e-05, "loss": 10.563, "step": 12941 }, { "epoch": 0.5394522946104789, "grad_norm": 185.0, "learning_rate": 4.604525050418921e-05, "loss": 11.5628, "step": 12942 }, { "epoch": 0.5394939769080072, "grad_norm": 428.0, "learning_rate": 4.603852167103951e-05, "loss": 17.3752, "step": 12943 }, { "epoch": 0.5395356592055354, "grad_norm": 198.0, "learning_rate": 4.603179291008813e-05, "loss": 13.188, "step": 12944 }, { "epoch": 0.5395773415030637, "grad_norm": 408.0, "learning_rate": 4.602506422145766e-05, "loss": 15.2512, "step": 12945 }, { "epoch": 0.5396190238005919, "grad_norm": 233.0, "learning_rate": 4.6018335605270766e-05, "loss": 11.9382, "step": 12946 }, { "epoch": 0.5396607060981201, "grad_norm": 203.0, "learning_rate": 4.601160706165003e-05, "loss": 10.6254, "step": 12947 }, { "epoch": 0.5397023883956483, "grad_norm": 239.0, "learning_rate": 4.600487859071813e-05, "loss": 10.8129, "step": 12948 }, { "epoch": 0.5397440706931766, "grad_norm": 219.0, "learning_rate": 4.599815019259765e-05, "loss": 11.6878, "step": 12949 }, { "epoch": 0.5397857529907049, "grad_norm": 314.0, "learning_rate": 4.599142186741127e-05, "loss": 14.9387, "step": 12950 }, { "epoch": 0.5398274352882331, "grad_norm": 188.0, "learning_rate": 4.598469361528156e-05, "loss": 11.1252, "step": 12951 }, { "epoch": 0.5398691175857613, "grad_norm": 406.0, "learning_rate": 4.597796543633119e-05, "loss": 13.5628, "step": 12952 }, { "epoch": 0.5399107998832896, "grad_norm": 380.0, "learning_rate": 4.5971237330682726e-05, "loss": 15.5004, "step": 12953 }, { "epoch": 0.5399524821808178, "grad_norm": 95.5, "learning_rate": 4.5964509298458843e-05, "loss": 8.876, "step": 12954 }, { "epoch": 0.539994164478346, "grad_norm": 1272.0, "learning_rate": 4.595778133978212e-05, "loss": 27.13, "step": 12955 }, { "epoch": 0.5400358467758742, "grad_norm": 326.0, "learning_rate": 4.5951053454775214e-05, "loss": 13.8757, "step": 12956 }, { "epoch": 0.5400775290734026, "grad_norm": 340.0, "learning_rate": 4.5944325643560687e-05, "loss": 14.4382, "step": 12957 }, { "epoch": 0.5401192113709308, "grad_norm": 776.0, "learning_rate": 4.593759790626121e-05, "loss": 21.6258, "step": 12958 }, { "epoch": 0.540160893668459, "grad_norm": 1456.0, "learning_rate": 4.5930870242999355e-05, "loss": 31.508, "step": 12959 }, { "epoch": 0.5402025759659872, "grad_norm": 340.0, "learning_rate": 4.592414265389776e-05, "loss": 14.5016, "step": 12960 }, { "epoch": 0.5402442582635155, "grad_norm": 1176.0, "learning_rate": 4.5917415139079025e-05, "loss": 27.6281, "step": 12961 }, { "epoch": 0.5402859405610437, "grad_norm": 800.0, "learning_rate": 4.591068769866576e-05, "loss": 22.0004, "step": 12962 }, { "epoch": 0.5403276228585719, "grad_norm": 139.0, "learning_rate": 4.5903960332780566e-05, "loss": 9.6254, "step": 12963 }, { "epoch": 0.5403693051561002, "grad_norm": 79.5, "learning_rate": 4.589723304154609e-05, "loss": 6.2507, "step": 12964 }, { "epoch": 0.5404109874536285, "grad_norm": 149.0, "learning_rate": 4.5890505825084864e-05, "loss": 10.2502, "step": 12965 }, { "epoch": 0.5404526697511567, "grad_norm": 680.0, "learning_rate": 4.588377868351957e-05, "loss": 15.4423, "step": 12966 }, { "epoch": 0.5404943520486849, "grad_norm": 368.0, "learning_rate": 4.587705161697275e-05, "loss": 14.7504, "step": 12967 }, { "epoch": 0.5405360343462131, "grad_norm": 258.0, "learning_rate": 4.5870324625567055e-05, "loss": 11.8127, "step": 12968 }, { "epoch": 0.5405777166437414, "grad_norm": 140.0, "learning_rate": 4.586359770942503e-05, "loss": 10.6882, "step": 12969 }, { "epoch": 0.5406193989412696, "grad_norm": 106.5, "learning_rate": 4.5856870868669336e-05, "loss": 6.7191, "step": 12970 }, { "epoch": 0.5406610812387979, "grad_norm": 612.0, "learning_rate": 4.585014410342251e-05, "loss": 19.3754, "step": 12971 }, { "epoch": 0.5407027635363261, "grad_norm": 314.0, "learning_rate": 4.584341741380719e-05, "loss": 11.8131, "step": 12972 }, { "epoch": 0.5407444458338544, "grad_norm": 278.0, "learning_rate": 4.583669079994595e-05, "loss": 12.4382, "step": 12973 }, { "epoch": 0.5407861281313826, "grad_norm": 504.0, "learning_rate": 4.582996426196139e-05, "loss": 17.3751, "step": 12974 }, { "epoch": 0.5408278104289108, "grad_norm": 42.5, "learning_rate": 4.58232377999761e-05, "loss": 6.8128, "step": 12975 }, { "epoch": 0.540869492726439, "grad_norm": 1256.0, "learning_rate": 4.581651141411269e-05, "loss": 31.6252, "step": 12976 }, { "epoch": 0.5409111750239673, "grad_norm": 247.0, "learning_rate": 4.5809785104493707e-05, "loss": 12.1252, "step": 12977 }, { "epoch": 0.5409528573214956, "grad_norm": 199.0, "learning_rate": 4.5803058871241786e-05, "loss": 11.0002, "step": 12978 }, { "epoch": 0.5409945396190238, "grad_norm": 156.0, "learning_rate": 4.579633271447947e-05, "loss": 10.2504, "step": 12979 }, { "epoch": 0.5410362219165521, "grad_norm": 528.0, "learning_rate": 4.578960663432938e-05, "loss": 18.8751, "step": 12980 }, { "epoch": 0.5410779042140803, "grad_norm": 644.0, "learning_rate": 4.5782880630914067e-05, "loss": 20.2501, "step": 12981 }, { "epoch": 0.5411195865116085, "grad_norm": 356.0, "learning_rate": 4.577615470435615e-05, "loss": 14.2503, "step": 12982 }, { "epoch": 0.5411612688091367, "grad_norm": 64.0, "learning_rate": 4.5769428854778155e-05, "loss": 7.8757, "step": 12983 }, { "epoch": 0.541202951106665, "grad_norm": 696.0, "learning_rate": 4.576270308230272e-05, "loss": 21.3765, "step": 12984 }, { "epoch": 0.5412446334041933, "grad_norm": 190.0, "learning_rate": 4.575597738705239e-05, "loss": 11.0001, "step": 12985 }, { "epoch": 0.5412863157017215, "grad_norm": 628.0, "learning_rate": 4.574925176914975e-05, "loss": 17.8752, "step": 12986 }, { "epoch": 0.5413279979992497, "grad_norm": 232.0, "learning_rate": 4.5742526228717363e-05, "loss": 11.1876, "step": 12987 }, { "epoch": 0.541369680296778, "grad_norm": 63.0, "learning_rate": 4.573580076587784e-05, "loss": 8.1252, "step": 12988 }, { "epoch": 0.5414113625943062, "grad_norm": 744.0, "learning_rate": 4.57290753807537e-05, "loss": 22.6256, "step": 12989 }, { "epoch": 0.5414530448918344, "grad_norm": 1640.0, "learning_rate": 4.572235007346756e-05, "loss": 29.1303, "step": 12990 }, { "epoch": 0.5414947271893626, "grad_norm": 494.0, "learning_rate": 4.5715624844141955e-05, "loss": 16.5002, "step": 12991 }, { "epoch": 0.541536409486891, "grad_norm": 288.0, "learning_rate": 4.570889969289948e-05, "loss": 13.5002, "step": 12992 }, { "epoch": 0.5415780917844192, "grad_norm": 728.0, "learning_rate": 4.5702174619862675e-05, "loss": 22.5001, "step": 12993 }, { "epoch": 0.5416197740819474, "grad_norm": 1544.0, "learning_rate": 4.569544962515414e-05, "loss": 34.7507, "step": 12994 }, { "epoch": 0.5416614563794756, "grad_norm": 448.0, "learning_rate": 4.568872470889639e-05, "loss": 16.3752, "step": 12995 }, { "epoch": 0.5417031386770039, "grad_norm": 334.0, "learning_rate": 4.568199987121204e-05, "loss": 14.1253, "step": 12996 }, { "epoch": 0.5417448209745321, "grad_norm": 1008.0, "learning_rate": 4.567527511222361e-05, "loss": 21.7537, "step": 12997 }, { "epoch": 0.5417865032720603, "grad_norm": 394.0, "learning_rate": 4.566855043205368e-05, "loss": 13.4392, "step": 12998 }, { "epoch": 0.5418281855695886, "grad_norm": 171.0, "learning_rate": 4.56618258308248e-05, "loss": 9.1889, "step": 12999 }, { "epoch": 0.5418698678671169, "grad_norm": 464.0, "learning_rate": 4.5655101308659537e-05, "loss": 17.0005, "step": 13000 }, { "epoch": 0.5419115501646451, "grad_norm": 226.0, "learning_rate": 4.564837686568042e-05, "loss": 12.1879, "step": 13001 }, { "epoch": 0.5419532324621733, "grad_norm": 201.0, "learning_rate": 4.5641652502010044e-05, "loss": 12.4377, "step": 13002 }, { "epoch": 0.5419949147597015, "grad_norm": 146.0, "learning_rate": 4.563492821777092e-05, "loss": 10.6256, "step": 13003 }, { "epoch": 0.5420365970572298, "grad_norm": 536.0, "learning_rate": 4.562820401308564e-05, "loss": 16.3754, "step": 13004 }, { "epoch": 0.542078279354758, "grad_norm": 58.0, "learning_rate": 4.56214798880767e-05, "loss": 6.8752, "step": 13005 }, { "epoch": 0.5421199616522863, "grad_norm": 852.0, "learning_rate": 4.561475584286671e-05, "loss": 23.5002, "step": 13006 }, { "epoch": 0.5421616439498145, "grad_norm": 350.0, "learning_rate": 4.5608031877578154e-05, "loss": 12.5003, "step": 13007 }, { "epoch": 0.5422033262473428, "grad_norm": 204.0, "learning_rate": 4.560130799233363e-05, "loss": 10.8129, "step": 13008 }, { "epoch": 0.542245008544871, "grad_norm": 239.0, "learning_rate": 4.559458418725564e-05, "loss": 11.5627, "step": 13009 }, { "epoch": 0.5422866908423992, "grad_norm": 206.0, "learning_rate": 4.558786046246675e-05, "loss": 12.5626, "step": 13010 }, { "epoch": 0.5423283731399274, "grad_norm": 540.0, "learning_rate": 4.55811368180895e-05, "loss": 18.1252, "step": 13011 }, { "epoch": 0.5423700554374558, "grad_norm": 468.0, "learning_rate": 4.557441325424642e-05, "loss": 14.6888, "step": 13012 }, { "epoch": 0.542411737734984, "grad_norm": 740.0, "learning_rate": 4.5567689771060046e-05, "loss": 20.8752, "step": 13013 }, { "epoch": 0.5424534200325122, "grad_norm": 438.0, "learning_rate": 4.556096636865294e-05, "loss": 16.3754, "step": 13014 }, { "epoch": 0.5424951023300404, "grad_norm": 204.0, "learning_rate": 4.5554243047147584e-05, "loss": 10.6886, "step": 13015 }, { "epoch": 0.5425367846275687, "grad_norm": 114.0, "learning_rate": 4.554751980666658e-05, "loss": 9.0009, "step": 13016 }, { "epoch": 0.5425784669250969, "grad_norm": 68.0, "learning_rate": 4.554079664733239e-05, "loss": 7.3443, "step": 13017 }, { "epoch": 0.5426201492226251, "grad_norm": 118.0, "learning_rate": 4.55340735692676e-05, "loss": 8.9385, "step": 13018 }, { "epoch": 0.5426618315201533, "grad_norm": 708.0, "learning_rate": 4.5527350572594696e-05, "loss": 21.2504, "step": 13019 }, { "epoch": 0.5427035138176817, "grad_norm": 162.0, "learning_rate": 4.552062765743625e-05, "loss": 10.6255, "step": 13020 }, { "epoch": 0.5427451961152099, "grad_norm": 422.0, "learning_rate": 4.551390482391474e-05, "loss": 15.7504, "step": 13021 }, { "epoch": 0.5427868784127381, "grad_norm": 95.0, "learning_rate": 4.550718207215272e-05, "loss": 9.1878, "step": 13022 }, { "epoch": 0.5428285607102663, "grad_norm": 193.0, "learning_rate": 4.550045940227271e-05, "loss": 11.3127, "step": 13023 }, { "epoch": 0.5428702430077946, "grad_norm": 120.5, "learning_rate": 4.549373681439722e-05, "loss": 9.9377, "step": 13024 }, { "epoch": 0.5429119253053228, "grad_norm": 436.0, "learning_rate": 4.548701430864877e-05, "loss": 15.7508, "step": 13025 }, { "epoch": 0.542953607602851, "grad_norm": 292.0, "learning_rate": 4.5480291885149905e-05, "loss": 14.188, "step": 13026 }, { "epoch": 0.5429952899003793, "grad_norm": 108.0, "learning_rate": 4.54735695440231e-05, "loss": 9.1879, "step": 13027 }, { "epoch": 0.5430369721979076, "grad_norm": 314.0, "learning_rate": 4.546684728539091e-05, "loss": 11.2508, "step": 13028 }, { "epoch": 0.5430786544954358, "grad_norm": 768.0, "learning_rate": 4.5460125109375817e-05, "loss": 20.5048, "step": 13029 }, { "epoch": 0.543120336792964, "grad_norm": 380.0, "learning_rate": 4.5453403016100364e-05, "loss": 13.0003, "step": 13030 }, { "epoch": 0.5431620190904922, "grad_norm": 456.0, "learning_rate": 4.544668100568703e-05, "loss": 11.1892, "step": 13031 }, { "epoch": 0.5432037013880205, "grad_norm": 124.5, "learning_rate": 4.543995907825835e-05, "loss": 8.4377, "step": 13032 }, { "epoch": 0.5432453836855488, "grad_norm": 664.0, "learning_rate": 4.5433237233936804e-05, "loss": 21.6251, "step": 13033 }, { "epoch": 0.543287065983077, "grad_norm": 356.0, "learning_rate": 4.542651547284493e-05, "loss": 13.6284, "step": 13034 }, { "epoch": 0.5433287482806052, "grad_norm": 149.0, "learning_rate": 4.541979379510521e-05, "loss": 9.188, "step": 13035 }, { "epoch": 0.5433704305781335, "grad_norm": 107.0, "learning_rate": 4.5413072200840156e-05, "loss": 9.0627, "step": 13036 }, { "epoch": 0.5434121128756617, "grad_norm": 324.0, "learning_rate": 4.5406350690172253e-05, "loss": 12.3751, "step": 13037 }, { "epoch": 0.5434537951731899, "grad_norm": 848.0, "learning_rate": 4.5399629263224044e-05, "loss": 23.0003, "step": 13038 }, { "epoch": 0.5434954774707181, "grad_norm": 268.0, "learning_rate": 4.539290792011798e-05, "loss": 12.8754, "step": 13039 }, { "epoch": 0.5435371597682465, "grad_norm": 346.0, "learning_rate": 4.53861866609766e-05, "loss": 15.1878, "step": 13040 }, { "epoch": 0.5435788420657747, "grad_norm": 474.0, "learning_rate": 4.537946548592236e-05, "loss": 16.3753, "step": 13041 }, { "epoch": 0.5436205243633029, "grad_norm": 988.0, "learning_rate": 4.537274439507778e-05, "loss": 26.1258, "step": 13042 }, { "epoch": 0.5436622066608311, "grad_norm": 198.0, "learning_rate": 4.5366023388565335e-05, "loss": 11.4379, "step": 13043 }, { "epoch": 0.5437038889583594, "grad_norm": 308.0, "learning_rate": 4.535930246650754e-05, "loss": 11.7502, "step": 13044 }, { "epoch": 0.5437455712558876, "grad_norm": 165.0, "learning_rate": 4.5352581629026844e-05, "loss": 7.126, "step": 13045 }, { "epoch": 0.5437872535534158, "grad_norm": 143.0, "learning_rate": 4.534586087624579e-05, "loss": 10.1877, "step": 13046 }, { "epoch": 0.543828935850944, "grad_norm": 324.0, "learning_rate": 4.5339140208286815e-05, "loss": 13.3753, "step": 13047 }, { "epoch": 0.5438706181484724, "grad_norm": 294.0, "learning_rate": 4.533241962527243e-05, "loss": 12.3753, "step": 13048 }, { "epoch": 0.5439123004460006, "grad_norm": 442.0, "learning_rate": 4.532569912732511e-05, "loss": 15.6251, "step": 13049 }, { "epoch": 0.5439539827435288, "grad_norm": 221.0, "learning_rate": 4.531897871456734e-05, "loss": 10.4377, "step": 13050 }, { "epoch": 0.543995665041057, "grad_norm": 432.0, "learning_rate": 4.5312258387121584e-05, "loss": 15.9377, "step": 13051 }, { "epoch": 0.5440373473385853, "grad_norm": 416.0, "learning_rate": 4.530553814511036e-05, "loss": 15.1878, "step": 13052 }, { "epoch": 0.5440790296361135, "grad_norm": 115.5, "learning_rate": 4.52988179886561e-05, "loss": 6.9378, "step": 13053 }, { "epoch": 0.5441207119336418, "grad_norm": 201.0, "learning_rate": 4.529209791788132e-05, "loss": 10.6878, "step": 13054 }, { "epoch": 0.5441623942311701, "grad_norm": 544.0, "learning_rate": 4.528537793290845e-05, "loss": 17.502, "step": 13055 }, { "epoch": 0.5442040765286983, "grad_norm": 58.0, "learning_rate": 4.527865803386001e-05, "loss": 7.7818, "step": 13056 }, { "epoch": 0.5442457588262265, "grad_norm": 75.5, "learning_rate": 4.527193822085842e-05, "loss": 8.9382, "step": 13057 }, { "epoch": 0.5442874411237547, "grad_norm": 1528.0, "learning_rate": 4.5265218494026204e-05, "loss": 34.0005, "step": 13058 }, { "epoch": 0.544329123421283, "grad_norm": 342.0, "learning_rate": 4.525849885348578e-05, "loss": 13.1253, "step": 13059 }, { "epoch": 0.5443708057188112, "grad_norm": 432.0, "learning_rate": 4.525177929935964e-05, "loss": 15.3127, "step": 13060 }, { "epoch": 0.5444124880163395, "grad_norm": 286.0, "learning_rate": 4.5245059831770246e-05, "loss": 12.8751, "step": 13061 }, { "epoch": 0.5444541703138677, "grad_norm": 221.0, "learning_rate": 4.523834045084006e-05, "loss": 11.9379, "step": 13062 }, { "epoch": 0.544495852611396, "grad_norm": 358.0, "learning_rate": 4.5231621156691534e-05, "loss": 11.8139, "step": 13063 }, { "epoch": 0.5445375349089242, "grad_norm": 468.0, "learning_rate": 4.522490194944715e-05, "loss": 17.3752, "step": 13064 }, { "epoch": 0.5445792172064524, "grad_norm": 588.0, "learning_rate": 4.5218182829229335e-05, "loss": 18.8752, "step": 13065 }, { "epoch": 0.5446208995039806, "grad_norm": 213.0, "learning_rate": 4.521146379616059e-05, "loss": 13.188, "step": 13066 }, { "epoch": 0.544662581801509, "grad_norm": 268.0, "learning_rate": 4.520474485036331e-05, "loss": 12.8754, "step": 13067 }, { "epoch": 0.5447042640990372, "grad_norm": 127.0, "learning_rate": 4.5198025991960005e-05, "loss": 8.0627, "step": 13068 }, { "epoch": 0.5447459463965654, "grad_norm": 290.0, "learning_rate": 4.519130722107308e-05, "loss": 13.6879, "step": 13069 }, { "epoch": 0.5447876286940936, "grad_norm": 232.0, "learning_rate": 4.5184588537825035e-05, "loss": 12.3752, "step": 13070 }, { "epoch": 0.5448293109916219, "grad_norm": 197.0, "learning_rate": 4.5177869942338264e-05, "loss": 11.438, "step": 13071 }, { "epoch": 0.5448709932891501, "grad_norm": 294.0, "learning_rate": 4.517115143473527e-05, "loss": 11.1251, "step": 13072 }, { "epoch": 0.5449126755866783, "grad_norm": 372.0, "learning_rate": 4.516443301513844e-05, "loss": 16.0011, "step": 13073 }, { "epoch": 0.5449543578842065, "grad_norm": 246.0, "learning_rate": 4.515771468367026e-05, "loss": 13.0004, "step": 13074 }, { "epoch": 0.5449960401817349, "grad_norm": 420.0, "learning_rate": 4.515099644045315e-05, "loss": 17.6265, "step": 13075 }, { "epoch": 0.5450377224792631, "grad_norm": 194.0, "learning_rate": 4.514427828560959e-05, "loss": 10.3753, "step": 13076 }, { "epoch": 0.5450794047767913, "grad_norm": 1088.0, "learning_rate": 4.5137560219261956e-05, "loss": 25.2544, "step": 13077 }, { "epoch": 0.5451210870743195, "grad_norm": 400.0, "learning_rate": 4.5130842241532746e-05, "loss": 16.5002, "step": 13078 }, { "epoch": 0.5451627693718478, "grad_norm": 284.0, "learning_rate": 4.5124124352544345e-05, "loss": 12.1251, "step": 13079 }, { "epoch": 0.545204451669376, "grad_norm": 195.0, "learning_rate": 4.5117406552419234e-05, "loss": 10.7516, "step": 13080 }, { "epoch": 0.5452461339669042, "grad_norm": 292.0, "learning_rate": 4.51106888412798e-05, "loss": 12.1252, "step": 13081 }, { "epoch": 0.5452878162644325, "grad_norm": 1040.0, "learning_rate": 4.5103971219248516e-05, "loss": 26.7506, "step": 13082 }, { "epoch": 0.5453294985619608, "grad_norm": 330.0, "learning_rate": 4.509725368644776e-05, "loss": 14.3753, "step": 13083 }, { "epoch": 0.545371180859489, "grad_norm": 506.0, "learning_rate": 4.5090536243000034e-05, "loss": 17.0001, "step": 13084 }, { "epoch": 0.5454128631570172, "grad_norm": 203.0, "learning_rate": 4.508381888902768e-05, "loss": 9.5629, "step": 13085 }, { "epoch": 0.5454545454545454, "grad_norm": 134.0, "learning_rate": 4.507710162465319e-05, "loss": 10.563, "step": 13086 }, { "epoch": 0.5454962277520737, "grad_norm": 544.0, "learning_rate": 4.507038444999895e-05, "loss": 18.6256, "step": 13087 }, { "epoch": 0.545537910049602, "grad_norm": 223.0, "learning_rate": 4.506366736518739e-05, "loss": 11.438, "step": 13088 }, { "epoch": 0.5455795923471302, "grad_norm": 398.0, "learning_rate": 4.505695037034092e-05, "loss": 15.7508, "step": 13089 }, { "epoch": 0.5456212746446584, "grad_norm": 89.5, "learning_rate": 4.5050233465581995e-05, "loss": 9.0003, "step": 13090 }, { "epoch": 0.5456629569421867, "grad_norm": 426.0, "learning_rate": 4.504351665103298e-05, "loss": 15.0004, "step": 13091 }, { "epoch": 0.5457046392397149, "grad_norm": 792.0, "learning_rate": 4.5036799926816335e-05, "loss": 21.2504, "step": 13092 }, { "epoch": 0.5457463215372431, "grad_norm": 422.0, "learning_rate": 4.5030083293054434e-05, "loss": 15.0004, "step": 13093 }, { "epoch": 0.5457880038347713, "grad_norm": 212.0, "learning_rate": 4.5023366749869724e-05, "loss": 11.6256, "step": 13094 }, { "epoch": 0.5458296861322997, "grad_norm": 624.0, "learning_rate": 4.5016650297384576e-05, "loss": 19.3752, "step": 13095 }, { "epoch": 0.5458713684298279, "grad_norm": 576.0, "learning_rate": 4.500993393572144e-05, "loss": 20.7501, "step": 13096 }, { "epoch": 0.5459130507273561, "grad_norm": 320.0, "learning_rate": 4.5003217665002676e-05, "loss": 12.5005, "step": 13097 }, { "epoch": 0.5459547330248843, "grad_norm": 224.0, "learning_rate": 4.499650148535073e-05, "loss": 11.6252, "step": 13098 }, { "epoch": 0.5459964153224126, "grad_norm": 60.75, "learning_rate": 4.498978539688799e-05, "loss": 8.3133, "step": 13099 }, { "epoch": 0.5460380976199408, "grad_norm": 326.0, "learning_rate": 4.498306939973685e-05, "loss": 12.5004, "step": 13100 }, { "epoch": 0.546079779917469, "grad_norm": 504.0, "learning_rate": 4.4976353494019705e-05, "loss": 18.3752, "step": 13101 }, { "epoch": 0.5461214622149972, "grad_norm": 368.0, "learning_rate": 4.4969637679858986e-05, "loss": 13.8132, "step": 13102 }, { "epoch": 0.5461631445125256, "grad_norm": 133.0, "learning_rate": 4.4962921957377054e-05, "loss": 9.4384, "step": 13103 }, { "epoch": 0.5462048268100538, "grad_norm": 490.0, "learning_rate": 4.495620632669632e-05, "loss": 16.1252, "step": 13104 }, { "epoch": 0.546246509107582, "grad_norm": 138.0, "learning_rate": 4.494949078793917e-05, "loss": 10.1882, "step": 13105 }, { "epoch": 0.5462881914051102, "grad_norm": 370.0, "learning_rate": 4.494277534122801e-05, "loss": 12.3751, "step": 13106 }, { "epoch": 0.5463298737026385, "grad_norm": 1464.0, "learning_rate": 4.49360599866852e-05, "loss": 33.7505, "step": 13107 }, { "epoch": 0.5463715560001667, "grad_norm": 792.0, "learning_rate": 4.492934472443317e-05, "loss": 22.2506, "step": 13108 }, { "epoch": 0.546413238297695, "grad_norm": 276.0, "learning_rate": 4.492262955459426e-05, "loss": 12.7502, "step": 13109 }, { "epoch": 0.5464549205952232, "grad_norm": 820.0, "learning_rate": 4.491591447729089e-05, "loss": 22.5007, "step": 13110 }, { "epoch": 0.5464966028927515, "grad_norm": 856.0, "learning_rate": 4.4909199492645425e-05, "loss": 20.1293, "step": 13111 }, { "epoch": 0.5465382851902797, "grad_norm": 408.0, "learning_rate": 4.490248460078025e-05, "loss": 16.3754, "step": 13112 }, { "epoch": 0.5465799674878079, "grad_norm": 310.0, "learning_rate": 4.489576980181774e-05, "loss": 13.3127, "step": 13113 }, { "epoch": 0.5466216497853361, "grad_norm": 540.0, "learning_rate": 4.4889055095880295e-05, "loss": 17.7505, "step": 13114 }, { "epoch": 0.5466633320828644, "grad_norm": 64.5, "learning_rate": 4.488234048309026e-05, "loss": 8.6888, "step": 13115 }, { "epoch": 0.5467050143803927, "grad_norm": 656.0, "learning_rate": 4.487562596357004e-05, "loss": 19.7542, "step": 13116 }, { "epoch": 0.5467466966779209, "grad_norm": 572.0, "learning_rate": 4.486891153744197e-05, "loss": 20.1253, "step": 13117 }, { "epoch": 0.5467883789754491, "grad_norm": 406.0, "learning_rate": 4.486219720482847e-05, "loss": 15.8126, "step": 13118 }, { "epoch": 0.5468300612729774, "grad_norm": 304.0, "learning_rate": 4.485548296585185e-05, "loss": 12.2523, "step": 13119 }, { "epoch": 0.5468717435705056, "grad_norm": 268.0, "learning_rate": 4.484876882063454e-05, "loss": 12.8128, "step": 13120 }, { "epoch": 0.5469134258680338, "grad_norm": 282.0, "learning_rate": 4.4842054769298846e-05, "loss": 13.5628, "step": 13121 }, { "epoch": 0.546955108165562, "grad_norm": 380.0, "learning_rate": 4.483534081196719e-05, "loss": 12.6876, "step": 13122 }, { "epoch": 0.5469967904630904, "grad_norm": 253.0, "learning_rate": 4.4828626948761886e-05, "loss": 12.8129, "step": 13123 }, { "epoch": 0.5470384727606186, "grad_norm": 192.0, "learning_rate": 4.482191317980532e-05, "loss": 10.8756, "step": 13124 }, { "epoch": 0.5470801550581468, "grad_norm": 484.0, "learning_rate": 4.481519950521985e-05, "loss": 18.7511, "step": 13125 }, { "epoch": 0.5471218373556751, "grad_norm": 328.0, "learning_rate": 4.480848592512783e-05, "loss": 14.5627, "step": 13126 }, { "epoch": 0.5471635196532033, "grad_norm": 67.0, "learning_rate": 4.48017724396516e-05, "loss": 6.8127, "step": 13127 }, { "epoch": 0.5472052019507315, "grad_norm": 235.0, "learning_rate": 4.479505904891356e-05, "loss": 11.8755, "step": 13128 }, { "epoch": 0.5472468842482597, "grad_norm": 274.0, "learning_rate": 4.4788345753035996e-05, "loss": 13.2502, "step": 13129 }, { "epoch": 0.5472885665457881, "grad_norm": 512.0, "learning_rate": 4.4781632552141326e-05, "loss": 16.3756, "step": 13130 }, { "epoch": 0.5473302488433163, "grad_norm": 148.0, "learning_rate": 4.477491944635184e-05, "loss": 9.7501, "step": 13131 }, { "epoch": 0.5473719311408445, "grad_norm": 316.0, "learning_rate": 4.4768206435789926e-05, "loss": 14.1878, "step": 13132 }, { "epoch": 0.5474136134383727, "grad_norm": 142.0, "learning_rate": 4.4761493520577893e-05, "loss": 10.6879, "step": 13133 }, { "epoch": 0.547455295735901, "grad_norm": 576.0, "learning_rate": 4.4754780700838136e-05, "loss": 20.5004, "step": 13134 }, { "epoch": 0.5474969780334292, "grad_norm": 318.0, "learning_rate": 4.4748067976692945e-05, "loss": 14.5003, "step": 13135 }, { "epoch": 0.5475386603309574, "grad_norm": 620.0, "learning_rate": 4.4741355348264686e-05, "loss": 19.0001, "step": 13136 }, { "epoch": 0.5475803426284857, "grad_norm": 135.0, "learning_rate": 4.47346428156757e-05, "loss": 10.7504, "step": 13137 }, { "epoch": 0.547622024926014, "grad_norm": 140.0, "learning_rate": 4.4727930379048306e-05, "loss": 10.3752, "step": 13138 }, { "epoch": 0.5476637072235422, "grad_norm": 255.0, "learning_rate": 4.472121803850484e-05, "loss": 12.3753, "step": 13139 }, { "epoch": 0.5477053895210704, "grad_norm": 612.0, "learning_rate": 4.471450579416767e-05, "loss": 19.6252, "step": 13140 }, { "epoch": 0.5477470718185986, "grad_norm": 79.0, "learning_rate": 4.470779364615908e-05, "loss": 8.8129, "step": 13141 }, { "epoch": 0.5477887541161269, "grad_norm": 133.0, "learning_rate": 4.470108159460144e-05, "loss": 9.8752, "step": 13142 }, { "epoch": 0.5478304364136551, "grad_norm": 472.0, "learning_rate": 4.469436963961704e-05, "loss": 16.6253, "step": 13143 }, { "epoch": 0.5478721187111834, "grad_norm": 126.5, "learning_rate": 4.468765778132824e-05, "loss": 9.1254, "step": 13144 }, { "epoch": 0.5479138010087116, "grad_norm": 130.0, "learning_rate": 4.4680946019857326e-05, "loss": 8.8754, "step": 13145 }, { "epoch": 0.5479554833062399, "grad_norm": 151.0, "learning_rate": 4.467423435532667e-05, "loss": 10.688, "step": 13146 }, { "epoch": 0.5479971656037681, "grad_norm": 220.0, "learning_rate": 4.466752278785855e-05, "loss": 11.0629, "step": 13147 }, { "epoch": 0.5480388479012963, "grad_norm": 824.0, "learning_rate": 4.46608113175753e-05, "loss": 23.8758, "step": 13148 }, { "epoch": 0.5480805301988245, "grad_norm": 235.0, "learning_rate": 4.4654099944599244e-05, "loss": 13.9378, "step": 13149 }, { "epoch": 0.5481222124963528, "grad_norm": 213.0, "learning_rate": 4.4647388669052686e-05, "loss": 10.5008, "step": 13150 }, { "epoch": 0.5481638947938811, "grad_norm": 474.0, "learning_rate": 4.464067749105794e-05, "loss": 16.3755, "step": 13151 }, { "epoch": 0.5482055770914093, "grad_norm": 142.0, "learning_rate": 4.4633966410737335e-05, "loss": 10.2504, "step": 13152 }, { "epoch": 0.5482472593889375, "grad_norm": 290.0, "learning_rate": 4.462725542821315e-05, "loss": 13.2501, "step": 13153 }, { "epoch": 0.5482889416864658, "grad_norm": 60.75, "learning_rate": 4.462054454360774e-05, "loss": 8.8754, "step": 13154 }, { "epoch": 0.548330623983994, "grad_norm": 276.0, "learning_rate": 4.461383375704336e-05, "loss": 12.5627, "step": 13155 }, { "epoch": 0.5483723062815222, "grad_norm": 306.0, "learning_rate": 4.4607123068642356e-05, "loss": 14.0004, "step": 13156 }, { "epoch": 0.5484139885790504, "grad_norm": 852.0, "learning_rate": 4.4600412478526995e-05, "loss": 23.5032, "step": 13157 }, { "epoch": 0.5484556708765788, "grad_norm": 366.0, "learning_rate": 4.459370198681962e-05, "loss": 13.9378, "step": 13158 }, { "epoch": 0.548497353174107, "grad_norm": 436.0, "learning_rate": 4.458699159364247e-05, "loss": 16.2545, "step": 13159 }, { "epoch": 0.5485390354716352, "grad_norm": 160.0, "learning_rate": 4.458028129911791e-05, "loss": 11.3771, "step": 13160 }, { "epoch": 0.5485807177691634, "grad_norm": 488.0, "learning_rate": 4.4573571103368184e-05, "loss": 16.7502, "step": 13161 }, { "epoch": 0.5486224000666917, "grad_norm": 500.0, "learning_rate": 4.4566861006515616e-05, "loss": 17.0006, "step": 13162 }, { "epoch": 0.5486640823642199, "grad_norm": 1408.0, "learning_rate": 4.4560151008682474e-05, "loss": 28.5007, "step": 13163 }, { "epoch": 0.5487057646617481, "grad_norm": 153.0, "learning_rate": 4.455344110999109e-05, "loss": 9.1878, "step": 13164 }, { "epoch": 0.5487474469592764, "grad_norm": 392.0, "learning_rate": 4.4546731310563705e-05, "loss": 14.3752, "step": 13165 }, { "epoch": 0.5487891292568047, "grad_norm": 272.0, "learning_rate": 4.454002161052264e-05, "loss": 13.2504, "step": 13166 }, { "epoch": 0.5488308115543329, "grad_norm": 170.0, "learning_rate": 4.453331200999015e-05, "loss": 11.3756, "step": 13167 }, { "epoch": 0.5488724938518611, "grad_norm": 856.0, "learning_rate": 4.4526602509088553e-05, "loss": 22.5026, "step": 13168 }, { "epoch": 0.5489141761493893, "grad_norm": 318.0, "learning_rate": 4.451989310794009e-05, "loss": 14.0003, "step": 13169 }, { "epoch": 0.5489558584469176, "grad_norm": 195.0, "learning_rate": 4.4513183806667083e-05, "loss": 11.0007, "step": 13170 }, { "epoch": 0.5489975407444458, "grad_norm": 182.0, "learning_rate": 4.450647460539177e-05, "loss": 12.1255, "step": 13171 }, { "epoch": 0.5490392230419741, "grad_norm": 108.5, "learning_rate": 4.4499765504236465e-05, "loss": 8.6883, "step": 13172 }, { "epoch": 0.5490809053395023, "grad_norm": 262.0, "learning_rate": 4.44930565033234e-05, "loss": 11.2511, "step": 13173 }, { "epoch": 0.5491225876370306, "grad_norm": 446.0, "learning_rate": 4.448634760277487e-05, "loss": 16.3758, "step": 13174 }, { "epoch": 0.5491642699345588, "grad_norm": 462.0, "learning_rate": 4.447963880271316e-05, "loss": 16.1251, "step": 13175 }, { "epoch": 0.549205952232087, "grad_norm": 1224.0, "learning_rate": 4.447293010326052e-05, "loss": 27.7547, "step": 13176 }, { "epoch": 0.5492476345296152, "grad_norm": 376.0, "learning_rate": 4.44662215045392e-05, "loss": 15.3757, "step": 13177 }, { "epoch": 0.5492893168271435, "grad_norm": 288.0, "learning_rate": 4.445951300667151e-05, "loss": 12.2505, "step": 13178 }, { "epoch": 0.5493309991246718, "grad_norm": 124.0, "learning_rate": 4.445280460977967e-05, "loss": 8.3756, "step": 13179 }, { "epoch": 0.5493726814222, "grad_norm": 310.0, "learning_rate": 4.4446096313985976e-05, "loss": 11.8133, "step": 13180 }, { "epoch": 0.5494143637197282, "grad_norm": 298.0, "learning_rate": 4.443938811941265e-05, "loss": 13.1255, "step": 13181 }, { "epoch": 0.5494560460172565, "grad_norm": 214.0, "learning_rate": 4.443268002618199e-05, "loss": 11.0629, "step": 13182 }, { "epoch": 0.5494977283147847, "grad_norm": 1016.0, "learning_rate": 4.442597203441621e-05, "loss": 23.6299, "step": 13183 }, { "epoch": 0.5495394106123129, "grad_norm": 576.0, "learning_rate": 4.441926414423761e-05, "loss": 18.2511, "step": 13184 }, { "epoch": 0.5495810929098411, "grad_norm": 186.0, "learning_rate": 4.441255635576838e-05, "loss": 8.438, "step": 13185 }, { "epoch": 0.5496227752073695, "grad_norm": 382.0, "learning_rate": 4.4405848669130826e-05, "loss": 14.8757, "step": 13186 }, { "epoch": 0.5496644575048977, "grad_norm": 203.0, "learning_rate": 4.4399141084447184e-05, "loss": 6.0324, "step": 13187 }, { "epoch": 0.5497061398024259, "grad_norm": 243.0, "learning_rate": 4.4392433601839686e-05, "loss": 12.2504, "step": 13188 }, { "epoch": 0.5497478220999541, "grad_norm": 448.0, "learning_rate": 4.438572622143057e-05, "loss": 15.3127, "step": 13189 }, { "epoch": 0.5497895043974824, "grad_norm": 264.0, "learning_rate": 4.437901894334212e-05, "loss": 11.8126, "step": 13190 }, { "epoch": 0.5498311866950106, "grad_norm": 320.0, "learning_rate": 4.437231176769652e-05, "loss": 11.8756, "step": 13191 }, { "epoch": 0.5498728689925388, "grad_norm": 452.0, "learning_rate": 4.436560469461606e-05, "loss": 15.8127, "step": 13192 }, { "epoch": 0.549914551290067, "grad_norm": 294.0, "learning_rate": 4.435889772422294e-05, "loss": 11.5022, "step": 13193 }, { "epoch": 0.5499562335875954, "grad_norm": 494.0, "learning_rate": 4.4352190856639424e-05, "loss": 15.8752, "step": 13194 }, { "epoch": 0.5499979158851236, "grad_norm": 528.0, "learning_rate": 4.434548409198772e-05, "loss": 15.1252, "step": 13195 }, { "epoch": 0.5500395981826518, "grad_norm": 155.0, "learning_rate": 4.433877743039008e-05, "loss": 8.8129, "step": 13196 }, { "epoch": 0.55008128048018, "grad_norm": 272.0, "learning_rate": 4.433207087196871e-05, "loss": 12.6882, "step": 13197 }, { "epoch": 0.5501229627777083, "grad_norm": 164.0, "learning_rate": 4.4325364416845854e-05, "loss": 10.5628, "step": 13198 }, { "epoch": 0.5501646450752365, "grad_norm": 358.0, "learning_rate": 4.4318658065143744e-05, "loss": 14.0003, "step": 13199 }, { "epoch": 0.5502063273727648, "grad_norm": 418.0, "learning_rate": 4.431195181698459e-05, "loss": 15.4376, "step": 13200 }, { "epoch": 0.5502480096702931, "grad_norm": 432.0, "learning_rate": 4.430524567249061e-05, "loss": 16.2503, "step": 13201 }, { "epoch": 0.5502896919678213, "grad_norm": 165.0, "learning_rate": 4.429853963178405e-05, "loss": 9.6257, "step": 13202 }, { "epoch": 0.5503313742653495, "grad_norm": 45.5, "learning_rate": 4.42918336949871e-05, "loss": 7.2503, "step": 13203 }, { "epoch": 0.5503730565628777, "grad_norm": 358.0, "learning_rate": 4.4285127862222004e-05, "loss": 13.4378, "step": 13204 }, { "epoch": 0.550414738860406, "grad_norm": 520.0, "learning_rate": 4.4278422133610936e-05, "loss": 18.8754, "step": 13205 }, { "epoch": 0.5504564211579343, "grad_norm": 458.0, "learning_rate": 4.4271716509276156e-05, "loss": 15.5631, "step": 13206 }, { "epoch": 0.5504981034554625, "grad_norm": 484.0, "learning_rate": 4.426501098933983e-05, "loss": 17.1254, "step": 13207 }, { "epoch": 0.5505397857529907, "grad_norm": 482.0, "learning_rate": 4.4258305573924205e-05, "loss": 15.4386, "step": 13208 }, { "epoch": 0.550581468050519, "grad_norm": 235.0, "learning_rate": 4.425160026315146e-05, "loss": 12.5627, "step": 13209 }, { "epoch": 0.5506231503480472, "grad_norm": 119.0, "learning_rate": 4.4244895057143824e-05, "loss": 9.1256, "step": 13210 }, { "epoch": 0.5506648326455754, "grad_norm": 358.0, "learning_rate": 4.423818995602347e-05, "loss": 14.9376, "step": 13211 }, { "epoch": 0.5507065149431036, "grad_norm": 180.0, "learning_rate": 4.423148495991262e-05, "loss": 8.9381, "step": 13212 }, { "epoch": 0.550748197240632, "grad_norm": 496.0, "learning_rate": 4.422478006893347e-05, "loss": 17.5002, "step": 13213 }, { "epoch": 0.5507898795381602, "grad_norm": 213.0, "learning_rate": 4.421807528320822e-05, "loss": 12.6254, "step": 13214 }, { "epoch": 0.5508315618356884, "grad_norm": 348.0, "learning_rate": 4.4211370602859044e-05, "loss": 13.942, "step": 13215 }, { "epoch": 0.5508732441332166, "grad_norm": 180.0, "learning_rate": 4.420466602800818e-05, "loss": 9.8753, "step": 13216 }, { "epoch": 0.5509149264307449, "grad_norm": 262.0, "learning_rate": 4.419796155877777e-05, "loss": 12.1877, "step": 13217 }, { "epoch": 0.5509566087282731, "grad_norm": 344.0, "learning_rate": 4.419125719529004e-05, "loss": 15.3756, "step": 13218 }, { "epoch": 0.5509982910258013, "grad_norm": 168.0, "learning_rate": 4.418455293766715e-05, "loss": 11.7509, "step": 13219 }, { "epoch": 0.5510399733233295, "grad_norm": 106.0, "learning_rate": 4.4177848786031315e-05, "loss": 7.938, "step": 13220 }, { "epoch": 0.5510816556208579, "grad_norm": 408.0, "learning_rate": 4.417114474050469e-05, "loss": 14.938, "step": 13221 }, { "epoch": 0.5511233379183861, "grad_norm": 580.0, "learning_rate": 4.4164440801209484e-05, "loss": 19.6253, "step": 13222 }, { "epoch": 0.5511650202159143, "grad_norm": 192.0, "learning_rate": 4.4157736968267835e-05, "loss": 9.7502, "step": 13223 }, { "epoch": 0.5512067025134425, "grad_norm": 117.0, "learning_rate": 4.415103324180197e-05, "loss": 10.7509, "step": 13224 }, { "epoch": 0.5512483848109708, "grad_norm": 193.0, "learning_rate": 4.414432962193404e-05, "loss": 11.6258, "step": 13225 }, { "epoch": 0.551290067108499, "grad_norm": 274.0, "learning_rate": 4.4137626108786224e-05, "loss": 12.6253, "step": 13226 }, { "epoch": 0.5513317494060272, "grad_norm": 940.0, "learning_rate": 4.4130922702480684e-05, "loss": 20.1252, "step": 13227 }, { "epoch": 0.5513734317035555, "grad_norm": 772.0, "learning_rate": 4.412421940313961e-05, "loss": 22.1289, "step": 13228 }, { "epoch": 0.5514151140010838, "grad_norm": 196.0, "learning_rate": 4.4117516210885145e-05, "loss": 12.2516, "step": 13229 }, { "epoch": 0.551456796298612, "grad_norm": 716.0, "learning_rate": 4.411081312583949e-05, "loss": 20.2502, "step": 13230 }, { "epoch": 0.5514984785961402, "grad_norm": 234.0, "learning_rate": 4.410411014812477e-05, "loss": 12.8756, "step": 13231 }, { "epoch": 0.5515401608936684, "grad_norm": 239.0, "learning_rate": 4.4097407277863176e-05, "loss": 12.5002, "step": 13232 }, { "epoch": 0.5515818431911967, "grad_norm": 332.0, "learning_rate": 4.409070451517685e-05, "loss": 14.2502, "step": 13233 }, { "epoch": 0.551623525488725, "grad_norm": 474.0, "learning_rate": 4.4084001860187975e-05, "loss": 14.6897, "step": 13234 }, { "epoch": 0.5516652077862532, "grad_norm": 302.0, "learning_rate": 4.4077299313018664e-05, "loss": 12.628, "step": 13235 }, { "epoch": 0.5517068900837814, "grad_norm": 296.0, "learning_rate": 4.407059687379112e-05, "loss": 13.9377, "step": 13236 }, { "epoch": 0.5517485723813097, "grad_norm": 344.0, "learning_rate": 4.4063894542627473e-05, "loss": 14.6883, "step": 13237 }, { "epoch": 0.5517902546788379, "grad_norm": 276.0, "learning_rate": 4.4057192319649865e-05, "loss": 13.3754, "step": 13238 }, { "epoch": 0.5518319369763661, "grad_norm": 712.0, "learning_rate": 4.4050490204980456e-05, "loss": 21.8759, "step": 13239 }, { "epoch": 0.5518736192738943, "grad_norm": 229.0, "learning_rate": 4.40437881987414e-05, "loss": 11.1881, "step": 13240 }, { "epoch": 0.5519153015714227, "grad_norm": 260.0, "learning_rate": 4.403708630105482e-05, "loss": 11.8753, "step": 13241 }, { "epoch": 0.5519569838689509, "grad_norm": 368.0, "learning_rate": 4.4030384512042896e-05, "loss": 13.5009, "step": 13242 }, { "epoch": 0.5519986661664791, "grad_norm": 652.0, "learning_rate": 4.402368283182772e-05, "loss": 20.6252, "step": 13243 }, { "epoch": 0.5520403484640073, "grad_norm": 227.0, "learning_rate": 4.401698126053147e-05, "loss": 12.5634, "step": 13244 }, { "epoch": 0.5520820307615356, "grad_norm": 290.0, "learning_rate": 4.401027979827625e-05, "loss": 12.9386, "step": 13245 }, { "epoch": 0.5521237130590638, "grad_norm": 528.0, "learning_rate": 4.4003578445184234e-05, "loss": 15.4394, "step": 13246 }, { "epoch": 0.552165395356592, "grad_norm": 572.0, "learning_rate": 4.399687720137751e-05, "loss": 18.2502, "step": 13247 }, { "epoch": 0.5522070776541202, "grad_norm": 1168.0, "learning_rate": 4.399017606697826e-05, "loss": 26.5029, "step": 13248 }, { "epoch": 0.5522487599516486, "grad_norm": 648.0, "learning_rate": 4.398347504210856e-05, "loss": 20.5014, "step": 13249 }, { "epoch": 0.5522904422491768, "grad_norm": 264.0, "learning_rate": 4.397677412689057e-05, "loss": 12.938, "step": 13250 }, { "epoch": 0.552332124546705, "grad_norm": 75.0, "learning_rate": 4.397007332144641e-05, "loss": 6.282, "step": 13251 }, { "epoch": 0.5523738068442332, "grad_norm": 236.0, "learning_rate": 4.396337262589818e-05, "loss": 11.5629, "step": 13252 }, { "epoch": 0.5524154891417615, "grad_norm": 346.0, "learning_rate": 4.395667204036804e-05, "loss": 14.2502, "step": 13253 }, { "epoch": 0.5524571714392897, "grad_norm": 100.5, "learning_rate": 4.394997156497807e-05, "loss": 7.344, "step": 13254 }, { "epoch": 0.552498853736818, "grad_norm": 736.0, "learning_rate": 4.3943271199850435e-05, "loss": 25.002, "step": 13255 }, { "epoch": 0.5525405360343462, "grad_norm": 186.0, "learning_rate": 4.393657094510719e-05, "loss": 9.9377, "step": 13256 }, { "epoch": 0.5525822183318745, "grad_norm": 79.0, "learning_rate": 4.3929870800870504e-05, "loss": 8.4381, "step": 13257 }, { "epoch": 0.5526239006294027, "grad_norm": 492.0, "learning_rate": 4.392317076726244e-05, "loss": 15.3167, "step": 13258 }, { "epoch": 0.5526655829269309, "grad_norm": 197.0, "learning_rate": 4.391647084440515e-05, "loss": 11.126, "step": 13259 }, { "epoch": 0.5527072652244591, "grad_norm": 159.0, "learning_rate": 4.39097710324207e-05, "loss": 11.2508, "step": 13260 }, { "epoch": 0.5527489475219874, "grad_norm": 494.0, "learning_rate": 4.390307133143123e-05, "loss": 17.1253, "step": 13261 }, { "epoch": 0.5527906298195157, "grad_norm": 756.0, "learning_rate": 4.389637174155881e-05, "loss": 20.8771, "step": 13262 }, { "epoch": 0.5528323121170439, "grad_norm": 436.0, "learning_rate": 4.388967226292557e-05, "loss": 15.2501, "step": 13263 }, { "epoch": 0.5528739944145721, "grad_norm": 282.0, "learning_rate": 4.388297289565358e-05, "loss": 13.2504, "step": 13264 }, { "epoch": 0.5529156767121004, "grad_norm": 796.0, "learning_rate": 4.387627363986496e-05, "loss": 21.6252, "step": 13265 }, { "epoch": 0.5529573590096286, "grad_norm": 294.0, "learning_rate": 4.38695744956818e-05, "loss": 13.2503, "step": 13266 }, { "epoch": 0.5529990413071568, "grad_norm": 624.0, "learning_rate": 4.3862875463226186e-05, "loss": 19.1253, "step": 13267 }, { "epoch": 0.553040723604685, "grad_norm": 203.0, "learning_rate": 4.3856176542620205e-05, "loss": 10.0669, "step": 13268 }, { "epoch": 0.5530824059022134, "grad_norm": 190.0, "learning_rate": 4.384947773398597e-05, "loss": 11.9377, "step": 13269 }, { "epoch": 0.5531240881997416, "grad_norm": 480.0, "learning_rate": 4.3842779037445526e-05, "loss": 13.0004, "step": 13270 }, { "epoch": 0.5531657704972698, "grad_norm": 178.0, "learning_rate": 4.3836080453121004e-05, "loss": 10.6879, "step": 13271 }, { "epoch": 0.5532074527947981, "grad_norm": 176.0, "learning_rate": 4.382938198113444e-05, "loss": 11.8129, "step": 13272 }, { "epoch": 0.5532491350923263, "grad_norm": 214.0, "learning_rate": 4.3822683621607966e-05, "loss": 12.2506, "step": 13273 }, { "epoch": 0.5532908173898545, "grad_norm": 1336.0, "learning_rate": 4.38159853746636e-05, "loss": 26.7545, "step": 13274 }, { "epoch": 0.5533324996873827, "grad_norm": 3568.0, "learning_rate": 4.380928724042348e-05, "loss": 66.0141, "step": 13275 }, { "epoch": 0.5533741819849111, "grad_norm": 276.0, "learning_rate": 4.3802589219009614e-05, "loss": 11.7508, "step": 13276 }, { "epoch": 0.5534158642824393, "grad_norm": 189.0, "learning_rate": 4.379589131054413e-05, "loss": 10.0003, "step": 13277 }, { "epoch": 0.5534575465799675, "grad_norm": 490.0, "learning_rate": 4.378919351514908e-05, "loss": 16.8752, "step": 13278 }, { "epoch": 0.5534992288774957, "grad_norm": 150.0, "learning_rate": 4.378249583294653e-05, "loss": 10.188, "step": 13279 }, { "epoch": 0.553540911175024, "grad_norm": 468.0, "learning_rate": 4.377579826405853e-05, "loss": 18.3752, "step": 13280 }, { "epoch": 0.5535825934725522, "grad_norm": 280.0, "learning_rate": 4.376910080860718e-05, "loss": 13.2506, "step": 13281 }, { "epoch": 0.5536242757700804, "grad_norm": 520.0, "learning_rate": 4.3762403466714505e-05, "loss": 17.2501, "step": 13282 }, { "epoch": 0.5536659580676087, "grad_norm": 302.0, "learning_rate": 4.37557062385026e-05, "loss": 12.6878, "step": 13283 }, { "epoch": 0.553707640365137, "grad_norm": 808.0, "learning_rate": 4.374900912409347e-05, "loss": 20.6273, "step": 13284 }, { "epoch": 0.5537493226626652, "grad_norm": 612.0, "learning_rate": 4.374231212360924e-05, "loss": 18.3753, "step": 13285 }, { "epoch": 0.5537910049601934, "grad_norm": 161.0, "learning_rate": 4.37356152371719e-05, "loss": 9.6298, "step": 13286 }, { "epoch": 0.5538326872577216, "grad_norm": 436.0, "learning_rate": 4.372891846490356e-05, "loss": 15.4381, "step": 13287 }, { "epoch": 0.5538743695552499, "grad_norm": 124.0, "learning_rate": 4.37222218069262e-05, "loss": 8.3129, "step": 13288 }, { "epoch": 0.5539160518527781, "grad_norm": 164.0, "learning_rate": 4.3715525263361924e-05, "loss": 10.6253, "step": 13289 }, { "epoch": 0.5539577341503064, "grad_norm": 482.0, "learning_rate": 4.370882883433275e-05, "loss": 16.8752, "step": 13290 }, { "epoch": 0.5539994164478346, "grad_norm": 470.0, "learning_rate": 4.370213251996074e-05, "loss": 15.8148, "step": 13291 }, { "epoch": 0.5540410987453629, "grad_norm": 350.0, "learning_rate": 4.369543632036791e-05, "loss": 15.1886, "step": 13292 }, { "epoch": 0.5540827810428911, "grad_norm": 490.0, "learning_rate": 4.3688740235676337e-05, "loss": 16.6262, "step": 13293 }, { "epoch": 0.5541244633404193, "grad_norm": 233.0, "learning_rate": 4.368204426600801e-05, "loss": 11.9377, "step": 13294 }, { "epoch": 0.5541661456379475, "grad_norm": 1208.0, "learning_rate": 4.3675348411485004e-05, "loss": 29.3754, "step": 13295 }, { "epoch": 0.5542078279354758, "grad_norm": 636.0, "learning_rate": 4.3668652672229314e-05, "loss": 18.5009, "step": 13296 }, { "epoch": 0.5542495102330041, "grad_norm": 512.0, "learning_rate": 4.366195704836301e-05, "loss": 15.0025, "step": 13297 }, { "epoch": 0.5542911925305323, "grad_norm": 536.0, "learning_rate": 4.365526154000808e-05, "loss": 17.0004, "step": 13298 }, { "epoch": 0.5543328748280605, "grad_norm": 296.0, "learning_rate": 4.364856614728661e-05, "loss": 13.0003, "step": 13299 }, { "epoch": 0.5543745571255888, "grad_norm": 135.0, "learning_rate": 4.3641870870320544e-05, "loss": 9.5005, "step": 13300 }, { "epoch": 0.554416239423117, "grad_norm": 426.0, "learning_rate": 4.363517570923198e-05, "loss": 16.3761, "step": 13301 }, { "epoch": 0.5544579217206452, "grad_norm": 218.0, "learning_rate": 4.362848066414287e-05, "loss": 12.4383, "step": 13302 }, { "epoch": 0.5544996040181734, "grad_norm": 220.0, "learning_rate": 4.362178573517528e-05, "loss": 12.1262, "step": 13303 }, { "epoch": 0.5545412863157018, "grad_norm": 720.0, "learning_rate": 4.3615090922451224e-05, "loss": 22.0002, "step": 13304 }, { "epoch": 0.55458296861323, "grad_norm": 424.0, "learning_rate": 4.360839622609269e-05, "loss": 15.3752, "step": 13305 }, { "epoch": 0.5546246509107582, "grad_norm": 316.0, "learning_rate": 4.360170164622169e-05, "loss": 12.8128, "step": 13306 }, { "epoch": 0.5546663332082864, "grad_norm": 692.0, "learning_rate": 4.359500718296028e-05, "loss": 20.6288, "step": 13307 }, { "epoch": 0.5547080155058147, "grad_norm": 282.0, "learning_rate": 4.35883128364304e-05, "loss": 11.6255, "step": 13308 }, { "epoch": 0.5547496978033429, "grad_norm": 366.0, "learning_rate": 4.3581618606754114e-05, "loss": 12.6254, "step": 13309 }, { "epoch": 0.5547913801008711, "grad_norm": 199.0, "learning_rate": 4.3574924494053374e-05, "loss": 11.6254, "step": 13310 }, { "epoch": 0.5548330623983994, "grad_norm": 1688.0, "learning_rate": 4.356823049845023e-05, "loss": 34.5039, "step": 13311 }, { "epoch": 0.5548747446959277, "grad_norm": 680.0, "learning_rate": 4.356153662006663e-05, "loss": 21.7504, "step": 13312 }, { "epoch": 0.5549164269934559, "grad_norm": 346.0, "learning_rate": 4.355484285902462e-05, "loss": 13.063, "step": 13313 }, { "epoch": 0.5549581092909841, "grad_norm": 142.0, "learning_rate": 4.3548149215446155e-05, "loss": 7.6567, "step": 13314 }, { "epoch": 0.5549997915885123, "grad_norm": 316.0, "learning_rate": 4.354145568945325e-05, "loss": 12.8754, "step": 13315 }, { "epoch": 0.5550414738860406, "grad_norm": 288.0, "learning_rate": 4.353476228116788e-05, "loss": 13.1252, "step": 13316 }, { "epoch": 0.5550831561835688, "grad_norm": 270.0, "learning_rate": 4.3528068990712056e-05, "loss": 13.5629, "step": 13317 }, { "epoch": 0.5551248384810971, "grad_norm": 360.0, "learning_rate": 4.352137581820773e-05, "loss": 14.627, "step": 13318 }, { "epoch": 0.5551665207786253, "grad_norm": 165.0, "learning_rate": 4.351468276377693e-05, "loss": 10.3754, "step": 13319 }, { "epoch": 0.5552082030761536, "grad_norm": 430.0, "learning_rate": 4.350798982754159e-05, "loss": 14.4377, "step": 13320 }, { "epoch": 0.5552498853736818, "grad_norm": 454.0, "learning_rate": 4.350129700962373e-05, "loss": 15.5003, "step": 13321 }, { "epoch": 0.55529156767121, "grad_norm": 576.0, "learning_rate": 4.3494604310145294e-05, "loss": 17.2528, "step": 13322 }, { "epoch": 0.5553332499687382, "grad_norm": 231.0, "learning_rate": 4.348791172922829e-05, "loss": 11.7504, "step": 13323 }, { "epoch": 0.5553749322662666, "grad_norm": 264.0, "learning_rate": 4.348121926699465e-05, "loss": 13.0007, "step": 13324 }, { "epoch": 0.5554166145637948, "grad_norm": 157.0, "learning_rate": 4.347452692356639e-05, "loss": 10.3758, "step": 13325 }, { "epoch": 0.555458296861323, "grad_norm": 90.0, "learning_rate": 4.3467834699065436e-05, "loss": 7.5006, "step": 13326 }, { "epoch": 0.5554999791588512, "grad_norm": 66.0, "learning_rate": 4.346114259361378e-05, "loss": 7.2815, "step": 13327 }, { "epoch": 0.5555416614563795, "grad_norm": 81.5, "learning_rate": 4.345445060733338e-05, "loss": 8.1254, "step": 13328 }, { "epoch": 0.5555833437539077, "grad_norm": 182.0, "learning_rate": 4.3447758740346206e-05, "loss": 10.3752, "step": 13329 }, { "epoch": 0.5556250260514359, "grad_norm": 322.0, "learning_rate": 4.3441066992774195e-05, "loss": 12.7503, "step": 13330 }, { "epoch": 0.5556667083489641, "grad_norm": 266.0, "learning_rate": 4.343437536473934e-05, "loss": 13.1252, "step": 13331 }, { "epoch": 0.5557083906464925, "grad_norm": 498.0, "learning_rate": 4.342768385636357e-05, "loss": 15.2503, "step": 13332 }, { "epoch": 0.5557500729440207, "grad_norm": 992.0, "learning_rate": 4.342099246776885e-05, "loss": 28.0007, "step": 13333 }, { "epoch": 0.5557917552415489, "grad_norm": 450.0, "learning_rate": 4.3414301199077115e-05, "loss": 14.1897, "step": 13334 }, { "epoch": 0.5558334375390771, "grad_norm": 184.0, "learning_rate": 4.340761005041034e-05, "loss": 7.688, "step": 13335 }, { "epoch": 0.5558751198366054, "grad_norm": 684.0, "learning_rate": 4.340091902189043e-05, "loss": 20.1257, "step": 13336 }, { "epoch": 0.5559168021341336, "grad_norm": 223.0, "learning_rate": 4.339422811363939e-05, "loss": 11.1252, "step": 13337 }, { "epoch": 0.5559584844316618, "grad_norm": 194.0, "learning_rate": 4.33875373257791e-05, "loss": 10.6881, "step": 13338 }, { "epoch": 0.5560001667291901, "grad_norm": 416.0, "learning_rate": 4.3380846658431564e-05, "loss": 14.5005, "step": 13339 }, { "epoch": 0.5560418490267184, "grad_norm": 360.0, "learning_rate": 4.3374156111718654e-05, "loss": 15.2507, "step": 13340 }, { "epoch": 0.5560835313242466, "grad_norm": 344.0, "learning_rate": 4.3367465685762345e-05, "loss": 14.3752, "step": 13341 }, { "epoch": 0.5561252136217748, "grad_norm": 298.0, "learning_rate": 4.336077538068455e-05, "loss": 13.3753, "step": 13342 }, { "epoch": 0.556166895919303, "grad_norm": 238.0, "learning_rate": 4.335408519660724e-05, "loss": 11.8127, "step": 13343 }, { "epoch": 0.5562085782168313, "grad_norm": 167.0, "learning_rate": 4.3347395133652296e-05, "loss": 11.9382, "step": 13344 }, { "epoch": 0.5562502605143596, "grad_norm": 948.0, "learning_rate": 4.334070519194169e-05, "loss": 21.88, "step": 13345 }, { "epoch": 0.5562919428118878, "grad_norm": 596.0, "learning_rate": 4.3334015371597294e-05, "loss": 18.2501, "step": 13346 }, { "epoch": 0.5563336251094161, "grad_norm": 198.0, "learning_rate": 4.332732567274109e-05, "loss": 11.8753, "step": 13347 }, { "epoch": 0.5563753074069443, "grad_norm": 436.0, "learning_rate": 4.332063609549494e-05, "loss": 13.8752, "step": 13348 }, { "epoch": 0.5564169897044725, "grad_norm": 420.0, "learning_rate": 4.331394663998081e-05, "loss": 14.2532, "step": 13349 }, { "epoch": 0.5564586720020007, "grad_norm": 544.0, "learning_rate": 4.330725730632058e-05, "loss": 17.2505, "step": 13350 }, { "epoch": 0.556500354299529, "grad_norm": 482.0, "learning_rate": 4.3300568094636195e-05, "loss": 16.6254, "step": 13351 }, { "epoch": 0.5565420365970573, "grad_norm": 272.0, "learning_rate": 4.3293879005049534e-05, "loss": 12.9381, "step": 13352 }, { "epoch": 0.5565837188945855, "grad_norm": 115.5, "learning_rate": 4.328719003768253e-05, "loss": 9.6271, "step": 13353 }, { "epoch": 0.5566254011921137, "grad_norm": 772.0, "learning_rate": 4.32805011926571e-05, "loss": 22.0006, "step": 13354 }, { "epoch": 0.556667083489642, "grad_norm": 214.0, "learning_rate": 4.327381247009511e-05, "loss": 11.5628, "step": 13355 }, { "epoch": 0.5567087657871702, "grad_norm": 180.0, "learning_rate": 4.3267123870118485e-05, "loss": 11.1879, "step": 13356 }, { "epoch": 0.5567504480846984, "grad_norm": 532.0, "learning_rate": 4.3260435392849143e-05, "loss": 18.2502, "step": 13357 }, { "epoch": 0.5567921303822266, "grad_norm": 552.0, "learning_rate": 4.325374703840896e-05, "loss": 18.1254, "step": 13358 }, { "epoch": 0.556833812679755, "grad_norm": 380.0, "learning_rate": 4.324705880691983e-05, "loss": 14.4378, "step": 13359 }, { "epoch": 0.5568754949772832, "grad_norm": 368.0, "learning_rate": 4.3240370698503645e-05, "loss": 14.8754, "step": 13360 }, { "epoch": 0.5569171772748114, "grad_norm": 584.0, "learning_rate": 4.323368271328233e-05, "loss": 18.5038, "step": 13361 }, { "epoch": 0.5569588595723396, "grad_norm": 488.0, "learning_rate": 4.322699485137772e-05, "loss": 15.9377, "step": 13362 }, { "epoch": 0.5570005418698679, "grad_norm": 414.0, "learning_rate": 4.322030711291175e-05, "loss": 14.9388, "step": 13363 }, { "epoch": 0.5570422241673961, "grad_norm": 502.0, "learning_rate": 4.3213619498006266e-05, "loss": 17.1261, "step": 13364 }, { "epoch": 0.5570839064649243, "grad_norm": 284.0, "learning_rate": 4.3206932006783184e-05, "loss": 13.188, "step": 13365 }, { "epoch": 0.5571255887624526, "grad_norm": 604.0, "learning_rate": 4.320024463936436e-05, "loss": 19.8766, "step": 13366 }, { "epoch": 0.5571672710599809, "grad_norm": 430.0, "learning_rate": 4.319355739587169e-05, "loss": 15.0627, "step": 13367 }, { "epoch": 0.5572089533575091, "grad_norm": 262.0, "learning_rate": 4.318687027642703e-05, "loss": 13.3127, "step": 13368 }, { "epoch": 0.5572506356550373, "grad_norm": 580.0, "learning_rate": 4.3180183281152276e-05, "loss": 17.002, "step": 13369 }, { "epoch": 0.5572923179525655, "grad_norm": 672.0, "learning_rate": 4.317349641016927e-05, "loss": 20.7503, "step": 13370 }, { "epoch": 0.5573340002500938, "grad_norm": 418.0, "learning_rate": 4.3166809663599925e-05, "loss": 16.0003, "step": 13371 }, { "epoch": 0.557375682547622, "grad_norm": 246.0, "learning_rate": 4.316012304156605e-05, "loss": 11.7505, "step": 13372 }, { "epoch": 0.5574173648451503, "grad_norm": 174.0, "learning_rate": 4.3153436544189576e-05, "loss": 12.2503, "step": 13373 }, { "epoch": 0.5574590471426785, "grad_norm": 410.0, "learning_rate": 4.3146750171592295e-05, "loss": 14.9378, "step": 13374 }, { "epoch": 0.5575007294402068, "grad_norm": 250.0, "learning_rate": 4.314006392389612e-05, "loss": 11.8127, "step": 13375 }, { "epoch": 0.557542411737735, "grad_norm": 1088.0, "learning_rate": 4.313337780122287e-05, "loss": 28.6257, "step": 13376 }, { "epoch": 0.5575840940352632, "grad_norm": 396.0, "learning_rate": 4.3126691803694436e-05, "loss": 15.4384, "step": 13377 }, { "epoch": 0.5576257763327914, "grad_norm": 248.0, "learning_rate": 4.312000593143265e-05, "loss": 11.6878, "step": 13378 }, { "epoch": 0.5576674586303197, "grad_norm": 660.0, "learning_rate": 4.3113320184559366e-05, "loss": 20.7506, "step": 13379 }, { "epoch": 0.557709140927848, "grad_norm": 376.0, "learning_rate": 4.3106634563196426e-05, "loss": 14.4376, "step": 13380 }, { "epoch": 0.5577508232253762, "grad_norm": 438.0, "learning_rate": 4.30999490674657e-05, "loss": 16.2504, "step": 13381 }, { "epoch": 0.5577925055229044, "grad_norm": 171.0, "learning_rate": 4.3093263697489e-05, "loss": 9.8126, "step": 13382 }, { "epoch": 0.5578341878204327, "grad_norm": 218.0, "learning_rate": 4.30865784533882e-05, "loss": 11.5004, "step": 13383 }, { "epoch": 0.5578758701179609, "grad_norm": 528.0, "learning_rate": 4.3079893335285085e-05, "loss": 17.8753, "step": 13384 }, { "epoch": 0.5579175524154891, "grad_norm": 51.75, "learning_rate": 4.307320834330156e-05, "loss": 7.3442, "step": 13385 }, { "epoch": 0.5579592347130173, "grad_norm": 796.0, "learning_rate": 4.306652347755942e-05, "loss": 21.3755, "step": 13386 }, { "epoch": 0.5580009170105457, "grad_norm": 201.0, "learning_rate": 4.3059838738180514e-05, "loss": 10.9377, "step": 13387 }, { "epoch": 0.5580425993080739, "grad_norm": 366.0, "learning_rate": 4.3053154125286646e-05, "loss": 14.7503, "step": 13388 }, { "epoch": 0.5580842816056021, "grad_norm": 159.0, "learning_rate": 4.3046469638999674e-05, "loss": 11.1256, "step": 13389 }, { "epoch": 0.5581259639031303, "grad_norm": 588.0, "learning_rate": 4.3039785279441394e-05, "loss": 19.7502, "step": 13390 }, { "epoch": 0.5581676462006586, "grad_norm": 700.0, "learning_rate": 4.3033101046733656e-05, "loss": 21.8756, "step": 13391 }, { "epoch": 0.5582093284981868, "grad_norm": 112.0, "learning_rate": 4.302641694099827e-05, "loss": 11.1257, "step": 13392 }, { "epoch": 0.558251010795715, "grad_norm": 536.0, "learning_rate": 4.301973296235706e-05, "loss": 16.3757, "step": 13393 }, { "epoch": 0.5582926930932433, "grad_norm": 280.0, "learning_rate": 4.3013049110931816e-05, "loss": 12.8128, "step": 13394 }, { "epoch": 0.5583343753907716, "grad_norm": 173.0, "learning_rate": 4.30063653868444e-05, "loss": 8.0629, "step": 13395 }, { "epoch": 0.5583760576882998, "grad_norm": 370.0, "learning_rate": 4.2999681790216575e-05, "loss": 15.1878, "step": 13396 }, { "epoch": 0.558417739985828, "grad_norm": 116.5, "learning_rate": 4.299299832117019e-05, "loss": 10.4379, "step": 13397 }, { "epoch": 0.5584594222833562, "grad_norm": 392.0, "learning_rate": 4.2986314979827e-05, "loss": 15.0631, "step": 13398 }, { "epoch": 0.5585011045808845, "grad_norm": 268.0, "learning_rate": 4.297963176630888e-05, "loss": 13.4376, "step": 13399 }, { "epoch": 0.5585427868784127, "grad_norm": 148.0, "learning_rate": 4.2972948680737565e-05, "loss": 10.0629, "step": 13400 }, { "epoch": 0.558584469175941, "grad_norm": 149.0, "learning_rate": 4.296626572323491e-05, "loss": 10.3753, "step": 13401 }, { "epoch": 0.5586261514734692, "grad_norm": 1168.0, "learning_rate": 4.2959582893922664e-05, "loss": 25.0045, "step": 13402 }, { "epoch": 0.5586678337709975, "grad_norm": 588.0, "learning_rate": 4.295290019292265e-05, "loss": 18.5002, "step": 13403 }, { "epoch": 0.5587095160685257, "grad_norm": 223.0, "learning_rate": 4.294621762035666e-05, "loss": 11.8126, "step": 13404 }, { "epoch": 0.5587511983660539, "grad_norm": 296.0, "learning_rate": 4.293953517634648e-05, "loss": 12.4378, "step": 13405 }, { "epoch": 0.5587928806635821, "grad_norm": 187.0, "learning_rate": 4.293285286101388e-05, "loss": 10.7503, "step": 13406 }, { "epoch": 0.5588345629611104, "grad_norm": 486.0, "learning_rate": 4.2926170674480695e-05, "loss": 18.6255, "step": 13407 }, { "epoch": 0.5588762452586387, "grad_norm": 460.0, "learning_rate": 4.291948861686866e-05, "loss": 17.0007, "step": 13408 }, { "epoch": 0.5589179275561669, "grad_norm": 1472.0, "learning_rate": 4.2912806688299586e-05, "loss": 31.7524, "step": 13409 }, { "epoch": 0.5589596098536951, "grad_norm": 173.0, "learning_rate": 4.2906124888895224e-05, "loss": 10.8127, "step": 13410 }, { "epoch": 0.5590012921512234, "grad_norm": 245.0, "learning_rate": 4.289944321877738e-05, "loss": 12.6888, "step": 13411 }, { "epoch": 0.5590429744487516, "grad_norm": 278.0, "learning_rate": 4.2892761678067794e-05, "loss": 10.1255, "step": 13412 }, { "epoch": 0.5590846567462798, "grad_norm": 210.0, "learning_rate": 4.2886080266888285e-05, "loss": 10.5003, "step": 13413 }, { "epoch": 0.559126339043808, "grad_norm": 1328.0, "learning_rate": 4.2879398985360566e-05, "loss": 32.0002, "step": 13414 }, { "epoch": 0.5591680213413364, "grad_norm": 169.0, "learning_rate": 4.2872717833606445e-05, "loss": 8.7503, "step": 13415 }, { "epoch": 0.5592097036388646, "grad_norm": 330.0, "learning_rate": 4.286603681174768e-05, "loss": 14.3754, "step": 13416 }, { "epoch": 0.5592513859363928, "grad_norm": 118.0, "learning_rate": 4.285935591990602e-05, "loss": 9.8752, "step": 13417 }, { "epoch": 0.5592930682339211, "grad_norm": 540.0, "learning_rate": 4.285267515820322e-05, "loss": 17.1253, "step": 13418 }, { "epoch": 0.5593347505314493, "grad_norm": 324.0, "learning_rate": 4.284599452676108e-05, "loss": 12.0628, "step": 13419 }, { "epoch": 0.5593764328289775, "grad_norm": 442.0, "learning_rate": 4.28393140257013e-05, "loss": 15.1883, "step": 13420 }, { "epoch": 0.5594181151265057, "grad_norm": 708.0, "learning_rate": 4.2832633655145674e-05, "loss": 20.126, "step": 13421 }, { "epoch": 0.5594597974240341, "grad_norm": 648.0, "learning_rate": 4.282595341521592e-05, "loss": 20.1259, "step": 13422 }, { "epoch": 0.5595014797215623, "grad_norm": 498.0, "learning_rate": 4.281927330603382e-05, "loss": 17.2518, "step": 13423 }, { "epoch": 0.5595431620190905, "grad_norm": 324.0, "learning_rate": 4.281259332772108e-05, "loss": 15.3128, "step": 13424 }, { "epoch": 0.5595848443166187, "grad_norm": 474.0, "learning_rate": 4.280591348039949e-05, "loss": 15.2502, "step": 13425 }, { "epoch": 0.559626526614147, "grad_norm": 490.0, "learning_rate": 4.279923376419074e-05, "loss": 17.6252, "step": 13426 }, { "epoch": 0.5596682089116752, "grad_norm": 426.0, "learning_rate": 4.2792554179216615e-05, "loss": 16.6251, "step": 13427 }, { "epoch": 0.5597098912092034, "grad_norm": 414.0, "learning_rate": 4.278587472559881e-05, "loss": 15.8753, "step": 13428 }, { "epoch": 0.5597515735067317, "grad_norm": 278.0, "learning_rate": 4.277919540345909e-05, "loss": 13.126, "step": 13429 }, { "epoch": 0.55979325580426, "grad_norm": 123.5, "learning_rate": 4.277251621291918e-05, "loss": 9.751, "step": 13430 }, { "epoch": 0.5598349381017882, "grad_norm": 414.0, "learning_rate": 4.2765837154100797e-05, "loss": 15.6878, "step": 13431 }, { "epoch": 0.5598766203993164, "grad_norm": 316.0, "learning_rate": 4.2759158227125665e-05, "loss": 14.3753, "step": 13432 }, { "epoch": 0.5599183026968446, "grad_norm": 190.0, "learning_rate": 4.2752479432115535e-05, "loss": 11.2512, "step": 13433 }, { "epoch": 0.5599599849943729, "grad_norm": 476.0, "learning_rate": 4.274580076919209e-05, "loss": 16.3753, "step": 13434 }, { "epoch": 0.5600016672919012, "grad_norm": 312.0, "learning_rate": 4.273912223847709e-05, "loss": 12.3147, "step": 13435 }, { "epoch": 0.5600433495894294, "grad_norm": 282.0, "learning_rate": 4.273244384009222e-05, "loss": 11.8127, "step": 13436 }, { "epoch": 0.5600850318869576, "grad_norm": 472.0, "learning_rate": 4.2725765574159224e-05, "loss": 16.2503, "step": 13437 }, { "epoch": 0.5601267141844859, "grad_norm": 278.0, "learning_rate": 4.271908744079977e-05, "loss": 12.7517, "step": 13438 }, { "epoch": 0.5601683964820141, "grad_norm": 644.0, "learning_rate": 4.271240944013561e-05, "loss": 18.8753, "step": 13439 }, { "epoch": 0.5602100787795423, "grad_norm": 480.0, "learning_rate": 4.2705731572288424e-05, "loss": 17.7502, "step": 13440 }, { "epoch": 0.5602517610770705, "grad_norm": 408.0, "learning_rate": 4.269905383737994e-05, "loss": 10.2514, "step": 13441 }, { "epoch": 0.5602934433745989, "grad_norm": 234.0, "learning_rate": 4.269237623553184e-05, "loss": 11.4379, "step": 13442 }, { "epoch": 0.5603351256721271, "grad_norm": 119.5, "learning_rate": 4.268569876686583e-05, "loss": 7.1879, "step": 13443 }, { "epoch": 0.5603768079696553, "grad_norm": 100.5, "learning_rate": 4.267902143150361e-05, "loss": 9.0627, "step": 13444 }, { "epoch": 0.5604184902671835, "grad_norm": 148.0, "learning_rate": 4.267234422956688e-05, "loss": 10.9384, "step": 13445 }, { "epoch": 0.5604601725647118, "grad_norm": 300.0, "learning_rate": 4.266566716117732e-05, "loss": 12.8126, "step": 13446 }, { "epoch": 0.56050185486224, "grad_norm": 652.0, "learning_rate": 4.265899022645665e-05, "loss": 20.8754, "step": 13447 }, { "epoch": 0.5605435371597682, "grad_norm": 428.0, "learning_rate": 4.2652313425526504e-05, "loss": 13.4382, "step": 13448 }, { "epoch": 0.5605852194572964, "grad_norm": 580.0, "learning_rate": 4.264563675850862e-05, "loss": 20.2504, "step": 13449 }, { "epoch": 0.5606269017548248, "grad_norm": 354.0, "learning_rate": 4.2638960225524637e-05, "loss": 14.3142, "step": 13450 }, { "epoch": 0.560668584052353, "grad_norm": 300.0, "learning_rate": 4.2632283826696284e-05, "loss": 12.8752, "step": 13451 }, { "epoch": 0.5607102663498812, "grad_norm": 692.0, "learning_rate": 4.2625607562145186e-05, "loss": 22.1252, "step": 13452 }, { "epoch": 0.5607519486474094, "grad_norm": 160.0, "learning_rate": 4.261893143199306e-05, "loss": 9.3753, "step": 13453 }, { "epoch": 0.5607936309449377, "grad_norm": 552.0, "learning_rate": 4.261225543636157e-05, "loss": 17.3772, "step": 13454 }, { "epoch": 0.5608353132424659, "grad_norm": 154.0, "learning_rate": 4.2605579575372364e-05, "loss": 11.6254, "step": 13455 }, { "epoch": 0.5608769955399941, "grad_norm": 177.0, "learning_rate": 4.2598903849147126e-05, "loss": 10.0004, "step": 13456 }, { "epoch": 0.5609186778375224, "grad_norm": 384.0, "learning_rate": 4.259222825780754e-05, "loss": 13.8756, "step": 13457 }, { "epoch": 0.5609603601350507, "grad_norm": 370.0, "learning_rate": 4.2585552801475236e-05, "loss": 12.5629, "step": 13458 }, { "epoch": 0.5610020424325789, "grad_norm": 140.0, "learning_rate": 4.2578877480271906e-05, "loss": 6.5952, "step": 13459 }, { "epoch": 0.5610437247301071, "grad_norm": 247.0, "learning_rate": 4.257220229431917e-05, "loss": 11.0002, "step": 13460 }, { "epoch": 0.5610854070276353, "grad_norm": 352.0, "learning_rate": 4.2565527243738736e-05, "loss": 15.4401, "step": 13461 }, { "epoch": 0.5611270893251636, "grad_norm": 149.0, "learning_rate": 4.25588523286522e-05, "loss": 10.5002, "step": 13462 }, { "epoch": 0.5611687716226919, "grad_norm": 350.0, "learning_rate": 4.2552177549181265e-05, "loss": 15.0629, "step": 13463 }, { "epoch": 0.5612104539202201, "grad_norm": 948.0, "learning_rate": 4.254550290544753e-05, "loss": 24.3751, "step": 13464 }, { "epoch": 0.5612521362177483, "grad_norm": 167.0, "learning_rate": 4.253882839757269e-05, "loss": 10.5004, "step": 13465 }, { "epoch": 0.5612938185152766, "grad_norm": 812.0, "learning_rate": 4.253215402567835e-05, "loss": 19.6317, "step": 13466 }, { "epoch": 0.5613355008128048, "grad_norm": 234.0, "learning_rate": 4.252547978988617e-05, "loss": 12.8143, "step": 13467 }, { "epoch": 0.561377183110333, "grad_norm": 388.0, "learning_rate": 4.251880569031777e-05, "loss": 14.8129, "step": 13468 }, { "epoch": 0.5614188654078612, "grad_norm": 640.0, "learning_rate": 4.251213172709483e-05, "loss": 20.1258, "step": 13469 }, { "epoch": 0.5614605477053896, "grad_norm": 225.0, "learning_rate": 4.250545790033893e-05, "loss": 11.0008, "step": 13470 }, { "epoch": 0.5615022300029178, "grad_norm": 272.0, "learning_rate": 4.249878421017174e-05, "loss": 13.0003, "step": 13471 }, { "epoch": 0.561543912300446, "grad_norm": 205.0, "learning_rate": 4.2492110656714856e-05, "loss": 6.5631, "step": 13472 }, { "epoch": 0.5615855945979742, "grad_norm": 664.0, "learning_rate": 4.248543724008995e-05, "loss": 21.0003, "step": 13473 }, { "epoch": 0.5616272768955025, "grad_norm": 169.0, "learning_rate": 4.247876396041859e-05, "loss": 8.6252, "step": 13474 }, { "epoch": 0.5616689591930307, "grad_norm": 1012.0, "learning_rate": 4.247209081782245e-05, "loss": 23.2518, "step": 13475 }, { "epoch": 0.5617106414905589, "grad_norm": 536.0, "learning_rate": 4.2465417812423094e-05, "loss": 17.7502, "step": 13476 }, { "epoch": 0.5617523237880871, "grad_norm": 400.0, "learning_rate": 4.2458744944342194e-05, "loss": 16.3753, "step": 13477 }, { "epoch": 0.5617940060856155, "grad_norm": 488.0, "learning_rate": 4.245207221370131e-05, "loss": 17.1252, "step": 13478 }, { "epoch": 0.5618356883831437, "grad_norm": 124.0, "learning_rate": 4.244539962062209e-05, "loss": 7.2815, "step": 13479 }, { "epoch": 0.5618773706806719, "grad_norm": 220.0, "learning_rate": 4.243872716522614e-05, "loss": 12.4397, "step": 13480 }, { "epoch": 0.5619190529782001, "grad_norm": 632.0, "learning_rate": 4.243205484763506e-05, "loss": 19.7516, "step": 13481 }, { "epoch": 0.5619607352757284, "grad_norm": 232.0, "learning_rate": 4.242538266797044e-05, "loss": 12.0629, "step": 13482 }, { "epoch": 0.5620024175732566, "grad_norm": 78.0, "learning_rate": 4.241871062635391e-05, "loss": 8.1256, "step": 13483 }, { "epoch": 0.5620440998707849, "grad_norm": 804.0, "learning_rate": 4.2412038722907035e-05, "loss": 23.1254, "step": 13484 }, { "epoch": 0.5620857821683131, "grad_norm": 584.0, "learning_rate": 4.240536695775145e-05, "loss": 20.0006, "step": 13485 }, { "epoch": 0.5621274644658414, "grad_norm": 88.5, "learning_rate": 4.2398695331008696e-05, "loss": 8.7514, "step": 13486 }, { "epoch": 0.5621691467633696, "grad_norm": 154.0, "learning_rate": 4.239202384280042e-05, "loss": 11.6878, "step": 13487 }, { "epoch": 0.5622108290608978, "grad_norm": 98.5, "learning_rate": 4.238535249324817e-05, "loss": 8.5005, "step": 13488 }, { "epoch": 0.562252511358426, "grad_norm": 266.0, "learning_rate": 4.2378681282473564e-05, "loss": 12.3129, "step": 13489 }, { "epoch": 0.5622941936559543, "grad_norm": 498.0, "learning_rate": 4.237201021059815e-05, "loss": 16.0008, "step": 13490 }, { "epoch": 0.5623358759534826, "grad_norm": 75.5, "learning_rate": 4.236533927774353e-05, "loss": 8.6256, "step": 13491 }, { "epoch": 0.5623775582510108, "grad_norm": 324.0, "learning_rate": 4.235866848403128e-05, "loss": 14.5002, "step": 13492 }, { "epoch": 0.5624192405485391, "grad_norm": 1256.0, "learning_rate": 4.235199782958298e-05, "loss": 27.626, "step": 13493 }, { "epoch": 0.5624609228460673, "grad_norm": 332.0, "learning_rate": 4.234532731452019e-05, "loss": 13.563, "step": 13494 }, { "epoch": 0.5625026051435955, "grad_norm": 880.0, "learning_rate": 4.2338656938964504e-05, "loss": 21.6253, "step": 13495 }, { "epoch": 0.5625442874411237, "grad_norm": 612.0, "learning_rate": 4.233198670303746e-05, "loss": 17.3752, "step": 13496 }, { "epoch": 0.562585969738652, "grad_norm": 243.0, "learning_rate": 4.2325316606860655e-05, "loss": 12.1886, "step": 13497 }, { "epoch": 0.5626276520361803, "grad_norm": 268.0, "learning_rate": 4.2318646650555616e-05, "loss": 13.1877, "step": 13498 }, { "epoch": 0.5626693343337085, "grad_norm": 348.0, "learning_rate": 4.231197683424395e-05, "loss": 15.3754, "step": 13499 }, { "epoch": 0.5627110166312367, "grad_norm": 225.0, "learning_rate": 4.230530715804716e-05, "loss": 10.5002, "step": 13500 }, { "epoch": 0.562752698928765, "grad_norm": 478.0, "learning_rate": 4.229863762208686e-05, "loss": 15.9383, "step": 13501 }, { "epoch": 0.5627943812262932, "grad_norm": 932.0, "learning_rate": 4.229196822648455e-05, "loss": 25.7509, "step": 13502 }, { "epoch": 0.5628360635238214, "grad_norm": 166.0, "learning_rate": 4.2285298971361806e-05, "loss": 10.6256, "step": 13503 }, { "epoch": 0.5628777458213496, "grad_norm": 412.0, "learning_rate": 4.227862985684018e-05, "loss": 16.0009, "step": 13504 }, { "epoch": 0.562919428118878, "grad_norm": 282.0, "learning_rate": 4.227196088304121e-05, "loss": 12.4377, "step": 13505 }, { "epoch": 0.5629611104164062, "grad_norm": 186.0, "learning_rate": 4.226529205008642e-05, "loss": 10.9377, "step": 13506 }, { "epoch": 0.5630027927139344, "grad_norm": 262.0, "learning_rate": 4.22586233580974e-05, "loss": 12.5002, "step": 13507 }, { "epoch": 0.5630444750114626, "grad_norm": 191.0, "learning_rate": 4.2251954807195635e-05, "loss": 10.6897, "step": 13508 }, { "epoch": 0.5630861573089909, "grad_norm": 398.0, "learning_rate": 4.22452863975027e-05, "loss": 13.6261, "step": 13509 }, { "epoch": 0.5631278396065191, "grad_norm": 454.0, "learning_rate": 4.223861812914008e-05, "loss": 15.4378, "step": 13510 }, { "epoch": 0.5631695219040473, "grad_norm": 356.0, "learning_rate": 4.2231950002229365e-05, "loss": 12.876, "step": 13511 }, { "epoch": 0.5632112042015756, "grad_norm": 752.0, "learning_rate": 4.222528201689203e-05, "loss": 19.7549, "step": 13512 }, { "epoch": 0.5632528864991039, "grad_norm": 1020.0, "learning_rate": 4.221861417324964e-05, "loss": 23.7548, "step": 13513 }, { "epoch": 0.5632945687966321, "grad_norm": 266.0, "learning_rate": 4.221194647142367e-05, "loss": 13.1254, "step": 13514 }, { "epoch": 0.5633362510941603, "grad_norm": 1192.0, "learning_rate": 4.220527891153569e-05, "loss": 24.0056, "step": 13515 }, { "epoch": 0.5633779333916885, "grad_norm": 414.0, "learning_rate": 4.2198611493707176e-05, "loss": 15.5635, "step": 13516 }, { "epoch": 0.5634196156892168, "grad_norm": 300.0, "learning_rate": 4.219194421805967e-05, "loss": 13.6256, "step": 13517 }, { "epoch": 0.563461297986745, "grad_norm": 1720.0, "learning_rate": 4.218527708471467e-05, "loss": 40.5001, "step": 13518 }, { "epoch": 0.5635029802842733, "grad_norm": 193.0, "learning_rate": 4.2178610093793703e-05, "loss": 9.6266, "step": 13519 }, { "epoch": 0.5635446625818015, "grad_norm": 584.0, "learning_rate": 4.217194324541824e-05, "loss": 18.0007, "step": 13520 }, { "epoch": 0.5635863448793298, "grad_norm": 187.0, "learning_rate": 4.2165276539709826e-05, "loss": 7.5941, "step": 13521 }, { "epoch": 0.563628027176858, "grad_norm": 90.0, "learning_rate": 4.215860997678992e-05, "loss": 9.5631, "step": 13522 }, { "epoch": 0.5636697094743862, "grad_norm": 256.0, "learning_rate": 4.215194355678007e-05, "loss": 11.9376, "step": 13523 }, { "epoch": 0.5637113917719144, "grad_norm": 584.0, "learning_rate": 4.214527727980172e-05, "loss": 19.5002, "step": 13524 }, { "epoch": 0.5637530740694428, "grad_norm": 924.0, "learning_rate": 4.213861114597641e-05, "loss": 26.1253, "step": 13525 }, { "epoch": 0.563794756366971, "grad_norm": 209.0, "learning_rate": 4.21319451554256e-05, "loss": 12.1252, "step": 13526 }, { "epoch": 0.5638364386644992, "grad_norm": 394.0, "learning_rate": 4.2125279308270794e-05, "loss": 16.5006, "step": 13527 }, { "epoch": 0.5638781209620274, "grad_norm": 358.0, "learning_rate": 4.211861360463346e-05, "loss": 13.1879, "step": 13528 }, { "epoch": 0.5639198032595557, "grad_norm": 636.0, "learning_rate": 4.2111948044635096e-05, "loss": 20.2504, "step": 13529 }, { "epoch": 0.5639614855570839, "grad_norm": 246.0, "learning_rate": 4.210528262839718e-05, "loss": 12.2504, "step": 13530 }, { "epoch": 0.5640031678546121, "grad_norm": 472.0, "learning_rate": 4.209861735604119e-05, "loss": 17.5003, "step": 13531 }, { "epoch": 0.5640448501521403, "grad_norm": 350.0, "learning_rate": 4.2091952227688594e-05, "loss": 14.8756, "step": 13532 }, { "epoch": 0.5640865324496687, "grad_norm": 102.5, "learning_rate": 4.208528724346089e-05, "loss": 9.6252, "step": 13533 }, { "epoch": 0.5641282147471969, "grad_norm": 154.0, "learning_rate": 4.2078622403479503e-05, "loss": 9.5009, "step": 13534 }, { "epoch": 0.5641698970447251, "grad_norm": 484.0, "learning_rate": 4.207195770786596e-05, "loss": 16.1291, "step": 13535 }, { "epoch": 0.5642115793422533, "grad_norm": 220.0, "learning_rate": 4.206529315674166e-05, "loss": 11.2502, "step": 13536 }, { "epoch": 0.5642532616397816, "grad_norm": 412.0, "learning_rate": 4.2058628750228114e-05, "loss": 14.5004, "step": 13537 }, { "epoch": 0.5642949439373098, "grad_norm": 320.0, "learning_rate": 4.205196448844675e-05, "loss": 11.0002, "step": 13538 }, { "epoch": 0.564336626234838, "grad_norm": 1072.0, "learning_rate": 4.204530037151906e-05, "loss": 23.5048, "step": 13539 }, { "epoch": 0.5643783085323663, "grad_norm": 440.0, "learning_rate": 4.203863639956645e-05, "loss": 13.8128, "step": 13540 }, { "epoch": 0.5644199908298946, "grad_norm": 132.0, "learning_rate": 4.203197257271041e-05, "loss": 7.0005, "step": 13541 }, { "epoch": 0.5644616731274228, "grad_norm": 366.0, "learning_rate": 4.202530889107238e-05, "loss": 13.6251, "step": 13542 }, { "epoch": 0.564503355424951, "grad_norm": 235.0, "learning_rate": 4.20186453547738e-05, "loss": 9.8753, "step": 13543 }, { "epoch": 0.5645450377224792, "grad_norm": 272.0, "learning_rate": 4.201198196393611e-05, "loss": 12.5628, "step": 13544 }, { "epoch": 0.5645867200200075, "grad_norm": 572.0, "learning_rate": 4.200531871868078e-05, "loss": 18.8753, "step": 13545 }, { "epoch": 0.5646284023175357, "grad_norm": 242.0, "learning_rate": 4.1998655619129204e-05, "loss": 12.313, "step": 13546 }, { "epoch": 0.564670084615064, "grad_norm": 151.0, "learning_rate": 4.199199266540286e-05, "loss": 9.7504, "step": 13547 }, { "epoch": 0.5647117669125922, "grad_norm": 474.0, "learning_rate": 4.1985329857623135e-05, "loss": 15.8756, "step": 13548 }, { "epoch": 0.5647534492101205, "grad_norm": 99.5, "learning_rate": 4.197866719591151e-05, "loss": 7.2814, "step": 13549 }, { "epoch": 0.5647951315076487, "grad_norm": 294.0, "learning_rate": 4.197200468038937e-05, "loss": 13.1251, "step": 13550 }, { "epoch": 0.5648368138051769, "grad_norm": 408.0, "learning_rate": 4.196534231117817e-05, "loss": 14.4414, "step": 13551 }, { "epoch": 0.5648784961027051, "grad_norm": 438.0, "learning_rate": 4.195868008839931e-05, "loss": 17.0026, "step": 13552 }, { "epoch": 0.5649201784002335, "grad_norm": 173.0, "learning_rate": 4.195201801217423e-05, "loss": 11.0005, "step": 13553 }, { "epoch": 0.5649618606977617, "grad_norm": 536.0, "learning_rate": 4.194535608262432e-05, "loss": 17.3754, "step": 13554 }, { "epoch": 0.5650035429952899, "grad_norm": 260.0, "learning_rate": 4.193869429987102e-05, "loss": 13.0628, "step": 13555 }, { "epoch": 0.5650452252928181, "grad_norm": 484.0, "learning_rate": 4.193203266403572e-05, "loss": 16.5003, "step": 13556 }, { "epoch": 0.5650869075903464, "grad_norm": 430.0, "learning_rate": 4.1925371175239866e-05, "loss": 15.8753, "step": 13557 }, { "epoch": 0.5651285898878746, "grad_norm": 143.0, "learning_rate": 4.191870983360481e-05, "loss": 10.6251, "step": 13558 }, { "epoch": 0.5651702721854028, "grad_norm": 2040.0, "learning_rate": 4.191204863925202e-05, "loss": 39.5075, "step": 13559 }, { "epoch": 0.565211954482931, "grad_norm": 446.0, "learning_rate": 4.190538759230282e-05, "loss": 15.7502, "step": 13560 }, { "epoch": 0.5652536367804594, "grad_norm": 568.0, "learning_rate": 4.189872669287869e-05, "loss": 18.5003, "step": 13561 }, { "epoch": 0.5652953190779876, "grad_norm": 358.0, "learning_rate": 4.189206594110095e-05, "loss": 15.6252, "step": 13562 }, { "epoch": 0.5653370013755158, "grad_norm": 286.0, "learning_rate": 4.1885405337091064e-05, "loss": 13.6878, "step": 13563 }, { "epoch": 0.5653786836730441, "grad_norm": 294.0, "learning_rate": 4.1878744880970355e-05, "loss": 13.2502, "step": 13564 }, { "epoch": 0.5654203659705723, "grad_norm": 358.0, "learning_rate": 4.187208457286026e-05, "loss": 13.5626, "step": 13565 }, { "epoch": 0.5654620482681005, "grad_norm": 195.0, "learning_rate": 4.186542441288213e-05, "loss": 10.313, "step": 13566 }, { "epoch": 0.5655037305656287, "grad_norm": 262.0, "learning_rate": 4.1858764401157367e-05, "loss": 13.4379, "step": 13567 }, { "epoch": 0.5655454128631571, "grad_norm": 2048.0, "learning_rate": 4.185210453780735e-05, "loss": 37.0037, "step": 13568 }, { "epoch": 0.5655870951606853, "grad_norm": 145.0, "learning_rate": 4.184544482295346e-05, "loss": 10.1255, "step": 13569 }, { "epoch": 0.5656287774582135, "grad_norm": 143.0, "learning_rate": 4.1838785256717034e-05, "loss": 11.3752, "step": 13570 }, { "epoch": 0.5656704597557417, "grad_norm": 648.0, "learning_rate": 4.1832125839219506e-05, "loss": 19.1251, "step": 13571 }, { "epoch": 0.56571214205327, "grad_norm": 180.0, "learning_rate": 4.182546657058218e-05, "loss": 11.6892, "step": 13572 }, { "epoch": 0.5657538243507982, "grad_norm": 420.0, "learning_rate": 4.181880745092647e-05, "loss": 15.1879, "step": 13573 }, { "epoch": 0.5657955066483265, "grad_norm": 215.0, "learning_rate": 4.1812148480373706e-05, "loss": 8.3753, "step": 13574 }, { "epoch": 0.5658371889458547, "grad_norm": 358.0, "learning_rate": 4.180548965904528e-05, "loss": 14.5003, "step": 13575 }, { "epoch": 0.565878871243383, "grad_norm": 312.0, "learning_rate": 4.179883098706252e-05, "loss": 12.7504, "step": 13576 }, { "epoch": 0.5659205535409112, "grad_norm": 378.0, "learning_rate": 4.179217246454681e-05, "loss": 14.5627, "step": 13577 }, { "epoch": 0.5659622358384394, "grad_norm": 374.0, "learning_rate": 4.178551409161946e-05, "loss": 10.1887, "step": 13578 }, { "epoch": 0.5660039181359676, "grad_norm": 229.0, "learning_rate": 4.177885586840186e-05, "loss": 5.6574, "step": 13579 }, { "epoch": 0.5660456004334959, "grad_norm": 708.0, "learning_rate": 4.177219779501534e-05, "loss": 20.1253, "step": 13580 }, { "epoch": 0.5660872827310242, "grad_norm": 904.0, "learning_rate": 4.176553987158124e-05, "loss": 20.001, "step": 13581 }, { "epoch": 0.5661289650285524, "grad_norm": 97.0, "learning_rate": 4.1758882098220906e-05, "loss": 8.8139, "step": 13582 }, { "epoch": 0.5661706473260806, "grad_norm": 1472.0, "learning_rate": 4.175222447505569e-05, "loss": 32.7508, "step": 13583 }, { "epoch": 0.5662123296236089, "grad_norm": 107.0, "learning_rate": 4.1745567002206906e-05, "loss": 7.8751, "step": 13584 }, { "epoch": 0.5662540119211371, "grad_norm": 177.0, "learning_rate": 4.1738909679795906e-05, "loss": 8.1881, "step": 13585 }, { "epoch": 0.5662956942186653, "grad_norm": 176.0, "learning_rate": 4.173225250794399e-05, "loss": 12.0629, "step": 13586 }, { "epoch": 0.5663373765161935, "grad_norm": 182.0, "learning_rate": 4.1725595486772534e-05, "loss": 10.2502, "step": 13587 }, { "epoch": 0.5663790588137219, "grad_norm": 232.0, "learning_rate": 4.171893861640281e-05, "loss": 11.5003, "step": 13588 }, { "epoch": 0.5664207411112501, "grad_norm": 109.0, "learning_rate": 4.171228189695619e-05, "loss": 9.1877, "step": 13589 }, { "epoch": 0.5664624234087783, "grad_norm": 310.0, "learning_rate": 4.1705625328553934e-05, "loss": 13.9378, "step": 13590 }, { "epoch": 0.5665041057063065, "grad_norm": 270.0, "learning_rate": 4.169896891131743e-05, "loss": 12.5628, "step": 13591 }, { "epoch": 0.5665457880038348, "grad_norm": 56.75, "learning_rate": 4.169231264536792e-05, "loss": 7.4066, "step": 13592 }, { "epoch": 0.566587470301363, "grad_norm": 171.0, "learning_rate": 4.1685656530826765e-05, "loss": 10.8755, "step": 13593 }, { "epoch": 0.5666291525988912, "grad_norm": 1048.0, "learning_rate": 4.167900056781524e-05, "loss": 26.8753, "step": 13594 }, { "epoch": 0.5666708348964195, "grad_norm": 217.0, "learning_rate": 4.1672344756454704e-05, "loss": 11.6887, "step": 13595 }, { "epoch": 0.5667125171939478, "grad_norm": 238.0, "learning_rate": 4.16656890968664e-05, "loss": 11.2504, "step": 13596 }, { "epoch": 0.566754199491476, "grad_norm": 236.0, "learning_rate": 4.165903358917167e-05, "loss": 11.3754, "step": 13597 }, { "epoch": 0.5667958817890042, "grad_norm": 179.0, "learning_rate": 4.165237823349177e-05, "loss": 11.0002, "step": 13598 }, { "epoch": 0.5668375640865324, "grad_norm": 386.0, "learning_rate": 4.164572302994804e-05, "loss": 15.0005, "step": 13599 }, { "epoch": 0.5668792463840607, "grad_norm": 150.0, "learning_rate": 4.163906797866173e-05, "loss": 9.1893, "step": 13600 }, { "epoch": 0.5669209286815889, "grad_norm": 229.0, "learning_rate": 4.163241307975417e-05, "loss": 11.7502, "step": 13601 }, { "epoch": 0.5669626109791172, "grad_norm": 286.0, "learning_rate": 4.16257583333466e-05, "loss": 12.9377, "step": 13602 }, { "epoch": 0.5670042932766454, "grad_norm": 266.0, "learning_rate": 4.161910373956035e-05, "loss": 12.6879, "step": 13603 }, { "epoch": 0.5670459755741737, "grad_norm": 418.0, "learning_rate": 4.161244929851666e-05, "loss": 14.7503, "step": 13604 }, { "epoch": 0.5670876578717019, "grad_norm": 137.0, "learning_rate": 4.160579501033683e-05, "loss": 7.6877, "step": 13605 }, { "epoch": 0.5671293401692301, "grad_norm": 370.0, "learning_rate": 4.159914087514214e-05, "loss": 15.1267, "step": 13606 }, { "epoch": 0.5671710224667583, "grad_norm": 828.0, "learning_rate": 4.1592486893053854e-05, "loss": 24.7503, "step": 13607 }, { "epoch": 0.5672127047642866, "grad_norm": 536.0, "learning_rate": 4.158583306419322e-05, "loss": 20.2513, "step": 13608 }, { "epoch": 0.5672543870618149, "grad_norm": 290.0, "learning_rate": 4.157917938868155e-05, "loss": 12.7503, "step": 13609 }, { "epoch": 0.5672960693593431, "grad_norm": 162.0, "learning_rate": 4.157252586664006e-05, "loss": 10.8127, "step": 13610 }, { "epoch": 0.5673377516568713, "grad_norm": 816.0, "learning_rate": 4.156587249819006e-05, "loss": 21.1285, "step": 13611 }, { "epoch": 0.5673794339543996, "grad_norm": 510.0, "learning_rate": 4.155921928345276e-05, "loss": 16.8767, "step": 13612 }, { "epoch": 0.5674211162519278, "grad_norm": 324.0, "learning_rate": 4.155256622254946e-05, "loss": 12.3128, "step": 13613 }, { "epoch": 0.567462798549456, "grad_norm": 210.0, "learning_rate": 4.154591331560137e-05, "loss": 11.438, "step": 13614 }, { "epoch": 0.5675044808469842, "grad_norm": 378.0, "learning_rate": 4.153926056272978e-05, "loss": 14.5627, "step": 13615 }, { "epoch": 0.5675461631445126, "grad_norm": 106.0, "learning_rate": 4.15326079640559e-05, "loss": 8.7505, "step": 13616 }, { "epoch": 0.5675878454420408, "grad_norm": 202.0, "learning_rate": 4.1525955519701e-05, "loss": 11.813, "step": 13617 }, { "epoch": 0.567629527739569, "grad_norm": 282.0, "learning_rate": 4.1519303229786305e-05, "loss": 11.6254, "step": 13618 }, { "epoch": 0.5676712100370972, "grad_norm": 452.0, "learning_rate": 4.1512651094433066e-05, "loss": 17.0009, "step": 13619 }, { "epoch": 0.5677128923346255, "grad_norm": 212.0, "learning_rate": 4.1505999113762504e-05, "loss": 12.1269, "step": 13620 }, { "epoch": 0.5677545746321537, "grad_norm": 442.0, "learning_rate": 4.149934728789589e-05, "loss": 13.7504, "step": 13621 }, { "epoch": 0.5677962569296819, "grad_norm": 672.0, "learning_rate": 4.14926956169544e-05, "loss": 20.3756, "step": 13622 }, { "epoch": 0.5678379392272102, "grad_norm": 492.0, "learning_rate": 4.148604410105931e-05, "loss": 18.1254, "step": 13623 }, { "epoch": 0.5678796215247385, "grad_norm": 552.0, "learning_rate": 4.147939274033179e-05, "loss": 19.0002, "step": 13624 }, { "epoch": 0.5679213038222667, "grad_norm": 123.0, "learning_rate": 4.147274153489313e-05, "loss": 7.2819, "step": 13625 }, { "epoch": 0.5679629861197949, "grad_norm": 386.0, "learning_rate": 4.146609048486449e-05, "loss": 14.5003, "step": 13626 }, { "epoch": 0.5680046684173231, "grad_norm": 228.0, "learning_rate": 4.145943959036712e-05, "loss": 11.6252, "step": 13627 }, { "epoch": 0.5680463507148514, "grad_norm": 79.5, "learning_rate": 4.1452788851522206e-05, "loss": 7.0631, "step": 13628 }, { "epoch": 0.5680880330123796, "grad_norm": 118.5, "learning_rate": 4.1446138268450986e-05, "loss": 9.3131, "step": 13629 }, { "epoch": 0.5681297153099079, "grad_norm": 214.0, "learning_rate": 4.1439487841274654e-05, "loss": 11.8752, "step": 13630 }, { "epoch": 0.5681713976074361, "grad_norm": 580.0, "learning_rate": 4.143283757011442e-05, "loss": 17.1253, "step": 13631 }, { "epoch": 0.5682130799049644, "grad_norm": 540.0, "learning_rate": 4.142618745509147e-05, "loss": 17.1284, "step": 13632 }, { "epoch": 0.5682547622024926, "grad_norm": 304.0, "learning_rate": 4.1419537496327026e-05, "loss": 14.3755, "step": 13633 }, { "epoch": 0.5682964445000208, "grad_norm": 278.0, "learning_rate": 4.141288769394226e-05, "loss": 13.0627, "step": 13634 }, { "epoch": 0.568338126797549, "grad_norm": 600.0, "learning_rate": 4.14062380480584e-05, "loss": 23.3752, "step": 13635 }, { "epoch": 0.5683798090950773, "grad_norm": 1304.0, "learning_rate": 4.139958855879659e-05, "loss": 27.1307, "step": 13636 }, { "epoch": 0.5684214913926056, "grad_norm": 226.0, "learning_rate": 4.1392939226278063e-05, "loss": 12.1879, "step": 13637 }, { "epoch": 0.5684631736901338, "grad_norm": 776.0, "learning_rate": 4.138629005062397e-05, "loss": 19.0034, "step": 13638 }, { "epoch": 0.5685048559876621, "grad_norm": 254.0, "learning_rate": 4.1379641031955515e-05, "loss": 13.4379, "step": 13639 }, { "epoch": 0.5685465382851903, "grad_norm": 380.0, "learning_rate": 4.137299217039385e-05, "loss": 14.7503, "step": 13640 }, { "epoch": 0.5685882205827185, "grad_norm": 253.0, "learning_rate": 4.136634346606019e-05, "loss": 12.0637, "step": 13641 }, { "epoch": 0.5686299028802467, "grad_norm": 100.0, "learning_rate": 4.135969491907567e-05, "loss": 9.5004, "step": 13642 }, { "epoch": 0.568671585177775, "grad_norm": 640.0, "learning_rate": 4.1353046529561476e-05, "loss": 19.379, "step": 13643 }, { "epoch": 0.5687132674753033, "grad_norm": 696.0, "learning_rate": 4.134639829763879e-05, "loss": 18.5002, "step": 13644 }, { "epoch": 0.5687549497728315, "grad_norm": 205.0, "learning_rate": 4.1339750223428755e-05, "loss": 10.5627, "step": 13645 }, { "epoch": 0.5687966320703597, "grad_norm": 568.0, "learning_rate": 4.133310230705254e-05, "loss": 18.0006, "step": 13646 }, { "epoch": 0.568838314367888, "grad_norm": 342.0, "learning_rate": 4.1326454548631316e-05, "loss": 14.0003, "step": 13647 }, { "epoch": 0.5688799966654162, "grad_norm": 338.0, "learning_rate": 4.131980694828621e-05, "loss": 13.9379, "step": 13648 }, { "epoch": 0.5689216789629444, "grad_norm": 596.0, "learning_rate": 4.131315950613841e-05, "loss": 18.2508, "step": 13649 }, { "epoch": 0.5689633612604726, "grad_norm": 1048.0, "learning_rate": 4.1306512222309036e-05, "loss": 26.3754, "step": 13650 }, { "epoch": 0.569005043558001, "grad_norm": 221.0, "learning_rate": 4.129986509691926e-05, "loss": 12.0003, "step": 13651 }, { "epoch": 0.5690467258555292, "grad_norm": 306.0, "learning_rate": 4.12932181300902e-05, "loss": 13.5002, "step": 13652 }, { "epoch": 0.5690884081530574, "grad_norm": 576.0, "learning_rate": 4.128657132194303e-05, "loss": 17.0011, "step": 13653 }, { "epoch": 0.5691300904505856, "grad_norm": 414.0, "learning_rate": 4.1279924672598855e-05, "loss": 16.501, "step": 13654 }, { "epoch": 0.5691717727481139, "grad_norm": 340.0, "learning_rate": 4.127327818217884e-05, "loss": 14.3127, "step": 13655 }, { "epoch": 0.5692134550456421, "grad_norm": 340.0, "learning_rate": 4.12666318508041e-05, "loss": 15.1255, "step": 13656 }, { "epoch": 0.5692551373431703, "grad_norm": 224.0, "learning_rate": 4.125998567859577e-05, "loss": 12.1256, "step": 13657 }, { "epoch": 0.5692968196406986, "grad_norm": 932.0, "learning_rate": 4.125333966567497e-05, "loss": 24.253, "step": 13658 }, { "epoch": 0.5693385019382269, "grad_norm": 149.0, "learning_rate": 4.1246693812162844e-05, "loss": 10.8133, "step": 13659 }, { "epoch": 0.5693801842357551, "grad_norm": 98.0, "learning_rate": 4.1240048118180486e-05, "loss": 9.0635, "step": 13660 }, { "epoch": 0.5694218665332833, "grad_norm": 219.0, "learning_rate": 4.1233402583849046e-05, "loss": 10.7502, "step": 13661 }, { "epoch": 0.5694635488308115, "grad_norm": 146.0, "learning_rate": 4.12267572092896e-05, "loss": 9.5007, "step": 13662 }, { "epoch": 0.5695052311283398, "grad_norm": 126.0, "learning_rate": 4.1220111994623314e-05, "loss": 10.5002, "step": 13663 }, { "epoch": 0.569546913425868, "grad_norm": 227.0, "learning_rate": 4.121346693997123e-05, "loss": 12.3752, "step": 13664 }, { "epoch": 0.5695885957233963, "grad_norm": 700.0, "learning_rate": 4.1206822045454526e-05, "loss": 21.0002, "step": 13665 }, { "epoch": 0.5696302780209245, "grad_norm": 780.0, "learning_rate": 4.1200177311194236e-05, "loss": 21.0038, "step": 13666 }, { "epoch": 0.5696719603184528, "grad_norm": 444.0, "learning_rate": 4.119353273731152e-05, "loss": 16.3757, "step": 13667 }, { "epoch": 0.569713642615981, "grad_norm": 186.0, "learning_rate": 4.118688832392744e-05, "loss": 10.1251, "step": 13668 }, { "epoch": 0.5697553249135092, "grad_norm": 358.0, "learning_rate": 4.118024407116311e-05, "loss": 14.1878, "step": 13669 }, { "epoch": 0.5697970072110374, "grad_norm": 324.0, "learning_rate": 4.1173599979139606e-05, "loss": 13.1879, "step": 13670 }, { "epoch": 0.5698386895085658, "grad_norm": 276.0, "learning_rate": 4.116695604797804e-05, "loss": 12.1877, "step": 13671 }, { "epoch": 0.569880371806094, "grad_norm": 700.0, "learning_rate": 4.116031227779947e-05, "loss": 20.8759, "step": 13672 }, { "epoch": 0.5699220541036222, "grad_norm": 764.0, "learning_rate": 4.115366866872501e-05, "loss": 20.2543, "step": 13673 }, { "epoch": 0.5699637364011504, "grad_norm": 352.0, "learning_rate": 4.11470252208757e-05, "loss": 14.4378, "step": 13674 }, { "epoch": 0.5700054186986787, "grad_norm": 86.0, "learning_rate": 4.114038193437267e-05, "loss": 8.7502, "step": 13675 }, { "epoch": 0.5700471009962069, "grad_norm": 60.5, "learning_rate": 4.113373880933694e-05, "loss": 8.313, "step": 13676 }, { "epoch": 0.5700887832937351, "grad_norm": 85.5, "learning_rate": 4.112709584588963e-05, "loss": 7.5626, "step": 13677 }, { "epoch": 0.5701304655912633, "grad_norm": 119.5, "learning_rate": 4.112045304415176e-05, "loss": 7.3755, "step": 13678 }, { "epoch": 0.5701721478887917, "grad_norm": 52.75, "learning_rate": 4.111381040424445e-05, "loss": 7.844, "step": 13679 }, { "epoch": 0.5702138301863199, "grad_norm": 61.0, "learning_rate": 4.1107167926288704e-05, "loss": 8.0001, "step": 13680 }, { "epoch": 0.5702555124838481, "grad_norm": 354.0, "learning_rate": 4.110052561040563e-05, "loss": 13.0641, "step": 13681 }, { "epoch": 0.5702971947813763, "grad_norm": 458.0, "learning_rate": 4.109388345671625e-05, "loss": 18.5005, "step": 13682 }, { "epoch": 0.5703388770789046, "grad_norm": 203.0, "learning_rate": 4.108724146534167e-05, "loss": 10.1254, "step": 13683 }, { "epoch": 0.5703805593764328, "grad_norm": 208.0, "learning_rate": 4.1080599636402875e-05, "loss": 11.3127, "step": 13684 }, { "epoch": 0.570422241673961, "grad_norm": 127.0, "learning_rate": 4.107395797002096e-05, "loss": 8.6253, "step": 13685 }, { "epoch": 0.5704639239714893, "grad_norm": 358.0, "learning_rate": 4.1067316466316936e-05, "loss": 14.8752, "step": 13686 }, { "epoch": 0.5705056062690176, "grad_norm": 398.0, "learning_rate": 4.1060675125411884e-05, "loss": 16.0007, "step": 13687 }, { "epoch": 0.5705472885665458, "grad_norm": 166.0, "learning_rate": 4.1054033947426796e-05, "loss": 9.5004, "step": 13688 }, { "epoch": 0.570588970864074, "grad_norm": 276.0, "learning_rate": 4.104739293248276e-05, "loss": 13.0003, "step": 13689 }, { "epoch": 0.5706306531616022, "grad_norm": 97.0, "learning_rate": 4.104075208070076e-05, "loss": 7.1572, "step": 13690 }, { "epoch": 0.5706723354591305, "grad_norm": 172.0, "learning_rate": 4.1034111392201866e-05, "loss": 11.0627, "step": 13691 }, { "epoch": 0.5707140177566588, "grad_norm": 226.0, "learning_rate": 4.102747086710708e-05, "loss": 11.313, "step": 13692 }, { "epoch": 0.570755700054187, "grad_norm": 116.0, "learning_rate": 4.102083050553743e-05, "loss": 9.5634, "step": 13693 }, { "epoch": 0.5707973823517152, "grad_norm": 460.0, "learning_rate": 4.101419030761395e-05, "loss": 17.2503, "step": 13694 }, { "epoch": 0.5708390646492435, "grad_norm": 1280.0, "learning_rate": 4.100755027345764e-05, "loss": 36.5177, "step": 13695 }, { "epoch": 0.5708807469467717, "grad_norm": 222.0, "learning_rate": 4.1000910403189516e-05, "loss": 10.6877, "step": 13696 }, { "epoch": 0.5709224292442999, "grad_norm": 201.0, "learning_rate": 4.0994270696930624e-05, "loss": 11.6252, "step": 13697 }, { "epoch": 0.5709641115418281, "grad_norm": 400.0, "learning_rate": 4.098763115480193e-05, "loss": 15.1254, "step": 13698 }, { "epoch": 0.5710057938393565, "grad_norm": 470.0, "learning_rate": 4.098099177692448e-05, "loss": 16.7514, "step": 13699 }, { "epoch": 0.5710474761368847, "grad_norm": 258.0, "learning_rate": 4.0974352563419226e-05, "loss": 12.4378, "step": 13700 }, { "epoch": 0.5710891584344129, "grad_norm": 624.0, "learning_rate": 4.096771351440722e-05, "loss": 19.3753, "step": 13701 }, { "epoch": 0.5711308407319411, "grad_norm": 272.0, "learning_rate": 4.0961074630009424e-05, "loss": 13.1877, "step": 13702 }, { "epoch": 0.5711725230294694, "grad_norm": 215.0, "learning_rate": 4.095443591034686e-05, "loss": 8.3754, "step": 13703 }, { "epoch": 0.5712142053269976, "grad_norm": 233.0, "learning_rate": 4.094779735554049e-05, "loss": 13.1878, "step": 13704 }, { "epoch": 0.5712558876245258, "grad_norm": 262.0, "learning_rate": 4.0941158965711315e-05, "loss": 12.1253, "step": 13705 }, { "epoch": 0.571297569922054, "grad_norm": 146.0, "learning_rate": 4.093452074098033e-05, "loss": 8.3754, "step": 13706 }, { "epoch": 0.5713392522195824, "grad_norm": 270.0, "learning_rate": 4.092788268146851e-05, "loss": 10.6878, "step": 13707 }, { "epoch": 0.5713809345171106, "grad_norm": 221.0, "learning_rate": 4.092124478729682e-05, "loss": 12.0004, "step": 13708 }, { "epoch": 0.5714226168146388, "grad_norm": 249.0, "learning_rate": 4.0914607058586276e-05, "loss": 12.8752, "step": 13709 }, { "epoch": 0.5714642991121671, "grad_norm": 768.0, "learning_rate": 4.090796949545779e-05, "loss": 21.6257, "step": 13710 }, { "epoch": 0.5715059814096953, "grad_norm": 149.0, "learning_rate": 4.0901332098032403e-05, "loss": 10.7502, "step": 13711 }, { "epoch": 0.5715476637072235, "grad_norm": 424.0, "learning_rate": 4.089469486643102e-05, "loss": 16.1252, "step": 13712 }, { "epoch": 0.5715893460047518, "grad_norm": 1104.0, "learning_rate": 4.088805780077466e-05, "loss": 29.1262, "step": 13713 }, { "epoch": 0.5716310283022801, "grad_norm": 166.0, "learning_rate": 4.088142090118422e-05, "loss": 10.6251, "step": 13714 }, { "epoch": 0.5716727105998083, "grad_norm": 382.0, "learning_rate": 4.087478416778072e-05, "loss": 12.2505, "step": 13715 }, { "epoch": 0.5717143928973365, "grad_norm": 684.0, "learning_rate": 4.086814760068507e-05, "loss": 18.8754, "step": 13716 }, { "epoch": 0.5717560751948647, "grad_norm": 652.0, "learning_rate": 4.086151120001825e-05, "loss": 20.0003, "step": 13717 }, { "epoch": 0.571797757492393, "grad_norm": 258.0, "learning_rate": 4.0854874965901204e-05, "loss": 12.0631, "step": 13718 }, { "epoch": 0.5718394397899212, "grad_norm": 223.0, "learning_rate": 4.0848238898454864e-05, "loss": 13.0003, "step": 13719 }, { "epoch": 0.5718811220874495, "grad_norm": 280.0, "learning_rate": 4.0841602997800174e-05, "loss": 12.2505, "step": 13720 }, { "epoch": 0.5719228043849777, "grad_norm": 249.0, "learning_rate": 4.083496726405811e-05, "loss": 11.2503, "step": 13721 }, { "epoch": 0.571964486682506, "grad_norm": 308.0, "learning_rate": 4.082833169734956e-05, "loss": 12.6877, "step": 13722 }, { "epoch": 0.5720061689800342, "grad_norm": 111.0, "learning_rate": 4.08216962977955e-05, "loss": 9.0636, "step": 13723 }, { "epoch": 0.5720478512775624, "grad_norm": 310.0, "learning_rate": 4.0815061065516814e-05, "loss": 14.1255, "step": 13724 }, { "epoch": 0.5720895335750906, "grad_norm": 312.0, "learning_rate": 4.080842600063447e-05, "loss": 13.8755, "step": 13725 }, { "epoch": 0.572131215872619, "grad_norm": 208.0, "learning_rate": 4.080179110326937e-05, "loss": 11.8129, "step": 13726 }, { "epoch": 0.5721728981701472, "grad_norm": 716.0, "learning_rate": 4.079515637354246e-05, "loss": 19.8752, "step": 13727 }, { "epoch": 0.5722145804676754, "grad_norm": 292.0, "learning_rate": 4.078852181157462e-05, "loss": 12.3127, "step": 13728 }, { "epoch": 0.5722562627652036, "grad_norm": 244.0, "learning_rate": 4.078188741748681e-05, "loss": 12.5002, "step": 13729 }, { "epoch": 0.5722979450627319, "grad_norm": 103.0, "learning_rate": 4.0775253191399895e-05, "loss": 10.1881, "step": 13730 }, { "epoch": 0.5723396273602601, "grad_norm": 888.0, "learning_rate": 4.0768619133434835e-05, "loss": 23.5007, "step": 13731 }, { "epoch": 0.5723813096577883, "grad_norm": 302.0, "learning_rate": 4.07619852437125e-05, "loss": 13.6255, "step": 13732 }, { "epoch": 0.5724229919553165, "grad_norm": 95.0, "learning_rate": 4.07553515223538e-05, "loss": 9.6256, "step": 13733 }, { "epoch": 0.5724646742528449, "grad_norm": 472.0, "learning_rate": 4.0748717969479635e-05, "loss": 15.6256, "step": 13734 }, { "epoch": 0.5725063565503731, "grad_norm": 156.0, "learning_rate": 4.074208458521092e-05, "loss": 9.438, "step": 13735 }, { "epoch": 0.5725480388479013, "grad_norm": 69.5, "learning_rate": 4.073545136966852e-05, "loss": 7.9066, "step": 13736 }, { "epoch": 0.5725897211454295, "grad_norm": 402.0, "learning_rate": 4.072881832297336e-05, "loss": 15.4376, "step": 13737 }, { "epoch": 0.5726314034429578, "grad_norm": 336.0, "learning_rate": 4.072218544524629e-05, "loss": 12.8126, "step": 13738 }, { "epoch": 0.572673085740486, "grad_norm": 348.0, "learning_rate": 4.071555273660824e-05, "loss": 13.4377, "step": 13739 }, { "epoch": 0.5727147680380142, "grad_norm": 171.0, "learning_rate": 4.0708920197180035e-05, "loss": 9.5007, "step": 13740 }, { "epoch": 0.5727564503355425, "grad_norm": 132.0, "learning_rate": 4.070228782708261e-05, "loss": 8.8758, "step": 13741 }, { "epoch": 0.5727981326330708, "grad_norm": 304.0, "learning_rate": 4.069565562643679e-05, "loss": 14.8136, "step": 13742 }, { "epoch": 0.572839814930599, "grad_norm": 205.0, "learning_rate": 4.0689023595363486e-05, "loss": 10.5003, "step": 13743 }, { "epoch": 0.5728814972281272, "grad_norm": 274.0, "learning_rate": 4.068239173398356e-05, "loss": 12.5626, "step": 13744 }, { "epoch": 0.5729231795256554, "grad_norm": 304.0, "learning_rate": 4.0675760042417857e-05, "loss": 14.0003, "step": 13745 }, { "epoch": 0.5729648618231837, "grad_norm": 241.0, "learning_rate": 4.0669128520787254e-05, "loss": 12.3755, "step": 13746 }, { "epoch": 0.573006544120712, "grad_norm": 185.0, "learning_rate": 4.066249716921263e-05, "loss": 11.938, "step": 13747 }, { "epoch": 0.5730482264182402, "grad_norm": 160.0, "learning_rate": 4.0655865987814806e-05, "loss": 10.5003, "step": 13748 }, { "epoch": 0.5730899087157684, "grad_norm": 474.0, "learning_rate": 4.064923497671467e-05, "loss": 17.1254, "step": 13749 }, { "epoch": 0.5731315910132967, "grad_norm": 450.0, "learning_rate": 4.0642604136033045e-05, "loss": 16.5001, "step": 13750 }, { "epoch": 0.5731732733108249, "grad_norm": 392.0, "learning_rate": 4.06359734658908e-05, "loss": 14.1252, "step": 13751 }, { "epoch": 0.5732149556083531, "grad_norm": 181.0, "learning_rate": 4.062934296640876e-05, "loss": 10.4378, "step": 13752 }, { "epoch": 0.5732566379058813, "grad_norm": 466.0, "learning_rate": 4.062271263770779e-05, "loss": 18.1254, "step": 13753 }, { "epoch": 0.5732983202034097, "grad_norm": 844.0, "learning_rate": 4.061608247990869e-05, "loss": 19.3768, "step": 13754 }, { "epoch": 0.5733400025009379, "grad_norm": 218.0, "learning_rate": 4.0609452493132336e-05, "loss": 10.4378, "step": 13755 }, { "epoch": 0.5733816847984661, "grad_norm": 310.0, "learning_rate": 4.0602822677499544e-05, "loss": 13.1877, "step": 13756 }, { "epoch": 0.5734233670959943, "grad_norm": 140.0, "learning_rate": 4.059619303313115e-05, "loss": 10.6251, "step": 13757 }, { "epoch": 0.5734650493935226, "grad_norm": 406.0, "learning_rate": 4.058956356014795e-05, "loss": 14.7502, "step": 13758 }, { "epoch": 0.5735067316910508, "grad_norm": 676.0, "learning_rate": 4.058293425867081e-05, "loss": 20.0005, "step": 13759 }, { "epoch": 0.573548413988579, "grad_norm": 134.0, "learning_rate": 4.057630512882051e-05, "loss": 9.9379, "step": 13760 }, { "epoch": 0.5735900962861072, "grad_norm": 560.0, "learning_rate": 4.056967617071792e-05, "loss": 18.3753, "step": 13761 }, { "epoch": 0.5736317785836356, "grad_norm": 346.0, "learning_rate": 4.056304738448378e-05, "loss": 14.0627, "step": 13762 }, { "epoch": 0.5736734608811638, "grad_norm": 254.0, "learning_rate": 4.055641877023897e-05, "loss": 11.5627, "step": 13763 }, { "epoch": 0.573715143178692, "grad_norm": 460.0, "learning_rate": 4.0549790328104245e-05, "loss": 16.7504, "step": 13764 }, { "epoch": 0.5737568254762202, "grad_norm": 312.0, "learning_rate": 4.0543162058200445e-05, "loss": 14.3753, "step": 13765 }, { "epoch": 0.5737985077737485, "grad_norm": 444.0, "learning_rate": 4.053653396064834e-05, "loss": 13.5004, "step": 13766 }, { "epoch": 0.5738401900712767, "grad_norm": 1352.0, "learning_rate": 4.0529906035568766e-05, "loss": 29.7505, "step": 13767 }, { "epoch": 0.573881872368805, "grad_norm": 430.0, "learning_rate": 4.0523278283082475e-05, "loss": 16.2501, "step": 13768 }, { "epoch": 0.5739235546663332, "grad_norm": 364.0, "learning_rate": 4.051665070331028e-05, "loss": 15.0004, "step": 13769 }, { "epoch": 0.5739652369638615, "grad_norm": 214.0, "learning_rate": 4.051002329637298e-05, "loss": 12.3138, "step": 13770 }, { "epoch": 0.5740069192613897, "grad_norm": 308.0, "learning_rate": 4.050339606239134e-05, "loss": 13.3129, "step": 13771 }, { "epoch": 0.5740486015589179, "grad_norm": 592.0, "learning_rate": 4.049676900148614e-05, "loss": 17.8753, "step": 13772 }, { "epoch": 0.5740902838564461, "grad_norm": 194.0, "learning_rate": 4.049014211377819e-05, "loss": 11.3127, "step": 13773 }, { "epoch": 0.5741319661539744, "grad_norm": 103.5, "learning_rate": 4.0483515399388226e-05, "loss": 8.9376, "step": 13774 }, { "epoch": 0.5741736484515026, "grad_norm": 636.0, "learning_rate": 4.047688885843706e-05, "loss": 18.13, "step": 13775 }, { "epoch": 0.5742153307490309, "grad_norm": 135.0, "learning_rate": 4.047026249104541e-05, "loss": 9.8752, "step": 13776 }, { "epoch": 0.5742570130465591, "grad_norm": 258.0, "learning_rate": 4.04636362973341e-05, "loss": 12.3127, "step": 13777 }, { "epoch": 0.5742986953440874, "grad_norm": 101.5, "learning_rate": 4.0457010277423846e-05, "loss": 10.0003, "step": 13778 }, { "epoch": 0.5743403776416156, "grad_norm": 354.0, "learning_rate": 4.045038443143544e-05, "loss": 11.7506, "step": 13779 }, { "epoch": 0.5743820599391438, "grad_norm": 119.0, "learning_rate": 4.044375875948961e-05, "loss": 9.9378, "step": 13780 }, { "epoch": 0.574423742236672, "grad_norm": 480.0, "learning_rate": 4.0437133261707136e-05, "loss": 16.3761, "step": 13781 }, { "epoch": 0.5744654245342004, "grad_norm": 135.0, "learning_rate": 4.0430507938208764e-05, "loss": 10.3128, "step": 13782 }, { "epoch": 0.5745071068317286, "grad_norm": 77.0, "learning_rate": 4.042388278911523e-05, "loss": 7.7505, "step": 13783 }, { "epoch": 0.5745487891292568, "grad_norm": 600.0, "learning_rate": 4.041725781454726e-05, "loss": 19.001, "step": 13784 }, { "epoch": 0.5745904714267851, "grad_norm": 456.0, "learning_rate": 4.041063301462565e-05, "loss": 16.5002, "step": 13785 }, { "epoch": 0.5746321537243133, "grad_norm": 141.0, "learning_rate": 4.040400838947108e-05, "loss": 11.0632, "step": 13786 }, { "epoch": 0.5746738360218415, "grad_norm": 956.0, "learning_rate": 4.039738393920433e-05, "loss": 23.1293, "step": 13787 }, { "epoch": 0.5747155183193697, "grad_norm": 83.0, "learning_rate": 4.0390759663946085e-05, "loss": 7.9064, "step": 13788 }, { "epoch": 0.5747572006168981, "grad_norm": 436.0, "learning_rate": 4.038413556381713e-05, "loss": 15.5627, "step": 13789 }, { "epoch": 0.5747988829144263, "grad_norm": 276.0, "learning_rate": 4.037751163893813e-05, "loss": 14.1257, "step": 13790 }, { "epoch": 0.5748405652119545, "grad_norm": 93.0, "learning_rate": 4.0370887889429854e-05, "loss": 8.188, "step": 13791 }, { "epoch": 0.5748822475094827, "grad_norm": 888.0, "learning_rate": 4.036426431541298e-05, "loss": 23.626, "step": 13792 }, { "epoch": 0.574923929807011, "grad_norm": 456.0, "learning_rate": 4.035764091700826e-05, "loss": 15.438, "step": 13793 }, { "epoch": 0.5749656121045392, "grad_norm": 448.0, "learning_rate": 4.035101769433639e-05, "loss": 16.5001, "step": 13794 }, { "epoch": 0.5750072944020674, "grad_norm": 668.0, "learning_rate": 4.034439464751807e-05, "loss": 17.7509, "step": 13795 }, { "epoch": 0.5750489766995956, "grad_norm": 294.0, "learning_rate": 4.033777177667401e-05, "loss": 10.5006, "step": 13796 }, { "epoch": 0.575090658997124, "grad_norm": 346.0, "learning_rate": 4.0331149081924944e-05, "loss": 16.2503, "step": 13797 }, { "epoch": 0.5751323412946522, "grad_norm": 368.0, "learning_rate": 4.032452656339151e-05, "loss": 14.3127, "step": 13798 }, { "epoch": 0.5751740235921804, "grad_norm": 640.0, "learning_rate": 4.031790422119447e-05, "loss": 18.5004, "step": 13799 }, { "epoch": 0.5752157058897086, "grad_norm": 264.0, "learning_rate": 4.031128205545447e-05, "loss": 7.751, "step": 13800 }, { "epoch": 0.5752573881872369, "grad_norm": 532.0, "learning_rate": 4.030466006629222e-05, "loss": 17.1285, "step": 13801 }, { "epoch": 0.5752990704847651, "grad_norm": 512.0, "learning_rate": 4.029803825382839e-05, "loss": 17.3756, "step": 13802 }, { "epoch": 0.5753407527822934, "grad_norm": 584.0, "learning_rate": 4.029141661818369e-05, "loss": 19.5019, "step": 13803 }, { "epoch": 0.5753824350798216, "grad_norm": 189.0, "learning_rate": 4.0284795159478764e-05, "loss": 11.0004, "step": 13804 }, { "epoch": 0.5754241173773499, "grad_norm": 516.0, "learning_rate": 4.027817387783433e-05, "loss": 17.5006, "step": 13805 }, { "epoch": 0.5754657996748781, "grad_norm": 556.0, "learning_rate": 4.0271552773371016e-05, "loss": 18.8751, "step": 13806 }, { "epoch": 0.5755074819724063, "grad_norm": 380.0, "learning_rate": 4.0264931846209536e-05, "loss": 14.9381, "step": 13807 }, { "epoch": 0.5755491642699345, "grad_norm": 310.0, "learning_rate": 4.025831109647052e-05, "loss": 13.5011, "step": 13808 }, { "epoch": 0.5755908465674628, "grad_norm": 612.0, "learning_rate": 4.0251690524274674e-05, "loss": 19.3751, "step": 13809 }, { "epoch": 0.575632528864991, "grad_norm": 237.0, "learning_rate": 4.024507012974261e-05, "loss": 12.5014, "step": 13810 }, { "epoch": 0.5756742111625193, "grad_norm": 278.0, "learning_rate": 4.0238449912995026e-05, "loss": 13.3134, "step": 13811 }, { "epoch": 0.5757158934600475, "grad_norm": 209.0, "learning_rate": 4.0231829874152546e-05, "loss": 11.0003, "step": 13812 }, { "epoch": 0.5757575757575758, "grad_norm": 374.0, "learning_rate": 4.0225210013335846e-05, "loss": 14.5627, "step": 13813 }, { "epoch": 0.575799258055104, "grad_norm": 284.0, "learning_rate": 4.021859033066554e-05, "loss": 13.2503, "step": 13814 }, { "epoch": 0.5758409403526322, "grad_norm": 105.0, "learning_rate": 4.021197082626232e-05, "loss": 8.6878, "step": 13815 }, { "epoch": 0.5758826226501604, "grad_norm": 258.0, "learning_rate": 4.020535150024678e-05, "loss": 11.0629, "step": 13816 }, { "epoch": 0.5759243049476888, "grad_norm": 145.0, "learning_rate": 4.01987323527396e-05, "loss": 10.1257, "step": 13817 }, { "epoch": 0.575965987245217, "grad_norm": 360.0, "learning_rate": 4.019211338386137e-05, "loss": 13.5006, "step": 13818 }, { "epoch": 0.5760076695427452, "grad_norm": 366.0, "learning_rate": 4.018549459373275e-05, "loss": 11.3129, "step": 13819 }, { "epoch": 0.5760493518402734, "grad_norm": 864.0, "learning_rate": 4.017887598247437e-05, "loss": 20.7547, "step": 13820 }, { "epoch": 0.5760910341378017, "grad_norm": 880.0, "learning_rate": 4.017225755020685e-05, "loss": 23.0015, "step": 13821 }, { "epoch": 0.5761327164353299, "grad_norm": 340.0, "learning_rate": 4.016563929705079e-05, "loss": 14.0627, "step": 13822 }, { "epoch": 0.5761743987328581, "grad_norm": 182.0, "learning_rate": 4.0159021223126846e-05, "loss": 9.6251, "step": 13823 }, { "epoch": 0.5762160810303864, "grad_norm": 298.0, "learning_rate": 4.0152403328555597e-05, "loss": 11.8757, "step": 13824 }, { "epoch": 0.5762577633279147, "grad_norm": 360.0, "learning_rate": 4.01457856134577e-05, "loss": 14.7509, "step": 13825 }, { "epoch": 0.5762994456254429, "grad_norm": 251.0, "learning_rate": 4.0139168077953705e-05, "loss": 12.4378, "step": 13826 }, { "epoch": 0.5763411279229711, "grad_norm": 217.0, "learning_rate": 4.013255072216427e-05, "loss": 8.0628, "step": 13827 }, { "epoch": 0.5763828102204993, "grad_norm": 284.0, "learning_rate": 4.0125933546209947e-05, "loss": 12.8129, "step": 13828 }, { "epoch": 0.5764244925180276, "grad_norm": 213.0, "learning_rate": 4.01193165502114e-05, "loss": 9.6881, "step": 13829 }, { "epoch": 0.5764661748155558, "grad_norm": 1368.0, "learning_rate": 4.0112699734289145e-05, "loss": 27.0048, "step": 13830 }, { "epoch": 0.576507857113084, "grad_norm": 296.0, "learning_rate": 4.0106083098563836e-05, "loss": 12.5006, "step": 13831 }, { "epoch": 0.5765495394106123, "grad_norm": 230.0, "learning_rate": 4.009946664315604e-05, "loss": 11.6252, "step": 13832 }, { "epoch": 0.5765912217081406, "grad_norm": 326.0, "learning_rate": 4.0092850368186344e-05, "loss": 14.0003, "step": 13833 }, { "epoch": 0.5766329040056688, "grad_norm": 264.0, "learning_rate": 4.0086234273775315e-05, "loss": 12.8127, "step": 13834 }, { "epoch": 0.576674586303197, "grad_norm": 340.0, "learning_rate": 4.007961836004357e-05, "loss": 14.188, "step": 13835 }, { "epoch": 0.5767162686007252, "grad_norm": 478.0, "learning_rate": 4.0073002627111637e-05, "loss": 15.1884, "step": 13836 }, { "epoch": 0.5767579508982535, "grad_norm": 556.0, "learning_rate": 4.0066387075100134e-05, "loss": 18.3753, "step": 13837 }, { "epoch": 0.5767996331957818, "grad_norm": 536.0, "learning_rate": 4.005977170412959e-05, "loss": 16.7505, "step": 13838 }, { "epoch": 0.57684131549331, "grad_norm": 336.0, "learning_rate": 4.005315651432061e-05, "loss": 13.3129, "step": 13839 }, { "epoch": 0.5768829977908382, "grad_norm": 306.0, "learning_rate": 4.004654150579371e-05, "loss": 14.3126, "step": 13840 }, { "epoch": 0.5769246800883665, "grad_norm": 358.0, "learning_rate": 4.00399266786695e-05, "loss": 13.0627, "step": 13841 }, { "epoch": 0.5769663623858947, "grad_norm": 848.0, "learning_rate": 4.003331203306849e-05, "loss": 20.6284, "step": 13842 }, { "epoch": 0.5770080446834229, "grad_norm": 286.0, "learning_rate": 4.0026697569111265e-05, "loss": 13.8752, "step": 13843 }, { "epoch": 0.5770497269809511, "grad_norm": 452.0, "learning_rate": 4.002008328691836e-05, "loss": 16.8752, "step": 13844 }, { "epoch": 0.5770914092784795, "grad_norm": 1160.0, "learning_rate": 4.001346918661032e-05, "loss": 27.3752, "step": 13845 }, { "epoch": 0.5771330915760077, "grad_norm": 215.0, "learning_rate": 4.000685526830768e-05, "loss": 12.3133, "step": 13846 }, { "epoch": 0.5771747738735359, "grad_norm": 468.0, "learning_rate": 4.000024153213102e-05, "loss": 14.8754, "step": 13847 }, { "epoch": 0.5772164561710641, "grad_norm": 175.0, "learning_rate": 3.9993627978200814e-05, "loss": 10.2505, "step": 13848 }, { "epoch": 0.5772581384685924, "grad_norm": 232.0, "learning_rate": 3.998701460663765e-05, "loss": 11.6252, "step": 13849 }, { "epoch": 0.5772998207661206, "grad_norm": 432.0, "learning_rate": 3.998040141756202e-05, "loss": 15.8752, "step": 13850 }, { "epoch": 0.5773415030636488, "grad_norm": 251.0, "learning_rate": 3.997378841109448e-05, "loss": 12.8128, "step": 13851 }, { "epoch": 0.577383185361177, "grad_norm": 207.0, "learning_rate": 3.996717558735551e-05, "loss": 12.6258, "step": 13852 }, { "epoch": 0.5774248676587054, "grad_norm": 1296.0, "learning_rate": 3.9960562946465685e-05, "loss": 30.5005, "step": 13853 }, { "epoch": 0.5774665499562336, "grad_norm": 704.0, "learning_rate": 3.9953950488545464e-05, "loss": 19.6253, "step": 13854 }, { "epoch": 0.5775082322537618, "grad_norm": 45.5, "learning_rate": 3.994733821371541e-05, "loss": 7.6568, "step": 13855 }, { "epoch": 0.5775499145512901, "grad_norm": 502.0, "learning_rate": 3.994072612209599e-05, "loss": 16.6295, "step": 13856 }, { "epoch": 0.5775915968488183, "grad_norm": 1096.0, "learning_rate": 3.993411421380774e-05, "loss": 28.1251, "step": 13857 }, { "epoch": 0.5776332791463465, "grad_norm": 444.0, "learning_rate": 3.9927502488971154e-05, "loss": 15.6876, "step": 13858 }, { "epoch": 0.5776749614438748, "grad_norm": 106.5, "learning_rate": 3.992089094770672e-05, "loss": 8.8128, "step": 13859 }, { "epoch": 0.5777166437414031, "grad_norm": 262.0, "learning_rate": 3.9914279590134936e-05, "loss": 12.9398, "step": 13860 }, { "epoch": 0.5777583260389313, "grad_norm": 215.0, "learning_rate": 3.9907668416376335e-05, "loss": 13.3131, "step": 13861 }, { "epoch": 0.5778000083364595, "grad_norm": 238.0, "learning_rate": 3.9901057426551344e-05, "loss": 11.6252, "step": 13862 }, { "epoch": 0.5778416906339877, "grad_norm": 328.0, "learning_rate": 3.9894446620780494e-05, "loss": 13.3763, "step": 13863 }, { "epoch": 0.577883372931516, "grad_norm": 320.0, "learning_rate": 3.988783599918424e-05, "loss": 14.4376, "step": 13864 }, { "epoch": 0.5779250552290442, "grad_norm": 544.0, "learning_rate": 3.988122556188308e-05, "loss": 17.0002, "step": 13865 }, { "epoch": 0.5779667375265725, "grad_norm": 244.0, "learning_rate": 3.987461530899747e-05, "loss": 11.5002, "step": 13866 }, { "epoch": 0.5780084198241007, "grad_norm": 254.0, "learning_rate": 3.986800524064792e-05, "loss": 13.1254, "step": 13867 }, { "epoch": 0.578050102121629, "grad_norm": 296.0, "learning_rate": 3.9861395356954846e-05, "loss": 13.0627, "step": 13868 }, { "epoch": 0.5780917844191572, "grad_norm": 354.0, "learning_rate": 3.985478565803875e-05, "loss": 13.6253, "step": 13869 }, { "epoch": 0.5781334667166854, "grad_norm": 310.0, "learning_rate": 3.9848176144020094e-05, "loss": 13.4377, "step": 13870 }, { "epoch": 0.5781751490142136, "grad_norm": 278.0, "learning_rate": 3.9841566815019325e-05, "loss": 13.0002, "step": 13871 }, { "epoch": 0.578216831311742, "grad_norm": 784.0, "learning_rate": 3.983495767115689e-05, "loss": 23.8752, "step": 13872 }, { "epoch": 0.5782585136092702, "grad_norm": 1080.0, "learning_rate": 3.9828348712553284e-05, "loss": 28.0003, "step": 13873 }, { "epoch": 0.5783001959067984, "grad_norm": 264.0, "learning_rate": 3.9821739939328895e-05, "loss": 12.1277, "step": 13874 }, { "epoch": 0.5783418782043266, "grad_norm": 81.5, "learning_rate": 3.9815131351604226e-05, "loss": 8.5627, "step": 13875 }, { "epoch": 0.5783835605018549, "grad_norm": 318.0, "learning_rate": 3.980852294949966e-05, "loss": 12.2503, "step": 13876 }, { "epoch": 0.5784252427993831, "grad_norm": 524.0, "learning_rate": 3.98019147331357e-05, "loss": 17.2502, "step": 13877 }, { "epoch": 0.5784669250969113, "grad_norm": 264.0, "learning_rate": 3.979530670263273e-05, "loss": 10.1879, "step": 13878 }, { "epoch": 0.5785086073944395, "grad_norm": 364.0, "learning_rate": 3.978869885811122e-05, "loss": 15.751, "step": 13879 }, { "epoch": 0.5785502896919679, "grad_norm": 272.0, "learning_rate": 3.978209119969155e-05, "loss": 11.8753, "step": 13880 }, { "epoch": 0.5785919719894961, "grad_norm": 207.0, "learning_rate": 3.977548372749419e-05, "loss": 11.8127, "step": 13881 }, { "epoch": 0.5786336542870243, "grad_norm": 223.0, "learning_rate": 3.976887644163955e-05, "loss": 13.3757, "step": 13882 }, { "epoch": 0.5786753365845525, "grad_norm": 516.0, "learning_rate": 3.9762269342248046e-05, "loss": 18.0002, "step": 13883 }, { "epoch": 0.5787170188820808, "grad_norm": 1416.0, "learning_rate": 3.975566242944008e-05, "loss": 29.129, "step": 13884 }, { "epoch": 0.578758701179609, "grad_norm": 472.0, "learning_rate": 3.974905570333609e-05, "loss": 16.0008, "step": 13885 }, { "epoch": 0.5788003834771372, "grad_norm": 224.0, "learning_rate": 3.974244916405646e-05, "loss": 11.6253, "step": 13886 }, { "epoch": 0.5788420657746655, "grad_norm": 177.0, "learning_rate": 3.9735842811721616e-05, "loss": 10.3752, "step": 13887 }, { "epoch": 0.5788837480721938, "grad_norm": 528.0, "learning_rate": 3.972923664645193e-05, "loss": 17.5011, "step": 13888 }, { "epoch": 0.578925430369722, "grad_norm": 458.0, "learning_rate": 3.972263066836784e-05, "loss": 15.626, "step": 13889 }, { "epoch": 0.5789671126672502, "grad_norm": 896.0, "learning_rate": 3.9716024877589704e-05, "loss": 23.005, "step": 13890 }, { "epoch": 0.5790087949647784, "grad_norm": 80.5, "learning_rate": 3.970941927423794e-05, "loss": 8.6878, "step": 13891 }, { "epoch": 0.5790504772623067, "grad_norm": 384.0, "learning_rate": 3.970281385843291e-05, "loss": 15.2502, "step": 13892 }, { "epoch": 0.579092159559835, "grad_norm": 728.0, "learning_rate": 3.969620863029502e-05, "loss": 22.0003, "step": 13893 }, { "epoch": 0.5791338418573632, "grad_norm": 502.0, "learning_rate": 3.968960358994463e-05, "loss": 18.0005, "step": 13894 }, { "epoch": 0.5791755241548914, "grad_norm": 158.0, "learning_rate": 3.968299873750214e-05, "loss": 10.0628, "step": 13895 }, { "epoch": 0.5792172064524197, "grad_norm": 442.0, "learning_rate": 3.9676394073087914e-05, "loss": 16.5004, "step": 13896 }, { "epoch": 0.5792588887499479, "grad_norm": 146.0, "learning_rate": 3.966978959682232e-05, "loss": 11.6258, "step": 13897 }, { "epoch": 0.5793005710474761, "grad_norm": 1152.0, "learning_rate": 3.9663185308825714e-05, "loss": 22.755, "step": 13898 }, { "epoch": 0.5793422533450043, "grad_norm": 892.0, "learning_rate": 3.9656581209218504e-05, "loss": 25.5015, "step": 13899 }, { "epoch": 0.5793839356425327, "grad_norm": 752.0, "learning_rate": 3.964997729812099e-05, "loss": 20.1256, "step": 13900 }, { "epoch": 0.5794256179400609, "grad_norm": 324.0, "learning_rate": 3.9643373575653586e-05, "loss": 13.8752, "step": 13901 }, { "epoch": 0.5794673002375891, "grad_norm": 548.0, "learning_rate": 3.9636770041936585e-05, "loss": 17.7501, "step": 13902 }, { "epoch": 0.5795089825351173, "grad_norm": 462.0, "learning_rate": 3.963016669709041e-05, "loss": 17.5005, "step": 13903 }, { "epoch": 0.5795506648326456, "grad_norm": 223.0, "learning_rate": 3.9623563541235334e-05, "loss": 10.5003, "step": 13904 }, { "epoch": 0.5795923471301738, "grad_norm": 1768.0, "learning_rate": 3.9616960574491756e-05, "loss": 34.5032, "step": 13905 }, { "epoch": 0.579634029427702, "grad_norm": 512.0, "learning_rate": 3.9610357796979966e-05, "loss": 17.7502, "step": 13906 }, { "epoch": 0.5796757117252302, "grad_norm": 191.0, "learning_rate": 3.9603755208820346e-05, "loss": 12.3132, "step": 13907 }, { "epoch": 0.5797173940227586, "grad_norm": 516.0, "learning_rate": 3.9597152810133214e-05, "loss": 16.3753, "step": 13908 }, { "epoch": 0.5797590763202868, "grad_norm": 568.0, "learning_rate": 3.959055060103889e-05, "loss": 18.3754, "step": 13909 }, { "epoch": 0.579800758617815, "grad_norm": 304.0, "learning_rate": 3.95839485816577e-05, "loss": 13.126, "step": 13910 }, { "epoch": 0.5798424409153432, "grad_norm": 612.0, "learning_rate": 3.957734675210999e-05, "loss": 19.2505, "step": 13911 }, { "epoch": 0.5798841232128715, "grad_norm": 316.0, "learning_rate": 3.9570745112516035e-05, "loss": 12.9379, "step": 13912 }, { "epoch": 0.5799258055103997, "grad_norm": 418.0, "learning_rate": 3.95641436629962e-05, "loss": 16.7501, "step": 13913 }, { "epoch": 0.579967487807928, "grad_norm": 376.0, "learning_rate": 3.955754240367075e-05, "loss": 14.5022, "step": 13914 }, { "epoch": 0.5800091701054562, "grad_norm": 1280.0, "learning_rate": 3.955094133466004e-05, "loss": 26.5044, "step": 13915 }, { "epoch": 0.5800508524029845, "grad_norm": 193.0, "learning_rate": 3.954434045608433e-05, "loss": 11.7501, "step": 13916 }, { "epoch": 0.5800925347005127, "grad_norm": 150.0, "learning_rate": 3.953773976806397e-05, "loss": 11.3127, "step": 13917 }, { "epoch": 0.5801342169980409, "grad_norm": 408.0, "learning_rate": 3.95311392707192e-05, "loss": 14.438, "step": 13918 }, { "epoch": 0.5801758992955691, "grad_norm": 336.0, "learning_rate": 3.952453896417037e-05, "loss": 14.563, "step": 13919 }, { "epoch": 0.5802175815930974, "grad_norm": 354.0, "learning_rate": 3.951793884853773e-05, "loss": 14.6253, "step": 13920 }, { "epoch": 0.5802592638906257, "grad_norm": 266.0, "learning_rate": 3.95113389239416e-05, "loss": 12.6254, "step": 13921 }, { "epoch": 0.5803009461881539, "grad_norm": 272.0, "learning_rate": 3.950473919050223e-05, "loss": 11.6879, "step": 13922 }, { "epoch": 0.5803426284856821, "grad_norm": 406.0, "learning_rate": 3.949813964833995e-05, "loss": 15.3759, "step": 13923 }, { "epoch": 0.5803843107832104, "grad_norm": 756.0, "learning_rate": 3.949154029757498e-05, "loss": 20.1292, "step": 13924 }, { "epoch": 0.5804259930807386, "grad_norm": 165.0, "learning_rate": 3.948494113832764e-05, "loss": 9.3751, "step": 13925 }, { "epoch": 0.5804676753782668, "grad_norm": 528.0, "learning_rate": 3.947834217071816e-05, "loss": 17.1254, "step": 13926 }, { "epoch": 0.580509357675795, "grad_norm": 712.0, "learning_rate": 3.947174339486685e-05, "loss": 21.2501, "step": 13927 }, { "epoch": 0.5805510399733234, "grad_norm": 454.0, "learning_rate": 3.946514481089394e-05, "loss": 14.7514, "step": 13928 }, { "epoch": 0.5805927222708516, "grad_norm": 157.0, "learning_rate": 3.9458546418919715e-05, "loss": 10.0004, "step": 13929 }, { "epoch": 0.5806344045683798, "grad_norm": 151.0, "learning_rate": 3.9451948219064396e-05, "loss": 10.313, "step": 13930 }, { "epoch": 0.5806760868659081, "grad_norm": 206.0, "learning_rate": 3.9445350211448274e-05, "loss": 11.8755, "step": 13931 }, { "epoch": 0.5807177691634363, "grad_norm": 1576.0, "learning_rate": 3.9438752396191565e-05, "loss": 31.754, "step": 13932 }, { "epoch": 0.5807594514609645, "grad_norm": 748.0, "learning_rate": 3.9432154773414535e-05, "loss": 21.6252, "step": 13933 }, { "epoch": 0.5808011337584927, "grad_norm": 364.0, "learning_rate": 3.9425557343237415e-05, "loss": 16.2501, "step": 13934 }, { "epoch": 0.5808428160560211, "grad_norm": 100.0, "learning_rate": 3.941896010578048e-05, "loss": 7.8753, "step": 13935 }, { "epoch": 0.5808844983535493, "grad_norm": 454.0, "learning_rate": 3.941236306116391e-05, "loss": 17.2501, "step": 13936 }, { "epoch": 0.5809261806510775, "grad_norm": 502.0, "learning_rate": 3.9405766209507984e-05, "loss": 17.3756, "step": 13937 }, { "epoch": 0.5809678629486057, "grad_norm": 482.0, "learning_rate": 3.9399169550932884e-05, "loss": 18.2503, "step": 13938 }, { "epoch": 0.581009545246134, "grad_norm": 147.0, "learning_rate": 3.9392573085558885e-05, "loss": 8.8753, "step": 13939 }, { "epoch": 0.5810512275436622, "grad_norm": 540.0, "learning_rate": 3.938597681350616e-05, "loss": 16.3762, "step": 13940 }, { "epoch": 0.5810929098411904, "grad_norm": 462.0, "learning_rate": 3.937938073489498e-05, "loss": 15.9387, "step": 13941 }, { "epoch": 0.5811345921387187, "grad_norm": 167.0, "learning_rate": 3.93727848498455e-05, "loss": 8.5006, "step": 13942 }, { "epoch": 0.581176274436247, "grad_norm": 404.0, "learning_rate": 3.9366189158477987e-05, "loss": 16.2507, "step": 13943 }, { "epoch": 0.5812179567337752, "grad_norm": 476.0, "learning_rate": 3.9359593660912584e-05, "loss": 15.5662, "step": 13944 }, { "epoch": 0.5812596390313034, "grad_norm": 249.0, "learning_rate": 3.9352998357269555e-05, "loss": 12.0004, "step": 13945 }, { "epoch": 0.5813013213288316, "grad_norm": 161.0, "learning_rate": 3.9346403247669074e-05, "loss": 10.813, "step": 13946 }, { "epoch": 0.5813430036263599, "grad_norm": 414.0, "learning_rate": 3.9339808332231334e-05, "loss": 15.3128, "step": 13947 }, { "epoch": 0.5813846859238881, "grad_norm": 346.0, "learning_rate": 3.933321361107653e-05, "loss": 13.5641, "step": 13948 }, { "epoch": 0.5814263682214164, "grad_norm": 488.0, "learning_rate": 3.932661908432487e-05, "loss": 17.0003, "step": 13949 }, { "epoch": 0.5814680505189446, "grad_norm": 324.0, "learning_rate": 3.9320024752096514e-05, "loss": 14.7515, "step": 13950 }, { "epoch": 0.5815097328164729, "grad_norm": 169.0, "learning_rate": 3.931343061451167e-05, "loss": 10.6881, "step": 13951 }, { "epoch": 0.5815514151140011, "grad_norm": 416.0, "learning_rate": 3.930683667169047e-05, "loss": 17.6254, "step": 13952 }, { "epoch": 0.5815930974115293, "grad_norm": 137.0, "learning_rate": 3.9300242923753155e-05, "loss": 9.7502, "step": 13953 }, { "epoch": 0.5816347797090575, "grad_norm": 213.0, "learning_rate": 3.9293649370819844e-05, "loss": 10.5002, "step": 13954 }, { "epoch": 0.5816764620065858, "grad_norm": 370.0, "learning_rate": 3.9287056013010736e-05, "loss": 14.6255, "step": 13955 }, { "epoch": 0.5817181443041141, "grad_norm": 245.0, "learning_rate": 3.928046285044596e-05, "loss": 9.0629, "step": 13956 }, { "epoch": 0.5817598266016423, "grad_norm": 198.0, "learning_rate": 3.927386988324572e-05, "loss": 10.8133, "step": 13957 }, { "epoch": 0.5818015088991705, "grad_norm": 382.0, "learning_rate": 3.926727711153015e-05, "loss": 13.9376, "step": 13958 }, { "epoch": 0.5818431911966988, "grad_norm": 448.0, "learning_rate": 3.9260684535419404e-05, "loss": 16.7502, "step": 13959 }, { "epoch": 0.581884873494227, "grad_norm": 314.0, "learning_rate": 3.9254092155033625e-05, "loss": 13.8752, "step": 13960 }, { "epoch": 0.5819265557917552, "grad_norm": 196.0, "learning_rate": 3.9247499970493e-05, "loss": 11.0627, "step": 13961 }, { "epoch": 0.5819682380892834, "grad_norm": 552.0, "learning_rate": 3.9240907981917616e-05, "loss": 18.3755, "step": 13962 }, { "epoch": 0.5820099203868118, "grad_norm": 848.0, "learning_rate": 3.923431618942766e-05, "loss": 23.0006, "step": 13963 }, { "epoch": 0.58205160268434, "grad_norm": 712.0, "learning_rate": 3.922772459314322e-05, "loss": 23.1252, "step": 13964 }, { "epoch": 0.5820932849818682, "grad_norm": 322.0, "learning_rate": 3.922113319318449e-05, "loss": 13.6257, "step": 13965 }, { "epoch": 0.5821349672793964, "grad_norm": 392.0, "learning_rate": 3.9214541989671535e-05, "loss": 15.0633, "step": 13966 }, { "epoch": 0.5821766495769247, "grad_norm": 432.0, "learning_rate": 3.9207950982724527e-05, "loss": 16.5004, "step": 13967 }, { "epoch": 0.5822183318744529, "grad_norm": 404.0, "learning_rate": 3.9201360172463556e-05, "loss": 14.6252, "step": 13968 }, { "epoch": 0.5822600141719811, "grad_norm": 404.0, "learning_rate": 3.9194769559008767e-05, "loss": 15.5633, "step": 13969 }, { "epoch": 0.5823016964695094, "grad_norm": 168.0, "learning_rate": 3.9188179142480254e-05, "loss": 11.1253, "step": 13970 }, { "epoch": 0.5823433787670377, "grad_norm": 126.0, "learning_rate": 3.918158892299814e-05, "loss": 9.3753, "step": 13971 }, { "epoch": 0.5823850610645659, "grad_norm": 532.0, "learning_rate": 3.917499890068252e-05, "loss": 13.8791, "step": 13972 }, { "epoch": 0.5824267433620941, "grad_norm": 350.0, "learning_rate": 3.916840907565352e-05, "loss": 14.3752, "step": 13973 }, { "epoch": 0.5824684256596223, "grad_norm": 506.0, "learning_rate": 3.9161819448031213e-05, "loss": 16.3753, "step": 13974 }, { "epoch": 0.5825101079571506, "grad_norm": 201.0, "learning_rate": 3.915523001793573e-05, "loss": 11.5635, "step": 13975 }, { "epoch": 0.5825517902546788, "grad_norm": 804.0, "learning_rate": 3.914864078548711e-05, "loss": 21.3752, "step": 13976 }, { "epoch": 0.5825934725522071, "grad_norm": 412.0, "learning_rate": 3.914205175080551e-05, "loss": 14.6884, "step": 13977 }, { "epoch": 0.5826351548497353, "grad_norm": 366.0, "learning_rate": 3.9135462914010954e-05, "loss": 14.6283, "step": 13978 }, { "epoch": 0.5826768371472636, "grad_norm": 524.0, "learning_rate": 3.912887427522357e-05, "loss": 14.255, "step": 13979 }, { "epoch": 0.5827185194447918, "grad_norm": 528.0, "learning_rate": 3.91222858345634e-05, "loss": 18.8757, "step": 13980 }, { "epoch": 0.58276020174232, "grad_norm": 644.0, "learning_rate": 3.9115697592150556e-05, "loss": 20.5007, "step": 13981 }, { "epoch": 0.5828018840398482, "grad_norm": 213.0, "learning_rate": 3.9109109548105065e-05, "loss": 11.1877, "step": 13982 }, { "epoch": 0.5828435663373766, "grad_norm": 119.5, "learning_rate": 3.9102521702547034e-05, "loss": 9.8132, "step": 13983 }, { "epoch": 0.5828852486349048, "grad_norm": 528.0, "learning_rate": 3.909593405559651e-05, "loss": 18.2514, "step": 13984 }, { "epoch": 0.582926930932433, "grad_norm": 125.5, "learning_rate": 3.908934660737356e-05, "loss": 9.4378, "step": 13985 }, { "epoch": 0.5829686132299612, "grad_norm": 428.0, "learning_rate": 3.9082759357998224e-05, "loss": 15.8762, "step": 13986 }, { "epoch": 0.5830102955274895, "grad_norm": 536.0, "learning_rate": 3.907617230759059e-05, "loss": 18.6255, "step": 13987 }, { "epoch": 0.5830519778250177, "grad_norm": 189.0, "learning_rate": 3.906958545627066e-05, "loss": 11.0627, "step": 13988 }, { "epoch": 0.5830936601225459, "grad_norm": 390.0, "learning_rate": 3.906299880415853e-05, "loss": 14.3134, "step": 13989 }, { "epoch": 0.5831353424200741, "grad_norm": 260.0, "learning_rate": 3.90564123513742e-05, "loss": 11.6258, "step": 13990 }, { "epoch": 0.5831770247176025, "grad_norm": 448.0, "learning_rate": 3.904982609803773e-05, "loss": 16.0003, "step": 13991 }, { "epoch": 0.5832187070151307, "grad_norm": 237.0, "learning_rate": 3.904324004426915e-05, "loss": 12.0007, "step": 13992 }, { "epoch": 0.5832603893126589, "grad_norm": 205.0, "learning_rate": 3.903665419018851e-05, "loss": 11.5628, "step": 13993 }, { "epoch": 0.5833020716101871, "grad_norm": 322.0, "learning_rate": 3.903006853591579e-05, "loss": 13.1253, "step": 13994 }, { "epoch": 0.5833437539077154, "grad_norm": 262.0, "learning_rate": 3.9023483081571065e-05, "loss": 12.7503, "step": 13995 }, { "epoch": 0.5833854362052436, "grad_norm": 616.0, "learning_rate": 3.901689782727433e-05, "loss": 18.1254, "step": 13996 }, { "epoch": 0.5834271185027718, "grad_norm": 1024.0, "learning_rate": 3.9010312773145614e-05, "loss": 27.1268, "step": 13997 }, { "epoch": 0.5834688008003001, "grad_norm": 242.0, "learning_rate": 3.900372791930491e-05, "loss": 11.689, "step": 13998 }, { "epoch": 0.5835104830978284, "grad_norm": 330.0, "learning_rate": 3.899714326587226e-05, "loss": 11.9379, "step": 13999 }, { "epoch": 0.5835521653953566, "grad_norm": 145.0, "learning_rate": 3.8990558812967624e-05, "loss": 12.0629, "step": 14000 }, { "epoch": 0.5835938476928848, "grad_norm": 161.0, "learning_rate": 3.8983974560711065e-05, "loss": 10.8128, "step": 14001 }, { "epoch": 0.5836355299904131, "grad_norm": 302.0, "learning_rate": 3.8977390509222516e-05, "loss": 12.5626, "step": 14002 }, { "epoch": 0.5836772122879413, "grad_norm": 173.0, "learning_rate": 3.897080665862203e-05, "loss": 5.1572, "step": 14003 }, { "epoch": 0.5837188945854695, "grad_norm": 808.0, "learning_rate": 3.8964223009029546e-05, "loss": 20.5001, "step": 14004 }, { "epoch": 0.5837605768829978, "grad_norm": 404.0, "learning_rate": 3.89576395605651e-05, "loss": 15.3753, "step": 14005 }, { "epoch": 0.5838022591805261, "grad_norm": 280.0, "learning_rate": 3.895105631334863e-05, "loss": 13.1252, "step": 14006 }, { "epoch": 0.5838439414780543, "grad_norm": 223.0, "learning_rate": 3.894447326750016e-05, "loss": 11.5626, "step": 14007 }, { "epoch": 0.5838856237755825, "grad_norm": 258.0, "learning_rate": 3.893789042313964e-05, "loss": 11.0003, "step": 14008 }, { "epoch": 0.5839273060731107, "grad_norm": 848.0, "learning_rate": 3.893130778038705e-05, "loss": 20.2504, "step": 14009 }, { "epoch": 0.583968988370639, "grad_norm": 716.0, "learning_rate": 3.8924725339362346e-05, "loss": 21.7507, "step": 14010 }, { "epoch": 0.5840106706681673, "grad_norm": 752.0, "learning_rate": 3.891814310018552e-05, "loss": 23.1252, "step": 14011 }, { "epoch": 0.5840523529656955, "grad_norm": 364.0, "learning_rate": 3.891156106297651e-05, "loss": 15.3762, "step": 14012 }, { "epoch": 0.5840940352632237, "grad_norm": 1112.0, "learning_rate": 3.8904979227855295e-05, "loss": 28.0015, "step": 14013 }, { "epoch": 0.584135717560752, "grad_norm": 140.0, "learning_rate": 3.889839759494181e-05, "loss": 10.3128, "step": 14014 }, { "epoch": 0.5841773998582802, "grad_norm": 266.0, "learning_rate": 3.8891816164356026e-05, "loss": 12.6878, "step": 14015 }, { "epoch": 0.5842190821558084, "grad_norm": 492.0, "learning_rate": 3.8885234936217864e-05, "loss": 16.6255, "step": 14016 }, { "epoch": 0.5842607644533366, "grad_norm": 380.0, "learning_rate": 3.88786539106473e-05, "loss": 14.8751, "step": 14017 }, { "epoch": 0.584302446750865, "grad_norm": 205.0, "learning_rate": 3.887207308776423e-05, "loss": 11.5006, "step": 14018 }, { "epoch": 0.5843441290483932, "grad_norm": 154.0, "learning_rate": 3.886549246768864e-05, "loss": 10.6267, "step": 14019 }, { "epoch": 0.5843858113459214, "grad_norm": 432.0, "learning_rate": 3.885891205054042e-05, "loss": 16.2505, "step": 14020 }, { "epoch": 0.5844274936434496, "grad_norm": 106.5, "learning_rate": 3.8852331836439525e-05, "loss": 8.8754, "step": 14021 }, { "epoch": 0.5844691759409779, "grad_norm": 372.0, "learning_rate": 3.884575182550586e-05, "loss": 11.3779, "step": 14022 }, { "epoch": 0.5845108582385061, "grad_norm": 80.5, "learning_rate": 3.883917201785938e-05, "loss": 9.6258, "step": 14023 }, { "epoch": 0.5845525405360343, "grad_norm": 196.0, "learning_rate": 3.883259241361996e-05, "loss": 11.0002, "step": 14024 }, { "epoch": 0.5845942228335625, "grad_norm": 278.0, "learning_rate": 3.8826013012907555e-05, "loss": 9.626, "step": 14025 }, { "epoch": 0.5846359051310909, "grad_norm": 400.0, "learning_rate": 3.881943381584204e-05, "loss": 14.9378, "step": 14026 }, { "epoch": 0.5846775874286191, "grad_norm": 490.0, "learning_rate": 3.881285482254335e-05, "loss": 15.316, "step": 14027 }, { "epoch": 0.5847192697261473, "grad_norm": 207.0, "learning_rate": 3.880627603313136e-05, "loss": 9.0002, "step": 14028 }, { "epoch": 0.5847609520236755, "grad_norm": 548.0, "learning_rate": 3.8799697447725996e-05, "loss": 18.7501, "step": 14029 }, { "epoch": 0.5848026343212038, "grad_norm": 506.0, "learning_rate": 3.8793119066447124e-05, "loss": 16.1252, "step": 14030 }, { "epoch": 0.584844316618732, "grad_norm": 260.0, "learning_rate": 3.878654088941467e-05, "loss": 12.3126, "step": 14031 }, { "epoch": 0.5848859989162603, "grad_norm": 360.0, "learning_rate": 3.877996291674848e-05, "loss": 14.1876, "step": 14032 }, { "epoch": 0.5849276812137885, "grad_norm": 224.0, "learning_rate": 3.8773385148568484e-05, "loss": 11.7505, "step": 14033 }, { "epoch": 0.5849693635113168, "grad_norm": 368.0, "learning_rate": 3.876680758499453e-05, "loss": 14.2515, "step": 14034 }, { "epoch": 0.585011045808845, "grad_norm": 348.0, "learning_rate": 3.876023022614651e-05, "loss": 14.6881, "step": 14035 }, { "epoch": 0.5850527281063732, "grad_norm": 52.75, "learning_rate": 3.875365307214428e-05, "loss": 7.0006, "step": 14036 }, { "epoch": 0.5850944104039014, "grad_norm": 268.0, "learning_rate": 3.8747076123107736e-05, "loss": 10.8753, "step": 14037 }, { "epoch": 0.5851360927014297, "grad_norm": 412.0, "learning_rate": 3.874049937915671e-05, "loss": 15.0644, "step": 14038 }, { "epoch": 0.585177774998958, "grad_norm": 500.0, "learning_rate": 3.8733922840411113e-05, "loss": 16.1258, "step": 14039 }, { "epoch": 0.5852194572964862, "grad_norm": 394.0, "learning_rate": 3.872734650699075e-05, "loss": 13.9377, "step": 14040 }, { "epoch": 0.5852611395940144, "grad_norm": 217.0, "learning_rate": 3.8720770379015506e-05, "loss": 11.8752, "step": 14041 }, { "epoch": 0.5853028218915427, "grad_norm": 716.0, "learning_rate": 3.871419445660521e-05, "loss": 20.3754, "step": 14042 }, { "epoch": 0.5853445041890709, "grad_norm": 155.0, "learning_rate": 3.870761873987975e-05, "loss": 9.8126, "step": 14043 }, { "epoch": 0.5853861864865991, "grad_norm": 141.0, "learning_rate": 3.8701043228958906e-05, "loss": 10.6255, "step": 14044 }, { "epoch": 0.5854278687841273, "grad_norm": 628.0, "learning_rate": 3.869446792396257e-05, "loss": 18.6258, "step": 14045 }, { "epoch": 0.5854695510816557, "grad_norm": 268.0, "learning_rate": 3.8687892825010556e-05, "loss": 13.4382, "step": 14046 }, { "epoch": 0.5855112333791839, "grad_norm": 202.0, "learning_rate": 3.868131793222271e-05, "loss": 11.0004, "step": 14047 }, { "epoch": 0.5855529156767121, "grad_norm": 171.0, "learning_rate": 3.8674743245718825e-05, "loss": 7.595, "step": 14048 }, { "epoch": 0.5855945979742403, "grad_norm": 184.0, "learning_rate": 3.866816876561878e-05, "loss": 10.0008, "step": 14049 }, { "epoch": 0.5856362802717686, "grad_norm": 348.0, "learning_rate": 3.866159449204233e-05, "loss": 14.3753, "step": 14050 }, { "epoch": 0.5856779625692968, "grad_norm": 226.0, "learning_rate": 3.8655020425109366e-05, "loss": 12.6877, "step": 14051 }, { "epoch": 0.585719644866825, "grad_norm": 58.75, "learning_rate": 3.8648446564939636e-05, "loss": 8.1254, "step": 14052 }, { "epoch": 0.5857613271643533, "grad_norm": 358.0, "learning_rate": 3.864187291165299e-05, "loss": 15.2503, "step": 14053 }, { "epoch": 0.5858030094618816, "grad_norm": 227.0, "learning_rate": 3.86352994653692e-05, "loss": 11.9378, "step": 14054 }, { "epoch": 0.5858446917594098, "grad_norm": 145.0, "learning_rate": 3.8628726226208113e-05, "loss": 10.4378, "step": 14055 }, { "epoch": 0.585886374056938, "grad_norm": 458.0, "learning_rate": 3.862215319428947e-05, "loss": 15.7509, "step": 14056 }, { "epoch": 0.5859280563544662, "grad_norm": 488.0, "learning_rate": 3.861558036973312e-05, "loss": 16.3752, "step": 14057 }, { "epoch": 0.5859697386519945, "grad_norm": 258.0, "learning_rate": 3.860900775265881e-05, "loss": 12.3127, "step": 14058 }, { "epoch": 0.5860114209495227, "grad_norm": 404.0, "learning_rate": 3.860243534318635e-05, "loss": 16.6266, "step": 14059 }, { "epoch": 0.586053103247051, "grad_norm": 540.0, "learning_rate": 3.8595863141435515e-05, "loss": 16.1269, "step": 14060 }, { "epoch": 0.5860947855445792, "grad_norm": 536.0, "learning_rate": 3.858929114752611e-05, "loss": 18.3752, "step": 14061 }, { "epoch": 0.5861364678421075, "grad_norm": 536.0, "learning_rate": 3.858271936157785e-05, "loss": 17.5004, "step": 14062 }, { "epoch": 0.5861781501396357, "grad_norm": 175.0, "learning_rate": 3.8576147783710573e-05, "loss": 12.0629, "step": 14063 }, { "epoch": 0.5862198324371639, "grad_norm": 392.0, "learning_rate": 3.8569576414044e-05, "loss": 15.6877, "step": 14064 }, { "epoch": 0.5862615147346921, "grad_norm": 172.0, "learning_rate": 3.8563005252697925e-05, "loss": 8.1883, "step": 14065 }, { "epoch": 0.5863031970322204, "grad_norm": 370.0, "learning_rate": 3.8556434299792074e-05, "loss": 14.9382, "step": 14066 }, { "epoch": 0.5863448793297487, "grad_norm": 91.5, "learning_rate": 3.8549863555446253e-05, "loss": 8.5633, "step": 14067 }, { "epoch": 0.5863865616272769, "grad_norm": 908.0, "learning_rate": 3.854329301978016e-05, "loss": 23.6253, "step": 14068 }, { "epoch": 0.5864282439248051, "grad_norm": 89.5, "learning_rate": 3.853672269291359e-05, "loss": 8.7514, "step": 14069 }, { "epoch": 0.5864699262223334, "grad_norm": 198.0, "learning_rate": 3.8530152574966244e-05, "loss": 11.1255, "step": 14070 }, { "epoch": 0.5865116085198616, "grad_norm": 151.0, "learning_rate": 3.8523582666057896e-05, "loss": 10.1253, "step": 14071 }, { "epoch": 0.5865532908173898, "grad_norm": 180.0, "learning_rate": 3.8517012966308275e-05, "loss": 11.2505, "step": 14072 }, { "epoch": 0.586594973114918, "grad_norm": 840.0, "learning_rate": 3.85104434758371e-05, "loss": 23.7511, "step": 14073 }, { "epoch": 0.5866366554124464, "grad_norm": 338.0, "learning_rate": 3.850387419476412e-05, "loss": 13.938, "step": 14074 }, { "epoch": 0.5866783377099746, "grad_norm": 282.0, "learning_rate": 3.849730512320906e-05, "loss": 14.0633, "step": 14075 }, { "epoch": 0.5867200200075028, "grad_norm": 223.0, "learning_rate": 3.8490736261291625e-05, "loss": 11.3129, "step": 14076 }, { "epoch": 0.5867617023050311, "grad_norm": 336.0, "learning_rate": 3.848416760913156e-05, "loss": 14.5628, "step": 14077 }, { "epoch": 0.5868033846025593, "grad_norm": 324.0, "learning_rate": 3.8477599166848546e-05, "loss": 12.442, "step": 14078 }, { "epoch": 0.5868450669000875, "grad_norm": 199.0, "learning_rate": 3.8471030934562324e-05, "loss": 10.5006, "step": 14079 }, { "epoch": 0.5868867491976157, "grad_norm": 179.0, "learning_rate": 3.846446291239257e-05, "loss": 10.9378, "step": 14080 }, { "epoch": 0.5869284314951441, "grad_norm": 69.5, "learning_rate": 3.845789510045902e-05, "loss": 8.3137, "step": 14081 }, { "epoch": 0.5869701137926723, "grad_norm": 96.5, "learning_rate": 3.845132749888134e-05, "loss": 8.7503, "step": 14082 }, { "epoch": 0.5870117960902005, "grad_norm": 748.0, "learning_rate": 3.844476010777925e-05, "loss": 22.6256, "step": 14083 }, { "epoch": 0.5870534783877287, "grad_norm": 346.0, "learning_rate": 3.843819292727243e-05, "loss": 14.2541, "step": 14084 }, { "epoch": 0.587095160685257, "grad_norm": 632.0, "learning_rate": 3.843162595748058e-05, "loss": 19.1251, "step": 14085 }, { "epoch": 0.5871368429827852, "grad_norm": 117.5, "learning_rate": 3.842505919852335e-05, "loss": 10.4378, "step": 14086 }, { "epoch": 0.5871785252803134, "grad_norm": 195.0, "learning_rate": 3.841849265052048e-05, "loss": 11.5003, "step": 14087 }, { "epoch": 0.5872202075778417, "grad_norm": 280.0, "learning_rate": 3.8411926313591575e-05, "loss": 14.3754, "step": 14088 }, { "epoch": 0.58726188987537, "grad_norm": 376.0, "learning_rate": 3.8405360187856363e-05, "loss": 14.5003, "step": 14089 }, { "epoch": 0.5873035721728982, "grad_norm": 636.0, "learning_rate": 3.839879427343448e-05, "loss": 19.7504, "step": 14090 }, { "epoch": 0.5873452544704264, "grad_norm": 278.0, "learning_rate": 3.839222857044561e-05, "loss": 12.7502, "step": 14091 }, { "epoch": 0.5873869367679546, "grad_norm": 223.0, "learning_rate": 3.838566307900939e-05, "loss": 11.1257, "step": 14092 }, { "epoch": 0.5874286190654829, "grad_norm": 235.0, "learning_rate": 3.8379097799245515e-05, "loss": 13.0627, "step": 14093 }, { "epoch": 0.5874703013630111, "grad_norm": 280.0, "learning_rate": 3.837253273127359e-05, "loss": 11.7502, "step": 14094 }, { "epoch": 0.5875119836605394, "grad_norm": 960.0, "learning_rate": 3.836596787521331e-05, "loss": 25.0003, "step": 14095 }, { "epoch": 0.5875536659580676, "grad_norm": 544.0, "learning_rate": 3.835940323118428e-05, "loss": 18.3752, "step": 14096 }, { "epoch": 0.5875953482555959, "grad_norm": 233.0, "learning_rate": 3.8352838799306165e-05, "loss": 11.0008, "step": 14097 }, { "epoch": 0.5876370305531241, "grad_norm": 442.0, "learning_rate": 3.834627457969859e-05, "loss": 14.9388, "step": 14098 }, { "epoch": 0.5876787128506523, "grad_norm": 209.0, "learning_rate": 3.833971057248122e-05, "loss": 9.5002, "step": 14099 }, { "epoch": 0.5877203951481805, "grad_norm": 356.0, "learning_rate": 3.833314677777363e-05, "loss": 13.5002, "step": 14100 }, { "epoch": 0.5877620774457089, "grad_norm": 76.5, "learning_rate": 3.8326583195695504e-05, "loss": 8.3755, "step": 14101 }, { "epoch": 0.5878037597432371, "grad_norm": 1024.0, "learning_rate": 3.832001982636641e-05, "loss": 21.8789, "step": 14102 }, { "epoch": 0.5878454420407653, "grad_norm": 242.0, "learning_rate": 3.8313456669906016e-05, "loss": 11.6877, "step": 14103 }, { "epoch": 0.5878871243382935, "grad_norm": 308.0, "learning_rate": 3.830689372643389e-05, "loss": 13.5631, "step": 14104 }, { "epoch": 0.5879288066358218, "grad_norm": 608.0, "learning_rate": 3.830033099606968e-05, "loss": 18.1252, "step": 14105 }, { "epoch": 0.58797048893335, "grad_norm": 227.0, "learning_rate": 3.829376847893296e-05, "loss": 11.0006, "step": 14106 }, { "epoch": 0.5880121712308782, "grad_norm": 237.0, "learning_rate": 3.828720617514337e-05, "loss": 12.4377, "step": 14107 }, { "epoch": 0.5880538535284064, "grad_norm": 560.0, "learning_rate": 3.828064408482046e-05, "loss": 18.8752, "step": 14108 }, { "epoch": 0.5880955358259348, "grad_norm": 456.0, "learning_rate": 3.827408220808387e-05, "loss": 15.1285, "step": 14109 }, { "epoch": 0.588137218123463, "grad_norm": 318.0, "learning_rate": 3.826752054505317e-05, "loss": 13.7505, "step": 14110 }, { "epoch": 0.5881789004209912, "grad_norm": 1424.0, "learning_rate": 3.826095909584795e-05, "loss": 25.5047, "step": 14111 }, { "epoch": 0.5882205827185194, "grad_norm": 438.0, "learning_rate": 3.825439786058778e-05, "loss": 16.5002, "step": 14112 }, { "epoch": 0.5882622650160477, "grad_norm": 404.0, "learning_rate": 3.824783683939228e-05, "loss": 14.6252, "step": 14113 }, { "epoch": 0.5883039473135759, "grad_norm": 464.0, "learning_rate": 3.824127603238096e-05, "loss": 17.5006, "step": 14114 }, { "epoch": 0.5883456296111041, "grad_norm": 528.0, "learning_rate": 3.823471543967346e-05, "loss": 17.6252, "step": 14115 }, { "epoch": 0.5883873119086324, "grad_norm": 1456.0, "learning_rate": 3.82281550613893e-05, "loss": 29.0037, "step": 14116 }, { "epoch": 0.5884289942061607, "grad_norm": 128.0, "learning_rate": 3.822159489764807e-05, "loss": 7.3442, "step": 14117 }, { "epoch": 0.5884706765036889, "grad_norm": 704.0, "learning_rate": 3.82150349485693e-05, "loss": 17.3799, "step": 14118 }, { "epoch": 0.5885123588012171, "grad_norm": 209.0, "learning_rate": 3.8208475214272586e-05, "loss": 11.1878, "step": 14119 }, { "epoch": 0.5885540410987453, "grad_norm": 231.0, "learning_rate": 3.8201915694877436e-05, "loss": 9.8129, "step": 14120 }, { "epoch": 0.5885957233962736, "grad_norm": 1576.0, "learning_rate": 3.8195356390503436e-05, "loss": 35.2504, "step": 14121 }, { "epoch": 0.5886374056938019, "grad_norm": 688.0, "learning_rate": 3.8188797301270105e-05, "loss": 19.8789, "step": 14122 }, { "epoch": 0.5886790879913301, "grad_norm": 95.0, "learning_rate": 3.8182238427297004e-05, "loss": 9.6886, "step": 14123 }, { "epoch": 0.5887207702888583, "grad_norm": 189.0, "learning_rate": 3.817567976870363e-05, "loss": 9.6256, "step": 14124 }, { "epoch": 0.5887624525863866, "grad_norm": 151.0, "learning_rate": 3.816912132560957e-05, "loss": 10.2503, "step": 14125 }, { "epoch": 0.5888041348839148, "grad_norm": 556.0, "learning_rate": 3.816256309813431e-05, "loss": 15.5628, "step": 14126 }, { "epoch": 0.588845817181443, "grad_norm": 720.0, "learning_rate": 3.815600508639741e-05, "loss": 19.1281, "step": 14127 }, { "epoch": 0.5888874994789712, "grad_norm": 181.0, "learning_rate": 3.8149447290518335e-05, "loss": 11.0627, "step": 14128 }, { "epoch": 0.5889291817764996, "grad_norm": 155.0, "learning_rate": 3.8142889710616665e-05, "loss": 10.1881, "step": 14129 }, { "epoch": 0.5889708640740278, "grad_norm": 436.0, "learning_rate": 3.813633234681186e-05, "loss": 15.1264, "step": 14130 }, { "epoch": 0.589012546371556, "grad_norm": 310.0, "learning_rate": 3.8129775199223464e-05, "loss": 12.1883, "step": 14131 }, { "epoch": 0.5890542286690842, "grad_norm": 438.0, "learning_rate": 3.812321826797096e-05, "loss": 15.3752, "step": 14132 }, { "epoch": 0.5890959109666125, "grad_norm": 255.0, "learning_rate": 3.811666155317386e-05, "loss": 12.7503, "step": 14133 }, { "epoch": 0.5891375932641407, "grad_norm": 326.0, "learning_rate": 3.811010505495166e-05, "loss": 14.5004, "step": 14134 }, { "epoch": 0.5891792755616689, "grad_norm": 410.0, "learning_rate": 3.810354877342385e-05, "loss": 15.8752, "step": 14135 }, { "epoch": 0.5892209578591971, "grad_norm": 392.0, "learning_rate": 3.8096992708709915e-05, "loss": 15.3127, "step": 14136 }, { "epoch": 0.5892626401567255, "grad_norm": 107.5, "learning_rate": 3.8090436860929365e-05, "loss": 7.2509, "step": 14137 }, { "epoch": 0.5893043224542537, "grad_norm": 228.0, "learning_rate": 3.808388123020163e-05, "loss": 12.1253, "step": 14138 }, { "epoch": 0.5893460047517819, "grad_norm": 508.0, "learning_rate": 3.807732581664625e-05, "loss": 16.8754, "step": 14139 }, { "epoch": 0.5893876870493101, "grad_norm": 676.0, "learning_rate": 3.807077062038264e-05, "loss": 22.1252, "step": 14140 }, { "epoch": 0.5894293693468384, "grad_norm": 544.0, "learning_rate": 3.806421564153031e-05, "loss": 17.8752, "step": 14141 }, { "epoch": 0.5894710516443666, "grad_norm": 394.0, "learning_rate": 3.80576608802087e-05, "loss": 15.8752, "step": 14142 }, { "epoch": 0.5895127339418949, "grad_norm": 173.0, "learning_rate": 3.805110633653729e-05, "loss": 10.6253, "step": 14143 }, { "epoch": 0.5895544162394231, "grad_norm": 470.0, "learning_rate": 3.804455201063551e-05, "loss": 15.4385, "step": 14144 }, { "epoch": 0.5895960985369514, "grad_norm": 133.0, "learning_rate": 3.8037997902622855e-05, "loss": 9.6884, "step": 14145 }, { "epoch": 0.5896377808344796, "grad_norm": 242.0, "learning_rate": 3.803144401261872e-05, "loss": 12.0006, "step": 14146 }, { "epoch": 0.5896794631320078, "grad_norm": 454.0, "learning_rate": 3.80248903407426e-05, "loss": 14.8752, "step": 14147 }, { "epoch": 0.5897211454295361, "grad_norm": 316.0, "learning_rate": 3.801833688711391e-05, "loss": 14.942, "step": 14148 }, { "epoch": 0.5897628277270643, "grad_norm": 564.0, "learning_rate": 3.801178365185211e-05, "loss": 18.3752, "step": 14149 }, { "epoch": 0.5898045100245926, "grad_norm": 228.0, "learning_rate": 3.8005230635076595e-05, "loss": 11.0627, "step": 14150 }, { "epoch": 0.5898461923221208, "grad_norm": 1216.0, "learning_rate": 3.799867783690684e-05, "loss": 28.3781, "step": 14151 }, { "epoch": 0.5898878746196491, "grad_norm": 233.0, "learning_rate": 3.799212525746222e-05, "loss": 12.063, "step": 14152 }, { "epoch": 0.5899295569171773, "grad_norm": 344.0, "learning_rate": 3.798557289686221e-05, "loss": 13.6261, "step": 14153 }, { "epoch": 0.5899712392147055, "grad_norm": 346.0, "learning_rate": 3.7979020755226175e-05, "loss": 13.1256, "step": 14154 }, { "epoch": 0.5900129215122337, "grad_norm": 296.0, "learning_rate": 3.797246883267357e-05, "loss": 11.8128, "step": 14155 }, { "epoch": 0.590054603809762, "grad_norm": 255.0, "learning_rate": 3.796591712932378e-05, "loss": 12.8127, "step": 14156 }, { "epoch": 0.5900962861072903, "grad_norm": 450.0, "learning_rate": 3.7959365645296234e-05, "loss": 15.1302, "step": 14157 }, { "epoch": 0.5901379684048185, "grad_norm": 146.0, "learning_rate": 3.79528143807103e-05, "loss": 9.6254, "step": 14158 }, { "epoch": 0.5901796507023467, "grad_norm": 740.0, "learning_rate": 3.7946263335685396e-05, "loss": 17.8783, "step": 14159 }, { "epoch": 0.590221332999875, "grad_norm": 251.0, "learning_rate": 3.793971251034092e-05, "loss": 13.188, "step": 14160 }, { "epoch": 0.5902630152974032, "grad_norm": 232.0, "learning_rate": 3.793316190479625e-05, "loss": 9.438, "step": 14161 }, { "epoch": 0.5903046975949314, "grad_norm": 856.0, "learning_rate": 3.7926611519170766e-05, "loss": 22.3753, "step": 14162 }, { "epoch": 0.5903463798924596, "grad_norm": 796.0, "learning_rate": 3.792006135358388e-05, "loss": 22.7502, "step": 14163 }, { "epoch": 0.590388062189988, "grad_norm": 390.0, "learning_rate": 3.791351140815493e-05, "loss": 13.0009, "step": 14164 }, { "epoch": 0.5904297444875162, "grad_norm": 772.0, "learning_rate": 3.790696168300333e-05, "loss": 23.1252, "step": 14165 }, { "epoch": 0.5904714267850444, "grad_norm": 410.0, "learning_rate": 3.79004121782484e-05, "loss": 14.8753, "step": 14166 }, { "epoch": 0.5905131090825726, "grad_norm": 222.0, "learning_rate": 3.789386289400955e-05, "loss": 12.5628, "step": 14167 }, { "epoch": 0.5905547913801009, "grad_norm": 438.0, "learning_rate": 3.788731383040611e-05, "loss": 17.5007, "step": 14168 }, { "epoch": 0.5905964736776291, "grad_norm": 206.0, "learning_rate": 3.788076498755747e-05, "loss": 11.0003, "step": 14169 }, { "epoch": 0.5906381559751573, "grad_norm": 332.0, "learning_rate": 3.7874216365582945e-05, "loss": 13.6877, "step": 14170 }, { "epoch": 0.5906798382726856, "grad_norm": 246.0, "learning_rate": 3.786766796460191e-05, "loss": 11.8126, "step": 14171 }, { "epoch": 0.5907215205702139, "grad_norm": 396.0, "learning_rate": 3.78611197847337e-05, "loss": 14.5011, "step": 14172 }, { "epoch": 0.5907632028677421, "grad_norm": 118.0, "learning_rate": 3.785457182609767e-05, "loss": 6.7506, "step": 14173 }, { "epoch": 0.5908048851652703, "grad_norm": 73.0, "learning_rate": 3.7848024088813125e-05, "loss": 7.2504, "step": 14174 }, { "epoch": 0.5908465674627985, "grad_norm": 836.0, "learning_rate": 3.784147657299945e-05, "loss": 23.3753, "step": 14175 }, { "epoch": 0.5908882497603268, "grad_norm": 368.0, "learning_rate": 3.7834929278775916e-05, "loss": 15.5004, "step": 14176 }, { "epoch": 0.590929932057855, "grad_norm": 384.0, "learning_rate": 3.7828382206261904e-05, "loss": 15.0002, "step": 14177 }, { "epoch": 0.5909716143553833, "grad_norm": 502.0, "learning_rate": 3.782183535557668e-05, "loss": 16.8755, "step": 14178 }, { "epoch": 0.5910132966529115, "grad_norm": 592.0, "learning_rate": 3.7815288726839606e-05, "loss": 17.6254, "step": 14179 }, { "epoch": 0.5910549789504398, "grad_norm": 262.0, "learning_rate": 3.780874232016996e-05, "loss": 11.1879, "step": 14180 }, { "epoch": 0.591096661247968, "grad_norm": 302.0, "learning_rate": 3.780219613568709e-05, "loss": 12.7503, "step": 14181 }, { "epoch": 0.5911383435454962, "grad_norm": 312.0, "learning_rate": 3.779565017351026e-05, "loss": 11.5032, "step": 14182 }, { "epoch": 0.5911800258430244, "grad_norm": 612.0, "learning_rate": 3.7789104433758794e-05, "loss": 20.3754, "step": 14183 }, { "epoch": 0.5912217081405527, "grad_norm": 308.0, "learning_rate": 3.778255891655198e-05, "loss": 13.9378, "step": 14184 }, { "epoch": 0.591263390438081, "grad_norm": 372.0, "learning_rate": 3.777601362200912e-05, "loss": 15.313, "step": 14185 }, { "epoch": 0.5913050727356092, "grad_norm": 880.0, "learning_rate": 3.7769468550249486e-05, "loss": 22.2502, "step": 14186 }, { "epoch": 0.5913467550331374, "grad_norm": 141.0, "learning_rate": 3.7762923701392395e-05, "loss": 11.6256, "step": 14187 }, { "epoch": 0.5913884373306657, "grad_norm": 450.0, "learning_rate": 3.775637907555708e-05, "loss": 16.876, "step": 14188 }, { "epoch": 0.5914301196281939, "grad_norm": 47.5, "learning_rate": 3.774983467286287e-05, "loss": 7.4065, "step": 14189 }, { "epoch": 0.5914718019257221, "grad_norm": 410.0, "learning_rate": 3.774329049342898e-05, "loss": 15.2502, "step": 14190 }, { "epoch": 0.5915134842232503, "grad_norm": 288.0, "learning_rate": 3.7736746537374744e-05, "loss": 14.1252, "step": 14191 }, { "epoch": 0.5915551665207787, "grad_norm": 104.5, "learning_rate": 3.773020280481936e-05, "loss": 8.2505, "step": 14192 }, { "epoch": 0.5915968488183069, "grad_norm": 243.0, "learning_rate": 3.772365929588215e-05, "loss": 11.7507, "step": 14193 }, { "epoch": 0.5916385311158351, "grad_norm": 390.0, "learning_rate": 3.771711601068231e-05, "loss": 13.1289, "step": 14194 }, { "epoch": 0.5916802134133633, "grad_norm": 61.5, "learning_rate": 3.771057294933914e-05, "loss": 8.5004, "step": 14195 }, { "epoch": 0.5917218957108916, "grad_norm": 412.0, "learning_rate": 3.770403011197185e-05, "loss": 14.1876, "step": 14196 }, { "epoch": 0.5917635780084198, "grad_norm": 156.0, "learning_rate": 3.769748749869972e-05, "loss": 8.2513, "step": 14197 }, { "epoch": 0.591805260305948, "grad_norm": 552.0, "learning_rate": 3.769094510964196e-05, "loss": 16.1256, "step": 14198 }, { "epoch": 0.5918469426034763, "grad_norm": 278.0, "learning_rate": 3.768440294491783e-05, "loss": 11.69, "step": 14199 }, { "epoch": 0.5918886249010046, "grad_norm": 126.0, "learning_rate": 3.767786100464653e-05, "loss": 8.7508, "step": 14200 }, { "epoch": 0.5919303071985328, "grad_norm": 73.5, "learning_rate": 3.767131928894734e-05, "loss": 9.2503, "step": 14201 }, { "epoch": 0.591971989496061, "grad_norm": 386.0, "learning_rate": 3.766477779793942e-05, "loss": 13.0008, "step": 14202 }, { "epoch": 0.5920136717935892, "grad_norm": 352.0, "learning_rate": 3.765823653174204e-05, "loss": 15.2502, "step": 14203 }, { "epoch": 0.5920553540911175, "grad_norm": 1400.0, "learning_rate": 3.765169549047438e-05, "loss": 32.2503, "step": 14204 }, { "epoch": 0.5920970363886457, "grad_norm": 572.0, "learning_rate": 3.764515467425568e-05, "loss": 18.7502, "step": 14205 }, { "epoch": 0.592138718686174, "grad_norm": 632.0, "learning_rate": 3.763861408320512e-05, "loss": 18.5004, "step": 14206 }, { "epoch": 0.5921804009837022, "grad_norm": 1008.0, "learning_rate": 3.763207371744193e-05, "loss": 25.8788, "step": 14207 }, { "epoch": 0.5922220832812305, "grad_norm": 61.25, "learning_rate": 3.762553357708527e-05, "loss": 7.0939, "step": 14208 }, { "epoch": 0.5922637655787587, "grad_norm": 171.0, "learning_rate": 3.761899366225438e-05, "loss": 10.8757, "step": 14209 }, { "epoch": 0.5923054478762869, "grad_norm": 402.0, "learning_rate": 3.761245397306842e-05, "loss": 16.5002, "step": 14210 }, { "epoch": 0.5923471301738151, "grad_norm": 183.0, "learning_rate": 3.760591450964659e-05, "loss": 11.2505, "step": 14211 }, { "epoch": 0.5923888124713435, "grad_norm": 732.0, "learning_rate": 3.759937527210806e-05, "loss": 21.3753, "step": 14212 }, { "epoch": 0.5924304947688717, "grad_norm": 151.0, "learning_rate": 3.7592836260572036e-05, "loss": 10.4378, "step": 14213 }, { "epoch": 0.5924721770663999, "grad_norm": 138.0, "learning_rate": 3.758629747515765e-05, "loss": 10.4382, "step": 14214 }, { "epoch": 0.5925138593639281, "grad_norm": 88.0, "learning_rate": 3.757975891598412e-05, "loss": 7.6564, "step": 14215 }, { "epoch": 0.5925555416614564, "grad_norm": 588.0, "learning_rate": 3.757322058317056e-05, "loss": 18.6254, "step": 14216 }, { "epoch": 0.5925972239589846, "grad_norm": 1064.0, "learning_rate": 3.756668247683618e-05, "loss": 25.3755, "step": 14217 }, { "epoch": 0.5926389062565128, "grad_norm": 528.0, "learning_rate": 3.7560144597100094e-05, "loss": 17.6251, "step": 14218 }, { "epoch": 0.592680588554041, "grad_norm": 376.0, "learning_rate": 3.75536069440815e-05, "loss": 12.8139, "step": 14219 }, { "epoch": 0.5927222708515694, "grad_norm": 502.0, "learning_rate": 3.7547069517899505e-05, "loss": 17.6252, "step": 14220 }, { "epoch": 0.5927639531490976, "grad_norm": 103.5, "learning_rate": 3.754053231867328e-05, "loss": 9.3764, "step": 14221 }, { "epoch": 0.5928056354466258, "grad_norm": 167.0, "learning_rate": 3.753399534652197e-05, "loss": 9.1253, "step": 14222 }, { "epoch": 0.5928473177441541, "grad_norm": 520.0, "learning_rate": 3.7527458601564684e-05, "loss": 18.2502, "step": 14223 }, { "epoch": 0.5928890000416823, "grad_norm": 520.0, "learning_rate": 3.752092208392057e-05, "loss": 16.0004, "step": 14224 }, { "epoch": 0.5929306823392105, "grad_norm": 592.0, "learning_rate": 3.751438579370878e-05, "loss": 21.0003, "step": 14225 }, { "epoch": 0.5929723646367387, "grad_norm": 342.0, "learning_rate": 3.75078497310484e-05, "loss": 11.6253, "step": 14226 }, { "epoch": 0.5930140469342671, "grad_norm": 197.0, "learning_rate": 3.7501313896058586e-05, "loss": 12.0002, "step": 14227 }, { "epoch": 0.5930557292317953, "grad_norm": 484.0, "learning_rate": 3.7494778288858413e-05, "loss": 18.0005, "step": 14228 }, { "epoch": 0.5930974115293235, "grad_norm": 71.0, "learning_rate": 3.748824290956704e-05, "loss": 8.1252, "step": 14229 }, { "epoch": 0.5931390938268517, "grad_norm": 1216.0, "learning_rate": 3.748170775830353e-05, "loss": 32.7506, "step": 14230 }, { "epoch": 0.59318077612438, "grad_norm": 90.5, "learning_rate": 3.747517283518702e-05, "loss": 8.9386, "step": 14231 }, { "epoch": 0.5932224584219082, "grad_norm": 352.0, "learning_rate": 3.746863814033659e-05, "loss": 14.0652, "step": 14232 }, { "epoch": 0.5932641407194365, "grad_norm": 580.0, "learning_rate": 3.746210367387135e-05, "loss": 19.5006, "step": 14233 }, { "epoch": 0.5933058230169647, "grad_norm": 406.0, "learning_rate": 3.745556943591037e-05, "loss": 17.2505, "step": 14234 }, { "epoch": 0.593347505314493, "grad_norm": 628.0, "learning_rate": 3.744903542657276e-05, "loss": 19.1251, "step": 14235 }, { "epoch": 0.5933891876120212, "grad_norm": 147.0, "learning_rate": 3.744250164597759e-05, "loss": 10.3127, "step": 14236 }, { "epoch": 0.5934308699095494, "grad_norm": 242.0, "learning_rate": 3.7435968094243946e-05, "loss": 12.7504, "step": 14237 }, { "epoch": 0.5934725522070776, "grad_norm": 243.0, "learning_rate": 3.742943477149089e-05, "loss": 9.5002, "step": 14238 }, { "epoch": 0.5935142345046059, "grad_norm": 314.0, "learning_rate": 3.742290167783752e-05, "loss": 13.2504, "step": 14239 }, { "epoch": 0.5935559168021342, "grad_norm": 390.0, "learning_rate": 3.7416368813402855e-05, "loss": 15.3126, "step": 14240 }, { "epoch": 0.5935975990996624, "grad_norm": 354.0, "learning_rate": 3.740983617830602e-05, "loss": 14.2502, "step": 14241 }, { "epoch": 0.5936392813971906, "grad_norm": 168.0, "learning_rate": 3.740330377266601e-05, "loss": 9.1878, "step": 14242 }, { "epoch": 0.5936809636947189, "grad_norm": 1272.0, "learning_rate": 3.7396771596601925e-05, "loss": 28.754, "step": 14243 }, { "epoch": 0.5937226459922471, "grad_norm": 274.0, "learning_rate": 3.7390239650232784e-05, "loss": 13.8753, "step": 14244 }, { "epoch": 0.5937643282897753, "grad_norm": 498.0, "learning_rate": 3.738370793367766e-05, "loss": 17.6254, "step": 14245 }, { "epoch": 0.5938060105873035, "grad_norm": 174.0, "learning_rate": 3.737717644705556e-05, "loss": 11.6878, "step": 14246 }, { "epoch": 0.5938476928848319, "grad_norm": 548.0, "learning_rate": 3.737064519048554e-05, "loss": 17.2504, "step": 14247 }, { "epoch": 0.5938893751823601, "grad_norm": 264.0, "learning_rate": 3.736411416408665e-05, "loss": 13.1881, "step": 14248 }, { "epoch": 0.5939310574798883, "grad_norm": 456.0, "learning_rate": 3.735758336797789e-05, "loss": 15.8751, "step": 14249 }, { "epoch": 0.5939727397774165, "grad_norm": 169.0, "learning_rate": 3.7351052802278285e-05, "loss": 11.3133, "step": 14250 }, { "epoch": 0.5940144220749448, "grad_norm": 104.5, "learning_rate": 3.734452246710689e-05, "loss": 10.5003, "step": 14251 }, { "epoch": 0.594056104372473, "grad_norm": 296.0, "learning_rate": 3.733799236258268e-05, "loss": 12.8127, "step": 14252 }, { "epoch": 0.5940977866700012, "grad_norm": 604.0, "learning_rate": 3.7331462488824695e-05, "loss": 18.6259, "step": 14253 }, { "epoch": 0.5941394689675294, "grad_norm": 201.0, "learning_rate": 3.7324932845951916e-05, "loss": 11.2506, "step": 14254 }, { "epoch": 0.5941811512650578, "grad_norm": 436.0, "learning_rate": 3.731840343408338e-05, "loss": 16.6256, "step": 14255 }, { "epoch": 0.594222833562586, "grad_norm": 354.0, "learning_rate": 3.731187425333804e-05, "loss": 11.5628, "step": 14256 }, { "epoch": 0.5942645158601142, "grad_norm": 181.0, "learning_rate": 3.7305345303834946e-05, "loss": 11.3131, "step": 14257 }, { "epoch": 0.5943061981576424, "grad_norm": 242.0, "learning_rate": 3.729881658569304e-05, "loss": 11.1252, "step": 14258 }, { "epoch": 0.5943478804551707, "grad_norm": 342.0, "learning_rate": 3.729228809903134e-05, "loss": 15.6883, "step": 14259 }, { "epoch": 0.5943895627526989, "grad_norm": 145.0, "learning_rate": 3.728575984396882e-05, "loss": 9.1878, "step": 14260 }, { "epoch": 0.5944312450502272, "grad_norm": 336.0, "learning_rate": 3.727923182062445e-05, "loss": 14.0001, "step": 14261 }, { "epoch": 0.5944729273477554, "grad_norm": 406.0, "learning_rate": 3.72727040291172e-05, "loss": 15.939, "step": 14262 }, { "epoch": 0.5945146096452837, "grad_norm": 370.0, "learning_rate": 3.726617646956607e-05, "loss": 13.4378, "step": 14263 }, { "epoch": 0.5945562919428119, "grad_norm": 254.0, "learning_rate": 3.725964914208998e-05, "loss": 12.3127, "step": 14264 }, { "epoch": 0.5945979742403401, "grad_norm": 229.0, "learning_rate": 3.725312204680794e-05, "loss": 11.8759, "step": 14265 }, { "epoch": 0.5946396565378683, "grad_norm": 640.0, "learning_rate": 3.724659518383886e-05, "loss": 20.0006, "step": 14266 }, { "epoch": 0.5946813388353966, "grad_norm": 161.0, "learning_rate": 3.7240068553301744e-05, "loss": 9.8752, "step": 14267 }, { "epoch": 0.5947230211329249, "grad_norm": 184.0, "learning_rate": 3.723354215531548e-05, "loss": 11.5002, "step": 14268 }, { "epoch": 0.5947647034304531, "grad_norm": 216.0, "learning_rate": 3.722701598999907e-05, "loss": 11.6877, "step": 14269 }, { "epoch": 0.5948063857279813, "grad_norm": 193.0, "learning_rate": 3.72204900574714e-05, "loss": 10.1253, "step": 14270 }, { "epoch": 0.5948480680255096, "grad_norm": 390.0, "learning_rate": 3.721396435785146e-05, "loss": 15.0009, "step": 14271 }, { "epoch": 0.5948897503230378, "grad_norm": 684.0, "learning_rate": 3.720743889125813e-05, "loss": 18.8807, "step": 14272 }, { "epoch": 0.594931432620566, "grad_norm": 132.0, "learning_rate": 3.7200913657810375e-05, "loss": 10.0007, "step": 14273 }, { "epoch": 0.5949731149180942, "grad_norm": 1608.0, "learning_rate": 3.71943886576271e-05, "loss": 31.5092, "step": 14274 }, { "epoch": 0.5950147972156226, "grad_norm": 219.0, "learning_rate": 3.718786389082724e-05, "loss": 12.1877, "step": 14275 }, { "epoch": 0.5950564795131508, "grad_norm": 454.0, "learning_rate": 3.718133935752968e-05, "loss": 13.5004, "step": 14276 }, { "epoch": 0.595098161810679, "grad_norm": 145.0, "learning_rate": 3.7174815057853376e-05, "loss": 10.1252, "step": 14277 }, { "epoch": 0.5951398441082072, "grad_norm": 340.0, "learning_rate": 3.7168290991917174e-05, "loss": 13.0004, "step": 14278 }, { "epoch": 0.5951815264057355, "grad_norm": 428.0, "learning_rate": 3.7161767159840034e-05, "loss": 15.3753, "step": 14279 }, { "epoch": 0.5952232087032637, "grad_norm": 576.0, "learning_rate": 3.71552435617408e-05, "loss": 18.7505, "step": 14280 }, { "epoch": 0.5952648910007919, "grad_norm": 512.0, "learning_rate": 3.7148720197738426e-05, "loss": 17.3751, "step": 14281 }, { "epoch": 0.5953065732983202, "grad_norm": 141.0, "learning_rate": 3.714219706795173e-05, "loss": 9.2503, "step": 14282 }, { "epoch": 0.5953482555958485, "grad_norm": 508.0, "learning_rate": 3.713567417249967e-05, "loss": 14.5036, "step": 14283 }, { "epoch": 0.5953899378933767, "grad_norm": 446.0, "learning_rate": 3.712915151150106e-05, "loss": 15.5004, "step": 14284 }, { "epoch": 0.5954316201909049, "grad_norm": 105.5, "learning_rate": 3.7122629085074826e-05, "loss": 6.8129, "step": 14285 }, { "epoch": 0.5954733024884331, "grad_norm": 580.0, "learning_rate": 3.711610689333982e-05, "loss": 19.6251, "step": 14286 }, { "epoch": 0.5955149847859614, "grad_norm": 306.0, "learning_rate": 3.71095849364149e-05, "loss": 13.5003, "step": 14287 }, { "epoch": 0.5955566670834896, "grad_norm": 304.0, "learning_rate": 3.710306321441893e-05, "loss": 11.814, "step": 14288 }, { "epoch": 0.5955983493810179, "grad_norm": 1048.0, "learning_rate": 3.70965417274708e-05, "loss": 26.0001, "step": 14289 }, { "epoch": 0.5956400316785461, "grad_norm": 112.5, "learning_rate": 3.7090020475689326e-05, "loss": 10.2507, "step": 14290 }, { "epoch": 0.5956817139760744, "grad_norm": 245.0, "learning_rate": 3.708349945919339e-05, "loss": 12.4391, "step": 14291 }, { "epoch": 0.5957233962736026, "grad_norm": 506.0, "learning_rate": 3.7076978678101805e-05, "loss": 16.7502, "step": 14292 }, { "epoch": 0.5957650785711308, "grad_norm": 221.0, "learning_rate": 3.7070458132533445e-05, "loss": 9.3135, "step": 14293 }, { "epoch": 0.5958067608686591, "grad_norm": 168.0, "learning_rate": 3.706393782260712e-05, "loss": 9.9378, "step": 14294 }, { "epoch": 0.5958484431661873, "grad_norm": 536.0, "learning_rate": 3.7057417748441695e-05, "loss": 17.5002, "step": 14295 }, { "epoch": 0.5958901254637156, "grad_norm": 244.0, "learning_rate": 3.705089791015596e-05, "loss": 13.4377, "step": 14296 }, { "epoch": 0.5959318077612438, "grad_norm": 247.0, "learning_rate": 3.7044378307868775e-05, "loss": 12.6253, "step": 14297 }, { "epoch": 0.5959734900587721, "grad_norm": 231.0, "learning_rate": 3.703785894169895e-05, "loss": 12.5002, "step": 14298 }, { "epoch": 0.5960151723563003, "grad_norm": 298.0, "learning_rate": 3.703133981176527e-05, "loss": 10.9378, "step": 14299 }, { "epoch": 0.5960568546538285, "grad_norm": 84.5, "learning_rate": 3.702482091818659e-05, "loss": 8.0626, "step": 14300 }, { "epoch": 0.5960985369513567, "grad_norm": 280.0, "learning_rate": 3.7018302261081714e-05, "loss": 11.5664, "step": 14301 }, { "epoch": 0.596140219248885, "grad_norm": 424.0, "learning_rate": 3.7011783840569405e-05, "loss": 15.5001, "step": 14302 }, { "epoch": 0.5961819015464133, "grad_norm": 386.0, "learning_rate": 3.700526565676852e-05, "loss": 15.5005, "step": 14303 }, { "epoch": 0.5962235838439415, "grad_norm": 228.0, "learning_rate": 3.6998747709797784e-05, "loss": 13.813, "step": 14304 }, { "epoch": 0.5962652661414697, "grad_norm": 604.0, "learning_rate": 3.699222999977606e-05, "loss": 18.2502, "step": 14305 }, { "epoch": 0.596306948438998, "grad_norm": 378.0, "learning_rate": 3.698571252682208e-05, "loss": 14.2507, "step": 14306 }, { "epoch": 0.5963486307365262, "grad_norm": 302.0, "learning_rate": 3.6979195291054656e-05, "loss": 13.4377, "step": 14307 }, { "epoch": 0.5963903130340544, "grad_norm": 251.0, "learning_rate": 3.697267829259254e-05, "loss": 12.7502, "step": 14308 }, { "epoch": 0.5964319953315826, "grad_norm": 404.0, "learning_rate": 3.696616153155452e-05, "loss": 14.3128, "step": 14309 }, { "epoch": 0.596473677629111, "grad_norm": 198.0, "learning_rate": 3.695964500805938e-05, "loss": 11.2502, "step": 14310 }, { "epoch": 0.5965153599266392, "grad_norm": 604.0, "learning_rate": 3.695312872222585e-05, "loss": 20.1254, "step": 14311 }, { "epoch": 0.5965570422241674, "grad_norm": 136.0, "learning_rate": 3.69466126741727e-05, "loss": 10.3149, "step": 14312 }, { "epoch": 0.5965987245216956, "grad_norm": 190.0, "learning_rate": 3.694009686401872e-05, "loss": 11.1255, "step": 14313 }, { "epoch": 0.5966404068192239, "grad_norm": 628.0, "learning_rate": 3.693358129188261e-05, "loss": 19.5017, "step": 14314 }, { "epoch": 0.5966820891167521, "grad_norm": 78.0, "learning_rate": 3.692706595788316e-05, "loss": 6.9689, "step": 14315 }, { "epoch": 0.5967237714142803, "grad_norm": 324.0, "learning_rate": 3.692055086213907e-05, "loss": 11.7501, "step": 14316 }, { "epoch": 0.5967654537118086, "grad_norm": 49.75, "learning_rate": 3.691403600476914e-05, "loss": 7.6253, "step": 14317 }, { "epoch": 0.5968071360093369, "grad_norm": 316.0, "learning_rate": 3.6907521385892025e-05, "loss": 12.7516, "step": 14318 }, { "epoch": 0.5968488183068651, "grad_norm": 247.0, "learning_rate": 3.690100700562652e-05, "loss": 12.2502, "step": 14319 }, { "epoch": 0.5968905006043933, "grad_norm": 404.0, "learning_rate": 3.689449286409131e-05, "loss": 15.6878, "step": 14320 }, { "epoch": 0.5969321829019215, "grad_norm": 540.0, "learning_rate": 3.6887978961405146e-05, "loss": 18.6251, "step": 14321 }, { "epoch": 0.5969738651994498, "grad_norm": 226.0, "learning_rate": 3.6881465297686714e-05, "loss": 12.2504, "step": 14322 }, { "epoch": 0.597015547496978, "grad_norm": 448.0, "learning_rate": 3.687495187305475e-05, "loss": 14.7503, "step": 14323 }, { "epoch": 0.5970572297945063, "grad_norm": 620.0, "learning_rate": 3.686843868762795e-05, "loss": 18.8808, "step": 14324 }, { "epoch": 0.5970989120920345, "grad_norm": 344.0, "learning_rate": 3.686192574152502e-05, "loss": 14.8128, "step": 14325 }, { "epoch": 0.5971405943895628, "grad_norm": 724.0, "learning_rate": 3.685541303486465e-05, "loss": 20.5003, "step": 14326 }, { "epoch": 0.597182276687091, "grad_norm": 149.0, "learning_rate": 3.6848900567765574e-05, "loss": 8.6252, "step": 14327 }, { "epoch": 0.5972239589846192, "grad_norm": 346.0, "learning_rate": 3.684238834034642e-05, "loss": 14.2502, "step": 14328 }, { "epoch": 0.5972656412821474, "grad_norm": 876.0, "learning_rate": 3.683587635272594e-05, "loss": 22.2507, "step": 14329 }, { "epoch": 0.5973073235796758, "grad_norm": 308.0, "learning_rate": 3.682936460502275e-05, "loss": 13.9383, "step": 14330 }, { "epoch": 0.597349005877204, "grad_norm": 372.0, "learning_rate": 3.682285309735558e-05, "loss": 14.8128, "step": 14331 }, { "epoch": 0.5973906881747322, "grad_norm": 532.0, "learning_rate": 3.681634182984307e-05, "loss": 18.8752, "step": 14332 }, { "epoch": 0.5974323704722604, "grad_norm": 462.0, "learning_rate": 3.680983080260392e-05, "loss": 16.6257, "step": 14333 }, { "epoch": 0.5974740527697887, "grad_norm": 972.0, "learning_rate": 3.6803320015756746e-05, "loss": 24.7504, "step": 14334 }, { "epoch": 0.5975157350673169, "grad_norm": 540.0, "learning_rate": 3.679680946942024e-05, "loss": 17.8752, "step": 14335 }, { "epoch": 0.5975574173648451, "grad_norm": 700.0, "learning_rate": 3.6790299163713074e-05, "loss": 18.5034, "step": 14336 }, { "epoch": 0.5975990996623733, "grad_norm": 258.0, "learning_rate": 3.6783789098753865e-05, "loss": 13.0001, "step": 14337 }, { "epoch": 0.5976407819599017, "grad_norm": 110.0, "learning_rate": 3.677727927466127e-05, "loss": 9.6253, "step": 14338 }, { "epoch": 0.5976824642574299, "grad_norm": 169.0, "learning_rate": 3.677076969155395e-05, "loss": 10.8126, "step": 14339 }, { "epoch": 0.5977241465549581, "grad_norm": 1088.0, "learning_rate": 3.676426034955051e-05, "loss": 23.3754, "step": 14340 }, { "epoch": 0.5977658288524863, "grad_norm": 1472.0, "learning_rate": 3.6757751248769614e-05, "loss": 30.7545, "step": 14341 }, { "epoch": 0.5978075111500146, "grad_norm": 93.5, "learning_rate": 3.675124238932986e-05, "loss": 8.1253, "step": 14342 }, { "epoch": 0.5978491934475428, "grad_norm": 246.0, "learning_rate": 3.674473377134991e-05, "loss": 4.5315, "step": 14343 }, { "epoch": 0.597890875745071, "grad_norm": 434.0, "learning_rate": 3.6738225394948344e-05, "loss": 16.6256, "step": 14344 }, { "epoch": 0.5979325580425993, "grad_norm": 167.0, "learning_rate": 3.673171726024381e-05, "loss": 11.0628, "step": 14345 }, { "epoch": 0.5979742403401276, "grad_norm": 548.0, "learning_rate": 3.672520936735489e-05, "loss": 17.5003, "step": 14346 }, { "epoch": 0.5980159226376558, "grad_norm": 143.0, "learning_rate": 3.671870171640023e-05, "loss": 9.5003, "step": 14347 }, { "epoch": 0.598057604935184, "grad_norm": 127.5, "learning_rate": 3.6712194307498406e-05, "loss": 8.5002, "step": 14348 }, { "epoch": 0.5980992872327122, "grad_norm": 170.0, "learning_rate": 3.6705687140768014e-05, "loss": 8.9382, "step": 14349 }, { "epoch": 0.5981409695302405, "grad_norm": 201.0, "learning_rate": 3.669918021632764e-05, "loss": 12.0003, "step": 14350 }, { "epoch": 0.5981826518277688, "grad_norm": 364.0, "learning_rate": 3.6692673534295916e-05, "loss": 10.44, "step": 14351 }, { "epoch": 0.598224334125297, "grad_norm": 736.0, "learning_rate": 3.668616709479138e-05, "loss": 22.5011, "step": 14352 }, { "epoch": 0.5982660164228252, "grad_norm": 137.0, "learning_rate": 3.6679660897932646e-05, "loss": 7.8757, "step": 14353 }, { "epoch": 0.5983076987203535, "grad_norm": 482.0, "learning_rate": 3.667315494383825e-05, "loss": 15.4383, "step": 14354 }, { "epoch": 0.5983493810178817, "grad_norm": 424.0, "learning_rate": 3.666664923262682e-05, "loss": 15.9377, "step": 14355 }, { "epoch": 0.5983910633154099, "grad_norm": 346.0, "learning_rate": 3.6660143764416866e-05, "loss": 15.1254, "step": 14356 }, { "epoch": 0.5984327456129381, "grad_norm": 394.0, "learning_rate": 3.6653638539326994e-05, "loss": 13.8145, "step": 14357 }, { "epoch": 0.5984744279104665, "grad_norm": 238.0, "learning_rate": 3.6647133557475724e-05, "loss": 12.4378, "step": 14358 }, { "epoch": 0.5985161102079947, "grad_norm": 564.0, "learning_rate": 3.664062881898165e-05, "loss": 17.7501, "step": 14359 }, { "epoch": 0.5985577925055229, "grad_norm": 160.0, "learning_rate": 3.663412432396328e-05, "loss": 10.8128, "step": 14360 }, { "epoch": 0.5985994748030511, "grad_norm": 256.0, "learning_rate": 3.662762007253919e-05, "loss": 9.8771, "step": 14361 }, { "epoch": 0.5986411571005794, "grad_norm": 490.0, "learning_rate": 3.662111606482791e-05, "loss": 17.0001, "step": 14362 }, { "epoch": 0.5986828393981076, "grad_norm": 616.0, "learning_rate": 3.6614612300947994e-05, "loss": 19.5033, "step": 14363 }, { "epoch": 0.5987245216956358, "grad_norm": 960.0, "learning_rate": 3.660810878101794e-05, "loss": 22.3753, "step": 14364 }, { "epoch": 0.598766203993164, "grad_norm": 280.0, "learning_rate": 3.66016055051563e-05, "loss": 12.1253, "step": 14365 }, { "epoch": 0.5988078862906924, "grad_norm": 540.0, "learning_rate": 3.659510247348158e-05, "loss": 17.8768, "step": 14366 }, { "epoch": 0.5988495685882206, "grad_norm": 104.0, "learning_rate": 3.658859968611232e-05, "loss": 8.8133, "step": 14367 }, { "epoch": 0.5988912508857488, "grad_norm": 276.0, "learning_rate": 3.658209714316701e-05, "loss": 13.4379, "step": 14368 }, { "epoch": 0.5989329331832771, "grad_norm": 264.0, "learning_rate": 3.6575594844764186e-05, "loss": 11.6252, "step": 14369 }, { "epoch": 0.5989746154808053, "grad_norm": 314.0, "learning_rate": 3.656909279102232e-05, "loss": 12.7502, "step": 14370 }, { "epoch": 0.5990162977783335, "grad_norm": 225.0, "learning_rate": 3.656259098205995e-05, "loss": 12.0632, "step": 14371 }, { "epoch": 0.5990579800758618, "grad_norm": 302.0, "learning_rate": 3.6556089417995524e-05, "loss": 13.3126, "step": 14372 }, { "epoch": 0.5990996623733901, "grad_norm": 450.0, "learning_rate": 3.6549588098947584e-05, "loss": 14.2504, "step": 14373 }, { "epoch": 0.5991413446709183, "grad_norm": 103.5, "learning_rate": 3.6543087025034584e-05, "loss": 9.0634, "step": 14374 }, { "epoch": 0.5991830269684465, "grad_norm": 508.0, "learning_rate": 3.653658619637502e-05, "loss": 17.3752, "step": 14375 }, { "epoch": 0.5992247092659747, "grad_norm": 202.0, "learning_rate": 3.6530085613087363e-05, "loss": 9.7502, "step": 14376 }, { "epoch": 0.599266391563503, "grad_norm": 572.0, "learning_rate": 3.6523585275290115e-05, "loss": 16.2543, "step": 14377 }, { "epoch": 0.5993080738610312, "grad_norm": 241.0, "learning_rate": 3.651708518310169e-05, "loss": 13.1879, "step": 14378 }, { "epoch": 0.5993497561585595, "grad_norm": 344.0, "learning_rate": 3.6510585336640615e-05, "loss": 14.8129, "step": 14379 }, { "epoch": 0.5993914384560877, "grad_norm": 628.0, "learning_rate": 3.650408573602529e-05, "loss": 19.6293, "step": 14380 }, { "epoch": 0.599433120753616, "grad_norm": 616.0, "learning_rate": 3.649758638137423e-05, "loss": 19.5001, "step": 14381 }, { "epoch": 0.5994748030511442, "grad_norm": 344.0, "learning_rate": 3.649108727280583e-05, "loss": 14.2502, "step": 14382 }, { "epoch": 0.5995164853486724, "grad_norm": 1160.0, "learning_rate": 3.648458841043858e-05, "loss": 31.2512, "step": 14383 }, { "epoch": 0.5995581676462006, "grad_norm": 248.0, "learning_rate": 3.6478089794390895e-05, "loss": 11.5007, "step": 14384 }, { "epoch": 0.599599849943729, "grad_norm": 380.0, "learning_rate": 3.647159142478123e-05, "loss": 15.3752, "step": 14385 }, { "epoch": 0.5996415322412572, "grad_norm": 600.0, "learning_rate": 3.6465093301728015e-05, "loss": 19.5007, "step": 14386 }, { "epoch": 0.5996832145387854, "grad_norm": 420.0, "learning_rate": 3.645859542534967e-05, "loss": 14.7503, "step": 14387 }, { "epoch": 0.5997248968363136, "grad_norm": 172.0, "learning_rate": 3.645209779576462e-05, "loss": 10.876, "step": 14388 }, { "epoch": 0.5997665791338419, "grad_norm": 266.0, "learning_rate": 3.6445600413091316e-05, "loss": 12.1252, "step": 14389 }, { "epoch": 0.5998082614313701, "grad_norm": 115.5, "learning_rate": 3.643910327744812e-05, "loss": 9.0638, "step": 14390 }, { "epoch": 0.5998499437288983, "grad_norm": 122.5, "learning_rate": 3.6432606388953495e-05, "loss": 8.8139, "step": 14391 }, { "epoch": 0.5998916260264265, "grad_norm": 69.5, "learning_rate": 3.642610974772579e-05, "loss": 8.1878, "step": 14392 }, { "epoch": 0.5999333083239549, "grad_norm": 492.0, "learning_rate": 3.6419613353883476e-05, "loss": 17.2504, "step": 14393 }, { "epoch": 0.5999749906214831, "grad_norm": 420.0, "learning_rate": 3.641311720754489e-05, "loss": 13.2508, "step": 14394 }, { "epoch": 0.6000166729190113, "grad_norm": 139.0, "learning_rate": 3.640662130882847e-05, "loss": 8.8131, "step": 14395 }, { "epoch": 0.6000583552165395, "grad_norm": 356.0, "learning_rate": 3.6400125657852556e-05, "loss": 15.6253, "step": 14396 }, { "epoch": 0.6001000375140678, "grad_norm": 334.0, "learning_rate": 3.639363025473559e-05, "loss": 12.6876, "step": 14397 }, { "epoch": 0.600141719811596, "grad_norm": 63.5, "learning_rate": 3.6387135099595894e-05, "loss": 7.3753, "step": 14398 }, { "epoch": 0.6001834021091242, "grad_norm": 540.0, "learning_rate": 3.638064019255187e-05, "loss": 17.2502, "step": 14399 }, { "epoch": 0.6002250844066525, "grad_norm": 129.0, "learning_rate": 3.6374145533721895e-05, "loss": 10.1255, "step": 14400 }, { "epoch": 0.6002667667041808, "grad_norm": 360.0, "learning_rate": 3.6367651123224334e-05, "loss": 14.2503, "step": 14401 }, { "epoch": 0.600308449001709, "grad_norm": 648.0, "learning_rate": 3.636115696117753e-05, "loss": 19.5004, "step": 14402 }, { "epoch": 0.6003501312992372, "grad_norm": 588.0, "learning_rate": 3.6354663047699866e-05, "loss": 18.8756, "step": 14403 }, { "epoch": 0.6003918135967654, "grad_norm": 936.0, "learning_rate": 3.634816938290966e-05, "loss": 30.0005, "step": 14404 }, { "epoch": 0.6004334958942937, "grad_norm": 458.0, "learning_rate": 3.63416759669253e-05, "loss": 16.7506, "step": 14405 }, { "epoch": 0.600475178191822, "grad_norm": 382.0, "learning_rate": 3.6335182799865086e-05, "loss": 15.5003, "step": 14406 }, { "epoch": 0.6005168604893502, "grad_norm": 478.0, "learning_rate": 3.6328689881847404e-05, "loss": 17.0003, "step": 14407 }, { "epoch": 0.6005585427868784, "grad_norm": 488.0, "learning_rate": 3.6322197212990535e-05, "loss": 16.1256, "step": 14408 }, { "epoch": 0.6006002250844067, "grad_norm": 620.0, "learning_rate": 3.631570479341286e-05, "loss": 18.8758, "step": 14409 }, { "epoch": 0.6006419073819349, "grad_norm": 88.0, "learning_rate": 3.630921262323266e-05, "loss": 9.0632, "step": 14410 }, { "epoch": 0.6006835896794631, "grad_norm": 276.0, "learning_rate": 3.6302720702568295e-05, "loss": 12.0626, "step": 14411 }, { "epoch": 0.6007252719769913, "grad_norm": 230.0, "learning_rate": 3.629622903153805e-05, "loss": 10.7502, "step": 14412 }, { "epoch": 0.6007669542745196, "grad_norm": 254.0, "learning_rate": 3.628973761026024e-05, "loss": 11.7504, "step": 14413 }, { "epoch": 0.6008086365720479, "grad_norm": 198.0, "learning_rate": 3.628324643885318e-05, "loss": 11.3128, "step": 14414 }, { "epoch": 0.6008503188695761, "grad_norm": 278.0, "learning_rate": 3.627675551743519e-05, "loss": 12.1254, "step": 14415 }, { "epoch": 0.6008920011671043, "grad_norm": 644.0, "learning_rate": 3.6270264846124534e-05, "loss": 17.7528, "step": 14416 }, { "epoch": 0.6009336834646326, "grad_norm": 182.0, "learning_rate": 3.626377442503953e-05, "loss": 9.5634, "step": 14417 }, { "epoch": 0.6009753657621608, "grad_norm": 97.5, "learning_rate": 3.625728425429844e-05, "loss": 6.969, "step": 14418 }, { "epoch": 0.601017048059689, "grad_norm": 209.0, "learning_rate": 3.625079433401959e-05, "loss": 11.5004, "step": 14419 }, { "epoch": 0.6010587303572172, "grad_norm": 1288.0, "learning_rate": 3.6244304664321204e-05, "loss": 27.1297, "step": 14420 }, { "epoch": 0.6011004126547456, "grad_norm": 352.0, "learning_rate": 3.6237815245321614e-05, "loss": 15.2503, "step": 14421 }, { "epoch": 0.6011420949522738, "grad_norm": 386.0, "learning_rate": 3.623132607713904e-05, "loss": 15.8753, "step": 14422 }, { "epoch": 0.601183777249802, "grad_norm": 176.0, "learning_rate": 3.622483715989178e-05, "loss": 11.1877, "step": 14423 }, { "epoch": 0.6012254595473302, "grad_norm": 298.0, "learning_rate": 3.621834849369809e-05, "loss": 14.001, "step": 14424 }, { "epoch": 0.6012671418448585, "grad_norm": 253.0, "learning_rate": 3.621186007867622e-05, "loss": 12.4385, "step": 14425 }, { "epoch": 0.6013088241423867, "grad_norm": 564.0, "learning_rate": 3.620537191494441e-05, "loss": 16.7507, "step": 14426 }, { "epoch": 0.601350506439915, "grad_norm": 146.0, "learning_rate": 3.619888400262095e-05, "loss": 11.0628, "step": 14427 }, { "epoch": 0.6013921887374432, "grad_norm": 256.0, "learning_rate": 3.619239634182403e-05, "loss": 12.0628, "step": 14428 }, { "epoch": 0.6014338710349715, "grad_norm": 412.0, "learning_rate": 3.618590893267193e-05, "loss": 14.8751, "step": 14429 }, { "epoch": 0.6014755533324997, "grad_norm": 2272.0, "learning_rate": 3.617942177528284e-05, "loss": 43.5045, "step": 14430 }, { "epoch": 0.6015172356300279, "grad_norm": 370.0, "learning_rate": 3.6172934869775044e-05, "loss": 16.1254, "step": 14431 }, { "epoch": 0.6015589179275561, "grad_norm": 656.0, "learning_rate": 3.616644821626671e-05, "loss": 20.0003, "step": 14432 }, { "epoch": 0.6016006002250844, "grad_norm": 348.0, "learning_rate": 3.615996181487612e-05, "loss": 15.1253, "step": 14433 }, { "epoch": 0.6016422825226126, "grad_norm": 151.0, "learning_rate": 3.615347566572143e-05, "loss": 9.5627, "step": 14434 }, { "epoch": 0.6016839648201409, "grad_norm": 336.0, "learning_rate": 3.614698976892088e-05, "loss": 13.6252, "step": 14435 }, { "epoch": 0.6017256471176691, "grad_norm": 440.0, "learning_rate": 3.6140504124592666e-05, "loss": 17.2502, "step": 14436 }, { "epoch": 0.6017673294151974, "grad_norm": 109.5, "learning_rate": 3.6134018732855e-05, "loss": 10.7504, "step": 14437 }, { "epoch": 0.6018090117127256, "grad_norm": 390.0, "learning_rate": 3.612753359382606e-05, "loss": 14.8754, "step": 14438 }, { "epoch": 0.6018506940102538, "grad_norm": 368.0, "learning_rate": 3.6121048707624084e-05, "loss": 14.1878, "step": 14439 }, { "epoch": 0.6018923763077821, "grad_norm": 396.0, "learning_rate": 3.61145640743672e-05, "loss": 15.2502, "step": 14440 }, { "epoch": 0.6019340586053104, "grad_norm": 336.0, "learning_rate": 3.610807969417364e-05, "loss": 12.1257, "step": 14441 }, { "epoch": 0.6019757409028386, "grad_norm": 442.0, "learning_rate": 3.6101595567161525e-05, "loss": 13.8778, "step": 14442 }, { "epoch": 0.6020174232003668, "grad_norm": 1012.0, "learning_rate": 3.60951116934491e-05, "loss": 23.2548, "step": 14443 }, { "epoch": 0.6020591054978951, "grad_norm": 920.0, "learning_rate": 3.608862807315448e-05, "loss": 20.2551, "step": 14444 }, { "epoch": 0.6021007877954233, "grad_norm": 332.0, "learning_rate": 3.6082144706395864e-05, "loss": 14.3753, "step": 14445 }, { "epoch": 0.6021424700929515, "grad_norm": 211.0, "learning_rate": 3.607566159329138e-05, "loss": 11.0631, "step": 14446 }, { "epoch": 0.6021841523904797, "grad_norm": 1312.0, "learning_rate": 3.606917873395922e-05, "loss": 29.38, "step": 14447 }, { "epoch": 0.602225834688008, "grad_norm": 494.0, "learning_rate": 3.606269612851749e-05, "loss": 15.9408, "step": 14448 }, { "epoch": 0.6022675169855363, "grad_norm": 366.0, "learning_rate": 3.605621377708437e-05, "loss": 12.8751, "step": 14449 }, { "epoch": 0.6023091992830645, "grad_norm": 336.0, "learning_rate": 3.604973167977799e-05, "loss": 14.7507, "step": 14450 }, { "epoch": 0.6023508815805927, "grad_norm": 117.5, "learning_rate": 3.6043249836716494e-05, "loss": 8.438, "step": 14451 }, { "epoch": 0.602392563878121, "grad_norm": 424.0, "learning_rate": 3.6036768248017996e-05, "loss": 15.6878, "step": 14452 }, { "epoch": 0.6024342461756492, "grad_norm": 212.0, "learning_rate": 3.603028691380066e-05, "loss": 10.9378, "step": 14453 }, { "epoch": 0.6024759284731774, "grad_norm": 155.0, "learning_rate": 3.6023805834182555e-05, "loss": 10.6878, "step": 14454 }, { "epoch": 0.6025176107707056, "grad_norm": 133.0, "learning_rate": 3.6017325009281855e-05, "loss": 9.6253, "step": 14455 }, { "epoch": 0.602559293068234, "grad_norm": 216.0, "learning_rate": 3.601084443921663e-05, "loss": 10.938, "step": 14456 }, { "epoch": 0.6026009753657622, "grad_norm": 138.0, "learning_rate": 3.600436412410503e-05, "loss": 11.7517, "step": 14457 }, { "epoch": 0.6026426576632904, "grad_norm": 136.0, "learning_rate": 3.599788406406511e-05, "loss": 10.4378, "step": 14458 }, { "epoch": 0.6026843399608186, "grad_norm": 1080.0, "learning_rate": 3.599140425921502e-05, "loss": 31.3751, "step": 14459 }, { "epoch": 0.6027260222583469, "grad_norm": 196.0, "learning_rate": 3.5984924709672806e-05, "loss": 10.7507, "step": 14460 }, { "epoch": 0.6027677045558751, "grad_norm": 430.0, "learning_rate": 3.59784454155566e-05, "loss": 17.0003, "step": 14461 }, { "epoch": 0.6028093868534034, "grad_norm": 250.0, "learning_rate": 3.597196637698447e-05, "loss": 11.1878, "step": 14462 }, { "epoch": 0.6028510691509316, "grad_norm": 310.0, "learning_rate": 3.596548759407449e-05, "loss": 13.3127, "step": 14463 }, { "epoch": 0.6028927514484599, "grad_norm": 67.5, "learning_rate": 3.595900906694474e-05, "loss": 7.7502, "step": 14464 }, { "epoch": 0.6029344337459881, "grad_norm": 94.5, "learning_rate": 3.5952530795713315e-05, "loss": 10.6263, "step": 14465 }, { "epoch": 0.6029761160435163, "grad_norm": 122.0, "learning_rate": 3.5946052780498245e-05, "loss": 10.5631, "step": 14466 }, { "epoch": 0.6030177983410445, "grad_norm": 880.0, "learning_rate": 3.593957502141763e-05, "loss": 23.6263, "step": 14467 }, { "epoch": 0.6030594806385728, "grad_norm": 328.0, "learning_rate": 3.5933097518589486e-05, "loss": 12.3129, "step": 14468 }, { "epoch": 0.603101162936101, "grad_norm": 340.0, "learning_rate": 3.592662027213192e-05, "loss": 15.3134, "step": 14469 }, { "epoch": 0.6031428452336293, "grad_norm": 92.0, "learning_rate": 3.592014328216292e-05, "loss": 7.5003, "step": 14470 }, { "epoch": 0.6031845275311575, "grad_norm": 154.0, "learning_rate": 3.591366654880057e-05, "loss": 9.0627, "step": 14471 }, { "epoch": 0.6032262098286858, "grad_norm": 536.0, "learning_rate": 3.590719007216289e-05, "loss": 17.376, "step": 14472 }, { "epoch": 0.603267892126214, "grad_norm": 438.0, "learning_rate": 3.590071385236793e-05, "loss": 16.7505, "step": 14473 }, { "epoch": 0.6033095744237422, "grad_norm": 278.0, "learning_rate": 3.5894237889533714e-05, "loss": 10.3776, "step": 14474 }, { "epoch": 0.6033512567212704, "grad_norm": 256.0, "learning_rate": 3.588776218377825e-05, "loss": 12.6877, "step": 14475 }, { "epoch": 0.6033929390187988, "grad_norm": 175.0, "learning_rate": 3.588128673521958e-05, "loss": 10.0001, "step": 14476 }, { "epoch": 0.603434621316327, "grad_norm": 648.0, "learning_rate": 3.587481154397573e-05, "loss": 20.2509, "step": 14477 }, { "epoch": 0.6034763036138552, "grad_norm": 188.0, "learning_rate": 3.5868336610164665e-05, "loss": 10.8761, "step": 14478 }, { "epoch": 0.6035179859113834, "grad_norm": 162.0, "learning_rate": 3.5861861933904446e-05, "loss": 5.4378, "step": 14479 }, { "epoch": 0.6035596682089117, "grad_norm": 400.0, "learning_rate": 3.585538751531302e-05, "loss": 15.0001, "step": 14480 }, { "epoch": 0.6036013505064399, "grad_norm": 1800.0, "learning_rate": 3.5848913354508446e-05, "loss": 35.2547, "step": 14481 }, { "epoch": 0.6036430328039681, "grad_norm": 804.0, "learning_rate": 3.5842439451608654e-05, "loss": 23.5001, "step": 14482 }, { "epoch": 0.6036847151014963, "grad_norm": 1016.0, "learning_rate": 3.583596580673168e-05, "loss": 22.8795, "step": 14483 }, { "epoch": 0.6037263973990247, "grad_norm": 378.0, "learning_rate": 3.582949241999547e-05, "loss": 12.8752, "step": 14484 }, { "epoch": 0.6037680796965529, "grad_norm": 153.0, "learning_rate": 3.5823019291518035e-05, "loss": 7.5629, "step": 14485 }, { "epoch": 0.6038097619940811, "grad_norm": 488.0, "learning_rate": 3.5816546421417313e-05, "loss": 15.8126, "step": 14486 }, { "epoch": 0.6038514442916093, "grad_norm": 183.0, "learning_rate": 3.58100738098113e-05, "loss": 11.9381, "step": 14487 }, { "epoch": 0.6038931265891376, "grad_norm": 118.5, "learning_rate": 3.5803601456817947e-05, "loss": 10.0003, "step": 14488 }, { "epoch": 0.6039348088866658, "grad_norm": 260.0, "learning_rate": 3.579712936255523e-05, "loss": 13.7504, "step": 14489 }, { "epoch": 0.603976491184194, "grad_norm": 468.0, "learning_rate": 3.579065752714108e-05, "loss": 14.5631, "step": 14490 }, { "epoch": 0.6040181734817223, "grad_norm": 700.0, "learning_rate": 3.578418595069347e-05, "loss": 19.5005, "step": 14491 }, { "epoch": 0.6040598557792506, "grad_norm": 416.0, "learning_rate": 3.5777714633330315e-05, "loss": 14.9386, "step": 14492 }, { "epoch": 0.6041015380767788, "grad_norm": 348.0, "learning_rate": 3.5771243575169596e-05, "loss": 13.7509, "step": 14493 }, { "epoch": 0.604143220374307, "grad_norm": 231.0, "learning_rate": 3.57647727763292e-05, "loss": 11.6877, "step": 14494 }, { "epoch": 0.6041849026718352, "grad_norm": 274.0, "learning_rate": 3.575830223692711e-05, "loss": 12.7502, "step": 14495 }, { "epoch": 0.6042265849693635, "grad_norm": 308.0, "learning_rate": 3.5751831957081206e-05, "loss": 12.5628, "step": 14496 }, { "epoch": 0.6042682672668918, "grad_norm": 422.0, "learning_rate": 3.574536193690945e-05, "loss": 16.8756, "step": 14497 }, { "epoch": 0.60430994956442, "grad_norm": 520.0, "learning_rate": 3.573889217652971e-05, "loss": 18.2502, "step": 14498 }, { "epoch": 0.6043516318619482, "grad_norm": 221.0, "learning_rate": 3.573242267605995e-05, "loss": 10.6263, "step": 14499 }, { "epoch": 0.6043933141594765, "grad_norm": 224.0, "learning_rate": 3.572595343561804e-05, "loss": 12.3129, "step": 14500 }, { "epoch": 0.6044349964570047, "grad_norm": 290.0, "learning_rate": 3.57194844553219e-05, "loss": 14.1255, "step": 14501 }, { "epoch": 0.6044766787545329, "grad_norm": 292.0, "learning_rate": 3.5713015735289416e-05, "loss": 12.5006, "step": 14502 }, { "epoch": 0.6045183610520611, "grad_norm": 700.0, "learning_rate": 3.570654727563851e-05, "loss": 21.0003, "step": 14503 }, { "epoch": 0.6045600433495895, "grad_norm": 251.0, "learning_rate": 3.5700079076487024e-05, "loss": 12.8759, "step": 14504 }, { "epoch": 0.6046017256471177, "grad_norm": 292.0, "learning_rate": 3.5693611137952885e-05, "loss": 11.5011, "step": 14505 }, { "epoch": 0.6046434079446459, "grad_norm": 436.0, "learning_rate": 3.568714346015394e-05, "loss": 16.5002, "step": 14506 }, { "epoch": 0.6046850902421741, "grad_norm": 232.0, "learning_rate": 3.568067604320809e-05, "loss": 12.252, "step": 14507 }, { "epoch": 0.6047267725397024, "grad_norm": 316.0, "learning_rate": 3.567420888723317e-05, "loss": 12.627, "step": 14508 }, { "epoch": 0.6047684548372306, "grad_norm": 660.0, "learning_rate": 3.566774199234709e-05, "loss": 19.7506, "step": 14509 }, { "epoch": 0.6048101371347588, "grad_norm": 462.0, "learning_rate": 3.566127535866767e-05, "loss": 17.501, "step": 14510 }, { "epoch": 0.604851819432287, "grad_norm": 250.0, "learning_rate": 3.565480898631277e-05, "loss": 13.1257, "step": 14511 }, { "epoch": 0.6048935017298154, "grad_norm": 350.0, "learning_rate": 3.564834287540027e-05, "loss": 13.8143, "step": 14512 }, { "epoch": 0.6049351840273436, "grad_norm": 408.0, "learning_rate": 3.564187702604798e-05, "loss": 16.0003, "step": 14513 }, { "epoch": 0.6049768663248718, "grad_norm": 196.0, "learning_rate": 3.5635411438373755e-05, "loss": 10.688, "step": 14514 }, { "epoch": 0.6050185486224001, "grad_norm": 79.5, "learning_rate": 3.562894611249545e-05, "loss": 9.8759, "step": 14515 }, { "epoch": 0.6050602309199283, "grad_norm": 512.0, "learning_rate": 3.5622481048530856e-05, "loss": 17.8754, "step": 14516 }, { "epoch": 0.6051019132174565, "grad_norm": 241.0, "learning_rate": 3.5616016246597837e-05, "loss": 12.2502, "step": 14517 }, { "epoch": 0.6051435955149848, "grad_norm": 712.0, "learning_rate": 3.560955170681418e-05, "loss": 20.2504, "step": 14518 }, { "epoch": 0.6051852778125131, "grad_norm": 144.0, "learning_rate": 3.560308742929775e-05, "loss": 9.5004, "step": 14519 }, { "epoch": 0.6052269601100413, "grad_norm": 2064.0, "learning_rate": 3.55966234141663e-05, "loss": 35.5092, "step": 14520 }, { "epoch": 0.6052686424075695, "grad_norm": 348.0, "learning_rate": 3.559015966153769e-05, "loss": 13.8127, "step": 14521 }, { "epoch": 0.6053103247050977, "grad_norm": 414.0, "learning_rate": 3.5583696171529686e-05, "loss": 16.001, "step": 14522 }, { "epoch": 0.605352007002626, "grad_norm": 326.0, "learning_rate": 3.557723294426011e-05, "loss": 14.5011, "step": 14523 }, { "epoch": 0.6053936893001542, "grad_norm": 848.0, "learning_rate": 3.557076997984673e-05, "loss": 21.7501, "step": 14524 }, { "epoch": 0.6054353715976825, "grad_norm": 298.0, "learning_rate": 3.556430727840735e-05, "loss": 13.8129, "step": 14525 }, { "epoch": 0.6054770538952107, "grad_norm": 229.0, "learning_rate": 3.555784484005975e-05, "loss": 10.9378, "step": 14526 }, { "epoch": 0.605518736192739, "grad_norm": 274.0, "learning_rate": 3.555138266492173e-05, "loss": 12.8134, "step": 14527 }, { "epoch": 0.6055604184902672, "grad_norm": 356.0, "learning_rate": 3.5544920753111014e-05, "loss": 15.1911, "step": 14528 }, { "epoch": 0.6056021007877954, "grad_norm": 197.0, "learning_rate": 3.553845910474542e-05, "loss": 11.6878, "step": 14529 }, { "epoch": 0.6056437830853236, "grad_norm": 404.0, "learning_rate": 3.553199771994269e-05, "loss": 15.5002, "step": 14530 }, { "epoch": 0.605685465382852, "grad_norm": 412.0, "learning_rate": 3.552553659882059e-05, "loss": 16.2505, "step": 14531 }, { "epoch": 0.6057271476803802, "grad_norm": 804.0, "learning_rate": 3.551907574149685e-05, "loss": 20.2521, "step": 14532 }, { "epoch": 0.6057688299779084, "grad_norm": 143.0, "learning_rate": 3.5512615148089274e-05, "loss": 10.0626, "step": 14533 }, { "epoch": 0.6058105122754366, "grad_norm": 51.25, "learning_rate": 3.550615481871554e-05, "loss": 7.1565, "step": 14534 }, { "epoch": 0.6058521945729649, "grad_norm": 414.0, "learning_rate": 3.549969475349345e-05, "loss": 15.3126, "step": 14535 }, { "epoch": 0.6058938768704931, "grad_norm": 304.0, "learning_rate": 3.549323495254068e-05, "loss": 14.5628, "step": 14536 }, { "epoch": 0.6059355591680213, "grad_norm": 418.0, "learning_rate": 3.548677541597501e-05, "loss": 16.2503, "step": 14537 }, { "epoch": 0.6059772414655495, "grad_norm": 892.0, "learning_rate": 3.548031614391415e-05, "loss": 21.382, "step": 14538 }, { "epoch": 0.6060189237630779, "grad_norm": 772.0, "learning_rate": 3.54738571364758e-05, "loss": 20.2534, "step": 14539 }, { "epoch": 0.6060606060606061, "grad_norm": 266.0, "learning_rate": 3.5467398393777696e-05, "loss": 12.6252, "step": 14540 }, { "epoch": 0.6061022883581343, "grad_norm": 139.0, "learning_rate": 3.5460939915937564e-05, "loss": 9.3757, "step": 14541 }, { "epoch": 0.6061439706556625, "grad_norm": 294.0, "learning_rate": 3.545448170307307e-05, "loss": 12.4383, "step": 14542 }, { "epoch": 0.6061856529531908, "grad_norm": 418.0, "learning_rate": 3.544802375530196e-05, "loss": 15.0631, "step": 14543 }, { "epoch": 0.606227335250719, "grad_norm": 203.0, "learning_rate": 3.544156607274189e-05, "loss": 11.4388, "step": 14544 }, { "epoch": 0.6062690175482472, "grad_norm": 328.0, "learning_rate": 3.5435108655510596e-05, "loss": 13.1252, "step": 14545 }, { "epoch": 0.6063106998457755, "grad_norm": 418.0, "learning_rate": 3.5428651503725704e-05, "loss": 14.938, "step": 14546 }, { "epoch": 0.6063523821433038, "grad_norm": 376.0, "learning_rate": 3.542219461750497e-05, "loss": 14.3758, "step": 14547 }, { "epoch": 0.606394064440832, "grad_norm": 75.5, "learning_rate": 3.5415737996966e-05, "loss": 7.3753, "step": 14548 }, { "epoch": 0.6064357467383602, "grad_norm": 458.0, "learning_rate": 3.540928164222652e-05, "loss": 14.5627, "step": 14549 }, { "epoch": 0.6064774290358884, "grad_norm": 290.0, "learning_rate": 3.540282555340417e-05, "loss": 12.3753, "step": 14550 }, { "epoch": 0.6065191113334167, "grad_norm": 135.0, "learning_rate": 3.5396369730616624e-05, "loss": 10.3135, "step": 14551 }, { "epoch": 0.606560793630945, "grad_norm": 708.0, "learning_rate": 3.538991417398152e-05, "loss": 19.2504, "step": 14552 }, { "epoch": 0.6066024759284732, "grad_norm": 386.0, "learning_rate": 3.5383458883616554e-05, "loss": 14.0005, "step": 14553 }, { "epoch": 0.6066441582260014, "grad_norm": 72.0, "learning_rate": 3.5377003859639324e-05, "loss": 7.1565, "step": 14554 }, { "epoch": 0.6066858405235297, "grad_norm": 692.0, "learning_rate": 3.537054910216752e-05, "loss": 21.3755, "step": 14555 }, { "epoch": 0.6067275228210579, "grad_norm": 226.0, "learning_rate": 3.5364094611318734e-05, "loss": 11.8127, "step": 14556 }, { "epoch": 0.6067692051185861, "grad_norm": 412.0, "learning_rate": 3.535764038721065e-05, "loss": 15.563, "step": 14557 }, { "epoch": 0.6068108874161143, "grad_norm": 354.0, "learning_rate": 3.535118642996084e-05, "loss": 13.9377, "step": 14558 }, { "epoch": 0.6068525697136427, "grad_norm": 374.0, "learning_rate": 3.534473273968698e-05, "loss": 15.0004, "step": 14559 }, { "epoch": 0.6068942520111709, "grad_norm": 316.0, "learning_rate": 3.533827931650665e-05, "loss": 14.0006, "step": 14560 }, { "epoch": 0.6069359343086991, "grad_norm": 496.0, "learning_rate": 3.533182616053749e-05, "loss": 17.251, "step": 14561 }, { "epoch": 0.6069776166062273, "grad_norm": 139.0, "learning_rate": 3.53253732718971e-05, "loss": 10.3754, "step": 14562 }, { "epoch": 0.6070192989037556, "grad_norm": 217.0, "learning_rate": 3.531892065070308e-05, "loss": 12.5002, "step": 14563 }, { "epoch": 0.6070609812012838, "grad_norm": 241.0, "learning_rate": 3.531246829707303e-05, "loss": 12.7505, "step": 14564 }, { "epoch": 0.607102663498812, "grad_norm": 168.0, "learning_rate": 3.530601621112457e-05, "loss": 11.5005, "step": 14565 }, { "epoch": 0.6071443457963402, "grad_norm": 118.5, "learning_rate": 3.529956439297525e-05, "loss": 9.938, "step": 14566 }, { "epoch": 0.6071860280938686, "grad_norm": 217.0, "learning_rate": 3.529311284274269e-05, "loss": 10.2512, "step": 14567 }, { "epoch": 0.6072277103913968, "grad_norm": 247.0, "learning_rate": 3.528666156054443e-05, "loss": 13.1879, "step": 14568 }, { "epoch": 0.607269392688925, "grad_norm": 290.0, "learning_rate": 3.528021054649809e-05, "loss": 11.9378, "step": 14569 }, { "epoch": 0.6073110749864532, "grad_norm": 544.0, "learning_rate": 3.5273759800721206e-05, "loss": 19.0002, "step": 14570 }, { "epoch": 0.6073527572839815, "grad_norm": 1448.0, "learning_rate": 3.526730932333138e-05, "loss": 29.629, "step": 14571 }, { "epoch": 0.6073944395815097, "grad_norm": 156.0, "learning_rate": 3.526085911444612e-05, "loss": 9.1258, "step": 14572 }, { "epoch": 0.607436121879038, "grad_norm": 728.0, "learning_rate": 3.525440917418303e-05, "loss": 19.0004, "step": 14573 }, { "epoch": 0.6074778041765662, "grad_norm": 39.5, "learning_rate": 3.5247959502659634e-05, "loss": 6.8754, "step": 14574 }, { "epoch": 0.6075194864740945, "grad_norm": 418.0, "learning_rate": 3.524151009999349e-05, "loss": 15.2506, "step": 14575 }, { "epoch": 0.6075611687716227, "grad_norm": 276.0, "learning_rate": 3.523506096630213e-05, "loss": 11.8128, "step": 14576 }, { "epoch": 0.6076028510691509, "grad_norm": 384.0, "learning_rate": 3.522861210170309e-05, "loss": 16.6261, "step": 14577 }, { "epoch": 0.6076445333666791, "grad_norm": 294.0, "learning_rate": 3.522216350631391e-05, "loss": 13.8128, "step": 14578 }, { "epoch": 0.6076862156642074, "grad_norm": 77.5, "learning_rate": 3.521571518025213e-05, "loss": 7.751, "step": 14579 }, { "epoch": 0.6077278979617357, "grad_norm": 324.0, "learning_rate": 3.5209267123635224e-05, "loss": 13.5627, "step": 14580 }, { "epoch": 0.6077695802592639, "grad_norm": 260.0, "learning_rate": 3.520281933658076e-05, "loss": 14.6882, "step": 14581 }, { "epoch": 0.6078112625567921, "grad_norm": 972.0, "learning_rate": 3.5196371819206215e-05, "loss": 24.2536, "step": 14582 }, { "epoch": 0.6078529448543204, "grad_norm": 372.0, "learning_rate": 3.518992457162912e-05, "loss": 16.6254, "step": 14583 }, { "epoch": 0.6078946271518486, "grad_norm": 164.0, "learning_rate": 3.518347759396694e-05, "loss": 10.313, "step": 14584 }, { "epoch": 0.6079363094493768, "grad_norm": 328.0, "learning_rate": 3.517703088633723e-05, "loss": 13.6258, "step": 14585 }, { "epoch": 0.6079779917469051, "grad_norm": 215.0, "learning_rate": 3.5170584448857424e-05, "loss": 11.8752, "step": 14586 }, { "epoch": 0.6080196740444334, "grad_norm": 394.0, "learning_rate": 3.5164138281645036e-05, "loss": 15.3753, "step": 14587 }, { "epoch": 0.6080613563419616, "grad_norm": 1488.0, "learning_rate": 3.5157692384817546e-05, "loss": 33.2506, "step": 14588 }, { "epoch": 0.6081030386394898, "grad_norm": 476.0, "learning_rate": 3.515124675849243e-05, "loss": 16.0021, "step": 14589 }, { "epoch": 0.6081447209370181, "grad_norm": 278.0, "learning_rate": 3.514480140278716e-05, "loss": 11.5636, "step": 14590 }, { "epoch": 0.6081864032345463, "grad_norm": 568.0, "learning_rate": 3.513835631781921e-05, "loss": 17.1256, "step": 14591 }, { "epoch": 0.6082280855320745, "grad_norm": 45.5, "learning_rate": 3.513191150370603e-05, "loss": 6.8753, "step": 14592 }, { "epoch": 0.6082697678296027, "grad_norm": 276.0, "learning_rate": 3.512546696056509e-05, "loss": 12.1878, "step": 14593 }, { "epoch": 0.6083114501271311, "grad_norm": 87.0, "learning_rate": 3.5119022688513815e-05, "loss": 8.3752, "step": 14594 }, { "epoch": 0.6083531324246593, "grad_norm": 442.0, "learning_rate": 3.5112578687669695e-05, "loss": 17.2503, "step": 14595 }, { "epoch": 0.6083948147221875, "grad_norm": 450.0, "learning_rate": 3.510613495815013e-05, "loss": 15.7502, "step": 14596 }, { "epoch": 0.6084364970197157, "grad_norm": 1120.0, "learning_rate": 3.5099691500072606e-05, "loss": 26.6254, "step": 14597 }, { "epoch": 0.608478179317244, "grad_norm": 394.0, "learning_rate": 3.5093248313554497e-05, "loss": 16.7503, "step": 14598 }, { "epoch": 0.6085198616147722, "grad_norm": 256.0, "learning_rate": 3.508680539871327e-05, "loss": 12.6877, "step": 14599 }, { "epoch": 0.6085615439123004, "grad_norm": 446.0, "learning_rate": 3.508036275566635e-05, "loss": 16.3754, "step": 14600 }, { "epoch": 0.6086032262098287, "grad_norm": 440.0, "learning_rate": 3.5073920384531136e-05, "loss": 16.3761, "step": 14601 }, { "epoch": 0.608644908507357, "grad_norm": 254.0, "learning_rate": 3.5067478285425034e-05, "loss": 10.6258, "step": 14602 }, { "epoch": 0.6086865908048852, "grad_norm": 165.0, "learning_rate": 3.506103645846549e-05, "loss": 9.9378, "step": 14603 }, { "epoch": 0.6087282731024134, "grad_norm": 114.0, "learning_rate": 3.505459490376986e-05, "loss": 10.6253, "step": 14604 }, { "epoch": 0.6087699553999416, "grad_norm": 53.25, "learning_rate": 3.504815362145559e-05, "loss": 8.0635, "step": 14605 }, { "epoch": 0.6088116376974699, "grad_norm": 126.0, "learning_rate": 3.504171261164002e-05, "loss": 8.7501, "step": 14606 }, { "epoch": 0.6088533199949981, "grad_norm": 201.0, "learning_rate": 3.503527187444059e-05, "loss": 10.1884, "step": 14607 }, { "epoch": 0.6088950022925264, "grad_norm": 154.0, "learning_rate": 3.502883140997464e-05, "loss": 10.7505, "step": 14608 }, { "epoch": 0.6089366845900546, "grad_norm": 672.0, "learning_rate": 3.5022391218359585e-05, "loss": 17.2547, "step": 14609 }, { "epoch": 0.6089783668875829, "grad_norm": 384.0, "learning_rate": 3.501595129971276e-05, "loss": 15.1255, "step": 14610 }, { "epoch": 0.6090200491851111, "grad_norm": 162.0, "learning_rate": 3.500951165415157e-05, "loss": 10.0002, "step": 14611 }, { "epoch": 0.6090617314826393, "grad_norm": 202.0, "learning_rate": 3.500307228179335e-05, "loss": 9.1252, "step": 14612 }, { "epoch": 0.6091034137801675, "grad_norm": 528.0, "learning_rate": 3.499663318275547e-05, "loss": 17.5025, "step": 14613 }, { "epoch": 0.6091450960776958, "grad_norm": 364.0, "learning_rate": 3.499019435715527e-05, "loss": 15.0002, "step": 14614 }, { "epoch": 0.6091867783752241, "grad_norm": 125.5, "learning_rate": 3.4983755805110135e-05, "loss": 9.7504, "step": 14615 }, { "epoch": 0.6092284606727523, "grad_norm": 780.0, "learning_rate": 3.497731752673736e-05, "loss": 23.1254, "step": 14616 }, { "epoch": 0.6092701429702805, "grad_norm": 484.0, "learning_rate": 3.497087952215433e-05, "loss": 17.5004, "step": 14617 }, { "epoch": 0.6093118252678088, "grad_norm": 684.0, "learning_rate": 3.496444179147834e-05, "loss": 20.6256, "step": 14618 }, { "epoch": 0.609353507565337, "grad_norm": 492.0, "learning_rate": 3.495800433482674e-05, "loss": 17.7501, "step": 14619 }, { "epoch": 0.6093951898628652, "grad_norm": 394.0, "learning_rate": 3.495156715231684e-05, "loss": 15.1915, "step": 14620 }, { "epoch": 0.6094368721603934, "grad_norm": 188.0, "learning_rate": 3.4945130244065985e-05, "loss": 8.8754, "step": 14621 }, { "epoch": 0.6094785544579218, "grad_norm": 171.0, "learning_rate": 3.4938693610191435e-05, "loss": 10.0006, "step": 14622 }, { "epoch": 0.60952023675545, "grad_norm": 324.0, "learning_rate": 3.493225725081056e-05, "loss": 14.0027, "step": 14623 }, { "epoch": 0.6095619190529782, "grad_norm": 440.0, "learning_rate": 3.4925821166040604e-05, "loss": 15.5628, "step": 14624 }, { "epoch": 0.6096036013505064, "grad_norm": 330.0, "learning_rate": 3.491938535599892e-05, "loss": 13.0031, "step": 14625 }, { "epoch": 0.6096452836480347, "grad_norm": 139.0, "learning_rate": 3.4912949820802766e-05, "loss": 10.3129, "step": 14626 }, { "epoch": 0.6096869659455629, "grad_norm": 358.0, "learning_rate": 3.490651456056945e-05, "loss": 13.8785, "step": 14627 }, { "epoch": 0.6097286482430911, "grad_norm": 532.0, "learning_rate": 3.490007957541623e-05, "loss": 17.3753, "step": 14628 }, { "epoch": 0.6097703305406194, "grad_norm": 237.0, "learning_rate": 3.4893644865460414e-05, "loss": 9.9379, "step": 14629 }, { "epoch": 0.6098120128381477, "grad_norm": 147.0, "learning_rate": 3.488721043081925e-05, "loss": 10.6877, "step": 14630 }, { "epoch": 0.6098536951356759, "grad_norm": 135.0, "learning_rate": 3.488077627161004e-05, "loss": 9.8128, "step": 14631 }, { "epoch": 0.6098953774332041, "grad_norm": 288.0, "learning_rate": 3.487434238795e-05, "loss": 12.0631, "step": 14632 }, { "epoch": 0.6099370597307323, "grad_norm": 163.0, "learning_rate": 3.486790877995643e-05, "loss": 9.0004, "step": 14633 }, { "epoch": 0.6099787420282606, "grad_norm": 936.0, "learning_rate": 3.486147544774655e-05, "loss": 21.3756, "step": 14634 }, { "epoch": 0.6100204243257888, "grad_norm": 278.0, "learning_rate": 3.485504239143764e-05, "loss": 12.0628, "step": 14635 }, { "epoch": 0.6100621066233171, "grad_norm": 350.0, "learning_rate": 3.484860961114691e-05, "loss": 14.5003, "step": 14636 }, { "epoch": 0.6101037889208453, "grad_norm": 472.0, "learning_rate": 3.4842177106991625e-05, "loss": 15.9377, "step": 14637 }, { "epoch": 0.6101454712183736, "grad_norm": 416.0, "learning_rate": 3.483574487908901e-05, "loss": 15.8768, "step": 14638 }, { "epoch": 0.6101871535159018, "grad_norm": 434.0, "learning_rate": 3.4829312927556285e-05, "loss": 16.5002, "step": 14639 }, { "epoch": 0.61022883581343, "grad_norm": 66.5, "learning_rate": 3.4822881252510675e-05, "loss": 8.6263, "step": 14640 }, { "epoch": 0.6102705181109582, "grad_norm": 237.0, "learning_rate": 3.4816449854069414e-05, "loss": 12.1878, "step": 14641 }, { "epoch": 0.6103122004084865, "grad_norm": 346.0, "learning_rate": 3.481001873234968e-05, "loss": 14.751, "step": 14642 }, { "epoch": 0.6103538827060148, "grad_norm": 280.0, "learning_rate": 3.480358788746874e-05, "loss": 13.5004, "step": 14643 }, { "epoch": 0.610395565003543, "grad_norm": 223.0, "learning_rate": 3.479715731954373e-05, "loss": 11.6877, "step": 14644 }, { "epoch": 0.6104372473010712, "grad_norm": 402.0, "learning_rate": 3.479072702869189e-05, "loss": 15.8753, "step": 14645 }, { "epoch": 0.6104789295985995, "grad_norm": 648.0, "learning_rate": 3.4784297015030386e-05, "loss": 20.0001, "step": 14646 }, { "epoch": 0.6105206118961277, "grad_norm": 220.0, "learning_rate": 3.477786727867644e-05, "loss": 11.3128, "step": 14647 }, { "epoch": 0.6105622941936559, "grad_norm": 230.0, "learning_rate": 3.47714378197472e-05, "loss": 11.9376, "step": 14648 }, { "epoch": 0.6106039764911841, "grad_norm": 348.0, "learning_rate": 3.476500863835986e-05, "loss": 14.5627, "step": 14649 }, { "epoch": 0.6106456587887125, "grad_norm": 420.0, "learning_rate": 3.475857973463159e-05, "loss": 16.5002, "step": 14650 }, { "epoch": 0.6106873410862407, "grad_norm": 320.0, "learning_rate": 3.475215110867957e-05, "loss": 12.3128, "step": 14651 }, { "epoch": 0.6107290233837689, "grad_norm": 604.0, "learning_rate": 3.474572276062092e-05, "loss": 19.1264, "step": 14652 }, { "epoch": 0.6107707056812971, "grad_norm": 776.0, "learning_rate": 3.4739294690572855e-05, "loss": 22.2503, "step": 14653 }, { "epoch": 0.6108123879788254, "grad_norm": 107.0, "learning_rate": 3.4732866898652485e-05, "loss": 9.188, "step": 14654 }, { "epoch": 0.6108540702763536, "grad_norm": 92.5, "learning_rate": 3.472643938497698e-05, "loss": 9.7526, "step": 14655 }, { "epoch": 0.6108957525738818, "grad_norm": 448.0, "learning_rate": 3.4720012149663464e-05, "loss": 17.2544, "step": 14656 }, { "epoch": 0.6109374348714101, "grad_norm": 198.0, "learning_rate": 3.4713585192829095e-05, "loss": 11.5007, "step": 14657 }, { "epoch": 0.6109791171689384, "grad_norm": 266.0, "learning_rate": 3.470715851459098e-05, "loss": 12.6252, "step": 14658 }, { "epoch": 0.6110207994664666, "grad_norm": 239.0, "learning_rate": 3.470073211506627e-05, "loss": 11.3753, "step": 14659 }, { "epoch": 0.6110624817639948, "grad_norm": 504.0, "learning_rate": 3.4694305994372056e-05, "loss": 16.2502, "step": 14660 }, { "epoch": 0.6111041640615231, "grad_norm": 288.0, "learning_rate": 3.46878801526255e-05, "loss": 13.3127, "step": 14661 }, { "epoch": 0.6111458463590513, "grad_norm": 208.0, "learning_rate": 3.4681454589943666e-05, "loss": 11.6883, "step": 14662 }, { "epoch": 0.6111875286565795, "grad_norm": 644.0, "learning_rate": 3.4675029306443695e-05, "loss": 19.251, "step": 14663 }, { "epoch": 0.6112292109541078, "grad_norm": 564.0, "learning_rate": 3.4668604302242667e-05, "loss": 19.7503, "step": 14664 }, { "epoch": 0.6112708932516361, "grad_norm": 900.0, "learning_rate": 3.4662179577457696e-05, "loss": 22.6254, "step": 14665 }, { "epoch": 0.6113125755491643, "grad_norm": 442.0, "learning_rate": 3.465575513220585e-05, "loss": 15.6255, "step": 14666 }, { "epoch": 0.6113542578466925, "grad_norm": 175.0, "learning_rate": 3.464933096660424e-05, "loss": 10.8754, "step": 14667 }, { "epoch": 0.6113959401442207, "grad_norm": 236.0, "learning_rate": 3.4642907080769924e-05, "loss": 12.5008, "step": 14668 }, { "epoch": 0.611437622441749, "grad_norm": 129.0, "learning_rate": 3.463648347482001e-05, "loss": 10.813, "step": 14669 }, { "epoch": 0.6114793047392773, "grad_norm": 156.0, "learning_rate": 3.463006014887153e-05, "loss": 11.3752, "step": 14670 }, { "epoch": 0.6115209870368055, "grad_norm": 512.0, "learning_rate": 3.462363710304159e-05, "loss": 15.9381, "step": 14671 }, { "epoch": 0.6115626693343337, "grad_norm": 680.0, "learning_rate": 3.46172143374472e-05, "loss": 19.7505, "step": 14672 }, { "epoch": 0.611604351631862, "grad_norm": 384.0, "learning_rate": 3.461079185220547e-05, "loss": 15.0004, "step": 14673 }, { "epoch": 0.6116460339293902, "grad_norm": 240.0, "learning_rate": 3.460436964743341e-05, "loss": 12.5041, "step": 14674 }, { "epoch": 0.6116877162269184, "grad_norm": 362.0, "learning_rate": 3.459794772324808e-05, "loss": 13.6253, "step": 14675 }, { "epoch": 0.6117293985244466, "grad_norm": 454.0, "learning_rate": 3.459152607976652e-05, "loss": 15.3757, "step": 14676 }, { "epoch": 0.611771080821975, "grad_norm": 223.0, "learning_rate": 3.458510471710578e-05, "loss": 11.0003, "step": 14677 }, { "epoch": 0.6118127631195032, "grad_norm": 708.0, "learning_rate": 3.457868363538285e-05, "loss": 19.2502, "step": 14678 }, { "epoch": 0.6118544454170314, "grad_norm": 364.0, "learning_rate": 3.45722628347148e-05, "loss": 14.2501, "step": 14679 }, { "epoch": 0.6118961277145596, "grad_norm": 536.0, "learning_rate": 3.456584231521861e-05, "loss": 15.8779, "step": 14680 }, { "epoch": 0.6119378100120879, "grad_norm": 404.0, "learning_rate": 3.455942207701134e-05, "loss": 14.9377, "step": 14681 }, { "epoch": 0.6119794923096161, "grad_norm": 133.0, "learning_rate": 3.455300212020995e-05, "loss": 10.5006, "step": 14682 }, { "epoch": 0.6120211746071443, "grad_norm": 264.0, "learning_rate": 3.454658244493149e-05, "loss": 13.313, "step": 14683 }, { "epoch": 0.6120628569046725, "grad_norm": 207.0, "learning_rate": 3.454016305129292e-05, "loss": 11.7503, "step": 14684 }, { "epoch": 0.6121045392022009, "grad_norm": 290.0, "learning_rate": 3.4533743939411264e-05, "loss": 14.0627, "step": 14685 }, { "epoch": 0.6121462214997291, "grad_norm": 322.0, "learning_rate": 3.452732510940347e-05, "loss": 13.6879, "step": 14686 }, { "epoch": 0.6121879037972573, "grad_norm": 294.0, "learning_rate": 3.4520906561386576e-05, "loss": 13.1251, "step": 14687 }, { "epoch": 0.6122295860947855, "grad_norm": 342.0, "learning_rate": 3.451448829547753e-05, "loss": 13.4386, "step": 14688 }, { "epoch": 0.6122712683923138, "grad_norm": 132.0, "learning_rate": 3.45080703117933e-05, "loss": 10.9381, "step": 14689 }, { "epoch": 0.612312950689842, "grad_norm": 231.0, "learning_rate": 3.450165261045086e-05, "loss": 13.1881, "step": 14690 }, { "epoch": 0.6123546329873703, "grad_norm": 96.0, "learning_rate": 3.44952351915672e-05, "loss": 7.3761, "step": 14691 }, { "epoch": 0.6123963152848985, "grad_norm": 376.0, "learning_rate": 3.4488818055259234e-05, "loss": 14.2502, "step": 14692 }, { "epoch": 0.6124379975824268, "grad_norm": 398.0, "learning_rate": 3.4482401201643955e-05, "loss": 14.9403, "step": 14693 }, { "epoch": 0.612479679879955, "grad_norm": 672.0, "learning_rate": 3.4475984630838266e-05, "loss": 18.6253, "step": 14694 }, { "epoch": 0.6125213621774832, "grad_norm": 235.0, "learning_rate": 3.446956834295916e-05, "loss": 12.2573, "step": 14695 }, { "epoch": 0.6125630444750114, "grad_norm": 588.0, "learning_rate": 3.4463152338123526e-05, "loss": 21.8751, "step": 14696 }, { "epoch": 0.6126047267725397, "grad_norm": 318.0, "learning_rate": 3.445673661644835e-05, "loss": 13.2504, "step": 14697 }, { "epoch": 0.612646409070068, "grad_norm": 424.0, "learning_rate": 3.44503211780505e-05, "loss": 16.1255, "step": 14698 }, { "epoch": 0.6126880913675962, "grad_norm": 129.0, "learning_rate": 3.444390602304695e-05, "loss": 8.6255, "step": 14699 }, { "epoch": 0.6127297736651244, "grad_norm": 816.0, "learning_rate": 3.443749115155457e-05, "loss": 23.7501, "step": 14700 }, { "epoch": 0.6127714559626527, "grad_norm": 492.0, "learning_rate": 3.4431076563690314e-05, "loss": 17.2502, "step": 14701 }, { "epoch": 0.6128131382601809, "grad_norm": 332.0, "learning_rate": 3.442466225957106e-05, "loss": 12.8128, "step": 14702 }, { "epoch": 0.6128548205577091, "grad_norm": 332.0, "learning_rate": 3.441824823931372e-05, "loss": 13.5629, "step": 14703 }, { "epoch": 0.6128965028552373, "grad_norm": 680.0, "learning_rate": 3.441183450303518e-05, "loss": 20.1256, "step": 14704 }, { "epoch": 0.6129381851527657, "grad_norm": 560.0, "learning_rate": 3.440542105085237e-05, "loss": 18.6252, "step": 14705 }, { "epoch": 0.6129798674502939, "grad_norm": 908.0, "learning_rate": 3.439900788288212e-05, "loss": 24.1253, "step": 14706 }, { "epoch": 0.6130215497478221, "grad_norm": 368.0, "learning_rate": 3.439259499924134e-05, "loss": 14.3128, "step": 14707 }, { "epoch": 0.6130632320453503, "grad_norm": 316.0, "learning_rate": 3.438618240004691e-05, "loss": 13.7502, "step": 14708 }, { "epoch": 0.6131049143428786, "grad_norm": 133.0, "learning_rate": 3.4379770085415694e-05, "loss": 10.0006, "step": 14709 }, { "epoch": 0.6131465966404068, "grad_norm": 160.0, "learning_rate": 3.4373358055464534e-05, "loss": 10.7506, "step": 14710 }, { "epoch": 0.613188278937935, "grad_norm": 620.0, "learning_rate": 3.4366946310310334e-05, "loss": 18.7533, "step": 14711 }, { "epoch": 0.6132299612354633, "grad_norm": 460.0, "learning_rate": 3.436053485006991e-05, "loss": 16.3757, "step": 14712 }, { "epoch": 0.6132716435329916, "grad_norm": 264.0, "learning_rate": 3.435412367486013e-05, "loss": 10.7503, "step": 14713 }, { "epoch": 0.6133133258305198, "grad_norm": 247.0, "learning_rate": 3.4347712784797834e-05, "loss": 13.0013, "step": 14714 }, { "epoch": 0.613355008128048, "grad_norm": 400.0, "learning_rate": 3.434130217999987e-05, "loss": 15.7506, "step": 14715 }, { "epoch": 0.6133966904255762, "grad_norm": 366.0, "learning_rate": 3.433489186058305e-05, "loss": 15.2502, "step": 14716 }, { "epoch": 0.6134383727231045, "grad_norm": 362.0, "learning_rate": 3.432848182666424e-05, "loss": 14.0002, "step": 14717 }, { "epoch": 0.6134800550206327, "grad_norm": 153.0, "learning_rate": 3.4322072078360215e-05, "loss": 9.1255, "step": 14718 }, { "epoch": 0.613521737318161, "grad_norm": 204.0, "learning_rate": 3.431566261578783e-05, "loss": 11.3754, "step": 14719 }, { "epoch": 0.6135634196156892, "grad_norm": 446.0, "learning_rate": 3.4309253439063884e-05, "loss": 16.2507, "step": 14720 }, { "epoch": 0.6136051019132175, "grad_norm": 756.0, "learning_rate": 3.43028445483052e-05, "loss": 21.2513, "step": 14721 }, { "epoch": 0.6136467842107457, "grad_norm": 288.0, "learning_rate": 3.4296435943628545e-05, "loss": 12.8127, "step": 14722 }, { "epoch": 0.6136884665082739, "grad_norm": 111.0, "learning_rate": 3.4290027625150755e-05, "loss": 8.313, "step": 14723 }, { "epoch": 0.6137301488058021, "grad_norm": 150.0, "learning_rate": 3.428361959298859e-05, "loss": 8.7503, "step": 14724 }, { "epoch": 0.6137718311033304, "grad_norm": 124.5, "learning_rate": 3.427721184725887e-05, "loss": 7.7189, "step": 14725 }, { "epoch": 0.6138135134008587, "grad_norm": 832.0, "learning_rate": 3.427080438807835e-05, "loss": 20.0049, "step": 14726 }, { "epoch": 0.6138551956983869, "grad_norm": 272.0, "learning_rate": 3.426439721556383e-05, "loss": 13.8753, "step": 14727 }, { "epoch": 0.6138968779959151, "grad_norm": 480.0, "learning_rate": 3.4257990329832043e-05, "loss": 16.7502, "step": 14728 }, { "epoch": 0.6139385602934434, "grad_norm": 426.0, "learning_rate": 3.4251583730999804e-05, "loss": 16.3754, "step": 14729 }, { "epoch": 0.6139802425909716, "grad_norm": 360.0, "learning_rate": 3.4245177419183825e-05, "loss": 14.0627, "step": 14730 }, { "epoch": 0.6140219248884998, "grad_norm": 692.0, "learning_rate": 3.4238771394500915e-05, "loss": 20.3751, "step": 14731 }, { "epoch": 0.6140636071860281, "grad_norm": 133.0, "learning_rate": 3.423236565706777e-05, "loss": 9.1877, "step": 14732 }, { "epoch": 0.6141052894835564, "grad_norm": 426.0, "learning_rate": 3.422596020700118e-05, "loss": 17.1255, "step": 14733 }, { "epoch": 0.6141469717810846, "grad_norm": 684.0, "learning_rate": 3.421955504441785e-05, "loss": 20.1253, "step": 14734 }, { "epoch": 0.6141886540786128, "grad_norm": 256.0, "learning_rate": 3.421315016943455e-05, "loss": 12.876, "step": 14735 }, { "epoch": 0.6142303363761411, "grad_norm": 480.0, "learning_rate": 3.420674558216796e-05, "loss": 11.6882, "step": 14736 }, { "epoch": 0.6142720186736693, "grad_norm": 620.0, "learning_rate": 3.420034128273487e-05, "loss": 18.7501, "step": 14737 }, { "epoch": 0.6143137009711975, "grad_norm": 226.0, "learning_rate": 3.419393727125193e-05, "loss": 11.7505, "step": 14738 }, { "epoch": 0.6143553832687257, "grad_norm": 322.0, "learning_rate": 3.418753354783591e-05, "loss": 12.5001, "step": 14739 }, { "epoch": 0.6143970655662541, "grad_norm": 239.0, "learning_rate": 3.418113011260347e-05, "loss": 12.8752, "step": 14740 }, { "epoch": 0.6144387478637823, "grad_norm": 119.0, "learning_rate": 3.4174726965671364e-05, "loss": 9.1878, "step": 14741 }, { "epoch": 0.6144804301613105, "grad_norm": 243.0, "learning_rate": 3.416832410715625e-05, "loss": 11.6251, "step": 14742 }, { "epoch": 0.6145221124588387, "grad_norm": 832.0, "learning_rate": 3.4161921537174844e-05, "loss": 24.1256, "step": 14743 }, { "epoch": 0.614563794756367, "grad_norm": 306.0, "learning_rate": 3.41555192558438e-05, "loss": 12.0002, "step": 14744 }, { "epoch": 0.6146054770538952, "grad_norm": 107.0, "learning_rate": 3.4149117263279864e-05, "loss": 8.5629, "step": 14745 }, { "epoch": 0.6146471593514234, "grad_norm": 644.0, "learning_rate": 3.414271555959964e-05, "loss": 20.0002, "step": 14746 }, { "epoch": 0.6146888416489517, "grad_norm": 255.0, "learning_rate": 3.413631414491985e-05, "loss": 13.0635, "step": 14747 }, { "epoch": 0.61473052394648, "grad_norm": 2080.0, "learning_rate": 3.412991301935713e-05, "loss": 38.7534, "step": 14748 }, { "epoch": 0.6147722062440082, "grad_norm": 468.0, "learning_rate": 3.4123512183028166e-05, "loss": 16.5018, "step": 14749 }, { "epoch": 0.6148138885415364, "grad_norm": 462.0, "learning_rate": 3.4117111636049585e-05, "loss": 18.2503, "step": 14750 }, { "epoch": 0.6148555708390646, "grad_norm": 147.0, "learning_rate": 3.411071137853807e-05, "loss": 10.3133, "step": 14751 }, { "epoch": 0.6148972531365929, "grad_norm": 216.0, "learning_rate": 3.410431141061025e-05, "loss": 11.6877, "step": 14752 }, { "epoch": 0.6149389354341211, "grad_norm": 282.0, "learning_rate": 3.4097911732382756e-05, "loss": 11.4377, "step": 14753 }, { "epoch": 0.6149806177316494, "grad_norm": 140.0, "learning_rate": 3.409151234397223e-05, "loss": 9.5001, "step": 14754 }, { "epoch": 0.6150223000291776, "grad_norm": 412.0, "learning_rate": 3.408511324549532e-05, "loss": 17.1275, "step": 14755 }, { "epoch": 0.6150639823267059, "grad_norm": 560.0, "learning_rate": 3.4078714437068616e-05, "loss": 17.0028, "step": 14756 }, { "epoch": 0.6151056646242341, "grad_norm": 177.0, "learning_rate": 3.407231591880878e-05, "loss": 10.8753, "step": 14757 }, { "epoch": 0.6151473469217623, "grad_norm": 840.0, "learning_rate": 3.406591769083237e-05, "loss": 22.6263, "step": 14758 }, { "epoch": 0.6151890292192905, "grad_norm": 208.0, "learning_rate": 3.405951975325605e-05, "loss": 10.8751, "step": 14759 }, { "epoch": 0.6152307115168189, "grad_norm": 280.0, "learning_rate": 3.405312210619638e-05, "loss": 13.4384, "step": 14760 }, { "epoch": 0.6152723938143471, "grad_norm": 660.0, "learning_rate": 3.404672474976999e-05, "loss": 19.7502, "step": 14761 }, { "epoch": 0.6153140761118753, "grad_norm": 215.0, "learning_rate": 3.404032768409344e-05, "loss": 10.0003, "step": 14762 }, { "epoch": 0.6153557584094035, "grad_norm": 960.0, "learning_rate": 3.403393090928335e-05, "loss": 24.5032, "step": 14763 }, { "epoch": 0.6153974407069318, "grad_norm": 185.0, "learning_rate": 3.402753442545628e-05, "loss": 6.0021, "step": 14764 }, { "epoch": 0.61543912300446, "grad_norm": 466.0, "learning_rate": 3.402113823272881e-05, "loss": 15.5006, "step": 14765 }, { "epoch": 0.6154808053019882, "grad_norm": 410.0, "learning_rate": 3.4014742331217516e-05, "loss": 15.938, "step": 14766 }, { "epoch": 0.6155224875995164, "grad_norm": 544.0, "learning_rate": 3.4008346721038975e-05, "loss": 16.8752, "step": 14767 }, { "epoch": 0.6155641698970448, "grad_norm": 260.0, "learning_rate": 3.400195140230971e-05, "loss": 14.3758, "step": 14768 }, { "epoch": 0.615605852194573, "grad_norm": 145.0, "learning_rate": 3.399555637514633e-05, "loss": 8.6878, "step": 14769 }, { "epoch": 0.6156475344921012, "grad_norm": 250.0, "learning_rate": 3.3989161639665326e-05, "loss": 12.6257, "step": 14770 }, { "epoch": 0.6156892167896294, "grad_norm": 328.0, "learning_rate": 3.39827671959833e-05, "loss": 12.1879, "step": 14771 }, { "epoch": 0.6157308990871577, "grad_norm": 62.25, "learning_rate": 3.397637304421674e-05, "loss": 8.4385, "step": 14772 }, { "epoch": 0.6157725813846859, "grad_norm": 110.5, "learning_rate": 3.396997918448223e-05, "loss": 8.8128, "step": 14773 }, { "epoch": 0.6158142636822141, "grad_norm": 604.0, "learning_rate": 3.3963585616896244e-05, "loss": 18.1256, "step": 14774 }, { "epoch": 0.6158559459797424, "grad_norm": 504.0, "learning_rate": 3.395719234157535e-05, "loss": 19.1253, "step": 14775 }, { "epoch": 0.6158976282772707, "grad_norm": 612.0, "learning_rate": 3.3950799358636045e-05, "loss": 20.1252, "step": 14776 }, { "epoch": 0.6159393105747989, "grad_norm": 788.0, "learning_rate": 3.394440666819485e-05, "loss": 20.7526, "step": 14777 }, { "epoch": 0.6159809928723271, "grad_norm": 79.5, "learning_rate": 3.393801427036826e-05, "loss": 8.564, "step": 14778 }, { "epoch": 0.6160226751698553, "grad_norm": 338.0, "learning_rate": 3.3931622165272803e-05, "loss": 15.1885, "step": 14779 }, { "epoch": 0.6160643574673836, "grad_norm": 202.0, "learning_rate": 3.3925230353024935e-05, "loss": 11.0015, "step": 14780 }, { "epoch": 0.6161060397649119, "grad_norm": 460.0, "learning_rate": 3.391883883374119e-05, "loss": 16.6252, "step": 14781 }, { "epoch": 0.6161477220624401, "grad_norm": 402.0, "learning_rate": 3.391244760753802e-05, "loss": 12.6916, "step": 14782 }, { "epoch": 0.6161894043599683, "grad_norm": 696.0, "learning_rate": 3.390605667453195e-05, "loss": 22.0003, "step": 14783 }, { "epoch": 0.6162310866574966, "grad_norm": 216.0, "learning_rate": 3.389966603483939e-05, "loss": 12.1256, "step": 14784 }, { "epoch": 0.6162727689550248, "grad_norm": 704.0, "learning_rate": 3.389327568857687e-05, "loss": 18.7555, "step": 14785 }, { "epoch": 0.616314451252553, "grad_norm": 224.0, "learning_rate": 3.388688563586081e-05, "loss": 12.8753, "step": 14786 }, { "epoch": 0.6163561335500812, "grad_norm": 436.0, "learning_rate": 3.3880495876807716e-05, "loss": 15.6253, "step": 14787 }, { "epoch": 0.6163978158476096, "grad_norm": 444.0, "learning_rate": 3.387410641153399e-05, "loss": 16.5005, "step": 14788 }, { "epoch": 0.6164394981451378, "grad_norm": 1656.0, "learning_rate": 3.3867717240156115e-05, "loss": 31.2534, "step": 14789 }, { "epoch": 0.616481180442666, "grad_norm": 231.0, "learning_rate": 3.3861328362790524e-05, "loss": 11.938, "step": 14790 }, { "epoch": 0.6165228627401942, "grad_norm": 372.0, "learning_rate": 3.385493977955366e-05, "loss": 14.3754, "step": 14791 }, { "epoch": 0.6165645450377225, "grad_norm": 476.0, "learning_rate": 3.384855149056194e-05, "loss": 17.7502, "step": 14792 }, { "epoch": 0.6166062273352507, "grad_norm": 406.0, "learning_rate": 3.3842163495931826e-05, "loss": 17.1281, "step": 14793 }, { "epoch": 0.6166479096327789, "grad_norm": 209.0, "learning_rate": 3.38357757957797e-05, "loss": 11.3754, "step": 14794 }, { "epoch": 0.6166895919303071, "grad_norm": 972.0, "learning_rate": 3.3829388390222006e-05, "loss": 27.2505, "step": 14795 }, { "epoch": 0.6167312742278355, "grad_norm": 124.5, "learning_rate": 3.382300127937513e-05, "loss": 9.5626, "step": 14796 }, { "epoch": 0.6167729565253637, "grad_norm": 284.0, "learning_rate": 3.381661446335551e-05, "loss": 11.6253, "step": 14797 }, { "epoch": 0.6168146388228919, "grad_norm": 320.0, "learning_rate": 3.38102279422795e-05, "loss": 14.5627, "step": 14798 }, { "epoch": 0.6168563211204201, "grad_norm": 145.0, "learning_rate": 3.3803841716263564e-05, "loss": 8.1255, "step": 14799 }, { "epoch": 0.6168980034179484, "grad_norm": 342.0, "learning_rate": 3.379745578542401e-05, "loss": 14.0641, "step": 14800 }, { "epoch": 0.6169396857154766, "grad_norm": 528.0, "learning_rate": 3.379107014987728e-05, "loss": 17.0003, "step": 14801 }, { "epoch": 0.6169813680130048, "grad_norm": 506.0, "learning_rate": 3.3784684809739745e-05, "loss": 18.0002, "step": 14802 }, { "epoch": 0.6170230503105332, "grad_norm": 544.0, "learning_rate": 3.3778299765127753e-05, "loss": 17.6254, "step": 14803 }, { "epoch": 0.6170647326080614, "grad_norm": 556.0, "learning_rate": 3.3771915016157684e-05, "loss": 16.8786, "step": 14804 }, { "epoch": 0.6171064149055896, "grad_norm": 90.0, "learning_rate": 3.3765530562945926e-05, "loss": 10.3133, "step": 14805 }, { "epoch": 0.6171480972031178, "grad_norm": 189.0, "learning_rate": 3.3759146405608794e-05, "loss": 11.5004, "step": 14806 }, { "epoch": 0.6171897795006461, "grad_norm": 338.0, "learning_rate": 3.3752762544262676e-05, "loss": 14.8128, "step": 14807 }, { "epoch": 0.6172314617981743, "grad_norm": 392.0, "learning_rate": 3.374637897902389e-05, "loss": 15.5626, "step": 14808 }, { "epoch": 0.6172731440957026, "grad_norm": 464.0, "learning_rate": 3.373999571000881e-05, "loss": 15.8753, "step": 14809 }, { "epoch": 0.6173148263932308, "grad_norm": 224.0, "learning_rate": 3.373361273733373e-05, "loss": 12.0001, "step": 14810 }, { "epoch": 0.6173565086907591, "grad_norm": 192.0, "learning_rate": 3.372723006111501e-05, "loss": 11.126, "step": 14811 }, { "epoch": 0.6173981909882873, "grad_norm": 628.0, "learning_rate": 3.372084768146896e-05, "loss": 18.7535, "step": 14812 }, { "epoch": 0.6174398732858155, "grad_norm": 768.0, "learning_rate": 3.371446559851191e-05, "loss": 21.5003, "step": 14813 }, { "epoch": 0.6174815555833437, "grad_norm": 304.0, "learning_rate": 3.370808381236018e-05, "loss": 14.5627, "step": 14814 }, { "epoch": 0.617523237880872, "grad_norm": 784.0, "learning_rate": 3.370170232313006e-05, "loss": 19.5043, "step": 14815 }, { "epoch": 0.6175649201784003, "grad_norm": 255.0, "learning_rate": 3.369532113093785e-05, "loss": 11.9377, "step": 14816 }, { "epoch": 0.6176066024759285, "grad_norm": 334.0, "learning_rate": 3.3688940235899894e-05, "loss": 14.2503, "step": 14817 }, { "epoch": 0.6176482847734567, "grad_norm": 138.0, "learning_rate": 3.368255963813241e-05, "loss": 9.6257, "step": 14818 }, { "epoch": 0.617689967070985, "grad_norm": 238.0, "learning_rate": 3.3676179337751756e-05, "loss": 11.4377, "step": 14819 }, { "epoch": 0.6177316493685132, "grad_norm": 164.0, "learning_rate": 3.366979933487416e-05, "loss": 9.1881, "step": 14820 }, { "epoch": 0.6177733316660414, "grad_norm": 338.0, "learning_rate": 3.3663419629615946e-05, "loss": 14.0005, "step": 14821 }, { "epoch": 0.6178150139635696, "grad_norm": 310.0, "learning_rate": 3.3657040222093336e-05, "loss": 13.1254, "step": 14822 }, { "epoch": 0.617856696261098, "grad_norm": 300.0, "learning_rate": 3.3650661112422644e-05, "loss": 13.1885, "step": 14823 }, { "epoch": 0.6178983785586262, "grad_norm": 97.0, "learning_rate": 3.364428230072008e-05, "loss": 7.2815, "step": 14824 }, { "epoch": 0.6179400608561544, "grad_norm": 350.0, "learning_rate": 3.3637903787101944e-05, "loss": 15.3752, "step": 14825 }, { "epoch": 0.6179817431536826, "grad_norm": 332.0, "learning_rate": 3.3631525571684444e-05, "loss": 14.1252, "step": 14826 }, { "epoch": 0.6180234254512109, "grad_norm": 560.0, "learning_rate": 3.362514765458384e-05, "loss": 16.2512, "step": 14827 }, { "epoch": 0.6180651077487391, "grad_norm": 668.0, "learning_rate": 3.361877003591638e-05, "loss": 21.2504, "step": 14828 }, { "epoch": 0.6181067900462673, "grad_norm": 212.0, "learning_rate": 3.3612392715798306e-05, "loss": 10.5014, "step": 14829 }, { "epoch": 0.6181484723437956, "grad_norm": 227.0, "learning_rate": 3.360601569434581e-05, "loss": 12.0006, "step": 14830 }, { "epoch": 0.6181901546413239, "grad_norm": 216.0, "learning_rate": 3.359963897167515e-05, "loss": 12.3753, "step": 14831 }, { "epoch": 0.6182318369388521, "grad_norm": 808.0, "learning_rate": 3.3593262547902515e-05, "loss": 20.8756, "step": 14832 }, { "epoch": 0.6182735192363803, "grad_norm": 272.0, "learning_rate": 3.3586886423144135e-05, "loss": 10.6258, "step": 14833 }, { "epoch": 0.6183152015339085, "grad_norm": 85.5, "learning_rate": 3.358051059751619e-05, "loss": 9.6877, "step": 14834 }, { "epoch": 0.6183568838314368, "grad_norm": 114.5, "learning_rate": 3.357413507113493e-05, "loss": 9.6252, "step": 14835 }, { "epoch": 0.618398566128965, "grad_norm": 796.0, "learning_rate": 3.356775984411647e-05, "loss": 22.6274, "step": 14836 }, { "epoch": 0.6184402484264933, "grad_norm": 320.0, "learning_rate": 3.3561384916577086e-05, "loss": 13.0003, "step": 14837 }, { "epoch": 0.6184819307240215, "grad_norm": 904.0, "learning_rate": 3.35550102886329e-05, "loss": 24.7503, "step": 14838 }, { "epoch": 0.6185236130215498, "grad_norm": 90.5, "learning_rate": 3.354863596040012e-05, "loss": 10.1881, "step": 14839 }, { "epoch": 0.618565295319078, "grad_norm": 213.0, "learning_rate": 3.3542261931994905e-05, "loss": 12.438, "step": 14840 }, { "epoch": 0.6186069776166062, "grad_norm": 185.0, "learning_rate": 3.353588820353343e-05, "loss": 10.7503, "step": 14841 }, { "epoch": 0.6186486599141344, "grad_norm": 91.0, "learning_rate": 3.352951477513184e-05, "loss": 10.5005, "step": 14842 }, { "epoch": 0.6186903422116627, "grad_norm": 1112.0, "learning_rate": 3.352314164690633e-05, "loss": 30.5008, "step": 14843 }, { "epoch": 0.618732024509191, "grad_norm": 416.0, "learning_rate": 3.351676881897301e-05, "loss": 15.7503, "step": 14844 }, { "epoch": 0.6187737068067192, "grad_norm": 836.0, "learning_rate": 3.351039629144805e-05, "loss": 24.8755, "step": 14845 }, { "epoch": 0.6188153891042474, "grad_norm": 540.0, "learning_rate": 3.3504024064447575e-05, "loss": 16.5032, "step": 14846 }, { "epoch": 0.6188570714017757, "grad_norm": 274.0, "learning_rate": 3.349765213808774e-05, "loss": 12.6876, "step": 14847 }, { "epoch": 0.6188987536993039, "grad_norm": 150.0, "learning_rate": 3.3491280512484634e-05, "loss": 9.5627, "step": 14848 }, { "epoch": 0.6189404359968321, "grad_norm": 524.0, "learning_rate": 3.3484909187754434e-05, "loss": 17.3752, "step": 14849 }, { "epoch": 0.6189821182943603, "grad_norm": 368.0, "learning_rate": 3.3478538164013204e-05, "loss": 14.8127, "step": 14850 }, { "epoch": 0.6190238005918887, "grad_norm": 352.0, "learning_rate": 3.34721674413771e-05, "loss": 14.313, "step": 14851 }, { "epoch": 0.6190654828894169, "grad_norm": 1064.0, "learning_rate": 3.346579701996221e-05, "loss": 27.7502, "step": 14852 }, { "epoch": 0.6191071651869451, "grad_norm": 472.0, "learning_rate": 3.345942689988463e-05, "loss": 16.8751, "step": 14853 }, { "epoch": 0.6191488474844733, "grad_norm": 446.0, "learning_rate": 3.345305708126046e-05, "loss": 16.6251, "step": 14854 }, { "epoch": 0.6191905297820016, "grad_norm": 262.0, "learning_rate": 3.344668756420581e-05, "loss": 10.8757, "step": 14855 }, { "epoch": 0.6192322120795298, "grad_norm": 186.0, "learning_rate": 3.344031834883673e-05, "loss": 9.5008, "step": 14856 }, { "epoch": 0.619273894377058, "grad_norm": 462.0, "learning_rate": 3.343394943526934e-05, "loss": 16.2503, "step": 14857 }, { "epoch": 0.6193155766745863, "grad_norm": 364.0, "learning_rate": 3.342758082361967e-05, "loss": 14.7504, "step": 14858 }, { "epoch": 0.6193572589721146, "grad_norm": 185.0, "learning_rate": 3.342121251400383e-05, "loss": 9.7501, "step": 14859 }, { "epoch": 0.6193989412696428, "grad_norm": 1304.0, "learning_rate": 3.341484450653784e-05, "loss": 27.6297, "step": 14860 }, { "epoch": 0.619440623567171, "grad_norm": 83.0, "learning_rate": 3.3408476801337815e-05, "loss": 7.3449, "step": 14861 }, { "epoch": 0.6194823058646992, "grad_norm": 624.0, "learning_rate": 3.340210939851974e-05, "loss": 17.5001, "step": 14862 }, { "epoch": 0.6195239881622275, "grad_norm": 326.0, "learning_rate": 3.3395742298199715e-05, "loss": 14.4377, "step": 14863 }, { "epoch": 0.6195656704597557, "grad_norm": 464.0, "learning_rate": 3.3389375500493744e-05, "loss": 17.3755, "step": 14864 }, { "epoch": 0.619607352757284, "grad_norm": 368.0, "learning_rate": 3.33830090055179e-05, "loss": 14.8761, "step": 14865 }, { "epoch": 0.6196490350548122, "grad_norm": 276.0, "learning_rate": 3.3376642813388165e-05, "loss": 13.2502, "step": 14866 }, { "epoch": 0.6196907173523405, "grad_norm": 180.0, "learning_rate": 3.3370276924220616e-05, "loss": 11.4379, "step": 14867 }, { "epoch": 0.6197323996498687, "grad_norm": 382.0, "learning_rate": 3.336391133813123e-05, "loss": 16.3754, "step": 14868 }, { "epoch": 0.6197740819473969, "grad_norm": 248.0, "learning_rate": 3.3357546055236055e-05, "loss": 12.9379, "step": 14869 }, { "epoch": 0.6198157642449251, "grad_norm": 158.0, "learning_rate": 3.3351181075651055e-05, "loss": 9.6876, "step": 14870 }, { "epoch": 0.6198574465424534, "grad_norm": 57.5, "learning_rate": 3.3344816399492285e-05, "loss": 8.0008, "step": 14871 }, { "epoch": 0.6198991288399817, "grad_norm": 296.0, "learning_rate": 3.3338452026875686e-05, "loss": 13.2502, "step": 14872 }, { "epoch": 0.6199408111375099, "grad_norm": 430.0, "learning_rate": 3.333208795791731e-05, "loss": 15.688, "step": 14873 }, { "epoch": 0.6199824934350381, "grad_norm": 132.0, "learning_rate": 3.332572419273308e-05, "loss": 9.3753, "step": 14874 }, { "epoch": 0.6200241757325664, "grad_norm": 312.0, "learning_rate": 3.3319360731439034e-05, "loss": 13.4378, "step": 14875 }, { "epoch": 0.6200658580300946, "grad_norm": 784.0, "learning_rate": 3.3312997574151095e-05, "loss": 22.1252, "step": 14876 }, { "epoch": 0.6201075403276228, "grad_norm": 676.0, "learning_rate": 3.3306634720985266e-05, "loss": 19.5002, "step": 14877 }, { "epoch": 0.6201492226251512, "grad_norm": 454.0, "learning_rate": 3.3300272172057505e-05, "loss": 16.3755, "step": 14878 }, { "epoch": 0.6201909049226794, "grad_norm": 486.0, "learning_rate": 3.329390992748377e-05, "loss": 17.5002, "step": 14879 }, { "epoch": 0.6202325872202076, "grad_norm": 448.0, "learning_rate": 3.328754798737998e-05, "loss": 15.6878, "step": 14880 }, { "epoch": 0.6202742695177358, "grad_norm": 406.0, "learning_rate": 3.328118635186215e-05, "loss": 16.6253, "step": 14881 }, { "epoch": 0.6203159518152641, "grad_norm": 434.0, "learning_rate": 3.3274825021046164e-05, "loss": 14.2519, "step": 14882 }, { "epoch": 0.6203576341127923, "grad_norm": 240.0, "learning_rate": 3.326846399504799e-05, "loss": 11.4381, "step": 14883 }, { "epoch": 0.6203993164103205, "grad_norm": 156.0, "learning_rate": 3.326210327398352e-05, "loss": 10.1253, "step": 14884 }, { "epoch": 0.6204409987078487, "grad_norm": 105.0, "learning_rate": 3.3255742857968734e-05, "loss": 8.5629, "step": 14885 }, { "epoch": 0.6204826810053771, "grad_norm": 370.0, "learning_rate": 3.324938274711949e-05, "loss": 17.3752, "step": 14886 }, { "epoch": 0.6205243633029053, "grad_norm": 258.0, "learning_rate": 3.324302294155177e-05, "loss": 10.6253, "step": 14887 }, { "epoch": 0.6205660456004335, "grad_norm": 272.0, "learning_rate": 3.3236663441381413e-05, "loss": 11.2502, "step": 14888 }, { "epoch": 0.6206077278979617, "grad_norm": 254.0, "learning_rate": 3.3230304246724364e-05, "loss": 12.3131, "step": 14889 }, { "epoch": 0.62064941019549, "grad_norm": 864.0, "learning_rate": 3.3223945357696506e-05, "loss": 22.377, "step": 14890 }, { "epoch": 0.6206910924930182, "grad_norm": 181.0, "learning_rate": 3.3217586774413737e-05, "loss": 11.6878, "step": 14891 }, { "epoch": 0.6207327747905464, "grad_norm": 354.0, "learning_rate": 3.321122849699193e-05, "loss": 14.5642, "step": 14892 }, { "epoch": 0.6207744570880747, "grad_norm": 270.0, "learning_rate": 3.320487052554699e-05, "loss": 10.7519, "step": 14893 }, { "epoch": 0.620816139385603, "grad_norm": 147.0, "learning_rate": 3.319851286019475e-05, "loss": 10.2502, "step": 14894 }, { "epoch": 0.6208578216831312, "grad_norm": 410.0, "learning_rate": 3.319215550105114e-05, "loss": 15.4387, "step": 14895 }, { "epoch": 0.6208995039806594, "grad_norm": 123.5, "learning_rate": 3.318579844823195e-05, "loss": 10.5631, "step": 14896 }, { "epoch": 0.6209411862781876, "grad_norm": 494.0, "learning_rate": 3.317944170185311e-05, "loss": 14.4423, "step": 14897 }, { "epoch": 0.6209828685757159, "grad_norm": 183.0, "learning_rate": 3.317308526203041e-05, "loss": 11.0002, "step": 14898 }, { "epoch": 0.6210245508732442, "grad_norm": 159.0, "learning_rate": 3.316672912887975e-05, "loss": 10.5629, "step": 14899 }, { "epoch": 0.6210662331707724, "grad_norm": 400.0, "learning_rate": 3.316037330251693e-05, "loss": 16.0003, "step": 14900 }, { "epoch": 0.6211079154683006, "grad_norm": 1120.0, "learning_rate": 3.3154017783057804e-05, "loss": 23.8801, "step": 14901 }, { "epoch": 0.6211495977658289, "grad_norm": 338.0, "learning_rate": 3.3147662570618196e-05, "loss": 15.0014, "step": 14902 }, { "epoch": 0.6211912800633571, "grad_norm": 57.5, "learning_rate": 3.314130766531395e-05, "loss": 7.469, "step": 14903 }, { "epoch": 0.6212329623608853, "grad_norm": 390.0, "learning_rate": 3.3134953067260845e-05, "loss": 14.2502, "step": 14904 }, { "epoch": 0.6212746446584135, "grad_norm": 428.0, "learning_rate": 3.3128598776574735e-05, "loss": 15.9379, "step": 14905 }, { "epoch": 0.6213163269559419, "grad_norm": 82.5, "learning_rate": 3.312224479337139e-05, "loss": 8.1892, "step": 14906 }, { "epoch": 0.6213580092534701, "grad_norm": 508.0, "learning_rate": 3.3115891117766665e-05, "loss": 18.0009, "step": 14907 }, { "epoch": 0.6213996915509983, "grad_norm": 274.0, "learning_rate": 3.3109537749876296e-05, "loss": 14.5639, "step": 14908 }, { "epoch": 0.6214413738485265, "grad_norm": 224.0, "learning_rate": 3.310318468981612e-05, "loss": 10.8754, "step": 14909 }, { "epoch": 0.6214830561460548, "grad_norm": 548.0, "learning_rate": 3.309683193770188e-05, "loss": 18.0005, "step": 14910 }, { "epoch": 0.621524738443583, "grad_norm": 378.0, "learning_rate": 3.30904794936494e-05, "loss": 14.9401, "step": 14911 }, { "epoch": 0.6215664207411112, "grad_norm": 231.0, "learning_rate": 3.308412735777442e-05, "loss": 10.3135, "step": 14912 }, { "epoch": 0.6216081030386394, "grad_norm": 402.0, "learning_rate": 3.307777553019273e-05, "loss": 15.5006, "step": 14913 }, { "epoch": 0.6216497853361678, "grad_norm": 99.0, "learning_rate": 3.307142401102007e-05, "loss": 9.5638, "step": 14914 }, { "epoch": 0.621691467633696, "grad_norm": 348.0, "learning_rate": 3.306507280037221e-05, "loss": 14.7502, "step": 14915 }, { "epoch": 0.6217331499312242, "grad_norm": 101.0, "learning_rate": 3.305872189836491e-05, "loss": 7.6565, "step": 14916 }, { "epoch": 0.6217748322287524, "grad_norm": 156.0, "learning_rate": 3.305237130511391e-05, "loss": 8.5005, "step": 14917 }, { "epoch": 0.6218165145262807, "grad_norm": 157.0, "learning_rate": 3.304602102073493e-05, "loss": 9.6877, "step": 14918 }, { "epoch": 0.6218581968238089, "grad_norm": 416.0, "learning_rate": 3.3039671045343756e-05, "loss": 15.6253, "step": 14919 }, { "epoch": 0.6218998791213372, "grad_norm": 204.0, "learning_rate": 3.303332137905605e-05, "loss": 10.9376, "step": 14920 }, { "epoch": 0.6219415614188654, "grad_norm": 316.0, "learning_rate": 3.302697202198759e-05, "loss": 13.5629, "step": 14921 }, { "epoch": 0.6219832437163937, "grad_norm": 1336.0, "learning_rate": 3.302062297425406e-05, "loss": 28.6302, "step": 14922 }, { "epoch": 0.6220249260139219, "grad_norm": 470.0, "learning_rate": 3.301427423597119e-05, "loss": 15.5652, "step": 14923 }, { "epoch": 0.6220666083114501, "grad_norm": 264.0, "learning_rate": 3.300792580725466e-05, "loss": 12.5002, "step": 14924 }, { "epoch": 0.6221082906089783, "grad_norm": 736.0, "learning_rate": 3.300157768822022e-05, "loss": 21.8754, "step": 14925 }, { "epoch": 0.6221499729065066, "grad_norm": 412.0, "learning_rate": 3.2995229878983516e-05, "loss": 16.1264, "step": 14926 }, { "epoch": 0.6221916552040349, "grad_norm": 312.0, "learning_rate": 3.2988882379660254e-05, "loss": 13.6255, "step": 14927 }, { "epoch": 0.6222333375015631, "grad_norm": 126.0, "learning_rate": 3.2982535190366136e-05, "loss": 7.3753, "step": 14928 }, { "epoch": 0.6222750197990913, "grad_norm": 170.0, "learning_rate": 3.2976188311216823e-05, "loss": 11.8753, "step": 14929 }, { "epoch": 0.6223167020966196, "grad_norm": 306.0, "learning_rate": 3.2969841742327975e-05, "loss": 13.1888, "step": 14930 }, { "epoch": 0.6223583843941478, "grad_norm": 284.0, "learning_rate": 3.296349548381529e-05, "loss": 12.0004, "step": 14931 }, { "epoch": 0.622400066691676, "grad_norm": 213.0, "learning_rate": 3.2957149535794395e-05, "loss": 11.0626, "step": 14932 }, { "epoch": 0.6224417489892042, "grad_norm": 516.0, "learning_rate": 3.2950803898380984e-05, "loss": 18.0004, "step": 14933 }, { "epoch": 0.6224834312867326, "grad_norm": 195.0, "learning_rate": 3.294445857169066e-05, "loss": 12.1883, "step": 14934 }, { "epoch": 0.6225251135842608, "grad_norm": 608.0, "learning_rate": 3.2938113555839125e-05, "loss": 19.5003, "step": 14935 }, { "epoch": 0.622566795881789, "grad_norm": 430.0, "learning_rate": 3.293176885094196e-05, "loss": 17.1255, "step": 14936 }, { "epoch": 0.6226084781793172, "grad_norm": 197.0, "learning_rate": 3.2925424457114836e-05, "loss": 12.2508, "step": 14937 }, { "epoch": 0.6226501604768455, "grad_norm": 332.0, "learning_rate": 3.291908037447335e-05, "loss": 14.4377, "step": 14938 }, { "epoch": 0.6226918427743737, "grad_norm": 204.0, "learning_rate": 3.291273660313316e-05, "loss": 12.1878, "step": 14939 }, { "epoch": 0.6227335250719019, "grad_norm": 195.0, "learning_rate": 3.290639314320985e-05, "loss": 11.5001, "step": 14940 }, { "epoch": 0.6227752073694302, "grad_norm": 572.0, "learning_rate": 3.2900049994819035e-05, "loss": 18.1253, "step": 14941 }, { "epoch": 0.6228168896669585, "grad_norm": 170.0, "learning_rate": 3.289370715807634e-05, "loss": 10.8752, "step": 14942 }, { "epoch": 0.6228585719644867, "grad_norm": 123.5, "learning_rate": 3.288736463309735e-05, "loss": 9.5002, "step": 14943 }, { "epoch": 0.6229002542620149, "grad_norm": 466.0, "learning_rate": 3.2881022419997654e-05, "loss": 15.6878, "step": 14944 }, { "epoch": 0.6229419365595431, "grad_norm": 1656.0, "learning_rate": 3.2874680518892855e-05, "loss": 33.2502, "step": 14945 }, { "epoch": 0.6229836188570714, "grad_norm": 247.0, "learning_rate": 3.28683389298985e-05, "loss": 11.3754, "step": 14946 }, { "epoch": 0.6230253011545996, "grad_norm": 516.0, "learning_rate": 3.2861997653130216e-05, "loss": 16.3755, "step": 14947 }, { "epoch": 0.6230669834521279, "grad_norm": 438.0, "learning_rate": 3.285565668870353e-05, "loss": 16.5002, "step": 14948 }, { "epoch": 0.6231086657496562, "grad_norm": 394.0, "learning_rate": 3.284931603673404e-05, "loss": 15.1877, "step": 14949 }, { "epoch": 0.6231503480471844, "grad_norm": 74.0, "learning_rate": 3.2842975697337264e-05, "loss": 8.8752, "step": 14950 }, { "epoch": 0.6231920303447126, "grad_norm": 800.0, "learning_rate": 3.28366356706288e-05, "loss": 21.7508, "step": 14951 }, { "epoch": 0.6232337126422408, "grad_norm": 197.0, "learning_rate": 3.283029595672416e-05, "loss": 10.5013, "step": 14952 }, { "epoch": 0.6232753949397691, "grad_norm": 181.0, "learning_rate": 3.28239565557389e-05, "loss": 9.3129, "step": 14953 }, { "epoch": 0.6233170772372973, "grad_norm": 672.0, "learning_rate": 3.281761746778855e-05, "loss": 20.7503, "step": 14954 }, { "epoch": 0.6233587595348256, "grad_norm": 222.0, "learning_rate": 3.281127869298867e-05, "loss": 11.0631, "step": 14955 }, { "epoch": 0.6234004418323538, "grad_norm": 358.0, "learning_rate": 3.2804940231454746e-05, "loss": 14.5002, "step": 14956 }, { "epoch": 0.6234421241298821, "grad_norm": 138.0, "learning_rate": 3.279860208330233e-05, "loss": 8.6261, "step": 14957 }, { "epoch": 0.6234838064274103, "grad_norm": 418.0, "learning_rate": 3.279226424864689e-05, "loss": 16.8751, "step": 14958 }, { "epoch": 0.6235254887249385, "grad_norm": 234.0, "learning_rate": 3.278592672760399e-05, "loss": 11.4379, "step": 14959 }, { "epoch": 0.6235671710224667, "grad_norm": 346.0, "learning_rate": 3.277958952028908e-05, "loss": 15.1253, "step": 14960 }, { "epoch": 0.623608853319995, "grad_norm": 688.0, "learning_rate": 3.2773252626817705e-05, "loss": 20.6255, "step": 14961 }, { "epoch": 0.6236505356175233, "grad_norm": 336.0, "learning_rate": 3.27669160473053e-05, "loss": 14.4379, "step": 14962 }, { "epoch": 0.6236922179150515, "grad_norm": 332.0, "learning_rate": 3.2760579781867405e-05, "loss": 15.2505, "step": 14963 }, { "epoch": 0.6237339002125797, "grad_norm": 568.0, "learning_rate": 3.275424383061946e-05, "loss": 19.3762, "step": 14964 }, { "epoch": 0.623775582510108, "grad_norm": 296.0, "learning_rate": 3.274790819367696e-05, "loss": 10.313, "step": 14965 }, { "epoch": 0.6238172648076362, "grad_norm": 117.5, "learning_rate": 3.2741572871155356e-05, "loss": 8.1254, "step": 14966 }, { "epoch": 0.6238589471051644, "grad_norm": 308.0, "learning_rate": 3.2735237863170123e-05, "loss": 13.4379, "step": 14967 }, { "epoch": 0.6239006294026926, "grad_norm": 524.0, "learning_rate": 3.27289031698367e-05, "loss": 18.0005, "step": 14968 }, { "epoch": 0.623942311700221, "grad_norm": 165.0, "learning_rate": 3.272256879127058e-05, "loss": 12.3755, "step": 14969 }, { "epoch": 0.6239839939977492, "grad_norm": 143.0, "learning_rate": 3.271623472758715e-05, "loss": 10.0631, "step": 14970 }, { "epoch": 0.6240256762952774, "grad_norm": 340.0, "learning_rate": 3.27099009789019e-05, "loss": 14.5006, "step": 14971 }, { "epoch": 0.6240673585928056, "grad_norm": 133.0, "learning_rate": 3.270356754533021e-05, "loss": 9.6878, "step": 14972 }, { "epoch": 0.6241090408903339, "grad_norm": 1032.0, "learning_rate": 3.269723442698757e-05, "loss": 23.6282, "step": 14973 }, { "epoch": 0.6241507231878621, "grad_norm": 176.0, "learning_rate": 3.2690901623989337e-05, "loss": 11.8126, "step": 14974 }, { "epoch": 0.6241924054853903, "grad_norm": 144.0, "learning_rate": 3.268456913645098e-05, "loss": 10.0627, "step": 14975 }, { "epoch": 0.6242340877829186, "grad_norm": 302.0, "learning_rate": 3.2678236964487876e-05, "loss": 13.126, "step": 14976 }, { "epoch": 0.6242757700804469, "grad_norm": 122.0, "learning_rate": 3.267190510821545e-05, "loss": 6.5316, "step": 14977 }, { "epoch": 0.6243174523779751, "grad_norm": 426.0, "learning_rate": 3.266557356774909e-05, "loss": 15.9381, "step": 14978 }, { "epoch": 0.6243591346755033, "grad_norm": 135.0, "learning_rate": 3.265924234320418e-05, "loss": 10.7508, "step": 14979 }, { "epoch": 0.6244008169730315, "grad_norm": 228.0, "learning_rate": 3.265291143469612e-05, "loss": 10.063, "step": 14980 }, { "epoch": 0.6244424992705598, "grad_norm": 404.0, "learning_rate": 3.26465808423403e-05, "loss": 14.7503, "step": 14981 }, { "epoch": 0.624484181568088, "grad_norm": 294.0, "learning_rate": 3.264025056625207e-05, "loss": 11.3128, "step": 14982 }, { "epoch": 0.6245258638656163, "grad_norm": 158.0, "learning_rate": 3.2633920606546843e-05, "loss": 8.3753, "step": 14983 }, { "epoch": 0.6245675461631445, "grad_norm": 235.0, "learning_rate": 3.262759096333993e-05, "loss": 11.3753, "step": 14984 }, { "epoch": 0.6246092284606728, "grad_norm": 88.5, "learning_rate": 3.2621261636746724e-05, "loss": 8.313, "step": 14985 }, { "epoch": 0.624650910758201, "grad_norm": 148.0, "learning_rate": 3.261493262688256e-05, "loss": 10.0629, "step": 14986 }, { "epoch": 0.6246925930557292, "grad_norm": 588.0, "learning_rate": 3.26086039338628e-05, "loss": 19.5004, "step": 14987 }, { "epoch": 0.6247342753532574, "grad_norm": 296.0, "learning_rate": 3.260227555780276e-05, "loss": 14.1255, "step": 14988 }, { "epoch": 0.6247759576507858, "grad_norm": 368.0, "learning_rate": 3.2595947498817804e-05, "loss": 13.6258, "step": 14989 }, { "epoch": 0.624817639948314, "grad_norm": 322.0, "learning_rate": 3.258961975702325e-05, "loss": 13.8754, "step": 14990 }, { "epoch": 0.6248593222458422, "grad_norm": 382.0, "learning_rate": 3.2583292332534424e-05, "loss": 15.0003, "step": 14991 }, { "epoch": 0.6249010045433704, "grad_norm": 374.0, "learning_rate": 3.257696522546663e-05, "loss": 14.1251, "step": 14992 }, { "epoch": 0.6249426868408987, "grad_norm": 354.0, "learning_rate": 3.257063843593522e-05, "loss": 14.2505, "step": 14993 }, { "epoch": 0.6249843691384269, "grad_norm": 1536.0, "learning_rate": 3.256431196405544e-05, "loss": 28.0047, "step": 14994 }, { "epoch": 0.6250260514359551, "grad_norm": 1624.0, "learning_rate": 3.255798580994264e-05, "loss": 31.7588, "step": 14995 }, { "epoch": 0.6250677337334833, "grad_norm": 480.0, "learning_rate": 3.255165997371208e-05, "loss": 16.8754, "step": 14996 }, { "epoch": 0.6251094160310117, "grad_norm": 510.0, "learning_rate": 3.2545334455479094e-05, "loss": 16.7516, "step": 14997 }, { "epoch": 0.6251510983285399, "grad_norm": 430.0, "learning_rate": 3.253900925535891e-05, "loss": 15.0003, "step": 14998 }, { "epoch": 0.6251927806260681, "grad_norm": 1112.0, "learning_rate": 3.253268437346685e-05, "loss": 29.3753, "step": 14999 }, { "epoch": 0.6252344629235963, "grad_norm": 276.0, "learning_rate": 3.2526359809918154e-05, "loss": 12.6252, "step": 15000 }, { "epoch": 0.6252761452211246, "grad_norm": 264.0, "learning_rate": 3.252003556482812e-05, "loss": 13.0627, "step": 15001 }, { "epoch": 0.6253178275186528, "grad_norm": 276.0, "learning_rate": 3.251371163831197e-05, "loss": 7.6253, "step": 15002 }, { "epoch": 0.625359509816181, "grad_norm": 332.0, "learning_rate": 3.250738803048499e-05, "loss": 12.3752, "step": 15003 }, { "epoch": 0.6254011921137093, "grad_norm": 372.0, "learning_rate": 3.250106474146241e-05, "loss": 12.0024, "step": 15004 }, { "epoch": 0.6254428744112376, "grad_norm": 366.0, "learning_rate": 3.249474177135948e-05, "loss": 16.1255, "step": 15005 }, { "epoch": 0.6254845567087658, "grad_norm": 86.0, "learning_rate": 3.248841912029142e-05, "loss": 8.6257, "step": 15006 }, { "epoch": 0.625526239006294, "grad_norm": 252.0, "learning_rate": 3.2482096788373504e-05, "loss": 10.6878, "step": 15007 }, { "epoch": 0.6255679213038222, "grad_norm": 800.0, "learning_rate": 3.247577477572091e-05, "loss": 21.7503, "step": 15008 }, { "epoch": 0.6256096036013505, "grad_norm": 832.0, "learning_rate": 3.2469453082448896e-05, "loss": 23.5034, "step": 15009 }, { "epoch": 0.6256512858988788, "grad_norm": 260.0, "learning_rate": 3.246313170867263e-05, "loss": 12.5005, "step": 15010 }, { "epoch": 0.625692968196407, "grad_norm": 346.0, "learning_rate": 3.245681065450738e-05, "loss": 15.1254, "step": 15011 }, { "epoch": 0.6257346504939352, "grad_norm": 153.0, "learning_rate": 3.2450489920068275e-05, "loss": 9.7501, "step": 15012 }, { "epoch": 0.6257763327914635, "grad_norm": 536.0, "learning_rate": 3.2444169505470576e-05, "loss": 18.0003, "step": 15013 }, { "epoch": 0.6258180150889917, "grad_norm": 414.0, "learning_rate": 3.243784941082942e-05, "loss": 15.1879, "step": 15014 }, { "epoch": 0.6258596973865199, "grad_norm": 1304.0, "learning_rate": 3.2431529636260035e-05, "loss": 31.0002, "step": 15015 }, { "epoch": 0.6259013796840481, "grad_norm": 124.0, "learning_rate": 3.242521018187759e-05, "loss": 5.688, "step": 15016 }, { "epoch": 0.6259430619815765, "grad_norm": 246.0, "learning_rate": 3.241889104779724e-05, "loss": 12.5004, "step": 15017 }, { "epoch": 0.6259847442791047, "grad_norm": 122.0, "learning_rate": 3.2412572234134156e-05, "loss": 6.4377, "step": 15018 }, { "epoch": 0.6260264265766329, "grad_norm": 600.0, "learning_rate": 3.240625374100351e-05, "loss": 19.7502, "step": 15019 }, { "epoch": 0.6260681088741611, "grad_norm": 236.0, "learning_rate": 3.239993556852045e-05, "loss": 12.9381, "step": 15020 }, { "epoch": 0.6261097911716894, "grad_norm": 458.0, "learning_rate": 3.239361771680014e-05, "loss": 17.2505, "step": 15021 }, { "epoch": 0.6261514734692176, "grad_norm": 358.0, "learning_rate": 3.238730018595768e-05, "loss": 14.3753, "step": 15022 }, { "epoch": 0.6261931557667458, "grad_norm": 238.0, "learning_rate": 3.238098297610827e-05, "loss": 12.2504, "step": 15023 }, { "epoch": 0.6262348380642742, "grad_norm": 470.0, "learning_rate": 3.237466608736699e-05, "loss": 17.6253, "step": 15024 }, { "epoch": 0.6262765203618024, "grad_norm": 280.0, "learning_rate": 3.2368349519848996e-05, "loss": 13.0005, "step": 15025 }, { "epoch": 0.6263182026593306, "grad_norm": 884.0, "learning_rate": 3.236203327366938e-05, "loss": 24.6253, "step": 15026 }, { "epoch": 0.6263598849568588, "grad_norm": 296.0, "learning_rate": 3.2355717348943285e-05, "loss": 12.0005, "step": 15027 }, { "epoch": 0.6264015672543871, "grad_norm": 468.0, "learning_rate": 3.234940174578581e-05, "loss": 17.3759, "step": 15028 }, { "epoch": 0.6264432495519153, "grad_norm": 400.0, "learning_rate": 3.2343086464312054e-05, "loss": 15.7505, "step": 15029 }, { "epoch": 0.6264849318494435, "grad_norm": 334.0, "learning_rate": 3.23367715046371e-05, "loss": 14.5003, "step": 15030 }, { "epoch": 0.6265266141469717, "grad_norm": 248.0, "learning_rate": 3.233045686687608e-05, "loss": 11.6878, "step": 15031 }, { "epoch": 0.6265682964445001, "grad_norm": 314.0, "learning_rate": 3.232414255114403e-05, "loss": 11.8129, "step": 15032 }, { "epoch": 0.6266099787420283, "grad_norm": 418.0, "learning_rate": 3.231782855755607e-05, "loss": 16.5005, "step": 15033 }, { "epoch": 0.6266516610395565, "grad_norm": 528.0, "learning_rate": 3.231151488622724e-05, "loss": 18.6259, "step": 15034 }, { "epoch": 0.6266933433370847, "grad_norm": 74.5, "learning_rate": 3.230520153727263e-05, "loss": 6.6254, "step": 15035 }, { "epoch": 0.626735025634613, "grad_norm": 312.0, "learning_rate": 3.229888851080728e-05, "loss": 12.563, "step": 15036 }, { "epoch": 0.6267767079321412, "grad_norm": 490.0, "learning_rate": 3.229257580694628e-05, "loss": 16.2502, "step": 15037 }, { "epoch": 0.6268183902296695, "grad_norm": 298.0, "learning_rate": 3.228626342580463e-05, "loss": 12.2503, "step": 15038 }, { "epoch": 0.6268600725271977, "grad_norm": 478.0, "learning_rate": 3.227995136749743e-05, "loss": 16.6251, "step": 15039 }, { "epoch": 0.626901754824726, "grad_norm": 251.0, "learning_rate": 3.227363963213966e-05, "loss": 13.2504, "step": 15040 }, { "epoch": 0.6269434371222542, "grad_norm": 195.0, "learning_rate": 3.226732821984639e-05, "loss": 11.0627, "step": 15041 }, { "epoch": 0.6269851194197824, "grad_norm": 95.5, "learning_rate": 3.226101713073266e-05, "loss": 7.8754, "step": 15042 }, { "epoch": 0.6270268017173106, "grad_norm": 892.0, "learning_rate": 3.225470636491344e-05, "loss": 22.6252, "step": 15043 }, { "epoch": 0.627068484014839, "grad_norm": 174.0, "learning_rate": 3.224839592250378e-05, "loss": 10.3752, "step": 15044 }, { "epoch": 0.6271101663123672, "grad_norm": 115.5, "learning_rate": 3.224208580361868e-05, "loss": 9.4377, "step": 15045 }, { "epoch": 0.6271518486098954, "grad_norm": 474.0, "learning_rate": 3.223577600837315e-05, "loss": 16.2505, "step": 15046 }, { "epoch": 0.6271935309074236, "grad_norm": 412.0, "learning_rate": 3.222946653688217e-05, "loss": 15.4381, "step": 15047 }, { "epoch": 0.6272352132049519, "grad_norm": 600.0, "learning_rate": 3.2223157389260756e-05, "loss": 19.7502, "step": 15048 }, { "epoch": 0.6272768955024801, "grad_norm": 596.0, "learning_rate": 3.221684856562386e-05, "loss": 18.8755, "step": 15049 }, { "epoch": 0.6273185778000083, "grad_norm": 332.0, "learning_rate": 3.2210540066086495e-05, "loss": 13.3133, "step": 15050 }, { "epoch": 0.6273602600975365, "grad_norm": 292.0, "learning_rate": 3.2204231890763595e-05, "loss": 13.0635, "step": 15051 }, { "epoch": 0.6274019423950649, "grad_norm": 76.5, "learning_rate": 3.2197924039770167e-05, "loss": 8.3129, "step": 15052 }, { "epoch": 0.6274436246925931, "grad_norm": 115.5, "learning_rate": 3.2191616513221134e-05, "loss": 10.2502, "step": 15053 }, { "epoch": 0.6274853069901213, "grad_norm": 1312.0, "learning_rate": 3.218530931123149e-05, "loss": 24.8762, "step": 15054 }, { "epoch": 0.6275269892876495, "grad_norm": 80.5, "learning_rate": 3.217900243391615e-05, "loss": 8.9379, "step": 15055 }, { "epoch": 0.6275686715851778, "grad_norm": 238.0, "learning_rate": 3.217269588139008e-05, "loss": 12.2506, "step": 15056 }, { "epoch": 0.627610353882706, "grad_norm": 129.0, "learning_rate": 3.216638965376821e-05, "loss": 8.8127, "step": 15057 }, { "epoch": 0.6276520361802342, "grad_norm": 241.0, "learning_rate": 3.216008375116548e-05, "loss": 12.438, "step": 15058 }, { "epoch": 0.6276937184777625, "grad_norm": 588.0, "learning_rate": 3.215377817369679e-05, "loss": 19.1255, "step": 15059 }, { "epoch": 0.6277354007752908, "grad_norm": 198.0, "learning_rate": 3.2147472921477094e-05, "loss": 10.7506, "step": 15060 }, { "epoch": 0.627777083072819, "grad_norm": 176.0, "learning_rate": 3.214116799462127e-05, "loss": 11.5002, "step": 15061 }, { "epoch": 0.6278187653703472, "grad_norm": 600.0, "learning_rate": 3.213486339324426e-05, "loss": 18.6253, "step": 15062 }, { "epoch": 0.6278604476678754, "grad_norm": 330.0, "learning_rate": 3.212855911746094e-05, "loss": 14.9379, "step": 15063 }, { "epoch": 0.6279021299654037, "grad_norm": 173.0, "learning_rate": 3.212225516738624e-05, "loss": 10.3127, "step": 15064 }, { "epoch": 0.6279438122629319, "grad_norm": 1012.0, "learning_rate": 3.2115951543134996e-05, "loss": 26.6258, "step": 15065 }, { "epoch": 0.6279854945604602, "grad_norm": 96.0, "learning_rate": 3.210964824482215e-05, "loss": 8.8128, "step": 15066 }, { "epoch": 0.6280271768579884, "grad_norm": 668.0, "learning_rate": 3.210334527256252e-05, "loss": 21.7502, "step": 15067 }, { "epoch": 0.6280688591555167, "grad_norm": 438.0, "learning_rate": 3.209704262647104e-05, "loss": 16.5009, "step": 15068 }, { "epoch": 0.6281105414530449, "grad_norm": 82.0, "learning_rate": 3.2090740306662536e-05, "loss": 7.6571, "step": 15069 }, { "epoch": 0.6281522237505731, "grad_norm": 328.0, "learning_rate": 3.2084438313251884e-05, "loss": 13.7502, "step": 15070 }, { "epoch": 0.6281939060481013, "grad_norm": 708.0, "learning_rate": 3.207813664635392e-05, "loss": 20.6252, "step": 15071 }, { "epoch": 0.6282355883456296, "grad_norm": 360.0, "learning_rate": 3.207183530608353e-05, "loss": 14.0659, "step": 15072 }, { "epoch": 0.6282772706431579, "grad_norm": 484.0, "learning_rate": 3.206553429255551e-05, "loss": 15.7531, "step": 15073 }, { "epoch": 0.6283189529406861, "grad_norm": 396.0, "learning_rate": 3.2059233605884744e-05, "loss": 14.6879, "step": 15074 }, { "epoch": 0.6283606352382143, "grad_norm": 186.0, "learning_rate": 3.205293324618601e-05, "loss": 11.5629, "step": 15075 }, { "epoch": 0.6284023175357426, "grad_norm": 440.0, "learning_rate": 3.20466332135742e-05, "loss": 16.2509, "step": 15076 }, { "epoch": 0.6284439998332708, "grad_norm": 408.0, "learning_rate": 3.2040333508164056e-05, "loss": 16.3764, "step": 15077 }, { "epoch": 0.628485682130799, "grad_norm": 186.0, "learning_rate": 3.203403413007045e-05, "loss": 11.0006, "step": 15078 }, { "epoch": 0.6285273644283272, "grad_norm": 440.0, "learning_rate": 3.202773507940815e-05, "loss": 16.5004, "step": 15079 }, { "epoch": 0.6285690467258556, "grad_norm": 340.0, "learning_rate": 3.202143635629198e-05, "loss": 13.9376, "step": 15080 }, { "epoch": 0.6286107290233838, "grad_norm": 376.0, "learning_rate": 3.2015137960836736e-05, "loss": 14.7504, "step": 15081 }, { "epoch": 0.628652411320912, "grad_norm": 200.0, "learning_rate": 3.2008839893157196e-05, "loss": 11.0011, "step": 15082 }, { "epoch": 0.6286940936184402, "grad_norm": 184.0, "learning_rate": 3.2002542153368135e-05, "loss": 12.439, "step": 15083 }, { "epoch": 0.6287357759159685, "grad_norm": 270.0, "learning_rate": 3.1996244741584356e-05, "loss": 12.4377, "step": 15084 }, { "epoch": 0.6287774582134967, "grad_norm": 382.0, "learning_rate": 3.1989947657920596e-05, "loss": 14.9379, "step": 15085 }, { "epoch": 0.6288191405110249, "grad_norm": 320.0, "learning_rate": 3.1983650902491664e-05, "loss": 14.2507, "step": 15086 }, { "epoch": 0.6288608228085532, "grad_norm": 248.0, "learning_rate": 3.197735447541227e-05, "loss": 13.1254, "step": 15087 }, { "epoch": 0.6289025051060815, "grad_norm": 139.0, "learning_rate": 3.1971058376797214e-05, "loss": 10.8129, "step": 15088 }, { "epoch": 0.6289441874036097, "grad_norm": 488.0, "learning_rate": 3.19647626067612e-05, "loss": 16.6253, "step": 15089 }, { "epoch": 0.6289858697011379, "grad_norm": 976.0, "learning_rate": 3.1958467165419e-05, "loss": 26.7503, "step": 15090 }, { "epoch": 0.6290275519986661, "grad_norm": 241.0, "learning_rate": 3.195217205288533e-05, "loss": 11.2508, "step": 15091 }, { "epoch": 0.6290692342961944, "grad_norm": 117.0, "learning_rate": 3.194587726927494e-05, "loss": 8.9382, "step": 15092 }, { "epoch": 0.6291109165937226, "grad_norm": 316.0, "learning_rate": 3.193958281470252e-05, "loss": 14.0627, "step": 15093 }, { "epoch": 0.6291525988912509, "grad_norm": 76.0, "learning_rate": 3.1933288689282814e-05, "loss": 7.8439, "step": 15094 }, { "epoch": 0.6291942811887792, "grad_norm": 204.0, "learning_rate": 3.1926994893130525e-05, "loss": 11.6256, "step": 15095 }, { "epoch": 0.6292359634863074, "grad_norm": 450.0, "learning_rate": 3.192070142636037e-05, "loss": 15.1892, "step": 15096 }, { "epoch": 0.6292776457838356, "grad_norm": 326.0, "learning_rate": 3.191440828908701e-05, "loss": 14.0003, "step": 15097 }, { "epoch": 0.6293193280813638, "grad_norm": 1104.0, "learning_rate": 3.19081154814252e-05, "loss": 29.7503, "step": 15098 }, { "epoch": 0.6293610103788921, "grad_norm": 232.0, "learning_rate": 3.1901823003489555e-05, "loss": 11.6877, "step": 15099 }, { "epoch": 0.6294026926764204, "grad_norm": 229.0, "learning_rate": 3.1895530855394825e-05, "loss": 14.0629, "step": 15100 }, { "epoch": 0.6294443749739486, "grad_norm": 142.0, "learning_rate": 3.1889239037255626e-05, "loss": 10.063, "step": 15101 }, { "epoch": 0.6294860572714768, "grad_norm": 548.0, "learning_rate": 3.1882947549186674e-05, "loss": 18.8756, "step": 15102 }, { "epoch": 0.6295277395690051, "grad_norm": 434.0, "learning_rate": 3.1876656391302597e-05, "loss": 15.8751, "step": 15103 }, { "epoch": 0.6295694218665333, "grad_norm": 55.25, "learning_rate": 3.187036556371808e-05, "loss": 8.3752, "step": 15104 }, { "epoch": 0.6296111041640615, "grad_norm": 202.0, "learning_rate": 3.186407506654774e-05, "loss": 10.188, "step": 15105 }, { "epoch": 0.6296527864615897, "grad_norm": 180.0, "learning_rate": 3.185778489990625e-05, "loss": 10.8758, "step": 15106 }, { "epoch": 0.629694468759118, "grad_norm": 354.0, "learning_rate": 3.185149506390825e-05, "loss": 14.0032, "step": 15107 }, { "epoch": 0.6297361510566463, "grad_norm": 482.0, "learning_rate": 3.184520555866836e-05, "loss": 16.7503, "step": 15108 }, { "epoch": 0.6297778333541745, "grad_norm": 203.0, "learning_rate": 3.1838916384301194e-05, "loss": 10.2511, "step": 15109 }, { "epoch": 0.6298195156517027, "grad_norm": 988.0, "learning_rate": 3.183262754092141e-05, "loss": 27.7545, "step": 15110 }, { "epoch": 0.629861197949231, "grad_norm": 198.0, "learning_rate": 3.1826339028643595e-05, "loss": 10.8753, "step": 15111 }, { "epoch": 0.6299028802467592, "grad_norm": 264.0, "learning_rate": 3.1820050847582374e-05, "loss": 11.8753, "step": 15112 }, { "epoch": 0.6299445625442874, "grad_norm": 1056.0, "learning_rate": 3.181376299785231e-05, "loss": 26.7516, "step": 15113 }, { "epoch": 0.6299862448418156, "grad_norm": 298.0, "learning_rate": 3.180747547956807e-05, "loss": 13.1252, "step": 15114 }, { "epoch": 0.630027927139344, "grad_norm": 824.0, "learning_rate": 3.1801188292844176e-05, "loss": 23.6253, "step": 15115 }, { "epoch": 0.6300696094368722, "grad_norm": 126.0, "learning_rate": 3.179490143779525e-05, "loss": 10.3754, "step": 15116 }, { "epoch": 0.6301112917344004, "grad_norm": 880.0, "learning_rate": 3.1788614914535856e-05, "loss": 22.5046, "step": 15117 }, { "epoch": 0.6301529740319286, "grad_norm": 175.0, "learning_rate": 3.1782328723180575e-05, "loss": 9.313, "step": 15118 }, { "epoch": 0.6301946563294569, "grad_norm": 684.0, "learning_rate": 3.177604286384398e-05, "loss": 19.7505, "step": 15119 }, { "epoch": 0.6302363386269851, "grad_norm": 274.0, "learning_rate": 3.176975733664061e-05, "loss": 14.0008, "step": 15120 }, { "epoch": 0.6302780209245133, "grad_norm": 418.0, "learning_rate": 3.176347214168502e-05, "loss": 15.6876, "step": 15121 }, { "epoch": 0.6303197032220416, "grad_norm": 198.0, "learning_rate": 3.17571872790918e-05, "loss": 11.5003, "step": 15122 }, { "epoch": 0.6303613855195699, "grad_norm": 284.0, "learning_rate": 3.175090274897544e-05, "loss": 13.0002, "step": 15123 }, { "epoch": 0.6304030678170981, "grad_norm": 250.0, "learning_rate": 3.174461855145051e-05, "loss": 12.5631, "step": 15124 }, { "epoch": 0.6304447501146263, "grad_norm": 376.0, "learning_rate": 3.173833468663151e-05, "loss": 14.5024, "step": 15125 }, { "epoch": 0.6304864324121545, "grad_norm": 228.0, "learning_rate": 3.1732051154633e-05, "loss": 10.188, "step": 15126 }, { "epoch": 0.6305281147096828, "grad_norm": 494.0, "learning_rate": 3.172576795556946e-05, "loss": 15.8129, "step": 15127 }, { "epoch": 0.630569797007211, "grad_norm": 376.0, "learning_rate": 3.1719485089555444e-05, "loss": 15.7503, "step": 15128 }, { "epoch": 0.6306114793047393, "grad_norm": 508.0, "learning_rate": 3.171320255670541e-05, "loss": 14.5004, "step": 15129 }, { "epoch": 0.6306531616022675, "grad_norm": 173.0, "learning_rate": 3.1706920357133906e-05, "loss": 8.6255, "step": 15130 }, { "epoch": 0.6306948438997958, "grad_norm": 308.0, "learning_rate": 3.1700638490955384e-05, "loss": 13.5628, "step": 15131 }, { "epoch": 0.630736526197324, "grad_norm": 121.5, "learning_rate": 3.169435695828436e-05, "loss": 11.1253, "step": 15132 }, { "epoch": 0.6307782084948522, "grad_norm": 66.5, "learning_rate": 3.168807575923529e-05, "loss": 6.7821, "step": 15133 }, { "epoch": 0.6308198907923804, "grad_norm": 572.0, "learning_rate": 3.1681794893922695e-05, "loss": 18.6252, "step": 15134 }, { "epoch": 0.6308615730899088, "grad_norm": 214.0, "learning_rate": 3.167551436246099e-05, "loss": 12.1253, "step": 15135 }, { "epoch": 0.630903255387437, "grad_norm": 508.0, "learning_rate": 3.166923416496468e-05, "loss": 19.376, "step": 15136 }, { "epoch": 0.6309449376849652, "grad_norm": 175.0, "learning_rate": 3.166295430154819e-05, "loss": 10.6253, "step": 15137 }, { "epoch": 0.6309866199824934, "grad_norm": 716.0, "learning_rate": 3.1656674772326e-05, "loss": 20.8753, "step": 15138 }, { "epoch": 0.6310283022800217, "grad_norm": 132.0, "learning_rate": 3.165039557741252e-05, "loss": 9.4376, "step": 15139 }, { "epoch": 0.6310699845775499, "grad_norm": 240.0, "learning_rate": 3.164411671692223e-05, "loss": 11.8126, "step": 15140 }, { "epoch": 0.6311116668750781, "grad_norm": 376.0, "learning_rate": 3.163783819096952e-05, "loss": 12.1878, "step": 15141 }, { "epoch": 0.6311533491726063, "grad_norm": 274.0, "learning_rate": 3.1631559999668865e-05, "loss": 9.5008, "step": 15142 }, { "epoch": 0.6311950314701347, "grad_norm": 314.0, "learning_rate": 3.162528214313464e-05, "loss": 12.5628, "step": 15143 }, { "epoch": 0.6312367137676629, "grad_norm": 494.0, "learning_rate": 3.161900462148129e-05, "loss": 17.0003, "step": 15144 }, { "epoch": 0.6312783960651911, "grad_norm": 180.0, "learning_rate": 3.16127274348232e-05, "loss": 10.2504, "step": 15145 }, { "epoch": 0.6313200783627193, "grad_norm": 432.0, "learning_rate": 3.1606450583274795e-05, "loss": 16.0003, "step": 15146 }, { "epoch": 0.6313617606602476, "grad_norm": 312.0, "learning_rate": 3.160017406695045e-05, "loss": 13.4377, "step": 15147 }, { "epoch": 0.6314034429577758, "grad_norm": 306.0, "learning_rate": 3.159389788596459e-05, "loss": 14.0008, "step": 15148 }, { "epoch": 0.631445125255304, "grad_norm": 113.5, "learning_rate": 3.158762204043155e-05, "loss": 9.0003, "step": 15149 }, { "epoch": 0.6314868075528323, "grad_norm": 288.0, "learning_rate": 3.158134653046575e-05, "loss": 12.313, "step": 15150 }, { "epoch": 0.6315284898503606, "grad_norm": 138.0, "learning_rate": 3.157507135618153e-05, "loss": 9.8129, "step": 15151 }, { "epoch": 0.6315701721478888, "grad_norm": 231.0, "learning_rate": 3.156879651769329e-05, "loss": 11.563, "step": 15152 }, { "epoch": 0.631611854445417, "grad_norm": 660.0, "learning_rate": 3.1562522015115345e-05, "loss": 19.2508, "step": 15153 }, { "epoch": 0.6316535367429452, "grad_norm": 448.0, "learning_rate": 3.1556247848562096e-05, "loss": 16.3754, "step": 15154 }, { "epoch": 0.6316952190404735, "grad_norm": 332.0, "learning_rate": 3.154997401814784e-05, "loss": 14.0627, "step": 15155 }, { "epoch": 0.6317369013380018, "grad_norm": 179.0, "learning_rate": 3.154370052398697e-05, "loss": 10.3764, "step": 15156 }, { "epoch": 0.63177858363553, "grad_norm": 107.0, "learning_rate": 3.1537427366193784e-05, "loss": 8.1885, "step": 15157 }, { "epoch": 0.6318202659330582, "grad_norm": 414.0, "learning_rate": 3.1531154544882635e-05, "loss": 15.7505, "step": 15158 }, { "epoch": 0.6318619482305865, "grad_norm": 228.0, "learning_rate": 3.152488206016782e-05, "loss": 10.6876, "step": 15159 }, { "epoch": 0.6319036305281147, "grad_norm": 294.0, "learning_rate": 3.1518609912163673e-05, "loss": 13.1253, "step": 15160 }, { "epoch": 0.6319453128256429, "grad_norm": 660.0, "learning_rate": 3.15123381009845e-05, "loss": 20.0003, "step": 15161 }, { "epoch": 0.6319869951231711, "grad_norm": 444.0, "learning_rate": 3.150606662674462e-05, "loss": 17.2502, "step": 15162 }, { "epoch": 0.6320286774206995, "grad_norm": 155.0, "learning_rate": 3.1499795489558304e-05, "loss": 11.6885, "step": 15163 }, { "epoch": 0.6320703597182277, "grad_norm": 508.0, "learning_rate": 3.149352468953987e-05, "loss": 15.5646, "step": 15164 }, { "epoch": 0.6321120420157559, "grad_norm": 306.0, "learning_rate": 3.1487254226803575e-05, "loss": 13.8754, "step": 15165 }, { "epoch": 0.6321537243132841, "grad_norm": 206.0, "learning_rate": 3.148098410146373e-05, "loss": 11.6879, "step": 15166 }, { "epoch": 0.6321954066108124, "grad_norm": 382.0, "learning_rate": 3.147471431363458e-05, "loss": 15.0629, "step": 15167 }, { "epoch": 0.6322370889083406, "grad_norm": 1264.0, "learning_rate": 3.1468444863430426e-05, "loss": 26.2553, "step": 15168 }, { "epoch": 0.6322787712058688, "grad_norm": 296.0, "learning_rate": 3.146217575096548e-05, "loss": 11.3753, "step": 15169 }, { "epoch": 0.6323204535033972, "grad_norm": 234.0, "learning_rate": 3.145590697635404e-05, "loss": 10.6252, "step": 15170 }, { "epoch": 0.6323621358009254, "grad_norm": 131.0, "learning_rate": 3.144963853971034e-05, "loss": 9.1879, "step": 15171 }, { "epoch": 0.6324038180984536, "grad_norm": 302.0, "learning_rate": 3.144337044114864e-05, "loss": 13.8752, "step": 15172 }, { "epoch": 0.6324455003959818, "grad_norm": 416.0, "learning_rate": 3.143710268078314e-05, "loss": 14.7508, "step": 15173 }, { "epoch": 0.6324871826935101, "grad_norm": 420.0, "learning_rate": 3.143083525872811e-05, "loss": 15.3127, "step": 15174 }, { "epoch": 0.6325288649910383, "grad_norm": 126.0, "learning_rate": 3.142456817509773e-05, "loss": 8.7502, "step": 15175 }, { "epoch": 0.6325705472885665, "grad_norm": 458.0, "learning_rate": 3.141830143000628e-05, "loss": 16.0002, "step": 15176 }, { "epoch": 0.6326122295860948, "grad_norm": 264.0, "learning_rate": 3.141203502356791e-05, "loss": 12.5004, "step": 15177 }, { "epoch": 0.6326539118836231, "grad_norm": 190.0, "learning_rate": 3.140576895589687e-05, "loss": 10.8128, "step": 15178 }, { "epoch": 0.6326955941811513, "grad_norm": 120.5, "learning_rate": 3.139950322710732e-05, "loss": 5.8138, "step": 15179 }, { "epoch": 0.6327372764786795, "grad_norm": 230.0, "learning_rate": 3.139323783731349e-05, "loss": 11.5004, "step": 15180 }, { "epoch": 0.6327789587762077, "grad_norm": 636.0, "learning_rate": 3.138697278662954e-05, "loss": 18.0003, "step": 15181 }, { "epoch": 0.632820641073736, "grad_norm": 392.0, "learning_rate": 3.138070807516967e-05, "loss": 14.3752, "step": 15182 }, { "epoch": 0.6328623233712642, "grad_norm": 184.0, "learning_rate": 3.137444370304805e-05, "loss": 10.4387, "step": 15183 }, { "epoch": 0.6329040056687925, "grad_norm": 239.0, "learning_rate": 3.136817967037885e-05, "loss": 12.1878, "step": 15184 }, { "epoch": 0.6329456879663207, "grad_norm": 300.0, "learning_rate": 3.136191597727621e-05, "loss": 13.9377, "step": 15185 }, { "epoch": 0.632987370263849, "grad_norm": 185.0, "learning_rate": 3.135565262385434e-05, "loss": 11.3128, "step": 15186 }, { "epoch": 0.6330290525613772, "grad_norm": 462.0, "learning_rate": 3.134938961022733e-05, "loss": 15.8128, "step": 15187 }, { "epoch": 0.6330707348589054, "grad_norm": 77.5, "learning_rate": 3.134312693650937e-05, "loss": 8.5632, "step": 15188 }, { "epoch": 0.6331124171564336, "grad_norm": 278.0, "learning_rate": 3.1336864602814554e-05, "loss": 12.9376, "step": 15189 }, { "epoch": 0.633154099453962, "grad_norm": 164.0, "learning_rate": 3.133060260925706e-05, "loss": 9.688, "step": 15190 }, { "epoch": 0.6331957817514902, "grad_norm": 248.0, "learning_rate": 3.1324340955950966e-05, "loss": 12.1879, "step": 15191 }, { "epoch": 0.6332374640490184, "grad_norm": 888.0, "learning_rate": 3.131807964301044e-05, "loss": 24.3752, "step": 15192 }, { "epoch": 0.6332791463465466, "grad_norm": 446.0, "learning_rate": 3.131181867054955e-05, "loss": 16.1256, "step": 15193 }, { "epoch": 0.6333208286440749, "grad_norm": 732.0, "learning_rate": 3.1305558038682435e-05, "loss": 20.0023, "step": 15194 }, { "epoch": 0.6333625109416031, "grad_norm": 256.0, "learning_rate": 3.129929774752318e-05, "loss": 10.8126, "step": 15195 }, { "epoch": 0.6334041932391313, "grad_norm": 284.0, "learning_rate": 3.1293037797185886e-05, "loss": 13.8752, "step": 15196 }, { "epoch": 0.6334458755366595, "grad_norm": 186.0, "learning_rate": 3.128677818778463e-05, "loss": 9.8148, "step": 15197 }, { "epoch": 0.6334875578341879, "grad_norm": 250.0, "learning_rate": 3.1280518919433524e-05, "loss": 13.3754, "step": 15198 }, { "epoch": 0.6335292401317161, "grad_norm": 1448.0, "learning_rate": 3.12742599922466e-05, "loss": 36.2503, "step": 15199 }, { "epoch": 0.6335709224292443, "grad_norm": 668.0, "learning_rate": 3.126800140633798e-05, "loss": 19.6259, "step": 15200 }, { "epoch": 0.6336126047267725, "grad_norm": 195.0, "learning_rate": 3.1261743161821664e-05, "loss": 10.2504, "step": 15201 }, { "epoch": 0.6336542870243008, "grad_norm": 1272.0, "learning_rate": 3.125548525881177e-05, "loss": 29.0032, "step": 15202 }, { "epoch": 0.633695969321829, "grad_norm": 160.0, "learning_rate": 3.1249227697422296e-05, "loss": 10.3753, "step": 15203 }, { "epoch": 0.6337376516193572, "grad_norm": 123.0, "learning_rate": 3.124297047776733e-05, "loss": 9.1879, "step": 15204 }, { "epoch": 0.6337793339168855, "grad_norm": 230.0, "learning_rate": 3.123671359996088e-05, "loss": 11.4378, "step": 15205 }, { "epoch": 0.6338210162144138, "grad_norm": 316.0, "learning_rate": 3.1230457064117e-05, "loss": 13.1888, "step": 15206 }, { "epoch": 0.633862698511942, "grad_norm": 450.0, "learning_rate": 3.1224200870349696e-05, "loss": 15.2502, "step": 15207 }, { "epoch": 0.6339043808094702, "grad_norm": 444.0, "learning_rate": 3.121794501877301e-05, "loss": 15.1254, "step": 15208 }, { "epoch": 0.6339460631069984, "grad_norm": 89.5, "learning_rate": 3.1211689509500924e-05, "loss": 9.1878, "step": 15209 }, { "epoch": 0.6339877454045267, "grad_norm": 101.0, "learning_rate": 3.120543434264749e-05, "loss": 10.3767, "step": 15210 }, { "epoch": 0.634029427702055, "grad_norm": 456.0, "learning_rate": 3.119917951832666e-05, "loss": 15.7516, "step": 15211 }, { "epoch": 0.6340711099995832, "grad_norm": 255.0, "learning_rate": 3.119292503665248e-05, "loss": 12.0632, "step": 15212 }, { "epoch": 0.6341127922971114, "grad_norm": 256.0, "learning_rate": 3.1186670897738876e-05, "loss": 11.9377, "step": 15213 }, { "epoch": 0.6341544745946397, "grad_norm": 372.0, "learning_rate": 3.11804171016999e-05, "loss": 14.3754, "step": 15214 }, { "epoch": 0.6341961568921679, "grad_norm": 88.5, "learning_rate": 3.1174163648649465e-05, "loss": 8.0003, "step": 15215 }, { "epoch": 0.6342378391896961, "grad_norm": 408.0, "learning_rate": 3.1167910538701595e-05, "loss": 15.3763, "step": 15216 }, { "epoch": 0.6342795214872243, "grad_norm": 316.0, "learning_rate": 3.11616577719702e-05, "loss": 14.8129, "step": 15217 }, { "epoch": 0.6343212037847527, "grad_norm": 564.0, "learning_rate": 3.115540534856929e-05, "loss": 18.3769, "step": 15218 }, { "epoch": 0.6343628860822809, "grad_norm": 140.0, "learning_rate": 3.114915326861276e-05, "loss": 10.6879, "step": 15219 }, { "epoch": 0.6344045683798091, "grad_norm": 364.0, "learning_rate": 3.1142901532214605e-05, "loss": 14.5631, "step": 15220 }, { "epoch": 0.6344462506773373, "grad_norm": 246.0, "learning_rate": 3.113665013948874e-05, "loss": 10.7504, "step": 15221 }, { "epoch": 0.6344879329748656, "grad_norm": 1400.0, "learning_rate": 3.113039909054911e-05, "loss": 38.0004, "step": 15222 }, { "epoch": 0.6345296152723938, "grad_norm": 368.0, "learning_rate": 3.112414838550961e-05, "loss": 15.1272, "step": 15223 }, { "epoch": 0.634571297569922, "grad_norm": 292.0, "learning_rate": 3.11178980244842e-05, "loss": 11.3129, "step": 15224 }, { "epoch": 0.6346129798674502, "grad_norm": 496.0, "learning_rate": 3.1111648007586766e-05, "loss": 15.8752, "step": 15225 }, { "epoch": 0.6346546621649786, "grad_norm": 716.0, "learning_rate": 3.110539833493124e-05, "loss": 20.8753, "step": 15226 }, { "epoch": 0.6346963444625068, "grad_norm": 72.5, "learning_rate": 3.1099149006631484e-05, "loss": 5.9064, "step": 15227 }, { "epoch": 0.634738026760035, "grad_norm": 564.0, "learning_rate": 3.109290002280144e-05, "loss": 18.2502, "step": 15228 }, { "epoch": 0.6347797090575632, "grad_norm": 392.0, "learning_rate": 3.1086651383554944e-05, "loss": 11.3754, "step": 15229 }, { "epoch": 0.6348213913550915, "grad_norm": 498.0, "learning_rate": 3.108040308900593e-05, "loss": 17.1254, "step": 15230 }, { "epoch": 0.6348630736526197, "grad_norm": 464.0, "learning_rate": 3.107415513926823e-05, "loss": 16.3753, "step": 15231 }, { "epoch": 0.634904755950148, "grad_norm": 588.0, "learning_rate": 3.106790753445573e-05, "loss": 19.3751, "step": 15232 }, { "epoch": 0.6349464382476762, "grad_norm": 163.0, "learning_rate": 3.1061660274682314e-05, "loss": 9.4378, "step": 15233 }, { "epoch": 0.6349881205452045, "grad_norm": 212.0, "learning_rate": 3.105541336006182e-05, "loss": 12.1251, "step": 15234 }, { "epoch": 0.6350298028427327, "grad_norm": 608.0, "learning_rate": 3.1049166790708076e-05, "loss": 19.1259, "step": 15235 }, { "epoch": 0.6350714851402609, "grad_norm": 352.0, "learning_rate": 3.1042920566734975e-05, "loss": 16.8752, "step": 15236 }, { "epoch": 0.6351131674377891, "grad_norm": 290.0, "learning_rate": 3.1036674688256306e-05, "loss": 12.8753, "step": 15237 }, { "epoch": 0.6351548497353174, "grad_norm": 356.0, "learning_rate": 3.103042915538595e-05, "loss": 13.1254, "step": 15238 }, { "epoch": 0.6351965320328457, "grad_norm": 101.5, "learning_rate": 3.1024183968237684e-05, "loss": 9.7508, "step": 15239 }, { "epoch": 0.6352382143303739, "grad_norm": 260.0, "learning_rate": 3.101793912692538e-05, "loss": 13.3752, "step": 15240 }, { "epoch": 0.6352798966279022, "grad_norm": 1392.0, "learning_rate": 3.1011694631562785e-05, "loss": 26.8802, "step": 15241 }, { "epoch": 0.6353215789254304, "grad_norm": 350.0, "learning_rate": 3.100545048226377e-05, "loss": 13.188, "step": 15242 }, { "epoch": 0.6353632612229586, "grad_norm": 276.0, "learning_rate": 3.099920667914208e-05, "loss": 12.5639, "step": 15243 }, { "epoch": 0.6354049435204868, "grad_norm": 466.0, "learning_rate": 3.0992963222311554e-05, "loss": 16.6254, "step": 15244 }, { "epoch": 0.6354466258180151, "grad_norm": 111.5, "learning_rate": 3.098672011188595e-05, "loss": 10.064, "step": 15245 }, { "epoch": 0.6354883081155434, "grad_norm": 484.0, "learning_rate": 3.098047734797907e-05, "loss": 17.2511, "step": 15246 }, { "epoch": 0.6355299904130716, "grad_norm": 258.0, "learning_rate": 3.097423493070466e-05, "loss": 11.4379, "step": 15247 }, { "epoch": 0.6355716727105998, "grad_norm": 528.0, "learning_rate": 3.096799286017653e-05, "loss": 16.0005, "step": 15248 }, { "epoch": 0.6356133550081281, "grad_norm": 142.0, "learning_rate": 3.0961751136508404e-05, "loss": 9.7503, "step": 15249 }, { "epoch": 0.6356550373056563, "grad_norm": 108.0, "learning_rate": 3.095550975981407e-05, "loss": 9.6878, "step": 15250 }, { "epoch": 0.6356967196031845, "grad_norm": 592.0, "learning_rate": 3.094926873020724e-05, "loss": 18.501, "step": 15251 }, { "epoch": 0.6357384019007127, "grad_norm": 524.0, "learning_rate": 3.09430280478017e-05, "loss": 17.1296, "step": 15252 }, { "epoch": 0.6357800841982411, "grad_norm": 736.0, "learning_rate": 3.093678771271114e-05, "loss": 21.7512, "step": 15253 }, { "epoch": 0.6358217664957693, "grad_norm": 628.0, "learning_rate": 3.0930547725049354e-05, "loss": 19.0006, "step": 15254 }, { "epoch": 0.6358634487932975, "grad_norm": 520.0, "learning_rate": 3.092430808493e-05, "loss": 17.5004, "step": 15255 }, { "epoch": 0.6359051310908257, "grad_norm": 336.0, "learning_rate": 3.091806879246684e-05, "loss": 13.8128, "step": 15256 }, { "epoch": 0.635946813388354, "grad_norm": 728.0, "learning_rate": 3.091182984777354e-05, "loss": 20.3799, "step": 15257 }, { "epoch": 0.6359884956858822, "grad_norm": 185.0, "learning_rate": 3.090559125096386e-05, "loss": 11.814, "step": 15258 }, { "epoch": 0.6360301779834104, "grad_norm": 368.0, "learning_rate": 3.0899353002151466e-05, "loss": 13.3754, "step": 15259 }, { "epoch": 0.6360718602809387, "grad_norm": 1360.0, "learning_rate": 3.0893115101450076e-05, "loss": 30.0016, "step": 15260 }, { "epoch": 0.636113542578467, "grad_norm": 270.0, "learning_rate": 3.088687754897334e-05, "loss": 13.3752, "step": 15261 }, { "epoch": 0.6361552248759952, "grad_norm": 158.0, "learning_rate": 3.088064034483498e-05, "loss": 10.4377, "step": 15262 }, { "epoch": 0.6361969071735234, "grad_norm": 322.0, "learning_rate": 3.087440348914862e-05, "loss": 13.1885, "step": 15263 }, { "epoch": 0.6362385894710516, "grad_norm": 608.0, "learning_rate": 3.086816698202797e-05, "loss": 19.5036, "step": 15264 }, { "epoch": 0.6362802717685799, "grad_norm": 708.0, "learning_rate": 3.086193082358666e-05, "loss": 20.7502, "step": 15265 }, { "epoch": 0.6363219540661081, "grad_norm": 344.0, "learning_rate": 3.0855695013938384e-05, "loss": 12.1252, "step": 15266 }, { "epoch": 0.6363636363636364, "grad_norm": 260.0, "learning_rate": 3.084945955319675e-05, "loss": 13.4376, "step": 15267 }, { "epoch": 0.6364053186611646, "grad_norm": 232.0, "learning_rate": 3.0843224441475424e-05, "loss": 11.8127, "step": 15268 }, { "epoch": 0.6364470009586929, "grad_norm": 446.0, "learning_rate": 3.0836989678888016e-05, "loss": 16.0002, "step": 15269 }, { "epoch": 0.6364886832562211, "grad_norm": 189.0, "learning_rate": 3.083075526554818e-05, "loss": 11.0632, "step": 15270 }, { "epoch": 0.6365303655537493, "grad_norm": 247.0, "learning_rate": 3.082452120156954e-05, "loss": 11.9379, "step": 15271 }, { "epoch": 0.6365720478512775, "grad_norm": 800.0, "learning_rate": 3.08182874870657e-05, "loss": 22.501, "step": 15272 }, { "epoch": 0.6366137301488058, "grad_norm": 272.0, "learning_rate": 3.0812054122150266e-05, "loss": 8.4376, "step": 15273 }, { "epoch": 0.6366554124463341, "grad_norm": 334.0, "learning_rate": 3.080582110693686e-05, "loss": 12.2502, "step": 15274 }, { "epoch": 0.6366970947438623, "grad_norm": 180.0, "learning_rate": 3.0799588441539054e-05, "loss": 10.813, "step": 15275 }, { "epoch": 0.6367387770413905, "grad_norm": 176.0, "learning_rate": 3.0793356126070475e-05, "loss": 9.7502, "step": 15276 }, { "epoch": 0.6367804593389188, "grad_norm": 844.0, "learning_rate": 3.0787124160644665e-05, "loss": 22.5053, "step": 15277 }, { "epoch": 0.636822141636447, "grad_norm": 147.0, "learning_rate": 3.078089254537524e-05, "loss": 10.4379, "step": 15278 }, { "epoch": 0.6368638239339752, "grad_norm": 258.0, "learning_rate": 3.077466128037574e-05, "loss": 12.8131, "step": 15279 }, { "epoch": 0.6369055062315034, "grad_norm": 223.0, "learning_rate": 3.076843036575976e-05, "loss": 10.5631, "step": 15280 }, { "epoch": 0.6369471885290318, "grad_norm": 68.0, "learning_rate": 3.076219980164082e-05, "loss": 7.8129, "step": 15281 }, { "epoch": 0.63698887082656, "grad_norm": 252.0, "learning_rate": 3.075596958813251e-05, "loss": 13.3755, "step": 15282 }, { "epoch": 0.6370305531240882, "grad_norm": 292.0, "learning_rate": 3.0749739725348365e-05, "loss": 11.8752, "step": 15283 }, { "epoch": 0.6370722354216164, "grad_norm": 199.0, "learning_rate": 3.0743510213401916e-05, "loss": 10.1881, "step": 15284 }, { "epoch": 0.6371139177191447, "grad_norm": 282.0, "learning_rate": 3.07372810524067e-05, "loss": 13.5627, "step": 15285 }, { "epoch": 0.6371556000166729, "grad_norm": 234.0, "learning_rate": 3.073105224247626e-05, "loss": 11.7502, "step": 15286 }, { "epoch": 0.6371972823142011, "grad_norm": 308.0, "learning_rate": 3.072482378372409e-05, "loss": 11.8752, "step": 15287 }, { "epoch": 0.6372389646117294, "grad_norm": 636.0, "learning_rate": 3.071859567626374e-05, "loss": 18.751, "step": 15288 }, { "epoch": 0.6372806469092577, "grad_norm": 266.0, "learning_rate": 3.071236792020867e-05, "loss": 12.3758, "step": 15289 }, { "epoch": 0.6373223292067859, "grad_norm": 434.0, "learning_rate": 3.070614051567243e-05, "loss": 16.0003, "step": 15290 }, { "epoch": 0.6373640115043141, "grad_norm": 149.0, "learning_rate": 3.0699913462768464e-05, "loss": 9.3131, "step": 15291 }, { "epoch": 0.6374056938018423, "grad_norm": 276.0, "learning_rate": 3.069368676161032e-05, "loss": 13.3753, "step": 15292 }, { "epoch": 0.6374473760993706, "grad_norm": 298.0, "learning_rate": 3.068746041231142e-05, "loss": 13.5662, "step": 15293 }, { "epoch": 0.6374890583968988, "grad_norm": 482.0, "learning_rate": 3.068123441498528e-05, "loss": 17.7503, "step": 15294 }, { "epoch": 0.6375307406944271, "grad_norm": 334.0, "learning_rate": 3.067500876974536e-05, "loss": 14.7502, "step": 15295 }, { "epoch": 0.6375724229919553, "grad_norm": 60.0, "learning_rate": 3.066878347670512e-05, "loss": 7.5944, "step": 15296 }, { "epoch": 0.6376141052894836, "grad_norm": 172.0, "learning_rate": 3.0662558535978006e-05, "loss": 12.1881, "step": 15297 }, { "epoch": 0.6376557875870118, "grad_norm": 820.0, "learning_rate": 3.0656333947677494e-05, "loss": 23.5007, "step": 15298 }, { "epoch": 0.63769746988454, "grad_norm": 640.0, "learning_rate": 3.0650109711917e-05, "loss": 18.5004, "step": 15299 }, { "epoch": 0.6377391521820682, "grad_norm": 384.0, "learning_rate": 3.0643885828809994e-05, "loss": 15.3126, "step": 15300 }, { "epoch": 0.6377808344795965, "grad_norm": 193.0, "learning_rate": 3.063766229846987e-05, "loss": 11.3753, "step": 15301 }, { "epoch": 0.6378225167771248, "grad_norm": 186.0, "learning_rate": 3.063143912101009e-05, "loss": 10.2505, "step": 15302 }, { "epoch": 0.637864199074653, "grad_norm": 314.0, "learning_rate": 3.062521629654402e-05, "loss": 13.1883, "step": 15303 }, { "epoch": 0.6379058813721812, "grad_norm": 344.0, "learning_rate": 3.0618993825185135e-05, "loss": 13.3126, "step": 15304 }, { "epoch": 0.6379475636697095, "grad_norm": 564.0, "learning_rate": 3.061277170704678e-05, "loss": 17.8761, "step": 15305 }, { "epoch": 0.6379892459672377, "grad_norm": 175.0, "learning_rate": 3.0606549942242405e-05, "loss": 10.0628, "step": 15306 }, { "epoch": 0.6380309282647659, "grad_norm": 434.0, "learning_rate": 3.0600328530885356e-05, "loss": 17.1252, "step": 15307 }, { "epoch": 0.6380726105622941, "grad_norm": 154.0, "learning_rate": 3.0594107473089055e-05, "loss": 10.3752, "step": 15308 }, { "epoch": 0.6381142928598225, "grad_norm": 88.0, "learning_rate": 3.058788676896687e-05, "loss": 8.8753, "step": 15309 }, { "epoch": 0.6381559751573507, "grad_norm": 490.0, "learning_rate": 3.058166641863217e-05, "loss": 15.6273, "step": 15310 }, { "epoch": 0.6381976574548789, "grad_norm": 239.0, "learning_rate": 3.057544642219831e-05, "loss": 10.8133, "step": 15311 }, { "epoch": 0.6382393397524071, "grad_norm": 632.0, "learning_rate": 3.056922677977869e-05, "loss": 14.5674, "step": 15312 }, { "epoch": 0.6382810220499354, "grad_norm": 296.0, "learning_rate": 3.05630074914866e-05, "loss": 13.6253, "step": 15313 }, { "epoch": 0.6383227043474636, "grad_norm": 154.0, "learning_rate": 3.0556788557435465e-05, "loss": 9.3132, "step": 15314 }, { "epoch": 0.6383643866449918, "grad_norm": 196.0, "learning_rate": 3.055056997773856e-05, "loss": 11.3757, "step": 15315 }, { "epoch": 0.6384060689425202, "grad_norm": 153.0, "learning_rate": 3.054435175250926e-05, "loss": 8.688, "step": 15316 }, { "epoch": 0.6384477512400484, "grad_norm": 772.0, "learning_rate": 3.053813388186085e-05, "loss": 17.8753, "step": 15317 }, { "epoch": 0.6384894335375766, "grad_norm": 660.0, "learning_rate": 3.053191636590671e-05, "loss": 18.5002, "step": 15318 }, { "epoch": 0.6385311158351048, "grad_norm": 204.0, "learning_rate": 3.052569920476009e-05, "loss": 10.1881, "step": 15319 }, { "epoch": 0.6385727981326331, "grad_norm": 760.0, "learning_rate": 3.051948239853435e-05, "loss": 21.8808, "step": 15320 }, { "epoch": 0.6386144804301613, "grad_norm": 316.0, "learning_rate": 3.0513265947342772e-05, "loss": 13.3132, "step": 15321 }, { "epoch": 0.6386561627276895, "grad_norm": 176.0, "learning_rate": 3.050704985129865e-05, "loss": 10.1881, "step": 15322 }, { "epoch": 0.6386978450252178, "grad_norm": 247.0, "learning_rate": 3.0500834110515263e-05, "loss": 12.0005, "step": 15323 }, { "epoch": 0.6387395273227461, "grad_norm": 236.0, "learning_rate": 3.049461872510593e-05, "loss": 10.7503, "step": 15324 }, { "epoch": 0.6387812096202743, "grad_norm": 276.0, "learning_rate": 3.0488403695183883e-05, "loss": 13.1253, "step": 15325 }, { "epoch": 0.6388228919178025, "grad_norm": 290.0, "learning_rate": 3.048218902086243e-05, "loss": 13.1878, "step": 15326 }, { "epoch": 0.6388645742153307, "grad_norm": 218.0, "learning_rate": 3.0475974702254782e-05, "loss": 11.8753, "step": 15327 }, { "epoch": 0.638906256512859, "grad_norm": 118.0, "learning_rate": 3.0469760739474262e-05, "loss": 7.5005, "step": 15328 }, { "epoch": 0.6389479388103873, "grad_norm": 89.0, "learning_rate": 3.0463547132634063e-05, "loss": 6.0317, "step": 15329 }, { "epoch": 0.6389896211079155, "grad_norm": 768.0, "learning_rate": 3.0457333881847473e-05, "loss": 22.1287, "step": 15330 }, { "epoch": 0.6390313034054437, "grad_norm": 372.0, "learning_rate": 3.045112098722769e-05, "loss": 14.3757, "step": 15331 }, { "epoch": 0.639072985702972, "grad_norm": 370.0, "learning_rate": 3.0444908448887966e-05, "loss": 14.3762, "step": 15332 }, { "epoch": 0.6391146680005002, "grad_norm": 314.0, "learning_rate": 3.0438696266941525e-05, "loss": 14.0011, "step": 15333 }, { "epoch": 0.6391563502980284, "grad_norm": 448.0, "learning_rate": 3.0432484441501575e-05, "loss": 16.7511, "step": 15334 }, { "epoch": 0.6391980325955566, "grad_norm": 736.0, "learning_rate": 3.042627297268133e-05, "loss": 21.1281, "step": 15335 }, { "epoch": 0.639239714893085, "grad_norm": 460.0, "learning_rate": 3.042006186059402e-05, "loss": 17.1252, "step": 15336 }, { "epoch": 0.6392813971906132, "grad_norm": 208.0, "learning_rate": 3.04138511053528e-05, "loss": 12.1253, "step": 15337 }, { "epoch": 0.6393230794881414, "grad_norm": 280.0, "learning_rate": 3.0407640707070896e-05, "loss": 12.8128, "step": 15338 }, { "epoch": 0.6393647617856696, "grad_norm": 468.0, "learning_rate": 3.040143066586146e-05, "loss": 15.2502, "step": 15339 }, { "epoch": 0.6394064440831979, "grad_norm": 264.0, "learning_rate": 3.0395220981837714e-05, "loss": 12.8127, "step": 15340 }, { "epoch": 0.6394481263807261, "grad_norm": 340.0, "learning_rate": 3.038901165511278e-05, "loss": 14.3126, "step": 15341 }, { "epoch": 0.6394898086782543, "grad_norm": 221.0, "learning_rate": 3.0382802685799875e-05, "loss": 10.6879, "step": 15342 }, { "epoch": 0.6395314909757825, "grad_norm": 140.0, "learning_rate": 3.037659407401211e-05, "loss": 10.9379, "step": 15343 }, { "epoch": 0.6395731732733109, "grad_norm": 344.0, "learning_rate": 3.0370385819862684e-05, "loss": 11.6255, "step": 15344 }, { "epoch": 0.6396148555708391, "grad_norm": 536.0, "learning_rate": 3.0364177923464698e-05, "loss": 16.1271, "step": 15345 }, { "epoch": 0.6396565378683673, "grad_norm": 424.0, "learning_rate": 3.0357970384931322e-05, "loss": 15.5001, "step": 15346 }, { "epoch": 0.6396982201658955, "grad_norm": 544.0, "learning_rate": 3.0351763204375672e-05, "loss": 16.7502, "step": 15347 }, { "epoch": 0.6397399024634238, "grad_norm": 74.5, "learning_rate": 3.034555638191089e-05, "loss": 8.2501, "step": 15348 }, { "epoch": 0.639781584760952, "grad_norm": 464.0, "learning_rate": 3.0339349917650077e-05, "loss": 17.0011, "step": 15349 }, { "epoch": 0.6398232670584802, "grad_norm": 244.0, "learning_rate": 3.0333143811706378e-05, "loss": 11.7503, "step": 15350 }, { "epoch": 0.6398649493560085, "grad_norm": 660.0, "learning_rate": 3.0326938064192856e-05, "loss": 21.1258, "step": 15351 }, { "epoch": 0.6399066316535368, "grad_norm": 238.0, "learning_rate": 3.032073267522265e-05, "loss": 12.6881, "step": 15352 }, { "epoch": 0.639948313951065, "grad_norm": 728.0, "learning_rate": 3.031452764490883e-05, "loss": 21.6256, "step": 15353 }, { "epoch": 0.6399899962485932, "grad_norm": 66.0, "learning_rate": 3.03083229733645e-05, "loss": 7.7504, "step": 15354 }, { "epoch": 0.6400316785461214, "grad_norm": 214.0, "learning_rate": 3.0302118660702716e-05, "loss": 11.1877, "step": 15355 }, { "epoch": 0.6400733608436497, "grad_norm": 212.0, "learning_rate": 3.029591470703659e-05, "loss": 11.8752, "step": 15356 }, { "epoch": 0.640115043141178, "grad_norm": 536.0, "learning_rate": 3.0289711112479147e-05, "loss": 17.8764, "step": 15357 }, { "epoch": 0.6401567254387062, "grad_norm": 167.0, "learning_rate": 3.028350787714348e-05, "loss": 10.2503, "step": 15358 }, { "epoch": 0.6401984077362344, "grad_norm": 306.0, "learning_rate": 3.0277305001142626e-05, "loss": 13.5628, "step": 15359 }, { "epoch": 0.6402400900337627, "grad_norm": 584.0, "learning_rate": 3.027110248458964e-05, "loss": 17.6253, "step": 15360 }, { "epoch": 0.6402817723312909, "grad_norm": 302.0, "learning_rate": 3.0264900327597557e-05, "loss": 13.0631, "step": 15361 }, { "epoch": 0.6403234546288191, "grad_norm": 412.0, "learning_rate": 3.025869853027944e-05, "loss": 14.1257, "step": 15362 }, { "epoch": 0.6403651369263473, "grad_norm": 157.0, "learning_rate": 3.0252497092748265e-05, "loss": 11.4377, "step": 15363 }, { "epoch": 0.6404068192238757, "grad_norm": 136.0, "learning_rate": 3.0246296015117113e-05, "loss": 9.3759, "step": 15364 }, { "epoch": 0.6404485015214039, "grad_norm": 194.0, "learning_rate": 3.024009529749895e-05, "loss": 11.1877, "step": 15365 }, { "epoch": 0.6404901838189321, "grad_norm": 162.0, "learning_rate": 3.0233894940006813e-05, "loss": 9.6254, "step": 15366 }, { "epoch": 0.6405318661164603, "grad_norm": 242.0, "learning_rate": 3.0227694942753683e-05, "loss": 13.7514, "step": 15367 }, { "epoch": 0.6405735484139886, "grad_norm": 48.5, "learning_rate": 3.0221495305852587e-05, "loss": 6.0947, "step": 15368 }, { "epoch": 0.6406152307115168, "grad_norm": 604.0, "learning_rate": 3.021529602941648e-05, "loss": 19.7505, "step": 15369 }, { "epoch": 0.640656913009045, "grad_norm": 498.0, "learning_rate": 3.020909711355836e-05, "loss": 15.3126, "step": 15370 }, { "epoch": 0.6406985953065732, "grad_norm": 182.0, "learning_rate": 3.0202898558391213e-05, "loss": 11.2502, "step": 15371 }, { "epoch": 0.6407402776041016, "grad_norm": 844.0, "learning_rate": 3.0196700364027986e-05, "loss": 21.8755, "step": 15372 }, { "epoch": 0.6407819599016298, "grad_norm": 452.0, "learning_rate": 3.019050253058165e-05, "loss": 15.0009, "step": 15373 }, { "epoch": 0.640823642199158, "grad_norm": 176.0, "learning_rate": 3.0184305058165185e-05, "loss": 11.6255, "step": 15374 }, { "epoch": 0.6408653244966862, "grad_norm": 172.0, "learning_rate": 3.0178107946891492e-05, "loss": 11.0002, "step": 15375 }, { "epoch": 0.6409070067942145, "grad_norm": 382.0, "learning_rate": 3.0171911196873566e-05, "loss": 15.8128, "step": 15376 }, { "epoch": 0.6409486890917427, "grad_norm": 326.0, "learning_rate": 3.01657148082243e-05, "loss": 14.6877, "step": 15377 }, { "epoch": 0.640990371389271, "grad_norm": 184.0, "learning_rate": 3.015951878105666e-05, "loss": 11.1255, "step": 15378 }, { "epoch": 0.6410320536867992, "grad_norm": 346.0, "learning_rate": 3.0153323115483535e-05, "loss": 14.1876, "step": 15379 }, { "epoch": 0.6410737359843275, "grad_norm": 364.0, "learning_rate": 3.014712781161787e-05, "loss": 15.3752, "step": 15380 }, { "epoch": 0.6411154182818557, "grad_norm": 318.0, "learning_rate": 3.0140932869572547e-05, "loss": 12.5008, "step": 15381 }, { "epoch": 0.6411571005793839, "grad_norm": 740.0, "learning_rate": 3.0134738289460508e-05, "loss": 21.3755, "step": 15382 }, { "epoch": 0.6411987828769121, "grad_norm": 173.0, "learning_rate": 3.0128544071394603e-05, "loss": 10.1254, "step": 15383 }, { "epoch": 0.6412404651744404, "grad_norm": 396.0, "learning_rate": 3.012235021548776e-05, "loss": 14.6255, "step": 15384 }, { "epoch": 0.6412821474719687, "grad_norm": 158.0, "learning_rate": 3.011615672185284e-05, "loss": 10.3753, "step": 15385 }, { "epoch": 0.6413238297694969, "grad_norm": 676.0, "learning_rate": 3.0109963590602745e-05, "loss": 21.1256, "step": 15386 }, { "epoch": 0.6413655120670252, "grad_norm": 680.0, "learning_rate": 3.0103770821850308e-05, "loss": 21.2503, "step": 15387 }, { "epoch": 0.6414071943645534, "grad_norm": 498.0, "learning_rate": 3.0097578415708437e-05, "loss": 16.2511, "step": 15388 }, { "epoch": 0.6414488766620816, "grad_norm": 536.0, "learning_rate": 3.0091386372289947e-05, "loss": 16.8753, "step": 15389 }, { "epoch": 0.6414905589596098, "grad_norm": 476.0, "learning_rate": 3.008519469170773e-05, "loss": 17.2502, "step": 15390 }, { "epoch": 0.6415322412571381, "grad_norm": 324.0, "learning_rate": 3.0079003374074584e-05, "loss": 13.4378, "step": 15391 }, { "epoch": 0.6415739235546664, "grad_norm": 247.0, "learning_rate": 3.0072812419503395e-05, "loss": 11.5627, "step": 15392 }, { "epoch": 0.6416156058521946, "grad_norm": 314.0, "learning_rate": 3.006662182810694e-05, "loss": 11.5014, "step": 15393 }, { "epoch": 0.6416572881497228, "grad_norm": 302.0, "learning_rate": 3.0060431599998095e-05, "loss": 13.9377, "step": 15394 }, { "epoch": 0.6416989704472511, "grad_norm": 278.0, "learning_rate": 3.0054241735289633e-05, "loss": 12.2503, "step": 15395 }, { "epoch": 0.6417406527447793, "grad_norm": 304.0, "learning_rate": 3.0048052234094404e-05, "loss": 12.7509, "step": 15396 }, { "epoch": 0.6417823350423075, "grad_norm": 328.0, "learning_rate": 3.004186309652518e-05, "loss": 15.0631, "step": 15397 }, { "epoch": 0.6418240173398357, "grad_norm": 132.0, "learning_rate": 3.0035674322694786e-05, "loss": 8.8752, "step": 15398 }, { "epoch": 0.6418656996373641, "grad_norm": 211.0, "learning_rate": 3.002948591271598e-05, "loss": 10.6255, "step": 15399 }, { "epoch": 0.6419073819348923, "grad_norm": 230.0, "learning_rate": 3.0023297866701594e-05, "loss": 9.8132, "step": 15400 }, { "epoch": 0.6419490642324205, "grad_norm": 464.0, "learning_rate": 3.0017110184764352e-05, "loss": 15.7503, "step": 15401 }, { "epoch": 0.6419907465299487, "grad_norm": 876.0, "learning_rate": 3.0010922867017078e-05, "loss": 22.6262, "step": 15402 }, { "epoch": 0.642032428827477, "grad_norm": 180.0, "learning_rate": 3.0004735913572478e-05, "loss": 11.1252, "step": 15403 }, { "epoch": 0.6420741111250052, "grad_norm": 452.0, "learning_rate": 2.9998549324543372e-05, "loss": 16.2502, "step": 15404 }, { "epoch": 0.6421157934225334, "grad_norm": 370.0, "learning_rate": 2.9992363100042455e-05, "loss": 15.876, "step": 15405 }, { "epoch": 0.6421574757200617, "grad_norm": 442.0, "learning_rate": 2.9986177240182523e-05, "loss": 14.9393, "step": 15406 }, { "epoch": 0.64219915801759, "grad_norm": 151.0, "learning_rate": 2.997999174507626e-05, "loss": 10.6253, "step": 15407 }, { "epoch": 0.6422408403151182, "grad_norm": 1408.0, "learning_rate": 2.997380661483643e-05, "loss": 32.2505, "step": 15408 }, { "epoch": 0.6422825226126464, "grad_norm": 322.0, "learning_rate": 2.9967621849575766e-05, "loss": 13.1878, "step": 15409 }, { "epoch": 0.6423242049101746, "grad_norm": 280.0, "learning_rate": 2.996143744940696e-05, "loss": 11.5007, "step": 15410 }, { "epoch": 0.6423658872077029, "grad_norm": 264.0, "learning_rate": 2.995525341444273e-05, "loss": 12.2502, "step": 15411 }, { "epoch": 0.6424075695052311, "grad_norm": 210.0, "learning_rate": 2.994906974479581e-05, "loss": 11.8753, "step": 15412 }, { "epoch": 0.6424492518027594, "grad_norm": 168.0, "learning_rate": 2.994288644057885e-05, "loss": 9.4377, "step": 15413 }, { "epoch": 0.6424909341002876, "grad_norm": 160.0, "learning_rate": 2.9936703501904585e-05, "loss": 10.5007, "step": 15414 }, { "epoch": 0.6425326163978159, "grad_norm": 167.0, "learning_rate": 2.9930520928885656e-05, "loss": 9.8131, "step": 15415 }, { "epoch": 0.6425742986953441, "grad_norm": 500.0, "learning_rate": 2.9924338721634793e-05, "loss": 18.1252, "step": 15416 }, { "epoch": 0.6426159809928723, "grad_norm": 552.0, "learning_rate": 2.9918156880264615e-05, "loss": 18.6253, "step": 15417 }, { "epoch": 0.6426576632904005, "grad_norm": 328.0, "learning_rate": 2.9911975404887826e-05, "loss": 13.0639, "step": 15418 }, { "epoch": 0.6426993455879288, "grad_norm": 688.0, "learning_rate": 2.990579429561705e-05, "loss": 20.7539, "step": 15419 }, { "epoch": 0.6427410278854571, "grad_norm": 458.0, "learning_rate": 2.9899613552564975e-05, "loss": 16.2503, "step": 15420 }, { "epoch": 0.6427827101829853, "grad_norm": 206.0, "learning_rate": 2.989343317584422e-05, "loss": 11.7505, "step": 15421 }, { "epoch": 0.6428243924805135, "grad_norm": 436.0, "learning_rate": 2.9887253165567432e-05, "loss": 17.1255, "step": 15422 }, { "epoch": 0.6428660747780418, "grad_norm": 132.0, "learning_rate": 2.9881073521847235e-05, "loss": 9.0628, "step": 15423 }, { "epoch": 0.64290775707557, "grad_norm": 122.5, "learning_rate": 2.987489424479627e-05, "loss": 10.0005, "step": 15424 }, { "epoch": 0.6429494393730982, "grad_norm": 532.0, "learning_rate": 2.986871533452713e-05, "loss": 15.6268, "step": 15425 }, { "epoch": 0.6429911216706264, "grad_norm": 290.0, "learning_rate": 2.9862536791152462e-05, "loss": 13.6252, "step": 15426 }, { "epoch": 0.6430328039681548, "grad_norm": 384.0, "learning_rate": 2.9856358614784824e-05, "loss": 11.8754, "step": 15427 }, { "epoch": 0.643074486265683, "grad_norm": 584.0, "learning_rate": 2.985018080553686e-05, "loss": 19.6253, "step": 15428 }, { "epoch": 0.6431161685632112, "grad_norm": 448.0, "learning_rate": 2.9844003363521115e-05, "loss": 16.3753, "step": 15429 }, { "epoch": 0.6431578508607394, "grad_norm": 760.0, "learning_rate": 2.9837826288850224e-05, "loss": 21.8752, "step": 15430 }, { "epoch": 0.6431995331582677, "grad_norm": 155.0, "learning_rate": 2.9831649581636723e-05, "loss": 9.9378, "step": 15431 }, { "epoch": 0.6432412154557959, "grad_norm": 346.0, "learning_rate": 2.9825473241993207e-05, "loss": 13.7536, "step": 15432 }, { "epoch": 0.6432828977533241, "grad_norm": 536.0, "learning_rate": 2.981929727003222e-05, "loss": 17.5003, "step": 15433 }, { "epoch": 0.6433245800508524, "grad_norm": 262.0, "learning_rate": 2.981312166586634e-05, "loss": 13.1258, "step": 15434 }, { "epoch": 0.6433662623483807, "grad_norm": 135.0, "learning_rate": 2.9806946429608108e-05, "loss": 9.0005, "step": 15435 }, { "epoch": 0.6434079446459089, "grad_norm": 344.0, "learning_rate": 2.980077156137007e-05, "loss": 15.7504, "step": 15436 }, { "epoch": 0.6434496269434371, "grad_norm": 332.0, "learning_rate": 2.9794597061264757e-05, "loss": 12.6884, "step": 15437 }, { "epoch": 0.6434913092409653, "grad_norm": 468.0, "learning_rate": 2.9788422929404724e-05, "loss": 16.5002, "step": 15438 }, { "epoch": 0.6435329915384936, "grad_norm": 145.0, "learning_rate": 2.9782249165902453e-05, "loss": 9.8128, "step": 15439 }, { "epoch": 0.6435746738360218, "grad_norm": 181.0, "learning_rate": 2.9776075770870515e-05, "loss": 7.7511, "step": 15440 }, { "epoch": 0.6436163561335501, "grad_norm": 388.0, "learning_rate": 2.9769902744421363e-05, "loss": 16.2505, "step": 15441 }, { "epoch": 0.6436580384310783, "grad_norm": 418.0, "learning_rate": 2.9763730086667557e-05, "loss": 16.5003, "step": 15442 }, { "epoch": 0.6436997207286066, "grad_norm": 572.0, "learning_rate": 2.9757557797721542e-05, "loss": 16.6274, "step": 15443 }, { "epoch": 0.6437414030261348, "grad_norm": 560.0, "learning_rate": 2.9751385877695852e-05, "loss": 18.8751, "step": 15444 }, { "epoch": 0.643783085323663, "grad_norm": 474.0, "learning_rate": 2.9745214326702935e-05, "loss": 16.7533, "step": 15445 }, { "epoch": 0.6438247676211912, "grad_norm": 516.0, "learning_rate": 2.9739043144855294e-05, "loss": 16.8767, "step": 15446 }, { "epoch": 0.6438664499187196, "grad_norm": 220.0, "learning_rate": 2.9732872332265392e-05, "loss": 11.8126, "step": 15447 }, { "epoch": 0.6439081322162478, "grad_norm": 179.0, "learning_rate": 2.9726701889045684e-05, "loss": 9.3756, "step": 15448 }, { "epoch": 0.643949814513776, "grad_norm": 696.0, "learning_rate": 2.972053181530863e-05, "loss": 21.8753, "step": 15449 }, { "epoch": 0.6439914968113042, "grad_norm": 596.0, "learning_rate": 2.9714362111166705e-05, "loss": 18.2506, "step": 15450 }, { "epoch": 0.6440331791088325, "grad_norm": 556.0, "learning_rate": 2.970819277673231e-05, "loss": 19.2503, "step": 15451 }, { "epoch": 0.6440748614063607, "grad_norm": 384.0, "learning_rate": 2.9702023812117918e-05, "loss": 16.8754, "step": 15452 }, { "epoch": 0.6441165437038889, "grad_norm": 1240.0, "learning_rate": 2.969585521743593e-05, "loss": 27.8752, "step": 15453 }, { "epoch": 0.6441582260014171, "grad_norm": 173.0, "learning_rate": 2.96896869927988e-05, "loss": 10.7507, "step": 15454 }, { "epoch": 0.6441999082989455, "grad_norm": 81.0, "learning_rate": 2.9683519138318905e-05, "loss": 8.2506, "step": 15455 }, { "epoch": 0.6442415905964737, "grad_norm": 170.0, "learning_rate": 2.9677351654108702e-05, "loss": 8.563, "step": 15456 }, { "epoch": 0.6442832728940019, "grad_norm": 740.0, "learning_rate": 2.967118454028055e-05, "loss": 20.8755, "step": 15457 }, { "epoch": 0.6443249551915301, "grad_norm": 312.0, "learning_rate": 2.966501779694687e-05, "loss": 9.3757, "step": 15458 }, { "epoch": 0.6443666374890584, "grad_norm": 992.0, "learning_rate": 2.9658851424220047e-05, "loss": 22.7543, "step": 15459 }, { "epoch": 0.6444083197865866, "grad_norm": 312.0, "learning_rate": 2.9652685422212467e-05, "loss": 13.188, "step": 15460 }, { "epoch": 0.6444500020841148, "grad_norm": 266.0, "learning_rate": 2.9646519791036487e-05, "loss": 11.8134, "step": 15461 }, { "epoch": 0.6444916843816432, "grad_norm": 414.0, "learning_rate": 2.9640354530804515e-05, "loss": 15.6894, "step": 15462 }, { "epoch": 0.6445333666791714, "grad_norm": 416.0, "learning_rate": 2.9634189641628862e-05, "loss": 14.313, "step": 15463 }, { "epoch": 0.6445750489766996, "grad_norm": 378.0, "learning_rate": 2.9628025123621938e-05, "loss": 15.2501, "step": 15464 }, { "epoch": 0.6446167312742278, "grad_norm": 552.0, "learning_rate": 2.9621860976896036e-05, "loss": 18.5007, "step": 15465 }, { "epoch": 0.6446584135717561, "grad_norm": 136.0, "learning_rate": 2.9615697201563552e-05, "loss": 9.5628, "step": 15466 }, { "epoch": 0.6447000958692843, "grad_norm": 194.0, "learning_rate": 2.9609533797736777e-05, "loss": 12.1882, "step": 15467 }, { "epoch": 0.6447417781668126, "grad_norm": 284.0, "learning_rate": 2.9603370765528083e-05, "loss": 12.1255, "step": 15468 }, { "epoch": 0.6447834604643408, "grad_norm": 596.0, "learning_rate": 2.9597208105049735e-05, "loss": 20.3753, "step": 15469 }, { "epoch": 0.6448251427618691, "grad_norm": 330.0, "learning_rate": 2.959104581641411e-05, "loss": 13.5001, "step": 15470 }, { "epoch": 0.6448668250593973, "grad_norm": 270.0, "learning_rate": 2.9584883899733462e-05, "loss": 12.8768, "step": 15471 }, { "epoch": 0.6449085073569255, "grad_norm": 684.0, "learning_rate": 2.957872235512013e-05, "loss": 20.6252, "step": 15472 }, { "epoch": 0.6449501896544537, "grad_norm": 157.0, "learning_rate": 2.9572561182686388e-05, "loss": 9.064, "step": 15473 }, { "epoch": 0.644991871951982, "grad_norm": 169.0, "learning_rate": 2.9566400382544547e-05, "loss": 9.7502, "step": 15474 }, { "epoch": 0.6450335542495103, "grad_norm": 138.0, "learning_rate": 2.9560239954806857e-05, "loss": 10.1878, "step": 15475 }, { "epoch": 0.6450752365470385, "grad_norm": 145.0, "learning_rate": 2.955407989958563e-05, "loss": 10.0631, "step": 15476 }, { "epoch": 0.6451169188445667, "grad_norm": 139.0, "learning_rate": 2.9547920216993087e-05, "loss": 10.1878, "step": 15477 }, { "epoch": 0.645158601142095, "grad_norm": 85.5, "learning_rate": 2.9541760907141535e-05, "loss": 7.4064, "step": 15478 }, { "epoch": 0.6452002834396232, "grad_norm": 732.0, "learning_rate": 2.9535601970143184e-05, "loss": 16.8788, "step": 15479 }, { "epoch": 0.6452419657371514, "grad_norm": 51.5, "learning_rate": 2.9529443406110326e-05, "loss": 6.7816, "step": 15480 }, { "epoch": 0.6452836480346796, "grad_norm": 904.0, "learning_rate": 2.9523285215155162e-05, "loss": 21.8802, "step": 15481 }, { "epoch": 0.645325330332208, "grad_norm": 300.0, "learning_rate": 2.9517127397389955e-05, "loss": 13.8754, "step": 15482 }, { "epoch": 0.6453670126297362, "grad_norm": 728.0, "learning_rate": 2.951096995292691e-05, "loss": 24.5002, "step": 15483 }, { "epoch": 0.6454086949272644, "grad_norm": 320.0, "learning_rate": 2.9504812881878262e-05, "loss": 13.2501, "step": 15484 }, { "epoch": 0.6454503772247926, "grad_norm": 238.0, "learning_rate": 2.9498656184356215e-05, "loss": 13.3752, "step": 15485 }, { "epoch": 0.6454920595223209, "grad_norm": 310.0, "learning_rate": 2.949249986047298e-05, "loss": 13.6257, "step": 15486 }, { "epoch": 0.6455337418198491, "grad_norm": 350.0, "learning_rate": 2.9486343910340752e-05, "loss": 13.8752, "step": 15487 }, { "epoch": 0.6455754241173773, "grad_norm": 348.0, "learning_rate": 2.948018833407174e-05, "loss": 15.1258, "step": 15488 }, { "epoch": 0.6456171064149056, "grad_norm": 320.0, "learning_rate": 2.94740331317781e-05, "loss": 13.7502, "step": 15489 }, { "epoch": 0.6456587887124339, "grad_norm": 310.0, "learning_rate": 2.946787830357205e-05, "loss": 13.3755, "step": 15490 }, { "epoch": 0.6457004710099621, "grad_norm": 496.0, "learning_rate": 2.9461723849565714e-05, "loss": 17.6259, "step": 15491 }, { "epoch": 0.6457421533074903, "grad_norm": 354.0, "learning_rate": 2.945556976987131e-05, "loss": 13.8752, "step": 15492 }, { "epoch": 0.6457838356050185, "grad_norm": 480.0, "learning_rate": 2.9449416064600953e-05, "loss": 15.188, "step": 15493 }, { "epoch": 0.6458255179025468, "grad_norm": 780.0, "learning_rate": 2.9443262733866827e-05, "loss": 19.6293, "step": 15494 }, { "epoch": 0.645867200200075, "grad_norm": 247.0, "learning_rate": 2.9437109777781046e-05, "loss": 12.0626, "step": 15495 }, { "epoch": 0.6459088824976033, "grad_norm": 202.0, "learning_rate": 2.9430957196455772e-05, "loss": 9.5626, "step": 15496 }, { "epoch": 0.6459505647951315, "grad_norm": 462.0, "learning_rate": 2.942480499000313e-05, "loss": 15.2502, "step": 15497 }, { "epoch": 0.6459922470926598, "grad_norm": 764.0, "learning_rate": 2.941865315853525e-05, "loss": 21.3756, "step": 15498 }, { "epoch": 0.646033929390188, "grad_norm": 1024.0, "learning_rate": 2.941250170216422e-05, "loss": 22.6283, "step": 15499 }, { "epoch": 0.6460756116877162, "grad_norm": 472.0, "learning_rate": 2.9406350621002205e-05, "loss": 17.8759, "step": 15500 }, { "epoch": 0.6461172939852444, "grad_norm": 206.0, "learning_rate": 2.940019991516125e-05, "loss": 11.7514, "step": 15501 }, { "epoch": 0.6461589762827727, "grad_norm": 454.0, "learning_rate": 2.939404958475351e-05, "loss": 17.3753, "step": 15502 }, { "epoch": 0.646200658580301, "grad_norm": 227.0, "learning_rate": 2.938789962989102e-05, "loss": 9.7527, "step": 15503 }, { "epoch": 0.6462423408778292, "grad_norm": 298.0, "learning_rate": 2.9381750050685906e-05, "loss": 13.0003, "step": 15504 }, { "epoch": 0.6462840231753574, "grad_norm": 864.0, "learning_rate": 2.937560084725021e-05, "loss": 21.6256, "step": 15505 }, { "epoch": 0.6463257054728857, "grad_norm": 912.0, "learning_rate": 2.9369452019696043e-05, "loss": 24.0033, "step": 15506 }, { "epoch": 0.6463673877704139, "grad_norm": 1704.0, "learning_rate": 2.9363303568135425e-05, "loss": 34.504, "step": 15507 }, { "epoch": 0.6464090700679421, "grad_norm": 136.0, "learning_rate": 2.9357155492680445e-05, "loss": 10.9382, "step": 15508 }, { "epoch": 0.6464507523654703, "grad_norm": 596.0, "learning_rate": 2.935100779344312e-05, "loss": 19.1253, "step": 15509 }, { "epoch": 0.6464924346629987, "grad_norm": 139.0, "learning_rate": 2.9344860470535528e-05, "loss": 10.1254, "step": 15510 }, { "epoch": 0.6465341169605269, "grad_norm": 280.0, "learning_rate": 2.9338713524069672e-05, "loss": 10.3755, "step": 15511 }, { "epoch": 0.6465757992580551, "grad_norm": 308.0, "learning_rate": 2.9332566954157615e-05, "loss": 13.1252, "step": 15512 }, { "epoch": 0.6466174815555833, "grad_norm": 107.5, "learning_rate": 2.9326420760911342e-05, "loss": 9.5005, "step": 15513 }, { "epoch": 0.6466591638531116, "grad_norm": 324.0, "learning_rate": 2.9320274944442905e-05, "loss": 12.5003, "step": 15514 }, { "epoch": 0.6467008461506398, "grad_norm": 1264.0, "learning_rate": 2.9314129504864275e-05, "loss": 29.6254, "step": 15515 }, { "epoch": 0.646742528448168, "grad_norm": 270.0, "learning_rate": 2.9307984442287484e-05, "loss": 12.6254, "step": 15516 }, { "epoch": 0.6467842107456963, "grad_norm": 516.0, "learning_rate": 2.9301839756824502e-05, "loss": 16.8759, "step": 15517 }, { "epoch": 0.6468258930432246, "grad_norm": 266.0, "learning_rate": 2.9295695448587346e-05, "loss": 11.5635, "step": 15518 }, { "epoch": 0.6468675753407528, "grad_norm": 160.0, "learning_rate": 2.9289551517687958e-05, "loss": 12.438, "step": 15519 }, { "epoch": 0.646909257638281, "grad_norm": 596.0, "learning_rate": 2.928340796423835e-05, "loss": 20.0003, "step": 15520 }, { "epoch": 0.6469509399358092, "grad_norm": 334.0, "learning_rate": 2.9277264788350457e-05, "loss": 14.6879, "step": 15521 }, { "epoch": 0.6469926222333375, "grad_norm": 178.0, "learning_rate": 2.9271121990136252e-05, "loss": 11.8127, "step": 15522 }, { "epoch": 0.6470343045308657, "grad_norm": 436.0, "learning_rate": 2.9264979569707697e-05, "loss": 16.5004, "step": 15523 }, { "epoch": 0.647075986828394, "grad_norm": 1192.0, "learning_rate": 2.925883752717673e-05, "loss": 31.8752, "step": 15524 }, { "epoch": 0.6471176691259222, "grad_norm": 300.0, "learning_rate": 2.9252695862655276e-05, "loss": 11.8752, "step": 15525 }, { "epoch": 0.6471593514234505, "grad_norm": 324.0, "learning_rate": 2.9246554576255304e-05, "loss": 14.1254, "step": 15526 }, { "epoch": 0.6472010337209787, "grad_norm": 127.5, "learning_rate": 2.9240413668088697e-05, "loss": 9.5015, "step": 15527 }, { "epoch": 0.6472427160185069, "grad_norm": 142.0, "learning_rate": 2.9234273138267414e-05, "loss": 10.6253, "step": 15528 }, { "epoch": 0.6472843983160351, "grad_norm": 1824.0, "learning_rate": 2.9228132986903328e-05, "loss": 38.0001, "step": 15529 }, { "epoch": 0.6473260806135634, "grad_norm": 83.0, "learning_rate": 2.9221993214108377e-05, "loss": 9.1256, "step": 15530 }, { "epoch": 0.6473677629110917, "grad_norm": 588.0, "learning_rate": 2.9215853819994433e-05, "loss": 18.5028, "step": 15531 }, { "epoch": 0.6474094452086199, "grad_norm": 354.0, "learning_rate": 2.920971480467341e-05, "loss": 15.3753, "step": 15532 }, { "epoch": 0.6474511275061482, "grad_norm": 179.0, "learning_rate": 2.9203576168257163e-05, "loss": 10.3791, "step": 15533 }, { "epoch": 0.6474928098036764, "grad_norm": 156.0, "learning_rate": 2.9197437910857595e-05, "loss": 10.5008, "step": 15534 }, { "epoch": 0.6475344921012046, "grad_norm": 54.0, "learning_rate": 2.9191300032586567e-05, "loss": 6.9066, "step": 15535 }, { "epoch": 0.6475761743987328, "grad_norm": 458.0, "learning_rate": 2.9185162533555966e-05, "loss": 16.6252, "step": 15536 }, { "epoch": 0.6476178566962612, "grad_norm": 244.0, "learning_rate": 2.9179025413877582e-05, "loss": 12.064, "step": 15537 }, { "epoch": 0.6476595389937894, "grad_norm": 199.0, "learning_rate": 2.9172888673663352e-05, "loss": 12.0632, "step": 15538 }, { "epoch": 0.6477012212913176, "grad_norm": 196.0, "learning_rate": 2.9166752313025036e-05, "loss": 11.3131, "step": 15539 }, { "epoch": 0.6477429035888458, "grad_norm": 382.0, "learning_rate": 2.9160616332074524e-05, "loss": 13.8787, "step": 15540 }, { "epoch": 0.6477845858863741, "grad_norm": 716.0, "learning_rate": 2.9154480730923635e-05, "loss": 21.5001, "step": 15541 }, { "epoch": 0.6478262681839023, "grad_norm": 300.0, "learning_rate": 2.9148345509684183e-05, "loss": 13.6878, "step": 15542 }, { "epoch": 0.6478679504814305, "grad_norm": 456.0, "learning_rate": 2.9142210668467984e-05, "loss": 16.2503, "step": 15543 }, { "epoch": 0.6479096327789587, "grad_norm": 1568.0, "learning_rate": 2.9136076207386847e-05, "loss": 32.7551, "step": 15544 }, { "epoch": 0.6479513150764871, "grad_norm": 262.0, "learning_rate": 2.9129942126552572e-05, "loss": 12.1251, "step": 15545 }, { "epoch": 0.6479929973740153, "grad_norm": 1104.0, "learning_rate": 2.912380842607696e-05, "loss": 27.6259, "step": 15546 }, { "epoch": 0.6480346796715435, "grad_norm": 209.0, "learning_rate": 2.9117675106071784e-05, "loss": 10.8756, "step": 15547 }, { "epoch": 0.6480763619690717, "grad_norm": 274.0, "learning_rate": 2.9111542166648842e-05, "loss": 13.2504, "step": 15548 }, { "epoch": 0.6481180442666, "grad_norm": 468.0, "learning_rate": 2.9105409607919892e-05, "loss": 16.8753, "step": 15549 }, { "epoch": 0.6481597265641282, "grad_norm": 1160.0, "learning_rate": 2.9099277429996707e-05, "loss": 28.0006, "step": 15550 }, { "epoch": 0.6482014088616564, "grad_norm": 250.0, "learning_rate": 2.9093145632991053e-05, "loss": 12.6257, "step": 15551 }, { "epoch": 0.6482430911591847, "grad_norm": 868.0, "learning_rate": 2.9087014217014675e-05, "loss": 24.8753, "step": 15552 }, { "epoch": 0.648284773456713, "grad_norm": 207.0, "learning_rate": 2.908088318217931e-05, "loss": 11.7504, "step": 15553 }, { "epoch": 0.6483264557542412, "grad_norm": 1600.0, "learning_rate": 2.9074752528596715e-05, "loss": 41.2501, "step": 15554 }, { "epoch": 0.6483681380517694, "grad_norm": 196.0, "learning_rate": 2.9068622256378586e-05, "loss": 11.7502, "step": 15555 }, { "epoch": 0.6484098203492976, "grad_norm": 400.0, "learning_rate": 2.9062492365636717e-05, "loss": 14.7501, "step": 15556 }, { "epoch": 0.6484515026468259, "grad_norm": 260.0, "learning_rate": 2.9056362856482743e-05, "loss": 13.3128, "step": 15557 }, { "epoch": 0.6484931849443542, "grad_norm": 260.0, "learning_rate": 2.9050233729028463e-05, "loss": 13.1253, "step": 15558 }, { "epoch": 0.6485348672418824, "grad_norm": 318.0, "learning_rate": 2.9044104983385485e-05, "loss": 13.2503, "step": 15559 }, { "epoch": 0.6485765495394106, "grad_norm": 1048.0, "learning_rate": 2.9037976619665597e-05, "loss": 31.8753, "step": 15560 }, { "epoch": 0.6486182318369389, "grad_norm": 91.0, "learning_rate": 2.90318486379804e-05, "loss": 8.5002, "step": 15561 }, { "epoch": 0.6486599141344671, "grad_norm": 712.0, "learning_rate": 2.9025721038441665e-05, "loss": 17.6295, "step": 15562 }, { "epoch": 0.6487015964319953, "grad_norm": 480.0, "learning_rate": 2.9019593821160997e-05, "loss": 16.5024, "step": 15563 }, { "epoch": 0.6487432787295235, "grad_norm": 2048.0, "learning_rate": 2.901346698625012e-05, "loss": 40.0052, "step": 15564 }, { "epoch": 0.6487849610270519, "grad_norm": 240.0, "learning_rate": 2.9007340533820636e-05, "loss": 11.1876, "step": 15565 }, { "epoch": 0.6488266433245801, "grad_norm": 354.0, "learning_rate": 2.9001214463984256e-05, "loss": 14.1254, "step": 15566 }, { "epoch": 0.6488683256221083, "grad_norm": 374.0, "learning_rate": 2.899508877685261e-05, "loss": 14.1254, "step": 15567 }, { "epoch": 0.6489100079196365, "grad_norm": 306.0, "learning_rate": 2.898896347253732e-05, "loss": 10.8758, "step": 15568 }, { "epoch": 0.6489516902171648, "grad_norm": 1004.0, "learning_rate": 2.8982838551150048e-05, "loss": 23.3798, "step": 15569 }, { "epoch": 0.648993372514693, "grad_norm": 175.0, "learning_rate": 2.897671401280241e-05, "loss": 9.313, "step": 15570 }, { "epoch": 0.6490350548122212, "grad_norm": 310.0, "learning_rate": 2.8970589857606022e-05, "loss": 14.1882, "step": 15571 }, { "epoch": 0.6490767371097494, "grad_norm": 1096.0, "learning_rate": 2.8964466085672504e-05, "loss": 21.5045, "step": 15572 }, { "epoch": 0.6491184194072778, "grad_norm": 836.0, "learning_rate": 2.8958342697113454e-05, "loss": 26.3753, "step": 15573 }, { "epoch": 0.649160101704806, "grad_norm": 548.0, "learning_rate": 2.8952219692040482e-05, "loss": 18.8754, "step": 15574 }, { "epoch": 0.6492017840023342, "grad_norm": 230.0, "learning_rate": 2.894609707056517e-05, "loss": 12.4378, "step": 15575 }, { "epoch": 0.6492434662998624, "grad_norm": 1176.0, "learning_rate": 2.893997483279911e-05, "loss": 31.2509, "step": 15576 }, { "epoch": 0.6492851485973907, "grad_norm": 388.0, "learning_rate": 2.8933852978853876e-05, "loss": 16.3754, "step": 15577 }, { "epoch": 0.6493268308949189, "grad_norm": 300.0, "learning_rate": 2.892773150884104e-05, "loss": 13.3752, "step": 15578 }, { "epoch": 0.6493685131924471, "grad_norm": 448.0, "learning_rate": 2.8921610422872168e-05, "loss": 15.438, "step": 15579 }, { "epoch": 0.6494101954899754, "grad_norm": 184.0, "learning_rate": 2.8915489721058824e-05, "loss": 7.1569, "step": 15580 }, { "epoch": 0.6494518777875037, "grad_norm": 159.0, "learning_rate": 2.890936940351253e-05, "loss": 12.0629, "step": 15581 }, { "epoch": 0.6494935600850319, "grad_norm": 326.0, "learning_rate": 2.8903249470344885e-05, "loss": 14.6881, "step": 15582 }, { "epoch": 0.6495352423825601, "grad_norm": 264.0, "learning_rate": 2.8897129921667356e-05, "loss": 12.7508, "step": 15583 }, { "epoch": 0.6495769246800883, "grad_norm": 105.0, "learning_rate": 2.8891010757591546e-05, "loss": 8.876, "step": 15584 }, { "epoch": 0.6496186069776166, "grad_norm": 384.0, "learning_rate": 2.8884891978228902e-05, "loss": 14.2522, "step": 15585 }, { "epoch": 0.6496602892751449, "grad_norm": 604.0, "learning_rate": 2.887877358369101e-05, "loss": 19.1252, "step": 15586 }, { "epoch": 0.6497019715726731, "grad_norm": 229.0, "learning_rate": 2.8872655574089315e-05, "loss": 12.4378, "step": 15587 }, { "epoch": 0.6497436538702013, "grad_norm": 444.0, "learning_rate": 2.8866537949535376e-05, "loss": 17.7517, "step": 15588 }, { "epoch": 0.6497853361677296, "grad_norm": 332.0, "learning_rate": 2.8860420710140623e-05, "loss": 14.0002, "step": 15589 }, { "epoch": 0.6498270184652578, "grad_norm": 308.0, "learning_rate": 2.8854303856016596e-05, "loss": 13.1288, "step": 15590 }, { "epoch": 0.649868700762786, "grad_norm": 198.0, "learning_rate": 2.884818738727476e-05, "loss": 12.6255, "step": 15591 }, { "epoch": 0.6499103830603142, "grad_norm": 316.0, "learning_rate": 2.8842071304026596e-05, "loss": 14.0004, "step": 15592 }, { "epoch": 0.6499520653578426, "grad_norm": 728.0, "learning_rate": 2.8835955606383548e-05, "loss": 18.6291, "step": 15593 }, { "epoch": 0.6499937476553708, "grad_norm": 219.0, "learning_rate": 2.8829840294457095e-05, "loss": 11.0629, "step": 15594 }, { "epoch": 0.650035429952899, "grad_norm": 194.0, "learning_rate": 2.882372536835868e-05, "loss": 7.5001, "step": 15595 }, { "epoch": 0.6500771122504272, "grad_norm": 296.0, "learning_rate": 2.881761082819975e-05, "loss": 12.8129, "step": 15596 }, { "epoch": 0.6501187945479555, "grad_norm": 482.0, "learning_rate": 2.8811496674091743e-05, "loss": 15.1251, "step": 15597 }, { "epoch": 0.6501604768454837, "grad_norm": 452.0, "learning_rate": 2.8805382906146094e-05, "loss": 17.5004, "step": 15598 }, { "epoch": 0.6502021591430119, "grad_norm": 144.0, "learning_rate": 2.8799269524474214e-05, "loss": 11.0003, "step": 15599 }, { "epoch": 0.6502438414405401, "grad_norm": 516.0, "learning_rate": 2.8793156529187537e-05, "loss": 18.3752, "step": 15600 }, { "epoch": 0.6502855237380685, "grad_norm": 206.0, "learning_rate": 2.878704392039746e-05, "loss": 10.6878, "step": 15601 }, { "epoch": 0.6503272060355967, "grad_norm": 288.0, "learning_rate": 2.8780931698215397e-05, "loss": 12.6914, "step": 15602 }, { "epoch": 0.6503688883331249, "grad_norm": 716.0, "learning_rate": 2.8774819862752733e-05, "loss": 21.1283, "step": 15603 }, { "epoch": 0.6504105706306531, "grad_norm": 536.0, "learning_rate": 2.876870841412086e-05, "loss": 17.5003, "step": 15604 }, { "epoch": 0.6504522529281814, "grad_norm": 748.0, "learning_rate": 2.8762597352431165e-05, "loss": 20.6285, "step": 15605 }, { "epoch": 0.6504939352257096, "grad_norm": 864.0, "learning_rate": 2.8756486677795012e-05, "loss": 20.8796, "step": 15606 }, { "epoch": 0.6505356175232379, "grad_norm": 336.0, "learning_rate": 2.8750376390323762e-05, "loss": 14.5033, "step": 15607 }, { "epoch": 0.6505772998207662, "grad_norm": 120.0, "learning_rate": 2.8744266490128824e-05, "loss": 9.2504, "step": 15608 }, { "epoch": 0.6506189821182944, "grad_norm": 226.0, "learning_rate": 2.8738156977321473e-05, "loss": 8.5004, "step": 15609 }, { "epoch": 0.6506606644158226, "grad_norm": 376.0, "learning_rate": 2.8732047852013146e-05, "loss": 14.5629, "step": 15610 }, { "epoch": 0.6507023467133508, "grad_norm": 318.0, "learning_rate": 2.872593911431509e-05, "loss": 13.8127, "step": 15611 }, { "epoch": 0.6507440290108791, "grad_norm": 336.0, "learning_rate": 2.8719830764338728e-05, "loss": 14.8127, "step": 15612 }, { "epoch": 0.6507857113084073, "grad_norm": 466.0, "learning_rate": 2.8713722802195298e-05, "loss": 16.7505, "step": 15613 }, { "epoch": 0.6508273936059356, "grad_norm": 436.0, "learning_rate": 2.8707615227996198e-05, "loss": 15.1905, "step": 15614 }, { "epoch": 0.6508690759034638, "grad_norm": 320.0, "learning_rate": 2.8701508041852655e-05, "loss": 13.6252, "step": 15615 }, { "epoch": 0.6509107582009921, "grad_norm": 334.0, "learning_rate": 2.8695401243876048e-05, "loss": 14.5005, "step": 15616 }, { "epoch": 0.6509524404985203, "grad_norm": 772.0, "learning_rate": 2.8689294834177637e-05, "loss": 22.3756, "step": 15617 }, { "epoch": 0.6509941227960485, "grad_norm": 336.0, "learning_rate": 2.868318881286872e-05, "loss": 14.6882, "step": 15618 }, { "epoch": 0.6510358050935767, "grad_norm": 636.0, "learning_rate": 2.8677083180060587e-05, "loss": 21.5003, "step": 15619 }, { "epoch": 0.651077487391105, "grad_norm": 468.0, "learning_rate": 2.8670977935864502e-05, "loss": 15.879, "step": 15620 }, { "epoch": 0.6511191696886333, "grad_norm": 964.0, "learning_rate": 2.8664873080391734e-05, "loss": 26.5003, "step": 15621 }, { "epoch": 0.6511608519861615, "grad_norm": 418.0, "learning_rate": 2.865876861375355e-05, "loss": 14.9377, "step": 15622 }, { "epoch": 0.6512025342836897, "grad_norm": 256.0, "learning_rate": 2.8652664536061202e-05, "loss": 12.6252, "step": 15623 }, { "epoch": 0.651244216581218, "grad_norm": 123.5, "learning_rate": 2.8646560847425942e-05, "loss": 5.9065, "step": 15624 }, { "epoch": 0.6512858988787462, "grad_norm": 356.0, "learning_rate": 2.8640457547959004e-05, "loss": 13.9378, "step": 15625 }, { "epoch": 0.6513275811762744, "grad_norm": 430.0, "learning_rate": 2.863435463777162e-05, "loss": 16.5007, "step": 15626 }, { "epoch": 0.6513692634738026, "grad_norm": 692.0, "learning_rate": 2.8628252116975023e-05, "loss": 23.2514, "step": 15627 }, { "epoch": 0.651410945771331, "grad_norm": 506.0, "learning_rate": 2.862214998568043e-05, "loss": 17.6261, "step": 15628 }, { "epoch": 0.6514526280688592, "grad_norm": 262.0, "learning_rate": 2.8616048243999048e-05, "loss": 12.3752, "step": 15629 }, { "epoch": 0.6514943103663874, "grad_norm": 520.0, "learning_rate": 2.8609946892042084e-05, "loss": 16.5004, "step": 15630 }, { "epoch": 0.6515359926639156, "grad_norm": 95.0, "learning_rate": 2.860384592992072e-05, "loss": 7.6255, "step": 15631 }, { "epoch": 0.6515776749614439, "grad_norm": 322.0, "learning_rate": 2.8597745357746197e-05, "loss": 11.1897, "step": 15632 }, { "epoch": 0.6516193572589721, "grad_norm": 338.0, "learning_rate": 2.8591645175629634e-05, "loss": 14.0643, "step": 15633 }, { "epoch": 0.6516610395565003, "grad_norm": 139.0, "learning_rate": 2.858554538368228e-05, "loss": 8.5001, "step": 15634 }, { "epoch": 0.6517027218540286, "grad_norm": 139.0, "learning_rate": 2.8579445982015206e-05, "loss": 9.8756, "step": 15635 }, { "epoch": 0.6517444041515569, "grad_norm": 253.0, "learning_rate": 2.8573346970739686e-05, "loss": 13.5002, "step": 15636 }, { "epoch": 0.6517860864490851, "grad_norm": 488.0, "learning_rate": 2.8567248349966768e-05, "loss": 15.7515, "step": 15637 }, { "epoch": 0.6518277687466133, "grad_norm": 352.0, "learning_rate": 2.85611501198077e-05, "loss": 14.2502, "step": 15638 }, { "epoch": 0.6518694510441415, "grad_norm": 416.0, "learning_rate": 2.8555052280373536e-05, "loss": 16.0002, "step": 15639 }, { "epoch": 0.6519111333416698, "grad_norm": 334.0, "learning_rate": 2.8548954831775483e-05, "loss": 14.0629, "step": 15640 }, { "epoch": 0.651952815639198, "grad_norm": 426.0, "learning_rate": 2.8542857774124593e-05, "loss": 16.2502, "step": 15641 }, { "epoch": 0.6519944979367263, "grad_norm": 1240.0, "learning_rate": 2.8536761107532047e-05, "loss": 28.5048, "step": 15642 }, { "epoch": 0.6520361802342545, "grad_norm": 482.0, "learning_rate": 2.8530664832108934e-05, "loss": 17.5036, "step": 15643 }, { "epoch": 0.6520778625317828, "grad_norm": 720.0, "learning_rate": 2.8524568947966356e-05, "loss": 21.5003, "step": 15644 }, { "epoch": 0.652119544829311, "grad_norm": 394.0, "learning_rate": 2.8518473455215422e-05, "loss": 15.5001, "step": 15645 }, { "epoch": 0.6521612271268392, "grad_norm": 163.0, "learning_rate": 2.8512378353967206e-05, "loss": 9.0002, "step": 15646 }, { "epoch": 0.6522029094243674, "grad_norm": 556.0, "learning_rate": 2.8506283644332808e-05, "loss": 18.2503, "step": 15647 }, { "epoch": 0.6522445917218958, "grad_norm": 804.0, "learning_rate": 2.8500189326423305e-05, "loss": 21.5005, "step": 15648 }, { "epoch": 0.652286274019424, "grad_norm": 316.0, "learning_rate": 2.849409540034975e-05, "loss": 12.6255, "step": 15649 }, { "epoch": 0.6523279563169522, "grad_norm": 254.0, "learning_rate": 2.8488001866223222e-05, "loss": 12.6254, "step": 15650 }, { "epoch": 0.6523696386144804, "grad_norm": 368.0, "learning_rate": 2.848190872415477e-05, "loss": 12.9378, "step": 15651 }, { "epoch": 0.6524113209120087, "grad_norm": 286.0, "learning_rate": 2.8475815974255443e-05, "loss": 13.8752, "step": 15652 }, { "epoch": 0.6524530032095369, "grad_norm": 156.0, "learning_rate": 2.8469723616636278e-05, "loss": 9.0627, "step": 15653 }, { "epoch": 0.6524946855070651, "grad_norm": 628.0, "learning_rate": 2.8463631651408307e-05, "loss": 20.1266, "step": 15654 }, { "epoch": 0.6525363678045933, "grad_norm": 348.0, "learning_rate": 2.8457540078682567e-05, "loss": 14.6251, "step": 15655 }, { "epoch": 0.6525780501021217, "grad_norm": 312.0, "learning_rate": 2.8451448898570065e-05, "loss": 14.5629, "step": 15656 }, { "epoch": 0.6526197323996499, "grad_norm": 146.0, "learning_rate": 2.8445358111181808e-05, "loss": 8.6878, "step": 15657 }, { "epoch": 0.6526614146971781, "grad_norm": 308.0, "learning_rate": 2.8439267716628847e-05, "loss": 13.3752, "step": 15658 }, { "epoch": 0.6527030969947063, "grad_norm": 231.0, "learning_rate": 2.8433177715022108e-05, "loss": 12.6256, "step": 15659 }, { "epoch": 0.6527447792922346, "grad_norm": 354.0, "learning_rate": 2.8427088106472666e-05, "loss": 13.0003, "step": 15660 }, { "epoch": 0.6527864615897628, "grad_norm": 386.0, "learning_rate": 2.842099889109141e-05, "loss": 12.8153, "step": 15661 }, { "epoch": 0.652828143887291, "grad_norm": 668.0, "learning_rate": 2.8414910068989402e-05, "loss": 22.6252, "step": 15662 }, { "epoch": 0.6528698261848193, "grad_norm": 306.0, "learning_rate": 2.840882164027754e-05, "loss": 13.6878, "step": 15663 }, { "epoch": 0.6529115084823476, "grad_norm": 300.0, "learning_rate": 2.840273360506686e-05, "loss": 13.0004, "step": 15664 }, { "epoch": 0.6529531907798758, "grad_norm": 390.0, "learning_rate": 2.839664596346824e-05, "loss": 14.9376, "step": 15665 }, { "epoch": 0.652994873077404, "grad_norm": 204.0, "learning_rate": 2.8390558715592676e-05, "loss": 11.5005, "step": 15666 }, { "epoch": 0.6530365553749322, "grad_norm": 452.0, "learning_rate": 2.838447186155111e-05, "loss": 15.313, "step": 15667 }, { "epoch": 0.6530782376724605, "grad_norm": 190.0, "learning_rate": 2.8378385401454455e-05, "loss": 10.8757, "step": 15668 }, { "epoch": 0.6531199199699887, "grad_norm": 1120.0, "learning_rate": 2.8372299335413643e-05, "loss": 25.0005, "step": 15669 }, { "epoch": 0.653161602267517, "grad_norm": 1104.0, "learning_rate": 2.83662136635396e-05, "loss": 27.5048, "step": 15670 }, { "epoch": 0.6532032845650452, "grad_norm": 664.0, "learning_rate": 2.836012838594323e-05, "loss": 20.0004, "step": 15671 }, { "epoch": 0.6532449668625735, "grad_norm": 1464.0, "learning_rate": 2.835404350273545e-05, "loss": 29.5057, "step": 15672 }, { "epoch": 0.6532866491601017, "grad_norm": 166.0, "learning_rate": 2.8347959014027138e-05, "loss": 10.7524, "step": 15673 }, { "epoch": 0.6533283314576299, "grad_norm": 272.0, "learning_rate": 2.8341874919929202e-05, "loss": 13.3753, "step": 15674 }, { "epoch": 0.6533700137551581, "grad_norm": 322.0, "learning_rate": 2.8335791220552515e-05, "loss": 11.5645, "step": 15675 }, { "epoch": 0.6534116960526865, "grad_norm": 1728.0, "learning_rate": 2.8329707916007958e-05, "loss": 35.2502, "step": 15676 }, { "epoch": 0.6534533783502147, "grad_norm": 142.0, "learning_rate": 2.8323625006406394e-05, "loss": 9.5627, "step": 15677 }, { "epoch": 0.6534950606477429, "grad_norm": 1488.0, "learning_rate": 2.8317542491858696e-05, "loss": 32.0002, "step": 15678 }, { "epoch": 0.6535367429452712, "grad_norm": 60.0, "learning_rate": 2.8311460372475707e-05, "loss": 7.6571, "step": 15679 }, { "epoch": 0.6535784252427994, "grad_norm": 143.0, "learning_rate": 2.8305378648368276e-05, "loss": 9.0001, "step": 15680 }, { "epoch": 0.6536201075403276, "grad_norm": 434.0, "learning_rate": 2.829929731964723e-05, "loss": 16.1253, "step": 15681 }, { "epoch": 0.6536617898378558, "grad_norm": 200.0, "learning_rate": 2.829321638642345e-05, "loss": 10.0006, "step": 15682 }, { "epoch": 0.6537034721353842, "grad_norm": 374.0, "learning_rate": 2.8287135848807693e-05, "loss": 14.0626, "step": 15683 }, { "epoch": 0.6537451544329124, "grad_norm": 134.0, "learning_rate": 2.828105570691085e-05, "loss": 7.0318, "step": 15684 }, { "epoch": 0.6537868367304406, "grad_norm": 107.5, "learning_rate": 2.8274975960843663e-05, "loss": 8.5627, "step": 15685 }, { "epoch": 0.6538285190279688, "grad_norm": 262.0, "learning_rate": 2.8268896610716998e-05, "loss": 13.9378, "step": 15686 }, { "epoch": 0.6538702013254971, "grad_norm": 498.0, "learning_rate": 2.8262817656641584e-05, "loss": 17.6253, "step": 15687 }, { "epoch": 0.6539118836230253, "grad_norm": 828.0, "learning_rate": 2.825673909872829e-05, "loss": 19.2538, "step": 15688 }, { "epoch": 0.6539535659205535, "grad_norm": 462.0, "learning_rate": 2.8250660937087814e-05, "loss": 17.7502, "step": 15689 }, { "epoch": 0.6539952482180817, "grad_norm": 81.0, "learning_rate": 2.8244583171831012e-05, "loss": 5.5626, "step": 15690 }, { "epoch": 0.6540369305156101, "grad_norm": 776.0, "learning_rate": 2.8238505803068577e-05, "loss": 19.3797, "step": 15691 }, { "epoch": 0.6540786128131383, "grad_norm": 308.0, "learning_rate": 2.8232428830911323e-05, "loss": 11.8751, "step": 15692 }, { "epoch": 0.6541202951106665, "grad_norm": 226.0, "learning_rate": 2.8226352255469985e-05, "loss": 12.1252, "step": 15693 }, { "epoch": 0.6541619774081947, "grad_norm": 600.0, "learning_rate": 2.8220276076855313e-05, "loss": 18.3753, "step": 15694 }, { "epoch": 0.654203659705723, "grad_norm": 1272.0, "learning_rate": 2.8214200295178038e-05, "loss": 28.627, "step": 15695 }, { "epoch": 0.6542453420032512, "grad_norm": 544.0, "learning_rate": 2.82081249105489e-05, "loss": 19.0004, "step": 15696 }, { "epoch": 0.6542870243007795, "grad_norm": 300.0, "learning_rate": 2.8202049923078623e-05, "loss": 13.0027, "step": 15697 }, { "epoch": 0.6543287065983077, "grad_norm": 77.5, "learning_rate": 2.8195975332877915e-05, "loss": 6.3751, "step": 15698 }, { "epoch": 0.654370388895836, "grad_norm": 336.0, "learning_rate": 2.8189901140057496e-05, "loss": 15.1878, "step": 15699 }, { "epoch": 0.6544120711933642, "grad_norm": 204.0, "learning_rate": 2.8183827344728064e-05, "loss": 10.8129, "step": 15700 }, { "epoch": 0.6544537534908924, "grad_norm": 372.0, "learning_rate": 2.817775394700032e-05, "loss": 14.9378, "step": 15701 }, { "epoch": 0.6544954357884206, "grad_norm": 458.0, "learning_rate": 2.8171680946984934e-05, "loss": 15.3752, "step": 15702 }, { "epoch": 0.6545371180859489, "grad_norm": 228.0, "learning_rate": 2.8165608344792617e-05, "loss": 12.9379, "step": 15703 }, { "epoch": 0.6545788003834772, "grad_norm": 488.0, "learning_rate": 2.8159536140534017e-05, "loss": 15.128, "step": 15704 }, { "epoch": 0.6546204826810054, "grad_norm": 512.0, "learning_rate": 2.8153464334319814e-05, "loss": 17.6253, "step": 15705 }, { "epoch": 0.6546621649785336, "grad_norm": 448.0, "learning_rate": 2.814739292626065e-05, "loss": 16.5004, "step": 15706 }, { "epoch": 0.6547038472760619, "grad_norm": 924.0, "learning_rate": 2.814132191646719e-05, "loss": 19.5052, "step": 15707 }, { "epoch": 0.6547455295735901, "grad_norm": 724.0, "learning_rate": 2.8135251305050104e-05, "loss": 21.7504, "step": 15708 }, { "epoch": 0.6547872118711183, "grad_norm": 460.0, "learning_rate": 2.8129181092119972e-05, "loss": 16.1252, "step": 15709 }, { "epoch": 0.6548288941686465, "grad_norm": 432.0, "learning_rate": 2.812311127778749e-05, "loss": 16.2531, "step": 15710 }, { "epoch": 0.6548705764661749, "grad_norm": 138.0, "learning_rate": 2.8117041862163206e-05, "loss": 9.3757, "step": 15711 }, { "epoch": 0.6549122587637031, "grad_norm": 114.5, "learning_rate": 2.811097284535782e-05, "loss": 8.8753, "step": 15712 }, { "epoch": 0.6549539410612313, "grad_norm": 540.0, "learning_rate": 2.810490422748186e-05, "loss": 18.7504, "step": 15713 }, { "epoch": 0.6549956233587595, "grad_norm": 141.0, "learning_rate": 2.8098836008646002e-05, "loss": 10.5002, "step": 15714 }, { "epoch": 0.6550373056562878, "grad_norm": 458.0, "learning_rate": 2.809276818896076e-05, "loss": 16.3764, "step": 15715 }, { "epoch": 0.655078987953816, "grad_norm": 406.0, "learning_rate": 2.8086700768536788e-05, "loss": 14.3127, "step": 15716 }, { "epoch": 0.6551206702513442, "grad_norm": 604.0, "learning_rate": 2.808063374748463e-05, "loss": 19.8752, "step": 15717 }, { "epoch": 0.6551623525488725, "grad_norm": 664.0, "learning_rate": 2.8074567125914873e-05, "loss": 19.7508, "step": 15718 }, { "epoch": 0.6552040348464008, "grad_norm": 540.0, "learning_rate": 2.8068500903938078e-05, "loss": 17.5003, "step": 15719 }, { "epoch": 0.655245717143929, "grad_norm": 113.0, "learning_rate": 2.80624350816648e-05, "loss": 9.5004, "step": 15720 }, { "epoch": 0.6552873994414572, "grad_norm": 780.0, "learning_rate": 2.8056369659205593e-05, "loss": 22.5009, "step": 15721 }, { "epoch": 0.6553290817389854, "grad_norm": 154.0, "learning_rate": 2.8050304636670992e-05, "loss": 9.1252, "step": 15722 }, { "epoch": 0.6553707640365137, "grad_norm": 126.5, "learning_rate": 2.804424001417154e-05, "loss": 9.5629, "step": 15723 }, { "epoch": 0.6554124463340419, "grad_norm": 205.0, "learning_rate": 2.8038175791817767e-05, "loss": 11.627, "step": 15724 }, { "epoch": 0.6554541286315702, "grad_norm": 104.5, "learning_rate": 2.8032111969720183e-05, "loss": 8.5633, "step": 15725 }, { "epoch": 0.6554958109290984, "grad_norm": 584.0, "learning_rate": 2.8026048547989307e-05, "loss": 16.1254, "step": 15726 }, { "epoch": 0.6555374932266267, "grad_norm": 107.5, "learning_rate": 2.8019985526735653e-05, "loss": 9.3129, "step": 15727 }, { "epoch": 0.6555791755241549, "grad_norm": 374.0, "learning_rate": 2.801392290606971e-05, "loss": 14.5007, "step": 15728 }, { "epoch": 0.6556208578216831, "grad_norm": 258.0, "learning_rate": 2.8007860686101973e-05, "loss": 11.4406, "step": 15729 }, { "epoch": 0.6556625401192113, "grad_norm": 154.0, "learning_rate": 2.800179886694293e-05, "loss": 10.5008, "step": 15730 }, { "epoch": 0.6557042224167396, "grad_norm": 232.0, "learning_rate": 2.7995737448703057e-05, "loss": 13.0629, "step": 15731 }, { "epoch": 0.6557459047142679, "grad_norm": 266.0, "learning_rate": 2.7989676431492822e-05, "loss": 12.192, "step": 15732 }, { "epoch": 0.6557875870117961, "grad_norm": 292.0, "learning_rate": 2.798361581542267e-05, "loss": 13.8127, "step": 15733 }, { "epoch": 0.6558292693093243, "grad_norm": 362.0, "learning_rate": 2.7977555600603106e-05, "loss": 15.0627, "step": 15734 }, { "epoch": 0.6558709516068526, "grad_norm": 532.0, "learning_rate": 2.7971495787144507e-05, "loss": 18.0033, "step": 15735 }, { "epoch": 0.6559126339043808, "grad_norm": 294.0, "learning_rate": 2.79654363751574e-05, "loss": 13.6877, "step": 15736 }, { "epoch": 0.655954316201909, "grad_norm": 304.0, "learning_rate": 2.7959377364752125e-05, "loss": 14.2506, "step": 15737 }, { "epoch": 0.6559959984994372, "grad_norm": 135.0, "learning_rate": 2.7953318756039204e-05, "loss": 9.5003, "step": 15738 }, { "epoch": 0.6560376807969656, "grad_norm": 322.0, "learning_rate": 2.7947260549128956e-05, "loss": 12.2502, "step": 15739 }, { "epoch": 0.6560793630944938, "grad_norm": 123.0, "learning_rate": 2.7941202744131883e-05, "loss": 10.3755, "step": 15740 }, { "epoch": 0.656121045392022, "grad_norm": 260.0, "learning_rate": 2.7935145341158308e-05, "loss": 10.9379, "step": 15741 }, { "epoch": 0.6561627276895502, "grad_norm": 151.0, "learning_rate": 2.7929088340318692e-05, "loss": 10.5629, "step": 15742 }, { "epoch": 0.6562044099870785, "grad_norm": 220.0, "learning_rate": 2.79230317417234e-05, "loss": 6.0006, "step": 15743 }, { "epoch": 0.6562460922846067, "grad_norm": 227.0, "learning_rate": 2.7916975545482817e-05, "loss": 12.6877, "step": 15744 }, { "epoch": 0.6562877745821349, "grad_norm": 684.0, "learning_rate": 2.791091975170731e-05, "loss": 22.0004, "step": 15745 }, { "epoch": 0.6563294568796632, "grad_norm": 482.0, "learning_rate": 2.790486436050725e-05, "loss": 16.3754, "step": 15746 }, { "epoch": 0.6563711391771915, "grad_norm": 700.0, "learning_rate": 2.7898809371992997e-05, "loss": 18.8756, "step": 15747 }, { "epoch": 0.6564128214747197, "grad_norm": 206.0, "learning_rate": 2.789275478627491e-05, "loss": 11.3128, "step": 15748 }, { "epoch": 0.6564545037722479, "grad_norm": 560.0, "learning_rate": 2.788670060346333e-05, "loss": 18.2515, "step": 15749 }, { "epoch": 0.6564961860697761, "grad_norm": 376.0, "learning_rate": 2.7880646823668588e-05, "loss": 15.7504, "step": 15750 }, { "epoch": 0.6565378683673044, "grad_norm": 278.0, "learning_rate": 2.7874593447001028e-05, "loss": 10.4378, "step": 15751 }, { "epoch": 0.6565795506648326, "grad_norm": 140.0, "learning_rate": 2.786854047357097e-05, "loss": 7.6252, "step": 15752 }, { "epoch": 0.6566212329623609, "grad_norm": 352.0, "learning_rate": 2.786248790348872e-05, "loss": 14.8752, "step": 15753 }, { "epoch": 0.6566629152598892, "grad_norm": 564.0, "learning_rate": 2.7856435736864595e-05, "loss": 18.6252, "step": 15754 }, { "epoch": 0.6567045975574174, "grad_norm": 70.5, "learning_rate": 2.785038397380889e-05, "loss": 9.0006, "step": 15755 }, { "epoch": 0.6567462798549456, "grad_norm": 200.0, "learning_rate": 2.784433261443191e-05, "loss": 12.2503, "step": 15756 }, { "epoch": 0.6567879621524738, "grad_norm": 127.5, "learning_rate": 2.7838281658843913e-05, "loss": 9.5005, "step": 15757 }, { "epoch": 0.6568296444500021, "grad_norm": 544.0, "learning_rate": 2.7832231107155237e-05, "loss": 17.6252, "step": 15758 }, { "epoch": 0.6568713267475303, "grad_norm": 486.0, "learning_rate": 2.782618095947608e-05, "loss": 17.0002, "step": 15759 }, { "epoch": 0.6569130090450586, "grad_norm": 192.0, "learning_rate": 2.782013121591678e-05, "loss": 11.814, "step": 15760 }, { "epoch": 0.6569546913425868, "grad_norm": 688.0, "learning_rate": 2.7814081876587523e-05, "loss": 22.2516, "step": 15761 }, { "epoch": 0.6569963736401151, "grad_norm": 226.0, "learning_rate": 2.780803294159863e-05, "loss": 12.6253, "step": 15762 }, { "epoch": 0.6570380559376433, "grad_norm": 227.0, "learning_rate": 2.7801984411060267e-05, "loss": 11.0003, "step": 15763 }, { "epoch": 0.6570797382351715, "grad_norm": 376.0, "learning_rate": 2.7795936285082746e-05, "loss": 12.5628, "step": 15764 }, { "epoch": 0.6571214205326997, "grad_norm": 153.0, "learning_rate": 2.7789888563776222e-05, "loss": 10.1877, "step": 15765 }, { "epoch": 0.657163102830228, "grad_norm": 920.0, "learning_rate": 2.7783841247250986e-05, "loss": 26.2503, "step": 15766 }, { "epoch": 0.6572047851277563, "grad_norm": 340.0, "learning_rate": 2.777779433561718e-05, "loss": 14.3754, "step": 15767 }, { "epoch": 0.6572464674252845, "grad_norm": 67.5, "learning_rate": 2.7771747828985063e-05, "loss": 7.0636, "step": 15768 }, { "epoch": 0.6572881497228127, "grad_norm": 502.0, "learning_rate": 2.7765701727464814e-05, "loss": 17.6252, "step": 15769 }, { "epoch": 0.657329832020341, "grad_norm": 466.0, "learning_rate": 2.7759656031166626e-05, "loss": 17.1253, "step": 15770 }, { "epoch": 0.6573715143178692, "grad_norm": 211.0, "learning_rate": 2.7753610740200685e-05, "loss": 11.8756, "step": 15771 }, { "epoch": 0.6574131966153974, "grad_norm": 226.0, "learning_rate": 2.774756585467716e-05, "loss": 10.3127, "step": 15772 }, { "epoch": 0.6574548789129256, "grad_norm": 250.0, "learning_rate": 2.774152137470622e-05, "loss": 13.8133, "step": 15773 }, { "epoch": 0.657496561210454, "grad_norm": 502.0, "learning_rate": 2.773547730039803e-05, "loss": 17.3772, "step": 15774 }, { "epoch": 0.6575382435079822, "grad_norm": 233.0, "learning_rate": 2.7729433631862746e-05, "loss": 10.5628, "step": 15775 }, { "epoch": 0.6575799258055104, "grad_norm": 306.0, "learning_rate": 2.7723390369210507e-05, "loss": 12.6877, "step": 15776 }, { "epoch": 0.6576216081030386, "grad_norm": 456.0, "learning_rate": 2.7717347512551463e-05, "loss": 14.8755, "step": 15777 }, { "epoch": 0.6576632904005669, "grad_norm": 828.0, "learning_rate": 2.7711305061995728e-05, "loss": 18.3793, "step": 15778 }, { "epoch": 0.6577049726980951, "grad_norm": 136.0, "learning_rate": 2.7705263017653443e-05, "loss": 10.688, "step": 15779 }, { "epoch": 0.6577466549956233, "grad_norm": 664.0, "learning_rate": 2.769922137963472e-05, "loss": 18.8767, "step": 15780 }, { "epoch": 0.6577883372931516, "grad_norm": 424.0, "learning_rate": 2.7693180148049663e-05, "loss": 16.5006, "step": 15781 }, { "epoch": 0.6578300195906799, "grad_norm": 156.0, "learning_rate": 2.7687139323008382e-05, "loss": 11.1269, "step": 15782 }, { "epoch": 0.6578717018882081, "grad_norm": 356.0, "learning_rate": 2.7681098904620938e-05, "loss": 14.3128, "step": 15783 }, { "epoch": 0.6579133841857363, "grad_norm": 125.0, "learning_rate": 2.7675058892997496e-05, "loss": 9.6253, "step": 15784 }, { "epoch": 0.6579550664832645, "grad_norm": 276.0, "learning_rate": 2.7669019288248054e-05, "loss": 12.6257, "step": 15785 }, { "epoch": 0.6579967487807928, "grad_norm": 260.0, "learning_rate": 2.7662980090482747e-05, "loss": 10.9377, "step": 15786 }, { "epoch": 0.658038431078321, "grad_norm": 366.0, "learning_rate": 2.765694129981158e-05, "loss": 14.3759, "step": 15787 }, { "epoch": 0.6580801133758493, "grad_norm": 108.5, "learning_rate": 2.7650902916344666e-05, "loss": 9.5629, "step": 15788 }, { "epoch": 0.6581217956733775, "grad_norm": 494.0, "learning_rate": 2.7644864940192004e-05, "loss": 17.8752, "step": 15789 }, { "epoch": 0.6581634779709058, "grad_norm": 660.0, "learning_rate": 2.7638827371463698e-05, "loss": 16.5038, "step": 15790 }, { "epoch": 0.658205160268434, "grad_norm": 258.0, "learning_rate": 2.763279021026971e-05, "loss": 11.6253, "step": 15791 }, { "epoch": 0.6582468425659622, "grad_norm": 322.0, "learning_rate": 2.7626753456720127e-05, "loss": 13.9386, "step": 15792 }, { "epoch": 0.6582885248634904, "grad_norm": 360.0, "learning_rate": 2.7620717110924942e-05, "loss": 15.1251, "step": 15793 }, { "epoch": 0.6583302071610188, "grad_norm": 452.0, "learning_rate": 2.7614681172994184e-05, "loss": 16.1278, "step": 15794 }, { "epoch": 0.658371889458547, "grad_norm": 1200.0, "learning_rate": 2.7608645643037844e-05, "loss": 29.5002, "step": 15795 }, { "epoch": 0.6584135717560752, "grad_norm": 880.0, "learning_rate": 2.7602610521165928e-05, "loss": 20.8817, "step": 15796 }, { "epoch": 0.6584552540536034, "grad_norm": 189.0, "learning_rate": 2.759657580748842e-05, "loss": 8.1254, "step": 15797 }, { "epoch": 0.6584969363511317, "grad_norm": 262.0, "learning_rate": 2.759054150211531e-05, "loss": 12.0628, "step": 15798 }, { "epoch": 0.6585386186486599, "grad_norm": 404.0, "learning_rate": 2.7584507605156562e-05, "loss": 15.5003, "step": 15799 }, { "epoch": 0.6585803009461881, "grad_norm": 328.0, "learning_rate": 2.757847411672216e-05, "loss": 12.6883, "step": 15800 }, { "epoch": 0.6586219832437163, "grad_norm": 448.0, "learning_rate": 2.7572441036922054e-05, "loss": 16.7501, "step": 15801 }, { "epoch": 0.6586636655412447, "grad_norm": 338.0, "learning_rate": 2.7566408365866204e-05, "loss": 14.3764, "step": 15802 }, { "epoch": 0.6587053478387729, "grad_norm": 288.0, "learning_rate": 2.7560376103664553e-05, "loss": 13.3757, "step": 15803 }, { "epoch": 0.6587470301363011, "grad_norm": 572.0, "learning_rate": 2.7554344250427034e-05, "loss": 21.1255, "step": 15804 }, { "epoch": 0.6587887124338293, "grad_norm": 272.0, "learning_rate": 2.7548312806263586e-05, "loss": 12.3761, "step": 15805 }, { "epoch": 0.6588303947313576, "grad_norm": 804.0, "learning_rate": 2.754228177128414e-05, "loss": 23.6252, "step": 15806 }, { "epoch": 0.6588720770288858, "grad_norm": 176.0, "learning_rate": 2.753625114559857e-05, "loss": 7.0009, "step": 15807 }, { "epoch": 0.658913759326414, "grad_norm": 201.0, "learning_rate": 2.7530220929316863e-05, "loss": 11.6254, "step": 15808 }, { "epoch": 0.6589554416239423, "grad_norm": 374.0, "learning_rate": 2.7524191122548837e-05, "loss": 15.4377, "step": 15809 }, { "epoch": 0.6589971239214706, "grad_norm": 201.0, "learning_rate": 2.7518161725404458e-05, "loss": 11.1877, "step": 15810 }, { "epoch": 0.6590388062189988, "grad_norm": 488.0, "learning_rate": 2.7512132737993533e-05, "loss": 18.0002, "step": 15811 }, { "epoch": 0.659080488516527, "grad_norm": 394.0, "learning_rate": 2.7506104160426033e-05, "loss": 15.2505, "step": 15812 }, { "epoch": 0.6591221708140552, "grad_norm": 600.0, "learning_rate": 2.7500075992811735e-05, "loss": 18.1256, "step": 15813 }, { "epoch": 0.6591638531115835, "grad_norm": 230.0, "learning_rate": 2.7494048235260595e-05, "loss": 12.0002, "step": 15814 }, { "epoch": 0.6592055354091118, "grad_norm": 560.0, "learning_rate": 2.748802088788238e-05, "loss": 18.1253, "step": 15815 }, { "epoch": 0.65924721770664, "grad_norm": 600.0, "learning_rate": 2.7481993950787032e-05, "loss": 17.8771, "step": 15816 }, { "epoch": 0.6592889000041682, "grad_norm": 368.0, "learning_rate": 2.7475967424084293e-05, "loss": 14.8752, "step": 15817 }, { "epoch": 0.6593305823016965, "grad_norm": 241.0, "learning_rate": 2.7469941307884074e-05, "loss": 11.7504, "step": 15818 }, { "epoch": 0.6593722645992247, "grad_norm": 209.0, "learning_rate": 2.746391560229617e-05, "loss": 5.6252, "step": 15819 }, { "epoch": 0.6594139468967529, "grad_norm": 110.0, "learning_rate": 2.74578903074304e-05, "loss": 10.1257, "step": 15820 }, { "epoch": 0.6594556291942811, "grad_norm": 278.0, "learning_rate": 2.7451865423396593e-05, "loss": 13.6257, "step": 15821 }, { "epoch": 0.6594973114918095, "grad_norm": 808.0, "learning_rate": 2.744584095030453e-05, "loss": 21.7502, "step": 15822 }, { "epoch": 0.6595389937893377, "grad_norm": 268.0, "learning_rate": 2.7439816888264026e-05, "loss": 12.3128, "step": 15823 }, { "epoch": 0.6595806760868659, "grad_norm": 740.0, "learning_rate": 2.7433793237384852e-05, "loss": 24.7505, "step": 15824 }, { "epoch": 0.6596223583843942, "grad_norm": 348.0, "learning_rate": 2.742776999777681e-05, "loss": 13.9377, "step": 15825 }, { "epoch": 0.6596640406819224, "grad_norm": 181.0, "learning_rate": 2.7421747169549654e-05, "loss": 10.8753, "step": 15826 }, { "epoch": 0.6597057229794506, "grad_norm": 592.0, "learning_rate": 2.7415724752813164e-05, "loss": 16.8763, "step": 15827 }, { "epoch": 0.6597474052769788, "grad_norm": 520.0, "learning_rate": 2.7409702747677096e-05, "loss": 18.7503, "step": 15828 }, { "epoch": 0.6597890875745072, "grad_norm": 226.0, "learning_rate": 2.7403681154251203e-05, "loss": 12.0004, "step": 15829 }, { "epoch": 0.6598307698720354, "grad_norm": 524.0, "learning_rate": 2.7397659972645224e-05, "loss": 17.6252, "step": 15830 }, { "epoch": 0.6598724521695636, "grad_norm": 364.0, "learning_rate": 2.7391639202968898e-05, "loss": 15.0626, "step": 15831 }, { "epoch": 0.6599141344670918, "grad_norm": 190.0, "learning_rate": 2.7385618845331956e-05, "loss": 9.6253, "step": 15832 }, { "epoch": 0.6599558167646201, "grad_norm": 600.0, "learning_rate": 2.7379598899844095e-05, "loss": 20.2519, "step": 15833 }, { "epoch": 0.6599974990621483, "grad_norm": 292.0, "learning_rate": 2.7373579366615098e-05, "loss": 13.1885, "step": 15834 }, { "epoch": 0.6600391813596765, "grad_norm": 504.0, "learning_rate": 2.736756024575458e-05, "loss": 16.3754, "step": 15835 }, { "epoch": 0.6600808636572048, "grad_norm": 90.0, "learning_rate": 2.7361541537372326e-05, "loss": 8.5628, "step": 15836 }, { "epoch": 0.6601225459547331, "grad_norm": 88.5, "learning_rate": 2.7355523241577953e-05, "loss": 8.8754, "step": 15837 }, { "epoch": 0.6601642282522613, "grad_norm": 560.0, "learning_rate": 2.7349505358481213e-05, "loss": 18.5002, "step": 15838 }, { "epoch": 0.6602059105497895, "grad_norm": 66.0, "learning_rate": 2.734348788819171e-05, "loss": 9.1252, "step": 15839 }, { "epoch": 0.6602475928473177, "grad_norm": 462.0, "learning_rate": 2.7337470830819197e-05, "loss": 17.3754, "step": 15840 }, { "epoch": 0.660289275144846, "grad_norm": 672.0, "learning_rate": 2.7331454186473245e-05, "loss": 21.3753, "step": 15841 }, { "epoch": 0.6603309574423742, "grad_norm": 183.0, "learning_rate": 2.7325437955263574e-05, "loss": 10.8142, "step": 15842 }, { "epoch": 0.6603726397399025, "grad_norm": 159.0, "learning_rate": 2.731942213729981e-05, "loss": 10.2502, "step": 15843 }, { "epoch": 0.6604143220374307, "grad_norm": 536.0, "learning_rate": 2.731340673269158e-05, "loss": 18.2503, "step": 15844 }, { "epoch": 0.660456004334959, "grad_norm": 58.25, "learning_rate": 2.7307391741548537e-05, "loss": 8.2505, "step": 15845 }, { "epoch": 0.6604976866324872, "grad_norm": 366.0, "learning_rate": 2.7301377163980296e-05, "loss": 14.6259, "step": 15846 }, { "epoch": 0.6605393689300154, "grad_norm": 1280.0, "learning_rate": 2.729536300009647e-05, "loss": 26.6311, "step": 15847 }, { "epoch": 0.6605810512275436, "grad_norm": 206.0, "learning_rate": 2.7289349250006667e-05, "loss": 11.6254, "step": 15848 }, { "epoch": 0.660622733525072, "grad_norm": 416.0, "learning_rate": 2.7283335913820485e-05, "loss": 15.876, "step": 15849 }, { "epoch": 0.6606644158226002, "grad_norm": 59.5, "learning_rate": 2.727732299164753e-05, "loss": 7.7816, "step": 15850 }, { "epoch": 0.6607060981201284, "grad_norm": 296.0, "learning_rate": 2.727131048359738e-05, "loss": 10.8752, "step": 15851 }, { "epoch": 0.6607477804176566, "grad_norm": 60.0, "learning_rate": 2.7265298389779608e-05, "loss": 8.0628, "step": 15852 }, { "epoch": 0.6607894627151849, "grad_norm": 480.0, "learning_rate": 2.7259286710303798e-05, "loss": 17.0006, "step": 15853 }, { "epoch": 0.6608311450127131, "grad_norm": 892.0, "learning_rate": 2.72532754452795e-05, "loss": 24.2535, "step": 15854 }, { "epoch": 0.6608728273102413, "grad_norm": 177.0, "learning_rate": 2.724726459481628e-05, "loss": 10.813, "step": 15855 }, { "epoch": 0.6609145096077695, "grad_norm": 114.0, "learning_rate": 2.7241254159023684e-05, "loss": 10.0004, "step": 15856 }, { "epoch": 0.6609561919052979, "grad_norm": 392.0, "learning_rate": 2.7235244138011252e-05, "loss": 14.8754, "step": 15857 }, { "epoch": 0.6609978742028261, "grad_norm": 79.0, "learning_rate": 2.7229234531888515e-05, "loss": 6.9702, "step": 15858 }, { "epoch": 0.6610395565003543, "grad_norm": 258.0, "learning_rate": 2.722322534076498e-05, "loss": 12.2502, "step": 15859 }, { "epoch": 0.6610812387978825, "grad_norm": 454.0, "learning_rate": 2.721721656475022e-05, "loss": 16.2503, "step": 15860 }, { "epoch": 0.6611229210954108, "grad_norm": 215.0, "learning_rate": 2.7211208203953675e-05, "loss": 11.4379, "step": 15861 }, { "epoch": 0.661164603392939, "grad_norm": 684.0, "learning_rate": 2.720520025848492e-05, "loss": 23.2504, "step": 15862 }, { "epoch": 0.6612062856904672, "grad_norm": 380.0, "learning_rate": 2.7199192728453383e-05, "loss": 11.8752, "step": 15863 }, { "epoch": 0.6612479679879955, "grad_norm": 398.0, "learning_rate": 2.7193185613968615e-05, "loss": 14.063, "step": 15864 }, { "epoch": 0.6612896502855238, "grad_norm": 430.0, "learning_rate": 2.718717891514002e-05, "loss": 15.813, "step": 15865 }, { "epoch": 0.661331332583052, "grad_norm": 298.0, "learning_rate": 2.7181172632077168e-05, "loss": 14.5005, "step": 15866 }, { "epoch": 0.6613730148805802, "grad_norm": 318.0, "learning_rate": 2.717516676488942e-05, "loss": 13.2502, "step": 15867 }, { "epoch": 0.6614146971781084, "grad_norm": 378.0, "learning_rate": 2.716916131368631e-05, "loss": 13.5007, "step": 15868 }, { "epoch": 0.6614563794756367, "grad_norm": 217.0, "learning_rate": 2.716315627857725e-05, "loss": 11.8129, "step": 15869 }, { "epoch": 0.661498061773165, "grad_norm": 412.0, "learning_rate": 2.715715165967171e-05, "loss": 14.5628, "step": 15870 }, { "epoch": 0.6615397440706932, "grad_norm": 506.0, "learning_rate": 2.7151147457079097e-05, "loss": 17.2502, "step": 15871 }, { "epoch": 0.6615814263682214, "grad_norm": 310.0, "learning_rate": 2.7145143670908858e-05, "loss": 13.8758, "step": 15872 }, { "epoch": 0.6616231086657497, "grad_norm": 342.0, "learning_rate": 2.7139140301270394e-05, "loss": 14.7503, "step": 15873 }, { "epoch": 0.6616647909632779, "grad_norm": 314.0, "learning_rate": 2.7133137348273142e-05, "loss": 13.5007, "step": 15874 }, { "epoch": 0.6617064732608061, "grad_norm": 592.0, "learning_rate": 2.7127134812026478e-05, "loss": 19.6255, "step": 15875 }, { "epoch": 0.6617481555583343, "grad_norm": 496.0, "learning_rate": 2.7121132692639818e-05, "loss": 18.2503, "step": 15876 }, { "epoch": 0.6617898378558627, "grad_norm": 76.0, "learning_rate": 2.711513099022255e-05, "loss": 9.3768, "step": 15877 }, { "epoch": 0.6618315201533909, "grad_norm": 208.0, "learning_rate": 2.7109129704884044e-05, "loss": 10.9377, "step": 15878 }, { "epoch": 0.6618732024509191, "grad_norm": 460.0, "learning_rate": 2.7103128836733682e-05, "loss": 16.0002, "step": 15879 }, { "epoch": 0.6619148847484473, "grad_norm": 229.0, "learning_rate": 2.7097128385880833e-05, "loss": 11.9398, "step": 15880 }, { "epoch": 0.6619565670459756, "grad_norm": 233.0, "learning_rate": 2.7091128352434857e-05, "loss": 11.1252, "step": 15881 }, { "epoch": 0.6619982493435038, "grad_norm": 171.0, "learning_rate": 2.7085128736505094e-05, "loss": 10.1257, "step": 15882 }, { "epoch": 0.662039931641032, "grad_norm": 171.0, "learning_rate": 2.7079129538200875e-05, "loss": 11.0002, "step": 15883 }, { "epoch": 0.6620816139385602, "grad_norm": 776.0, "learning_rate": 2.7073130757631593e-05, "loss": 21.7502, "step": 15884 }, { "epoch": 0.6621232962360886, "grad_norm": 284.0, "learning_rate": 2.7067132394906504e-05, "loss": 13.7508, "step": 15885 }, { "epoch": 0.6621649785336168, "grad_norm": 390.0, "learning_rate": 2.7061134450134996e-05, "loss": 15.8754, "step": 15886 }, { "epoch": 0.662206660831145, "grad_norm": 464.0, "learning_rate": 2.705513692342631e-05, "loss": 15.1254, "step": 15887 }, { "epoch": 0.6622483431286732, "grad_norm": 182.0, "learning_rate": 2.7049139814889834e-05, "loss": 10.6252, "step": 15888 }, { "epoch": 0.6622900254262015, "grad_norm": 156.0, "learning_rate": 2.704314312463478e-05, "loss": 11.4379, "step": 15889 }, { "epoch": 0.6623317077237297, "grad_norm": 480.0, "learning_rate": 2.7037146852770523e-05, "loss": 17.0002, "step": 15890 }, { "epoch": 0.662373390021258, "grad_norm": 252.0, "learning_rate": 2.7031150999406263e-05, "loss": 10.6883, "step": 15891 }, { "epoch": 0.6624150723187862, "grad_norm": 322.0, "learning_rate": 2.702515556465135e-05, "loss": 12.8128, "step": 15892 }, { "epoch": 0.6624567546163145, "grad_norm": 422.0, "learning_rate": 2.701916054861498e-05, "loss": 15.0628, "step": 15893 }, { "epoch": 0.6624984369138427, "grad_norm": 252.0, "learning_rate": 2.7013165951406473e-05, "loss": 11.7501, "step": 15894 }, { "epoch": 0.6625401192113709, "grad_norm": 1080.0, "learning_rate": 2.7007171773135047e-05, "loss": 27.5005, "step": 15895 }, { "epoch": 0.6625818015088991, "grad_norm": 1248.0, "learning_rate": 2.7001178013909966e-05, "loss": 30.8753, "step": 15896 }, { "epoch": 0.6626234838064274, "grad_norm": 131.0, "learning_rate": 2.699518467384045e-05, "loss": 11.2504, "step": 15897 }, { "epoch": 0.6626651661039556, "grad_norm": 210.0, "learning_rate": 2.6989191753035737e-05, "loss": 11.5631, "step": 15898 }, { "epoch": 0.6627068484014839, "grad_norm": 448.0, "learning_rate": 2.6983199251605052e-05, "loss": 16.7503, "step": 15899 }, { "epoch": 0.6627485306990122, "grad_norm": 636.0, "learning_rate": 2.6977207169657605e-05, "loss": 19.7513, "step": 15900 }, { "epoch": 0.6627902129965404, "grad_norm": 472.0, "learning_rate": 2.6971215507302596e-05, "loss": 14.5001, "step": 15901 }, { "epoch": 0.6628318952940686, "grad_norm": 480.0, "learning_rate": 2.6965224264649223e-05, "loss": 16.2501, "step": 15902 }, { "epoch": 0.6628735775915968, "grad_norm": 274.0, "learning_rate": 2.6959233441806698e-05, "loss": 12.5629, "step": 15903 }, { "epoch": 0.6629152598891251, "grad_norm": 450.0, "learning_rate": 2.6953243038884178e-05, "loss": 15.752, "step": 15904 }, { "epoch": 0.6629569421866534, "grad_norm": 378.0, "learning_rate": 2.694725305599085e-05, "loss": 15.6877, "step": 15905 }, { "epoch": 0.6629986244841816, "grad_norm": 183.0, "learning_rate": 2.6941263493235885e-05, "loss": 10.6254, "step": 15906 }, { "epoch": 0.6630403067817098, "grad_norm": 117.5, "learning_rate": 2.6935274350728434e-05, "loss": 9.5004, "step": 15907 }, { "epoch": 0.6630819890792381, "grad_norm": 233.0, "learning_rate": 2.692928562857766e-05, "loss": 10.0636, "step": 15908 }, { "epoch": 0.6631236713767663, "grad_norm": 150.0, "learning_rate": 2.6923297326892686e-05, "loss": 10.813, "step": 15909 }, { "epoch": 0.6631653536742945, "grad_norm": 402.0, "learning_rate": 2.691730944578271e-05, "loss": 14.3126, "step": 15910 }, { "epoch": 0.6632070359718227, "grad_norm": 732.0, "learning_rate": 2.691132198535677e-05, "loss": 21.3752, "step": 15911 }, { "epoch": 0.6632487182693511, "grad_norm": 111.0, "learning_rate": 2.690533494572408e-05, "loss": 8.7511, "step": 15912 }, { "epoch": 0.6632904005668793, "grad_norm": 249.0, "learning_rate": 2.6899348326993667e-05, "loss": 12.2503, "step": 15913 }, { "epoch": 0.6633320828644075, "grad_norm": 286.0, "learning_rate": 2.6893362129274722e-05, "loss": 12.7514, "step": 15914 }, { "epoch": 0.6633737651619357, "grad_norm": 229.0, "learning_rate": 2.688737635267627e-05, "loss": 9.2506, "step": 15915 }, { "epoch": 0.663415447459464, "grad_norm": 712.0, "learning_rate": 2.688139099730747e-05, "loss": 23.1254, "step": 15916 }, { "epoch": 0.6634571297569922, "grad_norm": 440.0, "learning_rate": 2.6875406063277332e-05, "loss": 16.1252, "step": 15917 }, { "epoch": 0.6634988120545204, "grad_norm": 117.5, "learning_rate": 2.6869421550694996e-05, "loss": 7.7819, "step": 15918 }, { "epoch": 0.6635404943520486, "grad_norm": 362.0, "learning_rate": 2.68634374596695e-05, "loss": 14.3752, "step": 15919 }, { "epoch": 0.663582176649577, "grad_norm": 127.0, "learning_rate": 2.6857453790309912e-05, "loss": 10.0002, "step": 15920 }, { "epoch": 0.6636238589471052, "grad_norm": 336.0, "learning_rate": 2.685147054272528e-05, "loss": 15.2502, "step": 15921 }, { "epoch": 0.6636655412446334, "grad_norm": 262.0, "learning_rate": 2.684548771702466e-05, "loss": 13.8755, "step": 15922 }, { "epoch": 0.6637072235421616, "grad_norm": 258.0, "learning_rate": 2.6839505313317077e-05, "loss": 12.0629, "step": 15923 }, { "epoch": 0.6637489058396899, "grad_norm": 264.0, "learning_rate": 2.6833523331711563e-05, "loss": 11.2512, "step": 15924 }, { "epoch": 0.6637905881372181, "grad_norm": 334.0, "learning_rate": 2.6827541772317142e-05, "loss": 14.1254, "step": 15925 }, { "epoch": 0.6638322704347464, "grad_norm": 195.0, "learning_rate": 2.6821560635242836e-05, "loss": 10.6877, "step": 15926 }, { "epoch": 0.6638739527322746, "grad_norm": 169.0, "learning_rate": 2.6815579920597644e-05, "loss": 9.7506, "step": 15927 }, { "epoch": 0.6639156350298029, "grad_norm": 201.0, "learning_rate": 2.6809599628490568e-05, "loss": 11.8758, "step": 15928 }, { "epoch": 0.6639573173273311, "grad_norm": 77.0, "learning_rate": 2.6803619759030592e-05, "loss": 8.2503, "step": 15929 }, { "epoch": 0.6639989996248593, "grad_norm": 237.0, "learning_rate": 2.6797640312326704e-05, "loss": 12.1254, "step": 15930 }, { "epoch": 0.6640406819223875, "grad_norm": 352.0, "learning_rate": 2.6791661288487885e-05, "loss": 15.2502, "step": 15931 }, { "epoch": 0.6640823642199158, "grad_norm": 91.0, "learning_rate": 2.6785682687623092e-05, "loss": 8.313, "step": 15932 }, { "epoch": 0.664124046517444, "grad_norm": 232.0, "learning_rate": 2.6779704509841286e-05, "loss": 10.6877, "step": 15933 }, { "epoch": 0.6641657288149723, "grad_norm": 258.0, "learning_rate": 2.677372675525146e-05, "loss": 12.688, "step": 15934 }, { "epoch": 0.6642074111125005, "grad_norm": 163.0, "learning_rate": 2.6767749423962486e-05, "loss": 10.4383, "step": 15935 }, { "epoch": 0.6642490934100288, "grad_norm": 91.5, "learning_rate": 2.6761772516083377e-05, "loss": 9.1879, "step": 15936 }, { "epoch": 0.664290775707557, "grad_norm": 294.0, "learning_rate": 2.675579603172299e-05, "loss": 14.0003, "step": 15937 }, { "epoch": 0.6643324580050852, "grad_norm": 215.0, "learning_rate": 2.6749819970990318e-05, "loss": 12.2504, "step": 15938 }, { "epoch": 0.6643741403026134, "grad_norm": 442.0, "learning_rate": 2.6743844333994215e-05, "loss": 14.6877, "step": 15939 }, { "epoch": 0.6644158226001418, "grad_norm": 422.0, "learning_rate": 2.6737869120843638e-05, "loss": 14.2523, "step": 15940 }, { "epoch": 0.66445750489767, "grad_norm": 462.0, "learning_rate": 2.6731894331647434e-05, "loss": 15.5033, "step": 15941 }, { "epoch": 0.6644991871951982, "grad_norm": 334.0, "learning_rate": 2.6725919966514557e-05, "loss": 13.5627, "step": 15942 }, { "epoch": 0.6645408694927264, "grad_norm": 1144.0, "learning_rate": 2.671994602555381e-05, "loss": 27.2508, "step": 15943 }, { "epoch": 0.6645825517902547, "grad_norm": 728.0, "learning_rate": 2.671397250887414e-05, "loss": 20.8753, "step": 15944 }, { "epoch": 0.6646242340877829, "grad_norm": 528.0, "learning_rate": 2.6707999416584383e-05, "loss": 18.0002, "step": 15945 }, { "epoch": 0.6646659163853111, "grad_norm": 494.0, "learning_rate": 2.6702026748793406e-05, "loss": 16.3757, "step": 15946 }, { "epoch": 0.6647075986828394, "grad_norm": 181.0, "learning_rate": 2.6696054505610064e-05, "loss": 8.8753, "step": 15947 }, { "epoch": 0.6647492809803677, "grad_norm": 348.0, "learning_rate": 2.669008268714319e-05, "loss": 13.1254, "step": 15948 }, { "epoch": 0.6647909632778959, "grad_norm": 166.0, "learning_rate": 2.6684111293501633e-05, "loss": 11.3755, "step": 15949 }, { "epoch": 0.6648326455754241, "grad_norm": 716.0, "learning_rate": 2.6678140324794222e-05, "loss": 21.2504, "step": 15950 }, { "epoch": 0.6648743278729523, "grad_norm": 264.0, "learning_rate": 2.6672169781129763e-05, "loss": 12.9377, "step": 15951 }, { "epoch": 0.6649160101704806, "grad_norm": 124.5, "learning_rate": 2.6666199662617086e-05, "loss": 10.7509, "step": 15952 }, { "epoch": 0.6649576924680088, "grad_norm": 116.0, "learning_rate": 2.666022996936499e-05, "loss": 10.2505, "step": 15953 }, { "epoch": 0.664999374765537, "grad_norm": 233.0, "learning_rate": 2.6654260701482282e-05, "loss": 12.4398, "step": 15954 }, { "epoch": 0.6650410570630653, "grad_norm": 266.0, "learning_rate": 2.664829185907774e-05, "loss": 12.5004, "step": 15955 }, { "epoch": 0.6650827393605936, "grad_norm": 368.0, "learning_rate": 2.6642323442260154e-05, "loss": 16.0005, "step": 15956 }, { "epoch": 0.6651244216581218, "grad_norm": 474.0, "learning_rate": 2.663635545113829e-05, "loss": 17.8754, "step": 15957 }, { "epoch": 0.66516610395565, "grad_norm": 174.0, "learning_rate": 2.663038788582093e-05, "loss": 9.7505, "step": 15958 }, { "epoch": 0.6652077862531782, "grad_norm": 580.0, "learning_rate": 2.66244207464168e-05, "loss": 17.6255, "step": 15959 }, { "epoch": 0.6652494685507065, "grad_norm": 284.0, "learning_rate": 2.661845403303472e-05, "loss": 12.5001, "step": 15960 }, { "epoch": 0.6652911508482348, "grad_norm": 1112.0, "learning_rate": 2.6612487745783342e-05, "loss": 25.3816, "step": 15961 }, { "epoch": 0.665332833145763, "grad_norm": 302.0, "learning_rate": 2.66065218847715e-05, "loss": 12.8136, "step": 15962 }, { "epoch": 0.6653745154432912, "grad_norm": 304.0, "learning_rate": 2.660055645010784e-05, "loss": 11.9385, "step": 15963 }, { "epoch": 0.6654161977408195, "grad_norm": 684.0, "learning_rate": 2.659459144190114e-05, "loss": 23.3756, "step": 15964 }, { "epoch": 0.6654578800383477, "grad_norm": 178.0, "learning_rate": 2.6588626860260057e-05, "loss": 11.2504, "step": 15965 }, { "epoch": 0.6654995623358759, "grad_norm": 422.0, "learning_rate": 2.658266270529337e-05, "loss": 17.0003, "step": 15966 }, { "epoch": 0.6655412446334041, "grad_norm": 302.0, "learning_rate": 2.6576698977109693e-05, "loss": 12.6255, "step": 15967 }, { "epoch": 0.6655829269309325, "grad_norm": 592.0, "learning_rate": 2.657073567581777e-05, "loss": 18.7509, "step": 15968 }, { "epoch": 0.6656246092284607, "grad_norm": 334.0, "learning_rate": 2.656477280152628e-05, "loss": 13.9378, "step": 15969 }, { "epoch": 0.6656662915259889, "grad_norm": 720.0, "learning_rate": 2.6558810354343878e-05, "loss": 19.0039, "step": 15970 }, { "epoch": 0.6657079738235172, "grad_norm": 454.0, "learning_rate": 2.6552848334379238e-05, "loss": 17.5002, "step": 15971 }, { "epoch": 0.6657496561210454, "grad_norm": 113.0, "learning_rate": 2.6546886741741023e-05, "loss": 9.9377, "step": 15972 }, { "epoch": 0.6657913384185736, "grad_norm": 330.0, "learning_rate": 2.654092557653788e-05, "loss": 14.0637, "step": 15973 }, { "epoch": 0.6658330207161018, "grad_norm": 227.0, "learning_rate": 2.653496483887844e-05, "loss": 11.9381, "step": 15974 }, { "epoch": 0.6658747030136302, "grad_norm": 304.0, "learning_rate": 2.652900452887136e-05, "loss": 13.8755, "step": 15975 }, { "epoch": 0.6659163853111584, "grad_norm": 362.0, "learning_rate": 2.652304464662525e-05, "loss": 15.188, "step": 15976 }, { "epoch": 0.6659580676086866, "grad_norm": 508.0, "learning_rate": 2.6517085192248743e-05, "loss": 16.1286, "step": 15977 }, { "epoch": 0.6659997499062148, "grad_norm": 247.0, "learning_rate": 2.6511126165850436e-05, "loss": 13.5009, "step": 15978 }, { "epoch": 0.6660414322037431, "grad_norm": 266.0, "learning_rate": 2.650516756753894e-05, "loss": 11.6878, "step": 15979 }, { "epoch": 0.6660831145012713, "grad_norm": 245.0, "learning_rate": 2.649920939742285e-05, "loss": 12.1876, "step": 15980 }, { "epoch": 0.6661247967987995, "grad_norm": 86.0, "learning_rate": 2.6493251655610764e-05, "loss": 8.5628, "step": 15981 }, { "epoch": 0.6661664790963278, "grad_norm": 185.0, "learning_rate": 2.6487294342211245e-05, "loss": 8.2505, "step": 15982 }, { "epoch": 0.6662081613938561, "grad_norm": 748.0, "learning_rate": 2.6481337457332876e-05, "loss": 21.7542, "step": 15983 }, { "epoch": 0.6662498436913843, "grad_norm": 804.0, "learning_rate": 2.647538100108422e-05, "loss": 23.0006, "step": 15984 }, { "epoch": 0.6662915259889125, "grad_norm": 456.0, "learning_rate": 2.6469424973573815e-05, "loss": 15.8787, "step": 15985 }, { "epoch": 0.6663332082864407, "grad_norm": 1232.0, "learning_rate": 2.6463469374910267e-05, "loss": 29.7502, "step": 15986 }, { "epoch": 0.666374890583969, "grad_norm": 250.0, "learning_rate": 2.645751420520204e-05, "loss": 13.3752, "step": 15987 }, { "epoch": 0.6664165728814972, "grad_norm": 72.0, "learning_rate": 2.645155946455774e-05, "loss": 5.7819, "step": 15988 }, { "epoch": 0.6664582551790255, "grad_norm": 170.0, "learning_rate": 2.644560515308583e-05, "loss": 11.063, "step": 15989 }, { "epoch": 0.6664999374765537, "grad_norm": 238.0, "learning_rate": 2.643965127089489e-05, "loss": 11.6253, "step": 15990 }, { "epoch": 0.666541619774082, "grad_norm": 256.0, "learning_rate": 2.643369781809336e-05, "loss": 12.1885, "step": 15991 }, { "epoch": 0.6665833020716102, "grad_norm": 189.0, "learning_rate": 2.6427744794789817e-05, "loss": 11.1879, "step": 15992 }, { "epoch": 0.6666249843691384, "grad_norm": 380.0, "learning_rate": 2.6421792201092676e-05, "loss": 14.4378, "step": 15993 }, { "epoch": 0.6666666666666666, "grad_norm": 464.0, "learning_rate": 2.641584003711049e-05, "loss": 16.8753, "step": 15994 }, { "epoch": 0.666708348964195, "grad_norm": 146.0, "learning_rate": 2.6409888302951715e-05, "loss": 9.6253, "step": 15995 }, { "epoch": 0.6667500312617232, "grad_norm": 160.0, "learning_rate": 2.640393699872481e-05, "loss": 10.5009, "step": 15996 }, { "epoch": 0.6667917135592514, "grad_norm": 164.0, "learning_rate": 2.6397986124538253e-05, "loss": 10.9377, "step": 15997 }, { "epoch": 0.6668333958567796, "grad_norm": 704.0, "learning_rate": 2.63920356805005e-05, "loss": 20.8753, "step": 15998 }, { "epoch": 0.6668750781543079, "grad_norm": 53.75, "learning_rate": 2.6386085666719986e-05, "loss": 8.2503, "step": 15999 }, { "epoch": 0.6669167604518361, "grad_norm": 398.0, "learning_rate": 2.638013608330516e-05, "loss": 12.8129, "step": 16000 }, { "epoch": 0.6669584427493643, "grad_norm": 260.0, "learning_rate": 2.6374186930364446e-05, "loss": 12.1252, "step": 16001 }, { "epoch": 0.6670001250468925, "grad_norm": 168.0, "learning_rate": 2.636823820800628e-05, "loss": 10.3752, "step": 16002 }, { "epoch": 0.6670418073444209, "grad_norm": 436.0, "learning_rate": 2.6362289916339067e-05, "loss": 16.2503, "step": 16003 }, { "epoch": 0.6670834896419491, "grad_norm": 223.0, "learning_rate": 2.6356342055471217e-05, "loss": 12.876, "step": 16004 }, { "epoch": 0.6671251719394773, "grad_norm": 272.0, "learning_rate": 2.6350394625511132e-05, "loss": 12.7505, "step": 16005 }, { "epoch": 0.6671668542370055, "grad_norm": 952.0, "learning_rate": 2.6344447626567205e-05, "loss": 28.0004, "step": 16006 }, { "epoch": 0.6672085365345338, "grad_norm": 123.5, "learning_rate": 2.6338501058747818e-05, "loss": 9.7502, "step": 16007 }, { "epoch": 0.667250218832062, "grad_norm": 292.0, "learning_rate": 2.633255492216135e-05, "loss": 14.5004, "step": 16008 }, { "epoch": 0.6672919011295902, "grad_norm": 98.5, "learning_rate": 2.6326609216916153e-05, "loss": 8.2503, "step": 16009 }, { "epoch": 0.6673335834271185, "grad_norm": 596.0, "learning_rate": 2.6320663943120638e-05, "loss": 18.1261, "step": 16010 }, { "epoch": 0.6673752657246468, "grad_norm": 276.0, "learning_rate": 2.6314719100883094e-05, "loss": 13.1879, "step": 16011 }, { "epoch": 0.667416948022175, "grad_norm": 370.0, "learning_rate": 2.6308774690311933e-05, "loss": 15.7503, "step": 16012 }, { "epoch": 0.6674586303197032, "grad_norm": 382.0, "learning_rate": 2.6302830711515415e-05, "loss": 15.6255, "step": 16013 }, { "epoch": 0.6675003126172314, "grad_norm": 668.0, "learning_rate": 2.629688716460195e-05, "loss": 19.0013, "step": 16014 }, { "epoch": 0.6675419949147597, "grad_norm": 350.0, "learning_rate": 2.629094404967979e-05, "loss": 12.3789, "step": 16015 }, { "epoch": 0.667583677212288, "grad_norm": 177.0, "learning_rate": 2.628500136685731e-05, "loss": 10.3752, "step": 16016 }, { "epoch": 0.6676253595098162, "grad_norm": 356.0, "learning_rate": 2.6279059116242744e-05, "loss": 12.7507, "step": 16017 }, { "epoch": 0.6676670418073444, "grad_norm": 304.0, "learning_rate": 2.627311729794447e-05, "loss": 13.1877, "step": 16018 }, { "epoch": 0.6677087241048727, "grad_norm": 414.0, "learning_rate": 2.626717591207071e-05, "loss": 15.8772, "step": 16019 }, { "epoch": 0.6677504064024009, "grad_norm": 304.0, "learning_rate": 2.6261234958729784e-05, "loss": 14.0628, "step": 16020 }, { "epoch": 0.6677920886999291, "grad_norm": 100.5, "learning_rate": 2.625529443802997e-05, "loss": 10.6258, "step": 16021 }, { "epoch": 0.6678337709974573, "grad_norm": 264.0, "learning_rate": 2.6249354350079514e-05, "loss": 10.1254, "step": 16022 }, { "epoch": 0.6678754532949857, "grad_norm": 274.0, "learning_rate": 2.624341469498669e-05, "loss": 13.9379, "step": 16023 }, { "epoch": 0.6679171355925139, "grad_norm": 154.0, "learning_rate": 2.623747547285974e-05, "loss": 9.2505, "step": 16024 }, { "epoch": 0.6679588178900421, "grad_norm": 296.0, "learning_rate": 2.62315366838069e-05, "loss": 12.9378, "step": 16025 }, { "epoch": 0.6680005001875703, "grad_norm": 464.0, "learning_rate": 2.622559832793643e-05, "loss": 16.1256, "step": 16026 }, { "epoch": 0.6680421824850986, "grad_norm": 390.0, "learning_rate": 2.6219660405356527e-05, "loss": 16.2502, "step": 16027 }, { "epoch": 0.6680838647826268, "grad_norm": 266.0, "learning_rate": 2.6213722916175433e-05, "loss": 11.5001, "step": 16028 }, { "epoch": 0.668125547080155, "grad_norm": 472.0, "learning_rate": 2.6207785860501343e-05, "loss": 15.188, "step": 16029 }, { "epoch": 0.6681672293776832, "grad_norm": 312.0, "learning_rate": 2.6201849238442473e-05, "loss": 14.3753, "step": 16030 }, { "epoch": 0.6682089116752116, "grad_norm": 332.0, "learning_rate": 2.6195913050107013e-05, "loss": 12.8131, "step": 16031 }, { "epoch": 0.6682505939727398, "grad_norm": 91.5, "learning_rate": 2.618997729560315e-05, "loss": 9.3754, "step": 16032 }, { "epoch": 0.668292276270268, "grad_norm": 167.0, "learning_rate": 2.6184041975039064e-05, "loss": 10.3137, "step": 16033 }, { "epoch": 0.6683339585677962, "grad_norm": 394.0, "learning_rate": 2.617810708852293e-05, "loss": 14.8755, "step": 16034 }, { "epoch": 0.6683756408653245, "grad_norm": 300.0, "learning_rate": 2.6172172636162885e-05, "loss": 13.4377, "step": 16035 }, { "epoch": 0.6684173231628527, "grad_norm": 462.0, "learning_rate": 2.6166238618067152e-05, "loss": 16.6252, "step": 16036 }, { "epoch": 0.668459005460381, "grad_norm": 370.0, "learning_rate": 2.616030503434379e-05, "loss": 14.0002, "step": 16037 }, { "epoch": 0.6685006877579092, "grad_norm": 340.0, "learning_rate": 2.6154371885101033e-05, "loss": 13.8154, "step": 16038 }, { "epoch": 0.6685423700554375, "grad_norm": 696.0, "learning_rate": 2.614843917044692e-05, "loss": 18.7522, "step": 16039 }, { "epoch": 0.6685840523529657, "grad_norm": 270.0, "learning_rate": 2.614250689048966e-05, "loss": 13.6254, "step": 16040 }, { "epoch": 0.6686257346504939, "grad_norm": 245.0, "learning_rate": 2.6136575045337298e-05, "loss": 12.7502, "step": 16041 }, { "epoch": 0.6686674169480221, "grad_norm": 632.0, "learning_rate": 2.6130643635098007e-05, "loss": 17.8752, "step": 16042 }, { "epoch": 0.6687090992455504, "grad_norm": 312.0, "learning_rate": 2.612471265987981e-05, "loss": 12.8768, "step": 16043 }, { "epoch": 0.6687507815430787, "grad_norm": 492.0, "learning_rate": 2.6118782119790874e-05, "loss": 17.6255, "step": 16044 }, { "epoch": 0.6687924638406069, "grad_norm": 142.0, "learning_rate": 2.611285201493925e-05, "loss": 9.2502, "step": 16045 }, { "epoch": 0.6688341461381352, "grad_norm": 696.0, "learning_rate": 2.6106922345433015e-05, "loss": 20.8756, "step": 16046 }, { "epoch": 0.6688758284356634, "grad_norm": 450.0, "learning_rate": 2.6100993111380246e-05, "loss": 15.8129, "step": 16047 }, { "epoch": 0.6689175107331916, "grad_norm": 490.0, "learning_rate": 2.6095064312889002e-05, "loss": 15.3768, "step": 16048 }, { "epoch": 0.6689591930307198, "grad_norm": 324.0, "learning_rate": 2.608913595006733e-05, "loss": 13.9378, "step": 16049 }, { "epoch": 0.6690008753282481, "grad_norm": 952.0, "learning_rate": 2.6083208023023277e-05, "loss": 25.1251, "step": 16050 }, { "epoch": 0.6690425576257764, "grad_norm": 1648.0, "learning_rate": 2.607728053186488e-05, "loss": 31.8752, "step": 16051 }, { "epoch": 0.6690842399233046, "grad_norm": 1072.0, "learning_rate": 2.6071353476700165e-05, "loss": 25.2504, "step": 16052 }, { "epoch": 0.6691259222208328, "grad_norm": 241.0, "learning_rate": 2.6065426857637167e-05, "loss": 11.9377, "step": 16053 }, { "epoch": 0.6691676045183611, "grad_norm": 468.0, "learning_rate": 2.605950067478388e-05, "loss": 17.3756, "step": 16054 }, { "epoch": 0.6692092868158893, "grad_norm": 163.0, "learning_rate": 2.6053574928248326e-05, "loss": 11.3753, "step": 16055 }, { "epoch": 0.6692509691134175, "grad_norm": 576.0, "learning_rate": 2.6047649618138494e-05, "loss": 17.751, "step": 16056 }, { "epoch": 0.6692926514109457, "grad_norm": 68.5, "learning_rate": 2.6041724744562368e-05, "loss": 7.9065, "step": 16057 }, { "epoch": 0.6693343337084741, "grad_norm": 231.0, "learning_rate": 2.603580030762794e-05, "loss": 11.7503, "step": 16058 }, { "epoch": 0.6693760160060023, "grad_norm": 233.0, "learning_rate": 2.602987630744316e-05, "loss": 13.0003, "step": 16059 }, { "epoch": 0.6694176983035305, "grad_norm": 356.0, "learning_rate": 2.602395274411604e-05, "loss": 14.1882, "step": 16060 }, { "epoch": 0.6694593806010587, "grad_norm": 195.0, "learning_rate": 2.6018029617754484e-05, "loss": 10.3128, "step": 16061 }, { "epoch": 0.669501062898587, "grad_norm": 176.0, "learning_rate": 2.6012106928466506e-05, "loss": 12.0638, "step": 16062 }, { "epoch": 0.6695427451961152, "grad_norm": 76.0, "learning_rate": 2.6006184676359964e-05, "loss": 9.7504, "step": 16063 }, { "epoch": 0.6695844274936434, "grad_norm": 416.0, "learning_rate": 2.6000262861542878e-05, "loss": 15.9376, "step": 16064 }, { "epoch": 0.6696261097911717, "grad_norm": 506.0, "learning_rate": 2.5994341484123092e-05, "loss": 17.7504, "step": 16065 }, { "epoch": 0.6696677920887, "grad_norm": 334.0, "learning_rate": 2.5988420544208604e-05, "loss": 10.9386, "step": 16066 }, { "epoch": 0.6697094743862282, "grad_norm": 304.0, "learning_rate": 2.5982500041907244e-05, "loss": 14.8149, "step": 16067 }, { "epoch": 0.6697511566837564, "grad_norm": 916.0, "learning_rate": 2.5976579977326988e-05, "loss": 24.2507, "step": 16068 }, { "epoch": 0.6697928389812846, "grad_norm": 128.0, "learning_rate": 2.597066035057566e-05, "loss": 9.5006, "step": 16069 }, { "epoch": 0.6698345212788129, "grad_norm": 294.0, "learning_rate": 2.5964741161761197e-05, "loss": 12.5628, "step": 16070 }, { "epoch": 0.6698762035763411, "grad_norm": 96.0, "learning_rate": 2.5958822410991458e-05, "loss": 9.8754, "step": 16071 }, { "epoch": 0.6699178858738694, "grad_norm": 89.5, "learning_rate": 2.595290409837432e-05, "loss": 7.3128, "step": 16072 }, { "epoch": 0.6699595681713976, "grad_norm": 588.0, "learning_rate": 2.5946986224017632e-05, "loss": 18.0003, "step": 16073 }, { "epoch": 0.6700012504689259, "grad_norm": 728.0, "learning_rate": 2.5941068788029266e-05, "loss": 20.5021, "step": 16074 }, { "epoch": 0.6700429327664541, "grad_norm": 177.0, "learning_rate": 2.5935151790517047e-05, "loss": 11.7503, "step": 16075 }, { "epoch": 0.6700846150639823, "grad_norm": 380.0, "learning_rate": 2.5929235231588828e-05, "loss": 15.0002, "step": 16076 }, { "epoch": 0.6701262973615105, "grad_norm": 396.0, "learning_rate": 2.592331911135243e-05, "loss": 14.0629, "step": 16077 }, { "epoch": 0.6701679796590388, "grad_norm": 193.0, "learning_rate": 2.5917403429915687e-05, "loss": 11.6252, "step": 16078 }, { "epoch": 0.6702096619565671, "grad_norm": 392.0, "learning_rate": 2.5911488187386397e-05, "loss": 15.5023, "step": 16079 }, { "epoch": 0.6702513442540953, "grad_norm": 672.0, "learning_rate": 2.590557338387237e-05, "loss": 20.1253, "step": 16080 }, { "epoch": 0.6702930265516235, "grad_norm": 162.0, "learning_rate": 2.5899659019481415e-05, "loss": 9.6254, "step": 16081 }, { "epoch": 0.6703347088491518, "grad_norm": 225.0, "learning_rate": 2.589374509432131e-05, "loss": 11.7503, "step": 16082 }, { "epoch": 0.67037639114668, "grad_norm": 294.0, "learning_rate": 2.5887831608499848e-05, "loss": 12.8756, "step": 16083 }, { "epoch": 0.6704180734442082, "grad_norm": 708.0, "learning_rate": 2.5881918562124785e-05, "loss": 21.7506, "step": 16084 }, { "epoch": 0.6704597557417364, "grad_norm": 308.0, "learning_rate": 2.5876005955303885e-05, "loss": 13.1877, "step": 16085 }, { "epoch": 0.6705014380392648, "grad_norm": 568.0, "learning_rate": 2.587009378814495e-05, "loss": 19.2502, "step": 16086 }, { "epoch": 0.670543120336793, "grad_norm": 664.0, "learning_rate": 2.586418206075566e-05, "loss": 19.5007, "step": 16087 }, { "epoch": 0.6705848026343212, "grad_norm": 302.0, "learning_rate": 2.5858270773243842e-05, "loss": 12.9379, "step": 16088 }, { "epoch": 0.6706264849318494, "grad_norm": 88.5, "learning_rate": 2.5852359925717136e-05, "loss": 8.3753, "step": 16089 }, { "epoch": 0.6706681672293777, "grad_norm": 328.0, "learning_rate": 2.5846449518283354e-05, "loss": 14.8753, "step": 16090 }, { "epoch": 0.6707098495269059, "grad_norm": 338.0, "learning_rate": 2.5840539551050136e-05, "loss": 14.6259, "step": 16091 }, { "epoch": 0.6707515318244341, "grad_norm": 438.0, "learning_rate": 2.5834630024125267e-05, "loss": 15.7506, "step": 16092 }, { "epoch": 0.6707932141219624, "grad_norm": 280.0, "learning_rate": 2.582872093761637e-05, "loss": 13.6257, "step": 16093 }, { "epoch": 0.6708348964194907, "grad_norm": 308.0, "learning_rate": 2.5822812291631203e-05, "loss": 13.5627, "step": 16094 }, { "epoch": 0.6708765787170189, "grad_norm": 456.0, "learning_rate": 2.581690408627743e-05, "loss": 17.1254, "step": 16095 }, { "epoch": 0.6709182610145471, "grad_norm": 146.0, "learning_rate": 2.5810996321662716e-05, "loss": 9.3129, "step": 16096 }, { "epoch": 0.6709599433120753, "grad_norm": 270.0, "learning_rate": 2.5805088997894755e-05, "loss": 12.3127, "step": 16097 }, { "epoch": 0.6710016256096036, "grad_norm": 155.0, "learning_rate": 2.579918211508119e-05, "loss": 11.4378, "step": 16098 }, { "epoch": 0.6710433079071318, "grad_norm": 298.0, "learning_rate": 2.5793275673329676e-05, "loss": 14.2503, "step": 16099 }, { "epoch": 0.6710849902046601, "grad_norm": 320.0, "learning_rate": 2.5787369672747863e-05, "loss": 14.7503, "step": 16100 }, { "epoch": 0.6711266725021883, "grad_norm": 314.0, "learning_rate": 2.578146411344339e-05, "loss": 13.8751, "step": 16101 }, { "epoch": 0.6711683547997166, "grad_norm": 412.0, "learning_rate": 2.5775558995523884e-05, "loss": 15.5012, "step": 16102 }, { "epoch": 0.6712100370972448, "grad_norm": 668.0, "learning_rate": 2.5769654319096958e-05, "loss": 21.3752, "step": 16103 }, { "epoch": 0.671251719394773, "grad_norm": 282.0, "learning_rate": 2.5763750084270243e-05, "loss": 14.4379, "step": 16104 }, { "epoch": 0.6712934016923012, "grad_norm": 1112.0, "learning_rate": 2.5757846291151328e-05, "loss": 33.754, "step": 16105 }, { "epoch": 0.6713350839898296, "grad_norm": 314.0, "learning_rate": 2.5751942939847817e-05, "loss": 14.001, "step": 16106 }, { "epoch": 0.6713767662873578, "grad_norm": 466.0, "learning_rate": 2.57460400304673e-05, "loss": 17.3751, "step": 16107 }, { "epoch": 0.671418448584886, "grad_norm": 596.0, "learning_rate": 2.5740137563117355e-05, "loss": 19.8752, "step": 16108 }, { "epoch": 0.6714601308824142, "grad_norm": 388.0, "learning_rate": 2.5734235537905536e-05, "loss": 13.6877, "step": 16109 }, { "epoch": 0.6715018131799425, "grad_norm": 161.0, "learning_rate": 2.5728333954939476e-05, "loss": 10.6258, "step": 16110 }, { "epoch": 0.6715434954774707, "grad_norm": 127.0, "learning_rate": 2.5722432814326637e-05, "loss": 8.8753, "step": 16111 }, { "epoch": 0.6715851777749989, "grad_norm": 290.0, "learning_rate": 2.5716532116174652e-05, "loss": 13.3756, "step": 16112 }, { "epoch": 0.6716268600725271, "grad_norm": 122.5, "learning_rate": 2.5710631860590996e-05, "loss": 8.0629, "step": 16113 }, { "epoch": 0.6716685423700555, "grad_norm": 532.0, "learning_rate": 2.570473204768326e-05, "loss": 17.7509, "step": 16114 }, { "epoch": 0.6717102246675837, "grad_norm": 450.0, "learning_rate": 2.5698832677558903e-05, "loss": 16.2503, "step": 16115 }, { "epoch": 0.6717519069651119, "grad_norm": 237.0, "learning_rate": 2.569293375032552e-05, "loss": 13.2508, "step": 16116 }, { "epoch": 0.6717935892626402, "grad_norm": 342.0, "learning_rate": 2.568703526609053e-05, "loss": 14.8127, "step": 16117 }, { "epoch": 0.6718352715601684, "grad_norm": 161.0, "learning_rate": 2.5681137224961527e-05, "loss": 10.6259, "step": 16118 }, { "epoch": 0.6718769538576966, "grad_norm": 556.0, "learning_rate": 2.5675239627045922e-05, "loss": 17.8753, "step": 16119 }, { "epoch": 0.6719186361552248, "grad_norm": 266.0, "learning_rate": 2.566934247245124e-05, "loss": 11.3128, "step": 16120 }, { "epoch": 0.6719603184527532, "grad_norm": 213.0, "learning_rate": 2.5663445761284965e-05, "loss": 11.0626, "step": 16121 }, { "epoch": 0.6720020007502814, "grad_norm": 1632.0, "learning_rate": 2.5657549493654542e-05, "loss": 30.8804, "step": 16122 }, { "epoch": 0.6720436830478096, "grad_norm": 502.0, "learning_rate": 2.565165366966745e-05, "loss": 15.5662, "step": 16123 }, { "epoch": 0.6720853653453378, "grad_norm": 264.0, "learning_rate": 2.564575828943112e-05, "loss": 12.1877, "step": 16124 }, { "epoch": 0.6721270476428661, "grad_norm": 157.0, "learning_rate": 2.5639863353053016e-05, "loss": 10.4379, "step": 16125 }, { "epoch": 0.6721687299403943, "grad_norm": 872.0, "learning_rate": 2.5633968860640557e-05, "loss": 24.6252, "step": 16126 }, { "epoch": 0.6722104122379225, "grad_norm": 478.0, "learning_rate": 2.5628074812301183e-05, "loss": 16.8753, "step": 16127 }, { "epoch": 0.6722520945354508, "grad_norm": 596.0, "learning_rate": 2.5622181208142316e-05, "loss": 19.2502, "step": 16128 }, { "epoch": 0.6722937768329791, "grad_norm": 796.0, "learning_rate": 2.5616288048271352e-05, "loss": 22.0002, "step": 16129 }, { "epoch": 0.6723354591305073, "grad_norm": 494.0, "learning_rate": 2.56103953327957e-05, "loss": 19.5004, "step": 16130 }, { "epoch": 0.6723771414280355, "grad_norm": 548.0, "learning_rate": 2.5604503061822767e-05, "loss": 17.7507, "step": 16131 }, { "epoch": 0.6724188237255637, "grad_norm": 214.0, "learning_rate": 2.5598611235459926e-05, "loss": 10.5626, "step": 16132 }, { "epoch": 0.672460506023092, "grad_norm": 306.0, "learning_rate": 2.5592719853814562e-05, "loss": 13.5002, "step": 16133 }, { "epoch": 0.6725021883206203, "grad_norm": 436.0, "learning_rate": 2.5586828916994044e-05, "loss": 17.1257, "step": 16134 }, { "epoch": 0.6725438706181485, "grad_norm": 223.0, "learning_rate": 2.5580938425105716e-05, "loss": 11.5632, "step": 16135 }, { "epoch": 0.6725855529156767, "grad_norm": 211.0, "learning_rate": 2.5575048378256992e-05, "loss": 13.6879, "step": 16136 }, { "epoch": 0.672627235213205, "grad_norm": 73.0, "learning_rate": 2.5569158776555148e-05, "loss": 8.6879, "step": 16137 }, { "epoch": 0.6726689175107332, "grad_norm": 964.0, "learning_rate": 2.5563269620107588e-05, "loss": 22.6297, "step": 16138 }, { "epoch": 0.6727105998082614, "grad_norm": 366.0, "learning_rate": 2.5557380909021572e-05, "loss": 13.6257, "step": 16139 }, { "epoch": 0.6727522821057896, "grad_norm": 366.0, "learning_rate": 2.5551492643404494e-05, "loss": 12.7537, "step": 16140 }, { "epoch": 0.672793964403318, "grad_norm": 356.0, "learning_rate": 2.5545604823363593e-05, "loss": 15.3752, "step": 16141 }, { "epoch": 0.6728356467008462, "grad_norm": 462.0, "learning_rate": 2.5539717449006262e-05, "loss": 16.3752, "step": 16142 }, { "epoch": 0.6728773289983744, "grad_norm": 410.0, "learning_rate": 2.5533830520439704e-05, "loss": 13.1877, "step": 16143 }, { "epoch": 0.6729190112959026, "grad_norm": 330.0, "learning_rate": 2.55279440377713e-05, "loss": 12.1877, "step": 16144 }, { "epoch": 0.6729606935934309, "grad_norm": 278.0, "learning_rate": 2.552205800110825e-05, "loss": 12.5002, "step": 16145 }, { "epoch": 0.6730023758909591, "grad_norm": 426.0, "learning_rate": 2.551617241055788e-05, "loss": 16.8751, "step": 16146 }, { "epoch": 0.6730440581884873, "grad_norm": 254.0, "learning_rate": 2.5510287266227446e-05, "loss": 13.7502, "step": 16147 }, { "epoch": 0.6730857404860155, "grad_norm": 217.0, "learning_rate": 2.55044025682242e-05, "loss": 11.1255, "step": 16148 }, { "epoch": 0.6731274227835439, "grad_norm": 292.0, "learning_rate": 2.549851831665539e-05, "loss": 13.7502, "step": 16149 }, { "epoch": 0.6731691050810721, "grad_norm": 1272.0, "learning_rate": 2.549263451162826e-05, "loss": 30.3755, "step": 16150 }, { "epoch": 0.6732107873786003, "grad_norm": 354.0, "learning_rate": 2.5486751153250043e-05, "loss": 13.4378, "step": 16151 }, { "epoch": 0.6732524696761285, "grad_norm": 428.0, "learning_rate": 2.5480868241627964e-05, "loss": 16.8751, "step": 16152 }, { "epoch": 0.6732941519736568, "grad_norm": 462.0, "learning_rate": 2.5474985776869232e-05, "loss": 17.1252, "step": 16153 }, { "epoch": 0.673335834271185, "grad_norm": 155.0, "learning_rate": 2.5469103759081066e-05, "loss": 9.4376, "step": 16154 }, { "epoch": 0.6733775165687133, "grad_norm": 486.0, "learning_rate": 2.546322218837066e-05, "loss": 17.1252, "step": 16155 }, { "epoch": 0.6734191988662415, "grad_norm": 83.0, "learning_rate": 2.5457341064845207e-05, "loss": 9.5627, "step": 16156 }, { "epoch": 0.6734608811637698, "grad_norm": 120.0, "learning_rate": 2.545146038861189e-05, "loss": 9.2514, "step": 16157 }, { "epoch": 0.673502563461298, "grad_norm": 398.0, "learning_rate": 2.5445580159777893e-05, "loss": 14.8129, "step": 16158 }, { "epoch": 0.6735442457588262, "grad_norm": 276.0, "learning_rate": 2.543970037845037e-05, "loss": 12.7512, "step": 16159 }, { "epoch": 0.6735859280563544, "grad_norm": 116.5, "learning_rate": 2.5433821044736493e-05, "loss": 9.0006, "step": 16160 }, { "epoch": 0.6736276103538827, "grad_norm": 588.0, "learning_rate": 2.542794215874339e-05, "loss": 19.8752, "step": 16161 }, { "epoch": 0.673669292651411, "grad_norm": 330.0, "learning_rate": 2.5422063720578254e-05, "loss": 13.6888, "step": 16162 }, { "epoch": 0.6737109749489392, "grad_norm": 1032.0, "learning_rate": 2.5416185730348153e-05, "loss": 26.7504, "step": 16163 }, { "epoch": 0.6737526572464674, "grad_norm": 376.0, "learning_rate": 2.5410308188160293e-05, "loss": 17.6256, "step": 16164 }, { "epoch": 0.6737943395439957, "grad_norm": 404.0, "learning_rate": 2.5404431094121707e-05, "loss": 14.7505, "step": 16165 }, { "epoch": 0.6738360218415239, "grad_norm": 201.0, "learning_rate": 2.5398554448339586e-05, "loss": 11.5627, "step": 16166 }, { "epoch": 0.6738777041390521, "grad_norm": 732.0, "learning_rate": 2.5392678250920952e-05, "loss": 20.8754, "step": 16167 }, { "epoch": 0.6739193864365803, "grad_norm": 482.0, "learning_rate": 2.5386802501972983e-05, "loss": 17.8769, "step": 16168 }, { "epoch": 0.6739610687341087, "grad_norm": 360.0, "learning_rate": 2.5380927201602688e-05, "loss": 13.8754, "step": 16169 }, { "epoch": 0.6740027510316369, "grad_norm": 338.0, "learning_rate": 2.53750523499172e-05, "loss": 12.752, "step": 16170 }, { "epoch": 0.6740444333291651, "grad_norm": 306.0, "learning_rate": 2.5369177947023565e-05, "loss": 13.1253, "step": 16171 }, { "epoch": 0.6740861156266933, "grad_norm": 676.0, "learning_rate": 2.5363303993028854e-05, "loss": 21.2502, "step": 16172 }, { "epoch": 0.6741277979242216, "grad_norm": 227.0, "learning_rate": 2.5357430488040107e-05, "loss": 12.2502, "step": 16173 }, { "epoch": 0.6741694802217498, "grad_norm": 228.0, "learning_rate": 2.535155743216438e-05, "loss": 11.6252, "step": 16174 }, { "epoch": 0.674211162519278, "grad_norm": 312.0, "learning_rate": 2.5345684825508708e-05, "loss": 12.3751, "step": 16175 }, { "epoch": 0.6742528448168063, "grad_norm": 106.0, "learning_rate": 2.5339812668180118e-05, "loss": 9.6252, "step": 16176 }, { "epoch": 0.6742945271143346, "grad_norm": 1264.0, "learning_rate": 2.533394096028563e-05, "loss": 32.5006, "step": 16177 }, { "epoch": 0.6743362094118628, "grad_norm": 426.0, "learning_rate": 2.5328069701932257e-05, "loss": 14.8127, "step": 16178 }, { "epoch": 0.674377891709391, "grad_norm": 336.0, "learning_rate": 2.5322198893227005e-05, "loss": 13.5002, "step": 16179 }, { "epoch": 0.6744195740069192, "grad_norm": 852.0, "learning_rate": 2.5316328534276867e-05, "loss": 23.0006, "step": 16180 }, { "epoch": 0.6744612563044475, "grad_norm": 508.0, "learning_rate": 2.5310458625188838e-05, "loss": 17.1253, "step": 16181 }, { "epoch": 0.6745029386019757, "grad_norm": 800.0, "learning_rate": 2.530458916606989e-05, "loss": 19.6283, "step": 16182 }, { "epoch": 0.674544620899504, "grad_norm": 103.5, "learning_rate": 2.5298720157027e-05, "loss": 10.7504, "step": 16183 }, { "epoch": 0.6745863031970322, "grad_norm": 318.0, "learning_rate": 2.529285159816712e-05, "loss": 14.3777, "step": 16184 }, { "epoch": 0.6746279854945605, "grad_norm": 330.0, "learning_rate": 2.52869834895972e-05, "loss": 14.0004, "step": 16185 }, { "epoch": 0.6746696677920887, "grad_norm": 187.0, "learning_rate": 2.5281115831424236e-05, "loss": 10.5628, "step": 16186 }, { "epoch": 0.6747113500896169, "grad_norm": 362.0, "learning_rate": 2.527524862375509e-05, "loss": 15.0005, "step": 16187 }, { "epoch": 0.6747530323871451, "grad_norm": 432.0, "learning_rate": 2.5269381866696773e-05, "loss": 14.6254, "step": 16188 }, { "epoch": 0.6747947146846734, "grad_norm": 282.0, "learning_rate": 2.526351556035613e-05, "loss": 12.6252, "step": 16189 }, { "epoch": 0.6748363969822017, "grad_norm": 420.0, "learning_rate": 2.525764970484015e-05, "loss": 17.1259, "step": 16190 }, { "epoch": 0.6748780792797299, "grad_norm": 72.0, "learning_rate": 2.525178430025566e-05, "loss": 8.5005, "step": 16191 }, { "epoch": 0.6749197615772582, "grad_norm": 864.0, "learning_rate": 2.5245919346709634e-05, "loss": 24.6252, "step": 16192 }, { "epoch": 0.6749614438747864, "grad_norm": 536.0, "learning_rate": 2.5240054844308893e-05, "loss": 18.0011, "step": 16193 }, { "epoch": 0.6750031261723146, "grad_norm": 712.0, "learning_rate": 2.5234190793160384e-05, "loss": 21.1257, "step": 16194 }, { "epoch": 0.6750448084698428, "grad_norm": 330.0, "learning_rate": 2.5228327193370914e-05, "loss": 13.0626, "step": 16195 }, { "epoch": 0.6750864907673712, "grad_norm": 284.0, "learning_rate": 2.5222464045047393e-05, "loss": 11.9381, "step": 16196 }, { "epoch": 0.6751281730648994, "grad_norm": 203.0, "learning_rate": 2.521660134829667e-05, "loss": 11.8133, "step": 16197 }, { "epoch": 0.6751698553624276, "grad_norm": 216.0, "learning_rate": 2.5210739103225583e-05, "loss": 10.5627, "step": 16198 }, { "epoch": 0.6752115376599558, "grad_norm": 262.0, "learning_rate": 2.5204877309940977e-05, "loss": 12.3127, "step": 16199 }, { "epoch": 0.6752532199574841, "grad_norm": 270.0, "learning_rate": 2.5199015968549688e-05, "loss": 13.0003, "step": 16200 }, { "epoch": 0.6752949022550123, "grad_norm": 214.0, "learning_rate": 2.5193155079158527e-05, "loss": 9.7502, "step": 16201 }, { "epoch": 0.6753365845525405, "grad_norm": 96.0, "learning_rate": 2.518729464187432e-05, "loss": 9.5627, "step": 16202 }, { "epoch": 0.6753782668500687, "grad_norm": 260.0, "learning_rate": 2.518143465680387e-05, "loss": 12.9377, "step": 16203 }, { "epoch": 0.6754199491475971, "grad_norm": 372.0, "learning_rate": 2.517557512405398e-05, "loss": 15.2503, "step": 16204 }, { "epoch": 0.6754616314451253, "grad_norm": 1576.0, "learning_rate": 2.5169716043731435e-05, "loss": 35.7502, "step": 16205 }, { "epoch": 0.6755033137426535, "grad_norm": 136.0, "learning_rate": 2.516385741594302e-05, "loss": 7.5005, "step": 16206 }, { "epoch": 0.6755449960401817, "grad_norm": 141.0, "learning_rate": 2.5157999240795505e-05, "loss": 6.2191, "step": 16207 }, { "epoch": 0.67558667833771, "grad_norm": 67.5, "learning_rate": 2.5152141518395668e-05, "loss": 7.1573, "step": 16208 }, { "epoch": 0.6756283606352382, "grad_norm": 864.0, "learning_rate": 2.514628424885025e-05, "loss": 25.0007, "step": 16209 }, { "epoch": 0.6756700429327664, "grad_norm": 53.0, "learning_rate": 2.5140427432266007e-05, "loss": 7.6259, "step": 16210 }, { "epoch": 0.6757117252302947, "grad_norm": 406.0, "learning_rate": 2.5134571068749662e-05, "loss": 15.2502, "step": 16211 }, { "epoch": 0.675753407527823, "grad_norm": 300.0, "learning_rate": 2.5128715158408012e-05, "loss": 9.7518, "step": 16212 }, { "epoch": 0.6757950898253512, "grad_norm": 179.0, "learning_rate": 2.512285970134769e-05, "loss": 9.1887, "step": 16213 }, { "epoch": 0.6758367721228794, "grad_norm": 428.0, "learning_rate": 2.51170046976755e-05, "loss": 16.5002, "step": 16214 }, { "epoch": 0.6758784544204076, "grad_norm": 148.0, "learning_rate": 2.511115014749807e-05, "loss": 9.063, "step": 16215 }, { "epoch": 0.6759201367179359, "grad_norm": 194.0, "learning_rate": 2.5105296050922178e-05, "loss": 11.5633, "step": 16216 }, { "epoch": 0.6759618190154641, "grad_norm": 186.0, "learning_rate": 2.509944240805443e-05, "loss": 10.7504, "step": 16217 }, { "epoch": 0.6760035013129924, "grad_norm": 454.0, "learning_rate": 2.50935892190016e-05, "loss": 16.6259, "step": 16218 }, { "epoch": 0.6760451836105206, "grad_norm": 1336.0, "learning_rate": 2.5087736483870274e-05, "loss": 32.0003, "step": 16219 }, { "epoch": 0.6760868659080489, "grad_norm": 474.0, "learning_rate": 2.5081884202767186e-05, "loss": 17.5005, "step": 16220 }, { "epoch": 0.6761285482055771, "grad_norm": 140.0, "learning_rate": 2.507603237579897e-05, "loss": 11.688, "step": 16221 }, { "epoch": 0.6761702305031053, "grad_norm": 270.0, "learning_rate": 2.5070181003072274e-05, "loss": 13.5013, "step": 16222 }, { "epoch": 0.6762119128006335, "grad_norm": 176.0, "learning_rate": 2.5064330084693742e-05, "loss": 9.8753, "step": 16223 }, { "epoch": 0.6762535950981619, "grad_norm": 164.0, "learning_rate": 2.5058479620770013e-05, "loss": 10.6877, "step": 16224 }, { "epoch": 0.6762952773956901, "grad_norm": 332.0, "learning_rate": 2.50526296114077e-05, "loss": 14.3131, "step": 16225 }, { "epoch": 0.6763369596932183, "grad_norm": 644.0, "learning_rate": 2.504678005671343e-05, "loss": 19.5003, "step": 16226 }, { "epoch": 0.6763786419907465, "grad_norm": 89.5, "learning_rate": 2.5040930956793806e-05, "loss": 5.7825, "step": 16227 }, { "epoch": 0.6764203242882748, "grad_norm": 251.0, "learning_rate": 2.503508231175544e-05, "loss": 11.9376, "step": 16228 }, { "epoch": 0.676462006585803, "grad_norm": 135.0, "learning_rate": 2.502923412170491e-05, "loss": 10.1253, "step": 16229 }, { "epoch": 0.6765036888833312, "grad_norm": 560.0, "learning_rate": 2.5023386386748803e-05, "loss": 15.8168, "step": 16230 }, { "epoch": 0.6765453711808594, "grad_norm": 216.0, "learning_rate": 2.5017539106993694e-05, "loss": 12.4377, "step": 16231 }, { "epoch": 0.6765870534783878, "grad_norm": 205.0, "learning_rate": 2.5011692282546157e-05, "loss": 10.5007, "step": 16232 }, { "epoch": 0.676628735775916, "grad_norm": 286.0, "learning_rate": 2.500584591351275e-05, "loss": 15.8135, "step": 16233 }, { "epoch": 0.6766704180734442, "grad_norm": 122.5, "learning_rate": 2.500000000000001e-05, "loss": 7.4377, "step": 16234 }, { "epoch": 0.6767121003709724, "grad_norm": 736.0, "learning_rate": 2.499415454211448e-05, "loss": 19.6291, "step": 16235 }, { "epoch": 0.6767537826685007, "grad_norm": 162.0, "learning_rate": 2.4988309539962735e-05, "loss": 11.4378, "step": 16236 }, { "epoch": 0.6767954649660289, "grad_norm": 868.0, "learning_rate": 2.4982464993651235e-05, "loss": 24.6253, "step": 16237 }, { "epoch": 0.6768371472635571, "grad_norm": 368.0, "learning_rate": 2.4976620903286562e-05, "loss": 14.7502, "step": 16238 }, { "epoch": 0.6768788295610854, "grad_norm": 62.5, "learning_rate": 2.4970777268975165e-05, "loss": 8.3757, "step": 16239 }, { "epoch": 0.6769205118586137, "grad_norm": 229.0, "learning_rate": 2.4964934090823604e-05, "loss": 13.0007, "step": 16240 }, { "epoch": 0.6769621941561419, "grad_norm": 414.0, "learning_rate": 2.4959091368938307e-05, "loss": 15.0007, "step": 16241 }, { "epoch": 0.6770038764536701, "grad_norm": 398.0, "learning_rate": 2.4953249103425834e-05, "loss": 17.7508, "step": 16242 }, { "epoch": 0.6770455587511983, "grad_norm": 138.0, "learning_rate": 2.4947407294392583e-05, "loss": 11.2507, "step": 16243 }, { "epoch": 0.6770872410487266, "grad_norm": 494.0, "learning_rate": 2.4941565941945096e-05, "loss": 18.1257, "step": 16244 }, { "epoch": 0.6771289233462549, "grad_norm": 564.0, "learning_rate": 2.493572504618975e-05, "loss": 18.7508, "step": 16245 }, { "epoch": 0.6771706056437831, "grad_norm": 231.0, "learning_rate": 2.492988460723306e-05, "loss": 12.0627, "step": 16246 }, { "epoch": 0.6772122879413113, "grad_norm": 516.0, "learning_rate": 2.4924044625181453e-05, "loss": 18.1254, "step": 16247 }, { "epoch": 0.6772539702388396, "grad_norm": 264.0, "learning_rate": 2.4918205100141356e-05, "loss": 12.7504, "step": 16248 }, { "epoch": 0.6772956525363678, "grad_norm": 406.0, "learning_rate": 2.4912366032219198e-05, "loss": 13.3755, "step": 16249 }, { "epoch": 0.677337334833896, "grad_norm": 212.0, "learning_rate": 2.4906527421521396e-05, "loss": 10.6253, "step": 16250 }, { "epoch": 0.6773790171314242, "grad_norm": 322.0, "learning_rate": 2.4900689268154358e-05, "loss": 12.2503, "step": 16251 }, { "epoch": 0.6774206994289526, "grad_norm": 324.0, "learning_rate": 2.4894851572224485e-05, "loss": 13.8756, "step": 16252 }, { "epoch": 0.6774623817264808, "grad_norm": 220.0, "learning_rate": 2.488901433383818e-05, "loss": 11.5627, "step": 16253 }, { "epoch": 0.677504064024009, "grad_norm": 207.0, "learning_rate": 2.488317755310181e-05, "loss": 12.8751, "step": 16254 }, { "epoch": 0.6775457463215372, "grad_norm": 1184.0, "learning_rate": 2.487734123012176e-05, "loss": 30.7502, "step": 16255 }, { "epoch": 0.6775874286190655, "grad_norm": 434.0, "learning_rate": 2.4871505365004394e-05, "loss": 17.1252, "step": 16256 }, { "epoch": 0.6776291109165937, "grad_norm": 636.0, "learning_rate": 2.4865669957856075e-05, "loss": 19.2546, "step": 16257 }, { "epoch": 0.6776707932141219, "grad_norm": 482.0, "learning_rate": 2.4859835008783157e-05, "loss": 17.2502, "step": 16258 }, { "epoch": 0.6777124755116501, "grad_norm": 66.0, "learning_rate": 2.4854000517891973e-05, "loss": 8.8753, "step": 16259 }, { "epoch": 0.6777541578091785, "grad_norm": 274.0, "learning_rate": 2.484816648528886e-05, "loss": 12.8755, "step": 16260 }, { "epoch": 0.6777958401067067, "grad_norm": 157.0, "learning_rate": 2.484233291108013e-05, "loss": 9.8752, "step": 16261 }, { "epoch": 0.6778375224042349, "grad_norm": 776.0, "learning_rate": 2.4836499795372153e-05, "loss": 22.6253, "step": 16262 }, { "epoch": 0.6778792047017632, "grad_norm": 330.0, "learning_rate": 2.483066713827116e-05, "loss": 14.4396, "step": 16263 }, { "epoch": 0.6779208869992914, "grad_norm": 118.5, "learning_rate": 2.482483493988354e-05, "loss": 8.3753, "step": 16264 }, { "epoch": 0.6779625692968196, "grad_norm": 53.0, "learning_rate": 2.4819003200315495e-05, "loss": 7.6251, "step": 16265 }, { "epoch": 0.6780042515943479, "grad_norm": 242.0, "learning_rate": 2.481317191967339e-05, "loss": 12.3127, "step": 16266 }, { "epoch": 0.6780459338918762, "grad_norm": 422.0, "learning_rate": 2.4807341098063418e-05, "loss": 13.8751, "step": 16267 }, { "epoch": 0.6780876161894044, "grad_norm": 366.0, "learning_rate": 2.4801510735591936e-05, "loss": 14.7503, "step": 16268 }, { "epoch": 0.6781292984869326, "grad_norm": 191.0, "learning_rate": 2.479568083236512e-05, "loss": 9.6255, "step": 16269 }, { "epoch": 0.6781709807844608, "grad_norm": 131.0, "learning_rate": 2.4789851388489277e-05, "loss": 9.5634, "step": 16270 }, { "epoch": 0.6782126630819891, "grad_norm": 744.0, "learning_rate": 2.478402240407063e-05, "loss": 20.2502, "step": 16271 }, { "epoch": 0.6782543453795173, "grad_norm": 322.0, "learning_rate": 2.4778193879215412e-05, "loss": 12.6879, "step": 16272 }, { "epoch": 0.6782960276770456, "grad_norm": 572.0, "learning_rate": 2.477236581402985e-05, "loss": 18.1254, "step": 16273 }, { "epoch": 0.6783377099745738, "grad_norm": 310.0, "learning_rate": 2.476653820862016e-05, "loss": 13.813, "step": 16274 }, { "epoch": 0.6783793922721021, "grad_norm": 184.0, "learning_rate": 2.4760711063092552e-05, "loss": 11.2505, "step": 16275 }, { "epoch": 0.6784210745696303, "grad_norm": 304.0, "learning_rate": 2.475488437755323e-05, "loss": 13.5003, "step": 16276 }, { "epoch": 0.6784627568671585, "grad_norm": 310.0, "learning_rate": 2.4749058152108373e-05, "loss": 13.5003, "step": 16277 }, { "epoch": 0.6785044391646867, "grad_norm": 236.0, "learning_rate": 2.4743232386864174e-05, "loss": 11.8127, "step": 16278 }, { "epoch": 0.678546121462215, "grad_norm": 224.0, "learning_rate": 2.4737407081926813e-05, "loss": 12.0662, "step": 16279 }, { "epoch": 0.6785878037597433, "grad_norm": 64.5, "learning_rate": 2.4731582237402447e-05, "loss": 6.5941, "step": 16280 }, { "epoch": 0.6786294860572715, "grad_norm": 211.0, "learning_rate": 2.4725757853397236e-05, "loss": 10.5632, "step": 16281 }, { "epoch": 0.6786711683547997, "grad_norm": 588.0, "learning_rate": 2.471993393001733e-05, "loss": 18.6252, "step": 16282 }, { "epoch": 0.678712850652328, "grad_norm": 1112.0, "learning_rate": 2.471411046736888e-05, "loss": 27.1253, "step": 16283 }, { "epoch": 0.6787545329498562, "grad_norm": 442.0, "learning_rate": 2.4708287465558005e-05, "loss": 16.1253, "step": 16284 }, { "epoch": 0.6787962152473844, "grad_norm": 156.0, "learning_rate": 2.4702464924690836e-05, "loss": 11.0626, "step": 16285 }, { "epoch": 0.6788378975449126, "grad_norm": 1080.0, "learning_rate": 2.4696642844873496e-05, "loss": 24.5014, "step": 16286 }, { "epoch": 0.678879579842441, "grad_norm": 732.0, "learning_rate": 2.4690821226212063e-05, "loss": 22.8753, "step": 16287 }, { "epoch": 0.6789212621399692, "grad_norm": 210.0, "learning_rate": 2.4685000068812697e-05, "loss": 11.1877, "step": 16288 }, { "epoch": 0.6789629444374974, "grad_norm": 700.0, "learning_rate": 2.4679179372781414e-05, "loss": 20.1256, "step": 16289 }, { "epoch": 0.6790046267350256, "grad_norm": 752.0, "learning_rate": 2.4673359138224377e-05, "loss": 21.6253, "step": 16290 }, { "epoch": 0.6790463090325539, "grad_norm": 848.0, "learning_rate": 2.4667539365247577e-05, "loss": 21.7509, "step": 16291 }, { "epoch": 0.6790879913300821, "grad_norm": 494.0, "learning_rate": 2.4661720053957154e-05, "loss": 16.2503, "step": 16292 }, { "epoch": 0.6791296736276103, "grad_norm": 190.0, "learning_rate": 2.46559012044591e-05, "loss": 10.5007, "step": 16293 }, { "epoch": 0.6791713559251386, "grad_norm": 852.0, "learning_rate": 2.4650082816859537e-05, "loss": 23.6252, "step": 16294 }, { "epoch": 0.6792130382226669, "grad_norm": 241.0, "learning_rate": 2.4644264891264424e-05, "loss": 12.2503, "step": 16295 }, { "epoch": 0.6792547205201951, "grad_norm": 264.0, "learning_rate": 2.4638447427779848e-05, "loss": 12.8764, "step": 16296 }, { "epoch": 0.6792964028177233, "grad_norm": 210.0, "learning_rate": 2.4632630426511827e-05, "loss": 11.8752, "step": 16297 }, { "epoch": 0.6793380851152515, "grad_norm": 194.0, "learning_rate": 2.4626813887566362e-05, "loss": 10.6878, "step": 16298 }, { "epoch": 0.6793797674127798, "grad_norm": 1216.0, "learning_rate": 2.4620997811049472e-05, "loss": 32.7521, "step": 16299 }, { "epoch": 0.679421449710308, "grad_norm": 260.0, "learning_rate": 2.461518219706715e-05, "loss": 13.0011, "step": 16300 }, { "epoch": 0.6794631320078363, "grad_norm": 420.0, "learning_rate": 2.4609367045725378e-05, "loss": 15.5005, "step": 16301 }, { "epoch": 0.6795048143053645, "grad_norm": 480.0, "learning_rate": 2.4603552357130156e-05, "loss": 17.5006, "step": 16302 }, { "epoch": 0.6795464966028928, "grad_norm": 1040.0, "learning_rate": 2.4597738131387438e-05, "loss": 24.7555, "step": 16303 }, { "epoch": 0.679588178900421, "grad_norm": 227.0, "learning_rate": 2.4591924368603202e-05, "loss": 10.1268, "step": 16304 }, { "epoch": 0.6796298611979492, "grad_norm": 532.0, "learning_rate": 2.45861110688834e-05, "loss": 17.2502, "step": 16305 }, { "epoch": 0.6796715434954774, "grad_norm": 408.0, "learning_rate": 2.4580298232333982e-05, "loss": 16.3758, "step": 16306 }, { "epoch": 0.6797132257930057, "grad_norm": 206.0, "learning_rate": 2.4574485859060882e-05, "loss": 11.5002, "step": 16307 }, { "epoch": 0.679754908090534, "grad_norm": 472.0, "learning_rate": 2.4568673949170034e-05, "loss": 17.2505, "step": 16308 }, { "epoch": 0.6797965903880622, "grad_norm": 111.0, "learning_rate": 2.4562862502767357e-05, "loss": 6.7501, "step": 16309 }, { "epoch": 0.6798382726855904, "grad_norm": 320.0, "learning_rate": 2.455705151995878e-05, "loss": 14.4378, "step": 16310 }, { "epoch": 0.6798799549831187, "grad_norm": 314.0, "learning_rate": 2.4551241000850165e-05, "loss": 13.5002, "step": 16311 }, { "epoch": 0.6799216372806469, "grad_norm": 189.0, "learning_rate": 2.4545430945547493e-05, "loss": 10.5024, "step": 16312 }, { "epoch": 0.6799633195781751, "grad_norm": 346.0, "learning_rate": 2.4539621354156556e-05, "loss": 14.9377, "step": 16313 }, { "epoch": 0.6800050018757033, "grad_norm": 502.0, "learning_rate": 2.453381222678332e-05, "loss": 17.5001, "step": 16314 }, { "epoch": 0.6800466841732317, "grad_norm": 239.0, "learning_rate": 2.452800356353358e-05, "loss": 12.1258, "step": 16315 }, { "epoch": 0.6800883664707599, "grad_norm": 720.0, "learning_rate": 2.4522195364513273e-05, "loss": 22.7502, "step": 16316 }, { "epoch": 0.6801300487682881, "grad_norm": 354.0, "learning_rate": 2.451638762982818e-05, "loss": 14.6253, "step": 16317 }, { "epoch": 0.6801717310658163, "grad_norm": 234.0, "learning_rate": 2.4510580359584223e-05, "loss": 10.5641, "step": 16318 }, { "epoch": 0.6802134133633446, "grad_norm": 552.0, "learning_rate": 2.4504773553887162e-05, "loss": 17.6269, "step": 16319 }, { "epoch": 0.6802550956608728, "grad_norm": 378.0, "learning_rate": 2.4498967212842903e-05, "loss": 16.0004, "step": 16320 }, { "epoch": 0.680296777958401, "grad_norm": 147.0, "learning_rate": 2.4493161336557196e-05, "loss": 10.7503, "step": 16321 }, { "epoch": 0.6803384602559293, "grad_norm": 222.0, "learning_rate": 2.4487355925135903e-05, "loss": 12.0003, "step": 16322 }, { "epoch": 0.6803801425534576, "grad_norm": 520.0, "learning_rate": 2.4481550978684814e-05, "loss": 17.1254, "step": 16323 }, { "epoch": 0.6804218248509858, "grad_norm": 194.0, "learning_rate": 2.4475746497309726e-05, "loss": 11.0007, "step": 16324 }, { "epoch": 0.680463507148514, "grad_norm": 422.0, "learning_rate": 2.4469942481116424e-05, "loss": 15.5003, "step": 16325 }, { "epoch": 0.6805051894460422, "grad_norm": 612.0, "learning_rate": 2.4464138930210684e-05, "loss": 19.0001, "step": 16326 }, { "epoch": 0.6805468717435705, "grad_norm": 193.0, "learning_rate": 2.4458335844698282e-05, "loss": 11.8127, "step": 16327 }, { "epoch": 0.6805885540410987, "grad_norm": 422.0, "learning_rate": 2.4452533224684976e-05, "loss": 16.8753, "step": 16328 }, { "epoch": 0.680630236338627, "grad_norm": 716.0, "learning_rate": 2.4446731070276517e-05, "loss": 18.1278, "step": 16329 }, { "epoch": 0.6806719186361552, "grad_norm": 235.0, "learning_rate": 2.4440929381578654e-05, "loss": 11.6252, "step": 16330 }, { "epoch": 0.6807136009336835, "grad_norm": 62.25, "learning_rate": 2.4435128158697124e-05, "loss": 8.6881, "step": 16331 }, { "epoch": 0.6807552832312117, "grad_norm": 474.0, "learning_rate": 2.442932740173765e-05, "loss": 18.0018, "step": 16332 }, { "epoch": 0.6807969655287399, "grad_norm": 124.5, "learning_rate": 2.4423527110805955e-05, "loss": 9.6878, "step": 16333 }, { "epoch": 0.6808386478262681, "grad_norm": 776.0, "learning_rate": 2.441772728600775e-05, "loss": 21.8752, "step": 16334 }, { "epoch": 0.6808803301237965, "grad_norm": 316.0, "learning_rate": 2.4411927927448737e-05, "loss": 13.3761, "step": 16335 }, { "epoch": 0.6809220124213247, "grad_norm": 504.0, "learning_rate": 2.4406129035234608e-05, "loss": 18.2502, "step": 16336 }, { "epoch": 0.6809636947188529, "grad_norm": 194.0, "learning_rate": 2.4400330609471027e-05, "loss": 11.3127, "step": 16337 }, { "epoch": 0.6810053770163812, "grad_norm": 102.5, "learning_rate": 2.4394532650263733e-05, "loss": 8.8127, "step": 16338 }, { "epoch": 0.6810470593139094, "grad_norm": 312.0, "learning_rate": 2.4388735157718322e-05, "loss": 15.5031, "step": 16339 }, { "epoch": 0.6810887416114376, "grad_norm": 173.0, "learning_rate": 2.438293813194052e-05, "loss": 11.3143, "step": 16340 }, { "epoch": 0.6811304239089658, "grad_norm": 222.0, "learning_rate": 2.4377141573035905e-05, "loss": 10.6255, "step": 16341 }, { "epoch": 0.6811721062064942, "grad_norm": 236.0, "learning_rate": 2.4371345481110202e-05, "loss": 11.7504, "step": 16342 }, { "epoch": 0.6812137885040224, "grad_norm": 138.0, "learning_rate": 2.436554985626896e-05, "loss": 10.3131, "step": 16343 }, { "epoch": 0.6812554708015506, "grad_norm": 484.0, "learning_rate": 2.435975469861789e-05, "loss": 13.5626, "step": 16344 }, { "epoch": 0.6812971530990788, "grad_norm": 139.0, "learning_rate": 2.4353960008262523e-05, "loss": 11.3756, "step": 16345 }, { "epoch": 0.6813388353966071, "grad_norm": 197.0, "learning_rate": 2.4348165785308534e-05, "loss": 11.4377, "step": 16346 }, { "epoch": 0.6813805176941353, "grad_norm": 137.0, "learning_rate": 2.43423720298615e-05, "loss": 8.5006, "step": 16347 }, { "epoch": 0.6814221999916635, "grad_norm": 100.5, "learning_rate": 2.4336578742027018e-05, "loss": 9.7511, "step": 16348 }, { "epoch": 0.6814638822891917, "grad_norm": 364.0, "learning_rate": 2.433078592191066e-05, "loss": 15.0633, "step": 16349 }, { "epoch": 0.6815055645867201, "grad_norm": 394.0, "learning_rate": 2.432499356961801e-05, "loss": 14.438, "step": 16350 }, { "epoch": 0.6815472468842483, "grad_norm": 92.5, "learning_rate": 2.4319201685254633e-05, "loss": 9.8134, "step": 16351 }, { "epoch": 0.6815889291817765, "grad_norm": 238.0, "learning_rate": 2.431341026892608e-05, "loss": 12.1254, "step": 16352 }, { "epoch": 0.6816306114793047, "grad_norm": 392.0, "learning_rate": 2.4307619320737912e-05, "loss": 15.2503, "step": 16353 }, { "epoch": 0.681672293776833, "grad_norm": 776.0, "learning_rate": 2.4301828840795652e-05, "loss": 19.876, "step": 16354 }, { "epoch": 0.6817139760743612, "grad_norm": 476.0, "learning_rate": 2.4296038829204855e-05, "loss": 16.1261, "step": 16355 }, { "epoch": 0.6817556583718895, "grad_norm": 214.0, "learning_rate": 2.4290249286071026e-05, "loss": 11.8129, "step": 16356 }, { "epoch": 0.6817973406694177, "grad_norm": 478.0, "learning_rate": 2.4284460211499687e-05, "loss": 15.8755, "step": 16357 }, { "epoch": 0.681839022966946, "grad_norm": 644.0, "learning_rate": 2.4278671605596342e-05, "loss": 19.0004, "step": 16358 }, { "epoch": 0.6818807052644742, "grad_norm": 1128.0, "learning_rate": 2.4272883468466496e-05, "loss": 25.1263, "step": 16359 }, { "epoch": 0.6819223875620024, "grad_norm": 181.0, "learning_rate": 2.4267095800215627e-05, "loss": 10.1253, "step": 16360 }, { "epoch": 0.6819640698595306, "grad_norm": 214.0, "learning_rate": 2.42613086009492e-05, "loss": 9.8128, "step": 16361 }, { "epoch": 0.6820057521570589, "grad_norm": 268.0, "learning_rate": 2.4255521870772752e-05, "loss": 13.0004, "step": 16362 }, { "epoch": 0.6820474344545872, "grad_norm": 75.5, "learning_rate": 2.424973560979166e-05, "loss": 8.2505, "step": 16363 }, { "epoch": 0.6820891167521154, "grad_norm": 201.0, "learning_rate": 2.4243949818111465e-05, "loss": 11.7501, "step": 16364 }, { "epoch": 0.6821307990496436, "grad_norm": 215.0, "learning_rate": 2.4238164495837535e-05, "loss": 11.6878, "step": 16365 }, { "epoch": 0.6821724813471719, "grad_norm": 700.0, "learning_rate": 2.423237964307538e-05, "loss": 22.7502, "step": 16366 }, { "epoch": 0.6822141636447001, "grad_norm": 209.0, "learning_rate": 2.4226595259930358e-05, "loss": 12.0628, "step": 16367 }, { "epoch": 0.6822558459422283, "grad_norm": 450.0, "learning_rate": 2.4220811346507955e-05, "loss": 16.3753, "step": 16368 }, { "epoch": 0.6822975282397565, "grad_norm": 198.0, "learning_rate": 2.421502790291352e-05, "loss": 11.1252, "step": 16369 }, { "epoch": 0.6823392105372849, "grad_norm": 143.0, "learning_rate": 2.4209244929252534e-05, "loss": 9.6254, "step": 16370 }, { "epoch": 0.6823808928348131, "grad_norm": 206.0, "learning_rate": 2.42034624256303e-05, "loss": 11.2502, "step": 16371 }, { "epoch": 0.6824225751323413, "grad_norm": 292.0, "learning_rate": 2.4197680392152283e-05, "loss": 13.564, "step": 16372 }, { "epoch": 0.6824642574298695, "grad_norm": 213.0, "learning_rate": 2.4191898828923826e-05, "loss": 12.1253, "step": 16373 }, { "epoch": 0.6825059397273978, "grad_norm": 488.0, "learning_rate": 2.4186117736050308e-05, "loss": 17.3752, "step": 16374 }, { "epoch": 0.682547622024926, "grad_norm": 704.0, "learning_rate": 2.418033711363708e-05, "loss": 19.6303, "step": 16375 }, { "epoch": 0.6825893043224542, "grad_norm": 780.0, "learning_rate": 2.4174556961789503e-05, "loss": 22.5022, "step": 16376 }, { "epoch": 0.6826309866199824, "grad_norm": 239.0, "learning_rate": 2.4168777280612913e-05, "loss": 11.9379, "step": 16377 }, { "epoch": 0.6826726689175108, "grad_norm": 187.0, "learning_rate": 2.4162998070212655e-05, "loss": 10.2502, "step": 16378 }, { "epoch": 0.682714351215039, "grad_norm": 200.0, "learning_rate": 2.4157219330694055e-05, "loss": 11.0628, "step": 16379 }, { "epoch": 0.6827560335125672, "grad_norm": 952.0, "learning_rate": 2.4151441062162423e-05, "loss": 21.6302, "step": 16380 }, { "epoch": 0.6827977158100954, "grad_norm": 157.0, "learning_rate": 2.414566326472307e-05, "loss": 10.0018, "step": 16381 }, { "epoch": 0.6828393981076237, "grad_norm": 278.0, "learning_rate": 2.41398859384813e-05, "loss": 11.1254, "step": 16382 }, { "epoch": 0.6828810804051519, "grad_norm": 302.0, "learning_rate": 2.4134109083542407e-05, "loss": 13.9381, "step": 16383 }, { "epoch": 0.6829227627026802, "grad_norm": 262.0, "learning_rate": 2.4128332700011668e-05, "loss": 11.3126, "step": 16384 }, { "epoch": 0.6829644450002084, "grad_norm": 270.0, "learning_rate": 2.4122556787994366e-05, "loss": 13.0002, "step": 16385 }, { "epoch": 0.6830061272977367, "grad_norm": 228.0, "learning_rate": 2.4116781347595763e-05, "loss": 12.0006, "step": 16386 }, { "epoch": 0.6830478095952649, "grad_norm": 422.0, "learning_rate": 2.4111006378921097e-05, "loss": 15.8753, "step": 16387 }, { "epoch": 0.6830894918927931, "grad_norm": 748.0, "learning_rate": 2.4105231882075675e-05, "loss": 20.2514, "step": 16388 }, { "epoch": 0.6831311741903213, "grad_norm": 290.0, "learning_rate": 2.4099457857164665e-05, "loss": 12.5002, "step": 16389 }, { "epoch": 0.6831728564878496, "grad_norm": 258.0, "learning_rate": 2.4093684304293373e-05, "loss": 12.5001, "step": 16390 }, { "epoch": 0.6832145387853779, "grad_norm": 524.0, "learning_rate": 2.4087911223566944e-05, "loss": 19.0004, "step": 16391 }, { "epoch": 0.6832562210829061, "grad_norm": 388.0, "learning_rate": 2.4082138615090673e-05, "loss": 15.5001, "step": 16392 }, { "epoch": 0.6832979033804343, "grad_norm": 192.0, "learning_rate": 2.407636647896969e-05, "loss": 12.314, "step": 16393 }, { "epoch": 0.6833395856779626, "grad_norm": 392.0, "learning_rate": 2.4070594815309266e-05, "loss": 14.3752, "step": 16394 }, { "epoch": 0.6833812679754908, "grad_norm": 203.0, "learning_rate": 2.4064823624214517e-05, "loss": 10.7505, "step": 16395 }, { "epoch": 0.683422950273019, "grad_norm": 1032.0, "learning_rate": 2.4059052905790674e-05, "loss": 22.7543, "step": 16396 }, { "epoch": 0.6834646325705472, "grad_norm": 684.0, "learning_rate": 2.4053282660142896e-05, "loss": 18.8754, "step": 16397 }, { "epoch": 0.6835063148680756, "grad_norm": 174.0, "learning_rate": 2.404751288737635e-05, "loss": 5.9377, "step": 16398 }, { "epoch": 0.6835479971656038, "grad_norm": 147.0, "learning_rate": 2.4041743587596176e-05, "loss": 9.1256, "step": 16399 }, { "epoch": 0.683589679463132, "grad_norm": 223.0, "learning_rate": 2.4035974760907536e-05, "loss": 11.5627, "step": 16400 }, { "epoch": 0.6836313617606602, "grad_norm": 163.0, "learning_rate": 2.4030206407415562e-05, "loss": 10.8128, "step": 16401 }, { "epoch": 0.6836730440581885, "grad_norm": 205.0, "learning_rate": 2.402443852722538e-05, "loss": 12.0002, "step": 16402 }, { "epoch": 0.6837147263557167, "grad_norm": 206.0, "learning_rate": 2.401867112044211e-05, "loss": 11.1878, "step": 16403 }, { "epoch": 0.6837564086532449, "grad_norm": 268.0, "learning_rate": 2.4012904187170872e-05, "loss": 12.4377, "step": 16404 }, { "epoch": 0.6837980909507732, "grad_norm": 288.0, "learning_rate": 2.4007137727516755e-05, "loss": 13.1878, "step": 16405 }, { "epoch": 0.6838397732483015, "grad_norm": 112.5, "learning_rate": 2.400137174158486e-05, "loss": 8.7504, "step": 16406 }, { "epoch": 0.6838814555458297, "grad_norm": 151.0, "learning_rate": 2.3995606229480278e-05, "loss": 9.9386, "step": 16407 }, { "epoch": 0.6839231378433579, "grad_norm": 764.0, "learning_rate": 2.3989841191308077e-05, "loss": 23.2501, "step": 16408 }, { "epoch": 0.6839648201408862, "grad_norm": 298.0, "learning_rate": 2.398407662717333e-05, "loss": 12.2504, "step": 16409 }, { "epoch": 0.6840065024384144, "grad_norm": 332.0, "learning_rate": 2.3978312537181095e-05, "loss": 14.4377, "step": 16410 }, { "epoch": 0.6840481847359426, "grad_norm": 211.0, "learning_rate": 2.397254892143643e-05, "loss": 11.0009, "step": 16411 }, { "epoch": 0.6840898670334709, "grad_norm": 180.0, "learning_rate": 2.3966785780044365e-05, "loss": 9.5008, "step": 16412 }, { "epoch": 0.6841315493309992, "grad_norm": 274.0, "learning_rate": 2.3961023113109926e-05, "loss": 13.6255, "step": 16413 }, { "epoch": 0.6841732316285274, "grad_norm": 450.0, "learning_rate": 2.3955260920738188e-05, "loss": 16.0001, "step": 16414 }, { "epoch": 0.6842149139260556, "grad_norm": 502.0, "learning_rate": 2.394949920303409e-05, "loss": 16.8761, "step": 16415 }, { "epoch": 0.6842565962235838, "grad_norm": 380.0, "learning_rate": 2.3943737960102725e-05, "loss": 14.2518, "step": 16416 }, { "epoch": 0.6842982785211121, "grad_norm": 101.5, "learning_rate": 2.3937977192049004e-05, "loss": 8.2508, "step": 16417 }, { "epoch": 0.6843399608186403, "grad_norm": 600.0, "learning_rate": 2.3932216898978007e-05, "loss": 20.3758, "step": 16418 }, { "epoch": 0.6843816431161686, "grad_norm": 68.5, "learning_rate": 2.392645708099463e-05, "loss": 7.6879, "step": 16419 }, { "epoch": 0.6844233254136968, "grad_norm": 552.0, "learning_rate": 2.392069773820393e-05, "loss": 18.8757, "step": 16420 }, { "epoch": 0.6844650077112251, "grad_norm": 239.0, "learning_rate": 2.3914938870710785e-05, "loss": 12.0002, "step": 16421 }, { "epoch": 0.6845066900087533, "grad_norm": 300.0, "learning_rate": 2.3909180478620212e-05, "loss": 14.6252, "step": 16422 }, { "epoch": 0.6845483723062815, "grad_norm": 556.0, "learning_rate": 2.3903422562037147e-05, "loss": 14.6897, "step": 16423 }, { "epoch": 0.6845900546038097, "grad_norm": 620.0, "learning_rate": 2.389766512106652e-05, "loss": 19.6257, "step": 16424 }, { "epoch": 0.684631736901338, "grad_norm": 220.0, "learning_rate": 2.3891908155813265e-05, "loss": 11.6252, "step": 16425 }, { "epoch": 0.6846734191988663, "grad_norm": 1664.0, "learning_rate": 2.38861516663823e-05, "loss": 37.0006, "step": 16426 }, { "epoch": 0.6847151014963945, "grad_norm": 336.0, "learning_rate": 2.388039565287854e-05, "loss": 13.9377, "step": 16427 }, { "epoch": 0.6847567837939227, "grad_norm": 75.0, "learning_rate": 2.3874640115406887e-05, "loss": 8.3137, "step": 16428 }, { "epoch": 0.684798466091451, "grad_norm": 156.0, "learning_rate": 2.386888505407224e-05, "loss": 10.0627, "step": 16429 }, { "epoch": 0.6848401483889792, "grad_norm": 286.0, "learning_rate": 2.3863130468979477e-05, "loss": 12.5627, "step": 16430 }, { "epoch": 0.6848818306865074, "grad_norm": 210.0, "learning_rate": 2.3857376360233484e-05, "loss": 10.6256, "step": 16431 }, { "epoch": 0.6849235129840356, "grad_norm": 988.0, "learning_rate": 2.3851622727939126e-05, "loss": 26.5004, "step": 16432 }, { "epoch": 0.684965195281564, "grad_norm": 378.0, "learning_rate": 2.384586957220127e-05, "loss": 14.1878, "step": 16433 }, { "epoch": 0.6850068775790922, "grad_norm": 452.0, "learning_rate": 2.384011689312476e-05, "loss": 16.6254, "step": 16434 }, { "epoch": 0.6850485598766204, "grad_norm": 620.0, "learning_rate": 2.3834364690814442e-05, "loss": 20.0006, "step": 16435 }, { "epoch": 0.6850902421741486, "grad_norm": 1048.0, "learning_rate": 2.3828612965375153e-05, "loss": 27.626, "step": 16436 }, { "epoch": 0.6851319244716769, "grad_norm": 358.0, "learning_rate": 2.3822861716911694e-05, "loss": 13.9394, "step": 16437 }, { "epoch": 0.6851736067692051, "grad_norm": 170.0, "learning_rate": 2.3817110945528937e-05, "loss": 10.8128, "step": 16438 }, { "epoch": 0.6852152890667333, "grad_norm": 280.0, "learning_rate": 2.3811360651331627e-05, "loss": 11.7502, "step": 16439 }, { "epoch": 0.6852569713642616, "grad_norm": 242.0, "learning_rate": 2.380561083442463e-05, "loss": 11.0004, "step": 16440 }, { "epoch": 0.6852986536617899, "grad_norm": 452.0, "learning_rate": 2.3799861494912663e-05, "loss": 15.5023, "step": 16441 }, { "epoch": 0.6853403359593181, "grad_norm": 176.0, "learning_rate": 2.3794112632900585e-05, "loss": 10.8754, "step": 16442 }, { "epoch": 0.6853820182568463, "grad_norm": 482.0, "learning_rate": 2.378836424849309e-05, "loss": 17.0006, "step": 16443 }, { "epoch": 0.6854237005543745, "grad_norm": 87.0, "learning_rate": 2.378261634179502e-05, "loss": 9.1256, "step": 16444 }, { "epoch": 0.6854653828519028, "grad_norm": 99.0, "learning_rate": 2.377686891291106e-05, "loss": 6.6877, "step": 16445 }, { "epoch": 0.685507065149431, "grad_norm": 1296.0, "learning_rate": 2.3771121961946025e-05, "loss": 26.0049, "step": 16446 }, { "epoch": 0.6855487474469593, "grad_norm": 278.0, "learning_rate": 2.3765375489004588e-05, "loss": 13.3754, "step": 16447 }, { "epoch": 0.6855904297444875, "grad_norm": 568.0, "learning_rate": 2.3759629494191532e-05, "loss": 18.2506, "step": 16448 }, { "epoch": 0.6856321120420158, "grad_norm": 736.0, "learning_rate": 2.3753883977611553e-05, "loss": 21.3794, "step": 16449 }, { "epoch": 0.685673794339544, "grad_norm": 464.0, "learning_rate": 2.374813893936937e-05, "loss": 17.0003, "step": 16450 }, { "epoch": 0.6857154766370722, "grad_norm": 434.0, "learning_rate": 2.374239437956969e-05, "loss": 15.3754, "step": 16451 }, { "epoch": 0.6857571589346004, "grad_norm": 676.0, "learning_rate": 2.3736650298317197e-05, "loss": 19.2537, "step": 16452 }, { "epoch": 0.6857988412321288, "grad_norm": 152.0, "learning_rate": 2.3730906695716586e-05, "loss": 8.0007, "step": 16453 }, { "epoch": 0.685840523529657, "grad_norm": 390.0, "learning_rate": 2.3725163571872532e-05, "loss": 15.8131, "step": 16454 }, { "epoch": 0.6858822058271852, "grad_norm": 332.0, "learning_rate": 2.3719420926889702e-05, "loss": 12.7507, "step": 16455 }, { "epoch": 0.6859238881247134, "grad_norm": 386.0, "learning_rate": 2.3713678760872765e-05, "loss": 15.6878, "step": 16456 }, { "epoch": 0.6859655704222417, "grad_norm": 278.0, "learning_rate": 2.370793707392636e-05, "loss": 13.2503, "step": 16457 }, { "epoch": 0.6860072527197699, "grad_norm": 744.0, "learning_rate": 2.370219586615514e-05, "loss": 22.0005, "step": 16458 }, { "epoch": 0.6860489350172981, "grad_norm": 310.0, "learning_rate": 2.369645513766373e-05, "loss": 14.1272, "step": 16459 }, { "epoch": 0.6860906173148263, "grad_norm": 560.0, "learning_rate": 2.3690714888556764e-05, "loss": 20.376, "step": 16460 }, { "epoch": 0.6861322996123547, "grad_norm": 356.0, "learning_rate": 2.3684975118938858e-05, "loss": 13.6253, "step": 16461 }, { "epoch": 0.6861739819098829, "grad_norm": 129.0, "learning_rate": 2.367923582891461e-05, "loss": 8.5635, "step": 16462 }, { "epoch": 0.6862156642074111, "grad_norm": 312.0, "learning_rate": 2.3673497018588607e-05, "loss": 13.8766, "step": 16463 }, { "epoch": 0.6862573465049393, "grad_norm": 330.0, "learning_rate": 2.36677586880655e-05, "loss": 15.1263, "step": 16464 }, { "epoch": 0.6862990288024676, "grad_norm": 239.0, "learning_rate": 2.366202083744979e-05, "loss": 13.2508, "step": 16465 }, { "epoch": 0.6863407110999958, "grad_norm": 232.0, "learning_rate": 2.3656283466846125e-05, "loss": 10.3759, "step": 16466 }, { "epoch": 0.686382393397524, "grad_norm": 306.0, "learning_rate": 2.3650546576358994e-05, "loss": 14.0629, "step": 16467 }, { "epoch": 0.6864240756950523, "grad_norm": 430.0, "learning_rate": 2.364481016609303e-05, "loss": 16.2502, "step": 16468 }, { "epoch": 0.6864657579925806, "grad_norm": 278.0, "learning_rate": 2.3639074236152702e-05, "loss": 11.0017, "step": 16469 }, { "epoch": 0.6865074402901088, "grad_norm": 354.0, "learning_rate": 2.3633338786642622e-05, "loss": 15.5627, "step": 16470 }, { "epoch": 0.686549122587637, "grad_norm": 988.0, "learning_rate": 2.362760381766725e-05, "loss": 25.3754, "step": 16471 }, { "epoch": 0.6865908048851652, "grad_norm": 760.0, "learning_rate": 2.3621869329331153e-05, "loss": 21.5007, "step": 16472 }, { "epoch": 0.6866324871826935, "grad_norm": 524.0, "learning_rate": 2.361613532173883e-05, "loss": 18.876, "step": 16473 }, { "epoch": 0.6866741694802218, "grad_norm": 300.0, "learning_rate": 2.3610401794994786e-05, "loss": 14.5008, "step": 16474 }, { "epoch": 0.68671585177775, "grad_norm": 138.0, "learning_rate": 2.3604668749203512e-05, "loss": 10.563, "step": 16475 }, { "epoch": 0.6867575340752782, "grad_norm": 460.0, "learning_rate": 2.359893618446949e-05, "loss": 17.0008, "step": 16476 }, { "epoch": 0.6867992163728065, "grad_norm": 700.0, "learning_rate": 2.3593204100897203e-05, "loss": 19.3751, "step": 16477 }, { "epoch": 0.6868408986703347, "grad_norm": 600.0, "learning_rate": 2.3587472498591112e-05, "loss": 19.5003, "step": 16478 }, { "epoch": 0.6868825809678629, "grad_norm": 354.0, "learning_rate": 2.358174137765568e-05, "loss": 13.6251, "step": 16479 }, { "epoch": 0.6869242632653911, "grad_norm": 332.0, "learning_rate": 2.357601073819536e-05, "loss": 13.8754, "step": 16480 }, { "epoch": 0.6869659455629195, "grad_norm": 672.0, "learning_rate": 2.3570280580314587e-05, "loss": 19.3753, "step": 16481 }, { "epoch": 0.6870076278604477, "grad_norm": 436.0, "learning_rate": 2.35645509041178e-05, "loss": 14.8752, "step": 16482 }, { "epoch": 0.6870493101579759, "grad_norm": 139.0, "learning_rate": 2.3558821709709418e-05, "loss": 10.2502, "step": 16483 }, { "epoch": 0.6870909924555042, "grad_norm": 78.0, "learning_rate": 2.3553092997193855e-05, "loss": 8.688, "step": 16484 }, { "epoch": 0.6871326747530324, "grad_norm": 904.0, "learning_rate": 2.3547364766675524e-05, "loss": 23.1252, "step": 16485 }, { "epoch": 0.6871743570505606, "grad_norm": 89.5, "learning_rate": 2.354163701825881e-05, "loss": 7.2815, "step": 16486 }, { "epoch": 0.6872160393480888, "grad_norm": 1248.0, "learning_rate": 2.3535909752048096e-05, "loss": 31.8753, "step": 16487 }, { "epoch": 0.6872577216456172, "grad_norm": 372.0, "learning_rate": 2.3530182968147818e-05, "loss": 15.3129, "step": 16488 }, { "epoch": 0.6872994039431454, "grad_norm": 152.0, "learning_rate": 2.3524456666662266e-05, "loss": 9.5028, "step": 16489 }, { "epoch": 0.6873410862406736, "grad_norm": 520.0, "learning_rate": 2.3518730847695874e-05, "loss": 17.1285, "step": 16490 }, { "epoch": 0.6873827685382018, "grad_norm": 294.0, "learning_rate": 2.351300551135293e-05, "loss": 14.438, "step": 16491 }, { "epoch": 0.6874244508357301, "grad_norm": 1256.0, "learning_rate": 2.350728065773785e-05, "loss": 28.5011, "step": 16492 }, { "epoch": 0.6874661331332583, "grad_norm": 186.0, "learning_rate": 2.3501556286954896e-05, "loss": 12.127, "step": 16493 }, { "epoch": 0.6875078154307865, "grad_norm": 270.0, "learning_rate": 2.3495832399108464e-05, "loss": 13.1257, "step": 16494 }, { "epoch": 0.6875494977283148, "grad_norm": 314.0, "learning_rate": 2.34901089943028e-05, "loss": 13.2505, "step": 16495 }, { "epoch": 0.6875911800258431, "grad_norm": 125.5, "learning_rate": 2.3484386072642294e-05, "loss": 9.4378, "step": 16496 }, { "epoch": 0.6876328623233713, "grad_norm": 156.0, "learning_rate": 2.3478663634231167e-05, "loss": 8.9377, "step": 16497 }, { "epoch": 0.6876745446208995, "grad_norm": 205.0, "learning_rate": 2.347294167917377e-05, "loss": 9.7504, "step": 16498 }, { "epoch": 0.6877162269184277, "grad_norm": 772.0, "learning_rate": 2.346722020757437e-05, "loss": 24.0003, "step": 16499 }, { "epoch": 0.687757909215956, "grad_norm": 118.0, "learning_rate": 2.346149921953723e-05, "loss": 7.1257, "step": 16500 }, { "epoch": 0.6877995915134842, "grad_norm": 944.0, "learning_rate": 2.3455778715166627e-05, "loss": 21.6302, "step": 16501 }, { "epoch": 0.6878412738110125, "grad_norm": 172.0, "learning_rate": 2.3450058694566806e-05, "loss": 7.219, "step": 16502 }, { "epoch": 0.6878829561085407, "grad_norm": 203.0, "learning_rate": 2.3444339157842034e-05, "loss": 11.5007, "step": 16503 }, { "epoch": 0.687924638406069, "grad_norm": 147.0, "learning_rate": 2.3438620105096525e-05, "loss": 5.5317, "step": 16504 }, { "epoch": 0.6879663207035972, "grad_norm": 89.5, "learning_rate": 2.343290153643453e-05, "loss": 9.3133, "step": 16505 }, { "epoch": 0.6880080030011254, "grad_norm": 251.0, "learning_rate": 2.3427183451960265e-05, "loss": 12.0004, "step": 16506 }, { "epoch": 0.6880496852986536, "grad_norm": 235.0, "learning_rate": 2.3421465851777935e-05, "loss": 12.6254, "step": 16507 }, { "epoch": 0.688091367596182, "grad_norm": 736.0, "learning_rate": 2.3415748735991754e-05, "loss": 22.3796, "step": 16508 }, { "epoch": 0.6881330498937102, "grad_norm": 132.0, "learning_rate": 2.341003210470591e-05, "loss": 9.688, "step": 16509 }, { "epoch": 0.6881747321912384, "grad_norm": 184.0, "learning_rate": 2.340431595802459e-05, "loss": 9.8133, "step": 16510 }, { "epoch": 0.6882164144887666, "grad_norm": 163.0, "learning_rate": 2.3398600296051974e-05, "loss": 10.3129, "step": 16511 }, { "epoch": 0.6882580967862949, "grad_norm": 640.0, "learning_rate": 2.339288511889223e-05, "loss": 19.3753, "step": 16512 }, { "epoch": 0.6882997790838231, "grad_norm": 246.0, "learning_rate": 2.3387170426649496e-05, "loss": 11.0023, "step": 16513 }, { "epoch": 0.6883414613813513, "grad_norm": 334.0, "learning_rate": 2.3381456219427984e-05, "loss": 12.8775, "step": 16514 }, { "epoch": 0.6883831436788795, "grad_norm": 290.0, "learning_rate": 2.3375742497331755e-05, "loss": 13.5001, "step": 16515 }, { "epoch": 0.6884248259764079, "grad_norm": 728.0, "learning_rate": 2.337002926046502e-05, "loss": 21.7502, "step": 16516 }, { "epoch": 0.6884665082739361, "grad_norm": 330.0, "learning_rate": 2.336431650893183e-05, "loss": 11.8153, "step": 16517 }, { "epoch": 0.6885081905714643, "grad_norm": 560.0, "learning_rate": 2.3358604242836375e-05, "loss": 18.3752, "step": 16518 }, { "epoch": 0.6885498728689925, "grad_norm": 648.0, "learning_rate": 2.3352892462282684e-05, "loss": 18.8754, "step": 16519 }, { "epoch": 0.6885915551665208, "grad_norm": 424.0, "learning_rate": 2.3347181167374927e-05, "loss": 16.7503, "step": 16520 }, { "epoch": 0.688633237464049, "grad_norm": 480.0, "learning_rate": 2.3341470358217126e-05, "loss": 17.0005, "step": 16521 }, { "epoch": 0.6886749197615772, "grad_norm": 450.0, "learning_rate": 2.3335760034913412e-05, "loss": 16.5009, "step": 16522 }, { "epoch": 0.6887166020591055, "grad_norm": 189.0, "learning_rate": 2.3330050197567838e-05, "loss": 10.9377, "step": 16523 }, { "epoch": 0.6887582843566338, "grad_norm": 696.0, "learning_rate": 2.3324340846284464e-05, "loss": 18.8767, "step": 16524 }, { "epoch": 0.688799966654162, "grad_norm": 422.0, "learning_rate": 2.331863198116735e-05, "loss": 15.5001, "step": 16525 }, { "epoch": 0.6888416489516902, "grad_norm": 249.0, "learning_rate": 2.3312923602320536e-05, "loss": 13.6252, "step": 16526 }, { "epoch": 0.6888833312492184, "grad_norm": 300.0, "learning_rate": 2.3307215709848057e-05, "loss": 13.9381, "step": 16527 }, { "epoch": 0.6889250135467467, "grad_norm": 302.0, "learning_rate": 2.3301508303853943e-05, "loss": 14.0631, "step": 16528 }, { "epoch": 0.688966695844275, "grad_norm": 120.0, "learning_rate": 2.329580138444221e-05, "loss": 9.5636, "step": 16529 }, { "epoch": 0.6890083781418032, "grad_norm": 420.0, "learning_rate": 2.3290094951716868e-05, "loss": 15.9379, "step": 16530 }, { "epoch": 0.6890500604393314, "grad_norm": 620.0, "learning_rate": 2.3284389005781915e-05, "loss": 17.753, "step": 16531 }, { "epoch": 0.6890917427368597, "grad_norm": 628.0, "learning_rate": 2.3278683546741348e-05, "loss": 19.501, "step": 16532 }, { "epoch": 0.6891334250343879, "grad_norm": 312.0, "learning_rate": 2.3272978574699138e-05, "loss": 14.6261, "step": 16533 }, { "epoch": 0.6891751073319161, "grad_norm": 448.0, "learning_rate": 2.3267274089759274e-05, "loss": 16.5004, "step": 16534 }, { "epoch": 0.6892167896294443, "grad_norm": 246.0, "learning_rate": 2.3261570092025707e-05, "loss": 11.9384, "step": 16535 }, { "epoch": 0.6892584719269726, "grad_norm": 428.0, "learning_rate": 2.3255866581602402e-05, "loss": 15.4378, "step": 16536 }, { "epoch": 0.6893001542245009, "grad_norm": 292.0, "learning_rate": 2.32501635585933e-05, "loss": 13.5638, "step": 16537 }, { "epoch": 0.6893418365220291, "grad_norm": 1032.0, "learning_rate": 2.3244461023102343e-05, "loss": 24.3756, "step": 16538 }, { "epoch": 0.6893835188195573, "grad_norm": 284.0, "learning_rate": 2.3238758975233444e-05, "loss": 10.0001, "step": 16539 }, { "epoch": 0.6894252011170856, "grad_norm": 253.0, "learning_rate": 2.323305741509057e-05, "loss": 12.6253, "step": 16540 }, { "epoch": 0.6894668834146138, "grad_norm": 231.0, "learning_rate": 2.3227356342777568e-05, "loss": 11.9379, "step": 16541 }, { "epoch": 0.689508565712142, "grad_norm": 286.0, "learning_rate": 2.322165575839841e-05, "loss": 13.8756, "step": 16542 }, { "epoch": 0.6895502480096702, "grad_norm": 540.0, "learning_rate": 2.321595566205691e-05, "loss": 15.7505, "step": 16543 }, { "epoch": 0.6895919303071986, "grad_norm": 358.0, "learning_rate": 2.3210256053857038e-05, "loss": 14.9394, "step": 16544 }, { "epoch": 0.6896336126047268, "grad_norm": 159.0, "learning_rate": 2.3204556933902587e-05, "loss": 10.1259, "step": 16545 }, { "epoch": 0.689675294902255, "grad_norm": 231.0, "learning_rate": 2.3198858302297505e-05, "loss": 11.9376, "step": 16546 }, { "epoch": 0.6897169771997832, "grad_norm": 255.0, "learning_rate": 2.3193160159145572e-05, "loss": 12.6883, "step": 16547 }, { "epoch": 0.6897586594973115, "grad_norm": 376.0, "learning_rate": 2.3187462504550693e-05, "loss": 15.1891, "step": 16548 }, { "epoch": 0.6898003417948397, "grad_norm": 612.0, "learning_rate": 2.318176533861669e-05, "loss": 21.3763, "step": 16549 }, { "epoch": 0.689842024092368, "grad_norm": 260.0, "learning_rate": 2.3176068661447397e-05, "loss": 11.6876, "step": 16550 }, { "epoch": 0.6898837063898962, "grad_norm": 664.0, "learning_rate": 2.317037247314663e-05, "loss": 19.8752, "step": 16551 }, { "epoch": 0.6899253886874245, "grad_norm": 239.0, "learning_rate": 2.316467677381821e-05, "loss": 12.5629, "step": 16552 }, { "epoch": 0.6899670709849527, "grad_norm": 91.0, "learning_rate": 2.3158981563565936e-05, "loss": 9.2504, "step": 16553 }, { "epoch": 0.6900087532824809, "grad_norm": 400.0, "learning_rate": 2.3153286842493605e-05, "loss": 15.1877, "step": 16554 }, { "epoch": 0.6900504355800092, "grad_norm": 219.0, "learning_rate": 2.3147592610705005e-05, "loss": 12.3127, "step": 16555 }, { "epoch": 0.6900921178775374, "grad_norm": 166.0, "learning_rate": 2.3141898868303914e-05, "loss": 12.4379, "step": 16556 }, { "epoch": 0.6901338001750656, "grad_norm": 308.0, "learning_rate": 2.3136205615394103e-05, "loss": 13.2504, "step": 16557 }, { "epoch": 0.6901754824725939, "grad_norm": 856.0, "learning_rate": 2.3130512852079323e-05, "loss": 23.8752, "step": 16558 }, { "epoch": 0.6902171647701222, "grad_norm": 856.0, "learning_rate": 2.3124820578463334e-05, "loss": 24.751, "step": 16559 }, { "epoch": 0.6902588470676504, "grad_norm": 328.0, "learning_rate": 2.311912879464988e-05, "loss": 14.4379, "step": 16560 }, { "epoch": 0.6903005293651786, "grad_norm": 430.0, "learning_rate": 2.3113437500742686e-05, "loss": 15.3132, "step": 16561 }, { "epoch": 0.6903422116627068, "grad_norm": 284.0, "learning_rate": 2.310774669684548e-05, "loss": 13.8753, "step": 16562 }, { "epoch": 0.6903838939602351, "grad_norm": 616.0, "learning_rate": 2.310205638306196e-05, "loss": 18.7524, "step": 16563 }, { "epoch": 0.6904255762577634, "grad_norm": 324.0, "learning_rate": 2.3096366559495885e-05, "loss": 14.0006, "step": 16564 }, { "epoch": 0.6904672585552916, "grad_norm": 396.0, "learning_rate": 2.3090677226250885e-05, "loss": 14.5002, "step": 16565 }, { "epoch": 0.6905089408528198, "grad_norm": 141.0, "learning_rate": 2.3084988383430718e-05, "loss": 11.3134, "step": 16566 }, { "epoch": 0.6905506231503481, "grad_norm": 151.0, "learning_rate": 2.307930003113899e-05, "loss": 4.4069, "step": 16567 }, { "epoch": 0.6905923054478763, "grad_norm": 170.0, "learning_rate": 2.3073612169479443e-05, "loss": 10.5005, "step": 16568 }, { "epoch": 0.6906339877454045, "grad_norm": 396.0, "learning_rate": 2.3067924798555668e-05, "loss": 15.813, "step": 16569 }, { "epoch": 0.6906756700429327, "grad_norm": 181.0, "learning_rate": 2.3062237918471396e-05, "loss": 11.1253, "step": 16570 }, { "epoch": 0.690717352340461, "grad_norm": 215.0, "learning_rate": 2.305655152933019e-05, "loss": 11.064, "step": 16571 }, { "epoch": 0.6907590346379893, "grad_norm": 199.0, "learning_rate": 2.3050865631235757e-05, "loss": 12.3752, "step": 16572 }, { "epoch": 0.6908007169355175, "grad_norm": 110.0, "learning_rate": 2.3045180224291657e-05, "loss": 7.8446, "step": 16573 }, { "epoch": 0.6908423992330457, "grad_norm": 548.0, "learning_rate": 2.3039495308601555e-05, "loss": 18.8754, "step": 16574 }, { "epoch": 0.690884081530574, "grad_norm": 540.0, "learning_rate": 2.3033810884269048e-05, "loss": 17.5002, "step": 16575 }, { "epoch": 0.6909257638281022, "grad_norm": 460.0, "learning_rate": 2.3028126951397732e-05, "loss": 16.7507, "step": 16576 }, { "epoch": 0.6909674461256304, "grad_norm": 213.0, "learning_rate": 2.3022443510091195e-05, "loss": 9.6877, "step": 16577 }, { "epoch": 0.6910091284231586, "grad_norm": 430.0, "learning_rate": 2.301676056045302e-05, "loss": 15.2504, "step": 16578 }, { "epoch": 0.691050810720687, "grad_norm": 221.0, "learning_rate": 2.301107810258678e-05, "loss": 11.0006, "step": 16579 }, { "epoch": 0.6910924930182152, "grad_norm": 165.0, "learning_rate": 2.3005396136596037e-05, "loss": 10.0629, "step": 16580 }, { "epoch": 0.6911341753157434, "grad_norm": 428.0, "learning_rate": 2.2999714662584348e-05, "loss": 14.9389, "step": 16581 }, { "epoch": 0.6911758576132716, "grad_norm": 600.0, "learning_rate": 2.2994033680655253e-05, "loss": 15.9415, "step": 16582 }, { "epoch": 0.6912175399107999, "grad_norm": 86.0, "learning_rate": 2.298835319091229e-05, "loss": 8.7504, "step": 16583 }, { "epoch": 0.6912592222083281, "grad_norm": 121.5, "learning_rate": 2.2982673193458993e-05, "loss": 9.0003, "step": 16584 }, { "epoch": 0.6913009045058564, "grad_norm": 564.0, "learning_rate": 2.2976993688398873e-05, "loss": 19.3757, "step": 16585 }, { "epoch": 0.6913425868033846, "grad_norm": 442.0, "learning_rate": 2.2971314675835442e-05, "loss": 15.5629, "step": 16586 }, { "epoch": 0.6913842691009129, "grad_norm": 1600.0, "learning_rate": 2.29656361558722e-05, "loss": 34.0039, "step": 16587 }, { "epoch": 0.6914259513984411, "grad_norm": 250.0, "learning_rate": 2.295995812861264e-05, "loss": 10.5005, "step": 16588 }, { "epoch": 0.6914676336959693, "grad_norm": 552.0, "learning_rate": 2.2954280594160223e-05, "loss": 17.0005, "step": 16589 }, { "epoch": 0.6915093159934975, "grad_norm": 374.0, "learning_rate": 2.294860355261848e-05, "loss": 15.0627, "step": 16590 }, { "epoch": 0.6915509982910258, "grad_norm": 60.0, "learning_rate": 2.2942927004090804e-05, "loss": 7.4378, "step": 16591 }, { "epoch": 0.691592680588554, "grad_norm": 197.0, "learning_rate": 2.293725094868072e-05, "loss": 5.0951, "step": 16592 }, { "epoch": 0.6916343628860823, "grad_norm": 195.0, "learning_rate": 2.2931575386491604e-05, "loss": 11.813, "step": 16593 }, { "epoch": 0.6916760451836105, "grad_norm": 1584.0, "learning_rate": 2.292590031762697e-05, "loss": 30.7586, "step": 16594 }, { "epoch": 0.6917177274811388, "grad_norm": 209.0, "learning_rate": 2.2920225742190166e-05, "loss": 11.3779, "step": 16595 }, { "epoch": 0.691759409778667, "grad_norm": 444.0, "learning_rate": 2.2914551660284688e-05, "loss": 16.3752, "step": 16596 }, { "epoch": 0.6918010920761952, "grad_norm": 178.0, "learning_rate": 2.2908878072013874e-05, "loss": 10.5627, "step": 16597 }, { "epoch": 0.6918427743737234, "grad_norm": 378.0, "learning_rate": 2.290320497748118e-05, "loss": 15.9377, "step": 16598 }, { "epoch": 0.6918844566712518, "grad_norm": 688.0, "learning_rate": 2.2897532376789982e-05, "loss": 21.5003, "step": 16599 }, { "epoch": 0.69192613896878, "grad_norm": 588.0, "learning_rate": 2.2891860270043662e-05, "loss": 17.7507, "step": 16600 }, { "epoch": 0.6919678212663082, "grad_norm": 572.0, "learning_rate": 2.2886188657345592e-05, "loss": 18.0002, "step": 16601 }, { "epoch": 0.6920095035638364, "grad_norm": 370.0, "learning_rate": 2.2880517538799144e-05, "loss": 15.2504, "step": 16602 }, { "epoch": 0.6920511858613647, "grad_norm": 392.0, "learning_rate": 2.2874846914507676e-05, "loss": 15.3129, "step": 16603 }, { "epoch": 0.6920928681588929, "grad_norm": 616.0, "learning_rate": 2.286917678457452e-05, "loss": 17.0009, "step": 16604 }, { "epoch": 0.6921345504564211, "grad_norm": 536.0, "learning_rate": 2.2863507149103035e-05, "loss": 14.4458, "step": 16605 }, { "epoch": 0.6921762327539493, "grad_norm": 364.0, "learning_rate": 2.2857838008196535e-05, "loss": 14.1253, "step": 16606 }, { "epoch": 0.6922179150514777, "grad_norm": 296.0, "learning_rate": 2.2852169361958354e-05, "loss": 13.6878, "step": 16607 }, { "epoch": 0.6922595973490059, "grad_norm": 154.0, "learning_rate": 2.284650121049179e-05, "loss": 10.938, "step": 16608 }, { "epoch": 0.6923012796465341, "grad_norm": 416.0, "learning_rate": 2.2840833553900154e-05, "loss": 14.8754, "step": 16609 }, { "epoch": 0.6923429619440623, "grad_norm": 274.0, "learning_rate": 2.2835166392286733e-05, "loss": 12.6877, "step": 16610 }, { "epoch": 0.6923846442415906, "grad_norm": 398.0, "learning_rate": 2.282949972575482e-05, "loss": 15.1878, "step": 16611 }, { "epoch": 0.6924263265391188, "grad_norm": 274.0, "learning_rate": 2.2823833554407686e-05, "loss": 12.3752, "step": 16612 }, { "epoch": 0.692468008836647, "grad_norm": 174.0, "learning_rate": 2.2818167878348583e-05, "loss": 12.1253, "step": 16613 }, { "epoch": 0.6925096911341753, "grad_norm": 197.0, "learning_rate": 2.2812502697680816e-05, "loss": 11.0006, "step": 16614 }, { "epoch": 0.6925513734317036, "grad_norm": 490.0, "learning_rate": 2.2806838012507563e-05, "loss": 14.688, "step": 16615 }, { "epoch": 0.6925930557292318, "grad_norm": 326.0, "learning_rate": 2.2801173822932143e-05, "loss": 11.7524, "step": 16616 }, { "epoch": 0.69263473802676, "grad_norm": 492.0, "learning_rate": 2.2795510129057707e-05, "loss": 18.7517, "step": 16617 }, { "epoch": 0.6926764203242882, "grad_norm": 196.0, "learning_rate": 2.2789846930987545e-05, "loss": 9.8753, "step": 16618 }, { "epoch": 0.6927181026218165, "grad_norm": 124.5, "learning_rate": 2.2784184228824806e-05, "loss": 10.7507, "step": 16619 }, { "epoch": 0.6927597849193448, "grad_norm": 416.0, "learning_rate": 2.2778522022672767e-05, "loss": 16.1255, "step": 16620 }, { "epoch": 0.692801467216873, "grad_norm": 544.0, "learning_rate": 2.2772860312634535e-05, "loss": 15.5629, "step": 16621 }, { "epoch": 0.6928431495144012, "grad_norm": 536.0, "learning_rate": 2.276719909881338e-05, "loss": 18.0001, "step": 16622 }, { "epoch": 0.6928848318119295, "grad_norm": 520.0, "learning_rate": 2.2761538381312403e-05, "loss": 18.1252, "step": 16623 }, { "epoch": 0.6929265141094577, "grad_norm": 129.0, "learning_rate": 2.275587816023483e-05, "loss": 10.6255, "step": 16624 }, { "epoch": 0.6929681964069859, "grad_norm": 748.0, "learning_rate": 2.275021843568379e-05, "loss": 19.2502, "step": 16625 }, { "epoch": 0.6930098787045142, "grad_norm": 474.0, "learning_rate": 2.274455920776244e-05, "loss": 15.7502, "step": 16626 }, { "epoch": 0.6930515610020425, "grad_norm": 97.5, "learning_rate": 2.2738900476573916e-05, "loss": 9.8128, "step": 16627 }, { "epoch": 0.6930932432995707, "grad_norm": 676.0, "learning_rate": 2.2733242242221353e-05, "loss": 20.3752, "step": 16628 }, { "epoch": 0.6931349255970989, "grad_norm": 244.0, "learning_rate": 2.272758450480787e-05, "loss": 12.9379, "step": 16629 }, { "epoch": 0.6931766078946272, "grad_norm": 312.0, "learning_rate": 2.2721927264436582e-05, "loss": 14.1251, "step": 16630 }, { "epoch": 0.6932182901921554, "grad_norm": 294.0, "learning_rate": 2.2716270521210593e-05, "loss": 13.6876, "step": 16631 }, { "epoch": 0.6932599724896836, "grad_norm": 952.0, "learning_rate": 2.271061427523299e-05, "loss": 26.7503, "step": 16632 }, { "epoch": 0.6933016547872118, "grad_norm": 532.0, "learning_rate": 2.2704958526606867e-05, "loss": 18.8773, "step": 16633 }, { "epoch": 0.6933433370847402, "grad_norm": 61.0, "learning_rate": 2.2699303275435297e-05, "loss": 7.594, "step": 16634 }, { "epoch": 0.6933850193822684, "grad_norm": 239.0, "learning_rate": 2.269364852182135e-05, "loss": 11.0629, "step": 16635 }, { "epoch": 0.6934267016797966, "grad_norm": 85.0, "learning_rate": 2.2687994265868084e-05, "loss": 8.6883, "step": 16636 }, { "epoch": 0.6934683839773248, "grad_norm": 452.0, "learning_rate": 2.2682340507678546e-05, "loss": 16.5012, "step": 16637 }, { "epoch": 0.6935100662748531, "grad_norm": 644.0, "learning_rate": 2.2676687247355773e-05, "loss": 20.8752, "step": 16638 }, { "epoch": 0.6935517485723813, "grad_norm": 262.0, "learning_rate": 2.2671034485002785e-05, "loss": 12.5003, "step": 16639 }, { "epoch": 0.6935934308699095, "grad_norm": 142.0, "learning_rate": 2.266538222072266e-05, "loss": 10.3753, "step": 16640 }, { "epoch": 0.6936351131674378, "grad_norm": 292.0, "learning_rate": 2.265973045461833e-05, "loss": 8.4396, "step": 16641 }, { "epoch": 0.6936767954649661, "grad_norm": 354.0, "learning_rate": 2.2654079186792876e-05, "loss": 12.5627, "step": 16642 }, { "epoch": 0.6937184777624943, "grad_norm": 644.0, "learning_rate": 2.264842841734922e-05, "loss": 22.1254, "step": 16643 }, { "epoch": 0.6937601600600225, "grad_norm": 306.0, "learning_rate": 2.264277814639042e-05, "loss": 13.8756, "step": 16644 }, { "epoch": 0.6938018423575507, "grad_norm": 452.0, "learning_rate": 2.2637128374019385e-05, "loss": 14.4377, "step": 16645 }, { "epoch": 0.693843524655079, "grad_norm": 308.0, "learning_rate": 2.2631479100339148e-05, "loss": 11.2501, "step": 16646 }, { "epoch": 0.6938852069526072, "grad_norm": 368.0, "learning_rate": 2.2625830325452598e-05, "loss": 16.2503, "step": 16647 }, { "epoch": 0.6939268892501355, "grad_norm": 168.0, "learning_rate": 2.2620182049462734e-05, "loss": 10.8753, "step": 16648 }, { "epoch": 0.6939685715476637, "grad_norm": 173.0, "learning_rate": 2.2614534272472486e-05, "loss": 9.5004, "step": 16649 }, { "epoch": 0.694010253845192, "grad_norm": 592.0, "learning_rate": 2.2608886994584784e-05, "loss": 18.3758, "step": 16650 }, { "epoch": 0.6940519361427202, "grad_norm": 197.0, "learning_rate": 2.260324021590255e-05, "loss": 11.1877, "step": 16651 }, { "epoch": 0.6940936184402484, "grad_norm": 175.0, "learning_rate": 2.259759393652869e-05, "loss": 5.5944, "step": 16652 }, { "epoch": 0.6941353007377766, "grad_norm": 268.0, "learning_rate": 2.2591948156566113e-05, "loss": 10.6252, "step": 16653 }, { "epoch": 0.694176983035305, "grad_norm": 456.0, "learning_rate": 2.2586302876117714e-05, "loss": 16.6257, "step": 16654 }, { "epoch": 0.6942186653328332, "grad_norm": 302.0, "learning_rate": 2.2580658095286382e-05, "loss": 13.3127, "step": 16655 }, { "epoch": 0.6942603476303614, "grad_norm": 101.0, "learning_rate": 2.257501381417499e-05, "loss": 9.188, "step": 16656 }, { "epoch": 0.6943020299278896, "grad_norm": 398.0, "learning_rate": 2.2569370032886406e-05, "loss": 15.3774, "step": 16657 }, { "epoch": 0.6943437122254179, "grad_norm": 170.0, "learning_rate": 2.2563726751523484e-05, "loss": 8.1253, "step": 16658 }, { "epoch": 0.6943853945229461, "grad_norm": 212.0, "learning_rate": 2.255808397018908e-05, "loss": 11.8129, "step": 16659 }, { "epoch": 0.6944270768204743, "grad_norm": 424.0, "learning_rate": 2.2552441688986035e-05, "loss": 15.0002, "step": 16660 }, { "epoch": 0.6944687591180025, "grad_norm": 916.0, "learning_rate": 2.2546799908017174e-05, "loss": 22.8785, "step": 16661 }, { "epoch": 0.6945104414155309, "grad_norm": 464.0, "learning_rate": 2.2541158627385322e-05, "loss": 16.3753, "step": 16662 }, { "epoch": 0.6945521237130591, "grad_norm": 254.0, "learning_rate": 2.253551784719329e-05, "loss": 12.4382, "step": 16663 }, { "epoch": 0.6945938060105873, "grad_norm": 568.0, "learning_rate": 2.2529877567543882e-05, "loss": 16.8813, "step": 16664 }, { "epoch": 0.6946354883081155, "grad_norm": 226.0, "learning_rate": 2.2524237788539882e-05, "loss": 11.7503, "step": 16665 }, { "epoch": 0.6946771706056438, "grad_norm": 628.0, "learning_rate": 2.2518598510284123e-05, "loss": 18.7527, "step": 16666 }, { "epoch": 0.694718852903172, "grad_norm": 330.0, "learning_rate": 2.25129597328793e-05, "loss": 16.376, "step": 16667 }, { "epoch": 0.6947605352007002, "grad_norm": 418.0, "learning_rate": 2.2507321456428272e-05, "loss": 15.001, "step": 16668 }, { "epoch": 0.6948022174982285, "grad_norm": 219.0, "learning_rate": 2.2501683681033712e-05, "loss": 10.9378, "step": 16669 }, { "epoch": 0.6948438997957568, "grad_norm": 748.0, "learning_rate": 2.2496046406798444e-05, "loss": 18.5037, "step": 16670 }, { "epoch": 0.694885582093285, "grad_norm": 70.0, "learning_rate": 2.249040963382513e-05, "loss": 8.1254, "step": 16671 }, { "epoch": 0.6949272643908132, "grad_norm": 458.0, "learning_rate": 2.248477336221659e-05, "loss": 15.1259, "step": 16672 }, { "epoch": 0.6949689466883414, "grad_norm": 236.0, "learning_rate": 2.2479137592075455e-05, "loss": 12.6257, "step": 16673 }, { "epoch": 0.6950106289858697, "grad_norm": 73.5, "learning_rate": 2.2473502323504498e-05, "loss": 9.8752, "step": 16674 }, { "epoch": 0.695052311283398, "grad_norm": 185.0, "learning_rate": 2.246786755660641e-05, "loss": 11.376, "step": 16675 }, { "epoch": 0.6950939935809262, "grad_norm": 240.0, "learning_rate": 2.2462233291483875e-05, "loss": 12.2507, "step": 16676 }, { "epoch": 0.6951356758784544, "grad_norm": 932.0, "learning_rate": 2.2456599528239587e-05, "loss": 23.0016, "step": 16677 }, { "epoch": 0.6951773581759827, "grad_norm": 422.0, "learning_rate": 2.2450966266976216e-05, "loss": 15.3128, "step": 16678 }, { "epoch": 0.6952190404735109, "grad_norm": 195.0, "learning_rate": 2.244533350779642e-05, "loss": 10.3752, "step": 16679 }, { "epoch": 0.6952607227710391, "grad_norm": 256.0, "learning_rate": 2.2439701250802882e-05, "loss": 10.4387, "step": 16680 }, { "epoch": 0.6953024050685673, "grad_norm": 436.0, "learning_rate": 2.243406949609823e-05, "loss": 15.8128, "step": 16681 }, { "epoch": 0.6953440873660957, "grad_norm": 386.0, "learning_rate": 2.2428438243785106e-05, "loss": 15.8763, "step": 16682 }, { "epoch": 0.6953857696636239, "grad_norm": 360.0, "learning_rate": 2.2422807493966146e-05, "loss": 14.6253, "step": 16683 }, { "epoch": 0.6954274519611521, "grad_norm": 516.0, "learning_rate": 2.2417177246743964e-05, "loss": 18.0002, "step": 16684 }, { "epoch": 0.6954691342586803, "grad_norm": 115.0, "learning_rate": 2.2411547502221182e-05, "loss": 9.3136, "step": 16685 }, { "epoch": 0.6955108165562086, "grad_norm": 424.0, "learning_rate": 2.2405918260500386e-05, "loss": 15.6298, "step": 16686 }, { "epoch": 0.6955524988537368, "grad_norm": 204.0, "learning_rate": 2.2400289521684187e-05, "loss": 11.8756, "step": 16687 }, { "epoch": 0.695594181151265, "grad_norm": 111.0, "learning_rate": 2.2394661285875155e-05, "loss": 10.7506, "step": 16688 }, { "epoch": 0.6956358634487932, "grad_norm": 1544.0, "learning_rate": 2.2389033553175858e-05, "loss": 35.5002, "step": 16689 }, { "epoch": 0.6956775457463216, "grad_norm": 204.0, "learning_rate": 2.2383406323688917e-05, "loss": 10.5003, "step": 16690 }, { "epoch": 0.6957192280438498, "grad_norm": 528.0, "learning_rate": 2.23777795975168e-05, "loss": 17.6295, "step": 16691 }, { "epoch": 0.695760910341378, "grad_norm": 188.0, "learning_rate": 2.237215337476215e-05, "loss": 7.5638, "step": 16692 }, { "epoch": 0.6958025926389062, "grad_norm": 157.0, "learning_rate": 2.2366527655527415e-05, "loss": 8.4377, "step": 16693 }, { "epoch": 0.6958442749364345, "grad_norm": 98.0, "learning_rate": 2.2360902439915198e-05, "loss": 9.9378, "step": 16694 }, { "epoch": 0.6958859572339627, "grad_norm": 218.0, "learning_rate": 2.2355277728027955e-05, "loss": 12.3753, "step": 16695 }, { "epoch": 0.695927639531491, "grad_norm": 167.0, "learning_rate": 2.2349653519968273e-05, "loss": 10.6878, "step": 16696 }, { "epoch": 0.6959693218290192, "grad_norm": 40.75, "learning_rate": 2.2344029815838564e-05, "loss": 6.5628, "step": 16697 }, { "epoch": 0.6960110041265475, "grad_norm": 396.0, "learning_rate": 2.2338406615741408e-05, "loss": 13.5012, "step": 16698 }, { "epoch": 0.6960526864240757, "grad_norm": 398.0, "learning_rate": 2.233278391977921e-05, "loss": 15.7506, "step": 16699 }, { "epoch": 0.6960943687216039, "grad_norm": 163.0, "learning_rate": 2.2327161728054497e-05, "loss": 10.7507, "step": 16700 }, { "epoch": 0.6961360510191322, "grad_norm": 432.0, "learning_rate": 2.232154004066972e-05, "loss": 15.5627, "step": 16701 }, { "epoch": 0.6961777333166604, "grad_norm": 306.0, "learning_rate": 2.2315918857727335e-05, "loss": 11.5648, "step": 16702 }, { "epoch": 0.6962194156141887, "grad_norm": 1136.0, "learning_rate": 2.231029817932978e-05, "loss": 25.8799, "step": 16703 }, { "epoch": 0.6962610979117169, "grad_norm": 154.0, "learning_rate": 2.2304678005579504e-05, "loss": 8.938, "step": 16704 }, { "epoch": 0.6963027802092452, "grad_norm": 388.0, "learning_rate": 2.2299058336578933e-05, "loss": 16.0003, "step": 16705 }, { "epoch": 0.6963444625067734, "grad_norm": 486.0, "learning_rate": 2.2293439172430476e-05, "loss": 17.8758, "step": 16706 }, { "epoch": 0.6963861448043016, "grad_norm": 712.0, "learning_rate": 2.2287820513236553e-05, "loss": 19.1251, "step": 16707 }, { "epoch": 0.6964278271018298, "grad_norm": 238.0, "learning_rate": 2.2282202359099557e-05, "loss": 12.0627, "step": 16708 }, { "epoch": 0.6964695093993581, "grad_norm": 108.5, "learning_rate": 2.2276584710121888e-05, "loss": 9.7503, "step": 16709 }, { "epoch": 0.6965111916968864, "grad_norm": 512.0, "learning_rate": 2.2270967566405925e-05, "loss": 16.7535, "step": 16710 }, { "epoch": 0.6965528739944146, "grad_norm": 740.0, "learning_rate": 2.226535092805404e-05, "loss": 19.5015, "step": 16711 }, { "epoch": 0.6965945562919428, "grad_norm": 214.0, "learning_rate": 2.225973479516859e-05, "loss": 11.7509, "step": 16712 }, { "epoch": 0.6966362385894711, "grad_norm": 101.5, "learning_rate": 2.2254119167851945e-05, "loss": 7.4378, "step": 16713 }, { "epoch": 0.6966779208869993, "grad_norm": 392.0, "learning_rate": 2.224850404620643e-05, "loss": 14.5627, "step": 16714 }, { "epoch": 0.6967196031845275, "grad_norm": 91.5, "learning_rate": 2.2242889430334384e-05, "loss": 8.6256, "step": 16715 }, { "epoch": 0.6967612854820557, "grad_norm": 167.0, "learning_rate": 2.2237275320338174e-05, "loss": 7.4696, "step": 16716 }, { "epoch": 0.6968029677795841, "grad_norm": 98.0, "learning_rate": 2.2231661716320052e-05, "loss": 10.3759, "step": 16717 }, { "epoch": 0.6968446500771123, "grad_norm": 560.0, "learning_rate": 2.2226048618382395e-05, "loss": 18.2502, "step": 16718 }, { "epoch": 0.6968863323746405, "grad_norm": 198.0, "learning_rate": 2.222043602662743e-05, "loss": 11.2502, "step": 16719 }, { "epoch": 0.6969280146721687, "grad_norm": 404.0, "learning_rate": 2.2214823941157524e-05, "loss": 15.4377, "step": 16720 }, { "epoch": 0.696969696969697, "grad_norm": 217.0, "learning_rate": 2.2209212362074876e-05, "loss": 11.813, "step": 16721 }, { "epoch": 0.6970113792672252, "grad_norm": 486.0, "learning_rate": 2.220360128948184e-05, "loss": 17.126, "step": 16722 }, { "epoch": 0.6970530615647534, "grad_norm": 400.0, "learning_rate": 2.2197990723480604e-05, "loss": 15.1879, "step": 16723 }, { "epoch": 0.6970947438622817, "grad_norm": 824.0, "learning_rate": 2.2192380664173472e-05, "loss": 23.5019, "step": 16724 }, { "epoch": 0.69713642615981, "grad_norm": 316.0, "learning_rate": 2.218677111166267e-05, "loss": 14.2508, "step": 16725 }, { "epoch": 0.6971781084573382, "grad_norm": 228.0, "learning_rate": 2.2181162066050433e-05, "loss": 11.5626, "step": 16726 }, { "epoch": 0.6972197907548664, "grad_norm": 332.0, "learning_rate": 2.2175553527438986e-05, "loss": 14.2505, "step": 16727 }, { "epoch": 0.6972614730523946, "grad_norm": 322.0, "learning_rate": 2.216994549593055e-05, "loss": 13.1877, "step": 16728 }, { "epoch": 0.6973031553499229, "grad_norm": 136.0, "learning_rate": 2.2164337971627325e-05, "loss": 10.2506, "step": 16729 }, { "epoch": 0.6973448376474511, "grad_norm": 512.0, "learning_rate": 2.2158730954631513e-05, "loss": 16.2521, "step": 16730 }, { "epoch": 0.6973865199449794, "grad_norm": 292.0, "learning_rate": 2.21531244450453e-05, "loss": 14.0012, "step": 16731 }, { "epoch": 0.6974282022425076, "grad_norm": 234.0, "learning_rate": 2.2147518442970866e-05, "loss": 12.3753, "step": 16732 }, { "epoch": 0.6974698845400359, "grad_norm": 1504.0, "learning_rate": 2.214191294851038e-05, "loss": 30.3789, "step": 16733 }, { "epoch": 0.6975115668375641, "grad_norm": 556.0, "learning_rate": 2.2136307961766002e-05, "loss": 18.3752, "step": 16734 }, { "epoch": 0.6975532491350923, "grad_norm": 122.5, "learning_rate": 2.2130703482839886e-05, "loss": 8.0018, "step": 16735 }, { "epoch": 0.6975949314326205, "grad_norm": 80.5, "learning_rate": 2.2125099511834173e-05, "loss": 7.8445, "step": 16736 }, { "epoch": 0.6976366137301488, "grad_norm": 350.0, "learning_rate": 2.2119496048851002e-05, "loss": 14.8765, "step": 16737 }, { "epoch": 0.6976782960276771, "grad_norm": 81.5, "learning_rate": 2.2113893093992484e-05, "loss": 9.0002, "step": 16738 }, { "epoch": 0.6977199783252053, "grad_norm": 490.0, "learning_rate": 2.2108290647360724e-05, "loss": 17.8751, "step": 16739 }, { "epoch": 0.6977616606227335, "grad_norm": 338.0, "learning_rate": 2.210268870905788e-05, "loss": 13.3127, "step": 16740 }, { "epoch": 0.6978033429202618, "grad_norm": 260.0, "learning_rate": 2.2097087279185973e-05, "loss": 13.6887, "step": 16741 }, { "epoch": 0.69784502521779, "grad_norm": 104.0, "learning_rate": 2.2091486357847163e-05, "loss": 7.6566, "step": 16742 }, { "epoch": 0.6978867075153182, "grad_norm": 190.0, "learning_rate": 2.2085885945143453e-05, "loss": 11.4384, "step": 16743 }, { "epoch": 0.6979283898128464, "grad_norm": 438.0, "learning_rate": 2.208028604117699e-05, "loss": 16.0011, "step": 16744 }, { "epoch": 0.6979700721103748, "grad_norm": 476.0, "learning_rate": 2.2074686646049758e-05, "loss": 17.2504, "step": 16745 }, { "epoch": 0.698011754407903, "grad_norm": 344.0, "learning_rate": 2.206908775986387e-05, "loss": 12.626, "step": 16746 }, { "epoch": 0.6980534367054312, "grad_norm": 196.0, "learning_rate": 2.206348938272131e-05, "loss": 9.6252, "step": 16747 }, { "epoch": 0.6980951190029594, "grad_norm": 224.0, "learning_rate": 2.2057891514724165e-05, "loss": 11.6878, "step": 16748 }, { "epoch": 0.6981368013004877, "grad_norm": 924.0, "learning_rate": 2.2052294155974394e-05, "loss": 24.2508, "step": 16749 }, { "epoch": 0.6981784835980159, "grad_norm": 384.0, "learning_rate": 2.204669730657406e-05, "loss": 11.3781, "step": 16750 }, { "epoch": 0.6982201658955441, "grad_norm": 420.0, "learning_rate": 2.204110096662515e-05, "loss": 14.4377, "step": 16751 }, { "epoch": 0.6982618481930724, "grad_norm": 168.0, "learning_rate": 2.203550513622966e-05, "loss": 11.3128, "step": 16752 }, { "epoch": 0.6983035304906007, "grad_norm": 148.0, "learning_rate": 2.2029909815489568e-05, "loss": 11.2503, "step": 16753 }, { "epoch": 0.6983452127881289, "grad_norm": 158.0, "learning_rate": 2.2024315004506852e-05, "loss": 10.8753, "step": 16754 }, { "epoch": 0.6983868950856571, "grad_norm": 334.0, "learning_rate": 2.201872070338348e-05, "loss": 14.5627, "step": 16755 }, { "epoch": 0.6984285773831853, "grad_norm": 416.0, "learning_rate": 2.2013126912221405e-05, "loss": 15.6253, "step": 16756 }, { "epoch": 0.6984702596807136, "grad_norm": 178.0, "learning_rate": 2.2007533631122578e-05, "loss": 10.5004, "step": 16757 }, { "epoch": 0.6985119419782418, "grad_norm": 193.0, "learning_rate": 2.2001940860188934e-05, "loss": 9.7503, "step": 16758 }, { "epoch": 0.6985536242757701, "grad_norm": 242.0, "learning_rate": 2.1996348599522408e-05, "loss": 15.0006, "step": 16759 }, { "epoch": 0.6985953065732983, "grad_norm": 86.0, "learning_rate": 2.1990756849224915e-05, "loss": 8.7502, "step": 16760 }, { "epoch": 0.6986369888708266, "grad_norm": 472.0, "learning_rate": 2.1985165609398357e-05, "loss": 15.4383, "step": 16761 }, { "epoch": 0.6986786711683548, "grad_norm": 552.0, "learning_rate": 2.197957488014465e-05, "loss": 17.6252, "step": 16762 }, { "epoch": 0.698720353465883, "grad_norm": 258.0, "learning_rate": 2.197398466156567e-05, "loss": 9.7505, "step": 16763 }, { "epoch": 0.6987620357634112, "grad_norm": 133.0, "learning_rate": 2.1968394953763315e-05, "loss": 10.563, "step": 16764 }, { "epoch": 0.6988037180609395, "grad_norm": 246.0, "learning_rate": 2.1962805756839432e-05, "loss": 11.5627, "step": 16765 }, { "epoch": 0.6988454003584678, "grad_norm": 270.0, "learning_rate": 2.1957217070895936e-05, "loss": 12.8752, "step": 16766 }, { "epoch": 0.698887082655996, "grad_norm": 306.0, "learning_rate": 2.1951628896034615e-05, "loss": 11.0007, "step": 16767 }, { "epoch": 0.6989287649535242, "grad_norm": 416.0, "learning_rate": 2.1946041232357385e-05, "loss": 14.7537, "step": 16768 }, { "epoch": 0.6989704472510525, "grad_norm": 225.0, "learning_rate": 2.1940454079966e-05, "loss": 11.438, "step": 16769 }, { "epoch": 0.6990121295485807, "grad_norm": 600.0, "learning_rate": 2.1934867438962376e-05, "loss": 17.6251, "step": 16770 }, { "epoch": 0.6990538118461089, "grad_norm": 564.0, "learning_rate": 2.1929281309448242e-05, "loss": 18.5002, "step": 16771 }, { "epoch": 0.6990954941436373, "grad_norm": 157.0, "learning_rate": 2.1923695691525485e-05, "loss": 9.8127, "step": 16772 }, { "epoch": 0.6991371764411655, "grad_norm": 458.0, "learning_rate": 2.191811058529583e-05, "loss": 16.2503, "step": 16773 }, { "epoch": 0.6991788587386937, "grad_norm": 280.0, "learning_rate": 2.1912525990861123e-05, "loss": 13.8127, "step": 16774 }, { "epoch": 0.6992205410362219, "grad_norm": 140.0, "learning_rate": 2.190694190832312e-05, "loss": 10.3128, "step": 16775 }, { "epoch": 0.6992622233337502, "grad_norm": 216.0, "learning_rate": 2.1901358337783595e-05, "loss": 8.6265, "step": 16776 }, { "epoch": 0.6993039056312784, "grad_norm": 81.5, "learning_rate": 2.189577527934431e-05, "loss": 8.6259, "step": 16777 }, { "epoch": 0.6993455879288066, "grad_norm": 402.0, "learning_rate": 2.1890192733107017e-05, "loss": 14.8129, "step": 16778 }, { "epoch": 0.6993872702263348, "grad_norm": 155.0, "learning_rate": 2.1884610699173458e-05, "loss": 9.8752, "step": 16779 }, { "epoch": 0.6994289525238632, "grad_norm": 210.0, "learning_rate": 2.187902917764536e-05, "loss": 11.0628, "step": 16780 }, { "epoch": 0.6994706348213914, "grad_norm": 352.0, "learning_rate": 2.1873448168624454e-05, "loss": 14.0627, "step": 16781 }, { "epoch": 0.6995123171189196, "grad_norm": 83.0, "learning_rate": 2.186786767221245e-05, "loss": 8.1255, "step": 16782 }, { "epoch": 0.6995539994164478, "grad_norm": 556.0, "learning_rate": 2.1862287688511057e-05, "loss": 17.0002, "step": 16783 }, { "epoch": 0.6995956817139761, "grad_norm": 728.0, "learning_rate": 2.1856708217621967e-05, "loss": 21.0004, "step": 16784 }, { "epoch": 0.6996373640115043, "grad_norm": 276.0, "learning_rate": 2.1851129259646875e-05, "loss": 11.0011, "step": 16785 }, { "epoch": 0.6996790463090325, "grad_norm": 624.0, "learning_rate": 2.1845550814687442e-05, "loss": 20.3755, "step": 16786 }, { "epoch": 0.6997207286065608, "grad_norm": 200.0, "learning_rate": 2.183997288284535e-05, "loss": 10.8753, "step": 16787 }, { "epoch": 0.6997624109040891, "grad_norm": 205.0, "learning_rate": 2.1834395464222253e-05, "loss": 12.5004, "step": 16788 }, { "epoch": 0.6998040932016173, "grad_norm": 424.0, "learning_rate": 2.1828818558919796e-05, "loss": 15.8761, "step": 16789 }, { "epoch": 0.6998457754991455, "grad_norm": 189.0, "learning_rate": 2.182324216703962e-05, "loss": 11.3127, "step": 16790 }, { "epoch": 0.6998874577966737, "grad_norm": 704.0, "learning_rate": 2.1817666288683343e-05, "loss": 21.3754, "step": 16791 }, { "epoch": 0.699929140094202, "grad_norm": 656.0, "learning_rate": 2.1812090923952633e-05, "loss": 19.0005, "step": 16792 }, { "epoch": 0.6999708223917303, "grad_norm": 59.25, "learning_rate": 2.1806516072949028e-05, "loss": 7.2507, "step": 16793 }, { "epoch": 0.7000125046892585, "grad_norm": 111.0, "learning_rate": 2.1800941735774216e-05, "loss": 9.3761, "step": 16794 }, { "epoch": 0.7000541869867867, "grad_norm": 258.0, "learning_rate": 2.1795367912529703e-05, "loss": 11.6881, "step": 16795 }, { "epoch": 0.700095869284315, "grad_norm": 165.0, "learning_rate": 2.178979460331715e-05, "loss": 10.7513, "step": 16796 }, { "epoch": 0.7001375515818432, "grad_norm": 236.0, "learning_rate": 2.178422180823806e-05, "loss": 12.688, "step": 16797 }, { "epoch": 0.7001792338793714, "grad_norm": 940.0, "learning_rate": 2.177864952739407e-05, "loss": 21.7547, "step": 16798 }, { "epoch": 0.7002209161768996, "grad_norm": 75.0, "learning_rate": 2.1773077760886658e-05, "loss": 6.4689, "step": 16799 }, { "epoch": 0.700262598474428, "grad_norm": 128.0, "learning_rate": 2.1767506508817426e-05, "loss": 8.8757, "step": 16800 }, { "epoch": 0.7003042807719562, "grad_norm": 246.0, "learning_rate": 2.1761935771287895e-05, "loss": 12.3751, "step": 16801 }, { "epoch": 0.7003459630694844, "grad_norm": 360.0, "learning_rate": 2.1756365548399594e-05, "loss": 15.377, "step": 16802 }, { "epoch": 0.7003876453670126, "grad_norm": 458.0, "learning_rate": 2.1750795840254036e-05, "loss": 15.5001, "step": 16803 }, { "epoch": 0.7004293276645409, "grad_norm": 264.0, "learning_rate": 2.174522664695273e-05, "loss": 14.0628, "step": 16804 }, { "epoch": 0.7004710099620691, "grad_norm": 430.0, "learning_rate": 2.173965796859718e-05, "loss": 14.5631, "step": 16805 }, { "epoch": 0.7005126922595973, "grad_norm": 892.0, "learning_rate": 2.1734089805288872e-05, "loss": 23.7504, "step": 16806 }, { "epoch": 0.7005543745571255, "grad_norm": 680.0, "learning_rate": 2.1728522157129288e-05, "loss": 17.6284, "step": 16807 }, { "epoch": 0.7005960568546539, "grad_norm": 1392.0, "learning_rate": 2.1722955024219893e-05, "loss": 25.5005, "step": 16808 }, { "epoch": 0.7006377391521821, "grad_norm": 214.0, "learning_rate": 2.1717388406662155e-05, "loss": 12.3133, "step": 16809 }, { "epoch": 0.7006794214497103, "grad_norm": 236.0, "learning_rate": 2.1711822304557528e-05, "loss": 11.1876, "step": 16810 }, { "epoch": 0.7007211037472385, "grad_norm": 434.0, "learning_rate": 2.1706256718007446e-05, "loss": 13.7506, "step": 16811 }, { "epoch": 0.7007627860447668, "grad_norm": 408.0, "learning_rate": 2.1700691647113347e-05, "loss": 14.3757, "step": 16812 }, { "epoch": 0.700804468342295, "grad_norm": 404.0, "learning_rate": 2.1695127091976654e-05, "loss": 17.2503, "step": 16813 }, { "epoch": 0.7008461506398233, "grad_norm": 262.0, "learning_rate": 2.1689563052698787e-05, "loss": 12.0006, "step": 16814 }, { "epoch": 0.7008878329373515, "grad_norm": 932.0, "learning_rate": 2.1683999529381123e-05, "loss": 26.5002, "step": 16815 }, { "epoch": 0.7009295152348798, "grad_norm": 123.5, "learning_rate": 2.1678436522125123e-05, "loss": 10.9378, "step": 16816 }, { "epoch": 0.700971197532408, "grad_norm": 95.0, "learning_rate": 2.167287403103209e-05, "loss": 9.9378, "step": 16817 }, { "epoch": 0.7010128798299362, "grad_norm": 584.0, "learning_rate": 2.166731205620348e-05, "loss": 19.751, "step": 16818 }, { "epoch": 0.7010545621274644, "grad_norm": 736.0, "learning_rate": 2.1661750597740586e-05, "loss": 18.8756, "step": 16819 }, { "epoch": 0.7010962444249927, "grad_norm": 588.0, "learning_rate": 2.1656189655744845e-05, "loss": 19.1251, "step": 16820 }, { "epoch": 0.701137926722521, "grad_norm": 580.0, "learning_rate": 2.1650629230317527e-05, "loss": 18.8765, "step": 16821 }, { "epoch": 0.7011796090200492, "grad_norm": 328.0, "learning_rate": 2.1645069321560042e-05, "loss": 14.8134, "step": 16822 }, { "epoch": 0.7012212913175774, "grad_norm": 162.0, "learning_rate": 2.163950992957364e-05, "loss": 10.6254, "step": 16823 }, { "epoch": 0.7012629736151057, "grad_norm": 680.0, "learning_rate": 2.163395105445974e-05, "loss": 21.1253, "step": 16824 }, { "epoch": 0.7013046559126339, "grad_norm": 428.0, "learning_rate": 2.162839269631955e-05, "loss": 16.3757, "step": 16825 }, { "epoch": 0.7013463382101621, "grad_norm": 386.0, "learning_rate": 2.1622834855254448e-05, "loss": 15.2502, "step": 16826 }, { "epoch": 0.7013880205076903, "grad_norm": 318.0, "learning_rate": 2.1617277531365697e-05, "loss": 14.2505, "step": 16827 }, { "epoch": 0.7014297028052187, "grad_norm": 1288.0, "learning_rate": 2.161172072475458e-05, "loss": 26.2536, "step": 16828 }, { "epoch": 0.7014713851027469, "grad_norm": 326.0, "learning_rate": 2.160616443552238e-05, "loss": 13.6257, "step": 16829 }, { "epoch": 0.7015130674002751, "grad_norm": 502.0, "learning_rate": 2.160060866377035e-05, "loss": 16.7509, "step": 16830 }, { "epoch": 0.7015547496978033, "grad_norm": 502.0, "learning_rate": 2.1595053409599747e-05, "loss": 16.3755, "step": 16831 }, { "epoch": 0.7015964319953316, "grad_norm": 672.0, "learning_rate": 2.1589498673111803e-05, "loss": 20.7503, "step": 16832 }, { "epoch": 0.7016381142928598, "grad_norm": 418.0, "learning_rate": 2.1583944454407795e-05, "loss": 15.0004, "step": 16833 }, { "epoch": 0.701679796590388, "grad_norm": 197.0, "learning_rate": 2.1578390753588895e-05, "loss": 10.5003, "step": 16834 }, { "epoch": 0.7017214788879163, "grad_norm": 276.0, "learning_rate": 2.1572837570756376e-05, "loss": 11.9383, "step": 16835 }, { "epoch": 0.7017631611854446, "grad_norm": 162.0, "learning_rate": 2.1567284906011386e-05, "loss": 10.8757, "step": 16836 }, { "epoch": 0.7018048434829728, "grad_norm": 692.0, "learning_rate": 2.1561732759455183e-05, "loss": 21.6252, "step": 16837 }, { "epoch": 0.701846525780501, "grad_norm": 81.5, "learning_rate": 2.1556181131188897e-05, "loss": 8.0006, "step": 16838 }, { "epoch": 0.7018882080780292, "grad_norm": 382.0, "learning_rate": 2.155063002131376e-05, "loss": 16.3753, "step": 16839 }, { "epoch": 0.7019298903755575, "grad_norm": 412.0, "learning_rate": 2.1545079429930885e-05, "loss": 15.5003, "step": 16840 }, { "epoch": 0.7019715726730857, "grad_norm": 268.0, "learning_rate": 2.1539529357141487e-05, "loss": 12.8754, "step": 16841 }, { "epoch": 0.702013254970614, "grad_norm": 185.0, "learning_rate": 2.153397980304669e-05, "loss": 11.9392, "step": 16842 }, { "epoch": 0.7020549372681422, "grad_norm": 162.0, "learning_rate": 2.1528430767747632e-05, "loss": 9.5002, "step": 16843 }, { "epoch": 0.7020966195656705, "grad_norm": 324.0, "learning_rate": 2.1522882251345454e-05, "loss": 11.8127, "step": 16844 }, { "epoch": 0.7021383018631987, "grad_norm": 187.0, "learning_rate": 2.151733425394128e-05, "loss": 10.5009, "step": 16845 }, { "epoch": 0.7021799841607269, "grad_norm": 728.0, "learning_rate": 2.1511786775636213e-05, "loss": 20.5027, "step": 16846 }, { "epoch": 0.7022216664582552, "grad_norm": 266.0, "learning_rate": 2.1506239816531366e-05, "loss": 11.6878, "step": 16847 }, { "epoch": 0.7022633487557834, "grad_norm": 226.0, "learning_rate": 2.150069337672782e-05, "loss": 12.1878, "step": 16848 }, { "epoch": 0.7023050310533117, "grad_norm": 298.0, "learning_rate": 2.149514745632667e-05, "loss": 12.2528, "step": 16849 }, { "epoch": 0.7023467133508399, "grad_norm": 608.0, "learning_rate": 2.1489602055428993e-05, "loss": 19.0005, "step": 16850 }, { "epoch": 0.7023883956483682, "grad_norm": 1004.0, "learning_rate": 2.148405717413584e-05, "loss": 25.7524, "step": 16851 }, { "epoch": 0.7024300779458964, "grad_norm": 234.0, "learning_rate": 2.1478512812548285e-05, "loss": 8.8751, "step": 16852 }, { "epoch": 0.7024717602434246, "grad_norm": 358.0, "learning_rate": 2.147296897076736e-05, "loss": 15.0005, "step": 16853 }, { "epoch": 0.7025134425409528, "grad_norm": 176.0, "learning_rate": 2.146742564889411e-05, "loss": 11.5627, "step": 16854 }, { "epoch": 0.7025551248384811, "grad_norm": 176.0, "learning_rate": 2.1461882847029562e-05, "loss": 12.6881, "step": 16855 }, { "epoch": 0.7025968071360094, "grad_norm": 360.0, "learning_rate": 2.1456340565274718e-05, "loss": 13.8751, "step": 16856 }, { "epoch": 0.7026384894335376, "grad_norm": 532.0, "learning_rate": 2.1450798803730633e-05, "loss": 16.1251, "step": 16857 }, { "epoch": 0.7026801717310658, "grad_norm": 147.0, "learning_rate": 2.1445257562498234e-05, "loss": 9.3126, "step": 16858 }, { "epoch": 0.7027218540285941, "grad_norm": 468.0, "learning_rate": 2.1439716841678592e-05, "loss": 14.5627, "step": 16859 }, { "epoch": 0.7027635363261223, "grad_norm": 398.0, "learning_rate": 2.1434176641372612e-05, "loss": 14.0037, "step": 16860 }, { "epoch": 0.7028052186236505, "grad_norm": 316.0, "learning_rate": 2.1428636961681332e-05, "loss": 13.376, "step": 16861 }, { "epoch": 0.7028469009211787, "grad_norm": 596.0, "learning_rate": 2.142309780270564e-05, "loss": 20.1252, "step": 16862 }, { "epoch": 0.7028885832187071, "grad_norm": 175.0, "learning_rate": 2.1417559164546563e-05, "loss": 10.3135, "step": 16863 }, { "epoch": 0.7029302655162353, "grad_norm": 516.0, "learning_rate": 2.1412021047304976e-05, "loss": 18.7509, "step": 16864 }, { "epoch": 0.7029719478137635, "grad_norm": 460.0, "learning_rate": 2.140648345108186e-05, "loss": 14.8753, "step": 16865 }, { "epoch": 0.7030136301112917, "grad_norm": 364.0, "learning_rate": 2.1400946375978125e-05, "loss": 14.6877, "step": 16866 }, { "epoch": 0.70305531240882, "grad_norm": 155.0, "learning_rate": 2.1395409822094674e-05, "loss": 7.7819, "step": 16867 }, { "epoch": 0.7030969947063482, "grad_norm": 1088.0, "learning_rate": 2.1389873789532423e-05, "loss": 22.3801, "step": 16868 }, { "epoch": 0.7031386770038764, "grad_norm": 664.0, "learning_rate": 2.1384338278392264e-05, "loss": 20.6256, "step": 16869 }, { "epoch": 0.7031803593014047, "grad_norm": 180.0, "learning_rate": 2.1378803288775074e-05, "loss": 10.5627, "step": 16870 }, { "epoch": 0.703222041598933, "grad_norm": 97.0, "learning_rate": 2.137326882078174e-05, "loss": 9.5002, "step": 16871 }, { "epoch": 0.7032637238964612, "grad_norm": 166.0, "learning_rate": 2.1367734874513124e-05, "loss": 11.5628, "step": 16872 }, { "epoch": 0.7033054061939894, "grad_norm": 111.5, "learning_rate": 2.1362201450070075e-05, "loss": 8.3129, "step": 16873 }, { "epoch": 0.7033470884915176, "grad_norm": 892.0, "learning_rate": 2.135666854755345e-05, "loss": 25.0007, "step": 16874 }, { "epoch": 0.7033887707890459, "grad_norm": 105.0, "learning_rate": 2.1351136167064084e-05, "loss": 11.251, "step": 16875 }, { "epoch": 0.7034304530865741, "grad_norm": 169.0, "learning_rate": 2.1345604308702803e-05, "loss": 10.6257, "step": 16876 }, { "epoch": 0.7034721353841024, "grad_norm": 203.0, "learning_rate": 2.134007297257043e-05, "loss": 11.6252, "step": 16877 }, { "epoch": 0.7035138176816306, "grad_norm": 296.0, "learning_rate": 2.1334542158767773e-05, "loss": 13.5628, "step": 16878 }, { "epoch": 0.7035554999791589, "grad_norm": 336.0, "learning_rate": 2.1329011867395625e-05, "loss": 14.2502, "step": 16879 }, { "epoch": 0.7035971822766871, "grad_norm": 118.0, "learning_rate": 2.1323482098554764e-05, "loss": 9.0003, "step": 16880 }, { "epoch": 0.7036388645742153, "grad_norm": 302.0, "learning_rate": 2.1317952852346023e-05, "loss": 14.0005, "step": 16881 }, { "epoch": 0.7036805468717435, "grad_norm": 636.0, "learning_rate": 2.1312424128870105e-05, "loss": 18.0026, "step": 16882 }, { "epoch": 0.7037222291692719, "grad_norm": 286.0, "learning_rate": 2.130689592822784e-05, "loss": 12.5636, "step": 16883 }, { "epoch": 0.7037639114668001, "grad_norm": 270.0, "learning_rate": 2.1301368250519903e-05, "loss": 12.9381, "step": 16884 }, { "epoch": 0.7038055937643283, "grad_norm": 306.0, "learning_rate": 2.129584109584712e-05, "loss": 11.4392, "step": 16885 }, { "epoch": 0.7038472760618565, "grad_norm": 860.0, "learning_rate": 2.129031446431014e-05, "loss": 23.3783, "step": 16886 }, { "epoch": 0.7038889583593848, "grad_norm": 159.0, "learning_rate": 2.128478835600976e-05, "loss": 9.8751, "step": 16887 }, { "epoch": 0.703930640656913, "grad_norm": 216.0, "learning_rate": 2.1279262771046632e-05, "loss": 11.6265, "step": 16888 }, { "epoch": 0.7039723229544412, "grad_norm": 264.0, "learning_rate": 2.1273737709521523e-05, "loss": 12.4377, "step": 16889 }, { "epoch": 0.7040140052519694, "grad_norm": 170.0, "learning_rate": 2.126821317153506e-05, "loss": 8.6252, "step": 16890 }, { "epoch": 0.7040556875494978, "grad_norm": 142.0, "learning_rate": 2.1262689157187982e-05, "loss": 11.6882, "step": 16891 }, { "epoch": 0.704097369847026, "grad_norm": 120.0, "learning_rate": 2.125716566658094e-05, "loss": 8.2502, "step": 16892 }, { "epoch": 0.7041390521445542, "grad_norm": 400.0, "learning_rate": 2.1251642699814606e-05, "loss": 14.8753, "step": 16893 }, { "epoch": 0.7041807344420824, "grad_norm": 444.0, "learning_rate": 2.1246120256989638e-05, "loss": 15.6252, "step": 16894 }, { "epoch": 0.7042224167396107, "grad_norm": 664.0, "learning_rate": 2.124059833820668e-05, "loss": 20.0004, "step": 16895 }, { "epoch": 0.7042640990371389, "grad_norm": 338.0, "learning_rate": 2.1235076943566372e-05, "loss": 12.8127, "step": 16896 }, { "epoch": 0.7043057813346671, "grad_norm": 266.0, "learning_rate": 2.122955607316934e-05, "loss": 12.1257, "step": 16897 }, { "epoch": 0.7043474636321954, "grad_norm": 880.0, "learning_rate": 2.1224035727116204e-05, "loss": 22.3754, "step": 16898 }, { "epoch": 0.7043891459297237, "grad_norm": 142.0, "learning_rate": 2.121851590550757e-05, "loss": 10.7512, "step": 16899 }, { "epoch": 0.7044308282272519, "grad_norm": 350.0, "learning_rate": 2.1212996608444035e-05, "loss": 13.8131, "step": 16900 }, { "epoch": 0.7044725105247801, "grad_norm": 229.0, "learning_rate": 2.1207477836026195e-05, "loss": 12.0012, "step": 16901 }, { "epoch": 0.7045141928223083, "grad_norm": 290.0, "learning_rate": 2.1201959588354624e-05, "loss": 13.8795, "step": 16902 }, { "epoch": 0.7045558751198366, "grad_norm": 294.0, "learning_rate": 2.1196441865529902e-05, "loss": 12.2503, "step": 16903 }, { "epoch": 0.7045975574173649, "grad_norm": 374.0, "learning_rate": 2.1190924667652585e-05, "loss": 15.1253, "step": 16904 }, { "epoch": 0.7046392397148931, "grad_norm": 308.0, "learning_rate": 2.118540799482322e-05, "loss": 14.1253, "step": 16905 }, { "epoch": 0.7046809220124213, "grad_norm": 468.0, "learning_rate": 2.117989184714233e-05, "loss": 16.1252, "step": 16906 }, { "epoch": 0.7047226043099496, "grad_norm": 492.0, "learning_rate": 2.1174376224710512e-05, "loss": 18.6258, "step": 16907 }, { "epoch": 0.7047642866074778, "grad_norm": 1568.0, "learning_rate": 2.116886112762821e-05, "loss": 35.2501, "step": 16908 }, { "epoch": 0.704805968905006, "grad_norm": 640.0, "learning_rate": 2.1163346555996005e-05, "loss": 18.6253, "step": 16909 }, { "epoch": 0.7048476512025342, "grad_norm": 92.5, "learning_rate": 2.1157832509914332e-05, "loss": 7.9386, "step": 16910 }, { "epoch": 0.7048893335000626, "grad_norm": 85.5, "learning_rate": 2.1152318989483767e-05, "loss": 8.1254, "step": 16911 }, { "epoch": 0.7049310157975908, "grad_norm": 454.0, "learning_rate": 2.11468059948047e-05, "loss": 17.2505, "step": 16912 }, { "epoch": 0.704972698095119, "grad_norm": 424.0, "learning_rate": 2.11412935259777e-05, "loss": 16.0003, "step": 16913 }, { "epoch": 0.7050143803926472, "grad_norm": 536.0, "learning_rate": 2.1135781583103152e-05, "loss": 16.2503, "step": 16914 }, { "epoch": 0.7050560626901755, "grad_norm": 460.0, "learning_rate": 2.1130270166281564e-05, "loss": 16.376, "step": 16915 }, { "epoch": 0.7050977449877037, "grad_norm": 524.0, "learning_rate": 2.1124759275613367e-05, "loss": 18.2504, "step": 16916 }, { "epoch": 0.7051394272852319, "grad_norm": 223.0, "learning_rate": 2.1119248911198987e-05, "loss": 11.6253, "step": 16917 }, { "epoch": 0.7051811095827603, "grad_norm": 143.0, "learning_rate": 2.1113739073138867e-05, "loss": 9.4377, "step": 16918 }, { "epoch": 0.7052227918802885, "grad_norm": 544.0, "learning_rate": 2.1108229761533416e-05, "loss": 18.0004, "step": 16919 }, { "epoch": 0.7052644741778167, "grad_norm": 181.0, "learning_rate": 2.1102720976483036e-05, "loss": 10.1892, "step": 16920 }, { "epoch": 0.7053061564753449, "grad_norm": 620.0, "learning_rate": 2.1097212718088137e-05, "loss": 19.8755, "step": 16921 }, { "epoch": 0.7053478387728732, "grad_norm": 124.0, "learning_rate": 2.10917049864491e-05, "loss": 10.4379, "step": 16922 }, { "epoch": 0.7053895210704014, "grad_norm": 712.0, "learning_rate": 2.108619778166631e-05, "loss": 20.253, "step": 16923 }, { "epoch": 0.7054312033679296, "grad_norm": 584.0, "learning_rate": 2.108069110384013e-05, "loss": 16.379, "step": 16924 }, { "epoch": 0.7054728856654578, "grad_norm": 178.0, "learning_rate": 2.107518495307092e-05, "loss": 11.3752, "step": 16925 }, { "epoch": 0.7055145679629862, "grad_norm": 190.0, "learning_rate": 2.1069679329459037e-05, "loss": 11.3753, "step": 16926 }, { "epoch": 0.7055562502605144, "grad_norm": 145.0, "learning_rate": 2.1064174233104812e-05, "loss": 8.7502, "step": 16927 }, { "epoch": 0.7055979325580426, "grad_norm": 476.0, "learning_rate": 2.1058669664108583e-05, "loss": 17.002, "step": 16928 }, { "epoch": 0.7056396148555708, "grad_norm": 532.0, "learning_rate": 2.1053165622570664e-05, "loss": 18.1252, "step": 16929 }, { "epoch": 0.7056812971530991, "grad_norm": 177.0, "learning_rate": 2.1047662108591377e-05, "loss": 9.8127, "step": 16930 }, { "epoch": 0.7057229794506273, "grad_norm": 217.0, "learning_rate": 2.104215912227101e-05, "loss": 12.5627, "step": 16931 }, { "epoch": 0.7057646617481556, "grad_norm": 1208.0, "learning_rate": 2.103665666370985e-05, "loss": 26.1293, "step": 16932 }, { "epoch": 0.7058063440456838, "grad_norm": 60.25, "learning_rate": 2.1031154733008235e-05, "loss": 8.0633, "step": 16933 }, { "epoch": 0.7058480263432121, "grad_norm": 436.0, "learning_rate": 2.1025653330266355e-05, "loss": 15.6251, "step": 16934 }, { "epoch": 0.7058897086407403, "grad_norm": 516.0, "learning_rate": 2.1020152455584553e-05, "loss": 16.2503, "step": 16935 }, { "epoch": 0.7059313909382685, "grad_norm": 173.0, "learning_rate": 2.1014652109063006e-05, "loss": 10.6253, "step": 16936 }, { "epoch": 0.7059730732357967, "grad_norm": 290.0, "learning_rate": 2.1009152290802038e-05, "loss": 11.5627, "step": 16937 }, { "epoch": 0.706014755533325, "grad_norm": 302.0, "learning_rate": 2.10036530009018e-05, "loss": 14.0628, "step": 16938 }, { "epoch": 0.7060564378308533, "grad_norm": 468.0, "learning_rate": 2.0998154239462603e-05, "loss": 15.6256, "step": 16939 }, { "epoch": 0.7060981201283815, "grad_norm": 478.0, "learning_rate": 2.0992656006584583e-05, "loss": 15.2504, "step": 16940 }, { "epoch": 0.7061398024259097, "grad_norm": 1336.0, "learning_rate": 2.0987158302367997e-05, "loss": 29.6304, "step": 16941 }, { "epoch": 0.706181484723438, "grad_norm": 470.0, "learning_rate": 2.0981661126913026e-05, "loss": 15.1254, "step": 16942 }, { "epoch": 0.7062231670209662, "grad_norm": 94.0, "learning_rate": 2.097616448031986e-05, "loss": 8.9377, "step": 16943 }, { "epoch": 0.7062648493184944, "grad_norm": 354.0, "learning_rate": 2.097066836268867e-05, "loss": 15.1877, "step": 16944 }, { "epoch": 0.7063065316160226, "grad_norm": 346.0, "learning_rate": 2.096517277411963e-05, "loss": 14.6255, "step": 16945 }, { "epoch": 0.706348213913551, "grad_norm": 572.0, "learning_rate": 2.0959677714712895e-05, "loss": 18.5003, "step": 16946 }, { "epoch": 0.7063898962110792, "grad_norm": 246.0, "learning_rate": 2.0954183184568605e-05, "loss": 13.3759, "step": 16947 }, { "epoch": 0.7064315785086074, "grad_norm": 122.5, "learning_rate": 2.0948689183786913e-05, "loss": 8.563, "step": 16948 }, { "epoch": 0.7064732608061356, "grad_norm": 520.0, "learning_rate": 2.0943195712467933e-05, "loss": 18.1251, "step": 16949 }, { "epoch": 0.7065149431036639, "grad_norm": 266.0, "learning_rate": 2.0937702770711792e-05, "loss": 13.0002, "step": 16950 }, { "epoch": 0.7065566254011921, "grad_norm": 172.0, "learning_rate": 2.0932210358618598e-05, "loss": 11.6252, "step": 16951 }, { "epoch": 0.7065983076987203, "grad_norm": 282.0, "learning_rate": 2.0926718476288452e-05, "loss": 5.8767, "step": 16952 }, { "epoch": 0.7066399899962486, "grad_norm": 916.0, "learning_rate": 2.092122712382144e-05, "loss": 23.5005, "step": 16953 }, { "epoch": 0.7066816722937769, "grad_norm": 220.0, "learning_rate": 2.0915736301317645e-05, "loss": 12.3756, "step": 16954 }, { "epoch": 0.7067233545913051, "grad_norm": 205.0, "learning_rate": 2.0910246008877143e-05, "loss": 11.8127, "step": 16955 }, { "epoch": 0.7067650368888333, "grad_norm": 540.0, "learning_rate": 2.0904756246599964e-05, "loss": 17.8754, "step": 16956 }, { "epoch": 0.7068067191863615, "grad_norm": 252.0, "learning_rate": 2.0899267014586234e-05, "loss": 12.8133, "step": 16957 }, { "epoch": 0.7068484014838898, "grad_norm": 984.0, "learning_rate": 2.0893778312935895e-05, "loss": 23.5061, "step": 16958 }, { "epoch": 0.706890083781418, "grad_norm": 112.0, "learning_rate": 2.0888290141749077e-05, "loss": 11.001, "step": 16959 }, { "epoch": 0.7069317660789463, "grad_norm": 326.0, "learning_rate": 2.0882802501125713e-05, "loss": 14.5628, "step": 16960 }, { "epoch": 0.7069734483764745, "grad_norm": 752.0, "learning_rate": 2.08773153911659e-05, "loss": 21.5017, "step": 16961 }, { "epoch": 0.7070151306740028, "grad_norm": 157.0, "learning_rate": 2.0871828811969556e-05, "loss": 11.1252, "step": 16962 }, { "epoch": 0.707056812971531, "grad_norm": 688.0, "learning_rate": 2.086634276363676e-05, "loss": 20.7505, "step": 16963 }, { "epoch": 0.7070984952690592, "grad_norm": 192.0, "learning_rate": 2.0860857246267414e-05, "loss": 11.0627, "step": 16964 }, { "epoch": 0.7071401775665874, "grad_norm": 296.0, "learning_rate": 2.085537225996157e-05, "loss": 14.4381, "step": 16965 }, { "epoch": 0.7071818598641157, "grad_norm": 50.75, "learning_rate": 2.084988780481911e-05, "loss": 7.3754, "step": 16966 }, { "epoch": 0.707223542161644, "grad_norm": 952.0, "learning_rate": 2.084440388094006e-05, "loss": 23.6288, "step": 16967 }, { "epoch": 0.7072652244591722, "grad_norm": 1256.0, "learning_rate": 2.0838920488424333e-05, "loss": 29.1252, "step": 16968 }, { "epoch": 0.7073069067567004, "grad_norm": 116.0, "learning_rate": 2.083343762737187e-05, "loss": 10.1903, "step": 16969 }, { "epoch": 0.7073485890542287, "grad_norm": 171.0, "learning_rate": 2.08279552978826e-05, "loss": 11.5645, "step": 16970 }, { "epoch": 0.7073902713517569, "grad_norm": 564.0, "learning_rate": 2.0822473500056426e-05, "loss": 20.0009, "step": 16971 }, { "epoch": 0.7074319536492851, "grad_norm": 512.0, "learning_rate": 2.081699223399327e-05, "loss": 18.5003, "step": 16972 }, { "epoch": 0.7074736359468133, "grad_norm": 416.0, "learning_rate": 2.081151149979302e-05, "loss": 16.2505, "step": 16973 }, { "epoch": 0.7075153182443417, "grad_norm": 189.0, "learning_rate": 2.0806031297555554e-05, "loss": 9.7504, "step": 16974 }, { "epoch": 0.7075570005418699, "grad_norm": 120.0, "learning_rate": 2.080055162738077e-05, "loss": 11.0629, "step": 16975 }, { "epoch": 0.7075986828393981, "grad_norm": 173.0, "learning_rate": 2.0795072489368522e-05, "loss": 12.1254, "step": 16976 }, { "epoch": 0.7076403651369263, "grad_norm": 153.0, "learning_rate": 2.0789593883618668e-05, "loss": 10.6252, "step": 16977 }, { "epoch": 0.7076820474344546, "grad_norm": 127.5, "learning_rate": 2.0784115810231064e-05, "loss": 9.6879, "step": 16978 }, { "epoch": 0.7077237297319828, "grad_norm": 1408.0, "learning_rate": 2.077863826930554e-05, "loss": 28.2503, "step": 16979 }, { "epoch": 0.707765412029511, "grad_norm": 426.0, "learning_rate": 2.0773161260941926e-05, "loss": 15.4377, "step": 16980 }, { "epoch": 0.7078070943270393, "grad_norm": 432.0, "learning_rate": 2.0767684785240044e-05, "loss": 15.4377, "step": 16981 }, { "epoch": 0.7078487766245676, "grad_norm": 492.0, "learning_rate": 2.0762208842299684e-05, "loss": 17.5001, "step": 16982 }, { "epoch": 0.7078904589220958, "grad_norm": 302.0, "learning_rate": 2.0756733432220697e-05, "loss": 13.3753, "step": 16983 }, { "epoch": 0.707932141219624, "grad_norm": 844.0, "learning_rate": 2.075125855510281e-05, "loss": 23.6255, "step": 16984 }, { "epoch": 0.7079738235171522, "grad_norm": 235.0, "learning_rate": 2.074578421104586e-05, "loss": 12.2502, "step": 16985 }, { "epoch": 0.7080155058146805, "grad_norm": 408.0, "learning_rate": 2.0740310400149558e-05, "loss": 15.5633, "step": 16986 }, { "epoch": 0.7080571881122087, "grad_norm": 326.0, "learning_rate": 2.0734837122513736e-05, "loss": 12.5627, "step": 16987 }, { "epoch": 0.708098870409737, "grad_norm": 156.0, "learning_rate": 2.072936437823806e-05, "loss": 9.8756, "step": 16988 }, { "epoch": 0.7081405527072652, "grad_norm": 300.0, "learning_rate": 2.072389216742236e-05, "loss": 12.876, "step": 16989 }, { "epoch": 0.7081822350047935, "grad_norm": 462.0, "learning_rate": 2.0718420490166284e-05, "loss": 17.8757, "step": 16990 }, { "epoch": 0.7082239173023217, "grad_norm": 203.0, "learning_rate": 2.0712949346569616e-05, "loss": 11.2503, "step": 16991 }, { "epoch": 0.7082655995998499, "grad_norm": 668.0, "learning_rate": 2.0707478736732043e-05, "loss": 19.6254, "step": 16992 }, { "epoch": 0.7083072818973782, "grad_norm": 720.0, "learning_rate": 2.070200866075327e-05, "loss": 21.0026, "step": 16993 }, { "epoch": 0.7083489641949064, "grad_norm": 1096.0, "learning_rate": 2.0696539118732993e-05, "loss": 26.5002, "step": 16994 }, { "epoch": 0.7083906464924347, "grad_norm": 812.0, "learning_rate": 2.0691070110770887e-05, "loss": 22.0006, "step": 16995 }, { "epoch": 0.7084323287899629, "grad_norm": 368.0, "learning_rate": 2.0685601636966635e-05, "loss": 13.44, "step": 16996 }, { "epoch": 0.7084740110874912, "grad_norm": 282.0, "learning_rate": 2.0680133697419897e-05, "loss": 13.3127, "step": 16997 }, { "epoch": 0.7085156933850194, "grad_norm": 320.0, "learning_rate": 2.0674666292230315e-05, "loss": 13.4377, "step": 16998 }, { "epoch": 0.7085573756825476, "grad_norm": 596.0, "learning_rate": 2.0669199421497553e-05, "loss": 19.0003, "step": 16999 }, { "epoch": 0.7085990579800758, "grad_norm": 444.0, "learning_rate": 2.0663733085321236e-05, "loss": 16.0023, "step": 17000 }, { "epoch": 0.7086407402776042, "grad_norm": 412.0, "learning_rate": 2.065826728380098e-05, "loss": 15.6254, "step": 17001 }, { "epoch": 0.7086824225751324, "grad_norm": 386.0, "learning_rate": 2.065280201703641e-05, "loss": 15.1879, "step": 17002 }, { "epoch": 0.7087241048726606, "grad_norm": 190.0, "learning_rate": 2.0647337285127132e-05, "loss": 11.1881, "step": 17003 }, { "epoch": 0.7087657871701888, "grad_norm": 856.0, "learning_rate": 2.0641873088172737e-05, "loss": 19.1295, "step": 17004 }, { "epoch": 0.7088074694677171, "grad_norm": 668.0, "learning_rate": 2.0636409426272806e-05, "loss": 21.8753, "step": 17005 }, { "epoch": 0.7088491517652453, "grad_norm": 235.0, "learning_rate": 2.063094629952691e-05, "loss": 12.0628, "step": 17006 }, { "epoch": 0.7088908340627735, "grad_norm": 134.0, "learning_rate": 2.0625483708034655e-05, "loss": 9.3751, "step": 17007 }, { "epoch": 0.7089325163603017, "grad_norm": 366.0, "learning_rate": 2.0620021651895533e-05, "loss": 15.6252, "step": 17008 }, { "epoch": 0.7089741986578301, "grad_norm": 636.0, "learning_rate": 2.061456013120916e-05, "loss": 19.3752, "step": 17009 }, { "epoch": 0.7090158809553583, "grad_norm": 644.0, "learning_rate": 2.0609099146075002e-05, "loss": 19.876, "step": 17010 }, { "epoch": 0.7090575632528865, "grad_norm": 440.0, "learning_rate": 2.060363869659266e-05, "loss": 17.0004, "step": 17011 }, { "epoch": 0.7090992455504147, "grad_norm": 213.0, "learning_rate": 2.059817878286157e-05, "loss": 10.2503, "step": 17012 }, { "epoch": 0.709140927847943, "grad_norm": 163.0, "learning_rate": 2.059271940498132e-05, "loss": 9.8752, "step": 17013 }, { "epoch": 0.7091826101454712, "grad_norm": 314.0, "learning_rate": 2.0587260563051337e-05, "loss": 13.5627, "step": 17014 }, { "epoch": 0.7092242924429994, "grad_norm": 656.0, "learning_rate": 2.0581802257171172e-05, "loss": 20.8761, "step": 17015 }, { "epoch": 0.7092659747405277, "grad_norm": 256.0, "learning_rate": 2.0576344487440243e-05, "loss": 12.8753, "step": 17016 }, { "epoch": 0.709307657038056, "grad_norm": 784.0, "learning_rate": 2.0570887253958053e-05, "loss": 22.1254, "step": 17017 }, { "epoch": 0.7093493393355842, "grad_norm": 436.0, "learning_rate": 2.0565430556824067e-05, "loss": 16.2502, "step": 17018 }, { "epoch": 0.7093910216331124, "grad_norm": 470.0, "learning_rate": 2.055997439613772e-05, "loss": 15.3754, "step": 17019 }, { "epoch": 0.7094327039306406, "grad_norm": 282.0, "learning_rate": 2.0554518771998456e-05, "loss": 12.6881, "step": 17020 }, { "epoch": 0.7094743862281689, "grad_norm": 233.0, "learning_rate": 2.0549063684505693e-05, "loss": 11.0004, "step": 17021 }, { "epoch": 0.7095160685256972, "grad_norm": 510.0, "learning_rate": 2.054360913375886e-05, "loss": 16.6253, "step": 17022 }, { "epoch": 0.7095577508232254, "grad_norm": 320.0, "learning_rate": 2.053815511985737e-05, "loss": 13.5026, "step": 17023 }, { "epoch": 0.7095994331207536, "grad_norm": 384.0, "learning_rate": 2.053270164290062e-05, "loss": 15.3131, "step": 17024 }, { "epoch": 0.7096411154182819, "grad_norm": 194.0, "learning_rate": 2.0527248702987995e-05, "loss": 11.0628, "step": 17025 }, { "epoch": 0.7096827977158101, "grad_norm": 116.0, "learning_rate": 2.0521796300218878e-05, "loss": 7.8439, "step": 17026 }, { "epoch": 0.7097244800133383, "grad_norm": 438.0, "learning_rate": 2.0516344434692642e-05, "loss": 16.3753, "step": 17027 }, { "epoch": 0.7097661623108665, "grad_norm": 516.0, "learning_rate": 2.0510893106508645e-05, "loss": 18.7511, "step": 17028 }, { "epoch": 0.7098078446083949, "grad_norm": 178.0, "learning_rate": 2.050544231576624e-05, "loss": 11.3128, "step": 17029 }, { "epoch": 0.7098495269059231, "grad_norm": 346.0, "learning_rate": 2.0499992062564766e-05, "loss": 14.5007, "step": 17030 }, { "epoch": 0.7098912092034513, "grad_norm": 376.0, "learning_rate": 2.049454234700356e-05, "loss": 15.8755, "step": 17031 }, { "epoch": 0.7099328915009795, "grad_norm": 536.0, "learning_rate": 2.048909316918191e-05, "loss": 16.3794, "step": 17032 }, { "epoch": 0.7099745737985078, "grad_norm": 221.0, "learning_rate": 2.0483644529199204e-05, "loss": 11.6877, "step": 17033 }, { "epoch": 0.710016256096036, "grad_norm": 532.0, "learning_rate": 2.0478196427154655e-05, "loss": 17.6262, "step": 17034 }, { "epoch": 0.7100579383935642, "grad_norm": 254.0, "learning_rate": 2.0472748863147633e-05, "loss": 12.2502, "step": 17035 }, { "epoch": 0.7100996206910924, "grad_norm": 248.0, "learning_rate": 2.0467301837277353e-05, "loss": 11.5628, "step": 17036 }, { "epoch": 0.7101413029886208, "grad_norm": 688.0, "learning_rate": 2.046185534964315e-05, "loss": 20.6252, "step": 17037 }, { "epoch": 0.710182985286149, "grad_norm": 322.0, "learning_rate": 2.0456409400344225e-05, "loss": 12.8757, "step": 17038 }, { "epoch": 0.7102246675836772, "grad_norm": 532.0, "learning_rate": 2.0450963989479887e-05, "loss": 18.2502, "step": 17039 }, { "epoch": 0.7102663498812054, "grad_norm": 227.0, "learning_rate": 2.0445519117149327e-05, "loss": 5.4695, "step": 17040 }, { "epoch": 0.7103080321787337, "grad_norm": 1320.0, "learning_rate": 2.0440074783451818e-05, "loss": 27.1313, "step": 17041 }, { "epoch": 0.7103497144762619, "grad_norm": 278.0, "learning_rate": 2.043463098848657e-05, "loss": 13.0627, "step": 17042 }, { "epoch": 0.7103913967737902, "grad_norm": 348.0, "learning_rate": 2.04291877323528e-05, "loss": 15.376, "step": 17043 }, { "epoch": 0.7104330790713184, "grad_norm": 246.0, "learning_rate": 2.0423745015149705e-05, "loss": 12.6259, "step": 17044 }, { "epoch": 0.7104747613688467, "grad_norm": 388.0, "learning_rate": 2.0418302836976484e-05, "loss": 14.0002, "step": 17045 }, { "epoch": 0.7105164436663749, "grad_norm": 314.0, "learning_rate": 2.041286119793232e-05, "loss": 14.3768, "step": 17046 }, { "epoch": 0.7105581259639031, "grad_norm": 282.0, "learning_rate": 2.0407420098116385e-05, "loss": 13.7505, "step": 17047 }, { "epoch": 0.7105998082614313, "grad_norm": 536.0, "learning_rate": 2.0401979537627852e-05, "loss": 17.1273, "step": 17048 }, { "epoch": 0.7106414905589596, "grad_norm": 234.0, "learning_rate": 2.0396539516565866e-05, "loss": 13.438, "step": 17049 }, { "epoch": 0.7106831728564879, "grad_norm": 180.0, "learning_rate": 2.0391100035029575e-05, "loss": 9.5628, "step": 17050 }, { "epoch": 0.7107248551540161, "grad_norm": 374.0, "learning_rate": 2.0385661093118113e-05, "loss": 14.6254, "step": 17051 }, { "epoch": 0.7107665374515443, "grad_norm": 452.0, "learning_rate": 2.0380222690930605e-05, "loss": 17.5002, "step": 17052 }, { "epoch": 0.7108082197490726, "grad_norm": 460.0, "learning_rate": 2.0374784828566175e-05, "loss": 16.5004, "step": 17053 }, { "epoch": 0.7108499020466008, "grad_norm": 245.0, "learning_rate": 2.0369347506123914e-05, "loss": 13.3127, "step": 17054 }, { "epoch": 0.710891584344129, "grad_norm": 99.0, "learning_rate": 2.0363910723702928e-05, "loss": 8.1878, "step": 17055 }, { "epoch": 0.7109332666416572, "grad_norm": 318.0, "learning_rate": 2.0358474481402303e-05, "loss": 14.3127, "step": 17056 }, { "epoch": 0.7109749489391856, "grad_norm": 446.0, "learning_rate": 2.035303877932111e-05, "loss": 17.0002, "step": 17057 }, { "epoch": 0.7110166312367138, "grad_norm": 87.0, "learning_rate": 2.0347603617558396e-05, "loss": 8.8761, "step": 17058 }, { "epoch": 0.711058313534242, "grad_norm": 175.0, "learning_rate": 2.034216899621328e-05, "loss": 9.0002, "step": 17059 }, { "epoch": 0.7110999958317702, "grad_norm": 760.0, "learning_rate": 2.0336734915384726e-05, "loss": 20.6274, "step": 17060 }, { "epoch": 0.7111416781292985, "grad_norm": 47.5, "learning_rate": 2.0331301375171847e-05, "loss": 7.3751, "step": 17061 }, { "epoch": 0.7111833604268267, "grad_norm": 480.0, "learning_rate": 2.03258683756736e-05, "loss": 17.5002, "step": 17062 }, { "epoch": 0.7112250427243549, "grad_norm": 137.0, "learning_rate": 2.032043591698907e-05, "loss": 8.3764, "step": 17063 }, { "epoch": 0.7112667250218833, "grad_norm": 604.0, "learning_rate": 2.0315003999217198e-05, "loss": 20.5002, "step": 17064 }, { "epoch": 0.7113084073194115, "grad_norm": 108.0, "learning_rate": 2.0309572622457045e-05, "loss": 9.8757, "step": 17065 }, { "epoch": 0.7113500896169397, "grad_norm": 892.0, "learning_rate": 2.0304141786807536e-05, "loss": 22.6295, "step": 17066 }, { "epoch": 0.7113917719144679, "grad_norm": 1544.0, "learning_rate": 2.0298711492367695e-05, "loss": 27.2554, "step": 17067 }, { "epoch": 0.7114334542119962, "grad_norm": 95.0, "learning_rate": 2.029328173923647e-05, "loss": 8.5018, "step": 17068 }, { "epoch": 0.7114751365095244, "grad_norm": 414.0, "learning_rate": 2.028785252751283e-05, "loss": 15.6254, "step": 17069 }, { "epoch": 0.7115168188070526, "grad_norm": 408.0, "learning_rate": 2.0282423857295707e-05, "loss": 15.3753, "step": 17070 }, { "epoch": 0.7115585011045809, "grad_norm": 420.0, "learning_rate": 2.027699572868405e-05, "loss": 16.0002, "step": 17071 }, { "epoch": 0.7116001834021092, "grad_norm": 498.0, "learning_rate": 2.0271568141776788e-05, "loss": 16.8752, "step": 17072 }, { "epoch": 0.7116418656996374, "grad_norm": 732.0, "learning_rate": 2.026614109667283e-05, "loss": 20.1286, "step": 17073 }, { "epoch": 0.7116835479971656, "grad_norm": 134.0, "learning_rate": 2.0260714593471096e-05, "loss": 9.4378, "step": 17074 }, { "epoch": 0.7117252302946938, "grad_norm": 494.0, "learning_rate": 2.0255288632270474e-05, "loss": 18.3755, "step": 17075 }, { "epoch": 0.7117669125922221, "grad_norm": 466.0, "learning_rate": 2.024986321316986e-05, "loss": 17.0004, "step": 17076 }, { "epoch": 0.7118085948897503, "grad_norm": 398.0, "learning_rate": 2.024443833626813e-05, "loss": 15.1878, "step": 17077 }, { "epoch": 0.7118502771872786, "grad_norm": 152.0, "learning_rate": 2.0239014001664154e-05, "loss": 9.7502, "step": 17078 }, { "epoch": 0.7118919594848068, "grad_norm": 37.5, "learning_rate": 2.023359020945679e-05, "loss": 6.7503, "step": 17079 }, { "epoch": 0.7119336417823351, "grad_norm": 464.0, "learning_rate": 2.022816695974488e-05, "loss": 17.0002, "step": 17080 }, { "epoch": 0.7119753240798633, "grad_norm": 1864.0, "learning_rate": 2.0222744252627274e-05, "loss": 36.5044, "step": 17081 }, { "epoch": 0.7120170063773915, "grad_norm": 336.0, "learning_rate": 2.0217322088202778e-05, "loss": 14.7504, "step": 17082 }, { "epoch": 0.7120586886749197, "grad_norm": 164.0, "learning_rate": 2.0211900466570273e-05, "loss": 11.3755, "step": 17083 }, { "epoch": 0.712100370972448, "grad_norm": 1640.0, "learning_rate": 2.0206479387828485e-05, "loss": 35.5004, "step": 17084 }, { "epoch": 0.7121420532699763, "grad_norm": 308.0, "learning_rate": 2.0201058852076294e-05, "loss": 13.6254, "step": 17085 }, { "epoch": 0.7121837355675045, "grad_norm": 116.5, "learning_rate": 2.019563885941241e-05, "loss": 5.1565, "step": 17086 }, { "epoch": 0.7122254178650327, "grad_norm": 310.0, "learning_rate": 2.0190219409935697e-05, "loss": 14.5007, "step": 17087 }, { "epoch": 0.712267100162561, "grad_norm": 378.0, "learning_rate": 2.018480050374484e-05, "loss": 14.9383, "step": 17088 }, { "epoch": 0.7123087824600892, "grad_norm": 201.0, "learning_rate": 2.0179382140938675e-05, "loss": 11.0003, "step": 17089 }, { "epoch": 0.7123504647576174, "grad_norm": 450.0, "learning_rate": 2.0173964321615884e-05, "loss": 14.8154, "step": 17090 }, { "epoch": 0.7123921470551456, "grad_norm": 520.0, "learning_rate": 2.0168547045875274e-05, "loss": 16.251, "step": 17091 }, { "epoch": 0.712433829352674, "grad_norm": 580.0, "learning_rate": 2.0163130313815514e-05, "loss": 20.0012, "step": 17092 }, { "epoch": 0.7124755116502022, "grad_norm": 203.0, "learning_rate": 2.0157714125535365e-05, "loss": 11.1252, "step": 17093 }, { "epoch": 0.7125171939477304, "grad_norm": 330.0, "learning_rate": 2.0152298481133526e-05, "loss": 14.5627, "step": 17094 }, { "epoch": 0.7125588762452586, "grad_norm": 300.0, "learning_rate": 2.0146883380708698e-05, "loss": 12.7502, "step": 17095 }, { "epoch": 0.7126005585427869, "grad_norm": 388.0, "learning_rate": 2.0141468824359572e-05, "loss": 16.7502, "step": 17096 }, { "epoch": 0.7126422408403151, "grad_norm": 880.0, "learning_rate": 2.0136054812184822e-05, "loss": 20.8754, "step": 17097 }, { "epoch": 0.7126839231378433, "grad_norm": 366.0, "learning_rate": 2.013064134428313e-05, "loss": 13.8129, "step": 17098 }, { "epoch": 0.7127256054353716, "grad_norm": 183.0, "learning_rate": 2.0125228420753145e-05, "loss": 10.3127, "step": 17099 }, { "epoch": 0.7127672877328999, "grad_norm": 188.0, "learning_rate": 2.011981604169353e-05, "loss": 10.6877, "step": 17100 }, { "epoch": 0.7128089700304281, "grad_norm": 876.0, "learning_rate": 2.011440420720292e-05, "loss": 25.2514, "step": 17101 }, { "epoch": 0.7128506523279563, "grad_norm": 178.0, "learning_rate": 2.0108992917379943e-05, "loss": 10.3754, "step": 17102 }, { "epoch": 0.7128923346254845, "grad_norm": 548.0, "learning_rate": 2.010358217232322e-05, "loss": 17.2511, "step": 17103 }, { "epoch": 0.7129340169230128, "grad_norm": 330.0, "learning_rate": 2.0098171972131373e-05, "loss": 11.0009, "step": 17104 }, { "epoch": 0.712975699220541, "grad_norm": 201.0, "learning_rate": 2.0092762316902996e-05, "loss": 11.0003, "step": 17105 }, { "epoch": 0.7130173815180693, "grad_norm": 326.0, "learning_rate": 2.0087353206736675e-05, "loss": 9.3133, "step": 17106 }, { "epoch": 0.7130590638155975, "grad_norm": 314.0, "learning_rate": 2.0081944641731004e-05, "loss": 13.8752, "step": 17107 }, { "epoch": 0.7131007461131258, "grad_norm": 168.0, "learning_rate": 2.0076536621984525e-05, "loss": 11.3134, "step": 17108 }, { "epoch": 0.713142428410654, "grad_norm": 182.0, "learning_rate": 2.007112914759586e-05, "loss": 11.0005, "step": 17109 }, { "epoch": 0.7131841107081822, "grad_norm": 244.0, "learning_rate": 2.006572221866349e-05, "loss": 12.563, "step": 17110 }, { "epoch": 0.7132257930057104, "grad_norm": 60.25, "learning_rate": 2.006031583528602e-05, "loss": 6.1259, "step": 17111 }, { "epoch": 0.7132674753032388, "grad_norm": 1632.0, "learning_rate": 2.005490999756192e-05, "loss": 36.7503, "step": 17112 }, { "epoch": 0.713309157600767, "grad_norm": 384.0, "learning_rate": 2.0049504705589778e-05, "loss": 15.3778, "step": 17113 }, { "epoch": 0.7133508398982952, "grad_norm": 131.0, "learning_rate": 2.0044099959468037e-05, "loss": 8.8131, "step": 17114 }, { "epoch": 0.7133925221958234, "grad_norm": 124.0, "learning_rate": 2.0038695759295267e-05, "loss": 10.4385, "step": 17115 }, { "epoch": 0.7134342044933517, "grad_norm": 278.0, "learning_rate": 2.0033292105169893e-05, "loss": 12.1878, "step": 17116 }, { "epoch": 0.7134758867908799, "grad_norm": 576.0, "learning_rate": 2.0027888997190448e-05, "loss": 18.7504, "step": 17117 }, { "epoch": 0.7135175690884081, "grad_norm": 324.0, "learning_rate": 2.0022486435455385e-05, "loss": 14.4408, "step": 17118 }, { "epoch": 0.7135592513859363, "grad_norm": 322.0, "learning_rate": 2.001708442006317e-05, "loss": 12.3753, "step": 17119 }, { "epoch": 0.7136009336834647, "grad_norm": 708.0, "learning_rate": 2.0011682951112254e-05, "loss": 22.6267, "step": 17120 }, { "epoch": 0.7136426159809929, "grad_norm": 318.0, "learning_rate": 2.0006282028701074e-05, "loss": 15.2508, "step": 17121 }, { "epoch": 0.7136842982785211, "grad_norm": 398.0, "learning_rate": 2.000088165292807e-05, "loss": 14.7508, "step": 17122 }, { "epoch": 0.7137259805760493, "grad_norm": 366.0, "learning_rate": 1.9995481823891658e-05, "loss": 14.6252, "step": 17123 }, { "epoch": 0.7137676628735776, "grad_norm": 458.0, "learning_rate": 1.999008254169026e-05, "loss": 14.9378, "step": 17124 }, { "epoch": 0.7138093451711058, "grad_norm": 660.0, "learning_rate": 1.9984683806422266e-05, "loss": 19.1251, "step": 17125 }, { "epoch": 0.713851027468634, "grad_norm": 512.0, "learning_rate": 1.9979285618186077e-05, "loss": 18.3753, "step": 17126 }, { "epoch": 0.7138927097661623, "grad_norm": 184.0, "learning_rate": 1.997388797708007e-05, "loss": 11.6253, "step": 17127 }, { "epoch": 0.7139343920636906, "grad_norm": 1624.0, "learning_rate": 1.9968490883202623e-05, "loss": 35.2502, "step": 17128 }, { "epoch": 0.7139760743612188, "grad_norm": 370.0, "learning_rate": 1.9963094336652095e-05, "loss": 14.6877, "step": 17129 }, { "epoch": 0.714017756658747, "grad_norm": 316.0, "learning_rate": 1.9957698337526836e-05, "loss": 15.8132, "step": 17130 }, { "epoch": 0.7140594389562752, "grad_norm": 193.0, "learning_rate": 1.9952302885925194e-05, "loss": 11.5631, "step": 17131 }, { "epoch": 0.7141011212538035, "grad_norm": 398.0, "learning_rate": 1.9946907981945478e-05, "loss": 15.063, "step": 17132 }, { "epoch": 0.7141428035513318, "grad_norm": 624.0, "learning_rate": 1.9941513625686075e-05, "loss": 20.1254, "step": 17133 }, { "epoch": 0.71418448584886, "grad_norm": 274.0, "learning_rate": 1.9936119817245213e-05, "loss": 12.1254, "step": 17134 }, { "epoch": 0.7142261681463882, "grad_norm": 266.0, "learning_rate": 1.9930726556721275e-05, "loss": 13.8127, "step": 17135 }, { "epoch": 0.7142678504439165, "grad_norm": 426.0, "learning_rate": 1.992533384421248e-05, "loss": 16.0002, "step": 17136 }, { "epoch": 0.7143095327414447, "grad_norm": 368.0, "learning_rate": 1.991994167981718e-05, "loss": 14.438, "step": 17137 }, { "epoch": 0.7143512150389729, "grad_norm": 167.0, "learning_rate": 1.9914550063633574e-05, "loss": 10.8754, "step": 17138 }, { "epoch": 0.7143928973365012, "grad_norm": 1416.0, "learning_rate": 1.990915899576e-05, "loss": 29.2581, "step": 17139 }, { "epoch": 0.7144345796340295, "grad_norm": 173.0, "learning_rate": 1.9903768476294642e-05, "loss": 10.5003, "step": 17140 }, { "epoch": 0.7144762619315577, "grad_norm": 350.0, "learning_rate": 1.9898378505335806e-05, "loss": 14.4377, "step": 17141 }, { "epoch": 0.7145179442290859, "grad_norm": 418.0, "learning_rate": 1.9892989082981667e-05, "loss": 13.1881, "step": 17142 }, { "epoch": 0.7145596265266142, "grad_norm": 206.0, "learning_rate": 1.9887600209330487e-05, "loss": 11.6252, "step": 17143 }, { "epoch": 0.7146013088241424, "grad_norm": 524.0, "learning_rate": 1.9882211884480468e-05, "loss": 18.6253, "step": 17144 }, { "epoch": 0.7146429911216706, "grad_norm": 197.0, "learning_rate": 1.9876824108529808e-05, "loss": 8.6892, "step": 17145 }, { "epoch": 0.7146846734191988, "grad_norm": 344.0, "learning_rate": 1.9871436881576705e-05, "loss": 14.6877, "step": 17146 }, { "epoch": 0.7147263557167272, "grad_norm": 328.0, "learning_rate": 1.9866050203719338e-05, "loss": 13.3751, "step": 17147 }, { "epoch": 0.7147680380142554, "grad_norm": 720.0, "learning_rate": 1.9860664075055884e-05, "loss": 22.5002, "step": 17148 }, { "epoch": 0.7148097203117836, "grad_norm": 233.0, "learning_rate": 1.98552784956845e-05, "loss": 10.0007, "step": 17149 }, { "epoch": 0.7148514026093118, "grad_norm": 482.0, "learning_rate": 1.984989346570334e-05, "loss": 17.5004, "step": 17150 }, { "epoch": 0.7148930849068401, "grad_norm": 187.0, "learning_rate": 1.984450898521055e-05, "loss": 10.2504, "step": 17151 }, { "epoch": 0.7149347672043683, "grad_norm": 348.0, "learning_rate": 1.9839125054304264e-05, "loss": 13.6877, "step": 17152 }, { "epoch": 0.7149764495018965, "grad_norm": 180.0, "learning_rate": 1.9833741673082597e-05, "loss": 10.9378, "step": 17153 }, { "epoch": 0.7150181317994247, "grad_norm": 247.0, "learning_rate": 1.982835884164367e-05, "loss": 12.9377, "step": 17154 }, { "epoch": 0.7150598140969531, "grad_norm": 788.0, "learning_rate": 1.9822976560085575e-05, "loss": 22.2504, "step": 17155 }, { "epoch": 0.7151014963944813, "grad_norm": 480.0, "learning_rate": 1.981759482850642e-05, "loss": 17.2503, "step": 17156 }, { "epoch": 0.7151431786920095, "grad_norm": 244.0, "learning_rate": 1.981221364700427e-05, "loss": 12.8779, "step": 17157 }, { "epoch": 0.7151848609895377, "grad_norm": 140.0, "learning_rate": 1.9806833015677196e-05, "loss": 10.6878, "step": 17158 }, { "epoch": 0.715226543287066, "grad_norm": 800.0, "learning_rate": 1.98014529346233e-05, "loss": 25.3752, "step": 17159 }, { "epoch": 0.7152682255845942, "grad_norm": 330.0, "learning_rate": 1.9796073403940574e-05, "loss": 14.8127, "step": 17160 }, { "epoch": 0.7153099078821225, "grad_norm": 122.0, "learning_rate": 1.9790694423727124e-05, "loss": 7.8775, "step": 17161 }, { "epoch": 0.7153515901796507, "grad_norm": 77.0, "learning_rate": 1.9785315994080912e-05, "loss": 7.4377, "step": 17162 }, { "epoch": 0.715393272477179, "grad_norm": 332.0, "learning_rate": 1.977993811510004e-05, "loss": 13.5628, "step": 17163 }, { "epoch": 0.7154349547747072, "grad_norm": 312.0, "learning_rate": 1.977456078688244e-05, "loss": 13.1252, "step": 17164 }, { "epoch": 0.7154766370722354, "grad_norm": 1104.0, "learning_rate": 1.9769184009526186e-05, "loss": 24.3753, "step": 17165 }, { "epoch": 0.7155183193697636, "grad_norm": 1160.0, "learning_rate": 1.976380778312921e-05, "loss": 29.2505, "step": 17166 }, { "epoch": 0.715560001667292, "grad_norm": 342.0, "learning_rate": 1.9758432107789525e-05, "loss": 13.3752, "step": 17167 }, { "epoch": 0.7156016839648202, "grad_norm": 189.0, "learning_rate": 1.9753056983605113e-05, "loss": 11.3753, "step": 17168 }, { "epoch": 0.7156433662623484, "grad_norm": 189.0, "learning_rate": 1.974768241067391e-05, "loss": 11.1255, "step": 17169 }, { "epoch": 0.7156850485598766, "grad_norm": 165.0, "learning_rate": 1.974230838909389e-05, "loss": 10.1251, "step": 17170 }, { "epoch": 0.7157267308574049, "grad_norm": 204.0, "learning_rate": 1.9736934918962986e-05, "loss": 12.2503, "step": 17171 }, { "epoch": 0.7157684131549331, "grad_norm": 111.5, "learning_rate": 1.9731562000379127e-05, "loss": 10.5004, "step": 17172 }, { "epoch": 0.7158100954524613, "grad_norm": 430.0, "learning_rate": 1.9726189633440234e-05, "loss": 15.688, "step": 17173 }, { "epoch": 0.7158517777499895, "grad_norm": 239.0, "learning_rate": 1.972081781824423e-05, "loss": 13.064, "step": 17174 }, { "epoch": 0.7158934600475179, "grad_norm": 348.0, "learning_rate": 1.9715446554889007e-05, "loss": 13.4378, "step": 17175 }, { "epoch": 0.7159351423450461, "grad_norm": 434.0, "learning_rate": 1.9710075843472452e-05, "loss": 15.3753, "step": 17176 }, { "epoch": 0.7159768246425743, "grad_norm": 348.0, "learning_rate": 1.970470568409246e-05, "loss": 14.813, "step": 17177 }, { "epoch": 0.7160185069401025, "grad_norm": 196.0, "learning_rate": 1.9699336076846896e-05, "loss": 12.3129, "step": 17178 }, { "epoch": 0.7160601892376308, "grad_norm": 480.0, "learning_rate": 1.969396702183362e-05, "loss": 16.7512, "step": 17179 }, { "epoch": 0.716101871535159, "grad_norm": 484.0, "learning_rate": 1.9688598519150486e-05, "loss": 18.5004, "step": 17180 }, { "epoch": 0.7161435538326872, "grad_norm": 266.0, "learning_rate": 1.9683230568895334e-05, "loss": 11.6884, "step": 17181 }, { "epoch": 0.7161852361302155, "grad_norm": 258.0, "learning_rate": 1.9677863171166e-05, "loss": 13.6883, "step": 17182 }, { "epoch": 0.7162269184277438, "grad_norm": 164.0, "learning_rate": 1.9672496326060296e-05, "loss": 12.1878, "step": 17183 }, { "epoch": 0.716268600725272, "grad_norm": 308.0, "learning_rate": 1.9667130033676023e-05, "loss": 13.192, "step": 17184 }, { "epoch": 0.7163102830228002, "grad_norm": 220.0, "learning_rate": 1.9661764294111036e-05, "loss": 11.6252, "step": 17185 }, { "epoch": 0.7163519653203284, "grad_norm": 282.0, "learning_rate": 1.9656399107463054e-05, "loss": 13.063, "step": 17186 }, { "epoch": 0.7163936476178567, "grad_norm": 418.0, "learning_rate": 1.965103447382992e-05, "loss": 16.2502, "step": 17187 }, { "epoch": 0.7164353299153849, "grad_norm": 209.0, "learning_rate": 1.9645670393309346e-05, "loss": 11.876, "step": 17188 }, { "epoch": 0.7164770122129132, "grad_norm": 296.0, "learning_rate": 1.964030686599916e-05, "loss": 13.8755, "step": 17189 }, { "epoch": 0.7165186945104414, "grad_norm": 360.0, "learning_rate": 1.963494389199704e-05, "loss": 13.7501, "step": 17190 }, { "epoch": 0.7165603768079697, "grad_norm": 442.0, "learning_rate": 1.96295814714008e-05, "loss": 16.7502, "step": 17191 }, { "epoch": 0.7166020591054979, "grad_norm": 186.0, "learning_rate": 1.962421960430809e-05, "loss": 11.3755, "step": 17192 }, { "epoch": 0.7166437414030261, "grad_norm": 170.0, "learning_rate": 1.96188582908167e-05, "loss": 8.0627, "step": 17193 }, { "epoch": 0.7166854237005543, "grad_norm": 182.0, "learning_rate": 1.9613497531024317e-05, "loss": 11.3129, "step": 17194 }, { "epoch": 0.7167271059980826, "grad_norm": 140.0, "learning_rate": 1.9608137325028637e-05, "loss": 9.0626, "step": 17195 }, { "epoch": 0.7167687882956109, "grad_norm": 108.5, "learning_rate": 1.9602777672927346e-05, "loss": 9.6881, "step": 17196 }, { "epoch": 0.7168104705931391, "grad_norm": 119.5, "learning_rate": 1.959741857481814e-05, "loss": 10.5002, "step": 17197 }, { "epoch": 0.7168521528906673, "grad_norm": 354.0, "learning_rate": 1.959206003079867e-05, "loss": 14.8771, "step": 17198 }, { "epoch": 0.7168938351881956, "grad_norm": 123.0, "learning_rate": 1.958670204096661e-05, "loss": 9.3754, "step": 17199 }, { "epoch": 0.7169355174857238, "grad_norm": 93.0, "learning_rate": 1.9581344605419603e-05, "loss": 9.1878, "step": 17200 }, { "epoch": 0.716977199783252, "grad_norm": 488.0, "learning_rate": 1.9575987724255296e-05, "loss": 16.3752, "step": 17201 }, { "epoch": 0.7170188820807802, "grad_norm": 254.0, "learning_rate": 1.957063139757131e-05, "loss": 12.563, "step": 17202 }, { "epoch": 0.7170605643783086, "grad_norm": 494.0, "learning_rate": 1.9565275625465268e-05, "loss": 14.4416, "step": 17203 }, { "epoch": 0.7171022466758368, "grad_norm": 462.0, "learning_rate": 1.9559920408034775e-05, "loss": 15.0657, "step": 17204 }, { "epoch": 0.717143928973365, "grad_norm": 820.0, "learning_rate": 1.955456574537744e-05, "loss": 18.3794, "step": 17205 }, { "epoch": 0.7171856112708932, "grad_norm": 422.0, "learning_rate": 1.9549211637590847e-05, "loss": 14.8752, "step": 17206 }, { "epoch": 0.7172272935684215, "grad_norm": 384.0, "learning_rate": 1.9543858084772575e-05, "loss": 14.5005, "step": 17207 }, { "epoch": 0.7172689758659497, "grad_norm": 400.0, "learning_rate": 1.9538505087020177e-05, "loss": 15.5004, "step": 17208 }, { "epoch": 0.7173106581634779, "grad_norm": 716.0, "learning_rate": 1.953315264443126e-05, "loss": 21.1256, "step": 17209 }, { "epoch": 0.7173523404610063, "grad_norm": 227.0, "learning_rate": 1.9527800757103303e-05, "loss": 11.876, "step": 17210 }, { "epoch": 0.7173940227585345, "grad_norm": 724.0, "learning_rate": 1.9522449425133926e-05, "loss": 21.1268, "step": 17211 }, { "epoch": 0.7174357050560627, "grad_norm": 184.0, "learning_rate": 1.9517098648620573e-05, "loss": 7.6585, "step": 17212 }, { "epoch": 0.7174773873535909, "grad_norm": 544.0, "learning_rate": 1.9511748427660836e-05, "loss": 19.2501, "step": 17213 }, { "epoch": 0.7175190696511192, "grad_norm": 312.0, "learning_rate": 1.950639876235216e-05, "loss": 14.1253, "step": 17214 }, { "epoch": 0.7175607519486474, "grad_norm": 532.0, "learning_rate": 1.950104965279211e-05, "loss": 17.5008, "step": 17215 }, { "epoch": 0.7176024342461756, "grad_norm": 31.25, "learning_rate": 1.9495701099078094e-05, "loss": 6.1877, "step": 17216 }, { "epoch": 0.7176441165437039, "grad_norm": 68.0, "learning_rate": 1.949035310130768e-05, "loss": 9.0003, "step": 17217 }, { "epoch": 0.7176857988412322, "grad_norm": 241.0, "learning_rate": 1.948500565957824e-05, "loss": 11.8752, "step": 17218 }, { "epoch": 0.7177274811387604, "grad_norm": 138.0, "learning_rate": 1.9479658773987314e-05, "loss": 9.6263, "step": 17219 }, { "epoch": 0.7177691634362886, "grad_norm": 125.5, "learning_rate": 1.9474312444632304e-05, "loss": 10.0627, "step": 17220 }, { "epoch": 0.7178108457338168, "grad_norm": 126.0, "learning_rate": 1.9468966671610665e-05, "loss": 10.2511, "step": 17221 }, { "epoch": 0.7178525280313451, "grad_norm": 868.0, "learning_rate": 1.946362145501982e-05, "loss": 22.2529, "step": 17222 }, { "epoch": 0.7178942103288734, "grad_norm": 282.0, "learning_rate": 1.945827679495718e-05, "loss": 11.8127, "step": 17223 }, { "epoch": 0.7179358926264016, "grad_norm": 120.5, "learning_rate": 1.945293269152016e-05, "loss": 9.6257, "step": 17224 }, { "epoch": 0.7179775749239298, "grad_norm": 227.0, "learning_rate": 1.9447589144806154e-05, "loss": 11.2517, "step": 17225 }, { "epoch": 0.7180192572214581, "grad_norm": 362.0, "learning_rate": 1.9442246154912545e-05, "loss": 14.5627, "step": 17226 }, { "epoch": 0.7180609395189863, "grad_norm": 462.0, "learning_rate": 1.9436903721936716e-05, "loss": 15.4383, "step": 17227 }, { "epoch": 0.7181026218165145, "grad_norm": 588.0, "learning_rate": 1.9431561845976025e-05, "loss": 17.7508, "step": 17228 }, { "epoch": 0.7181443041140427, "grad_norm": 133.0, "learning_rate": 1.9426220527127836e-05, "loss": 10.1252, "step": 17229 }, { "epoch": 0.718185986411571, "grad_norm": 628.0, "learning_rate": 1.9420879765489497e-05, "loss": 19.6253, "step": 17230 }, { "epoch": 0.7182276687090993, "grad_norm": 178.0, "learning_rate": 1.941553956115833e-05, "loss": 11.313, "step": 17231 }, { "epoch": 0.7182693510066275, "grad_norm": 202.0, "learning_rate": 1.9410199914231676e-05, "loss": 11.8755, "step": 17232 }, { "epoch": 0.7183110333041557, "grad_norm": 193.0, "learning_rate": 1.9404860824806842e-05, "loss": 10.8754, "step": 17233 }, { "epoch": 0.718352715601684, "grad_norm": 1072.0, "learning_rate": 1.9399522292981114e-05, "loss": 28.7503, "step": 17234 }, { "epoch": 0.7183943978992122, "grad_norm": 452.0, "learning_rate": 1.939418431885185e-05, "loss": 14.0628, "step": 17235 }, { "epoch": 0.7184360801967404, "grad_norm": 370.0, "learning_rate": 1.9388846902516257e-05, "loss": 13.2502, "step": 17236 }, { "epoch": 0.7184777624942686, "grad_norm": 171.0, "learning_rate": 1.938351004407168e-05, "loss": 10.7508, "step": 17237 }, { "epoch": 0.718519444791797, "grad_norm": 203.0, "learning_rate": 1.9378173743615313e-05, "loss": 11.0009, "step": 17238 }, { "epoch": 0.7185611270893252, "grad_norm": 274.0, "learning_rate": 1.9372838001244487e-05, "loss": 11.3129, "step": 17239 }, { "epoch": 0.7186028093868534, "grad_norm": 1264.0, "learning_rate": 1.9367502817056373e-05, "loss": 28.8752, "step": 17240 }, { "epoch": 0.7186444916843816, "grad_norm": 177.0, "learning_rate": 1.9362168191148273e-05, "loss": 10.7503, "step": 17241 }, { "epoch": 0.7186861739819099, "grad_norm": 968.0, "learning_rate": 1.935683412361734e-05, "loss": 25.0051, "step": 17242 }, { "epoch": 0.7187278562794381, "grad_norm": 368.0, "learning_rate": 1.9351500614560848e-05, "loss": 14.3127, "step": 17243 }, { "epoch": 0.7187695385769663, "grad_norm": 712.0, "learning_rate": 1.9346167664075976e-05, "loss": 21.2507, "step": 17244 }, { "epoch": 0.7188112208744946, "grad_norm": 434.0, "learning_rate": 1.9340835272259915e-05, "loss": 16.1255, "step": 17245 }, { "epoch": 0.7188529031720229, "grad_norm": 67.0, "learning_rate": 1.9335503439209856e-05, "loss": 7.4067, "step": 17246 }, { "epoch": 0.7188945854695511, "grad_norm": 177.0, "learning_rate": 1.9330172165022974e-05, "loss": 11.3754, "step": 17247 }, { "epoch": 0.7189362677670793, "grad_norm": 440.0, "learning_rate": 1.932484144979642e-05, "loss": 14.9379, "step": 17248 }, { "epoch": 0.7189779500646075, "grad_norm": 117.0, "learning_rate": 1.9319511293627356e-05, "loss": 8.9378, "step": 17249 }, { "epoch": 0.7190196323621358, "grad_norm": 255.0, "learning_rate": 1.931418169661292e-05, "loss": 13.3755, "step": 17250 }, { "epoch": 0.719061314659664, "grad_norm": 496.0, "learning_rate": 1.9308852658850252e-05, "loss": 17.2505, "step": 17251 }, { "epoch": 0.7191029969571923, "grad_norm": 35.25, "learning_rate": 1.9303524180436468e-05, "loss": 6.1878, "step": 17252 }, { "epoch": 0.7191446792547205, "grad_norm": 286.0, "learning_rate": 1.9298196261468676e-05, "loss": 13.3128, "step": 17253 }, { "epoch": 0.7191863615522488, "grad_norm": 490.0, "learning_rate": 1.9292868902043985e-05, "loss": 16.8751, "step": 17254 }, { "epoch": 0.719228043849777, "grad_norm": 73.0, "learning_rate": 1.928754210225949e-05, "loss": 8.3129, "step": 17255 }, { "epoch": 0.7192697261473052, "grad_norm": 560.0, "learning_rate": 1.9282215862212256e-05, "loss": 17.8777, "step": 17256 }, { "epoch": 0.7193114084448334, "grad_norm": 87.5, "learning_rate": 1.927689018199937e-05, "loss": 6.6252, "step": 17257 }, { "epoch": 0.7193530907423618, "grad_norm": 374.0, "learning_rate": 1.927156506171787e-05, "loss": 15.6877, "step": 17258 }, { "epoch": 0.71939477303989, "grad_norm": 696.0, "learning_rate": 1.926624050146486e-05, "loss": 19.127, "step": 17259 }, { "epoch": 0.7194364553374182, "grad_norm": 172.0, "learning_rate": 1.926091650133731e-05, "loss": 11.0627, "step": 17260 }, { "epoch": 0.7194781376349464, "grad_norm": 438.0, "learning_rate": 1.925559306143232e-05, "loss": 16.8753, "step": 17261 }, { "epoch": 0.7195198199324747, "grad_norm": 604.0, "learning_rate": 1.9250270181846843e-05, "loss": 19.8759, "step": 17262 }, { "epoch": 0.7195615022300029, "grad_norm": 204.0, "learning_rate": 1.9244947862677952e-05, "loss": 9.6877, "step": 17263 }, { "epoch": 0.7196031845275311, "grad_norm": 498.0, "learning_rate": 1.923962610402259e-05, "loss": 14.9413, "step": 17264 }, { "epoch": 0.7196448668250593, "grad_norm": 552.0, "learning_rate": 1.92343049059778e-05, "loss": 16.3753, "step": 17265 }, { "epoch": 0.7196865491225877, "grad_norm": 314.0, "learning_rate": 1.9228984268640505e-05, "loss": 11.0004, "step": 17266 }, { "epoch": 0.7197282314201159, "grad_norm": 924.0, "learning_rate": 1.922366419210773e-05, "loss": 25.0002, "step": 17267 }, { "epoch": 0.7197699137176441, "grad_norm": 1096.0, "learning_rate": 1.921834467647638e-05, "loss": 30.0009, "step": 17268 }, { "epoch": 0.7198115960151723, "grad_norm": 520.0, "learning_rate": 1.921302572184344e-05, "loss": 17.2502, "step": 17269 }, { "epoch": 0.7198532783127006, "grad_norm": 346.0, "learning_rate": 1.9207707328305845e-05, "loss": 15.6878, "step": 17270 }, { "epoch": 0.7198949606102288, "grad_norm": 308.0, "learning_rate": 1.9202389495960523e-05, "loss": 13.8128, "step": 17271 }, { "epoch": 0.719936642907757, "grad_norm": 255.0, "learning_rate": 1.9197072224904378e-05, "loss": 12.7508, "step": 17272 }, { "epoch": 0.7199783252052853, "grad_norm": 322.0, "learning_rate": 1.9191755515234328e-05, "loss": 13.0004, "step": 17273 }, { "epoch": 0.7200200075028136, "grad_norm": 456.0, "learning_rate": 1.918643936704727e-05, "loss": 16.8755, "step": 17274 }, { "epoch": 0.7200616898003418, "grad_norm": 604.0, "learning_rate": 1.9181123780440092e-05, "loss": 18.7502, "step": 17275 }, { "epoch": 0.72010337209787, "grad_norm": 103.5, "learning_rate": 1.9175808755509667e-05, "loss": 8.4379, "step": 17276 }, { "epoch": 0.7201450543953982, "grad_norm": 324.0, "learning_rate": 1.917049429235286e-05, "loss": 13.1253, "step": 17277 }, { "epoch": 0.7201867366929265, "grad_norm": 308.0, "learning_rate": 1.9165180391066532e-05, "loss": 13.3753, "step": 17278 }, { "epoch": 0.7202284189904548, "grad_norm": 179.0, "learning_rate": 1.9159867051747532e-05, "loss": 10.6878, "step": 17279 }, { "epoch": 0.720270101287983, "grad_norm": 624.0, "learning_rate": 1.915455427449269e-05, "loss": 20.6253, "step": 17280 }, { "epoch": 0.7203117835855112, "grad_norm": 328.0, "learning_rate": 1.914924205939884e-05, "loss": 15.2524, "step": 17281 }, { "epoch": 0.7203534658830395, "grad_norm": 500.0, "learning_rate": 1.9143930406562788e-05, "loss": 16.7501, "step": 17282 }, { "epoch": 0.7203951481805677, "grad_norm": 76.5, "learning_rate": 1.9138619316081348e-05, "loss": 8.3756, "step": 17283 }, { "epoch": 0.7204368304780959, "grad_norm": 374.0, "learning_rate": 1.9133308788051286e-05, "loss": 14.0626, "step": 17284 }, { "epoch": 0.7204785127756242, "grad_norm": 470.0, "learning_rate": 1.9127998822569448e-05, "loss": 16.6252, "step": 17285 }, { "epoch": 0.7205201950731525, "grad_norm": 380.0, "learning_rate": 1.9122689419732543e-05, "loss": 12.9378, "step": 17286 }, { "epoch": 0.7205618773706807, "grad_norm": 512.0, "learning_rate": 1.9117380579637395e-05, "loss": 18.5002, "step": 17287 }, { "epoch": 0.7206035596682089, "grad_norm": 49.25, "learning_rate": 1.911207230238069e-05, "loss": 6.0946, "step": 17288 }, { "epoch": 0.7206452419657372, "grad_norm": 174.0, "learning_rate": 1.9106764588059244e-05, "loss": 10.6878, "step": 17289 }, { "epoch": 0.7206869242632654, "grad_norm": 270.0, "learning_rate": 1.9101457436769726e-05, "loss": 13.0001, "step": 17290 }, { "epoch": 0.7207286065607936, "grad_norm": 490.0, "learning_rate": 1.9096150848608925e-05, "loss": 16.004, "step": 17291 }, { "epoch": 0.7207702888583218, "grad_norm": 330.0, "learning_rate": 1.9090844823673477e-05, "loss": 13.0004, "step": 17292 }, { "epoch": 0.7208119711558502, "grad_norm": 760.0, "learning_rate": 1.9085539362060146e-05, "loss": 23.2503, "step": 17293 }, { "epoch": 0.7208536534533784, "grad_norm": 158.0, "learning_rate": 1.9080234463865603e-05, "loss": 9.5003, "step": 17294 }, { "epoch": 0.7208953357509066, "grad_norm": 432.0, "learning_rate": 1.9074930129186536e-05, "loss": 15.9388, "step": 17295 }, { "epoch": 0.7209370180484348, "grad_norm": 502.0, "learning_rate": 1.9069626358119613e-05, "loss": 18.5004, "step": 17296 }, { "epoch": 0.7209787003459631, "grad_norm": 1012.0, "learning_rate": 1.9064323150761493e-05, "loss": 28.5006, "step": 17297 }, { "epoch": 0.7210203826434913, "grad_norm": 189.0, "learning_rate": 1.905902050720883e-05, "loss": 10.5001, "step": 17298 }, { "epoch": 0.7210620649410195, "grad_norm": 79.5, "learning_rate": 1.9053718427558264e-05, "loss": 8.3133, "step": 17299 }, { "epoch": 0.7211037472385478, "grad_norm": 156.0, "learning_rate": 1.9048416911906424e-05, "loss": 10.1259, "step": 17300 }, { "epoch": 0.7211454295360761, "grad_norm": 1136.0, "learning_rate": 1.9043115960349938e-05, "loss": 29.7502, "step": 17301 }, { "epoch": 0.7211871118336043, "grad_norm": 544.0, "learning_rate": 1.9037815572985412e-05, "loss": 15.7508, "step": 17302 }, { "epoch": 0.7212287941311325, "grad_norm": 112.5, "learning_rate": 1.903251574990944e-05, "loss": 10.3753, "step": 17303 }, { "epoch": 0.7212704764286607, "grad_norm": 108.0, "learning_rate": 1.9027216491218625e-05, "loss": 9.5013, "step": 17304 }, { "epoch": 0.721312158726189, "grad_norm": 209.0, "learning_rate": 1.9021917797009538e-05, "loss": 12.6251, "step": 17305 }, { "epoch": 0.7213538410237172, "grad_norm": 504.0, "learning_rate": 1.9016619667378742e-05, "loss": 16.8754, "step": 17306 }, { "epoch": 0.7213955233212455, "grad_norm": 144.0, "learning_rate": 1.9011322102422813e-05, "loss": 9.0004, "step": 17307 }, { "epoch": 0.7214372056187737, "grad_norm": 282.0, "learning_rate": 1.9006025102238285e-05, "loss": 12.7504, "step": 17308 }, { "epoch": 0.721478887916302, "grad_norm": 207.0, "learning_rate": 1.9000728666921697e-05, "loss": 11.5627, "step": 17309 }, { "epoch": 0.7215205702138302, "grad_norm": 764.0, "learning_rate": 1.899543279656957e-05, "loss": 17.8793, "step": 17310 }, { "epoch": 0.7215622525113584, "grad_norm": 474.0, "learning_rate": 1.8990137491278463e-05, "loss": 15.7528, "step": 17311 }, { "epoch": 0.7216039348088866, "grad_norm": 528.0, "learning_rate": 1.8984842751144823e-05, "loss": 18.1252, "step": 17312 }, { "epoch": 0.721645617106415, "grad_norm": 233.0, "learning_rate": 1.8979548576265206e-05, "loss": 12.501, "step": 17313 }, { "epoch": 0.7216872994039432, "grad_norm": 116.0, "learning_rate": 1.897425496673604e-05, "loss": 9.3769, "step": 17314 }, { "epoch": 0.7217289817014714, "grad_norm": 155.0, "learning_rate": 1.896896192265386e-05, "loss": 9.6252, "step": 17315 }, { "epoch": 0.7217706639989996, "grad_norm": 386.0, "learning_rate": 1.896366944411508e-05, "loss": 16.8751, "step": 17316 }, { "epoch": 0.7218123462965279, "grad_norm": 364.0, "learning_rate": 1.8958377531216203e-05, "loss": 14.1252, "step": 17317 }, { "epoch": 0.7218540285940561, "grad_norm": 384.0, "learning_rate": 1.895308618405362e-05, "loss": 15.1884, "step": 17318 }, { "epoch": 0.7218957108915843, "grad_norm": 600.0, "learning_rate": 1.894779540272382e-05, "loss": 19.3752, "step": 17319 }, { "epoch": 0.7219373931891125, "grad_norm": 540.0, "learning_rate": 1.89425051873232e-05, "loss": 18.6254, "step": 17320 }, { "epoch": 0.7219790754866409, "grad_norm": 131.0, "learning_rate": 1.893721553794818e-05, "loss": 10.1252, "step": 17321 }, { "epoch": 0.7220207577841691, "grad_norm": 656.0, "learning_rate": 1.8931926454695165e-05, "loss": 18.6254, "step": 17322 }, { "epoch": 0.7220624400816973, "grad_norm": 234.0, "learning_rate": 1.8926637937660547e-05, "loss": 14.0629, "step": 17323 }, { "epoch": 0.7221041223792255, "grad_norm": 246.0, "learning_rate": 1.892134998694071e-05, "loss": 11.1252, "step": 17324 }, { "epoch": 0.7221458046767538, "grad_norm": 126.0, "learning_rate": 1.8916062602632035e-05, "loss": 10.2503, "step": 17325 }, { "epoch": 0.722187486974282, "grad_norm": 181.0, "learning_rate": 1.8910775784830876e-05, "loss": 10.5632, "step": 17326 }, { "epoch": 0.7222291692718102, "grad_norm": 392.0, "learning_rate": 1.8905489533633586e-05, "loss": 14.6878, "step": 17327 }, { "epoch": 0.7222708515693385, "grad_norm": 332.0, "learning_rate": 1.890020384913651e-05, "loss": 14.5002, "step": 17328 }, { "epoch": 0.7223125338668668, "grad_norm": 832.0, "learning_rate": 1.889491873143598e-05, "loss": 27.1259, "step": 17329 }, { "epoch": 0.722354216164395, "grad_norm": 560.0, "learning_rate": 1.8889634180628318e-05, "loss": 17.7507, "step": 17330 }, { "epoch": 0.7223958984619232, "grad_norm": 212.0, "learning_rate": 1.8884350196809835e-05, "loss": 11.3137, "step": 17331 }, { "epoch": 0.7224375807594514, "grad_norm": 118.5, "learning_rate": 1.8879066780076833e-05, "loss": 10.4379, "step": 17332 }, { "epoch": 0.7224792630569797, "grad_norm": 384.0, "learning_rate": 1.8873783930525597e-05, "loss": 14.7526, "step": 17333 }, { "epoch": 0.722520945354508, "grad_norm": 506.0, "learning_rate": 1.8868501648252397e-05, "loss": 19.3753, "step": 17334 }, { "epoch": 0.7225626276520362, "grad_norm": 596.0, "learning_rate": 1.8863219933353554e-05, "loss": 18.8767, "step": 17335 }, { "epoch": 0.7226043099495644, "grad_norm": 288.0, "learning_rate": 1.8857938785925255e-05, "loss": 13.5635, "step": 17336 }, { "epoch": 0.7226459922470927, "grad_norm": 496.0, "learning_rate": 1.8852658206063822e-05, "loss": 16.6252, "step": 17337 }, { "epoch": 0.7226876745446209, "grad_norm": 748.0, "learning_rate": 1.8847378193865423e-05, "loss": 22.2504, "step": 17338 }, { "epoch": 0.7227293568421491, "grad_norm": 239.0, "learning_rate": 1.884209874942636e-05, "loss": 11.6877, "step": 17339 }, { "epoch": 0.7227710391396773, "grad_norm": 193.0, "learning_rate": 1.8836819872842777e-05, "loss": 10.0631, "step": 17340 }, { "epoch": 0.7228127214372057, "grad_norm": 620.0, "learning_rate": 1.8831541564210953e-05, "loss": 18.377, "step": 17341 }, { "epoch": 0.7228544037347339, "grad_norm": 352.0, "learning_rate": 1.882626382362701e-05, "loss": 15.8753, "step": 17342 }, { "epoch": 0.7228960860322621, "grad_norm": 266.0, "learning_rate": 1.8820986651187217e-05, "loss": 12.563, "step": 17343 }, { "epoch": 0.7229377683297903, "grad_norm": 418.0, "learning_rate": 1.8815710046987676e-05, "loss": 17.0004, "step": 17344 }, { "epoch": 0.7229794506273186, "grad_norm": 796.0, "learning_rate": 1.8810434011124607e-05, "loss": 20.6294, "step": 17345 }, { "epoch": 0.7230211329248468, "grad_norm": 644.0, "learning_rate": 1.880515854369414e-05, "loss": 19.3753, "step": 17346 }, { "epoch": 0.723062815222375, "grad_norm": 450.0, "learning_rate": 1.8799883644792433e-05, "loss": 16.3764, "step": 17347 }, { "epoch": 0.7231044975199032, "grad_norm": 119.0, "learning_rate": 1.8794609314515614e-05, "loss": 11.063, "step": 17348 }, { "epoch": 0.7231461798174316, "grad_norm": 470.0, "learning_rate": 1.8789335552959807e-05, "loss": 16.1252, "step": 17349 }, { "epoch": 0.7231878621149598, "grad_norm": 207.0, "learning_rate": 1.878406236022114e-05, "loss": 11.438, "step": 17350 }, { "epoch": 0.723229544412488, "grad_norm": 600.0, "learning_rate": 1.8778789736395696e-05, "loss": 20.2504, "step": 17351 }, { "epoch": 0.7232712267100162, "grad_norm": 720.0, "learning_rate": 1.8773517681579588e-05, "loss": 20.3794, "step": 17352 }, { "epoch": 0.7233129090075445, "grad_norm": 88.5, "learning_rate": 1.8768246195868895e-05, "loss": 10.6879, "step": 17353 }, { "epoch": 0.7233545913050727, "grad_norm": 184.0, "learning_rate": 1.876297527935968e-05, "loss": 11.1878, "step": 17354 }, { "epoch": 0.723396273602601, "grad_norm": 796.0, "learning_rate": 1.8757704932148018e-05, "loss": 22.1253, "step": 17355 }, { "epoch": 0.7234379559001293, "grad_norm": 494.0, "learning_rate": 1.8752435154329955e-05, "loss": 17.3752, "step": 17356 }, { "epoch": 0.7234796381976575, "grad_norm": 796.0, "learning_rate": 1.8747165946001537e-05, "loss": 22.7576, "step": 17357 }, { "epoch": 0.7235213204951857, "grad_norm": 366.0, "learning_rate": 1.8741897307258792e-05, "loss": 14.1256, "step": 17358 }, { "epoch": 0.7235630027927139, "grad_norm": 386.0, "learning_rate": 1.8736629238197746e-05, "loss": 14.7513, "step": 17359 }, { "epoch": 0.7236046850902422, "grad_norm": 174.0, "learning_rate": 1.8731361738914387e-05, "loss": 11.6885, "step": 17360 }, { "epoch": 0.7236463673877704, "grad_norm": 612.0, "learning_rate": 1.8726094809504773e-05, "loss": 17.1255, "step": 17361 }, { "epoch": 0.7236880496852987, "grad_norm": 248.0, "learning_rate": 1.872082845006482e-05, "loss": 10.9378, "step": 17362 }, { "epoch": 0.7237297319828269, "grad_norm": 105.5, "learning_rate": 1.8715562660690585e-05, "loss": 10.1259, "step": 17363 }, { "epoch": 0.7237714142803552, "grad_norm": 245.0, "learning_rate": 1.8710297441477958e-05, "loss": 11.6881, "step": 17364 }, { "epoch": 0.7238130965778834, "grad_norm": 233.0, "learning_rate": 1.8705032792522974e-05, "loss": 10.9383, "step": 17365 }, { "epoch": 0.7238547788754116, "grad_norm": 354.0, "learning_rate": 1.8699768713921512e-05, "loss": 14.0628, "step": 17366 }, { "epoch": 0.7238964611729398, "grad_norm": 133.0, "learning_rate": 1.869450520576958e-05, "loss": 9.3127, "step": 17367 }, { "epoch": 0.7239381434704681, "grad_norm": 284.0, "learning_rate": 1.868924226816303e-05, "loss": 11.9378, "step": 17368 }, { "epoch": 0.7239798257679964, "grad_norm": 358.0, "learning_rate": 1.868397990119784e-05, "loss": 13.6876, "step": 17369 }, { "epoch": 0.7240215080655246, "grad_norm": 656.0, "learning_rate": 1.86787181049699e-05, "loss": 21.1254, "step": 17370 }, { "epoch": 0.7240631903630528, "grad_norm": 98.0, "learning_rate": 1.8673456879575096e-05, "loss": 9.3753, "step": 17371 }, { "epoch": 0.7241048726605811, "grad_norm": 344.0, "learning_rate": 1.8668196225109324e-05, "loss": 14.4378, "step": 17372 }, { "epoch": 0.7241465549581093, "grad_norm": 1256.0, "learning_rate": 1.8662936141668458e-05, "loss": 30.0011, "step": 17373 }, { "epoch": 0.7241882372556375, "grad_norm": 580.0, "learning_rate": 1.8657676629348363e-05, "loss": 17.5004, "step": 17374 }, { "epoch": 0.7242299195531657, "grad_norm": 184.0, "learning_rate": 1.865241768824489e-05, "loss": 11.0002, "step": 17375 }, { "epoch": 0.7242716018506941, "grad_norm": 290.0, "learning_rate": 1.8647159318453893e-05, "loss": 12.6254, "step": 17376 }, { "epoch": 0.7243132841482223, "grad_norm": 474.0, "learning_rate": 1.8641901520071197e-05, "loss": 17.3753, "step": 17377 }, { "epoch": 0.7243549664457505, "grad_norm": 412.0, "learning_rate": 1.863664429319263e-05, "loss": 16.0002, "step": 17378 }, { "epoch": 0.7243966487432787, "grad_norm": 478.0, "learning_rate": 1.8631387637914004e-05, "loss": 15.813, "step": 17379 }, { "epoch": 0.724438331040807, "grad_norm": 276.0, "learning_rate": 1.8626131554331127e-05, "loss": 12.9379, "step": 17380 }, { "epoch": 0.7244800133383352, "grad_norm": 302.0, "learning_rate": 1.862087604253978e-05, "loss": 13.8753, "step": 17381 }, { "epoch": 0.7245216956358634, "grad_norm": 696.0, "learning_rate": 1.861562110263576e-05, "loss": 23.7516, "step": 17382 }, { "epoch": 0.7245633779333917, "grad_norm": 360.0, "learning_rate": 1.8610366734714828e-05, "loss": 13.0002, "step": 17383 }, { "epoch": 0.72460506023092, "grad_norm": 227.0, "learning_rate": 1.8605112938872737e-05, "loss": 11.8754, "step": 17384 }, { "epoch": 0.7246467425284482, "grad_norm": 488.0, "learning_rate": 1.8599859715205275e-05, "loss": 14.44, "step": 17385 }, { "epoch": 0.7246884248259764, "grad_norm": 159.0, "learning_rate": 1.859460706380813e-05, "loss": 10.8755, "step": 17386 }, { "epoch": 0.7247301071235046, "grad_norm": 1960.0, "learning_rate": 1.8589354984777092e-05, "loss": 42.0003, "step": 17387 }, { "epoch": 0.7247717894210329, "grad_norm": 165.0, "learning_rate": 1.858410347820781e-05, "loss": 10.8753, "step": 17388 }, { "epoch": 0.7248134717185611, "grad_norm": 304.0, "learning_rate": 1.8578852544196068e-05, "loss": 14.3756, "step": 17389 }, { "epoch": 0.7248551540160894, "grad_norm": 366.0, "learning_rate": 1.8573602182837496e-05, "loss": 14.5627, "step": 17390 }, { "epoch": 0.7248968363136176, "grad_norm": 216.0, "learning_rate": 1.8568352394227845e-05, "loss": 10.0017, "step": 17391 }, { "epoch": 0.7249385186111459, "grad_norm": 168.0, "learning_rate": 1.8563103178462727e-05, "loss": 9.9378, "step": 17392 }, { "epoch": 0.7249802009086741, "grad_norm": 366.0, "learning_rate": 1.855785453563788e-05, "loss": 15.2504, "step": 17393 }, { "epoch": 0.7250218832062023, "grad_norm": 656.0, "learning_rate": 1.8552606465848892e-05, "loss": 19.8753, "step": 17394 }, { "epoch": 0.7250635655037305, "grad_norm": 482.0, "learning_rate": 1.854735896919146e-05, "loss": 17.1252, "step": 17395 }, { "epoch": 0.7251052478012588, "grad_norm": 386.0, "learning_rate": 1.85421120457612e-05, "loss": 15.7504, "step": 17396 }, { "epoch": 0.7251469300987871, "grad_norm": 660.0, "learning_rate": 1.8536865695653743e-05, "loss": 18.1306, "step": 17397 }, { "epoch": 0.7251886123963153, "grad_norm": 107.0, "learning_rate": 1.85316199189647e-05, "loss": 10.8758, "step": 17398 }, { "epoch": 0.7252302946938435, "grad_norm": 1616.0, "learning_rate": 1.852637471578968e-05, "loss": 31.6265, "step": 17399 }, { "epoch": 0.7252719769913718, "grad_norm": 456.0, "learning_rate": 1.8521130086224275e-05, "loss": 14.6878, "step": 17400 }, { "epoch": 0.7253136592889, "grad_norm": 516.0, "learning_rate": 1.8515886030364066e-05, "loss": 17.8761, "step": 17401 }, { "epoch": 0.7253553415864282, "grad_norm": 292.0, "learning_rate": 1.851064254830463e-05, "loss": 12.2516, "step": 17402 }, { "epoch": 0.7253970238839564, "grad_norm": 165.0, "learning_rate": 1.850539964014153e-05, "loss": 9.3753, "step": 17403 }, { "epoch": 0.7254387061814848, "grad_norm": 81.5, "learning_rate": 1.850015730597032e-05, "loss": 9.0015, "step": 17404 }, { "epoch": 0.725480388479013, "grad_norm": 184.0, "learning_rate": 1.8494915545886533e-05, "loss": 10.5002, "step": 17405 }, { "epoch": 0.7255220707765412, "grad_norm": 470.0, "learning_rate": 1.848967435998571e-05, "loss": 17.2502, "step": 17406 }, { "epoch": 0.7255637530740694, "grad_norm": 350.0, "learning_rate": 1.8484433748363366e-05, "loss": 13.3751, "step": 17407 }, { "epoch": 0.7256054353715977, "grad_norm": 422.0, "learning_rate": 1.8479193711115017e-05, "loss": 15.5629, "step": 17408 }, { "epoch": 0.7256471176691259, "grad_norm": 772.0, "learning_rate": 1.8473954248336163e-05, "loss": 20.1297, "step": 17409 }, { "epoch": 0.7256887999666541, "grad_norm": 408.0, "learning_rate": 1.8468715360122267e-05, "loss": 14.8131, "step": 17410 }, { "epoch": 0.7257304822641824, "grad_norm": 412.0, "learning_rate": 1.8463477046568876e-05, "loss": 16.5003, "step": 17411 }, { "epoch": 0.7257721645617107, "grad_norm": 320.0, "learning_rate": 1.845823930777138e-05, "loss": 13.2502, "step": 17412 }, { "epoch": 0.7258138468592389, "grad_norm": 676.0, "learning_rate": 1.8453002143825303e-05, "loss": 20.8752, "step": 17413 }, { "epoch": 0.7258555291567671, "grad_norm": 119.0, "learning_rate": 1.844776555482603e-05, "loss": 9.2501, "step": 17414 }, { "epoch": 0.7258972114542953, "grad_norm": 840.0, "learning_rate": 1.8442529540869064e-05, "loss": 22.6281, "step": 17415 }, { "epoch": 0.7259388937518236, "grad_norm": 250.0, "learning_rate": 1.8437294102049768e-05, "loss": 12.0003, "step": 17416 }, { "epoch": 0.7259805760493518, "grad_norm": 217.0, "learning_rate": 1.8432059238463617e-05, "loss": 12.3753, "step": 17417 }, { "epoch": 0.7260222583468801, "grad_norm": 280.0, "learning_rate": 1.8426824950205955e-05, "loss": 13.7504, "step": 17418 }, { "epoch": 0.7260639406444083, "grad_norm": 314.0, "learning_rate": 1.8421591237372237e-05, "loss": 13.7504, "step": 17419 }, { "epoch": 0.7261056229419366, "grad_norm": 274.0, "learning_rate": 1.8416358100057814e-05, "loss": 10.8131, "step": 17420 }, { "epoch": 0.7261473052394648, "grad_norm": 396.0, "learning_rate": 1.841112553835807e-05, "loss": 15.8756, "step": 17421 }, { "epoch": 0.726188987536993, "grad_norm": 154.0, "learning_rate": 1.840589355236837e-05, "loss": 10.6258, "step": 17422 }, { "epoch": 0.7262306698345212, "grad_norm": 572.0, "learning_rate": 1.840066214218406e-05, "loss": 18.3754, "step": 17423 }, { "epoch": 0.7262723521320495, "grad_norm": 320.0, "learning_rate": 1.839543130790049e-05, "loss": 14.1883, "step": 17424 }, { "epoch": 0.7263140344295778, "grad_norm": 756.0, "learning_rate": 1.839020104961299e-05, "loss": 23.2502, "step": 17425 }, { "epoch": 0.726355716727106, "grad_norm": 94.5, "learning_rate": 1.838497136741688e-05, "loss": 8.5629, "step": 17426 }, { "epoch": 0.7263973990246342, "grad_norm": 127.0, "learning_rate": 1.8379742261407477e-05, "loss": 9.3751, "step": 17427 }, { "epoch": 0.7264390813221625, "grad_norm": 1552.0, "learning_rate": 1.8374513731680078e-05, "loss": 35.5056, "step": 17428 }, { "epoch": 0.7264807636196907, "grad_norm": 330.0, "learning_rate": 1.836928577832997e-05, "loss": 13.7535, "step": 17429 }, { "epoch": 0.7265224459172189, "grad_norm": 254.0, "learning_rate": 1.836405840145244e-05, "loss": 14.5633, "step": 17430 }, { "epoch": 0.7265641282147473, "grad_norm": 177.0, "learning_rate": 1.8358831601142755e-05, "loss": 11.5003, "step": 17431 }, { "epoch": 0.7266058105122755, "grad_norm": 422.0, "learning_rate": 1.8353605377496168e-05, "loss": 15.1878, "step": 17432 }, { "epoch": 0.7266474928098037, "grad_norm": 396.0, "learning_rate": 1.834837973060794e-05, "loss": 13.4379, "step": 17433 }, { "epoch": 0.7266891751073319, "grad_norm": 1088.0, "learning_rate": 1.8343154660573292e-05, "loss": 23.8787, "step": 17434 }, { "epoch": 0.7267308574048602, "grad_norm": 330.0, "learning_rate": 1.8337930167487465e-05, "loss": 14.5002, "step": 17435 }, { "epoch": 0.7267725397023884, "grad_norm": 432.0, "learning_rate": 1.833270625144566e-05, "loss": 17.1264, "step": 17436 }, { "epoch": 0.7268142219999166, "grad_norm": 166.0, "learning_rate": 1.832748291254312e-05, "loss": 9.8128, "step": 17437 }, { "epoch": 0.7268559042974448, "grad_norm": 332.0, "learning_rate": 1.8322260150874985e-05, "loss": 13.2503, "step": 17438 }, { "epoch": 0.7268975865949732, "grad_norm": 306.0, "learning_rate": 1.8317037966536505e-05, "loss": 13.6251, "step": 17439 }, { "epoch": 0.7269392688925014, "grad_norm": 868.0, "learning_rate": 1.8311816359622786e-05, "loss": 21.7544, "step": 17440 }, { "epoch": 0.7269809511900296, "grad_norm": 253.0, "learning_rate": 1.830659533022906e-05, "loss": 11.3131, "step": 17441 }, { "epoch": 0.7270226334875578, "grad_norm": 156.0, "learning_rate": 1.8301374878450415e-05, "loss": 7.063, "step": 17442 }, { "epoch": 0.7270643157850861, "grad_norm": 206.0, "learning_rate": 1.829615500438206e-05, "loss": 11.1254, "step": 17443 }, { "epoch": 0.7271059980826143, "grad_norm": 137.0, "learning_rate": 1.829093570811906e-05, "loss": 8.7503, "step": 17444 }, { "epoch": 0.7271476803801425, "grad_norm": 253.0, "learning_rate": 1.828571698975659e-05, "loss": 11.5628, "step": 17445 }, { "epoch": 0.7271893626776708, "grad_norm": 318.0, "learning_rate": 1.8280498849389742e-05, "loss": 13.1887, "step": 17446 }, { "epoch": 0.7272310449751991, "grad_norm": 76.5, "learning_rate": 1.827528128711362e-05, "loss": 9.001, "step": 17447 }, { "epoch": 0.7272727272727273, "grad_norm": 316.0, "learning_rate": 1.8270064303023314e-05, "loss": 14.0003, "step": 17448 }, { "epoch": 0.7273144095702555, "grad_norm": 544.0, "learning_rate": 1.82648478972139e-05, "loss": 17.3784, "step": 17449 }, { "epoch": 0.7273560918677837, "grad_norm": 328.0, "learning_rate": 1.8259632069780453e-05, "loss": 14.1881, "step": 17450 }, { "epoch": 0.727397774165312, "grad_norm": 47.0, "learning_rate": 1.8254416820818026e-05, "loss": 7.6568, "step": 17451 }, { "epoch": 0.7274394564628403, "grad_norm": 342.0, "learning_rate": 1.8249202150421674e-05, "loss": 12.0627, "step": 17452 }, { "epoch": 0.7274811387603685, "grad_norm": 358.0, "learning_rate": 1.8243988058686428e-05, "loss": 15.1877, "step": 17453 }, { "epoch": 0.7275228210578967, "grad_norm": 402.0, "learning_rate": 1.823877454570732e-05, "loss": 15.6252, "step": 17454 }, { "epoch": 0.727564503355425, "grad_norm": 60.75, "learning_rate": 1.8233561611579365e-05, "loss": 7.7817, "step": 17455 }, { "epoch": 0.7276061856529532, "grad_norm": 276.0, "learning_rate": 1.822834925639757e-05, "loss": 12.6877, "step": 17456 }, { "epoch": 0.7276478679504814, "grad_norm": 193.0, "learning_rate": 1.822313748025693e-05, "loss": 12.3133, "step": 17457 }, { "epoch": 0.7276895502480096, "grad_norm": 77.0, "learning_rate": 1.8217926283252434e-05, "loss": 9.938, "step": 17458 }, { "epoch": 0.727731232545538, "grad_norm": 244.0, "learning_rate": 1.8212715665479045e-05, "loss": 11.5629, "step": 17459 }, { "epoch": 0.7277729148430662, "grad_norm": 376.0, "learning_rate": 1.8207505627031723e-05, "loss": 15.3126, "step": 17460 }, { "epoch": 0.7278145971405944, "grad_norm": 245.0, "learning_rate": 1.8202296168005467e-05, "loss": 12.2505, "step": 17461 }, { "epoch": 0.7278562794381226, "grad_norm": 228.0, "learning_rate": 1.8197087288495153e-05, "loss": 11.4378, "step": 17462 }, { "epoch": 0.7278979617356509, "grad_norm": 888.0, "learning_rate": 1.819187898859578e-05, "loss": 24.8752, "step": 17463 }, { "epoch": 0.7279396440331791, "grad_norm": 260.0, "learning_rate": 1.81866712684022e-05, "loss": 12.7503, "step": 17464 }, { "epoch": 0.7279813263307073, "grad_norm": 320.0, "learning_rate": 1.8181464128009396e-05, "loss": 12.5025, "step": 17465 }, { "epoch": 0.7280230086282355, "grad_norm": 516.0, "learning_rate": 1.81762575675122e-05, "loss": 17.1253, "step": 17466 }, { "epoch": 0.7280646909257639, "grad_norm": 350.0, "learning_rate": 1.8171051587005568e-05, "loss": 14.6252, "step": 17467 }, { "epoch": 0.7281063732232921, "grad_norm": 67.5, "learning_rate": 1.8165846186584313e-05, "loss": 8.0016, "step": 17468 }, { "epoch": 0.7281480555208203, "grad_norm": 300.0, "learning_rate": 1.8160641366343368e-05, "loss": 13.9381, "step": 17469 }, { "epoch": 0.7281897378183485, "grad_norm": 424.0, "learning_rate": 1.815543712637753e-05, "loss": 15.6876, "step": 17470 }, { "epoch": 0.7282314201158768, "grad_norm": 187.0, "learning_rate": 1.815023346678169e-05, "loss": 11.2506, "step": 17471 }, { "epoch": 0.728273102413405, "grad_norm": 288.0, "learning_rate": 1.8145030387650675e-05, "loss": 13.8751, "step": 17472 }, { "epoch": 0.7283147847109332, "grad_norm": 133.0, "learning_rate": 1.813982788907931e-05, "loss": 6.0319, "step": 17473 }, { "epoch": 0.7283564670084615, "grad_norm": 416.0, "learning_rate": 1.813462597116241e-05, "loss": 14.7505, "step": 17474 }, { "epoch": 0.7283981493059898, "grad_norm": 700.0, "learning_rate": 1.8129424633994778e-05, "loss": 18.8794, "step": 17475 }, { "epoch": 0.728439831603518, "grad_norm": 209.0, "learning_rate": 1.8124223877671214e-05, "loss": 11.6879, "step": 17476 }, { "epoch": 0.7284815139010462, "grad_norm": 280.0, "learning_rate": 1.8119023702286496e-05, "loss": 12.5002, "step": 17477 }, { "epoch": 0.7285231961985744, "grad_norm": 368.0, "learning_rate": 1.8113824107935408e-05, "loss": 14.6877, "step": 17478 }, { "epoch": 0.7285648784961027, "grad_norm": 178.0, "learning_rate": 1.81086250947127e-05, "loss": 10.6878, "step": 17479 }, { "epoch": 0.728606560793631, "grad_norm": 229.0, "learning_rate": 1.8103426662713135e-05, "loss": 11.3753, "step": 17480 }, { "epoch": 0.7286482430911592, "grad_norm": 840.0, "learning_rate": 1.8098228812031447e-05, "loss": 20.0054, "step": 17481 }, { "epoch": 0.7286899253886874, "grad_norm": 280.0, "learning_rate": 1.8093031542762374e-05, "loss": 12.7505, "step": 17482 }, { "epoch": 0.7287316076862157, "grad_norm": 203.0, "learning_rate": 1.808783485500063e-05, "loss": 11.6254, "step": 17483 }, { "epoch": 0.7287732899837439, "grad_norm": 394.0, "learning_rate": 1.808263874884093e-05, "loss": 15.6255, "step": 17484 }, { "epoch": 0.7288149722812721, "grad_norm": 158.0, "learning_rate": 1.8077443224377976e-05, "loss": 9.4378, "step": 17485 }, { "epoch": 0.7288566545788003, "grad_norm": 280.0, "learning_rate": 1.8072248281706432e-05, "loss": 12.3132, "step": 17486 }, { "epoch": 0.7288983368763287, "grad_norm": 584.0, "learning_rate": 1.8067053920921027e-05, "loss": 19.1252, "step": 17487 }, { "epoch": 0.7289400191738569, "grad_norm": 71.0, "learning_rate": 1.806186014211638e-05, "loss": 8.1251, "step": 17488 }, { "epoch": 0.7289817014713851, "grad_norm": 416.0, "learning_rate": 1.8056666945387184e-05, "loss": 15.3128, "step": 17489 }, { "epoch": 0.7290233837689133, "grad_norm": 616.0, "learning_rate": 1.8051474330828044e-05, "loss": 18.6254, "step": 17490 }, { "epoch": 0.7290650660664416, "grad_norm": 243.0, "learning_rate": 1.804628229853365e-05, "loss": 12.2502, "step": 17491 }, { "epoch": 0.7291067483639698, "grad_norm": 346.0, "learning_rate": 1.8041090848598567e-05, "loss": 14.1878, "step": 17492 }, { "epoch": 0.729148430661498, "grad_norm": 45.0, "learning_rate": 1.8035899981117475e-05, "loss": 7.0316, "step": 17493 }, { "epoch": 0.7291901129590262, "grad_norm": 163.0, "learning_rate": 1.8030709696184912e-05, "loss": 8.6262, "step": 17494 }, { "epoch": 0.7292317952565546, "grad_norm": 430.0, "learning_rate": 1.802551999389551e-05, "loss": 15.6886, "step": 17495 }, { "epoch": 0.7292734775540828, "grad_norm": 169.0, "learning_rate": 1.8020330874343854e-05, "loss": 10.0629, "step": 17496 }, { "epoch": 0.729315159851611, "grad_norm": 684.0, "learning_rate": 1.80151423376245e-05, "loss": 20.7506, "step": 17497 }, { "epoch": 0.7293568421491392, "grad_norm": 1656.0, "learning_rate": 1.8009954383832024e-05, "loss": 39.7502, "step": 17498 }, { "epoch": 0.7293985244466675, "grad_norm": 137.0, "learning_rate": 1.8004767013060964e-05, "loss": 10.0007, "step": 17499 }, { "epoch": 0.7294402067441957, "grad_norm": 213.0, "learning_rate": 1.7999580225405867e-05, "loss": 8.814, "step": 17500 }, { "epoch": 0.729481889041724, "grad_norm": 406.0, "learning_rate": 1.799439402096126e-05, "loss": 15.938, "step": 17501 }, { "epoch": 0.7295235713392523, "grad_norm": 672.0, "learning_rate": 1.798920839982166e-05, "loss": 20.2524, "step": 17502 }, { "epoch": 0.7295652536367805, "grad_norm": 222.0, "learning_rate": 1.7984023362081583e-05, "loss": 8.5009, "step": 17503 }, { "epoch": 0.7296069359343087, "grad_norm": 676.0, "learning_rate": 1.7978838907835522e-05, "loss": 20.7505, "step": 17504 }, { "epoch": 0.7296486182318369, "grad_norm": 376.0, "learning_rate": 1.797365503717796e-05, "loss": 13.5012, "step": 17505 }, { "epoch": 0.7296903005293652, "grad_norm": 296.0, "learning_rate": 1.7968471750203386e-05, "loss": 12.8127, "step": 17506 }, { "epoch": 0.7297319828268934, "grad_norm": 280.0, "learning_rate": 1.7963289047006254e-05, "loss": 12.7503, "step": 17507 }, { "epoch": 0.7297736651244217, "grad_norm": 378.0, "learning_rate": 1.7958106927681024e-05, "loss": 15.3752, "step": 17508 }, { "epoch": 0.7298153474219499, "grad_norm": 564.0, "learning_rate": 1.7952925392322135e-05, "loss": 17.8751, "step": 17509 }, { "epoch": 0.7298570297194782, "grad_norm": 524.0, "learning_rate": 1.7947744441024012e-05, "loss": 18.5003, "step": 17510 }, { "epoch": 0.7298987120170064, "grad_norm": 342.0, "learning_rate": 1.794256407388113e-05, "loss": 13.8128, "step": 17511 }, { "epoch": 0.7299403943145346, "grad_norm": 552.0, "learning_rate": 1.7937384290987823e-05, "loss": 16.6255, "step": 17512 }, { "epoch": 0.7299820766120628, "grad_norm": 211.0, "learning_rate": 1.793220509243857e-05, "loss": 12.0002, "step": 17513 }, { "epoch": 0.7300237589095911, "grad_norm": 832.0, "learning_rate": 1.7927026478327692e-05, "loss": 24.3752, "step": 17514 }, { "epoch": 0.7300654412071194, "grad_norm": 692.0, "learning_rate": 1.792184844874964e-05, "loss": 18.1297, "step": 17515 }, { "epoch": 0.7301071235046476, "grad_norm": 428.0, "learning_rate": 1.7916671003798707e-05, "loss": 12.1254, "step": 17516 }, { "epoch": 0.7301488058021758, "grad_norm": 184.0, "learning_rate": 1.7911494143569337e-05, "loss": 10.4377, "step": 17517 }, { "epoch": 0.7301904880997041, "grad_norm": 147.0, "learning_rate": 1.7906317868155792e-05, "loss": 9.5003, "step": 17518 }, { "epoch": 0.7302321703972323, "grad_norm": 84.5, "learning_rate": 1.790114217765249e-05, "loss": 9.1252, "step": 17519 }, { "epoch": 0.7302738526947605, "grad_norm": 688.0, "learning_rate": 1.789596707215369e-05, "loss": 19.7502, "step": 17520 }, { "epoch": 0.7303155349922887, "grad_norm": 201.0, "learning_rate": 1.7890792551753755e-05, "loss": 10.8127, "step": 17521 }, { "epoch": 0.7303572172898171, "grad_norm": 390.0, "learning_rate": 1.7885618616546974e-05, "loss": 15.6266, "step": 17522 }, { "epoch": 0.7303988995873453, "grad_norm": 166.0, "learning_rate": 1.7880445266627648e-05, "loss": 11.4379, "step": 17523 }, { "epoch": 0.7304405818848735, "grad_norm": 177.0, "learning_rate": 1.7875272502090057e-05, "loss": 11.0002, "step": 17524 }, { "epoch": 0.7304822641824017, "grad_norm": 536.0, "learning_rate": 1.7870100323028476e-05, "loss": 16.3757, "step": 17525 }, { "epoch": 0.73052394647993, "grad_norm": 242.0, "learning_rate": 1.7864928729537167e-05, "loss": 11.313, "step": 17526 }, { "epoch": 0.7305656287774582, "grad_norm": 636.0, "learning_rate": 1.7859757721710384e-05, "loss": 20.0002, "step": 17527 }, { "epoch": 0.7306073110749864, "grad_norm": 1056.0, "learning_rate": 1.785458729964238e-05, "loss": 27.8758, "step": 17528 }, { "epoch": 0.7306489933725147, "grad_norm": 239.0, "learning_rate": 1.7849417463427364e-05, "loss": 13.6877, "step": 17529 }, { "epoch": 0.730690675670043, "grad_norm": 150.0, "learning_rate": 1.7844248213159577e-05, "loss": 11.1878, "step": 17530 }, { "epoch": 0.7307323579675712, "grad_norm": 636.0, "learning_rate": 1.7839079548933213e-05, "loss": 21.7502, "step": 17531 }, { "epoch": 0.7307740402650994, "grad_norm": 266.0, "learning_rate": 1.7833911470842483e-05, "loss": 13.1252, "step": 17532 }, { "epoch": 0.7308157225626276, "grad_norm": 410.0, "learning_rate": 1.782874397898157e-05, "loss": 17.6253, "step": 17533 }, { "epoch": 0.7308574048601559, "grad_norm": 464.0, "learning_rate": 1.782357707344466e-05, "loss": 19.7504, "step": 17534 }, { "epoch": 0.7308990871576841, "grad_norm": 1200.0, "learning_rate": 1.781841075432591e-05, "loss": 27.7549, "step": 17535 }, { "epoch": 0.7309407694552124, "grad_norm": 81.0, "learning_rate": 1.7813245021719467e-05, "loss": 6.2502, "step": 17536 }, { "epoch": 0.7309824517527406, "grad_norm": 524.0, "learning_rate": 1.780807987571952e-05, "loss": 15.8753, "step": 17537 }, { "epoch": 0.7310241340502689, "grad_norm": 584.0, "learning_rate": 1.7802915316420145e-05, "loss": 18.3753, "step": 17538 }, { "epoch": 0.7310658163477971, "grad_norm": 346.0, "learning_rate": 1.7797751343915526e-05, "loss": 14.5664, "step": 17539 }, { "epoch": 0.7311074986453253, "grad_norm": 560.0, "learning_rate": 1.7792587958299718e-05, "loss": 18.8753, "step": 17540 }, { "epoch": 0.7311491809428535, "grad_norm": 115.0, "learning_rate": 1.7787425159666886e-05, "loss": 10.5632, "step": 17541 }, { "epoch": 0.7311908632403818, "grad_norm": 424.0, "learning_rate": 1.7782262948111056e-05, "loss": 15.6881, "step": 17542 }, { "epoch": 0.7312325455379101, "grad_norm": 540.0, "learning_rate": 1.7777101323726376e-05, "loss": 18.2502, "step": 17543 }, { "epoch": 0.7312742278354383, "grad_norm": 87.5, "learning_rate": 1.7771940286606853e-05, "loss": 10.0001, "step": 17544 }, { "epoch": 0.7313159101329665, "grad_norm": 324.0, "learning_rate": 1.7766779836846597e-05, "loss": 12.5003, "step": 17545 }, { "epoch": 0.7313575924304948, "grad_norm": 458.0, "learning_rate": 1.776161997453964e-05, "loss": 14.7511, "step": 17546 }, { "epoch": 0.731399274728023, "grad_norm": 420.0, "learning_rate": 1.7756460699780016e-05, "loss": 16.3768, "step": 17547 }, { "epoch": 0.7314409570255512, "grad_norm": 284.0, "learning_rate": 1.7751302012661763e-05, "loss": 14.0005, "step": 17548 }, { "epoch": 0.7314826393230794, "grad_norm": 172.0, "learning_rate": 1.774614391327889e-05, "loss": 10.4378, "step": 17549 }, { "epoch": 0.7315243216206078, "grad_norm": 238.0, "learning_rate": 1.7740986401725413e-05, "loss": 13.0004, "step": 17550 }, { "epoch": 0.731566003918136, "grad_norm": 342.0, "learning_rate": 1.7735829478095316e-05, "loss": 15.063, "step": 17551 }, { "epoch": 0.7316076862156642, "grad_norm": 362.0, "learning_rate": 1.7730673142482596e-05, "loss": 14.3761, "step": 17552 }, { "epoch": 0.7316493685131924, "grad_norm": 392.0, "learning_rate": 1.7725517394981223e-05, "loss": 14.2503, "step": 17553 }, { "epoch": 0.7316910508107207, "grad_norm": 564.0, "learning_rate": 1.7720362235685156e-05, "loss": 19.6255, "step": 17554 }, { "epoch": 0.7317327331082489, "grad_norm": 86.5, "learning_rate": 1.7715207664688355e-05, "loss": 9.0003, "step": 17555 }, { "epoch": 0.7317744154057771, "grad_norm": 312.0, "learning_rate": 1.771005368208476e-05, "loss": 14.5004, "step": 17556 }, { "epoch": 0.7318160977033054, "grad_norm": 478.0, "learning_rate": 1.7704900287968302e-05, "loss": 17.2511, "step": 17557 }, { "epoch": 0.7318577800008337, "grad_norm": 544.0, "learning_rate": 1.7699747482432905e-05, "loss": 19.5007, "step": 17558 }, { "epoch": 0.7318994622983619, "grad_norm": 230.0, "learning_rate": 1.7694595265572477e-05, "loss": 11.6255, "step": 17559 }, { "epoch": 0.7319411445958901, "grad_norm": 131.0, "learning_rate": 1.76894436374809e-05, "loss": 9.2505, "step": 17560 }, { "epoch": 0.7319828268934183, "grad_norm": 248.0, "learning_rate": 1.7684292598252118e-05, "loss": 11.1255, "step": 17561 }, { "epoch": 0.7320245091909466, "grad_norm": 171.0, "learning_rate": 1.767914214797994e-05, "loss": 8.1254, "step": 17562 }, { "epoch": 0.7320661914884748, "grad_norm": 784.0, "learning_rate": 1.7673992286758297e-05, "loss": 19.1276, "step": 17563 }, { "epoch": 0.7321078737860031, "grad_norm": 138.0, "learning_rate": 1.7668843014680975e-05, "loss": 10.0005, "step": 17564 }, { "epoch": 0.7321495560835313, "grad_norm": 398.0, "learning_rate": 1.7663694331841896e-05, "loss": 13.0661, "step": 17565 }, { "epoch": 0.7321912383810596, "grad_norm": 356.0, "learning_rate": 1.7658546238334827e-05, "loss": 13.4377, "step": 17566 }, { "epoch": 0.7322329206785878, "grad_norm": 182.0, "learning_rate": 1.7653398734253655e-05, "loss": 9.5008, "step": 17567 }, { "epoch": 0.732274602976116, "grad_norm": 564.0, "learning_rate": 1.7648251819692134e-05, "loss": 18.5004, "step": 17568 }, { "epoch": 0.7323162852736442, "grad_norm": 752.0, "learning_rate": 1.7643105494744122e-05, "loss": 21.5001, "step": 17569 }, { "epoch": 0.7323579675711726, "grad_norm": 676.0, "learning_rate": 1.763795975950336e-05, "loss": 21.3756, "step": 17570 }, { "epoch": 0.7323996498687008, "grad_norm": 756.0, "learning_rate": 1.7632814614063664e-05, "loss": 23.7508, "step": 17571 }, { "epoch": 0.732441332166229, "grad_norm": 732.0, "learning_rate": 1.7627670058518798e-05, "loss": 20.5008, "step": 17572 }, { "epoch": 0.7324830144637572, "grad_norm": 205.0, "learning_rate": 1.7622526092962517e-05, "loss": 11.1253, "step": 17573 }, { "epoch": 0.7325246967612855, "grad_norm": 222.0, "learning_rate": 1.7617382717488575e-05, "loss": 11.0627, "step": 17574 }, { "epoch": 0.7325663790588137, "grad_norm": 100.5, "learning_rate": 1.76122399321907e-05, "loss": 7.7503, "step": 17575 }, { "epoch": 0.7326080613563419, "grad_norm": 418.0, "learning_rate": 1.7607097737162638e-05, "loss": 16.0008, "step": 17576 }, { "epoch": 0.7326497436538703, "grad_norm": 77.5, "learning_rate": 1.760195613249809e-05, "loss": 8.1254, "step": 17577 }, { "epoch": 0.7326914259513985, "grad_norm": 584.0, "learning_rate": 1.7596815118290765e-05, "loss": 18.7512, "step": 17578 }, { "epoch": 0.7327331082489267, "grad_norm": 516.0, "learning_rate": 1.7591674694634365e-05, "loss": 16.6253, "step": 17579 }, { "epoch": 0.7327747905464549, "grad_norm": 112.0, "learning_rate": 1.7586534861622566e-05, "loss": 9.6252, "step": 17580 }, { "epoch": 0.7328164728439832, "grad_norm": 224.0, "learning_rate": 1.758139561934905e-05, "loss": 10.8751, "step": 17581 }, { "epoch": 0.7328581551415114, "grad_norm": 249.0, "learning_rate": 1.757625696790748e-05, "loss": 12.5627, "step": 17582 }, { "epoch": 0.7328998374390396, "grad_norm": 292.0, "learning_rate": 1.7571118907391503e-05, "loss": 13.4377, "step": 17583 }, { "epoch": 0.7329415197365678, "grad_norm": 864.0, "learning_rate": 1.756598143789476e-05, "loss": 22.1254, "step": 17584 }, { "epoch": 0.7329832020340962, "grad_norm": 166.0, "learning_rate": 1.7560844559510886e-05, "loss": 10.1251, "step": 17585 }, { "epoch": 0.7330248843316244, "grad_norm": 247.0, "learning_rate": 1.7555708272333483e-05, "loss": 11.2502, "step": 17586 }, { "epoch": 0.7330665666291526, "grad_norm": 340.0, "learning_rate": 1.755057257645621e-05, "loss": 13.8753, "step": 17587 }, { "epoch": 0.7331082489266808, "grad_norm": 161.0, "learning_rate": 1.7545437471972597e-05, "loss": 10.6255, "step": 17588 }, { "epoch": 0.7331499312242091, "grad_norm": 352.0, "learning_rate": 1.7540302958976307e-05, "loss": 14.8149, "step": 17589 }, { "epoch": 0.7331916135217373, "grad_norm": 504.0, "learning_rate": 1.753516903756084e-05, "loss": 17.3752, "step": 17590 }, { "epoch": 0.7332332958192656, "grad_norm": 528.0, "learning_rate": 1.7530035707819832e-05, "loss": 17.1251, "step": 17591 }, { "epoch": 0.7332749781167938, "grad_norm": 214.0, "learning_rate": 1.7524902969846773e-05, "loss": 12.0627, "step": 17592 }, { "epoch": 0.7333166604143221, "grad_norm": 266.0, "learning_rate": 1.751977082373527e-05, "loss": 13.3134, "step": 17593 }, { "epoch": 0.7333583427118503, "grad_norm": 358.0, "learning_rate": 1.7514639269578796e-05, "loss": 15.3127, "step": 17594 }, { "epoch": 0.7334000250093785, "grad_norm": 186.0, "learning_rate": 1.750950830747094e-05, "loss": 10.3753, "step": 17595 }, { "epoch": 0.7334417073069067, "grad_norm": 1280.0, "learning_rate": 1.7504377937505145e-05, "loss": 33.5001, "step": 17596 }, { "epoch": 0.733483389604435, "grad_norm": 476.0, "learning_rate": 1.7499248159774966e-05, "loss": 15.3794, "step": 17597 }, { "epoch": 0.7335250719019633, "grad_norm": 162.0, "learning_rate": 1.7494118974373873e-05, "loss": 10.5627, "step": 17598 }, { "epoch": 0.7335667541994915, "grad_norm": 188.0, "learning_rate": 1.748899038139535e-05, "loss": 10.813, "step": 17599 }, { "epoch": 0.7336084364970197, "grad_norm": 264.0, "learning_rate": 1.7483862380932858e-05, "loss": 13.3767, "step": 17600 }, { "epoch": 0.733650118794548, "grad_norm": 976.0, "learning_rate": 1.7478734973079865e-05, "loss": 24.3752, "step": 17601 }, { "epoch": 0.7336918010920762, "grad_norm": 332.0, "learning_rate": 1.747360815792981e-05, "loss": 14.3752, "step": 17602 }, { "epoch": 0.7337334833896044, "grad_norm": 400.0, "learning_rate": 1.7468481935576136e-05, "loss": 15.2536, "step": 17603 }, { "epoch": 0.7337751656871326, "grad_norm": 252.0, "learning_rate": 1.7463356306112265e-05, "loss": 13.063, "step": 17604 }, { "epoch": 0.733816847984661, "grad_norm": 170.0, "learning_rate": 1.7458231269631614e-05, "loss": 11.8136, "step": 17605 }, { "epoch": 0.7338585302821892, "grad_norm": 100.0, "learning_rate": 1.745310682622759e-05, "loss": 7.5628, "step": 17606 }, { "epoch": 0.7339002125797174, "grad_norm": 320.0, "learning_rate": 1.7447982975993575e-05, "loss": 13.9378, "step": 17607 }, { "epoch": 0.7339418948772456, "grad_norm": 247.0, "learning_rate": 1.744285971902297e-05, "loss": 12.8129, "step": 17608 }, { "epoch": 0.7339835771747739, "grad_norm": 332.0, "learning_rate": 1.743773705540913e-05, "loss": 13.1883, "step": 17609 }, { "epoch": 0.7340252594723021, "grad_norm": 740.0, "learning_rate": 1.7432614985245425e-05, "loss": 23.6264, "step": 17610 }, { "epoch": 0.7340669417698303, "grad_norm": 108.0, "learning_rate": 1.742749350862521e-05, "loss": 8.938, "step": 17611 }, { "epoch": 0.7341086240673586, "grad_norm": 476.0, "learning_rate": 1.7422372625641792e-05, "loss": 14.6879, "step": 17612 }, { "epoch": 0.7341503063648869, "grad_norm": 344.0, "learning_rate": 1.7417252336388563e-05, "loss": 14.3759, "step": 17613 }, { "epoch": 0.7341919886624151, "grad_norm": 197.0, "learning_rate": 1.7412132640958765e-05, "loss": 12.6255, "step": 17614 }, { "epoch": 0.7342336709599433, "grad_norm": 676.0, "learning_rate": 1.7407013539445777e-05, "loss": 17.3793, "step": 17615 }, { "epoch": 0.7342753532574715, "grad_norm": 198.0, "learning_rate": 1.740189503194283e-05, "loss": 12.1878, "step": 17616 }, { "epoch": 0.7343170355549998, "grad_norm": 904.0, "learning_rate": 1.7396777118543274e-05, "loss": 22.0046, "step": 17617 }, { "epoch": 0.734358717852528, "grad_norm": 176.0, "learning_rate": 1.739165979934031e-05, "loss": 10.8751, "step": 17618 }, { "epoch": 0.7344004001500563, "grad_norm": 151.0, "learning_rate": 1.7386543074427282e-05, "loss": 9.8756, "step": 17619 }, { "epoch": 0.7344420824475845, "grad_norm": 856.0, "learning_rate": 1.7381426943897362e-05, "loss": 23.0004, "step": 17620 }, { "epoch": 0.7344837647451128, "grad_norm": 308.0, "learning_rate": 1.737631140784385e-05, "loss": 12.8755, "step": 17621 }, { "epoch": 0.734525447042641, "grad_norm": 207.0, "learning_rate": 1.7371196466359955e-05, "loss": 11.5676, "step": 17622 }, { "epoch": 0.7345671293401692, "grad_norm": 500.0, "learning_rate": 1.7366082119538907e-05, "loss": 17.3751, "step": 17623 }, { "epoch": 0.7346088116376974, "grad_norm": 398.0, "learning_rate": 1.7360968367473905e-05, "loss": 14.0003, "step": 17624 }, { "epoch": 0.7346504939352257, "grad_norm": 888.0, "learning_rate": 1.7355855210258153e-05, "loss": 22.2535, "step": 17625 }, { "epoch": 0.734692176232754, "grad_norm": 86.0, "learning_rate": 1.735074264798484e-05, "loss": 9.1253, "step": 17626 }, { "epoch": 0.7347338585302822, "grad_norm": 160.0, "learning_rate": 1.734563068074714e-05, "loss": 10.3751, "step": 17627 }, { "epoch": 0.7347755408278104, "grad_norm": 900.0, "learning_rate": 1.734051930863822e-05, "loss": 21.8755, "step": 17628 }, { "epoch": 0.7348172231253387, "grad_norm": 156.0, "learning_rate": 1.7335408531751233e-05, "loss": 9.9377, "step": 17629 }, { "epoch": 0.7348589054228669, "grad_norm": 258.0, "learning_rate": 1.7330298350179325e-05, "loss": 12.2545, "step": 17630 }, { "epoch": 0.7349005877203951, "grad_norm": 438.0, "learning_rate": 1.7325188764015633e-05, "loss": 16.2505, "step": 17631 }, { "epoch": 0.7349422700179233, "grad_norm": 414.0, "learning_rate": 1.7320079773353274e-05, "loss": 14.9409, "step": 17632 }, { "epoch": 0.7349839523154517, "grad_norm": 154.0, "learning_rate": 1.731497137828536e-05, "loss": 8.7502, "step": 17633 }, { "epoch": 0.7350256346129799, "grad_norm": 111.5, "learning_rate": 1.7309863578905e-05, "loss": 9.6254, "step": 17634 }, { "epoch": 0.7350673169105081, "grad_norm": 338.0, "learning_rate": 1.7304756375305276e-05, "loss": 15.1877, "step": 17635 }, { "epoch": 0.7351089992080363, "grad_norm": 179.0, "learning_rate": 1.729964976757925e-05, "loss": 11.6887, "step": 17636 }, { "epoch": 0.7351506815055646, "grad_norm": 488.0, "learning_rate": 1.7294543755820048e-05, "loss": 18.251, "step": 17637 }, { "epoch": 0.7351923638030928, "grad_norm": 540.0, "learning_rate": 1.728943834012065e-05, "loss": 18.1253, "step": 17638 }, { "epoch": 0.735234046100621, "grad_norm": 372.0, "learning_rate": 1.728433352057418e-05, "loss": 16.5001, "step": 17639 }, { "epoch": 0.7352757283981493, "grad_norm": 1264.0, "learning_rate": 1.7279229297273597e-05, "loss": 30.8753, "step": 17640 }, { "epoch": 0.7353174106956776, "grad_norm": 206.0, "learning_rate": 1.7274125670312e-05, "loss": 11.8127, "step": 17641 }, { "epoch": 0.7353590929932058, "grad_norm": 468.0, "learning_rate": 1.726902263978234e-05, "loss": 16.3752, "step": 17642 }, { "epoch": 0.735400775290734, "grad_norm": 560.0, "learning_rate": 1.726392020577768e-05, "loss": 18.7505, "step": 17643 }, { "epoch": 0.7354424575882622, "grad_norm": 470.0, "learning_rate": 1.7258818368390943e-05, "loss": 15.5627, "step": 17644 }, { "epoch": 0.7354841398857905, "grad_norm": 260.0, "learning_rate": 1.7253717127715184e-05, "loss": 11.3128, "step": 17645 }, { "epoch": 0.7355258221833187, "grad_norm": 524.0, "learning_rate": 1.72486164838433e-05, "loss": 18.2502, "step": 17646 }, { "epoch": 0.735567504480847, "grad_norm": 318.0, "learning_rate": 1.7243516436868314e-05, "loss": 13.6255, "step": 17647 }, { "epoch": 0.7356091867783753, "grad_norm": 176.0, "learning_rate": 1.7238416986883136e-05, "loss": 11.3127, "step": 17648 }, { "epoch": 0.7356508690759035, "grad_norm": 316.0, "learning_rate": 1.7233318133980726e-05, "loss": 13.5002, "step": 17649 }, { "epoch": 0.7356925513734317, "grad_norm": 438.0, "learning_rate": 1.7228219878254e-05, "loss": 14.688, "step": 17650 }, { "epoch": 0.7357342336709599, "grad_norm": 552.0, "learning_rate": 1.7223122219795873e-05, "loss": 18.5003, "step": 17651 }, { "epoch": 0.7357759159684882, "grad_norm": 380.0, "learning_rate": 1.7218025158699258e-05, "loss": 14.5012, "step": 17652 }, { "epoch": 0.7358175982660164, "grad_norm": 189.0, "learning_rate": 1.721292869505704e-05, "loss": 11.6878, "step": 17653 }, { "epoch": 0.7358592805635447, "grad_norm": 458.0, "learning_rate": 1.7207832828962105e-05, "loss": 16.8752, "step": 17654 }, { "epoch": 0.7359009628610729, "grad_norm": 656.0, "learning_rate": 1.7202737560507338e-05, "loss": 17.2541, "step": 17655 }, { "epoch": 0.7359426451586012, "grad_norm": 192.0, "learning_rate": 1.719764288978558e-05, "loss": 10.6252, "step": 17656 }, { "epoch": 0.7359843274561294, "grad_norm": 432.0, "learning_rate": 1.7192548816889697e-05, "loss": 14.6253, "step": 17657 }, { "epoch": 0.7360260097536576, "grad_norm": 158.0, "learning_rate": 1.718745534191252e-05, "loss": 10.4378, "step": 17658 }, { "epoch": 0.7360676920511858, "grad_norm": 656.0, "learning_rate": 1.7182362464946887e-05, "loss": 20.628, "step": 17659 }, { "epoch": 0.7361093743487142, "grad_norm": 1248.0, "learning_rate": 1.7177270186085614e-05, "loss": 26.7535, "step": 17660 }, { "epoch": 0.7361510566462424, "grad_norm": 1472.0, "learning_rate": 1.7172178505421498e-05, "loss": 28.5049, "step": 17661 }, { "epoch": 0.7361927389437706, "grad_norm": 428.0, "learning_rate": 1.7167087423047334e-05, "loss": 16.3753, "step": 17662 }, { "epoch": 0.7362344212412988, "grad_norm": 1440.0, "learning_rate": 1.7161996939055947e-05, "loss": 33.0019, "step": 17663 }, { "epoch": 0.7362761035388271, "grad_norm": 296.0, "learning_rate": 1.7156907053540045e-05, "loss": 14.8128, "step": 17664 }, { "epoch": 0.7363177858363553, "grad_norm": 366.0, "learning_rate": 1.7151817766592458e-05, "loss": 15.1253, "step": 17665 }, { "epoch": 0.7363594681338835, "grad_norm": 276.0, "learning_rate": 1.7146729078305884e-05, "loss": 12.4378, "step": 17666 }, { "epoch": 0.7364011504314117, "grad_norm": 88.5, "learning_rate": 1.7141640988773118e-05, "loss": 9.6881, "step": 17667 }, { "epoch": 0.7364428327289401, "grad_norm": 123.0, "learning_rate": 1.7136553498086828e-05, "loss": 8.1879, "step": 17668 }, { "epoch": 0.7364845150264683, "grad_norm": 208.0, "learning_rate": 1.7131466606339798e-05, "loss": 7.0006, "step": 17669 }, { "epoch": 0.7365261973239965, "grad_norm": 532.0, "learning_rate": 1.712638031362468e-05, "loss": 18.1253, "step": 17670 }, { "epoch": 0.7365678796215247, "grad_norm": 644.0, "learning_rate": 1.712129462003421e-05, "loss": 20.3755, "step": 17671 }, { "epoch": 0.736609561919053, "grad_norm": 416.0, "learning_rate": 1.711620952566107e-05, "loss": 15.6882, "step": 17672 }, { "epoch": 0.7366512442165812, "grad_norm": 360.0, "learning_rate": 1.7111125030597925e-05, "loss": 14.1876, "step": 17673 }, { "epoch": 0.7366929265141094, "grad_norm": 426.0, "learning_rate": 1.7106041134937452e-05, "loss": 15.9386, "step": 17674 }, { "epoch": 0.7367346088116377, "grad_norm": 1328.0, "learning_rate": 1.7100957838772292e-05, "loss": 25.6306, "step": 17675 }, { "epoch": 0.736776291109166, "grad_norm": 382.0, "learning_rate": 1.70958751421951e-05, "loss": 14.0002, "step": 17676 }, { "epoch": 0.7368179734066942, "grad_norm": 446.0, "learning_rate": 1.7090793045298502e-05, "loss": 15.6251, "step": 17677 }, { "epoch": 0.7368596557042224, "grad_norm": 284.0, "learning_rate": 1.7085711548175127e-05, "loss": 14.0002, "step": 17678 }, { "epoch": 0.7369013380017506, "grad_norm": 1176.0, "learning_rate": 1.708063065091758e-05, "loss": 28.8753, "step": 17679 }, { "epoch": 0.7369430202992789, "grad_norm": 268.0, "learning_rate": 1.7075550353618457e-05, "loss": 13.3129, "step": 17680 }, { "epoch": 0.7369847025968072, "grad_norm": 183.0, "learning_rate": 1.7070470656370357e-05, "loss": 12.0639, "step": 17681 }, { "epoch": 0.7370263848943354, "grad_norm": 284.0, "learning_rate": 1.7065391559265846e-05, "loss": 10.5003, "step": 17682 }, { "epoch": 0.7370680671918636, "grad_norm": 516.0, "learning_rate": 1.7060313062397504e-05, "loss": 18.3752, "step": 17683 }, { "epoch": 0.7371097494893919, "grad_norm": 264.0, "learning_rate": 1.705523516585788e-05, "loss": 13.8755, "step": 17684 }, { "epoch": 0.7371514317869201, "grad_norm": 1064.0, "learning_rate": 1.705015786973952e-05, "loss": 24.6323, "step": 17685 }, { "epoch": 0.7371931140844483, "grad_norm": 434.0, "learning_rate": 1.7045081174134936e-05, "loss": 14.3752, "step": 17686 }, { "epoch": 0.7372347963819765, "grad_norm": 482.0, "learning_rate": 1.7040005079136713e-05, "loss": 17.2504, "step": 17687 }, { "epoch": 0.7372764786795049, "grad_norm": 188.0, "learning_rate": 1.7034929584837284e-05, "loss": 11.6255, "step": 17688 }, { "epoch": 0.7373181609770331, "grad_norm": 288.0, "learning_rate": 1.702985469132922e-05, "loss": 12.0005, "step": 17689 }, { "epoch": 0.7373598432745613, "grad_norm": 194.0, "learning_rate": 1.702478039870496e-05, "loss": 11.5628, "step": 17690 }, { "epoch": 0.7374015255720895, "grad_norm": 318.0, "learning_rate": 1.701970670705703e-05, "loss": 14.7506, "step": 17691 }, { "epoch": 0.7374432078696178, "grad_norm": 1512.0, "learning_rate": 1.701463361647784e-05, "loss": 29.5041, "step": 17692 }, { "epoch": 0.737484890167146, "grad_norm": 134.0, "learning_rate": 1.7009561127059913e-05, "loss": 5.9693, "step": 17693 }, { "epoch": 0.7375265724646742, "grad_norm": 736.0, "learning_rate": 1.7004489238895634e-05, "loss": 20.7506, "step": 17694 }, { "epoch": 0.7375682547622024, "grad_norm": 458.0, "learning_rate": 1.69994179520775e-05, "loss": 15.4384, "step": 17695 }, { "epoch": 0.7376099370597308, "grad_norm": 392.0, "learning_rate": 1.6994347266697867e-05, "loss": 11.6887, "step": 17696 }, { "epoch": 0.737651619357259, "grad_norm": 536.0, "learning_rate": 1.6989277182849206e-05, "loss": 16.8756, "step": 17697 }, { "epoch": 0.7376933016547872, "grad_norm": 448.0, "learning_rate": 1.6984207700623893e-05, "loss": 15.7504, "step": 17698 }, { "epoch": 0.7377349839523154, "grad_norm": 248.0, "learning_rate": 1.697913882011433e-05, "loss": 12.8763, "step": 17699 }, { "epoch": 0.7377766662498437, "grad_norm": 672.0, "learning_rate": 1.697407054141289e-05, "loss": 21.3752, "step": 17700 }, { "epoch": 0.7378183485473719, "grad_norm": 596.0, "learning_rate": 1.696900286461195e-05, "loss": 17.7546, "step": 17701 }, { "epoch": 0.7378600308449001, "grad_norm": 716.0, "learning_rate": 1.6963935789803865e-05, "loss": 18.2504, "step": 17702 }, { "epoch": 0.7379017131424284, "grad_norm": 1056.0, "learning_rate": 1.6958869317080983e-05, "loss": 25.6254, "step": 17703 }, { "epoch": 0.7379433954399567, "grad_norm": 136.0, "learning_rate": 1.695380344653564e-05, "loss": 10.0629, "step": 17704 }, { "epoch": 0.7379850777374849, "grad_norm": 166.0, "learning_rate": 1.694873817826016e-05, "loss": 9.4378, "step": 17705 }, { "epoch": 0.7380267600350131, "grad_norm": 207.0, "learning_rate": 1.6943673512346863e-05, "loss": 11.3127, "step": 17706 }, { "epoch": 0.7380684423325413, "grad_norm": 428.0, "learning_rate": 1.693860944888805e-05, "loss": 14.5003, "step": 17707 }, { "epoch": 0.7381101246300696, "grad_norm": 728.0, "learning_rate": 1.6933545987976014e-05, "loss": 22.8756, "step": 17708 }, { "epoch": 0.7381518069275979, "grad_norm": 704.0, "learning_rate": 1.6928483129703037e-05, "loss": 20.0002, "step": 17709 }, { "epoch": 0.7381934892251261, "grad_norm": 684.0, "learning_rate": 1.6923420874161393e-05, "loss": 20.7518, "step": 17710 }, { "epoch": 0.7382351715226543, "grad_norm": 166.0, "learning_rate": 1.6918359221443342e-05, "loss": 7.6877, "step": 17711 }, { "epoch": 0.7382768538201826, "grad_norm": 241.0, "learning_rate": 1.691329817164111e-05, "loss": 12.0633, "step": 17712 }, { "epoch": 0.7383185361177108, "grad_norm": 84.5, "learning_rate": 1.6908237724846994e-05, "loss": 9.6258, "step": 17713 }, { "epoch": 0.738360218415239, "grad_norm": 292.0, "learning_rate": 1.6903177881153143e-05, "loss": 12.3757, "step": 17714 }, { "epoch": 0.7384019007127672, "grad_norm": 460.0, "learning_rate": 1.6898118640651854e-05, "loss": 15.876, "step": 17715 }, { "epoch": 0.7384435830102956, "grad_norm": 132.0, "learning_rate": 1.689306000343525e-05, "loss": 10.0628, "step": 17716 }, { "epoch": 0.7384852653078238, "grad_norm": 310.0, "learning_rate": 1.6888001969595607e-05, "loss": 13.6881, "step": 17717 }, { "epoch": 0.738526947605352, "grad_norm": 1912.0, "learning_rate": 1.688294453922503e-05, "loss": 37.5003, "step": 17718 }, { "epoch": 0.7385686299028802, "grad_norm": 168.0, "learning_rate": 1.6877887712415764e-05, "loss": 10.3755, "step": 17719 }, { "epoch": 0.7386103122004085, "grad_norm": 196.0, "learning_rate": 1.6872831489259905e-05, "loss": 10.1252, "step": 17720 }, { "epoch": 0.7386519944979367, "grad_norm": 360.0, "learning_rate": 1.6867775869849646e-05, "loss": 14.4386, "step": 17721 }, { "epoch": 0.7386936767954649, "grad_norm": 166.0, "learning_rate": 1.686272085427711e-05, "loss": 10.6253, "step": 17722 }, { "epoch": 0.7387353590929933, "grad_norm": 532.0, "learning_rate": 1.6857666442634427e-05, "loss": 17.5004, "step": 17723 }, { "epoch": 0.7387770413905215, "grad_norm": 94.5, "learning_rate": 1.685261263501372e-05, "loss": 9.2511, "step": 17724 }, { "epoch": 0.7388187236880497, "grad_norm": 1680.0, "learning_rate": 1.6847559431507093e-05, "loss": 31.3773, "step": 17725 }, { "epoch": 0.7388604059855779, "grad_norm": 304.0, "learning_rate": 1.6842506832206635e-05, "loss": 12.6877, "step": 17726 }, { "epoch": 0.7389020882831062, "grad_norm": 58.5, "learning_rate": 1.683745483720443e-05, "loss": 8.0627, "step": 17727 }, { "epoch": 0.7389437705806344, "grad_norm": 110.0, "learning_rate": 1.6832403446592558e-05, "loss": 9.8133, "step": 17728 }, { "epoch": 0.7389854528781626, "grad_norm": 476.0, "learning_rate": 1.6827352660463074e-05, "loss": 16.8751, "step": 17729 }, { "epoch": 0.7390271351756909, "grad_norm": 876.0, "learning_rate": 1.6822302478908036e-05, "loss": 23.1291, "step": 17730 }, { "epoch": 0.7390688174732192, "grad_norm": 210.0, "learning_rate": 1.681725290201948e-05, "loss": 12.1877, "step": 17731 }, { "epoch": 0.7391104997707474, "grad_norm": 360.0, "learning_rate": 1.6812203929889435e-05, "loss": 14.4379, "step": 17732 }, { "epoch": 0.7391521820682756, "grad_norm": 81.0, "learning_rate": 1.6807155562609923e-05, "loss": 8.0627, "step": 17733 }, { "epoch": 0.7391938643658038, "grad_norm": 516.0, "learning_rate": 1.680210780027295e-05, "loss": 17.7516, "step": 17734 }, { "epoch": 0.7392355466633321, "grad_norm": 488.0, "learning_rate": 1.6797060642970502e-05, "loss": 17.8751, "step": 17735 }, { "epoch": 0.7392772289608603, "grad_norm": 120.5, "learning_rate": 1.6792014090794572e-05, "loss": 8.188, "step": 17736 }, { "epoch": 0.7393189112583886, "grad_norm": 504.0, "learning_rate": 1.6786968143837134e-05, "loss": 17.2502, "step": 17737 }, { "epoch": 0.7393605935559168, "grad_norm": 282.0, "learning_rate": 1.6781922802190135e-05, "loss": 13.1879, "step": 17738 }, { "epoch": 0.7394022758534451, "grad_norm": 274.0, "learning_rate": 1.6776878065945572e-05, "loss": 12.5628, "step": 17739 }, { "epoch": 0.7394439581509733, "grad_norm": 494.0, "learning_rate": 1.6771833935195326e-05, "loss": 16.7508, "step": 17740 }, { "epoch": 0.7394856404485015, "grad_norm": 183.0, "learning_rate": 1.6766790410031387e-05, "loss": 6.4084, "step": 17741 }, { "epoch": 0.7395273227460297, "grad_norm": 156.0, "learning_rate": 1.676174749054561e-05, "loss": 9.188, "step": 17742 }, { "epoch": 0.739569005043558, "grad_norm": 444.0, "learning_rate": 1.6756705176829975e-05, "loss": 14.8137, "step": 17743 }, { "epoch": 0.7396106873410863, "grad_norm": 812.0, "learning_rate": 1.6751663468976298e-05, "loss": 24.8751, "step": 17744 }, { "epoch": 0.7396523696386145, "grad_norm": 318.0, "learning_rate": 1.674662236707654e-05, "loss": 13.5626, "step": 17745 }, { "epoch": 0.7396940519361427, "grad_norm": 217.0, "learning_rate": 1.674158187122251e-05, "loss": 11.0002, "step": 17746 }, { "epoch": 0.739735734233671, "grad_norm": 292.0, "learning_rate": 1.673654198150612e-05, "loss": 13.188, "step": 17747 }, { "epoch": 0.7397774165311992, "grad_norm": 354.0, "learning_rate": 1.6731502698019204e-05, "loss": 14.7501, "step": 17748 }, { "epoch": 0.7398190988287274, "grad_norm": 306.0, "learning_rate": 1.6726464020853604e-05, "loss": 12.9394, "step": 17749 }, { "epoch": 0.7398607811262556, "grad_norm": 1320.0, "learning_rate": 1.6721425950101154e-05, "loss": 24.7547, "step": 17750 }, { "epoch": 0.739902463423784, "grad_norm": 474.0, "learning_rate": 1.671638848585367e-05, "loss": 17.0003, "step": 17751 }, { "epoch": 0.7399441457213122, "grad_norm": 390.0, "learning_rate": 1.6711351628202954e-05, "loss": 15.9379, "step": 17752 }, { "epoch": 0.7399858280188404, "grad_norm": 320.0, "learning_rate": 1.6706315377240818e-05, "loss": 12.1882, "step": 17753 }, { "epoch": 0.7400275103163686, "grad_norm": 314.0, "learning_rate": 1.6701279733059035e-05, "loss": 13.3128, "step": 17754 }, { "epoch": 0.7400691926138969, "grad_norm": 314.0, "learning_rate": 1.6696244695749385e-05, "loss": 13.0003, "step": 17755 }, { "epoch": 0.7401108749114251, "grad_norm": 820.0, "learning_rate": 1.6691210265403633e-05, "loss": 21.8752, "step": 17756 }, { "epoch": 0.7401525572089533, "grad_norm": 77.5, "learning_rate": 1.6686176442113533e-05, "loss": 9.4377, "step": 17757 }, { "epoch": 0.7401942395064816, "grad_norm": 800.0, "learning_rate": 1.6681143225970826e-05, "loss": 21.5002, "step": 17758 }, { "epoch": 0.7402359218040099, "grad_norm": 644.0, "learning_rate": 1.6676110617067238e-05, "loss": 20.6256, "step": 17759 }, { "epoch": 0.7402776041015381, "grad_norm": 306.0, "learning_rate": 1.667107861549449e-05, "loss": 14.0627, "step": 17760 }, { "epoch": 0.7403192863990663, "grad_norm": 107.5, "learning_rate": 1.6666047221344293e-05, "loss": 9.8127, "step": 17761 }, { "epoch": 0.7403609686965945, "grad_norm": 238.0, "learning_rate": 1.6661016434708332e-05, "loss": 8.0629, "step": 17762 }, { "epoch": 0.7404026509941228, "grad_norm": 135.0, "learning_rate": 1.6655986255678335e-05, "loss": 9.3751, "step": 17763 }, { "epoch": 0.740444333291651, "grad_norm": 498.0, "learning_rate": 1.665095668434592e-05, "loss": 17.1253, "step": 17764 }, { "epoch": 0.7404860155891793, "grad_norm": 256.0, "learning_rate": 1.6645927720802812e-05, "loss": 12.7505, "step": 17765 }, { "epoch": 0.7405276978867075, "grad_norm": 147.0, "learning_rate": 1.66408993651406e-05, "loss": 10.3127, "step": 17766 }, { "epoch": 0.7405693801842358, "grad_norm": 454.0, "learning_rate": 1.6635871617450988e-05, "loss": 15.5628, "step": 17767 }, { "epoch": 0.740611062481764, "grad_norm": 438.0, "learning_rate": 1.6630844477825547e-05, "loss": 18.126, "step": 17768 }, { "epoch": 0.7406527447792922, "grad_norm": 233.0, "learning_rate": 1.662581794635596e-05, "loss": 11.4378, "step": 17769 }, { "epoch": 0.7406944270768204, "grad_norm": 556.0, "learning_rate": 1.6620792023133767e-05, "loss": 19.1252, "step": 17770 }, { "epoch": 0.7407361093743488, "grad_norm": 224.0, "learning_rate": 1.6615766708250642e-05, "loss": 10.063, "step": 17771 }, { "epoch": 0.740777791671877, "grad_norm": 306.0, "learning_rate": 1.6610742001798097e-05, "loss": 14.188, "step": 17772 }, { "epoch": 0.7408194739694052, "grad_norm": 105.0, "learning_rate": 1.6605717903867756e-05, "loss": 7.7505, "step": 17773 }, { "epoch": 0.7408611562669334, "grad_norm": 125.0, "learning_rate": 1.6600694414551177e-05, "loss": 8.3128, "step": 17774 }, { "epoch": 0.7409028385644617, "grad_norm": 182.0, "learning_rate": 1.65956715339399e-05, "loss": 8.0002, "step": 17775 }, { "epoch": 0.7409445208619899, "grad_norm": 470.0, "learning_rate": 1.659064926212548e-05, "loss": 16.8759, "step": 17776 }, { "epoch": 0.7409862031595181, "grad_norm": 240.0, "learning_rate": 1.6585627599199445e-05, "loss": 12.5635, "step": 17777 }, { "epoch": 0.7410278854570463, "grad_norm": 410.0, "learning_rate": 1.6580606545253307e-05, "loss": 15.8752, "step": 17778 }, { "epoch": 0.7410695677545747, "grad_norm": 274.0, "learning_rate": 1.657558610037858e-05, "loss": 11.3148, "step": 17779 }, { "epoch": 0.7411112500521029, "grad_norm": 430.0, "learning_rate": 1.657056626466677e-05, "loss": 15.8127, "step": 17780 }, { "epoch": 0.7411529323496311, "grad_norm": 157.0, "learning_rate": 1.6565547038209356e-05, "loss": 10.0004, "step": 17781 }, { "epoch": 0.7411946146471593, "grad_norm": 408.0, "learning_rate": 1.6560528421097813e-05, "loss": 15.626, "step": 17782 }, { "epoch": 0.7412362969446876, "grad_norm": 100.5, "learning_rate": 1.6555510413423614e-05, "loss": 9.6258, "step": 17783 }, { "epoch": 0.7412779792422158, "grad_norm": 390.0, "learning_rate": 1.6550493015278202e-05, "loss": 14.3127, "step": 17784 }, { "epoch": 0.741319661539744, "grad_norm": 338.0, "learning_rate": 1.654547622675302e-05, "loss": 15.5627, "step": 17785 }, { "epoch": 0.7413613438372723, "grad_norm": 105.0, "learning_rate": 1.654046004793951e-05, "loss": 9.1892, "step": 17786 }, { "epoch": 0.7414030261348006, "grad_norm": 1464.0, "learning_rate": 1.6535444478929086e-05, "loss": 28.6306, "step": 17787 }, { "epoch": 0.7414447084323288, "grad_norm": 170.0, "learning_rate": 1.6530429519813135e-05, "loss": 8.1878, "step": 17788 }, { "epoch": 0.741486390729857, "grad_norm": 208.0, "learning_rate": 1.6525415170683116e-05, "loss": 11.7579, "step": 17789 }, { "epoch": 0.7415280730273852, "grad_norm": 520.0, "learning_rate": 1.652040143163034e-05, "loss": 17.6251, "step": 17790 }, { "epoch": 0.7415697553249135, "grad_norm": 506.0, "learning_rate": 1.6515388302746253e-05, "loss": 16.3753, "step": 17791 }, { "epoch": 0.7416114376224417, "grad_norm": 350.0, "learning_rate": 1.6510375784122155e-05, "loss": 13.6878, "step": 17792 }, { "epoch": 0.74165311991997, "grad_norm": 512.0, "learning_rate": 1.6505363875849467e-05, "loss": 16.3757, "step": 17793 }, { "epoch": 0.7416948022174983, "grad_norm": 440.0, "learning_rate": 1.650035257801946e-05, "loss": 16.1252, "step": 17794 }, { "epoch": 0.7417364845150265, "grad_norm": 764.0, "learning_rate": 1.6495341890723537e-05, "loss": 22.2503, "step": 17795 }, { "epoch": 0.7417781668125547, "grad_norm": 192.0, "learning_rate": 1.6490331814052945e-05, "loss": 6.7821, "step": 17796 }, { "epoch": 0.7418198491100829, "grad_norm": 588.0, "learning_rate": 1.6485322348099052e-05, "loss": 18.5002, "step": 17797 }, { "epoch": 0.7418615314076112, "grad_norm": 171.0, "learning_rate": 1.6480313492953132e-05, "loss": 9.8128, "step": 17798 }, { "epoch": 0.7419032137051395, "grad_norm": 824.0, "learning_rate": 1.6475305248706473e-05, "loss": 22.8763, "step": 17799 }, { "epoch": 0.7419448960026677, "grad_norm": 1624.0, "learning_rate": 1.6470297615450354e-05, "loss": 34.7507, "step": 17800 }, { "epoch": 0.7419865783001959, "grad_norm": 320.0, "learning_rate": 1.6465290593276034e-05, "loss": 14.6881, "step": 17801 }, { "epoch": 0.7420282605977242, "grad_norm": 167.0, "learning_rate": 1.646028418227477e-05, "loss": 10.0004, "step": 17802 }, { "epoch": 0.7420699428952524, "grad_norm": 984.0, "learning_rate": 1.645527838253781e-05, "loss": 21.506, "step": 17803 }, { "epoch": 0.7421116251927806, "grad_norm": 96.0, "learning_rate": 1.6450273194156374e-05, "loss": 9.3753, "step": 17804 }, { "epoch": 0.7421533074903088, "grad_norm": 332.0, "learning_rate": 1.6445268617221686e-05, "loss": 12.5631, "step": 17805 }, { "epoch": 0.7421949897878372, "grad_norm": 284.0, "learning_rate": 1.644026465182496e-05, "loss": 11.0627, "step": 17806 }, { "epoch": 0.7422366720853654, "grad_norm": 87.0, "learning_rate": 1.643526129805739e-05, "loss": 8.8753, "step": 17807 }, { "epoch": 0.7422783543828936, "grad_norm": 94.5, "learning_rate": 1.6430258556010163e-05, "loss": 8.1254, "step": 17808 }, { "epoch": 0.7423200366804218, "grad_norm": 246.0, "learning_rate": 1.642525642577445e-05, "loss": 11.1879, "step": 17809 }, { "epoch": 0.7423617189779501, "grad_norm": 312.0, "learning_rate": 1.6420254907441423e-05, "loss": 14.1253, "step": 17810 }, { "epoch": 0.7424034012754783, "grad_norm": 121.0, "learning_rate": 1.6415254001102233e-05, "loss": 11.0005, "step": 17811 }, { "epoch": 0.7424450835730065, "grad_norm": 764.0, "learning_rate": 1.6410253706847994e-05, "loss": 21.8758, "step": 17812 }, { "epoch": 0.7424867658705347, "grad_norm": 209.0, "learning_rate": 1.6405254024769908e-05, "loss": 12.6881, "step": 17813 }, { "epoch": 0.7425284481680631, "grad_norm": 406.0, "learning_rate": 1.640025495495901e-05, "loss": 16.6253, "step": 17814 }, { "epoch": 0.7425701304655913, "grad_norm": 632.0, "learning_rate": 1.6395256497506477e-05, "loss": 20.6252, "step": 17815 }, { "epoch": 0.7426118127631195, "grad_norm": 458.0, "learning_rate": 1.6390258652503354e-05, "loss": 17.2502, "step": 17816 }, { "epoch": 0.7426534950606477, "grad_norm": 456.0, "learning_rate": 1.6385261420040776e-05, "loss": 16.2502, "step": 17817 }, { "epoch": 0.742695177358176, "grad_norm": 520.0, "learning_rate": 1.6380264800209767e-05, "loss": 17.3763, "step": 17818 }, { "epoch": 0.7427368596557042, "grad_norm": 572.0, "learning_rate": 1.6375268793101446e-05, "loss": 18.6252, "step": 17819 }, { "epoch": 0.7427785419532325, "grad_norm": 232.0, "learning_rate": 1.6370273398806802e-05, "loss": 11.6253, "step": 17820 }, { "epoch": 0.7428202242507607, "grad_norm": 456.0, "learning_rate": 1.6365278617416946e-05, "loss": 16.7503, "step": 17821 }, { "epoch": 0.742861906548289, "grad_norm": 388.0, "learning_rate": 1.6360284449022837e-05, "loss": 14.6293, "step": 17822 }, { "epoch": 0.7429035888458172, "grad_norm": 89.5, "learning_rate": 1.6355290893715547e-05, "loss": 9.1255, "step": 17823 }, { "epoch": 0.7429452711433454, "grad_norm": 396.0, "learning_rate": 1.635029795158607e-05, "loss": 16.0003, "step": 17824 }, { "epoch": 0.7429869534408736, "grad_norm": 392.0, "learning_rate": 1.63453056227254e-05, "loss": 15.6252, "step": 17825 }, { "epoch": 0.7430286357384019, "grad_norm": 564.0, "learning_rate": 1.634031390722452e-05, "loss": 18.1251, "step": 17826 }, { "epoch": 0.7430703180359302, "grad_norm": 278.0, "learning_rate": 1.6335322805174408e-05, "loss": 8.9377, "step": 17827 }, { "epoch": 0.7431120003334584, "grad_norm": 516.0, "learning_rate": 1.6330332316666025e-05, "loss": 18.2523, "step": 17828 }, { "epoch": 0.7431536826309866, "grad_norm": 932.0, "learning_rate": 1.6325342441790324e-05, "loss": 27.7524, "step": 17829 }, { "epoch": 0.7431953649285149, "grad_norm": 119.0, "learning_rate": 1.6320353180638255e-05, "loss": 9.3133, "step": 17830 }, { "epoch": 0.7432370472260431, "grad_norm": 352.0, "learning_rate": 1.631536453330073e-05, "loss": 13.8758, "step": 17831 }, { "epoch": 0.7432787295235713, "grad_norm": 241.0, "learning_rate": 1.631037649986868e-05, "loss": 12.1877, "step": 17832 }, { "epoch": 0.7433204118210995, "grad_norm": 278.0, "learning_rate": 1.630538908043301e-05, "loss": 12.0627, "step": 17833 }, { "epoch": 0.7433620941186279, "grad_norm": 109.0, "learning_rate": 1.6300402275084615e-05, "loss": 8.1881, "step": 17834 }, { "epoch": 0.7434037764161561, "grad_norm": 1608.0, "learning_rate": 1.629541608391438e-05, "loss": 33.2545, "step": 17835 }, { "epoch": 0.7434454587136843, "grad_norm": 114.0, "learning_rate": 1.629043050701317e-05, "loss": 8.0627, "step": 17836 }, { "epoch": 0.7434871410112125, "grad_norm": 173.0, "learning_rate": 1.6285445544471865e-05, "loss": 9.3752, "step": 17837 }, { "epoch": 0.7435288233087408, "grad_norm": 235.0, "learning_rate": 1.628046119638129e-05, "loss": 11.8127, "step": 17838 }, { "epoch": 0.743570505606269, "grad_norm": 262.0, "learning_rate": 1.627547746283234e-05, "loss": 12.4378, "step": 17839 }, { "epoch": 0.7436121879037972, "grad_norm": 95.5, "learning_rate": 1.627049434391577e-05, "loss": 6.7206, "step": 17840 }, { "epoch": 0.7436538702013255, "grad_norm": 149.0, "learning_rate": 1.626551183972247e-05, "loss": 8.4379, "step": 17841 }, { "epoch": 0.7436955524988538, "grad_norm": 37.5, "learning_rate": 1.6260529950343175e-05, "loss": 6.4064, "step": 17842 }, { "epoch": 0.743737234796382, "grad_norm": 228.0, "learning_rate": 1.6255548675868753e-05, "loss": 11.2504, "step": 17843 }, { "epoch": 0.7437789170939102, "grad_norm": 444.0, "learning_rate": 1.6250568016389917e-05, "loss": 16.501, "step": 17844 }, { "epoch": 0.7438205993914384, "grad_norm": 122.0, "learning_rate": 1.6245587971997507e-05, "loss": 10.0627, "step": 17845 }, { "epoch": 0.7438622816889667, "grad_norm": 454.0, "learning_rate": 1.624060854278222e-05, "loss": 15.2519, "step": 17846 }, { "epoch": 0.7439039639864949, "grad_norm": 652.0, "learning_rate": 1.6235629728834856e-05, "loss": 19.7514, "step": 17847 }, { "epoch": 0.7439456462840232, "grad_norm": 216.0, "learning_rate": 1.6230651530246145e-05, "loss": 10.9379, "step": 17848 }, { "epoch": 0.7439873285815514, "grad_norm": 145.0, "learning_rate": 1.6225673947106796e-05, "loss": 10.0627, "step": 17849 }, { "epoch": 0.7440290108790797, "grad_norm": 344.0, "learning_rate": 1.6220696979507543e-05, "loss": 12.3154, "step": 17850 }, { "epoch": 0.7440706931766079, "grad_norm": 185.0, "learning_rate": 1.621572062753909e-05, "loss": 11.1251, "step": 17851 }, { "epoch": 0.7441123754741361, "grad_norm": 528.0, "learning_rate": 1.6210744891292117e-05, "loss": 16.8752, "step": 17852 }, { "epoch": 0.7441540577716643, "grad_norm": 210.0, "learning_rate": 1.6205769770857328e-05, "loss": 11.3752, "step": 17853 }, { "epoch": 0.7441957400691926, "grad_norm": 204.0, "learning_rate": 1.620079526632538e-05, "loss": 10.6254, "step": 17854 }, { "epoch": 0.7442374223667209, "grad_norm": 1544.0, "learning_rate": 1.619582137778694e-05, "loss": 29.005, "step": 17855 }, { "epoch": 0.7442791046642491, "grad_norm": 388.0, "learning_rate": 1.6190848105332656e-05, "loss": 15.1879, "step": 17856 }, { "epoch": 0.7443207869617773, "grad_norm": 228.0, "learning_rate": 1.6185875449053167e-05, "loss": 11.7502, "step": 17857 }, { "epoch": 0.7443624692593056, "grad_norm": 131.0, "learning_rate": 1.61809034090391e-05, "loss": 10.9379, "step": 17858 }, { "epoch": 0.7444041515568338, "grad_norm": 236.0, "learning_rate": 1.617593198538107e-05, "loss": 12.0637, "step": 17859 }, { "epoch": 0.744445833854362, "grad_norm": 1136.0, "learning_rate": 1.6170961178169686e-05, "loss": 25.2526, "step": 17860 }, { "epoch": 0.7444875161518902, "grad_norm": 328.0, "learning_rate": 1.6165990987495533e-05, "loss": 13.8755, "step": 17861 }, { "epoch": 0.7445291984494186, "grad_norm": 2336.0, "learning_rate": 1.6161021413449196e-05, "loss": 53.002, "step": 17862 }, { "epoch": 0.7445708807469468, "grad_norm": 206.0, "learning_rate": 1.615605245612125e-05, "loss": 7.9387, "step": 17863 }, { "epoch": 0.744612563044475, "grad_norm": 308.0, "learning_rate": 1.6151084115602238e-05, "loss": 11.6251, "step": 17864 }, { "epoch": 0.7446542453420032, "grad_norm": 76.5, "learning_rate": 1.6146116391982757e-05, "loss": 9.1252, "step": 17865 }, { "epoch": 0.7446959276395315, "grad_norm": 256.0, "learning_rate": 1.6141149285353275e-05, "loss": 11.5632, "step": 17866 }, { "epoch": 0.7447376099370597, "grad_norm": 254.0, "learning_rate": 1.613618279580438e-05, "loss": 12.8756, "step": 17867 }, { "epoch": 0.7447792922345879, "grad_norm": 366.0, "learning_rate": 1.6131216923426533e-05, "loss": 15.7523, "step": 17868 }, { "epoch": 0.7448209745321163, "grad_norm": 185.0, "learning_rate": 1.61262516683103e-05, "loss": 10.3131, "step": 17869 }, { "epoch": 0.7448626568296445, "grad_norm": 225.0, "learning_rate": 1.61212870305461e-05, "loss": 12.5629, "step": 17870 }, { "epoch": 0.7449043391271727, "grad_norm": 912.0, "learning_rate": 1.6116323010224487e-05, "loss": 23.8752, "step": 17871 }, { "epoch": 0.7449460214247009, "grad_norm": 130.0, "learning_rate": 1.6111359607435862e-05, "loss": 9.5002, "step": 17872 }, { "epoch": 0.7449877037222292, "grad_norm": 408.0, "learning_rate": 1.6106396822270726e-05, "loss": 15.5633, "step": 17873 }, { "epoch": 0.7450293860197574, "grad_norm": 221.0, "learning_rate": 1.6101434654819526e-05, "loss": 11.1878, "step": 17874 }, { "epoch": 0.7450710683172856, "grad_norm": 492.0, "learning_rate": 1.6096473105172683e-05, "loss": 17.0002, "step": 17875 }, { "epoch": 0.7451127506148139, "grad_norm": 1456.0, "learning_rate": 1.609151217342063e-05, "loss": 29.5037, "step": 17876 }, { "epoch": 0.7451544329123422, "grad_norm": 294.0, "learning_rate": 1.6086551859653774e-05, "loss": 10.9379, "step": 17877 }, { "epoch": 0.7451961152098704, "grad_norm": 446.0, "learning_rate": 1.608159216396253e-05, "loss": 14.3779, "step": 17878 }, { "epoch": 0.7452377975073986, "grad_norm": 716.0, "learning_rate": 1.607663308643727e-05, "loss": 21.7506, "step": 17879 }, { "epoch": 0.7452794798049268, "grad_norm": 247.0, "learning_rate": 1.6071674627168388e-05, "loss": 13.7508, "step": 17880 }, { "epoch": 0.7453211621024551, "grad_norm": 202.0, "learning_rate": 1.6066716786246245e-05, "loss": 12.3753, "step": 17881 }, { "epoch": 0.7453628443999833, "grad_norm": 684.0, "learning_rate": 1.6061759563761203e-05, "loss": 20.7502, "step": 17882 }, { "epoch": 0.7454045266975116, "grad_norm": 180.0, "learning_rate": 1.6056802959803608e-05, "loss": 11.5003, "step": 17883 }, { "epoch": 0.7454462089950398, "grad_norm": 167.0, "learning_rate": 1.6051846974463786e-05, "loss": 9.8755, "step": 17884 }, { "epoch": 0.7454878912925681, "grad_norm": 440.0, "learning_rate": 1.6046891607832072e-05, "loss": 17.0002, "step": 17885 }, { "epoch": 0.7455295735900963, "grad_norm": 472.0, "learning_rate": 1.604193685999877e-05, "loss": 15.8757, "step": 17886 }, { "epoch": 0.7455712558876245, "grad_norm": 490.0, "learning_rate": 1.6036982731054184e-05, "loss": 17.1252, "step": 17887 }, { "epoch": 0.7456129381851527, "grad_norm": 147.0, "learning_rate": 1.6032029221088584e-05, "loss": 9.1888, "step": 17888 }, { "epoch": 0.745654620482681, "grad_norm": 880.0, "learning_rate": 1.6027076330192303e-05, "loss": 24.2511, "step": 17889 }, { "epoch": 0.7456963027802093, "grad_norm": 490.0, "learning_rate": 1.602212405845554e-05, "loss": 16.7502, "step": 17890 }, { "epoch": 0.7457379850777375, "grad_norm": 1304.0, "learning_rate": 1.601717240596861e-05, "loss": 33.5003, "step": 17891 }, { "epoch": 0.7457796673752657, "grad_norm": 260.0, "learning_rate": 1.6012221372821707e-05, "loss": 13.0629, "step": 17892 }, { "epoch": 0.745821349672794, "grad_norm": 256.0, "learning_rate": 1.600727095910511e-05, "loss": 12.8128, "step": 17893 }, { "epoch": 0.7458630319703222, "grad_norm": 392.0, "learning_rate": 1.6002321164908985e-05, "loss": 15.1256, "step": 17894 }, { "epoch": 0.7459047142678504, "grad_norm": 119.0, "learning_rate": 1.599737199032361e-05, "loss": 4.6565, "step": 17895 }, { "epoch": 0.7459463965653786, "grad_norm": 221.0, "learning_rate": 1.599242343543912e-05, "loss": 12.6252, "step": 17896 }, { "epoch": 0.745988078862907, "grad_norm": 209.0, "learning_rate": 1.5987475500345754e-05, "loss": 11.8127, "step": 17897 }, { "epoch": 0.7460297611604352, "grad_norm": 398.0, "learning_rate": 1.5982528185133645e-05, "loss": 13.5047, "step": 17898 }, { "epoch": 0.7460714434579634, "grad_norm": 416.0, "learning_rate": 1.597758148989299e-05, "loss": 16.7516, "step": 17899 }, { "epoch": 0.7461131257554916, "grad_norm": 222.0, "learning_rate": 1.5972635414713934e-05, "loss": 11.9378, "step": 17900 }, { "epoch": 0.7461548080530199, "grad_norm": 292.0, "learning_rate": 1.596768995968662e-05, "loss": 14.1877, "step": 17901 }, { "epoch": 0.7461964903505481, "grad_norm": 676.0, "learning_rate": 1.5962745124901173e-05, "loss": 22.5003, "step": 17902 }, { "epoch": 0.7462381726480763, "grad_norm": 204.0, "learning_rate": 1.5957800910447724e-05, "loss": 12.8755, "step": 17903 }, { "epoch": 0.7462798549456046, "grad_norm": 416.0, "learning_rate": 1.595285731641637e-05, "loss": 15.7503, "step": 17904 }, { "epoch": 0.7463215372431329, "grad_norm": 174.0, "learning_rate": 1.5947914342897215e-05, "loss": 10.6887, "step": 17905 }, { "epoch": 0.7463632195406611, "grad_norm": 330.0, "learning_rate": 1.5942971989980342e-05, "loss": 13.2501, "step": 17906 }, { "epoch": 0.7464049018381893, "grad_norm": 174.0, "learning_rate": 1.593803025775583e-05, "loss": 11.9376, "step": 17907 }, { "epoch": 0.7464465841357175, "grad_norm": 298.0, "learning_rate": 1.5933089146313745e-05, "loss": 14.0002, "step": 17908 }, { "epoch": 0.7464882664332458, "grad_norm": 1176.0, "learning_rate": 1.592814865574413e-05, "loss": 30.0048, "step": 17909 }, { "epoch": 0.746529948730774, "grad_norm": 340.0, "learning_rate": 1.5923208786137027e-05, "loss": 15.6254, "step": 17910 }, { "epoch": 0.7465716310283023, "grad_norm": 352.0, "learning_rate": 1.5918269537582475e-05, "loss": 14.9392, "step": 17911 }, { "epoch": 0.7466133133258305, "grad_norm": 179.0, "learning_rate": 1.5913330910170486e-05, "loss": 10.6878, "step": 17912 }, { "epoch": 0.7466549956233588, "grad_norm": 284.0, "learning_rate": 1.5908392903991064e-05, "loss": 12.6253, "step": 17913 }, { "epoch": 0.746696677920887, "grad_norm": 480.0, "learning_rate": 1.5903455519134192e-05, "loss": 16.6251, "step": 17914 }, { "epoch": 0.7467383602184152, "grad_norm": 732.0, "learning_rate": 1.5898518755689907e-05, "loss": 22.1252, "step": 17915 }, { "epoch": 0.7467800425159434, "grad_norm": 177.0, "learning_rate": 1.5893582613748104e-05, "loss": 11.3131, "step": 17916 }, { "epoch": 0.7468217248134718, "grad_norm": 236.0, "learning_rate": 1.5888647093398824e-05, "loss": 12.2505, "step": 17917 }, { "epoch": 0.746863407111, "grad_norm": 406.0, "learning_rate": 1.5883712194731947e-05, "loss": 16.0004, "step": 17918 }, { "epoch": 0.7469050894085282, "grad_norm": 348.0, "learning_rate": 1.5878777917837473e-05, "loss": 15.3753, "step": 17919 }, { "epoch": 0.7469467717060564, "grad_norm": 204.0, "learning_rate": 1.587384426280527e-05, "loss": 11.0003, "step": 17920 }, { "epoch": 0.7469884540035847, "grad_norm": 484.0, "learning_rate": 1.5868911229725313e-05, "loss": 17.6261, "step": 17921 }, { "epoch": 0.7470301363011129, "grad_norm": 568.0, "learning_rate": 1.5863978818687453e-05, "loss": 19.6251, "step": 17922 }, { "epoch": 0.7470718185986411, "grad_norm": 102.0, "learning_rate": 1.585904702978162e-05, "loss": 10.5004, "step": 17923 }, { "epoch": 0.7471135008961693, "grad_norm": 328.0, "learning_rate": 1.5854115863097692e-05, "loss": 14.8753, "step": 17924 }, { "epoch": 0.7471551831936977, "grad_norm": 1448.0, "learning_rate": 1.5849185318725528e-05, "loss": 31.6261, "step": 17925 }, { "epoch": 0.7471968654912259, "grad_norm": 128.0, "learning_rate": 1.5844255396754993e-05, "loss": 9.1879, "step": 17926 }, { "epoch": 0.7472385477887541, "grad_norm": 524.0, "learning_rate": 1.5839326097275937e-05, "loss": 18.0021, "step": 17927 }, { "epoch": 0.7472802300862823, "grad_norm": 314.0, "learning_rate": 1.58343974203782e-05, "loss": 13.5003, "step": 17928 }, { "epoch": 0.7473219123838106, "grad_norm": 496.0, "learning_rate": 1.5829469366151594e-05, "loss": 16.7502, "step": 17929 }, { "epoch": 0.7473635946813388, "grad_norm": 652.0, "learning_rate": 1.582454193468595e-05, "loss": 21.2503, "step": 17930 }, { "epoch": 0.747405276978867, "grad_norm": 111.5, "learning_rate": 1.5819615126071057e-05, "loss": 6.6257, "step": 17931 }, { "epoch": 0.7474469592763953, "grad_norm": 644.0, "learning_rate": 1.5814688940396717e-05, "loss": 18.5005, "step": 17932 }, { "epoch": 0.7474886415739236, "grad_norm": 620.0, "learning_rate": 1.5809763377752708e-05, "loss": 19.1277, "step": 17933 }, { "epoch": 0.7475303238714518, "grad_norm": 516.0, "learning_rate": 1.580483843822879e-05, "loss": 17.2509, "step": 17934 }, { "epoch": 0.74757200616898, "grad_norm": 310.0, "learning_rate": 1.5799914121914732e-05, "loss": 14.3133, "step": 17935 }, { "epoch": 0.7476136884665082, "grad_norm": 494.0, "learning_rate": 1.579499042890027e-05, "loss": 16.2503, "step": 17936 }, { "epoch": 0.7476553707640365, "grad_norm": 302.0, "learning_rate": 1.579006735927515e-05, "loss": 14.3756, "step": 17937 }, { "epoch": 0.7476970530615648, "grad_norm": 636.0, "learning_rate": 1.578514491312907e-05, "loss": 19.8757, "step": 17938 }, { "epoch": 0.747738735359093, "grad_norm": 226.0, "learning_rate": 1.5780223090551794e-05, "loss": 12.0633, "step": 17939 }, { "epoch": 0.7477804176566213, "grad_norm": 346.0, "learning_rate": 1.5775301891632953e-05, "loss": 15.1254, "step": 17940 }, { "epoch": 0.7478220999541495, "grad_norm": 430.0, "learning_rate": 1.5770381316462313e-05, "loss": 15.7503, "step": 17941 }, { "epoch": 0.7478637822516777, "grad_norm": 249.0, "learning_rate": 1.576546136512948e-05, "loss": 12.5003, "step": 17942 }, { "epoch": 0.7479054645492059, "grad_norm": 238.0, "learning_rate": 1.576054203772418e-05, "loss": 13.0004, "step": 17943 }, { "epoch": 0.7479471468467342, "grad_norm": 152.0, "learning_rate": 1.5755623334336012e-05, "loss": 10.0629, "step": 17944 }, { "epoch": 0.7479888291442625, "grad_norm": 79.0, "learning_rate": 1.5750705255054677e-05, "loss": 8.1881, "step": 17945 }, { "epoch": 0.7480305114417907, "grad_norm": 106.5, "learning_rate": 1.5745787799969752e-05, "loss": 8.5631, "step": 17946 }, { "epoch": 0.7480721937393189, "grad_norm": 215.0, "learning_rate": 1.5740870969170912e-05, "loss": 11.3128, "step": 17947 }, { "epoch": 0.7481138760368472, "grad_norm": 60.5, "learning_rate": 1.573595476274771e-05, "loss": 7.8129, "step": 17948 }, { "epoch": 0.7481555583343754, "grad_norm": 262.0, "learning_rate": 1.5731039180789793e-05, "loss": 13.2504, "step": 17949 }, { "epoch": 0.7481972406319036, "grad_norm": 524.0, "learning_rate": 1.572612422338673e-05, "loss": 15.9377, "step": 17950 }, { "epoch": 0.7482389229294318, "grad_norm": 51.75, "learning_rate": 1.5721209890628092e-05, "loss": 7.8128, "step": 17951 }, { "epoch": 0.7482806052269602, "grad_norm": 744.0, "learning_rate": 1.5716296182603447e-05, "loss": 21.1254, "step": 17952 }, { "epoch": 0.7483222875244884, "grad_norm": 167.0, "learning_rate": 1.5711383099402342e-05, "loss": 10.8126, "step": 17953 }, { "epoch": 0.7483639698220166, "grad_norm": 316.0, "learning_rate": 1.5706470641114336e-05, "loss": 13.1252, "step": 17954 }, { "epoch": 0.7484056521195448, "grad_norm": 434.0, "learning_rate": 1.5701558807828936e-05, "loss": 16.1254, "step": 17955 }, { "epoch": 0.7484473344170731, "grad_norm": 394.0, "learning_rate": 1.5696647599635677e-05, "loss": 15.3134, "step": 17956 }, { "epoch": 0.7484890167146013, "grad_norm": 528.0, "learning_rate": 1.5691737016624057e-05, "loss": 18.5016, "step": 17957 }, { "epoch": 0.7485306990121295, "grad_norm": 1004.0, "learning_rate": 1.568682705888358e-05, "loss": 27.1262, "step": 17958 }, { "epoch": 0.7485723813096578, "grad_norm": 596.0, "learning_rate": 1.5681917726503726e-05, "loss": 15.6904, "step": 17959 }, { "epoch": 0.7486140636071861, "grad_norm": 354.0, "learning_rate": 1.5677009019573963e-05, "loss": 14.1879, "step": 17960 }, { "epoch": 0.7486557459047143, "grad_norm": 310.0, "learning_rate": 1.5672100938183764e-05, "loss": 13.4379, "step": 17961 }, { "epoch": 0.7486974282022425, "grad_norm": 215.0, "learning_rate": 1.5667193482422572e-05, "loss": 11.3752, "step": 17962 }, { "epoch": 0.7487391104997707, "grad_norm": 596.0, "learning_rate": 1.566228665237982e-05, "loss": 17.6267, "step": 17963 }, { "epoch": 0.748780792797299, "grad_norm": 338.0, "learning_rate": 1.565738044814493e-05, "loss": 13.8774, "step": 17964 }, { "epoch": 0.7488224750948272, "grad_norm": 540.0, "learning_rate": 1.565247486980736e-05, "loss": 17.6251, "step": 17965 }, { "epoch": 0.7488641573923555, "grad_norm": 156.0, "learning_rate": 1.5647569917456457e-05, "loss": 10.3754, "step": 17966 }, { "epoch": 0.7489058396898837, "grad_norm": 624.0, "learning_rate": 1.564266559118167e-05, "loss": 20.3763, "step": 17967 }, { "epoch": 0.748947521987412, "grad_norm": 348.0, "learning_rate": 1.5637761891072323e-05, "loss": 13.5004, "step": 17968 }, { "epoch": 0.7489892042849402, "grad_norm": 149.0, "learning_rate": 1.563285881721785e-05, "loss": 8.8131, "step": 17969 }, { "epoch": 0.7490308865824684, "grad_norm": 249.0, "learning_rate": 1.5627956369707537e-05, "loss": 12.1881, "step": 17970 }, { "epoch": 0.7490725688799966, "grad_norm": 620.0, "learning_rate": 1.5623054548630806e-05, "loss": 19.2503, "step": 17971 }, { "epoch": 0.749114251177525, "grad_norm": 520.0, "learning_rate": 1.5618153354076926e-05, "loss": 16.2535, "step": 17972 }, { "epoch": 0.7491559334750532, "grad_norm": 286.0, "learning_rate": 1.561325278613527e-05, "loss": 13.6251, "step": 17973 }, { "epoch": 0.7491976157725814, "grad_norm": 221.0, "learning_rate": 1.5608352844895134e-05, "loss": 10.6256, "step": 17974 }, { "epoch": 0.7492392980701096, "grad_norm": 418.0, "learning_rate": 1.5603453530445823e-05, "loss": 16.5002, "step": 17975 }, { "epoch": 0.7492809803676379, "grad_norm": 334.0, "learning_rate": 1.5598554842876622e-05, "loss": 13.6252, "step": 17976 }, { "epoch": 0.7493226626651661, "grad_norm": 478.0, "learning_rate": 1.559365678227681e-05, "loss": 17.6252, "step": 17977 }, { "epoch": 0.7493643449626943, "grad_norm": 176.0, "learning_rate": 1.558875934873566e-05, "loss": 11.0003, "step": 17978 }, { "epoch": 0.7494060272602225, "grad_norm": 736.0, "learning_rate": 1.5583862542342424e-05, "loss": 20.2504, "step": 17979 }, { "epoch": 0.7494477095577509, "grad_norm": 247.0, "learning_rate": 1.5578966363186353e-05, "loss": 12.1882, "step": 17980 }, { "epoch": 0.7494893918552791, "grad_norm": 272.0, "learning_rate": 1.5574070811356673e-05, "loss": 12.7503, "step": 17981 }, { "epoch": 0.7495310741528073, "grad_norm": 304.0, "learning_rate": 1.556917588694261e-05, "loss": 13.1252, "step": 17982 }, { "epoch": 0.7495727564503355, "grad_norm": 1752.0, "learning_rate": 1.5564281590033374e-05, "loss": 32.2556, "step": 17983 }, { "epoch": 0.7496144387478638, "grad_norm": 270.0, "learning_rate": 1.555938792071816e-05, "loss": 10.5632, "step": 17984 }, { "epoch": 0.749656121045392, "grad_norm": 1960.0, "learning_rate": 1.5554494879086156e-05, "loss": 35.0072, "step": 17985 }, { "epoch": 0.7496978033429202, "grad_norm": 324.0, "learning_rate": 1.5549602465226548e-05, "loss": 14.6881, "step": 17986 }, { "epoch": 0.7497394856404485, "grad_norm": 864.0, "learning_rate": 1.5544710679228484e-05, "loss": 24.3781, "step": 17987 }, { "epoch": 0.7497811679379768, "grad_norm": 616.0, "learning_rate": 1.5539819521181136e-05, "loss": 17.8753, "step": 17988 }, { "epoch": 0.749822850235505, "grad_norm": 81.5, "learning_rate": 1.553492899117363e-05, "loss": 8.5003, "step": 17989 }, { "epoch": 0.7498645325330332, "grad_norm": 568.0, "learning_rate": 1.553003908929509e-05, "loss": 18.7502, "step": 17990 }, { "epoch": 0.7499062148305614, "grad_norm": 548.0, "learning_rate": 1.5525149815634675e-05, "loss": 15.6879, "step": 17991 }, { "epoch": 0.7499478971280897, "grad_norm": 266.0, "learning_rate": 1.552026117028144e-05, "loss": 12.8768, "step": 17992 }, { "epoch": 0.749989579425618, "grad_norm": 199.0, "learning_rate": 1.5515373153324537e-05, "loss": 12.4391, "step": 17993 }, { "epoch": 0.7500312617231462, "grad_norm": 296.0, "learning_rate": 1.5510485764852983e-05, "loss": 12.1876, "step": 17994 }, { "epoch": 0.7500729440206744, "grad_norm": 462.0, "learning_rate": 1.5505599004955922e-05, "loss": 16.3758, "step": 17995 }, { "epoch": 0.7501146263182027, "grad_norm": 442.0, "learning_rate": 1.550071287372235e-05, "loss": 15.5644, "step": 17996 }, { "epoch": 0.7501563086157309, "grad_norm": 752.0, "learning_rate": 1.5495827371241377e-05, "loss": 20.1252, "step": 17997 }, { "epoch": 0.7501979909132591, "grad_norm": 190.0, "learning_rate": 1.5490942497601986e-05, "loss": 11.4379, "step": 17998 }, { "epoch": 0.7502396732107873, "grad_norm": 203.0, "learning_rate": 1.5486058252893242e-05, "loss": 9.6251, "step": 17999 }, { "epoch": 0.7502813555083157, "grad_norm": 300.0, "learning_rate": 1.5481174637204156e-05, "loss": 12.8755, "step": 18000 }, { "epoch": 0.7503230378058439, "grad_norm": 210.0, "learning_rate": 1.547629165062372e-05, "loss": 11.0627, "step": 18001 }, { "epoch": 0.7503647201033721, "grad_norm": 720.0, "learning_rate": 1.547140929324094e-05, "loss": 19.3753, "step": 18002 }, { "epoch": 0.7504064024009003, "grad_norm": 77.0, "learning_rate": 1.546652756514479e-05, "loss": 8.0627, "step": 18003 }, { "epoch": 0.7504480846984286, "grad_norm": 366.0, "learning_rate": 1.5461646466424236e-05, "loss": 14.3129, "step": 18004 }, { "epoch": 0.7504897669959568, "grad_norm": 300.0, "learning_rate": 1.5456765997168245e-05, "loss": 13.563, "step": 18005 }, { "epoch": 0.750531449293485, "grad_norm": 290.0, "learning_rate": 1.545188615746576e-05, "loss": 13.7502, "step": 18006 }, { "epoch": 0.7505731315910132, "grad_norm": 192.0, "learning_rate": 1.5447006947405717e-05, "loss": 11.5009, "step": 18007 }, { "epoch": 0.7506148138885416, "grad_norm": 78.0, "learning_rate": 1.5442128367077034e-05, "loss": 8.4376, "step": 18008 }, { "epoch": 0.7506564961860698, "grad_norm": 426.0, "learning_rate": 1.5437250416568637e-05, "loss": 15.7528, "step": 18009 }, { "epoch": 0.750698178483598, "grad_norm": 304.0, "learning_rate": 1.5432373095969417e-05, "loss": 11.4385, "step": 18010 }, { "epoch": 0.7507398607811262, "grad_norm": 172.0, "learning_rate": 1.5427496405368264e-05, "loss": 12.0628, "step": 18011 }, { "epoch": 0.7507815430786545, "grad_norm": 208.0, "learning_rate": 1.542262034485406e-05, "loss": 10.0649, "step": 18012 }, { "epoch": 0.7508232253761827, "grad_norm": 175.0, "learning_rate": 1.541774491451567e-05, "loss": 11.1882, "step": 18013 }, { "epoch": 0.750864907673711, "grad_norm": 322.0, "learning_rate": 1.541287011444193e-05, "loss": 14.3126, "step": 18014 }, { "epoch": 0.7509065899712393, "grad_norm": 628.0, "learning_rate": 1.5407995944721737e-05, "loss": 18.6301, "step": 18015 }, { "epoch": 0.7509482722687675, "grad_norm": 376.0, "learning_rate": 1.540312240544386e-05, "loss": 15.188, "step": 18016 }, { "epoch": 0.7509899545662957, "grad_norm": 434.0, "learning_rate": 1.5398249496697174e-05, "loss": 14.0003, "step": 18017 }, { "epoch": 0.7510316368638239, "grad_norm": 290.0, "learning_rate": 1.5393377218570433e-05, "loss": 14.1252, "step": 18018 }, { "epoch": 0.7510733191613522, "grad_norm": 458.0, "learning_rate": 1.5388505571152494e-05, "loss": 16.3753, "step": 18019 }, { "epoch": 0.7511150014588804, "grad_norm": 572.0, "learning_rate": 1.538363455453209e-05, "loss": 18.7501, "step": 18020 }, { "epoch": 0.7511566837564086, "grad_norm": 316.0, "learning_rate": 1.5378764168798044e-05, "loss": 14.6252, "step": 18021 }, { "epoch": 0.7511983660539369, "grad_norm": 105.0, "learning_rate": 1.5373894414039065e-05, "loss": 9.1882, "step": 18022 }, { "epoch": 0.7512400483514652, "grad_norm": 84.0, "learning_rate": 1.5369025290343965e-05, "loss": 8.4383, "step": 18023 }, { "epoch": 0.7512817306489934, "grad_norm": 118.0, "learning_rate": 1.536415679780142e-05, "loss": 10.2505, "step": 18024 }, { "epoch": 0.7513234129465216, "grad_norm": 308.0, "learning_rate": 1.535928893650021e-05, "loss": 14.6878, "step": 18025 }, { "epoch": 0.7513650952440498, "grad_norm": 262.0, "learning_rate": 1.5354421706529034e-05, "loss": 11.4378, "step": 18026 }, { "epoch": 0.7514067775415781, "grad_norm": 728.0, "learning_rate": 1.53495551079766e-05, "loss": 22.2507, "step": 18027 }, { "epoch": 0.7514484598391064, "grad_norm": 306.0, "learning_rate": 1.5344689140931594e-05, "loss": 12.813, "step": 18028 }, { "epoch": 0.7514901421366346, "grad_norm": 1024.0, "learning_rate": 1.5339823805482713e-05, "loss": 22.1286, "step": 18029 }, { "epoch": 0.7515318244341628, "grad_norm": 484.0, "learning_rate": 1.5334959101718617e-05, "loss": 16.5002, "step": 18030 }, { "epoch": 0.7515735067316911, "grad_norm": 240.0, "learning_rate": 1.5330095029727963e-05, "loss": 12.5628, "step": 18031 }, { "epoch": 0.7516151890292193, "grad_norm": 276.0, "learning_rate": 1.532523158959941e-05, "loss": 12.7501, "step": 18032 }, { "epoch": 0.7516568713267475, "grad_norm": 165.0, "learning_rate": 1.532036878142159e-05, "loss": 11.4383, "step": 18033 }, { "epoch": 0.7516985536242757, "grad_norm": 182.0, "learning_rate": 1.5315506605283126e-05, "loss": 13.0004, "step": 18034 }, { "epoch": 0.7517402359218041, "grad_norm": 336.0, "learning_rate": 1.5310645061272634e-05, "loss": 13.2502, "step": 18035 }, { "epoch": 0.7517819182193323, "grad_norm": 394.0, "learning_rate": 1.5305784149478714e-05, "loss": 13.8752, "step": 18036 }, { "epoch": 0.7518236005168605, "grad_norm": 185.0, "learning_rate": 1.5300923869989957e-05, "loss": 9.8128, "step": 18037 }, { "epoch": 0.7518652828143887, "grad_norm": 772.0, "learning_rate": 1.5296064222894947e-05, "loss": 22.1288, "step": 18038 }, { "epoch": 0.751906965111917, "grad_norm": 360.0, "learning_rate": 1.5291205208282245e-05, "loss": 15.2512, "step": 18039 }, { "epoch": 0.7519486474094452, "grad_norm": 508.0, "learning_rate": 1.5286346826240388e-05, "loss": 18.5002, "step": 18040 }, { "epoch": 0.7519903297069734, "grad_norm": 516.0, "learning_rate": 1.528148907685798e-05, "loss": 16.7504, "step": 18041 }, { "epoch": 0.7520320120045016, "grad_norm": 82.0, "learning_rate": 1.527663196022348e-05, "loss": 9.3753, "step": 18042 }, { "epoch": 0.75207369430203, "grad_norm": 215.0, "learning_rate": 1.5271775476425482e-05, "loss": 11.5005, "step": 18043 }, { "epoch": 0.7521153765995582, "grad_norm": 229.0, "learning_rate": 1.5266919625552422e-05, "loss": 12.1256, "step": 18044 }, { "epoch": 0.7521570588970864, "grad_norm": 434.0, "learning_rate": 1.526206440769287e-05, "loss": 17.2502, "step": 18045 }, { "epoch": 0.7521987411946146, "grad_norm": 414.0, "learning_rate": 1.5257209822935248e-05, "loss": 15.938, "step": 18046 }, { "epoch": 0.7522404234921429, "grad_norm": 149.0, "learning_rate": 1.525235587136809e-05, "loss": 11.4378, "step": 18047 }, { "epoch": 0.7522821057896711, "grad_norm": 133.0, "learning_rate": 1.52475025530798e-05, "loss": 6.532, "step": 18048 }, { "epoch": 0.7523237880871994, "grad_norm": 560.0, "learning_rate": 1.5242649868158875e-05, "loss": 20.6253, "step": 18049 }, { "epoch": 0.7523654703847276, "grad_norm": 592.0, "learning_rate": 1.5237797816693743e-05, "loss": 20.6252, "step": 18050 }, { "epoch": 0.7524071526822559, "grad_norm": 264.0, "learning_rate": 1.5232946398772829e-05, "loss": 12.5003, "step": 18051 }, { "epoch": 0.7524488349797841, "grad_norm": 366.0, "learning_rate": 1.5228095614484562e-05, "loss": 15.0003, "step": 18052 }, { "epoch": 0.7524905172773123, "grad_norm": 536.0, "learning_rate": 1.522324546391733e-05, "loss": 18.2503, "step": 18053 }, { "epoch": 0.7525321995748405, "grad_norm": 628.0, "learning_rate": 1.5218395947159541e-05, "loss": 19.5006, "step": 18054 }, { "epoch": 0.7525738818723688, "grad_norm": 1496.0, "learning_rate": 1.5213547064299572e-05, "loss": 28.1283, "step": 18055 }, { "epoch": 0.7526155641698971, "grad_norm": 171.0, "learning_rate": 1.5208698815425793e-05, "loss": 10.4378, "step": 18056 }, { "epoch": 0.7526572464674253, "grad_norm": 1232.0, "learning_rate": 1.5203851200626573e-05, "loss": 34.2513, "step": 18057 }, { "epoch": 0.7526989287649535, "grad_norm": 187.0, "learning_rate": 1.5199004219990249e-05, "loss": 10.688, "step": 18058 }, { "epoch": 0.7527406110624818, "grad_norm": 153.0, "learning_rate": 1.5194157873605164e-05, "loss": 10.7502, "step": 18059 }, { "epoch": 0.75278229336001, "grad_norm": 227.0, "learning_rate": 1.5189312161559644e-05, "loss": 12.313, "step": 18060 }, { "epoch": 0.7528239756575382, "grad_norm": 434.0, "learning_rate": 1.5184467083941995e-05, "loss": 15.5001, "step": 18061 }, { "epoch": 0.7528656579550664, "grad_norm": 596.0, "learning_rate": 1.517962264084053e-05, "loss": 20.2502, "step": 18062 }, { "epoch": 0.7529073402525948, "grad_norm": 1704.0, "learning_rate": 1.5174778832343528e-05, "loss": 34.7504, "step": 18063 }, { "epoch": 0.752949022550123, "grad_norm": 368.0, "learning_rate": 1.5169935658539259e-05, "loss": 13.7503, "step": 18064 }, { "epoch": 0.7529907048476512, "grad_norm": 428.0, "learning_rate": 1.5165093119516038e-05, "loss": 13.8757, "step": 18065 }, { "epoch": 0.7530323871451794, "grad_norm": 153.0, "learning_rate": 1.5160251215362054e-05, "loss": 9.6258, "step": 18066 }, { "epoch": 0.7530740694427077, "grad_norm": 454.0, "learning_rate": 1.5155409946165616e-05, "loss": 14.7504, "step": 18067 }, { "epoch": 0.7531157517402359, "grad_norm": 912.0, "learning_rate": 1.5150569312014895e-05, "loss": 22.2535, "step": 18068 }, { "epoch": 0.7531574340377641, "grad_norm": 216.0, "learning_rate": 1.514572931299817e-05, "loss": 12.2501, "step": 18069 }, { "epoch": 0.7531991163352924, "grad_norm": 404.0, "learning_rate": 1.5140889949203595e-05, "loss": 15.6253, "step": 18070 }, { "epoch": 0.7532407986328207, "grad_norm": 376.0, "learning_rate": 1.5136051220719427e-05, "loss": 14.7501, "step": 18071 }, { "epoch": 0.7532824809303489, "grad_norm": 304.0, "learning_rate": 1.5131213127633786e-05, "loss": 11.5628, "step": 18072 }, { "epoch": 0.7533241632278771, "grad_norm": 169.0, "learning_rate": 1.5126375670034914e-05, "loss": 7.594, "step": 18073 }, { "epoch": 0.7533658455254053, "grad_norm": 424.0, "learning_rate": 1.5121538848010908e-05, "loss": 16.1253, "step": 18074 }, { "epoch": 0.7534075278229336, "grad_norm": 664.0, "learning_rate": 1.5116702661649967e-05, "loss": 20.6253, "step": 18075 }, { "epoch": 0.7534492101204618, "grad_norm": 344.0, "learning_rate": 1.5111867111040224e-05, "loss": 13.8129, "step": 18076 }, { "epoch": 0.75349089241799, "grad_norm": 400.0, "learning_rate": 1.5107032196269794e-05, "loss": 14.5004, "step": 18077 }, { "epoch": 0.7535325747155183, "grad_norm": 294.0, "learning_rate": 1.5102197917426802e-05, "loss": 14.0628, "step": 18078 }, { "epoch": 0.7535742570130466, "grad_norm": 452.0, "learning_rate": 1.5097364274599352e-05, "loss": 17.1261, "step": 18079 }, { "epoch": 0.7536159393105748, "grad_norm": 402.0, "learning_rate": 1.5092531267875531e-05, "loss": 14.2532, "step": 18080 }, { "epoch": 0.753657621608103, "grad_norm": 193.0, "learning_rate": 1.5087698897343432e-05, "loss": 11.8759, "step": 18081 }, { "epoch": 0.7536993039056312, "grad_norm": 548.0, "learning_rate": 1.508286716309112e-05, "loss": 18.6255, "step": 18082 }, { "epoch": 0.7537409862031595, "grad_norm": 320.0, "learning_rate": 1.5078036065206647e-05, "loss": 14.6255, "step": 18083 }, { "epoch": 0.7537826685006878, "grad_norm": 167.0, "learning_rate": 1.5073205603778074e-05, "loss": 10.7502, "step": 18084 }, { "epoch": 0.753824350798216, "grad_norm": 330.0, "learning_rate": 1.5068375778893429e-05, "loss": 13.1261, "step": 18085 }, { "epoch": 0.7538660330957443, "grad_norm": 312.0, "learning_rate": 1.5063546590640731e-05, "loss": 13.9377, "step": 18086 }, { "epoch": 0.7539077153932725, "grad_norm": 648.0, "learning_rate": 1.5058718039108e-05, "loss": 20.7504, "step": 18087 }, { "epoch": 0.7539493976908007, "grad_norm": 110.0, "learning_rate": 1.5053890124383235e-05, "loss": 9.0634, "step": 18088 }, { "epoch": 0.7539910799883289, "grad_norm": 322.0, "learning_rate": 1.5049062846554429e-05, "loss": 14.563, "step": 18089 }, { "epoch": 0.7540327622858572, "grad_norm": 426.0, "learning_rate": 1.5044236205709533e-05, "loss": 15.3134, "step": 18090 }, { "epoch": 0.7540744445833855, "grad_norm": 342.0, "learning_rate": 1.503941020193657e-05, "loss": 15.1252, "step": 18091 }, { "epoch": 0.7541161268809137, "grad_norm": 1064.0, "learning_rate": 1.5034584835323428e-05, "loss": 29.6252, "step": 18092 }, { "epoch": 0.7541578091784419, "grad_norm": 132.0, "learning_rate": 1.5029760105958113e-05, "loss": 9.4377, "step": 18093 }, { "epoch": 0.7541994914759702, "grad_norm": 1064.0, "learning_rate": 1.5024936013928492e-05, "loss": 20.505, "step": 18094 }, { "epoch": 0.7542411737734984, "grad_norm": 404.0, "learning_rate": 1.5020112559322552e-05, "loss": 15.5003, "step": 18095 }, { "epoch": 0.7542828560710266, "grad_norm": 410.0, "learning_rate": 1.5015289742228134e-05, "loss": 16.7507, "step": 18096 }, { "epoch": 0.7543245383685548, "grad_norm": 352.0, "learning_rate": 1.5010467562733194e-05, "loss": 10.7502, "step": 18097 }, { "epoch": 0.7543662206660832, "grad_norm": 360.0, "learning_rate": 1.5005646020925556e-05, "loss": 14.2502, "step": 18098 }, { "epoch": 0.7544079029636114, "grad_norm": 474.0, "learning_rate": 1.5000825116893148e-05, "loss": 15.3753, "step": 18099 }, { "epoch": 0.7544495852611396, "grad_norm": 145.0, "learning_rate": 1.49960048507238e-05, "loss": 10.5629, "step": 18100 }, { "epoch": 0.7544912675586678, "grad_norm": 175.0, "learning_rate": 1.4991185222505378e-05, "loss": 10.0002, "step": 18101 }, { "epoch": 0.7545329498561961, "grad_norm": 230.0, "learning_rate": 1.4986366232325711e-05, "loss": 11.0009, "step": 18102 }, { "epoch": 0.7545746321537243, "grad_norm": 688.0, "learning_rate": 1.4981547880272628e-05, "loss": 19.1257, "step": 18103 }, { "epoch": 0.7546163144512525, "grad_norm": 240.0, "learning_rate": 1.4976730166433945e-05, "loss": 10.1255, "step": 18104 }, { "epoch": 0.7546579967487808, "grad_norm": 164.0, "learning_rate": 1.497191309089746e-05, "loss": 6.1879, "step": 18105 }, { "epoch": 0.7546996790463091, "grad_norm": 568.0, "learning_rate": 1.4967096653750972e-05, "loss": 18.5002, "step": 18106 }, { "epoch": 0.7547413613438373, "grad_norm": 119.5, "learning_rate": 1.4962280855082255e-05, "loss": 9.3129, "step": 18107 }, { "epoch": 0.7547830436413655, "grad_norm": 202.0, "learning_rate": 1.4957465694979084e-05, "loss": 11.5004, "step": 18108 }, { "epoch": 0.7548247259388937, "grad_norm": 69.0, "learning_rate": 1.4952651173529208e-05, "loss": 7.2817, "step": 18109 }, { "epoch": 0.754866408236422, "grad_norm": 354.0, "learning_rate": 1.4947837290820377e-05, "loss": 13.4377, "step": 18110 }, { "epoch": 0.7549080905339502, "grad_norm": 1312.0, "learning_rate": 1.4943024046940324e-05, "loss": 26.507, "step": 18111 }, { "epoch": 0.7549497728314785, "grad_norm": 424.0, "learning_rate": 1.4938211441976763e-05, "loss": 16.1254, "step": 18112 }, { "epoch": 0.7549914551290067, "grad_norm": 940.0, "learning_rate": 1.4933399476017418e-05, "loss": 22.8753, "step": 18113 }, { "epoch": 0.755033137426535, "grad_norm": 450.0, "learning_rate": 1.492858814914998e-05, "loss": 14.9381, "step": 18114 }, { "epoch": 0.7550748197240632, "grad_norm": 370.0, "learning_rate": 1.4923777461462135e-05, "loss": 14.7503, "step": 18115 }, { "epoch": 0.7551165020215914, "grad_norm": 1080.0, "learning_rate": 1.491896741304154e-05, "loss": 27.2504, "step": 18116 }, { "epoch": 0.7551581843191196, "grad_norm": 188.0, "learning_rate": 1.4914158003975914e-05, "loss": 8.1252, "step": 18117 }, { "epoch": 0.755199866616648, "grad_norm": 540.0, "learning_rate": 1.4909349234352843e-05, "loss": 17.2503, "step": 18118 }, { "epoch": 0.7552415489141762, "grad_norm": 86.5, "learning_rate": 1.4904541104260028e-05, "loss": 10.0629, "step": 18119 }, { "epoch": 0.7552832312117044, "grad_norm": 494.0, "learning_rate": 1.4899733613785033e-05, "loss": 16.2504, "step": 18120 }, { "epoch": 0.7553249135092326, "grad_norm": 294.0, "learning_rate": 1.4894926763015542e-05, "loss": 13.6257, "step": 18121 }, { "epoch": 0.7553665958067609, "grad_norm": 588.0, "learning_rate": 1.4890120552039095e-05, "loss": 17.8758, "step": 18122 }, { "epoch": 0.7554082781042891, "grad_norm": 167.0, "learning_rate": 1.4885314980943348e-05, "loss": 13.1257, "step": 18123 }, { "epoch": 0.7554499604018173, "grad_norm": 584.0, "learning_rate": 1.4880510049815822e-05, "loss": 20.0017, "step": 18124 }, { "epoch": 0.7554916426993455, "grad_norm": 229.0, "learning_rate": 1.4875705758744136e-05, "loss": 10.5626, "step": 18125 }, { "epoch": 0.7555333249968739, "grad_norm": 80.0, "learning_rate": 1.4870902107815831e-05, "loss": 6.4692, "step": 18126 }, { "epoch": 0.7555750072944021, "grad_norm": 524.0, "learning_rate": 1.4866099097118452e-05, "loss": 16.2545, "step": 18127 }, { "epoch": 0.7556166895919303, "grad_norm": 124.5, "learning_rate": 1.4861296726739532e-05, "loss": 9.438, "step": 18128 }, { "epoch": 0.7556583718894585, "grad_norm": 520.0, "learning_rate": 1.4856494996766601e-05, "loss": 13.1914, "step": 18129 }, { "epoch": 0.7557000541869868, "grad_norm": 222.0, "learning_rate": 1.4851693907287173e-05, "loss": 11.6252, "step": 18130 }, { "epoch": 0.755741736484515, "grad_norm": 1000.0, "learning_rate": 1.4846893458388738e-05, "loss": 28.6254, "step": 18131 }, { "epoch": 0.7557834187820432, "grad_norm": 512.0, "learning_rate": 1.4842093650158794e-05, "loss": 15.939, "step": 18132 }, { "epoch": 0.7558251010795715, "grad_norm": 1020.0, "learning_rate": 1.483729448268481e-05, "loss": 26.3793, "step": 18133 }, { "epoch": 0.7558667833770998, "grad_norm": 508.0, "learning_rate": 1.483249595605426e-05, "loss": 16.7501, "step": 18134 }, { "epoch": 0.755908465674628, "grad_norm": 1320.0, "learning_rate": 1.482769807035459e-05, "loss": 26.2538, "step": 18135 }, { "epoch": 0.7559501479721562, "grad_norm": 442.0, "learning_rate": 1.4822900825673248e-05, "loss": 16.6254, "step": 18136 }, { "epoch": 0.7559918302696844, "grad_norm": 160.0, "learning_rate": 1.4818104222097662e-05, "loss": 10.3752, "step": 18137 }, { "epoch": 0.7560335125672127, "grad_norm": 141.0, "learning_rate": 1.4813308259715248e-05, "loss": 9.8752, "step": 18138 }, { "epoch": 0.756075194864741, "grad_norm": 312.0, "learning_rate": 1.4808512938613411e-05, "loss": 14.2502, "step": 18139 }, { "epoch": 0.7561168771622692, "grad_norm": 436.0, "learning_rate": 1.4803718258879535e-05, "loss": 16.1257, "step": 18140 }, { "epoch": 0.7561585594597974, "grad_norm": 98.5, "learning_rate": 1.4798924220601051e-05, "loss": 9.2507, "step": 18141 }, { "epoch": 0.7562002417573257, "grad_norm": 306.0, "learning_rate": 1.4794130823865271e-05, "loss": 15.0006, "step": 18142 }, { "epoch": 0.7562419240548539, "grad_norm": 1192.0, "learning_rate": 1.4789338068759607e-05, "loss": 24.5037, "step": 18143 }, { "epoch": 0.7562836063523821, "grad_norm": 209.0, "learning_rate": 1.4784545955371353e-05, "loss": 7.4065, "step": 18144 }, { "epoch": 0.7563252886499103, "grad_norm": 139.0, "learning_rate": 1.477975448378791e-05, "loss": 7.0627, "step": 18145 }, { "epoch": 0.7563669709474387, "grad_norm": 318.0, "learning_rate": 1.4774963654096535e-05, "loss": 13.2505, "step": 18146 }, { "epoch": 0.7564086532449669, "grad_norm": 189.0, "learning_rate": 1.4770173466384602e-05, "loss": 10.0003, "step": 18147 }, { "epoch": 0.7564503355424951, "grad_norm": 288.0, "learning_rate": 1.4765383920739357e-05, "loss": 13.9379, "step": 18148 }, { "epoch": 0.7564920178400233, "grad_norm": 326.0, "learning_rate": 1.4760595017248147e-05, "loss": 13.5004, "step": 18149 }, { "epoch": 0.7565337001375516, "grad_norm": 604.0, "learning_rate": 1.4755806755998191e-05, "loss": 20.0003, "step": 18150 }, { "epoch": 0.7565753824350798, "grad_norm": 680.0, "learning_rate": 1.4751019137076804e-05, "loss": 18.7512, "step": 18151 }, { "epoch": 0.756617064732608, "grad_norm": 66.5, "learning_rate": 1.4746232160571221e-05, "loss": 8.1878, "step": 18152 }, { "epoch": 0.7566587470301362, "grad_norm": 396.0, "learning_rate": 1.4741445826568684e-05, "loss": 15.6252, "step": 18153 }, { "epoch": 0.7567004293276646, "grad_norm": 294.0, "learning_rate": 1.4736660135156427e-05, "loss": 12.6907, "step": 18154 }, { "epoch": 0.7567421116251928, "grad_norm": 448.0, "learning_rate": 1.473187508642167e-05, "loss": 16.1257, "step": 18155 }, { "epoch": 0.756783793922721, "grad_norm": 224.0, "learning_rate": 1.4727090680451622e-05, "loss": 12.7506, "step": 18156 }, { "epoch": 0.7568254762202492, "grad_norm": 292.0, "learning_rate": 1.4722306917333478e-05, "loss": 13.7505, "step": 18157 }, { "epoch": 0.7568671585177775, "grad_norm": 70.5, "learning_rate": 1.471752379715442e-05, "loss": 8.5629, "step": 18158 }, { "epoch": 0.7569088408153057, "grad_norm": 78.5, "learning_rate": 1.4712741320001627e-05, "loss": 9.0626, "step": 18159 }, { "epoch": 0.756950523112834, "grad_norm": 215.0, "learning_rate": 1.4707959485962253e-05, "loss": 11.0001, "step": 18160 }, { "epoch": 0.7569922054103623, "grad_norm": 216.0, "learning_rate": 1.4703178295123448e-05, "loss": 11.2503, "step": 18161 }, { "epoch": 0.7570338877078905, "grad_norm": 211.0, "learning_rate": 1.4698397747572351e-05, "loss": 12.3752, "step": 18162 }, { "epoch": 0.7570755700054187, "grad_norm": 400.0, "learning_rate": 1.4693617843396095e-05, "loss": 14.3126, "step": 18163 }, { "epoch": 0.7571172523029469, "grad_norm": 460.0, "learning_rate": 1.4688838582681786e-05, "loss": 16.7503, "step": 18164 }, { "epoch": 0.7571589346004752, "grad_norm": 396.0, "learning_rate": 1.4684059965516528e-05, "loss": 16.3753, "step": 18165 }, { "epoch": 0.7572006168980034, "grad_norm": 108.0, "learning_rate": 1.4679281991987393e-05, "loss": 9.7504, "step": 18166 }, { "epoch": 0.7572422991955317, "grad_norm": 344.0, "learning_rate": 1.4674504662181516e-05, "loss": 15.3762, "step": 18167 }, { "epoch": 0.7572839814930599, "grad_norm": 1184.0, "learning_rate": 1.4669727976185893e-05, "loss": 30.5008, "step": 18168 }, { "epoch": 0.7573256637905882, "grad_norm": 384.0, "learning_rate": 1.4664951934087645e-05, "loss": 15.5629, "step": 18169 }, { "epoch": 0.7573673460881164, "grad_norm": 105.0, "learning_rate": 1.4660176535973758e-05, "loss": 7.844, "step": 18170 }, { "epoch": 0.7574090283856446, "grad_norm": 508.0, "learning_rate": 1.4655401781931316e-05, "loss": 17.8756, "step": 18171 }, { "epoch": 0.7574507106831728, "grad_norm": 149.0, "learning_rate": 1.4650627672047284e-05, "loss": 9.2503, "step": 18172 }, { "epoch": 0.7574923929807011, "grad_norm": 520.0, "learning_rate": 1.4645854206408731e-05, "loss": 17.7503, "step": 18173 }, { "epoch": 0.7575340752782294, "grad_norm": 103.5, "learning_rate": 1.46410813851026e-05, "loss": 9.1254, "step": 18174 }, { "epoch": 0.7575757575757576, "grad_norm": 402.0, "learning_rate": 1.463630920821591e-05, "loss": 16.0005, "step": 18175 }, { "epoch": 0.7576174398732858, "grad_norm": 134.0, "learning_rate": 1.4631537675835622e-05, "loss": 9.6877, "step": 18176 }, { "epoch": 0.7576591221708141, "grad_norm": 159.0, "learning_rate": 1.46267667880487e-05, "loss": 9.9379, "step": 18177 }, { "epoch": 0.7577008044683423, "grad_norm": 434.0, "learning_rate": 1.4621996544942096e-05, "loss": 15.1877, "step": 18178 }, { "epoch": 0.7577424867658705, "grad_norm": 117.5, "learning_rate": 1.4617226946602747e-05, "loss": 8.7505, "step": 18179 }, { "epoch": 0.7577841690633987, "grad_norm": 141.0, "learning_rate": 1.4612457993117574e-05, "loss": 10.5631, "step": 18180 }, { "epoch": 0.7578258513609271, "grad_norm": 376.0, "learning_rate": 1.46076896845735e-05, "loss": 13.5002, "step": 18181 }, { "epoch": 0.7578675336584553, "grad_norm": 404.0, "learning_rate": 1.460292202105742e-05, "loss": 15.4378, "step": 18182 }, { "epoch": 0.7579092159559835, "grad_norm": 458.0, "learning_rate": 1.4598155002656228e-05, "loss": 17.5003, "step": 18183 }, { "epoch": 0.7579508982535117, "grad_norm": 103.5, "learning_rate": 1.4593388629456806e-05, "loss": 9.4378, "step": 18184 }, { "epoch": 0.75799258055104, "grad_norm": 162.0, "learning_rate": 1.4588622901546017e-05, "loss": 10.1257, "step": 18185 }, { "epoch": 0.7580342628485682, "grad_norm": 430.0, "learning_rate": 1.458385781901072e-05, "loss": 16.0002, "step": 18186 }, { "epoch": 0.7580759451460964, "grad_norm": 155.0, "learning_rate": 1.4579093381937759e-05, "loss": 7.719, "step": 18187 }, { "epoch": 0.7581176274436247, "grad_norm": 306.0, "learning_rate": 1.4574329590413965e-05, "loss": 13.3163, "step": 18188 }, { "epoch": 0.758159309741153, "grad_norm": 181.0, "learning_rate": 1.456956644452616e-05, "loss": 10.188, "step": 18189 }, { "epoch": 0.7582009920386812, "grad_norm": 592.0, "learning_rate": 1.4564803944361132e-05, "loss": 18.3753, "step": 18190 }, { "epoch": 0.7582426743362094, "grad_norm": 300.0, "learning_rate": 1.4560042090005732e-05, "loss": 14.1879, "step": 18191 }, { "epoch": 0.7582843566337376, "grad_norm": 108.5, "learning_rate": 1.4555280881546685e-05, "loss": 9.1879, "step": 18192 }, { "epoch": 0.7583260389312659, "grad_norm": 600.0, "learning_rate": 1.4550520319070815e-05, "loss": 19.8752, "step": 18193 }, { "epoch": 0.7583677212287941, "grad_norm": 206.0, "learning_rate": 1.4545760402664826e-05, "loss": 10.2503, "step": 18194 }, { "epoch": 0.7584094035263224, "grad_norm": 226.0, "learning_rate": 1.4541001132415539e-05, "loss": 12.0004, "step": 18195 }, { "epoch": 0.7584510858238506, "grad_norm": 173.0, "learning_rate": 1.453624250840962e-05, "loss": 11.0002, "step": 18196 }, { "epoch": 0.7584927681213789, "grad_norm": 125.0, "learning_rate": 1.4531484530733863e-05, "loss": 9.8753, "step": 18197 }, { "epoch": 0.7585344504189071, "grad_norm": 474.0, "learning_rate": 1.4526727199474916e-05, "loss": 16.6252, "step": 18198 }, { "epoch": 0.7585761327164353, "grad_norm": 482.0, "learning_rate": 1.452197051471954e-05, "loss": 18.1256, "step": 18199 }, { "epoch": 0.7586178150139635, "grad_norm": 342.0, "learning_rate": 1.4517214476554376e-05, "loss": 12.7503, "step": 18200 }, { "epoch": 0.7586594973114918, "grad_norm": 212.0, "learning_rate": 1.4512459085066143e-05, "loss": 11.5627, "step": 18201 }, { "epoch": 0.7587011796090201, "grad_norm": 1048.0, "learning_rate": 1.4507704340341493e-05, "loss": 27.7536, "step": 18202 }, { "epoch": 0.7587428619065483, "grad_norm": 254.0, "learning_rate": 1.4502950242467084e-05, "loss": 12.6252, "step": 18203 }, { "epoch": 0.7587845442040765, "grad_norm": 374.0, "learning_rate": 1.4498196791529555e-05, "loss": 15.8129, "step": 18204 }, { "epoch": 0.7588262265016048, "grad_norm": 213.0, "learning_rate": 1.4493443987615546e-05, "loss": 10.8126, "step": 18205 }, { "epoch": 0.758867908799133, "grad_norm": 276.0, "learning_rate": 1.4488691830811668e-05, "loss": 13.5009, "step": 18206 }, { "epoch": 0.7589095910966612, "grad_norm": 194.0, "learning_rate": 1.4483940321204531e-05, "loss": 9.3753, "step": 18207 }, { "epoch": 0.7589512733941894, "grad_norm": 326.0, "learning_rate": 1.4479189458880743e-05, "loss": 13.8754, "step": 18208 }, { "epoch": 0.7589929556917178, "grad_norm": 210.0, "learning_rate": 1.4474439243926874e-05, "loss": 11.8132, "step": 18209 }, { "epoch": 0.759034637989246, "grad_norm": 236.0, "learning_rate": 1.4469689676429505e-05, "loss": 13.1878, "step": 18210 }, { "epoch": 0.7590763202867742, "grad_norm": 88.5, "learning_rate": 1.4464940756475193e-05, "loss": 7.5349, "step": 18211 }, { "epoch": 0.7591180025843024, "grad_norm": 342.0, "learning_rate": 1.4460192484150498e-05, "loss": 14.3128, "step": 18212 }, { "epoch": 0.7591596848818307, "grad_norm": 207.0, "learning_rate": 1.4455444859541944e-05, "loss": 9.3753, "step": 18213 }, { "epoch": 0.7592013671793589, "grad_norm": 224.0, "learning_rate": 1.4450697882736064e-05, "loss": 11.9383, "step": 18214 }, { "epoch": 0.7592430494768871, "grad_norm": 696.0, "learning_rate": 1.444595155381937e-05, "loss": 20.7504, "step": 18215 }, { "epoch": 0.7592847317744154, "grad_norm": 696.0, "learning_rate": 1.4441205872878349e-05, "loss": 21.7507, "step": 18216 }, { "epoch": 0.7593264140719437, "grad_norm": 206.0, "learning_rate": 1.4436460839999538e-05, "loss": 11.8756, "step": 18217 }, { "epoch": 0.7593680963694719, "grad_norm": 474.0, "learning_rate": 1.4431716455269357e-05, "loss": 15.5627, "step": 18218 }, { "epoch": 0.7594097786670001, "grad_norm": 248.0, "learning_rate": 1.4426972718774333e-05, "loss": 12.1252, "step": 18219 }, { "epoch": 0.7594514609645283, "grad_norm": 434.0, "learning_rate": 1.4422229630600859e-05, "loss": 15.8129, "step": 18220 }, { "epoch": 0.7594931432620566, "grad_norm": 232.0, "learning_rate": 1.4417487190835432e-05, "loss": 12.0001, "step": 18221 }, { "epoch": 0.7595348255595848, "grad_norm": 394.0, "learning_rate": 1.4412745399564437e-05, "loss": 15.251, "step": 18222 }, { "epoch": 0.7595765078571131, "grad_norm": 356.0, "learning_rate": 1.4408004256874341e-05, "loss": 14.1254, "step": 18223 }, { "epoch": 0.7596181901546413, "grad_norm": 496.0, "learning_rate": 1.44032637628515e-05, "loss": 16.1254, "step": 18224 }, { "epoch": 0.7596598724521696, "grad_norm": 63.5, "learning_rate": 1.4398523917582351e-05, "loss": 8.6252, "step": 18225 }, { "epoch": 0.7597015547496978, "grad_norm": 728.0, "learning_rate": 1.4393784721153264e-05, "loss": 18.2591, "step": 18226 }, { "epoch": 0.759743237047226, "grad_norm": 201.0, "learning_rate": 1.438904617365061e-05, "loss": 10.8132, "step": 18227 }, { "epoch": 0.7597849193447542, "grad_norm": 1144.0, "learning_rate": 1.4384308275160751e-05, "loss": 25.5003, "step": 18228 }, { "epoch": 0.7598266016422826, "grad_norm": 768.0, "learning_rate": 1.4379571025770038e-05, "loss": 22.0003, "step": 18229 }, { "epoch": 0.7598682839398108, "grad_norm": 234.0, "learning_rate": 1.4374834425564804e-05, "loss": 11.3127, "step": 18230 }, { "epoch": 0.759909966237339, "grad_norm": 292.0, "learning_rate": 1.4370098474631378e-05, "loss": 11.5632, "step": 18231 }, { "epoch": 0.7599516485348673, "grad_norm": 294.0, "learning_rate": 1.4365363173056068e-05, "loss": 13.9382, "step": 18232 }, { "epoch": 0.7599933308323955, "grad_norm": 468.0, "learning_rate": 1.4360628520925174e-05, "loss": 16.2509, "step": 18233 }, { "epoch": 0.7600350131299237, "grad_norm": 366.0, "learning_rate": 1.4355894518324991e-05, "loss": 13.2504, "step": 18234 }, { "epoch": 0.7600766954274519, "grad_norm": 318.0, "learning_rate": 1.4351161165341798e-05, "loss": 13.3142, "step": 18235 }, { "epoch": 0.7601183777249803, "grad_norm": 688.0, "learning_rate": 1.434642846206185e-05, "loss": 21.5031, "step": 18236 }, { "epoch": 0.7601600600225085, "grad_norm": 688.0, "learning_rate": 1.4341696408571415e-05, "loss": 20.1251, "step": 18237 }, { "epoch": 0.7602017423200367, "grad_norm": 380.0, "learning_rate": 1.4336965004956726e-05, "loss": 14.8753, "step": 18238 }, { "epoch": 0.7602434246175649, "grad_norm": 57.0, "learning_rate": 1.4332234251304016e-05, "loss": 7.7503, "step": 18239 }, { "epoch": 0.7602851069150932, "grad_norm": 298.0, "learning_rate": 1.4327504147699506e-05, "loss": 12.2504, "step": 18240 }, { "epoch": 0.7603267892126214, "grad_norm": 370.0, "learning_rate": 1.4322774694229396e-05, "loss": 15.0629, "step": 18241 }, { "epoch": 0.7603684715101496, "grad_norm": 696.0, "learning_rate": 1.431804589097987e-05, "loss": 25.0004, "step": 18242 }, { "epoch": 0.7604101538076778, "grad_norm": 53.75, "learning_rate": 1.4313317738037158e-05, "loss": 7.8753, "step": 18243 }, { "epoch": 0.7604518361052062, "grad_norm": 520.0, "learning_rate": 1.4308590235487363e-05, "loss": 17.8753, "step": 18244 }, { "epoch": 0.7604935184027344, "grad_norm": 196.0, "learning_rate": 1.4303863383416715e-05, "loss": 11.0628, "step": 18245 }, { "epoch": 0.7605352007002626, "grad_norm": 338.0, "learning_rate": 1.4299137181911292e-05, "loss": 14.3752, "step": 18246 }, { "epoch": 0.7605768829977908, "grad_norm": 258.0, "learning_rate": 1.4294411631057291e-05, "loss": 12.3757, "step": 18247 }, { "epoch": 0.7606185652953191, "grad_norm": 692.0, "learning_rate": 1.4289686730940783e-05, "loss": 21.3751, "step": 18248 }, { "epoch": 0.7606602475928473, "grad_norm": 145.0, "learning_rate": 1.4284962481647934e-05, "loss": 9.8132, "step": 18249 }, { "epoch": 0.7607019298903755, "grad_norm": 502.0, "learning_rate": 1.428023888326478e-05, "loss": 17.1253, "step": 18250 }, { "epoch": 0.7607436121879038, "grad_norm": 57.5, "learning_rate": 1.4275515935877459e-05, "loss": 8.1878, "step": 18251 }, { "epoch": 0.7607852944854321, "grad_norm": 560.0, "learning_rate": 1.4270793639572034e-05, "loss": 17.8755, "step": 18252 }, { "epoch": 0.7608269767829603, "grad_norm": 340.0, "learning_rate": 1.4266071994434566e-05, "loss": 15.0002, "step": 18253 }, { "epoch": 0.7608686590804885, "grad_norm": 234.0, "learning_rate": 1.426135100055111e-05, "loss": 8.9378, "step": 18254 }, { "epoch": 0.7609103413780167, "grad_norm": 239.0, "learning_rate": 1.4256630658007703e-05, "loss": 12.7502, "step": 18255 }, { "epoch": 0.760952023675545, "grad_norm": 220.0, "learning_rate": 1.4251910966890375e-05, "loss": 11.6879, "step": 18256 }, { "epoch": 0.7609937059730733, "grad_norm": 143.0, "learning_rate": 1.4247191927285142e-05, "loss": 9.3752, "step": 18257 }, { "epoch": 0.7610353882706015, "grad_norm": 93.5, "learning_rate": 1.4242473539278013e-05, "loss": 8.7503, "step": 18258 }, { "epoch": 0.7610770705681297, "grad_norm": 376.0, "learning_rate": 1.4237755802954972e-05, "loss": 13.2504, "step": 18259 }, { "epoch": 0.761118752865658, "grad_norm": 350.0, "learning_rate": 1.4233038718402009e-05, "loss": 13.6252, "step": 18260 }, { "epoch": 0.7611604351631862, "grad_norm": 196.0, "learning_rate": 1.4228322285705093e-05, "loss": 10.8127, "step": 18261 }, { "epoch": 0.7612021174607144, "grad_norm": 640.0, "learning_rate": 1.4223606504950177e-05, "loss": 21.0001, "step": 18262 }, { "epoch": 0.7612437997582426, "grad_norm": 306.0, "learning_rate": 1.421889137622321e-05, "loss": 12.2503, "step": 18263 }, { "epoch": 0.761285482055771, "grad_norm": 544.0, "learning_rate": 1.4214176899610122e-05, "loss": 17.1251, "step": 18264 }, { "epoch": 0.7613271643532992, "grad_norm": 210.0, "learning_rate": 1.4209463075196838e-05, "loss": 11.0005, "step": 18265 }, { "epoch": 0.7613688466508274, "grad_norm": 736.0, "learning_rate": 1.4204749903069253e-05, "loss": 20.5004, "step": 18266 }, { "epoch": 0.7614105289483556, "grad_norm": 492.0, "learning_rate": 1.4200037383313308e-05, "loss": 16.8752, "step": 18267 }, { "epoch": 0.7614522112458839, "grad_norm": 288.0, "learning_rate": 1.4195325516014829e-05, "loss": 12.0006, "step": 18268 }, { "epoch": 0.7614938935434121, "grad_norm": 231.0, "learning_rate": 1.4190614301259747e-05, "loss": 12.7502, "step": 18269 }, { "epoch": 0.7615355758409403, "grad_norm": 328.0, "learning_rate": 1.4185903739133871e-05, "loss": 13.8755, "step": 18270 }, { "epoch": 0.7615772581384685, "grad_norm": 316.0, "learning_rate": 1.418119382972311e-05, "loss": 15.1885, "step": 18271 }, { "epoch": 0.7616189404359969, "grad_norm": 211.0, "learning_rate": 1.4176484573113241e-05, "loss": 11.1876, "step": 18272 }, { "epoch": 0.7616606227335251, "grad_norm": 600.0, "learning_rate": 1.4171775969390155e-05, "loss": 20.3796, "step": 18273 }, { "epoch": 0.7617023050310533, "grad_norm": 454.0, "learning_rate": 1.4167068018639595e-05, "loss": 15.9377, "step": 18274 }, { "epoch": 0.7617439873285815, "grad_norm": 462.0, "learning_rate": 1.4162360720947437e-05, "loss": 14.9385, "step": 18275 }, { "epoch": 0.7617856696261098, "grad_norm": 217.0, "learning_rate": 1.4157654076399401e-05, "loss": 12.0006, "step": 18276 }, { "epoch": 0.761827351923638, "grad_norm": 256.0, "learning_rate": 1.415294808508132e-05, "loss": 10.815, "step": 18277 }, { "epoch": 0.7618690342211663, "grad_norm": 262.0, "learning_rate": 1.4148242747078944e-05, "loss": 11.1259, "step": 18278 }, { "epoch": 0.7619107165186945, "grad_norm": 548.0, "learning_rate": 1.4143538062478023e-05, "loss": 18.3752, "step": 18279 }, { "epoch": 0.7619523988162228, "grad_norm": 312.0, "learning_rate": 1.4138834031364306e-05, "loss": 14.063, "step": 18280 }, { "epoch": 0.761994081113751, "grad_norm": 298.0, "learning_rate": 1.4134130653823519e-05, "loss": 13.3141, "step": 18281 }, { "epoch": 0.7620357634112792, "grad_norm": 480.0, "learning_rate": 1.4129427929941385e-05, "loss": 17.7502, "step": 18282 }, { "epoch": 0.7620774457088074, "grad_norm": 346.0, "learning_rate": 1.4124725859803618e-05, "loss": 13.8755, "step": 18283 }, { "epoch": 0.7621191280063357, "grad_norm": 688.0, "learning_rate": 1.41200244434959e-05, "loss": 19.501, "step": 18284 }, { "epoch": 0.762160810303864, "grad_norm": 224.0, "learning_rate": 1.4115323681103926e-05, "loss": 12.7503, "step": 18285 }, { "epoch": 0.7622024926013922, "grad_norm": 268.0, "learning_rate": 1.4110623572713361e-05, "loss": 13.2504, "step": 18286 }, { "epoch": 0.7622441748989204, "grad_norm": 488.0, "learning_rate": 1.4105924118409864e-05, "loss": 16.3775, "step": 18287 }, { "epoch": 0.7622858571964487, "grad_norm": 157.0, "learning_rate": 1.4101225318279093e-05, "loss": 9.7503, "step": 18288 }, { "epoch": 0.7623275394939769, "grad_norm": 136.0, "learning_rate": 1.4096527172406675e-05, "loss": 9.9377, "step": 18289 }, { "epoch": 0.7623692217915051, "grad_norm": 338.0, "learning_rate": 1.4091829680878233e-05, "loss": 14.5003, "step": 18290 }, { "epoch": 0.7624109040890333, "grad_norm": 177.0, "learning_rate": 1.4087132843779389e-05, "loss": 11.1252, "step": 18291 }, { "epoch": 0.7624525863865617, "grad_norm": 416.0, "learning_rate": 1.4082436661195719e-05, "loss": 18.3764, "step": 18292 }, { "epoch": 0.7624942686840899, "grad_norm": 290.0, "learning_rate": 1.4077741133212858e-05, "loss": 11.5627, "step": 18293 }, { "epoch": 0.7625359509816181, "grad_norm": 346.0, "learning_rate": 1.4073046259916322e-05, "loss": 14.0007, "step": 18294 }, { "epoch": 0.7625776332791463, "grad_norm": 221.0, "learning_rate": 1.4068352041391742e-05, "loss": 12.1259, "step": 18295 }, { "epoch": 0.7626193155766746, "grad_norm": 398.0, "learning_rate": 1.4063658477724606e-05, "loss": 14.8752, "step": 18296 }, { "epoch": 0.7626609978742028, "grad_norm": 101.0, "learning_rate": 1.405896556900051e-05, "loss": 10.1253, "step": 18297 }, { "epoch": 0.762702680171731, "grad_norm": 636.0, "learning_rate": 1.4054273315304928e-05, "loss": 22.8752, "step": 18298 }, { "epoch": 0.7627443624692593, "grad_norm": 188.0, "learning_rate": 1.4049581716723437e-05, "loss": 11.0627, "step": 18299 }, { "epoch": 0.7627860447667876, "grad_norm": 308.0, "learning_rate": 1.4044890773341485e-05, "loss": 11.8127, "step": 18300 }, { "epoch": 0.7628277270643158, "grad_norm": 496.0, "learning_rate": 1.4040200485244598e-05, "loss": 16.6252, "step": 18301 }, { "epoch": 0.762869409361844, "grad_norm": 292.0, "learning_rate": 1.4035510852518252e-05, "loss": 12.1251, "step": 18302 }, { "epoch": 0.7629110916593722, "grad_norm": 152.0, "learning_rate": 1.4030821875247918e-05, "loss": 10.2502, "step": 18303 }, { "epoch": 0.7629527739569005, "grad_norm": 264.0, "learning_rate": 1.4026133553519044e-05, "loss": 7.6258, "step": 18304 }, { "epoch": 0.7629944562544287, "grad_norm": 490.0, "learning_rate": 1.4021445887417078e-05, "loss": 15.8776, "step": 18305 }, { "epoch": 0.763036138551957, "grad_norm": 428.0, "learning_rate": 1.4016758877027457e-05, "loss": 15.9379, "step": 18306 }, { "epoch": 0.7630778208494853, "grad_norm": 592.0, "learning_rate": 1.4012072522435598e-05, "loss": 18.001, "step": 18307 }, { "epoch": 0.7631195031470135, "grad_norm": 382.0, "learning_rate": 1.4007386823726914e-05, "loss": 14.9378, "step": 18308 }, { "epoch": 0.7631611854445417, "grad_norm": 119.5, "learning_rate": 1.4002701780986798e-05, "loss": 6.6902, "step": 18309 }, { "epoch": 0.7632028677420699, "grad_norm": 498.0, "learning_rate": 1.3998017394300644e-05, "loss": 18.5011, "step": 18310 }, { "epoch": 0.7632445500395982, "grad_norm": 101.5, "learning_rate": 1.399333366375381e-05, "loss": 9.501, "step": 18311 }, { "epoch": 0.7632862323371264, "grad_norm": 196.0, "learning_rate": 1.3988650589431673e-05, "loss": 10.4381, "step": 18312 }, { "epoch": 0.7633279146346547, "grad_norm": 322.0, "learning_rate": 1.3983968171419576e-05, "loss": 13.1879, "step": 18313 }, { "epoch": 0.7633695969321829, "grad_norm": 282.0, "learning_rate": 1.397928640980285e-05, "loss": 12.1252, "step": 18314 }, { "epoch": 0.7634112792297112, "grad_norm": 217.0, "learning_rate": 1.3974605304666833e-05, "loss": 10.4376, "step": 18315 }, { "epoch": 0.7634529615272394, "grad_norm": 68.5, "learning_rate": 1.396992485609681e-05, "loss": 8.7505, "step": 18316 }, { "epoch": 0.7634946438247676, "grad_norm": 1400.0, "learning_rate": 1.3965245064178146e-05, "loss": 29.5042, "step": 18317 }, { "epoch": 0.7635363261222958, "grad_norm": 516.0, "learning_rate": 1.396056592899605e-05, "loss": 16.6256, "step": 18318 }, { "epoch": 0.7635780084198242, "grad_norm": 302.0, "learning_rate": 1.3955887450635874e-05, "loss": 14.8753, "step": 18319 }, { "epoch": 0.7636196907173524, "grad_norm": 135.0, "learning_rate": 1.3951209629182815e-05, "loss": 9.7507, "step": 18320 }, { "epoch": 0.7636613730148806, "grad_norm": 241.0, "learning_rate": 1.3946532464722196e-05, "loss": 8.1877, "step": 18321 }, { "epoch": 0.7637030553124088, "grad_norm": 254.0, "learning_rate": 1.394185595733919e-05, "loss": 12.313, "step": 18322 }, { "epoch": 0.7637447376099371, "grad_norm": 752.0, "learning_rate": 1.3937180107119091e-05, "loss": 22.7502, "step": 18323 }, { "epoch": 0.7637864199074653, "grad_norm": 656.0, "learning_rate": 1.393250491414706e-05, "loss": 19.2503, "step": 18324 }, { "epoch": 0.7638281022049935, "grad_norm": 193.0, "learning_rate": 1.3927830378508356e-05, "loss": 11.5008, "step": 18325 }, { "epoch": 0.7638697845025217, "grad_norm": 138.0, "learning_rate": 1.392315650028811e-05, "loss": 8.4378, "step": 18326 }, { "epoch": 0.7639114668000501, "grad_norm": 330.0, "learning_rate": 1.391848327957156e-05, "loss": 15.0006, "step": 18327 }, { "epoch": 0.7639531490975783, "grad_norm": 181.0, "learning_rate": 1.3913810716443853e-05, "loss": 10.6881, "step": 18328 }, { "epoch": 0.7639948313951065, "grad_norm": 67.0, "learning_rate": 1.3909138810990151e-05, "loss": 6.1565, "step": 18329 }, { "epoch": 0.7640365136926347, "grad_norm": 340.0, "learning_rate": 1.3904467563295597e-05, "loss": 13.7501, "step": 18330 }, { "epoch": 0.764078195990163, "grad_norm": 222.0, "learning_rate": 1.3899796973445328e-05, "loss": 12.1259, "step": 18331 }, { "epoch": 0.7641198782876912, "grad_norm": 660.0, "learning_rate": 1.3895127041524458e-05, "loss": 20.2503, "step": 18332 }, { "epoch": 0.7641615605852194, "grad_norm": 556.0, "learning_rate": 1.3890457767618109e-05, "loss": 17.5002, "step": 18333 }, { "epoch": 0.7642032428827477, "grad_norm": 217.0, "learning_rate": 1.3885789151811369e-05, "loss": 12.8756, "step": 18334 }, { "epoch": 0.764244925180276, "grad_norm": 544.0, "learning_rate": 1.3881121194189328e-05, "loss": 19.2503, "step": 18335 }, { "epoch": 0.7642866074778042, "grad_norm": 828.0, "learning_rate": 1.3876453894837066e-05, "loss": 23.1252, "step": 18336 }, { "epoch": 0.7643282897753324, "grad_norm": 388.0, "learning_rate": 1.387178725383963e-05, "loss": 16.5027, "step": 18337 }, { "epoch": 0.7643699720728606, "grad_norm": 354.0, "learning_rate": 1.3867121271282085e-05, "loss": 14.5006, "step": 18338 }, { "epoch": 0.7644116543703889, "grad_norm": 788.0, "learning_rate": 1.3862455947249459e-05, "loss": 19.2537, "step": 18339 }, { "epoch": 0.7644533366679171, "grad_norm": 127.5, "learning_rate": 1.3857791281826783e-05, "loss": 9.1881, "step": 18340 }, { "epoch": 0.7644950189654454, "grad_norm": 146.0, "learning_rate": 1.3853127275099064e-05, "loss": 10.1252, "step": 18341 }, { "epoch": 0.7645367012629736, "grad_norm": 350.0, "learning_rate": 1.38484639271513e-05, "loss": 13.5628, "step": 18342 }, { "epoch": 0.7645783835605019, "grad_norm": 260.0, "learning_rate": 1.3843801238068516e-05, "loss": 10.0005, "step": 18343 }, { "epoch": 0.7646200658580301, "grad_norm": 132.0, "learning_rate": 1.3839139207935636e-05, "loss": 10.5627, "step": 18344 }, { "epoch": 0.7646617481555583, "grad_norm": 239.0, "learning_rate": 1.383447783683769e-05, "loss": 12.8143, "step": 18345 }, { "epoch": 0.7647034304530865, "grad_norm": 362.0, "learning_rate": 1.3829817124859557e-05, "loss": 14.5631, "step": 18346 }, { "epoch": 0.7647451127506149, "grad_norm": 119.5, "learning_rate": 1.3825157072086253e-05, "loss": 9.8753, "step": 18347 }, { "epoch": 0.7647867950481431, "grad_norm": 209.0, "learning_rate": 1.3820497678602639e-05, "loss": 9.6877, "step": 18348 }, { "epoch": 0.7648284773456713, "grad_norm": 188.0, "learning_rate": 1.3815838944493698e-05, "loss": 10.2507, "step": 18349 }, { "epoch": 0.7648701596431995, "grad_norm": 195.0, "learning_rate": 1.3811180869844276e-05, "loss": 11.0006, "step": 18350 }, { "epoch": 0.7649118419407278, "grad_norm": 236.0, "learning_rate": 1.3806523454739311e-05, "loss": 11.0627, "step": 18351 }, { "epoch": 0.764953524238256, "grad_norm": 964.0, "learning_rate": 1.3801866699263666e-05, "loss": 26.0002, "step": 18352 }, { "epoch": 0.7649952065357842, "grad_norm": 498.0, "learning_rate": 1.3797210603502215e-05, "loss": 17.5009, "step": 18353 }, { "epoch": 0.7650368888333124, "grad_norm": 204.0, "learning_rate": 1.3792555167539816e-05, "loss": 11.1253, "step": 18354 }, { "epoch": 0.7650785711308408, "grad_norm": 252.0, "learning_rate": 1.3787900391461312e-05, "loss": 12.1882, "step": 18355 }, { "epoch": 0.765120253428369, "grad_norm": 270.0, "learning_rate": 1.3783246275351535e-05, "loss": 13.6253, "step": 18356 }, { "epoch": 0.7651619357258972, "grad_norm": 147.0, "learning_rate": 1.3778592819295315e-05, "loss": 9.9379, "step": 18357 }, { "epoch": 0.7652036180234254, "grad_norm": 245.0, "learning_rate": 1.3773940023377452e-05, "loss": 12.1876, "step": 18358 }, { "epoch": 0.7652453003209537, "grad_norm": 172.0, "learning_rate": 1.3769287887682753e-05, "loss": 11.7511, "step": 18359 }, { "epoch": 0.7652869826184819, "grad_norm": 181.0, "learning_rate": 1.3764636412295995e-05, "loss": 10.9376, "step": 18360 }, { "epoch": 0.7653286649160101, "grad_norm": 408.0, "learning_rate": 1.3759985597301955e-05, "loss": 15.3754, "step": 18361 }, { "epoch": 0.7653703472135384, "grad_norm": 298.0, "learning_rate": 1.3755335442785393e-05, "loss": 13.2507, "step": 18362 }, { "epoch": 0.7654120295110667, "grad_norm": 237.0, "learning_rate": 1.3750685948831065e-05, "loss": 11.9377, "step": 18363 }, { "epoch": 0.7654537118085949, "grad_norm": 272.0, "learning_rate": 1.3746037115523696e-05, "loss": 12.938, "step": 18364 }, { "epoch": 0.7654953941061231, "grad_norm": 378.0, "learning_rate": 1.3741388942948025e-05, "loss": 14.3127, "step": 18365 }, { "epoch": 0.7655370764036513, "grad_norm": 616.0, "learning_rate": 1.3736741431188743e-05, "loss": 18.8769, "step": 18366 }, { "epoch": 0.7655787587011796, "grad_norm": 422.0, "learning_rate": 1.37320945803306e-05, "loss": 14.7502, "step": 18367 }, { "epoch": 0.7656204409987079, "grad_norm": 378.0, "learning_rate": 1.3727448390458219e-05, "loss": 16.2503, "step": 18368 }, { "epoch": 0.7656621232962361, "grad_norm": 218.0, "learning_rate": 1.3722802861656342e-05, "loss": 10.938, "step": 18369 }, { "epoch": 0.7657038055937643, "grad_norm": 306.0, "learning_rate": 1.3718157994009573e-05, "loss": 13.4377, "step": 18370 }, { "epoch": 0.7657454878912926, "grad_norm": 286.0, "learning_rate": 1.3713513787602628e-05, "loss": 13.6878, "step": 18371 }, { "epoch": 0.7657871701888208, "grad_norm": 472.0, "learning_rate": 1.3708870242520083e-05, "loss": 15.0007, "step": 18372 }, { "epoch": 0.765828852486349, "grad_norm": 282.0, "learning_rate": 1.3704227358846634e-05, "loss": 10.2502, "step": 18373 }, { "epoch": 0.7658705347838772, "grad_norm": 242.0, "learning_rate": 1.3699585136666825e-05, "loss": 13.1876, "step": 18374 }, { "epoch": 0.7659122170814056, "grad_norm": 322.0, "learning_rate": 1.3694943576065338e-05, "loss": 14.1254, "step": 18375 }, { "epoch": 0.7659538993789338, "grad_norm": 229.0, "learning_rate": 1.3690302677126694e-05, "loss": 11.3128, "step": 18376 }, { "epoch": 0.765995581676462, "grad_norm": 131.0, "learning_rate": 1.368566243993552e-05, "loss": 9.5629, "step": 18377 }, { "epoch": 0.7660372639739903, "grad_norm": 286.0, "learning_rate": 1.368102286457637e-05, "loss": 13.1251, "step": 18378 }, { "epoch": 0.7660789462715185, "grad_norm": 444.0, "learning_rate": 1.3676383951133808e-05, "loss": 16.5002, "step": 18379 }, { "epoch": 0.7661206285690467, "grad_norm": 376.0, "learning_rate": 1.3671745699692367e-05, "loss": 15.4378, "step": 18380 }, { "epoch": 0.7661623108665749, "grad_norm": 235.0, "learning_rate": 1.366710811033659e-05, "loss": 10.5627, "step": 18381 }, { "epoch": 0.7662039931641033, "grad_norm": 592.0, "learning_rate": 1.3662471183150994e-05, "loss": 18.8753, "step": 18382 }, { "epoch": 0.7662456754616315, "grad_norm": 442.0, "learning_rate": 1.3657834918220086e-05, "loss": 14.1876, "step": 18383 }, { "epoch": 0.7662873577591597, "grad_norm": 356.0, "learning_rate": 1.3653199315628361e-05, "loss": 13.3131, "step": 18384 }, { "epoch": 0.7663290400566879, "grad_norm": 157.0, "learning_rate": 1.3648564375460305e-05, "loss": 8.3758, "step": 18385 }, { "epoch": 0.7663707223542162, "grad_norm": 1032.0, "learning_rate": 1.364393009780039e-05, "loss": 24.7522, "step": 18386 }, { "epoch": 0.7664124046517444, "grad_norm": 214.0, "learning_rate": 1.3639296482733078e-05, "loss": 11.6886, "step": 18387 }, { "epoch": 0.7664540869492726, "grad_norm": 432.0, "learning_rate": 1.3634663530342811e-05, "loss": 15.5002, "step": 18388 }, { "epoch": 0.7664957692468009, "grad_norm": 175.0, "learning_rate": 1.3630031240714036e-05, "loss": 11.251, "step": 18389 }, { "epoch": 0.7665374515443292, "grad_norm": 366.0, "learning_rate": 1.3625399613931166e-05, "loss": 15.6256, "step": 18390 }, { "epoch": 0.7665791338418574, "grad_norm": 424.0, "learning_rate": 1.362076865007862e-05, "loss": 15.7521, "step": 18391 }, { "epoch": 0.7666208161393856, "grad_norm": 1256.0, "learning_rate": 1.3616138349240775e-05, "loss": 25.005, "step": 18392 }, { "epoch": 0.7666624984369138, "grad_norm": 103.0, "learning_rate": 1.3611508711502074e-05, "loss": 9.6877, "step": 18393 }, { "epoch": 0.7667041807344421, "grad_norm": 478.0, "learning_rate": 1.3606879736946826e-05, "loss": 17.3754, "step": 18394 }, { "epoch": 0.7667458630319703, "grad_norm": 1608.0, "learning_rate": 1.3602251425659451e-05, "loss": 41.2502, "step": 18395 }, { "epoch": 0.7667875453294986, "grad_norm": 544.0, "learning_rate": 1.359762377772425e-05, "loss": 18.3751, "step": 18396 }, { "epoch": 0.7668292276270268, "grad_norm": 564.0, "learning_rate": 1.3592996793225616e-05, "loss": 21.1252, "step": 18397 }, { "epoch": 0.7668709099245551, "grad_norm": 172.0, "learning_rate": 1.3588370472247814e-05, "loss": 11.8128, "step": 18398 }, { "epoch": 0.7669125922220833, "grad_norm": 148.0, "learning_rate": 1.3583744814875227e-05, "loss": 9.5626, "step": 18399 }, { "epoch": 0.7669542745196115, "grad_norm": 424.0, "learning_rate": 1.3579119821192093e-05, "loss": 14.4377, "step": 18400 }, { "epoch": 0.7669959568171397, "grad_norm": 556.0, "learning_rate": 1.3574495491282763e-05, "loss": 19.7502, "step": 18401 }, { "epoch": 0.767037639114668, "grad_norm": 528.0, "learning_rate": 1.356987182523145e-05, "loss": 17.7501, "step": 18402 }, { "epoch": 0.7670793214121963, "grad_norm": 308.0, "learning_rate": 1.356524882312248e-05, "loss": 14.688, "step": 18403 }, { "epoch": 0.7671210037097245, "grad_norm": 420.0, "learning_rate": 1.3560626485040085e-05, "loss": 15.7506, "step": 18404 }, { "epoch": 0.7671626860072527, "grad_norm": 482.0, "learning_rate": 1.35560048110685e-05, "loss": 18.2513, "step": 18405 }, { "epoch": 0.767204368304781, "grad_norm": 125.5, "learning_rate": 1.3551383801291968e-05, "loss": 9.9382, "step": 18406 }, { "epoch": 0.7672460506023092, "grad_norm": 179.0, "learning_rate": 1.3546763455794703e-05, "loss": 11.0003, "step": 18407 }, { "epoch": 0.7672877328998374, "grad_norm": 508.0, "learning_rate": 1.3542143774660903e-05, "loss": 17.8758, "step": 18408 }, { "epoch": 0.7673294151973656, "grad_norm": 280.0, "learning_rate": 1.353752475797478e-05, "loss": 13.3128, "step": 18409 }, { "epoch": 0.767371097494894, "grad_norm": 824.0, "learning_rate": 1.3532906405820495e-05, "loss": 22.1257, "step": 18410 }, { "epoch": 0.7674127797924222, "grad_norm": 282.0, "learning_rate": 1.3528288718282234e-05, "loss": 13.5629, "step": 18411 }, { "epoch": 0.7674544620899504, "grad_norm": 458.0, "learning_rate": 1.3523671695444146e-05, "loss": 15.6877, "step": 18412 }, { "epoch": 0.7674961443874786, "grad_norm": 163.0, "learning_rate": 1.351905533739038e-05, "loss": 5.2193, "step": 18413 }, { "epoch": 0.7675378266850069, "grad_norm": 784.0, "learning_rate": 1.3514439644205067e-05, "loss": 23.5002, "step": 18414 }, { "epoch": 0.7675795089825351, "grad_norm": 276.0, "learning_rate": 1.3509824615972334e-05, "loss": 10.8133, "step": 18415 }, { "epoch": 0.7676211912800633, "grad_norm": 219.0, "learning_rate": 1.3505210252776285e-05, "loss": 11.8127, "step": 18416 }, { "epoch": 0.7676628735775916, "grad_norm": 458.0, "learning_rate": 1.3500596554701018e-05, "loss": 16.2502, "step": 18417 }, { "epoch": 0.7677045558751199, "grad_norm": 312.0, "learning_rate": 1.3495983521830608e-05, "loss": 13.7502, "step": 18418 }, { "epoch": 0.7677462381726481, "grad_norm": 480.0, "learning_rate": 1.3491371154249167e-05, "loss": 18.2502, "step": 18419 }, { "epoch": 0.7677879204701763, "grad_norm": 290.0, "learning_rate": 1.3486759452040693e-05, "loss": 12.5628, "step": 18420 }, { "epoch": 0.7678296027677045, "grad_norm": 181.0, "learning_rate": 1.3482148415289302e-05, "loss": 10.0629, "step": 18421 }, { "epoch": 0.7678712850652328, "grad_norm": 402.0, "learning_rate": 1.347753804407897e-05, "loss": 15.7503, "step": 18422 }, { "epoch": 0.767912967362761, "grad_norm": 600.0, "learning_rate": 1.3472928338493779e-05, "loss": 19.3751, "step": 18423 }, { "epoch": 0.7679546496602893, "grad_norm": 704.0, "learning_rate": 1.3468319298617682e-05, "loss": 19.8771, "step": 18424 }, { "epoch": 0.7679963319578175, "grad_norm": 88.0, "learning_rate": 1.3463710924534734e-05, "loss": 9.3754, "step": 18425 }, { "epoch": 0.7680380142553458, "grad_norm": 446.0, "learning_rate": 1.3459103216328872e-05, "loss": 16.3754, "step": 18426 }, { "epoch": 0.768079696552874, "grad_norm": 482.0, "learning_rate": 1.3454496174084114e-05, "loss": 16.5009, "step": 18427 }, { "epoch": 0.7681213788504022, "grad_norm": 612.0, "learning_rate": 1.3449889797884407e-05, "loss": 17.6254, "step": 18428 }, { "epoch": 0.7681630611479304, "grad_norm": 348.0, "learning_rate": 1.3445284087813702e-05, "loss": 15.0006, "step": 18429 }, { "epoch": 0.7682047434454587, "grad_norm": 472.0, "learning_rate": 1.3440679043955939e-05, "loss": 17.0002, "step": 18430 }, { "epoch": 0.768246425742987, "grad_norm": 420.0, "learning_rate": 1.3436074666395044e-05, "loss": 15.9377, "step": 18431 }, { "epoch": 0.7682881080405152, "grad_norm": 420.0, "learning_rate": 1.3431470955214936e-05, "loss": 16.876, "step": 18432 }, { "epoch": 0.7683297903380434, "grad_norm": 488.0, "learning_rate": 1.3426867910499518e-05, "loss": 14.628, "step": 18433 }, { "epoch": 0.7683714726355717, "grad_norm": 253.0, "learning_rate": 1.3422265532332678e-05, "loss": 12.3752, "step": 18434 }, { "epoch": 0.7684131549330999, "grad_norm": 948.0, "learning_rate": 1.3417663820798293e-05, "loss": 25.6252, "step": 18435 }, { "epoch": 0.7684548372306281, "grad_norm": 640.0, "learning_rate": 1.3413062775980234e-05, "loss": 18.6253, "step": 18436 }, { "epoch": 0.7684965195281563, "grad_norm": 262.0, "learning_rate": 1.3408462397962357e-05, "loss": 13.0628, "step": 18437 }, { "epoch": 0.7685382018256847, "grad_norm": 328.0, "learning_rate": 1.34038626868285e-05, "loss": 13.8753, "step": 18438 }, { "epoch": 0.7685798841232129, "grad_norm": 186.0, "learning_rate": 1.339926364266249e-05, "loss": 12.1879, "step": 18439 }, { "epoch": 0.7686215664207411, "grad_norm": 116.5, "learning_rate": 1.3394665265548151e-05, "loss": 10.063, "step": 18440 }, { "epoch": 0.7686632487182693, "grad_norm": 576.0, "learning_rate": 1.3390067555569292e-05, "loss": 19.1252, "step": 18441 }, { "epoch": 0.7687049310157976, "grad_norm": 233.0, "learning_rate": 1.3385470512809683e-05, "loss": 12.3127, "step": 18442 }, { "epoch": 0.7687466133133258, "grad_norm": 126.0, "learning_rate": 1.338087413735315e-05, "loss": 9.7503, "step": 18443 }, { "epoch": 0.768788295610854, "grad_norm": 450.0, "learning_rate": 1.3376278429283412e-05, "loss": 15.5003, "step": 18444 }, { "epoch": 0.7688299779083823, "grad_norm": 264.0, "learning_rate": 1.3371683388684281e-05, "loss": 12.0013, "step": 18445 }, { "epoch": 0.7688716602059106, "grad_norm": 600.0, "learning_rate": 1.336708901563944e-05, "loss": 19.7504, "step": 18446 }, { "epoch": 0.7689133425034388, "grad_norm": 330.0, "learning_rate": 1.3362495310232687e-05, "loss": 14.7506, "step": 18447 }, { "epoch": 0.768955024800967, "grad_norm": 472.0, "learning_rate": 1.3357902272547674e-05, "loss": 14.0637, "step": 18448 }, { "epoch": 0.7689967070984952, "grad_norm": 268.0, "learning_rate": 1.3353309902668176e-05, "loss": 12.5629, "step": 18449 }, { "epoch": 0.7690383893960235, "grad_norm": 756.0, "learning_rate": 1.3348718200677829e-05, "loss": 21.1252, "step": 18450 }, { "epoch": 0.7690800716935517, "grad_norm": 174.0, "learning_rate": 1.334412716666038e-05, "loss": 11.1254, "step": 18451 }, { "epoch": 0.76912175399108, "grad_norm": 612.0, "learning_rate": 1.3339536800699432e-05, "loss": 18.0014, "step": 18452 }, { "epoch": 0.7691634362886083, "grad_norm": 176.0, "learning_rate": 1.3334947102878698e-05, "loss": 10.3128, "step": 18453 }, { "epoch": 0.7692051185861365, "grad_norm": 404.0, "learning_rate": 1.3330358073281806e-05, "loss": 16.2507, "step": 18454 }, { "epoch": 0.7692468008836647, "grad_norm": 320.0, "learning_rate": 1.3325769711992398e-05, "loss": 11.0017, "step": 18455 }, { "epoch": 0.7692884831811929, "grad_norm": 264.0, "learning_rate": 1.3321182019094092e-05, "loss": 12.0003, "step": 18456 }, { "epoch": 0.7693301654787212, "grad_norm": 572.0, "learning_rate": 1.3316594994670494e-05, "loss": 17.8751, "step": 18457 }, { "epoch": 0.7693718477762495, "grad_norm": 448.0, "learning_rate": 1.3312008638805212e-05, "loss": 16.6252, "step": 18458 }, { "epoch": 0.7694135300737777, "grad_norm": 253.0, "learning_rate": 1.3307422951581833e-05, "loss": 11.8133, "step": 18459 }, { "epoch": 0.7694552123713059, "grad_norm": 268.0, "learning_rate": 1.3302837933083923e-05, "loss": 13.501, "step": 18460 }, { "epoch": 0.7694968946688342, "grad_norm": 185.0, "learning_rate": 1.3298253583395048e-05, "loss": 10.3758, "step": 18461 }, { "epoch": 0.7695385769663624, "grad_norm": 544.0, "learning_rate": 1.3293669902598766e-05, "loss": 18.7504, "step": 18462 }, { "epoch": 0.7695802592638906, "grad_norm": 178.0, "learning_rate": 1.3289086890778607e-05, "loss": 10.3755, "step": 18463 }, { "epoch": 0.7696219415614188, "grad_norm": 150.0, "learning_rate": 1.3284504548018095e-05, "loss": 9.5007, "step": 18464 }, { "epoch": 0.7696636238589472, "grad_norm": 181.0, "learning_rate": 1.3279922874400746e-05, "loss": 11.7506, "step": 18465 }, { "epoch": 0.7697053061564754, "grad_norm": 458.0, "learning_rate": 1.3275341870010066e-05, "loss": 16.5003, "step": 18466 }, { "epoch": 0.7697469884540036, "grad_norm": 450.0, "learning_rate": 1.3270761534929538e-05, "loss": 16.3772, "step": 18467 }, { "epoch": 0.7697886707515318, "grad_norm": 1672.0, "learning_rate": 1.3266181869242627e-05, "loss": 35.7548, "step": 18468 }, { "epoch": 0.7698303530490601, "grad_norm": 680.0, "learning_rate": 1.3261602873032846e-05, "loss": 20.2503, "step": 18469 }, { "epoch": 0.7698720353465883, "grad_norm": 205.0, "learning_rate": 1.3257024546383579e-05, "loss": 11.6876, "step": 18470 }, { "epoch": 0.7699137176441165, "grad_norm": 300.0, "learning_rate": 1.3252446889378335e-05, "loss": 12.1253, "step": 18471 }, { "epoch": 0.7699553999416447, "grad_norm": 330.0, "learning_rate": 1.324786990210048e-05, "loss": 13.5637, "step": 18472 }, { "epoch": 0.7699970822391731, "grad_norm": 296.0, "learning_rate": 1.3243293584633482e-05, "loss": 13.9382, "step": 18473 }, { "epoch": 0.7700387645367013, "grad_norm": 804.0, "learning_rate": 1.3238717937060697e-05, "loss": 19.5046, "step": 18474 }, { "epoch": 0.7700804468342295, "grad_norm": 154.0, "learning_rate": 1.3234142959465567e-05, "loss": 9.5626, "step": 18475 }, { "epoch": 0.7701221291317577, "grad_norm": 274.0, "learning_rate": 1.3229568651931417e-05, "loss": 12.6877, "step": 18476 }, { "epoch": 0.770163811429286, "grad_norm": 1064.0, "learning_rate": 1.3224995014541663e-05, "loss": 27.0005, "step": 18477 }, { "epoch": 0.7702054937268142, "grad_norm": 656.0, "learning_rate": 1.3220422047379633e-05, "loss": 21.2505, "step": 18478 }, { "epoch": 0.7702471760243425, "grad_norm": 192.0, "learning_rate": 1.3215849750528675e-05, "loss": 11.3155, "step": 18479 }, { "epoch": 0.7702888583218707, "grad_norm": 768.0, "learning_rate": 1.3211278124072124e-05, "loss": 19.8752, "step": 18480 }, { "epoch": 0.770330540619399, "grad_norm": 264.0, "learning_rate": 1.3206707168093296e-05, "loss": 12.3756, "step": 18481 }, { "epoch": 0.7703722229169272, "grad_norm": 504.0, "learning_rate": 1.3202136882675498e-05, "loss": 17.1253, "step": 18482 }, { "epoch": 0.7704139052144554, "grad_norm": 296.0, "learning_rate": 1.3197567267902017e-05, "loss": 11.5657, "step": 18483 }, { "epoch": 0.7704555875119836, "grad_norm": 233.0, "learning_rate": 1.3192998323856142e-05, "loss": 13.0673, "step": 18484 }, { "epoch": 0.7704972698095119, "grad_norm": 176.0, "learning_rate": 1.3188430050621136e-05, "loss": 9.5629, "step": 18485 }, { "epoch": 0.7705389521070402, "grad_norm": 412.0, "learning_rate": 1.3183862448280265e-05, "loss": 14.6877, "step": 18486 }, { "epoch": 0.7705806344045684, "grad_norm": 195.0, "learning_rate": 1.3179295516916768e-05, "loss": 12.1877, "step": 18487 }, { "epoch": 0.7706223167020966, "grad_norm": 79.5, "learning_rate": 1.3174729256613877e-05, "loss": 8.8753, "step": 18488 }, { "epoch": 0.7706639989996249, "grad_norm": 162.0, "learning_rate": 1.3170163667454815e-05, "loss": 11.1878, "step": 18489 }, { "epoch": 0.7707056812971531, "grad_norm": 500.0, "learning_rate": 1.3165598749522795e-05, "loss": 16.6256, "step": 18490 }, { "epoch": 0.7707473635946813, "grad_norm": 238.0, "learning_rate": 1.3161034502900999e-05, "loss": 10.8755, "step": 18491 }, { "epoch": 0.7707890458922095, "grad_norm": 340.0, "learning_rate": 1.315647092767261e-05, "loss": 15.0004, "step": 18492 }, { "epoch": 0.7708307281897379, "grad_norm": 410.0, "learning_rate": 1.315190802392084e-05, "loss": 14.9382, "step": 18493 }, { "epoch": 0.7708724104872661, "grad_norm": 496.0, "learning_rate": 1.3147345791728782e-05, "loss": 14.1255, "step": 18494 }, { "epoch": 0.7709140927847943, "grad_norm": 204.0, "learning_rate": 1.3142784231179656e-05, "loss": 12.0004, "step": 18495 }, { "epoch": 0.7709557750823225, "grad_norm": 432.0, "learning_rate": 1.313822334235652e-05, "loss": 17.1253, "step": 18496 }, { "epoch": 0.7709974573798508, "grad_norm": 548.0, "learning_rate": 1.3133663125342572e-05, "loss": 16.2505, "step": 18497 }, { "epoch": 0.771039139677379, "grad_norm": 680.0, "learning_rate": 1.3129103580220853e-05, "loss": 21.0008, "step": 18498 }, { "epoch": 0.7710808219749072, "grad_norm": 125.0, "learning_rate": 1.3124544707074527e-05, "loss": 10.813, "step": 18499 }, { "epoch": 0.7711225042724354, "grad_norm": 512.0, "learning_rate": 1.311998650598662e-05, "loss": 17.251, "step": 18500 }, { "epoch": 0.7711641865699638, "grad_norm": 334.0, "learning_rate": 1.3115428977040256e-05, "loss": 13.1878, "step": 18501 }, { "epoch": 0.771205868867492, "grad_norm": 231.0, "learning_rate": 1.3110872120318445e-05, "loss": 11.9383, "step": 18502 }, { "epoch": 0.7712475511650202, "grad_norm": 230.0, "learning_rate": 1.3106315935904284e-05, "loss": 11.3757, "step": 18503 }, { "epoch": 0.7712892334625484, "grad_norm": 416.0, "learning_rate": 1.3101760423880789e-05, "loss": 16.6255, "step": 18504 }, { "epoch": 0.7713309157600767, "grad_norm": 250.0, "learning_rate": 1.3097205584330985e-05, "loss": 12.2502, "step": 18505 }, { "epoch": 0.7713725980576049, "grad_norm": 648.0, "learning_rate": 1.3092651417337882e-05, "loss": 18.38, "step": 18506 }, { "epoch": 0.7714142803551332, "grad_norm": 242.0, "learning_rate": 1.3088097922984489e-05, "loss": 11.0627, "step": 18507 }, { "epoch": 0.7714559626526614, "grad_norm": 95.5, "learning_rate": 1.3083545101353783e-05, "loss": 8.1252, "step": 18508 }, { "epoch": 0.7714976449501897, "grad_norm": 1320.0, "learning_rate": 1.3078992952528745e-05, "loss": 30.0044, "step": 18509 }, { "epoch": 0.7715393272477179, "grad_norm": 83.5, "learning_rate": 1.3074441476592341e-05, "loss": 8.0002, "step": 18510 }, { "epoch": 0.7715810095452461, "grad_norm": 536.0, "learning_rate": 1.3069890673627516e-05, "loss": 17.7502, "step": 18511 }, { "epoch": 0.7716226918427743, "grad_norm": 478.0, "learning_rate": 1.3065340543717214e-05, "loss": 15.5028, "step": 18512 }, { "epoch": 0.7716643741403026, "grad_norm": 243.0, "learning_rate": 1.3060791086944357e-05, "loss": 13.5631, "step": 18513 }, { "epoch": 0.7717060564378309, "grad_norm": 143.0, "learning_rate": 1.3056242303391864e-05, "loss": 9.9378, "step": 18514 }, { "epoch": 0.7717477387353591, "grad_norm": 1176.0, "learning_rate": 1.3051694193142632e-05, "loss": 24.1297, "step": 18515 }, { "epoch": 0.7717894210328873, "grad_norm": 46.75, "learning_rate": 1.3047146756279555e-05, "loss": 7.5635, "step": 18516 }, { "epoch": 0.7718311033304156, "grad_norm": 604.0, "learning_rate": 1.3042599992885512e-05, "loss": 18.6252, "step": 18517 }, { "epoch": 0.7718727856279438, "grad_norm": 103.0, "learning_rate": 1.3038053903043346e-05, "loss": 8.1878, "step": 18518 }, { "epoch": 0.771914467925472, "grad_norm": 1064.0, "learning_rate": 1.303350848683596e-05, "loss": 22.8789, "step": 18519 }, { "epoch": 0.7719561502230002, "grad_norm": 314.0, "learning_rate": 1.3028963744346134e-05, "loss": 13.7506, "step": 18520 }, { "epoch": 0.7719978325205286, "grad_norm": 196.0, "learning_rate": 1.3024419675656751e-05, "loss": 10.5005, "step": 18521 }, { "epoch": 0.7720395148180568, "grad_norm": 248.0, "learning_rate": 1.3019876280850579e-05, "loss": 12.4378, "step": 18522 }, { "epoch": 0.772081197115585, "grad_norm": 536.0, "learning_rate": 1.3015333560010474e-05, "loss": 18.7502, "step": 18523 }, { "epoch": 0.7721228794131133, "grad_norm": 1472.0, "learning_rate": 1.3010791513219161e-05, "loss": 35.5002, "step": 18524 }, { "epoch": 0.7721645617106415, "grad_norm": 88.0, "learning_rate": 1.3006250140559494e-05, "loss": 9.1877, "step": 18525 }, { "epoch": 0.7722062440081697, "grad_norm": 1240.0, "learning_rate": 1.3001709442114169e-05, "loss": 27.7542, "step": 18526 }, { "epoch": 0.7722479263056979, "grad_norm": 440.0, "learning_rate": 1.2997169417966004e-05, "loss": 16.5006, "step": 18527 }, { "epoch": 0.7722896086032263, "grad_norm": 139.0, "learning_rate": 1.299263006819768e-05, "loss": 10.1252, "step": 18528 }, { "epoch": 0.7723312909007545, "grad_norm": 532.0, "learning_rate": 1.2988091392891976e-05, "loss": 18.3756, "step": 18529 }, { "epoch": 0.7723729731982827, "grad_norm": 436.0, "learning_rate": 1.2983553392131587e-05, "loss": 16.6253, "step": 18530 }, { "epoch": 0.7724146554958109, "grad_norm": 326.0, "learning_rate": 1.2979016065999228e-05, "loss": 14.2502, "step": 18531 }, { "epoch": 0.7724563377933392, "grad_norm": 334.0, "learning_rate": 1.297447941457759e-05, "loss": 13.688, "step": 18532 }, { "epoch": 0.7724980200908674, "grad_norm": 692.0, "learning_rate": 1.2969943437949344e-05, "loss": 21.0004, "step": 18533 }, { "epoch": 0.7725397023883956, "grad_norm": 125.5, "learning_rate": 1.2965408136197172e-05, "loss": 10.3752, "step": 18534 }, { "epoch": 0.7725813846859239, "grad_norm": 240.0, "learning_rate": 1.296087350940372e-05, "loss": 9.3129, "step": 18535 }, { "epoch": 0.7726230669834522, "grad_norm": 140.0, "learning_rate": 1.2956339557651642e-05, "loss": 8.813, "step": 18536 }, { "epoch": 0.7726647492809804, "grad_norm": 286.0, "learning_rate": 1.295180628102356e-05, "loss": 12.8752, "step": 18537 }, { "epoch": 0.7727064315785086, "grad_norm": 356.0, "learning_rate": 1.2947273679602096e-05, "loss": 15.0629, "step": 18538 }, { "epoch": 0.7727481138760368, "grad_norm": 146.0, "learning_rate": 1.2942741753469862e-05, "loss": 8.6877, "step": 18539 }, { "epoch": 0.7727897961735651, "grad_norm": 386.0, "learning_rate": 1.2938210502709446e-05, "loss": 14.6881, "step": 18540 }, { "epoch": 0.7728314784710933, "grad_norm": 1064.0, "learning_rate": 1.2933679927403435e-05, "loss": 27.1255, "step": 18541 }, { "epoch": 0.7728731607686216, "grad_norm": 69.5, "learning_rate": 1.29291500276344e-05, "loss": 8.6878, "step": 18542 }, { "epoch": 0.7729148430661498, "grad_norm": 532.0, "learning_rate": 1.2924620803484894e-05, "loss": 18.0007, "step": 18543 }, { "epoch": 0.7729565253636781, "grad_norm": 940.0, "learning_rate": 1.2920092255037453e-05, "loss": 26.2502, "step": 18544 }, { "epoch": 0.7729982076612063, "grad_norm": 1064.0, "learning_rate": 1.291556438237465e-05, "loss": 29.1252, "step": 18545 }, { "epoch": 0.7730398899587345, "grad_norm": 215.0, "learning_rate": 1.291103718557895e-05, "loss": 11.3127, "step": 18546 }, { "epoch": 0.7730815722562627, "grad_norm": 132.0, "learning_rate": 1.2906510664732919e-05, "loss": 7.9379, "step": 18547 }, { "epoch": 0.773123254553791, "grad_norm": 528.0, "learning_rate": 1.2901984819918995e-05, "loss": 19.1252, "step": 18548 }, { "epoch": 0.7731649368513193, "grad_norm": 414.0, "learning_rate": 1.2897459651219717e-05, "loss": 14.8127, "step": 18549 }, { "epoch": 0.7732066191488475, "grad_norm": 111.5, "learning_rate": 1.28929351587175e-05, "loss": 6.9068, "step": 18550 }, { "epoch": 0.7732483014463757, "grad_norm": 284.0, "learning_rate": 1.2888411342494872e-05, "loss": 13.4378, "step": 18551 }, { "epoch": 0.773289983743904, "grad_norm": 139.0, "learning_rate": 1.2883888202634204e-05, "loss": 10.2507, "step": 18552 }, { "epoch": 0.7733316660414322, "grad_norm": 520.0, "learning_rate": 1.2879365739217986e-05, "loss": 18.2504, "step": 18553 }, { "epoch": 0.7733733483389604, "grad_norm": 253.0, "learning_rate": 1.2874843952328624e-05, "loss": 11.1877, "step": 18554 }, { "epoch": 0.7734150306364886, "grad_norm": 203.0, "learning_rate": 1.2870322842048527e-05, "loss": 11.0002, "step": 18555 }, { "epoch": 0.773456712934017, "grad_norm": 320.0, "learning_rate": 1.2865802408460087e-05, "loss": 15.1255, "step": 18556 }, { "epoch": 0.7734983952315452, "grad_norm": 394.0, "learning_rate": 1.2861282651645702e-05, "loss": 13.0004, "step": 18557 }, { "epoch": 0.7735400775290734, "grad_norm": 330.0, "learning_rate": 1.285676357168773e-05, "loss": 15.7508, "step": 18558 }, { "epoch": 0.7735817598266016, "grad_norm": 572.0, "learning_rate": 1.2852245168668543e-05, "loss": 19.6251, "step": 18559 }, { "epoch": 0.7736234421241299, "grad_norm": 252.0, "learning_rate": 1.2847727442670487e-05, "loss": 12.063, "step": 18560 }, { "epoch": 0.7736651244216581, "grad_norm": 220.0, "learning_rate": 1.2843210393775895e-05, "loss": 11.4385, "step": 18561 }, { "epoch": 0.7737068067191863, "grad_norm": 72.5, "learning_rate": 1.2838694022067094e-05, "loss": 8.8129, "step": 18562 }, { "epoch": 0.7737484890167146, "grad_norm": 222.0, "learning_rate": 1.2834178327626389e-05, "loss": 12.3771, "step": 18563 }, { "epoch": 0.7737901713142429, "grad_norm": 378.0, "learning_rate": 1.2829663310536082e-05, "loss": 15.2508, "step": 18564 }, { "epoch": 0.7738318536117711, "grad_norm": 1072.0, "learning_rate": 1.282514897087847e-05, "loss": 27.1286, "step": 18565 }, { "epoch": 0.7738735359092993, "grad_norm": 486.0, "learning_rate": 1.2820635308735813e-05, "loss": 18.0018, "step": 18566 }, { "epoch": 0.7739152182068275, "grad_norm": 332.0, "learning_rate": 1.2816122324190378e-05, "loss": 14.3128, "step": 18567 }, { "epoch": 0.7739569005043558, "grad_norm": 336.0, "learning_rate": 1.2811610017324399e-05, "loss": 13.5013, "step": 18568 }, { "epoch": 0.773998582801884, "grad_norm": 153.0, "learning_rate": 1.2807098388220156e-05, "loss": 10.3131, "step": 18569 }, { "epoch": 0.7740402650994123, "grad_norm": 229.0, "learning_rate": 1.2802587436959823e-05, "loss": 12.0627, "step": 18570 }, { "epoch": 0.7740819473969405, "grad_norm": 780.0, "learning_rate": 1.279807716362566e-05, "loss": 22.5003, "step": 18571 }, { "epoch": 0.7741236296944688, "grad_norm": 988.0, "learning_rate": 1.2793567568299814e-05, "loss": 22.0051, "step": 18572 }, { "epoch": 0.774165311991997, "grad_norm": 245.0, "learning_rate": 1.2789058651064534e-05, "loss": 12.7502, "step": 18573 }, { "epoch": 0.7742069942895252, "grad_norm": 210.0, "learning_rate": 1.2784550412001933e-05, "loss": 12.0628, "step": 18574 }, { "epoch": 0.7742486765870534, "grad_norm": 608.0, "learning_rate": 1.2780042851194235e-05, "loss": 19.6292, "step": 18575 }, { "epoch": 0.7742903588845818, "grad_norm": 314.0, "learning_rate": 1.2775535968723534e-05, "loss": 14.1265, "step": 18576 }, { "epoch": 0.77433204118211, "grad_norm": 185.0, "learning_rate": 1.2771029764672015e-05, "loss": 11.6253, "step": 18577 }, { "epoch": 0.7743737234796382, "grad_norm": 660.0, "learning_rate": 1.2766524239121763e-05, "loss": 20.2501, "step": 18578 }, { "epoch": 0.7744154057771664, "grad_norm": 466.0, "learning_rate": 1.2762019392154922e-05, "loss": 17.6257, "step": 18579 }, { "epoch": 0.7744570880746947, "grad_norm": 372.0, "learning_rate": 1.2757515223853583e-05, "loss": 15.1253, "step": 18580 }, { "epoch": 0.7744987703722229, "grad_norm": 97.0, "learning_rate": 1.2753011734299836e-05, "loss": 9.4378, "step": 18581 }, { "epoch": 0.7745404526697511, "grad_norm": 199.0, "learning_rate": 1.2748508923575757e-05, "loss": 11.7508, "step": 18582 }, { "epoch": 0.7745821349672793, "grad_norm": 336.0, "learning_rate": 1.2744006791763414e-05, "loss": 13.1254, "step": 18583 }, { "epoch": 0.7746238172648077, "grad_norm": 492.0, "learning_rate": 1.2739505338944846e-05, "loss": 19.3754, "step": 18584 }, { "epoch": 0.7746654995623359, "grad_norm": 133.0, "learning_rate": 1.2735004565202108e-05, "loss": 7.9069, "step": 18585 }, { "epoch": 0.7747071818598641, "grad_norm": 462.0, "learning_rate": 1.2730504470617217e-05, "loss": 15.9378, "step": 18586 }, { "epoch": 0.7747488641573923, "grad_norm": 372.0, "learning_rate": 1.2726005055272188e-05, "loss": 14.5627, "step": 18587 }, { "epoch": 0.7747905464549206, "grad_norm": 196.0, "learning_rate": 1.2721506319249027e-05, "loss": 11.5002, "step": 18588 }, { "epoch": 0.7748322287524488, "grad_norm": 304.0, "learning_rate": 1.2717008262629727e-05, "loss": 13.938, "step": 18589 }, { "epoch": 0.774873911049977, "grad_norm": 107.5, "learning_rate": 1.2712510885496259e-05, "loss": 8.5008, "step": 18590 }, { "epoch": 0.7749155933475053, "grad_norm": 136.0, "learning_rate": 1.270801418793059e-05, "loss": 9.6878, "step": 18591 }, { "epoch": 0.7749572756450336, "grad_norm": 258.0, "learning_rate": 1.2703518170014672e-05, "loss": 12.438, "step": 18592 }, { "epoch": 0.7749989579425618, "grad_norm": 520.0, "learning_rate": 1.2699022831830442e-05, "loss": 18.2503, "step": 18593 }, { "epoch": 0.77504064024009, "grad_norm": 175.0, "learning_rate": 1.2694528173459823e-05, "loss": 9.627, "step": 18594 }, { "epoch": 0.7750823225376183, "grad_norm": 464.0, "learning_rate": 1.2690034194984767e-05, "loss": 16.7514, "step": 18595 }, { "epoch": 0.7751240048351465, "grad_norm": 187.0, "learning_rate": 1.2685540896487125e-05, "loss": 10.9378, "step": 18596 }, { "epoch": 0.7751656871326748, "grad_norm": 217.0, "learning_rate": 1.268104827804884e-05, "loss": 11.7503, "step": 18597 }, { "epoch": 0.775207369430203, "grad_norm": 145.0, "learning_rate": 1.2676556339751733e-05, "loss": 5.6254, "step": 18598 }, { "epoch": 0.7752490517277313, "grad_norm": 356.0, "learning_rate": 1.2672065081677725e-05, "loss": 16.0006, "step": 18599 }, { "epoch": 0.7752907340252595, "grad_norm": 412.0, "learning_rate": 1.2667574503908619e-05, "loss": 16.876, "step": 18600 }, { "epoch": 0.7753324163227877, "grad_norm": 418.0, "learning_rate": 1.2663084606526316e-05, "loss": 15.1254, "step": 18601 }, { "epoch": 0.7753740986203159, "grad_norm": 1448.0, "learning_rate": 1.2658595389612571e-05, "loss": 25.8797, "step": 18602 }, { "epoch": 0.7754157809178442, "grad_norm": 466.0, "learning_rate": 1.2654106853249264e-05, "loss": 14.1253, "step": 18603 }, { "epoch": 0.7754574632153725, "grad_norm": 1664.0, "learning_rate": 1.264961899751817e-05, "loss": 31.631, "step": 18604 }, { "epoch": 0.7754991455129007, "grad_norm": 238.0, "learning_rate": 1.2645131822501083e-05, "loss": 11.563, "step": 18605 }, { "epoch": 0.7755408278104289, "grad_norm": 438.0, "learning_rate": 1.2640645328279788e-05, "loss": 15.5634, "step": 18606 }, { "epoch": 0.7755825101079572, "grad_norm": 177.0, "learning_rate": 1.2636159514936052e-05, "loss": 10.5002, "step": 18607 }, { "epoch": 0.7756241924054854, "grad_norm": 157.0, "learning_rate": 1.2631674382551623e-05, "loss": 11.0627, "step": 18608 }, { "epoch": 0.7756658747030136, "grad_norm": 532.0, "learning_rate": 1.2627189931208244e-05, "loss": 18.5003, "step": 18609 }, { "epoch": 0.7757075570005418, "grad_norm": 466.0, "learning_rate": 1.2622706160987646e-05, "loss": 14.813, "step": 18610 }, { "epoch": 0.7757492392980702, "grad_norm": 348.0, "learning_rate": 1.2618223071971546e-05, "loss": 13.8131, "step": 18611 }, { "epoch": 0.7757909215955984, "grad_norm": 580.0, "learning_rate": 1.2613740664241652e-05, "loss": 18.3754, "step": 18612 }, { "epoch": 0.7758326038931266, "grad_norm": 165.0, "learning_rate": 1.2609258937879648e-05, "loss": 12.0012, "step": 18613 }, { "epoch": 0.7758742861906548, "grad_norm": 215.0, "learning_rate": 1.2604777892967223e-05, "loss": 11.6877, "step": 18614 }, { "epoch": 0.7759159684881831, "grad_norm": 217.0, "learning_rate": 1.2600297529586041e-05, "loss": 11.1257, "step": 18615 }, { "epoch": 0.7759576507857113, "grad_norm": 112.5, "learning_rate": 1.2595817847817754e-05, "loss": 10.0002, "step": 18616 }, { "epoch": 0.7759993330832395, "grad_norm": 268.0, "learning_rate": 1.2591338847744011e-05, "loss": 12.2502, "step": 18617 }, { "epoch": 0.7760410153807678, "grad_norm": 238.0, "learning_rate": 1.2586860529446421e-05, "loss": 12.9379, "step": 18618 }, { "epoch": 0.7760826976782961, "grad_norm": 142.0, "learning_rate": 1.258238289300665e-05, "loss": 8.6901, "step": 18619 }, { "epoch": 0.7761243799758243, "grad_norm": 656.0, "learning_rate": 1.257790593850624e-05, "loss": 21.1255, "step": 18620 }, { "epoch": 0.7761660622733525, "grad_norm": 498.0, "learning_rate": 1.2573429666026836e-05, "loss": 17.2506, "step": 18621 }, { "epoch": 0.7762077445708807, "grad_norm": 510.0, "learning_rate": 1.2568954075649997e-05, "loss": 17.6253, "step": 18622 }, { "epoch": 0.776249426868409, "grad_norm": 716.0, "learning_rate": 1.2564479167457288e-05, "loss": 20.0016, "step": 18623 }, { "epoch": 0.7762911091659372, "grad_norm": 540.0, "learning_rate": 1.2560004941530273e-05, "loss": 17.7501, "step": 18624 }, { "epoch": 0.7763327914634655, "grad_norm": 552.0, "learning_rate": 1.2555531397950494e-05, "loss": 17.0008, "step": 18625 }, { "epoch": 0.7763744737609937, "grad_norm": 97.5, "learning_rate": 1.2551058536799476e-05, "loss": 10.6271, "step": 18626 }, { "epoch": 0.776416156058522, "grad_norm": 193.0, "learning_rate": 1.254658635815874e-05, "loss": 12.7504, "step": 18627 }, { "epoch": 0.7764578383560502, "grad_norm": 183.0, "learning_rate": 1.2542114862109795e-05, "loss": 9.8753, "step": 18628 }, { "epoch": 0.7764995206535784, "grad_norm": 161.0, "learning_rate": 1.2537644048734133e-05, "loss": 9.3756, "step": 18629 }, { "epoch": 0.7765412029511066, "grad_norm": 828.0, "learning_rate": 1.2533173918113233e-05, "loss": 19.2545, "step": 18630 }, { "epoch": 0.776582885248635, "grad_norm": 262.0, "learning_rate": 1.2528704470328561e-05, "loss": 11.6263, "step": 18631 }, { "epoch": 0.7766245675461632, "grad_norm": 135.0, "learning_rate": 1.2524235705461578e-05, "loss": 10.6255, "step": 18632 }, { "epoch": 0.7766662498436914, "grad_norm": 784.0, "learning_rate": 1.2519767623593726e-05, "loss": 17.2548, "step": 18633 }, { "epoch": 0.7767079321412196, "grad_norm": 378.0, "learning_rate": 1.2515300224806437e-05, "loss": 14.1255, "step": 18634 }, { "epoch": 0.7767496144387479, "grad_norm": 528.0, "learning_rate": 1.2510833509181114e-05, "loss": 17.8751, "step": 18635 }, { "epoch": 0.7767912967362761, "grad_norm": 356.0, "learning_rate": 1.2506367476799207e-05, "loss": 12.8751, "step": 18636 }, { "epoch": 0.7768329790338043, "grad_norm": 304.0, "learning_rate": 1.2501902127742054e-05, "loss": 13.1252, "step": 18637 }, { "epoch": 0.7768746613313325, "grad_norm": 290.0, "learning_rate": 1.2497437462091094e-05, "loss": 13.0627, "step": 18638 }, { "epoch": 0.7769163436288609, "grad_norm": 1560.0, "learning_rate": 1.2492973479927633e-05, "loss": 36.7514, "step": 18639 }, { "epoch": 0.7769580259263891, "grad_norm": 138.0, "learning_rate": 1.2488510181333091e-05, "loss": 11.126, "step": 18640 }, { "epoch": 0.7769997082239173, "grad_norm": 374.0, "learning_rate": 1.2484047566388751e-05, "loss": 15.2502, "step": 18641 }, { "epoch": 0.7770413905214455, "grad_norm": 378.0, "learning_rate": 1.2479585635176e-05, "loss": 14.1252, "step": 18642 }, { "epoch": 0.7770830728189738, "grad_norm": 145.0, "learning_rate": 1.247512438777611e-05, "loss": 7.6565, "step": 18643 }, { "epoch": 0.777124755116502, "grad_norm": 239.0, "learning_rate": 1.2470663824270417e-05, "loss": 12.3127, "step": 18644 }, { "epoch": 0.7771664374140302, "grad_norm": 127.0, "learning_rate": 1.2466203944740213e-05, "loss": 9.6877, "step": 18645 }, { "epoch": 0.7772081197115585, "grad_norm": 278.0, "learning_rate": 1.2461744749266768e-05, "loss": 14.8131, "step": 18646 }, { "epoch": 0.7772498020090868, "grad_norm": 1376.0, "learning_rate": 1.2457286237931359e-05, "loss": 30.1253, "step": 18647 }, { "epoch": 0.777291484306615, "grad_norm": 366.0, "learning_rate": 1.2452828410815242e-05, "loss": 14.3128, "step": 18648 }, { "epoch": 0.7773331666041432, "grad_norm": 288.0, "learning_rate": 1.2448371267999664e-05, "loss": 13.6252, "step": 18649 }, { "epoch": 0.7773748489016714, "grad_norm": 632.0, "learning_rate": 1.2443914809565849e-05, "loss": 19.127, "step": 18650 }, { "epoch": 0.7774165311991997, "grad_norm": 340.0, "learning_rate": 1.2439459035595024e-05, "loss": 14.6257, "step": 18651 }, { "epoch": 0.777458213496728, "grad_norm": 210.0, "learning_rate": 1.243500394616839e-05, "loss": 11.6257, "step": 18652 }, { "epoch": 0.7774998957942562, "grad_norm": 660.0, "learning_rate": 1.2430549541367142e-05, "loss": 19.6254, "step": 18653 }, { "epoch": 0.7775415780917844, "grad_norm": 432.0, "learning_rate": 1.2426095821272465e-05, "loss": 16.7502, "step": 18654 }, { "epoch": 0.7775832603893127, "grad_norm": 462.0, "learning_rate": 1.2421642785965532e-05, "loss": 15.8129, "step": 18655 }, { "epoch": 0.7776249426868409, "grad_norm": 176.0, "learning_rate": 1.2417190435527487e-05, "loss": 10.7502, "step": 18656 }, { "epoch": 0.7776666249843691, "grad_norm": 173.0, "learning_rate": 1.241273877003949e-05, "loss": 11.0627, "step": 18657 }, { "epoch": 0.7777083072818973, "grad_norm": 320.0, "learning_rate": 1.2408287789582662e-05, "loss": 15.0629, "step": 18658 }, { "epoch": 0.7777499895794256, "grad_norm": 370.0, "learning_rate": 1.2403837494238108e-05, "loss": 15.3128, "step": 18659 }, { "epoch": 0.7777916718769539, "grad_norm": 80.5, "learning_rate": 1.2399387884086988e-05, "loss": 7.5939, "step": 18660 }, { "epoch": 0.7778333541744821, "grad_norm": 386.0, "learning_rate": 1.2394938959210328e-05, "loss": 14.3754, "step": 18661 }, { "epoch": 0.7778750364720103, "grad_norm": 752.0, "learning_rate": 1.2390490719689279e-05, "loss": 20.5002, "step": 18662 }, { "epoch": 0.7779167187695386, "grad_norm": 504.0, "learning_rate": 1.2386043165604844e-05, "loss": 17.7502, "step": 18663 }, { "epoch": 0.7779584010670668, "grad_norm": 165.0, "learning_rate": 1.238159629703814e-05, "loss": 10.0627, "step": 18664 }, { "epoch": 0.778000083364595, "grad_norm": 175.0, "learning_rate": 1.2377150114070151e-05, "loss": 11.0015, "step": 18665 }, { "epoch": 0.7780417656621232, "grad_norm": 157.0, "learning_rate": 1.2372704616781972e-05, "loss": 9.5627, "step": 18666 }, { "epoch": 0.7780834479596516, "grad_norm": 126.5, "learning_rate": 1.2368259805254561e-05, "loss": 9.5036, "step": 18667 }, { "epoch": 0.7781251302571798, "grad_norm": 232.0, "learning_rate": 1.236381567956899e-05, "loss": 10.8128, "step": 18668 }, { "epoch": 0.778166812554708, "grad_norm": 432.0, "learning_rate": 1.2359372239806183e-05, "loss": 15.5629, "step": 18669 }, { "epoch": 0.7782084948522363, "grad_norm": 235.0, "learning_rate": 1.2354929486047179e-05, "loss": 11.9377, "step": 18670 }, { "epoch": 0.7782501771497645, "grad_norm": 508.0, "learning_rate": 1.2350487418372925e-05, "loss": 17.1273, "step": 18671 }, { "epoch": 0.7782918594472927, "grad_norm": 664.0, "learning_rate": 1.2346046036864377e-05, "loss": 17.8764, "step": 18672 }, { "epoch": 0.778333541744821, "grad_norm": 183.0, "learning_rate": 1.2341605341602486e-05, "loss": 11.6878, "step": 18673 }, { "epoch": 0.7783752240423493, "grad_norm": 78.5, "learning_rate": 1.2337165332668182e-05, "loss": 6.9065, "step": 18674 }, { "epoch": 0.7784169063398775, "grad_norm": 200.0, "learning_rate": 1.2332726010142382e-05, "loss": 12.0657, "step": 18675 }, { "epoch": 0.7784585886374057, "grad_norm": 564.0, "learning_rate": 1.2328287374105996e-05, "loss": 18.0033, "step": 18676 }, { "epoch": 0.7785002709349339, "grad_norm": 204.0, "learning_rate": 1.232384942463991e-05, "loss": 12.4378, "step": 18677 }, { "epoch": 0.7785419532324622, "grad_norm": 220.0, "learning_rate": 1.2319412161825017e-05, "loss": 7.5003, "step": 18678 }, { "epoch": 0.7785836355299904, "grad_norm": 482.0, "learning_rate": 1.2314975585742183e-05, "loss": 16.7501, "step": 18679 }, { "epoch": 0.7786253178275186, "grad_norm": 364.0, "learning_rate": 1.2310539696472267e-05, "loss": 11.4378, "step": 18680 }, { "epoch": 0.7786670001250469, "grad_norm": 320.0, "learning_rate": 1.2306104494096104e-05, "loss": 13.19, "step": 18681 }, { "epoch": 0.7787086824225752, "grad_norm": 568.0, "learning_rate": 1.230166997869454e-05, "loss": 19.2501, "step": 18682 }, { "epoch": 0.7787503647201034, "grad_norm": 288.0, "learning_rate": 1.2297236150348385e-05, "loss": 12.3128, "step": 18683 }, { "epoch": 0.7787920470176316, "grad_norm": 280.0, "learning_rate": 1.2292803009138442e-05, "loss": 12.9378, "step": 18684 }, { "epoch": 0.7788337293151598, "grad_norm": 920.0, "learning_rate": 1.2288370555145506e-05, "loss": 25.6251, "step": 18685 }, { "epoch": 0.7788754116126881, "grad_norm": 294.0, "learning_rate": 1.2283938788450389e-05, "loss": 8.9385, "step": 18686 }, { "epoch": 0.7789170939102164, "grad_norm": 181.0, "learning_rate": 1.2279507709133809e-05, "loss": 12.0005, "step": 18687 }, { "epoch": 0.7789587762077446, "grad_norm": 350.0, "learning_rate": 1.2275077317276573e-05, "loss": 12.6879, "step": 18688 }, { "epoch": 0.7790004585052728, "grad_norm": 243.0, "learning_rate": 1.227064761295938e-05, "loss": 12.9378, "step": 18689 }, { "epoch": 0.7790421408028011, "grad_norm": 114.0, "learning_rate": 1.2266218596263008e-05, "loss": 10.1882, "step": 18690 }, { "epoch": 0.7790838231003293, "grad_norm": 72.0, "learning_rate": 1.2261790267268125e-05, "loss": 8.0007, "step": 18691 }, { "epoch": 0.7791255053978575, "grad_norm": 290.0, "learning_rate": 1.225736262605549e-05, "loss": 14.1879, "step": 18692 }, { "epoch": 0.7791671876953857, "grad_norm": 258.0, "learning_rate": 1.2252935672705745e-05, "loss": 12.0627, "step": 18693 }, { "epoch": 0.779208869992914, "grad_norm": 212.0, "learning_rate": 1.2248509407299614e-05, "loss": 12.7514, "step": 18694 }, { "epoch": 0.7792505522904423, "grad_norm": 114.0, "learning_rate": 1.2244083829917746e-05, "loss": 4.6565, "step": 18695 }, { "epoch": 0.7792922345879705, "grad_norm": 109.5, "learning_rate": 1.2239658940640809e-05, "loss": 9.8126, "step": 18696 }, { "epoch": 0.7793339168854987, "grad_norm": 255.0, "learning_rate": 1.2235234739549434e-05, "loss": 13.1259, "step": 18697 }, { "epoch": 0.779375599183027, "grad_norm": 712.0, "learning_rate": 1.2230811226724259e-05, "loss": 23.0002, "step": 18698 }, { "epoch": 0.7794172814805552, "grad_norm": 410.0, "learning_rate": 1.2226388402245903e-05, "loss": 15.1252, "step": 18699 }, { "epoch": 0.7794589637780834, "grad_norm": 225.0, "learning_rate": 1.2221966266194974e-05, "loss": 11.8755, "step": 18700 }, { "epoch": 0.7795006460756116, "grad_norm": 368.0, "learning_rate": 1.221754481865206e-05, "loss": 14.5627, "step": 18701 }, { "epoch": 0.77954232837314, "grad_norm": 294.0, "learning_rate": 1.221312405969775e-05, "loss": 11.8752, "step": 18702 }, { "epoch": 0.7795840106706682, "grad_norm": 668.0, "learning_rate": 1.2208703989412606e-05, "loss": 20.8752, "step": 18703 }, { "epoch": 0.7796256929681964, "grad_norm": 1240.0, "learning_rate": 1.2204284607877186e-05, "loss": 24.3791, "step": 18704 }, { "epoch": 0.7796673752657246, "grad_norm": 512.0, "learning_rate": 1.2199865915172037e-05, "loss": 17.1256, "step": 18705 }, { "epoch": 0.7797090575632529, "grad_norm": 1152.0, "learning_rate": 1.2195447911377684e-05, "loss": 28.0013, "step": 18706 }, { "epoch": 0.7797507398607811, "grad_norm": 166.0, "learning_rate": 1.2191030596574649e-05, "loss": 11.0003, "step": 18707 }, { "epoch": 0.7797924221583094, "grad_norm": 410.0, "learning_rate": 1.2186613970843436e-05, "loss": 16.1262, "step": 18708 }, { "epoch": 0.7798341044558376, "grad_norm": 346.0, "learning_rate": 1.218219803426453e-05, "loss": 14.0628, "step": 18709 }, { "epoch": 0.7798757867533659, "grad_norm": 608.0, "learning_rate": 1.2177782786918451e-05, "loss": 18.7548, "step": 18710 }, { "epoch": 0.7799174690508941, "grad_norm": 284.0, "learning_rate": 1.2173368228885607e-05, "loss": 13.5002, "step": 18711 }, { "epoch": 0.7799591513484223, "grad_norm": 352.0, "learning_rate": 1.2168954360246515e-05, "loss": 14.1878, "step": 18712 }, { "epoch": 0.7800008336459505, "grad_norm": 75.0, "learning_rate": 1.2164541181081557e-05, "loss": 7.8751, "step": 18713 }, { "epoch": 0.7800425159434788, "grad_norm": 740.0, "learning_rate": 1.2160128691471229e-05, "loss": 21.5003, "step": 18714 }, { "epoch": 0.780084198241007, "grad_norm": 600.0, "learning_rate": 1.2155716891495877e-05, "loss": 18.2503, "step": 18715 }, { "epoch": 0.7801258805385353, "grad_norm": 187.0, "learning_rate": 1.2151305781235984e-05, "loss": 10.8755, "step": 18716 }, { "epoch": 0.7801675628360635, "grad_norm": 96.0, "learning_rate": 1.2146895360771865e-05, "loss": 7.7508, "step": 18717 }, { "epoch": 0.7802092451335918, "grad_norm": 342.0, "learning_rate": 1.214248563018397e-05, "loss": 15.6878, "step": 18718 }, { "epoch": 0.78025092743112, "grad_norm": 414.0, "learning_rate": 1.213807658955261e-05, "loss": 15.8126, "step": 18719 }, { "epoch": 0.7802926097286482, "grad_norm": 286.0, "learning_rate": 1.2133668238958174e-05, "loss": 13.2501, "step": 18720 }, { "epoch": 0.7803342920261764, "grad_norm": 494.0, "learning_rate": 1.2129260578481e-05, "loss": 17.2504, "step": 18721 }, { "epoch": 0.7803759743237048, "grad_norm": 276.0, "learning_rate": 1.2124853608201414e-05, "loss": 10.4379, "step": 18722 }, { "epoch": 0.780417656621233, "grad_norm": 398.0, "learning_rate": 1.212044732819973e-05, "loss": 15.6252, "step": 18723 }, { "epoch": 0.7804593389187612, "grad_norm": 552.0, "learning_rate": 1.2116041738556256e-05, "loss": 19.0002, "step": 18724 }, { "epoch": 0.7805010212162894, "grad_norm": 226.0, "learning_rate": 1.2111636839351286e-05, "loss": 11.8753, "step": 18725 }, { "epoch": 0.7805427035138177, "grad_norm": 201.0, "learning_rate": 1.21072326306651e-05, "loss": 10.8129, "step": 18726 }, { "epoch": 0.7805843858113459, "grad_norm": 133.0, "learning_rate": 1.2102829112577963e-05, "loss": 10.5003, "step": 18727 }, { "epoch": 0.7806260681088741, "grad_norm": 460.0, "learning_rate": 1.2098426285170129e-05, "loss": 14.9376, "step": 18728 }, { "epoch": 0.7806677504064023, "grad_norm": 314.0, "learning_rate": 1.2094024148521837e-05, "loss": 15.3132, "step": 18729 }, { "epoch": 0.7807094327039307, "grad_norm": 346.0, "learning_rate": 1.2089622702713321e-05, "loss": 14.3127, "step": 18730 }, { "epoch": 0.7807511150014589, "grad_norm": 137.0, "learning_rate": 1.20852219478248e-05, "loss": 10.6255, "step": 18731 }, { "epoch": 0.7807927972989871, "grad_norm": 378.0, "learning_rate": 1.2080821883936472e-05, "loss": 15.6255, "step": 18732 }, { "epoch": 0.7808344795965153, "grad_norm": 476.0, "learning_rate": 1.2076422511128532e-05, "loss": 17.1252, "step": 18733 }, { "epoch": 0.7808761618940436, "grad_norm": 712.0, "learning_rate": 1.2072023829481161e-05, "loss": 22.1271, "step": 18734 }, { "epoch": 0.7809178441915718, "grad_norm": 458.0, "learning_rate": 1.2067625839074503e-05, "loss": 13.8128, "step": 18735 }, { "epoch": 0.7809595264891, "grad_norm": 1424.0, "learning_rate": 1.2063228539988764e-05, "loss": 29.6253, "step": 18736 }, { "epoch": 0.7810012087866283, "grad_norm": 125.0, "learning_rate": 1.205883193230402e-05, "loss": 6.6255, "step": 18737 }, { "epoch": 0.7810428910841566, "grad_norm": 322.0, "learning_rate": 1.2054436016100468e-05, "loss": 14.1878, "step": 18738 }, { "epoch": 0.7810845733816848, "grad_norm": 320.0, "learning_rate": 1.2050040791458156e-05, "loss": 14.5629, "step": 18739 }, { "epoch": 0.781126255679213, "grad_norm": 290.0, "learning_rate": 1.2045646258457243e-05, "loss": 13.4382, "step": 18740 }, { "epoch": 0.7811679379767413, "grad_norm": 57.25, "learning_rate": 1.2041252417177773e-05, "loss": 7.6883, "step": 18741 }, { "epoch": 0.7812096202742695, "grad_norm": 744.0, "learning_rate": 1.203685926769988e-05, "loss": 20.3795, "step": 18742 }, { "epoch": 0.7812513025717978, "grad_norm": 292.0, "learning_rate": 1.203246681010356e-05, "loss": 11.3762, "step": 18743 }, { "epoch": 0.781292984869326, "grad_norm": 380.0, "learning_rate": 1.2028075044468922e-05, "loss": 14.1894, "step": 18744 }, { "epoch": 0.7813346671668543, "grad_norm": 172.0, "learning_rate": 1.2023683970875983e-05, "loss": 11.5629, "step": 18745 }, { "epoch": 0.7813763494643825, "grad_norm": 420.0, "learning_rate": 1.2019293589404779e-05, "loss": 15.3755, "step": 18746 }, { "epoch": 0.7814180317619107, "grad_norm": 240.0, "learning_rate": 1.2014903900135322e-05, "loss": 13.1881, "step": 18747 }, { "epoch": 0.7814597140594389, "grad_norm": 628.0, "learning_rate": 1.2010514903147607e-05, "loss": 18.0012, "step": 18748 }, { "epoch": 0.7815013963569672, "grad_norm": 708.0, "learning_rate": 1.2006126598521634e-05, "loss": 21.2508, "step": 18749 }, { "epoch": 0.7815430786544955, "grad_norm": 656.0, "learning_rate": 1.200173898633738e-05, "loss": 18.8752, "step": 18750 }, { "epoch": 0.7815847609520237, "grad_norm": 444.0, "learning_rate": 1.19973520666748e-05, "loss": 16.501, "step": 18751 }, { "epoch": 0.7816264432495519, "grad_norm": 512.0, "learning_rate": 1.1992965839613856e-05, "loss": 18.3753, "step": 18752 }, { "epoch": 0.7816681255470802, "grad_norm": 114.5, "learning_rate": 1.1988580305234481e-05, "loss": 8.1878, "step": 18753 }, { "epoch": 0.7817098078446084, "grad_norm": 508.0, "learning_rate": 1.198419546361661e-05, "loss": 15.0668, "step": 18754 }, { "epoch": 0.7817514901421366, "grad_norm": 446.0, "learning_rate": 1.1979811314840144e-05, "loss": 16.2527, "step": 18755 }, { "epoch": 0.7817931724396648, "grad_norm": 251.0, "learning_rate": 1.1975427858985e-05, "loss": 9.0006, "step": 18756 }, { "epoch": 0.7818348547371932, "grad_norm": 392.0, "learning_rate": 1.197104509613105e-05, "loss": 14.8753, "step": 18757 }, { "epoch": 0.7818765370347214, "grad_norm": 342.0, "learning_rate": 1.1966663026358189e-05, "loss": 14.6254, "step": 18758 }, { "epoch": 0.7819182193322496, "grad_norm": 652.0, "learning_rate": 1.196228164974626e-05, "loss": 21.2534, "step": 18759 }, { "epoch": 0.7819599016297778, "grad_norm": 229.0, "learning_rate": 1.195790096637513e-05, "loss": 11.6878, "step": 18760 }, { "epoch": 0.7820015839273061, "grad_norm": 209.0, "learning_rate": 1.1953520976324617e-05, "loss": 12.0629, "step": 18761 }, { "epoch": 0.7820432662248343, "grad_norm": 430.0, "learning_rate": 1.1949141679674591e-05, "loss": 15.6251, "step": 18762 }, { "epoch": 0.7820849485223625, "grad_norm": 388.0, "learning_rate": 1.1944763076504805e-05, "loss": 14.9377, "step": 18763 }, { "epoch": 0.7821266308198908, "grad_norm": 1004.0, "learning_rate": 1.1940385166895124e-05, "loss": 29.5004, "step": 18764 }, { "epoch": 0.7821683131174191, "grad_norm": 352.0, "learning_rate": 1.1936007950925272e-05, "loss": 14.3774, "step": 18765 }, { "epoch": 0.7822099954149473, "grad_norm": 660.0, "learning_rate": 1.1931631428675078e-05, "loss": 17.7543, "step": 18766 }, { "epoch": 0.7822516777124755, "grad_norm": 868.0, "learning_rate": 1.1927255600224257e-05, "loss": 26.1253, "step": 18767 }, { "epoch": 0.7822933600100037, "grad_norm": 125.5, "learning_rate": 1.1922880465652608e-05, "loss": 9.2512, "step": 18768 }, { "epoch": 0.782335042307532, "grad_norm": 332.0, "learning_rate": 1.1918506025039816e-05, "loss": 12.1252, "step": 18769 }, { "epoch": 0.7823767246050602, "grad_norm": 240.0, "learning_rate": 1.1914132278465645e-05, "loss": 11.9404, "step": 18770 }, { "epoch": 0.7824184069025885, "grad_norm": 506.0, "learning_rate": 1.1909759226009792e-05, "loss": 15.4392, "step": 18771 }, { "epoch": 0.7824600892001167, "grad_norm": 316.0, "learning_rate": 1.190538686775196e-05, "loss": 14.4377, "step": 18772 }, { "epoch": 0.782501771497645, "grad_norm": 83.0, "learning_rate": 1.1901015203771837e-05, "loss": 9.1255, "step": 18773 }, { "epoch": 0.7825434537951732, "grad_norm": 360.0, "learning_rate": 1.1896644234149096e-05, "loss": 13.8127, "step": 18774 }, { "epoch": 0.7825851360927014, "grad_norm": 516.0, "learning_rate": 1.189227395896339e-05, "loss": 17.7503, "step": 18775 }, { "epoch": 0.7826268183902296, "grad_norm": 113.0, "learning_rate": 1.1887904378294378e-05, "loss": 8.3126, "step": 18776 }, { "epoch": 0.782668500687758, "grad_norm": 468.0, "learning_rate": 1.1883535492221692e-05, "loss": 16.3752, "step": 18777 }, { "epoch": 0.7827101829852862, "grad_norm": 744.0, "learning_rate": 1.187916730082495e-05, "loss": 21.6251, "step": 18778 }, { "epoch": 0.7827518652828144, "grad_norm": 366.0, "learning_rate": 1.1874799804183772e-05, "loss": 15.1252, "step": 18779 }, { "epoch": 0.7827935475803426, "grad_norm": 121.0, "learning_rate": 1.1870433002377756e-05, "loss": 9.8754, "step": 18780 }, { "epoch": 0.7828352298778709, "grad_norm": 354.0, "learning_rate": 1.1866066895486478e-05, "loss": 13.1254, "step": 18781 }, { "epoch": 0.7828769121753991, "grad_norm": 600.0, "learning_rate": 1.1861701483589515e-05, "loss": 18.6253, "step": 18782 }, { "epoch": 0.7829185944729273, "grad_norm": 334.0, "learning_rate": 1.1857336766766436e-05, "loss": 13.6883, "step": 18783 }, { "epoch": 0.7829602767704555, "grad_norm": 314.0, "learning_rate": 1.1852972745096774e-05, "loss": 14.3752, "step": 18784 }, { "epoch": 0.7830019590679839, "grad_norm": 532.0, "learning_rate": 1.1848609418660056e-05, "loss": 15.5051, "step": 18785 }, { "epoch": 0.7830436413655121, "grad_norm": 187.0, "learning_rate": 1.1844246787535845e-05, "loss": 9.5632, "step": 18786 }, { "epoch": 0.7830853236630403, "grad_norm": 792.0, "learning_rate": 1.18398848518036e-05, "loss": 20.0003, "step": 18787 }, { "epoch": 0.7831270059605685, "grad_norm": 404.0, "learning_rate": 1.183552361154287e-05, "loss": 15.8751, "step": 18788 }, { "epoch": 0.7831686882580968, "grad_norm": 356.0, "learning_rate": 1.1831163066833078e-05, "loss": 15.0629, "step": 18789 }, { "epoch": 0.783210370555625, "grad_norm": 215.0, "learning_rate": 1.1826803217753756e-05, "loss": 11.9377, "step": 18790 }, { "epoch": 0.7832520528531532, "grad_norm": 157.0, "learning_rate": 1.1822444064384308e-05, "loss": 9.8753, "step": 18791 }, { "epoch": 0.7832937351506815, "grad_norm": 1256.0, "learning_rate": 1.181808560680423e-05, "loss": 27.0042, "step": 18792 }, { "epoch": 0.7833354174482098, "grad_norm": 552.0, "learning_rate": 1.1813727845092909e-05, "loss": 19.5037, "step": 18793 }, { "epoch": 0.783377099745738, "grad_norm": 268.0, "learning_rate": 1.1809370779329814e-05, "loss": 12.5002, "step": 18794 }, { "epoch": 0.7834187820432662, "grad_norm": 235.0, "learning_rate": 1.1805014409594294e-05, "loss": 12.6256, "step": 18795 }, { "epoch": 0.7834604643407944, "grad_norm": 278.0, "learning_rate": 1.1800658735965797e-05, "loss": 13.5627, "step": 18796 }, { "epoch": 0.7835021466383227, "grad_norm": 158.0, "learning_rate": 1.1796303758523685e-05, "loss": 9.3134, "step": 18797 }, { "epoch": 0.783543828935851, "grad_norm": 480.0, "learning_rate": 1.1791949477347325e-05, "loss": 17.1252, "step": 18798 }, { "epoch": 0.7835855112333792, "grad_norm": 320.0, "learning_rate": 1.1787595892516079e-05, "loss": 13.7503, "step": 18799 }, { "epoch": 0.7836271935309074, "grad_norm": 136.0, "learning_rate": 1.178324300410929e-05, "loss": 9.9377, "step": 18800 }, { "epoch": 0.7836688758284357, "grad_norm": 368.0, "learning_rate": 1.1778890812206294e-05, "loss": 13.3127, "step": 18801 }, { "epoch": 0.7837105581259639, "grad_norm": 286.0, "learning_rate": 1.1774539316886402e-05, "loss": 11.1897, "step": 18802 }, { "epoch": 0.7837522404234921, "grad_norm": 209.0, "learning_rate": 1.1770188518228925e-05, "loss": 9.1253, "step": 18803 }, { "epoch": 0.7837939227210203, "grad_norm": 520.0, "learning_rate": 1.1765838416313157e-05, "loss": 18.5007, "step": 18804 }, { "epoch": 0.7838356050185487, "grad_norm": 360.0, "learning_rate": 1.1761489011218374e-05, "loss": 13.2503, "step": 18805 }, { "epoch": 0.7838772873160769, "grad_norm": 560.0, "learning_rate": 1.175714030302385e-05, "loss": 17.7502, "step": 18806 }, { "epoch": 0.7839189696136051, "grad_norm": 292.0, "learning_rate": 1.1752792291808839e-05, "loss": 13.0012, "step": 18807 }, { "epoch": 0.7839606519111333, "grad_norm": 124.5, "learning_rate": 1.174844497765259e-05, "loss": 8.7505, "step": 18808 }, { "epoch": 0.7840023342086616, "grad_norm": 432.0, "learning_rate": 1.1744098360634321e-05, "loss": 14.6892, "step": 18809 }, { "epoch": 0.7840440165061898, "grad_norm": 328.0, "learning_rate": 1.1739752440833257e-05, "loss": 13.0628, "step": 18810 }, { "epoch": 0.784085698803718, "grad_norm": 127.0, "learning_rate": 1.1735407218328587e-05, "loss": 9.1878, "step": 18811 }, { "epoch": 0.7841273811012462, "grad_norm": 256.0, "learning_rate": 1.1731062693199541e-05, "loss": 12.1253, "step": 18812 }, { "epoch": 0.7841690633987746, "grad_norm": 198.0, "learning_rate": 1.1726718865525249e-05, "loss": 11.0628, "step": 18813 }, { "epoch": 0.7842107456963028, "grad_norm": 412.0, "learning_rate": 1.1722375735384933e-05, "loss": 15.9398, "step": 18814 }, { "epoch": 0.784252427993831, "grad_norm": 222.0, "learning_rate": 1.171803330285769e-05, "loss": 12.0004, "step": 18815 }, { "epoch": 0.7842941102913593, "grad_norm": 68.5, "learning_rate": 1.1713691568022717e-05, "loss": 8.626, "step": 18816 }, { "epoch": 0.7843357925888875, "grad_norm": 404.0, "learning_rate": 1.1709350530959084e-05, "loss": 14.3753, "step": 18817 }, { "epoch": 0.7843774748864157, "grad_norm": 272.0, "learning_rate": 1.1705010191745963e-05, "loss": 12.5633, "step": 18818 }, { "epoch": 0.784419157183944, "grad_norm": 183.0, "learning_rate": 1.1700670550462405e-05, "loss": 10.5029, "step": 18819 }, { "epoch": 0.7844608394814723, "grad_norm": 516.0, "learning_rate": 1.1696331607187543e-05, "loss": 17.6267, "step": 18820 }, { "epoch": 0.7845025217790005, "grad_norm": 137.0, "learning_rate": 1.1691993362000437e-05, "loss": 10.0002, "step": 18821 }, { "epoch": 0.7845442040765287, "grad_norm": 356.0, "learning_rate": 1.1687655814980147e-05, "loss": 14.6878, "step": 18822 }, { "epoch": 0.7845858863740569, "grad_norm": 342.0, "learning_rate": 1.1683318966205736e-05, "loss": 14.6255, "step": 18823 }, { "epoch": 0.7846275686715852, "grad_norm": 181.0, "learning_rate": 1.1678982815756234e-05, "loss": 10.6252, "step": 18824 }, { "epoch": 0.7846692509691134, "grad_norm": 262.0, "learning_rate": 1.1674647363710678e-05, "loss": 12.1252, "step": 18825 }, { "epoch": 0.7847109332666417, "grad_norm": 222.0, "learning_rate": 1.1670312610148071e-05, "loss": 12.0631, "step": 18826 }, { "epoch": 0.7847526155641699, "grad_norm": 376.0, "learning_rate": 1.1665978555147423e-05, "loss": 16.3754, "step": 18827 }, { "epoch": 0.7847942978616982, "grad_norm": 270.0, "learning_rate": 1.1661645198787713e-05, "loss": 13.8131, "step": 18828 }, { "epoch": 0.7848359801592264, "grad_norm": 972.0, "learning_rate": 1.165731254114793e-05, "loss": 26.5036, "step": 18829 }, { "epoch": 0.7848776624567546, "grad_norm": 97.5, "learning_rate": 1.1652980582307027e-05, "loss": 9.1881, "step": 18830 }, { "epoch": 0.7849193447542828, "grad_norm": 215.0, "learning_rate": 1.1648649322343953e-05, "loss": 11.8751, "step": 18831 }, { "epoch": 0.7849610270518111, "grad_norm": 408.0, "learning_rate": 1.1644318761337652e-05, "loss": 15.6254, "step": 18832 }, { "epoch": 0.7850027093493394, "grad_norm": 183.0, "learning_rate": 1.163998889936705e-05, "loss": 9.5004, "step": 18833 }, { "epoch": 0.7850443916468676, "grad_norm": 880.0, "learning_rate": 1.163565973651105e-05, "loss": 23.8752, "step": 18834 }, { "epoch": 0.7850860739443958, "grad_norm": 170.0, "learning_rate": 1.163133127284855e-05, "loss": 10.1252, "step": 18835 }, { "epoch": 0.7851277562419241, "grad_norm": 61.0, "learning_rate": 1.1627003508458468e-05, "loss": 7.9689, "step": 18836 }, { "epoch": 0.7851694385394523, "grad_norm": 219.0, "learning_rate": 1.1622676443419623e-05, "loss": 12.7503, "step": 18837 }, { "epoch": 0.7852111208369805, "grad_norm": 79.0, "learning_rate": 1.1618350077810935e-05, "loss": 9.1258, "step": 18838 }, { "epoch": 0.7852528031345087, "grad_norm": 876.0, "learning_rate": 1.16140244117112e-05, "loss": 24.7501, "step": 18839 }, { "epoch": 0.7852944854320371, "grad_norm": 278.0, "learning_rate": 1.1609699445199306e-05, "loss": 12.3775, "step": 18840 }, { "epoch": 0.7853361677295653, "grad_norm": 524.0, "learning_rate": 1.1605375178354016e-05, "loss": 16.2504, "step": 18841 }, { "epoch": 0.7853778500270935, "grad_norm": 188.0, "learning_rate": 1.1601051611254199e-05, "loss": 12.0632, "step": 18842 }, { "epoch": 0.7854195323246217, "grad_norm": 324.0, "learning_rate": 1.1596728743978603e-05, "loss": 14.1878, "step": 18843 }, { "epoch": 0.78546121462215, "grad_norm": 181.0, "learning_rate": 1.1592406576606057e-05, "loss": 11.1281, "step": 18844 }, { "epoch": 0.7855028969196782, "grad_norm": 968.0, "learning_rate": 1.1588085109215285e-05, "loss": 24.2544, "step": 18845 }, { "epoch": 0.7855445792172064, "grad_norm": 205.0, "learning_rate": 1.1583764341885083e-05, "loss": 12.6878, "step": 18846 }, { "epoch": 0.7855862615147347, "grad_norm": 462.0, "learning_rate": 1.1579444274694191e-05, "loss": 16.0002, "step": 18847 }, { "epoch": 0.785627943812263, "grad_norm": 258.0, "learning_rate": 1.1575124907721336e-05, "loss": 13.1252, "step": 18848 }, { "epoch": 0.7856696261097912, "grad_norm": 186.0, "learning_rate": 1.157080624104524e-05, "loss": 11.7503, "step": 18849 }, { "epoch": 0.7857113084073194, "grad_norm": 1432.0, "learning_rate": 1.1566488274744614e-05, "loss": 28.002, "step": 18850 }, { "epoch": 0.7857529907048476, "grad_norm": 496.0, "learning_rate": 1.1562171008898149e-05, "loss": 20.1284, "step": 18851 }, { "epoch": 0.7857946730023759, "grad_norm": 330.0, "learning_rate": 1.1557854443584526e-05, "loss": 14.3753, "step": 18852 }, { "epoch": 0.7858363552999041, "grad_norm": 840.0, "learning_rate": 1.1553538578882422e-05, "loss": 22.2502, "step": 18853 }, { "epoch": 0.7858780375974324, "grad_norm": 752.0, "learning_rate": 1.154922341487049e-05, "loss": 19.8803, "step": 18854 }, { "epoch": 0.7859197198949606, "grad_norm": 294.0, "learning_rate": 1.1544908951627376e-05, "loss": 13.2502, "step": 18855 }, { "epoch": 0.7859614021924889, "grad_norm": 126.5, "learning_rate": 1.1540595189231713e-05, "loss": 10.6257, "step": 18856 }, { "epoch": 0.7860030844900171, "grad_norm": 396.0, "learning_rate": 1.153628212776211e-05, "loss": 13.5633, "step": 18857 }, { "epoch": 0.7860447667875453, "grad_norm": 131.0, "learning_rate": 1.1531969767297185e-05, "loss": 9.0628, "step": 18858 }, { "epoch": 0.7860864490850735, "grad_norm": 230.0, "learning_rate": 1.1527658107915523e-05, "loss": 13.313, "step": 18859 }, { "epoch": 0.7861281313826018, "grad_norm": 2080.0, "learning_rate": 1.1523347149695706e-05, "loss": 47.255, "step": 18860 }, { "epoch": 0.7861698136801301, "grad_norm": 528.0, "learning_rate": 1.151903689271629e-05, "loss": 17.5003, "step": 18861 }, { "epoch": 0.7862114959776583, "grad_norm": 98.5, "learning_rate": 1.1514727337055869e-05, "loss": 8.3127, "step": 18862 }, { "epoch": 0.7862531782751865, "grad_norm": 196.0, "learning_rate": 1.1510418482792934e-05, "loss": 11.438, "step": 18863 }, { "epoch": 0.7862948605727148, "grad_norm": 106.0, "learning_rate": 1.150611033000607e-05, "loss": 9.7502, "step": 18864 }, { "epoch": 0.786336542870243, "grad_norm": 85.5, "learning_rate": 1.1501802878773726e-05, "loss": 5.8149, "step": 18865 }, { "epoch": 0.7863782251677712, "grad_norm": 172.0, "learning_rate": 1.1497496129174478e-05, "loss": 10.7504, "step": 18866 }, { "epoch": 0.7864199074652994, "grad_norm": 237.0, "learning_rate": 1.1493190081286748e-05, "loss": 11.6252, "step": 18867 }, { "epoch": 0.7864615897628278, "grad_norm": 1176.0, "learning_rate": 1.1488884735189076e-05, "loss": 27.7508, "step": 18868 }, { "epoch": 0.786503272060356, "grad_norm": 536.0, "learning_rate": 1.1484580090959874e-05, "loss": 16.1277, "step": 18869 }, { "epoch": 0.7865449543578842, "grad_norm": 302.0, "learning_rate": 1.1480276148677627e-05, "loss": 14.1252, "step": 18870 }, { "epoch": 0.7865866366554124, "grad_norm": 396.0, "learning_rate": 1.1475972908420774e-05, "loss": 14.6877, "step": 18871 }, { "epoch": 0.7866283189529407, "grad_norm": 71.0, "learning_rate": 1.1471670370267729e-05, "loss": 8.1253, "step": 18872 }, { "epoch": 0.7866700012504689, "grad_norm": 664.0, "learning_rate": 1.1467368534296913e-05, "loss": 20.7502, "step": 18873 }, { "epoch": 0.7867116835479971, "grad_norm": 354.0, "learning_rate": 1.1463067400586724e-05, "loss": 14.2503, "step": 18874 }, { "epoch": 0.7867533658455254, "grad_norm": 440.0, "learning_rate": 1.1458766969215556e-05, "loss": 13.9378, "step": 18875 }, { "epoch": 0.7867950481430537, "grad_norm": 600.0, "learning_rate": 1.145446724026178e-05, "loss": 20.0002, "step": 18876 }, { "epoch": 0.7868367304405819, "grad_norm": 1040.0, "learning_rate": 1.1450168213803764e-05, "loss": 25.8756, "step": 18877 }, { "epoch": 0.7868784127381101, "grad_norm": 202.0, "learning_rate": 1.1445869889919847e-05, "loss": 10.8128, "step": 18878 }, { "epoch": 0.7869200950356383, "grad_norm": 186.0, "learning_rate": 1.1441572268688377e-05, "loss": 10.3127, "step": 18879 }, { "epoch": 0.7869617773331666, "grad_norm": 342.0, "learning_rate": 1.1437275350187682e-05, "loss": 15.1879, "step": 18880 }, { "epoch": 0.7870034596306948, "grad_norm": 344.0, "learning_rate": 1.1432979134496058e-05, "loss": 13.8128, "step": 18881 }, { "epoch": 0.7870451419282231, "grad_norm": 450.0, "learning_rate": 1.1428683621691815e-05, "loss": 17.1257, "step": 18882 }, { "epoch": 0.7870868242257513, "grad_norm": 524.0, "learning_rate": 1.1424388811853242e-05, "loss": 18.2504, "step": 18883 }, { "epoch": 0.7871285065232796, "grad_norm": 158.0, "learning_rate": 1.1420094705058604e-05, "loss": 11.3757, "step": 18884 }, { "epoch": 0.7871701888208078, "grad_norm": 217.0, "learning_rate": 1.1415801301386164e-05, "loss": 11.6255, "step": 18885 }, { "epoch": 0.787211871118336, "grad_norm": 278.0, "learning_rate": 1.1411508600914173e-05, "loss": 11.0002, "step": 18886 }, { "epoch": 0.7872535534158643, "grad_norm": 620.0, "learning_rate": 1.1407216603720845e-05, "loss": 19.3758, "step": 18887 }, { "epoch": 0.7872952357133925, "grad_norm": 556.0, "learning_rate": 1.140292530988445e-05, "loss": 17.3752, "step": 18888 }, { "epoch": 0.7873369180109208, "grad_norm": 173.0, "learning_rate": 1.1398634719483142e-05, "loss": 6.2506, "step": 18889 }, { "epoch": 0.787378600308449, "grad_norm": 236.0, "learning_rate": 1.1394344832595166e-05, "loss": 11.6881, "step": 18890 }, { "epoch": 0.7874202826059773, "grad_norm": 516.0, "learning_rate": 1.1390055649298659e-05, "loss": 15.6877, "step": 18891 }, { "epoch": 0.7874619649035055, "grad_norm": 420.0, "learning_rate": 1.1385767169671846e-05, "loss": 11.0642, "step": 18892 }, { "epoch": 0.7875036472010337, "grad_norm": 222.0, "learning_rate": 1.138147939379282e-05, "loss": 12.1252, "step": 18893 }, { "epoch": 0.7875453294985619, "grad_norm": 238.0, "learning_rate": 1.1377192321739788e-05, "loss": 13.5005, "step": 18894 }, { "epoch": 0.7875870117960903, "grad_norm": 520.0, "learning_rate": 1.1372905953590835e-05, "loss": 16.8775, "step": 18895 }, { "epoch": 0.7876286940936185, "grad_norm": 540.0, "learning_rate": 1.1368620289424104e-05, "loss": 17.2506, "step": 18896 }, { "epoch": 0.7876703763911467, "grad_norm": 412.0, "learning_rate": 1.1364335329317705e-05, "loss": 13.4384, "step": 18897 }, { "epoch": 0.7877120586886749, "grad_norm": 156.0, "learning_rate": 1.136005107334972e-05, "loss": 12.063, "step": 18898 }, { "epoch": 0.7877537409862032, "grad_norm": 304.0, "learning_rate": 1.1355767521598237e-05, "loss": 13.3754, "step": 18899 }, { "epoch": 0.7877954232837314, "grad_norm": 494.0, "learning_rate": 1.1351484674141316e-05, "loss": 16.8756, "step": 18900 }, { "epoch": 0.7878371055812596, "grad_norm": 189.0, "learning_rate": 1.1347202531057028e-05, "loss": 12.2505, "step": 18901 }, { "epoch": 0.7878787878787878, "grad_norm": 1488.0, "learning_rate": 1.1342921092423397e-05, "loss": 33.757, "step": 18902 }, { "epoch": 0.7879204701763162, "grad_norm": 484.0, "learning_rate": 1.1338640358318463e-05, "loss": 15.3753, "step": 18903 }, { "epoch": 0.7879621524738444, "grad_norm": 220.0, "learning_rate": 1.1334360328820237e-05, "loss": 12.3752, "step": 18904 }, { "epoch": 0.7880038347713726, "grad_norm": 258.0, "learning_rate": 1.1330081004006732e-05, "loss": 12.7505, "step": 18905 }, { "epoch": 0.7880455170689008, "grad_norm": 300.0, "learning_rate": 1.132580238395593e-05, "loss": 13.9378, "step": 18906 }, { "epoch": 0.7880871993664291, "grad_norm": 244.0, "learning_rate": 1.1321524468745814e-05, "loss": 11.5004, "step": 18907 }, { "epoch": 0.7881288816639573, "grad_norm": 229.0, "learning_rate": 1.131724725845435e-05, "loss": 12.1881, "step": 18908 }, { "epoch": 0.7881705639614855, "grad_norm": 236.0, "learning_rate": 1.1312970753159485e-05, "loss": 10.9399, "step": 18909 }, { "epoch": 0.7882122462590138, "grad_norm": 209.0, "learning_rate": 1.130869495293917e-05, "loss": 11.2502, "step": 18910 }, { "epoch": 0.7882539285565421, "grad_norm": 512.0, "learning_rate": 1.1304419857871301e-05, "loss": 17.5003, "step": 18911 }, { "epoch": 0.7882956108540703, "grad_norm": 454.0, "learning_rate": 1.1300145468033846e-05, "loss": 15.8779, "step": 18912 }, { "epoch": 0.7883372931515985, "grad_norm": 230.0, "learning_rate": 1.1295871783504652e-05, "loss": 12.1877, "step": 18913 }, { "epoch": 0.7883789754491267, "grad_norm": 174.0, "learning_rate": 1.1291598804361653e-05, "loss": 11.5042, "step": 18914 }, { "epoch": 0.788420657746655, "grad_norm": 136.0, "learning_rate": 1.1287326530682674e-05, "loss": 6.938, "step": 18915 }, { "epoch": 0.7884623400441833, "grad_norm": 211.0, "learning_rate": 1.1283054962545637e-05, "loss": 12.0009, "step": 18916 }, { "epoch": 0.7885040223417115, "grad_norm": 226.0, "learning_rate": 1.1278784100028334e-05, "loss": 13.0626, "step": 18917 }, { "epoch": 0.7885457046392397, "grad_norm": 131.0, "learning_rate": 1.1274513943208648e-05, "loss": 10.4379, "step": 18918 }, { "epoch": 0.788587386936768, "grad_norm": 235.0, "learning_rate": 1.1270244492164362e-05, "loss": 12.5001, "step": 18919 }, { "epoch": 0.7886290692342962, "grad_norm": 87.5, "learning_rate": 1.1265975746973333e-05, "loss": 8.376, "step": 18920 }, { "epoch": 0.7886707515318244, "grad_norm": 245.0, "learning_rate": 1.1261707707713309e-05, "loss": 12.5003, "step": 18921 }, { "epoch": 0.7887124338293526, "grad_norm": 316.0, "learning_rate": 1.1257440374462113e-05, "loss": 13.3752, "step": 18922 }, { "epoch": 0.788754116126881, "grad_norm": 146.0, "learning_rate": 1.1253173747297507e-05, "loss": 9.6255, "step": 18923 }, { "epoch": 0.7887957984244092, "grad_norm": 390.0, "learning_rate": 1.124890782629725e-05, "loss": 13.7502, "step": 18924 }, { "epoch": 0.7888374807219374, "grad_norm": 316.0, "learning_rate": 1.1244642611539086e-05, "loss": 14.5007, "step": 18925 }, { "epoch": 0.7888791630194656, "grad_norm": 424.0, "learning_rate": 1.1240378103100752e-05, "loss": 14.563, "step": 18926 }, { "epoch": 0.7889208453169939, "grad_norm": 148.0, "learning_rate": 1.1236114301059964e-05, "loss": 10.752, "step": 18927 }, { "epoch": 0.7889625276145221, "grad_norm": 388.0, "learning_rate": 1.1231851205494438e-05, "loss": 16.5003, "step": 18928 }, { "epoch": 0.7890042099120503, "grad_norm": 302.0, "learning_rate": 1.1227588816481866e-05, "loss": 10.5628, "step": 18929 }, { "epoch": 0.7890458922095785, "grad_norm": 324.0, "learning_rate": 1.1223327134099925e-05, "loss": 14.1254, "step": 18930 }, { "epoch": 0.7890875745071069, "grad_norm": 316.0, "learning_rate": 1.1219066158426294e-05, "loss": 14.7503, "step": 18931 }, { "epoch": 0.7891292568046351, "grad_norm": 448.0, "learning_rate": 1.1214805889538621e-05, "loss": 15.6897, "step": 18932 }, { "epoch": 0.7891709391021633, "grad_norm": 250.0, "learning_rate": 1.1210546327514559e-05, "loss": 13.813, "step": 18933 }, { "epoch": 0.7892126213996915, "grad_norm": 528.0, "learning_rate": 1.1206287472431731e-05, "loss": 17.3751, "step": 18934 }, { "epoch": 0.7892543036972198, "grad_norm": 370.0, "learning_rate": 1.1202029324367757e-05, "loss": 14.9378, "step": 18935 }, { "epoch": 0.789295985994748, "grad_norm": 600.0, "learning_rate": 1.1197771883400243e-05, "loss": 19.7505, "step": 18936 }, { "epoch": 0.7893376682922763, "grad_norm": 166.0, "learning_rate": 1.1193515149606766e-05, "loss": 10.5628, "step": 18937 }, { "epoch": 0.7893793505898045, "grad_norm": 96.0, "learning_rate": 1.1189259123064949e-05, "loss": 8.7516, "step": 18938 }, { "epoch": 0.7894210328873328, "grad_norm": 181.0, "learning_rate": 1.11850038038523e-05, "loss": 10.6881, "step": 18939 }, { "epoch": 0.789462715184861, "grad_norm": 219.0, "learning_rate": 1.1180749192046435e-05, "loss": 12.1877, "step": 18940 }, { "epoch": 0.7895043974823892, "grad_norm": 440.0, "learning_rate": 1.117649528772483e-05, "loss": 16.8752, "step": 18941 }, { "epoch": 0.7895460797799174, "grad_norm": 440.0, "learning_rate": 1.117224209096508e-05, "loss": 15.8127, "step": 18942 }, { "epoch": 0.7895877620774457, "grad_norm": 544.0, "learning_rate": 1.1167989601844631e-05, "loss": 17.6252, "step": 18943 }, { "epoch": 0.789629444374974, "grad_norm": 418.0, "learning_rate": 1.1163737820441045e-05, "loss": 14.5627, "step": 18944 }, { "epoch": 0.7896711266725022, "grad_norm": 736.0, "learning_rate": 1.115948674683176e-05, "loss": 19.5035, "step": 18945 }, { "epoch": 0.7897128089700304, "grad_norm": 47.25, "learning_rate": 1.1155236381094287e-05, "loss": 6.4065, "step": 18946 }, { "epoch": 0.7897544912675587, "grad_norm": 206.0, "learning_rate": 1.1150986723306085e-05, "loss": 12.5627, "step": 18947 }, { "epoch": 0.7897961735650869, "grad_norm": 306.0, "learning_rate": 1.1146737773544591e-05, "loss": 13.4377, "step": 18948 }, { "epoch": 0.7898378558626151, "grad_norm": 1328.0, "learning_rate": 1.1142489531887257e-05, "loss": 37.0021, "step": 18949 }, { "epoch": 0.7898795381601433, "grad_norm": 290.0, "learning_rate": 1.1138241998411498e-05, "loss": 13.0002, "step": 18950 }, { "epoch": 0.7899212204576717, "grad_norm": 640.0, "learning_rate": 1.1133995173194733e-05, "loss": 20.6253, "step": 18951 }, { "epoch": 0.7899629027551999, "grad_norm": 364.0, "learning_rate": 1.1129749056314349e-05, "loss": 15.3764, "step": 18952 }, { "epoch": 0.7900045850527281, "grad_norm": 286.0, "learning_rate": 1.1125503647847746e-05, "loss": 12.8128, "step": 18953 }, { "epoch": 0.7900462673502563, "grad_norm": 113.5, "learning_rate": 1.1121258947872282e-05, "loss": 9.1876, "step": 18954 }, { "epoch": 0.7900879496477846, "grad_norm": 820.0, "learning_rate": 1.1117014956465332e-05, "loss": 22.7508, "step": 18955 }, { "epoch": 0.7901296319453128, "grad_norm": 1064.0, "learning_rate": 1.1112771673704231e-05, "loss": 25.0004, "step": 18956 }, { "epoch": 0.790171314242841, "grad_norm": 350.0, "learning_rate": 1.1108529099666326e-05, "loss": 14.3756, "step": 18957 }, { "epoch": 0.7902129965403693, "grad_norm": 496.0, "learning_rate": 1.1104287234428922e-05, "loss": 16.6255, "step": 18958 }, { "epoch": 0.7902546788378976, "grad_norm": 588.0, "learning_rate": 1.1100046078069342e-05, "loss": 17.2537, "step": 18959 }, { "epoch": 0.7902963611354258, "grad_norm": 628.0, "learning_rate": 1.1095805630664874e-05, "loss": 18.8782, "step": 18960 }, { "epoch": 0.790338043432954, "grad_norm": 225.0, "learning_rate": 1.1091565892292787e-05, "loss": 12.3131, "step": 18961 }, { "epoch": 0.7903797257304823, "grad_norm": 532.0, "learning_rate": 1.108732686303039e-05, "loss": 17.7506, "step": 18962 }, { "epoch": 0.7904214080280105, "grad_norm": 600.0, "learning_rate": 1.1083088542954895e-05, "loss": 18.6253, "step": 18963 }, { "epoch": 0.7904630903255387, "grad_norm": 194.0, "learning_rate": 1.1078850932143587e-05, "loss": 12.3765, "step": 18964 }, { "epoch": 0.790504772623067, "grad_norm": 120.0, "learning_rate": 1.1074614030673652e-05, "loss": 10.7506, "step": 18965 }, { "epoch": 0.7905464549205953, "grad_norm": 364.0, "learning_rate": 1.107037783862236e-05, "loss": 15.1892, "step": 18966 }, { "epoch": 0.7905881372181235, "grad_norm": 262.0, "learning_rate": 1.1066142356066856e-05, "loss": 14.1884, "step": 18967 }, { "epoch": 0.7906298195156517, "grad_norm": 848.0, "learning_rate": 1.1061907583084397e-05, "loss": 24.8768, "step": 18968 }, { "epoch": 0.7906715018131799, "grad_norm": 164.0, "learning_rate": 1.1057673519752099e-05, "loss": 9.376, "step": 18969 }, { "epoch": 0.7907131841107082, "grad_norm": 364.0, "learning_rate": 1.1053440166147184e-05, "loss": 13.5628, "step": 18970 }, { "epoch": 0.7907548664082364, "grad_norm": 106.0, "learning_rate": 1.1049207522346755e-05, "loss": 9.6883, "step": 18971 }, { "epoch": 0.7907965487057647, "grad_norm": 720.0, "learning_rate": 1.1044975588427986e-05, "loss": 20.6252, "step": 18972 }, { "epoch": 0.7908382310032929, "grad_norm": 1424.0, "learning_rate": 1.1040744364467997e-05, "loss": 29.129, "step": 18973 }, { "epoch": 0.7908799133008212, "grad_norm": 69.5, "learning_rate": 1.10365138505439e-05, "loss": 8.1881, "step": 18974 }, { "epoch": 0.7909215955983494, "grad_norm": 442.0, "learning_rate": 1.1032284046732799e-05, "loss": 16.2502, "step": 18975 }, { "epoch": 0.7909632778958776, "grad_norm": 476.0, "learning_rate": 1.1028054953111782e-05, "loss": 16.2514, "step": 18976 }, { "epoch": 0.7910049601934058, "grad_norm": 211.0, "learning_rate": 1.1023826569757922e-05, "loss": 10.5006, "step": 18977 }, { "epoch": 0.7910466424909341, "grad_norm": 231.0, "learning_rate": 1.1019598896748279e-05, "loss": 11.5629, "step": 18978 }, { "epoch": 0.7910883247884624, "grad_norm": 132.0, "learning_rate": 1.1015371934159907e-05, "loss": 11.5003, "step": 18979 }, { "epoch": 0.7911300070859906, "grad_norm": 408.0, "learning_rate": 1.1011145682069845e-05, "loss": 15.9379, "step": 18980 }, { "epoch": 0.7911716893835188, "grad_norm": 121.5, "learning_rate": 1.1006920140555115e-05, "loss": 10.1878, "step": 18981 }, { "epoch": 0.7912133716810471, "grad_norm": 344.0, "learning_rate": 1.1002695309692723e-05, "loss": 14.0633, "step": 18982 }, { "epoch": 0.7912550539785753, "grad_norm": 160.0, "learning_rate": 1.0998471189559672e-05, "loss": 10.2501, "step": 18983 }, { "epoch": 0.7912967362761035, "grad_norm": 588.0, "learning_rate": 1.0994247780232952e-05, "loss": 19.3754, "step": 18984 }, { "epoch": 0.7913384185736317, "grad_norm": 828.0, "learning_rate": 1.0990025081789523e-05, "loss": 23.3752, "step": 18985 }, { "epoch": 0.7913801008711601, "grad_norm": 392.0, "learning_rate": 1.0985803094306352e-05, "loss": 13.3756, "step": 18986 }, { "epoch": 0.7914217831686883, "grad_norm": 628.0, "learning_rate": 1.0981581817860365e-05, "loss": 18.7502, "step": 18987 }, { "epoch": 0.7914634654662165, "grad_norm": 64.0, "learning_rate": 1.0977361252528546e-05, "loss": 6.5939, "step": 18988 }, { "epoch": 0.7915051477637447, "grad_norm": 516.0, "learning_rate": 1.097314139838775e-05, "loss": 17.5002, "step": 18989 }, { "epoch": 0.791546830061273, "grad_norm": 214.0, "learning_rate": 1.0968922255514947e-05, "loss": 11.1882, "step": 18990 }, { "epoch": 0.7915885123588012, "grad_norm": 292.0, "learning_rate": 1.0964703823986971e-05, "loss": 14.314, "step": 18991 }, { "epoch": 0.7916301946563294, "grad_norm": 400.0, "learning_rate": 1.0960486103880762e-05, "loss": 15.8129, "step": 18992 }, { "epoch": 0.7916718769538577, "grad_norm": 688.0, "learning_rate": 1.0956269095273137e-05, "loss": 20.2505, "step": 18993 }, { "epoch": 0.791713559251386, "grad_norm": 139.0, "learning_rate": 1.0952052798240992e-05, "loss": 7.1253, "step": 18994 }, { "epoch": 0.7917552415489142, "grad_norm": 588.0, "learning_rate": 1.0947837212861128e-05, "loss": 17.8803, "step": 18995 }, { "epoch": 0.7917969238464424, "grad_norm": 776.0, "learning_rate": 1.0943622339210414e-05, "loss": 21.3764, "step": 18996 }, { "epoch": 0.7918386061439706, "grad_norm": 184.0, "learning_rate": 1.0939408177365646e-05, "loss": 10.8128, "step": 18997 }, { "epoch": 0.7918802884414989, "grad_norm": 181.0, "learning_rate": 1.0935194727403637e-05, "loss": 9.2504, "step": 18998 }, { "epoch": 0.7919219707390271, "grad_norm": 604.0, "learning_rate": 1.0930981989401163e-05, "loss": 18.5027, "step": 18999 }, { "epoch": 0.7919636530365554, "grad_norm": 163.0, "learning_rate": 1.0926769963435019e-05, "loss": 12.0011, "step": 19000 }, { "epoch": 0.7920053353340836, "grad_norm": 94.0, "learning_rate": 1.0922558649581954e-05, "loss": 7.8754, "step": 19001 }, { "epoch": 0.7920470176316119, "grad_norm": 736.0, "learning_rate": 1.091834804791873e-05, "loss": 24.8753, "step": 19002 }, { "epoch": 0.7920886999291401, "grad_norm": 69.0, "learning_rate": 1.0914138158522086e-05, "loss": 7.5943, "step": 19003 }, { "epoch": 0.7921303822266683, "grad_norm": 306.0, "learning_rate": 1.0909928981468737e-05, "loss": 14.5627, "step": 19004 }, { "epoch": 0.7921720645241965, "grad_norm": 298.0, "learning_rate": 1.0905720516835406e-05, "loss": 12.9378, "step": 19005 }, { "epoch": 0.7922137468217249, "grad_norm": 1072.0, "learning_rate": 1.090151276469879e-05, "loss": 23.2556, "step": 19006 }, { "epoch": 0.7922554291192531, "grad_norm": 101.0, "learning_rate": 1.089730572513557e-05, "loss": 9.1259, "step": 19007 }, { "epoch": 0.7922971114167813, "grad_norm": 520.0, "learning_rate": 1.0893099398222428e-05, "loss": 17.8752, "step": 19008 }, { "epoch": 0.7923387937143095, "grad_norm": 628.0, "learning_rate": 1.0888893784036025e-05, "loss": 18.8757, "step": 19009 }, { "epoch": 0.7923804760118378, "grad_norm": 576.0, "learning_rate": 1.0884688882652998e-05, "loss": 18.6297, "step": 19010 }, { "epoch": 0.792422158309366, "grad_norm": 342.0, "learning_rate": 1.0880484694149978e-05, "loss": 15.5627, "step": 19011 }, { "epoch": 0.7924638406068942, "grad_norm": 290.0, "learning_rate": 1.0876281218603623e-05, "loss": 12.1879, "step": 19012 }, { "epoch": 0.7925055229044224, "grad_norm": 458.0, "learning_rate": 1.087207845609049e-05, "loss": 17.5011, "step": 19013 }, { "epoch": 0.7925472052019508, "grad_norm": 350.0, "learning_rate": 1.0867876406687227e-05, "loss": 14.3128, "step": 19014 }, { "epoch": 0.792588887499479, "grad_norm": 125.5, "learning_rate": 1.0863675070470364e-05, "loss": 9.8127, "step": 19015 }, { "epoch": 0.7926305697970072, "grad_norm": 183.0, "learning_rate": 1.0859474447516526e-05, "loss": 10.7508, "step": 19016 }, { "epoch": 0.7926722520945354, "grad_norm": 426.0, "learning_rate": 1.085527453790221e-05, "loss": 15.8754, "step": 19017 }, { "epoch": 0.7927139343920637, "grad_norm": 440.0, "learning_rate": 1.085107534170402e-05, "loss": 17.5002, "step": 19018 }, { "epoch": 0.7927556166895919, "grad_norm": 175.0, "learning_rate": 1.0846876858998428e-05, "loss": 9.8753, "step": 19019 }, { "epoch": 0.7927972989871201, "grad_norm": 154.0, "learning_rate": 1.0842679089862013e-05, "loss": 9.9383, "step": 19020 }, { "epoch": 0.7928389812846484, "grad_norm": 278.0, "learning_rate": 1.0838482034371212e-05, "loss": 13.8752, "step": 19021 }, { "epoch": 0.7928806635821767, "grad_norm": 238.0, "learning_rate": 1.0834285692602574e-05, "loss": 12.7501, "step": 19022 }, { "epoch": 0.7929223458797049, "grad_norm": 392.0, "learning_rate": 1.0830090064632553e-05, "loss": 14.6877, "step": 19023 }, { "epoch": 0.7929640281772331, "grad_norm": 294.0, "learning_rate": 1.0825895150537618e-05, "loss": 13.3754, "step": 19024 }, { "epoch": 0.7930057104747613, "grad_norm": 225.0, "learning_rate": 1.082170095039422e-05, "loss": 10.8752, "step": 19025 }, { "epoch": 0.7930473927722896, "grad_norm": 490.0, "learning_rate": 1.0817507464278803e-05, "loss": 16.1252, "step": 19026 }, { "epoch": 0.7930890750698179, "grad_norm": 134.0, "learning_rate": 1.0813314692267783e-05, "loss": 11.0629, "step": 19027 }, { "epoch": 0.7931307573673461, "grad_norm": 604.0, "learning_rate": 1.0809122634437591e-05, "loss": 20.2506, "step": 19028 }, { "epoch": 0.7931724396648743, "grad_norm": 312.0, "learning_rate": 1.0804931290864618e-05, "loss": 13.8757, "step": 19029 }, { "epoch": 0.7932141219624026, "grad_norm": 180.0, "learning_rate": 1.0800740661625247e-05, "loss": 12.2511, "step": 19030 }, { "epoch": 0.7932558042599308, "grad_norm": 374.0, "learning_rate": 1.0796550746795859e-05, "loss": 14.6253, "step": 19031 }, { "epoch": 0.793297486557459, "grad_norm": 184.0, "learning_rate": 1.0792361546452811e-05, "loss": 10.1254, "step": 19032 }, { "epoch": 0.7933391688549873, "grad_norm": 298.0, "learning_rate": 1.0788173060672457e-05, "loss": 12.0629, "step": 19033 }, { "epoch": 0.7933808511525156, "grad_norm": 290.0, "learning_rate": 1.078398528953113e-05, "loss": 12.3753, "step": 19034 }, { "epoch": 0.7934225334500438, "grad_norm": 154.0, "learning_rate": 1.0779798233105154e-05, "loss": 11.6883, "step": 19035 }, { "epoch": 0.793464215747572, "grad_norm": 166.0, "learning_rate": 1.0775611891470832e-05, "loss": 9.7502, "step": 19036 }, { "epoch": 0.7935058980451003, "grad_norm": 214.0, "learning_rate": 1.0771426264704455e-05, "loss": 11.6878, "step": 19037 }, { "epoch": 0.7935475803426285, "grad_norm": 270.0, "learning_rate": 1.0767241352882345e-05, "loss": 13.4377, "step": 19038 }, { "epoch": 0.7935892626401567, "grad_norm": 300.0, "learning_rate": 1.0763057156080714e-05, "loss": 13.9377, "step": 19039 }, { "epoch": 0.7936309449376849, "grad_norm": 932.0, "learning_rate": 1.075887367437588e-05, "loss": 28.377, "step": 19040 }, { "epoch": 0.7936726272352133, "grad_norm": 480.0, "learning_rate": 1.0754690907844028e-05, "loss": 16.3757, "step": 19041 }, { "epoch": 0.7937143095327415, "grad_norm": 270.0, "learning_rate": 1.0750508856561443e-05, "loss": 13.3134, "step": 19042 }, { "epoch": 0.7937559918302697, "grad_norm": 336.0, "learning_rate": 1.0746327520604294e-05, "loss": 14.0007, "step": 19043 }, { "epoch": 0.7937976741277979, "grad_norm": 412.0, "learning_rate": 1.0742146900048839e-05, "loss": 15.9379, "step": 19044 }, { "epoch": 0.7938393564253262, "grad_norm": 228.0, "learning_rate": 1.073796699497121e-05, "loss": 12.1254, "step": 19045 }, { "epoch": 0.7938810387228544, "grad_norm": 244.0, "learning_rate": 1.0733787805447648e-05, "loss": 13.1254, "step": 19046 }, { "epoch": 0.7939227210203826, "grad_norm": 724.0, "learning_rate": 1.0729609331554263e-05, "loss": 21.2504, "step": 19047 }, { "epoch": 0.7939644033179108, "grad_norm": 117.0, "learning_rate": 1.0725431573367245e-05, "loss": 9.4376, "step": 19048 }, { "epoch": 0.7940060856154392, "grad_norm": 194.0, "learning_rate": 1.0721254530962726e-05, "loss": 10.3753, "step": 19049 }, { "epoch": 0.7940477679129674, "grad_norm": 324.0, "learning_rate": 1.071707820441683e-05, "loss": 14.313, "step": 19050 }, { "epoch": 0.7940894502104956, "grad_norm": 920.0, "learning_rate": 1.0712902593805669e-05, "loss": 25.1259, "step": 19051 }, { "epoch": 0.7941311325080238, "grad_norm": 620.0, "learning_rate": 1.0708727699205346e-05, "loss": 18.6254, "step": 19052 }, { "epoch": 0.7941728148055521, "grad_norm": 322.0, "learning_rate": 1.0704553520691946e-05, "loss": 13.8131, "step": 19053 }, { "epoch": 0.7942144971030803, "grad_norm": 316.0, "learning_rate": 1.0700380058341553e-05, "loss": 14.1254, "step": 19054 }, { "epoch": 0.7942561794006086, "grad_norm": 212.0, "learning_rate": 1.0696207312230217e-05, "loss": 12.1253, "step": 19055 }, { "epoch": 0.7942978616981368, "grad_norm": 384.0, "learning_rate": 1.0692035282433993e-05, "loss": 15.1877, "step": 19056 }, { "epoch": 0.7943395439956651, "grad_norm": 712.0, "learning_rate": 1.0687863969028916e-05, "loss": 19.5035, "step": 19057 }, { "epoch": 0.7943812262931933, "grad_norm": 372.0, "learning_rate": 1.0683693372091008e-05, "loss": 15.1878, "step": 19058 }, { "epoch": 0.7944229085907215, "grad_norm": 71.5, "learning_rate": 1.0679523491696281e-05, "loss": 9.1257, "step": 19059 }, { "epoch": 0.7944645908882497, "grad_norm": 384.0, "learning_rate": 1.0675354327920722e-05, "loss": 11.5628, "step": 19060 }, { "epoch": 0.794506273185778, "grad_norm": 426.0, "learning_rate": 1.0671185880840329e-05, "loss": 16.1266, "step": 19061 }, { "epoch": 0.7945479554833063, "grad_norm": 1328.0, "learning_rate": 1.066701815053106e-05, "loss": 29.1305, "step": 19062 }, { "epoch": 0.7945896377808345, "grad_norm": 150.0, "learning_rate": 1.0662851137068864e-05, "loss": 9.8127, "step": 19063 }, { "epoch": 0.7946313200783627, "grad_norm": 320.0, "learning_rate": 1.0658684840529726e-05, "loss": 14.7507, "step": 19064 }, { "epoch": 0.794673002375891, "grad_norm": 728.0, "learning_rate": 1.0654519260989521e-05, "loss": 21.2509, "step": 19065 }, { "epoch": 0.7947146846734192, "grad_norm": 185.0, "learning_rate": 1.0650354398524226e-05, "loss": 11.6879, "step": 19066 }, { "epoch": 0.7947563669709474, "grad_norm": 152.0, "learning_rate": 1.0646190253209687e-05, "loss": 9.3753, "step": 19067 }, { "epoch": 0.7947980492684756, "grad_norm": 484.0, "learning_rate": 1.0642026825121848e-05, "loss": 17.0003, "step": 19068 }, { "epoch": 0.794839731566004, "grad_norm": 266.0, "learning_rate": 1.0637864114336538e-05, "loss": 12.7504, "step": 19069 }, { "epoch": 0.7948814138635322, "grad_norm": 228.0, "learning_rate": 1.0633702120929678e-05, "loss": 11.0628, "step": 19070 }, { "epoch": 0.7949230961610604, "grad_norm": 484.0, "learning_rate": 1.062954084497706e-05, "loss": 17.2501, "step": 19071 }, { "epoch": 0.7949647784585886, "grad_norm": 516.0, "learning_rate": 1.062538028655457e-05, "loss": 18.0002, "step": 19072 }, { "epoch": 0.7950064607561169, "grad_norm": 256.0, "learning_rate": 1.0621220445738023e-05, "loss": 13.6254, "step": 19073 }, { "epoch": 0.7950481430536451, "grad_norm": 356.0, "learning_rate": 1.0617061322603229e-05, "loss": 14.9379, "step": 19074 }, { "epoch": 0.7950898253511733, "grad_norm": 292.0, "learning_rate": 1.0612902917225987e-05, "loss": 11.3128, "step": 19075 }, { "epoch": 0.7951315076487016, "grad_norm": 348.0, "learning_rate": 1.0608745229682088e-05, "loss": 14.5002, "step": 19076 }, { "epoch": 0.7951731899462299, "grad_norm": 620.0, "learning_rate": 1.06045882600473e-05, "loss": 17.3774, "step": 19077 }, { "epoch": 0.7952148722437581, "grad_norm": 199.0, "learning_rate": 1.0600432008397387e-05, "loss": 11.3128, "step": 19078 }, { "epoch": 0.7952565545412863, "grad_norm": 896.0, "learning_rate": 1.0596276474808103e-05, "loss": 19.5044, "step": 19079 }, { "epoch": 0.7952982368388145, "grad_norm": 528.0, "learning_rate": 1.0592121659355175e-05, "loss": 15.0002, "step": 19080 }, { "epoch": 0.7953399191363428, "grad_norm": 249.0, "learning_rate": 1.058796756211433e-05, "loss": 12.0006, "step": 19081 }, { "epoch": 0.795381601433871, "grad_norm": 1040.0, "learning_rate": 1.0583814183161278e-05, "loss": 22.5045, "step": 19082 }, { "epoch": 0.7954232837313993, "grad_norm": 956.0, "learning_rate": 1.0579661522571705e-05, "loss": 23.5013, "step": 19083 }, { "epoch": 0.7954649660289275, "grad_norm": 720.0, "learning_rate": 1.0575509580421306e-05, "loss": 20.3791, "step": 19084 }, { "epoch": 0.7955066483264558, "grad_norm": 244.0, "learning_rate": 1.0571358356785743e-05, "loss": 12.563, "step": 19085 }, { "epoch": 0.795548330623984, "grad_norm": 217.0, "learning_rate": 1.0567207851740668e-05, "loss": 9.8128, "step": 19086 }, { "epoch": 0.7955900129215122, "grad_norm": 434.0, "learning_rate": 1.0563058065361725e-05, "loss": 16.3755, "step": 19087 }, { "epoch": 0.7956316952190404, "grad_norm": 420.0, "learning_rate": 1.0558908997724576e-05, "loss": 15.9377, "step": 19088 }, { "epoch": 0.7956733775165687, "grad_norm": 154.0, "learning_rate": 1.0554760648904782e-05, "loss": 11.0628, "step": 19089 }, { "epoch": 0.795715059814097, "grad_norm": 374.0, "learning_rate": 1.0550613018978001e-05, "loss": 15.0002, "step": 19090 }, { "epoch": 0.7957567421116252, "grad_norm": 592.0, "learning_rate": 1.0546466108019776e-05, "loss": 18.5002, "step": 19091 }, { "epoch": 0.7957984244091534, "grad_norm": 187.0, "learning_rate": 1.0542319916105736e-05, "loss": 11.3753, "step": 19092 }, { "epoch": 0.7958401067066817, "grad_norm": 316.0, "learning_rate": 1.0538174443311394e-05, "loss": 13.4381, "step": 19093 }, { "epoch": 0.7958817890042099, "grad_norm": 348.0, "learning_rate": 1.053402968971235e-05, "loss": 12.8763, "step": 19094 }, { "epoch": 0.7959234713017381, "grad_norm": 216.0, "learning_rate": 1.0529885655384092e-05, "loss": 13.0005, "step": 19095 }, { "epoch": 0.7959651535992663, "grad_norm": 528.0, "learning_rate": 1.0525742340402196e-05, "loss": 18.251, "step": 19096 }, { "epoch": 0.7960068358967947, "grad_norm": 768.0, "learning_rate": 1.0521599744842125e-05, "loss": 22.8752, "step": 19097 }, { "epoch": 0.7960485181943229, "grad_norm": 692.0, "learning_rate": 1.0517457868779418e-05, "loss": 20.1258, "step": 19098 }, { "epoch": 0.7960902004918511, "grad_norm": 190.0, "learning_rate": 1.0513316712289545e-05, "loss": 11.6252, "step": 19099 }, { "epoch": 0.7961318827893793, "grad_norm": 1360.0, "learning_rate": 1.050917627544798e-05, "loss": 34.0008, "step": 19100 }, { "epoch": 0.7961735650869076, "grad_norm": 163.0, "learning_rate": 1.0505036558330184e-05, "loss": 8.7503, "step": 19101 }, { "epoch": 0.7962152473844358, "grad_norm": 179.0, "learning_rate": 1.0500897561011601e-05, "loss": 12.2505, "step": 19102 }, { "epoch": 0.796256929681964, "grad_norm": 516.0, "learning_rate": 1.049675928356767e-05, "loss": 14.2506, "step": 19103 }, { "epoch": 0.7962986119794923, "grad_norm": 412.0, "learning_rate": 1.0492621726073804e-05, "loss": 15.0632, "step": 19104 }, { "epoch": 0.7963402942770206, "grad_norm": 90.5, "learning_rate": 1.0488484888605415e-05, "loss": 7.9696, "step": 19105 }, { "epoch": 0.7963819765745488, "grad_norm": 244.0, "learning_rate": 1.04843487712379e-05, "loss": 11.5002, "step": 19106 }, { "epoch": 0.796423658872077, "grad_norm": 144.0, "learning_rate": 1.0480213374046633e-05, "loss": 11.1253, "step": 19107 }, { "epoch": 0.7964653411696053, "grad_norm": 424.0, "learning_rate": 1.0476078697106983e-05, "loss": 15.6255, "step": 19108 }, { "epoch": 0.7965070234671335, "grad_norm": 336.0, "learning_rate": 1.0471944740494311e-05, "loss": 12.3777, "step": 19109 }, { "epoch": 0.7965487057646617, "grad_norm": 187.0, "learning_rate": 1.0467811504283959e-05, "loss": 11.1252, "step": 19110 }, { "epoch": 0.79659038806219, "grad_norm": 160.0, "learning_rate": 1.0463678988551246e-05, "loss": 8.2503, "step": 19111 }, { "epoch": 0.7966320703597183, "grad_norm": 668.0, "learning_rate": 1.0459547193371493e-05, "loss": 21.0002, "step": 19112 }, { "epoch": 0.7966737526572465, "grad_norm": 314.0, "learning_rate": 1.0455416118819993e-05, "loss": 13.8128, "step": 19113 }, { "epoch": 0.7967154349547747, "grad_norm": 576.0, "learning_rate": 1.0451285764972068e-05, "loss": 16.8801, "step": 19114 }, { "epoch": 0.7967571172523029, "grad_norm": 252.0, "learning_rate": 1.0447156131902947e-05, "loss": 12.8129, "step": 19115 }, { "epoch": 0.7967987995498312, "grad_norm": 326.0, "learning_rate": 1.044302721968794e-05, "loss": 13.7504, "step": 19116 }, { "epoch": 0.7968404818473594, "grad_norm": 129.0, "learning_rate": 1.043889902840225e-05, "loss": 10.1253, "step": 19117 }, { "epoch": 0.7968821641448877, "grad_norm": 696.0, "learning_rate": 1.0434771558121166e-05, "loss": 20.6253, "step": 19118 }, { "epoch": 0.7969238464424159, "grad_norm": 456.0, "learning_rate": 1.0430644808919848e-05, "loss": 15.8753, "step": 19119 }, { "epoch": 0.7969655287399442, "grad_norm": 161.0, "learning_rate": 1.0426518780873574e-05, "loss": 11.5632, "step": 19120 }, { "epoch": 0.7970072110374724, "grad_norm": 480.0, "learning_rate": 1.0422393474057479e-05, "loss": 17.2515, "step": 19121 }, { "epoch": 0.7970488933350006, "grad_norm": 616.0, "learning_rate": 1.041826888854679e-05, "loss": 21.8752, "step": 19122 }, { "epoch": 0.7970905756325288, "grad_norm": 868.0, "learning_rate": 1.0414145024416665e-05, "loss": 24.5014, "step": 19123 }, { "epoch": 0.7971322579300572, "grad_norm": 252.0, "learning_rate": 1.041002188174226e-05, "loss": 12.6877, "step": 19124 }, { "epoch": 0.7971739402275854, "grad_norm": 160.0, "learning_rate": 1.0405899460598723e-05, "loss": 9.5627, "step": 19125 }, { "epoch": 0.7972156225251136, "grad_norm": 300.0, "learning_rate": 1.0401777761061181e-05, "loss": 13.6879, "step": 19126 }, { "epoch": 0.7972573048226418, "grad_norm": 502.0, "learning_rate": 1.0397656783204756e-05, "loss": 12.8787, "step": 19127 }, { "epoch": 0.7972989871201701, "grad_norm": 676.0, "learning_rate": 1.0393536527104547e-05, "loss": 20.2502, "step": 19128 }, { "epoch": 0.7973406694176983, "grad_norm": 470.0, "learning_rate": 1.038941699283566e-05, "loss": 16.5002, "step": 19129 }, { "epoch": 0.7973823517152265, "grad_norm": 318.0, "learning_rate": 1.0385298180473158e-05, "loss": 13.7502, "step": 19130 }, { "epoch": 0.7974240340127547, "grad_norm": 264.0, "learning_rate": 1.0381180090092113e-05, "loss": 11.0004, "step": 19131 }, { "epoch": 0.7974657163102831, "grad_norm": 296.0, "learning_rate": 1.0377062721767578e-05, "loss": 13.1255, "step": 19132 }, { "epoch": 0.7975073986078113, "grad_norm": 310.0, "learning_rate": 1.0372946075574596e-05, "loss": 13.3751, "step": 19133 }, { "epoch": 0.7975490809053395, "grad_norm": 139.0, "learning_rate": 1.0368830151588188e-05, "loss": 7.8447, "step": 19134 }, { "epoch": 0.7975907632028677, "grad_norm": 580.0, "learning_rate": 1.036471494988337e-05, "loss": 17.7522, "step": 19135 }, { "epoch": 0.797632445500396, "grad_norm": 123.5, "learning_rate": 1.0360600470535137e-05, "loss": 10.7504, "step": 19136 }, { "epoch": 0.7976741277979242, "grad_norm": 736.0, "learning_rate": 1.0356486713618468e-05, "loss": 18.7548, "step": 19137 }, { "epoch": 0.7977158100954524, "grad_norm": 149.0, "learning_rate": 1.0352373679208372e-05, "loss": 10.5627, "step": 19138 }, { "epoch": 0.7977574923929807, "grad_norm": 480.0, "learning_rate": 1.0348261367379764e-05, "loss": 17.1258, "step": 19139 }, { "epoch": 0.797799174690509, "grad_norm": 233.0, "learning_rate": 1.0344149778207635e-05, "loss": 12.1253, "step": 19140 }, { "epoch": 0.7978408569880372, "grad_norm": 115.0, "learning_rate": 1.034003891176687e-05, "loss": 10.188, "step": 19141 }, { "epoch": 0.7978825392855654, "grad_norm": 462.0, "learning_rate": 1.0335928768132447e-05, "loss": 16.379, "step": 19142 }, { "epoch": 0.7979242215830936, "grad_norm": 420.0, "learning_rate": 1.0331819347379213e-05, "loss": 15.1878, "step": 19143 }, { "epoch": 0.7979659038806219, "grad_norm": 358.0, "learning_rate": 1.0327710649582117e-05, "loss": 11.9377, "step": 19144 }, { "epoch": 0.7980075861781502, "grad_norm": 344.0, "learning_rate": 1.0323602674815997e-05, "loss": 13.8128, "step": 19145 }, { "epoch": 0.7980492684756784, "grad_norm": 92.0, "learning_rate": 1.0319495423155761e-05, "loss": 6.3443, "step": 19146 }, { "epoch": 0.7980909507732066, "grad_norm": 215.0, "learning_rate": 1.031538889467622e-05, "loss": 11.5628, "step": 19147 }, { "epoch": 0.7981326330707349, "grad_norm": 33.25, "learning_rate": 1.031128308945225e-05, "loss": 6.1566, "step": 19148 }, { "epoch": 0.7981743153682631, "grad_norm": 500.0, "learning_rate": 1.0307178007558671e-05, "loss": 16.2504, "step": 19149 }, { "epoch": 0.7982159976657913, "grad_norm": 460.0, "learning_rate": 1.03030736490703e-05, "loss": 15.7546, "step": 19150 }, { "epoch": 0.7982576799633195, "grad_norm": 394.0, "learning_rate": 1.0298970014061926e-05, "loss": 15.5628, "step": 19151 }, { "epoch": 0.7982993622608479, "grad_norm": 211.0, "learning_rate": 1.0294867102608358e-05, "loss": 11.8127, "step": 19152 }, { "epoch": 0.7983410445583761, "grad_norm": 201.0, "learning_rate": 1.0290764914784356e-05, "loss": 10.2503, "step": 19153 }, { "epoch": 0.7983827268559043, "grad_norm": 145.0, "learning_rate": 1.028666345066469e-05, "loss": 9.2502, "step": 19154 }, { "epoch": 0.7984244091534325, "grad_norm": 92.5, "learning_rate": 1.0282562710324106e-05, "loss": 9.1257, "step": 19155 }, { "epoch": 0.7984660914509608, "grad_norm": 232.0, "learning_rate": 1.0278462693837348e-05, "loss": 11.689, "step": 19156 }, { "epoch": 0.798507773748489, "grad_norm": 356.0, "learning_rate": 1.027436340127913e-05, "loss": 14.6877, "step": 19157 }, { "epoch": 0.7985494560460172, "grad_norm": 278.0, "learning_rate": 1.0270264832724164e-05, "loss": 12.5627, "step": 19158 }, { "epoch": 0.7985911383435454, "grad_norm": 153.0, "learning_rate": 1.026616698824715e-05, "loss": 11.0004, "step": 19159 }, { "epoch": 0.7986328206410738, "grad_norm": 180.0, "learning_rate": 1.0262069867922768e-05, "loss": 11.5007, "step": 19160 }, { "epoch": 0.798674502938602, "grad_norm": 768.0, "learning_rate": 1.0257973471825694e-05, "loss": 19.0047, "step": 19161 }, { "epoch": 0.7987161852361302, "grad_norm": 984.0, "learning_rate": 1.0253877800030581e-05, "loss": 23.3752, "step": 19162 }, { "epoch": 0.7987578675336584, "grad_norm": 392.0, "learning_rate": 1.0249782852612056e-05, "loss": 14.9381, "step": 19163 }, { "epoch": 0.7987995498311867, "grad_norm": 352.0, "learning_rate": 1.0245688629644796e-05, "loss": 14.0627, "step": 19164 }, { "epoch": 0.7988412321287149, "grad_norm": 264.0, "learning_rate": 1.0241595131203364e-05, "loss": 10.7515, "step": 19165 }, { "epoch": 0.7988829144262432, "grad_norm": 808.0, "learning_rate": 1.0237502357362416e-05, "loss": 19.2549, "step": 19166 }, { "epoch": 0.7989245967237714, "grad_norm": 328.0, "learning_rate": 1.0233410308196495e-05, "loss": 14.5628, "step": 19167 }, { "epoch": 0.7989662790212997, "grad_norm": 688.0, "learning_rate": 1.0229318983780223e-05, "loss": 22.0002, "step": 19168 }, { "epoch": 0.7990079613188279, "grad_norm": 130.0, "learning_rate": 1.0225228384188119e-05, "loss": 5.7816, "step": 19169 }, { "epoch": 0.7990496436163561, "grad_norm": 486.0, "learning_rate": 1.0221138509494782e-05, "loss": 15.4377, "step": 19170 }, { "epoch": 0.7990913259138843, "grad_norm": 169.0, "learning_rate": 1.0217049359774705e-05, "loss": 10.1877, "step": 19171 }, { "epoch": 0.7991330082114126, "grad_norm": 376.0, "learning_rate": 1.0212960935102445e-05, "loss": 14.8756, "step": 19172 }, { "epoch": 0.7991746905089409, "grad_norm": 636.0, "learning_rate": 1.0208873235552501e-05, "loss": 17.7535, "step": 19173 }, { "epoch": 0.7992163728064691, "grad_norm": 256.0, "learning_rate": 1.0204786261199378e-05, "loss": 11.0629, "step": 19174 }, { "epoch": 0.7992580551039973, "grad_norm": 394.0, "learning_rate": 1.0200700012117559e-05, "loss": 14.8754, "step": 19175 }, { "epoch": 0.7992997374015256, "grad_norm": 149.0, "learning_rate": 1.0196614488381518e-05, "loss": 9.6252, "step": 19176 }, { "epoch": 0.7993414196990538, "grad_norm": 113.0, "learning_rate": 1.0192529690065705e-05, "loss": 8.9382, "step": 19177 }, { "epoch": 0.799383101996582, "grad_norm": 249.0, "learning_rate": 1.0188445617244574e-05, "loss": 11.5629, "step": 19178 }, { "epoch": 0.7994247842941103, "grad_norm": 452.0, "learning_rate": 1.018436226999256e-05, "loss": 17.2504, "step": 19179 }, { "epoch": 0.7994664665916386, "grad_norm": 510.0, "learning_rate": 1.0180279648384077e-05, "loss": 16.2532, "step": 19180 }, { "epoch": 0.7995081488891668, "grad_norm": 296.0, "learning_rate": 1.0176197752493528e-05, "loss": 12.3753, "step": 19181 }, { "epoch": 0.799549831186695, "grad_norm": 410.0, "learning_rate": 1.0172116582395313e-05, "loss": 15.2511, "step": 19182 }, { "epoch": 0.7995915134842233, "grad_norm": 364.0, "learning_rate": 1.0168036138163811e-05, "loss": 14.5628, "step": 19183 }, { "epoch": 0.7996331957817515, "grad_norm": 596.0, "learning_rate": 1.0163956419873382e-05, "loss": 19.6253, "step": 19184 }, { "epoch": 0.7996748780792797, "grad_norm": 492.0, "learning_rate": 1.0159877427598386e-05, "loss": 18.5001, "step": 19185 }, { "epoch": 0.7997165603768079, "grad_norm": 338.0, "learning_rate": 1.0155799161413159e-05, "loss": 14.5003, "step": 19186 }, { "epoch": 0.7997582426743363, "grad_norm": 201.0, "learning_rate": 1.0151721621392036e-05, "loss": 11.4381, "step": 19187 }, { "epoch": 0.7997999249718645, "grad_norm": 170.0, "learning_rate": 1.0147644807609313e-05, "loss": 9.6877, "step": 19188 }, { "epoch": 0.7998416072693927, "grad_norm": 255.0, "learning_rate": 1.0143568720139295e-05, "loss": 11.6259, "step": 19189 }, { "epoch": 0.7998832895669209, "grad_norm": 412.0, "learning_rate": 1.0139493359056302e-05, "loss": 14.3752, "step": 19190 }, { "epoch": 0.7999249718644492, "grad_norm": 119.5, "learning_rate": 1.0135418724434553e-05, "loss": 7.6564, "step": 19191 }, { "epoch": 0.7999666541619774, "grad_norm": 406.0, "learning_rate": 1.013134481634836e-05, "loss": 14.8753, "step": 19192 }, { "epoch": 0.8000083364595056, "grad_norm": 402.0, "learning_rate": 1.0127271634871927e-05, "loss": 16.2502, "step": 19193 }, { "epoch": 0.8000500187570339, "grad_norm": 214.0, "learning_rate": 1.0123199180079529e-05, "loss": 11.1255, "step": 19194 }, { "epoch": 0.8000917010545622, "grad_norm": 198.0, "learning_rate": 1.0119127452045341e-05, "loss": 11.0002, "step": 19195 }, { "epoch": 0.8001333833520904, "grad_norm": 243.0, "learning_rate": 1.0115056450843624e-05, "loss": 12.3756, "step": 19196 }, { "epoch": 0.8001750656496186, "grad_norm": 102.5, "learning_rate": 1.0110986176548514e-05, "loss": 10.3128, "step": 19197 }, { "epoch": 0.8002167479471468, "grad_norm": 66.0, "learning_rate": 1.0106916629234236e-05, "loss": 8.1256, "step": 19198 }, { "epoch": 0.8002584302446751, "grad_norm": 181.0, "learning_rate": 1.0102847808974947e-05, "loss": 11.5006, "step": 19199 }, { "epoch": 0.8003001125422033, "grad_norm": 466.0, "learning_rate": 1.0098779715844798e-05, "loss": 16.7505, "step": 19200 } ], "logging_steps": 1.0, "max_steps": 23991, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }